Apache Avro: Java Code Generation and Specific Mapping
Although code generation is not required to use Apache Avro, the Java and C++ implementations can generate code to represent the data described by an Avro schema. If the schema is known before we read or write data, code generation can improve performance. In Java, this is called the specific mapping.
1. Code Generation
Suppose we have a schema file demorecord.avsc as below.
{
"namespace": "avro",
"type": "record",
"name": "DemoRecord",
"aliases": ["LinkedLongs"], // old name for this
"fields" : [
{"name": "desp", "type": "string"},
{"name": "value", "type": "int"}, // each element has an int value
{"name": "next", "type": ["DemoRecord", "null"]} // optional next element
]
}
We can use the following command to generate the Java class.
java -cp <path_to_avro_tools>/avro-tools-1.6.3.jar org.apache.avro.tool.Main compile schema <schema_file> <output_directory>
In our example, we have the avro-tools-1.6.3.jar in the local directory and we simply output to the current directory. Then the command is as below.
java -cp avro-tools-1.6.3.jar org.apache.avro.tool.Main compile schema demorecord.avsc .
The generated code is as below.
/**
* Autogenerated by Avro
*
* DO NOT EDIT DIRECTLY
*/
package avro;
/**
* Specific-mapping class for the "DemoRecord" record in namespace "avro".
* The record has three fields, addressed by position in get/put and in the Builder:
* 0 = desp (string), 1 = value (int), 2 = next (union of DemoRecord and null).
* Instances can be populated through the setters or through the nested Builder.
*/
@SuppressWarnings("all")
public class DemoRecord extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
// Schema of this record, parsed once from its JSON text when the class is initialized.
public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"DemoRecord\",\"namespace\":\"avro\",\"fields\":[{\"name\":\"desp\",\"type\":\"string\"},{\"name\":\"value\",\"type\":\"int\"},{\"name\":\"next\",\"type\":[\"DemoRecord\",\"null\"]}],\"aliases\":[\"LinkedLongs\"]}");
// The fields are public but marked @Deprecated; the getters and setters below access the same fields.
@Deprecated public java.lang.CharSequence desp;
@Deprecated public int value;
@Deprecated public avro.DemoRecord next;
/** Returns the schema of this record, i.e. SCHEMA$. */
public org.apache.avro.Schema getSchema() { return SCHEMA$; }
// Used by DatumWriter. Applications should not call.
// Returns the field at the given position (0 = desp, 1 = value, 2 = next);
// any other index throws AvroRuntimeException("Bad index").
public java.lang.Object get(int field$) {
switch (field$) {
case 0: return desp;
case 1: return value;
case 2: return next;
default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
// Used by DatumReader. Applications should not call.
// Stores value$ into the field at the given position after casting it to the field's type;
// any other index throws AvroRuntimeException("Bad index").
@SuppressWarnings(value="unchecked")
public void put(int field$, java.lang.Object value$) {
switch (field$) {
case 0: desp = (java.lang.CharSequence)value$; break;
// The Integer is unboxed into the primitive int field, so a null value$ fails here.
case 1: value = (java.lang.Integer)value$; break;
case 2: next = (avro.DemoRecord)value$; break;
default: throw new org.apache.avro.AvroRuntimeException("Bad index");
}
}
/**
* Gets the value of the 'desp' field.
*/
public java.lang.CharSequence getDesp() {
return desp;
}
/**
* Sets the value of the 'desp' field.
* @param value the value to set.
*/
public void setDesp(java.lang.CharSequence value) {
this.desp = value;
}
/**
* Gets the value of the 'value' field.
* The primitive int field is returned boxed as an Integer.
*/
public java.lang.Integer getValue() {
return value;
}
/**
* Sets the value of the 'value' field.
* @param value the value to set; it is unboxed into the primitive int field, so it must not be null.
*/
public void setValue(java.lang.Integer value) {
this.value = value;
}
/**
* Gets the value of the 'next' field.
*/
public avro.DemoRecord getNext() {
return next;
}
/**
* Sets the value of the 'next' field.
* @param value the value to set.
*/
public void setNext(avro.DemoRecord value) {
this.next = value;
}
/** Creates a new DemoRecord RecordBuilder */
public static avro.DemoRecord.Builder newBuilder() {
return new avro.DemoRecord.Builder();
}
/** Creates a new DemoRecord RecordBuilder by copying an existing Builder */
public static avro.DemoRecord.Builder newBuilder(avro.DemoRecord.Builder other) {
return new avro.DemoRecord.Builder(other);
}
/** Creates a new DemoRecord RecordBuilder by copying an existing DemoRecord instance */
public static avro.DemoRecord.Builder newBuilder(avro.DemoRecord other) {
return new avro.DemoRecord.Builder(other);
}
/**
* RecordBuilder for DemoRecord instances.
* The constructors are private; instances are obtained through the static
* DemoRecord.newBuilder(...) methods. For every field the builder keeps the value
* plus a flag in fieldSetFlags() recording whether the field was explicitly set.
*/
public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<DemoRecord>
implements org.apache.avro.data.RecordBuilder<DemoRecord> {
private java.lang.CharSequence desp;
private int value;
private avro.DemoRecord next;
/** Creates a new Builder */
private Builder() {
super(avro.DemoRecord.SCHEMA$);
}
/** Creates a Builder by copying an existing Builder */
// NOTE(review): this only delegates to super(other); the desp/value/next fields declared
// in this class are not assigned here - confirm that the superclass copy is sufficient.
private Builder(avro.DemoRecord.Builder other) {
super(other);
}
/** Creates a Builder by copying an existing DemoRecord instance */
// Each field of 'other' that passes isValidValue is deep-copied into this builder
// and its set flag is raised; fields that do not pass stay unset.
private Builder(avro.DemoRecord other) {
super(avro.DemoRecord.SCHEMA$);
if (isValidValue(fields()[0], other.desp)) {
this.desp = (java.lang.CharSequence) data().deepCopy(fields()[0].schema(), other.desp);
fieldSetFlags()[0] = true;
}
if (isValidValue(fields()[1], other.value)) {
this.value = (java.lang.Integer) data().deepCopy(fields()[1].schema(), other.value);
fieldSetFlags()[1] = true;
}
if (isValidValue(fields()[2], other.next)) {
this.next = (avro.DemoRecord) data().deepCopy(fields()[2].schema(), other.next);
fieldSetFlags()[2] = true;
}
}
/** Gets the value of the 'desp' field */
public java.lang.CharSequence getDesp() {
return desp;
}
/** Sets the value of the 'desp' field */
// The value is passed to validate(...) before it is stored and the field is flagged as set.
public avro.DemoRecord.Builder setDesp(java.lang.CharSequence value) {
validate(fields()[0], value);
this.desp = value;
fieldSetFlags()[0] = true;
return this;
}
/** Checks whether the 'desp' field has been set */
public boolean hasDesp() {
return fieldSetFlags()[0];
}
/** Clears the value of the 'desp' field */
public avro.DemoRecord.Builder clearDesp() {
desp = null;
fieldSetFlags()[0] = false;
return this;
}
/** Gets the value of the 'value' field */
public java.lang.Integer getValue() {
return value;
}
/** Sets the value of the 'value' field */
// The value is passed to validate(...) before it is stored and the field is flagged as set.
public avro.DemoRecord.Builder setValue(int value) {
validate(fields()[1], value);
this.value = value;
fieldSetFlags()[1] = true;
return this;
}
/** Checks whether the 'value' field has been set */
public boolean hasValue() {
return fieldSetFlags()[1];
}
/** Clears the value of the 'value' field */
// Only the set flag is reset; the primitive int keeps its previous content,
// so getValue() on the builder still returns the old number after this call.
public avro.DemoRecord.Builder clearValue() {
fieldSetFlags()[1] = false;
return this;
}
/** Gets the value of the 'next' field */
public avro.DemoRecord getNext() {
return next;
}
/** Sets the value of the 'next' field */
// The value is passed to validate(...) before it is stored and the field is flagged as set.
public avro.DemoRecord.Builder setNext(avro.DemoRecord value) {
validate(fields()[2], value);
this.next = value;
fieldSetFlags()[2] = true;
return this;
}
/** Checks whether the 'next' field has been set */
public boolean hasNext() {
return fieldSetFlags()[2];
}
/** Clears the value of the 'next' field */
public avro.DemoRecord.Builder clearNext() {
next = null;
fieldSetFlags()[2] = false;
return this;
}
/**
* Builds a new DemoRecord from this builder.
* A field whose set flag is true takes the builder's value; any other field takes
* defaultValue(...) for that field. Any exception raised while building is rethrown
* wrapped in an AvroRuntimeException.
*/
@Override
public DemoRecord build() {
try {
DemoRecord record = new DemoRecord();
record.desp = fieldSetFlags()[0] ? this.desp : (java.lang.CharSequence) defaultValue(fields()[0]);
record.value = fieldSetFlags()[1] ? this.value : (java.lang.Integer) defaultValue(fields()[1]);
record.next = fieldSetFlags()[2] ? this.next : (avro.DemoRecord) defaultValue(fields()[2]);
return record;
} catch (Exception e) {
throw new org.apache.avro.AvroRuntimeException(e);
}
}
}
}
Another way to generate the code is to use Maven. Below is a pom.xml file that generates Java code for all schemas under the src/main/avro directory of our project; the output goes to the target/generated-sources/avro/ directory of our project.
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>avro</groupId>
<artifactId>avroDemo</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>avroDemo</name>
<url>http://maven.apache.org</url>
<properties>
<java.version>1.6</java.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.apache.avro</groupId>
<artifactId>avro-maven-plugin</artifactId>
<version>1.7.1</version>
<configuration>
</configuration>
<executions>
<execution>
<phase>generate-sources</phase>
<goals>
<goal>schema</goal>
</goals>
<configuration>
<sourceDirectory>src/main/avro</sourceDirectory>
<outputDirectory>target/generated-sources/avro/</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>1.7.1</version>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro-tools</artifactId>
<version>1.7.1</version>
</dependency>
</dependencies>
</project>
With this pom.xml file, we can run the “mvn compile” command to generate the Java code for the schemas in the src/main/avro folder.
2. Use the Generated Code
We can use the generated code to serialize data and store into a file. Below is the sample code.
/**
 * Serializes a chain of three linked DemoRecord objects into the file FNAME
 * using Avro binary encoding.
 *
 * Only the head of the chain (record 3) is handed to the writer; records 2 and 1
 * are reachable through the 'next' field and are written as nested records.
 *
 * @throws IOException if the file cannot be opened, written, or closed
 */
private static void writeData() throws IOException {
    FileOutputStream out = new FileOutputStream(FNAME);
    // try/finally instead of try-with-resources: the project targets Java 1.6.
    try {
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        SpecificDatumWriter<DemoRecord> writer =
                new SpecificDatumWriter<DemoRecord>(DemoRecord.class);

        // Use the generated setters; the public fields are marked @Deprecated.
        DemoRecord record1 = new DemoRecord();
        record1.setDesp(new Utf8("record 1"));
        record1.setValue(1);
        record1.setNext(null);

        DemoRecord record2 = new DemoRecord();
        record2.setDesp(new Utf8("record 2"));
        record2.setValue(2);
        record2.setNext(record1);

        DemoRecord record3 = new DemoRecord();
        record3.setDesp(new Utf8("record 3"));
        record3.setValue(3);
        record3.setNext(record2);

        writer.write(record3, encoder);
        // The encoder buffers output; flush before the stream is closed.
        encoder.flush();
    } finally {
        // Release the file handle even when encoding fails.
        out.close();
    }
}
Note that there’s no need for us to write all three records into the file explicitly, since record3 links to record2, which in turn links to record1. Because the next field is part of the schema, Avro follows these links and serializes the linked records as nested records.
We can later on de-serialize the data as shown below.
/**
 * Reads the record chain stored in the file FNAME and prints every element as
 * "desp:value", one per line, starting from the head of the chain.
 *
 * @throws IOException if the file cannot be opened, read, or closed
 */
private static void readData() throws IOException {
    FileInputStream in = new FileInputStream(FNAME);
    // try/finally instead of try-with-resources: the project targets Java 1.6.
    try {
        BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(in, null);
        SpecificDatumReader<DemoRecord> reader =
                new SpecificDatumReader<DemoRecord>(DemoRecord.class);

        // Use the datum returned by read(); the reader is not obliged to fill the
        // object passed in for reuse, so no reuse instance is supplied.
        DemoRecord record = reader.read(null, decoder);

        // Walk the chain through the generated getters (the public fields are
        // marked @Deprecated) until the terminating null link.
        while (record != null) {
            System.out.println(record.getDesp() + ":" + record.getValue());
            record = record.getNext();
        }
    } finally {
        // Release the file handle even when decoding fails.
        in.close();
    }
}
The output of running the code is as below.
record 3:3
record 2:2
record 1:1
You can download the source code here.
2 comments on “Apache Avro: Java Code Generation and Specific Mapping”
Leave a Reply Cancel reply
40% Discount on My Book — Android NDK Cookbook
Android NDK Cookbook ebook 40% discount with promotion code MREANC40 at Packt Publishing. The promotion code is valid until 15th June.
Categories
- Android Apps (18)
- Android Audio Editor (1)
- TS 2 (3)
- Video Converter Android (8)
- Video2Gif (1)
- Android Tutorial (26)
- Android Dev Tools (1)
- API illustrated (8)
- Multimedia API (3)
- ffmpeg on Android (4)
- NDK (6)
- UI (5)
- Animation (1)
- Code Snippet (2)
- Coding Beyond Technique (18)
- a word, a world (4)
- Bug Rectified (4)
- Programming Habit (1)
- Software as a Career (1)
- Software as User Experience (1)
- Compilers and Related (2)
- ELF (2)
- Computer Languages (31)
- C/C++ (13)
- Java (9)
- JavaScript (2)
- PHP (1)
- Python (8)
- Data Structure & Algorithms (29)
- Bits (1)
- Data Structure (5)
- Integers (10)
- BigInteger (1)
- Prime (4)
- Search (3)
- Sorting (5)
- Strings (5)
- Database (1)
- SQLite (1)
- Digital Signal Processing (33)
- Distributed Systems (17)
- Apache Cassandra (6)
- Apache Hadoop (8)
- Apache Avro (3)
- Apache Nutch (3)
- Apache Solr (1)
- Linux Study Notes (40)
- crontab (1)
- Linux Kernel Programming (8)
- Linux Programming (12)
- IPC (2)
- Linux Network Programming (5)
- Linux Signals (2)
- Linux Shell Scripting (1)
- ssh (3)
- Machinery (30)
- misc (1)
- My Ideas (1)
- My Project (3)
- Mobile Caching (1)
- Selective Decoding (2)
- My Publication (1)
- My Readings (1)
- Networking (15)
- Program for Performance (8)
- Uncategorized (1)
- Virtual Machine (2)
- Web Dev (8)
- web components (3)
- Android Apps (18)
Recent Comments
Archives
- May 2013 (1)
- April 2013 (1)
- March 2013 (4)
- December 2012 (2)
- November 2012 (6)
- October 2012 (6)
- September 2012 (3)
- August 2012 (13)
- July 2012 (15)
- June 2012 (3)
- May 2012 (8)
- April 2012 (4)
- March 2012 (13)
- February 2012 (19)
- January 2012 (9)
- December 2011 (11)
- November 2011 (12)
- October 2011 (4)
- September 2011 (12)
- August 2011 (16)
- July 2011 (15)
- June 2011 (6)
- May 2011 (10)
- April 2011 (13)
- March 2011 (20)
- February 2011 (4)
- November 2010 (2)
- May 2010 (1)
- April 2010 (1)
- February 2010 (1)





Hi, I am getting errors in the following lines. What could the problem be? Do I need to add any jar files?
/**
 * Creates a Builder by copying an existing DemoRecord instance.
 * Each field of 'other' that passes isValidValue is deep-copied into this builder
 * and its set flag is raised; fields that do not pass stay unset.
 */
// NOTE(review): unlike the same constructor in the article's listing, the deepCopy(...)
// results are assigned here without casts. Presumably this fails to compile when the
// Avro jar on the classpath declares deepCopy as returning Object, i.e. the class was
// generated by a different Avro version than the one it is compiled against - confirm.
private Builder(avro.DemoRecord other) {
super(avro.DemoRecord.SCHEMA$);
if (isValidValue(fields()[0], other.desp)) {
this.desp = data().deepCopy(fields()[0].schema(), other.desp);
fieldSetFlags()[0] = true;
}
if (isValidValue(fields()[1], other.value)) {
this.value = data().deepCopy(fields()[1].schema(), other.value);
fieldSetFlags()[1] = true;
}
if (isValidValue(fields()[2], other.next)) {
this.next = data().deepCopy(fields()[2].schema(), other.next);
fieldSetFlags()[2] = true;
}
}