2012-08-08 46 views
14

Avro序列化在Hadoop用户中非常流行,但实例很难找到。使用apache avro反射

任何人都可以帮我用这个示例代码?我最感兴趣的是使用Reflect API来读/写文件并使用Union和Null注释。

public class Reflect { 

    public class Packet { 
     int cost; 
     @Nullable TimeStamp stamp; 
     public Packet(int cost, TimeStamp stamp){ 
      this.cost = cost; 
      this.stamp = stamp; 
     } 
    } 

    public class TimeStamp { 
     int hour = 0; 
     int second = 0; 
     public TimeStamp(int hour, int second){ 
      this.hour = hour; 
      this.second = second; 
     } 
    } 

    public static void main(String[] args) throws IOException { 
     TimeStamp stamp; 
     Packet packet; 

     stamp = new TimeStamp(12, 34); 
     packet = new Packet(9, stamp); 
     write(file, packet); 

     packet = new Packet(8, null); 
     write(file, packet); 
     file.close(); 

     // open file to read. 
     packet = read(file); 
     packet = read(file); 
    } 
} 

回答

27

下面是上述程序的一个版本。

这也使用文件上的压缩。

import java.io.File; 
import org.apache.avro.Schema; 
import org.apache.avro.file.DataFileWriter; 
import org.apache.avro.file.DataFileReader; 
import org.apache.avro.file.CodecFactory; 
import org.apache.avro.io.DatumWriter; 
import org.apache.avro.io.DatumReader; 
import org.apache.avro.reflect.ReflectData; 
import org.apache.avro.reflect.ReflectDatumWriter; 
import org.apache.avro.reflect.ReflectDatumReader; 
import org.apache.avro.reflect.Nullable; 

public class Reflect { 

    public static class Packet { 
    int cost; 
    @Nullable TimeStamp stamp; 
    public Packet() {}      // required to read 
    public Packet(int cost, TimeStamp stamp){ 
     this.cost = cost; 
     this.stamp = stamp; 
    } 
    } 

    public static class TimeStamp { 
    int hour = 0; 
    int second = 0; 
    public TimeStamp() {}      // required to read 
    public TimeStamp(int hour, int second){ 
     this.hour = hour; 
     this.second = second; 
    } 
    } 

    public static void main(String[] args) throws Exception { 
    // one argument: a file name 
    File file = new File(args[0]); 

    // get the reflected schema for packets 
    Schema schema = ReflectData.get().getSchema(Packet.class); 

    // create a file of packets 
    DatumWriter<Packet> writer = new ReflectDatumWriter<Packet>(Packet.class); 
    DataFileWriter<Packet> out = new DataFileWriter<Packet>(writer) 
     .setCodec(CodecFactory.deflateCodec(9)) 
     .create(schema, file); 

    // write 100 packets to the file, odds with null timestamp 
    for (int i = 0; i < 100; i++) { 
     out.append(new Packet(i, (i%2==0) ? new TimeStamp(12, i) : null)); 
    } 

    // close the output file 
    out.close(); 

    // open a file of packets 
    DatumReader<Packet> reader = new ReflectDatumReader<Packet>(Packet.class); 
    DataFileReader<Packet> in = new DataFileReader<Packet>(file, reader); 

    // read 100 packets from the file & print them as JSON 
    for (Packet packet : in) { 
     System.out.println(ReflectData.get().toString(packet)); 
    } 

    // close the input file 
    in.close(); 
    } 

} 
-1

https://sites.google.com/site/developertips/Home/java/apache-avro见例如3这表明你如何使用反射API写入和读取Java类。

+1

看过那一个。这意味着写入一个流。所以,如果写入文件,我怀疑它不会包含标题。如果是这样,它可能不会用不同的编程语言来解释它。此外,该示例并未解决使用更多复杂数据结构的可空字段或联合。 – fodon 2012-08-08 17:45:57

+0

我同意@fodon。我希望能够看到更详细的示例,因为我相信存在测试未考虑到的一些错误。 – Dan 2015-06-24 12:46:34

+1

@LordAragon链接不再工作。这也是它从参考文献中分享实际内容的良好做法的另一个原因。 – Sankalp 2016-09-12 12:04:28