
I want to binary-encode my JSON string. Below is my JSON string, and I have written a simple method to do the conversion, but I'm not sure whether the way I'm doing it is correct. How can I binary-encode a JSON string using Apache Avro?

public static void main(String args[]) throws Exception{ 
try{ 
    Schema schema = new Parser().parse((TestExample.class.getResourceAsStream("/3233.avsc"))); 
    String json="{"+ 
     " \"location\" : {"+ 
     " \"devices\":["+ 
     "  {"+ 
     "  \"did\":\"9abd09-439bcd-629a8f\","+ 
     "  \"dt\":\"browser\","+ 
     "  \"usl\":{"+ 
     "   \"pos\":{"+ 
     "   \"source\":\"GPS\","+ 
     "   \"lat\":90.0,"+ 
     "   \"long\":101.0,"+ 
     "   \"acc\":100"+ 
     "   },"+ 
     "   \"addSource\":\"LL\","+ 
     "   \"add\":["+ 
     "   {"+ 
     "    \"val\":\"2123\","+ 
     "    \"type\" : \"NUM\""+ 
     "   },"+ 
     "   {"+ 
     "    \"val\":\"Harris ST\","+ 
     "    \"type\" : \"ST\""+ 
     "   }"+ 
     "   ],"+ 
     "   \"ei\":{"+ 
     "   \"ibm\":true,"+ 
     "   \"sr\":10,"+ 
     "   \"ienz\":true,"+ 
     "   \"enz\":100,"+ 
     "   \"enr\":10"+ 
     "   },"+ 
     "   \"lm\":1390598086120"+ 
     "  }"+ 
     "  }"+ 
     " ],"+ 
     " \"ver\" : \"1.0\""+ 
     " }"+ 
     "}"; 

    byte[] avroByteArray = fromJsonToAvro(json,schema); 

} catch (Exception ex) { 
    // log an exception 
} 
} 

The method below converts my JSON string to Avro binary encoding:

private static byte[] fromJsonToAvro(String json, Schema schema) throws Exception { 

    // parse the JSON text into a generic datum, validated against the schema 
    InputStream input = new ByteArrayInputStream(json.getBytes()); 
    DataInputStream din = new DataInputStream(input); 

    Decoder decoder = DecoderFactory.get().jsonDecoder(schema, din); 

    DatumReader<Object> reader = new GenericDatumReader<Object>(schema); 
    Object datum = reader.read(null, decoder); 


    // re-serialize the same datum using the Avro binary encoding 
    GenericDatumWriter<Object> w = new GenericDatumWriter<Object>(schema); 
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); 

    Encoder e = EncoderFactory.get().binaryEncoder(outputStream, null); 

    w.write(datum, e); 
    e.flush(); 

    return outputStream.toByteArray(); 
} 

Can anyone take a look and let me know whether the way I'm converting my JSON string to Avro binary is correct?
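A quick way to sanity-check the round trip is to decode the produced bytes back to JSON with the same schema and compare the result to the input. A rough sketch of such a check (the helper name fromAvroToJson is made up here and is not part of the code above): 

private static String fromAvroToJson(byte[] avro, Schema schema) throws Exception { 
    // decode the binary bytes back into a generic datum 
    DatumReader<Object> reader = new GenericDatumReader<Object>(schema); 
    Decoder decoder = DecoderFactory.get().binaryDecoder(avro, null); 
    Object datum = reader.read(null, decoder); 

    // re-encode the datum as JSON text for comparison 
    GenericDatumWriter<Object> writer = new GenericDatumWriter<Object>(schema); 
    ByteArrayOutputStream out = new ByteArrayOutputStream(); 
    Encoder encoder = EncoderFactory.get().jsonEncoder(schema, out); 
    writer.write(datum, encoder); 
    encoder.flush(); 
    return out.toString(); 
} 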


For what it's worth, see the [Apache Avro specification](http://avro.apache.org/docs/1.7.6/spec.html).


It's not clear what converting JSON "to Avro" would even mean, since, per the spec, Avro schema notation is just a specific set of constraints placed on the JSON string format.


In any case, Apache already seems to provide [a set of utilities](http://avro.apache.org/docs/current/api/java/index.html), so it's not clear why you would need to write your own.

Answers

14

I think the OP is correct. This writes the Avro record data itself, without the schema that would be present if this were an Avro data file.

Here are a couple of examples from within Avro itself (useful if you are working with files):
        • From JSON to Avro: DataFileWriteTool
        • From Avro to JSON: DataFileReadTool
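
If the goal is an Avro data file (with the schema stored alongside the data) rather than bare records, the write side of the question's Java code can go through DataFileWriter instead of a raw binary encoder. A rough sketch of that variant as a drop-in alternative to fromJsonToAvro (the method name fromJsonToAvroDataFile is made up; the container-file example further down shows the same idea, round trip, in Groovy): 

// additionally needs: import org.apache.avro.file.DataFileWriter; 
private static byte[] fromJsonToAvroDataFile(String json, Schema schema) throws Exception { 
    DatumReader<Object> reader = new GenericDatumReader<Object>(schema); 
    Decoder decoder = DecoderFactory.get().jsonDecoder(schema, json); 

    ByteArrayOutputStream out = new ByteArrayOutputStream(); 
    DataFileWriter<Object> fileWriter = new DataFileWriter<Object>(new GenericDatumWriter<Object>()); 
    // create() writes the container-file header, including the schema, before any records 
    fileWriter.create(schema, out); 
    fileWriter.append(reader.read(null, decoder)); 
    fileWriter.close(); 
    return out.toByteArray(); 
} 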

Here's a complete example going both ways.

@Grapes([ 
    @Grab(group='org.apache.avro', module='avro', version='1.7.7') 
]) 

import java.io.ByteArrayInputStream; 
import java.io.ByteArrayOutputStream; 
import java.io.DataInputStream; 
import java.io.EOFException; 
import java.io.IOException; 
import java.io.InputStream; 

import org.apache.avro.Schema; 
import org.apache.avro.generic.GenericDatumReader; 
import org.apache.avro.generic.GenericDatumWriter; 
import org.apache.avro.generic.GenericRecord; 
import org.apache.avro.io.DatumReader; 
import org.apache.avro.io.DatumWriter; 
import org.apache.avro.io.Decoder; 
import org.apache.avro.io.DecoderFactory; 
import org.apache.avro.io.Encoder; 
import org.apache.avro.io.EncoderFactory; 
import org.apache.avro.io.JsonEncoder; 

String schema = '''{ 
    "type":"record", 
    "namespace":"foo", 
    "name":"Person", 
    "fields":[ 
    { 
     "name":"name", 
     "type":"string" 
    }, 
    { 
     "name":"age", 
     "type":"int" 
    } 
    ] 
}''' 
String json = "{" + 
    "\"name\":\"Frank\"," + 
    "\"age\":47" + 
"}" 

assert avroToJson(jsonToAvro(json, schema), schema) == json 


public static byte[] jsonToAvro(String json, String schemaStr) throws IOException { 
    InputStream input = null; 
    GenericDatumWriter<GenericRecord> writer = null; 
    Encoder encoder = null; 
    ByteArrayOutputStream output = null; 
    try { 
     Schema schema = new Schema.Parser().parse(schemaStr); 
     DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema); 
     input = new ByteArrayInputStream(json.getBytes()); 
     output = new ByteArrayOutputStream(); 
     DataInputStream din = new DataInputStream(input); 
     writer = new GenericDatumWriter<GenericRecord>(schema); 
     Decoder decoder = DecoderFactory.get().jsonDecoder(schema, din); 
     encoder = EncoderFactory.get().binaryEncoder(output, null); 
     GenericRecord datum; 
     while (true) { 
      try { 
       datum = reader.read(null, decoder); 
      } catch (EOFException eofe) { 
       break; 
      } 
      writer.write(datum, encoder); 
     } 
     encoder.flush(); 
     return output.toByteArray(); 
    } finally { 
     try { input.close(); } catch (Exception e) { } 
    } 
} 

public static String avroToJson(byte[] avro, String schemaStr) throws IOException { 
    boolean pretty = false; 
    GenericDatumReader<GenericRecord> reader = null; 
    JsonEncoder encoder = null; 
    ByteArrayOutputStream output = null; 
    try { 
     Schema schema = new Schema.Parser().parse(schemaStr); 
     reader = new GenericDatumReader<GenericRecord>(schema); 
     InputStream input = new ByteArrayInputStream(avro); 
     output = new ByteArrayOutputStream(); 
     DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema); 
     encoder = EncoderFactory.get().jsonEncoder(schema, output, pretty); 
     Decoder decoder = DecoderFactory.get().binaryDecoder(input, null); 
     GenericRecord datum; 
     while (true) { 
      try { 
       datum = reader.read(null, decoder); 
      } catch (EOFException eofe) { 
       break; 
      } 
      writer.write(datum, encoder); 
     } 
     encoder.flush(); 
     output.flush(); 
     return new String(output.toByteArray()); 
    } finally { 
     try { if (output != null) output.close(); } catch (Exception e) { } 
    } 
} 

For completeness, here's an example for the case where you are working with streams of Avro data (Avro calls these container files) rather than bare records. Note that when you go from Avro back to JSON you don't need to pass the schema, because it's present in the stream.

@Grapes([ 
    @Grab(group='org.apache.avro', module='avro', version='1.7.7') 
]) 

// writes Avro as a http://avro.apache.org/docs/current/spec.html#Object+Container+Files rather than a sequence of records 

import java.io.ByteArrayInputStream; 
import java.io.ByteArrayOutputStream; 
import java.io.DataInputStream; 
import java.io.EOFException; 
import java.io.IOException; 
import java.io.InputStream; 

import org.apache.avro.Schema; 
import org.apache.avro.file.DataFileStream; 
import org.apache.avro.file.DataFileWriter; 
import org.apache.avro.generic.GenericDatumReader; 
import org.apache.avro.generic.GenericDatumWriter; 
import org.apache.avro.generic.GenericRecord; 
import org.apache.avro.io.DatumReader; 
import org.apache.avro.io.DatumWriter; 
import org.apache.avro.io.Decoder; 
import org.apache.avro.io.DecoderFactory; 
import org.apache.avro.io.Encoder; 
import org.apache.avro.io.EncoderFactory; 
import org.apache.avro.io.JsonEncoder; 


String schema = '''{ 
    "type":"record", 
    "namespace":"foo", 
    "name":"Person", 
    "fields":[ 
    { 
     "name":"name", 
     "type":"string" 
    }, 
    { 
     "name":"age", 
     "type":"int" 
    } 
    ] 
}''' 
String json = "{" + 
    "\"name\":\"Frank\"," + 
    "\"age\":47" + 
"}" 

assert avroToJson(jsonToAvro(json, schema)) == json 


public static byte[] jsonToAvro(String json, String schemaStr) throws IOException { 
    InputStream input = null; 
    DataFileWriter<GenericRecord> writer = null; 
    Encoder encoder = null; 
    ByteArrayOutputStream output = null; 
    try { 
     Schema schema = new Schema.Parser().parse(schemaStr); 
     DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema); 
     input = new ByteArrayInputStream(json.getBytes()); 
     output = new ByteArrayOutputStream(); 
     DataInputStream din = new DataInputStream(input); 
     writer = new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>()); 
     writer.create(schema, output); 
     Decoder decoder = DecoderFactory.get().jsonDecoder(schema, din); 
     GenericRecord datum; 
     while (true) { 
      try { 
       datum = reader.read(null, decoder); 
      } catch (EOFException eofe) { 
       break; 
      } 
      writer.append(datum); 
     } 
     writer.flush(); 
     return output.toByteArray(); 
    } finally { 
     try { input.close(); } catch (Exception e) { } 
    } 
} 

public static String avroToJson(byte[] avro) throws IOException { 
    boolean pretty = false; 
    GenericDatumReader<GenericRecord> reader = null; 
    JsonEncoder encoder = null; 
    ByteArrayOutputStream output = null; 
    try { 
     reader = new GenericDatumReader<GenericRecord>(); 
     InputStream input = new ByteArrayInputStream(avro); 
     DataFileStream<GenericRecord> streamReader = new DataFileStream<GenericRecord>(input, reader); 
     output = new ByteArrayOutputStream(); 
     Schema schema = streamReader.getSchema(); 
     DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema); 
     encoder = EncoderFactory.get().jsonEncoder(schema, output, pretty); 
     for (GenericRecord datum : streamReader) { 
      writer.write(datum, encoder); 
     } 
     encoder.flush(); 
     output.flush(); 
     return new String(output.toByteArray()); 
    } finally { 
     try { if (output != null) output.close(); } catch (Exception e) { } 
    } 
} 
0

You can use avro-tools to convert a JSON file ({input_file}.json) to an Avro file ({output_file}.avro) when you know the schema ({schema_file}.avsc) of the JSON file, like below:

java -jar the/path/of/avro-tools-1.8.1.jar fromjson {input_file}.json --schema-file {schema_file}.avsc > {output_file}.avro 

By the way, the content of the {schema_file}.avsc file is as below:

{"type": "record", 
"name": "User", 
    "fields": [ 
     {"name": "name", "type": "string"}, 
     {"name": "favorite_number", "type": ["int", "null"]}, 
     {"name": "favorite_color", "type": ["string", "null"]} 
    ] 
} 
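
To spot-check the generated Avro file from Java, one option is to read it back with DataFileReader and print each record. A minimal sketch (the class name PrintAvroFile and the file name user.avro are placeholders standing in for {output_file}.avro): 

import java.io.File; 

import org.apache.avro.file.DataFileReader; 
import org.apache.avro.generic.GenericDatumReader; 
import org.apache.avro.generic.GenericRecord; 

public class PrintAvroFile { 
    public static void main(String[] args) throws Exception { 
        // "user.avro" stands in for the {output_file}.avro produced by the command above 
        DataFileReader<GenericRecord> reader = new DataFileReader<GenericRecord>(
            new File("user.avro"), new GenericDatumReader<GenericRecord>()); 
        for (GenericRecord record : reader) { 
            System.out.println(record); // GenericRecord.toString() prints a JSON-style rendering 
        } 
        reader.close(); 
    } 
} 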

Download avro-tools-1.8.1

Download other versions of avro-tools