2017-04-26 68 views
-2

Hive 中处理嵌套 JSON 时报错:我正尝试使用 DDL 在 Hive 中加载下面这份嵌套的 JSON 数据

{ 
    "id": "0001", 
    "type": "donut", 
    "name": "Cake", 
    "ppu": 0.55, 
    "batters": 
     { 
      "batter": 
       [ 
        { "id": "1001", "type": "Regular" }, 
        { "id": "1002", "type": "Chocolate" }, 
        { "id": "1003", "type": "Blueberry" }, 
        { "id": "1004", "type": "Devil's Food" } 
       ] 
     }, 
    "topping": 
     [ 
      { "id": "5001", "type": "None" }, 
      { "id": "5002", "type": "Glazed" }, 
      { "id": "5005", "type": "Sugar" }, 
      { "id": "5007", "type": "Powdered Sugar" }, 
      { "id": "5006", "type": "Chocolate with Sprinkles" }, 
      { "id": "5003", "type": "Chocolate" }, 
      { "id": "5004", "type": "Maple" } 
     ] 
} 

用于加载该 JSON 数据的命令:

-- Make the SerDe jar available to the current Hive session.
-- NOTE(review): presumably this jar provides the org.openx.data.jsonserde.JsonSerDe
-- class referenced by the CREATE TABLE below -- confirm against the jar contents.
ADD JAR /home/cloudera/Downloads/json-serde-1.3.6-SNAPSHOT-jar-with-dependencies.jar; 

-- The question's DDL, reproduced exactly as posted. It does NOT parse: the
-- ParseException quoted after this block points at line 7 of this statement.
-- Comments are kept above the statement on purpose so its line numbers still
-- match that error message.
-- Problems in the statement as written:
--   * batters` has a closing backtick but no opening one.
--   * `topping`:array puts a colon between a column name and its type; a column
--     is declared as "name type" (colons belong only inside struct<...>).
--   * The struct fields are named bid/btype/tid/ttype, while the JSON keys are
--     id/type; per the answer, the SerDe maps by name, so these would read as NULL.
CREATE EXTERNAL TABLE format.json_serde (
    `id` string, 
    `type` string, 
    `name` string, 
`ppu` float,  
    batters` struct < `batter`:array < struct <`bid`:string, `btype`:string >>>, 
    `topping`:array < struct<`tid`:int, `ttype`:string>> 
) 
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'; 

执行后抛出如下错误:

FAILED: ParseException line 7:11 cannot recognize input near ':' 'array' '<' in column type

回答

0
  1. 你的 DDL 中有几处笔误(左边是原写法,右边是正确写法):
    ttype`:string → ttype:string
    batters` struct → batters struct
    topping`:array → topping array

  2. JSON SerDe 是按字段名称进行映射的。
    结构体中的字段名称必须与 JSON 中的实际键名一致,例如应使用 id,而不是 bid 或 tid,否则这些字段读出来的值将是 NULL。

  3. 已经有一个JSON SerDe,它是Hive安装的一部分。 https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-RowFormats&SerDe


-- External table over the sample donut documents.
-- Column names and nested struct field names are spelled exactly like the JSON
-- keys (id, type, name, ppu, batters.batter[], topping[]).
CREATE EXTERNAL TABLE json_serde (
    id      STRING,
    type    STRING,
    name    STRING,
    ppu     FLOAT,
    batters STRUCT<batter:ARRAY<STRUCT<id:STRING,type:STRING>>>,
    topping ARRAY<STRUCT<id:STRING,type:STRING>>
)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
STORED AS TEXTFILE;

-- Ad-hoc check: dump every column of json_serde to confirm the nested
-- struct/array values are parsed.
SELECT *
FROM json_serde;

+------+-------+------+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ 
| id | type | name |  ppu  |                  batters                  |                             topping                             | 
+------+-------+------+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ 
| 0001 | donut | Cake | 0.550000011920929 | {"batter":[{"id":"1001","type":"Regular"},{"id":"1002","type":"Chocolate"},{"id":"1003","type":"Blueberry"},{"id":"1004","type":"Devil'sFood"}]} | [{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5005","type":"Sugar"},{"id":"5007","type":"PowderedSugar"},{"id":"5006","type":"ChocolatewithSprinkles"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}] | 
+------+-------+------+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ 
+0

谢谢。当我尝试查询时,返回了如下错误:**Failed with exception java.io.IOException: org.apache.hadoop.hive.serde2.SerDeException: org.codehaus.jackson.JsonParseException: Unexpected end-of-input: expected close marker for OBJECT (from [Source: java.io.ByteArrayInputStream@73304204; line: 1, column: 0]) at [Source: java.io.ByteArrayInputStream@73304204; line: 1, column: 3]** 如果您对输入数据做过任何格式上的调整,请告诉我 –

+0

每个 JSON 文档都应当完整地写在同一行内。据我所知,没有哪个 JSON SerDe 能够处理跨多行的文档。 –

0

当我删除冒号附近的反引号(`)之后,它就可以正常工作了。谢谢。


-- Follow-up version of the question's table definition with the stray
-- backticks and the column-level colon removed, so the column list parses.
-- Type keywords are restored to HiveQL (string / float / struct / array).
-- NOTE(review): the struct fields are still named bid/btype/tid/ttype, while
-- the JSON keys are id/type; the accepted answer says the JSON SerDe maps by
-- name, so these fields would be read as NULL unless renamed -- confirm.
-- NOTE(review): no ROW FORMAT SERDE clause or statement terminator is visible
-- after the column list here; the question's version used
-- ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'.
CREATE EXTERNAL TABLE format.json_serde (
    id string,
    type string,
    name string,
    ppu float,
    batters struct<batter:array<struct<bid:string, btype:string>>>,
    topping array<struct<tid:string, ttype:string>>
)