2017-09-08 33 views
0
<Search> 

<Country>USA</Country> 
<Region>West</Region> 
<Address> 
    <Home> 
     <Item> 
       <id>Number</id> 
       <value>135</value> 
      </Item> 
     <Item> 
       <id>Street</id> 
       <value>Pacific</value> 
      </Item> 
     <Item> 
       <id>City</id> 
       <value>Irvine</value> 
      </Item> 
     </Home> 
    <Home> 
     <Item> 
       <id>Number</id> 
       <value>1672</value> 
      </Item> 
     <Item> 
       <id>Street</id> 
       <value>Madison</value> 
      </Item> 
     <Item> 
       <id>City</id> 
       <value>Denver</value> 
      </Item> 
     </Home> 
    </Address> 
</Search> 

Hive XML SerDe - 键/值对 - Map:我试图创建下面的表结构,但没有得到期望的结果。

期望的结果如下:

Country Region      Map 
USA  West    {Number:135,Street:Pacific,City:Irvine} 
USA  West    {Number:1672,Street:Madison,City:Denver} 

`CREATE EXTERNAL TABLE search(
-- Asker's attempt: an external table over XML files under /search, partitioned by
-- a string `date` column and read through the com.ibm.spss XmlSerDe classes.
country string, 
region string, 
-- NOTE(review): no "column.xpath.search" property is defined below for this column;
-- the third XPath property is keyed "column.xpath.item" instead.
search array<struct<item:map<string,string>>> 
) 
PARTITIONED BY(date STRING) 
ROW FORMAT SERDE 'com.ibm.spss.hive.serde2.xml.XmlSerDe' 
WITH SERDEPROPERTIES(
-- NOTE(review): the sample document spells these elements <Country> and <Region>
-- (capitalized), while the paths below use lowercase names; XPath name tests are
-- case-sensitive.
"column.xpath.country" = "/Search/country/text()", 
"column.xpath.region" = "/Search/region/text()", 
"column.xpath.item"="/Search/Address/Home/Item" 
) 
STORED AS 
INPUTFORMAT 'com.ibm.spss.hive.serde2.xml.XmlInputFormat' 
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat' 
LOCATION '/search' 
TBLPROPERTIES (
-- NOTE(review): both record delimiters are empty strings here; presumably they
-- should be the record's opening and closing tags - confirm against the input.
"xmlinput.start"="", 
"xmlinput.end"="" 
); 

期望的结果是否可以实现?或者对于如何以上述格式获取这些数据,有没有其他建议?任何帮助都将不胜感激。谢谢。 `

回答

0

鉴于XML,你能做的最好的大概是这样的:

-- Recreate the demo table from scratch so the script can be re-run.
DROP TABLE IF EXISTS xml_47;

-- Each <Search> ... </Search> span is one input record (see TBLPROPERTIES).
-- `address` holds one array element per <Home>, each carrying that home's
-- <Item> id/value pairs as structs.
CREATE TABLE xml_47 (
    country STRING,
    region  STRING,
    address ARRAY<STRUCT<Home:ARRAY<STRUCT<Item:STRUCT<id:STRING,value:STRING>>>>>
)
ROW FORMAT SERDE 'com.ibm.spss.hive.serde2.xml.XmlSerDe'
WITH SERDEPROPERTIES (
    "column.xpath.country" = "/Search/Country/text()",
    "column.xpath.region"  = "/Search/Region/text()",
    "column.xpath.address" = "/Search/Address/Home"
)
STORED AS
    INPUTFORMAT  'com.ibm.spss.hive.serde2.xml.XmlInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
TBLPROPERTIES (
    "xmlinput.start" = "<Search>",
    "xmlinput.end"   = "</Search>"
);

-- Replace the table contents with the local sample file.
LOAD DATA LOCAL INPATH '/Users/dvasilen/Misc/XML/47.xml'
OVERWRITE INTO TABLE xml_47;

-- Show every column of the loaded rows.
SELECT
    country,
    region,
    address
FROM xml_47;

这里是输出:

USA West [{"home":[{"item":{"id":"Number","value":"135"}},{"item":{"id":"Street","value":"Pacific"}},{"item":{"id":"City","value":"Irvine"}}]},{"home":[{"item":{"id":"Number","value":"1672"}},{"item":{"id":"Street","value":"Madison"}},{"item":{"id":"City","value":"Denver"}}]}] 
Time taken: 0.067 seconds, Fetched: 1 row(s) 

要访问其中的元素:

-- Index into the nested array: first <Home> entry, then its first <Item>.
SELECT address[0].home[0]
FROM xml_47;
OK 
{"item":{"id":"Number","value":"135"}} 
Time taken: 0.076 seconds, Fetched: 1 row(s) 

为了获得所需的输出:

USA West {Number:135,Street:Pacific,City:Irvine} 
USA West {Number:1672,Street:Madison,City:Denver} 

你需要使用 LATERAL VIEW 将 address 数组展开,参见:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+LateralView

+0

我能够得到这种格式:USA West [Number,Street,City] [135,Pacific,Irvine] 和 USA West [Number,Street,City] [1672,Madison,Denver]。如何从这里得到以下格式:USA West [Number:135,Street:Pacific,City:Irvine] 和 USA West [Number:1672,Street:Madison,City:Denver]? – Paciferous