在Java中解析XML文档而无需事先知道其结构。我想要一种方法:在不了解XML文档结构、也不硬编码节点/元素名称的情况下解析XML文档,并以键/值对的形式返回一个Map。
我目前使用StAX进行解析,它基本上能处理这个XML文档,但奇怪的是它并没有解析整个文档。不知何故,它跳过了部分数据。
对比XML文档和测试输出,你会发现并不是所有的值都被打印出来。我可能遗漏了什么?
代码:
/**
 * Parses an XML file with StAX and flattens it into element-name/value pairs,
 * without any knowledge of the document structure.
 *
 * <p>Every start tag contributes one entry keyed by the element's local name.
 * The value is the text of the event that immediately follows the start tag
 * (for an indented container element that is its leading whitespace), or
 * {@code null} when the start tag is followed directly by another tag.
 *
 * <p>The result is a plain map, so when an element name occurs more than once
 * only the last occurrence is kept.
 *
 * @param file the XML file to read
 * @return map from element local name to the text following its start tag
 * @throws Exception if the file cannot be opened or cannot be parsed
 */
public Map<String, String> p(File file) throws Exception {
    Map<String, String> map = new HashMap<String, String>();
    // NOTE(review): if files can come from untrusted sources, disable DTD and
    // external-entity support on the factory before creating the reader.
    FileInputStream in = new FileInputStream(file);
    try {
        XMLStreamReader xr = XMLInputFactory.newInstance().createXMLStreamReader(in);
        try {
            // Local name of the element whose start tag was the previous event, if any.
            String pending = null;
            while (xr.hasNext()) {
                int e = xr.next();
                if (e == XMLStreamReader.START_ELEMENT) {
                    // Register the element right away instead of blindly consuming the
                    // next event: that event may itself be a start tag (e.g. <a><b>..),
                    // and consuming it here would silently drop the child element.
                    pending = xr.getLocalName();
                    map.put(pending, null);
                } else {
                    // Only the event directly after a start tag supplies the value,
                    // and only if it is one of the event types that carry text.
                    boolean carriesText = e == XMLStreamReader.CHARACTERS
                            || e == XMLStreamReader.CDATA
                            || e == XMLStreamReader.SPACE
                            || e == XMLStreamReader.COMMENT
                            || e == XMLStreamReader.ENTITY_REFERENCE
                            || e == XMLStreamReader.DTD;
                    if (pending != null && carriesText) {
                        map.put(pending, xr.getText());
                    }
                    pending = null;
                }
            }
        } finally {
            // Closing the reader does not close the underlying stream.
            xr.close();
        }
    } finally {
        in.close();
    }
    return map;
}
/**
 * Parses {@code xmlDir/request.xml} with {@code p}, logs every key/value pair of
 * the resulting map, and checks four of the parsed values.
 *
 * @throws Exception if the file cannot be read or parsed
 */
@Test
public void test() throws Exception {
    File f = new File("xmlDir/request.xml");
    Map<String, String> map = p(f);
    // Print all Key/Value pairs
    for (Map.Entry<String, String> entry : map.entrySet()) {
        logger.debug("Key: "+entry.getKey());
        logger.debug("Value: "+entry.getValue());
    }
    // NOTE(review): argument order is left as written; whether it is
    // (actual, expected) or (expected, actual) depends on which Assert class
    // is imported — confirm against the file's imports.
    Assert.assertEquals(map.get("MonthlyPlanPremiumAmtPP"), "136");
    Assert.assertEquals(map.get("MonthlyAdvancedPTCAmtPP"), "125");
    Assert.assertEquals(map.get("AdjustedGrossIncomeAmt"), "22000");
    Assert.assertEquals(map.get("TotalExemptionsCnt"), "1");
}
输出:
2015-08-18 16:21:44,408 : Key: IRS1095A
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: MonthlyAdvancedPTCAmtPP
2015-08-18 16:21:44,409 : Value: 125
2015-08-18 16:21:44,409 : Key: IndividualReturnFilingStatusCd
2015-08-18 16:21:44,409 : Value: 1
2015-08-18 16:21:44,409 : Key: IRS1040
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: MonthlyPTCInformationGrpPP
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: MonthlyPremiumSLCSPAmtPP
2015-08-18 16:21:44,409 : Value: 250
2015-08-18 16:21:44,409 : Key: Filer
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: TotalPremiumSLCSPAmtPP
2015-08-18 16:21:44,409 : Value: 3000
2015-08-18 16:21:44,409 : Key: ResidentStateAbbreviationCdPP
2015-08-18 16:21:44,409 : Value: CA
2015-08-18 16:21:44,409 : Key: TotalPlanPremiumAmtPP
2015-08-18 16:21:44,409 : Value: 1632
2015-08-18 16:21:44,409 : Key: TotalExemptionsCnt
2015-08-18 16:21:44,409 : Value: 1
2015-08-18 16:21:44,409 : Key: TotalAdvancedPTCAmtPP
2015-08-18 16:21:44,409 : Value: 1500
2015-08-18 16:21:44,409 : Key: MonthlyPlanPremiumAmtPP
2015-08-18 16:21:44,409 : Value: 136
2015-08-18 16:21:44,409 : Key: RecipientSSNPP
2015-08-18 16:21:44,409 : Value: 555-11-2222
2015-08-18 16:21:44,409 : Key: WagesSalariesAndTipsAmt
2015-08-18 16:21:44,409 : Value: 22000
2015-08-18 16:21:44,409 : Key: MonthCdPP
2015-08-18 16:21:44,409 : Value: NOVEMBER
2015-08-18 16:21:44,409 : Key: ReturnData
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: PrimaryResidentStatesInfoGrpPP
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: SelfSelectPINGrp
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: ResidentStateInfoPP
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: Return
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: PrimaryBirthDt
2015-08-18 16:21:44,409 : Value: 1970-01-01
2015-08-18 16:21:44,409 : Key: ReturnHeader
2015-08-18 16:21:44,409 : Value:
2015-08-18 16:21:44,409 : Key: AdjustedGrossIncomeAmt
2015-08-18 16:21:44,409 : Value: 22000
2015-08-18 16:21:44,409 : Key: PrimarySSN
2015-08-18 16:21:44,409 : Value: 555-11-2222
XML文档:request.xml
<Return xmlns="http://www.irs.gov/efile">
<ReturnData>
<IRS1095A uuid="a77f40a2-af31-4404-a27d-4c1eaad730c2">
<MonthlyPTCInformationGrpPP uuid="69dc9dd5-5415-4ee4-a199-19b2dbb701be">
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthCdPP>SEPTEMBER</MonthCdPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="8495fa61-0e7c-45e3-8f07-9765f4ef2fc3">
<MonthCdPP>OCTOBER</MonthCdPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="7de1052f-6107-41da-aea4-e4495018fc80">
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
<MonthCdPP>APRIL</MonthCdPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="634d5af9-51fb-42ee-a90d-5a4f421e6854">
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthCdPP>JUNE</MonthCdPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="a2f7de3f-650c-4a5e-b26c-30cfd7782d6c">
<MonthCdPP>MAY</MonthCdPP>
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="a77f40a2-af31-4404-a27d-4c1eaad730c2">
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthCdPP>JANUARY</MonthCdPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="01650aee-9d5d-4ce1-9079-ebedea3bf416">
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
<MonthCdPP>MARCH</MonthCdPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="581ba189-222d-4999-aa1a-3b290666ef5f">
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
<MonthCdPP>AUGUST</MonthCdPP>
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
</MonthlyPTCInformationGrpPP>
<TotalPremiumSLCSPAmtPP>3000</TotalPremiumSLCSPAmtPP>
<MonthlyPTCInformationGrpPP uuid="549ff57a-58dc-4365-b05c-e3e520b3e8cb">
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
<MonthCdPP>DECEMBER</MonthCdPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="195836cf-32b3-4316-99d4-6b1eab31e16d">
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthCdPP>JULY</MonthCdPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
</MonthlyPTCInformationGrpPP>
<MonthlyPTCInformationGrpPP uuid="c1289d91-7ce1-41ee-9c8a-f72212e82752">
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthCdPP>FEBRUARY</MonthCdPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
</MonthlyPTCInformationGrpPP>
<TotalAdvancedPTCAmtPP>1500</TotalAdvancedPTCAmtPP>
<RecipientSSNPP>555-11-2222</RecipientSSNPP>
<MonthlyPTCInformationGrpPP uuid="50876222-165d-442a-81e0-0b05dc3c30fb">
<MonthlyAdvancedPTCAmtPP>125</MonthlyAdvancedPTCAmtPP>
<MonthlyPlanPremiumAmtPP>136</MonthlyPlanPremiumAmtPP>
<MonthCdPP>NOVEMBER</MonthCdPP>
<MonthlyPremiumSLCSPAmtPP>250</MonthlyPremiumSLCSPAmtPP>
</MonthlyPTCInformationGrpPP>
<TotalPlanPremiumAmtPP>1632</TotalPlanPremiumAmtPP>
</IRS1095A>
<IRS1040>
<IndividualReturnFilingStatusCd>1</IndividualReturnFilingStatusCd>
<WagesSalariesAndTipsAmt>22000</WagesSalariesAndTipsAmt>
<TotalExemptionsCnt>1</TotalExemptionsCnt>
<AdjustedGrossIncomeAmt>22000</AdjustedGrossIncomeAmt>
</IRS1040>
</ReturnData>
<ReturnHeader>
<SelfSelectPINGrp>
<PrimaryBirthDt>1970-01-01</PrimaryBirthDt>
</SelfSelectPINGrp>
<Filer>
<PrimarySSN>555-11-2222</PrimarySSN>
<PrimaryResidentStatesInfoGrpPP>
<ResidentStateInfoPP uuid="a77f40a2-af31-4404-a27d-4c1eaad730c2">
<ResidentStateAbbreviationCdPP>CA</ResidentStateAbbreviationCdPP>
</ResidentStateInfoPP>
</PrimaryResidentStatesInfoGrpPP>
</Filer>
</ReturnHeader>
</Return>
我注意到重复的条目被跳过了,只打印出了不重复的条目。例如,文档中有多个 MonthlyPTCInformationGrpPP 节点,它们只是 uuid 不同 –
mosawi
是否应该给键名加上递增的后缀,以避免这个问题?HashMap的键不是必须唯一的吗? – skoll
或者干脆不使用Map,而是改用列表(List) – skoll