2011-12-01 52 views
0

我试图在黑莓(BlackBerry)上用 SAX 解析器解析一个 UTF-8 编码的 XML 文件,但解析时抛出了异常,异常消息是“需要一个元素”。XML 文件和解析代码如下:

<?xml version='1.0' encoding='UTF-8' standalone='yes' ?> 
<config> 
<filepath>/mnt/sdcard/Audio_Recorder/anonymous22242.3gp</filepath> 
<filename>anonymous22242.3gp</filename> 
<annotation> 
    <file>anonymous22242.3gp</file> 
    <timestamp>0:06</timestamp> 
    <note>test1</note> 
</annotation> 
<annotation> 
    <file>anonymous22242.3gp</file> 
    <timestamp>0:09</timestamp> 
    <note>لول</note> 
</annotation> 
<annotation> 
    <file>anonymous22242.3gp</file> 
    <timestamp>0:09</timestamp> 
    <note>لولو</note> 
</annotation> 
</config> 


 // Text of the <filepath> element, as stored by the SAX handler in main.
 private static String fileDirectory; 
// Text of every <filename> element seen by the SAX handler in main.
private final static ArrayList<String> allFileNames = new ArrayList<String>(); 
// Annotation records added by the SAX handler in main; each entry is a String[3].
private final static ArrayList<String[]> allAnnotations = new ArrayList<String[]>(); 
// Record currently being filled by the handler: [0] = <file>, [1] = <timestamp>, [2] = <note>.
private static String[] currentAnnotation = new String[3]; 

/**
 * Downloads the annotation XML, parses it with SAX and prints what was collected.
 *
 * <p>The handler fills {@code fileDirectory} from {@code <filepath>}, appends each
 * {@code <filename>} to {@code allFileNames}, and appends one {@code String[3]}
 * ({@code file}, {@code timestamp}, {@code note}) to {@code allAnnotations} per
 * {@code <annotation>} element.
 *
 * @param args unused
 */
public static void main(String[] args) {
    try {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser playbackParser = factory.newSAXParser();

        DefaultHandler handler = new DefaultHandler() {

            // Text of the element currently being read. SAX may deliver one element's
            // text through several characters() calls, so the chunks are accumulated
            // here and only consumed in endElement().
            private final StringBuffer text = new StringBuffer();

            public void startElement(String uri, String localName,
                    String qName, Attributes attributes)
                    throws SAXException {

                System.out.println("Start Element :" + qName);

                if (qName.equalsIgnoreCase("annotation")) {
                    // A fresh array per annotation: reusing one array would make every
                    // entry of allAnnotations alias the same (last) record.
                    currentAnnotation = new String[3];
                }

                // Discard inter-element whitespace collected since the previous tag.
                text.setLength(0);
            }

            public void endElement(String uri, String localName,
                    String qName) throws SAXException {

                String value = text.toString();
                text.setLength(0);

                if (qName.equalsIgnoreCase("filepath")) {
                    System.out.println("Full Path : " + value);
                    fileDirectory = value;
                } else if (qName.equalsIgnoreCase("filename")) {
                    System.out.println("File Name : " + value);
                    allFileNames.add(value);
                } else if (qName.equalsIgnoreCase("file")) {
                    currentAnnotation[0] = value;
                } else if (qName.equalsIgnoreCase("timestamp")) {
                    currentAnnotation[1] = value;
                } else if (qName.equalsIgnoreCase("note")) {
                    currentAnnotation[2] = value;
                } else if (qName.equalsIgnoreCase("annotation")) {
                    // The record is complete once its closing tag has been reached.
                    allAnnotations.add(currentAnnotation);
                }

                System.out.println("End Element :" + qName);
            }

            public void characters(char ch[], int start, int length)
                    throws SAXException {
                text.append(ch, start, length);
            }
        };

        InputStream inputStream = getStream("http://www.example.com/example.xml");
        if (inputStream == null) {
            // getStream reports failure by returning null; stop here instead of
            // failing later with an uninformative NullPointerException.
            System.out.println("Could not open the XML stream");
            return;
        }

        try {
            // Decode the bytes explicitly as UTF-8 rather than the platform default.
            Reader xmlReader = new InputStreamReader(inputStream, "UTF-8");

            InputSource xmlSource = new InputSource(xmlReader);
            xmlSource.setEncoding("UTF-8");

            playbackParser.parse(xmlSource, handler);
        } finally {
            try {
                inputStream.close();
            } catch (Exception closeFailure) {
                // A failed close must not hide a parse error thrown above.
                System.out.println("Could not close the XML stream: " + closeFailure);
            }
        }

        System.out.println(fileDirectory);
        System.out.println(allFileNames);
        System.out.println(allAnnotations);

    } catch (Exception e) {
        e.printStackTrace();
    }
}
} 

public Static InputStream getStream(String url) 
{ 
    try 
    { 
     connection = getConnection(url); 
     connection.setRequestProperty("User-Agent",System.getProperty("microedition.profiles")); 
     connection.setRequestProperty("Connection", "Keep-Alive"); 
     connection.setRequestProperty("Content-Type", "text/xml; charset=UTF-8"); 

     inputStream = connection.openInputStream(); 
     return inputStream; 
    } 
    catch(Exception e) 
    { 
     System.out.println("NNNNNNN "+e.getMessage()); 

     return null; 
     } 

    } 

public HttpConnection getConnection(String url) 
{ 

    try 
    { 
     connection = (HttpConnection) Connector.open(url+getConnectionString()); 


    } 
    catch(Exception e) 

    { 

    } 


    return connection; 
} 

但是,当我把 InputStream(而不是 InputSource)传给 parse 方法时,文件可以被解析,但其中的阿拉伯语字符仍然有问题:
playbackParser.parse(inputStream, handler); 
+0

“ArrayList<String>”无法在 BlackBerry 上编译,因为 BlackBerry 只运行 Java ME。为什么这个问题带有 BlackBerry 标签? –

回答

1

您展示的 XML 包含未经正确编码的阿拉伯字符。这与 XML 声明中的 encoding 不符,也就意味着该 XML 格式不正确。SAX 解析器按顺序逐段处理数据,并为每个片段触发事件;在读到包含这些错误字符的片段之前,它不会发现这种编码错误。对此您无能为力,XML 需要由其原始作者修复。

+0

包含来自其他语言的字符时如何编码XML? –

+1

XML 中的所有字符都必须使用 XML 序言(prolog)的 encoding 属性所指定的字符集进行编码。在这种情况下,您的示例 XML 的字符集是“UTF-8”(未指定时,它也是 XML 的默认字符集)。生成该 XML 时,阿拉伯字符没有按 UTF-8 编码,这是错误的。在 UTF-8 中,Unicode 字符串“لول”被编码为字节序列“D9 84 D9 88 D9 84”,而“لولو”被编码为“D9 84 D9 88 D9 84 D9 88”。 –

+0

@Remy 遗憾的是,Stack Overflow 是一个基于字符的问答网站,所以很难判断这个文件是否被正确编码:这里显示的字符是从编辑器中复制出来的,而编辑器已经把文件系统中的字节表示正确地转换成了字符。我想没有人会更愿意看到 d9 84 d9 88 d9 84 按 cp1252 显示出来的样子。 –