2012-11-28 59 views
0

我有一个不断被追加内容的XML文件。我需要反复读取这个XML中的数据,但每次读取时都不想再取回上一次运行中已经处理过的数据。问题:如何从指定的起始位置读取XML文件?

由于我知道处理时文件的长度,我想可以用文件长度(减去结尾的 </Contacts> 标记)来确定上一次处理到的位置。既然如此,从文件中某个特定的字节位置开始检索所有 Contact 标记的最佳方法是什么?

<?xml version="1.0"?> 
<Contacts> 
    <Contact> 
     <Name>Todd</Name> 
     <Email>[email protected]</Email> 
    </Contact> 
    <Contact> 
     <Name>Sarah</Name> 
     <Email>[email protected]</Email> 
    </Contact> 
</Contacts> 

下面这段代码会取出所有的联系人。我想加以限制,让它只读取第一个联系人之后(即第116个字节之后)的数据。

// Parse the entire file into memory, then collect the text content of every
// <Contact> element (at any depth) into an array.
var xdoc = XDocument.Load(PATH_TO_FILE);
var contact = xdoc
    .Descendants("Contact")
    .Select(element => element.Value)
    .ToArray();
+0

只是出于好奇:你为什么不使用数据库呢? –

+0

XML是我无法更改的供应商产品的输出。该请求是生成一个近乎实时的解析这些XML文件的方法,然后处理其中的数据。 – Rethic

回答

0

我找到了一种按索引位置保存并检索的方法,这样也可行。

// Number of leading <Contact> elements (in document order) that were already
// handled by a previous run and should therefore be left out.
int position = 1;

// Lazily yields every <Contact> element whose zero-based index is >= position.
var contacts = xdoc
    .Descendants("Contact")
    .Skip(position);
1

您可以创建一个取巧的流,在自定义位置模拟文档(Document)的起始元素。这段代码非常粗糙,但确实可用。

// Demo: resume parsing in the middle of an XML document by seeking the
// underlying stream past the first <Contact> and letting CustomReader
// re-inject the opening <Contacts> root tag in front of the remaining bytes.
void Main() 
{ 
var xml = 
    @"<Contacts><Contact><Name>Todd</Name><Email>[email protected]</Email></Contact><Contact> 
     <Name>Sarah1</Name> 
     <Email>[email protected]</Email> 
    </Contact> 
    <Contact> 
     <Name>Sarah2</Name> 
     <Email>[email protected]</Email> 
    </Contact> 
</Contacts>"; 

    // Derive the resume offset from the text instead of hard-coding it: the
    // stream is positioned in BYTES, so measure the UTF-8 length of everything
    // up to and including the first closing </Contact> tag.
    const string endTag = "</Contact>";
    var endTagIndex = xml.IndexOf(endTag, StringComparison.Ordinal);
    if (endTagIndex < 0)
    {
        throw new InvalidOperationException("Closing tag not found: " + endTag);
    }
    var resumeAt = Encoding.UTF8.GetByteCount(xml.Substring(0, endTagIndex + endTag.Length));

    using (var ms = new MemoryStream(Encoding.UTF8.GetBytes(xml)))
    {
        // Position the source before any reader gets a chance to pull bytes from it.
        ms.Position = resumeAt;

        using (var reader = XmlReader.Create(new CustomReader("<Contacts>", ms)))
        {
            var xdoc = XDocument.Load(reader);
            var contact = xdoc.Descendants("Contact").ToArray();

            // Dump() is the LINQPad output helper.
            contact.Dump();
        }
    }
} 

/// <summary>
/// Read-only, forward-only stream that first yields the UTF-8 bytes of a
/// synthetic prefix (for example an opening root tag) and afterwards the bytes
/// of the wrapped stream, starting at whatever position that stream is at.
/// </summary>
public class CustomReader : Stream
{
    // UTF-8 bytes of the prefix, encoded once instead of on every Read call.
    private readonly byte[] _prefix;
    private readonly Stream _stream;
    // Number of prefix bytes already handed to the caller.
    private int _prefixPosition;

    /// <param name="element">Text emitted before the wrapped stream's content.</param>
    /// <param name="stream">Stream whose remaining bytes follow the prefix.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public CustomReader(string element, Stream stream)
    {
        if (element == null) throw new ArgumentNullException("element");
        if (stream == null) throw new ArgumentNullException("stream");

        _prefix = Encoding.UTF8.GetBytes(element);
        _stream = stream;
    }

    public override bool CanRead
    {
        get { return true; }
    }

    public override bool CanSeek
    {
        get { return false; }
    }

    public override bool CanWrite
    {
        get { return false; }
    }

    /// <summary>Closes the wrapped stream together with this wrapper.</summary>
    public override void Close()
    {
        _stream.Close();
        base.Close();
    }

    /// <summary>No-op: nothing is ever buffered for writing on a read-only stream.</summary>
    public override void Flush()
    {
    }

    public override long Length
    {
        get { throw new NotSupportedException(); }
    }

    public override long Position
    {
        get { throw new NotSupportedException(); }
        set { throw new NotSupportedException(); }
    }

    /// <summary>
    /// Copies up to <paramref name="count"/> bytes into <paramref name="buffer"/>:
    /// pending prefix bytes first, then data from the wrapped stream.
    /// </summary>
    /// <returns>The number of bytes written; 0 when count is 0 or the wrapped stream is exhausted.</returns>
    public override int Read(byte[] buffer, int offset, int count)
    {
        if (count == 0) return 0;

        var pending = _prefix.Length - _prefixPosition;
        if (pending > 0)
        {
            // Never copy more than the caller asked for; the rest of the prefix
            // is delivered by subsequent calls.
            var chunk = Math.Min(pending, count);
            Buffer.BlockCopy(_prefix, _prefixPosition, buffer, offset, chunk);
            _prefixPosition += chunk;
            return chunk;
        }

        return _stream.Read(buffer, offset, count);
    }

    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new NotSupportedException();
    }

    public override void SetLength(long value)
    {
        throw new NotSupportedException();
    }

    public override void Write(byte[] buffer, int offset, int count)
    {
        throw new NotSupportedException();
    }
}
1

如果您不想破坏XML读取的一致性,可以用类似下面的读取器,避免在XDocument中构建开头的若干元素:

// XmlReader wrapper whose Read() consumes (without surfacing) element subtrees
// found below the first element of the wrapped reader, until the internal
// skipped counter reaches the requested skip count; after that every Read()
// is forwarded unchanged. Only Read() is shown here; the remaining XmlReader
// members are elided in this listing.
class XmlSkipReader : XmlReader 
{ 
    private readonly XmlReader _reader; 
    // Target value for _skipped; once reached, Read() is a pure pass-through.
    private readonly int _skip; 
    // _level: incremented on Element nodes, decremented on EndElement nodes
    //         (the decrement only happens inside the skip loop below).
    // _skipped: loop counter of the skip loop, compared against _skip.
    private int _level, _skipped; 
    public XmlSkipReader(XmlReader reader, int skip) 
    { 
     _reader = reader; 
     _skip = skip; 
    } 

    // Advances the wrapped reader. Returns false as soon as the wrapped reader
    // reports that no more nodes are available.
    public override bool Read() 
    { 
     // Skipping finished: behave exactly like the wrapped reader.
     if (_skipped == _skip) return _reader.Read(); 
     // No element seen yet: surface every node as-is, and note the first
     // Element by raising _level to 1.
     if (_level < 1) 
     { 
      if(!_reader.Read()) return false; 
      switch(_reader.NodeType) 
      { 
       case XmlNodeType.Element: ++_level; break; 
      } 
      return true; 
     } 
     // Past the first element: non-element nodes are surfaced unchanged; an
     // Element raises _level and falls through to the skip loop.
     if(!_reader.Read()) return false; 
     switch(_reader.NodeType) 
     { 
      case XmlNodeType.Element: 
       ++_level; 
       break; 
      default: return true; 
     } 

     // Consume nodes until _level is back at 1, tracking nested start/end tags.
     // NOTE(review): after the first pass _level is already 1, so the inner
     // while does not execute on later iterations while _skipped still
     // increments — it looks like skip > 1 discards only one subtree; confirm.
     // NOTE(review): an empty element (<x/>) may not produce an EndElement
     // node, in which case _level would not be decremented — verify.
     for(; _skipped < _skip; ++_skipped) 
     { 
      while(_level > 1) 
      { 
       if(!_reader.Read()) return false; 
       switch(_reader.NodeType) 
       { 
        case XmlNodeType.Element: 
         ++_level; 
         break; 
        case XmlNodeType.EndElement: 
         --_level; 
         break; 
       } 
      } 
     } 
     // Move past the last consumed node so the caller sees what follows it.
     return _reader.Read(); 
    } 
    // rest is just proxy to _reader 
} 

// Demo: load a two-contact document through XmlSkipReader with a skip count
// of 1 and print the <Contact> elements that end up in the XDocument.
void Main() 
{ 
    const string declaration = "<?xml version=\"1.0\"?>";
    const string body = @" 
<Contacts> 
    <Contact> 
     <Name>Todd</Name> 
     <Email>[email protected]</Email> 
    </Contact> 
    <Contact> 
     <Name>Sarah</Name> 
     <Email>[email protected]</Email> 
    </Contact> 
</Contacts>"; 
    var xml = declaration + body;

    using (var textSource = new StringReader(xml))
    {
        using (var plainReader = XmlReader.Create(textSource))
        {
            using (var skippingReader = new XmlSkipReader(plainReader, 1))
            {
                var loaded = XDocument.Load(skippingReader);
                var remaining = loaded.Descendants("Contact");

                // Dump() is the LINQPad output helper.
                remaining.Dump();
            }
        }
    }
} 
2

如果您仍然希望从特定的偏移量开始读取,同时保持在较高的抽象层次,可以使用下面的XmlTailReader:它为只剩下结束标签的内容补上一个额外的根元素,从而构成完整的文档:

// XmlReader wrapper that presents the nodes of a wrapped reader as if they
// were enclosed in a synthetic root element. The synthetic start and end nodes
// come from a second reader (_fakeReader) built over an in-memory element;
// the _fake state decides which of the two readers is currently exposed.
// Only Read(), Value and NodeType are shown; the other members are elided.
class XmlTailReader : XmlReader 
{ 
    private readonly XmlReader _reader; 
    // Reader over the synthetic root element created in the constructor.
    private readonly XmlReader _fakeReader; 
    // Element nesting depth as tracked by Read(); the synthetic root counts as 1.
    private int _level; 
    // Start: surfacing nodes of the fake reader until its Element is reached.
    // Align: the next Read() advances the real reader to its first Element.
    // None:  nodes come from the real reader.
    // End:   nodes come from the fake reader again (its remaining nodes).
    enum Fake { Start, Align, None, End }; 
    private Fake _fake; 

    // reader:  the wrapped reader supplying the real content.
    // rootTag: name of the synthetic root element (defaults to "root").
    public XmlTailReader(XmlReader reader, string rootTag = "root") 
    { 
     _reader = reader; 
     _fake = Fake.Start; 

     var doc = new XmlDocument(); 
     var root = doc.CreateElement(rootTag); 
     doc.AppendChild(root); 
     // make sure that we'll get Element/EndElement 
     root.AppendChild(doc.CreateComment("dummy")); 
     _fakeReader = new XmlNodeReader(root); 
    } 

    // The reader whose current node is visible to the consumer: the fake
    // reader in every state except None.
    private XmlReader Proxy 
    { 
     get 
     { 
      switch(_fake) 
      { 
      case Fake.Start: 
      case Fake.Align: 
      case Fake.End: 
       return _fakeReader; 
      default: 
       return _reader; 
      } 
     } 
    } 

    // Advances whichever reader the current state calls for and performs the
    // state transitions. Returns false when the reader being advanced has no
    // more nodes.
    public override bool Read() 
    { 
     switch(_fake) 
     { 
     case Fake.Start: 
      if (!_fakeReader.Read()) return false; 
      // NodeType goes through Proxy, i.e. the fake reader in this state.
      if (NodeType == XmlNodeType.Element) 
      { 
       ++_level; 
       // Proxy still returns the fake reader in Align, so the consumer
       // sees the synthetic root Element for this call.
       _fake = Fake.Align; 
      } 
      return true; 
     case Fake.Align: 
      // Switch first so that NodeType below reflects the real reader.
      _fake = Fake.None; 
      while(true) // align to first Element 
      { 
       if (!_reader.Read()) return false; 
       if (NodeType == XmlNodeType.Element) 
       { 
        ++_level; 
        break; 
       } 
      } 
      return true; 
     case Fake.None: 
      try 
      { 
       if (!_reader.Read()) return false; 
      } 
      catch (XmlException e) 
      { 
       // if (!e.Message.StartsWith("Unexpected end tag.")) throw; 
       // reading of extra-closing tag cause "Unexpected end tag" 
       // so use this as event for transition too 
       // NOTE(review): with the filter above commented out, any
       // XmlException from the real reader triggers this transition.
       _fake = Fake.End; 
       if (!_fakeReader.Read()) return false; 
       return true; 
      } 
      switch(NodeType) 
      { 
      case XmlNodeType.Element: 
       ++_level; 
       break; 
      case XmlNodeType.EndElement: 
       // Depth dropped to 0: hand over to the fake reader and advance it
       // past the node it was left on.
       if (--_level == 0) 
       { 
        _fake = Fake.End; 
        if (!_fakeReader.Read()) return false; 
       } 
       break; 
      } 
      return true; 
     default: 
      // Fake.End: keep draining the fake reader.
      return Proxy.Read(); 
     } 
    } 

    public override string Value 
    { 
     get { return Proxy.Value; } 
    } 

    public override XmlNodeType NodeType 
    { 
     get { return Proxy.NodeType; } 
    } 
    // rest use Proxy property for forwarding 
} 

// Demo: cut the document right after the first closing </Contact> tag, read
// the remaining tail as an XML fragment, and let XmlTailReader wrap it in a
// synthetic root element so XDocument can load it.
void Main() 
{ 
    var xml = "<?xml version=\"1.0\"?>" + @" 
<Contacts> 
    <Contact> 
     <Name>Todd</Name> 
     <Email>[email protected]</Email> 
    </Contact> 
    <Contact> 
     <Name>Sarah</Name> 
     <Email>[email protected]</Email> 
    </Contact> 
    <Contact> 
     <Name>Peter</Name> 
     <Email>[email protected]</Email> 
    </Contact> 
</Contacts>"; 
    const string tag = "</Contact>"; 

    // Ordinal search: markup is not linguistic text, so the match must not
    // depend on the current culture. Fail loudly when the tag is missing
    // instead of slicing at a meaningless offset (-1 + tag.Length).
    var tagIndex = xml.IndexOf(tag, StringComparison.Ordinal);
    if (tagIndex < 0)
    {
        throw new InvalidOperationException("Closing tag not found: " + tag);
    }
    var xml2 = xml.Substring(tagIndex + tag.Length); 

    // The tail has no single root and ends with a stray closing tag, so the
    // underlying reader has to run in fragment mode.
    using(var sr = new StringReader(xml2)) 
    using(var xr = XmlReader.Create(sr, new XmlReaderSettings { ConformanceLevel = ConformanceLevel.Fragment, })) 
    using(var xr2 = new XmlTailReader(xr, "xxx")) 
    { 
     var xdoc = XDocument.Load(xr2); 
     // Dump() is the LINQPad output helper.
     xdoc.Descendants("Contact").Dump(); 
    } 
} 

注意:以这种方式读取时,ConformanceLevel 应设置为 Fragment。