我需要编写应用程序来删除非常大的XML文件(大约3,5 GB)中的特定文本行。删除XML文件中的文本行C#.NET
我写了这个代码:
string directoryPath;
OpenFileDialog ofd = new OpenFileDialog();
private void button1_Click(object sender, EventArgs e)
{
ofd.Filter = "XML|*.xml";
if (ofd.ShowDialog() == DialogResult.OK)
{
directoryPath = Path.GetDirectoryName(ofd.FileName);
textBox2.Text = directoryPath;
textBox1.Text = ofd.SafeFileName;
}
}
private void Replace()
{
StreamReader readerFile = new StreamReader(ofd.FileName, System.Text.Encoding.UTF8);
while (!readerFile.EndOfStream)
{
string stringReplaced;
string replaceResult = textBox2.Text + "\\" + "replace_results";
Directory.CreateDirectory(replaceResult);
StreamWriter writerFile = new StreamWriter(replaceResult + "\\" + textBox1.Text, true);
StringBuilder sb = new StringBuilder();
char[] buff = new char[10 * 1024 * 1024];
int xx = readerFile.ReadBlock(buff, 0, buff.Length);
sb.Append(buff);
stringReplaced = sb.ToString();
stringReplaced = stringReplaced.Replace("line to remove", string.Empty);
writerFile.WriteLine(stringReplaced);
writerFile.Close();
writerFile.Dispose();
stringReplaced = null;
sb = null;
}
readerFile.Close();
readerFile.Dispose();
}
private void button2_Click(object sender, EventArgs e)
{
if (!backgroundWorker1.IsBusy)
{
backgroundWorker1.RunWorkerAsync();
toolStripStatusLabel1.Text = "Replacing in progress...";
}
}
private void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
{
try
{
Replace();
toolStripStatusLabel1.Text = "Replacing complete!";
}
catch
{
toolStripStatusLabel1.Text = "Error! Replacing aborted!";
}
}
}
它的工作原理,但不是很好,因为新的文件(后删除线)比原来的文件,并在新文件的末尾添加一些垃圾更大(大量的点),截图:
https://images81.fotosik.pl/615/873833aa0e23b36f.jpg
如何解决我的代码,使新的文件一样的旧文件,只是没有具体的线路?
我怎么只能行做这项工作没有出内存异常?你能给我代码吗? :) – gos
@gos:中央循环应该是:读取行,进程,写入行。记忆中一次只有一行。 – Richard
@gos:查看扩展的答案。 – Richard