2013-08-26 291 views
0

我想比较两个输入的CSV文件,查看是否有新增或删除的行。最好的做法是什么?我没有使用列名,因为各个文件的列名并不一致。比较两个Excel文件的差异

/// <summary>
/// Click handler for the compare button: reads both path text boxes and runs
/// ReadExcel on each of the two file dialogs. The values are only loaded here;
/// no comparison is performed yet.
/// </summary>
private void compare_btn_Click(object sender, EventArgs e)
{
    // First file: text-box content and the ReadExcel result for the first dialog.
    string pathA = firstExcel_txt.Text;
    var rowsA = ReadExcel(openFileDialog1);

    // Second file: same two steps against the second text box and dialog.
    string pathB = secondExcel_txt.Text;
    var rowsB = ReadExcel(openFileDialog2);
}

阅读:

/// <summary>
/// Builds a query over the rows of the workbook whose path is held by
/// <paramref name="openFileDialog"/>, reading the worksheet without a header row.
/// </summary>
/// <param name="openFileDialog">Dialog whose FileName is the workbook to open.</param>
/// <returns>The row query, typed as object to keep the existing signature.</returns>
public object ReadExcel(OpenFileDialog openFileDialog)
{
    var excelFile = new ExcelQueryFactory(openFileDialog.FileName);

    // No individual cells are touched here, so rows of any width are accepted.
    // NOTE(review): the query is presumably evaluated only when the caller enumerates it - confirm.
    var rows = from row in excelFile.WorksheetNoHeader() select row;
    return rows;
}

我该如何实现这一点?非常感谢任何帮助。

+1

最好也是最准确的方法是将它们都转换为字节数组,转换完成后再进行比较。以下链接将帮助您将Excel表转换为字节数组:http://www.c-sharpcorner.com/UploadFile/1a81c5/convert-file-to-byte-array-and-byte-array-to-files/ – Max

+0

Masriyah你只有3列,或者你只是简化了你的代码?我没有看到你在哪里保持Excel文件的内容来执行比较 –

+0

或者你可以去掉列名,对其余内容计算哈希。如果这两个文件的哈希匹配,那么它们的数据逐字相同。取决于所使用的算法,哈希碰撞的可能性是存在的,但它极小——在发生碰撞之前地狱都会结冰。 – Renan

回答

1

我建议你为Excel文件中的每一行计算哈希,然后比较每一行的哈希,看它是否与另一个文件中的任何哈希匹配(请参阅源代码中的注释)

我还提供了一些类来存储Excel文件的内容

using System.Security.Cryptography; 

/// <summary>
/// Click handler for the compare button: loads the workbook selected in each
/// file dialog and prints the row differences between them.
/// </summary>
/// <remarks>
/// The files are taken from openFileDialog1 / openFileDialog2; the contents of the
/// path text boxes are not consulted.
/// </remarks>
private void compare_btn_Click(object sender, EventArgs e)
{
    ExcelInfo file1 = ReadExcel(openFileDialog1);
    ExcelInfo file2 = ReadExcel(openFileDialog2);

    CompareExcels(file1, file2);
}

/// <summary>
/// Writes to the console every row that exists in only one of the two files.
/// Rows are matched by hash: a row of <paramref name="fileA"/> whose hash is absent from
/// <paramref name="fileB"/> is reported as removed, and a row of <paramref name="fileB"/>
/// whose hash is absent from <paramref name="fileA"/> is reported as added.
/// </summary>
/// <remarks>
/// Only presence of a hash is checked, so a changed number of identical duplicate rows
/// and rows that merely moved are not reported.
/// </remarks>
/// <param name="fileA">The original file.</param>
/// <param name="fileB">The file to compare against the original.</param>
public void CompareExcels(ExcelInfo fileA, ExcelInfo fileB)
{
    ReportRowsMissingFrom(fileA, fileB, "Row removed: ");
    ReportRowsMissingFrom(fileB, fileA, "Row added: ");
}

/// <summary>
/// Prints, prefixed with <paramref name="label"/>, each row of <paramref name="source"/>
/// whose hash does not occur in <paramref name="other"/>.
/// </summary>
private static void ReportRowsMissingFrom(ExcelInfo source, ExcelInfo other, string label)
{
    foreach (ExcelRow row in source.excelRows)
    {
        if (!other.ContainsHash(row.hash))
        {
            // The label ends with ": " so the row text is separated from the message.
            Console.WriteLine(label + row.ToString());
        }
    }
}

/// <summary>
/// One spreadsheet row: the text of its cells plus an MD5 digest of that text,
/// so that rows from two files can be compared by hash.
/// </summary>
public class ExcelRow
{
    /// <summary>Cell texts of the row, in the order they were added.</summary>
    public List<String> lstCells;

    /// <summary>MD5 digest of the row; null until <see cref="CalculateHash"/> has run.</summary>
    public byte[] hash;

    public ExcelRow()
    {
        lstCells = new List<String>();
    }

    /// <summary>
    /// Returns the cell texts joined with commas. Empty cells are kept, so the
    /// number of separators always reflects the number of cells.
    /// </summary>
    public override string ToString()
    {
        return string.Join(",", lstCells.ToArray());
    }

    /// <summary>
    /// Computes the MD5 digest of all cells and stores it in <see cref="hash"/>.
    /// </summary>
    public void CalculateHash()
    {
        List<byte> rowBytes = new List<byte>();

        foreach (string cellText in lstCells)
        {
            // A null cell is hashed like an empty one instead of throwing.
            byte[] cellBytes = GetBytes(cellText ?? string.Empty);

            // Each cell is prefixed with its byte length so that cell boundaries are part
            // of the hashed data: ("ab","c") and ("a","bc") must not produce the same digest.
            rowBytes.AddRange(System.BitConverter.GetBytes(cellBytes.Length));
            rowBytes.AddRange(cellBytes);
        }

        using (MD5 md5 = MD5.Create())
        {
            hash = md5.ComputeHash(rowBytes.ToArray());
        }
    }

    /// <summary>Number of bytes occupied by the UTF-16 code units of <paramref name="str"/>.</summary>
    static int NumBytes(string str)
    {
        return str.Length * sizeof(char);
    }

    /// <summary>Copies the raw UTF-16 code units of <paramref name="str"/> into a new byte array.</summary>
    static byte[] GetBytes(string str)
    {
        byte[] bytes = new byte[NumBytes(str)];
        System.Buffer.BlockCopy(str.ToCharArray(), 0, bytes, 0, bytes.Length);
        return bytes;
    }
}
/// <summary>
/// In-memory content of one Excel file: the list of its rows, each carrying its own hash.
/// </summary>
public class ExcelInfo
{
    /// <summary>Rows of the file, in the order they were added.</summary>
    public List<ExcelRow> excelRows;

    public ExcelInfo()
    {
        excelRows = new List<ExcelRow>();
    }

    /// <summary>
    /// Tells whether any row of this file has a hash equal to <paramref name="hashToLook"/>.
    /// </summary>
    /// <param name="hashToLook">Hash to search for.</param>
    /// <returns>true as soon as a matching row is found; false if none matches.</returns>
    public bool ContainsHash(byte[] hashToLook)
    {
        foreach (ExcelRow eRow in excelRows)
        {
            if (EqualHash(eRow.hash, hashToLook))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Compares two hashes byte by byte.
    /// </summary>
    /// <returns>
    /// true when both arrays have the same length and the same bytes in the same order;
    /// false otherwise, including when either array is null (a hash that was never computed
    /// matches nothing).
    /// </returns>
    public static bool EqualHash(byte[] hashA, byte[] hashB)
    {
        if (hashA == null || hashB == null || hashA.Length != hashB.Length)
        {
            return false;
        }

        for (int i = 0; i < hashA.Length; i++)
        {
            if (hashA[i] != hashB[i])
            {
                return false;
            }
        }

        return true;
    }
}

/// <summary>
/// Reads the workbook whose path is held by <paramref name="openFileDialog"/> (worksheet read
/// without a header row) into an <see cref="ExcelInfo"/>, computing the hash of every row.
/// </summary>
/// <param name="openFileDialog">Dialog whose FileName is the workbook to read.</param>
/// <returns>An ExcelInfo holding one ExcelRow, with its hash calculated, per worksheet row.</returns>
public ExcelInfo ReadExcel(OpenFileDialog openFileDialog)
{
    var excelFile = new ExcelQueryFactory(openFileDialog.FileName);
    var rows = from c in excelFile.WorksheetNoHeader() select c;

    ExcelInfo resp = new ExcelInfo();

    foreach (var item in rows)
    {
        ExcelRow excelRow = new ExcelRow();

        // Copy every cell of the row, however many columns it has.
        // NOTE(review): relies on the cell type converting implicitly to string - confirm
        // against the LinqToExcel version in use.
        foreach (var cell in item)
        {
            excelRow.lstCells.Add(cell);
        }

        // The hash must be calculated after all cells are in place.
        excelRow.CalculateHash();

        resp.excelRows.Add(excelRow);
    }

    return resp;
}
+0

我会继续尝试,并会让你知道谢谢。 – Masriyah

+0

在ReadExcel方法中,我写的'return _info'报错了:提示我缺少强制转换,无法从LINQ IQueryable转换为ExcelInfo(ExcelFileReader)。 – Masriyah

+1

@Masriyah 对不起,你需要写成“return resp” –

0

最准确的方法是将二者都转换为字节数组(byte array),转换完成后再检查差异。下面的链接给出了一个将Excel工作表转换为字节数组的简单示例:

Convert Excel to Byte[]

现在你已经把两个Excel工作表都转换成了byte[],接下来应该检查这两个字节数组是否相等(是或否),以判断它们是否存在差异。

这种检查可以通过多种方式实现,例如下面使用LINQ的方式:

using System.Linq; //SequenceEqual 

// Fetch the two byte arrays to compare.
// NOTE(review): GetFirstExcelFile/GetSecondExcelFile are not shown here; presumably they
// return the raw contents of each workbook - confirm.
byte[] firstBytes = GetFirstExcelFile();
byte[] secondBytes = GetSecondExcelFile();

// SequenceEqual compares the two arrays element by element, in order.
bool identical = firstBytes.SequenceEqual(secondBytes);

MessageBox.Show(identical ? "Arrays are equal" : "Arrays don't match");

比较字节数组还有很多其他方法,你应该研究一下哪一种最适合你。

要检查“Row added”、“Row removed”之类的情况,请使用下面的链接:

Compare excelsheets

+0

我相信这会有很大的帮助。不过我更想知道的是某一行被添加或删除这样的信息——这有可能做到吗? – Masriyah

+0

给出的链接就是为了实现这一目标;比较字节数组只会返回true或false。 – Max