2017-08-07 78 views
0

我在写一个使用OpenXML读取excel文件的程序。该文件还包含日期,所以我需要一个识别它们的函数。这里在stackoverflow我发现这个解决方案:How to distinguish inline numbers from OLE Automation date numbers in OpenXML SpreadSheet?NumberingFormats始终为空,为什么?

我tryed将其应用到我的代码,但我每次碰到下面的错误时间:

“类型‘System.NullReferenceException’未处理的异常发生在OpenXML_Prova.exe

更多信息: 你调用的对象是空的。”

在GetDateTimeCellFormats的第一行,其中似乎NumberingFormats始终为空。

下面是完整的代码:

using DocumentFormat.OpenXml; 
using DocumentFormat.OpenXml.Packaging; 
using DocumentFormat.OpenXml.Spreadsheet; 
using System; 
using System.Collections.Generic; 
using System.IO; 
using System.Linq; 
using System.Text; 
using System.Text.RegularExpressions; 
using System.Threading.Tasks; 



namespace OpenXML_Prova 
{ 
    class Program 
    { 

     DocumentFormat.OpenXml.Spreadsheet.NumberingFormats numb = new NumberingFormats(); 

     static uint[] builtInDateTimeNumberFormatIDs = new uint[] { 14, 15, 16, 17, 18, 19, 20, 21, 22, /*27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57, 58 */}; 
     static Dictionary<uint, NumberingFormat> builtInDateTimeNumberFormats = builtInDateTimeNumberFormatIDs.ToDictionary(id => id, id => new NumberingFormat { NumberFormatId = id }); 
     static Regex dateTimeFormatRegex = new Regex(@"((?=([^[]*\[[^[\]]*\])*([^[]*[ymdhs]+[^\]]*))|.*\[(h|mm|ss)\].*)", RegexOptions.Compiled); 

     static void Main(string[] args) { 
      Program prova = new Program(); 
      prova.ReadFile(); 
     } 

     void ReadFile() 
     { 

      var filePath = @"C:\\Users\\m.p\\Desktop\\report_fatturato_brevissimo.xlsx"; 
      using (FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) 
      { 
       using (SpreadsheetDocument doc = SpreadsheetDocument.Open(fs, false)) 
       { 
        WorkbookPart workbookPart = doc.WorkbookPart; 
        SharedStringTablePart sstpart = workbookPart.GetPartsOfType<SharedStringTablePart>().First(); 
        SharedStringTable sst = sstpart.SharedStringTable; 

        WorksheetPart worksheetPart = workbookPart.WorksheetParts.First(); 
        Worksheet sheet = worksheetPart.Worksheet; 

        var cells = sheet.Descendants<Cell>(); 
        var rows = sheet.Descendants<Row>(); 

        Console.WriteLine("Row count = {0}", rows.LongCount()); 
        Console.WriteLine("Cell count = {0}", cells.LongCount()); 

        // Or... via each row 
        foreach (Row row in rows) 
        { 
         foreach (Cell c in row.Elements<Cell>()) 
         { 
          if ((c.DataType != null) && (c.DataType == CellValues.SharedString)) 
          { 
           int ssid = int.Parse(c.CellValue.Text); 
           string str = sst.ChildElements[ssid].InnerText; 
           Console.Write/*Line*/(/*"Shared string {0}: {1}", ssid, */str + "\t"); 
          } 
          else 
          { 
           if (IsDateTimeCell(workbookPart, c) == true) 
           { 
            int ssid = int.Parse(c.CellValue.Text); 
            DateTime date = DateTime.FromOADate(Double.Parse(c.CellValue.Text)); 
            Console.Write/*Line*/(/*"Shared string {0}: {1}", ssid, */date + "\t"); 
           } 
           else if (c.CellValue != null) 
           { 
            Console.Write/*Line*/(/*"Cell contents: {0}", */c.CellValue.Text + "\t"); 
           } 
          } 
         } 
         Console.WriteLine(""); 
        } 
       } 
      } 
      Console.Read(); 
     } 


     public static Dictionary<uint, NumberingFormat> GetDateTimeCellFormats(WorkbookPart wbPart) 
     { 

//Error on the following instruction: NumberingFormats seems to be always null 
      var dateNumberFormats = wbPart.WorkbookStylesPart.Stylesheet.NumberingFormats 
       .Descendants<NumberingFormat>() 
       .Where(nf => dateTimeFormatRegex.Match(nf.FormatCode.Value).Success) 
       .ToDictionary(nf => nf.NumberFormatId.Value); 

      var cellFormats = wbPart.WorkbookStylesPart.Stylesheet.CellFormats 
       .Descendants<CellFormat>(); 

      var dateCellFormats = new Dictionary<uint, NumberingFormat>(); 
      uint styleIndex = 0; 
      foreach (var cellFormat in cellFormats) 
      { 
       if (cellFormat.ApplyNumberFormat != null && cellFormat.ApplyNumberFormat.Value) 
       { 
        if (dateNumberFormats.ContainsKey(cellFormat.NumberFormatId.Value)) 
        { 
         dateCellFormats.Add(styleIndex, dateNumberFormats[cellFormat.NumberFormatId.Value]); 
        } 
        else if (builtInDateTimeNumberFormats.ContainsKey(cellFormat.NumberFormatId.Value)) 
        { 
         dateCellFormats.Add(styleIndex, builtInDateTimeNumberFormats[cellFormat.NumberFormatId.Value]); 
        } 
       } 

       styleIndex++; 
      } 

      return dateCellFormats; 
     } 

     //Usage Example 
     public static bool IsDateTimeCell(WorkbookPart wbPart, Cell cell) 
     { 
      if (cell.StyleIndex == null) 
       return false; 

      var dateTimeCellFormats = GetDateTimeCellFormats(wbPart); 

      return dateTimeCellFormats.ContainsKey(cell.StyleIndex); 
     } 


    } 
} 

回答

0

Excel用于在电子表格单元格,基本格式OpenXML has 164 built-in formats。默认的日期时间格式是这些内置类型的一部分。

仅当您使用Excel(自定义格式)创建新格式时,它们是否会包含在StylesheetNumberingFormats部分。

我建议你换你的代码像下面检查NumberingFormats,避免NullReferenceException

if (wbPart.WorkbookStylesPart.Stylesheet.NumberingFormats != null) {

然后,你将需要添加一些代码来检测的日期时间格式。 这个SO article tries来确定可能的日期时间样式。查看一些评论和上面的第一个链接,以参考提供更多细节的规范。