2017-07-31 17 views
0

我在DataTable中有大约300K行。第一列是“utcDT”,其中包含精确到分钟的DateTime。如何提高按天对DataTable分组的方法的速度?

我想按日期将数据分组到“ReportDailyData”列表中。我的方法在下面,但大约需要8秒钟才能运行。我需要做得更快。

有没有更好的方式来做到这一点?

/// <summary>
/// Aggregated figures for a single day: the day itself, its PnL, its trade count and its volume.
/// </summary>
public class ReportDailyData
{
    /// <summary>The date/time value this record was created for.</summary>
    public DateTime UtcDT;

    /// <summary>PnL figure for the day, as supplied to the constructor.</summary>
    public double Day_Pnl;

    /// <summary>Trade count for the day, as supplied to the constructor.</summary>
    public int TradeCount;

    /// <summary>Volume for the day, as supplied to the constructor.</summary>
    public int Volume;

    /// <summary>
    /// Stores the four supplied values unchanged in the corresponding fields.
    /// </summary>
    /// <param name="utcDT">Value for <see cref="UtcDT"/>.</param>
    /// <param name="day_Pnl">Value for <see cref="Day_Pnl"/>.</param>
    /// <param name="tradeCount">Value for <see cref="TradeCount"/>.</param>
    /// <param name="volume">Value for <see cref="Volume"/>.</param>
    public ReportDailyData(DateTime utcDT, double day_Pnl, int tradeCount, int volume)
    {
        UtcDT = utcDT;
        Day_Pnl = day_Pnl;
        TradeCount = tradeCount;
        Volume = volume;
    }

    /// <summary>
    /// Formats the record as one comma-separated line:
    /// <c>yyyyMMdd,PnL with two decimals,TradeCount,Volume</c>.
    /// </summary>
    /// <remarks>
    /// Every part is formatted with the invariant culture. With the current culture a
    /// decimal separator of "," would add an extra comma and break the column layout.
    /// </remarks>
    /// <returns>The comma-separated representation of this record.</returns>
    public string AsString()
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        return string.Join(
            ",",
            UtcDT.ToString("yyyyMMdd", invariant),
            Day_Pnl.ToString("F2", invariant),
            TradeCount.ToString(invariant),
            Volume.ToString(invariant));
    }
}

// Source table for the report; GetDailyData reads its "utcDT", "Bar_Pnl" and "Position" columns.
public static DataTable Data; 
// Supplies the FromDT / ToDT bounds of the day range that GetDailyData walks.
public static DataSpecification DataSpec; 

/// <summary>
/// Example entry point: requests the per-day list from <c>GetDailyData</c>.
/// The result is only held in a local and not used further here.
/// </summary>
public void Go()
{
    // Data and DataSpec are filled elsewhere before this runs.
    List<ReportDailyData> dailylist = GetDailyData();
}

    /// <summary>
    /// Builds one <see cref="ReportDailyData"/> per calendar day, from
    /// <c>DataSpec.FromDT.Date</c> through <c>DataSpec.ToDT.Date</c> inclusive.
    /// </summary>
    /// <remarks>
    /// For each day, over the rows of <c>Data</c> whose "utcDT" falls on that day:
    /// the PnL is the sum of "Bar_Pnl"; the trade count is the number of non-zero
    /// differences between consecutive "Position" values; the volume is the sum of the
    /// absolute values of those differences, truncated to <c>int</c>.
    /// Days without rows yield a record with all figures zero.
    /// <para>
    /// The table is bucketed by day once up front. Filtering the whole table again for
    /// every day in the range costs roughly (days x rows) comparisons.
    /// </para>
    /// </remarks>
    /// <returns>The per-day records in ascending day order.</returns>
    public List<ReportDailyData> GetDailyData()
    {
        List<ReportDailyData> dailyDatas = new List<ReportDailyData>();
        DateTime firstDay = DataSpec.FromDT.Date;
        DateTime lastDay = DataSpec.ToDT.Date;

        // Empty range: nothing to report, and the table is never touched.
        if (firstDay > lastDay)
        {
            return dailyDatas;
        }

        // Single pass over the table. ToLookup keeps rows in table order inside each
        // bucket, which the consecutive-position deltas below rely on.
        ILookup<DateTime, DataRow> rowsByDay = Data.AsEnumerable()
            .ToLookup(row => row.Field<DateTime>("utcDT").Date);

        for (DateTime currentDT = firstDay; currentDT <= lastDay; currentDT = currentDT.AddDays(1))
        {
            double day_Pnl = 0;
            double absDeltaSum = 0;
            int tradeCount = 0;
            bool hasPrevious = false;
            double previousPosition = 0;

            // A lookup returns an empty sequence for a missing key, so days without
            // rows fall through with all-zero totals.
            foreach (DataRow row in rowsByDay[currentDT])
            {
                day_Pnl += row.Field<double>("Bar_Pnl");

                double position = row.Field<double>("Position");
                if (hasPrevious)
                {
                    double delta = position - previousPosition;
                    if (delta != 0)
                    {
                        tradeCount++;
                        absDeltaSum += Math.Abs(delta);
                    }
                }

                previousPosition = position;
                hasPrevious = true;
            }

            dailyDatas.Add(new ReportDailyData(currentDT, day_Pnl, tradeCount, (int)absDeltaSum));
        }

        return dailyDatas;
    }

回答

0

如果我理解正确 - 您是想对某个数据集合执行分组,对吗?

如果是这样 - 为什么不使用LINQ的GroupBy方法?

一个简单的例子如下:

/// <summary>
/// Sample driver: builds four <c>MyData</c> rows dated relative to <c>DateTime.UtcNow</c>
/// (now, one day back twice, two days back) and passes them to
/// <c>GroupReportDataAndFormat</c>.
/// </summary>
void Main()
{
    var data = new List<MyData>
    {
        new MyData { UtcDT = DateTime.UtcNow, Volume = 1 },
        new MyData { UtcDT = DateTime.UtcNow.AddDays(-1), Volume = 1 },
        new MyData { UtcDT = DateTime.UtcNow.AddDays(-1), Volume = 4 },
        new MyData { UtcDT = DateTime.UtcNow.AddDays(-2), Volume = 5 },
    };

    var result = GroupReportDataAndFormat(data);
}

    /// <summary>
    /// Groups the rows by the date part of <c>UtcDT</c> and sums <c>Volume</c> per day.
    /// </summary>
    /// <param name="data">Rows to aggregate.</param>
    /// <returns>A dictionary from day to the summed volume of that day.</returns>
    public Dictionary<DateTime, int> GroupReportDataAndFormat(List<MyData> data)
    {
        var rowsByDay = data.GroupBy(row => row.UtcDT.Date);

        return rowsByDay.ToDictionary(
            day => day.Key,
            day => day.Sum(row => row.Volume));
    }

    /// <summary>
    /// Sample input row: a timestamp and a volume.
    /// </summary>
    public class MyData
    {
        /// <summary>Timestamp of the row; the grouping uses its date part.</summary>
        public DateTime UtcDT { get; set; }

        /// <summary>Volume value that gets summed per day.</summary>
        public int Volume { get; set; }
    }

当然 - 出于性能方面的考虑,您应该在数据库层面进行分组(编写一个直接返回已分组数据的查询)。

===== UPDATE =====

ManInMoon:我已经更新了解决方案,以适应您的情况:

/// <summary>
/// Sample driver: builds four <c>MyData</c> rows dated relative to <c>DateTime.UtcNow</c>
/// (now, one day back twice, two days back) and passes them to
/// <c>GroupReportDataAndFormat</c>.
/// </summary>
void Main()
{
    var data = new List<MyData>
    {
        new MyData { UtcDT = DateTime.UtcNow, DayPnl = 1, Positions = 3 },
        new MyData { UtcDT = DateTime.UtcNow.AddDays(-1), DayPnl = 1, Positions = 4 },
        new MyData { UtcDT = DateTime.UtcNow.AddDays(-1), DayPnl = 4, Positions = 5 },
        new MyData { UtcDT = DateTime.UtcNow.AddDays(-2), DayPnl = 5, Positions = 6 },
    };

    var result = GroupReportDataAndFormat(data);
}

/// <summary>
/// Groups the rows by the date part of <c>UtcDT</c> and builds one <c>GroupResult</c> per day:
/// the summed <c>DayPnl</c> and the differences between consecutive <c>Positions</c> values.
/// </summary>
/// <remarks>
/// The deltas are materialized into a list once per day. Left as a deferred <c>Zip</c>
/// query, they would be recomputed on every enumeration, and <c>GroupResult.TradeCount</c>
/// and <c>GroupResult.Volume</c> each enumerate them on every access.
/// </remarks>
/// <param name="data">Rows to aggregate; order within a day determines the deltas.</param>
/// <returns>A dictionary from day to that day's aggregated result.</returns>
public Dictionary<DateTime, GroupResult> GroupReportDataAndFormat(List<MyData> data)
{
    return data.GroupBy(t => t.UtcDT.Date).ToDictionary(
        k => k.Key,
        v =>
        {
            // Snapshot the day's positions so the Zip below walks a plain list.
            List<double> positions = v.Select(t => t.Positions).ToList();

            return new GroupResult
            {
                DayPnlSum = v.Sum(s => s.DayPnl),
                Deltas = positions
                    .Zip(positions.Skip(1), (current, next) => next - current)
                    .ToList()
            };
        });
}

/// <summary>
/// Per-day aggregation result: the PnL sum plus the position deltas, from which the
/// trade count and volume are derived on each access.
/// </summary>
public class GroupResult
{
    /// <summary>Summed PnL for the day.</summary>
    public double DayPnlSum { get; set; }

    /// <summary>Differences between consecutive position values of the day.</summary>
    public IEnumerable<double> Deltas { get; set; }

    /// <summary>Number of non-zero entries in <see cref="Deltas"/>.</summary>
    public int TradeCount => Deltas.Count(delta => delta != 0);

    /// <summary>
    /// Sum of the absolute values of the non-zero entries in <see cref="Deltas"/>,
    /// truncated to <c>int</c>.
    /// </summary>
    public int Volume => (int)Deltas
        .Where(delta => delta != 0)
        .Select(delta => Math.Abs(delta))
        .Sum();
}
/// <summary>
/// Sample input row: a timestamp, a PnL value and a position value.
/// </summary>
public class MyData
{
    /// <summary>Timestamp of the row; the grouping uses its date part.</summary>
    public DateTime UtcDT { get; set; }

    /// <summary>PnL value that gets summed per day.</summary>
    // NOTE(review): declared as int here, while the question reads "Bar_Pnl" as double.
    // Fractional PnL would not fit - confirm the intended type.
    public int DayPnl { get; set; }

    /// <summary>Position value; consecutive differences of it form the deltas.</summary>
    public double Positions { get; set; }
}

当然,您也可以改为在分组期间就计算出TradeCount和Volume属性(即不使用延迟计算)。

+0

我的版本中有更复杂的Zip操作,这就是我没有使用GroupBy的原因。 – ManInMoon

+0

有没有办法把Zip部分加到你的代码中? – ManInMoon

+0

我已经更新了解决方案以适应您的要求 – Piotr

0

我的建议是:先按utcDT排序,然后线性枚举结果,手动进行分组并聚合到一个新的数据结构中。每遇到一个新的utcDT日期,就创建一个新的ReportDailyData实例,然后只要utcDT的日期保持不变,就持续将值聚合到该实例中。