我有以下代码来读取一个大文件,说超过一百万行。我正在使用Parallel和Linq方法。有没有更好的方法来做到这一点?如果是,那么如何?在C#中读取csv文件的最佳方法,以提高时间效率
private static void ReadFile()
{
float floatTester = 0;
List<float[]> result = File.ReadLines(@"largedata.csv")
.Where(l => !string.IsNullOrWhiteSpace(l))
.Select(l => new { Line = l, Fields = l.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries) })
.Select(x => x.Fields
.Where(f => Single.TryParse(f, out floatTester))
.Select(f => floatTester).ToArray())
.ToList();
// now get your totals
int numberOfLinesWithData = result.Count;
int numberOfAllFloats = result.Sum(fa => fa.Length);
MessageBox.Show(numberOfAllFloats.ToString());
}
private static readonly char[] Separators = { ',', ' ' };
private static void ProcessFile()
{
var lines = File.ReadAllLines("largedata.csv");
var numbers = ProcessRawNumbers(lines);
var rowTotal = new List<double>();
var totalElements = 0;
foreach (var values in numbers)
{
var sumOfRow = values.Sum();
rowTotal.Add(sumOfRow);
totalElements += values.Count;
}
MessageBox.Show(totalElements.ToString());
}
private static List<List<double>> ProcessRawNumbers(IEnumerable<string> lines)
{
var numbers = new List<List<double>>();
/*System.Threading.Tasks.*/
Parallel.ForEach(lines, line =>
{
lock (numbers)
{
numbers.Add(ProcessLine(line));
}
});
return numbers;
}
private static List<double> ProcessLine(string line)
{
var list = new List<double>();
foreach (var s in line.Split(Separators, StringSplitOptions.RemoveEmptyEntries))
{
double i;
if (Double.TryParse(s, out i))
{
list.Add(i);
}
}
return list;
}
private void button1_Click(object sender, EventArgs e)
{
Stopwatch stopWatchParallel = new Stopwatch();
stopWatchParallel.Start();
ProcessFile();
stopWatchParallel.Stop();
// Get the elapsed time as a TimeSpan value.
TimeSpan ts = stopWatchParallel.Elapsed;
// Format and display the TimeSpan value.
string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}",
ts.Hours, ts.Minutes, ts.Seconds,
ts.Milliseconds/10);
MessageBox.Show(elapsedTime);
Stopwatch stopWatchLinQ = new Stopwatch();
stopWatchLinQ.Start();
ReadFile();
stopWatchLinQ.Stop();
// Get the elapsed time as a TimeSpan value.
TimeSpan ts2 = stopWatchLinQ.Elapsed;
// Format and display the TimeSpan value.
string elapsedTimeLinQ = String.Format("{0:00}:{1:00}:{2:00}.{3:00}",
ts2.Hours, ts.Minutes, ts.Seconds,
ts2.Milliseconds/10);
MessageBox.Show(elapsedTimeLinQ);
}
一般而言,Parallell是用于CPU绑定任务的。当你锁定Parallell.ForEach时,你不会有任何可扩展性,因此我看不出它对你有很大的帮助。 – FuleSnabel 2012-08-10 07:14:16
最好在[codereview](http://codereview.stackexchange.com/faq)上提问。 – 2012-08-10 07:48:36