CSV(Comma-Separated Values)是最常见的数据交换格式之一。在C#中高效读取CSV文件需要考虑性能、内存使用和错误处理等多个方面。本文将提供从基础到高级的完整解决方案。
![图片[1]_C#实现高效读取CSV文件的完整指南_知途无界](https://zhituwujie.com/wp-content/uploads/2025/12/d2b5ca33bd20251207100652.png)
1. 基础方法:使用 TextFieldParser
Microsoft.VisualBasic.FileIO.TextFieldParser 是一个强大且易用的CSV解析工具,性能优秀,并且内置在 .NET Framework 以及 .NET Core 3.0 及更高版本中。
安装和引用
// 在 .NET Core 3.0+/.NET 5+ 项目中,TextFieldParser 位于随框架提供的 Microsoft.VisualBasic.Core 程序集内,通常无需额外安装 NuGet 包
// 在 .NET Framework 项目中,需要添加对 Microsoft.VisualBasic 程序集的引用
using Microsoft.VisualBasic.FileIO;
using System.Globalization;
基本实现
/// <summary>
/// Minimal CSV reader built on <see cref="TextFieldParser"/>.
/// </summary>
public class CsvReaderUsingTextFieldParser
{
    /// <summary>
    /// Reads every parsable row of a comma-delimited file into memory.
    /// </summary>
    /// <param name="filePath">Path of the CSV file to read.</param>
    /// <returns>
    /// One string array per row, in file order. Rows that raise
    /// <see cref="MalformedLineException"/> are reported on the console and left out.
    /// </returns>
    public static List<string[]> ReadCsvBasic(string filePath)
    {
        var records = new List<string[]>();

        // TextFieldParser has no Culture property, so only the parsing options are set here.
        using var parser = new TextFieldParser(filePath)
        {
            TextFieldType = FieldType.Delimited,
            Delimiters = new string[] { "," },
            HasFieldsEnclosedInQuotes = true,
            TrimWhiteSpace = true
        };

        while (!parser.EndOfData)
        {
            try
            {
                string[] fields = parser.ReadFields();
                if (fields != null)
                {
                    records.Add(fields);
                }
            }
            catch (MalformedLineException ex)
            {
                // Report the line number carried by the exception rather than the parser's
                // current position. Skip the bad row and keep going; rethrow here instead
                // if a bad row should abort the whole read.
                Console.WriteLine($"行解析错误 {ex.LineNumber}: {ex.Message}");
            }
        }

        return records;
    }
}
高级配置版本
/// <summary>
/// Configurable CSV reader built on <see cref="TextFieldParser"/> that returns each row as a
/// dictionary keyed by column name.
/// </summary>
public class AdvancedCsvReader
{
    /// <summary>Settings that control how <see cref="ReadCsvWithHeaders"/> interprets a file.</summary>
    public class CsvOptions
    {
        /// <summary>Character that separates fields.</summary>
        public char Delimiter { get; set; } = ',';

        /// <summary>When true, the first usable row supplies the dictionary keys.</summary>
        public bool HasHeader { get; set; } = false;

        /// <summary>Passed through to <see cref="TextFieldParser.TrimWhiteSpace"/>.</summary>
        public bool TrimWhitespace { get; set; } = true;

        /// <summary>When true, rows whose fields are all null/empty/whitespace are dropped.</summary>
        public bool SkipEmptyLines { get; set; } = true;

        /// <summary>A row is dropped when its first field starts with any of these tokens.</summary>
        public string[] CommentTokens { get; set; } = { "#", "//" };

        /// <summary>Requested buffer size (64 KB by default). Currently not applied to the parser.</summary>
        public int BufferSize { get; set; } = 65536;
    }

    /// <summary>
    /// Reads a delimited file and returns one dictionary per data row.
    /// </summary>
    /// <param name="filePath">Path of the CSV file to read.</param>
    /// <param name="options">Parsing options; defaults are used when null.</param>
    /// <returns>
    /// Rows keyed by header name when <see cref="CsvOptions.HasHeader"/> is true, otherwise keyed
    /// by <c>Column0</c>, <c>Column1</c>, ... Rows that fail to parse are logged to the console
    /// and skipped.
    /// </returns>
    public static List<Dictionary<string, string>> ReadCsvWithHeaders(string filePath, CsvOptions options = null)
    {
        options ??= new CsvOptions();
        var records = new List<Dictionary<string, string>>();
        string[] headers = null;

        // TextFieldParser has no Culture property, so only the parsing options are set here.
        using var parser = new TextFieldParser(filePath)
        {
            TextFieldType = FieldType.Delimited,
            Delimiters = new string[] { options.Delimiter.ToString() },
            HasFieldsEnclosedInQuotes = true,
            TrimWhiteSpace = options.TrimWhitespace
        };

        SetParserBufferSize(parser, options.BufferSize);

        while (!parser.EndOfData)
        {
            try
            {
                string[] fields = parser.ReadFields();
                if (fields == null || fields.Length == 0)
                {
                    continue;
                }

                if (options.SkipEmptyLines && fields.All(string.IsNullOrWhiteSpace))
                {
                    continue;
                }

                if (IsCommentRow(fields, options.CommentTokens))
                {
                    continue;
                }

                // The header is the first row that survives the filters above, not physical
                // line 1: a leading comment or malformed row must not displace it.
                if (options.HasHeader && headers == null)
                {
                    headers = fields;
                    continue;
                }

                records.Add(headers != null ? MapByHeader(headers, fields) : MapByPosition(fields));
            }
            catch (MalformedLineException ex)
            {
                // Use the line number carried by the exception, not the parser's current position.
                Console.WriteLine($"解析错误 行 {ex.LineNumber}: {ex.Message}");
            }
            catch (Exception ex)
            {
                // Best effort: log and move on to the next row.
                Console.WriteLine($"未知错误 行 {parser.LineNumber}: {ex.Message}");
            }
        }

        return records;
    }

    /// <summary>Returns true when the row's first field starts with one of the comment tokens.</summary>
    private static bool IsCommentRow(string[] fields, string[] commentTokens)
    {
        string first = fields[0];
        if (commentTokens == null || first == null)
        {
            return false;
        }

        // Ordinal comparison: comment markers are symbols, not linguistic text.
        return commentTokens.Any(token =>
            !string.IsNullOrEmpty(token) && first.StartsWith(token, StringComparison.Ordinal));
    }

    /// <summary>Pairs header names with field values; surplus headers or fields are ignored.</summary>
    private static Dictionary<string, string> MapByHeader(string[] headers, string[] fields)
    {
        int count = Math.Min(headers.Length, fields.Length);
        var record = new Dictionary<string, string>(count);
        for (int i = 0; i < count; i++)
        {
            // Indexer assignment: a repeated header name keeps the last value.
            record[headers[i]] = fields[i];
        }
        return record;
    }

    /// <summary>Keys each field by its zero-based position: Column0, Column1, ...</summary>
    private static Dictionary<string, string> MapByPosition(string[] fields)
    {
        var record = new Dictionary<string, string>(fields.Length);
        for (int i = 0; i < fields.Length; i++)
        {
            record[$"Column{i}"] = fields[i];
        }
        return record;
    }

    /// <summary>
    /// Placeholder: TextFieldParser exposes no buffer-size setting, so this does nothing.
    /// </summary>
    private static void SetParserBufferSize(TextFieldParser parser, int bufferSize)
    {
        // Intentionally empty. Setting the size via reflection is possible but is not
        // recommended for production use.
    }
}
2. 高性能方案:自定义流式读取器
对于超大文件或性能要求极高的场景,自定义流式读取器是最佳选择。
高性能CSV读取器
/// <summary>
/// Forward-only, streaming CSV reader that yields one record at a time so the whole file
/// never has to be held in memory.
/// </summary>
public class HighPerformanceCsvReader : IDisposable
{
    private readonly StreamReader _reader;
    private readonly char _delimiter;
    private readonly char _quoteChar;
    private readonly bool _trimWhitespace;
    private long _bytesRead;
    private int _lineNumber;

    /// <summary>Opens <paramref name="filePath"/> for reading with a 64 KB buffer.</summary>
    /// <param name="filePath">Path of the CSV file.</param>
    /// <param name="delimiter">Field separator.</param>
    /// <param name="quoteChar">Character that encloses quoted fields; doubled to escape itself.</param>
    /// <param name="trimWhitespace">When true, unquoted fields are trimmed.</param>
    public HighPerformanceCsvReader(string filePath, char delimiter = ',', char quoteChar = '"', bool trimWhitespace = true)
    {
        _reader = new StreamReader(filePath, Encoding.UTF8, detectEncodingFromByteOrderMarks: true, bufferSize: 65536);
        _delimiter = delimiter;
        _quoteChar = quoteChar;
        _trimWhitespace = trimWhitespace;
        _lineNumber = 0;
    }

    /// <summary>
    /// Lazily enumerates the records in the file. Blank lines between records are skipped.
    /// A quoted field may span several physical lines; the lines are rejoined with '\n'.
    /// </summary>
    /// <returns>The fields of each record, in file order.</returns>
    public IEnumerable<string[]> ReadRecords()
    {
        string line;
        while ((line = ReadPhysicalLine()) != null)
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            // An odd number of quote characters means the line ended inside a quoted field,
            // so the record continues on the following physical line(s).
            bool insideQuotes = HasOddQuoteCount(line);
            if (insideQuotes)
            {
                var record = new StringBuilder(line);
                string continuation;
                while (insideQuotes && (continuation = ReadPhysicalLine()) != null)
                {
                    record.Append('\n').Append(continuation);
                    insideQuotes ^= HasOddQuoteCount(continuation);
                }
                // If the file ends inside quotes, what was collected is parsed as-is.
                line = record.ToString();
            }

            yield return ParseLine(line);
        }
    }

    /// <summary>Enumerates records and converts each one with <paramref name="mapper"/>.</summary>
    /// <param name="mapper">Converts the raw fields of a record to <typeparamref name="T"/>.</param>
    public IEnumerable<T> ReadRecords<T>(Func<string[], T> mapper) where T : class
    {
        foreach (var fields in ReadRecords())
        {
            yield return mapper(fields);
        }
    }

    /// <summary>Reads one physical line and updates the line and byte counters.</summary>
    private string ReadPhysicalLine()
    {
        string line = _reader.ReadLine();
        if (line != null)
        {
            _lineNumber++;
            // ReadLine strips the terminator, so its size is estimated from Environment.NewLine.
            Encoding encoding = _reader.CurrentEncoding;
            _bytesRead += encoding.GetByteCount(line) + encoding.GetByteCount(Environment.NewLine);
        }
        return line;
    }

    /// <summary>Returns true when <paramref name="text"/> contains an odd number of quote characters.</summary>
    private bool HasOddQuoteCount(string text)
    {
        bool odd = false;
        foreach (char c in text)
        {
            if (c == _quoteChar)
            {
                odd = !odd;
            }
        }
        return odd;
    }

    /// <summary>Splits one logical record into fields, honouring quotes and doubled-quote escapes.</summary>
    private string[] ParseLine(string line)
    {
        var fields = new List<string>();
        var currentField = new StringBuilder();
        bool inQuotes = false;
        bool wasInQuotes = false; // the current field contained a quoted section, so it is not trimmed

        for (int i = 0; i < line.Length; i++)
        {
            char currentChar = line[i];
            if (currentChar == _quoteChar)
            {
                if (!inQuotes)
                {
                    inQuotes = true;
                }
                else if (i + 1 < line.Length && line[i + 1] == _quoteChar)
                {
                    // Doubled quote inside a quoted section is a literal quote character.
                    currentField.Append(_quoteChar);
                    i++;
                }
                else
                {
                    inQuotes = false;
                    wasInQuotes = true;
                }
            }
            else if (currentChar == _delimiter && !inQuotes)
            {
                fields.Add(FinishField(currentField, wasInQuotes));
                currentField.Clear();
                wasInQuotes = false;
            }
            else
            {
                currentField.Append(currentChar);
            }
        }

        // The last field has no trailing delimiter.
        fields.Add(FinishField(currentField, wasInQuotes));
        return fields.ToArray();
    }

    /// <summary>Materialises a field, trimming it only when trimming is enabled and it was not quoted.</summary>
    private string FinishField(StringBuilder buffer, bool wasQuoted)
    {
        string value = buffer.ToString();
        return _trimWhitespace && !wasQuoted ? value.Trim() : value;
    }

    /// <summary>Closes the underlying file.</summary>
    public void Dispose()
    {
        _reader?.Dispose();
    }

    /// <summary>Approximate number of bytes consumed so far (line terminators are estimated).</summary>
    public long BytesRead => _bytesRead;

    /// <summary>Number of physical lines read so far.</summary>
    public int LineNumber => _lineNumber;
}
使用示例
/// <summary>
/// Row model shared by the CSV reading examples.
/// </summary>
public class Person
{
    /// <summary>Person's name.</summary>
    public string Name { get; set; }

    /// <summary>Age in years.</summary>
    public int Age { get; set; }

    /// <summary>E-mail address.</summary>
    public string Email { get; set; }

    /// <summary>
    /// Registration date; null when the source has no value. Needed by the class map
    /// that binds a "Register Date" column to this model.
    /// </summary>
    public DateTime? RegisterDate { get; set; }
}
/// <summary>
/// Shows how to stream a large CSV file through HighPerformanceCsvReader and map each
/// record to a Person.
/// </summary>
public class HighPerformanceExample
{
    /// <summary>
    /// Streams the file record by record, hands each Person to ProcessPerson and prints
    /// progress every 10,000 records plus a final summary.
    /// </summary>
    /// <param name="filePath">CSV file to process; defaults to "large_file.csv".</param>
    public static void ProcessLargeCsv(string filePath = "large_file.csv")
    {
        using var reader = new HighPerformanceCsvReader(filePath);
        var stopwatch = Stopwatch.StartNew();

        // The mapping is lazy: nothing is read until the foreach below pulls records.
        var persons = reader.ReadRecords(fields => new Person
        {
            Name = fields.Length > 0 ? fields[0] : null,
            // Invariant culture: CSV is machine-readable data, not user-facing text.
            Age = fields.Length > 1 &&
                  int.TryParse(fields[1], NumberStyles.Integer, CultureInfo.InvariantCulture, out int age)
                ? age
                : 0,
            Email = fields.Length > 2 ? fields[2] : null
        });

        int count = 0;
        foreach (var person in persons)
        {
            ProcessPerson(person);
            count++;

            // Report progress every 10,000 records.
            if (count % 10000 == 0)
            {
                Console.WriteLine($"已处理 {count} 条记录,读取字节: {reader.BytesRead / 1024 / 1024} MB");
            }
        }

        stopwatch.Stop();
        Console.WriteLine($"处理完成: {count} 条记录,耗时: {stopwatch.Elapsed.TotalSeconds:F2} 秒");
    }

    /// <summary>Placeholder for the real per-record work (database save, calculations, ...).</summary>
    private static void ProcessPerson(Person person)
    {
        // Business logic goes here.
    }
}
3. 使用第三方库:CsvHelper
CsvHelper 是最流行的 CSV 处理库,功能强大且性能优异。
安装和基本使用
// Install-Package CsvHelper
using CsvHelper;
using CsvHelper.Configuration;
using System.Globalization;
using System.IO;
/// <summary>
/// Examples of reading and writing CSV data with the CsvHelper library.
/// </summary>
public class CsvHelperExample
{
    /// <summary>Binds CSV header names to Person properties.</summary>
    public class PersonMap : ClassMap<Person>
    {
        public PersonMap()
        {
            Map(m => m.Name).Name("Full Name");
            Map(m => m.Age).Name("Age").TypeConverterOption.CultureInfo(CultureInfo.InvariantCulture);
            Map(m => m.Email).Name("Email Address");
            // NOTE(review): this mapping needs a RegisterDate property on Person — confirm it exists.
            Map(m => m.RegisterDate).Name("Register Date").TypeConverterOption.Format("yyyy-MM-dd");
        }
    }

    /// <summary>
    /// Reads the whole file into a list of Person using <see cref="PersonMap"/> and prints the count.
    /// </summary>
    /// <param name="filePath">Path of the CSV file to read.</param>
    public static void ReadWithCsvHelper(string filePath)
    {
        var config = new CsvConfiguration(CultureInfo.InvariantCulture)
        {
            HasHeaderRecord = true,
            Delimiter = ",",
            TrimOptions = TrimOptions.Trim,
            MissingFieldFound = null, // ignore missing fields
            HeaderValidated = null,   // ignore header validation failures
            BadDataFound = context =>
            {
                Console.WriteLine($"发现损坏数据: {context.RawRecord}");
            }
        };

        using var reader = new StreamReader(filePath);
        using var csv = new CsvReader(reader, config);

        csv.Context.RegisterClassMap<PersonMap>();

        // ToList forces the lazy GetRecords enumeration while the reader is still open.
        var records = csv.GetRecords<Person>().ToList();
        Console.WriteLine($"读取了 {records.Count} 条记录");
    }

    /// <summary>
    /// Reads the file row by row with case-insensitive header matching.
    /// Despite its name this method is synchronous; the name is kept for existing callers.
    /// </summary>
    /// <param name="filePath">Path of the CSV file to read.</param>
    public static void ReadWithAsync(string filePath)
    {
        var config = new CsvConfiguration(CultureInfo.InvariantCulture)
        {
            HasHeaderRecord = true,
            // Invariant lower-casing keeps header matching independent of the current culture.
            PrepareHeaderForMatch = args => args.Header.ToLowerInvariant()
        };

        using var reader = new StreamReader(filePath);
        using var csv = new CsvReader(reader, config);

        var records = new List<Person>();

        // Manual reading: move to the header row and register it before pulling data rows.
        if (csv.Read())
        {
            csv.ReadHeader();
            while (csv.Read())
            {
                records.Add(csv.GetRecord<Person>());
            }
        }
    }

    /// <summary>Writes two sample Person rows (with a header) to <paramref name="outputPath"/>.</summary>
    /// <param name="outputPath">Destination file; overwritten if it exists.</param>
    public static void WriteCsvExample(string outputPath)
    {
        var records = new List<Person>
        {
            new Person { Name = "张三", Age = 25, Email = "zhangsan@example.com" },
            new Person { Name = "李四", Age = 30, Email = "lisi@example.com" }
        };

        using var writer = new StreamWriter(outputPath);
        using var csv = new CsvWriter(writer, CultureInfo.InvariantCulture);
        csv.WriteRecords(records);
    }
}
4. 性能对比和最佳实践
性能测试方法
/// <summary>
/// Times each CSV reading strategy against the same file and prints a one-line summary per strategy.
/// </summary>
public class PerformanceBenchmark
{
    /// <summary>Runs every registered strategy once on <paramref name="testFile"/>.</summary>
    /// <param name="testFile">Path of the CSV file used for all runs.</param>
    public static void RunBenchmark(string testFile)
    {
        var candidates = new Dictionary<string, Func<string, List<Person>>>
        {
            { "TextFieldParser", path => TextFieldParserMethod(path) },
            { "HighPerformance", path => HighPerformanceMethod(path) },
            { "CsvHelper", path => CsvHelperMethod(path) }
        };

        foreach (KeyValuePair<string, Func<string, List<Person>>> candidate in candidates)
        {
            Stopwatch timer = Stopwatch.StartNew();
            List<Person> loaded = candidate.Value(testFile);
            timer.Stop();

            double seconds = timer.Elapsed.TotalSeconds;
            int total = loaded.Count;
            Console.WriteLine($"{candidate.Key}: {total} 条记录, 耗时: {seconds:F2}秒, 速度: {total / seconds:F0} 条/秒");
        }
    }

    // Stub: plug in the TextFieldParser-based reader here.
    private static List<Person> TextFieldParserMethod(string path) => new List<Person>();

    // Stub: plug in the streaming high-performance reader here.
    private static List<Person> HighPerformanceMethod(string path) => new List<Person>();

    // Stub: plug in the CsvHelper-based reader here.
    private static List<Person> CsvHelperMethod(string path) => new List<Person>();
}
性能对比结果(示例)
| 方法 | 10MB文件 | 100MB文件 | 1GB文件 | 内存使用 | 特点 |
|---|---|---|---|---|---|
| TextFieldParser | 快 | 中等 | 慢 | 中等 | 内置支持,易用性好 |
| 高性能流式 | 最快 | 最快 | 最快 | 最低 | 最高性能,适合大文件 |
| CsvHelper | 中等 | 快 | 中等 | 较高 | 功能丰富,映射方便 |
5. 最佳实践总结
1. 选择合适的方案
- 小文件 (<10MB):使用 CsvHelper,开发效率高
- 中等文件 (10MB-1GB):使用 TextFieldParser,平衡性能和易用性
- 大文件 (>1GB):使用自定义流式读取器,内存效率最高
2. 性能优化技巧
// 1. Choose a buffer size that suits the workload (64 KB here).
using var reader = new StreamReader(
    path: filePath,
    encoding: Encoding.UTF8,
    detectEncodingFromByteOrderMarks: true,
    bufferSize: 65536);
// 2. Asynchronous processing (for I/O-bound work)
/// <summary>
/// Reads the file line by line without blocking the calling thread.
/// </summary>
/// <param name="filePath">Path of the CSV file to read.</param>
/// <returns>A task that completes when the whole file has been read.</returns>
public async Task ProcessCsvAsync(string filePath)
{
    // FileOptions.Asynchronous opens the file for asynchronous I/O; SequentialScan hints
    // that the file is read front to back.
    using var stream = new FileStream(
        filePath,
        FileMode.Open,
        FileAccess.Read,
        FileShare.Read,
        65536,
        FileOptions.Asynchronous | FileOptions.SequentialScan);
    using var reader = new StreamReader(stream);

    string line;
    while ((line = await reader.ReadLineAsync()) != null)
    {
        // Parse and handle each line here.
    }
}
// 3. Process records in batches instead of one at a time
var batchSize = 1000;
var batch = new List<Person>(batchSize);
foreach (var record in records)
{
    batch.Add(record);
    if (batch.Count >= batchSize)
    {
        await SaveBatchToDatabase(batch);
        batch.Clear();
    }
}
// Flush the final partial batch; without this, up to batchSize - 1 trailing records are never saved.
if (batch.Count > 0)
{
    await SaveBatchToDatabase(batch);
    batch.Clear();
}
// 4. Parallel processing (mind thread safety: ProcessRecord runs concurrently)
var parallelOptions = new ParallelOptions();
parallelOptions.MaxDegreeOfParallelism = Environment.ProcessorCount;
Parallel.ForEach(records, parallelOptions, ProcessRecord);
3. 错误处理和验证
/// <summary>
/// Validates Person records read from a CSV file.
/// </summary>
public class RobustCsvProcessor
{
    /// <summary>Outcome of validating a single record.</summary>
    public class ValidationResult
    {
        /// <summary>True when <see cref="Errors"/> is empty.</summary>
        public bool IsValid { get; set; }

        /// <summary>Human-readable description of each failed rule.</summary>
        public List<string> Errors { get; set; } = new();
    }

    /// <summary>
    /// Checks name, age range (0-150) and e-mail format.
    /// </summary>
    /// <param name="person">Record to validate.</param>
    /// <returns>A result listing every rule the record violates.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="person"/> is null.</exception>
    public ValidationResult ValidateRecord(Person person)
    {
        if (person == null)
        {
            throw new ArgumentNullException(nameof(person));
        }

        var result = new ValidationResult();

        if (string.IsNullOrEmpty(person.Name))
        {
            result.Errors.Add("姓名不能为空");
        }

        if (person.Age < 0 || person.Age > 150)
        {
            result.Errors.Add("年龄必须在0-150之间");
        }

        if (!IsValidEmail(person.Email))
        {
            result.Errors.Add("邮箱格式不正确");
        }

        result.IsValid = result.Errors.Count == 0;
        return result;
    }

    /// <summary>
    /// Returns true when <paramref name="email"/> parses as a mail address and is exactly
    /// the address part (no display name or surrounding text).
    /// </summary>
    private bool IsValidEmail(string email)
    {
        // Missing values are the common case in CSV data; reject them without the cost
        // of throwing and catching an exception.
        if (string.IsNullOrWhiteSpace(email))
        {
            return false;
        }

        try
        {
            var addr = new System.Net.Mail.MailAddress(email);
            return addr.Address == email;
        }
        catch (FormatException)
        {
            return false;
        }
        catch (ArgumentException)
        {
            return false;
        }
    }
}
4. 内存管理
// 对于超大型文件,使用生产者-消费者模式
/// <summary>
/// Processes a CSV file with one reader task feeding a bounded queue that several worker
/// tasks drain, so the file is never fully loaded into memory.
/// </summary>
public class ProducerConsumerCsvReader
{
    // Upper bound on queued records; the producer blocks when the queue is full.
    private const int QueueCapacity = 1000;
    private const int MaxDegreeOfParallelism = 4;

    /// <summary>
    /// Reads <paramref name="filePath"/> on a producer task and processes its records on
    /// <c>MaxDegreeOfParallelism</c> consumer tasks. Blocks until all of them finish.
    /// </summary>
    /// <param name="filePath">Path of the CSV file to process.</param>
    /// <exception cref="AggregateException">The producer (for example, the file read) failed.</exception>
    public void ProcessLargeFile(string filePath)
    {
        // A fresh queue per call: a completed BlockingCollection cannot accept new items,
        // so sharing one across calls would make a second call fail.
        using var queue = new BlockingCollection<string[]>(boundedCapacity: QueueCapacity);

        var producer = Task.Run(() =>
        {
            try
            {
                using var reader = new HighPerformanceCsvReader(filePath);
                foreach (var record in reader.ReadRecords())
                {
                    queue.Add(record);
                }
            }
            finally
            {
                // Always signal completion; otherwise a producer failure would leave the
                // consumers blocked forever in GetConsumingEnumerable.
                queue.CompleteAdding();
            }
        });

        var consumers = Enumerable.Range(0, MaxDegreeOfParallelism)
            .Select(i => Task.Run(() => Consumer(queue, i)))
            .ToArray();

        Task.WaitAll(new[] { producer }.Concat(consumers).ToArray());
    }

    /// <summary>Drains the queue until the producer completes, logging per-record failures.</summary>
    private void Consumer(BlockingCollection<string[]> queue, int consumerId)
    {
        foreach (var record in queue.GetConsumingEnumerable())
        {
            try
            {
                ProcessRecord(record);
            }
            catch (Exception ex)
            {
                // One bad record must not stop this consumer.
                Console.WriteLine($"消费者 {consumerId} 处理记录时出错: {ex.Message}");
            }
        }
    }

    /// <summary>
    /// Handles one record. Called concurrently from several consumer tasks, so overrides
    /// must be thread-safe. The default implementation does nothing.
    /// </summary>
    /// <param name="record">Fields of the record.</param>
    protected virtual void ProcessRecord(string[] record)
    {
    }
}
通过本文提供的各种方案和最佳实践,你可以根据具体需求选择最适合的CSV读取方法,在保证性能的同时处理各种复杂的CSV文件场景。
© 版权声明
文中内容均来源于公开资料,受限于信息的时效性和复杂性,可能存在误差或遗漏。我们已尽力确保内容的准确性,但对于因信息变更或错误导致的任何后果,本站不承担任何责任。如需引用本文内容,请注明出处并尊重原作者的版权。
THE END

























暂无评论内容