using System.Text;
using MesETL.App.HostedServices.Abstractions;
using Microsoft.Extensions.Logging;

namespace MesETL.App.Services.ETL;

/// <summary>
/// Reads CSV text one line at a time and exposes each parsed line as a <see cref="DataRecord"/>.
/// </summary>
/// <remarks>
/// The underlying <see cref="StreamReader"/> is created lazily on first read. Each physical line is
/// treated as one record, so quoted fields that span several lines are not supported.
/// </remarks>
public class CsvReader : IDataReader
{
    /// <summary>Path of the file being read; <c>null</c> when the reader was built from a stream.</summary>
    protected readonly string? FilePath;

    /// <summary>Lazily created text reader over the input.</summary>
    protected readonly Lazy<StreamReader> Reader;

    // File stream opened by the file-path constructor; stays null for the stream constructor.
    private Stream? _stream;

    /// <summary>Optional logger made available to derived readers.</summary>
    protected readonly ILogger? Logger;

    /// <summary>Table name attached to every record produced by this reader.</summary>
    protected readonly string TableName;

    /// <summary>The record produced by the most recent successful <see cref="ReadAsync"/> call.</summary>
    public DataRecord Current { get; protected set; } = default!;

    /// <summary>Column headers passed to every <see cref="DataRecord"/>.</summary>
    public string[] Headers { get; }

    /// <summary>Field delimiter; only its first character is used when splitting a line.</summary>
    public string Delimiter { get; }

    /// <summary>Character that opens and closes a quoted field.</summary>
    public char QuoteChar { get; }

    /// <summary>
    /// Creates a reader over an existing stream.
    /// </summary>
    /// <param name="stream">Source stream; it is wrapped in a <see cref="StreamReader"/> on first read.</param>
    /// <param name="tableName">Table name attached to produced records.</param>
    /// <param name="headers">Column headers attached to produced records.</param>
    /// <param name="delimiter">Field delimiter (first character is used).</param>
    /// <param name="quoteChar">Quote character.</param>
    /// <param name="logger">Optional logger.</param>
    public CsvReader(Stream stream, string tableName, string[] headers, string delimiter = ",",
        char quoteChar = '"', ILogger? logger = null)
        : this(tableName, headers, delimiter, quoteChar, logger)
    {
        // The StreamReader is created with default settings, so disposing it also closes `stream`.
        Reader = new Lazy<StreamReader>(() => new StreamReader(stream), false);
    }

    /// <summary>
    /// Creates a reader over a file; the file is opened on first read.
    /// </summary>
    /// <param name="filePath">Path of the CSV file.</param>
    /// <param name="tableName">Table name attached to produced records.</param>
    /// <param name="headers">Column headers attached to produced records.</param>
    /// <param name="delimiter">Field delimiter (first character is used).</param>
    /// <param name="quoteChar">Quote character.</param>
    /// <param name="logger">Optional logger.</param>
    public CsvReader(string filePath, string tableName, string[] headers, string delimiter = ",",
        char quoteChar = '"', ILogger? logger = null)
        : this(tableName, headers, delimiter, quoteChar, logger)
    {
        FilePath = filePath;
        Reader = new Lazy<StreamReader>(() =>
        {
            _stream = File.OpenRead(filePath);
            return new StreamReader(_stream);
        });
    }

    // Shared initialisation; every public constructor replaces the Reader placeholder afterwards.
    private CsvReader(string tableName, string[] headers, string delimiter = ",", char quoteChar = '"',
        ILogger? logger = null)
    {
        TableName = tableName;
        Headers = headers;
        Logger = logger;
        Delimiter = delimiter;
        QuoteChar = quoteChar;
        Reader = null!;
    }

    /// <summary>
    /// Reads the next line and stores the parsed record in <see cref="Current"/>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a record was read; <c>false</c> when the reader returned <c>null</c>
    /// or a line that is empty or whitespace-only.
    /// </returns>
    /// <exception cref="InvalidOperationException"><see cref="Delimiter"/> is null or empty.</exception>
    public virtual async ValueTask<bool> ReadAsync()
    {
        var line = await Reader.Value.ReadLineAsync();

        // A blank line is treated the same as end of input.
        if (string.IsNullOrWhiteSpace(line))
        {
            return false;
        }

        if (string.IsNullOrEmpty(Delimiter))
        {
            throw new InvalidOperationException("The CSV delimiter must contain at least one character.");
        }

        var fields = ParseRowFaster(line, QuoteChar, Delimiter[0]);
        Current = new DataRecord(fields, TableName, Headers);
        return true;
    }

    /// <summary>
    /// Splits one CSV line into fields, building each field with a <see cref="StringBuilder"/>.
    /// </summary>
    /// <remarks>
    /// Quote characters and backslashes are kept in the returned fields. A delimiter inside a quoted
    /// section does not split. The character following an unescaped backslash is never interpreted
    /// as a quote or as another escaping backslash.
    /// </remarks>
    /// <param name="source">The line to split.</param>
    /// <param name="quoteChar">Character that toggles the quoted state.</param>
    /// <param name="delimiter">Field separator.</param>
    /// <returns>The fields in order; an empty <paramref name="source"/> yields one empty field.</returns>
    public static string[] ParseRow(ReadOnlySpan<char> source, char quoteChar, char delimiter)
    {
        var fields = new List<string>();
        var current = new StringBuilder(source.Length);
        var inQuotes = false;
        var escaped = false;

        foreach (var currChar in source)
        {
            if (!escaped)
            {
                if (currChar == '\\')
                {
                    escaped = true;
                    current.Append('\\');
                    continue;
                }

                if (currChar == quoteChar)
                {
                    inQuotes = !inQuotes;
                    current.Append(currChar);
                    continue;
                }
            }

            if (!inQuotes && currChar == delimiter)
            {
                fields.Add(current.ToString());
                current.Clear();
            }
            else
            {
                current.Append(currChar);
            }

            escaped = false;
        }

        fields.Add(current.ToString());
        return fields.ToArray();
    }

    /// <summary>
    /// Splits one CSV line into fields by slicing <paramref name="source"/> directly; it applies
    /// the same rules as <see cref="ParseRow"/> without an intermediate <see cref="StringBuilder"/>.
    /// </summary>
    /// <param name="source">The line to split.</param>
    /// <param name="quoteChar">Character that toggles the quoted state.</param>
    /// <param name="delimiter">Field separator.</param>
    /// <param name="columnCount">Initial capacity of the returned list.</param>
    /// <returns>The fields in order; an empty <paramref name="source"/> yields one empty field.</returns>
    public static List<string> ParseRowFaster(ReadOnlySpan<char> source, char quoteChar, char delimiter,
        int columnCount = 10)
    {
        var fields = new List<string>(columnCount);
        var inQuotes = false;
        var escaped = false;
        var start = 0; // index of the first character of the field currently being scanned

        for (var index = 0; index < source.Length; index++)
        {
            var currChar = source[index];

            if (!escaped)
            {
                if (currChar is '\\')
                {
                    escaped = true;
                    continue;
                }

                if (currChar == quoteChar)
                {
                    inQuotes = !inQuotes;
                    continue;
                }
            }

            if (!inQuotes && currChar == delimiter)
            {
                fields.Add(source[start..index].ToString());
                start = index + 1;
            }

            escaped = false;
        }

        // Whatever remains after the last delimiter (possibly empty) is the final field.
        fields.Add(source[start..].ToString());
        return fields;
    }

    /// <summary>
    /// Disposes the text reader and the opened file stream, if the reader was ever created.
    /// </summary>
    public virtual void Dispose()
    {
        if (Reader.IsValueCreated)
        {
            Reader.Value.Dispose();
            _stream?.Dispose();
        }

        // Non-sealed disposable type: keep a finalizer in a derived class from running needlessly.
        GC.SuppressFinalize(this);
    }
}