using System.Text;
using ConsoleApp2.Helpers;
using Microsoft.Extensions.Logging;

namespace ConsoleApp2.Services;

/// <summary>
/// Reads a CSV file line by line and exposes each parsed line as a <see cref="DataRecord"/>.
/// </summary>
/// <remarks>
/// The instance owns the underlying file stream; call <see cref="Dispose"/> when finished.
/// </remarks>
public class CsvSource : IDisposable
{
    private readonly string _filePath;
    private readonly StreamReader _reader;
    private readonly ILogger? _logger;
    private readonly string _tableName;

    /// <summary>The record built from the line most recently read by <see cref="ReadAsync"/>.</summary>
    public DataRecord Current { get; private set; }

    /// <summary>Optional column headers handed to every <see cref="DataRecord"/>.</summary>
    public string[]? Headers { get; }

    /// <summary>The raw text of the line most recently read by <see cref="ReadAsync"/>.</summary>
    public string? CurrentRaw { get; private set; }

    /// <summary>The field delimiter used when splitting a line.</summary>
    public string Delimiter { get; private set; }

    /// <summary>The character that encloses quoted fields.</summary>
    public char QuoteChar { get; private set; }

    /// <summary>
    /// Opens <paramref name="filePath"/> for reading and derives the table name from it
    /// via <c>DumpDataHelper.GetTableName</c>.
    /// </summary>
    /// <param name="filePath">Path of the CSV file to read.</param>
    /// <param name="headers">Optional column headers passed to each record.</param>
    /// <param name="delimiter">Field delimiter; defaults to a comma.</param>
    /// <param name="quoteChar">Quote character; defaults to a double quote.</param>
    /// <param name="logger">Optional logger.</param>
    public CsvSource(string filePath, string[]? headers = null, string delimiter = ",", char quoteChar = '"',
        ILogger? logger = null)
    {
        _filePath = filePath;
        Headers = headers;
        _logger = logger;
        Delimiter = delimiter;
        QuoteChar = quoteChar;
        var fs = File.OpenRead(filePath);
        _reader = new StreamReader(fs);
        _tableName = DumpDataHelper.GetTableName(filePath);
    }

    /// <summary>
    /// Reads the next line, parses it with <see cref="ParseRow2"/> and stores the result in
    /// <see cref="Current"/> and <see cref="CurrentRaw"/>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a record was read; <c>false</c> at end of file or when the next line
    /// is null, empty or whitespace-only (a blank line therefore stops the read).
    /// </returns>
    public async ValueTask<bool> ReadAsync()
    {
        var line = await _reader.ReadLineAsync();
        if (string.IsNullOrWhiteSpace(line))
        {
            return false;
        }

        CurrentRaw = line;
        var fields = ParseRow2(line, QuoteChar, Delimiter);
        Current = new DataRecord(fields, _tableName, Headers);
        return true;
    }

    /// <summary>
    /// Splits <paramref name="row"/> on every occurrence of <paramref name="delimiter"/>,
    /// strips a surrounding pair of <paramref name="quoteChar"/> from each field, maps the
    /// literal <c>\N</c> to <c>NULL</c>, and decodes fields that both
    /// <c>DumpDataHelper.CheckHexField</c> and <c>StringExtensions.CheckJsonHex</c> accept.
    /// </summary>
    /// <param name="row">The raw line; must not be empty.</param>
    /// <param name="quoteChar">The character enclosing quoted fields.</param>
    /// <param name="delimiter">The field delimiter; must not be empty.</param>
    /// <returns>The parsed fields in order.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="row"/> or <paramref name="delimiter"/> is empty.
    /// </exception>
    public string[] ParseRow(string row, char quoteChar, string delimiter)
    {
        var span = row.AsSpan();
        if (span.Length == 0)
        {
            throw new ArgumentException("The row is empty", nameof(row));
        }

        if (string.IsNullOrEmpty(delimiter))
        {
            throw new ArgumentException("The delimiter is empty", nameof(delimiter));
        }

        var result = new List<string>();
        var start = 0;
        var i = 0;
        while (i < span.Length)
        {
            // Quote state is deliberately NOT consulted here: the delimiter must be
            // distinctive enough that it never appears inside a quoted field.
            // A position holding the quote character is never treated as a delimiter start.
            if (span[i] != quoteChar && span[i..].StartsWith(delimiter.AsSpan(), StringComparison.Ordinal))
            {
                result.Add(ExtractField(span[start..i], quoteChar));
                // Jump past the whole delimiter so its tail cannot be re-matched.
                i += delimiter.Length;
                start = i;
                continue;
            }

            i++;
        }

        // The final field gets the same treatment as every other field.
        result.Add(ExtractField(span[start..], quoteChar));

        for (var j = 0; j < result.Count; j++)
        {
            var field = result[j];
            if (DumpDataHelper.CheckHexField(field) && StringExtensions.CheckJsonHex(field))
            {
                result[j] = StringExtensions.FromHex(field);
            }
        }

        return result.ToArray();
    }

    /// <summary>
    /// Splits <paramref name="source"/> on <paramref name="delimiter"/> while honouring quoted
    /// sections: a delimiter found between an unescaped pair of <paramref name="quoteChar"/>
    /// does not split. A backslash marks the following character as escaped, which prevents
    /// that character from toggling the quote state. Quotes and backslashes are kept in the
    /// returned fields.
    /// </summary>
    /// <param name="source">The raw line.</param>
    /// <param name="quoteChar">The character enclosing quoted fields.</param>
    /// <param name="delimiter">The field delimiter (one or more characters); must not be empty.</param>
    /// <returns>The raw fields in order; an empty <paramref name="source"/> yields one empty field.</returns>
    /// <exception cref="ArgumentException"><paramref name="delimiter"/> is empty.</exception>
    public string[] ParseRow2(ReadOnlySpan<char> source, char quoteChar, string delimiter)
    {
        if (string.IsNullOrEmpty(delimiter))
        {
            throw new ArgumentException("The delimiter is empty", nameof(delimiter));
        }

        var result = new List<string>();
        var current = new StringBuilder();
        var inQuotes = false;
        var escaped = false;
        var index = 0;

        while (index < source.Length)
        {
            var ch = source[index];

            if (!escaped && ch == '\\')
            {
                // Remember the escape so the next character cannot toggle the quote state.
                escaped = true;
                current.Append(ch);
                index++;
                continue;
            }

            if (!escaped && ch == quoteChar)
            {
                inQuotes = !inQuotes;
                current.Append(ch);
                index++;
                continue;
            }

            if (!inQuotes && source[index..].StartsWith(delimiter.AsSpan(), StringComparison.Ordinal))
            {
                result.Add(current.ToString());
                current.Clear();
                // Consume the whole delimiter, not just its first character.
                index += delimiter.Length;
            }
            else
            {
                current.Append(ch);
                index++;
            }

            escaped = false;
        }

        result.Add(current.ToString());
        return result.ToArray();
    }

    /// <summary>
    /// Releases the underlying reader and the file stream it wraps.
    /// </summary>
    public void Dispose()
    {
        _reader.Dispose();
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Removes one surrounding pair of <paramref name="quoteChar"/> (if present) and maps the
    /// literal <c>\N</c> to <c>NULL</c>.
    /// </summary>
    private static string ExtractField(ReadOnlySpan<char> raw, char quoteChar)
    {
        // At least two characters are required for an opening AND a closing quote;
        // a lone quote character is left untouched.
        if (raw.Length >= 2 && raw[0] == quoteChar && raw[^1] == quoteChar)
        {
            raw = raw[1..^1];
        }

        var field = raw.ToString();
        return field == "\\N" ? "NULL" : field;
    }
}