using System.Reflection.PortableExecutable;
using System.Text;
using System.Text.RegularExpressions;
using ConsoleApp2.Helpers;
using ConsoleApp2.HostedServices.Abstractions;
using Microsoft.Extensions.Logging;

namespace ConsoleApp2.Services;

/// <summary>
/// Reads delimited (CSV-style) data files for a single table. The data files are
/// located through the table's companion <c>.sql</c> file found in the input directory.
/// </summary>
public class CsvSource : IDataSource
{
    protected readonly string _inputDir;
    private readonly ILogger? _logger;
    protected readonly string _tableName;
    protected string? _sqlFilePath;
    protected readonly string? _sqlFileText;
    protected string[]? headers;
    protected string[]? csvFiles;

    public string? CurrentRaw { get; protected set; }
    public string Delimiter { get; private set; }
    public char QuoteChar { get; private set; }

    /// <summary>
    /// Creates a source for <paramref name="tableName"/> and looks up the first file in
    /// <paramref name="inputDir"/> whose path matches <c>*.{tableName}.*.sql</c>.
    /// </summary>
    /// <param name="inputDir">Directory that contains the <c>.sql</c> file and the data files.</param>
    /// <param name="tableName">Table name; matched literally inside the file path.</param>
    /// <param name="delimiter">Field delimiter used when parsing rows.</param>
    /// <param name="quoteChar">Character that encloses quoted fields.</param>
    /// <param name="logger">Optional logger.</param>
    public CsvSource(string inputDir, string tableName, string delimiter = ",", char quoteChar = '"',
        ILogger? logger = null)
    {
        _inputDir = inputDir;
        _tableName = tableName;
        _logger = logger;
        Delimiter = delimiter;
        QuoteChar = quoteChar;

        // Escape the table name so regex metacharacters in it (e.g. '$', '+') are matched literally.
        var pattern = $"^.*\\.{Regex.Escape(tableName)}\\..*\\.sql$";
        _sqlFilePath = Directory.GetFiles(_inputDir).FirstOrDefault(path => Regex.IsMatch(path, pattern));
    }

    /// <summary>
    /// Splits <paramref name="row"/> on every occurrence of <paramref name="delimiter"/>.
    /// Quoting does NOT protect a delimiter here, so the delimiter needs to be distinctive
    /// enough not to occur inside field data. Each field has one pair of enclosing quote
    /// characters removed and the literal <c>\N</c> replaced by <c>NULL</c>; fields accepted by
    /// <c>DumpDataHelper.CheckHexField</c> and <c>StringExtensions.CheckJsonHex</c> are then
    /// converted with <c>StringExtensions.FromHex</c>.
    /// </summary>
    /// <param name="row">The raw row text.</param>
    /// <param name="quoteChar">Character that encloses quoted fields.</param>
    /// <param name="delimiter">Field delimiter; compared ordinally.</param>
    /// <returns>The parsed fields, in order.</returns>
    /// <exception cref="ArgumentException">The row or the delimiter is null or empty.</exception>
    public string[] ParseRow(string row, char quoteChar, string delimiter)
    {
        if (string.IsNullOrEmpty(row))
        {
            throw new ArgumentException("The row is empty", nameof(row));
        }

        if (string.IsNullOrEmpty(delimiter))
        {
            throw new ArgumentException("The delimiter must not be empty", nameof(delimiter));
        }

        var span = row.AsSpan();
        var separator = delimiter.AsSpan();
        var result = new List<string>();
        var start = 0;
        var i = 0;

        while (i < span.Length)
        {
            // A position holding the quote character is never treated as the start of a delimiter.
            if (span[i] != quoteChar && span[i..].StartsWith(separator, StringComparison.Ordinal))
            {
                result.Add(ExtractField(span[start..i], quoteChar));
                // Jump over the whole delimiter so overlapping delimiter runs cannot be matched twice.
                i += separator.Length;
                start = i;
            }
            else
            {
                i++;
            }
        }

        // Whatever follows the last delimiter is the final field (empty when the row ends with a delimiter).
        result.Add(ExtractField(span[start..], quoteChar));

        for (var k = 0; k < result.Count; k++)
        {
            var field = result[k];
            if (DumpDataHelper.CheckHexField(field) && StringExtensions.CheckJsonHex(field))
            {
                result[k] = StringExtensions.FromHex(field);
            }
        }

        return result.ToArray();
    }

    /// <summary>
    /// Removes one pair of enclosing quote characters (if present) and maps <c>\N</c> to <c>NULL</c>.
    /// </summary>
    private static string ExtractField(ReadOnlySpan<char> raw, char quoteChar)
    {
        // Length check: a lone quote character is not an enclosed field.
        if (raw.Length >= 2 && raw[0] == quoteChar && raw[^1] == quoteChar)
        {
            raw = raw[1..^1];
        }

        var field = raw.ToString();
        return field == "\\N" ? "NULL" : field;
    }

    /// <summary>
    /// Splits <paramref name="source"/> on <paramref name="delimiter"/>, ignoring delimiters that
    /// occur inside a quoted section. Quote characters and backslashes are kept in the output.
    /// A backslash prevents the character after it from toggling the quoted state; it does not
    /// prevent a delimiter outside quotes from splitting.
    /// </summary>
    /// <param name="source">The raw row text.</param>
    /// <param name="quoteChar">Character that opens and closes a quoted section.</param>
    /// <param name="delimiter">Field delimiter; the whole string is matched ordinally.</param>
    /// <returns>The raw fields, in order; an empty source yields a single empty field.</returns>
    /// <exception cref="ArgumentException">The delimiter is null or empty.</exception>
    public string[] ParseRow2(ReadOnlySpan<char> source, char quoteChar, string delimiter)
    {
        if (string.IsNullOrEmpty(delimiter))
        {
            throw new ArgumentException("The delimiter must not be empty", nameof(delimiter));
        }

        var separator = delimiter.AsSpan();
        var result = new List<string>();
        var current = new StringBuilder();
        var inQuote = false;
        var escaped = false;
        var index = 0;

        while (index < source.Length)
        {
            var ch = source[index];

            if (!escaped && ch == '\\')
            {
                escaped = true;
                current.Append(ch);
                index++;
                continue;
            }

            if (!escaped && ch == quoteChar)
            {
                inQuote = !inQuote;
                current.Append(ch);
                index++;
                continue;
            }

            if (!inQuote && source[index..].StartsWith(separator, StringComparison.Ordinal))
            {
                result.Add(current.ToString());
                current.Clear();
                // Consume the full delimiter, not just its first character.
                index += separator.Length;
            }
            else
            {
                current.Append(ch);
                index++;
            }

            escaped = false;
        }

        result.Add(current.ToString());
        return result.ToArray();
    }

    /// <summary>
    /// Reads the table's <c>.sql</c> file and fills <c>headers</c> and <c>csvFiles</c> from it
    /// via <c>DumpDataHelper</c>.
    /// </summary>
    /// <exception cref="FileNotFoundException">No matching <c>.sql</c> file was found by the constructor.</exception>
    public virtual async Task GetHeaderAndCsvFiles()
    {
        if (_sqlFilePath is null)
        {
            throw new FileNotFoundException(
                $"No .sql file for table '{_tableName}' was found in '{_inputDir}'.");
        }

        var text = await File.ReadAllTextAsync(_sqlFilePath);
        headers = await DumpDataHelper.GetCsvHeadersFromSqlFileAsync(text);
        csvFiles = await DumpDataHelper.GetCsvFileNamesFromSqlFileAsync(text, new Regex(@"'.+\.dat.zst'"));
    }

    /// <summary>
    /// Reads every data file line by line, parses each line with <see cref="ParseRow2"/> and
    /// passes the resulting record to <paramref name="action"/>.
    /// </summary>
    /// <param name="action">Callback invoked once per record; may be null.</param>
    public virtual async Task DoEnqueue(Action<DataRecord> action)
    {
        await GetHeaderAndCsvFiles();

        // A null file list is treated as "no files".
        foreach (var file in csvFiles ?? Array.Empty<string>())
        {
            var filePath = Path.Combine(_inputDir, file);
            using var fs = File.OpenRead(filePath);
            using var sr = new StreamReader(fs);

            // ReadLineAsync returns null at end of stream; StreamReader.EndOfStream is avoided
            // because it may block synchronously inside an async method.
            while (await sr.ReadLineAsync() is { } line)
            {
                var fields = ParseRow2(line, QuoteChar, Delimiter);
                var record = new DataRecord(fields, _tableName, headers);
                action?.Invoke(record);
            }
        }
    }

    /// <summary>
    /// Returns the record built from the first line of the first data file.
    /// </summary>
    /// <returns>The first record, or null when there is no data file or the first file is empty.</returns>
    public virtual async Task<DataRecord?> GetTestRecord()
    {
        await GetHeaderAndCsvFiles();

        var file = csvFiles?.FirstOrDefault();
        if (file is null)
        {
            return null;
        }

        var filePath = Path.Combine(_inputDir, file);
        using var fs = File.OpenRead(filePath);
        using var sr = new StreamReader(fs);

        var line = await sr.ReadLineAsync();
        if (line is null)
        {
            // Empty file: there is no row to build a record from.
            return null;
        }

        var fields = ParseRow2(line, QuoteChar, Delimiter);
        return new DataRecord(fields, _tableName, headers);
    }

    /// <summary>
    /// Nothing to release: readers are opened and disposed inside the methods that use them.
    /// </summary>
    public void Dispose()
    {
    }
}