MES-ETL/MesETL.App/Services/ETL/CsvReader.cs

160 lines
4.5 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System.Text;
using MesETL.App.HostedServices.Abstractions;
using Microsoft.Extensions.Logging;
namespace MesETL.App.Services.ETL;
/// <summary>
/// Reads delimiter-separated text one line at a time and exposes each parsed line
/// as a <see cref="DataRecord"/> through <see cref="Current"/>.
/// </summary>
/// <remarks>
/// The underlying <see cref="StreamReader"/> is created lazily on the first read.
/// Lines are split with <see cref="ParseRowFaster"/>: quote characters and backslashes
/// are kept in the field text, a delimiter inside a quoted section does not split,
/// and a backslash only prevents the following character from toggling the quote state.
/// </remarks>
public class CsvReader : IDataReader
{
    /// <summary>Path of the file being read; <c>null</c> when constructed from a stream.</summary>
    protected readonly string? FilePath;

    /// <summary>Lazily created text reader over the source stream or file.</summary>
    protected readonly Lazy<StreamReader> Reader;

    // File stream opened by the file-path constructor; stays null for the stream constructor.
    private Stream? _stream;

    /// <summary>Optional logger supplied by the caller.</summary>
    protected readonly ILogger? Logger;

    /// <summary>Table name passed to every <see cref="DataRecord"/> produced by this reader.</summary>
    protected readonly string TableName;

    /// <summary>The record produced by the most recent successful <see cref="ReadAsync"/> call.</summary>
    public DataRecord Current { get; protected set; } = default!;

    /// <summary>Column headers passed to every <see cref="DataRecord"/> produced by this reader.</summary>
    public string[] Headers { get; }

    /// <summary>Field delimiter. Only its first character is used when splitting a line.</summary>
    public string Delimiter { get; }

    /// <summary>Character that toggles the "inside quotes" state while splitting a line.</summary>
    public char QuoteChar { get; }

    /// <summary>
    /// Creates a reader over an existing stream. The stream is wrapped in a
    /// <see cref="StreamReader"/> on the first read.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="delimiter"/> is <c>null</c> or empty.</exception>
    public CsvReader(Stream stream, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
        : this(tableName, headers, delimiter, quoteChar, logger)
    {
        // Fail at construction instead of on the first read, when the lazy factory would run.
        ArgumentNullException.ThrowIfNull(stream);
        Reader = new Lazy<StreamReader>(() => new StreamReader(stream), isThreadSafe: false);
    }

    /// <summary>
    /// Creates a reader over a file. The file is opened on the first read.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="filePath"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="delimiter"/> is <c>null</c> or empty.</exception>
    public CsvReader(string filePath, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
        : this(tableName, headers, delimiter, quoteChar, logger)
    {
        ArgumentNullException.ThrowIfNull(filePath);
        FilePath = filePath;
        Reader = new Lazy<StreamReader>(() =>
        {
            _stream = File.OpenRead(filePath);
            return new StreamReader(_stream);
        });
    }

    // Shared initialisation for both public constructors; each of them assigns Reader afterwards.
    private CsvReader(string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
    {
        // ReadAsync indexes Delimiter[0]; reject an empty delimiter here with a clear message
        // instead of failing later with an IndexOutOfRangeException.
        if (string.IsNullOrEmpty(delimiter))
        {
            throw new ArgumentException("Delimiter must contain at least one character.", nameof(delimiter));
        }

        TableName = tableName;
        Headers = headers;
        Logger = logger;
        Delimiter = delimiter;
        QuoteChar = quoteChar;
        Reader = null!;
    }

    /// <summary>
    /// Reads the next line and stores the parsed record in <see cref="Current"/>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a record was read; <c>false</c> at end of input or when the next
    /// line is empty or whitespace-only (a blank line therefore ends reading).
    /// </returns>
    public virtual async ValueTask<bool> ReadAsync()
    {
        var line = await Reader.Value.ReadLineAsync();
        if (string.IsNullOrWhiteSpace(line))
        {
            return false;
        }

        // Pre-size the field list to the known column count rather than the generic default.
        var capacity = Headers?.Length ?? 10;
        var fields = ParseRowFaster(line, QuoteChar, Delimiter[0], capacity);
        Current = new DataRecord(fields, TableName, Headers);
        return true;
    }

    /// <summary>
    /// Splits one line into fields and returns them as an array.
    /// Uses the same splitting rules as <see cref="ParseRowFaster"/>.
    /// </summary>
    /// <param name="source">The line to split.</param>
    /// <param name="quoteChar">Character that toggles the "inside quotes" state.</param>
    /// <param name="delimiter">Character that separates fields outside quotes.</param>
    /// <returns>The fields in order; always at least one element.</returns>
    public static string[] ParseRow(ReadOnlySpan<char> source, char quoteChar, char delimiter)
    {
        // One implementation of the state machine keeps both entry points in agreement.
        return ParseRowFaster(source, quoteChar, delimiter).ToArray();
    }

    /// <summary>
    /// Splits one line into fields by slicing the input span.
    /// </summary>
    /// <param name="source">The line to split.</param>
    /// <param name="quoteChar">Character that toggles the "inside quotes" state.</param>
    /// <param name="delimiter">Character that separates fields outside quotes.</param>
    /// <param name="columnCount">Initial capacity of the returned list.</param>
    /// <returns>
    /// The fields in order; always at least one element. Quote characters and backslashes
    /// are preserved in the field text.
    /// </returns>
    public static List<string> ParseRowFaster(ReadOnlySpan<char> source, char quoteChar, char delimiter, int columnCount = 10)
    {
        var result = new List<string>(columnCount);
        var insideQuotes = false;
        var escaped = false;
        var fieldStart = 0;

        for (var i = 0; i < source.Length; i++)
        {
            var ch = source[i];
            if (!escaped)
            {
                if (ch is '\\')
                {
                    // The next character must not be treated as a backslash or a quote.
                    escaped = true;
                    continue;
                }

                if (ch == quoteChar)
                {
                    insideQuotes = !insideQuotes;
                    continue;
                }
            }

            // Note: an escaped delimiter outside quotes still ends the field.
            if (!insideQuotes && ch == delimiter)
            {
                // Very large fields are still allocated on the large object heap; unavoidable here.
                result.Add(source[fieldStart..i].ToString());
                fieldStart = i + 1;
            }

            escaped = false;
        }

        // Whatever follows the last delimiter (possibly nothing) is the final field.
        result.Add(source[fieldStart..].ToString());
        return result;
    }

    /// <summary>
    /// Disposes the text reader and the file stream if the reader was ever created.
    /// Nothing is released when no read has taken place.
    /// </summary>
    public virtual void Dispose()
    {
        if (Reader.IsValueCreated)
        {
            Reader.Value.Dispose();
            _stream?.Dispose();
        }

        GC.SuppressFinalize(this);
    }
}