160 lines
4.5 KiB
C#
160 lines
4.5 KiB
C#
using System.Text;
|
||
using MesETL.App.HostedServices.Abstractions;
|
||
using Microsoft.Extensions.Logging;
|
||
|
||
namespace MesETL.App.Services.ETL;
|
||
|
||
/// <summary>
/// Reads CSV data one line at a time, either from a caller-supplied stream or from a file
/// that is opened lazily on the first read.
/// </summary>
/// <remarks>
/// Every physical line is parsed as one record, so a quoted field cannot span several lines.
/// Only the first character of <see cref="Delimiter"/> is used as the field separator.
/// </remarks>
public class CsvReader : IDataReader
{
    /// <summary>Initial capacity of the field list when no header count is available.</summary>
    private const int DefaultColumnCount = 10;

    /// <summary>Path of the file being read, or <c>null</c> when reading from a supplied stream.</summary>
    protected readonly string? FilePath;

    /// <summary>Lazily created text reader; nothing is opened or wrapped until the first read.</summary>
    protected readonly Lazy<StreamReader> Reader;

    // File stream opened by the file-path constructor; stays null for the stream constructor.
    private Stream? _stream;

    /// <summary>Optional logger supplied by the caller.</summary>
    protected readonly ILogger? Logger;

    /// <summary>Table name passed to every <see cref="DataRecord"/> produced by this reader.</summary>
    protected readonly string TableName;

    /// <summary>The record produced by the last successful <see cref="ReadAsync"/> call.</summary>
    public DataRecord Current { get; protected set; } = default!;

    /// <summary>Column headers passed to every produced record.</summary>
    public string[] Headers { get; }

    /// <summary>Field delimiter; only its first character is used when splitting a line.</summary>
    public string Delimiter { get; }

    /// <summary>Character that opens and closes a quoted section of a field.</summary>
    public char QuoteChar { get; }

    /// <summary>
    /// Creates a reader over an existing stream. The stream is wrapped in a
    /// <see cref="StreamReader"/> on the first read.
    /// </summary>
    /// <param name="stream">Stream containing the CSV text.</param>
    /// <param name="tableName">Table name attached to each record.</param>
    /// <param name="headers">Column headers attached to each record.</param>
    /// <param name="delimiter">Field delimiter; must not be empty.</param>
    /// <param name="quoteChar">Quote character.</param>
    /// <param name="logger">Optional logger.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="delimiter"/> is <c>null</c> or empty.</exception>
    public CsvReader(Stream stream, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
        : this(tableName, headers, delimiter, quoteChar, logger)
    {
        // Fail at construction instead of on the first read, where the StreamReader
        // constructor would otherwise raise the same exception type.
        ArgumentNullException.ThrowIfNull(stream);

        // isThreadSafe: false - the lazy initialization itself is not synchronized.
        Reader = new Lazy<StreamReader>(() => new StreamReader(stream), false);
    }

    /// <summary>
    /// Creates a reader over a file. The file is not opened until the first read.
    /// </summary>
    /// <param name="filePath">Path of the CSV file.</param>
    /// <param name="tableName">Table name attached to each record.</param>
    /// <param name="headers">Column headers attached to each record.</param>
    /// <param name="delimiter">Field delimiter; must not be empty.</param>
    /// <param name="quoteChar">Quote character.</param>
    /// <param name="logger">Optional logger.</param>
    /// <exception cref="ArgumentNullException"><paramref name="filePath"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="delimiter"/> is <c>null</c> or empty.</exception>
    public CsvReader(string filePath, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
        : this(tableName, headers, delimiter, quoteChar, logger)
    {
        ArgumentNullException.ThrowIfNull(filePath);

        FilePath = filePath;
        Reader = new Lazy<StreamReader>(() =>
        {
            // Keep a handle on the file stream so Dispose can release it even if
            // constructing the StreamReader fails.
            _stream = File.OpenRead(filePath);
            return new StreamReader(_stream);
        });
    }

    /// <summary>
    /// Shared initialization for the public constructors, which must assign <see cref="Reader"/>.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="delimiter"/> is <c>null</c> or empty.</exception>
    private CsvReader(string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
    {
        // ReadAsync indexes Delimiter[0]; reject an empty delimiter here rather than
        // letting the first read fail with an IndexOutOfRangeException.
        if (string.IsNullOrEmpty(delimiter))
        {
            throw new ArgumentException("Delimiter must contain at least one character.", nameof(delimiter));
        }

        TableName = tableName;
        Headers = headers;
        Logger = logger;
        Delimiter = delimiter;
        QuoteChar = quoteChar;
        Reader = null!; // Placeholder; each public constructor replaces it.
    }

    /// <summary>
    /// Reads the next line and stores the parsed record in <see cref="Current"/>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a record was read; <c>false</c> at end of input or when the next
    /// line is empty or contains only white space.
    /// </returns>
    public virtual async ValueTask<bool> ReadAsync()
    {
        var line = await Reader.Value.ReadLineAsync();

        // NOTE(review): a blank line in the middle of the input also stops reading here,
        // not only the end of the stream - confirm this is intended.
        if (string.IsNullOrWhiteSpace(line))
        {
            return false;
        }

        // Pre-size the field list from the known header count to avoid list growth.
        var capacity = Headers is { Length: > 0 } ? Headers.Length : DefaultColumnCount;
        var fields = ParseRowFaster(line, QuoteChar, Delimiter[0], capacity);
        Current = new DataRecord(fields, TableName, Headers);
        return true;
    }

    /// <summary>
    /// Splits one CSV line into fields, building each field character by character.
    /// </summary>
    /// <remarks>
    /// Quote characters and backslashes are kept in the returned fields. A delimiter inside a
    /// quoted section does not split. A backslash prevents the following character from being
    /// treated as a quote or as another escape, but it does not stop a delimiter from splitting.
    /// </remarks>
    /// <param name="source">The line to split.</param>
    /// <param name="quoteChar">Quote character.</param>
    /// <param name="delimiter">Field separator.</param>
    /// <returns>The fields of the line; an empty line yields a single empty field.</returns>
    public static string[] ParseRow(ReadOnlySpan<char> source, char quoteChar, char delimiter)
    {
        var fields = new List<string>();
        var field = new StringBuilder(source.Length);
        var inQuotes = false;
        var escaped = false;

        foreach (var ch in source)
        {
            if (!escaped)
            {
                if (ch == '\\')
                {
                    escaped = true;
                    field.Append(ch);
                    continue;
                }

                if (ch == quoteChar)
                {
                    inQuotes = !inQuotes;
                    field.Append(ch);
                    continue;
                }
            }

            if (!inQuotes && ch == delimiter)
            {
                fields.Add(field.ToString());
                field.Clear();
            }
            else
            {
                field.Append(ch);
            }

            // The escape applies to exactly one following character.
            escaped = false;
        }

        // The text after the last delimiter (possibly empty) is the final field.
        fields.Add(field.ToString());
        return fields.ToArray();
    }

    /// <summary>
    /// Splits one CSV line into fields by slicing the source span instead of copying
    /// characters into a builder. Produces the same fields as <see cref="ParseRow"/>.
    /// </summary>
    /// <param name="source">The line to split.</param>
    /// <param name="quoteChar">Quote character.</param>
    /// <param name="delimiter">Field separator.</param>
    /// <param name="columnCount">Initial capacity of the returned list.</param>
    /// <returns>The fields of the line; an empty line yields a single empty field.</returns>
    public static List<string> ParseRowFaster(ReadOnlySpan<char> source, char quoteChar, char delimiter, int columnCount = DefaultColumnCount)
    {
        var fields = new List<string>(columnCount);
        var inQuotes = false;
        var escaped = false;
        var fieldStart = 0;

        for (var i = 0; i < source.Length; i++)
        {
            var ch = source[i];

            if (!escaped)
            {
                if (ch is '\\')
                {
                    escaped = true;
                    continue;
                }

                if (ch == quoteChar)
                {
                    inQuotes = !inQuotes;
                    continue;
                }
            }

            if (!inQuotes && ch == delimiter)
            {
                // Very large fields are allocated on the large object heap; that cannot be avoided here.
                fields.Add(source[fieldStart..i].ToString());
                fieldStart = i + 1;
            }

            // The escape applies to exactly one following character.
            escaped = false;
        }

        // The text after the last delimiter (possibly empty) is the final field.
        fields.Add(source[fieldStart..].ToString());
        return fields;
    }

    /// <summary>
    /// Releases the text reader if it was created, and the file stream if one was opened.
    /// A stream passed to the stream constructor is left untouched when nothing was ever read.
    /// </summary>
    public virtual void Dispose()
    {
        if (Reader.IsValueCreated)
        {
            // Disposing the StreamReader also closes the stream it wraps.
            Reader.Value.Dispose();
        }

        // Covers the case where the file was opened but the reader was never created;
        // disposing an already-closed stream is a no-op.
        _stream?.Dispose();

        GC.SuppressFinalize(this);
    }
}