152 lines
4.3 KiB
C#
152 lines
4.3 KiB
C#
using System.Text;
|
|
using MesETL.App.HostedServices.Abstractions;
|
|
using Microsoft.Extensions.Logging;
|
|
|
|
namespace MesETL.App.Services.ETL;
|
|
|
|
/// <summary>
/// Reads delimited (CSV) text line by line from a file or a stream and exposes each
/// parsed line as a <see cref="DataRecord"/> through <see cref="Current"/>.
/// </summary>
/// <remarks>
/// Lines are split on the delimiter only. Quote characters and backslashes are kept
/// verbatim in the field text; nothing is unquoted or unescaped here.
/// </remarks>
public class CsvReader : IDataReader
{
    /// <summary>Path of the file being read, or <c>null</c> when constructed from a stream.</summary>
    protected readonly string? FilePath;

    /// <summary>Text reader over the input. It is created on first access.</summary>
    protected readonly Lazy<StreamReader> Reader;

    /// <summary>Optional logger supplied by the caller.</summary>
    protected readonly ILogger? Logger;

    /// <summary>Table name passed to every <see cref="DataRecord"/> this reader creates.</summary>
    protected readonly string TableName;

    // Stream opened by the file-path constructor. It is kept so that Dispose can close it
    // even when Reader was never materialized (otherwise the file handle would leak).
    private readonly Stream? _ownedStream;

    /// <summary>The record produced by the last successful <see cref="ReadAsync"/> call.</summary>
    public DataRecord Current { get; protected set; } = null!;

    /// <summary>Column headers passed to every <see cref="DataRecord"/> this reader creates.</summary>
    public string[] Headers { get; }

    /// <summary>Field delimiter. <see cref="ReadAsync"/> uses only its first character.</summary>
    public string Delimiter { get; }

    /// <summary>Character that opens and closes a quoted section of a field.</summary>
    public char QuoteChar { get; }

    /// <summary>
    /// Creates a reader over an existing stream.
    /// </summary>
    /// <param name="stream">Stream to read text from.</param>
    /// <param name="tableName">Table name attached to each record.</param>
    /// <param name="headers">Column headers attached to each record.</param>
    /// <param name="delimiter">Field delimiter; only the first character is used when reading.</param>
    /// <param name="quoteChar">Quote character.</param>
    /// <param name="logger">Optional logger.</param>
    public CsvReader(Stream stream, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
        : this(tableName, headers, delimiter, quoteChar, logger)
    {
        Reader = new Lazy<StreamReader>(() => new StreamReader(stream));
    }

    /// <summary>
    /// Creates a reader over a file. The file is opened immediately, so a missing or
    /// inaccessible file fails here rather than on the first read.
    /// </summary>
    /// <param name="filePath">Path of the file to read.</param>
    /// <param name="tableName">Table name attached to each record.</param>
    /// <param name="headers">Column headers attached to each record.</param>
    /// <param name="delimiter">Field delimiter; only the first character is used when reading.</param>
    /// <param name="quoteChar">Quote character.</param>
    /// <param name="logger">Optional logger.</param>
    public CsvReader(string filePath, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
        : this(tableName, headers, delimiter, quoteChar, logger)
    {
        var fs = File.OpenRead(filePath);
        FilePath = filePath;
        _ownedStream = fs;
        Reader = new Lazy<StreamReader>(() => new StreamReader(fs));
    }

    // Shared initialization for the public constructors; each of them assigns Reader afterwards.
    private CsvReader(string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
    {
        TableName = tableName;
        Headers = headers;
        Logger = logger;
        Delimiter = delimiter;
        QuoteChar = quoteChar;
        Reader = null!;
    }

    /// <summary>
    /// Reads the next line, splits it into fields and stores the result in <see cref="Current"/>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a record was read; <c>false</c> when the input is exhausted or the
    /// next line is empty or consists only of white space.
    /// </returns>
    public virtual async ValueTask<bool> ReadAsync()
    {
        var line = await Reader.Value.ReadLineAsync();
        if (string.IsNullOrWhiteSpace(line))
        {
            return false;
        }

        var fields = ParseRow(line, QuoteChar, Delimiter[0]);
        Current = new DataRecord(fields, TableName, Headers);
        return true;
    }

    /// <summary>
    /// Splits one line into fields and returns them as an array.
    /// </summary>
    /// <param name="source">The line to split.</param>
    /// <param name="quoteChar">Quote character; delimiters between a pair of quotes do not split.</param>
    /// <param name="delimiter">Field delimiter.</param>
    /// <returns>The fields in order; an empty line yields a single empty field.</returns>
    /// <remarks>Uses the same splitting rules as <see cref="ParseRowFaster"/>.</remarks>
    public static string[] ParseRow(ReadOnlySpan<char> source, char quoteChar, char delimiter)
    {
        return ParseRowFaster(source, quoteChar, delimiter).ToArray();
    }

    /// <summary>
    /// Splits one line into fields by slicing the source span instead of copying
    /// characters one at a time.
    /// </summary>
    /// <param name="source">The line to split.</param>
    /// <param name="quoteChar">Quote character; delimiters between a pair of quotes do not split.</param>
    /// <param name="delimiter">Field delimiter.</param>
    /// <param name="columnCount">Initial capacity of the returned list.</param>
    /// <returns>The fields in order; an empty line yields a single empty field.</returns>
    /// <remarks>
    /// A backslash stops the next character from toggling the quote state or starting
    /// another escape. It does not protect a delimiter: an unquoted delimiter always
    /// splits. Quotes and backslashes remain part of the field text.
    /// </remarks>
    public static List<string> ParseRowFaster(ReadOnlySpan<char> source, char quoteChar, char delimiter, int columnCount = 10)
    {
        var result = new List<string>(columnCount);
        var inQuotes = false;
        var escaped = false;
        var fieldStart = 0;

        for (var i = 0; i < source.Length; i++)
        {
            var c = source[i];

            if (!escaped)
            {
                if (c is '\\')
                {
                    // Remember the escape; it applies to the next character only.
                    escaped = true;
                    continue;
                }

                if (c == quoteChar)
                {
                    inQuotes = !inQuotes;
                    continue;
                }
            }

            if (!inQuotes && c == delimiter)
            {
                // The field is everything from fieldStart up to, but excluding, the delimiter.
                result.Add(source[fieldStart..i].ToString());
                fieldStart = i + 1;
            }

            escaped = false;
        }

        // Trailing field (empty when the line is empty or ends with a delimiter).
        result.Add(source[fieldStart..].ToString());
        return result;
    }

    /// <summary>
    /// Releases the underlying reader. When the reader was never created, the file
    /// stream opened by the file-path constructor is closed directly.
    /// </summary>
    public virtual void Dispose()
    {
        if (Reader.IsValueCreated)
        {
            // Disposing the StreamReader also closes the stream it wraps.
            Reader.Value.Dispose();
        }
        else
        {
            _ownedStream?.Dispose();
        }
    }
}