// MES-ETL/MesETL.App/Services/ETL/CsvReader.cs
//
// 106 lines
// 3.1 KiB
// C#

using System.Text;
using MesETL.App.HostedServices.Abstractions;
using Microsoft.Extensions.Logging;
namespace MesETL.App.Services.ETL;
/// <summary>
/// Reads CSV text line by line and exposes each parsed line as a <see cref="DataRecord"/>.
/// </summary>
/// <remarks>
/// One physical line is treated as one record; a quoted field that spans several
/// lines is therefore not supported. Quote characters and backslashes are kept
/// verbatim in the parsed field values.
/// </remarks>
public class CsvReader : IDataReader
{
    /// <summary>Path of the source file; <c>null</c> when the reader was created from a stream.</summary>
    protected readonly string? FilePath;

    /// <summary>Text reader over the underlying stream, created on first access.</summary>
    protected readonly Lazy<StreamReader> Reader;

    /// <summary>Optional logger supplied by the caller.</summary>
    protected readonly ILogger? Logger;

    /// <summary>Table name passed to every <see cref="DataRecord"/> produced by this reader.</summary>
    protected readonly string TableName;

    /// <summary>
    /// File stream opened by the file-path constructor. It is tracked separately so it
    /// can be released even when <see cref="Reader"/> was never materialized.
    /// </summary>
    private readonly FileStream? _fileStream;

    /// <summary>The record produced by the most recent successful <see cref="ReadAsync"/> call.</summary>
    public DataRecord Current { get; protected set; } = null!;

    /// <summary>Column headers passed to every produced record.</summary>
    public string[] Headers { get; }

    /// <summary>The unparsed text of the most recently read line.</summary>
    public string? CurrentRaw { get; protected set; }

    /// <summary>Field delimiter used when splitting a line.</summary>
    public string Delimiter { get; }

    /// <summary>Character that opens and closes a quoted section of a field.</summary>
    public char QuoteChar { get; }

    /// <summary>
    /// Creates a reader over an existing stream.
    /// </summary>
    /// <param name="stream">Stream containing the CSV text. It is disposed together with the reader once reading has started.</param>
    /// <param name="tableName">Table name attached to each record.</param>
    /// <param name="headers">Column headers attached to each record.</param>
    /// <param name="delimiter">Field delimiter.</param>
    /// <param name="quoteChar">Quote character.</param>
    /// <param name="logger">Optional logger.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
    public CsvReader(Stream stream, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
        : this(tableName, headers, delimiter, quoteChar, logger)
    {
        // Fail at construction time instead of on the first read.
        if (stream is null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        Reader = new Lazy<StreamReader>(() => new StreamReader(stream));
    }

    /// <summary>
    /// Creates a reader over the file at <paramref name="filePath"/>. The file is opened immediately.
    /// </summary>
    /// <param name="filePath">Path of the CSV file to read.</param>
    /// <param name="tableName">Table name attached to each record.</param>
    /// <param name="headers">Column headers attached to each record.</param>
    /// <param name="delimiter">Field delimiter.</param>
    /// <param name="quoteChar">Quote character.</param>
    /// <param name="logger">Optional logger.</param>
    public CsvReader(string filePath, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
        : this(tableName, headers, delimiter, quoteChar, logger)
    {
        var fileStream = File.OpenRead(filePath);
        FilePath = filePath;
        _fileStream = fileStream;
        Reader = new Lazy<StreamReader>(() => new StreamReader(fileStream));
    }

    /// <summary>
    /// Shared initialization for the public constructors.
    /// </summary>
    private CsvReader(string tableName, string[] headers, string delimiter, char quoteChar, ILogger? logger)
    {
        TableName = tableName;
        Headers = headers;
        Logger = logger;
        Delimiter = delimiter;
        QuoteChar = quoteChar;

        // Placeholder only: every public constructor assigns the real reader right after chaining here.
        Reader = null!;
    }

    /// <summary>
    /// Reads the next non-blank line, parses it and stores the result in
    /// <see cref="Current"/> and <see cref="CurrentRaw"/>.
    /// </summary>
    /// <returns><c>true</c> when a record was read; <c>false</c> when the end of the stream was reached.</returns>
    public virtual async ValueTask<bool> ReadAsync()
    {
        string? line;
        do
        {
            line = await Reader.Value.ReadLineAsync();
            if (line is null)
            {
                // Only a null line means end of stream.
                return false;
            }

            // Blank or whitespace-only lines are skipped instead of ending the read,
            // so rows that follow an empty line are not silently dropped.
        } while (string.IsNullOrWhiteSpace(line));

        CurrentRaw = line;
        var fields = ParseRow(line, QuoteChar, Delimiter);
        Current = new DataRecord(fields, TableName, Headers) { RawField = line };
        return true;
    }

    /// <summary>
    /// Splits a single CSV line into its fields.
    /// </summary>
    /// <remarks>
    /// A delimiter inside a quoted section does not split the field. A backslash makes the
    /// following character lose its meaning as a quote or as another escape character.
    /// Quote characters and backslashes are copied into the field values unchanged.
    /// </remarks>
    /// <param name="source">The line to split.</param>
    /// <param name="quoteChar">Character that toggles the quoted state.</param>
    /// <param name="delimiter">Field delimiter; may be longer than one character.</param>
    /// <returns>The fields of the line, in order. An empty line yields one empty field.</returns>
    /// <exception cref="ArgumentException"><paramref name="delimiter"/> is <c>null</c> or empty.</exception>
    public string[] ParseRow(ReadOnlySpan<char> source, char quoteChar, string delimiter)
    {
        if (string.IsNullOrEmpty(delimiter))
        {
            throw new ArgumentException("Delimiter must contain at least one character.", nameof(delimiter));
        }

        ReadOnlySpan<char> separator = delimiter;
        var fields = new List<string>();
        var field = new StringBuilder();
        var inQuotes = false;
        var escaped = false;
        var index = 0;

        while (index < source.Length)
        {
            var ch = source[index];

            if (!escaped && ch == '\\')
            {
                escaped = true;
                field.Append(ch);
                index++;
                continue;
            }

            if (!escaped && ch == quoteChar)
            {
                inQuotes = !inQuotes;
                field.Append(ch);
                index++;
                continue;
            }

            if (!inQuotes && source.Slice(index).StartsWith(separator, StringComparison.Ordinal))
            {
                fields.Add(field.ToString());
                field.Clear();

                // Consume the whole delimiter, not only its first character.
                index += separator.Length;
            }
            else
            {
                field.Append(ch);
                index++;
            }

            escaped = false;
        }

        fields.Add(field.ToString());
        return fields.ToArray();
    }

    /// <summary>
    /// Releases the text reader and the underlying stream.
    /// </summary>
    public virtual void Dispose()
    {
        if (Reader.IsValueCreated)
        {
            // Disposing the StreamReader also disposes the stream it wraps.
            Reader.Value.Dispose();
        }
        else
        {
            // The reader was never created, so the file opened by the constructor
            // would otherwise stay open.
            _fileStream?.Dispose();
        }
    }
}