// File: MES-ETL/MesETL.App/Services/ETL/CsvReader.cs

using System.Text;
using MesETL.App.HostedServices.Abstractions;
2024-01-29 09:29:16 +08:00
using Microsoft.Extensions.Logging;
namespace MesETL.App.Services.ETL;
2024-01-29 09:29:16 +08:00
/// <summary>
/// Reads delimited (CSV) text one line at a time and exposes each parsed row through <see cref="Current"/>.
/// </summary>
public class CsvReader : IDataReader
{
    /// <summary>Path of the file being read; <c>null</c> when the reader was constructed from a stream.</summary>
    protected readonly string? FilePath;

    /// <summary>Lazily created text reader; nothing is opened until <c>Reader.Value</c> is first accessed.</summary>
    protected readonly Lazy<StreamReader> Reader;

    // File stream opened by the file-path constructor; stays null when a stream was supplied by the caller.
    private Stream? _stream;

    /// <summary>Optional logger supplied at construction.</summary>
    protected readonly ILogger? Logger;

    /// <summary>Table name passed to every <see cref="DataRecord"/> this reader produces.</summary>
    protected readonly string TableName;

    /// <summary>The record produced by the most recent successful <see cref="ReadAsync"/> call.</summary>
    public DataRecord Current { get; protected set; } = default!;

    /// <summary>Column headers passed to every produced record.</summary>
    public string[] Headers { get; }

    /// <summary>Field delimiter. Only its first character is used when splitting a row.</summary>
    public string Delimiter { get; }

    /// <summary>Character that opens and closes a quoted section.</summary>
    public char QuoteChar { get; }

    /// <summary>
    /// Creates a reader over an existing stream.
    /// </summary>
    /// <param name="stream">Stream containing the delimited text.</param>
    /// <param name="tableName">Table name attached to each record.</param>
    /// <param name="headers">Column headers attached to each record.</param>
    /// <param name="delimiter">Field delimiter; must contain at least one character.</param>
    /// <param name="quoteChar">Quote character.</param>
    /// <param name="logger">Optional logger.</param>
    /// <exception cref="ArgumentException"><paramref name="delimiter"/> is null or empty.</exception>
    public CsvReader(Stream stream, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
        : this(tableName, headers, delimiter, quoteChar, logger)
    {
        Reader = new Lazy<StreamReader>(() => new StreamReader(stream), false);
    }

    /// <summary>
    /// Creates a reader over a file. The file is opened on first access to <c>Reader.Value</c>.
    /// </summary>
    /// <param name="filePath">Path of the file to read.</param>
    /// <param name="tableName">Table name attached to each record.</param>
    /// <param name="headers">Column headers attached to each record.</param>
    /// <param name="delimiter">Field delimiter; must contain at least one character.</param>
    /// <param name="quoteChar">Quote character.</param>
    /// <param name="logger">Optional logger.</param>
    /// <exception cref="ArgumentException"><paramref name="delimiter"/> is null or empty.</exception>
    public CsvReader(string filePath, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
        : this(tableName, headers, delimiter, quoteChar, logger)
    {
        FilePath = filePath;
        Reader = new Lazy<StreamReader>(() =>
        {
            _stream = File.OpenRead(filePath);
            return new StreamReader(_stream);
        });
    }

    // Shared initialisation for the public constructors, each of which assigns Reader afterwards.
    private CsvReader(string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
    {
        // ReadAsync indexes Delimiter[0]; reject an unusable delimiter here instead of
        // failing with an index/null error on the first read.
        if (string.IsNullOrEmpty(delimiter))
        {
            throw new ArgumentException("Delimiter must contain at least one character.", nameof(delimiter));
        }

        TableName = tableName;
        Headers = headers;
        Logger = logger;
        Delimiter = delimiter;
        QuoteChar = quoteChar;
        Reader = null!;
    }

    /// <summary>
    /// Reads the next line, parses it and stores the result in <see cref="Current"/>.
    /// </summary>
    /// <returns><c>true</c> when a row was read; <c>false</c> when no further row is available.</returns>
    public virtual async ValueTask<bool> ReadAsync()
    {
        var line = await Reader.Value.ReadLineAsync();

        // NOTE(review): a blank or whitespace-only line is treated the same as end of input,
        // so reading stops at the first such line - confirm this is intended.
        if (string.IsNullOrWhiteSpace(line))
        {
            return false;
        }

        var fields = ParseRowFaster(line, QuoteChar, Delimiter[0]);
        Current = new DataRecord(fields, TableName, Headers);
        return true;
    }

    /// <summary>
    /// Splits one row into fields and returns them as an array.
    /// Uses the same splitting rules as <see cref="ParseRowFaster"/>.
    /// </summary>
    /// <param name="source">The row text, without a line terminator.</param>
    /// <param name="quoteChar">Quote character; delimiters between a pair of quotes do not split.</param>
    /// <param name="delimiter">Field delimiter.</param>
    /// <returns>The fields of the row; quote and backslash characters are kept in the field text.</returns>
    public static string[] ParseRow(ReadOnlySpan<char> source, char quoteChar, char delimiter)
    {
        return ParseRowFaster(source, quoteChar, delimiter).ToArray();
    }

    /// <summary>
    /// Splits one row into fields by slicing <paramref name="source"/> directly.
    /// </summary>
    /// <remarks>
    /// A backslash causes the character after it to be skipped by the quote check, so an escaped
    /// quote does not toggle the quoted state. An escaped delimiter outside quotes still splits.
    /// Quote and backslash characters are not removed from the returned fields.
    /// </remarks>
    /// <param name="source">The row text, without a line terminator.</param>
    /// <param name="quoteChar">Quote character; delimiters between a pair of quotes do not split.</param>
    /// <param name="delimiter">Field delimiter.</param>
    /// <param name="columnCount">Initial capacity of the returned list.</param>
    /// <returns>The fields of the row; always contains at least one element.</returns>
    public static List<string> ParseRowFaster(ReadOnlySpan<char> source, char quoteChar, char delimiter, int columnCount = 10)
    {
        var result = new List<string>(columnCount);
        var inQuotes = false;
        var escaped = false;
        var fieldStart = 0;

        for (var i = 0; i < source.Length; i++)
        {
            var c = source[i];

            if (!escaped)
            {
                if (c is '\\')
                {
                    escaped = true;
                    continue;
                }

                if (c == quoteChar)
                {
                    inQuotes = !inQuotes;
                    continue;
                }
            }

            if (!inQuotes && c == delimiter)
            {
                // The field is everything since the previous split, excluding this delimiter.
                result.Add(source[fieldStart..i].ToString());
                fieldStart = i + 1;
            }

            escaped = false;
        }

        // Trailing field (an empty string when the row is empty or ends with a delimiter).
        result.Add(source[fieldStart..].ToString());
        return result;
    }

    /// <summary>
    /// Disposes the text reader and the file stream, if they were ever created.
    /// </summary>
    public virtual void Dispose()
    {
        // Only touch Reader.Value when it already exists; otherwise disposing would open the file.
        if (Reader.IsValueCreated)
        {
            Reader.Value.Dispose();
            _stream?.Dispose();
        }

        GC.SuppressFinalize(this);
    }
}