2024-12-11 18:08:16 +08:00
|
|
|
|
using System.Runtime.InteropServices;
|
|
|
|
|
using System.Text;
|
|
|
|
|
using Microsoft.Extensions.Logging;
|
2024-01-29 09:29:16 +08:00
|
|
|
|
using ZstdSharp;
|
|
|
|
|
|
2024-02-02 17:14:41 +08:00
|
|
|
|
namespace MesETL.App.Services.ETL;
|
2024-01-29 09:29:16 +08:00
|
|
|
|
|
|
|
|
|
/// <summary>
/// Decompresses a Zstandard (ZST) stream and reads CSV rows from it.
/// </summary>
/// <remarks>
/// Rows are split on '\r', '\n' or "\r\n" using a fixed-size character buffer.
/// Line breaks inside quoted fields are not treated specially by this reader.
/// Only the first character of <c>Delimiter</c> is used when parsing a row.
/// </remarks>
public class ZstReader : CsvReader
{
    /// <summary>Lazily created text reader over the decompressed stream (hides the base member).</summary>
    protected new readonly Lazy<StreamReader> Reader;

    /// <summary>Decompression stream created by the file-path constructor; null otherwise.</summary>
    private Stream? _stream;

    /// <summary>File stream opened by the file-path constructor; null otherwise.</summary>
    private FileStream? _fileStream;

    /// <summary>Accumulates the characters of a row that spans more than one buffer fill.</summary>
    private readonly List<char> _str = new(1024);

    /// <summary>Decoded characters read from <see cref="Reader"/>.</summary>
    private readonly char[] _charBuffer = new char[1024];

    /// <summary>Number of valid characters currently held in <see cref="_charBuffer"/>.</summary>
    private int _charLen = 0;

    /// <summary>Index of the next unread character in <see cref="_charBuffer"/>.</summary>
    private int _charPos = 0;

    /// <summary>
    /// Creates a reader over the ZST-compressed CSV file at <paramref name="filePath"/>.
    /// </summary>
    /// <param name="filePath">Path of the compressed file.</param>
    /// <param name="tableName">Table name attached to every produced record.</param>
    /// <param name="headers">Column headers attached to every produced record.</param>
    /// <param name="delimiter">Field delimiter; only its first character is used.</param>
    /// <param name="quoteChar">Quote character passed to the row parser.</param>
    /// <param name="logger">Optional logger forwarded to the base class.</param>
    public ZstReader(string filePath, string tableName, string[] headers, string delimiter = ",", char quoteChar = '\"', ILogger? logger = null)
        : base(filePath, tableName, headers, delimiter, quoteChar, logger)
    {
        Reader = new Lazy<StreamReader>(() =>
        {
            // Keep a reference to the file stream: ZstdSharp's DecompressionStream leaves the
            // wrapped stream open by default (leaveOpen defaults to true), so disposing the
            // decompression stream alone would not release the file handle.
            _fileStream = File.OpenRead(filePath);
            _stream = new DecompressionStream(_fileStream);
            return new StreamReader(_stream);
        }, false);

        // Prime the buffer. Note that this forces the lazy reader, so the file is opened here.
        ReadBuffer();
    }

    /// <summary>
    /// Creates a reader over an already opened ZST-compressed <paramref name="stream"/>.
    /// </summary>
    /// <param name="stream">Compressed input stream.</param>
    /// <param name="tableName">Table name attached to every produced record.</param>
    /// <param name="headers">Column headers attached to every produced record.</param>
    /// <param name="delimiter">Field delimiter; only its first character is used.</param>
    /// <param name="quoteChar">Quote character passed to the row parser.</param>
    /// <param name="logger">Optional logger forwarded to the base class.</param>
    public ZstReader(Stream stream, string tableName, string[] headers, string delimiter = ",", char quoteChar = '\"', ILogger? logger = null)
        : base(stream, tableName, headers, delimiter, quoteChar, logger)
    {
        var ds = new DecompressionStream(stream);
        Reader = new Lazy<StreamReader>(() => new StreamReader(ds), false);

        // Prime the buffer. Note that this forces the lazy reader.
        ReadBuffer();
    }

    /// <summary>
    /// Refills <see cref="_charBuffer"/> from the reader and rewinds the read position.
    /// </summary>
    /// <returns>The number of characters read; 0 means end of stream.</returns>
    private int ReadBuffer()
    {
        _charPos = 0;
        _charLen = Reader.Value.ReadBlock(_charBuffer);
        return _charLen;
    }

    /// <summary>
    /// Reads the next row into <c>Current</c>.
    /// </summary>
    /// <returns><c>true</c> if a row was read; <c>false</c> at end of stream.</returns>
    /// <remarks>
    /// The work is done synchronously. Failures are still reported through the returned
    /// task rather than thrown directly, as they would be from an <c>async</c> method.
    /// </remarks>
    public override ValueTask<bool> ReadAsync()
    {
        try
        {
            return new ValueTask<bool>(ReadRow());
        }
        catch (Exception ex)
        {
            return ValueTask.FromException<bool>(ex);
        }
    }

    /// <summary>
    /// Synchronous core of <see cref="ReadAsync"/>: extracts one line and parses it into <c>Current</c>.
    /// </summary>
    private bool ReadRow()
    {
        // The buffer is fully consumed and the stream is at EOF.
        if (_charPos == _charLen && ReadBuffer() == 0)
        {
            return false;
        }

        do
        {
            // Unread part of the buffer.
            var span = _charBuffer.AsSpan(_charPos, _charLen - _charPos);
            var newLineIdx = span.IndexOfAny('\r', '\n');

            // A line break was found: parse the row, joining any text carried over from earlier buffers.
            if (newLineIdx >= 0)
            {
                if (_str.Count == 0)
                {
                    // Fast path: the whole row is inside the buffer, so parse it in place without copying.
                    var fields = ParseRowFaster(span[..newLineIdx], QuoteChar, Delimiter[0]);
                    Current = new DataRecord(fields, TableName, Headers);
                }
                else
                {
                    _str.AddRange(span[..newLineIdx]);
                    var fields = ParseRowFaster(CollectionsMarshal.AsSpan(_str), QuoteChar, Delimiter[0]);
                    Current = new DataRecord(fields, TableName, Headers);
                }

                _str.Clear();

                var ch = span[newLineIdx];
                _charPos += newLineIdx + 1;

                // Skip the '\n' of a CRLF pair. If the '\r' was the last buffered character,
                // refill first; ReadBuffer resets _charPos to 0, so the index stays valid.
                if (ch == '\r' && (_charPos < _charLen || ReadBuffer() > 0) && _charBuffer[_charPos] == '\n')
                {
                    ++_charPos;
                }

                return true;
            }

            // No line break in this buffer: stash it and keep reading.
            _str.AddRange(span);
        } while (ReadBuffer() > 0);

        // End of stream without a trailing line break: what has been collected is the last row.
        var f = ParseRowFaster(CollectionsMarshal.AsSpan(_str), QuoteChar, Delimiter[0]);
        Current = new DataRecord(f, TableName, Headers);
        _str.Clear();
        return true;
    }

    /// <summary>
    /// Disposes the base reader and, if they were created, the text reader, the
    /// decompression stream and the underlying file stream.
    /// </summary>
    public override void Dispose()
    {
        base.Dispose();

        if (Reader.IsValueCreated)
        {
            Reader.Value.Dispose();
            _stream?.Dispose();

            // Explicitly release the file handle; the decompression stream may have left it open.
            _fileStream?.Dispose();
        }
    }
}
|