MES-ETL/ConsoleApp2/Services/ZstSource.cs

127 lines
4.9 KiB
C#
Raw Normal View History

2024-01-12 16:50:37 +08:00
using ConsoleApp2.Helpers;
using Microsoft.Extensions.Logging;
using System.IO;
using System.Text.RegularExpressions;
using ZstdSharp;
namespace ConsoleApp2.Services
{
/// <summary>
/// A <see cref="CsvSource"/> whose inputs are Zstandard-compressed: a
/// <c>*.{tableName}.*.sql.zst</c> dump file located in the input directory, plus the
/// <c>.dat.zst</c> data files whose names are extracted from that dump.
/// </summary>
public class ZstSource : CsvSource
{
    /// <summary>
    /// Creates the source and locates the compressed SQL dump for <paramref name="tableName"/>.
    /// </summary>
    /// <param name="inputDir">Directory that is scanned for the <c>.sql.zst</c> file.</param>
    /// <param name="tableName">Table name that must appear, dot-delimited, in the dump file name.</param>
    /// <param name="delimiter">Field delimiter, forwarded to the <see cref="CsvSource"/> constructor.</param>
    /// <param name="quoteChar">Quote character, forwarded to the <see cref="CsvSource"/> constructor.</param>
    /// <param name="logger">Optional logger, forwarded to the <see cref="CsvSource"/> constructor.</param>
    public ZstSource(string inputDir, string tableName, string delimiter = ",", char quoteChar = '"',
        ILogger? logger = null) : base(inputDir, tableName, delimiter, quoteChar, logger)
    {
        // The table name is user data, not a pattern: escape it so characters such as '.' or '+'
        // are matched literally. The dots of the ".sql.zst" suffix are literal as well.
        string pattern = $"^.*\\.{Regex.Escape(tableName)}\\..*\\.sql\\.zst$";

        // Stays null when no file in the directory matches the pattern.
        _sqlFilePath = Directory.GetFiles(_inputDir).FirstOrDefault(s => Regex.IsMatch(s, pattern));
    }

    /// <summary>
    /// Decompresses a Zstandard file and returns its whole content as text.
    /// </summary>
    /// <param name="filePath">Path of the <c>.zst</c> file to read.</param>
    /// <returns>The decompressed content, decoded by <see cref="StreamReader"/> with its default settings.</returns>
    private async Task<string> DecompressFile(string filePath)
    {
        using var input = File.OpenRead(filePath);
        using var decompress = new DecompressionStream(input);
        // Read straight from the decompression stream; no intermediate in-memory copy is needed.
        using var reader = new StreamReader(decompress);
        return await reader.ReadToEndAsync();
    }

    /// <summary>
    /// Returns the column headers that <see cref="DumpDataHelper"/> extracts from the decompressed SQL dump.
    /// </summary>
    public override async Task<string[]> GetHeaders()
    {
        var text = await DecompressFile(_sqlFilePath);
        return await DumpDataHelper.GetCsvHeadersFromSqlFileAsync(text);
    }

    /// <summary>
    /// Returns the <c>.dat.zst</c> data file names that <see cref="DumpDataHelper"/> extracts
    /// from the decompressed SQL dump.
    /// </summary>
    public override async Task<string[]> GetCsvFiles()
    {
        var text = await DecompressFile(_sqlFilePath);
        return await DumpDataHelper.GetCsvFileNamesFromSqlFileAsync(text, new Regex(@"'.+\.dat\.zst'"));
    }

    /// <summary>
    /// Reads every data file line by line, converts each line into a <see cref="DataRecord"/>
    /// and passes it to <paramref name="action"/>.
    /// </summary>
    /// <param name="action">Callback invoked once per record; a null callback is tolerated.</param>
    public override async Task DoEnqueue(Action<DataRecord> action)
    {
        var sourceFiles = await GetCsvFiles();
        var headers = await GetHeaders();

        foreach (var file in sourceFiles)
        {
            var filePath = Path.Combine(_inputDir, file);

            // All three streams are released at the end of each loop iteration.
            using var input = File.OpenRead(filePath);
            using var decompress = new DecompressionStream(input);
            using var reader = new StreamReader(decompress);

            // ReadLineAsync returns null at end of input, so no separate EndOfStream probe is required.
            string? line;
            while ((line = await reader.ReadLineAsync()) != null)
            {
                var fields = ParseRow2(line, QuoteChar, Delimiter);
                var record = new DataRecord(fields, _tableName, headers);
                action?.Invoke(record);
            }
        }
    }

    /// <summary>
    /// Builds a record from the first line of the first data file.
    /// </summary>
    /// <returns>
    /// The record, or <c>null</c> when the dump lists no data file or the first data file is empty.
    /// </returns>
    public override async Task<DataRecord?> GetTestRecord()
    {
        var sourceFiles = await GetCsvFiles();
        var file = sourceFiles.FirstOrDefault();
        if (file == null)
        {
            return null;
        }

        var headers = await GetHeaders();
        var filePath = Path.Combine(_inputDir, file);

        using var input = File.OpenRead(filePath);
        using var decompress = new DecompressionStream(input);
        using var reader = new StreamReader(decompress);

        // Only the first line is needed, so only the beginning of the file gets decompressed.
        var line = await reader.ReadLineAsync();
        if (line == null)
        {
            // Empty data file: there is nothing to build a record from.
            return null;
        }

        var fields = ParseRow2(line, QuoteChar, Delimiter);
        return new DataRecord(fields, _tableName, headers);
    }

    /// <summary>
    /// No-op: every stream this class opens is scoped to the method that opens it.
    /// </summary>
    public void Dispose()
    {
    }
}
}