This commit is contained in:
2024-02-09 19:08:57 +08:00
parent 913c725fe1
commit 73895fbce4
17 changed files with 216 additions and 56 deletions

View File

@@ -1,5 +1,6 @@
using System.Collections.Concurrent;
using System.Diagnostics.CodeAnalysis;
using TaskExtensions = MesETL.App.Helpers.TaskExtensions;
namespace MesETL.App.Services;
@@ -10,6 +11,9 @@ public class DataRecordQueue : IDisposable
{
// Bounded blocking collection that stores the buffered records.
private readonly BlockingCollection<DataRecord> _queue;
// Running sum of FieldCharCount over the records currently buffered;
// adjusted with Interlocked.Add when records are enqueued and dequeued.
private long _currentCharCount;
// Character budget checked by EnqueueAsync before adding a record.
// The constructor overwrites this initializer with its maxCharCount argument.
private readonly long _maxCharCount = 2_147_483_648; // 2^31 chars, i.e. about 4 GiB at 2 bytes per UTF-16 char
// Number of records currently held in the underlying collection.
public int Count => _queue.Count;
// Mirrors BlockingCollection.IsCompleted (adding is finished and the collection is empty).
public bool IsCompleted => _queue.IsCompleted;
// Mirrors BlockingCollection.IsAddingCompleted (CompleteAdding has been called).
public bool IsAddingCompleted => _queue.IsAddingCompleted;
@@ -17,19 +21,23 @@ public class DataRecordQueue : IDisposable
/// <summary>Raised after a record has been added to the queue.</summary>
public event Action? OnRecordWrite;
/// <summary>Raised after a record has been successfully taken from the queue by TryDequeue.</summary>
public event Action? OnRecordRead;
public DataRecordQueue() : this(500_000) // 默认容量最大500K
public DataRecordQueue() : this(500_000, 2_147_483_648) // 默认容量最大500K
{
}
public DataRecordQueue(int boundedCapacity)
public DataRecordQueue(int boundedCapacity, long maxCharCount)
{
_queue = new BlockingCollection<DataRecord>(boundedCapacity);
_maxCharCount = maxCharCount;
}
public void CompleteAdding() => _queue.CompleteAdding();
public bool TryDequeue([MaybeNullWhen(false)] out DataRecord record)
{
if (_queue.TryTake(out record))
{
Interlocked.Add(ref _currentCharCount, -record.FieldCharCount);
OnRecordRead?.Invoke();
return true;
}
@@ -37,13 +45,14 @@ public class DataRecordQueue : IDisposable
return false;
}
public DataRecord Dequeue() => _queue.Take();
public void CompleteAdding() => _queue.CompleteAdding();
public void Enqueue(DataRecord record)
/// <summary>
/// Adds a record to the queue, first waiting until the buffered character total
/// leaves room for it within the configured maximum.
/// </summary>
/// <param name="record">
/// Record to enqueue; its FieldCharCount is charged against the character budget.
/// </param>
/// <returns>
/// A task that completes once the record has been added, the character counter
/// updated and <see cref="OnRecordWrite"/> raised.
/// </returns>
/// <remarks>
/// The underlying <c>BlockingCollection.Add</c> call is still synchronous and blocks
/// while the collection is at its bounded capacity.
/// </remarks>
public async Task EnqueueAsync(DataRecord record)
{
    var charCount = record.FieldCharCount;

    // A record is admitted when it fits the budget, or when nothing is buffered at all:
    // a single record larger than the whole budget could otherwise never pass the check
    // and the producer would wait forever.
    // Interlocked.Read guarantees an untorn 64-bit read on 32-bit runtimes.
    bool HasRoom()
    {
        var buffered = Interlocked.Read(ref _currentCharCount);
        return buffered <= 0 || buffered + charCount <= _maxCharCount;
    }

    if (!HasRoom())
    {
        // 50 is passed through to WaitUntil unchanged (presumably the polling interval in ms).
        await TaskExtensions.WaitUntil(() => HasRoom(), 50);
    }

    // Charge the counter only after Add succeeds, so a failed Add (e.g. after
    // CompleteAdding) does not leave phantom characters in the budget.
    _queue.Add(record);
    Interlocked.Add(ref _currentCharCount, charCount);
    OnRecordWrite?.Invoke();
}

View File

@@ -11,10 +11,11 @@ public class CsvReader : IDataReader
{
protected readonly string? FilePath;
protected readonly Lazy<StreamReader> Reader;
private Stream? _stream;
protected readonly ILogger? Logger;
protected readonly string TableName;
public DataRecord Current { get; protected set; } = null!;
public DataRecord Current { get; protected set; } = default!;
public string[] Headers { get; }
public string Delimiter { get; }
public char QuoteChar { get; }
@@ -22,15 +23,18 @@ public class CsvReader : IDataReader
public CsvReader(Stream stream, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
: this(tableName, headers, delimiter, quoteChar, logger)
{
Reader = new Lazy<StreamReader>(() => new StreamReader(stream));
Reader = new Lazy<StreamReader>(() => new StreamReader(stream),false);
}
public CsvReader(string filePath, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
: this(tableName, headers, delimiter, quoteChar, logger)
{
var fs = File.OpenRead(filePath);
FilePath = filePath;
Reader = new Lazy<StreamReader>(() => new StreamReader(fs));
Reader = new Lazy<StreamReader>(() =>
{
_stream = File.OpenRead(filePath);
return new StreamReader(_stream);
});
}
private CsvReader(string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
@@ -129,8 +133,9 @@ public class CsvReader : IDataReader
if (!hasQuote && currChar == delimiter)
{
result.Add(source[start..(end + 1)].ToString());
start = end + 2;
result.Add(source[start..(end)].ToString());
start = end + 1;
++end;
}
else
{
@@ -140,13 +145,16 @@ public class CsvReader : IDataReader
hasSlash = false;
}
result.Add(source[start..(end + 1)].ToString());
result.Add(source[start..end].ToString());
return result;
}
public virtual void Dispose()
{
if(Reader.IsValueCreated)
if (Reader.IsValueCreated)
{
Reader.Value.Dispose();
_stream?.Dispose();
}
}
}

View File

@@ -61,7 +61,7 @@ public partial class MySqlDestination : IDisposable, IAsyncDisposable
return;
var cmd = _conn.CreateCommand();
cmd.CommandTimeout = 3 * 60;
cmd.CommandTimeout = 0;
try
{

View File

@@ -9,20 +9,24 @@ namespace MesETL.App.Services.ETL;
public class ZstReader : CsvReader
{
protected new readonly Lazy<StreamReader> Reader;
private Stream? _stream;
public ZstReader(string filePath, string tableName, string[] headers, string delimiter = ",", char quoteChar = '\"', ILogger? logger = null)
: base(filePath, tableName, headers, delimiter, quoteChar, logger)
{
var ds = new DecompressionStream(File.OpenRead(filePath));
Reader = new Lazy<StreamReader>(() => new StreamReader(ds));
Reader = new Lazy<StreamReader>(() =>
{
_stream = new DecompressionStream(File.OpenRead(filePath));
return new StreamReader(_stream);
}, false);
}
public ZstReader(Stream stream, string tableName, string[] headers, string delimiter = ",", char quoteChar = '\"', ILogger? logger = null)
: base(stream, tableName, headers, delimiter, quoteChar, logger)
{
var ds = new DecompressionStream(stream);
Reader = new Lazy<StreamReader>(() => new StreamReader(ds));
Reader = new Lazy<StreamReader>(() => new StreamReader(ds), false);
}
public override async ValueTask<bool> ReadAsync()
@@ -31,7 +35,7 @@ public class ZstReader : CsvReader
if (string.IsNullOrWhiteSpace(str))
return false;
var fields = ParseRow(str, QuoteChar, Delimiter[0]);
var fields = ParseRowFaster(str, QuoteChar, Delimiter[0]);
Current = new DataRecord(fields, TableName, Headers);
return true;
}
@@ -39,8 +43,11 @@ public class ZstReader : CsvReader
public override void Dispose()
{
base.Dispose();
if(Reader.IsValueCreated)
if (Reader.IsValueCreated)
{
Reader.Value.Dispose();
_stream?.Dispose();
}
}
}

View File

@@ -1,4 +1,5 @@
using MesETL.App.Cache;
using System.Text;
using MesETL.App.Cache;
namespace MesETL.App.Services.Loggers;

View File

@@ -9,7 +9,8 @@ public class RecordQueuePool
public IReadOnlyDictionary<string, DataRecordQueue> Queues => _queues;
public void AddQueue(string key, int boundedCapacity = 200_0000) => AddQueue(key, new DataRecordQueue(boundedCapacity));
/// <summary>
/// Builds a new <see cref="DataRecordQueue"/> from the given limits and hands it to the
/// <c>AddQueue(string, DataRecordQueue)</c> overload under <paramref name="key"/>.
/// </summary>
/// <param name="key">Key to pass along with the new queue.</param>
/// <param name="boundedCapacity">Bounded capacity given to the new queue's constructor.</param>
/// <param name="maxCharCount">Character budget given to the new queue's constructor.</param>
public void AddQueue(string key, int boundedCapacity = 200_0000, long maxCharCount = 2_147_483_648)
{
    var queue = new DataRecordQueue(boundedCapacity, maxCharCount);
    AddQueue(key, queue);
}
public void AddQueue(string key, DataRecordQueue queue)
{