This commit is contained in:
陈梓阳 2024-02-09 19:08:57 +08:00
parent 913c725fe1
commit 73895fbce4
17 changed files with 216 additions and 56 deletions

View File

@ -0,0 +1,51 @@
using System.Collections.Concurrent;
namespace MesETL.App.Cache;
/// <summary>
/// In-process <see cref="ICacher"/> implementation backed by concurrent dictionaries.
/// String values and hash values are kept in two separate key spaces.
/// </summary>
public class MemoryCache : ICacher
{
    private readonly ConcurrentDictionary<string, string> _stringCache = new();
    private readonly ConcurrentDictionary<string, Dictionary<string, string>> _hashCache = new();

    /// <summary>
    /// The most recently constructed cache. It is <c>null</c> until a constructor has run,
    /// so code that may execute before the cache is created has to null-check it.
    /// </summary>
    public static MemoryCache Instance { get; private set; } = null!;

    /// <summary>Creates the cache and publishes it through <see cref="Instance"/>.</summary>
    public MemoryCache()
    {
        Instance = this;
    }

    /// <summary>Returns the string stored under <paramref name="key"/>, or <c>null</c> when there is none.</summary>
    public Task<string?> GetStringAsync(string key)
    {
        return Task.FromResult(_stringCache.TryGetValue(key, out var value) ? value : null);
    }

    /// <summary>Stores <paramref name="value"/> under <paramref name="key"/>, replacing any previous string.</summary>
    public Task SetStringAsync(string key, string value)
    {
        _stringCache[key] = value;
        return Task.CompletedTask;
    }

    /// <summary>
    /// Reports whether a string value exists for <paramref name="key"/>.
    /// Only the string key space is consulted; hash entries are not considered.
    /// </summary>
    public Task<bool> ExistsAsync(string key)
    {
        return Task.FromResult(_stringCache.ContainsKey(key));
    }

    /// <summary>
    /// Stores a copy of <paramref name="hash"/> under <paramref name="key"/>, replacing any previous hash.
    /// </summary>
    public Task SetHashAsync(string key, IReadOnlyDictionary<string, string> hash)
    {
        _hashCache[key] = hash.ToDictionary(x => x.Key, x => x.Value);
        return Task.CompletedTask;
    }

    /// <summary>Returns the hash stored under <paramref name="key"/>.</summary>
    /// <exception cref="KeyNotFoundException">No hash is stored under <paramref name="key"/>.</exception>
    public Task<Dictionary<string, string>> GetHashAsync(string key)
    {
        return Task.FromResult(_hashCache[key]);
    }

    /// <summary>
    /// Removes every entry, string or hash, whose key satisfies <paramref name="keySelector"/>.
    /// </summary>
    /// <param name="keySelector">Predicate that returns <c>true</c> for keys to remove.</param>
    /// <exception cref="ArgumentNullException"><paramref name="keySelector"/> is <c>null</c>.</exception>
    public void Delete(Func<string, bool> keySelector)
    {
        ArgumentNullException.ThrowIfNull(keySelector);

        // ConcurrentDictionary.Keys is a snapshot, so removing while iterating it is safe.
        foreach (var key in _stringCache.Keys.Where(keySelector))
        {
            _stringCache.TryRemove(key, out _);
        }

        // Hash entries must be purged too, otherwise they would stay in memory for the whole run.
        foreach (var key in _hashCache.Keys.Where(keySelector))
        {
            _hashCache.TryRemove(key, out _);
        }
    }
}

View File

@ -57,6 +57,7 @@ public static class RedisCacheExtensions
{
var conn = ConnectionMultiplexer.Connect(options.Configuration
?? throw new ApplicationException("未配置Redis连接字符串"));
services.AddSingleton(conn);
services.AddSingleton<ICacher>(new RedisCache(conn, options.Database, options.InstanceName));
return services;
}

View File

@ -44,6 +44,7 @@ public class DataRecord : ICloneable
public IList<string> Headers { get; }
public string TableName { get; }
public string? Database { get; set; }
public long FieldCharCount { get; }
public DataRecord(IEnumerable<string> fields, string tableName, IEnumerable<string> headers, string? database = null)
@ -57,6 +58,8 @@ public class DataRecord : ICloneable
throw new ArgumentException(
$"The number of fields does not match the number of headers. Expected: {Headers.Count} Got: {Fields.Count} Fields: {string.Join(',', Fields)}",
nameof(fields));
FieldCharCount = Fields.Sum(x => (long)x.Length);
}
public string this[int index]

View File

@ -73,11 +73,12 @@ public class FileInputService : IInputService
while (await source.ReadAsync())
{
var record = source.Current;
_producerQueue.Enqueue(record);
await _producerQueue.EnqueueAsync(record);
_context.AddInput();
}
_logger.LogInformation("Input of table: '{TableName}' finished", info.TableName);
_dataInputOptions.Value.OnTableInputCompleted?.Invoke(info.TableName);
}
_context.CompleteInput();
@ -91,6 +92,7 @@ public class FileInputService : IInputService
private IEnumerable<FileInputInfo> GetFilesInOrder(FileInputInfo[] inputFiles)
{
var tableOrder = _dataInputOptions.Value.TableOrder;
var ignoreTable = _dataInputOptions.Value.TableIgnoreList;
if (tableOrder is null or { Length: 0 })
return inputFiles;
@ -102,7 +104,7 @@ public class FileInputService : IInputService
{
var target = inputFiles.FirstOrDefault(f =>
f.TableName.Equals(tableName, StringComparison.OrdinalIgnoreCase));
if (target is not null)
if (target is not null && !ignoreTable.Contains(target.TableName))
yield return target;
}
}

View File

@ -1,4 +1,5 @@
using System.Diagnostics;
using System.Text;
using MesETL.App.Services;
using MesETL.App.Services.Loggers;
using Microsoft.Extensions.DependencyInjection;
@ -14,6 +15,8 @@ public class TaskMonitorService
private readonly ProcessContext _context;
private readonly DataRecordQueue _producerQueue;
private readonly RecordQueuePool _queuePool;
private string _outputPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Log/progress.txt");
public TaskMonitorService(ProcessContext context,
[FromKeyedServices(Const.ConstVar.Producer)]
@ -79,15 +82,24 @@ public class TaskMonitorService
{
logger.LogStatus("Monitor: Progress status", new Dictionary<string, string>
{
{"Input",_context.IsInputCompleted ? "completed" : $"running {inputSpeed:F2} records/s" },
{"Transform", _context.IsTransformCompleted ? "completed" : $"running {transformSpeed:F2} records/s" },
{"Output", _context.IsOutputCompleted ? "completed" : $"running {outputSpeed:F2} records/s" }
{"Input",_context.IsInputCompleted ? "OK" : $"{inputSpeed:F2}/s" },
{"Transform", _context.IsTransformCompleted ? "OK" : $"{transformSpeed:F2}/s" },
{"Output", _context.IsOutputCompleted ? "OK" : $"{outputSpeed:F2}/s" },
{"| Input Queue", _producerQueue.Count.ToString() },
{"Output Queue", _queuePool.Queues.Values.Sum(queue => queue.Count).ToString()},
});
logger.LogStatus("Monitor: Table output progress",
_context.TableProgress
.ToDictionary(kv => kv.Key, kv => kv.Value.ToString()),
ITaskMonitorLogger.LogLevel.Progress);
var dict = _context.TableProgress
.ToDictionary(kv => kv.Key, kv => kv.Value.ToString());
logger.LogStatus("Monitor: Table output progress", dict, ITaskMonitorLogger.LogLevel.Progress);
var sb = new StringBuilder("Table Progress: \n");
foreach (var kv in _context.TableProgress)
{
sb.AppendLine($"{kv.Key}: {kv.Value}");
}
await File.WriteAllTextAsync(_outputPath, sb.ToString(), CancellationToken.None);
logger.LogStatus("Monitor: Process count", new Dictionary<string, string>
{
@ -95,14 +107,8 @@ public class TaskMonitorService
{"Transform", transformCount.ToString()},
{"Output", outputCount.ToString()}
}, ITaskMonitorLogger.LogLevel.Progress);
logger.LogStatus("Monitor: Queue", new Dictionary<string, string>
{
{"Producer queue records", _producerQueue.Count.ToString() },
{"Output queues", _queuePool.Queues.Count.ToString() },
{"Output queue records", _queuePool.Queues.Values.Sum(queue => queue.Count).ToString()},
});
}
await Task.Delay(5000, stoppingToken);

View File

@ -46,9 +46,26 @@ public class TransformService : ITransformService
{
_logger.LogInformation("***** Data transform service started, thread id: {ThreadId} *****", Environment.CurrentManagedThreadId);
// var tasks = new List<Task>();
// for (int i = 0; i < 4; i++)
// {
// tasks.Add(Task.Run(TransformWorker, cancellationToken));
// }
//
// await Task.WhenAll(tasks);
await TransformWorker();
_logger.LogInformation("***** Data transformation service finished *****");
}
public async Task TransformWorker()
{
while (!_context.IsInputCompleted || _producerQueue.Count > 0)
{
if (!_producerQueue.TryDequeue(out var record)) continue;
if (!_producerQueue.TryDequeue(out var record))
{
continue;
}
try
{
@ -80,7 +97,7 @@ public class TransformService : ITransformService
?? throw new ApplicationException("未配置数据库过滤器");
record.Database = dbFilter(record);
_queuePool[record.Database].Enqueue(record);
await _queuePool[record.Database].EnqueueAsync(record);
_context.AddTransform();
if (_options.Value.EnableReBuilder)
@ -93,7 +110,7 @@ public class TransformService : ITransformService
{
if(dbFilter is not null)
rc.Database =dbFilter.Invoke(record);
_queuePool[record.Database].Enqueue(rc);
await _queuePool[record.Database].EnqueueAsync(rc);
_context.AddTransform();
}
}
@ -110,7 +127,5 @@ public class TransformService : ITransformService
}
}
_context.CompleteTransform();
_logger.LogInformation("***** Data transformation service finished *****");
}
}

View File

@ -37,11 +37,15 @@ namespace MesETL.App.Options
#region ManualSet
/// <summary>
/// Optional table order for file input. When this is null or empty the file input service
/// returns the input files unchanged.
/// </summary>
public string[]? TableOrder { get; set; }
/// <summary>
/// Names of tables whose input files are skipped when files are yielded in table order.
/// Defaults to an empty array.
/// </summary>
public string[] TableIgnoreList { get; set; } = [];
/// <summary>
/// Configures how a file name is converted into a table name and headers.
/// </summary>
public Func<string, FileInputInfo?>? FileInputMetaBuilder { get; set; } //TODO: extract
/// <summary>
/// Optional callback that the file input service invokes with the table name once all
/// records of that table have been read.
/// </summary>
public Action<string>? OnTableInputCompleted { get; set; }
#endregion
}

View File

@ -79,6 +79,7 @@ async Task RunProgram()
options.UseMock = inputOptions.UseMock;
options.TableMockConfig = inputOptions.TableMockConfig;
options.MockCountMultiplier = inputOptions.MockCountMultiplier;
options.TableIgnoreList = inputOptions.TableIgnoreList;
// 配置文件输入方法
options.FileInputMetaBuilder = fileName =>
@ -112,6 +113,23 @@ async Task RunProgram()
}
return null;
};
// Configure the table-input-completed event: once a table has been fully read,
// drop the cached entries whose keys start with the mapped table-name prefix.
options.OnTableInputCompleted = table =>
{
    // Completed table -> key prefix of the cache entries to purge.
    string? prefix = table switch
    {
        TableNames.OrderBlockPlan => TableNames.Order,
        TableNames.OrderItem => TableNames.OrderBlockPlan,
        TableNames.OrderProcessSchedule => TableNames.OrderProcess,
        _ => null,
    };
    if (prefix is null)
    {
        return;
    }

    // Instance is only set once a MemoryCache has been constructed; it stays null when the
    // container has not created it yet or another ICacher is registered, so guard the call.
    // Keys are identifiers, so compare ordinally rather than with the current culture.
    MemoryCache.Instance?.Delete(key => key.StartsWith(prefix, StringComparison.Ordinal));
};
options.TableOrder =
[
@ -131,9 +149,6 @@ async Task RunProgram()
TableNames.OrderModuleExtra,
TableNames.OrderModuleItem,
TableNames.OrderPackage,
#if USE_TEST_DB
TableNames.OrderPatchDetail,
#endif
TableNames.OrderProcess,
TableNames.OrderProcessStep,
@ -597,17 +612,19 @@ async Task RunProgram()
host.Services.AddSingleton<ProcessContext>();
var prodLen = host.Configuration.GetRequiredSection("RecordQueue").GetValue<int>("ProducerQueueLength");
var consLen = host.Configuration.GetRequiredSection("RecordQueue").GetValue<int>("ConsumerQueueLength");
host.Services.AddKeyedSingleton<DataRecordQueue>(ConstVar.Producer, new DataRecordQueue(prodLen));
host.Services.AddRecordQueuePool(tenantDbOptions.DbGroup.Keys.Select(key => (key:key, queue:new DataRecordQueue(consLen))).ToArray());
host.Services.AddSingleton<ITaskMonitorLogger, CacheTaskMonitorLogger>();
var maxCharCount = host.Configuration.GetRequiredSection("RecordQueue").GetValue<long>("MaxByteCount") / 2;
host.Services.AddKeyedSingleton<DataRecordQueue>(ConstVar.Producer, new DataRecordQueue(prodLen, maxCharCount));
host.Services.AddRecordQueuePool(tenantDbOptions.DbGroup.Keys.Select(key => (key:key, queue:new DataRecordQueue(consLen, maxCharCount))).ToArray());
// host.Services.AddSingleton<ITaskMonitorLogger, CacheTaskMonitorLogger>();
host.Services.AddSingleton<ITaskMonitorLogger, LoggerTaskMonitorLogger>();
host.Services.AddHostedService<MainHostedService>();
host.Services.AddSingleton<IInputService, FileInputService>();
host.Services.AddSingleton<ITransformService, TransformService>();
host.Services.AddSingleton<IOutputService, VoidOutputService>();
host.Services.AddSingleton<IOutputService, OutputService>();
host.Services.AddSingleton<TaskMonitorService>();
host.Services.AddRedisCache(redisOptions);
// host.Services.AddRedisCache(redisOptions);
host.Services.AddSingleton<ICacher, MemoryCache>();
var app = host.Build();
await app.RunAsync();
}

View File

@ -1,5 +1,6 @@
using System.Collections.Concurrent;
using System.Diagnostics.CodeAnalysis;
using TaskExtensions = MesETL.App.Helpers.TaskExtensions;
namespace MesETL.App.Services;
@ -10,6 +11,9 @@ public class DataRecordQueue : IDisposable
{
private readonly BlockingCollection<DataRecord> _queue;
private long _currentCharCount;
private readonly long _maxCharCount = 2_147_483_648; // 4GiB
public int Count => _queue.Count;
public bool IsCompleted => _queue.IsCompleted;
public bool IsAddingCompleted => _queue.IsAddingCompleted;
@ -17,19 +21,23 @@ public class DataRecordQueue : IDisposable
public event Action? OnRecordWrite;
public event Action? OnRecordRead;
public DataRecordQueue() : this(500_000) // 默认容量最大500K
public DataRecordQueue() : this(500_000, 2_147_483_648) // 默认容量最大500K
{
}
public DataRecordQueue(int boundedCapacity)
public DataRecordQueue(int boundedCapacity, long maxCharCount)
{
_queue = new BlockingCollection<DataRecord>(boundedCapacity);
_maxCharCount = maxCharCount;
}
public void CompleteAdding() => _queue.CompleteAdding();
public bool TryDequeue([MaybeNullWhen(false)] out DataRecord record)
{
if (_queue.TryTake(out record))
{
Interlocked.Add(ref _currentCharCount, -record.FieldCharCount);
OnRecordRead?.Invoke();
return true;
}
@ -37,13 +45,14 @@ public class DataRecordQueue : IDisposable
return false;
}
public DataRecord Dequeue() => _queue.Take();
public void CompleteAdding() => _queue.CompleteAdding();
public void Enqueue(DataRecord record)
/// <summary>
/// Adds <paramref name="record"/> to the queue, first waiting until the queue's character
/// budget has room for it.
/// </summary>
/// <param name="record">The record to enqueue; its <c>FieldCharCount</c> is charged against the budget.</param>
/// <remarks>
/// A record that alone exceeds the budget is admitted once the counter has dropped to zero;
/// otherwise it could never be enqueued and the producer would wait forever.
/// </remarks>
public async Task EnqueueAsync(DataRecord record)
{
    var charCount = record.FieldCharCount;

    // True when the record fits in the remaining budget, or when nothing is counted as queued.
    bool HasRoom()
    {
        var current = Interlocked.Read(ref _currentCharCount);
        return current <= 0 || current + charCount <= _maxCharCount;
    }

    if (!HasRoom())
    {
        // Polls the condition with an interval argument of 50 (presumably milliseconds - confirm in TaskExtensions).
        await TaskExtensions.WaitUntil(() => HasRoom(), 50);
    }

    // NOTE(review): BlockingCollection.Add blocks the calling thread while the bounded
    // collection is full; this method is only asynchronous with respect to the char budget.
    _queue.Add(record);
    Interlocked.Add(ref _currentCharCount, charCount);
    OnRecordWrite?.Invoke();
}

View File

@ -11,10 +11,11 @@ public class CsvReader : IDataReader
{
protected readonly string? FilePath;
protected readonly Lazy<StreamReader> Reader;
private Stream? _stream;
protected readonly ILogger? Logger;
protected readonly string TableName;
public DataRecord Current { get; protected set; } = null!;
public DataRecord Current { get; protected set; } = default!;
public string[] Headers { get; }
public string Delimiter { get; }
public char QuoteChar { get; }
@ -22,15 +23,18 @@ public class CsvReader : IDataReader
public CsvReader(Stream stream, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
: this(tableName, headers, delimiter, quoteChar, logger)
{
Reader = new Lazy<StreamReader>(() => new StreamReader(stream));
Reader = new Lazy<StreamReader>(() => new StreamReader(stream),false);
}
public CsvReader(string filePath, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
: this(tableName, headers, delimiter, quoteChar, logger)
{
var fs = File.OpenRead(filePath);
FilePath = filePath;
Reader = new Lazy<StreamReader>(() => new StreamReader(fs));
Reader = new Lazy<StreamReader>(() =>
{
_stream = File.OpenRead(filePath);
return new StreamReader(_stream);
});
}
private CsvReader(string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
@ -129,8 +133,9 @@ public class CsvReader : IDataReader
if (!hasQuote && currChar == delimiter)
{
result.Add(source[start..(end + 1)].ToString());
start = end + 2;
result.Add(source[start..(end)].ToString());
start = end + 1;
++end;
}
else
{
@ -140,13 +145,16 @@ public class CsvReader : IDataReader
hasSlash = false;
}
result.Add(source[start..(end + 1)].ToString());
result.Add(source[start..end].ToString());
return result;
}
public virtual void Dispose()
{
if(Reader.IsValueCreated)
if (Reader.IsValueCreated)
{
Reader.Value.Dispose();
_stream?.Dispose();
}
}
}

View File

@ -61,7 +61,7 @@ public partial class MySqlDestination : IDisposable, IAsyncDisposable
return;
var cmd = _conn.CreateCommand();
cmd.CommandTimeout = 3 * 60;
cmd.CommandTimeout = 0;
try
{

View File

@ -9,20 +9,24 @@ namespace MesETL.App.Services.ETL;
public class ZstReader : CsvReader
{
protected new readonly Lazy<StreamReader> Reader;
private Stream? _stream;
public ZstReader(string filePath, string tableName, string[] headers, string delimiter = ",", char quoteChar = '\"', ILogger? logger = null)
: base(filePath, tableName, headers, delimiter, quoteChar, logger)
{
var ds = new DecompressionStream(File.OpenRead(filePath));
Reader = new Lazy<StreamReader>(() => new StreamReader(ds));
Reader = new Lazy<StreamReader>(() =>
{
_stream = new DecompressionStream(File.OpenRead(filePath));
return new StreamReader(_stream);
}, false);
}
public ZstReader(Stream stream, string tableName, string[] headers, string delimiter = ",", char quoteChar = '\"', ILogger? logger = null)
: base(stream, tableName, headers, delimiter, quoteChar, logger)
{
var ds = new DecompressionStream(stream);
Reader = new Lazy<StreamReader>(() => new StreamReader(ds));
Reader = new Lazy<StreamReader>(() => new StreamReader(ds), false);
}
public override async ValueTask<bool> ReadAsync()
@ -31,7 +35,7 @@ public class ZstReader : CsvReader
if (string.IsNullOrWhiteSpace(str))
return false;
var fields = ParseRow(str, QuoteChar, Delimiter[0]);
var fields = ParseRowFaster(str, QuoteChar, Delimiter[0]);
Current = new DataRecord(fields, TableName, Headers);
return true;
}
@ -39,8 +43,11 @@ public class ZstReader : CsvReader
public override void Dispose()
{
base.Dispose();
if(Reader.IsValueCreated)
if (Reader.IsValueCreated)
{
Reader.Value.Dispose();
_stream?.Dispose();
}
}
}

View File

@ -1,4 +1,5 @@
using MesETL.App.Cache;
using System.Text;
using MesETL.App.Cache;
namespace MesETL.App.Services.Loggers;

View File

@ -9,7 +9,8 @@ public class RecordQueuePool
public IReadOnlyDictionary<string, DataRecordQueue> Queues => _queues;
public void AddQueue(string key, int boundedCapacity = 200_0000) => AddQueue(key, new DataRecordQueue(boundedCapacity));
public void AddQueue(string key, int boundedCapacity = 200_0000, long maxCharCount = 2_147_483_648)
=> AddQueue(key, new DataRecordQueue(boundedCapacity, maxCharCount));
public void AddQueue(string key, DataRecordQueue queue)
{

View File

@ -7,13 +7,14 @@
"Input":{
"InputDir": "D:\\Dump\\NewMockData", // Csv
"UseMock": false, // 使
"MockCountMultiplier": 1 //
"MockCountMultiplier": 1, //
"TableIgnoreList": ["order_box_block"] //
},
"Transform":{
"StrictMode": false, // true
"EnableFilter": false, //
"EnableReplacer": false, //
"EnableReBuilder": false, //
"EnableFilter": true, //
"EnableReplacer": true, //
"EnableReBuilder": true, //
"CleanDate": "202301" //
},
"Output":{
@ -26,6 +27,7 @@
"RecordQueue":{
"ProducerQueueLength": 50000, //
"ConsumerQueueLength": 10000, //
"MaxByteCount": 3221225472 //
},
"RedisCache": {
"Configuration": "192.168.1.246:6380",

View File

@ -107,6 +107,24 @@ public class DatabaseToolBox
}).ToArray();
}
/// <summary>
/// Writes one <c>DROP INDEX</c> statement per index of <paramref name="database"/> to the test output.
/// </summary>
/// <param name="database">Name of the database whose indexes are listed.</param>
[Theory]
[InlineData(["mesdb_1"])]
[InlineData(["mesdb_2"])]
[InlineData(["mesdb_3"])]
[InlineData(["mesdb_4"])]
[InlineData(["mesdb_5"])]
public async Task ShowIndex(string database)
{
    var indexes = await GetAllTableIndexes(database);
    var sb = new StringBuilder();
    // Guards against repeated (table, index) rows - presumably one row per indexed column; verify.
    var emitted = new HashSet<string>();
    foreach (var (tableName, indexName, isUnique, _, _) in indexes!)
    {
        if (!emitted.Add($"{tableName}\0{indexName}"))
        {
            continue;
        }

        // MySQL's DROP INDEX takes no UNIQUE keyword, so uniqueness is reported as a trailing comment.
        sb.AppendLine($"DROP INDEX `{indexName}` ON `{database}`.`{tableName}`;{(isUnique ? " -- UNIQUE" : string.Empty)}");
    }
    _output.WriteLine(sb.ToString());
}
[Theory]
[InlineData(["cferp_test_1", "D:/Indexes_cferp_test_1.json"])]
[InlineData(["cferp_test_2", "D:/Indexes_cferp_test_2.json"])]

View File

@ -1,3 +1,4 @@
using System.Collections.Concurrent;
using System.Diagnostics;
using MesETL.App.Services.ETL;
using Xunit.Abstractions;
@ -67,8 +68,22 @@ public class Test
[MemberData(nameof(ParseRowData))]
public void ParseRowFasterTest(string row)
{
var fields = CsvReader.ParseRow(row, '"', ',');
var fields = CsvReader.ParseRowFaster(row, '"', ',');
_output.WriteLine(string.Join(',', fields));
}
/// <summary>
/// Fills a <see cref="ConcurrentDictionary{TKey,TValue}"/> with three million GUID keys and
/// ten-digit values, then reports the growth in managed memory to the test output.
/// </summary>
[Fact]
public void DictMemoryTest()
{
    const int entryCount = 3_000_000;

    var before = GC.GetTotalMemory(forceFullCollection: true);

    var dict = new ConcurrentDictionary<string, string>();
    for (int i = 0; i < entryCount; i++)
    {
        dict.AddOrUpdate(Guid.NewGuid().ToString(), Random.Shared.NextInt64(1000000000L, 9999999999L).ToString(), (_, __) => Random.Shared.NextInt64(1000000000L, 9999999999L).ToString());
    }

    var after = GC.GetTotalMemory(forceFullCollection: true);

    // Report the figure instead of spinning forever so that the test run can finish.
    _output.WriteLine($"Entries: {dict.Count}, managed memory delta: {(after - before) / (1024.0 * 1024.0):F2} MiB");

    // Keep the dictionary reachable until after the measurement.
    GC.KeepAlive(dict);
}
}