Optimize structure

This commit is contained in:
陈梓阳 2024-02-10 17:12:26 +08:00
parent aa7041962a
commit 571805250b
26 changed files with 193 additions and 52 deletions

View File

@ -7,7 +7,7 @@ public class MemoryCache : ICacher
private readonly ConcurrentDictionary<string, string> _stringCache = new();
private readonly ConcurrentDictionary<string, Dictionary<string, string>> _hashCache = new();
public static MemoryCache Instance { get; private set; }
public static MemoryCache? Instance { get; private set; }
public MemoryCache()
{

View File

@ -69,14 +69,17 @@ public class FileInputService : IInputService
{
_logger.LogInformation("Reading file: {FileName}, table: {TableName}", info.FileName, info.TableName);
using var source = _dataReaderFactory.CreateReader(info.FileName,info.TableName,info.Headers);
var count = 0;
while (await source.ReadAsync())
{
var record = source.Current;
await _producerQueue.EnqueueAsync(record);
count++;
_context.AddInput();
}
_context.AddTableInput(info.TableName, count);
_logger.LogInformation("Input of table: '{TableName}' finished", info.TableName);
_dataInputOptions.Value.OnTableInputCompleted?.Invoke(info.TableName);
}

View File

@ -5,6 +5,7 @@ using MesETL.App.HostedServices.Abstractions;
using MesETL.App.Options;
using MesETL.App.Services;
using MesETL.App.Services.ErrorRecorder;
using MesETL.Shared.Helper;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;

View File

@ -3,11 +3,12 @@ using MesETL.App.HostedServices.Abstractions;
using MesETL.App.Options;
using MesETL.App.Services;
using MesETL.App.Services.ErrorRecorder;
using MesETL.Shared.Helper;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MySqlConnector;
using MySqlDestination = MesETL.App.Services.ETL.MySqlDestination;
using TaskExtensions = MesETL.App.Helpers.TaskExtensions;
using TaskExtensions = MesETL.Shared.Helper.TaskExtensions;
namespace MesETL.App.HostedServices;
@ -136,6 +137,7 @@ public class OutputService : IOutputService
await output.FlushAsync(_outputOptions.Value.MaxAllowedPacket);
foreach (var (key, value) in tableOutput)
{
_context.AddOutput(value);
_context.AddTableOutput(key, value);
}
_logger.LogTrace("Flushed {Count} records", tableOutput.Values.Sum(i => i));

View File

@ -107,24 +107,24 @@ public class TaskMonitorService
});
var dict = _context.TableProgress
.ToDictionary(kv => kv.Key, kv => kv.Value.ToString());
logger.LogStatus("Monitor: Table output progress", dict, ITaskMonitorLogger.LogLevel.Progress);
.ToDictionary(kv => kv.Key, kv => $"{kv.Value.input}/{kv.Value.output}");
logger.LogStatus("Monitor: Table progress", dict, ITaskMonitorLogger.LogLevel.Progress);
var sb = new StringBuilder("Table Progress: \n");
foreach (var kv in _context.TableProgress)
foreach (var kv in dict)
{
sb.AppendLine($"{kv.Key}: {kv.Value}");
sb.Append(kv.Key).AppendLine(kv.Value);
}
sb.AppendLine($"LongestCharCount: {_producerQueue.LongestFieldCharCount}");
await File.WriteAllTextAsync(_outputPath, sb.ToString(), CancellationToken.None);
logger.LogStatus("Monitor: Process count", new Dictionary<string, string>
{
{"Input", inputCount.ToString()},
{"Transform", transformCount.ToString()},
{"Output", outputCount.ToString()}
}, ITaskMonitorLogger.LogLevel.Progress);
// logger.LogStatus("Monitor: Process count", new Dictionary<string, string>
// {
// {"Input", inputCount.ToString()},
// {"Transform", transformCount.ToString()},
// {"Output", outputCount.ToString()}
// }, ITaskMonitorLogger.LogLevel.Progress);
}

View File

@ -24,7 +24,7 @@ public class VoidOutputService : IOutputService
_logger.LogInformation("***** Void Output Service Started *****");
while (!_context.IsTransformCompleted || _queuePool.Queues.Count > 0)
{
foreach (var pair in _queuePool.Queues) // 内存优化
foreach (var pair in _queuePool.Queues)
{
if (_context.IsTransformCompleted && pair.Value.Count == 0)
{

View File

@ -22,7 +22,6 @@
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="MySqlConnector" Version="2.3.3" />
<PackageReference Include="Serilog" Version="3.1.2-dev-02097" />
<PackageReference Include="Serilog.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
@ -32,4 +31,8 @@
<PackageReference Include="ZstdSharp.Port" Version="0.7.4" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\MesETL.Shared\MesETL.Shared.csproj" />
</ItemGroup>
</Project>

View File

@ -120,18 +120,18 @@ async Task RunProgram()
switch (table)
{
case TableNames.OrderBlockPlan:
MemoryCache.Instance.Delete(s => s.StartsWith(TableNames.Order));
MemoryCache.Instance?.Delete(s => s.StartsWith(TableNames.Order));
break;
case TableNames.OrderItem:
MemoryCache.Instance.Delete(s => s.StartsWith(TableNames.OrderBlockPlan));
MemoryCache.Instance?.Delete(s => s.StartsWith(TableNames.OrderBlockPlan));
break;
case TableNames.OrderProcessSchedule:
MemoryCache.Instance.Delete(s => s.StartsWith(TableNames.OrderProcess));
MemoryCache.Instance?.Delete(s => s.StartsWith(TableNames.OrderProcess));
break;
}
};
options.TableOrder =
options.TableOrder = inputOptions.TableOrder ??
[
TableNames.Machine,
@ -253,14 +253,14 @@ async Task RunProgram()
return false;
break;
}
// OrderBlockPlan删除CreateTime < 202301的Json列合法检查
// OrderBlockPlan删除CreateTime < 202301的
case TableNames.OrderBlockPlan:
{
var time = DateTime.Parse(record["CreateTime"].Trim('"','\''));
if (time < oldestTime)
return false;
// if (!DumpDataHelper.IsJson(record["OrderNos"])) return false;
// if (!DumpDataHelper.IsJson(record["OrderNos"])) return false; //Json列合法检查
break;
}
// OrderBlockPlanResult删除对应order_block_plan.ID不存在的对象
@ -270,10 +270,16 @@ async Task RunProgram()
return false;
break;
}
// case TableNames.OrderBlockPlanResult:
// {
// if (DateTime.Parse(record["SaveTime"].Trim('"', '\'')) < oldestTime)
// return false;
// break;
// }
// OrderDataGoods Json列合法检查
case TableNames.OrderDataGoods:
{
if (!DumpDataHelper.IsJson(record["ExtraProp"])) return false;
// if (!DumpDataHelper.IsJson(record["ExtraProp"])) return false;
break;
}
// OrderModule删除OrderNo < 202301的
@ -623,8 +629,8 @@ async Task RunProgram()
host.Services.AddSingleton<ITransformService, TransformService>();
host.Services.AddSingleton<IOutputService, OutputService>();
host.Services.AddSingleton<TaskMonitorService>();
// host.Services.AddRedisCache(redisOptions);
host.Services.AddSingleton<ICacher, MemoryCache>();
host.Services.AddRedisCache(redisOptions);
// host.Services.AddSingleton<ICacher, MemoryCache>();
var app = host.Build();
await app.RunAsync();
}

View File

@ -1,6 +1,6 @@
using System.Collections.Concurrent;
using System.Diagnostics.CodeAnalysis;
using TaskExtensions = MesETL.App.Helpers.TaskExtensions;
using TaskExtensions = MesETL.Shared.Helper.TaskExtensions;
namespace MesETL.App.Services;
@ -50,6 +50,9 @@ public class DataRecordQueue : IDisposable
public async Task EnqueueAsync(DataRecord record)
{
if (_queue.Count >= _queue.BoundedCapacity)
await Task.Delay(500);
var charCount = record.FieldCharCount;
LongestFieldCharCount = Math.Max(LongestFieldCharCount, charCount);
if(_currentCharCount + charCount > _maxCharCount)

View File

@ -3,6 +3,7 @@ using System.Text.RegularExpressions;
using MesETL.App.Const;
using MesETL.App.Helpers;
using MesETL.App.Options;
using MesETL.Shared.Helper;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MySqlConnector;

View File

@ -1,4 +1,5 @@
using MesETL.App.Helpers;
using MesETL.Shared.Helper;
using Microsoft.Extensions.Logging;
namespace MesETL.App.Services.ErrorRecorder;

View File

@ -11,7 +11,7 @@ public class ProcessContext
private long _inputCount;
private long _transformCount;
private long _outputCount;
private readonly ConcurrentDictionary<string, long> _tableProgress = new();
private readonly ConcurrentDictionary<string, (long input, long output)> _tableProgress = new();
public bool HasException => _hasException;
public bool IsInputCompleted { get; private set; }
public bool IsTransformCompleted { get; private set; }
@ -37,7 +37,7 @@ public class ProcessContext
// TableName -> Count
public IReadOnlyDictionary<string, long> TableProgress => _tableProgress;
public IReadOnlyDictionary<string, (long input, long output)> TableProgress => _tableProgress;
public void CompleteInput() => IsInputCompleted = true;
@ -55,16 +55,21 @@ public class ProcessContext
public void AddOutput() => Interlocked.Increment(ref _outputCount);
public void AddOutput(int count) => Interlocked.Add(ref _outputCount, count);
public void AddTableOutput(string table, int count)
public void AddTableInput(string table, int count)
{
_tableProgress.AddOrUpdate(table, count, (k, v) => v + count);
AddOutput(count);
_tableProgress.AddOrUpdate(table, (input:count, output:0), (k, tuple) =>
{
tuple.input += count;
return tuple;
});
}
public long GetTableOutput(string table)
public void AddTableOutput(string table, int count)
{
if(!_tableProgress.TryGetValue(table, out var count))
throw new ApplicationException($"未找到表{table}输出记录");
return count;
_tableProgress.AddOrUpdate(table, (input:0, output:count), (k, tuple) =>
{
tuple.output += count;
return tuple;
});
}
}

View File

@ -1,5 +1,5 @@
using ApplicationException = System.ApplicationException;
using TaskExtensions = MesETL.App.Helpers.TaskExtensions;
using TaskExtensions = MesETL.Shared.Helper.TaskExtensions;
namespace MesETL.App.Services;

View File

@ -6,9 +6,10 @@
}
},
"Input":{
"InputDir": "D:\\Dump\\NewMockData", // Csv
"InputDir": "D:\\Dump\\MyDumper-ZST 2024-02-05", // Csv
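"UseMock": false, // use mock data (presumably instead of reading InputDir; confirm)
"MockCountMultiplier": 1, // multiplier for the mock record count (assumed from the key name; confirm)
"TableOrder": ["order_block_plan", "order_block_plan_result"], // tables to process, in this order; replaces the built-in default order when set
"TableIgnoreList": [] // tables to ignore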
},
"Transform":{

View File

@ -1,7 +1,7 @@
using System.Data;
using MySqlConnector;
namespace MesETL.App.Helpers;
namespace MesETL.Shared.Helper;
public static class DatabaseHelper
{
@ -15,11 +15,11 @@ public static class DatabaseHelper
return new MySqlConnection(newConnStr);
}
public static async Task<DataSet> QueryTableAsync(string connStr, string sql)
public static async Task<DataSet> QueryTableAsync(string connStr, string sql, CancellationToken ct = default)
{
await using var conn = CreateConnection(connStr);
if(conn.State is not ConnectionState.Open)
await conn.OpenAsync();
await conn.OpenAsync(ct);
await using var cmd = conn.CreateCommand();
cmd.CommandText = sql;
var ds = new DataSet();
@ -27,24 +27,24 @@ public static class DatabaseHelper
return ds;
}
public static async Task<object?> QueryScalarAsync(string connStr, string sql)
public static async Task<object?> QueryScalarAsync(string connStr, string sql, CancellationToken ct = default)
{
await using var conn = CreateConnection(connStr);
if(conn.State is not ConnectionState.Open)
await conn.OpenAsync();
await conn.OpenAsync(ct);
await using var cmd = conn.CreateCommand();
cmd.CommandText = sql;
return await cmd.ExecuteScalarAsync();
return await cmd.ExecuteScalarAsync(ct);
}
public static async Task<int> NonQueryAsync(string connStr, string sql)
public static async Task<int> NonQueryAsync(string connStr, string sql, CancellationToken ct = default)
{
await using var conn = CreateConnection(connStr);
if(conn.State is not ConnectionState.Open)
await conn.OpenAsync();
await conn.OpenAsync(ct);
await using var cmd = conn.CreateCommand();
cmd.CommandText = sql;
return await cmd.ExecuteNonQueryAsync();
return await cmd.ExecuteNonQueryAsync(ct);
}
public static async Task<int> TransactionAsync(string connStr, string sql, params MySqlParameter[] parameters)

View File

@ -1,4 +1,4 @@
namespace MesETL.App.Helpers;
namespace MesETL.Shared.Helper;
public static class DictionaryExtensions
{

View File

@ -1,7 +1,7 @@
using System.Diagnostics.CodeAnalysis;
using System.Reflection;
namespace MesETL.App.Helpers;
namespace MesETL.Shared.Helper;
#nullable disable
public static class EnumerableExtensions
{

View File

@ -1,7 +1,7 @@
using System.Globalization;
using System.Text;
namespace MesETL.App.Helpers;
namespace MesETL.Shared.Helper;
public static class StringExtensions
{

View File

@ -1,4 +1,4 @@
namespace MesETL.App.Helpers;
namespace MesETL.Shared.Helper;
public static class TaskExtensions
{

View File

@ -0,0 +1,13 @@
<!-- MesETL.Shared: class library holding the helpers shared by MesETL.App and MesETL.Tool. -->
<Project Sdk="Microsoft.NET.Sdk">
    <PropertyGroup>
        <TargetFramework>net8.0</TargetFramework>
        <ImplicitUsings>enable</ImplicitUsings>
        <Nullable>enable</Nullable>
    </PropertyGroup>
    <ItemGroup>
        <!-- Keep this version equal to the MySqlConnector version that MesETL.App references
             directly (2.3.3). MesETL.App also references this project, so a higher version here
             makes its direct reference a package downgrade (NuGet NU1605) on restore. -->
        <PackageReference Include="MySqlConnector" Version="2.3.3" />
    </ItemGroup>
</Project>

View File

@ -1,5 +1,6 @@
using System.Data;
using MesETL.App.Helpers;
using MesETL.Shared.Helper;
using MySqlConnector;
using Xunit.Abstractions;

View File

@ -1,6 +1,7 @@
using System.Data;
using System.Text;
using MesETL.App.Helpers;
using MesETL.Shared.Helper;
using MySqlConnector;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;

View File

@ -1,4 +1,5 @@
using MesETL.App.Helpers;
using MesETL.Shared.Helper;
namespace TestProject1;

View File

@ -0,0 +1,18 @@
<!-- MesETL.Tool: console executable (see MesETL.Tool/Program.cs) built for net8.0. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Exe: the project produces a runnable program rather than a library. -->
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<!-- Program.cs uses Host.CreateApplicationBuilder and the configuration APIs from this package. -->
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
</ItemGroup>
<ItemGroup>
<!-- Shared helpers (MesETL.Shared.Helper namespace) used by Program.cs. -->
<ProjectReference Include="..\MesETL.Shared\MesETL.Shared.csproj" />
</ItemGroup>
</Project>

69
MesETL.Tool/Program.cs Normal file
View File

@ -0,0 +1,69 @@
using System.Collections.Concurrent;
using System.Data;
using MesETL.Shared.Helper;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Hosting;
// Entry point of the MesETL tool.
// Configuration is read from the command line; each short switch is an alias of a configuration key.
var host = Host.CreateApplicationBuilder(args);
var switchMappings = new Dictionary<string, string>
{
    { "-s", "ConnectionString" },
    { "--ConnectionString", "ConnectionString" },
    { "-B", "Databases" },
    { "--Databases", "Databases" },
    { "-a", "All" }
};
host.Configuration.AddCommandLine(args, switchMappings);
host.Build();

// Both settings are mandatory: fail fast when either one is missing.
var connStr = host.Configuration.GetValue<string>("ConnectionString");
if (connStr is null)
{
    throw new ApplicationException("没有配置数据库连接字符串");
}

var databaseList = host.Configuration.GetValue<string>("Databases");
if (databaseList is null)
{
    throw new ApplicationException("没有配置数据库");
}

// "Databases" is a comma-separated list of database names.
var databases = databaseList.Split(',').ToList();
var all = host.Configuration.GetValue<bool>("All");

// "count" command: print row counts as Markdown tables.
var isCountCommand = args.Length > 1 && args[0] == "count";
if (isCountCommand)
{
    var countsByDatabase = await CountDatabasesAsync(connStr, databases);
    if (!all)
    {
        // Default mode: sum the counts of same-named tables across all databases into one table.
        var totals = new Dictionary<string, long>();
        foreach (var (_, tableCounts) in countsByDatabase)
        {
            foreach (var (tableName, tableCount) in tableCounts)
            {
                totals.AddOrUpdate(tableName, tableCount, (key, num) => num + tableCount);
            }
        }

        var totalRows = totals.Select(pair => new { TABLE_NAME = pair.Key, COUNT = pair.Value });
        Console.WriteLine(totalRows.ToMarkdownTable());
    }
    else
    {
        // "All" mode: one table per database, preceded by the database name.
        foreach (var (databaseName, tableCounts) in countsByDatabase)
        {
            Console.WriteLine(databaseName + ":");
            var rows = tableCounts.Select(pair => new { TABLE_NAME = pair.Key, COUNT = pair.Value });
            Console.WriteLine(rows.ToMarkdownTable());
        }
    }
}
// Counts the rows of every table in each database listed in dbNames.
//
// Parameters:
//   connStr           - connection string handed to DatabaseHelper for every query.
//   dbNames           - database (schema) names to count. The table list is read from
//                       information_schema for the FIRST entry only and reused for all others,
//                       so every database is expected to contain the same tables.
//   cancellationToken - cancels the table-list query, both parallel loops and each COUNT query.
//
// Returns: database name -> (table name -> row count). Empty when dbNames is empty.
//
// NOTE(review): database and table names are interpolated directly into the SQL text because
// identifiers cannot be bound as parameters. Only pass names from a trusted source.
async Task<IDictionary<string, IDictionary<string,long>>> CountDatabasesAsync(string connStr, IList<string> dbNames, CancellationToken cancellationToken = default)
{
    if (dbNames.Count == 0)
    {
        return new Dictionary<string, IDictionary<string, long>>();
    }

    var tables = await DatabaseHelper.QueryTableAsync(connStr,
        $"""
         SELECT TABLE_NAME FROM information_schema.`TABLES` WHERE TABLE_SCHEMA = '{dbNames[0]}';
         """, cancellationToken);

    // Materialize the names once so the parallel workers do not enumerate the DataTable concurrently.
    var tableNames = tables.Tables[0].Rows.Cast<DataRow>()
        .Select(row => row[0].ToString()!)
        .ToArray();

    // Typed as ConcurrentDictionary at both levels so every update below is atomic.
    var counts = new ConcurrentDictionary<string, ConcurrentDictionary<string, long>>();

    await Parallel.ForEachAsync(dbNames, cancellationToken, async (dbName, dbToken) =>
    {
        await Parallel.ForEachAsync(tableNames, dbToken, async (tableName, tableToken) =>
        {
            var count = (long)(await DatabaseHelper.QueryScalarAsync(connStr,
                $"SELECT COUNT(1) FROM `{dbName}`.`{tableName}`;", tableToken))!;

            // GetOrAdd returns the per-database dictionary whether or not it already existed, so the
            // count of the first table seen for a database is recorded too (AddOrUpdate with an empty
            // "add" value would have dropped it).
            counts.GetOrAdd(dbName, static _ => new ConcurrentDictionary<string, long>())
                .AddOrUpdate(tableName, count, (_, existing) => existing + count);
        });
    });

    return counts.ToDictionary(
        pair => pair.Key,
        pair => (IDictionary<string, long>)pair.Value);
}

View File

@ -4,6 +4,10 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MesETL.App", "MesETL.App\Me
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MesETL.Test", "MesETL.Test\MesETL.Test.csproj", "{8679D5B6-5853-446E-9882-7B7A8E270500}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MesETL.Tool", "MesETL.Tool\MesETL.Tool.csproj", "{68307B05-3D66-4322-A42F-C044C1E8BA3B}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MesETL.Shared", "MesETL.Shared\MesETL.Shared.csproj", "{FE134001-0E22-458B-BEF2-29712A29087E}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@ -18,5 +22,13 @@ Global
{8679D5B6-5853-446E-9882-7B7A8E270500}.Debug|Any CPU.Build.0 = Debug|Any CPU
{8679D5B6-5853-446E-9882-7B7A8E270500}.Release|Any CPU.ActiveCfg = Release|Any CPU
{8679D5B6-5853-446E-9882-7B7A8E270500}.Release|Any CPU.Build.0 = Release|Any CPU
{68307B05-3D66-4322-A42F-C044C1E8BA3B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{68307B05-3D66-4322-A42F-C044C1E8BA3B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{68307B05-3D66-4322-A42F-C044C1E8BA3B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{68307B05-3D66-4322-A42F-C044C1E8BA3B}.Release|Any CPU.Build.0 = Release|Any CPU
{FE134001-0E22-458B-BEF2-29712A29087E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{FE134001-0E22-458B-BEF2-29712A29087E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{FE134001-0E22-458B-BEF2-29712A29087E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FE134001-0E22-458B-BEF2-29712A29087E}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal