2024-01-29 09:29:16 +08:00
parent 4f96b77e55
commit 083090c62b
63 changed files with 2479 additions and 1491 deletions


@@ -6,8 +6,9 @@ using System.Threading.Tasks;
namespace ConsoleApp2.HostedServices.Abstractions
{
public interface IDataSource:IDisposable
public interface IDataReader : IDisposable
{
public Task DoEnqueue(Action<DataRecord> action);
DataRecord Current { get; }
ValueTask<bool> ReadAsync();
}
}
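For context, the renamed IDataReader contract is pull-based: a caller advances the cursor with ReadAsync and reads the record from Current, exactly as the new FileInputService does further down in this commit. The sketch below is illustrative only; the DrainToAsync helper and its namespace are hypothetical, and DataRecordQueue's namespace is assumed from FileInputService's usings.

using System.Threading.Tasks;
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Services; // assumed location of DataRecordQueue

namespace ConsoleApp2.Helpers; // hypothetical

public static class DataReaderExtensions
{
    // Drains a reader into a queue. ReadAsync() returns false once no records remain,
    // and Current is only valid after a successful ReadAsync().
    public static async Task DrainToAsync(this IDataReader reader, DataRecordQueue queue)
    {
        using (reader) // IDataReader extends IDisposable
        {
            while (await reader.ReadAsync())
            {
                queue.Enqueue(reader.Current);
            }
        }
    }
}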


@@ -5,5 +5,5 @@ namespace ConsoleApp2.HostedServices.Abstractions;
public interface IInputService
{
public Task ExecuteAsync(TasksOptions tasksOptions, DataRecordQueue producerQueue, ProcessContext context, CancellationToken cancellationToken);
public Task ExecuteAsync(CancellationToken cancellationToken);
}


@@ -5,5 +5,5 @@ namespace ConsoleApp2.HostedServices.Abstractions;
public interface IOutputService
{
public Task ExecuteAsync(TasksOptions tasksOptions, DataRecordQueue consumerQueue, ProcessContext context, CancellationToken cancellationToken);
public Task ExecuteAsync(CancellationToken ct);
}


@@ -5,5 +5,5 @@ namespace ConsoleApp2.HostedServices.Abstractions;
public interface ITransformService
{
public Task ExecuteAsync(TasksOptions tasksOptions, DataRecordQueue producerQueue, DataRecordQueue consumerQueue, ProcessContext context, CancellationToken cancellationToken);
public Task ExecuteAsync(CancellationToken cancellationToken);
}


@@ -0,0 +1,111 @@
using ConsoleApp2.Const;
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using ConsoleApp2.Services.ETL;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace ConsoleApp2.HostedServices;
public record FileInputInfo
{
public required string FileName { get; init; }
public required string TableName { get; init; }
public required string[] Headers { get; init; }
}
public enum FileInputType
{
MyDumperCsv,
MyDumperZst,
ErrorLog,
}
/// <summary>
/// Imports files from the input directory
/// </summary>
public class FileInputService : IInputService
{
private readonly ILogger _logger;
private readonly DataRecordQueue _producerQueue;
private readonly IOptions<DataInputOptions> _dataInputOptions;
private readonly ProcessContext _context;
private readonly DataReaderFactory _dataReaderFactory;
public FileInputService(ILogger<FileInputService> logger,
IOptions<DataInputOptions> dataInputOptions,
ProcessContext context,
[FromKeyedServices(ProcessStep.Produce)] DataRecordQueue producerQueue,
DataReaderFactory dataReaderFactory)
{
_logger = logger;
_dataInputOptions = dataInputOptions;
_context = context;
_producerQueue = producerQueue;
_dataReaderFactory = dataReaderFactory;
}
public async Task ExecuteAsync(CancellationToken cancellationToken)
{
var inputDir = _dataInputOptions.Value.InputDir ?? throw new ApplicationException("The file input directory is not configured");
_logger.LogInformation("***** Input service started, working directory: {InputDir} *****", inputDir);
var trans = _dataInputOptions.Value.FileInputMetaBuilder;
if (trans is null) throw new ApplicationException("The file-name to table-name mapping delegate is not configured");
FileInputInfo[] infoArr = Directory.GetFiles(inputDir)
.Select(f => trans(f))
.Where(info => info is not null).ToArray()!;
var orderedInfo = GetFilesInOrder(infoArr).ToArray();
_logger.LogInformation("***** {Count} files founded in directory{OrderedCount} files is matched with configuration *****", infoArr.Length, orderedInfo.Length);
foreach (var info in orderedInfo)
{
_logger.LogDebug("Table {TableName}: {FileName}", info.TableName, info.FileName);
}
foreach (var info in orderedInfo)
{
_logger.LogInformation("Reading file: {FileName}, table: {TableName}", info.FileName, info.TableName);
var source = _dataReaderFactory.CreateReader(info.FileName, info.TableName, info.Headers);
while (await source.ReadAsync())
{
var record = source.Current;
_producerQueue.Enqueue(record);
_context.AddInput();
}
_logger.LogInformation("Input of table: '{TableName}' finished", info.TableName);
}
_context.CompleteInput();
_logger.LogInformation("***** Input service finished *****");
}
/// <summary>
/// Reads the configuration and returns the files in the configured table order
/// </summary>
/// <returns></returns>
private IEnumerable<FileInputInfo> GetFilesInOrder(FileInputInfo[] inputFiles)
{
var tableOrder = _dataInputOptions.Value.TableOrder;
if (tableOrder is null or { Length: 0 })
return inputFiles;
return Yield();
IEnumerable<FileInputInfo> Yield()
{
foreach (var tableName in tableOrder)
{
var target = inputFiles.FirstOrDefault(f =>
f.TableName.Equals(tableName, StringComparison.OrdinalIgnoreCase));
if (target is not null)
yield return target;
}
}
}
}
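FileInputService relies on DataInputOptions.FileInputMetaBuilder to turn each file path into a FileInputInfo, or null to skip the file. The real builder is registered in the host setup and is not part of this diff; the sketch below only illustrates the shape such a delegate could take, assuming MyDumper-style "<database>.<table>.csv" file names and a caller-supplied header lookup (both assumptions).

using System;
using System.Collections.Generic;
using System.IO;
using ConsoleApp2.HostedServices; // FileInputInfo
using ConsoleApp2.Options;        // DataInputOptions

// Hypothetical wiring sketch, not code from this commit.
public static class FileInputMetaBuilderSample
{
    public static void Configure(DataInputOptions options, IReadOnlyDictionary<string, string[]> headersByTable)
    {
        options.FileInputMetaBuilder = filePath =>
        {
            var fileName = Path.GetFileName(filePath);
            if (!fileName.EndsWith(".csv", StringComparison.OrdinalIgnoreCase))
                return null; // a null result is filtered out by FileInputService

            var parts = fileName.Split('.');
            if (parts.Length < 3)
                return null; // not "<database>.<table>.csv"

            var tableName = parts[1];
            return headersByTable.TryGetValue(tableName, out var headers) // header lookup supplied by the caller
                ? new FileInputInfo { FileName = filePath, TableName = tableName, Headers = headers }
                : null;
        };
    }
}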


@@ -1,59 +0,0 @@
using ConsoleApp2.Const;
using ConsoleApp2.Helpers;
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace ConsoleApp2.HostedServices;
/// <summary>
/// Imports headers and data from the CSV files exported by MyDumper
/// </summary>
public class InputService : IInputService
{
private readonly ILogger _logger;
private readonly IOptions<DataInputOptions> _dataInputOptions;
private readonly ProcessContext _context;
public InputService(ILogger<InputService> logger,
IOptions<DataInputOptions> dataInputOptions,
ProcessContext context)
{
_logger = logger;
_dataInputOptions = dataInputOptions;
_context = context;
}
public async Task ExecuteAsync(TasksOptions tasksOptions, DataRecordQueue producerQueue, ProcessContext context,CancellationToken cancellationToken)
{
var inputDir = _dataInputOptions.Value.InputDir;
_logger.LogInformation("***** Csv input service start, working dir: {InputDir}, thread id: {ThreadId} *****", inputDir, Environment.CurrentManagedThreadId);
var files = Directory.GetFiles(inputDir);
if (files.Length == 0)
{
_logger.LogInformation("No source files found in {InputDir}", inputDir);
return;
}
var count = 0;
foreach (var tableName in tasksOptions.TableInfoConfig.Keys)
{
_logger.LogInformation("Working table: {tableName}", tableName);
var source = _dataInputOptions.Value.CreateSource?.Invoke(tableName);
await source.DoEnqueue((record) =>
{
_context.AddInput();
producerQueue.Enqueue(record);
count++;
});
_logger.LogInformation("table:'{tableName}' input completed", tableName);
}
context.CompleteInput();
_logger.LogInformation("***** Csv input service completed *****");
}
}


@@ -1,110 +1,226 @@
using ConsoleApp2.HostedServices.Abstractions;
using System.Diagnostics;
using System.Text;
using ConsoleApp2.Helpers;
using ConsoleApp2.Helpers.Database;
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using ConsoleApp2.Services.ErrorRecorder;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace ConsoleApp2.HostedServices;
public class MainHostedService : BackgroundService
{
private readonly ILogger _logger;
private Stopwatch? _stopwatch;
private readonly IInputService _input;
private readonly ITransformService _transform;
private readonly IOutputService _output;
private readonly ILogger _logger;
private readonly ProcessContext _context;
private readonly IOptions<DatabaseOutputOptions> _databaseOptions;
private readonly IOptions<TenantDbOptions> _tenantDbOptions;
private readonly IConfiguration _config;
public MainHostedService(ILogger<MainHostedService> logger, IInputService input, ITransformService transform, IOutputService output, ProcessContext context)
public MainHostedService(IInputService input,
ITransformService transform,
IOutputService output,
ILogger<MainHostedService> logger,
IOptions<TenantDbOptions> tenantDbOptions,
IOptions<DatabaseOutputOptions> databaseOptions,
IConfiguration config,
ProcessContext context)
{
_logger = logger;
_input = input;
_transform = transform;
_output = output;
_logger = logger;
_tenantDbOptions = tenantDbOptions;
_databaseOptions = databaseOptions;
_config = config;
_context = context;
}
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
var taskFun = (TasksOptions taskOp, DataRecordQueue producerQueue, DataRecordQueue consumerQueue, ProcessContext context) =>
_stopwatch = Stopwatch.StartNew();
var inputTask = ExecuteAndCatch(
async () => await _input.ExecuteAsync(stoppingToken), "Exception occurred in the file input service", stoppingToken);
var transformTask = ExecuteAndCatch(
async () => await _transform.ExecuteAsync(stoppingToken), "Exception occurred in the transform service", stoppingToken);
var outputTask = ExecuteAndCatch(
async () => await _output.ExecuteAsync(stoppingToken), "Exception occurred in the output service", stoppingToken);
await Task.WhenAll(inputTask, transformTask, outputTask);
_stopwatch.Stop();
_logger.LogInformation("***** All tasks completed *****");
_logger.LogInformation("***** ElapseTime: {Time}", (_stopwatch.ElapsedMilliseconds / 1000f).ToString("F3"));
await Task.Delay(5000, stoppingToken);
if (!stoppingToken.IsCancellationRequested)
{
var inputTask = Task.Run(async () =>
{
try
{
await _input.ExecuteAsync(taskOp, producerQueue, context, stoppingToken);
}
catch (Exception ex)
{
_logger.LogCritical("Exception occurred on inputService:{Message},{StackTrace}", ex.Message, ex.StackTrace);
throw;
}
await ExportResultAsync();
_logger.LogInformation("The execution result export to {Path}",
Path.Combine(AppDomain.CurrentDomain.BaseDirectory, $"Result-{ErrorRecorder.UID}.md"));
if (_config["RestoreIndex"] is not null)
await RestoreIndexAsync();
});
var transformTask = Task.Run(async () =>
{
try
{
await _transform.ExecuteAsync(taskOp, producerQueue, consumerQueue, context, stoppingToken);
}
catch (Exception ex)
{
_logger.LogCritical("Exception occurred on transformService:{Message},{StackTrace}", ex.Message, ex.StackTrace);
throw;
}
});
var outputTask = Task.Run(async () =>
{
try
{
await _output.ExecuteAsync(taskOp, consumerQueue, context,stoppingToken);
}
catch (Exception ex)
{
_logger.LogCritical("Exception occurred on outputService:{Message},{StackTrace}", ex.Message, ex.StackTrace);
throw;
}
});
};
var bigTablesDic = new Dictionary<string, TableInfo>
Environment.Exit(0);
}
else Environment.Exit(1);
}
private Task ExecuteAndCatch(Func<Task> func, string message, CancellationToken ct)
{
return Task.Run(async () =>
{
{"order_block_plan",new TableInfo{SimulaRowCount=2725553 }},//CreateTime < 202301的删除
{"order_block_plan_result",new TableInfo{SimulaRowCount=1174096 }},
{"order_box_block",new TableInfo{SimulaRowCount=29755672 }},
{"order_item",new TableInfo{SimulaRowCount=1345520079 }},
{"simple_plan_order",new TableInfo{SimulaRowCount=351470 }},//CreateTime < 202301的删除
};
taskFun(new TasksOptions { TableInfoConfig = bigTablesDic, OutPutOptions = new OutPutOptions { FlushCount = 10000, OutPutTaskCount = 2 } },
new DataRecordQueue(), new DataRecordQueue(),new ProcessContext());
var smallTablesDic = new Dictionary<string, TableInfo>
try
{
await func();
}
catch (Exception e)
{
_logger.LogCritical(e, "{Msg}\t{ErrMsg}", message, e.Message);
_context.AddException(e);
Environment.Exit(1);
}
}, ct);
}
/// <summary>
/// Restores the indexes on all tenant databases.
/// </summary>
/// <returns></returns>
/// <exception cref="ApplicationException"></exception>
private Task RestoreIndexAsync()
{
var databases = _tenantDbOptions.Value.DbList?.Keys
?? throw new ApplicationException("无法还原索引,因为分库配置中没有配置数据库");
var connStr = _databaseOptions.Value.ConnectionString
?? throw new ApplicationException("无法还原索引,因为没有配置数据库连接字符串");
var list = new List<Task>();
foreach(var db in databases)
{
{"machine",new TableInfo{SimulaRowCount=14655 }},
{"order",new TableInfo{SimulaRowCount=5019216 }},
{"order_data_block",new TableInfo{SimulaRowCount=731800334 }},
{"order_data_goods",new TableInfo{SimulaRowCount=25803671 }},
{"order_data_parts",new TableInfo{SimulaRowCount=468517543 }},
{"order_module",new TableInfo{SimulaRowCount=103325385 }},
{"order_module_extra",new TableInfo{SimulaRowCount=54361321 }},
{"order_module_item",new TableInfo{SimulaRowCount=69173339 }},
{"order_package",new TableInfo{SimulaRowCount=16196195 }},
{"order_process",new TableInfo{SimulaRowCount=3892685 }},//orderNo < 202301的
{"order_process_step",new TableInfo{SimulaRowCount=8050349 }},//orderNo < 202301的删除
{"order_process_step_item",new TableInfo{SimulaRowCount=14538058 }},//orderNo < 202301的删除
{"order_scrap_board",new TableInfo{SimulaRowCount=123998 }},
{"process_group",new TableInfo{SimulaRowCount=1253 }},
{"process_info",new TableInfo{SimulaRowCount=7839 }},
{"process_item_exp",new TableInfo{SimulaRowCount=28 }},
{"process_schdule_capacity",new TableInfo{SimulaRowCount=39736 }},
{"process_step_efficiency",new TableInfo{SimulaRowCount=8 }},
{"report_template",new TableInfo{SimulaRowCount=7337 }},
{"simple_package",new TableInfo{SimulaRowCount=130436 }},//orderNo < 202301的删除
{"sys_config",new TableInfo{SimulaRowCount=2296 }},
{"work_calendar",new TableInfo{SimulaRowCount=11 }},
{"work_shift",new TableInfo{SimulaRowCount=59 }},
{"work_time",new TableInfo{SimulaRowCount=62 }},
var task = DatabaseHelper.NonQueryAsync(connStr + $";Database={db};",
"""
CREATE INDEX `idx_CompanyID` ON `machine` (`CompanyID`);
CREATE INDEX `idx_companyid` ON `order` (`CompanyID`);
CREATE INDEX `idx_CompanyID` ON `order_block_plan` (`CompanyID`);
CREATE INDEX `idx_PlanID` ON `order_block_plan_item` (`PlanID`);
CREATE INDEX `idx_orderno` ON `order_box_block` (`OrderNo`);
CREATE INDEX `index_OrderNo` ON `order_data_block` (`OrderNo`);
CREATE INDEX `index_OrderNo` ON `order_data_goods` (`OrderNo`);
CREATE INDEX `index_OrderNo` ON `order_data_parts` (`OrderNo`);
CREATE INDEX `index_ItemNo` ON `order_item` (`ItemNo`);
CREATE INDEX `index_OrderNo` ON `order_item` (`OrderNo`);
CREATE INDEX `index_PackageID` ON `order_item` (`PackageID`);
CREATE INDEX `index_PlanID` ON `order_item` (`PlanID`);
CREATE INDEX `idx_OrderNo` ON `order_module` (`OrderNo`);
CREATE INDEX `index_OrderNo` ON `order_module_extra` (`OrderNo`);
CREATE INDEX `index_OrderNo` ON `order_module_item` (`OrderNo`);
CREATE INDEX `idx_OrderNo` ON `order_package` (`OrderNo`);
CREATE INDEX `idx_PakageNo` ON `order_package` (`PakageNo`);
CREATE INDEX `idx_PackageID` ON `order_package_item` (`PackageID`);
CREATE INDEX `idx_companyid` ON `order_patch_detail` (`CompanyID`);
CREATE INDEX `idx_OrderNo` ON `order_process` (`OrderNo`);
CREATE INDEX `index_CompanyID` ON `order_process_schdule` (`CompanyID`);
CREATE INDEX `IX_order_process_step_OrderProcessID` ON `order_process_step` (`OrderProcessID`);
CREATE INDEX `idx_OrderProcessID` ON `order_process_step_item` (`OrderProcessID`);
CREATE INDEX `idx_OrderProcessStepID` ON `order_process_step_item` (`OrderProcessStepID`);
CREATE INDEX `idx_CompanyID` ON `order_scrap_board` (`CompanyID`);
CREATE INDEX `idx_CompanyID` ON `process_group` (`CompanyID`);
CREATE INDEX `idx_CompanyID` ON `process_info` (`CompanyID`);
CREATE INDEX `index_CompanyID` ON `process_item_exp` (`CompanyID`);
CREATE INDEX `idx_CompanyID` ON `process_schdule_capacity` (`CompanyID`);
CREATE INDEX `idx_CompanyID` ON `process_step_efficiency` (`CompanyID`);
CREATE INDEX `idx_CompanyID` ON `report_template` (`CompanyID`);
CREATE INDEX `indx_OrderNo` ON `simple_package` (`OrderNo`);
CREATE INDEX `idx_CompanyID` ON `simple_plan_order` (`CompanyID`);
CREATE INDEX `idx_CompanyID` ON `sys_config` (`CompanyID`);
CREATE INDEX `idx` ON `work_calendar` (`CompanyID`);
CREATE INDEX `idx_CompanyID` ON `work_shift` (`CompanyID`);
CREATE INDEX `IX_work_time_ShiftID` ON `work_time` (`ShiftID`);
""");
list.Add(task);
}
return Task.WhenAll(list);
}
private async Task ExportResultAsync()
{
var sb = new StringBuilder();
if (_context.HasException)
sb.AppendLine("# Program Completed With Error");
else sb.AppendLine("# Program Completed Successfully");
sb.AppendLine("## Process Count");
var processCount = new[]
{
new { State = "Input", Count = _context.InputCount },
new { State = "Transform", Count = _context.TransformCount },
new { State = "Output", Count = _context.OutputCount }
};
taskFun(new TasksOptions { TableInfoConfig = smallTablesDic, OutPutOptions = new OutPutOptions { FlushCount = 20000, OutPutTaskCount = 4 } },
new DataRecordQueue(), new DataRecordQueue(), new ProcessContext());
sb.AppendLine(processCount.ToMarkdownTable());
sb.AppendLine("\n---\n");
sb.AppendLine("## Table Output Progress");
var tableOutputProgress = _context.TableProgress.Select(pair =>
new { Table = pair.Key, Count = pair.Value });
sb.AppendLine(tableOutputProgress.ToMarkdownTable());
sb.AppendLine("\n---\n");
sb.AppendLine("## Result");
var elapsedTime = (_stopwatch!.ElapsedMilliseconds / 1000f);
var result = new[]
{
new { Field = "ElapsedTime", Value = elapsedTime.ToString("F2") },
new
{
Field = "Average Output Speed",
Value = (_context.OutputCount / elapsedTime).ToString("F2") + " records/s"
}
};
sb.AppendLine(result.ToMarkdownTable());
await File.WriteAllTextAsync(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, $"Result-{ErrorRecorder.UID}.md"),
sb.ToString());
}
}


@@ -2,8 +2,11 @@
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using ConsoleApp2.Services.ErrorRecorder;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MySqlDestination = ConsoleApp2.Services.ETL.MySqlDestination;
using TaskExtensions = ConsoleApp2.Helpers.TaskExtensions;
namespace ConsoleApp2.HostedServices;
@@ -14,96 +17,116 @@ public class OutputService : IOutputService
{
private readonly ILogger _logger;
private readonly IOptions<DatabaseOutputOptions> _outputOptions;
private readonly IOptions<DataTransformOptions> _transformOptions;
private readonly ProcessContext _context;
private readonly TaskManager _taskManager;
private readonly ErrorRecorder _errorRecorder;
private readonly ErrorRecorderFactory _errorRecorderFactory;
private readonly RecordQueuePool _queuePool;
public OutputService(ILogger<OutputService> logger,
IOptions<DatabaseOutputOptions> outputOptions,
ProcessContext context,
TaskManager taskManager,
IOptions<DataTransformOptions> transformOptions,
ErrorRecorder errorRecorder)
RecordQueuePool queuePool,
ErrorRecorderFactory errorRecorderFactory)
{
_logger = logger;
_outputOptions = outputOptions;
_context = context;
_taskManager = taskManager;
_transformOptions = transformOptions;
_errorRecorder = errorRecorder;
_queuePool = queuePool;
_errorRecorderFactory = errorRecorderFactory;
}
public async Task ExecuteAsync(TasksOptions tasksOptions, DataRecordQueue consumerQueue, ProcessContext context, CancellationToken cancellationToken)
public async Task ExecuteAsync(CancellationToken ct)
{
_logger.LogInformation("***** Mysql output service started *****");
_taskManager.CreateTasks(async () =>
_logger.LogInformation("***** Output service started *****");
var dbTaskManager = new TaskManager(5);
var dbTasks = new Dictionary<string, Task>();
while (!_context.IsTransformCompleted)
{
//key: database name, value: records grouped by the target database
var databaseDict = new Dictionary<string, List<DataRecord>>();
while (!context.IsTransformCompleted || consumerQueue.Count > 0)
foreach (var (db, queue) in _queuePool.Queues)
{
if (!consumerQueue.TryDequeue(out var record)) continue;
var dbName = record.Database;
var records = databaseDict.AddOrUpdate(dbName, [record], (_, list) =>
if (!dbTasks.ContainsKey(db))
{
list.Add(record);
return list;
});
if (records.Count >= tasksOptions.OutPutOptions.FlushCount)
{
await FlushAsync(dbName, records);
records.Clear();
dbTasks.Add(db, await dbTaskManager.CreateTaskAsync(
async () => await StartDatabaseWorker(db, queue, ct), ct));
}
}
foreach (var (db, records) in databaseDict)
await Task.Delay(500, ct);
}
await TaskExtensions.WaitUntil(() => dbTaskManager.RunningTaskCount == 0, 25, ct);
_context.CompleteOutput();
_logger.LogInformation("***** Output service finished *****");
}
private async Task StartDatabaseWorker(string db, DataRecordQueue queue, CancellationToken ct = default)
{
_logger.LogInformation("*****开启输出线程,数据库: {db} *****", db);
var taskManager = new TaskManager(_outputOptions.Value.MaxDatabaseOutputTask);
var tmp = new List<DataRecord>();
while (!_context.IsTransformCompleted || queue.Count > 0)
{
if (ct.IsCancellationRequested)
break;
if (!queue.TryDequeue(out var record)) continue;
var dbName = record.Database ?? throw new ApplicationException("The output record is missing a database name");
if (dbName != db)
throw new ApplicationException($"Record does not belong to this output worker, record database: {dbName}, worker database: {db}");
tmp.Add(record);
if (tmp.Count >= _outputOptions.Value.FlushCount)
{
if (records.Count > 0)
var list = tmp;
tmp = [];
await taskManager.CreateTaskAsync(async arg => // could be converted to a method group
{
await FlushAsync(db, records);
records.Clear();
}
var tuple = arg as Tuple<string, List<DataRecord>>;
try
{
await FlushAsync(tuple!.Item1, tuple.Item2);
}
catch (Exception e)
{
_logger.LogError(e, "输出记录时发生错误");
throw;
}
}, Tuple.Create(dbName, list), ct);
}
}
databaseDict.Clear();
_logger.LogInformation("***** Mysql output thread completed *****");
}, tasksOptions.OutPutOptions.OutPutTaskCount);
await _taskManager.WaitAll();
//_context.CompleteOutput();
_logger.LogInformation("***** Mysql output service completed *****");
// Wait for all flush subtasks to complete
await TaskExtensions.WaitUntil(() => taskManager.RunningTaskCount == 0, 10, ct);
// Flush any remaining records
if (tmp.Count > 0)
{
await FlushAsync(db, tmp);
}
_logger.LogInformation("*****输出线程结束,数据库: {db} *****", db);
}
private async Task FlushAsync(string dbName, IEnumerable<DataRecord> records)
{
var count = 0;
var connStr = _outputOptions.Value.ConnectionString ?? throw new InvalidOperationException("ConnectionString is null");
await using var output = new MySqlDestination($"{connStr};Database={dbName};", _logger, _context, _transformOptions, _errorRecorder);
//if (records == null || records.Count() == 0) return;
//var dbName = $"cferp_test_1";
//if (records != null && records.Count() > 0)
//{
// dbName = $"cferp_test_{records.FirstOrDefault()?.CompanyID}";
//}
var connStr = _outputOptions.Value.ConnectionString ??
throw new InvalidOperationException("连接字符串为空");
await using var output = new MySqlDestination($"{connStr};Database={dbName};", _logger,
_outputOptions, _errorRecorderFactory.CreateOutput(dbName), _context);
//await using var output = new MySqlDestination(new MySqlConnectionStringBuilder
//{
// Server = "127.0.0.1",
// Port = 34309,
// Database = dbName,
// UserID = "root",
// Password = "123456",
// MaximumPoolSize = 50,
//}.ConnectionString, _logger,true);
var tableOutput = new Dictionary<string, int>();
foreach (var record in records)
{
await output.WriteRecordAsync(record);
count++;
tableOutput.AddOrUpdate(record.TableName, 1, (_, v) => v + 1);
}
await output.FlushAsync(_outputOptions.Value.MaxAllowedPacket);
_context.AddOutput(count);
foreach (var (key, value) in tableOutput)
{
_context.AddTableOutput(key, value);
}
_logger.LogTrace("Flushed {Count} records", tableOutput.Values.Sum(i => i));
}
}


@@ -1,6 +1,7 @@
using System.Diagnostics;
using ConsoleApp2.Const;
using ConsoleApp2.Services;
using ConsoleApp2.Services.Loggers;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
@@ -12,33 +13,33 @@ namespace ConsoleApp2.HostedServices;
/// </summary>
public class TaskMonitorService : BackgroundService
{
private readonly IHostApplicationLifetime _lifetime;
private readonly ILogger<TaskMonitorService> _logger;
private readonly IEnumerable<ITaskMonitorLogger> _monitorLoggers;
private readonly ProcessContext _context;
private readonly DataRecordQueue _producerQueue;
private readonly DataRecordQueue _consumerQueue;
private readonly RecordQueuePool _queuePool;
public TaskMonitorService(IHostApplicationLifetime lifetime,
public TaskMonitorService(
ILogger<TaskMonitorService> logger,
ProcessContext context,
[FromKeyedServices(ProcessStep.Producer)]
[FromKeyedServices(ProcessStep.Produce)]
DataRecordQueue producerQueue,
[FromKeyedServices(ProcessStep.Consumer)]
DataRecordQueue consumerQueue)
RecordQueuePool queuePool,
IEnumerable<ITaskMonitorLogger> monitorLoggers)
{
_lifetime = lifetime;
_logger = logger;
_context = context;
_producerQueue = producerQueue;
_consumerQueue = consumerQueue;
_queuePool = queuePool;
_monitorLoggers = monitorLoggers;
}
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
await Task.Factory.StartNew(Monitor, stoppingToken);
await Task.Run(() => Monitor(stoppingToken), stoppingToken);
}
private async Task Monitor()
private async Task Monitor(CancellationToken stoppingToken)
{
var sw = Stopwatch.StartNew();
var lastTime = sw.ElapsedMilliseconds;
@@ -47,7 +48,7 @@ public class TaskMonitorService : BackgroundService
var lastOutputCount = _context.OutputCount;
bool endCheck = false;
while (true)
while (!stoppingToken.IsCancellationRequested)
{
EndCheck:
// var running = 0;
@@ -82,22 +83,40 @@ public class TaskMonitorService : BackgroundService
var inputSpeed = (inputCount - lastInputCount) / elapseTime;
var transformSpeed = (transformCount - lastTransformCount) / elapseTime;
var outputSpeed = (outputCount - lastOutputCount) / elapseTime;
// _logger.LogInformation(
// "Task monitor: running: {Running}, error: {Error}, completed: {Completed}, canceled: {Canceled}, outputSpeed: {Speed} records/s",
// running, error, completed, canceled, outputSpeed);
_logger.LogInformation(
"Process monitor: input: {inputStatus}, transform: {transformStatus}, output: {outputStatus}\nInput: {InputCount}, Transform: {TransformCount}, Output: {OutputCount}",
_context.IsInputCompleted ? "completed" : $"running {inputSpeed:F2} records/s",
_context.IsTransformCompleted ? "completed" : $"running {transformSpeed:F2} records/s",
_context.IsOutputCompleted ? "completed" : $"running {outputSpeed:F2} records/s",
inputCount,
transformCount,
outputCount);
_logger.LogInformation("Queue monitor: producer queue: {ProducerQueue}, consumer queue: {ConsumerQueue}",
_producerQueue.Count, _consumerQueue.Count);
foreach (var logger in _monitorLoggers)
{
logger.LogStatus("Monitor: Progress status", new Dictionary<string, string>
{
{"Input",_context.IsInputCompleted ? "completed" : $"running {inputSpeed:F2} records/s" },
{"Transform", _context.IsTransformCompleted ? "completed" : $"running {transformSpeed:F2} records/s" },
{"Output", _context.IsOutputCompleted ? "completed" : $"running {outputSpeed:F2} records/s" }
});
await Task.Delay(5000);
logger.LogStatus("Monitor: Table output progress",
_context.TableProgress
.ToDictionary(kv => kv.Key, kv => kv.Value.ToString()),
ITaskMonitorLogger.LogLevel.Progress);
logger.LogStatus("Monitor: Process count", new Dictionary<string, string>
{
{"Input", inputCount.ToString()},
{"Transform", transformCount.ToString()},
{"Output", outputCount.ToString()}
}, ITaskMonitorLogger.LogLevel.Progress);
logger.LogStatus("Monitor: Queue", new Dictionary<string, string>
{
{"Producer queue records", _producerQueue.Count.ToString() },
{"Output queues", _queuePool.Queues.Count.ToString() },
{"Output queue records", _queuePool.Queues.Values.Sum(queue => queue.Count).ToString()},
});
}
await Task.Delay(5000, stoppingToken);
lastTime = time;
lastInputCount = inputCount;
@@ -114,9 +133,6 @@ public class TaskMonitorService : BackgroundService
break;
}
}
_logger.LogInformation("***** All tasks completed *****");
_logger.LogInformation("***** ElapseTime: {Time}", (sw.ElapsedMilliseconds / 1000f).ToString("F3"));
// _lifetime.StopApplication();
}
}


@@ -1,83 +0,0 @@
using ConsoleApp2.HostedServices.Abstractions;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using ConsoleApp2.Const;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using System.Reflection.PortableExecutable;
using System.Collections.Concurrent;
using ConsoleApp2.SimulationService;
namespace ConsoleApp2.HostedServices
{
public class TestInputService : IInputService
{
private readonly ILogger _logger;
private readonly IOptions<CsvOptions> _csvOptions;
private readonly DataRecordQueue _producerQueue;
private readonly ProcessContext _context;
public TestInputService(ILogger<TestInputService> logger,
IOptions<CsvOptions> csvOptions,
[FromKeyedServices(ProcessStep.Producer)] DataRecordQueue producerQueue,
ProcessContext context)
{
_logger = logger;
_csvOptions = csvOptions;
_producerQueue = producerQueue;
_context = context;
}
public async Task ExecuteAsync(TasksOptions tasksOptions, DataRecordQueue producerQueue, ProcessContext context, CancellationToken cancellationToken)
{
var tableName = "order_item";
var headers = new string[] { "ID","OrderNo","ItemNo","ItemType","RoomID","BoxID","DataID","PlanID","PackageID","Num","CompanyID","ShardKey" };
var dataCount = 1200000000L;
var tempCount = 80000;
var tempRecords=new List<DataRecord>();
var comanyID = 1;
short[] shareKeys = { 23040, 23070, 23100, 24000, 24040, 24070, 24100, 25000, 25040, 25070, 25100 };
int[] companyIds = { 1, 2, 3, 4 };
var sk = shareKeys.First();
var companyID = companyIds.First();
var shareKeyInterval = 20000;
var getShareKeyTimes = 0;
var getCompanyIDTimes = 0;
var shareKeyIntervalCount = 0;
for (long i = 1; i <= dataCount; i++)
{
shareKeyIntervalCount++;
if (shareKeyIntervalCount > shareKeyInterval) {
sk=DataHelper.GetShareKey(getShareKeyTimes);
getShareKeyTimes++;
shareKeyIntervalCount = 0;
}
var fields = new string[] { i.ToString(), "20220104020855", (220105981029 + i).ToString(), "1", "482278", "482279", "3768774", "0", "0", "1", companyID.ToString(), sk.ToString() };
var record = new DataRecord(fields, tableName, headers, comanyID);
tempRecords.Add(record);
if (tempRecords.Count >= tempCount)
{
foreach (var rc in tempRecords)
{
_context.AddInput();
_producerQueue.Enqueue(rc);
if (cancellationToken.IsCancellationRequested)
return;
}
tempRecords.Clear();
companyID = DataHelper. GetCompanyId(getCompanyIDTimes);
getCompanyIDTimes++;
}
}
_context.CompleteInput();
_logger.LogInformation("***** Csv input service completed *****");
}
}
}


@@ -1,15 +1,17 @@
using ConsoleApp2.Const;
using ConsoleApp2.Cache;
using ConsoleApp2.Const;
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using Microsoft.Extensions.Caching.Distributed;
using ConsoleApp2.Services.ErrorRecorder;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StackExchange.Redis;
namespace ConsoleApp2.HostedServices;
public record DataTransformContext(DataRecord Record, ICacher Cacher, ILogger Logger);
/// <summary>
/// Data transform service: processes records after they have been imported
/// </summary>
@@ -17,65 +19,99 @@ public class TransformService : ITransformService
{
private readonly ILogger _logger;
private readonly IOptions<DataTransformOptions> _options;
private readonly DataRecordQueue _producerQueue;
private readonly RecordQueuePool _queuePool;
private readonly ProcessContext _context;
private readonly IDistributedCache _cache;
private readonly TaskManager _taskManager;
private readonly ICacher _cache;
private readonly ErrorRecorderFactory _errorRecorderFactory;
public TransformService(ILogger<TransformService> logger,
IOptions<DataTransformOptions> options,
[FromKeyedServices(ProcessStep.Produce)] DataRecordQueue producerQueue,
RecordQueuePool queuePool,
ProcessContext context,
IDistributedCache cache,
TaskManager taskManager)
ICacher cache,
ErrorRecorderFactory errorRecorderFactory)
{
_logger = logger;
_options = options;
_producerQueue = producerQueue;
_queuePool = queuePool;
_context = context;
_cache = cache;
_taskManager = taskManager;
_errorRecorderFactory = errorRecorderFactory;
}
public async Task ExecuteAsync(TasksOptions tasksOptions, DataRecordQueue producerQueue, DataRecordQueue consumerQueue, ProcessContext context, CancellationToken cancellationToken)
public async Task ExecuteAsync(CancellationToken cancellationToken)
{
_logger.LogInformation("***** Data transform service started, thread id: {ThreadId} *****", Environment.CurrentManagedThreadId);
_taskManager.CreateTasks(async () =>
while (!_context.IsInputCompleted || _producerQueue.Count > 0)
{
while ((!context.IsInputCompleted || producerQueue.Count > 0))
{
if (!producerQueue.TryDequeue(out var record)) continue;
if (!_producerQueue.TryDequeue(out var record)) continue;
//filter out unwanted records
if (await _options.Value.RecordFilter?.Invoke(record, _cache) == false) continue;
//modify the record
_options.Value.RecordModify?.Invoke(record);
//cache the record
await _options.Value.RecordCache?.Invoke(record, _cache);
//replace the record
var replaceRecord = await _options.Value.RecordReplace?.Invoke(record, _cache);
if (replaceRecord != null)
try
{
var context = new DataTransformContext(record, _cache, _logger);
if (_options.Value.EnableFilter)
{
record = replaceRecord;
// data filtering
var filter = _options.Value.RecordFilter;
if (filter is not null && await filter(context) == false) continue;
}
//determine which database the record should be routed to
record.Database = _options.Value.DatabaseFilter.Invoke(record);
consumerQueue.Enqueue(record);
_context.AddTransform();
//additional generated records
var addRecords = _options.Value.RecordAdd?.Invoke(record);
if (addRecords is { Count: > 0 })
if (_options.Value.EnableReplacer)
{
foreach (var rc in addRecords)
// data replacement
var replacer = _options.Value.RecordModify;
if (replacer is not null)
{
rc.Database = _options.Value.DatabaseFilter.Invoke(record);
consumerQueue.Enqueue(rc);
_context.AddTransform();
record = await replacer(context);
}
}
// field caching
var cacher = _options.Value.RecordCache;
if (cacher is not null)
await cacher.Invoke(context);
//determine which database the record should be routed to
var dbFilter = _options.Value.DatabaseFilter
?? throw new ApplicationException("The database filter is not configured");
record.Database = dbFilter(record);
_queuePool[record.Database].Enqueue(record);
_context.AddTransform();
if (_options.Value.EnableReBuilder)
{
//rebuild derived records
var addRecords = _options.Value.RecordReBuild?.Invoke(context);
if (addRecords is { Count: > 0 })
{
foreach (var rc in addRecords)
{
if (dbFilter is not null)
rc.Database = dbFilter.Invoke(record);
_queuePool[record.Database].Enqueue(rc);
_context.AddTransform();
}
}
}
}
context.CompleteTransform();
},tasksOptions.TransformTaskCount,cancellationToken);
await _taskManager.WaitAll();
_logger.LogInformation("***** Data transformation service completed *****");
catch (Exception e)
{
_context.AddException(e);
var errorRecorder = _errorRecorderFactory.CreateTransform();
await errorRecorder.LogErrorRecordAsync(record, e);
if (!_options.Value.StrictMode)
_logger.LogError(e, "数据转换时发生错误");
else throw;
}
}
_context.CompleteTransform();
_logger.LogInformation("***** Data transformation service finished *****");
}
}
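The rewritten TransformService is driven entirely by the hooks on DataTransformOptions and the new DataTransformContext record. The wiring below is a hypothetical sketch, not code from this commit: the delegate signatures are inferred from the calls above and assumed to be Task-returning, the option properties are assumed to be settable, and the filter and routing rules are invented purely for illustration.

using System.Threading.Tasks;
using ConsoleApp2.Options;
using Microsoft.Extensions.DependencyInjection;

// Hypothetical configuration sketch for the transform hooks used above.
public static class TransformOptionsSample
{
    public static IServiceCollection AddTransformOptionsSample(this IServiceCollection services)
        => services.Configure<DataTransformOptions>(options =>
        {
            options.EnableFilter = true;
            // Return false to drop a record (TransformService checks `await filter(context) == false`).
            options.RecordFilter = ctx => Task.FromResult(ctx.Record.TableName != "sys_config");

            options.EnableReplacer = true;
            // Return the (possibly rewritten) record; this sketch passes it through unchanged.
            options.RecordModify = ctx => Task.FromResult(ctx.Record);

            // Decide which tenant database each record is routed to; a real filter would
            // inspect the record's company/shard fields instead of returning a constant.
            options.DatabaseFilter = record => "cferp_test_1";

            options.EnableReBuilder = false;
            options.StrictMode = true; // rethrow transform errors instead of only recording them
        });
}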


@@ -1,9 +1,5 @@
using ConsoleApp2.Const;
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options;
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Services;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
namespace ConsoleApp2.HostedServices;
@@ -12,28 +8,36 @@ namespace ConsoleApp2.HostedServices;
public class VoidOutputService : IOutputService
{
private readonly ILogger _logger;
private readonly DataRecordQueue _consumerQueue;
private readonly RecordQueuePool _queuePool;
private readonly ProcessContext _context;
public VoidOutputService([FromKeyedServices(ProcessStep.Consumer)] DataRecordQueue consumerQueue,
ProcessContext context, ILogger<VoidOutputService> logger)
public VoidOutputService(
ProcessContext context, ILogger<VoidOutputService> logger, RecordQueuePool queuePool)
{
_consumerQueue = consumerQueue;
_context = context;
_logger = logger;
_queuePool = queuePool;
}
public Task ExecuteAsync(TasksOptions tasksOptions, DataRecordQueue consumerQueue, ProcessContext context, CancellationToken cancellationToken)
public Task ExecuteAsync(CancellationToken ct)
{
_logger.LogInformation("***** Void output service started, thread id: {ThreadId} *****", Environment.CurrentManagedThreadId);
while (!_context.IsTransformCompleted || _consumerQueue.Count > 0)
_logger.LogInformation("***** Void Output Service Started *****");
while (!_context.IsTransformCompleted || _queuePool.Queues.Count > 0)
{
if (_consumerQueue.TryDequeue(out var record))
foreach (var pair in _queuePool.Queues) // memory optimization: drop queues that are already drained
{
if (_context.IsTransformCompleted && pair.Value.Count == 0)
{
_queuePool.RemoveQueue(pair.Key);
continue;
}
if(!pair.Value.TryDequeue(out var record)) continue;
_context.AddOutput();
}
}
_context.CompleteOutput();
_logger.LogInformation("***** Void output service completed *****");
_logger.LogInformation("***** Void Output Service Stopped *****");
return Task.CompletedTask;
}
}