MES-ETL/MesETL.App/HostedServices/OutputService.cs

158 lines
6.0 KiB
C#
Raw Normal View History

2024-02-15 16:18:50 +08:00
using System.Buffers;
using System.Text;
2024-12-10 14:03:09 +08:00
using MesETL.App.Const;
2024-02-15 16:18:50 +08:00
using MesETL.App.Helpers;
using MesETL.App.HostedServices.Abstractions;
using MesETL.App.Options;
using MesETL.App.Services;
using MesETL.App.Services.ErrorRecorder;
2024-02-10 17:12:26 +08:00
using MesETL.Shared.Helper;
2023-12-29 16:16:05 +08:00
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
2024-02-01 13:41:59 +08:00
using MySqlConnector;
using MySqlDestination = MesETL.App.Services.ETL.MySqlDestination;
2024-02-10 17:12:26 +08:00
using TaskExtensions = MesETL.Shared.Helper.TaskExtensions;
2023-12-29 16:16:05 +08:00
namespace MesETL.App.HostedServices;
2023-12-29 16:16:05 +08:00
2024-12-10 14:03:09 +08:00
/// <summary>
/// Context object handed to the <c>OutputFinished</c> callback once the output stage has finished.
/// </summary>
/// <param name="Serivces">
/// The service provider supplied by the code that creates the context.
/// NOTE(review): the name is misspelled ("Serivces"), but it is part of the record's public
/// surface (constructor parameter and generated property), so renaming it would break callers.
/// </param>
public record DataOutputContext(IServiceProvider Serivces);
2024-01-04 09:00:44 +08:00
/// <summary>
/// 数据导出服务将数据导出至MySql服务
/// </summary>
/// <summary>
/// Data output service: drains the per-database record queues and writes the records to MySQL.
/// </summary>
public class OutputService : IOutputService
{
    private readonly ILogger _logger;
    private readonly IOptions<DatabaseOutputOptions> _outputOptions;
    private readonly ProcessContext _context;
    private readonly ErrorRecorderFactory _errorRecorderFactory;
    private readonly RecordQueuePool _queuePool;
    private readonly IServiceProvider _services;

    /// <summary>
    /// Creates the service; every dependency is stored as-is for later use.
    /// </summary>
    /// <param name="logger">Logger used for all messages emitted by this service.</param>
    /// <param name="outputOptions">Output options (connection string, flush size, task limits, callbacks).</param>
    /// <param name="context">Shared process context used to observe transform completion and report output counts.</param>
    /// <param name="queuePool">Pool exposing one record queue per database name.</param>
    /// <param name="errorRecorderFactory">Factory used to create a per-database output error recorder.</param>
    /// <param name="services">Service provider forwarded to the <c>OutputFinished</c> callback.</param>
    public OutputService(ILogger<OutputService> logger,
        IOptions<DatabaseOutputOptions> outputOptions,
        ProcessContext context,
        RecordQueuePool queuePool,
        ErrorRecorderFactory errorRecorderFactory,
        IServiceProvider services)
    {
        _logger = logger;
        _outputOptions = outputOptions;
        _context = context;
        _queuePool = queuePool;
        _errorRecorderFactory = errorRecorderFactory;
        _services = services;
    }

    /// <summary>
    /// Starts one worker per database queue, waits until every worker has finished, then marks
    /// the output stage as complete and invokes the optional <c>OutputFinished</c> callback.
    /// </summary>
    /// <param name="ct">Token used to cancel polling, task creation and waiting.</param>
    public async Task ExecuteAsync(CancellationToken ct)
    {
        _logger.LogInformation("***** 输出服务已启动 *****");
        var dbTaskManager = new TaskManager(_queuePool.Queues.Count);
        var dbTasks = new Dictionary<string, Task>();

        while (true)
        {
            // Sample the completion flag BEFORE scanning the queues. This guarantees that at least
            // one full scan happens after the transform has completed, so a queue that appeared
            // just before completion (or a transform that finished before this method started)
            // still gets a worker instead of having its records silently left in the queue.
            var transformCompleted = _context.IsTransformCompleted;

            foreach (var (db, queue) in _queuePool.Queues)
            {
                if (dbTasks.ContainsKey(db))
                {
                    continue;
                }

                dbTasks.Add(db, await dbTaskManager.CreateTaskAsync(
                    async () => await StartDatabaseWorker(db, queue, ct), ct));
            }

            if (transformCompleted)
            {
                break;
            }

            await Task.Delay(500, ct);
        }

        await TaskExtensions.WaitUntil(() => dbTaskManager.RunningTaskCount == 0, 25, ct);
        _context.CompleteOutput();
        _outputOptions.Value.OutputFinished?.Invoke(new DataOutputContext(_services));
        _logger.LogInformation("***** 输出服务执行完毕 *****");
    }

    /// <summary>
    /// Worker loop for a single database: dequeues records, groups them into batches of
    /// <c>FlushCount</c> records and flushes each full batch on a child task; the remaining
    /// records are flushed after all child tasks have finished.
    /// </summary>
    /// <param name="db">Name of the database this worker is responsible for.</param>
    /// <param name="queue">Queue holding the records destined for <paramref name="db"/>.</param>
    /// <param name="ct">Token that stops the loop and cancels task creation and waiting.</param>
    /// <exception cref="ApplicationException">
    /// A record has no database name, or its database name differs from <paramref name="db"/>.
    /// </exception>
    private async Task StartDatabaseWorker(string db, DataRecordQueue queue, CancellationToken ct = default)
    {
        _logger.LogInformation("*****开启输出线程,数据库: {db} *****", db);
        var options = _outputOptions.Value;
        var taskManager = new TaskManager(options.MaxDatabaseOutputTask);
        var ignoreOutput = new HashSet<string>(options.NoOutput);
        var tmp = new List<DataRecord>(options.FlushCount);

        while (!_context.IsTransformCompleted || queue.Count > 0)
        {
            if (ct.IsCancellationRequested)
            {
                break;
            }

            // NOTE(review): if TryDequeue returns immediately on an empty queue, this loop spins
            // without yielding while the transform is still running - confirm against DataRecordQueue.
            if (!queue.TryDequeue(out var record) || record.Ignore || ignoreOutput.Contains(record.TableName))
            {
                continue;
            }

            var dbName = record.Database ?? throw new ApplicationException("输出的记录缺少数据库名");
            if (dbName != db)
            {
                throw new ApplicationException($"输出记录的数据与当前输出线程不匹配,记录:{dbName}, 输出线程:{db}");
            }

            tmp.Add(record);
            if (tmp.Count < options.FlushCount)
            {
                continue;
            }

            // Hand the full batch to a child task and start a fresh buffer, pre-sized so that
            // the next batch does not have to regrow the list from zero capacity.
            var list = tmp;
            tmp = new List<DataRecord>(options.FlushCount);
            await taskManager.CreateTaskAsync(async arg => // TODO: convert to a method group
            {
                // Direct cast: a wrong state object fails with InvalidCastException instead of
                // a NullReferenceException from a silently failed 'as' conversion.
                var tuple = (Tuple<string, List<DataRecord>>)arg!;
                try
                {
                    await FlushAsync(tuple.Item1, tuple.Item2);
                }
                catch (Exception e)
                {
                    _logger.LogError(e, "输出记录时发生错误");
                    throw;
                }
            }, Tuple.Create(dbName, list), ct);
        }

        // Wait for all child flush tasks to finish.
        await TaskExtensions.WaitUntil(() => taskManager.RunningTaskCount == 0, 10, ct);
        _logger.LogDebug("输出线程结束,清理剩余记录[{Count}]", tmp.Count);

        // Flush whatever is left in the partially filled batch.
        if (tmp.Count > 0)
        {
            await FlushAsync(db, tmp);
        }

        _logger.LogInformation("***** 输出线程结束,数据库: {db} *****", db);
    }

    /// <summary>
    /// Writes the given records to the database <paramref name="dbName"/> through a new
    /// <c>MySqlDestination</c>, then reports the per-table record counts to the process context.
    /// </summary>
    /// <param name="dbName">Database the connection is opened against.</param>
    /// <param name="records">Records to write; enumerated exactly once.</param>
    /// <exception cref="ApplicationException">No connection string is configured.</exception>
    private async Task FlushAsync(string dbName, IEnumerable<DataRecord> records)
    {
        var connStr = new MySqlConnectionStringBuilder(_outputOptions.Value.ConnectionString
                                                       ?? throw new ApplicationException("未配置数据库连接字符串"))
        {
            CharacterSet = "utf8mb4",
            AllowUserVariables = true,
            IgnoreCommandTransaction = true,
            TreatTinyAsBoolean = false,
            ConnectionTimeout = 60,
            DefaultCommandTimeout = 0,
            SslMode = MySqlSslMode.None,
            Database = dbName
        }.ConnectionString;
        await using var output = new MySqlDestination(connStr, _logger,
            _outputOptions, _errorRecorderFactory.CreateOutput(dbName), _context);

        // Count the written records per table so the totals can be reported after the flush.
        var tableOutput = new Dictionary<string, int>();
        foreach (var record in records)
        {
            await output.WriteRecordAsync(record);
            tableOutput.AddOrUpdate(record.TableName, 1, (_, v) => v + 1);
        }

        await output.FlushAsync(_outputOptions.Value.MaxAllowedPacket);

        // Counters are only updated once the flush call has returned without throwing.
        foreach (var (key, value) in tableOutput)
        {
            _context.AddOutput(value);
            _context.AddTableOutput(key, value);
        }

        _logger.LogTrace("输出任务:刷新了 {Count} 条记录", tableOutput.Values.Sum());
    }
}