MES-ETL/MesETL.App/Services/ETL/MySqlDestination.cs

267 lines
9.3 KiB
C#
Raw Normal View History

2024-01-29 09:29:16 +08:00
using System.Text;
using System.Text.RegularExpressions;
2024-02-06 15:37:21 +08:00
using MesETL.App.Const;
using MesETL.App.Helpers;
using MesETL.App.Options;
2024-02-10 17:12:26 +08:00
using MesETL.Shared.Helper;
2023-12-29 16:16:05 +08:00
using Microsoft.Extensions.Logging;
2024-01-16 18:00:23 +08:00
using Microsoft.Extensions.Options;
2023-12-29 16:16:05 +08:00
using MySqlConnector;
namespace MesETL.App.Services.ETL;
2023-12-29 16:16:05 +08:00
2024-01-04 09:00:44 +08:00
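/// <summary>
/// MySQL export destination: caches records per table in memory and writes them to the
/// database as batched INSERT statements when flushed.
/// </summary>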
public partial class MySqlDestination : IDisposable, IAsyncDisposable
2023-12-29 16:16:05 +08:00
{
2024-12-10 14:03:09 +08:00
/// <summary>
/// table => records
/// </summary>
2023-12-29 16:16:05 +08:00
private readonly Dictionary<string, IList<DataRecord>> _recordCache;
private readonly MySqlConnection _conn;
private readonly ILogger _logger;
2024-01-29 09:29:16 +08:00
private readonly IOptions<DatabaseOutputOptions> _options;
private readonly ErrorRecorder.OutputErrorRecorder _outputErrorRecorder;
2024-01-12 16:50:37 +08:00
private readonly ProcessContext _context;
/// <summary>
/// Creates the destination and immediately opens a MySQL connection.
/// </summary>
/// <param name="connStr">Connection string handed to <see cref="MySqlConnection"/>.</param>
/// <param name="logger">Logger used for error and warning output.</param>
/// <param name="options">Database output options (column types, "for update" SQL, JSON handling).</param>
/// <param name="outputErrorRecorder">Recorder that receives SQL statements which failed to execute.</param>
/// <param name="context">Process context that collects exceptions raised while flushing.</param>
public MySqlDestination(
    string connStr,
    ILogger logger,
    IOptions<DatabaseOutputOptions> options,
    ErrorRecorder.OutputErrorRecorder outputErrorRecorder,
    ProcessContext context)
{
    // Plain collaborators first; none of these assignments depend on each other.
    _logger = logger;
    _options = options;
    _outputErrorRecorder = outputErrorRecorder;
    _context = context;
    _recordCache = new();

    // The connection is opened eagerly, so a bad connection string fails construction.
    var connection = new MySqlConnection(connStr);
    connection.Open();
    _conn = connection;
}
2024-01-16 15:35:54 +08:00
2023-12-29 16:16:05 +08:00
/// <summary>
/// Adds a single record to the in-memory cache under its table name.
/// No database call is made here.
/// </summary>
/// <param name="record">Record to cache; its <c>TableName</c> is the cache key.</param>
/// <returns>An already-completed task.</returns>
public Task WriteRecordAsync(DataRecord record)
{
    var table = record.TableName;

    // AddOrUpdate is a project helper; presumably it stores the new single-item list for an
    // unseen key and otherwise runs the callback on the existing list — confirm against the helper.
    _recordCache.AddOrUpdate(table, [record], (_, cached) =>
    {
        cached.Add(record);
        return cached;
    });

    return Task.CompletedTask;
}
/// <summary>
/// Caches every record of the sequence, in enumeration order, via <see cref="WriteRecordAsync"/>.
/// </summary>
/// <param name="records">Records to cache.</param>
public async Task WriteRecordsAsync(IEnumerable<DataRecord> records)
{
    // Explicit enumerator form of a foreach loop; the enumerator is disposed on exit.
    using var cursor = records.GetEnumerator();
    while (cursor.MoveNext())
    {
        await WriteRecordAsync(cursor.Current);
    }
}
2024-01-16 15:35:54 +08:00
/// <summary>
/// Builds the INSERT batches for all cached records and executes them one by one on the
/// open connection, then clears the cache.
/// </summary>
/// <remarks>
/// A failure while executing a single batch is logged, added to the process context and handed
/// to the output error recorder; the remaining batches are still executed. A failure while
/// building the batches is logged and rethrown, and in that case the cache is not cleared.
/// </remarks>
/// <param name="maxAllowPacket">Size limit passed on to <see cref="GetExecutionList"/>.</param>
public async Task FlushAsync(int maxAllowPacket)
{
    if (_recordCache.Count == 0)
    {
        return;
    }

    // `await using` already disposes the command on every exit path, so no explicit
    // finally/DisposeAsync is needed (the original disposed it twice).
    await using var cmd = _conn.CreateCommand();
    cmd.CommandTimeout = 0; // presumably "no timeout" (0 = no limit for ADO.NET commands)

    try
    {
        foreach (var insertSql in GetExecutionList(_recordCache, maxAllowPacket))
        {
            cmd.CommandText = insertSql;
            try
            {
                await cmd.ExecuteNonQueryAsync();
            }
            catch (Exception e)
            {
                // Best effort: record the failing statement and keep going with the next batch.
                _logger.LogError(e, "插入数据库时发生错误, sql: {Sql}", cmd.CommandText.Omit(1000));
                _context.AddException(e);

                var match = MatchTableName().Match(cmd.CommandText);
                if (match is { Success: true, Groups.Count: > 1 })
                {
                    var tableName = match.Groups[1].Value;
                    await _outputErrorRecorder.LogErrorSqlAsync(cmd.CommandText, tableName, e);
                }
                else
                {
                    await _outputErrorRecorder.LogErrorSqlAsync(cmd.CommandText, e);
                }
            }
        }

        _recordCache.Clear();
    }
    catch (Exception e)
    {
        // Reached when building a batch throws (or when error recording itself fails).
        _logger.LogError(e, "序列化记录时发生错误");
        throw;
    }
}
/// <summary>
/// Source-generated regex that captures, in group 1, the backtick-quoted table name that
/// follows <c>INSERT INTO</c> in a SQL statement.
/// </summary>
[GeneratedRegex("INSERT INTO `([^`]+)`")]
private static partial Regex MatchTableName();
2023-12-29 16:16:05 +08:00
2024-12-10 14:03:09 +08:00
/// <summary>
/// Lazily builds the SQL batches that insert the given records: one or more multi-row
/// <c>INSERT</c> statements per table, split so that a batch stays within
/// <paramref name="maxAllowPacket"/> characters whenever possible.
/// </summary>
/// <remarks>
/// <para>
/// The column list of every statement is taken from the first record of the table; each record
/// must have the same number of headers.
/// </para>
/// <para>
/// A single record that does not fit into an otherwise empty statement is emitted alone, with a
/// warning, and processing continues with the next record. (Previously such a record was either
/// re-emitted forever or produced an endless stream of <c>INSERT ... VALUES ;</c> statements with
/// no rows, because the record index was never advanced.)
/// </para>
/// </remarks>
/// <param name="tableRecords">Records grouped by table name; tables without records are skipped.</param>
/// <param name="maxAllowPacket">Maximum length, in characters, of a generated batch.</param>
/// <returns>A lazily evaluated sequence of SQL batches.</returns>
/// <exception cref="InvalidOperationException">
/// A record's header count differs from that of the first record of its table.
/// </exception>
public IEnumerable<string> GetExecutionList(IDictionary<string, IList<DataRecord>> tableRecords, int maxAllowPacket)
{
    // Only the very first batch carries this prefix: the builder is cleared after each yield.
    var sb = new StringBuilder("SET AUTOCOMMIT = 1;\n");

    foreach (var (tableName, records) in tableRecords)
    {
        if (records.Count == 0)
        {
            continue;
        }

        // Canonical column order; the fields of every row are emitted in this order.
        var headers = records[0].Headers;
        var recordIdx = 0;

        // Each pass of this loop produces exactly one statement containing at least one row.
        while (recordIdx < records.Count)
        {
            // INSERT INTO `table`(`col1`,`col2`) VALUES
            sb.Append($"INSERT INTO `{tableName}`(");
            for (var i = 0; i < headers.Count; i++)
            {
                sb.Append($"`{headers[i]}`");
                if (i != headers.Count - 1)
                {
                    sb.Append(',');
                }
            }
            sb.Append(") VALUES ");

            // Rows appended to the statement currently being built.
            var rowsInStatement = 0;

            for (; recordIdx < records.Count; recordIdx++)
            {
                var record = records[recordIdx];

                // Column-count validation against the canonical header list.
                if (record.Headers.Count != headers.Count)
                {
                    throw new InvalidOperationException($"数据异常,数据列数量出现冲突,表名:{tableName}");
                }

                // ([FIELDS])
                var recordSb = new StringBuilder();
                recordSb.Append('(');
                for (var idx = 0; idx < headers.Count; idx++)
                {
                    var header = headers[idx];
                    // TODO: lookup by header name could be optimized.
                    var field = record[header];

                    if (field.Length == 2 && field == ConstVar.MyDumperNull)
                    {
                        // MyDumper exports NULL as \N (the backslash is not an escape character here).
                        recordSb.Append(ConstVar.Null);
                    }
                    else
                    {
                        // Columns with a configured type need special rendering.
                        switch (_options.Value.GetColumnType(record.TableName, header))
                        {
                            case ColumnType.Text:
                                if (string.IsNullOrEmpty(field))
                                    recordSb.Append("''");
                                else if (field == ConstVar.Null)
                                    recordSb.Append(ConstVar.Null);
                                else
                                    recordSb.Append($"_utf8mb4 0x{field}");
                                break;
                            case ColumnType.Blob:
                                if (string.IsNullOrEmpty(field))
                                    recordSb.Append("''");
                                else if (field == ConstVar.Null)
                                    recordSb.Append(ConstVar.Null);
                                else
                                    recordSb.Append($"0x{field}");
                                break;
                            case ColumnType.Json:
                                // Mydumper v0.16.7-5 exports JSON as a string and escapes commas; undo that.
                                if (string.IsNullOrEmpty(field))
                                    recordSb.Append(ConstVar.Null);
                                else if (_options.Value.TreatJsonAsHex)
                                    recordSb.Append($"_utf8mb4 0x{field}");
                                else
                                    // NOTE(review): AppendLine adds a line break after the value, unlike
                                    // the other branches; kept as-is — confirm whether Append was intended.
                                    recordSb.AppendLine(field.Replace("\\,", ","));
                                break;
                            case ColumnType.UnDefine:
                            default:
                                recordSb.Append(field);
                                break;
                        }
                    }

                    if (idx != headers.Count - 1)
                    {
                        recordSb.Append(',');
                    }
                }
                recordSb.Append(')');

                // 23 is the original head-room constant, presumably covering the row separator and
                // the statement terminator/suffix appended below.
                var projectedLength = sb.Length + recordSb.Length + 23;
                if (projectedLength > maxAllowPacket)
                {
                    if (rowsInStatement == 0)
                    {
                        // A single row exceeds the limit on its own: emit it alone and move past it,
                        // otherwise the same row would be retried forever.
                        sb.Append(recordSb);
                        recordIdx++;
                        _logger.LogWarning("{Table}表单条数据的SQL超出了配置的MaxAllowedPacket字符数{Count}", tableName,
                            projectedLength);
                    }

                    // Close the current statement; a row that was not appended is retried in the next one.
                    break;
                }

                if (rowsInStatement > 0)
                {
                    sb.Append(',').AppendLine();
                }
                sb.Append(recordSb);
                rowsInStatement++;
            }

            TryAddForUpdateSuffix(tableName, sb);
            if (recordIdx < records.Count)
            {
                // More rows of this table follow in another statement.
                sb.Append(';').AppendLine();
                sb.Append("SET AUTOCOMMIT = 1;");
            }
            else
            {
                // Last statement of this table.
                sb.Append(';');
                sb.Append("COMMIT;");
            }

            yield return sb.ToString();
            sb.Clear();
        }
    }
}
2024-02-15 16:18:50 +08:00
/// <summary>
/// Appends an <c>AS new ON DUPLICATE KEY UPDATE ...</c> suffix to the statement in
/// <paramref name="sb"/> when the options provide "for update" SQL for the table; otherwise
/// leaves the builder untouched. All rows in the statement must belong to that one table.
/// </summary>
/// <param name="tableName">Table the statement inserts into.</param>
/// <param name="sb">Builder holding the statement under construction.</param>
private void TryAddForUpdateSuffix(string tableName, StringBuilder sb)
{
    if (!_options.Value.TryGetForUpdate(tableName, out var forUpdateSql))
    {
        return;
    }

    sb.AppendLine($"""
        AS new
        ON DUPLICATE KEY UPDATE
        {forUpdateSql}
        """);
}
2023-12-29 16:16:05 +08:00
/// <summary>
/// Closes and disposes the database connection and drops any records still cached.
/// Cached records that were not flushed are discarded, not written.
/// </summary>
public void Dispose()
{
    _conn.Close();
    _conn.Dispose();
    _recordCache.Clear();

    // The class is not sealed; per CA1816 suppress finalization so a derived type that
    // adds a finalizer does not run it after an explicit dispose.
    GC.SuppressFinalize(this);
}
/// <summary>
/// Asynchronously closes and disposes the database connection and drops any records still
/// cached. Cached records that were not flushed are discarded, not written.
/// </summary>
/// <returns>A task-like value that completes when the connection has been disposed.</returns>
public async ValueTask DisposeAsync()
{
    await _conn.CloseAsync();
    await _conn.DisposeAsync();
    _recordCache.Clear();

    // The class is not sealed; per CA1816 suppress finalization so a derived type that
    // adds a finalizer does not run it after an explicit dispose.
    GC.SuppressFinalize(this);
}
}