20 Commits

Author SHA1 Message Date
3dbfaffd05 整理项目结构 2024-11-15 14:10:35 +08:00
CZY
c6d97fdc86 新增清理规则 2024-02-26 09:26:18 +08:00
CZY
f689e1b659 添加配置项 2024-02-15 16:18:50 +08:00
CZY
f6af04bfcd fix cache error 2024-02-10 17:45:13 +08:00
CZY
571805250b Optimize structure 2024-02-10 17:12:26 +08:00
CZY
aa7041962a add gc interval 2024-02-10 00:05:50 +08:00
CZY
73895fbce4 Update 2024-02-09 23:18:34 +08:00
CZY
913c725fe1 update 2024-02-09 13:41:40 +08:00
CZY
41a1dc8a4f Csv解析性能优化 2024-02-08 22:19:59 +08:00
CZY
8db7c71170 优化性能 2024-02-08 17:38:23 +08:00
CZY
20cc78c667 新增非法字段检查 2024-02-06 16:35:20 +08:00
CZY
d58c9d5177 新增非法字段检查 2024-02-06 15:37:21 +08:00
CZY
719cd2d8e7 错误修正 2024-02-05 16:47:36 +08:00
CZY
5cda84797b 修改根命名空间,修改分库配置。 2024-02-02 17:14:41 +08:00
CZY
e0de5d1c58 项目重命名 2024-02-01 15:25:42 +08:00
CZY
70cf0322e4 Update 2024-02-01 13:41:59 +08:00
CZY
083090c62b Update 2024-02-01 10:04:00 +08:00
CZY
4f96b77e55 添加数据修复程序 2024-01-24 14:41:19 +08:00
CZY
8e5efa83f1 修复Linux环境下appsettings.json配置文件可能因控制台工作目录不同而无法读取的问题;
修复数据替换时order_module表的ShardKey无法正确赋值的问题;
旧数据库order_data_block.CompanyID数据有误,在数据替换时重新计算;
2024-01-22 15:49:32 +08:00
CZY
8da3110ecd 添加数据分库;
修复taskManager中异步方法没有正常等待的错误;
删除无用的异常捕获;
2024-01-19 11:17:22 +08:00
98 changed files with 4140 additions and 2105 deletions

View File

@@ -1,16 +0,0 @@

Microsoft Visual Studio Solution File, Format Version 12.00
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ConsoleApp2", "ConsoleApp2\ConsoleApp2.csproj", "{155E4B04-E88C-4BA4-AED2-B13E0A0432B5}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{155E4B04-E88C-4BA4-AED2-B13E0A0432B5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{155E4B04-E88C-4BA4-AED2-B13E0A0432B5}.Debug|Any CPU.Build.0 = Debug|Any CPU
{155E4B04-E88C-4BA4-AED2-B13E0A0432B5}.Release|Any CPU.ActiveCfg = Release|Any CPU
{155E4B04-E88C-4BA4-AED2-B13E0A0432B5}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal

View File

@@ -1,7 +0,0 @@
namespace ConsoleApp2.Const;
/// <summary>
/// Names of the pipeline stages; used as keyed-service keys
/// (e.g. [FromKeyedServices(ProcessStep.Consumer)] in VoidOutputService).
/// </summary>
public static class ProcessStep
{
/// <summary>Stage that reads records from the source files.</summary>
public const string Producer = "Producer";
/// <summary>Stage that writes records to the destination.</summary>
public const string Consumer = "Consumer";
}

View File

@@ -1,90 +0,0 @@
using System.ComponentModel.Design;
namespace ConsoleApp2;
/// <summary>
/// One row read from a dump file: the raw field values plus the column
/// headers they are positionally aligned with, tagged with the owning table.
/// </summary>
public class DataRecord
{
    /// <summary>Raw field values, positionally aligned with <see cref="Headers"/>.</summary>
    public string[] Fields { get; }

    /// <summary>Column names, positionally aligned with <see cref="Fields"/>.</summary>
    public string[] Headers { get; }

    /// <summary>Name of the table this row belongs to.</summary>
    public string TableName { get; }

    /// <summary>Target database, assigned later by the transform stage; may remain null.</summary>
    public string? Database { get; set; }

    /// <summary>Company the row belongs to; 0 when unknown.</summary>
    public int CompanyID { get; set; }

    /// <summary>Number of fields in this record.</summary>
    public int Count => Fields.Length;

    /// <summary>
    /// Creates a record; throws when the value count and header count differ.
    /// </summary>
    public DataRecord(string[] fields, string tableName, string[] headers, int companyID=0)
    {
        if (fields.Length != headers.Length)
            throw new ArgumentException(
                $"The number of fields does not match the number of headers. Expected: {headers.Length} Got: {fields.Length} Fields: {string.Join(',', fields)}",
                nameof(fields));
        Fields = fields;
        TableName = tableName;
        Headers = headers;
        CompanyID = companyID;
    }

    /// <summary>Looks up a column's value; returns false when the column is absent.</summary>
    public static bool TryGetField(DataRecord record, string columnName, out string value)
    {
        value = string.Empty;
        if (record.Headers is null)
            throw new InvalidOperationException("Cannot get field when headers of a record have not been set.");
        var position = Array.IndexOf(record.Headers, columnName); // linear scan; could be optimized
        if (position < 0)
            return false;
        value = record.Fields[position];
        return true;
    }

    /// <summary>Returns a column's value; throws when the column is absent.</summary>
    public static string GetField(DataRecord record, string columnName)
    {
        if (record.Headers is null)
            throw new InvalidOperationException("Headers have not been set.");
        var position = Array.IndexOf(record.Headers, columnName);
        return position < 0
            ? throw new IndexOutOfRangeException("Column name not found in this record.")
            : record.Fields[position];
    }

    /// <summary>Gets or sets a field by position.</summary>
    public string this[int index]
    {
        get => Fields[index];
        set => Fields[index] = value;
    }

    /// <summary>Gets a field by column name; throws when the column is absent.</summary>
    public string this[string columnName] => GetField(this, columnName);

    /// <summary>Instance convenience wrapper around the static lookup.</summary>
    public bool TryGetField(string columnName, out string value) => TryGetField(this, columnName, out value);

    /// <summary>Instance convenience wrapper; overwrites the named field of this record.</summary>
    public bool SetField(string columnName, string value) => SetField(this, columnName,value);

    /// <summary>Instance convenience wrapper building the cache key for a column of this record.</summary>
    public string GetCacheKey(string columnName) => GetCacheKey(this, columnName);

    /// <summary>Overwrites the named field on <paramref name="record"/>; throws when the column is absent.</summary>
    public bool SetField( DataRecord record,string columnName,string value)
    {
        if (record.Headers is null)
            throw new InvalidOperationException("Headers have not been set.");
        var position = Array.IndexOf(record.Headers, columnName);
        if (position < 0)
            throw new IndexOutOfRangeException("Column name not found in this record.");
        record.Fields[position] = value;
        return true;
    }

    /// <summary>
    /// Builds a "{table}_{value}" cache key from the named column; throws when
    /// the column is absent. NOTE: uses this instance's TableName, not
    /// record.TableName (preserved original behavior).
    /// </summary>
    public string GetCacheKey(DataRecord record, string columnName)
    {
        if (!TryGetField(record, columnName, out var value))
            throw new IndexOutOfRangeException($"Column name:{columnName} not found in this record.");
        return $"{TableName}_{value}";
    }
}

View File

@@ -1,95 +0,0 @@
using ConsoleApp2.Options;
using System.Text;
using System.Text.RegularExpressions;
namespace ConsoleApp2.Helpers;
/// <summary>
/// Helpers for parsing MyDumper output: extracting table names from dump file
/// paths, and column headers / .dat file references from the companion SQL files.
/// </summary>
public static partial class DumpDataHelper
{
    // Matches a quoted .dat file reference, e.g. 'db.table.00000.dat'.
    [GeneratedRegex(@"'.+\.dat'")]
    private static partial Regex MatchDatFile();

    // Matches the first parenthesized group, e.g. the column list of an INSERT.
    [GeneratedRegex(@"\([^)]*\)")]
    private static partial Regex MatchBrackets();

    /// <summary>
    /// Extracts the CSV column names from the first parenthesized column list
    /// in <paramref name="txt"/>. (Despite the Async suffix this method is
    /// synchronous; the name is kept for caller compatibility.)
    /// </summary>
    public static string[] GetCsvHeadersFromSqlFileAsync(string txt)
    {
        var match = MatchBrackets().Match(txt);
        return ParseHeader(match.ValueSpan);
    }

    /// <summary>
    /// Splits a "(col1,col2,...)" list into individual column names, stripping
    /// surrounding backticks and the '@' prefix of variables.
    /// </summary>
    private static string[] ParseHeader(ReadOnlySpan<char> headerStr)
    {
        headerStr = headerStr[1..^1]; // drop the surrounding parentheses
        // FIX: the previous implementation split into a fixed stackalloc'd
        // Range[50]; a table with more than 50 columns was silently truncated
        // (the 50th range swallowed the rest of the list).
        var parts = headerStr.ToString().Split(',');
        var arr = new string[parts.Length];
        for (var i = 0; i < parts.Length; i++)
        {
            arr[i] = parts[i].AsSpan().Trim("@`").ToString(); // strip backticks; strip '@' when it is a variable
        }
        return arr;
    }

    /// <summary>
    /// Extracts the table name from a dump file path of the form
    /// "dir/db.table.seq.ext": the segment between the first and second dot of
    /// the file name.
    /// </summary>
    public static string GetTableName(ReadOnlySpan<char> filePath)
    {
        // FIX: also accept '/' so paths work on Linux, not only '\' on Windows
        // (the commit history records Linux working-directory fixes).
        filePath = filePath[(filePath.LastIndexOfAny('\\', '/') + 1)..];
        var firstDotIdx = -1;
        var secondDotIdx = -1;
        var times = 0;
        for (var i = 0; i < filePath.Length; i++)
        {
            if (filePath[i] == '.')
            {
                ++times;
                if (times == 1)
                    firstDotIdx = i;
                if (times == 2)
                {
                    secondDotIdx = i;
                    break;
                }
            }
        }
        return filePath[(firstDotIdx + 1)..secondDotIdx].ToString();
    }

    /// <summary>
    /// Returns all file names referenced by <paramref name="regex"/> matches in
    /// <paramref name="txt"/>, with the surrounding quotes removed.
    /// (Synchronous despite the Async suffix; name kept for compatibility.)
    /// </summary>
    public static string[] GetCsvFileNamesFromSqlFileAsync(string txt, Regex regex)
    {
        var matches = regex.Matches(txt);
        return matches.Select(match => match.ValueSpan[1..^1].ToString()).ToArray();
    }

    /// <summary>
    /// Heuristically decides whether <paramref name="str"/> is a hex-encoded
    /// binary field: non-empty, not a quoted string, all ASCII hex digits, and
    /// not purely numeric (to avoid misclassifying plain numbers).
    /// </summary>
    public static bool CheckHexField(string? str)
    {
        if (string.IsNullOrWhiteSpace(str))
            return false;
        if (str.StartsWith('\"'))
            return false;
        var isAllDigit = true;
        foreach (var c in str)
        {
            if (!char.IsAsciiHexDigit(c))
                return false;
            if (!char.IsNumber(c))
                isAllDigit = false;
        }
        if (isAllDigit) // a purely numeric value is treated as a number, not hex
            return false;
        return true;
    }
}

View File

@@ -1,31 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp2.Helpers
{
public static class ValidateConsole
{
    /// <summary>
    /// Prompts on the console and re-reads input until <paramref name="converter"/>
    /// accepts it. An empty (or null) input is accepted immediately so the
    /// caller can fall back to its default value.
    /// </summary>
    /// <typeparam name="T">Unused; kept so existing call sites that pass a type argument keep compiling.</typeparam>
    /// <param name="converter">Validates (and typically parses) one input line; returns true when acceptable.</param>
    /// <param name="message">Prompt written before the first read.</param>
    public static void ValidateInput<T>(Func<string, bool> converter, string message)
    {
        Console.Write(message);
        var input = Console.ReadLine();
        // FIX: the original loop ended with an unconditional break, so after
        // one retry the next input was accepted without being validated (and
        // the loop never actually looped). Keep re-prompting until the input
        // is empty (caller default) or passes validation.
        while (!string.IsNullOrEmpty(input) && !converter(input))
        {
            Console.WriteLine("输入的内容不合法,请重新输入!");
            input = Console.ReadLine();
        }
    }
}
}

View File

@@ -1,13 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp2.HostedServices.Abstractions
{
/// <summary>
/// A disposable source of <see cref="DataRecord"/>s (created per table via
/// DataInputOptions.CreateSource).
/// </summary>
public interface IDataSource:IDisposable
{
/// <summary>
/// Reads every record from the source, invoking <paramref name="action"/>
/// for each; the returned task completes when the source is exhausted.
/// </summary>
public Task DoEnqueue(Action<DataRecord> action);
}
}

View File

@@ -1,9 +0,0 @@
using ConsoleApp2.Options;
using ConsoleApp2.Services;
namespace ConsoleApp2.HostedServices.Abstractions;
public interface IInputService
{
/// <summary>
/// Reads all records for the tables configured in <paramref name="tasksOptions"/>
/// and enqueues them onto <paramref name="producerQueue"/>; marks
/// <paramref name="context"/> input-complete when finished.
/// </summary>
public Task ExecuteAsync(TasksOptions tasksOptions, DataRecordQueue producerQueue, ProcessContext context, CancellationToken cancellationToken);
}

View File

@@ -1,9 +0,0 @@
using ConsoleApp2.Options;
using ConsoleApp2.Services;
namespace ConsoleApp2.HostedServices.Abstractions;
public interface IOutputService
{
/// <summary>
/// Drains a batch of records from <paramref name="consumerQueue"/> and writes
/// it to the destination.
/// NOTE(review): returns void despite the Async suffix, so callers cannot
/// await completion or observe failures — consider returning Task.
/// </summary>
public void ExecuteAsync(TasksOptions tasksOptions, DataRecordQueue consumerQueue, ProcessContext context, CancellationToken cancellationToken);
}

View File

@@ -1,9 +0,0 @@
using ConsoleApp2.Options;
using ConsoleApp2.Services;
namespace ConsoleApp2.HostedServices.Abstractions;
public interface ITransformService
{
/// <summary>
/// Consumes records from <paramref name="producerQueue"/>, applies the
/// configured filter/modify/cache/replace/add hooks, and enqueues the results
/// onto <paramref name="consumerQueue"/>; marks <paramref name="context"/>
/// transform-complete when finished.
/// </summary>
public Task ExecuteAsync(TasksOptions tasksOptions, DataRecordQueue producerQueue, DataRecordQueue consumerQueue, ProcessContext context, CancellationToken cancellationToken);
}

View File

@@ -1,66 +0,0 @@
using ConsoleApp2.Const;
using ConsoleApp2.Helpers;
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace ConsoleApp2.HostedServices;
/// <summary>
/// 从MyDumper导出的CSV文件中导入表头和数据
/// </summary>
public class InputService : IInputService
{
private readonly ILogger _logger;
private readonly IOptions<DataInputOptions> _dataInputOptions;
// NOTE(review): this injected (shared) context receives AddInput and the
// exception check below, while the per-call 'context' parameter receives
// CompleteInput. MainHostedService passes freshly created contexts per table
// group, so the counts and the completion flag end up on different objects —
// confirm this mixing is intentional.
private readonly ProcessContext _context;
public InputService(ILogger<InputService> logger,
IOptions<DataInputOptions> dataInputOptions,
ProcessContext context)
{
_logger = logger;
_dataInputOptions = dataInputOptions;
_context = context;
}
// Reads every configured table's source and pushes its records onto the
// producer queue, one table at a time.
public async Task ExecuteAsync(TasksOptions tasksOptions, DataRecordQueue producerQueue, ProcessContext context, CancellationToken cancellationToken)
{
var inputDir = _dataInputOptions.Value.InputDir;
_logger.LogInformation("***** Csv input service start, working dir: {InputDir}, thread id: {ThreadId} *****", inputDir, Environment.CurrentManagedThreadId);
// The file listing is only used as an emptiness check; the actual per-table
// sources are created by the CreateSource factory below.
var files = Directory.GetFiles(inputDir);
if (files.Length == 0)
{
_logger.LogInformation("No source files found in {InputDir}", inputDir);
return;
}
// count is only incremented, never read — kept for debugging.
var count = 0;
foreach (var tableName in tasksOptions.TableInfoConfig.Keys)
{
_logger.LogInformation("Working table: {tableName}", tableName);
// Tables with no registered source factory (or none configured) are skipped.
var source = _dataInputOptions.Value.CreateSource?.Invoke(tableName);
if (source != null)
{
await source.DoEnqueue((record) =>
{
_context.AddInput();
producerQueue.Enqueue(record);
count++;
});
}
// Abort the whole input stage as soon as any stage has recorded an exception.
if (!_context.GetExceptions().IsEmpty)
{
_logger.LogInformation("***** Csv input service is canceled *****");
return;
}
_logger.LogInformation("table:'{tableName}' input completed", tableName);
}
context.CompleteInput();
_logger.LogInformation("***** Csv input service completed *****");
}
}

View File

@@ -1,125 +0,0 @@
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
namespace ConsoleApp2.HostedServices;
/// <summary>
/// Entry-point hosted service: wires the input → transform → output pipeline
/// for the "big" and "small" table groups, each with its own queues and its
/// own <see cref="ProcessContext"/>.
/// </summary>
public class MainHostedService : IHostedService
{
    private readonly ILogger _logger;
    private readonly IInputService _input;
    private readonly ITransformService _transform;
    private readonly IOutputService _output;
    private readonly ProcessContext _context;
    // NOTE(review): these fields are never assigned — the timers created in
    // StartAsync are stored only in the lambda's parameter copy, so the running
    // timers are not reachable from this instance and cannot be disposed on
    // shutdown; confirm they are kept alive by the timer callbacks themselves.
    private readonly Timer? _bigTableTimer=null;
    private readonly Timer? _smallTableTimer=null;

    public MainHostedService(ILogger<MainHostedService> logger, IInputService input, ITransformService transform, IOutputService output, ProcessContext context)
    {
        _logger = logger;
        _input = input;
        _transform = transform;
        _output = output;
        _context = context;
    }

    public Task StartAsync(CancellationToken cancellationToken)
    {
        // Launches the three pipeline stages for one table group as
        // fire-and-forget tasks. Failures are recorded on the shared context;
        // the tasks are deliberately not awaited so StartAsync returns at once.
        var taskFun = (TasksOptions taskOp, DataRecordQueue producerQueue, DataRecordQueue consumerQueue, ProcessContext context, Timer? timer) =>
        {
            Task.Factory.StartNew(async () =>
            {
                try
                {
                    await _input.ExecuteAsync(taskOp, producerQueue, context, cancellationToken);
                }
                catch (Exception ex)
                {
                    _context.AddException(ex);
                    _logger.LogError("Exception occurred on inputService:{Message},{StackTrace}", ex.Message, ex.StackTrace);
                }
            });
            Task.Factory.StartNew(async () =>
            {
                try
                {
                    await _transform.ExecuteAsync(taskOp, producerQueue, consumerQueue, context, cancellationToken);
                }
                catch (Exception ex)
                {
                    _context.AddException(ex);
                    _logger.LogError("Exception occurred on transformService:{Message},{StackTrace}", ex.Message, ex.StackTrace);
                }
            });
            Task.Factory.StartNew(() =>
            {
                try
                {
                    // Polls the output stage twice a second; ExecuteAsync itself
                    // decides whether enough data is queued to flush a batch.
                    timer = new Timer((object? state) =>
                    {
                        _output.ExecuteAsync(taskOp, consumerQueue, context, cancellationToken);
                    }, null, TimeSpan.Zero, TimeSpan.FromSeconds(0.5));
                }
                catch (Exception ex)
                {
                    _context.AddException(ex);
                    _logger.LogError("Exception occurred on outputService:{Message},{StackTrace}", ex.Message, ex.StackTrace);
                }
            });
        };
        // SimulaRowCount values are row-count estimates for each table.
        var bigTablesDic = new Dictionary<string, TableInfo>
        {
            {"order",new TableInfo{SimulaRowCount=5019216 }},
            {"order_block_plan",new TableInfo{SimulaRowCount=2725553 }},// rows with CreateTime < 202301 are dropped
            {"order_block_plan_result",new TableInfo{SimulaRowCount=1174096 }},
            {"order_box_block",new TableInfo{SimulaRowCount=29755672 }},
            {"order_item",new TableInfo{SimulaRowCount=1345520079 }},
            {"simple_plan_order",new TableInfo{SimulaRowCount=351470 }},// rows with CreateTime < 202301 are dropped
        };
        var bigTableContext = new ProcessContext();
        var bigTableOptions = new TasksOptions { TableInfoConfig = bigTablesDic, OutPutOptions = new OutPutOptions { FlushCount = 20000, OutPutTaskCount = 2 } };
        taskFun(bigTableOptions, new DataRecordQueue(), new DataRecordQueue(), bigTableContext, _bigTableTimer);
        var smallTablesDic = new Dictionary<string, TableInfo>
        {
            {"machine",new TableInfo{SimulaRowCount=14655 }},
            {"order_data_block",new TableInfo{SimulaRowCount=731800334 }},
            {"order_data_goods",new TableInfo{SimulaRowCount=25803671 }},
            {"order_data_parts",new TableInfo{SimulaRowCount=468517543 }},
            {"order_module",new TableInfo{SimulaRowCount=103325385 }},
            {"order_module_extra",new TableInfo{SimulaRowCount=54361321 }},
            {"order_module_item",new TableInfo{SimulaRowCount=69173339 }},
            {"order_package",new TableInfo{SimulaRowCount=16196195 }},
            {"order_process",new TableInfo{SimulaRowCount=3892685 }},// rows with OrderNo < 202301 are dropped
            {"order_process_step",new TableInfo{SimulaRowCount=8050349 }},// rows with OrderNo < 202301 are dropped
            {"order_process_step_item",new TableInfo{SimulaRowCount=14538058 }},// rows with OrderNo < 202301 are dropped
            {"order_scrap_board",new TableInfo{SimulaRowCount=123998 }},
            {"process_group",new TableInfo{SimulaRowCount=1253 }},
            {"process_info",new TableInfo{SimulaRowCount=7839 }},
            {"process_item_exp",new TableInfo{SimulaRowCount=28 }},
            {"process_schdule_capacity",new TableInfo{SimulaRowCount=39736 }},
            {"process_step_efficiency",new TableInfo{SimulaRowCount=8 }},
            {"report_template",new TableInfo{SimulaRowCount=7337 }},
            {"simple_package",new TableInfo{SimulaRowCount=130436 }},// rows with OrderNo < 202301 are dropped
            {"sys_config",new TableInfo{SimulaRowCount=2296 }},
            {"work_calendar",new TableInfo{SimulaRowCount=11 }},
            {"work_shift",new TableInfo{SimulaRowCount=59 }},
            {"work_time",new TableInfo{SimulaRowCount=62 }},
        };
        var smallTableContext = new ProcessContext();
        taskFun(new TasksOptions { TableInfoConfig = smallTablesDic, OutPutOptions = new OutPutOptions { FlushCount = 20000, OutPutTaskCount = 4 } },
            new DataRecordQueue(), new DataRecordQueue(), smallTableContext, _smallTableTimer);
        return Task.CompletedTask;
    }

    public Task StopAsync(CancellationToken cancellationToken)
    {
        // FIX: previously threw NotImplementedException, which aborts graceful
        // host shutdown (the host awaits StopAsync of every hosted service).
        // There is no pipeline-teardown logic yet, so simply report completion.
        return Task.CompletedTask;
    }
}

View File

@@ -1,94 +0,0 @@

using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace ConsoleApp2.HostedServices;
/// <summary>
/// 数据导出服务将数据导出至MySql服务
/// </summary>
public class OutputService : IOutputService
{
private readonly ILogger _logger;
private readonly IOptions<DatabaseOutputOptions> _outputOptions;
private readonly IOptions<DataTransformOptions> _transformOptions;
private readonly ProcessContext _context;
// NOTE(review): injected but never used in this class — confirm it is still needed.
private readonly TaskManager _taskManager;
private readonly ErrorRecorder _errorRecorder;
public OutputService(ILogger<OutputService> logger,
IOptions<DatabaseOutputOptions> outputOptions,
ProcessContext context,
TaskManager taskManager,
IOptions<DataTransformOptions> transformOptions,
ErrorRecorder errorRecorder)
{
_logger = logger;
_outputOptions = outputOptions;
_context = context;
_taskManager = taskManager;
_transformOptions = transformOptions;
_errorRecorder = errorRecorder;
}
// Number of in-flight flush tasks (field name misspelled; kept for compatibility).
private int _runingTaskCount;
public int RuningTaskCount
{
get => _runingTaskCount;
}
public void DoTask() => Interlocked.Increment(ref _runingTaskCount);
public void FinishTask() => Interlocked.Decrement(ref _runingTaskCount);
// Called periodically (from a timer in MainHostedService): dequeues up to
// FlushCount records and flushes them on a background worker.
// NOTE(review): the RuningTaskCount check below and the DoTask() increment
// inside the worker are not atomic, so concurrent timer ticks can exceed
// OutPutTaskCount — confirm acceptable.
public void ExecuteAsync(TasksOptions tasksOptions, DataRecordQueue consumerQueue, ProcessContext context, CancellationToken cancellationToken)
{
// Wait for a full batch unless the transform stage has already finished.
if (context.IsTransformCompleted == false && consumerQueue.Count < tasksOptions.OutPutOptions.FlushCount) return;
if (RuningTaskCount >= tasksOptions.OutPutOptions.OutPutTaskCount ) return;
var records = new List<DataRecord>();
for (int i = 0; i < tasksOptions.OutPutOptions.FlushCount; i++)
{
if (consumerQueue.TryDequeue(out var record)) records.Add(record);
else break;
}
if (records.Count > 0)
{
// NOTE(review): an async lambda passed to QueueUserWorkItem runs as
// async void — exceptions thrown by FlushAsync are unobservable here and
// FinishTask() would be skipped on failure; confirm this is handled.
ThreadPool.QueueUserWorkItem(async (queueState) =>
{
DoTask();
await FlushAsync(records);
FinishTask();
});
}
}
// Writes one batch of records to MySQL and records the output count.
private async Task FlushAsync(IEnumerable<DataRecord> records)
{
var count = 0;
await using var output = new MySqlDestination(
_outputOptions.Value.ConnectionString ?? throw new InvalidOperationException("Connection string is required"),
_logger, _context, _transformOptions, _errorRecorder);
//if (records == null || records.Count() == 0) return;
//var dbName = $"cferp_test_1";
//if (records != null && records.Count() > 0)
//{
// dbName = $"cferp_test_{records.FirstOrDefault()?.CompanyID}";
//}
//await using var output = new MySqlDestination(new MySqlConnectionStringBuilder
//{
// Server = "127.0.0.1",
// Port = 34309,
// Database = dbName,
// UserID = "root",
// Password = "123456",
// MaximumPoolSize = 50,
//}.ConnectionString, _logger,true);
foreach (var record in records)
{
await output.WriteRecordAsync(record);
count++;
}
await output.FlushAsync(_outputOptions.Value.MaxAllowedPacket);
_context.AddOutput(count);
}
}

View File

@@ -1,88 +0,0 @@
using ConsoleApp2.Const;
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using Microsoft.Extensions.Caching.Distributed;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StackExchange.Redis;
namespace ConsoleApp2.HostedServices;
/// <summary>
/// 数据处理服务,对导入后的数据进行处理
/// </summary>
public class TransformService : ITransformService
{
private readonly ILogger _logger;
private readonly IOptions<DataTransformOptions> _options;
// NOTE(review): the injected (shared) context is used for exception checks
// and AddTransform, while the 'context' parameter carries the completion
// flags — MainHostedService passes per-group contexts, so these differ;
// confirm the mixing is intentional.
private readonly ProcessContext _context;
private readonly IDistributedCache _cache;
public TransformService(ILogger<TransformService> logger,
IOptions<DataTransformOptions> options,
ProcessContext context,
IDistributedCache cache)
{
_logger = logger;
_options = options;
_context = context;
_cache = cache;
}
// Pulls records off the producer queue, applies the configured hooks in order
// (filter → database selection → modify → cache → replace → add), and pushes
// the results onto the consumer queue.
public async Task ExecuteAsync(TasksOptions tasksOptions, DataRecordQueue producerQueue, DataRecordQueue consumerQueue, ProcessContext context, CancellationToken cancellationToken)
{
_logger.LogInformation("***** Data transform service started, thread id: {ThreadId} *****", Environment.CurrentManagedThreadId);
while ((!context.IsInputCompleted || producerQueue.Count > 0))
{
// Abort as soon as any stage has recorded an exception.
if (_context.GetExceptions().Count > 0)
{
_logger.LogInformation("***** Csv transform service is canceled *****");
return;
}
// NOTE(review): busy-spins when the queue is momentarily empty (no wait/
// delay) — confirm CPU usage is acceptable.
if (!producerQueue.TryDequeue(out var record)) continue;
// Drop records rejected by the filter hook.
if (_options.Value.RecordFilter != null)
{
var result = await _options.Value.RecordFilter.Invoke(record, _cache);
if (result == false) continue;
}
record.Database = _options.Value.DatabaseFilter?.Invoke(record);
// In-place modification hook.
_options.Value.RecordModify?.Invoke(record);
// Cache hook: stores values other records will look up later.
if (_options.Value.RecordCache != null)
{
await _options.Value.RecordCache.Invoke(record, _cache);
}
// Replacement hook: a non-null result substitutes the record entirely.
if (_options.Value.RecordReplace != null)
{
var result = await _options.Value.RecordReplace.Invoke(record, _cache);
if (result != null)
{
record = result;
}
}
consumerQueue.Enqueue(record);
_context.AddTransform();
// Addition hook: derived records generated from the current one.
var addRecords = _options.Value.RecordAdd?.Invoke(record);
if (addRecords != null && addRecords.Count > 0)
{
foreach (var rc in addRecords)
{
consumerQueue.Enqueue(rc);
_context.AddTransform();
}
}
}
context.CompleteTransform();
_logger.LogInformation("***** Data transformation service completed *****");
}
}

View File

@@ -1,38 +0,0 @@
using ConsoleApp2.Const;
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
namespace ConsoleApp2.HostedServices;
/// <summary>
/// No-op output service for testing: drains the consumer queue and counts the
/// records without writing them anywhere.
/// </summary>
public class VoidOutputService : IOutputService
{
private readonly ILogger _logger;
private readonly DataRecordQueue _consumerQueue;
private readonly ProcessContext _context;
public VoidOutputService([FromKeyedServices(ProcessStep.Consumer)] DataRecordQueue consumerQueue,
ProcessContext context, ILogger<VoidOutputService> logger)
{
_consumerQueue = consumerQueue;
_context = context;
_logger = logger;
}
// NOTE(review): uses the injected queue/context and ignores the method
// parameters (unlike OutputService) — confirm intended for the test setup.
// Also busy-spins while the queue is empty.
public void ExecuteAsync(TasksOptions tasksOptions, DataRecordQueue consumerQueue, ProcessContext context, CancellationToken cancellationToken)
{
_logger.LogInformation("***** Void output service started, thread id: {ThreadId} *****", Environment.CurrentManagedThreadId);
while (!_context.IsTransformCompleted || _consumerQueue.Count > 0)
{
if (_consumerQueue.TryDequeue(out var record))
_context.AddOutput();
}
_context.CompleteOutput();
_logger.LogInformation("***** Void output service completed *****");
}
}

View File

@@ -1,18 +0,0 @@
using System;
using System.ComponentModel;
using System.Configuration;
namespace ConsoleApp2.Options
{
/// <summary>
/// Top-level options bound from the "CmdOptions" configuration section.
/// </summary>
public class CommandOptions
{
/// <summary>Directory containing the MyDumper export files.</summary>
public string InputDir { get; set; } = "./MyDumper";
/// <summary>When true, the transform hooks are left unconfigured (mock/dry run).</summary>
public bool IsMock { get; set; } = false;
public string NoFilterTables { get; set; }="";// comma-separated list of tables exempt from record filtering
/// <summary>When true, binary fields are emitted with the _utf8mb4 prefix.</summary>
public bool Isutf8mb4 { get; set; } = true;
/// <summary>Records with a ShardKey below this value are dropped.</summary>
public short OldestShardKey { get; set; } = 23010;
/// <summary>Oldest month (yyyyMM) to keep; older records are dropped.</summary>
public string OldestTime { get; set; } = "202301";
}
}

View File

@@ -1,19 +0,0 @@
namespace ConsoleApp2.Options;
/// <summary>
/// CSV parsing settings for files exported by MyDumper.
/// </summary>
public class CsvOptions
{
// NOTE(review): superseded by DataInputOptions.InputDir — confirm before re-enabling.
//public string InputDir { get; set; } = "./";
/// <summary>
/// Quote character surrounding string fields; defaults to a double quote (").
/// </summary>
public char QuoteChar { get; set; } = '"';
/// <summary>
/// Field delimiter; defaults to a comma (,).
/// </summary>
public string Delimiter { get; set; } = ",";
}

View File

@@ -1,18 +0,0 @@
using ConsoleApp2.Services;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection.PortableExecutable;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp2.Options
{
// NOTE(review): only CSV appears in the visible code; "JWT"/"JSV" look unusual
// (possibly JSON/TSV typos) — confirm before relying on them.
public enum InputFileType { CSV, JWT, JSV }
/// <summary>
/// Configures where the input files live and how a per-table source is created.
/// </summary>
public class DataInputOptions
{
/// <summary>Directory containing the input files.</summary>
public string InputDir { get; set; } = "./";
/// <summary>Factory mapping a table name to the data source that reads it.</summary>
public Func<string, CsvSource>? CreateSource { get; set; }
}
}

View File

@@ -1,36 +0,0 @@
using Microsoft.Extensions.Caching.Distributed;
using StackExchange.Redis;
namespace ConsoleApp2.Options;
/// <summary>
/// Content category of a column, used to decide how its value is rendered.
/// </summary>
public enum ColumnType
{
Blob,
Text,
Json,
UnDefine,
}
/// <summary>
/// Hook points of the transform stage; every delegate is optional.
/// </summary>
public class DataTransformOptions
{
public Func<DataRecord, string>? DatabaseFilter { get; set; }// chooses the target database for a record
public Func<string, string>? TransformBinary { get; set; }// converts a binary (hex) field to its SQL literal form
public Func<DataRecord, IDistributedCache, Task<bool>>? RecordFilter { get; set; }// record filter: return false to drop the record
public Action<DataRecord>? RecordModify { get; set; }// in-place record modification
public Func<DataRecord, IDistributedCache, Task<DataRecord?>>? RecordReplace { get; set; }// record replacement: non-null result substitutes the record
public Func<DataRecord, IList<DataRecord>?>? RecordAdd { get; set; }// additional derived records (original comment said "replace" — copy/paste slip)
public Func<DataRecord, IDistributedCache, Task>? RecordCache { get; set; }// caches record-derived values for later lookups
/// <summary>
/// Special column types of imported data, keyed "table.column".
/// </summary>
public Dictionary<string, ColumnType> ColumnTypeConfig { get; set; } = new(); // "table.column" -> type
/// <summary>Returns the configured type, or UnDefine when not configured.</summary>
public ColumnType GetColumnType(string table, string column)
{
return ColumnTypeConfig.GetValueOrDefault($"{table}.{column}", ColumnType.UnDefine);
}
}

View File

@@ -1,10 +0,0 @@
namespace ConsoleApp2.Options;
/// <summary>
/// Settings for the MySQL output stage.
/// </summary>
public class DatabaseOutputOptions
{
/// <summary>
/// Database connection string (required by OutputService).
/// </summary>
public string? ConnectionString { get; set; }
/// <summary>
/// Upper bound in bytes for a flushed batch (passed to MySqlDestination.FlushAsync);
/// defaults to 64 MiB.
/// </summary>
public int MaxAllowedPacket { get; set; } = 64*1024*1024;
}

View File

@@ -1,24 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp2.Options
{
/// <summary>Per-table metadata.</summary>
public class TableInfo
{
public long SimulaRowCount { get; set; }// simulated/estimated row count for this table
}
/// <summary>Settings for one pipeline run over a group of tables.</summary>
public class TasksOptions
{
/// <summary>Tables to process, keyed by table name.</summary>
public Dictionary<string, TableInfo> TableInfoConfig { get; set; } = new();
// NOTE(review): not read anywhere in the visible code — confirm it is used elsewhere.
public int TransformTaskCount { get; set; } = 1;
public OutPutOptions OutPutOptions { get; set; }=new();
}
/// <summary>Output batching settings.</summary>
public class OutPutOptions
{
/// <summary>Number of records written per flush batch.</summary>
public int FlushCount { get; set; } = 10000;
/// <summary>Maximum number of concurrent output tasks.</summary>
public int OutPutTaskCount { get; set; } = 1;
}
}

View File

@@ -1,460 +0,0 @@
using ConsoleApp2;
using ConsoleApp2.Const;
using ConsoleApp2.HostedServices;
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using Microsoft.Extensions.Caching.StackExchangeRedis;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using MySqlConnector;
using Serilog;
using Microsoft.Extensions.Caching.Distributed;
using Serilog.Events;
await RunProgram();
return;
async Task RunProgram()
{
//var inputDir= "D:\\MyDumper";
//ValidateConsole.ValidateInput<string>((_inputDir) =>
//{
// if (Directory.Exists(_inputDir))
// {
// inputDir = _inputDir;
// return true;
// }
// else return false;
//}, "请输入读取csv文件的目录(默认为当前目录下MyDumper文件夹):");
//var maxTask = 16;
//ValidateConsole.ValidateInput<string>((_inputDir) =>
//{
// _ = int.TryParse(_inputDir.ToString(), out var _taskCount);
// if (_taskCount > 0) {
// maxTask = _taskCount;
// return true;
// }
// else return false;
//}, "请输入执行输出的线程数量(默认为16):");
//var flushCount = 2_0000;
//ValidateConsole.ValidateInput<string>((_inputDir) =>
//{
// _ = int.TryParse(_inputDir.ToString(), out var _flashCount);
// if (_flashCount > 0)
// {
// flushCount = _flashCount;
// return true;
// } else return false;
//}, "请输入单次插入的行数(默认为20000):");
ThreadPool.SetMaxThreads(8, 4);
var host = Host.CreateApplicationBuilder(args);
var commandOptions = host.Configuration.GetSection("CmdOptions").Get<CommandOptions>() ?? new CommandOptions();
Console.WriteLine($"InputDir:{commandOptions?.InputDir}");
if (commandOptions == null) throw new ArgumentNullException("commandOptions is null");
var oldestTime = DateTime.ParseExact(commandOptions.OldestTime, "yyyyMM", System.Globalization.DateTimeFormatInfo.InvariantInfo);
host.Services.Configure<CsvOptions>(option =>
{
option.Delimiter = ",";
option.QuoteChar = '"';
});
host.Services.Configure<DataInputOptions>(options =>
{
options.InputDir = commandOptions.InputDir;
var _csvOptions = new CsvOptions { Delimiter = ",", QuoteChar = '"' };
options.CreateSource = (string tableName) =>
{
var source = new ZstSource(commandOptions.InputDir, tableName, _csvOptions.Delimiter, _csvOptions.QuoteChar);
return source;
};
});
host.Services.Configure<DataTransformOptions>(options =>
{
if (commandOptions.IsMock) return;
options.DatabaseFilter = record => "cferp_test";
options.TransformBinary = field => commandOptions != null && commandOptions.Isutf8mb4 ? $"_utf8mb4 0x{field}" : $"0x{field}";
var noFilterTables = commandOptions.NoFilterTables.Split(",");
//数据过滤
options.RecordFilter = async (record, cache) =>
{
if (noFilterTables.Contains(record.TableName)) return true;
if (record.TryGetField("ShardKey", out var skStr))
{
short.TryParse(skStr, out var sk);
if (sk < commandOptions.OldestShardKey) return false;
}
if (record.TryGetField("CreateTime", out var createTime))
{
_ = DateTime.TryParse(createTime.Replace("\"", ""), out var time);
if (time < oldestTime) return false;
}
if (record.TryGetField("OrderNo", out var orderNo))
{
try
{
var yearMonth = orderNo.Substring(0, 6);
var dt = DateTime.ParseExact(yearMonth, "yyyyMM", System.Globalization.DateTimeFormatInfo.InvariantInfo);
if (dt < oldestTime) return false;
}
catch (Exception)
{
return false;//订单号转换失败,跳过
}
}
if (record.TableName == "order_package")
{
if (record.TryGetField("PakageNo", out var pkNo))
{
if (pkNo.Length <= 2) return false;
}
}
if (record.TableName == "order_block_plan")
{
if (record.TryGetField("OrderNos", out var nos))
{
if (nos.Length <= 2) return false;
}
}
if (record.TableName == "order_process_step" || record.TableName == "order_process_step_item")
{
//如果缓存中不存在OrderProcessID,则丢弃
if(record.TryGetField("OrderProcessID",out string orderProcessID))
{
var value = await cache.GetStringAsync($"order_process_{orderProcessID}");
if (string.IsNullOrEmpty(value)) return false;
}
}
if (record.TableName == "order_block_plan_result" )
{
//如果缓存中不存在ID,则丢弃(ID 对应order_block_plan中的ID)
if (record.TryGetField("ID", out string id))
{
var value = await cache.GetStringAsync($"order_block_plan_{id}");
if (string.IsNullOrEmpty(value)) return false;
}
}
return true;
};
//数据修改
options.RecordModify = (record) =>
{
if (record.TableName == "order_block_plan")
{
if (record.TryGetField("OrderNos", out var nos))
{
if (nos.Length <= 2) record.SetField("OrderNos", "");
}
}
if (record.TableName == "order_process")//修改order_process.NextStepID的默认值为0
{
if (record.TryGetField("NextStepID", out var idStr))
{
if (idStr == "\\N")
{
record.SetField("NextStepID", "0");
}
}
}
};
//数据缓存
options.RecordCache = async (record, cache) =>
{
if (record.TableName == "order")
{
if (record.TryGetField("OrderNo", out var orderNo))
{
if (record.TryGetField("CompanyID", out var companyid))
{
await cache.SetStringAsync(record.GetCacheKey("OrderNo"), companyid);
}
}
}
if (record.TableName == "order_process")
{
if (record.TryGetField("OrderNo", out var orderNo))
{
var yearMonth = orderNo.Substring(2, 4);
var sk = yearMonth + "0";
if( record.TryGetField("ID", out var id))
{
await cache.SetStringAsync(record.GetCacheKey("ID"), sk);
}
}
}
if (record.TableName == "order_block_plan")
{
if (record.TryGetField("CompanyID", out var companyid))
{
record.TryGetField("ID", out var id);
await cache.SetStringAsync(record.GetCacheKey("ID"), companyid);
}
}
};
//数据替换
// Per-record replacement hook. Returning a new DataRecord substitutes it for the source
// record; returning null keeps the original record unchanged.
options.RecordReplace = async (record, cache) =>
{
    // simple_plan_order: drop the ProcessState column (header and value) from the source data.
    if (record.TableName == "simple_plan_order")
    {
        var nextStepIdIndex = Array.IndexOf(record.Headers, "ProcessState");
        if (nextStepIdIndex > -1)
        {
            var headers = record.Headers.Where(t => t != "ProcessState").ToArray();
            var fs = record.Fields.ToList();
            fs.RemoveAt(nextStepIdIndex);
            var fields = fs.ToArray();
            return new DataRecord(fields, record.TableName, headers, record.CompanyID);
        }
    }
    // order: drop the IsBatch column.
    if (record.TableName == "order")
    {
        var nextStepIdIndex = Array.IndexOf(record.Headers, "IsBatch");
        if (nextStepIdIndex > -1)
        {
            var headers = record.Headers.Where(t => t != "IsBatch").ToArray();
            var fs = record.Fields.ToList();
            fs.RemoveAt(nextStepIdIndex);
            var fields = fs.ToArray();
            return new DataRecord(fields, record.TableName, headers, record.CompanyID);
        }
    }
    // order_block_plan_result: append a CompanyID column, looked up from the cache by plan ID.
    // A failed lookup/parse yields CompanyID 0.
    if (record.TableName == "order_block_plan_result")
    {
        if (record.TryGetField("ID", out var id))
        {
            var headers = new List<string>(record.Headers);
            var fields =new List<string>(record.Fields);
            headers.Add("CompanyID");
            var companyidResult =await cache.GetStringAsync($"order_block_plan_{id}");
            _ = int.TryParse(companyidResult, out var companyid);
            fields.Add(companyid.ToString());
            return new DataRecord(fields.ToArray(), record.TableName, headers.ToArray(), companyid);
        }
    }
    // order_box_block: if CompanyID is missing, resolve it from the cache via OrderNo.
    if(record.TableName == "order_box_block")
    {
        if (!record.TryGetField("CompanyID", out var companyid))
        {
            if (record.TryGetField("OrderNo", out var orderNo))
            {
                var headers = new List<string>(record.Headers);
                var fields = new List<string>(record.Fields);
                headers.Add("CompanyID");
                var companyidResult = await cache.GetStringAsync($"order_{orderNo}");
                _ = int.TryParse(companyidResult, out var cpid);
                fields.Add(cpid.ToString());
                return new DataRecord(fields.ToArray(), record.TableName, headers.ToArray(), cpid);
            }
        }
    }
    // order_module: drop the ViewFileName column when present.
    if (record.TableName == "order_module")
    {
        if (record.TryGetField("ViewFileName",out var value))
        {
            var index=Array.IndexOf(record.Headers, "ViewFileName");
            var headers = new List<string>(record.Headers);
            headers.RemoveAt(index);
            var fields = new List<string>(record.Fields);
            fields.RemoveAt(index);
            return new DataRecord(fields.ToArray(), record.TableName, headers.ToArray(), record.CompanyID);
        }
    }
    // order_process: if ShardKey is missing, derive it from OrderNo.
    // Assumes OrderNo embeds a year-month (yyMM) at character offset 2 — TODO confirm format.
    if (record.TableName == "order_process")
    {
        if (!record.TryGetField("ShardKey", out var skStr))
        {
            if(record.TryGetField("OrderNo", out var orderNo))
            {
                var yearMonth = orderNo.Substring(2, 4);
                var sk = yearMonth + "0";
                var headers = new List<string>(record.Headers);
                var fields = new List<string>(record.Fields);
                headers.Add("ShardKey");
                fields.Add(sk);
                return new DataRecord(fields.ToArray(), record.TableName, headers.ToArray(), record.CompanyID);
            }
        }
    }
    // order_process_step / order_process_step_item: if ShardKey is missing, copy the parent
    // order_process row's ShardKey from the cache; defaults to "0" on a cache miss.
    if(record.TableName == "order_process_step"|| record.TableName == "order_process_step_item")
    {
        if (!record.TryGetField("ShardKey",out var sk))
        {
            if (record.TryGetField("OrderProcessID",out var processID))
            {
                var shardKey =await cache.GetStringAsync($"order_process_{processID}");
                var headers = new List<string>(record.Headers);
                var fields = new List<string>(record.Fields);
                headers.Add("ShardKey");
                fields.Add(shardKey??"0");
                return new DataRecord(fields.ToArray(), record.TableName, headers.ToArray(), record.CompanyID);
            }
        }
    }
    // NOTE(review): "order_moudle" looks like a misspelling of "order_module" (handled above), so
    // this branch can only match a table literally named "order_moudle". Also note the
    // "order_module" branch above returns after removing ViewFileName without ever adding
    // ShardKey — confirm the intended interaction before renaming.
    if(record.TableName == "order_moudle")
    {
        if (!record.TryGetField("ShardKey", out var skStr))
        {
            if (record.TryGetField("OrderNo", out var orderNo))
            {
                var yearMonth = orderNo.Substring(2, 4);
                var sk = yearMonth + "0";
                var headers = new List<string>(record.Headers);
                var fields = new List<string>(record.Fields);
                headers.Add("ShardKey");
                fields.Add(sk);
                return new DataRecord(fields.ToArray(), record.TableName, headers.ToArray(), record.CompanyID);
            }
        }
    }
    return null;
};
// Derived-record hook: for each source record, optionally emit additional generated records.
options.RecordAdd = (record) =>
{
    var resultList = new List<DataRecord>();
    // order_item rows fan out into index rows keyed by PlanID / PackageID.
    if (record.TableName == "order_item")
    {
        record.TryGetField("ID", out var itemID);
        record.TryGetField("ShardKey", out var shardKey);
        record.TryGetField("PlanID", out var planID);
        record.TryGetField("PackageID", out var packageID);
        record.TryGetField("CompanyID", out var companyID);
        // Only items attached to a block plan (numeric PlanID > 0) get an order_block_plan_item row.
        _=int.TryParse(planID, out var pid);
        if (pid > 0)
        {
            resultList.Add(new DataRecord(new[] { itemID, shardKey, planID, companyID },
                "order_block_plan_item",
                new[] { "ItemID", "ShardKey", "PlanID", "CompanyID" }));
        }
        // Only items attached to a package (numeric PackageID > 0) get an order_package_item row.
        _ = int.TryParse(packageID, out var pkid);
        if(pkid > 0)
        {
            resultList.Add(new DataRecord(new[] { itemID, shardKey, packageID, companyID },
                "order_package_item",
                new[] { "ItemID", "ShardKey", "PackageID", "CompanyID" }
                ));
        }
    }
    return resultList;
};
// Column type overrides for the output stage, keyed "table.column".
// Blob columns carry binary data; Text/Json columns carry long string payloads.
options.ColumnTypeConfig = new()
{
    { "simple_plan_order.PlaceData", ColumnType.Blob },
    { "order_block_plan_result.PlaceData", ColumnType.Blob },
    { "order_box_block.Data", ColumnType.Blob },
    { "order_data_goods.ExtraProp", ColumnType.Text },
    { "order_module_extra.JsonStr", ColumnType.Text },
    { "process_info.Users", ColumnType.Text },
    // NOTE: "schdule" spelling matches the actual table name in the database.
    { "order_process_schdule.CustomOrderNo", ColumnType.Text },
    { "order_process_schdule.OrderProcessStepName", ColumnType.Text },
    { "order_process_schdule.AreaName", ColumnType.Text },
    { "order_process_schdule.ConsigneeAddress", ColumnType.Text },
    { "order_process_schdule.ConsigneePhone", ColumnType.Text },
    { "report_source.Sql", ColumnType.Text },
    { "report_source.KeyValue", ColumnType.Text },
    { "report_source.Setting", ColumnType.Text },
    { "order_data_block.RemarkJson", ColumnType.Text },
    { "order_patch_detail.BlockDetail", ColumnType.Text },
    { "order_scrap_board.OutLineJson", ColumnType.Text },
    { "simple_package.Items", ColumnType.Text },
    { "order_batch_pack_config.Setting", ColumnType.Text },
    { "machine.Settings", ColumnType.Text },
    { "sys_config.Value", ColumnType.Text },
    { "sys_config.JsonStr", ColumnType.Text },
    { "process_item_exp.ItemJson", ColumnType.Text },
    { "report_template.Template", ColumnType.Text },
    { "report_template.SourceConfig", ColumnType.Text },
    { "order_block_plan.OrderNos", ColumnType.Json },
    { "order_block_plan.BlockInfo", ColumnType.Text },
};
});
// Output database configuration: connection string built from the "MySqlMaster" entry
// in appsettings, with ETL-specific MySqlConnector tweaks applied.
host.Services.Configure<DatabaseOutputOptions>(options =>
{
    // Retained local-test configuration, kept for reference:
    //options.ConnectionString = new MySqlConnectionStringBuilder
    //{
    //    Server = "127.0.0.1",
    //    Port = 33309,
    //    Database = "cferp_test",
    //    UserID = "root",
    //    Password = "123456",
    //    MaximumPoolSize = 50, // this value should stay below the server's max_connections
    //}.ConnectionString;
    options.ConnectionString = new MySqlConnectionStringBuilder(host.Configuration.GetConnectionString("MySqlMaster")??"")
    {
        CharacterSet = "utf8",              // NOTE(review): config exposes "Isutf8mb4" — confirm utf8 vs utf8mb4 here
        AllowUserVariables = true,
        IgnoreCommandTransaction = true,
        TreatTinyAsBoolean = false,         // keep TINYINT(1) values numeric instead of bool
        MaximumPoolSize = 50,               // should stay below the server's max_connections
        SslMode = MySqlSslMode.None,
    }.ConnectionString;
});
// Logging: Serilog only — console sink for all levels, file sink restricted to errors.
host.Services.AddLogging(builder =>
{
    builder.ClearProviders();
    builder.AddSerilog(new LoggerConfiguration()
        .WriteTo.Console()
        .WriteTo.File("./log/error.log", restrictedToMinimumLevel:LogEventLevel.Error)
        // .WriteTo.File("./log/info.log", restrictedToMinimumLevel:LogEventLevel.Information) // info file sink disabled for performance
        .CreateLogger()
        );
});
host.Services.AddScoped<ProcessContext>();
// Separate keyed queues for the producer (input) and consumer (output) stages of the pipeline.
host.Services.AddKeyedSingleton<DataRecordQueue>(ProcessStep.Producer);
host.Services.AddKeyedSingleton<DataRecordQueue>(ProcessStep.Consumer);
host.Services.AddTransient<TaskManager>();
host.Services.AddSingleton<ErrorRecorder>();
host.Services.AddHostedService<MainHostedService>();
host.Services.AddHostedService<TaskMonitorService>();
// NOTE(review): both branches register the same InputService, so IsMock currently has no
// effect here — presumably the mock branch should register the simulation input service;
// confirm the intended type before changing.
if(commandOptions.IsMock)host.Services.AddSingleton<IInputService,InputService>();
else host.Services.AddSingleton<IInputService, InputService>();
host.Services.AddSingleton<ITransformService, TransformService>();
host.Services.AddSingleton<IOutputService, OutputService>();
// Redis settings come from the "RedisCacheOptions" configuration section; defaults apply when absent.
var redisOptions = host.Configuration.GetSection("RedisCacheOptions").Get<RedisCacheOptions>() ?? new RedisCacheOptions();
host.Services.AddStackExchangeRedisCache(options =>
{
    options.Configuration = redisOptions.Configuration;
    options.InstanceName = redisOptions.InstanceName;
});
var app = host.Build();
await app.RunAsync();
}

View File

@@ -1,49 +0,0 @@
## 说明
使用该程序来对MyDumper导出的CSV数据进行读取转换然后导出到其他数据库中。
1. 用MyDumper从数据库导出CSV数据
使用MyDumper Docker镜像
```sh
docker run --rm --net=host -v D:/Dump:/home/backup mydumper/mydumper:v0.15.2-6 mydumper `
-h 127.0.0.1 -P 33306 -u root -p 123456 `
-B cferp_test --no-schemas --csv --hex-blob `
-o /home/backup
```
将挂载卷、数据库连接参数和输出目录替换为你自己环境中的实际值
不导出数据库结构(--no-schemas)
导出完的目录下应当包含.sql文件以及.dat文件
2. 在Program.cs中修改`CsvOptions`配置
```cs
host.Services.Configure<CsvOptions>(option =>
{
option.Delimiter = ",";
option.QuoteChar = '"';
option.InputDir = "D:/Dump/Test";
});
```
将`option.InputDir`配置为MyDumper导出的数据目录
3. 在Program.cs中修改`DatabaseOutputOptions`配置
```cs
host.Services.Configure<DatabaseOutputOptions>(options =>
{
options.ConnectionString = new MySqlConnectionStringBuilder
{
Server = "127.0.0.1",
Port = 33306,
Database = "cferp_test_1",
UserID = "root",
Password = "123456",
MaximumPoolSize = 50,
}.ConnectionString;
options.MaxTask = 16;
options.FlushCount = 200;
});
```
将`MySqlConnectionStringBuilder`的属性修改为程序要导出至的数据库
> 后续将这些配置通过命令行传递
4. 运行程序
> 注意,测试数据库`cferp_test`中的`order_process_step`表存在外键,如果要导出到和测试库同结构的数据库,记得先把外键删除。

View File

@@ -1,183 +0,0 @@
using System.Reflection.PortableExecutable;
using System.Text;
using System.Text.RegularExpressions;
using ConsoleApp2.Helpers;
using ConsoleApp2.HostedServices.Abstractions;
using Microsoft.Extensions.Logging;
namespace ConsoleApp2.Services;
/// <summary>
/// Reads table data from a MyDumper CSV export: locates the table's .sql metadata file to get
/// the column headers and the list of data files, then streams each data file row into a
/// <see cref="DataRecord"/>.
/// </summary>
public class CsvSource:IDataSource
{
    protected readonly string _inputDir;                 // directory containing the MyDumper export
    //protected readonly StreamReader _reader;
    private readonly ILogger? _logger;
    protected readonly string _tableName;                // table whose files this source reads
    protected string _sqlFilePath=string.Empty;          // path of the matched .sql metadata file
    protected readonly string? _sqlFileText;
    protected string[] headers=Array.Empty<string>();    // column names parsed from the .sql file
    protected string[] csvFiles = Array.Empty<string>(); // data file names parsed from the .sql file

    // Raw text of the current row, if a subclass tracks it; not written by this class.
    public string? CurrentRaw { get; protected set; }
    public string Delimiter { get; private set; }
    public char QuoteChar { get; private set; }

    /// <summary>
    /// Creates a CSV source for a single table.
    /// </summary>
    /// <param name="inputDir">Directory containing the MyDumper export files.</param>
    /// <param name="tableName">Table to read.</param>
    /// <param name="delimiter">Field delimiter, defaults to ",".</param>
    /// <param name="quoteChar">Quote character, defaults to '"'.</param>
    /// <param name="logger">Optional logger.</param>
    public CsvSource(string inputDir,string tableName,string delimiter = ",", char quoteChar = '"',
        ILogger? logger = null)
    {
        _inputDir = inputDir;
        _tableName = tableName;
        _logger = logger;
        Delimiter = delimiter;
        QuoteChar = quoteChar;
    }

    /// <summary>
    /// Splits one CSV row into fields: strips enclosing quote characters, maps "\N" to "NULL",
    /// and decodes hex-encoded field values back to text.
    /// </summary>
    /// <exception cref="ArgumentException">The row is empty.</exception>
    // NOTE(review): the quote-state check in the delimiter test is commented out, so delimiters
    // inside quoted fields split the field; also the final field skips the "\N"/quote handling
    // applied to earlier fields. ParseRow2 appears to be the parser actually used — confirm.
    public string[] ParseRow(string row, char quoteChar, string delimiter)
    {
        var span = row.AsSpan();
        var result = new List<string>();
        if (span.Length == 0)
            throw new ArgumentException("The row is empty", nameof(row));
        var isInQuote = span[0] == quoteChar;
        var start = 0;
        for (var i = 1; i < span.Length; i++)
        {
            if (span[i] == quoteChar)
            {
                isInQuote = !isInQuote;
            }
            // the delimiter needs to be sufficiently distinctive
            else if (/*!isInQuote && */span.Length > i + delimiter.Length && span[i..(i + delimiter.Length)].Equals(delimiter, StringComparison.CurrentCulture)) // field matched
            {
                string field;
                if (span[start] == quoteChar && span[i - 1] == quoteChar) // enclosed by quoteChar
                    field = span[(start + 1)..(i - 1)].ToString(); // escape quoteChar
                else
                    field = span[start..i].ToString();
                start = i + delimiter.Length;
                if (field == "\\N")
                    field = "NULL";
                result.Add(field);
                continue;
            }
        }
        result.Add(span[start..].ToString());
        // Decode any hex-encoded field values (e.g. --hex-blob output) back to text.
        for (var i = 0; i < result.Count; i++)
        {
            var field = result[i];
            if (DumpDataHelper.CheckHexField(field) && StringExtensions.CheckJsonHex(field))
            {
                result[i] = StringExtensions.FromHex(field);
            }
        }
        return result.ToArray();
    }

    /// <summary>
    /// Alternative row parser: walks the row character by character, honoring a single
    /// backslash escape and quote state. Splits on the first character of
    /// <paramref name="delimiter"/> only; quote characters and backslashes are kept in the output.
    /// </summary>
    public string[] ParseRow2(ReadOnlySpan<char> source, char quoteChar, string delimiter)
    {
        var result = new List<string>();
        var index = -1;
        StringBuilder current = new StringBuilder();
        bool hasQuote = false; // currently inside a quoted region
        bool hasSlash = false; // previous character was an unconsumed backslash
        while (index < source.Length-1)
        {
            index++;
            if (hasSlash == false && source[index] == '\\')
            {
                hasSlash = true;
                current.Append('\\');
                continue;
            }
            if (hasSlash ==false && source[index] == quoteChar)
            {
                hasQuote = !hasQuote;
                current.Append(source[index]);
                continue;
            }
            if (hasQuote==false && source[index] == delimiter[0])
            {
                result.Add(current.ToString());
                current.Clear();
            }
            else
            {
                current.Append(source[index]);
            }
            hasSlash = false;
        }
        result.Add(current.ToString());
        return result.ToArray();
    }

    /// <summary>
    /// Finds this table's .sql metadata file in the input directory and extracts the CSV
    /// headers and data file names from it.
    /// </summary>
    public virtual async Task GetHeaderAndCsvFiles()
    {
        string pattern = $"^.*\\.{_tableName}\\..*\\.sql$";
        _sqlFilePath = Directory.GetFiles(_inputDir).FirstOrDefault(s => Regex.Match(s, pattern).Success) ?? "";
        var text = await File.ReadAllTextAsync(_sqlFilePath);
        headers = DumpDataHelper.GetCsvHeadersFromSqlFileAsync(text);
        // NOTE(review): the file-name pattern matches '.dat.zst' entries even though this class
        // reads uncompressed files — confirm against the export format in use.
        csvFiles = DumpDataHelper.GetCsvFileNamesFromSqlFileAsync(text, new Regex(@"'.+\.dat.zst'"));
    }

    /// <summary>
    /// Streams every row of every data file to <paramref name="action"/> as a <see cref="DataRecord"/>.
    /// </summary>
    public virtual async Task DoEnqueue(Action<DataRecord> action)
    {
        await GetHeaderAndCsvFiles();
        foreach (var file in csvFiles)
        {
            var filePath= Path.Combine(_inputDir, file);
            using var fs = File.OpenRead(filePath);
            {
                using StreamReader sr = new (fs);
                {
                    while (!sr.EndOfStream)
                    {
                        var line = await sr.ReadLineAsync();
                        var fields = ParseRow2(line, QuoteChar, Delimiter);
                        var record = new DataRecord(fields, _tableName, headers);
                        action?.Invoke(record);
                    }
                }
            }
        }
    }

    /// <summary>
    /// Reads a single sample record (the first row of the first data file), or null when the
    /// table has no data files.
    /// </summary>
    public virtual async Task<DataRecord?> GetTestRecord()
    {
        await GetHeaderAndCsvFiles();
        var file = csvFiles.FirstOrDefault();
        if (file != null)
        {
            var filePath = Path.Combine(_inputDir, file);
            using var fs = File.OpenRead(filePath);
            {
                using StreamReader sr = new(fs);
                {
                    var line = await sr.ReadLineAsync();
                    var fields = ParseRow2(line, QuoteChar, Delimiter);
                    var record = new DataRecord(fields, _tableName, headers);
                    return record;
                }
            }
        }
        return null;
    }

    public void Dispose()
    {
        // _reader.Dispose();
    }
}

View File

@@ -1,51 +0,0 @@
using System.Collections.Concurrent;
using System.Diagnostics.CodeAnalysis;
namespace ConsoleApp2.Services;
/// <summary>
/// Bounded, thread-safe queue of <see cref="DataRecord"/> items with read/write notification events.
/// </summary>
public class DataRecordQueue : IDisposable
{
    // Bounded to 2,000,000 records to cap the pipeline's in-flight memory usage.
    private readonly BlockingCollection<DataRecord> _queue = new(2000_000);

    public int Count => _queue.Count;
    public bool IsCompleted => _queue.IsCompleted;
    public bool IsAddingCompleted => _queue.IsAddingCompleted;

    /// <summary>Raised after each successful enqueue.</summary>
    public event Action? OnRecordWrite;
    /// <summary>Raised after each successful <see cref="TryDequeue"/>.</summary>
    public event Action? OnRecordRead;

    /// <summary>Attempts a non-blocking dequeue; fires <see cref="OnRecordRead"/> on success.</summary>
    public bool TryDequeue([MaybeNullWhen(false)] out DataRecord record)
    {
        var taken = _queue.TryTake(out record);
        if (taken)
            OnRecordRead?.Invoke();
        return taken;
    }

    /// <summary>Blocks until a record is available.</summary>
    public DataRecord Dequeue() => _queue.Take();

    /// <summary>Marks the queue as complete for adding; further enqueues will throw.</summary>
    public void CompleteAdding() => _queue.CompleteAdding();

    /// <summary>Adds a record (blocking when the queue is full) and fires <see cref="OnRecordWrite"/>.</summary>
    public void Enqueue(DataRecord record)
    {
        _queue.Add(record);
        OnRecordWrite?.Invoke();
    }

    public void Dispose() => _queue.Dispose();
}

View File

@@ -1,41 +0,0 @@
using ConsoleApp2.Helpers;
using ConsoleApp2.HostedServices.Abstractions;
using Microsoft.Extensions.Logging;
using ServiceStack.Text;
namespace ConsoleApp2.Services;
/// <summary>
/// Reads records from Jsv-format files. The implementation is currently stubbed out; the
/// original reader code is retained below as comments.
/// </summary>
[Obsolete]
public class JsvSource:IDataSource
{
    //private readonly string _inputDir;
    //private readonly JsvStringSerializer _jsv;
    //private readonly StreamReader? _reader;
    // ReSharper disable once PrivateFieldCanBeConvertedToLocalVariable
    //private readonly ILogger? _logger;
    //private readonly string _tableName;

    // Last record produced; never assigned by this stubbed implementation.
    public DataRecord Current { get; protected set; } = null!;
    public string[]? Headers { get; }
    //public bool EndOfSource => _reader.EndOfStream;

    // Parameters are accepted for interface compatibility but ignored by the stub.
    public JsvSource(string inputDir,string tableName, ILogger? logger = null)
    {
        //_inputDir = inputDir;
        //_tableName = tableName;
        //_jsv = new JsvStringSerializer();
        //_logger = logger;
    }

    // No-op: nothing is enqueued.
    public Task DoEnqueue(Action<DataRecord> action)
    {
        return Task.CompletedTask;
    }

    public void Dispose()
    {
        // _reader?.Dispose();
    }
}

View File

@@ -1,57 +0,0 @@
using System.Collections.Concurrent;
namespace ConsoleApp2.Services;
/// <summary>
/// Shared pipeline context: tracks input/transform/output progress counters, completion
/// flags, and any exceptions collected along the way. Counter updates are thread-safe.
/// </summary>
public class ProcessContext
{
    private int _inputCount;
    private int _transformCount;
    private int _outputCount;
    private readonly ConcurrentBag<Exception> _exceptionList = new();

    public bool IsInputCompleted { get; private set; }
    public bool IsTransformCompleted { get; private set; }
    public bool IsOutputCompleted { get; private set; }

    /// <summary>Total records read so far.</summary>
    public int InputCount => _inputCount;

    /// <summary>Total records transformed so far.</summary>
    public int TransformCount => _transformCount;

    /// <summary>Total records written so far.</summary>
    public int OutputCount => _outputCount;

    /// <summary>Records an exception for later inspection.</summary>
    public void AddException(Exception ex) => _exceptionList.Add(ex);

    /// <summary>Returns all exceptions collected so far.</summary>
    public ConcurrentBag<Exception> GetExceptions() => _exceptionList;

    public void CompleteInput() => IsInputCompleted = true;
    public void CompleteTransform() => IsTransformCompleted = true;
    public void CompleteOutput() => IsOutputCompleted = true;

    public void AddInput() => Interlocked.Increment(ref _inputCount);
    public void AddInput(int count) => Interlocked.Add(ref _inputCount, count);
    public void AddTransform() => Interlocked.Increment(ref _transformCount);
    public void AddTransform(int count) => Interlocked.Add(ref _transformCount, count);
    public void AddOutput() => Interlocked.Increment(ref _outputCount);
    public void AddOutput(int count) => Interlocked.Add(ref _outputCount, count);
}

View File

@@ -1,48 +0,0 @@
using System.Collections.Concurrent;
using Microsoft.Extensions.Logging;
namespace ConsoleApp2.Services;
/// <summary>
/// Quickly creates and awaits batches of tasks.
/// </summary>
public class TaskManager
{
    private readonly ConcurrentBag<Task> _tasks;
    private readonly ILogger _logger;

    /// <summary>Number of tasks ever tracked.</summary>
    public int TaskCount => _tasks.Count;
    /// <summary>Number of tracked tasks that have not completed yet.</summary>
    public int RunningTaskCount => _tasks.Count(task => !task.IsCompleted);
    public IReadOnlyCollection<Task> Tasks => _tasks;

    public TaskManager(ILogger<TaskManager> logger)
    {
        _tasks = new ConcurrentBag<Task>();
        _logger = logger;
    }

    /// <summary>
    /// Starts <paramref name="func"/> on the thread pool and tracks the resulting task.
    /// </summary>
    /// <remarks>
    /// BUG FIX: when <typeparamref name="TResult"/> is itself a <see cref="Task"/> (i.e. an
    /// async delegate was passed), <c>Task.Factory.StartNew</c> returns a nested
    /// <c>Task&lt;Task&gt;</c> that completes as soon as the delegate *returns* its task.
    /// Unwrap it so <see cref="WaitAll"/> waits for the inner work to actually finish.
    /// </remarks>
    public void CreateTask<TResult>(Func<TResult> func, CancellationToken cancellationToken = default)
    {
        Task task = Task.Factory.StartNew(func, cancellationToken);
        if (task is Task<Task> nested)
            task = nested.Unwrap();
        _tasks.Add(task);
        _logger.LogDebug("New task created");
    }

    /// <summary>Starts <paramref name="taskCount"/> tasks all running the same delegate.</summary>
    public void CreateTasks<TResult>(Func<TResult> func,int taskCount, CancellationToken cancellationToken = default)
    {
        for (int i = 0; i < taskCount; i++)
        {
            CreateTask(func, cancellationToken);
        }
    }

    /// <summary>Awaits completion of every tracked task.</summary>
    public async Task WaitAll()
    {
        await Task.WhenAll(_tasks);
    }

    /// <summary>
    /// Clears the tracked tasks.
    /// </summary>
    /// <exception cref="InvalidOperationException">Some tracked tasks are still running.</exception>
    public void ClearTask()
    {
        if(RunningTaskCount != 0)
            throw new InvalidOperationException("Unable to clear task. There are still running tasks");
        _tasks.Clear();
    }
}

View File

@@ -1,90 +0,0 @@
using ConsoleApp2.Helpers;
using Microsoft.Extensions.Logging;
using System.Text.RegularExpressions;
using ZstdSharp;
namespace ConsoleApp2.Services
{
public class ZstSource : CsvSource
{
public ZstSource(string inputDir, string tableName, string delimiter = ",", char quoteChar = '"',
ILogger? logger = null) : base(inputDir, tableName, delimiter = ",", quoteChar = '"', logger = null)
{
}
private static async Task<string> DecompressFile(string filePath)
{
using var input = File.OpenRead(filePath);
{
using var decopress = new DecompressionStream(input);
{
var ms = new MemoryStream();
decopress.CopyTo(ms);
ms.Seek(0, SeekOrigin.Begin);
StreamReader reader = new(ms);
var text = await reader.ReadToEndAsync();
return text;
}
}
}
public override async Task GetHeaderAndCsvFiles()
{
string pattern = $"^.*\\.{_tableName}\\..*\\.sql.zst$";
_sqlFilePath = Directory.GetFiles(_inputDir).FirstOrDefault(s => Regex.Match(s, pattern).Success) ?? "";
var text = await DecompressFile(_sqlFilePath);
headers= DumpDataHelper.GetCsvHeadersFromSqlFileAsync(text);
csvFiles= DumpDataHelper.GetCsvFileNamesFromSqlFileAsync(text, new Regex(@"'.+\.dat.zst'"));
}
public override async Task DoEnqueue(Action<DataRecord> action)
{
await GetHeaderAndCsvFiles();
foreach (var file in csvFiles)
{
var filePath = Path.Combine(_inputDir, file);
using var input = File.OpenRead(filePath);
{
using var decopress = new DecompressionStream(input);
{
using var reader = new StreamReader(decopress);
{
while (!reader.EndOfStream)
{
var line = await reader.ReadLineAsync();
var fields = ParseRow2(line, QuoteChar, Delimiter);
var record = new DataRecord(fields, _tableName, headers);
action?.Invoke(record);
}
}
}
}
}
}
public override async Task<DataRecord?> GetTestRecord()
{
await GetHeaderAndCsvFiles();
var file = csvFiles?.FirstOrDefault();
if (file != null)
{
var filePath = Path.Combine(_inputDir, file);
using var input = File.OpenRead(filePath);
{
using var decopress = new DecompressionStream(input);
{
using var reader = new StreamReader(decopress);
{
var line = await reader.ReadLineAsync();
var fields = ParseRow2(line, QuoteChar, Delimiter);
var record = new DataRecord(fields, _tableName, headers);
return record;
}
}
}
}
return null;
}
}
}

View File

@@ -1,28 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace ConsoleApp2.SimulationService
{
public static partial class DataHelper
{
public static short[] shareKeys = {23000, 23040, 23070, 23100, 24000, 24040, 24070, 24100, 25000, 25040, 25070, 25100 };
public static int[] companyIds = { 1, 2, 3, 4 };
private static T getArrayValue<T>(int index, T[] array)//按index取数据,超过数组长度,index从0开始再取
{
return array[index % array.Length];
}
public static short GetShareKey(int index)
{
return getArrayValue(index, shareKeys);
}
public static int GetCompanyId(int index)
{
return getArrayValue(index, companyIds);
}
}
}

View File

@@ -1,136 +0,0 @@
using ConsoleApp2.Const;
using ConsoleApp2.Helpers;
using ConsoleApp2.HostedServices;
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using System.Text.RegularExpressions;
namespace ConsoleApp2.SimulationService
{
    /// <summary>
    /// Input service that fabricates bulk test data: for each configured table it reads one
    /// sample record from the real source, then clones it SimulaRowCount times with rewritten
    /// ID / OrderNo / ShardKey / CompanyID values and enqueues the clones into the producer queue.
    /// </summary>
    public class SimulationInputService : IInputService
    {
        private readonly ILogger _logger;
        private readonly IOptions<DataInputOptions> _dataInputOptions;
        private readonly ProcessContext _context;

        public SimulationInputService(ILogger<InputService> logger,
            IOptions<DataInputOptions> dataInputOptions,
            ProcessContext context)
        {
            _logger = logger;
            _dataInputOptions = dataInputOptions;
            _context = context;
        }

        /// <summary>
        /// Generates the simulated records for every table in <paramref name="tasksOptions"/>.
        /// </summary>
        // NOTE(review): this method mixes the injected _context (AddInput) with the `context`
        // parameter (CompleteInput) — presumably they are the same instance; confirm.
        public async Task ExecuteAsync(TasksOptions tasksOptions, DataRecordQueue producerQueue, ProcessContext context, CancellationToken cancellationToken)
        {
            var inputDir = _dataInputOptions.Value.InputDir;
            _logger.LogInformation("***** simulation input service start, working dir: {InputDir}, thread id: {ThreadId} *****", inputDir, Environment.CurrentManagedThreadId);
            var files = Directory.GetFiles(inputDir);
            if (files.Length == 0)
            {
                _logger.LogInformation("No source files found in {InputDir}", inputDir);
                return;
            }
            foreach (var tableName in tasksOptions.TableInfoConfig.Keys)
            {
                _logger.LogInformation("Working table: {tableName}", tableName);
                var dataCount = tasksOptions.TableInfoConfig[tableName].SimulaRowCount; // total rows to generate for this table
                var companyTotallCount = 1000; // rows generated per company before rotating CompanyID
                var tempRecords = new List<DataRecord>();
                var sk = DataHelper.shareKeys.First();
                var companyID = DataHelper.companyIds.First();
                var shareKeyInterval = 20000; // rows generated per ShardKey value
                var getShareKeyTimes = 0;     // times a ShardKey was picked; advances the ShardKey each pick
                var getCompanyIDTimes = 0;    // times a CompanyID was picked; advances the CompanyID each pick
                var shareKeyIntervalCount = 0;
                // Take one real record from the table's source to use as the clone template.
                var source = _dataInputOptions.Value.CreateSource?.Invoke(tableName);
                if (source == null) throw new NullReferenceException($"create table source:{tableName} failed!");
                var testRecord = await source.GetTestRecord();
                if(testRecord == null) throw new NullReferenceException($"create testRecord failed, tableName:{tableName}");
                for (long i = 1; i <= dataCount; i++)
                {
                    // Rotate to the next ShardKey every shareKeyInterval rows.
                    shareKeyIntervalCount++;
                    if (shareKeyIntervalCount > shareKeyInterval)
                    {
                        sk = DataHelper.GetShareKey(getShareKeyTimes);
                        getShareKeyTimes++;
                        shareKeyIntervalCount = 0;
                    }
                    // Clone the template's fields so each generated record is independent.
                    var fields = new string[testRecord.Fields.Length];
                    Array.Copy(testRecord.Fields, fields, testRecord.Fields.Length);
                    var record = new DataRecord(fields, testRecord.TableName, testRecord.Headers, companyID);
                    // Rewrite the clone's ID / OrderNo / ShardKey fields with generated values.
                    if (record.Headers.Contains("ID"))
                    {
                        var index = Array.IndexOf(record.Headers, "ID");
                        if (index > -1)
                        {
                            record.Fields[index] = i.ToString();
                        }
                    }
                    if (record.TableName == "order_box_block" && record.Headers.Contains("BoxID"))
                    {
                        var index = Array.IndexOf(record.Headers, "BoxID");
                        if (index > -1)
                        {
                            record.Fields[index] = i.ToString();
                        }
                    }
                    if ((record.TableName == "order_block_plan_item" || record.TableName == "order_package_item") && record.Headers.Contains("ItemID"))
                    {
                        var index = Array.IndexOf(record.Headers, "ItemID");
                        if (index > -1)
                        {
                            record.Fields[index] = i.ToString();
                        }
                    }
                    if (record.TableName == "order" && record.Headers.Contains("OrderNo"))
                    {
                        var index = Array.IndexOf(record.Headers, "OrderNo");
                        if (index > -1)
                        {
                            record.Fields[index] = i.ToString();
                        }
                    }
                    if (record.Headers.Contains("ShardKey"))
                    {
                        var index = Array.IndexOf(record.Headers, "ShardKey");
                        if (index > -1)
                        {
                            record.Fields[index] = sk.ToString();
                        }
                    }
                    tempRecords.Add(record);
                    // Flush the batch to the producer queue once per company block (or at the end),
                    // then rotate to the next CompanyID.
                    if (tempRecords.Count >= companyTotallCount || i >= dataCount - 1)
                    {
                        foreach (var rc in tempRecords)
                        {
                            _context.AddInput();
                            producerQueue.Enqueue(rc);
                            if (cancellationToken.IsCancellationRequested)
                                return;
                        }
                        tempRecords.Clear();
                        companyID = DataHelper.GetCompanyId(getCompanyIDTimes);
                        getCompanyIDTimes++;
                    }
                }
                _logger.LogInformation("table:'{tableName}' simulation input completed", tableName);
                //}
                //_logger.LogInformation("File '{File}' input completed", Path.GetFileName(sqlPath));
            }
            context.CompleteInput();
            _logger.LogInformation("***** Csv input service completed *****");
        }
    }
}

View File

@@ -1,18 +0,0 @@
{
"CmdOptions": {
"InputFileType": "CSV",
"InputDir": "D:/MyDumper-ZST",
"TaskCount": 6,
"FlushCount": 10000,
"Isutf8mb4": true,
"OldestShardKey": 23000,
"OldestTime": "202301"
},
"ConnectionStrings": {
"MySqlMaster": "Server=127.0.0.1;Port=33309;UserId=root;Password=123456;Database=cferp_test;"
},
"RedisCacheOptions": {
"Configuration": "192.168.1.246:6380",
"InstanceName" : "mes-etl:"
}
}

View File

@@ -0,0 +1,33 @@
using MesETL.App.Const;
namespace MesETL.App.Cache;
#nullable disable
/// <summary>
/// Builds the cache keys used to look up pre-loaded column values.
/// Key format: [TableName]-[ColumnName@ColumnValue]-[CacheColumnName].
/// </summary>
public static class CacheKeysFunc
{
    /// <summary>
    /// Order table: key for the CompanyID cached under a given OrderNo.
    /// </summary>
    /// <param name="orderNo"></param>
    /// <returns></returns>
    public static string Order_OrderNo_CompanyID(string orderNo)
        => BuildCacheKey(TableNames.Order, "OrderNo", orderNo, "CompanyID");

    /// <summary>
    /// OrderBlockPlan table: key for the CompanyID cached under a given ID.
    /// </summary>
    /// <param name="id"></param>
    /// <returns></returns>
    public static string OrderBlockPlan_ID_CompanyID(string id)
        => BuildCacheKey(TableNames.OrderBlockPlan, "ID", id, "CompanyID");

    /// <summary>
    /// OrderProcess table: key for the ShardKey cached under a given ID.
    /// </summary>
    /// <param name="id"></param>
    /// <returns></returns>
    public static string OrderProcess_ID_ShardKey(string id)
        => BuildCacheKey(TableNames.OrderProcess, "ID", id, "ShardKey");

    // Assembles the [TableName]-[ColumnName@ColumnValue]-[CacheColumnName] key.
    static string BuildCacheKey(string tableName, string columnName, string columnValue, string cacheColumnName)
        => string.Concat(tableName, "-", columnName, "@", columnValue, "-", cacheColumnName);
}

View File

@@ -0,0 +1,10 @@
namespace MesETL.App.Cache;
/// <summary>
/// Minimal async cache abstraction over string values and string-to-string hashes.
/// </summary>
public interface ICacher
{
    /// <summary>Gets the string stored under <paramref name="key"/>, or null when absent.</summary>
    Task<string?> GetStringAsync(string key);
    /// <summary>Stores a string value under <paramref name="key"/>.</summary>
    Task SetStringAsync(string key, string value);
    /// <summary>Returns whether a value exists for <paramref name="key"/>.</summary>
    Task<bool> ExistsAsync(string key);
    /// <summary>Stores a field→value hash under <paramref name="key"/>.</summary>
    Task SetHashAsync(string key, IReadOnlyDictionary<string, string> hash);
    /// <summary>Gets the hash stored under <paramref name="key"/>.</summary>
    Task<Dictionary<string, string>> GetHashAsync(string key);
}

View File

@@ -0,0 +1,51 @@
using System.Collections.Concurrent;
namespace MesETL.App.Cache;
/// <summary>
/// In-process <see cref="ICacher"/> implementation backed by concurrent dictionaries.
/// </summary>
public class MemoryCache : ICacher
{
    private readonly ConcurrentDictionary<string, string> _stringCache = new();
    private readonly ConcurrentDictionary<string, Dictionary<string, string>> _hashCache = new();

    // The most recently constructed instance, exposed for global access.
    public static MemoryCache? Instance { get; private set; }

    public MemoryCache()
    {
        Instance = this;
    }

    public Task<string?> GetStringAsync(string key)
    {
        _stringCache.TryGetValue(key, out var value);
        return Task.FromResult<string?>(value);
    }

    public Task SetStringAsync(string key, string value)
    {
        _stringCache[key] = value;
        return Task.CompletedTask;
    }

    public Task<bool> ExistsAsync(string key) => Task.FromResult(_stringCache.ContainsKey(key));

    public Task SetHashAsync(string key, IReadOnlyDictionary<string, string> hash)
    {
        // Store a private copy so later mutations of the caller's dictionary don't leak in.
        var copy = new Dictionary<string, string>();
        foreach (var (field, value) in hash)
            copy[field] = value;
        _hashCache[key] = copy;
        return Task.CompletedTask;
    }

    // Throws KeyNotFoundException when the key is absent (dictionary indexer semantics).
    public Task<Dictionary<string, string>> GetHashAsync(string key) => Task.FromResult(_hashCache[key]);

    /// <summary>Removes every string entry whose key matches <paramref name="keySelector"/>.</summary>
    public void Delete(Func<string, bool> keySelector)
    {
        var matched = _stringCache.Keys.Where(keySelector).ToList();
        foreach (var key in matched)
        {
            _stringCache.TryRemove(key, out _);
        }
    }
}

View File

@@ -0,0 +1,64 @@
using MesETL.App.Options;
using Microsoft.Extensions.DependencyInjection;
using StackExchange.Redis;
namespace MesETL.App.Cache;
/// <summary>
/// <see cref="ICacher"/> implementation backed by a Redis database; every key is
/// prepended with <see cref="KeyPrefix"/>.
/// </summary>
public class RedisCache : ICacher
{
    private readonly IDatabase _db;

    /// <summary>Prefix prepended to every cache key (e.g. an instance name).</summary>
    public string KeyPrefix { get; set; }

    public RedisCache(IConnectionMultiplexer conn, int dataBase, string keyPrefix = "")
    {
        _db = conn.GetDatabase(dataBase);
        KeyPrefix = keyPrefix;
    }

    // Applies the configured prefix to a logical key.
    private string Prefixed(string key) => $"{KeyPrefix}{key}";

    public async Task<string?> GetStringAsync(string key)
    {
        var value = await _db.StringGetAsync(Prefixed(key));
        return value.HasValue ? value.ToString() : null;
    }

    public async Task SetStringAsync(string key, string value)
    {
        var ok = await _db.StringSetAsync(Prefixed(key), value);
        if (!ok)
            throw new RedisCommandException("设置Redis缓存失败");
    }

    public Task<bool> ExistsAsync(string key) => _db.KeyExistsAsync(Prefixed(key));

    public Task SetHashAsync(string key, IReadOnlyDictionary<string, string> hash)
    {
        var entries = hash.Select(pair => new HashEntry(pair.Key, pair.Value)).ToArray();
        return _db.HashSetAsync(Prefixed(key), entries);
    }

    public async Task<Dictionary<string, string>> GetHashAsync(string key)
    {
        var result = new Dictionary<string, string>();
        foreach (var entry in await _db.HashGetAllAsync(Prefixed(key)))
        {
            result.Add(entry.Name.ToString(), entry.Value.ToString());
        }
        return result;
    }
}
/// <summary>
/// Registration helpers for the Redis-backed cache.
/// </summary>
public static class RedisCacheExtensions
{
    /// <summary>
    /// Connects to Redis using <paramref name="options"/> and registers both the connection
    /// multiplexer and an <see cref="ICacher"/> backed by it.
    /// </summary>
    /// <exception cref="ApplicationException">No Redis connection string is configured.</exception>
    public static IServiceCollection AddRedisCache(this IServiceCollection services, RedisCacheOptions options)
    {
        if (options.Configuration is null)
            throw new ApplicationException("未配置Redis连接字符串");
        var conn = ConnectionMultiplexer.Connect(options.Configuration);
        services.AddSingleton(conn);
        services.AddSingleton<ICacher>(new RedisCache(conn, options.Database, options.InstanceName));
        return services;
    }
}

View File

@@ -0,0 +1,8 @@
namespace MesETL.App.Const;
/// <summary>
/// Shared string constants used across the application.
/// </summary>
public static class ConstVar
{
    public const string Producer = "Producer";
    /// <summary>SQL NULL literal.</summary>
    public const string Null = "NULL";
    /// <summary>MyDumper's representation of NULL in exported CSV data (a literal \N).</summary>
    public const string MyDumperNull = @"\N";
}

View File

@@ -0,0 +1,38 @@
namespace MesETL.App.Const;
/// <summary>
/// Database table name constants used throughout the ETL pipeline.
/// </summary>
public static class TableNames
{
    public const string Machine = "machine";
    public const string Order = "order";
    public const string OrderBlockPlan = "order_block_plan";
    public const string OrderBlockPlanItem = "order_block_plan_item";
    public const string OrderBlockPlanResult = "order_block_plan_result";
    public const string OrderBoxBlock = "order_box_block";
    public const string OrderDataBlock = "order_data_block";
    public const string OrderDataGoods = "order_data_goods";
    public const string OrderDataParts = "order_data_parts";
    public const string OrderItem = "order_item";
    public const string OrderModule = "order_module";
    public const string OrderModuleExtra = "order_module_extra";
    public const string OrderModuleItem = "order_module_item";
    public const string OrderPackage = "order_package";
    public const string OrderPackageItem = "order_package_item";
    public const string OrderPatchDetail = "order_patch_detail";
    public const string OrderProcess = "order_process";
    // NOTE: "schdule" spelling appears to match the actual table names in the source
    // database (the same spelling is used in the column-type configuration) — do not "fix" it.
    public const string OrderProcessSchedule = "order_process_schdule";
    public const string OrderProcessStep = "order_process_step";
    public const string OrderProcessStepItem = "order_process_step_item";
    public const string OrderScrapBoard = "order_scrap_board";
    public const string ProcessGroup = "process_group";
    public const string ProcessInfo = "process_info";
    public const string ProcessItemExp = "process_item_exp";
    public const string ProcessScheduleCapacity = "process_schdule_capacity";
    public const string ProcessStepEfficiency = "process_step_efficiency";
    public const string ReportTemplate = "report_template";
    public const string SimplePackage = "simple_package";
    public const string SimplePlanOrder = "simple_plan_order";
    public const string SysConfig = "sys_config";
    public const string WorkCalendar = "work_calendar";
    public const string WorkShift = "work_shift";
    public const string WorkTime = "work_time";
}

119
MesETL.App/DataRecord.cs Normal file
View File

@@ -0,0 +1,119 @@
namespace MesETL.App;
/// <summary>
/// A single table row flowing through the ETL pipeline: parallel lists of headers and field
/// values plus the table (and, optionally, database) it belongs to. All column-name lookups
/// are case-insensitive.
/// </summary>
public class DataRecord : ICloneable
{
    /// <summary>
    /// Tries to read a field by column name (case-insensitive).
    /// </summary>
    /// <exception cref="InvalidOperationException">The record has no headers.</exception>
    public static bool TryGetField(DataRecord record, string columnName, out string value)
    {
        value = string.Empty;
        if (record.Headers is null)
            throw new InvalidOperationException("Cannot get field when headers of a record have not been set.");
        var idx = IndexOfIgnoreCase(record.Headers, columnName);
        if (idx == -1)
            return false;
        value = record.Fields[idx];
        return true;
    }

    /// <summary>
    /// Reads a field by column name (case-insensitive).
    /// </summary>
    /// <exception cref="InvalidOperationException">The record has no headers.</exception>
    /// <exception cref="IndexOutOfRangeException">The column does not exist.</exception>
    public static string GetField(DataRecord record, string columnName)
    {
        if (record.Headers is null)
            throw new InvalidOperationException("Headers have not been set.");
        var idx = IndexOfIgnoreCase(record.Headers, columnName);
        if (idx is -1)
            throw new IndexOutOfRangeException(
                $"Column name '{columnName}' not found in this record, table name '{record.TableName}'.");
        return record.Fields[idx];
    }

    // Case-insensitive IndexOf over the header list; -1 when not found.
    private static int IndexOfIgnoreCase(IList<string> list, string value)
    {
        var idx = -1;
        for (var i = 0; i < list.Count; i++)
        {
            if (list[i].Equals(value, StringComparison.OrdinalIgnoreCase))
            {
                idx = i;
                break;
            }
        }
        return idx;
    }

    public IList<string> Fields { get; }
    public IList<string> Headers { get; }
    public string TableName { get; }
    public string? Database { get; set; }
    // Total character count of the fields at construction time.
    // NOTE: not updated by AddField/RemoveField or field assignment.
    public long FieldCharCount { get; }

    /// <summary>
    /// Creates a record; the field and header sequences are copied into new lists.
    /// </summary>
    /// <exception cref="ArgumentException">Field count does not match header count.</exception>
    public DataRecord(IEnumerable<string> fields, string tableName, IEnumerable<string> headers, string? database = null)
    {
        Fields = fields.ToList();
        TableName = tableName;
        Headers = headers.ToList();
        Database = database;
        if (Fields.Count != Headers.Count)
            throw new ArgumentException(
                $"The number of fields does not match the number of headers. Expected: {Headers.Count} Got: {Fields.Count} Fields: {string.Join(',', Fields)}",
                nameof(fields));
        FieldCharCount = Fields.Sum(x => (long)x.Length);
    }

    public string this[int index]
    {
        get => Fields[index];
        set => Fields[index] = value;
    }

    public string this[string columnName]
    {
        get => GetField(this, columnName);
        set => SetField(columnName, value);
    }

    public int FieldCount => Fields.Count;

    public bool TryGetField(string columnName, out string value) => TryGetField(this, columnName, out value);

    public bool SetField(string columnName, string value) => SetField(this, columnName, value);

    /// <summary>
    /// Writes a field by column name (case-insensitive).
    /// </summary>
    /// <exception cref="InvalidOperationException">The record has no headers.</exception>
    /// <exception cref="IndexOutOfRangeException">The column does not exist.</exception>
    public bool SetField(DataRecord record, string columnName, string value)
    {
        if (record.Headers is null)
            throw new InvalidOperationException("Headers have not been set.");
        var idx = IndexOfIgnoreCase(record.Headers, columnName);
        if (idx is -1)
            // BUG FIX: the message previously lacked the closing quote around the table name.
            throw new IndexOutOfRangeException(
                $"Column name '{columnName}' not found in this record, table name '{record.TableName}'.");
        record.Fields[idx] = value;
        return true;
    }

    /// <summary>
    /// Appends a new column and value; throws when the column already exists (case-insensitive).
    /// </summary>
    public void AddField(string columnName, string value)
    {
        if (IndexOfIgnoreCase(Headers, columnName) != -1)
            throw new InvalidOperationException($"{TableName}: 列名 '{columnName}' 已存在");
        Fields.Add(value);
        Headers.Add(columnName);
    }

    /// <summary>
    /// Removes a column and its value; throws when the column does not exist (case-insensitive).
    /// </summary>
    public void RemoveField(string columnName)
    {
        var idx = IndexOfIgnoreCase(Headers, columnName);
        if (idx == -1)
            throw new InvalidOperationException($"{TableName}: 列名 '{columnName}' 不存在");
        Fields.RemoveAt(idx);
        // BUG FIX: previously Headers.Remove(columnName) removed by case-SENSITIVE match while
        // the index above was found case-insensitively, so a differently-cased argument removed
        // the field but left the header behind, desynchronizing Fields and Headers.
        Headers.RemoveAt(idx);
    }

    public bool HeaderExists(string columnName) => IndexOfIgnoreCase(Headers, columnName) != -1;

    /// <summary>
    /// Deep-copies the field and header lists into a new record.
    /// </summary>
    public object Clone()
    {
        return new DataRecord(new List<string>(Fields), TableName, new List<string>(Headers), Database);
    }
}

View File

@@ -0,0 +1,138 @@
using System.Text.Json;
using System.Text.RegularExpressions;
using ZstdSharp;
namespace MesETL.App.Helpers;
/// <summary>
/// Helpers for working with MyDumper export artifacts (SQL metadata files,
/// CSV file names, ZSTD-compressed payloads).
/// </summary>
public static partial class DumpDataHelper
{
    [GeneratedRegex(@"'.+\.dat'")]
    private static partial Regex MatchDatFile();

    [GeneratedRegex(@"\([^)]*\)")]
    private static partial Regex MatchBrackets();

    /// <summary>
    /// Reads the CSV column headers from the content of a MyDumper-exported SQL file.
    /// The headers are taken from the first parenthesized column list.
    /// </summary>
    /// <param name="content">Content of the SQL file.</param>
    /// <returns>Column names with surrounding backticks (or a leading <c>@</c> for variables) stripped.</returns>
    /// <exception cref="ArgumentException">No parenthesized header list was found.</exception>
    public static string[] GetCsvHeadersFromSqlFile(string content)
    {
        var match = MatchBrackets().Match(content);
        if (!match.Success)
            throw new ArgumentException("输入的SQL内容有误无法提取表头", nameof(content));
        return ParseHeader(match.ValueSpan);

        static string[] ParseHeader(ReadOnlySpan<char> headerStr)
        {
            headerStr = headerStr[1..^1]; // strip the surrounding parentheses
            // Size the range buffer from the actual separator count. The previous fixed
            // stackalloc of 50 silently merged the tail columns of any table with more
            // than 50 columns into one field.
            var columnCount = headerStr.Count(',') + 1;
            Span<Range> ranges = columnCount <= 64 ? stackalloc Range[64] : new Range[columnCount];
            var count = headerStr.Split(ranges, ',');
            var arr = new string[count];
            for (var i = 0; i < count; i++)
            {
                arr[i] = headerStr[ranges[i]].Trim("@`").ToString(); // strip backticks; strip '@' from variables
            }
            return arr;
        }
    }

    /// <summary>
    /// Extracts the table name from a MyDumper CSV file name of the form
    /// <c>database.table.NNNNN.csv[.zst]</c> — i.e. the segment between the first two dots.
    /// </summary>
    public static string GetTableNameFromCsvFileName(ReadOnlySpan<char> filePath)
    {
        // Strip the directory part. Handle both separators: the previous code only
        // looked for '\\' and returned wrong names for paths produced on Linux.
        var sep = filePath.LastIndexOfAny('\\', '/');
        filePath = filePath[(sep + 1)..];

        var firstDot = filePath.IndexOf('.');
        var rest = filePath[(firstDot + 1)..];
        var secondDot = rest.IndexOf('.');
        return rest[..secondDot].ToString();
    }

    /// <summary>
    /// Extracts the data file names referenced by a MyDumper-exported SQL file.
    /// </summary>
    /// <param name="txt">Content of the SQL file.</param>
    /// <param name="regex">Pattern matching single-quoted file names, e.g. <see cref="MatchDatFile"/>.</param>
    public static Task<string[]> GetCsvFileNamesFromSqlFileAsync(string txt, Regex regex)
    {
        var matches = regex.Matches(txt);
        // ValueSpan[1..^1] strips the surrounding single quotes.
        return Task.FromResult(matches.Select(match => match.ValueSpan[1..^1].ToString()).ToArray());
    }

    /// <summary>
    /// Heuristically checks whether a string looks like a hexadecimal field value:
    /// non-empty, not quoted, all ASCII hex digits, and not purely decimal digits.
    /// </summary>
    public static bool CheckHexField(string? str)
    {
        if (string.IsNullOrWhiteSpace(str))
            return false;
        if (str.StartsWith('\"'))
            return false;

        var isAllDigit = true;
        foreach (var c in str)
        {
            if (!char.IsAsciiHexDigit(c))
                return false;
            if (!char.IsNumber(c))
                isAllDigit = false;
        }

        // A purely numeric string is more likely a plain decimal value than hex data.
        return !isAllDigit;
    }

    /// <summary>
    /// Decompresses the input stream as ZSTD and returns the decoded text.
    /// </summary>
    public static async Task<string> DecompressZstAsStringAsync(Stream stream)
    {
        await using var ds = new DecompressionStream(stream);
        var reader = new StreamReader(ds);
        return await reader.ReadToEndAsync();
    }

    /// <summary>
    /// Returns whether the string is a complete, well-formed JSON document.
    /// </summary>
    public static bool IsJson(string str)
    {
        try
        {
            // JsonDocument rents pooled buffers; dispose it to return them.
            // The previous code leaked the rental on every successful parse.
            using var doc = JsonDocument.Parse(str);
            return true;
        }
        catch (JsonException)
        {
            return false;
        }
    }
}

View File

@@ -0,0 +1,8 @@
namespace MesETL.App.HostedServices.Abstractions
{
    /// <summary>
    /// Pull-style reader that yields <see cref="DataRecord"/>s one at a time from a backing source.
    /// </summary>
    public interface IDataReader : IDisposable
    {
        /// <summary>The record produced by the most recent successful <see cref="ReadAsync"/> call.</summary>
        DataRecord Current { get; }

        /// <summary>Advances to the next record; returns <c>false</c> when the source is exhausted.</summary>
        ValueTask<bool> ReadAsync();
    }
}

View File

@@ -0,0 +1,6 @@
namespace MesETL.App.HostedServices.Abstractions;
/// <summary>
/// The input stage of the pipeline: produces records into the producer queue until exhausted.
/// </summary>
public interface IInputService
{
    /// <summary>Runs the input stage to completion.</summary>
    public Task ExecuteAsync(CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,6 @@
namespace MesETL.App.HostedServices.Abstractions;
/// <summary>
/// The output stage of the pipeline: consumes queued records and writes them to their destination.
/// </summary>
public interface IOutputService
{
    /// <summary>Runs the output stage to completion.</summary>
    public Task ExecuteAsync(CancellationToken ct);
}

View File

@@ -0,0 +1,6 @@
namespace MesETL.App.HostedServices.Abstractions;
/// <summary>
/// The transform stage of the pipeline: consumes records from the producer queue,
/// applies the configured transformations, and routes records onward.
/// </summary>
public interface ITransformService
{
    /// <summary>Runs the transform stage to completion.</summary>
    public Task ExecuteAsync(CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,125 @@
using MesETL.App.HostedServices.Abstractions;
using MesETL.App.Options;
using MesETL.App.Services;
using MesETL.App.Services.ETL;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace MesETL.App.HostedServices;
/// <summary>
/// Metadata describing one input file: its path (as returned by <c>Directory.GetFiles</c>),
/// the table it belongs to, and its column headers.
/// </summary>
public record FileInputInfo
{
    /// <summary>Path of the input file.</summary>
    public required string FileName { get; init; }

    /// <summary>Table the file's rows belong to.</summary>
    public required string TableName { get; init; }

    /// <summary>Column headers for the file's rows.</summary>
    public required string[] Headers { get; init; }
}
/// <summary>
/// Supported kinds of input files.
/// </summary>
public enum FileInputType
{
    MyDumperCsv, // plain CSV exported by MyDumper
    MyDumperZst, // ZSTD-compressed MyDumper CSV
    ErrorLog,    // NOTE(review): presumably re-import of previously recorded error rows — confirm
}
/// <summary>
/// Imports files from the configured input directory: resolves each file's table name
/// and headers, reads its records, and pushes them into the producer queue.
/// </summary>
public class FileInputService : IInputService
{
    private readonly ILogger _logger;
    private readonly DataRecordQueue _producerQueue;
    private readonly IOptions<DataInputOptions> _dataInputOptions;
    private readonly ProcessContext _context;
    private readonly DataReaderFactory _dataReaderFactory;
    private readonly long _memoryThreshold; // bytes; input pauses while the managed heap exceeds this

    public FileInputService(ILogger<FileInputService> logger,
        IOptions<DataInputOptions> dataInputOptions,
        ProcessContext context,
        [FromKeyedServices(Const.ConstVar.Producer)] DataRecordQueue producerQueue,
        DataReaderFactory dataReaderFactory,
        IConfiguration configuration)
    {
        _logger = logger;
        _dataInputOptions = dataInputOptions;
        _context = context;
        _producerQueue = producerQueue;
        _dataReaderFactory = dataReaderFactory;
        // "MemoryThreshold" is configured in GiB; defaults to 8.
        _memoryThreshold = (long)(configuration.GetValue<double>("MemoryThreshold", 8) * 1024 * 1024 * 1024);
    }

    public async Task ExecuteAsync(CancellationToken cancellationToken)
    {
        var inputDir = _dataInputOptions.Value.InputDir ?? throw new ApplicationException("未配置文件输入目录");
        _logger.LogInformation("***** Input service started, working directory: {InputDir} *****", inputDir);

        var trans = _dataInputOptions.Value.FileInputMetaBuilder;
        if (trans is null) throw new ApplicationException("未配置文件名-表名映射委托");

        // Map each file to its table/header metadata; files the builder cannot map are dropped.
        FileInputInfo[] infoArr = Directory.GetFiles(inputDir)
            .Select(f => trans(f))
            .Where(info => info is not null).ToArray()!;

        var orderedInfo = GetFilesInOrder(infoArr).ToArray();

        // Fixed: the message used to read "...founded in directory{OrderedCount} files is matched...".
        _logger.LogInformation("***** {Count} files found in directory, {OrderedCount} files matched with configuration *****",
            infoArr.Length, orderedInfo.Length);
        foreach (var info in orderedInfo)
        {
            _logger.LogDebug("Table {TableName}: {FileName}", info.TableName, info.FileName);
        }

        foreach (var info in orderedInfo)
        {
            _logger.LogInformation("Reading file: {FileName}, table: {TableName}", info.FileName, info.TableName);
            using var source = _dataReaderFactory.CreateReader(info.FileName, info.TableName, info.Headers);

            var count = 0;
            while (await source.ReadAsync())
            {
                // Backpressure: when the managed heap grows past the threshold, force a
                // collection and pause reading so the downstream stages can catch up.
                if (GC.GetTotalMemory(false) > _memoryThreshold)
                {
                    _logger.LogWarning("内存过高,暂缓输入");
                    GC.Collect();
                    await Task.Delay(3000, cancellationToken);
                }

                var record = source.Current;
                await _producerQueue.EnqueueAsync(record);
                count++;
                _context.AddInput();
            }

            _context.AddTableInput(info.TableName, count);
            _logger.LogInformation("Input of table: '{TableName}' finished", info.TableName);
            _dataInputOptions.Value.OnTableInputCompleted?.Invoke(info.TableName);
        }

        _context.CompleteInput();
        _logger.LogInformation("***** Input service finished *****");
    }

    /// <summary>
    /// Applies the configured table order and ignore list to the discovered files.
    /// With no order configured, all non-ignored files are returned; with an order,
    /// only listed (and non-ignored) tables are returned, in that order.
    /// </summary>
    private IEnumerable<FileInputInfo> GetFilesInOrder(FileInputInfo[] inputFiles)
    {
        var tableOrder = _dataInputOptions.Value.TableOrder;
        var ignoreTable = _dataInputOptions.Value.TableIgnoreList;
        if (tableOrder is null or { Length: 0 })
            // Fixed: the ignore list was previously not applied when no order was configured.
            return inputFiles.Where(f => !ignoreTable.Contains(f.TableName, StringComparer.OrdinalIgnoreCase));

        return Yield();

        IEnumerable<FileInputInfo> Yield()
        {
            foreach (var tableName in tableOrder)
            {
                var target = inputFiles.FirstOrDefault(f =>
                    f.TableName.Equals(tableName, StringComparison.OrdinalIgnoreCase));
                // Fixed: the ignore check now matches case-insensitively, consistent
                // with the table-name lookup above.
                if (target is not null && !ignoreTable.Contains(target.TableName, StringComparer.OrdinalIgnoreCase))
                    yield return target;
            }
        }
    }
}

View File

@@ -0,0 +1,191 @@
using System.Diagnostics;
using System.Text;
using MesETL.App.Helpers;
using MesETL.App.HostedServices.Abstractions;
using MesETL.App.Options;
using MesETL.App.Services;
using MesETL.App.Services.ErrorRecorder;
using MesETL.Shared.Helper;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MySqlConnector;
namespace MesETL.App.HostedServices;
/// <summary>
/// Application entry service: either runs an ad-hoc SQL command against every tenant
/// database (when a "Command" config value is present), or runs the full
/// input → transform → output pipeline and exports a markdown result report.
/// </summary>
public class MainHostedService : BackgroundService
{
    private Stopwatch? _stopwatch;
    private readonly IInputService _input;
    private readonly ITransformService _transform;
    private readonly IOutputService _output;
    private readonly TaskMonitorService _taskMonitor;
    private readonly ILogger _logger;
    private readonly ProcessContext _context;
    private readonly IOptions<DatabaseOutputOptions> _databaseOptions;
    private readonly IOptions<TenantDbOptions> _tenantDbOptions;
    private readonly IConfiguration _config;

    public MainHostedService(IInputService input,
        ITransformService transform,
        IOutputService output,
        ILogger<MainHostedService> logger,
        IOptions<TenantDbOptions> tenantDbOptions,
        IOptions<DatabaseOutputOptions> databaseOptions,
        IConfiguration config,
        ProcessContext context,
        TaskMonitorService taskMonitor)
    {
        _input = input;
        _transform = transform;
        _output = output;
        _logger = logger;
        _tenantDbOptions = tenantDbOptions;
        _databaseOptions = databaseOptions;
        _config = config;
        _context = context;
        _taskMonitor = taskMonitor;
    }

    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        var command = _config["Command"];
        if (!string.IsNullOrEmpty(command))
        {
            // Fixed: this message used to be logged unconditionally, before the command check.
            _logger.LogInformation("Command argument detected, execute for each database");
            _logger.LogInformation("***** Running Sql Command *****");
            await ExecuteEachDatabase(command, stoppingToken);
            Environment.Exit(0);
        }

        _stopwatch = Stopwatch.StartNew();
        var enableUnsafeVar = _config.GetValue<bool>("UnsafeVariable", false);
        if (enableUnsafeVar)
            // Enable delayed flush and disable the redo log.
            // Do NOT shut down the database server while the redo log is disabled!
            await SetVariableAsync();

        // Monitor runs alongside the pipeline; its task is intentionally not awaited here.
        _ = Task.Run(async () => await _taskMonitor.Monitor(stoppingToken), stoppingToken);
        var inputTask = ExecuteAndCatch(
            async () => await _input.ExecuteAsync(stoppingToken), "文件输入程序出现异常", stoppingToken);
        var transformTask = ExecuteAndCatch(
            async () => await _transform.ExecuteAsync(stoppingToken), "转换程序出现异常", stoppingToken);
        var outputTask = ExecuteAndCatch(
            async () => await _output.ExecuteAsync(stoppingToken), "输出程序出现异常", stoppingToken);
        await Task.WhenAll(inputTask, transformTask, outputTask);
        _stopwatch.Stop();
        _logger.LogInformation("***** All tasks completed *****");
        _logger.LogInformation("***** ElapseTime: {Time}", (_stopwatch.ElapsedMilliseconds / 1000f).ToString("F3"));

        try
        {
            await Task.Delay(5000, stoppingToken);
        }
        catch (OperationCanceledException)
        {
            // Fixed: swallow cancellation here so the unsafe variables below are ALWAYS
            // restored — previously a shutdown during this delay left the redo log disabled.
        }

        if (enableUnsafeVar)
            await SetVariableAsync(false); // restore delayed flush and re-enable the redo log

        if (!stoppingToken.IsCancellationRequested)
        {
            await ExportResultAsync();
            _logger.LogInformation("The execution result export to {Path}",
                Path.Combine(AppDomain.CurrentDomain.BaseDirectory, $"Result-{ErrorRecorder.UID}.md"));
            Environment.Exit(0);
        }
        else Environment.Exit(1);
    }

    /// <summary>
    /// Runs a pipeline stage; on any exception, logs it, records it on the context,
    /// and terminates the whole process (the stages cannot run without each other).
    /// </summary>
    private Task ExecuteAndCatch(Func<Task> func, string message, CancellationToken ct)
    {
        return Task.Run(async () =>
        {
            try
            {
                await func();
            }
            catch (Exception e)
            {
                _logger.LogCritical(e, "{Msg}\t{ErrMsg}", message, e.Message);
                _context.AddException(e);
                Environment.Exit(1);
            }
        }, ct);
    }

    /// <summary>
    /// Toggles unsafe MySQL server settings that speed up bulk loading:
    /// relaxed flush-at-commit and the InnoDB redo log.
    /// </summary>
    /// <param name="enable"><c>true</c> to enter fast/unsafe mode; <c>false</c> to restore safe defaults.</param>
    private async Task SetVariableAsync(bool enable = true)
    {
        var connStr = _databaseOptions.Value.ConnectionString
                      ?? throw new ApplicationException("分库配置中没有配置数据库");
        if (enable)
        {
            await DatabaseHelper.NonQueryAsync(connStr,
                """
                SET GLOBAL innodb_flush_log_at_trx_commit = 0;
                ALTER INSTANCE DISABLE INNODB REDO_LOG;
                """);
        }
        else
        {
            await DatabaseHelper.NonQueryAsync(connStr,
                """
                SET GLOBAL innodb_flush_log_at_trx_commit = 1;
                ALTER INSTANCE ENABLE INNODB REDO_LOG;
                """);
        }
    }

    /// <summary>
    /// Runs the given SQL command concurrently against every configured tenant database.
    /// </summary>
    private async Task ExecuteEachDatabase(string command, CancellationToken cancellationToken = default)
    {
        var databases = _tenantDbOptions.Value.DbGroup?.Keys
                        ?? throw new ApplicationException("分库配置中没有配置数据库");
        var list = new List<Task>();
        foreach (var db in databases)
        {
            var connStr = new MySqlConnectionStringBuilder(_databaseOptions.Value.ConnectionString
                                                           ?? throw new ApplicationException("没有配置数据库连接字符串"))
            {
                ConnectionTimeout = 60,
                DefaultCommandTimeout = 0, // ad-hoc commands may run long; no timeout
                Database = db
            }.ConnectionString;
            var task = Task.Run(async () => await DatabaseHelper.NonQueryAsync(connStr, command),
                cancellationToken);
            list.Add(task);
        }

        await Task.WhenAll(list);
    }

    /// <summary>
    /// Writes a markdown summary (counts, per-table progress, elapsed time, throughput)
    /// to Result-&lt;UID&gt;.md next to the executable.
    /// </summary>
    private async Task ExportResultAsync()
    {
        var sb = new StringBuilder();
        if (_context.HasException)
            sb.AppendLine("# Program Completed With Error");
        else sb.AppendLine("# Program Completed Successfully");

        sb.AppendLine("## Process Count");
        var processCount = new[]
        {
            new { State = "Input", Count = _context.InputCount },
            new { State = "Transform", Count = _context.TransformCount },
            new { State = "Output", Count = _context.OutputCount }
        };
        sb.AppendLine(processCount.ToMarkdownTable());
        sb.AppendLine("\n---\n");
        sb.AppendLine("## Table Output Progress");
        var tableOutputProgress = _context.TableProgress.Select(pair =>
            new { Table = pair.Key, Count = pair.Value }).OrderBy(s => s.Table);
        sb.AppendLine(tableOutputProgress.ToMarkdownTable());
        sb.AppendLine("\n---\n");
        sb.AppendLine("## Result");
        var elapsedTime = (_stopwatch!.ElapsedMilliseconds / 1000f);
        var result = new[]
        {
            new { Field = "ElapsedTime", Value = elapsedTime.ToString("F2") },
            new
            {
                Field = "Average Output Speed",
                // Fixed: value used to render as "123.45records/s" (missing space).
                Value = (_context.OutputCount / elapsedTime).ToString("F2") + " records/s"
            }
        };
        sb.AppendLine(result.ToMarkdownTable());

        await File.WriteAllTextAsync(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, $"Result-{ErrorRecorder.UID}.md"),
            sb.ToString());
    }
}

View File

@@ -0,0 +1,147 @@
using System.Buffers;
using MesETL.App.Helpers;
using MesETL.App.HostedServices.Abstractions;
using MesETL.App.Options;
using MesETL.App.Services;
using MesETL.App.Services.ErrorRecorder;
using MesETL.Shared.Helper;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MySqlConnector;
using MySqlDestination = MesETL.App.Services.ETL.MySqlDestination;
using TaskExtensions = MesETL.Shared.Helper.TaskExtensions;
namespace MesETL.App.HostedServices;
/// <summary>
/// 数据导出服务将数据导出至MySql服务 — writes transformed records to MySQL.
/// Spawns one worker per database queue; each worker batches records and flushes
/// them through <see cref="MySqlDestination"/>.
/// </summary>
public class OutputService : IOutputService
{
    private readonly ILogger _logger;
    private readonly IOptions<DatabaseOutputOptions> _outputOptions;
    private readonly ProcessContext _context;
    private readonly ErrorRecorderFactory _errorRecorderFactory;
    private readonly RecordQueuePool _queuePool;

    public OutputService(ILogger<OutputService> logger,
        IOptions<DatabaseOutputOptions> outputOptions,
        ProcessContext context,
        RecordQueuePool queuePool,
        ErrorRecorderFactory errorRecorderFactory)
    {
        _logger = logger;
        _outputOptions = outputOptions;
        _context = context;
        _queuePool = queuePool;
        _errorRecorderFactory = errorRecorderFactory;
    }

    /// <summary>
    /// Polls the queue pool while the transform stage runs: queues can appear
    /// dynamically as records are routed to new databases, so each newly seen
    /// queue gets its own worker task (at most 5 concurrent database workers).
    /// Marks output complete once all workers have drained.
    /// </summary>
    public async Task ExecuteAsync(CancellationToken ct)
    {
        _logger.LogInformation("***** Output service started *****");
        var dbTaskManager = new TaskManager(5);
        var dbTasks = new Dictionary<string, Task>();
        while (!_context.IsTransformCompleted)
        {
            foreach (var (db, queue) in _queuePool.Queues)
            {
                if (!dbTasks.ContainsKey(db))
                {
                    dbTasks.Add(db, await dbTaskManager.CreateTaskAsync(
                        async () => await StartDatabaseWorker(db, queue, ct), ct));
                }
            }

            await Task.Delay(500, ct);
        }

        // Wait for every database worker (and their flush subtasks) to finish.
        await TaskExtensions.WaitUntil(() => dbTaskManager.RunningTaskCount == 0, 25, ct);
        _context.CompleteOutput();
        _logger.LogInformation("***** Output service finished *****");
    }

    /// <summary>
    /// Per-database worker: drains <paramref name="queue"/>, batching records until
    /// <c>FlushCount</c> is reached, then flushes each batch on a pooled subtask.
    /// Runs until the transform stage completes AND the queue is empty.
    /// NOTE(review): dequeuing spin-polls when the queue is momentarily empty — confirm
    /// whether the queue offers a blocking/async dequeue.
    /// </summary>
    private async Task StartDatabaseWorker(string db, DataRecordQueue queue, CancellationToken ct = default)
    {
        _logger.LogInformation("*****开启输出线程,数据库: {db} *****", db);
        var taskManager = new TaskManager(_outputOptions.Value.MaxDatabaseOutputTask);
        var ignoreOutput = new HashSet<string>(_outputOptions.Value.NoOutput);
        var tmp = new List<DataRecord>();
        while (!_context.IsTransformCompleted || queue.Count > 0)
        {
            if (ct.IsCancellationRequested)
                break;

            // Records for tables on the NoOutput list are dequeued and dropped.
            if (!queue.TryDequeue(out var record) || ignoreOutput.Contains(record.TableName)) continue;

            // Sanity check: every record must have been routed to this worker's database.
            var dbName = record.Database ?? throw new ApplicationException("输出的记录缺少数据库名");
            if (dbName != db)
                throw new ApplicationException($"输出记录的数据与当前输出线程不匹配,记录:{dbName}, 输出线程:{db}");

            tmp.Add(record);
            if (tmp.Count >= _outputOptions.Value.FlushCount)
            {
                // Hand the full batch to a flush subtask and start a fresh buffer.
                var list = tmp;
                tmp = [];
                await taskManager.CreateTaskAsync(async arg => // convert to method group
                {
                    var tuple = arg as Tuple<string, List<DataRecord>>;
                    try
                    {
                        await FlushAsync(tuple!.Item1, tuple.Item2);
                    }
                    catch (Exception e)
                    {
                        _logger.LogError(e, "输出记录时发生错误");
                        throw;
                    }
                }, Tuple.Create(dbName, list), ct);
            }
        }

        // Wait for all flush subtasks to complete.
        await TaskExtensions.WaitUntil(() => taskManager.RunningTaskCount == 0, 10, ct);

        // Flush any remaining partial batch.
        if (tmp.Count > 0)
        {
            await FlushAsync(db, tmp);
        }

        _logger.LogInformation("*****输出线程结束,数据库: {db} *****", db);
    }

    /// <summary>
    /// Writes one batch of records to <paramref name="dbName"/> via <see cref="MySqlDestination"/>,
    /// then updates global and per-table output counters.
    /// </summary>
    private async Task FlushAsync(string dbName, IEnumerable<DataRecord> records)
    {
        var connStr = new MySqlConnectionStringBuilder(_outputOptions.Value.ConnectionString
                                                       ?? throw new ApplicationException("未配置数据库连接字符串"))
        {
            CharacterSet = "utf8mb4",
            AllowUserVariables = true,      // destination SQL uses session variables
            IgnoreCommandTransaction = true,
            TreatTinyAsBoolean = false,     // TINYINT(1) columns hold numeric data here
            ConnectionTimeout = 60,
            DefaultCommandTimeout = 0,      // bulk inserts may run long; no timeout
            SslMode = MySqlSslMode.None,
            Database = dbName
        }.ConnectionString;
        await using var output = new MySqlDestination(connStr, _logger,
            _outputOptions, _errorRecorderFactory.CreateOutput(dbName), _context);

        var tableOutput = new Dictionary<string, int>();
        foreach (var record in records)
        {
            await output.WriteRecordAsync(record);
            tableOutput.AddOrUpdate(record.TableName, 1, (_, v) => v + 1);
        }

        await output.FlushAsync(_outputOptions.Value.MaxAllowedPacket);
        foreach (var (key, value) in tableOutput)
        {
            _context.AddOutput(value);
            _context.AddTableOutput(key, value);
        }

        _logger.LogTrace("Flushed {Count} records", tableOutput.Values.Sum(i => i));
    }
}

View File

@@ -1,44 +1,36 @@
using System.Diagnostics; using System.Diagnostics;
using ConsoleApp2.Const; using System.Text;
using ConsoleApp2.Services; using MesETL.App.Services;
using MesETL.App.Services.Loggers;
using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
namespace ConsoleApp2.HostedServices; namespace MesETL.App.HostedServices;
/// <summary> /// <summary>
/// 任务监控 /// 任务监控
/// </summary> /// </summary>
public class TaskMonitorService : BackgroundService public class TaskMonitorService
{ {
private readonly IHostApplicationLifetime _lifetime; private readonly IEnumerable<ITaskMonitorLogger> _monitorLoggers;
private readonly ILogger<TaskMonitorService> _logger;
private readonly ProcessContext _context; private readonly ProcessContext _context;
private readonly DataRecordQueue _producerQueue; private readonly DataRecordQueue _producerQueue;
private readonly DataRecordQueue _consumerQueue; private readonly RecordQueuePool _queuePool;
private string _outputPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Log/progress.txt");
public TaskMonitorService(IHostApplicationLifetime lifetime, public TaskMonitorService(ProcessContext context,
ILogger<TaskMonitorService> logger, [FromKeyedServices(Const.ConstVar.Producer)]
ProcessContext context,
[FromKeyedServices(ProcessStep.Producer)]
DataRecordQueue producerQueue, DataRecordQueue producerQueue,
[FromKeyedServices(ProcessStep.Consumer)] RecordQueuePool queuePool,
DataRecordQueue consumerQueue) IEnumerable<ITaskMonitorLogger> monitorLoggers)
{ {
_lifetime = lifetime;
_logger = logger;
_context = context; _context = context;
_producerQueue = producerQueue; _producerQueue = producerQueue;
_consumerQueue = consumerQueue; _queuePool = queuePool;
_monitorLoggers = monitorLoggers;
} }
protected override async Task ExecuteAsync(CancellationToken stoppingToken) public async Task Monitor(CancellationToken stoppingToken)
{
await Task.Factory.StartNew(Monitor, stoppingToken);
}
private async Task Monitor()
{ {
var sw = Stopwatch.StartNew(); var sw = Stopwatch.StartNew();
var lastTime = sw.ElapsedMilliseconds; var lastTime = sw.ElapsedMilliseconds;
@@ -47,9 +39,8 @@ public class TaskMonitorService : BackgroundService
var lastOutputCount = _context.OutputCount; var lastOutputCount = _context.OutputCount;
bool endCheck = false; bool endCheck = false;
while (true) while (!stoppingToken.IsCancellationRequested)
{ {
if (_context.GetExceptions().Count>0) return;
EndCheck: EndCheck:
// var running = 0; // var running = 0;
// var error = 0; // var error = 0;
@@ -83,22 +74,46 @@ public class TaskMonitorService : BackgroundService
var inputSpeed = (inputCount - lastInputCount) / elapseTime; var inputSpeed = (inputCount - lastInputCount) / elapseTime;
var transformSpeed = (transformCount - lastTransformCount) / elapseTime; var transformSpeed = (transformCount - lastTransformCount) / elapseTime;
var outputSpeed = (outputCount - lastOutputCount) / elapseTime; var outputSpeed = (outputCount - lastOutputCount) / elapseTime;
// _logger.LogInformation( // _logger.LogInformation(
// "Task monitor: running: {Running}, error: {Error}, completed: {Completed}, canceled: {Canceled}, outputSpeed: {Speed} records/s", // "Task monitor: running: {Running}, error: {Error}, completed: {Completed}, canceled: {Canceled}, outputSpeed: {Speed} records/s",
// running, error, completed, canceled, outputSpeed); // running, error, completed, canceled, outputSpeed);
_logger.LogInformation( foreach (var logger in _monitorLoggers)
"Process monitor: input: {inputStatus}, transform: {transformStatus}, output: {outputStatus}\nInput: {InputCount}, Transform: {TransformCount}, Output: {OutputCount}", {
_context.IsInputCompleted ? "completed" : $"running {inputSpeed:F2} records/s", logger.LogStatus("Monitor: Progress status", new Dictionary<string, string>
_context.IsTransformCompleted ? "completed" : $"running {transformSpeed:F2} records/s", {
_context.IsOutputCompleted ? "completed" : $"running {outputSpeed:F2} records/s", {"Input",_context.IsInputCompleted ? "OK" : $"{inputSpeed:F2}/s" },
inputCount, {"Transform", _context.IsTransformCompleted ? "OK" : $"{transformSpeed:F2}/s" },
transformCount, {"Output", _context.IsOutputCompleted ? "OK" : $"{outputSpeed:F2}/s" },
outputCount);
_logger.LogInformation("Queue monitor: producer queue: {ProducerQueue}, consumer queue: {ConsumerQueue}", {"| Input Queue", _producerQueue.Count.ToString() },
_producerQueue.Count, _consumerQueue.Count); {"Output Queue", _queuePool.Queues.Values.Sum(queue => queue.Count).ToString()},
{"Memory", $"{GC.GetTotalMemory(false) / 1024 / 1024} MiB"},
});
await Task.Delay(5000); var dict = _context.TableProgress
.ToDictionary(kv => kv.Key, kv => $"{kv.Value.input}/{kv.Value.output}");
logger.LogStatus("Monitor: Table progress", dict, ITaskMonitorLogger.LogLevel.Progress);
var sb = new StringBuilder("Table Progress: \n");
foreach (var kv in dict)
{
sb.Append(kv.Key).AppendLine(kv.Value);
}
sb.AppendLine($"LongestCharCount: {_producerQueue.LongestFieldCharCount}");
await File.WriteAllTextAsync(_outputPath, sb.ToString(), CancellationToken.None);
// logger.LogStatus("Monitor: Process count", new Dictionary<string, string>
// {
// {"Input", inputCount.ToString()},
// {"Transform", transformCount.ToString()},
// {"Output", outputCount.ToString()}
// }, ITaskMonitorLogger.LogLevel.Progress);
}
await Task.Delay(5000, stoppingToken);
lastTime = time; lastTime = time;
lastInputCount = inputCount; lastInputCount = inputCount;
@@ -115,9 +130,6 @@ public class TaskMonitorService : BackgroundService
break; break;
} }
} }
_logger.LogInformation("***** All tasks completed *****");
_logger.LogInformation("***** ElapseTime: {Time}", (sw.ElapsedMilliseconds / 1000f).ToString("F3"));
// _lifetime.StopApplication();
} }
} }

View File

@@ -0,0 +1,131 @@
using MesETL.App.Cache;
using MesETL.App.HostedServices.Abstractions;
using MesETL.App.Options;
using MesETL.App.Services;
using MesETL.App.Services.ErrorRecorder;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace MesETL.App.HostedServices;
/// <summary>
/// Per-record context handed to the configurable transform delegates
/// (filter / modify / rebuild / cache) in <c>DataTransformOptions</c>.
/// </summary>
public record DataTransformContext(DataRecord Record, ICacher Cacher, ILogger Logger);
/// <summary>
/// 数据处理服务,对导入后的数据进行处理 — consumes records from the producer queue,
/// applies the configured filter/replace/cache delegates, routes each record to its
/// tenant database queue, and optionally rebuilds derived records.
/// </summary>
public class TransformService : ITransformService
{
    private readonly ILogger _logger;
    private readonly IOptions<DataTransformOptions> _options;
    private readonly DataRecordQueue _producerQueue;
    private readonly RecordQueuePool _queuePool;
    private readonly ProcessContext _context;
    private readonly ICacher _cache;
    private readonly ErrorRecorderFactory _errorRecorderFactory;

    public TransformService(ILogger<TransformService> logger,
        IOptions<DataTransformOptions> options,
        [FromKeyedServices(Const.ConstVar.Producer)] DataRecordQueue producerQueue,
        RecordQueuePool queuePool,
        ProcessContext context,
        ICacher cache,
        ErrorRecorderFactory errorRecorderFactory)
    {
        _logger = logger;
        _options = options;
        _producerQueue = producerQueue;
        _queuePool = queuePool;
        _context = context;
        _cache = cache;
        _errorRecorderFactory = errorRecorderFactory;
    }

    public async Task ExecuteAsync(CancellationToken cancellationToken)
    {
        _logger.LogInformation("***** Data transform service started, thread id: {ThreadId} *****", Environment.CurrentManagedThreadId);
        await TransformWorker();
        _logger.LogInformation("***** Data transformation service finished *****");
    }

    /// <summary>
    /// Single-threaded transform loop; runs until input is complete and the producer
    /// queue is drained.
    /// </summary>
    public async Task TransformWorker()
    {
        while (!_context.IsInputCompleted || _producerQueue.Count > 0)
        {
            // NOTE(review): spin-polls when the queue is momentarily empty — confirm
            // whether the queue offers a blocking/async dequeue.
            if (!_producerQueue.TryDequeue(out var record))
            {
                continue;
            }

            try
            {
                var context = new DataTransformContext(record, _cache, _logger);

                // Filtering: drop records the configured predicate rejects.
                if (_options.Value.EnableFilter)
                {
                    var filter = _options.Value.RecordFilter;
                    if (filter is not null && await filter(context) == false) continue;
                }

                // Replacement: the delegate may modify the record or return a new one.
                // NOTE(review): `context` still references the pre-replacement record, so
                // RecordCache and RecordReBuild below see the original — confirm intended.
                if (_options.Value.EnableReplacer)
                {
                    var replacer = _options.Value.RecordModify;
                    if (replacer is not null)
                    {
                        record = await replacer(context);
                    }
                }

                // Field caching for cross-record lookups.
                var cacher = _options.Value.RecordCache;
                if (cacher is not null)
                    await cacher.Invoke(context);

                // Routing: decide which tenant database this record belongs to.
                var dbFilter = _options.Value.DatabaseFilter
                               ?? throw new ApplicationException("未配置数据库过滤器");
                record.Database = dbFilter(record);
                await _queuePool[record.Database].EnqueueAsync(record);
                _context.AddTransform();

                // Rebuild: derive additional records from the current one.
                if (_options.Value.EnableReBuilder)
                {
                    var addRecords = _options.Value.RecordReBuild?.Invoke(context);
                    if (addRecords is { Count: > 0 })
                    {
                        foreach (var rc in addRecords)
                        {
                            // Rebuilt records are routed with the PARENT record's database
                            // (dbFilter(record), not dbFilter(rc)) — TODO confirm intended.
                            // (Removed a dead null check: dbFilter is throw-checked above.)
                            rc.Database = dbFilter(record);
                            await _queuePool[rc.Database].EnqueueAsync(rc);
                            _context.AddTransform();
                        }
                    }
                }
            }
            catch (Exception e)
            {
                // Record the failure; in strict mode it aborts the pipeline,
                // otherwise it is logged and processing continues.
                _context.AddException(e);
                var errorRecorder = _errorRecorderFactory.CreateTransform();
                await errorRecorder.LogErrorRecordAsync(record, e);
                if (!_options.Value.StrictMode)
                    _logger.LogError(e, "数据转换时发生错误");
                else throw;
            }
        }

        _context.CompleteTransform();
    }
}

View File

@@ -0,0 +1,43 @@
using MesETL.App.HostedServices.Abstractions;
using MesETL.App.Services;
using Microsoft.Extensions.Logging;
namespace MesETL.App.HostedServices;
// Null output service for testing: drains all queues without writing anywhere.
public class VoidOutputService : IOutputService
{
    private readonly ILogger _logger;
    private readonly RecordQueuePool _queuePool;
    private readonly ProcessContext _context;

    public VoidOutputService(
        ProcessContext context, ILogger<VoidOutputService> logger, RecordQueuePool queuePool)
    {
        _context = context;
        _logger = logger;
        _queuePool = queuePool;
    }

    /// <summary>
    /// Discards every queued record, counting it as output; removes each queue once
    /// the transform stage has completed and the queue is empty.
    /// NOTE(review): this is a synchronous busy loop — acceptable for a test double.
    /// </summary>
    public Task ExecuteAsync(CancellationToken ct)
    {
        _logger.LogInformation("***** Void Output Service Started *****");
        while (!_context.IsTransformCompleted || _queuePool.Queues.Count > 0)
        {
            // Fixed: iterate a snapshot — RemoveQueue mutates the pool, which would
            // invalidate a live enumerator over _queuePool.Queues.
            foreach (var pair in _queuePool.Queues.ToArray())
            {
                if (_context.IsTransformCompleted && pair.Value.Count == 0)
                {
                    _queuePool.RemoveQueue(pair.Key);
                    continue;
                }

                // The record itself is intentionally discarded.
                if (!pair.Value.TryDequeue(out _)) continue;
                _context.AddOutput();
            }
        }

        _context.CompleteOutput();
        _logger.LogInformation("***** Void Output Service Stopped *****");
        return Task.CompletedTask;
    }
}

View File

@@ -1,11 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk"> <Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup> <PropertyGroup>
<AssemblyName>MesETL</AssemblyName>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework> <TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
<DockerDefaultTargetOS>Linux</DockerDefaultTargetOS> <DockerDefaultTargetOS>Linux</DockerDefaultTargetOS>
<RootNamespace>MesETL.App</RootNamespace>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
@@ -19,15 +21,18 @@
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="Microsoft.Extensions.Caching.StackExchangeRedis" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" /> <PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="MySqlConnector" Version="2.3.3" />
<PackageReference Include="Serilog" Version="3.1.2-dev-02097" /> <PackageReference Include="Serilog" Version="3.1.2-dev-02097" />
<PackageReference Include="Serilog.Extensions.Hosting" Version="8.0.0" /> <PackageReference Include="Serilog.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" /> <PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
<PackageReference Include="Serilog.Sinks.File" Version="5.0.1-dev-00972" /> <PackageReference Include="Serilog.Sinks.File" Version="5.0.1-dev-00972" />
<PackageReference Include="ServiceStack.Text" Version="8.0.0" /> <PackageReference Include="ServiceStack.Text" Version="8.0.0" />
<PackageReference Include="StackExchange.Redis" Version="2.7.17" />
<PackageReference Include="ZstdSharp.Port" Version="0.7.4" /> <PackageReference Include="ZstdSharp.Port" Version="0.7.4" />
</ItemGroup> </ItemGroup>
<ItemGroup>
<ProjectReference Include="..\MesETL.Shared\MesETL.Shared.csproj" />
</ItemGroup>
</Project> </Project>

View File

@@ -0,0 +1,52 @@
using MesETL.App.HostedServices;
namespace MesETL.App.Options
{
    /// <summary>
    /// Options controlling how input files are discovered, ordered, and parsed.
    /// </summary>
    public class DataInputOptions
    {
        // Directory scanned for input files.
        public string? InputDir { get; set; }

        #region CSV

        /// <summary>
        /// Quote character surrounding string values; defaults to a double quote (").
        /// </summary>
        public char QuoteChar { get; set; } = '"';

        /// <summary>
        /// Field delimiter; defaults to a comma (,).
        /// </summary>
        public string Delimiter { get; set; } = ",";

        #endregion

        #region Mock

        // When true, generated (mock) data is used instead of real input files.
        public bool UseMock { get; set; }

        // Multiplier applied to each table's configured mock row count.
        public double MockCountMultiplier { get; set; } = 1;

        /// <summary>
        /// Table -> mock configuration; currently configured by hand.
        /// </summary>
        public Dictionary<string, TableMockConfig>? TableMockConfig { get; set; }

        #endregion

        #region ManualSet

        // Tables are read in this order; when set, tables not listed are skipped.
        public string[]? TableOrder { get; set; }

        // Tables that must not be read at all.
        public string[] TableIgnoreList { get; set; } = [];

        /// <summary>
        /// Maps an input file name to its table name and headers.
        /// </summary>
        public Func<string, FileInputInfo?>? FileInputMetaBuilder { get; set; } //TODO: extract out of the options class

        // Invoked with the table name after a table's input completes.
        public Action<string>? OnTableInputCompleted { get; set; }

        #endregion
    }
}

View File

@@ -0,0 +1,50 @@
using MesETL.App.HostedServices;
namespace MesETL.App.Options;
/// <summary>
/// Special storage/encoding category for an output column.
/// </summary>
public enum ColumnType
{
Blob,
Text,
Json,
// No special type configured for the column.
UnDefine,
}
/// <summary>
/// Options and delegates configuring the transform stage of the pipeline.
/// </summary>
public class DataTransformOptions
{
// NOTE(review): presumably a failed transform aborts the run when true instead of skipping — confirm in TransformService.
public bool StrictMode { get; set; } = true;
// Per-stage toggles for the filter/replace/rebuild delegates below.
public bool EnableFilter { get; set; } = true;
public bool EnableReplacer { get; set; } = true;
public bool EnableReBuilder { get; set; } = true;
/// <summary>
/// Cutoff month in yyyyMM format; records older than this are filtered out.
/// </summary>
public string CleanDate { get; set; } = "202301";
/// <summary>
/// Record -> Database name
/// Routes a record to its target (shard) database.
/// </summary>
public Func<DataRecord, string>? DatabaseFilter { get; set; }
/// <summary>
/// Context -> Should output
/// Predicate deciding whether the record is kept.
/// </summary>
public Func<DataTransformContext, Task<bool>>? RecordFilter { get; set; } // record filter
/// <summary>
/// Context -> New record
/// Modifies the current record or replaces it entirely.
/// </summary>
public Func<DataTransformContext, Task<DataRecord>>? RecordModify { get; set; } // record replacement
/// <summary>
/// Context -> New rebuild records
/// Builds additional records derived from the current one.
/// </summary>
public Func<DataTransformContext, IList<DataRecord>?>? RecordReBuild { get; set; } // derived records
/// <summary>
/// Context -> void
/// Caches selected fields of the record for later lookups.
/// </summary>
public Func<DataTransformContext, Task>? RecordCache { get; set; } // record caching
}

View File

@@ -0,0 +1,36 @@
namespace MesETL.App.Options;
/// <summary>
/// Options controlling how records are written to the target database.
/// </summary>
public class DatabaseOutputOptions
{
    /// <summary>Connection string of the target database server.</summary>
    public string? ConnectionString { get; set; }

    /// <summary>Upper bound, in bytes, for a single generated SQL statement/packet.</summary>
    public int MaxAllowedPacket { get; set; } = 32 * 1024 * 1024;

    /// <summary>Number of records buffered before a flush to the database.</summary>
    public int FlushCount { get; set; } = 10000;

    /// <summary>Maximum number of concurrent database output tasks.</summary>
    public int MaxDatabaseOutputTask { get; set; } = 4;

    /// <summary>When true, JSON column values are treated as hex-encoded data.</summary>
    public bool TreatJsonAsHex { get; set; } = true;

    /// <summary>Tables to skip when writing output.</summary>
    public string[] NoOutput { get; set; } = [];

    /// <summary>Per-table custom update clause — presumably consumed by the output service for upserts; confirm there.</summary>
    public Dictionary<string, string>? ForUpdate { get; set; }

    /// <summary>
    /// Special column types used when importing data.
    /// </summary>
    public Dictionary<string, ColumnType> ColumnTypeConfig { get; set; } = new(); // "table.column" -> type

    /// <summary>Looks up the configured type for a column; UnDefine when none is configured.</summary>
    public ColumnType GetColumnType(string table, string column) =>
        ColumnTypeConfig.TryGetValue($"{table}.{column}", out var type) ? type : ColumnType.UnDefine;

    /// <summary>Tries to fetch the update clause configured for a table.</summary>
    public bool TryGetForUpdate(string table, out string? forUpdate)
    {
        forUpdate = null;
        return ForUpdate is not null && ForUpdate.TryGetValue(table, out forUpdate);
    }
}

View File

@@ -0,0 +1,8 @@
namespace MesETL.App.Options;
/// <summary>
/// Options for the Redis cache backend.
/// </summary>
public class RedisCacheOptions
{
// Connection/configuration string — presumably a StackExchange.Redis configuration string; confirm in the cache setup.
public string? Configuration { get; init; }
// Instance name — presumably used as a key prefix; confirm in the cache setup.
public string InstanceName { get; init; } = "";
// Index of the Redis logical database to use.
public int Database { get; init; } = 0;
}

View File

@@ -0,0 +1,32 @@
namespace MesETL.App.Options;
/// <summary>
/// Mock-data generation settings for a single table.
/// </summary>
public struct TableMockConfig
{
    /// <summary>
    /// Whether mocked records are produced via deep copy.
    /// </summary>
    public bool UseDeepCopy { get; set; }

    /// <summary>
    /// Number of records to mock.
    /// </summary>
    public long MockCount { get; set; }

    /// <summary>
    /// Columns auto-incremented while mocking; requires MockCount to be enabled.
    /// </summary>
    public string[]? AutoIncrementColumn { get; set; } = null; // TODO: replace with a custom delegate

    /// <summary>Creates a fully-specified mock configuration.</summary>
    public TableMockConfig(bool useDeepCopy, long mockCount, string[]? autoIncrementColumn)
    {
        UseDeepCopy = useDeepCopy;
        MockCount = mockCount;
        AutoIncrementColumn = autoIncrementColumn;
    }

    /// <summary>Deconstructs the configuration into its three settings.</summary>
    public void Deconstruct(out bool useDeepCopy, out long mockCount, out string[]? autoIncrementColumn)
        => (useDeepCopy, mockCount, autoIncrementColumn) = (UseDeepCopy, MockCount, AutoIncrementColumn);
}

View File

@@ -0,0 +1,25 @@
namespace MesETL.App.Options;
/// <summary>
/// Tenant sharding configuration: maps a tenant key value to a database name.
/// </summary>
public class TenantDbOptions
{
    /// <summary>Name of the column used as the tenant (shard) key, e.g. CompanyID.</summary>
    public string? TenantKey { get; set; }

    /// <summary>Name of the database group selected from configuration.</summary>
    public string? UseDbGroup { get; set; }

    /// <summary>
    /// Key-Value: {DbName}-{TenantKeyLessThan}
    /// Each entry maps a database name to the exclusive upper bound of the tenant key
    /// values it holds. Entries are probed in configuration order, so they must be
    /// listed with ascending bounds.
    /// </summary>
    public Dictionary<string, int>? DbGroup { get; set; }

    /// <summary>
    /// Returns the name of the first configured database whose upper bound is greater
    /// than <paramref name="tenantKeyValue"/>.
    /// </summary>
    /// <exception cref="ApplicationException">No database group is configured.</exception>
    /// <exception cref="ArgumentOutOfRangeException">No database matches the given key value.</exception>
    public string GetDbNameByTenantKeyValue(int tenantKeyValue)
    {
        // Note: configuration order matters — entries must be sorted by ascending bound.
        if (DbGroup is null) throw new ApplicationException("分库配置中没有发现任何数据库");
        foreach (var (dbName, upperBound) in DbGroup)
        {
            if (upperBound > tenantKeyValue)
                return dbName;
        }
        // Bug fix: the original chain (.Cast<KeyValuePair<string,int>?>().FirstOrDefault(...)!.Value.Key)
        // dereferenced a null nullable via '!' when nothing matched, throwing NullReferenceException
        // before the intended ArgumentOutOfRangeException could be raised.
        throw new ArgumentOutOfRangeException(nameof(tenantKeyValue),
            $"分库配置中没有任何符合'{nameof(tenantKeyValue)}'值的数据库");
    }
}

666
MesETL.App/Program.cs Normal file
View File

@@ -0,0 +1,666 @@
// #define USE_TEST_DB // 测试库的结构与生产库不一样如果使用测试库运行则加上USE_TEST_DB预处理器指令
using MesETL.App;
using MesETL.App.Services;
using MesETL.App.Services.ETL;
using MesETL.App.Cache;
using MesETL.App.Const;
using MesETL.App.HostedServices;
using MesETL.App.HostedServices.Abstractions;
using MesETL.App.Options;
using MesETL.App.Services.ErrorRecorder;
using MesETL.App.Services.Loggers;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Serilog;
using Serilog.Events;
using DumpDataHelper = MesETL.App.Helpers.DumpDataHelper;
await RunProgram();
return;
// Composition root: binds configuration, wires up the ETL pipeline services, then runs the host.
async Task RunProgram()
{
ThreadPool.SetMaxThreads(200, 200);
var host = Host.CreateApplicationBuilder(args);
host.Configuration.AddJsonFile(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "appsettings.json"), false, false);
// Short/long command-line switches mapped onto configuration keys.
host.Configuration.AddCommandLine(args, new Dictionary<string, string>
{
{ "-d", "Input:InputDir" },
{ "--InputDir", "Input:InputDir" },
{ "-s", "Output:ConnectionString" },
{ "--ConnectionString", "Output:ConnectionString" },
{ "-r", "RedisCache:Configuration" },
{ "--Redis", "RedisCache:Configuration" },
{ "-g", "TenantDb:UseDbGroup" },
{ "--UseDbGroup", "TenantDb:UseDbGroup" },
{ "-c", "Command" },
{ "--Command", "Command" }
});
// Bind and validate the mandatory configuration sections up-front.
var inputOptions = host.Configuration.GetRequiredSection("Input").Get<DataInputOptions>()
?? throw new ApplicationException("缺少Input配置");
var transformOptions = host.Configuration.GetRequiredSection("Transform").Get<DataTransformOptions>()
?? throw new ApplicationException("缺少Transform配置");
var outputOptions = host.Configuration.GetRequiredSection("Output").Get<DatabaseOutputOptions>()
?? throw new ApplicationException("缺少Output配置");
var redisSection = host.Configuration.GetRequiredSection("RedisCache");
var redisOptions = redisSection.Get<RedisCacheOptions>() ?? throw new ApplicationException("缺少RedisCache配置");
var tenantDbSection = host.Configuration.GetRequiredSection("TenantDb");
var tenantDbOptions = new TenantDbOptions()
{
TenantKey = tenantDbSection.GetValue<string>(nameof(TenantDbOptions.TenantKey)) ?? throw new ApplicationException("分库配置缺少分库键TenantKey"),
UseDbGroup = tenantDbSection.GetValue<string>(nameof(TenantDbOptions.UseDbGroup)) ?? throw new ApplicationException("分库配置缺少使用分库组UseDbGroup")
};
tenantDbOptions.DbGroup = tenantDbSection.GetRequiredSection($"DbGroups:{tenantDbOptions.UseDbGroup}").Get<Dictionary<string,int>>()
?? throw new ApplicationException($"分库配置无法解析分库组{tenantDbOptions.UseDbGroup},请检查配置");
host.Services.Configure<TenantDbOptions>(options =>
{
options.TenantKey = tenantDbOptions.TenantKey;
options.UseDbGroup = tenantDbOptions.UseDbGroup;
options.DbGroup = tenantDbOptions.DbGroup;
});
host.Services.Configure<RedisCacheOptions>(redisSection);
// Records older than CleanDate (yyyyMM) are filtered out by the transform stage below.
var oldestTime = DateTime.ParseExact(transformOptions.CleanDate, "yyyyMM", System.Globalization.DateTimeFormatInfo.InvariantInfo);
var oldestTimeInt = int.Parse(transformOptions.CleanDate);
// Input configuration
host.Services.Configure<DataInputOptions>(options =>
{
options.InputDir = inputOptions.InputDir ?? throw new ApplicationException("未配置输入目录");
options.UseMock = inputOptions.UseMock;
options.TableMockConfig = inputOptions.TableMockConfig;
options.MockCountMultiplier = inputOptions.MockCountMultiplier;
options.TableIgnoreList = inputOptions.TableIgnoreList;
// Configure how an input file name is mapped to table name and headers
options.FileInputMetaBuilder = fileName =>
{
if (fileName.EndsWith(".dat.zst"))
{
var tableName = DumpDataHelper.GetTableNameFromCsvFileName(
Path.GetFileNameWithoutExtension(fileName)); // strips the .zst extension
string[]? headers;
try
{
// Look for the SQL file of the same table in the same directory
var sqlFile = Directory.GetFiles(options.InputDir)
.SingleOrDefault(f => f.Equals(fileName.Replace(".dat.zst",".sql.zst")));
if (sqlFile is null)
return null;
headers = DumpDataHelper.GetCsvHeadersFromSqlFile(
DumpDataHelper.DecompressZstAsStringAsync(File.OpenRead(sqlFile)).Result);
}
catch (InvalidOperationException e)
{
throw new ApplicationException($"目录下不止一个{tableName}表的SQL文件", e);
}
return new FileInputInfo
{
FileName = fileName,
TableName = tableName,
Headers = headers
};
}
return null;
};
// When a table finishes input, evict cache entries that are no longer needed
options.OnTableInputCompleted = table =>
{
switch (table)
{
case TableNames.OrderBlockPlan:
MemoryCache.Instance?.Delete(s => s.StartsWith(TableNames.Order + '-'));
break;
case TableNames.OrderItem:
MemoryCache.Instance?.Delete(s => s.StartsWith(TableNames.OrderBlockPlan + '-'));
break;
case TableNames.OrderProcessSchedule:
MemoryCache.Instance?.Delete(s => s.StartsWith(TableNames.OrderProcess + '-'));
break;
}
};
options.TableOrder = inputOptions.TableOrder ??
[
TableNames.Machine,
TableNames.Order,
TableNames.OrderBoxBlock, // depends on Order.CompanyID
TableNames.OrderDataBlock, // depends on Order.CompanyID
TableNames.OrderBlockPlan,
TableNames.OrderBlockPlanResult,// depends on OrderBlockPlan.CompanyID / deleted
TableNames.OrderItem,
TableNames.OrderDataGoods,
TableNames.OrderDataParts,
TableNames.OrderModule,
TableNames.OrderModuleExtra,
TableNames.OrderModuleItem,
TableNames.OrderPackage,
TableNames.OrderProcess,
TableNames.OrderProcessStep,
TableNames.OrderProcessStepItem,// depends on OrderProcess.ShardKey / deleted
TableNames.OrderProcessSchedule,
TableNames.OrderScrapBoard,
TableNames.ProcessGroup,
TableNames.ProcessInfo,
TableNames.ProcessItemExp,
TableNames.ProcessScheduleCapacity,
TableNames.ProcessStepEfficiency,
TableNames.ReportTemplate,
TableNames.SimplePackage,
TableNames.SimplePlanOrder,
TableNames.SysConfig,
TableNames.WorkCalendar,
TableNames.WorkShift,
TableNames.WorkTime
];
// options.TableMockConfig = new Dictionary<string, TableMockConfig>
// {
// { TableNames.Machine, new TableMockConfig(true, 14655, ["ID"]) },
// { TableNames.Order, new TableMockConfig(true, 5019216, ["OrderNo"]) },
// { TableNames.OrderDataBlock, new TableMockConfig(true, 731800334, ["ID"]) },
// { TableNames.OrderDataGoods, new TableMockConfig(true, 25803671, ["ID"]) },
// { TableNames.OrderDataParts, new TableMockConfig(true, 468517543, ["ID"]) },
// { TableNames.OrderModule, new TableMockConfig(true, 103325385, ["ID"]) },
// { TableNames.OrderModuleExtra, new TableMockConfig(true, 54361321, ["ID"]) },
// { TableNames.OrderModuleItem, new TableMockConfig(true, 69173339, ["ID"]) },
// { TableNames.OrderPackage, new TableMockConfig(true, 16196195, ["ID"]) },
// { TableNames.OrderProcess, new TableMockConfig(true, 3892685, ["ID"]) },
// { TableNames.OrderProcessStep, new TableMockConfig(true, 8050349, ["ID"]) },
// { TableNames.OrderProcessStepItem, new TableMockConfig(true, 14538058, ["ID"]) },
// { TableNames.OrderScrapBoard, new TableMockConfig(true, 123998, ["ID"]) },
// { TableNames.ProcessGroup, new TableMockConfig(true, 1253, ["ID"]) },
// { TableNames.ProcessInfo, new TableMockConfig(true, 7839, ["ID"]) },
// { TableNames.ProcessItemExp, new TableMockConfig(true, 28, ["ID"]) },
// { TableNames.ProcessScheduleCapacity, new TableMockConfig(true, 39736, ["ID"]) },
// { TableNames.ProcessStepEfficiency, new TableMockConfig(true, 8, ["ID"]) },
// { TableNames.ReportTemplate, new TableMockConfig(true, 7337, ["ID"]) },
// { TableNames.SimplePackage, new TableMockConfig(true, 130436, ["ID"]) },
// { TableNames.SysConfig, new TableMockConfig(true, 2296, ["ID"]) },
// { TableNames.WorkCalendar, new TableMockConfig(true, 11, ["ID"]) },
// { TableNames.WorkShift, new TableMockConfig(true, 59, ["ID"]) },
// { TableNames.WorkTime, new TableMockConfig(true, 62, ["ID"]) }
// };
options.TableMockConfig = new Dictionary<string, TableMockConfig>
{
{ TableNames.Machine, new TableMockConfig(true, 14655, ["ID"]) },
{ TableNames.Order, new TableMockConfig(true, 50192, ["OrderNo"]) },
{ TableNames.OrderDataBlock, new TableMockConfig(true, 7318003, ["ID"]) },
{ TableNames.OrderDataGoods, new TableMockConfig(true, 258036, ["ID"]) },
{ TableNames.OrderDataParts, new TableMockConfig(true, 4685175, ["ID"]) },
{ TableNames.OrderItem, new TableMockConfig(true, 13298896, ["ID"])},
{ TableNames.OrderModule, new TableMockConfig(true, 1033253, ["ID"]) },
{ TableNames.OrderModuleExtra, new TableMockConfig(true, 543613, ["ID"]) },
{ TableNames.OrderModuleItem, new TableMockConfig(true, 691733, ["ID"]) },
{ TableNames.OrderPackage, new TableMockConfig(true, 161961, ["ID"]) },
{ TableNames.OrderProcess, new TableMockConfig(true, 38926, ["ID"]) },
{ TableNames.OrderProcessStep, new TableMockConfig(true, 80503, ["ID"]) },
{ TableNames.OrderProcessStepItem, new TableMockConfig(true, 145380, ["ID"]) },
{ TableNames.OrderScrapBoard, new TableMockConfig(true, 1239, ["ID"]) },
{ TableNames.ProcessGroup, new TableMockConfig(true, 125, ["ID"]) },
{ TableNames.ProcessInfo, new TableMockConfig(true, 783, ["ID"]) },
{ TableNames.ProcessItemExp, new TableMockConfig(true, 28, ["ID"]) },
{ TableNames.ProcessScheduleCapacity, new TableMockConfig(true, 39736, ["ID"]) },
{ TableNames.ProcessStepEfficiency, new TableMockConfig(true, 8, ["ID"]) },
{ TableNames.ReportTemplate, new TableMockConfig(true, 7337, ["ID"]) },
{ TableNames.SimplePackage, new TableMockConfig(true, 130436, ["ID"]) },
{ TableNames.SysConfig, new TableMockConfig(true, 2296, ["Key"]) },
{ TableNames.WorkCalendar, new TableMockConfig(true, 11, ["ID"]) },
{ TableNames.WorkShift, new TableMockConfig(true, 59, ["ID"]) },
{ TableNames.WorkTime, new TableMockConfig(true, 62, ["ID"]) }
};
});
host.Services.Configure<DataTransformOptions>(options =>
{
// ShardKey = OrderNo chars [2..6) plus a trailing '0' (e.g. "202301…" -> "23010").
static string CalculateShardKeyByOrderNo(ReadOnlySpan<char> orderNo)
=> $"{orderNo[2..6]}0";
options.StrictMode = transformOptions.StrictMode;
options.EnableFilter = transformOptions.EnableFilter;
options.EnableReplacer = transformOptions.EnableReplacer;
options.EnableReBuilder = transformOptions.EnableReBuilder;
// order_block_plan_item and order_package_item are not imported; they are rebuilt directly from order_item data
// Data cleanup
options.RecordFilter = async context =>
{
var record = context.Record;
var cache = context.Cacher;
switch (record.TableName)
{
// OrderBoxBlock: drop records whose Order.OrderNo does not exist
case TableNames.OrderBoxBlock:
{
if (!await cache.ExistsAsync(CacheKeysFunc.Order_OrderNo_CompanyID(record["OrderNo"])))
return false;
break;
}
// OrderDataBlock: drop records whose Order.OrderNo does not exist
case TableNames.OrderDataBlock:
{
if (!await cache.ExistsAsync(CacheKeysFunc.Order_OrderNo_CompanyID(record["OrderNo"])))
return false;
break;
}
// OrderDataParts: drop records whose Order.OrderNo does not exist
case TableNames.OrderDataParts:
{
if (!await cache.ExistsAsync(CacheKeysFunc.Order_OrderNo_CompanyID(record["OrderNo"])))
return false;
break;
}
// OrderBlockPlan: drop records with CreateTime < 202301 (CleanDate)
case TableNames.OrderBlockPlan:
{
var time = DateTime.Parse(record["CreateTime"].Trim('"','\''));
if (time < oldestTime)
return false;
// if (!DumpDataHelper.IsJson(record["OrderNos"])) return false; //Json列合法检查
break;
}
// OrderBlockPlanResult: drop records whose order_block_plan.ID does not exist
case TableNames.OrderBlockPlanResult:
{
if (!await cache.ExistsAsync(CacheKeysFunc.OrderBlockPlan_ID_CompanyID(record["ID"])))
return false;
break;
}
// case TableNames.OrderBlockPlanResult: // 用SaveTime过滤
// {
// if (DateTime.Parse(record["SaveTime"].Trim('"', '\'')) < oldestTime)
// return false;
// break;
// }
// OrderDataGoods: JSON column validity check (currently disabled)
case TableNames.OrderDataGoods:
{
// if (!DumpDataHelper.IsJson(record["ExtraProp"])) return false;
break;
}
// OrderModule: drop records with OrderNo < 202301
case TableNames.OrderModule:
{
var orderNo = record["OrderNo"];
if(int.Parse(orderNo.AsSpan(0, 6).ToString()) < oldestTimeInt)
return false;
break;
}
// OrderProcess: drop records with OrderNo < 202301
case TableNames.OrderProcess:
{
var orderNo = record["OrderNo"];
if(int.Parse(orderNo.AsSpan(0, 6).ToString()) < oldestTimeInt)
return false;
break;
}
// OrderProcessStep: drop records with OrderNo < 202301
case TableNames.OrderProcessStep:
{
var orderNo = record["OrderNo"];
if(int.Parse(orderNo.AsSpan(0, 6).ToString()) < oldestTimeInt)
return false;
break;
}
// OrderProcessStepItem: drop records whose OrderProcess.ID does not exist
case TableNames.OrderProcessStepItem:
{
if (!await cache.ExistsAsync(CacheKeysFunc.OrderProcess_ID_ShardKey(record["OrderProcessID"])))
return false;
break;
}
// SimplePackage: drop records with OrderNo < 202301
case TableNames.SimplePackage:
{
var orderNo = record["OrderNo"];
if(int.Parse(orderNo.AsSpan(0, 6).ToString()) < oldestTimeInt)
return false;
break;
}
// SimplePlanOrder: drop records with CreateTime < 202301
case TableNames.SimplePlanOrder:
{
var time = DateTime.Parse(record["CreateTime"].Trim('"', '\''));
if (time < oldestTime)
return false;
break;
}
}
return true;
};
// Data replacement
/*
 * Null-value handling:
 * Some columns are nullable in the production schema but non-nullable in the target
 * schema; such columns are filled with a default based on the column type:
 * int (or any non-unsigned integer) -> -1
 * varchar -> '' (empty string)
 * datetime -> '1000-01-01' (minimum datetime)
 * text -> 0 (hexadecimal 0; text columns in MyDumper output are hex-encoded)
 */
const string DefaultInt = "0";
const string DefaultStr = "''";
const string DefaultDateTime = "'1000-01-01'";
const string DefaultText = "0";
options.RecordModify = async context =>
{
// Replaces a MyDumper NULL marker in a non-nullable field with a default value, logging a warning.
void ReplaceIfMyDumperNull(DataRecord record, string fieldName, string replaceValue)
{
if (record[fieldName] is ConstVar.MyDumperNull)
{
context.Logger.LogWarning("发现不可空的字段为空({TableName}.{FieldName}),填充默认值: {DefaultValue}",
record.TableName, fieldName, replaceValue);
record[fieldName] = replaceValue;
}
}
var record = context.Record;
var cache = context.Cacher;
switch (record.TableName)
{
// Machine: fill non-nullable columns
case TableNames.Machine:
ReplaceIfMyDumperNull(record, "Name", DefaultStr);
ReplaceIfMyDumperNull(record, "CreateTime", DefaultDateTime);
ReplaceIfMyDumperNull(record, "CreatorID", DefaultInt);
ReplaceIfMyDumperNull(record, "EditTime", DefaultDateTime);
ReplaceIfMyDumperNull(record, "EditorID", DefaultInt);
ReplaceIfMyDumperNull(record, "Settings", DefaultText);
break;
// Order: fill non-nullable columns
case TableNames.Order:
ReplaceIfMyDumperNull(record, "Deleted", DefaultInt);
break;
// OrderBlockPlan: handle the text->json column
case TableNames.OrderBlockPlan:
// Null out every value equal to '[]' (string length <= 2, i.e. hex length < 4); [] = 0x5b5d
if (record["OrderNos"].Length <= 4)
record["OrderNos"] = "NULL";
break;
// OrderBlockPlanResult: add CompanyID
case TableNames.OrderBlockPlanResult:
record.AddField("CompanyID",
// Look up OrderBlockPlan.ID -> CompanyID
ThrowIfNoCached(await cache.GetStringAsync(CacheKeysFunc.OrderBlockPlan_ID_CompanyID(record["ID"])),
TableNames.OrderBlockPlanResult, TableNames.OrderBlockPlan, "ID", "无法获取对应的CompanyID"));
break;
// OrderBoxBlock: add the CompanyID column
case TableNames.OrderBoxBlock:
record.AddField("CompanyID",
// Look up Order.OrderNo -> CompanyID
ThrowIfNoCached(await cache.GetStringAsync(CacheKeysFunc.Order_OrderNo_CompanyID(record["OrderNo"])),
TableNames.OrderBoxBlock, TableNames.Order, "OrderNo", "无法获取对应的CompanyID"));
break;
// Fix OrderDataBlock.CompanyID
case TableNames.OrderDataBlock:
record["CompanyID"] =
// Look up Order.OrderNo -> CompanyID
ThrowIfNoCached(await cache.GetStringAsync(CacheKeysFunc.Order_OrderNo_CompanyID(record["OrderNo"])),
TableNames.OrderDataBlock, TableNames.Order, "OrderNo", "无法获取对应的CompanyID");
break;
// Fix OrderDataParts.CompanyID
case TableNames.OrderDataParts:
record["CompanyID"] =
// Look up Order.OrderNo -> CompanyID
ThrowIfNoCached(await cache.GetStringAsync(CacheKeysFunc.Order_OrderNo_CompanyID(record["OrderNo"])),
TableNames.OrderDataParts, TableNames.Order, "OrderNo", "无法获取对应的CompanyID");
break;
// OrderModule: add the ShardKey column, remove the ViewFileName column
case TableNames.OrderModule:
record.AddField("ShardKey", CalculateShardKeyByOrderNo(record["OrderNo"]));
record.RemoveField("ViewFileName");
break;
// OrderProcess: add the ShardKey column (original comment also mentions NextStepID null -> 0, not handled here)
case TableNames.OrderProcess:
record.AddField("ShardKey", CalculateShardKeyByOrderNo(record["OrderNo"]));
break;
// OrderProcessStep: add ShardKey
case TableNames.OrderProcessStep:
record.AddField("ShardKey", CalculateShardKeyByOrderNo(record["OrderNo"]));
break;
// OrderProcessStepItem: add the ShardKey column, fill non-nullable columns
case TableNames.OrderProcessStepItem:
ReplaceIfMyDumperNull(record, "DataID", DefaultInt);
record.AddField("ShardKey",
// Look up OrderProcess.ID -> ShardKey
ThrowIfNoCached(await cache.GetStringAsync(CacheKeysFunc.OrderProcess_ID_ShardKey(record["OrderProcessID"])),
TableNames.OrderProcessStepItem, TableNames.OrderProcessStep, "OrderProcessID", "无法获取对应的ShardKey"));
break;
// OrderScrapBoard: fill non-nullable columns
case TableNames.OrderScrapBoard:
ReplaceIfMyDumperNull(record, "Color", DefaultStr);
ReplaceIfMyDumperNull(record, "GoodsName", DefaultStr);
ReplaceIfMyDumperNull(record, "Material", DefaultStr);
ReplaceIfMyDumperNull(record, "MaterialName", DefaultStr);
break;
// ProcessItemExp: fill non-nullable columns
case TableNames.ProcessItemExp:
ReplaceIfMyDumperNull(record, "MaxPartsID", DefaultInt);
ReplaceIfMyDumperNull(record, "ProcessGroupID", DefaultInt);
break;
// SimplePlanOrder: fill non-nullable columns, add Deleted
case TableNames.SimplePlanOrder:
ReplaceIfMyDumperNull(record, "CreateTime", DefaultDateTime);
ReplaceIfMyDumperNull(record, "UpdateTime", DefaultDateTime);
ReplaceIfMyDumperNull(record, "CompanyID", DefaultInt);
ReplaceIfMyDumperNull(record, "SingleName", DefaultStr);
record.AddField("Deleted", "0");
break;
}
return record;
// Throws when a required cached mapping is missing.
string ThrowIfNoCached(string? cached, string tableName, string cachedTableName, string cachedColumn, string appendMessage = "")
{
if (cached is null)
throw new InvalidDataException(
$"{tableName}数据异常,在缓存中未找到对应{cachedTableName}.{cachedColumn}\t{appendMessage}");
return cached;
}
};
// Record caching
options.RecordCache = async context =>
{
var record = context.Record;
var cache = context.Cacher;
switch (record.TableName)
{
// Cache Order.OrderNo -> CompanyID
case TableNames.Order:
await cache.SetStringAsync(
CacheKeysFunc.Order_OrderNo_CompanyID(record["OrderNo"]),
record["CompanyID"]);
break;
// Cache OrderBlockPlan.ID -> CompanyID
case TableNames.OrderBlockPlan:
await cache.SetStringAsync(
CacheKeysFunc.OrderBlockPlan_ID_CompanyID(record["ID"]),
record["CompanyID"]);
break;
// Cache OrderProcess.ID -> ShardKey
case TableNames.OrderProcess:
await cache.SetStringAsync(
CacheKeysFunc.OrderProcess_ID_ShardKey(record["ID"]),
record["ShardKey"]);
break;
}
};
// Database (shard) routing
options.DatabaseFilter = record =>
{
var companyId = int.Parse(record[tenantDbOptions.TenantKey]); // every record must carry a CompanyID, otherwise this throws
return tenantDbOptions.GetDbNameByTenantKeyValue(companyId);
};
// Record rebuilding
options.RecordReBuild = context =>
{
var record = context.Record;
var resultList = new List<DataRecord>();
// Split OrderItem into order_block_plan_item / order_package_item records
if (record.TableName == TableNames.OrderItem)
{
record.TryGetField("ID", out var itemId);
record.TryGetField("ShardKey", out var shardKey);
record.TryGetField("PlanID", out var planId);
record.TryGetField("PackageID", out var packageId);
record.TryGetField("CompanyID", out var companyId);
if(!int.TryParse(planId, out var pid))
throw new ApplicationException($"数据发生异常OrderItem.PlanID值: {planId}");
if (pid > 0)
{
resultList.Add(new DataRecord(new[] { itemId, shardKey, planId, companyId },
TableNames.OrderBlockPlanItem,
["ItemID", "ShardKey", "PlanID", "CompanyID"]
));
}
if(!int.TryParse(packageId, out var pkid))
throw new ApplicationException($"数据发生异常OrderItem.PackageID值: {packageId}");
if(pkid > 0)
{
resultList.Add(new DataRecord(new[] { itemId, shardKey, packageId, companyId },
TableNames.OrderPackageItem,
[ "ItemID", "ShardKey", "PackageID", "CompanyID" ]
));
}
}
return resultList;
};
});
host.Services.Configure<DatabaseOutputOptions>(options =>
{
options.ConnectionString = outputOptions.ConnectionString;
options.FlushCount = outputOptions.FlushCount;
options.MaxAllowedPacket = outputOptions.MaxAllowedPacket / 2; // NOTE(review): halved — presumably headroom under the server's max_allowed_packet; confirm
options.MaxDatabaseOutputTask = outputOptions.MaxDatabaseOutputTask;
options.TreatJsonAsHex = outputOptions.TreatJsonAsHex;
options.NoOutput = outputOptions.NoOutput;
options.ForUpdate = outputOptions.ForUpdate;
#if USE_TEST_DB
// Test Server
options.ColumnTypeConfig = new Dictionary<string, ColumnType>
{
{ "simple_plan_order.PlaceData", ColumnType.Blob },
{ "order_block_plan_result.PlaceData", ColumnType.Blob },
{ "order_box_block.Data", ColumnType.Blob },
{ "order_data_goods.ExtraProp", ColumnType.Json },
{ "order_module_extra.JsonStr", ColumnType.Text },
{ "process_info.Users", ColumnType.Text },
{ "order_process_schdule.CustomOrderNo", ColumnType.Text },
{ "order_process_schdule.OrderProcessStepName", ColumnType.Text },
{ "order_process_schdule.AreaName", ColumnType.Text },
{ "order_process_schdule.ConsigneeAddress", ColumnType.Text },
{ "order_process_schdule.ConsigneePhone", ColumnType.Text },
{ "report_source.Sql", ColumnType.Text },
{ "report_source.KeyValue", ColumnType.Text },
{ "report_source.Setting", ColumnType.Text },
{ "order_data_block.RemarkJson", ColumnType.Text },
{ "order_patch_detail.BlockDetail", ColumnType.Json },
{ "order_scrap_board.OutLineJson", ColumnType.Text },
{ "simple_package.Items", ColumnType.Json },
{ "order_batch_pack_config.Setting", ColumnType.Text },
{ "machine.Settings", ColumnType.Text },
{ "sys_config.Value", ColumnType.Text },
{ "sys_config.JsonStr", ColumnType.Text },
{ "process_item_exp.ItemJson", ColumnType.Text },
{ "report_template.Template", ColumnType.Text },
{ "report_template.SourceConfig", ColumnType.Text },
{ "order_block_plan.OrderNos", ColumnType.Json },
{ "order_block_plan.BlockInfo", ColumnType.Text },
};
#else
// Configure column types
// Prod server
options.ColumnTypeConfig = new Dictionary<string, ColumnType>
{
{ "simple_plan_order.PlaceData", ColumnType.Blob },
{ "order_block_plan_result.PlaceData", ColumnType.Blob },
{ "order_box_block.Data", ColumnType.Blob },
{ "order_data_goods.ExtraProp", ColumnType.Text },
{ "order_module_extra.JsonStr", ColumnType.Text },
{ "process_info.Users", ColumnType.Text },
{ "order_process_schdule.CustomOrderNo", ColumnType.Text },
{ "order_process_schdule.OrderProcessStepName", ColumnType.Text },
{ "order_process_schdule.AreaName", ColumnType.Text },
{ "order_process_schdule.ConsigneeAddress", ColumnType.Text },
{ "order_process_schdule.ConsigneePhone", ColumnType.Text },
{ "report_source.Sql", ColumnType.Text },
{ "report_source.KeyValue", ColumnType.Text },
{ "report_source.Setting", ColumnType.Text },
{ "order_data_block.RemarkJson", ColumnType.Text },
{ "order_patch_detail.BlockDetail", ColumnType.Text },
{ "order_scrap_board.OutLineJson", ColumnType.Text },
{ "simple_package.Items", ColumnType.Text },
{ "order_batch_pack_config.Setting", ColumnType.Text },
{ "machine.Settings", ColumnType.Text },
{ "sys_config.Value", ColumnType.Text },
{ "sys_config.JsonStr", ColumnType.Text },
{ "process_item_exp.ItemJson", ColumnType.Text },
{ "report_template.Template", ColumnType.Text },
{ "report_template.SourceConfig", ColumnType.Text },
{ "order_block_plan.OrderNos", ColumnType.Text },
{ "order_block_plan.BlockInfo", ColumnType.Text },
};
#endif
});
host.Services.AddLogging(builder =>
{
builder.ClearProviders();
builder.AddSerilog(new LoggerConfiguration()
.WriteTo.Console()
.WriteTo.File(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, $"./Log/Error/{ErrorRecorder.UID}.log"),
restrictedToMinimumLevel:LogEventLevel.Error)
// .WriteTo.File("./Log/Info/{ErrorRecorder.UID}.log", restrictedToMinimumLevel:LogEventLevel.Information) //性能考虑暂不使用
.CreateLogger()
);
});
host.Services.AddDataSourceFactory();
host.Services.AddErrorRecorderFactory();
host.Services.AddSingleton<ProcessContext>();
var prodLen = host.Configuration.GetRequiredSection("RecordQueue").GetValue<int>("ProducerQueueLength");
var consLen = host.Configuration.GetRequiredSection("RecordQueue").GetValue<int>("ConsumerQueueLength");
var maxCharCount = host.Configuration.GetRequiredSection("RecordQueue").GetValue<long>("MaxByteCount") / 2; // bytes -> UTF-16 chars (2 bytes per char)
host.Services.AddKeyedSingleton<DataRecordQueue>(ConstVar.Producer, new DataRecordQueue(prodLen, maxCharCount));
// One consumer queue per shard database.
host.Services.AddRecordQueuePool(tenantDbOptions.DbGroup.Keys.Select(key => (key:key, queue:new DataRecordQueue(consLen, maxCharCount))).ToArray());
// host.Services.AddSingleton<ITaskMonitorLogger, CacheTaskMonitorLogger>();
host.Services.AddSingleton<ITaskMonitorLogger, LoggerTaskMonitorLogger>();
host.Services.AddHostedService<MainHostedService>();
host.Services.AddSingleton<IInputService, FileInputService>();
host.Services.AddSingleton<ITransformService, TransformService>();
host.Services.AddSingleton<IOutputService, OutputService>();
host.Services.AddSingleton<TaskMonitorService>();
// host.Services.AddRedisCache(redisOptions);
host.Services.AddSingleton<ICacher, MemoryCache>();
var app = host.Build();
await app.RunAsync();
}

View File

@@ -0,0 +1,67 @@
using System.Collections.Concurrent;
using System.Diagnostics.CodeAnalysis;
using TaskExtensions = MesETL.Shared.Helper.TaskExtensions;
namespace MesETL.App.Services;
/// <summary>
/// Bounded producer/consumer queue of <see cref="DataRecord"/>s.
/// Besides the element-count bound, producers are throttled on the total number of
/// field characters currently held, to cap memory usage.
/// </summary>
public class DataRecordQueue : IDisposable
{
private readonly BlockingCollection<DataRecord> _queue;
// Total field characters currently held in the queue; updated via Interlocked.
private long _currentCharCount;
private readonly long _maxCharCount = 2_147_483_648; // 2G chars = 4GiB of UTF-16 text
public int Count => _queue.Count;
public bool IsCompleted => _queue.IsCompleted;
public bool IsAddingCompleted => _queue.IsAddingCompleted;
// Largest single-record field character count observed so far.
public long LongestFieldCharCount { get; private set; }
// Raised after a record is enqueued.
public event Action? OnRecordWrite;
// Raised after a record is dequeued.
public event Action? OnRecordRead;
public DataRecordQueue() : this(500_000, 2_147_483_648) // default capacity: 500K records
{
}
public DataRecordQueue(int boundedCapacity, long maxCharCount)
{
_queue = new BlockingCollection<DataRecord>(boundedCapacity);
_maxCharCount = maxCharCount;
}
// Marks the queue complete for adding; consumers can still drain the remainder.
public void CompleteAdding() => _queue.CompleteAdding();
// Non-blocking take; returns false when no record is currently available.
public bool TryDequeue([MaybeNullWhen(false)] out DataRecord record)
{
if (_queue.TryTake(out record))
{
Interlocked.Add(ref _currentCharCount, -record.FieldCharCount);
OnRecordRead?.Invoke();
return true;
}
return false;
}
// Adds a record, first waiting (polling every 50ms) until the char budget has room.
// NOTE(review): the check-then-add is not atomic, so concurrent producers may briefly
// overshoot _maxCharCount — it is a soft limit.
public async Task EnqueueAsync(DataRecord record)
{
var charCount = record.FieldCharCount;
LongestFieldCharCount = Math.Max(LongestFieldCharCount, charCount);
if(_currentCharCount + charCount > _maxCharCount)
await TaskExtensions.WaitUntil(() => _currentCharCount + charCount < _maxCharCount, 50);
_queue.Add(record);
Interlocked.Add(ref _currentCharCount, charCount);
OnRecordWrite?.Invoke();
}
public void Dispose()
{
_queue.Dispose();
}
}

View File

@@ -0,0 +1,160 @@
using System.Text;
using MesETL.App.HostedServices.Abstractions;
using Microsoft.Extensions.Logging;
namespace MesETL.App.Services.ETL;
/// <summary>
/// Reads CSV data (MyDumper-style) line by line, producing one <see cref="DataRecord"/> per row.
/// </summary>
public class CsvReader : IDataReader
{
protected readonly string? FilePath;
// Lazily-created reader so construction never touches the file system.
protected readonly Lazy<StreamReader> Reader;
private Stream? _stream;
protected readonly ILogger? Logger;
protected readonly string TableName;
// The record produced by the most recent successful ReadAsync call.
public DataRecord Current { get; protected set; } = default!;
public string[] Headers { get; }
public string Delimiter { get; }
public char QuoteChar { get; }
/// <summary>Reads CSV rows from an existing stream.</summary>
public CsvReader(Stream stream, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
: this(tableName, headers, delimiter, quoteChar, logger)
{
Reader = new Lazy<StreamReader>(() => new StreamReader(stream),false);
}
/// <summary>Reads CSV rows from a file, opened lazily on first read.</summary>
public CsvReader(string filePath, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
: this(tableName, headers, delimiter, quoteChar, logger)
{
FilePath = filePath;
Reader = new Lazy<StreamReader>(() =>
{
_stream = File.OpenRead(filePath);
return new StreamReader(_stream);
});
}
private CsvReader(string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
{
TableName = tableName;
Headers = headers;
Logger = logger;
Delimiter = delimiter;
QuoteChar = quoteChar;
Reader = null!; // always overwritten by the public constructors
}
/// <summary>
/// Reads the next row into <see cref="Current"/>. Returns false at end of input.
/// Note: a blank/whitespace-only line is treated as end of data, and only the first
/// character of <see cref="Delimiter"/> is used for parsing.
/// </summary>
public virtual async ValueTask<bool> ReadAsync()
{
var str = await Reader.Value.ReadLineAsync();
if (string.IsNullOrWhiteSpace(str))
return false;
var fields = ParseRowFaster(str, QuoteChar, Delimiter[0]);
Current = new DataRecord(fields, TableName, Headers);
return true;
}
/// <summary>
/// Splits one CSV row into fields. Quote characters toggle quoted mode (and are kept in
/// the output); a backslash escapes the character that follows it; delimiters split only
/// outside quotes. StringBuilder-based reference implementation.
/// </summary>
public static string[] ParseRow(ReadOnlySpan<char> source, char quoteChar, char delimiter)
{
var result = new List<string>();
var index = -1;
var current = new StringBuilder(source.Length);
var hasQuote = false;
var hasSlash = false;
while (index < source.Length - 1)
{
index++;
var currChar = source[index];
if (hasSlash == false && currChar == '\\')
{
hasSlash = true;
current.Append('\\');
continue;
}
if (hasSlash == false && currChar == quoteChar)
{
hasQuote = !hasQuote;
current.Append(currChar);
continue;
}
if (hasQuote == false && currChar == delimiter)
{
result.Add(current.ToString());
current.Clear();
}
else
{
current.Append(currChar);
}
hasSlash = false;
}
result.Add(current.ToString());
return result.ToArray();
}
/// <summary>
/// Allocation-light variant of <see cref="ParseRow"/> that slices the source span instead
/// of building with a StringBuilder; intended to produce the same fields.
/// <paramref name="columnCount"/> presizes the result list.
/// </summary>
public static List<string> ParseRowFaster(ReadOnlySpan<char> source, char quoteChar, char delimiter, int columnCount = 10)
{
var result = new List<string>(columnCount);
var index = -1;
var hasQuote = false;
var hasSlash = false;
var start = 0;
var end = 0;
var len = source.Length - 1;
while (index < len)
{
++index;
var currChar = source[index];
if (!hasSlash)
{
if (currChar is '\\')
{
hasSlash = true;
++end;
continue;
}
if (currChar == quoteChar)
{
hasQuote = !hasQuote;
++end;
continue;
}
}
if (!hasQuote && currChar == delimiter)
{
result.Add(source[start..(end)].ToString());
start = end + 1;
++end;
}
else
{
++end;
}
hasSlash = false;
}
result.Add(source[start..end].ToString());
return result;
}
public virtual void Dispose()
{
// Only dispose what was actually created/opened.
if (Reader.IsValueCreated)
{
Reader.Value.Dispose();
_stream?.Dispose();
}
}
}

View File

@@ -0,0 +1,46 @@
using MesETL.App.HostedServices.Abstractions;
using MesETL.App.Options;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace MesETL.App.Services.ETL;
/// <summary>
/// Builds the appropriate <see cref="IDataReader"/> for an input file: a real
/// ZSTD reader, or a mock reader that replays the file's first row, depending
/// on <see cref="DataInputOptions"/>.
/// </summary>
public class DataReaderFactory
{
    private readonly ILogger<DataReaderFactory> _logger;
    private readonly IOptions<DataInputOptions> _options;

    public DataReaderFactory(ILogger<DataReaderFactory> logger, IOptions<DataInputOptions> options)
    {
        _logger = logger;
        _options = options;
    }

    /// <summary>
    /// Creates a reader for <paramref name="filePath"/>. When mocking is
    /// enabled, the per-table mock count is scaled by the configured multiplier.
    /// </summary>
    public IDataReader CreateReader(string filePath, string tableName, string[] headers)
    {
        var opt = _options.Value;

        if (!opt.UseMock)
        {
            _logger.LogDebug("***** Using {Type} data source *****", "ZSTD");
            return new ZstReader(filePath, tableName, headers, opt.Delimiter, opt.QuoteChar, _logger);
        }

        if (opt.TableMockConfig is null)
            throw new ApplicationException("未配置表模拟数据量级");
        _logger.LogDebug("***** Using {Type} data source *****", "ZSTD mock");
        var cfg = opt.TableMockConfig.GetValueOrDefault(tableName,
            new TableMockConfig { MockCount = 1, UseDeepCopy = false });
        cfg.MockCount = (long)Math.Ceiling(cfg.MockCount * opt.MockCountMultiplier);
        return new ZstMockReader(cfg, filePath, tableName, headers, opt.Delimiter, opt.QuoteChar, _logger);
    }
}
/// <summary>DI registration helpers for <see cref="DataReaderFactory"/>.</summary>
public static class DataSourceFactoryExtensions
{
    /// <summary>Registers <see cref="DataReaderFactory"/> as a singleton service.</summary>
    public static IServiceCollection AddDataSourceFactory(this IServiceCollection services)
        => services.AddSingleton<DataReaderFactory>();
}

View File

@@ -1,14 +1,14 @@
using System.Data.Common; using System.Text;
using System.Text;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using ConsoleApp2.Helpers; using MesETL.App.Const;
using ConsoleApp2.Options; using MesETL.App.Helpers;
using MesETL.App.Options;
using MesETL.Shared.Helper;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options; using Microsoft.Extensions.Options;
using MySqlConnector; using MySqlConnector;
using ServiceStack;
namespace ConsoleApp2.Services; namespace MesETL.App.Services.ETL;
/// <summary> /// <summary>
/// Mysql导出 /// Mysql导出
@@ -18,29 +18,29 @@ public partial class MySqlDestination : IDisposable, IAsyncDisposable
private readonly Dictionary<string, IList<DataRecord>> _recordCache; private readonly Dictionary<string, IList<DataRecord>> _recordCache;
private readonly MySqlConnection _conn; private readonly MySqlConnection _conn;
private readonly ILogger _logger; private readonly ILogger _logger;
private readonly IOptions<DatabaseOutputOptions> _options;
private readonly ErrorRecorder.OutputErrorRecorder _outputErrorRecorder;
private readonly ProcessContext _context; private readonly ProcessContext _context;
private readonly IOptions<DataTransformOptions> _transformOptions;
private readonly ErrorRecorder _errorRecorder;
public MySqlDestination( public MySqlDestination(
string connStr, string connStr,
ILogger logger, ILogger logger,
ProcessContext context, IOptions<DatabaseOutputOptions> options,
IOptions<DataTransformOptions> transformOptions, ErrorRecorder.OutputErrorRecorder outputErrorRecorder,
ErrorRecorder errorRecorder) ProcessContext context)
{ {
_conn = new MySqlConnection(connStr); _conn = new MySqlConnection(connStr);
_conn.Open(); _conn.Open();
_recordCache = new Dictionary<string, IList<DataRecord>>(); _recordCache = new Dictionary<string, IList<DataRecord>>();
_logger = logger; _logger = logger;
_options = options;
_outputErrorRecorder = outputErrorRecorder;
_context = context; _context = context;
_transformOptions = transformOptions;
_errorRecorder = errorRecorder;
} }
public Task WriteRecordAsync(DataRecord record) public Task WriteRecordAsync(DataRecord record)
{ {
_recordCache.AddOrUpdate(record.TableName, [record], (key, value) => _recordCache.AddOrUpdate(record.TableName, [record], (_, value) =>
{ {
value.Add(record); value.Add(record);
return value; return value;
@@ -62,11 +62,11 @@ public partial class MySqlDestination : IDisposable, IAsyncDisposable
return; return;
var cmd = _conn.CreateCommand(); var cmd = _conn.CreateCommand();
cmd.CommandTimeout = 3 * 60; cmd.CommandTimeout = 0;
try try
{ {
var excuseList = GetExcuseList(_recordCache, maxAllowPacket).ToList(); var excuseList = GetExcuseList(_recordCache, maxAllowPacket);
foreach (var insertSql in excuseList) foreach (var insertSql in excuseList)
{ {
cmd.CommandText = insertSql; cmd.CommandText = insertSql;
@@ -76,24 +76,23 @@ public partial class MySqlDestination : IDisposable, IAsyncDisposable
} }
catch (Exception e) catch (Exception e)
{ {
_logger.LogCritical(e, "Error when flushing records, sql: {Sql}", cmd.CommandText.Omit(1000)); _logger.LogError(e, "插入数据库时发生错误, sql: {Sql}", cmd.CommandText.Omit(1000));
_context.AddException(e); _context.AddException(e);
var match = MatchTableName().Match(cmd.CommandText); var match = MatchTableName().Match(cmd.CommandText);
if (match is { Success: true, Groups.Count: > 1 }) if (match is { Success: true, Groups.Count: > 1 })
{ {
var tableName = match.Groups[1].Value; var tableName = match.Groups[1].Value;
await _errorRecorder.LogErrorSqlAsync(cmd.CommandText, tableName, e); await _outputErrorRecorder.LogErrorSqlAsync(cmd.CommandText, tableName, e);
} }
else await _errorRecorder.LogErrorSqlAsync(cmd.CommandText, e); else await _outputErrorRecorder.LogErrorSqlAsync(cmd.CommandText, e);
} }
} }
_recordCache.Clear(); _recordCache.Clear();
} }
catch (Exception e) catch (Exception e)
{ {
_logger.LogCritical(e, "Error when serialize records, record:"); _logger.LogError(e, "序列化记录时发生错误");
_context.AddException(e); throw;
} }
finally finally
{ {
@@ -106,7 +105,8 @@ public partial class MySqlDestination : IDisposable, IAsyncDisposable
public IEnumerable<string> GetExcuseList(IDictionary<string, IList<DataRecord>> tableRecords,int maxAllowPacket) public IEnumerable<string> GetExcuseList(IDictionary<string, IList<DataRecord>> tableRecords,int maxAllowPacket)
{ {
var sb = new StringBuilder(); var sb = new StringBuilder("SET AUTOCOMMIT = 1;\n");
var appendCount = 0;
foreach (var (tableName, records) in tableRecords) foreach (var (tableName, records) in tableRecords)
{ {
if (records.Count == 0) if (records.Count == 0)
@@ -118,11 +118,11 @@ public partial class MySqlDestination : IDisposable, IAsyncDisposable
// INSERT INTO ... VALUES >>> // INSERT INTO ... VALUES >>>
sb.Append($"INSERT INTO `{tableName}`("); sb.Append($"INSERT INTO `{tableName}`(");
for (var i = 0; i < records[0].Headers.Length; i++) for (var i = 0; i < records[0].Headers.Count; i++)
{ {
var header = records[0].Headers[i]; var header = records[0].Headers[i];
sb.Append($"`{header}`"); sb.Append($"`{header}`");
if (i != records[0].Headers.Length - 1) if (i != records[0].Headers.Count - 1)
sb.Append(','); sb.Append(',');
} }
@@ -134,34 +134,41 @@ public partial class MySqlDestination : IDisposable, IAsyncDisposable
var record = records[recordIdx]; var record = records[recordIdx];
var recordSb = new StringBuilder(); var recordSb = new StringBuilder();
recordSb.Append('('); recordSb.Append('(');
for (var fieldIdx = 0; fieldIdx < record.Fields.Length; fieldIdx++) for (var fieldIdx = 0; fieldIdx < record.Fields.Count; fieldIdx++)
{ {
var field = record.Fields[fieldIdx]; var field = record.Fields[fieldIdx];
// 在这里处理特殊列 // 在这里处理特殊列
#region HandleFields #region HandleFields
if (field == "\\N")
if (field.Length == 2 && field == ConstVar.MyDumperNull) // MyDumper导出的NULL为'\N''\'不是转义字符)
{ {
recordSb.Append("NULL"); recordSb.Append(ConstVar.Null);
goto Escape; goto Escape;
} }
switch (_transformOptions.Value.GetColumnType(record.TableName, record.Headers[fieldIdx])) switch (_options.Value.GetColumnType(record.TableName, record.Headers[fieldIdx]))
{ {
case ColumnType.Text: case ColumnType.Text:
recordSb.Append(string.IsNullOrEmpty(field) if(string.IsNullOrEmpty(field))
? "''" recordSb.Append("''");
: _transformOptions.Value.TransformBinary?.Invoke(field) ?? field); else if (field == ConstVar.Null)
recordSb.Append(ConstVar.Null);
else recordSb.Append($"_utf8mb4 0x{field}");
break; break;
case ColumnType.Blob: case ColumnType.Blob:
if (string.IsNullOrEmpty(field)) if (string.IsNullOrEmpty(field))
recordSb.Append("''"); recordSb.Append("''");
else if (field == ConstVar.Null)
recordSb.Append(ConstVar.Null);
else recordSb.Append($"0x{field}"); else recordSb.Append($"0x{field}");
break; break;
case ColumnType.Json: case ColumnType.Json:// 生产库没有JSON列仅用于测试库进行测试
recordSb.Append(string.IsNullOrEmpty(field) if(string.IsNullOrEmpty(field))
? "\"[]\"" recordSb.Append("'[]'"); // JObject or JArray?
: _transformOptions.Value.TransformBinary?.Invoke(field) ?? field); else if (_options.Value.TreatJsonAsHex)
recordSb.Append($"_utf8mb4 0x{field}");
else recordSb.AppendLine(field);
break; break;
case ColumnType.UnDefine: case ColumnType.UnDefine:
default: default:
@@ -172,16 +179,25 @@ public partial class MySqlDestination : IDisposable, IAsyncDisposable
Escape: Escape:
#endregion #endregion
if (fieldIdx != record.Fields.Length - 1) if (fieldIdx != record.Fields.Count - 1)
recordSb.Append(','); recordSb.Append(',');
} }
recordSb.Append(')'); recordSb.Append(')');
// 若字符数量即将大于限制则返回SQL清空StringBuilder保留当前记录的索引值然后转到StartBuild标签重新开始一轮INSERT // 若字符数量即将大于限制则返回SQL清空StringBuilder保留当前记录的索引值然后转到StartBuild标签重新开始一轮INSERT
if (sb.Length + recordSb.Length + 1 > maxAllowPacket) if (sb.Length + recordSb.Length + 23 > maxAllowPacket)
{ {
sb.Append(';'); if (appendCount == 0) // 如果单条记录超出maxAllowedPacket
{
sb.Append(recordSb);
_logger.LogWarning("{Table}表单条数据的SQL超出了配置的MaxAllowedPacket字符数{Count}", tableName,
sb.Length + recordSb.Length + 23);
}
TryAddForUpdateSuffix(tableName, sb);
sb.Append(';').AppendLine();
sb.Append("SET AUTOCOMMIT = 1;");
yield return sb.ToString(); yield return sb.ToString();
sb.Clear(); sb.Clear();
goto StartBuild; goto StartBuild;
@@ -191,15 +207,35 @@ public partial class MySqlDestination : IDisposable, IAsyncDisposable
sb.Append(',').AppendLine(); sb.Append(',').AppendLine();
noCommas = false; noCommas = false;
sb.Append(recordSb); // StringBuilder.Append(StringBuilder)不会分配多余的内存 sb.Append(recordSb); // StringBuilder.Append(StringBuilder)不会分配多余的内存
appendCount++;
} }
TryAddForUpdateSuffix(tableName, sb);
sb.Append(';'); sb.Append(';');
sb.Append("COMMIT;");
yield return sb.ToString(); yield return sb.ToString();
sb.Clear(); sb.Clear();
} }
} }
/// <summary>
/// 数据必须是同一张表
/// </summary>
/// <param name="tableName"></param>
/// <param name="sb"></param>
private void TryAddForUpdateSuffix(string tableName, StringBuilder sb)
{
var forUpdate = _options.Value.TryGetForUpdate(tableName, out var forUpdateSql);
if (forUpdate)
{
sb.AppendLine($"""
AS new
ON DUPLICATE KEY UPDATE
{forUpdateSql}
""");
}
}
public void Dispose() public void Dispose()
{ {
_conn.Close(); _conn.Close();

View File

@@ -0,0 +1,64 @@
using MesETL.App.Options;
using Microsoft.Extensions.Logging;
namespace MesETL.App.Services.ETL;
/// <summary>
/// 截取提供ZST文件中的第一行然后复制成指定数量的数据
/// Reads only the first row of the ZST source and replays copies of it until
/// the configured mock count is reached.
/// </summary>
public class ZstMockReader : ZstReader
{
    // Records handed out so far (the template row counts as the first one).
    private long _currentCount;
    // Total number of records to produce.
    private readonly long _mockCount;
    // First row of the source; cloned (or shared) on every subsequent read.
    private DataRecord? _template;
    // true: each read yields an independent clone with mutated columns;
    // false: every read returns the very same template instance.
    private readonly bool _deepCopy;
    // Columns incremented by 1 on each deep-copied record, if configured.
    private readonly string[]? _autoIncrementColumn;

    // Candidate CompanyID values assigned randomly to deep-copied records.
    static readonly IReadOnlyList<int> Range = [500, 1500, 2500];

    public ZstMockReader(TableMockConfig mockConfig, string filePath, string tableName, string[] headers, string delimiter = ",", char quoteChar = '\"', ILogger? logger = null) : base(filePath, tableName, headers, delimiter, quoteChar, logger)
    {
        _mockCount = mockConfig.MockCount;
        _deepCopy = mockConfig.UseDeepCopy;
        _autoIncrementColumn = mockConfig.AutoIncrementColumn;
    }

    public ZstMockReader(TableMockConfig mockConfig, Stream stream, string tableName, string[] headers, string delimiter = ",", char quoteChar = '\"', ILogger? logger = null) : base(stream, tableName, headers, delimiter, quoteChar, logger)
    {
        _mockCount = mockConfig.MockCount;
        _deepCopy = mockConfig.UseDeepCopy;
        _autoIncrementColumn = mockConfig.AutoIncrementColumn;
    }

    /// <summary>
    /// Produces the next mock record; returns false once the configured
    /// number of records has been emitted.
    /// </summary>
    public override async ValueTask<bool> ReadAsync()
    {
        if (_template is null)
        {
            // First call: pull the template row from the real source.
            if (!await base.ReadAsync())
                throw new InvalidOperationException("所提供的ZST源为空无法生成模板数据");
            _template = Current.Clone() as DataRecord;
            if (_template is null)
                throw new ApplicationException("记录拷贝失败");
            _currentCount++;
            return true;
        }
        if (_deepCopy)
        {
            Current = _template.Clone() as DataRecord ?? throw new ApplicationException("记录拷贝失败");
            if (_autoIncrementColumn is not null)
            {
                foreach (var column in _autoIncrementColumn)
                {
                    Current[column] = (Convert.ToInt64(Current[column]) + 1).ToString();
                }
                // Remember the advanced values so the next clone keeps counting up.
                // (Previously this assignment sat inside the loop — same effect,
                // but executed once per column.)
                _template = Current;
            }
            Current["CompanyID"] = Range[Random.Shared.Next(0, Range.Count)].ToString(); // random CompanyID
        }
        else Current = _template;
        _currentCount++;
        // FIX: was '< _mockCount', which rejected the record produced when the
        // counter reached the target, yielding mockCount-1 rows for any
        // configured count >= 2.
        return _currentCount <= _mockCount;
    }
}

View File

@@ -0,0 +1,53 @@
using Microsoft.Extensions.Logging;
using ZstdSharp;
namespace MesETL.App.Services.ETL;
/// <summary>
/// 解压ZST文件从中读取CSV数据
/// Reads CSV rows out of a Zstandard-compressed source by layering a
/// DecompressionStream underneath the text reader.
/// </summary>
public class ZstReader : CsvReader
{
    // Shadows the base reader so lines come from the decompressed stream.
    protected new readonly Lazy<StreamReader> Reader;
    private Stream? _stream;

    public ZstReader(string filePath, string tableName, string[] headers, string delimiter = ",", char quoteChar = '\"', ILogger? logger = null)
        : base(filePath, tableName, headers, delimiter, quoteChar, logger)
    {
        Reader = new Lazy<StreamReader>(() =>
        {
            _stream = new DecompressionStream(File.OpenRead(filePath));
            return new StreamReader(_stream);
        }, false);
    }

    public ZstReader(Stream stream, string tableName, string[] headers, string delimiter = ",", char quoteChar = '\"', ILogger? logger = null)
        : base(stream, tableName, headers, delimiter, quoteChar, logger)
    {
        var decompressed = new DecompressionStream(stream);
        Reader = new Lazy<StreamReader>(() => new StreamReader(decompressed), false);
    }

    /// <summary>
    /// Reads and parses the next decompressed line; false at end of stream
    /// (or on a whitespace-only line, matching the base reader's behavior).
    /// </summary>
    public override async ValueTask<bool> ReadAsync()
    {
        var line = await Reader.Value.ReadLineAsync();
        if (string.IsNullOrWhiteSpace(line))
            return false;
        Current = new DataRecord(ParseRowFaster(line, QuoteChar, Delimiter[0]), TableName, Headers);
        return true;
    }

    public override void Dispose()
    {
        base.Dispose();
        if (!Reader.IsValueCreated)
            return;
        Reader.Value.Dispose();
        _stream?.Dispose();
    }
}

View File

@@ -0,0 +1,80 @@
using MesETL.App.Helpers;
using MesETL.Shared.Helper;
using Microsoft.Extensions.Logging;
namespace MesETL.App.Services.ErrorRecorder;
/// <summary>
/// Base class for error recorders: appends failed records to per-table
/// ".errlog" files under a per-run output directory.
/// </summary>
public class ErrorRecorder
{
    protected ILogger Logger;

    /// <summary>
    /// 当次执行标识 — identifier of the current run, used as the output
    /// sub-directory name by the derived recorders.
    /// </summary>
    public static readonly string UID = DateTime.Now.ToString("yyyy-MM-dd HH-mm-ss");

    public ErrorRecorder(ILogger logger)
    {
        Logger = logger;
    }

    /// <summary>
    /// Appends one failed record (preceded by the exception message) to
    /// "{outputDir}/{TableName}.errlog".
    /// </summary>
    public static async Task LogErrorRecordAsync(string outputDir, DataRecord record, Exception exception)
    {
        if (!Directory.Exists(outputDir))
            Directory.CreateDirectory(outputDir);
        var content = $"""
                       ### {exception.Message}
                       {string.Join(',', record.Fields)}
                       """;
        var path = Path.Combine(outputDir, $"{record.TableName}.errlog");
        await File.AppendAllTextAsync(path, content);
    }

    /// <summary>
    /// Appends a batch of failed records, grouped by table, to their per-table
    /// ".errlog" files; at most 5 tables are written concurrently.
    /// </summary>
    public static async Task LogErrorRecordAsync(string outputDir, IEnumerable<DataRecord> records, Exception exception)
    {
        if (!Directory.Exists(outputDir))
            Directory.CreateDirectory(outputDir);

        // Group the incoming records by table name.
        var tableMapping = new Dictionary<string, List<DataRecord>>();
        foreach (var record in records)
        {
            if (!tableMapping.TryGetValue(record.TableName, out var list))
            {
                list = new List<DataRecord>();
                tableMapping.Add(record.TableName, list);
            }
            list.Add(record);
        }

        const int maxParallelism = 5;
        var pairs = tableMapping.ToList();
        for (var i = 0; i < pairs.Count; i += maxParallelism)
        {
            // FIX: the original took the FIRST batch on every iteration (missing
            // Skip(i)), and opened each log via File.OpenRead — a read-only
            // stream that throws when wrapped in a StreamWriter — while also
            // ignoring outputDir and the ".errlog" naming convention.
            await Parallel.ForEachAsync(pairs.Skip(i).Take(maxParallelism), async (pair, token) =>
            {
                var path = Path.Combine(outputDir, $"{pair.Key}.errlog");
                await using var writer = new StreamWriter(path, append: true);
                foreach (var record in pair.Value)
                {
                    var content =
                        $"""
                         ### {exception.Message}
                         {string.Join(',', record.Fields)}
                         """;
                    await writer.WriteLineAsync(content);
                    if (token.IsCancellationRequested)
                        break;
                }
            });
        }
    }

    /// <summary>
    /// Deletes every "*.errlog" file under <paramref name="dir"/>, recursively.
    /// </summary>
    public void ClearErrorRecords(string dir)
    {
        Logger.LogInformation("***** Clear error records *****");
        foreach (var file in Directory.GetFiles(dir, "*.errlog", SearchOption.AllDirectories))
        {
            File.Delete(file);
        }
    }
}

View File

@@ -0,0 +1,27 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
namespace MesETL.App.Services.ErrorRecorder;
/// <summary>
/// Creates the input/transform/output error recorders, supplying the shared logger.
/// </summary>
public class ErrorRecorderFactory
{
    private readonly ILogger<ErrorRecorderFactory> _logger;

    public ErrorRecorderFactory(ILogger<ErrorRecorderFactory> logger)
    {
        _logger = logger;
    }

    /// <summary>Recorder for SQL that failed during output to the given database.</summary>
    public OutputErrorRecorder CreateOutput(string database)
    {
        return new OutputErrorRecorder(database, _logger);
    }

    /// <summary>Recorder for records that failed during transformation.</summary>
    public TransformErrorRecorder CreateTransform()
    {
        return new TransformErrorRecorder(_logger);
    }

    /// <summary>Recorder for records that failed during input.</summary>
    public InputErrorRecorder CreateInput()
    {
        return new InputErrorRecorder(_logger);
    }
}
/// <summary>DI registration helpers for <see cref="ErrorRecorderFactory"/>.</summary>
public static class ErrorRecorderFactoryExtensions
{
    /// <summary>Registers <see cref="ErrorRecorderFactory"/> as a singleton service.</summary>
    public static IServiceCollection AddErrorRecorderFactory(this IServiceCollection services)
        => services.AddSingleton<ErrorRecorderFactory>();
}

View File

@@ -0,0 +1,19 @@
using Microsoft.Extensions.Logging;
namespace MesETL.App.Services.ErrorRecorder;
/// <summary>
/// Records input-stage failures under "ErrorRecords/{UID}/Input".
/// </summary>
public sealed class InputErrorRecorder : ErrorRecorder
{
    private readonly string _outputDir =
        Path.Combine(AppDomain.CurrentDomain.BaseDirectory, $"ErrorRecords/{UID}/Input");

    public InputErrorRecorder(ILogger logger) : base(logger)
    {
    }

    /// <summary>Appends one failed record to this stage's error log.</summary>
    public Task LogErrorRecordAsync(DataRecord record, Exception exception)
    {
        return LogErrorRecordAsync(_outputDir, record, exception);
    }

    /// <summary>Appends a batch of failed records to this stage's error logs.</summary>
    public Task LogErrorRecordAsync(IEnumerable<DataRecord> records, Exception exception)
    {
        return LogErrorRecordAsync(_outputDir, records, exception);
    }
}

View File

@@ -1,27 +1,19 @@
using System.Text; using System.Text;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
namespace ConsoleApp2.Services; namespace MesETL.App.Services.ErrorRecorder;
public class ErrorRecorder
public sealed class OutputErrorRecorder : ErrorRecorder
{ {
private readonly string _outputDir = "./ErrorRecords"; private readonly string _outputDir = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, $"ErrorRecords/{UID}/Output");
private readonly ILogger _logger; private readonly string _database;
private readonly Dictionary<string, int> _logIndex = new(); private readonly Dictionary<string, int> _logIndex = new();
/// <summary> public OutputErrorRecorder(string database, ILogger logger) : base(logger)
/// 当次执行标识
/// </summary>
private static readonly string UID = DateTime.Now.ToString("yyyy-MM-dd HH-mm-ss");
public ErrorRecorder(ILogger<ErrorRecorder> logger)
{ {
_logger = logger; _database = database;
var dir = Path.Combine(_outputDir, UID); Logger = logger;
if (!Directory.Exists(dir))
{
Directory.CreateDirectory(dir);
}
} }
/// <summary> /// <summary>
@@ -32,25 +24,28 @@ public class ErrorRecorder
/// <param name="exception"></param> /// <param name="exception"></param>
public async Task LogErrorSqlAsync(string commandText, string tableName, Exception exception) public async Task LogErrorSqlAsync(string commandText, string tableName, Exception exception)
{ {
if (!Directory.Exists(_outputDir))
Directory.CreateDirectory(_outputDir);
if (!_logIndex.TryGetValue(tableName, out var idx)) if (!_logIndex.TryGetValue(tableName, out var idx))
{ {
idx = 0; idx = 0;
_logIndex.Add(tableName, idx); _logIndex.Add(tableName, idx);
} }
var filePath = Path.Combine(_outputDir, UID, $"{tableName}-{idx}.errlog"); var filePath = Path.Combine(_outputDir, $"{tableName}-{idx}.errlog");
if (File.Exists(filePath) && new FileInfo(filePath).Length > 10 * 1024 * 1024) if (File.Exists(filePath) && new FileInfo(filePath).Length > 10 * 1024 * 1024)
{ {
++idx; ++idx;
_logIndex[tableName] = idx; _logIndex[tableName] = idx;
filePath = Path.Combine(_outputDir, UID, $"{tableName}-{idx}.errlog"); filePath = Path.Combine(_outputDir, $"{tableName}-{idx}.errlog");
} }
var content = $""" var content = $"""
/* [{DateTime.Now:yyyy-MM-dd HH:mm:ss}] /* [{DateTime.Now:yyyy-MM-dd HH:mm:ss}]
* Error occurred when export table '{tableName}': * Error occurred when export table '{_database}.{tableName}':
* {exception.Message} * {exception.Message}
*/ */
USE `{_database}`;
{commandText} {commandText}
@@ -65,7 +60,9 @@ public class ErrorRecorder
/// <param name="exception"></param> /// <param name="exception"></param>
public async Task LogErrorSqlAsync(string commandText, Exception exception) public async Task LogErrorSqlAsync(string commandText, Exception exception)
{ {
var filePath = Path.Combine(_outputDir, UID, "UnknownTables.errlog"); if (!Directory.Exists(_outputDir))
Directory.CreateDirectory(_outputDir);
var filePath = Path.Combine(_outputDir, "UnknownTables.errlog");
var content = $""" var content = $"""
/* [{DateTime.Now:yyyy-MM-dd HH:mm:ss}] /* [{DateTime.Now:yyyy-MM-dd HH:mm:ss}]
* Error occurred when export table with unknown table name: * Error occurred when export table with unknown table name:
@@ -78,27 +75,4 @@ public class ErrorRecorder
await File.AppendAllTextAsync(filePath, content, Encoding.UTF8); await File.AppendAllTextAsync(filePath, content, Encoding.UTF8);
} }
public async Task LogErrorRecordsAsync(IDictionary<string, DataRecord> records, Exception exception)
{
var pathDict = new Dictionary<string, string>();
foreach (var pair in records)
{
if(!pathDict.TryGetValue(pair.Key, out var path))
{
path = Path.Combine(_outputDir, UID, "ErrorRecords", $"{pair.Key}.errlog");
pathDict.Add(pair.Key, path);
}
//
await File.AppendAllTextAsync(path, string.Join(',', pair.Value.Fields));
}
}
public void ClearErrorRecords()
{
_logger.LogInformation("***** Clear error records *****");
foreach (var file in Directory.GetFiles(_outputDir, "*.errlog", SearchOption.AllDirectories))
{
File.Delete(file);
}
}
} }

View File

@@ -0,0 +1,20 @@
using Microsoft.Extensions.Logging;
namespace MesETL.App.Services.ErrorRecorder;
/// <summary>
/// Records transform-stage failures under "ErrorRecords/{UID}/Transform".
/// </summary>
public sealed class TransformErrorRecorder : ErrorRecorder
{
    private readonly string _outputDir =
        Path.Combine(AppDomain.CurrentDomain.BaseDirectory, $"ErrorRecords/{UID}/Transform");

    public TransformErrorRecorder(ILogger logger) : base(logger)
    {
    }

    /// <summary>Appends one failed record to this stage's error log.</summary>
    public Task LogErrorRecordAsync(DataRecord record, Exception exception)
    {
        return LogErrorRecordAsync(_outputDir, record, exception);
    }

    /// <summary>Appends a batch of failed records to this stage's error logs.</summary>
    public Task LogErrorRecordAsync(IEnumerable<DataRecord> records, Exception exception)
    {
        return LogErrorRecordAsync(_outputDir, records, exception);
    }
}

View File

@@ -0,0 +1,20 @@
using System.Text;
using MesETL.App.Cache;
namespace MesETL.App.Services.Loggers;
/// <summary>
/// Task-monitor sink that mirrors Progress-level status snapshots into the
/// configured cache (hash keyed by task name).
/// </summary>
public class CacheTaskMonitorLogger : ITaskMonitorLogger
{
    private readonly ICacher _cacher;

    public CacheTaskMonitorLogger(ICacher cacher)
    {
        _cacher = cacher;
    }

    /// <summary>
    /// Stores <paramref name="properties"/> as a hash under <paramref name="name"/>,
    /// but only for Progress-level entries; Info/Debug are dropped.
    /// NOTE(review): SetHashAsync is fire-and-forget here (the interface method
    /// is void, so the task is never awaited) — confirm lost or out-of-order
    /// writes are acceptable for monitoring data.
    /// </summary>
    public void LogStatus(string name, IReadOnlyDictionary<string, string> properties, ITaskMonitorLogger.LogLevel logLevel)
    {
        if (logLevel is ITaskMonitorLogger.LogLevel.Progress)
            _cacher.SetHashAsync(name, properties);
    }
}

View File

@@ -0,0 +1,12 @@
namespace MesETL.App.Services.Loggers;
/// <summary>
/// Sink for task-monitor status output (e.g. a standard logger or a cache).
/// </summary>
public interface ITaskMonitorLogger
{
    // Severity/purpose of a status entry; implementations decide how (or
    // whether) each level is persisted.
    public enum LogLevel
    {
        Info,
        Debug,
        Progress,
    }

    /// <summary>Reports the named task's current status properties at the given level.</summary>
    void LogStatus(string name, IReadOnlyDictionary<string, string> properties, LogLevel logLevel = LogLevel.Info);
}

View File

@@ -0,0 +1,41 @@
using System.Text;
using Microsoft.Extensions.Logging;
namespace MesETL.App.Services.Loggers;
/// <summary>
/// Task-monitor sink that formats status as "name: { key: value, ... }" and
/// writes it to the standard <see cref="ILogger"/>.
/// </summary>
public class LoggerTaskMonitorLogger : ITaskMonitorLogger
{
    private readonly ILogger _logger;

    public LoggerTaskMonitorLogger(ILogger<LoggerTaskMonitorLogger> logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Writes one status line: Info maps to LogInformation, Debug and Progress
    /// both map to LogDebug.
    /// </summary>
    public void LogStatus(string name, IReadOnlyDictionary<string, string> properties, ITaskMonitorLogger.LogLevel logLevel)
    {
        var sb = new StringBuilder();
        sb.Append($"{name}: {{");
        // Unused index parameter removed from the original Select((pair, i) => ...).
        sb.AppendJoin(',', properties.Select(pair => $" {pair.Key}: {pair.Value}"));
        sb.Append('}');
        switch (logLevel)
        {
            case ITaskMonitorLogger.LogLevel.Info:
                _logger.LogInformation("{message}", sb.ToString());
                break;
            case ITaskMonitorLogger.LogLevel.Progress:
            case ITaskMonitorLogger.LogLevel.Debug:
                _logger.LogDebug("{message}", sb.ToString());
                break;
            default:
                throw new ArgumentOutOfRangeException(nameof(logLevel), logLevel, null);
        }
    }
}

View File

@@ -0,0 +1,75 @@
using System.Collections.Concurrent;
namespace MesETL.App.Services;
/// <summary>
/// 处理上下文类,标识处理进度 — shared ETL progress state: input/transform/output
/// counters, per-table tallies, completion flags, and an exception flag.
/// Counter mutation goes through Interlocked so worker tasks can update it
/// concurrently.
/// </summary>
public class ProcessContext
{
    private bool _hasException;
    private long _input;
    private long _transform;
    private long _output;
    private readonly ConcurrentDictionary<string, (long input, long output)> _tables = new();

    /// <summary>true once any stage has reported an exception.</summary>
    public bool HasException => _hasException;

    public bool IsInputCompleted { get; private set; }
    public bool IsTransformCompleted { get; private set; }
    public bool IsOutputCompleted { get; private set; }

    public long InputCount
    {
        get => _input;
        set => Interlocked.Exchange(ref _input, value);
    }

    public long TransformCount
    {
        get => _transform;
        set => Interlocked.Exchange(ref _transform, value);
    }

    public long OutputCount
    {
        get => _output;
        set => Interlocked.Exchange(ref _output, value);
    }

    /// <summary>TableName -> (rows read, rows written).</summary>
    public IReadOnlyDictionary<string, (long input, long output)> TableProgress => _tables;

    public void CompleteInput() => IsInputCompleted = true;
    public void CompleteTransform() => IsTransformCompleted = true;
    public void CompleteOutput() => IsOutputCompleted = true;

    /// <summary>Marks the run as failed; always returns true (the assignment's value).</summary>
    public bool AddException(Exception e) => _hasException = true;

    public void AddInput() => Interlocked.Increment(ref _input);
    public void AddInput(int count) => Interlocked.Add(ref _input, count);
    public void AddTransform() => Interlocked.Increment(ref _transform);
    public void AddTransform(int count) => Interlocked.Add(ref _transform, count);
    public void AddOutput() => Interlocked.Increment(ref _output);
    public void AddOutput(int count) => Interlocked.Add(ref _output, count);

    /// <summary>Adds <paramref name="count"/> to the table's input tally.</summary>
    public void AddTableInput(string table, int count)
        => _tables.AddOrUpdate(table, (count, 0L), (_, t) => (t.input + count, t.output));

    /// <summary>Adds <paramref name="count"/> to the table's output tally.</summary>
    public void AddTableOutput(string table, int count)
        => _tables.AddOrUpdate(table, (0L, count), (_, t) => (t.input, t.output + count));
}

View File

@@ -0,0 +1,66 @@
using System.Collections.Concurrent;
using Microsoft.Extensions.DependencyInjection;
namespace MesETL.App.Services;
/// <summary>
/// Named collection of <see cref="DataRecordQueue"/> instances shared between
/// producer and consumer services.
/// </summary>
public class RecordQueuePool
{
    private readonly ConcurrentDictionary<string, DataRecordQueue> _queues = new();

    /// <summary>All registered queues, keyed by name.</summary>
    public IReadOnlyDictionary<string, DataRecordQueue> Queues => _queues;

    /// <summary>Creates and registers a queue with the given capacity limits.</summary>
    public void AddQueue(string key, int boundedCapacity = 200_0000, long maxCharCount = 2_147_483_648)
    {
        AddQueue(key, new DataRecordQueue(boundedCapacity, maxCharCount));
    }

    /// <summary>Registers an existing queue; duplicate keys are rejected.</summary>
    public void AddQueue(string key, DataRecordQueue queue)
    {
        var added = _queues.TryAdd(key, queue);
        if (!added)
            throw new InvalidOperationException($"请勿添加重复的队列,队列名: {key}");
    }

    /// <summary>Unregisters a queue, optionally disposing it.</summary>
    public void RemoveQueue(string key, bool dispose = true)
    {
        if (!_queues.Remove(key, out var removed))
            throw new InvalidOperationException($"未找到对应的队列,队列名:{key}");
        if (dispose)
            removed.Dispose();
    }

    /// <summary>Looks up a queue by name (throws when absent).</summary>
    public DataRecordQueue GetQueue(string key) => _queues[key];

    /// <summary>Indexer alias: get = <see cref="GetQueue"/>, set = AddQueue.</summary>
    public DataRecordQueue this[string key]
    {
        get => GetQueue(key);
        set => AddQueue(key, value);
    }
}
/// <summary>DI helpers that build and register a <see cref="RecordQueuePool"/> singleton.</summary>
public static class MultiRecordQueueExtensions
{
    /// <summary>Registers a pool containing one default-sized queue per key.</summary>
    public static IServiceCollection AddRecordQueuePool(this IServiceCollection services, params string[] keys)
    {
        var pool = new RecordQueuePool();
        foreach (var key in keys)
            pool.AddQueue(key);
        return services.AddSingleton(pool);
    }

    /// <summary>Registers a pool containing the supplied pre-built queues.</summary>
    public static IServiceCollection AddRecordQueuePool(this IServiceCollection services,
        params (string key, DataRecordQueue queue)[] queues)
    {
        var pool = new RecordQueuePool();
        foreach (var (key, queue) in queues)
            pool.AddQueue(key, queue);
        return services.AddSingleton(pool);
    }
}

View File

@@ -0,0 +1,81 @@
using ApplicationException = System.ApplicationException;
using TaskExtensions = MesETL.Shared.Helper.TaskExtensions;
namespace MesETL.App.Services;
/// <summary>
/// 快速批量创建和等待任务 — throttled task launcher: callers await a free slot
/// (RunningTaskCount &lt; MaxTaskCount) before a new task is started.
/// </summary>
public class TaskManager
{
    private int _runningTaskCount;

    /// <summary>Number of tasks currently scheduled or executing.</summary>
    public int RunningTaskCount => _runningTaskCount;

    /// <summary>Maximum number of concurrently running tasks.</summary>
    public int MaxTaskCount { get; }

    /// <summary>Raised on the worker when a task throws; the exception is swallowed otherwise.</summary>
    public event Action<Exception>? OnException;

    /// <summary>Raised on the worker when a task completes without throwing.</summary>
    public event Action? OnTaskCompleteSuccessfully;

    public TaskManager(int maxTaskCount)
    {
        MaxTaskCount = maxTaskCount;
    }

    /// <summary>
    /// Waits until a slot is free, then starts <paramref name="func"/> on the
    /// thread pool and returns the running task.
    /// </summary>
    public async ValueTask<Task> CreateTaskAsync(Func<Task> func, CancellationToken cancellationToken = default)
    {
        await TaskExtensions.WaitUntil(() => _runningTaskCount < MaxTaskCount, 25, cancellationToken);
        return RunTask(func, cancellationToken);
    }

    /// <summary>
    /// Closure-free overload: <paramref name="arg"/> is threaded through to
    /// <paramref name="func"/> without capturing locals in the lambda.
    /// </summary>
    public async ValueTask<Task> CreateTaskAsync(Func<object?, Task> func, object? arg, CancellationToken ct = default)
    {
        await TaskExtensions.WaitUntil(() => _runningTaskCount < MaxTaskCount, 25, ct);
        return RunTaskNoClosure(func, arg, ct);
    }

    private Task RunTask(Func<Task> func, CancellationToken cancellationToken = default)
    {
        // FIX: increment BEFORE scheduling. Previously the increment ran after
        // Task.Run, so a task that finished quickly could decrement first,
        // transiently corrupting the count and letting the throttle overshoot.
        Interlocked.Increment(ref _runningTaskCount);
        return Task.Run(async () =>
        {
            try
            {
                await func();
                OnTaskCompleteSuccessfully?.Invoke();
            }
            catch (Exception ex)
            {
                OnException?.Invoke(ex);
            }
            finally
            {
                Interlocked.Decrement(ref _runningTaskCount);
            }
        }, cancellationToken);
    }

    private Task RunTaskNoClosure(Func<object?, Task> func, object? arg, CancellationToken cancellationToken = default)
    {
        // Same ordering fix as RunTask: count the task before it can complete.
        Interlocked.Increment(ref _runningTaskCount);
        return Task.Factory.StartNew(async obj => // 性能考虑这个lambda中不要捕获任何外部变量!
        {
            if (obj is not Tuple<Func<object?, Task>, object?> tuple)
                throw new ApplicationException("这个异常不该出现");
            try
            {
                await tuple.Item1(tuple.Item2);
                OnTaskCompleteSuccessfully?.Invoke();
            }
            catch (Exception ex)
            {
                OnException?.Invoke(ex);
            }
            finally
            {
                Interlocked.Decrement(ref _runningTaskCount);
            }
        }, Tuple.Create(func, arg), cancellationToken).Unwrap();
    }
}

View File

@@ -0,0 +1,64 @@
{
"MemoryThreshold": 8,
"GCIntervalMilliseconds": -1,
"UnsafeVariable": false,
"Logging": {
"LogLevel": {
"Default": "Debug"
}
},
"Input":{
"InputDir": "D:\\Dump\\NewMockData", // Csv数据输入目录
"UseMock": false, // 使用模拟数据进行测试
"MockCountMultiplier": 1, // 模拟数据量级的乘数
"TableOrder": ["order", "order_data_parts"], // 按顺序输入的表
"TableIgnoreList": [] // 忽略输入的表
},
"Transform":{
"StrictMode": false, // 设为true时如果数据转换发生错误立刻停止程序
"EnableFilter": true, // 启用数据过滤
"EnableReplacer": true, // 启用数据修改
"EnableReBuilder": true, // 启用数据重建
"CleanDate": "202301" // 当数据过滤开启时,删除这个时间之前的数据
},
"Output":{
"ConnectionString": "Server=127.0.0.1;Port=3306;UserId=root;Password=cfmes123456;", // 要分库,不用加'Database='了
"MaxAllowedPacket": 67108864,
"FlushCount": 10000, // 每次提交记录条数
"MaxDatabaseOutputTask" : 4, // 每个数据库最大提交任务数
"TreatJsonAsHex": false, // 将json列作为16进制格式输出(0x前缀)生产库是没有json列的
"NoOutput": ["order"],
"ForUpdate":
{
"order_data_parts": "CompanyID = new.CompanyID"
}
},
"RecordQueue":{
"ProducerQueueLength": 50000, // 输入队列最大长度
"ConsumerQueueLength": 10000, // 每个输出队列最大长度
"MaxByteCount": 3221225472 // 队列最大字节数
},
"RedisCache": {
"Configuration": "192.168.1.246:6380",
"InstanceName" : "mes-etl:"
},
"TenantDb": // 分库配置
{
"TenantKey" : "CompanyID",
"UseDbGroup": "prod",
"DbGroups": {
"test": {
"cferp_test_1": 1000,
"cferp_test_2": 2000,
"cferp_test_3": 2147483647
},
"prod":{
"mesdb_1": 5000,
"mesdb_2": 10000,
"mesdb_3": 15000,
"mesdb_4": 20000,
"mesdb_5": 2147483647
}
}
}
}

View File

@@ -0,0 +1,14 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\MesETL.Shared\MesETL.Shared.csproj" />
</ItemGroup>
</Project>

55
MesETL.Clean/Program.cs Normal file
View File

@@ -0,0 +1,55 @@
using MesETL.Shared.Helper;

// Bulk-deletes `order_data_block` rows whose CompanyID = 0 in fixed-size
// batches across several parallel workers, printing progress every 5 seconds.
// Usage: -s <connection string> [-l <batch size, default 1000>] [-p <parallel tasks, default 4>]

var connStr = GetArg("-s") ?? throw new ApplicationException("未配置数据库连接字符串");
var eachLimit = int.Parse(GetArg("-l") ?? "1000");
var parallelTask = int.Parse(GetArg("-p") ?? "4");
var deletionCount = 0;

Console.WriteLine("Running Deletion...");

// Fire-and-forget progress reporter; terminates with the process.
_ = Task.Run(async () =>
{
    while (true)
    {
        await Task.Delay(5000);
        Console.WriteLine($"[{DateTime.Now}] DELETE COUNT: {deletionCount}");
    }
});

await Parallel.ForAsync(0, parallelTask, async (i, token) =>
{
    // Each worker keeps deleting LIMIT-sized batches until nothing matches.
    while (true)
    {
        var effectRows = await DatabaseHelper.NonQueryAsync(connStr,
            $"DELETE FROM `order_data_block` WHERE CompanyID = 0 ORDER BY ID LIMIT {eachLimit};", token);
        if (effectRows == 0)
            break;
        Interlocked.Add(ref deletionCount, effectRows);
    }
});

Console.WriteLine($"[{DateTime.Now}] DELETE COUNT: {deletionCount}");
return;

// Returns the value following the given switch, or null when the switch is
// absent; throws when the switch is present but has no value. The bounds
// check is required: previously `args[idx + 1]` threw IndexOutOfRangeException
// when the switch was the last argument.
string? GetArg(string instruct)
{
    var idx = Array.IndexOf(args, instruct);
    if (idx == -1)
        return null;
    if (idx + 1 >= args.Length || args[idx + 1].StartsWith('-'))
        throw new ArgumentException("Argument Lost", nameof(instruct));
    return args[idx + 1];
}

View File

@@ -0,0 +1,64 @@
using System.Data;
using MySqlConnector;
namespace MesETL.Shared.Helper;
public static class DatabaseHelper
{
    /// <summary>
    /// Builds a connection with a 30s connect timeout and *no* command
    /// timeout — long-running ETL statements must not be cut off mid-flight.
    /// </summary>
    public static MySqlConnection CreateConnection(string connStr)
    {
        var newConnStr = new MySqlConnectionStringBuilder(connStr)
        {
            ConnectionTimeout = 30,
            DefaultCommandTimeout = 0,
        }.ConnectionString;
        return new MySqlConnection(newConnStr);
    }

    /// <summary>
    /// Executes <paramref name="sql"/> and returns the complete result set(s).
    /// </summary>
    public static async Task<DataSet> QueryTableAsync(string connStr, string sql, CancellationToken ct = default)
    {
        await using var conn = CreateConnection(connStr);
        if (conn.State is not ConnectionState.Open)
            await conn.OpenAsync(ct);
        await using var cmd = conn.CreateCommand();
        cmd.CommandText = sql;
        var ds = new DataSet();
        // Dispose the adapter — it is IDisposable and was previously leaked.
        using var adapter = new MySqlDataAdapter(cmd);
        adapter.Fill(ds);
        return ds;
    }

    /// <summary>
    /// Executes <paramref name="sql"/> and returns the first column of the
    /// first row, or null for an empty result.
    /// </summary>
    public static async Task<object?> QueryScalarAsync(string connStr, string sql, CancellationToken ct = default)
    {
        await using var conn = CreateConnection(connStr);
        if (conn.State is not ConnectionState.Open)
            await conn.OpenAsync(ct);
        await using var cmd = conn.CreateCommand();
        cmd.CommandText = sql;
        return await cmd.ExecuteScalarAsync(ct);
    }

    /// <summary>
    /// Executes a non-query statement and returns the number of affected rows.
    /// </summary>
    public static async Task<int> NonQueryAsync(string connStr, string sql, CancellationToken ct = default)
    {
        await using var conn = CreateConnection(connStr);
        if (conn.State is not ConnectionState.Open)
            await conn.OpenAsync(ct);
        await using var cmd = conn.CreateCommand();
        cmd.CommandText = sql;
        return await cmd.ExecuteNonQueryAsync(ct);
    }

    /// <summary>
    /// Executes parameterized SQL inside a transaction and commits it.
    /// If execution throws, the `await using` disposal of the uncommitted
    /// transaction rolls it back automatically.
    /// </summary>
    public static async Task<int> TransactionAsync(string connStr, string sql, params MySqlParameter[] parameters)
    {
        await using var conn = CreateConnection(connStr);
        if (conn.State is not ConnectionState.Open)
            await conn.OpenAsync();
        await using var trans = await conn.BeginTransactionAsync();
        await using var cmd = conn.CreateCommand();
        cmd.CommandText = sql;
        cmd.Transaction = trans;
        cmd.Parameters.AddRange(parameters);
        var rows = await cmd.ExecuteNonQueryAsync();
        await trans.CommitAsync();
        return rows;
    }
}

View File

@@ -1,4 +1,4 @@
namespace ConsoleApp2.Helpers; namespace MesETL.Shared.Helper;
public static class DictionaryExtensions public static class DictionaryExtensions
{ {

View File

@@ -0,0 +1,71 @@
using System.Diagnostics.CodeAnalysis;
using System.Reflection;
namespace MesETL.Shared.Helper;
#nullable disable
public static class EnumerableExtensions
{
    /// <summary>
    /// Renders a sequence as a markdown table. Columns are the public/non-public
    /// properties plus public fields of <typeparamref name="T"/>; numeric
    /// columns get a right-align marker (trailing ':') in the divider row.
    /// </summary>
    public static string ToMarkdownTable<
        [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicProperties |
                                    DynamicallyAccessedMemberTypes.NonPublicProperties |
                                    DynamicallyAccessedMemberTypes.PublicFields |
                                    DynamicallyAccessedMemberTypes.NonPublicFields)] T>(this IEnumerable<T> source)
    {
        // Materialize once: the original enumerated `source` twice and the
        // lazy `gettables` query many times, which both repeated the
        // reflection work and broke for one-shot enumerables.
        var items = source as ICollection<T> ?? source.ToList();

        var properties = typeof(T).GetRuntimeProperties();
        var fields = typeof(T)
            .GetRuntimeFields()
            .Where(f => f.IsPublic);

        // Uniform accessor list over both properties and public fields.
        var gettables = Enumerable.Union(
                properties.Select(p => new { p.Name, GetValue = (Func<object, object>)p.GetValue, Type = p.PropertyType }),
                fields.Select(p => new { p.Name, GetValue = (Func<object, object>)p.GetValue, Type = p.FieldType }))
            .ToArray();

        // Per-column width: max over all cell values and the header itself.
        var maxColumnValues = items
            .Select(x => gettables.Select(p => p.GetValue(x)?.ToString()?.Length ?? 0))
            .Union(new[] { gettables.Select(p => p.Name.Length) }) // Include header in column sizes
            .Aggregate(
                new int[gettables.Length].AsEnumerable(),
                (accumulate, x) => accumulate.Zip(x, Math.Max))
            .ToArray();

        var columnNames = gettables.Select(p => p.Name);
        var headerLine = "| " + string.Join(" | ", columnNames.Select((n, i) => n.PadRight(maxColumnValues[i]))) + " |";

        var isNumeric = new Func<Type, bool>(type =>
            type == typeof(Byte) ||
            type == typeof(SByte) ||
            type == typeof(UInt16) ||
            type == typeof(UInt32) ||
            type == typeof(UInt64) ||
            type == typeof(Int16) ||
            type == typeof(Int32) ||
            type == typeof(Int64) ||
            type == typeof(Decimal) ||
            type == typeof(Double) ||
            type == typeof(Single));
        var rightAlign = new Func<Type, char>(type => isNumeric(type) ? ':' : ' ');

        var headerDataDividerLine =
            "| " +
            string.Join(
                "| ",
                gettables.Select((g, i) => new string('-', maxColumnValues[i]) + rightAlign(g.Type))) +
            "|";

        var lines = new[]
        {
            headerLine,
            headerDataDividerLine,
        }.Union(
            items
                .Select(s =>
                    "| " + string.Join(" | ",
                        gettables.Select((n, i) => (n.GetValue(s)?.ToString() ?? "").PadRight(maxColumnValues[i]))) +
                    " |"));

        return lines
            .Aggregate((p, c) => p + Environment.NewLine + c);
    }
}

View File

@@ -1,10 +1,16 @@
using System.Globalization; using System.Globalization;
using System.Text; using System.Text;
namespace ConsoleApp2.Helpers; namespace MesETL.Shared.Helper;
public static class StringExtensions public static class StringExtensions
{ {
/// <summary>
/// 截断字符串
/// </summary>
/// <param name="this"></param>
/// <param name="maxLength">截断长度</param>
/// <returns></returns>
public static string Omit(this ReadOnlySpan<char> @this, int maxLength) public static string Omit(this ReadOnlySpan<char> @this, int maxLength)
{ {
if (@this.Length > maxLength) if (@this.Length > maxLength)
@@ -12,8 +18,20 @@ public static class StringExtensions
return @this.ToString(); return @this.ToString();
} }
/// <summary>
/// 截断字符串
/// </summary>
/// <param name="this"></param>
/// <param name="maxLength">截断长度</param>
/// <returns></returns>
public static string Omit(this string @this, int maxLength) => Omit(@this.AsSpan(), maxLength); public static string Omit(this string @this, int maxLength) => Omit(@this.AsSpan(), maxLength);
/// <summary>
/// 将16进制字符串转换为字符串
/// </summary>
/// <param name="hexString"></param>
/// <param name="encoding"></param>
/// <returns></returns>
public static string FromHex(ReadOnlySpan<char> hexString, Encoding? encoding = null) public static string FromHex(ReadOnlySpan<char> hexString, Encoding? encoding = null)
{ {
encoding ??= Encoding.UTF8; encoding ??= Encoding.UTF8;
@@ -38,6 +56,11 @@ public static class StringExtensions
return encoding.GetString(bytes); return encoding.GetString(bytes);
} }
/// <summary>
/// 检查是否为JSON字符串
/// </summary>
/// <param name="hexStr"></param>
/// <returns></returns>
public static bool CheckJsonHex(ReadOnlySpan<char> hexStr) public static bool CheckJsonHex(ReadOnlySpan<char> hexStr)
{ {
if (hexStr.Length < 2) if (hexStr.Length < 2)

View File

@@ -0,0 +1,19 @@
namespace MesETL.Shared.Helper;
public static class TaskExtensions
{
    /// <summary>
    /// Asynchronously polls <paramref name="condition"/> until it returns
    /// true, sleeping <paramref name="pollDelay"/> ms between probes.
    /// Cancelling <paramref name="ct"/> simply ends the wait without error.
    /// </summary>
    public static async Task WaitUntil(Func<bool> condition, int pollDelay = 25, CancellationToken ct = default)
    {
        try
        {
            for (; !condition();)
            {
                await Task.Delay(pollDelay, ct);
            }
        }
        catch (TaskCanceledException)
        {
            // Cancellation surfaces as TaskCanceledException from Task.Delay;
            // treat it as "stop waiting" rather than as a failure.
        }
    }
}

View File

@@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="MySqlConnector" Version="2.3.5" />
</ItemGroup>
</Project>

142
MesETL.Test/DataFix.cs Normal file
View File

@@ -0,0 +1,142 @@
using System.Data;
using MesETL.App.Helpers;
using MesETL.Shared.Helper;
using MySqlConnector;
using Xunit.Abstractions;
namespace TestProject1;
/// <summary>
/// One-off data-repair "tests": rows sharded with CompanyID = 0 are looked up
/// by OrderNo across all shard databases and moved to the correct shard.
/// These run against live databases and mutate data — they are tools, not
/// regression tests.
/// </summary>
public class DataFix
{
    // NOTE(review): plaintext credentials committed to source — rotate/move to config.
    public const string ConnStr = "Server=192.168.1.245;Port=3306;UserId=root;Password=ruixinjie!@#123;";
    // Shard databases searched, in configuration order.
    public static string[] DbNames = ["cferp_test_1", "cferp_test_2", "cferp_test_3"];
    private readonly ITestOutputHelper _output;

    public DataFix(ITestOutputHelper output)
    {
        _output = output;
    }

    private string MakeConnStr(string dbName) => ConnStr + $"Database={dbName};";

    /// <summary>
    /// Finds order_box_block rows in cferp_test_1 whose CompanyID = 0, looks up
    /// the owning order's CompanyID by OrderNo across all shards, then deletes
    /// the row and re-inserts it into the matching shard database.
    /// Rows whose order cannot be found keep CompanyID = 0.
    /// </summary>
    [Fact]
    public async Task FixOrderBoxBlockCompanyID()
    {
        var ds = await DatabaseHelper.QueryTableAsync(MakeConnStr(DbNames[0]), "SELECT * FROM `order_box_block` WHERE COMPANYID = 0");
        var dict = new Dictionary<long, int>(); // orderNo -> CompanyID cache
        foreach (DataRow row in ds.Tables[0].Rows)
        {
            // foreach (var column in row.ItemArray)
            // {
            //     Console.Write(column.ToString() + '\t');
            // }
            var orderNo = Convert.ToInt64(row["OrderNo"]);
            var boxId = Convert.ToInt64(row["BoxID"]);
            int? companyId = null;
            foreach (var db in DbNames)
            {
                if(!dict.TryGetValue(orderNo, out var cid)) // cache check could be hoisted out of the db loop
                {
                    var result = await DatabaseHelper.QueryScalarAsync(ConnStr + $"Database={db};",
                        $"SELECT CompanyID FROM `order` WHERE OrderNo = {orderNo}");
                    if (result is null or 0) continue;
                    companyId = Convert.ToInt32(result);
                    dict.Add(orderNo, companyId.Value);
                    break;
                }
                else
                {
                    // Cache hit: no break, so this branch re-runs for each db;
                    // harmless but redundant.
                    companyId = cid;
                }
            }
            if (companyId is null or 0)
            {
                _output.WriteLine($"OrderBoxBlock:{boxId} - OrderNo {orderNo} not found");
                continue;
            }
            row["CompanyID"] = companyId;
            // Delete from the wrong shard and re-insert into the shard chosen
            // by the tenant key, atomically within one transaction.
            await DatabaseHelper.TransactionAsync(ConnStr,
                $"""
                 DELETE FROM cferp_test_1.`order_box_block` WHERE BoxID = {boxId};
                 INSERT INTO {TenantDbHelper.GetDbNameByTenantKeyValue(companyId.Value)}.`order_box_block`
                 VALUES(@c1, @c2, @c3, @c4, @c5);
                 """, [new MySqlParameter("c1", row[0]),
                    new MySqlParameter("c2", row[1]),
                    new MySqlParameter("c3", row[2]),
                    new MySqlParameter("c4", row[3]),
                    new MySqlParameter("c5", row[4])]);
            _output.WriteLine($"OrderBoxBock:{boxId} CompanyID -> {companyId}");
        }
    }

    /// <summary>
    /// Generic variant of the fix above: repairs CompanyID = 0 rows of
    /// <paramref name="tableName"/> in shard <paramref name="dbNameIndex"/>,
    /// resolving the tenant via the row's own OrderNo.
    /// </summary>
    [Theory]
    [InlineData(["order_data_block", "ID", 0])]
    [InlineData(["order_data_block", "ID", 1])]
    [InlineData(["order_data_block", "ID", 2])]
    public async Task FixCompanyIdWithOwnOrderNo(string tableName, string keyName, int dbNameIndex)
    {
        var ds = await DatabaseHelper.QueryTableAsync(MakeConnStr(DbNames[dbNameIndex]),
            $"SELECT * FROM `{tableName}` WHERE COMPANYID = 0");
        // NOTE(review): this cache is read but never written in this method,
        // so TryGetValue always misses — confirm whether a dict.Add was
        // intended after a successful lookup.
        var dict = new Dictionary<long, int>(); // orderNo -> CompanyID
        foreach (DataRow row in ds.Tables[0].Rows)
        {
            // foreach (var column in row.ItemArray)
            // {
            //     Console.Write(column.ToString() + '\t');
            // }
            var orderNo = Convert.ToInt64(row["OrderNo"]);
            var key = Convert.ToInt32(row[keyName]);
            int? companyId = null;
            if(!dict.TryGetValue(orderNo, out var cid))
            {
                companyId = await MesDatabaseHelper.TrySearchCompanyId(ConnStr, DbNames,
                    $"SELECT CompanyID FROM `order` WHERE OrderNo = {orderNo}");
            }
            else
            {
                companyId = cid;
            }
            if (companyId is null or 0)
            {
                _output.WriteLine($"{tableName}:{key} - OrderNo {orderNo} not found");
                continue;
            }
            row["CompanyID"] = companyId;
            // ON DUPLICATE KEY UPDATE
            await DatabaseHelper.TransactionAsync(ConnStr,
                $"""
                 DELETE FROM {DbNames[dbNameIndex]}.`{tableName}` WHERE ID = {key};
                 INSERT INTO {TenantDbHelper.GetDbNameByTenantKeyValue(companyId.Value)}.`{tableName}`
                 VALUES({string.Join(',', Enumerable.Range(0, row.ItemArray.Length).Select(i => $"@c{i}"))})
                 ON DUPLICATE KEY UPDATE CompanyID = {companyId};
                 """,
                Enumerable.Range(0, row.ItemArray.Length).Select(i => new MySqlParameter($"c{i}", row[i])).ToArray());
            _output.WriteLine($"{tableName}:{key} CompanyID -> {companyId}");
        }
    }

    /// <summary>
    /// Recomputes ShardKey from digits 3-6 of the row's own OrderNo (plus a
    /// trailing '0') for rows where ShardKey = 0.
    /// </summary>
    [Theory]
    [InlineData("order", 0)]
    [InlineData("order", 1)]
    [InlineData("order", 2)]
    public async Task FixShardKeyWithOwnOrderNo(string tableName, int dbNameIndex)
    {
        var r = await DatabaseHelper.NonQueryAsync(MakeConnStr(DbNames[dbNameIndex]),
            $"UPDATE `{tableName}` SET ShardKey = CONCAT(SUBSTR(`order`.OrderNo,3,4),'0') WHERE ShardKey = 0");
        _output.WriteLine($"Affect Rows: {r}");
    }
}

View File

@@ -0,0 +1,173 @@
using System.Data;
using System.Text;
using MesETL.App.Helpers;
using MesETL.Shared.Helper;
using MySqlConnector;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using Xunit.Abstractions;
namespace TestProject1;
/// <summary>
/// Ad-hoc database maintenance tools exposed as xunit "tests": engine swaps,
/// row counting, and index export/import between shard databases.
/// These run against live servers and are destructive — run deliberately.
/// </summary>
public class DatabaseToolBox
{
    private readonly ITestOutputHelper _output;
    // NOTE(review): plaintext credentials committed to source.
    public const string ConnStr = "Server=127.0.0.1;Port=3306;UserId=root;Password=cfmes123456;";

    public DatabaseToolBox(ITestOutputHelper output)
    {
        _output = output;
    }

    /// <summary>
    /// Converts every table in <paramref name="database"/> to the BLACKHOLE
    /// engine (dropping partitioning first), turning it into a data sink for
    /// dry-run imports.
    /// </summary>
    [Theory]
    [InlineData("cferp_void_1")]
    public async Task AlterAllTableToBlackHole(string database)
    {
        var connStr = ConnStr + $"Database={database}";
        var tables = await DatabaseHelper.QueryTableAsync(connStr,
            $"""
             SELECT TABLE_NAME FROM information_schema.`TABLES` WHERE TABLE_SCHEMA = '{database}';
             """);
        foreach (DataRow row in tables.Tables[0].Rows)
        {
            var tableName = row["TABLE_NAME"].ToString();
            var sql = $"""
                       ALTER TABLE `{tableName}` REMOVE PARTITIONING;
                       """;
            try
            {
                await DatabaseHelper.NonQueryAsync(connStr, sql);
            }
            catch (MySqlException e) when (e.ErrorCode == MySqlErrorCode.PartitionManagementOnNoPartitioned)
            {
                // Table was not partitioned — nothing to remove; ignore.
            }
            sql = $"""
                   ALTER TABLE `{tableName}` ENGINE=BLACKHOLE;
                   """;
            await DatabaseHelper.NonQueryAsync(connStr, sql);
        }
    }

    /// <summary>
    /// Sums COUNT(1) over every table of <paramref name="database"/>.
    /// </summary>
    public async Task<long> CountAllTable(string connStr, string database)
    {
        var count = 0L;
        var set = await DatabaseHelper.QueryTableAsync(connStr,
            $"""
             SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA = '{database}';
             """);
        foreach (DataRow row in set.Tables[0].Rows)
        {
            count += Convert.ToInt64(await DatabaseHelper.QueryScalarAsync(connStr,
                $"""
                 SELECT COUNT(1) FROM `{database}`.`{row[0]}`;
                 """));
        }
        _output.WriteLine($"Record count: {count} records");
        return count;
    }

    /// <summary>
    /// Totals row counts across several databases (hard-coded 245 server,
    /// not <see cref="ConnStr"/>).
    /// </summary>
    [Theory]
    [InlineData([new[]{"cferp_test_1", "cferp_test_2", "cferp_test_3"}])]
    public async Task CountAllDatabase(IEnumerable<string> databases)
    {
        var count = 0L;
        var connStr = "Server=192.168.1.245;Port=3306;UserId=root;Password=ruixinjie!@#123;";
        foreach (var db in databases)
        {
            count += await CountAllTable(connStr, db);
        }
        _output.WriteLine(count.ToString());
    }

    /// <summary>
    /// Reads all secondary BTREE indexes from information_schema.STATISTICS.
    /// NOTE(review): STATISTICS has one row per (index, column), so a
    /// composite index comes back as several single-column entries — the
    /// import below would recreate it as separate single-column indexes.
    /// Also, the 'PRIMARY'/'HASH' switch arms are unreachable given the
    /// INDEX_TYPE = 'BTREE' filter in the query.
    /// </summary>
    public async Task<TableIndex[]> GetAllTableIndexes(string database)
    {
        var data = await DatabaseHelper.QueryTableAsync(ConnStr,
            $"""
             SELECT TABLE_NAME, INDEX_NAME, NON_UNIQUE, COLUMN_NAME, INDEX_TYPE FROM information_schema.STATISTICS
             WHERE TABLE_SCHEMA = '{database}'
             AND INDEX_NAME != 'PRIMARY'
             AND INDEX_TYPE = 'BTREE'
             """);
        return data.Tables[0].Rows.Cast<DataRow>().Select(row =>
        {
            return new TableIndex(row["TABLE_NAME"].ToString()!,
                row["INDEX_NAME"].ToString()!,
                !Convert.ToBoolean(row["NON_UNIQUE"]),
                row["COLUMN_NAME"].ToString()!,
                row["INDEX_TYPE"] switch
                {
                    "BTREE" => TableIndex.TableIndexType.BTree,
                    "PRIMARY" => TableIndex.TableIndexType.Primary,
                    "HASH" => TableIndex.TableIndexType.Hash,
                    _ => throw new ArgumentOutOfRangeException(nameof(row))
                }
            );
        }).ToArray();
    }

    /// <summary>
    /// Prints DROP INDEX statements for every secondary index (for manual use).
    /// NOTE(review): "Drop UNIQUE INDEX ..." is not valid MySQL — DROP INDEX
    /// takes no UNIQUE keyword; the generated text needs hand-editing.
    /// </summary>
    [Theory]
    [InlineData(["mesdb_1"])]
    [InlineData(["mesdb_2"])]
    [InlineData(["mesdb_3"])]
    [InlineData(["mesdb_4"])]
    [InlineData(["mesdb_5"])]
    public async Task ShowIndex(string database)
    {
        var indexes = await GetAllTableIndexes(database);
        var sb = new StringBuilder();
        foreach (var (tableName, indexName, isUnique, columnName, tableIndexType) in indexes!)
        {
            sb.AppendLine($"Drop {(isUnique ? "UNIQUE" : string.Empty)} INDEX `{indexName}` ON `{database}`.`{tableName}`;");
        }
        _output.WriteLine(sb.ToString());
    }

    /// <summary>Dumps all secondary indexes of a database to a JSON file.</summary>
    [Theory]
    [InlineData(["cferp_test_1", "D:/Indexes_cferp_test_1.json"])]
    [InlineData(["cferp_test_2", "D:/Indexes_cferp_test_2.json"])]
    [InlineData(["cferp_test_3", "D:/Indexes_cferp_test_3.json"])]
    public async Task ExportAllIndexes(string database, string outputPath)
    {
        var indexes = await GetAllTableIndexes(database);
        var json = JArray.FromObject(indexes);
        await File.WriteAllTextAsync(outputPath, json.ToString());
        _output.WriteLine($"Exported {indexes.Length} indexes to '{outputPath}'");
    }

    /// <summary>
    /// Recreates indexes from a JSON dump produced by ExportAllIndexes.
    /// See the composite-index caveat on GetAllTableIndexes.
    /// </summary>
    [Theory]
    [InlineData("cferp_test_1", "D:/Indexes_cferp_test_1.json")]
    [InlineData("cferp_test_2", "D:/Indexes_cferp_test_2.json")]
    [InlineData("cferp_test_3", "D:/Indexes_cferp_test_3.json")]
    public async Task ImportAllIndexes(string database, string importPath)
    {
        var json = await File.ReadAllTextAsync(importPath);
        var indexes = JsonConvert.DeserializeObject<TableIndex[]>(json);
        var sb = new StringBuilder();
        foreach (var (tableName, indexName, isUnique, columnName, tableIndexType) in indexes!)
        {
            sb.AppendLine($"CREATE {(isUnique ? "UNIQUE" : string.Empty)} INDEX `{indexName}` ON `{database}`.`{tableName}` (`{columnName}`) USING {tableIndexType};");
        }
        await DatabaseHelper.NonQueryAsync(ConnStr, sb.ToString());
        _output.WriteLine($"Import {indexes.Length} indexes to '{database}' from '{importPath}'");
    }

    /// <summary>Drops every secondary index of a database (speeds up bulk load).</summary>
    [Theory]
    [InlineData(["cferp_test_1"])]
    [InlineData(["cferp_test_2"])]
    [InlineData(["cferp_test_3"])]
    public async Task DropAllIndex(string database)
    {
        var indexes = await GetAllTableIndexes(database);
        var sb = new StringBuilder();
        foreach (var (tableName, indexName) in indexes)
        {
            sb.AppendLine($"DROP INDEX `{indexName}` ON `{database}`.`{tableName}`;");
        }
        await DatabaseHelper.NonQueryAsync(ConnStr, sb.ToString());
        _output.WriteLine($"Dropped {indexes.Length} indexes from {database}");
    }
}

View File

@@ -0,0 +1 @@
global using Xunit;

View File

@@ -0,0 +1,21 @@
using MesETL.App.Helpers;
using MesETL.Shared.Helper;
namespace TestProject1;
public static class MesDatabaseHelper
{
    /// <summary>
    /// Runs <paramref name="scalarQuery"/> against each database in
    /// <paramref name="dbNames"/> in order and returns the first non-null,
    /// non-zero scalar converted to an int; null when no database matches.
    /// </summary>
    public static async Task<int?> TrySearchCompanyId(string connStr, IEnumerable<string> dbNames, string scalarQuery)
    {
        foreach (var dbName in dbNames)
        {
            var scalar = await DatabaseHelper.QueryScalarAsync($"{connStr}Database={dbName};", scalarQuery);
            if (scalar is not (null or 0))
                return Convert.ToInt32(scalar);
        }

        return null;
    }
}

View File

@@ -0,0 +1,30 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<IsPackable>false</IsPackable>
<IsTestProject>true</IsTestProject>
<RootNamespace>TestProject1</RootNamespace>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.6.0"/>
<PackageReference Include="xunit" Version="2.4.2"/>
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.5">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<PrivateAssets>all</PrivateAssets>
</PackageReference>
<PackageReference Include="coverlet.collector" Version="6.0.0">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<PrivateAssets>all</PrivateAssets>
</PackageReference>
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\MesETL.App\MesETL.App.csproj" />
</ItemGroup>
</Project>

24
MesETL.Test/TableIndex.cs Normal file
View File

@@ -0,0 +1,24 @@
namespace TestProject1;
/// <summary>
/// A single (index, column) entry read from information_schema.STATISTICS.
/// </summary>
public record TableIndex(string TableName, string IndexName, bool IsUnique, string ColumnName, TableIndex.TableIndexType IndexType)
{
    /// <summary>Index kind as reported by the server.</summary>
    public enum TableIndexType
    {
        BTree,
        Hash,
        Primary
    }

    /// <summary>Two-element deconstruction: table and index name.</summary>
    public void Deconstruct(out string tableName, out string indexName) =>
        (tableName, indexName) = (TableName, IndexName);

    /// <summary>Three-element deconstruction: table, index, and column name.</summary>
    public void Deconstruct(out string tableName, out string indexName, out string columnName) =>
        (tableName, indexName, columnName) = (TableName, IndexName, ColumnName);
}

View File

@@ -0,0 +1,26 @@
namespace TestProject1;
public static class TenantDbHelper
{
    /// <summary>
    /// Key-Value: {DbName}-{TenantKeyLessThan}. Entries must be listed in
    /// ascending threshold order: lookup returns the first database whose
    /// upper bound is greater than the tenant key.
    /// </summary>
    public static Dictionary<string, int> DbList { get; set; } = new Dictionary<string, int>
    {
        { "cferp_test_1", 1000 },
        { "cferp_test_2", 2000 },
        { "cferp_test_3", int.MaxValue },
    };

    /// <summary>
    /// Maps a tenant key to the name of its shard database.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// No configured database covers <paramref name="tenantKeyValue"/>.
    /// </exception>
    public static string GetDbNameByTenantKeyValue(int tenantKeyValue)
    {
        // Relies on DbList being ordered by ascending threshold.
        foreach (var (dbName, upperBound) in DbList)
        {
            if (upperBound > tenantKeyValue)
                return dbName;
        }

        // The previous `FirstOrDefault(...)!.Value.Key ?? throw` chain
        // dereferenced a null nullable on a miss and threw
        // NullReferenceException before the intended exception could fire.
        throw new ArgumentOutOfRangeException(nameof(tenantKeyValue),
            $"已配置的数据库中没有任何符合'{nameof(tenantKeyValue)}'值的对象");
    }
}

152
MesETL.Test/Test.cs Normal file
View File

@@ -0,0 +1,152 @@
using System.Collections.Concurrent;
using System.Diagnostics;
using MesETL.App.Services.ETL;
using MesETL.Shared.Helper;
using Xunit.Abstractions;
using ZstdSharp;
namespace TestProject1;
/// <summary>
/// Exploratory/benchmark-style tests: zstd decompression + CSV-parse
/// throughput, parser spot checks, memory probing, and a one-off report
/// formatter. Several depend on local files and are not CI-suitable.
/// </summary>
public class Test
{
    private readonly ITestOutputHelper _output;

    public Test(ITestOutputHelper output)
    {
        _output = output;
    }

    /// <summary>
    /// Streams a .zst dump through the CSV parser while a background task
    /// reports rows/second every two seconds. Requires the local input file.
    /// </summary>
    [Theory]
    [InlineData([@"D:\Dump\NewMockData2\cferp.order_box_block.00000.dat.zst"])]
    public async Task ZstdDecompressTest(string inputFile)
    {
        var count = 0;
        var flag = true; // signals the monitor task to stop
        var sw = Stopwatch.StartNew();
        var reader = new StreamReader(new DecompressionStream(File.OpenRead(inputFile)));
        var monitor = Task.Run(async () =>
        {
            var lastElapse = sw.ElapsedMilliseconds;
            var lastCount = 0;
            while (flag)
            {
                await Task.Delay(2000);
                _output.WriteLine($"speed: {(count - lastCount) / ((sw.ElapsedMilliseconds - lastElapse) / 1000f)}");
                lastElapse = sw.ElapsedMilliseconds;
                lastCount = count;
            }
        });
        while (!reader.EndOfStream)
        {
            var str = await reader.ReadLineAsync();
            char a; // leftover from the commented-out char-walk baseline below
            // foreach (var c in str)
            // {
            //     a = c;
            // }
            CsvReader.ParseRowFaster(str, '"', ',');
            count++;
        }
        flag = false;
        monitor.Wait();
    }

    // Sample CSV rows (including \N nulls and hex-encoded columns) for the
    // parser spot check below.
    public static IEnumerable<object[]> ParseRowData()
    {
        yield return
            [@"20220104020855,""2022-01-04 10:06:46"",1455,""0001-01-01 00:00:00"",""1"",0,""2"",""0"",\N,""0"",22010"];
        yield return
            [@"20220104020858,""2022-01-04 15:08:22"",1455,""0001-01-01 00:00:00"",""1"",838,""2"",""0"",""5"",""0"",22010"];
        yield return
            [@"5586326,20220104020855,220105981029,""1"",482278,482279,3768774,0,0,""1.000"",1455,22010"];
        yield return
            [@"130658,""PD220104002302"",3,4616,""2022-01-04 15:10:40"",1443,""2022-01-04 15:10:40"",""2022-01-04 15:10:51"",0,"""",0,1455,""0001-01-01 00:00:00"",1,5B32303232303130343032303835385D,E590B8E5A1912D2DE590B8E5A1912D2D31382D2D323030302A3630302D2D3130E789872D2D352E3936333B6361696C69616F2D2D79616E73652D2D392D2D323031302A313137342D2D31E789872D2D322E3336,""0"",0"];
    }

    /// <summary>Smoke test: parse a row and print the fields for inspection.</summary>
    [Theory]
    [MemberData(nameof(ParseRowData))]
    public void ParseRowFasterTest(string row)
    {
        var fields = CsvReader.ParseRowFaster(row, '"', ',');
        _output.WriteLine(string.Join(',', fields));
    }

    /// <summary>
    /// Fills a dictionary with 3M entries and then spins forever so memory
    /// usage can be inspected externally. Not a real test — run manually.
    /// </summary>
    [Fact]
    public void DictMemoryTest()
    {
        var dict = new ConcurrentDictionary<string, string>();
        for (int i = 0; i < 3000000; i++)
        {
            dict.AddOrUpdate(Guid.NewGuid().ToString(), Random.Shared.NextInt64(1000000000L, 9999999999L).ToString(), (_, __) => Random.Shared.NextInt64(1000000000L, 9999999999L).ToString());
        }
        while (true)
        {
        }
    }

    /// <summary>
    /// Formats pasted "table: input/output" ETL run stats into a markdown
    /// table (FILTER = input - output). Later duplicate table lines reflect
    /// a re-run and are kept as-is.
    /// </summary>
    [Fact]
    public void GetResult()
    {
        var input =
            """
            machine: 19303/19061
            order: 3416759/3415192
            order_block_plan: 2934281/1968850
            order_block_plan_item: 0/235927707
            order_block_plan_result: 1375479/277667
            order_box_block: 23457666/23450841
            order_data_block: 513012248/513012248
            order_data_goods: 18655270/18655270
            order_data_parts: 353139066/353139066
            order_item: 955274320/955274320
            order_module: 102907480/56935691
            order_module_extra: 40044077/40044077
            order_module_item: 49209022/49209022
            order_package: 12012712/12012712
            order_package_item: 0/80605124
            order_process: 4045309/2682043
            order_process_step: 8343418/5505158
            order_process_step_item: 14856509/9787696
            order_scrap_board: 136096/136090
            process_group: 1577/1543
            process_info: 9212/9008
            process_item_exp: 30/30
            process_schdule_capacity: 42442/42442
            process_step_efficiency: 8/8
            report_template: 7358/7338
            simple_package: 142861/137730
            simple_plan_order: 1167004/854699
            simple_plan_order: 0/55677
            sys_config: 2608/2608
            work_calendar: 11/11
            work_shift: 73/73
            work_time: 77/77
            order_process_step_item: 14856509/9790701
            order_process_step: 8343418/5506925
            order_module: 102907480/56935691
            order_process: 4045309/2682043
            report_template: 7358/7358
            process_info: 9212/9212
            process_group: 1577/1577
            order_block_plan_result: 1375479/277667
            order_box_block: 23457666/23457666
            order_block_plan: 2934281/1968850
            order: 3416759/3416759
            machine: 19303/19303
            order_scrap_board: 136096/136096
            """;
        var arr = input.Split('\n').Select(s =>
        {
            var x = s.Split(':');
            var y = x[1].Split('/').Select(i => long.Parse(i)).ToArray();
            return new {TABLE_NAME = x[0], INPUT = y[0], OUTPUT = y[1], FILTER = y[0] - y[1]};
        }).OrderBy(s => s.TABLE_NAME);
        _output.WriteLine(arr.ToMarkdownTable());
    }
}

40
MesETL.sln Normal file
View File

@@ -0,0 +1,40 @@

Microsoft Visual Studio Solution File, Format Version 12.00
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MesETL.App", "MesETL.App\MesETL.App.csproj", "{155E4B04-E88C-4BA4-AED2-B13E0A0432B5}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MesETL.Test", "MesETL.Test\MesETL.Test.csproj", "{8679D5B6-5853-446E-9882-7B7A8E270500}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Mesdb.Cli", "Mesdb.Cli\Mesdb.Cli.csproj", "{68307B05-3D66-4322-A42F-C044C1E8BA3B}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MesETL.Shared", "MesETL.Shared\MesETL.Shared.csproj", "{FE134001-0E22-458B-BEF2-29712A29087E}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MesETL.Clean", "MesETL.Clean\MesETL.Clean.csproj", "{E1B2BED0-EBA6-4A14-BAD5-8EC4E528D7E0}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{155E4B04-E88C-4BA4-AED2-B13E0A0432B5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{155E4B04-E88C-4BA4-AED2-B13E0A0432B5}.Debug|Any CPU.Build.0 = Debug|Any CPU
{155E4B04-E88C-4BA4-AED2-B13E0A0432B5}.Release|Any CPU.ActiveCfg = Release|Any CPU
{155E4B04-E88C-4BA4-AED2-B13E0A0432B5}.Release|Any CPU.Build.0 = Release|Any CPU
{8679D5B6-5853-446E-9882-7B7A8E270500}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{8679D5B6-5853-446E-9882-7B7A8E270500}.Debug|Any CPU.Build.0 = Debug|Any CPU
{8679D5B6-5853-446E-9882-7B7A8E270500}.Release|Any CPU.ActiveCfg = Release|Any CPU
{8679D5B6-5853-446E-9882-7B7A8E270500}.Release|Any CPU.Build.0 = Release|Any CPU
{68307B05-3D66-4322-A42F-C044C1E8BA3B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{68307B05-3D66-4322-A42F-C044C1E8BA3B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{68307B05-3D66-4322-A42F-C044C1E8BA3B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{68307B05-3D66-4322-A42F-C044C1E8BA3B}.Release|Any CPU.Build.0 = Release|Any CPU
{FE134001-0E22-458B-BEF2-29712A29087E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{FE134001-0E22-458B-BEF2-29712A29087E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{FE134001-0E22-458B-BEF2-29712A29087E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FE134001-0E22-458B-BEF2-29712A29087E}.Release|Any CPU.Build.0 = Release|Any CPU
{E1B2BED0-EBA6-4A14-BAD5-8EC4E528D7E0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E1B2BED0-EBA6-4A14-BAD5-8EC4E528D7E0}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E1B2BED0-EBA6-4A14-BAD5-8EC4E528D7E0}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E1B2BED0-EBA6-4A14-BAD5-8EC4E528D7E0}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal

View File

@@ -0,0 +1,52 @@
using System.Collections.Concurrent;
using System.Data;
using MesETL.Shared.Helper;
namespace Mesdb.Cli;
public static class BatchDbExtensions
{
    /// <summary>
    /// Counts the rows of every table in every listed database, in parallel.
    /// The table list is read from <paramref name="dbNames"/>[0]; all shard
    /// databases are assumed to share the same schema.
    /// </summary>
    /// <returns>Map of database name to (table name -> row count).</returns>
    public static async Task<IDictionary<string, IDictionary<string, long>>> CountDatabasesAsync(string connStr, IList<string> dbNames, CancellationToken cancellationToken = default)
    {
        var result = new ConcurrentDictionary<string, IDictionary<string, long>>();
        var tables = await DatabaseHelper.QueryTableAsync(connStr,
            $"""
             SELECT TABLE_NAME FROM information_schema.`TABLES` WHERE TABLE_SCHEMA = '{dbNames[0]}';
             """, cancellationToken);
        // Flow cancellationToken into the fan-out: the original accepted it
        // but never passed it anywhere, so callers could not cancel.
        await Parallel.ForEachAsync(dbNames, cancellationToken, async (dbName, ct) =>
        {
            // GetOrAdd instead of AddOrUpdate(addValue, updateFactory): the old
            // add path inserted an *empty* dictionary, silently dropping the
            // first table's count for each database.
            var dbCounts = (ConcurrentDictionary<string, long>)result.GetOrAdd(
                dbName, static _ => (IDictionary<string, long>)new ConcurrentDictionary<string, long>());
            await Parallel.ForEachAsync(tables.Tables[0].Rows.Cast<DataRow>(), ct, async (row, innerCt) =>
            {
                var tableName = row[0].ToString()!;
                var count = (long)(await DatabaseHelper.QueryScalarAsync(connStr,
                    $"SELECT COUNT(1) FROM `{dbName}`.`{tableName}`;", innerCt))!;
                dbCounts.AddOrUpdate(tableName, count, (_, num) => num + count);
            });
        });
        return result;
    }

    /// <summary>
    /// Runs ANALYZE TABLE on every table of every listed database in parallel,
    /// printing each result row as tab-separated text.
    /// </summary>
    public static async Task AnalyzeAllAsync(string connStr, IList<string> dbNames)
    {
        var tables = await DatabaseHelper.QueryTableAsync(connStr,
            $"""
             SELECT TABLE_NAME FROM information_schema.`TABLES` WHERE TABLE_SCHEMA = '{dbNames[0]}';
             """);
        await Parallel.ForEachAsync(dbNames, async (dbName, ct) =>
        {
            await Parallel.ForEachAsync(tables.Tables[0].Rows.Cast<DataRow>(), ct, async (row, innerCt) =>
            {
                var tableName = row[0].ToString()!;
                var result = await DatabaseHelper.QueryTableAsync(connStr,
                    $"ANALYZE TABLE `{dbName}`.`{tableName}`;", innerCt);
                Console.WriteLine(string.Join('\t', result.Tables[0].Rows[0].ItemArray.Select(x => x.ToString())));
            });
        });
    }
}

View File

@@ -0,0 +1,22 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Cocona" Version="2.2.0" />
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="Serilog" Version="4.0.0-dev-02108" />
<PackageReference Include="Serilog.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\MesETL.Shared\MesETL.Shared.csproj" />
</ItemGroup>
</Project>

52
Mesdb.Cli/Program.cs Normal file
View File

@@ -0,0 +1,52 @@
using Cocona;
using MesETL.Shared.Helper;
using Mesdb.Cli;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Hosting;

// CLI entry point: `count` totals table rows across shard databases
// (per-database with -a, otherwise aggregated); `analyze` runs ANALYZE TABLE
// everywhere. Connection settings come from the command-line switches below.

var builder = Host.CreateApplicationBuilder(args);
builder.Configuration.AddCommandLine(args, new Dictionary<string, string>
{
    { "-s", "ConnectionString" },
    { "--ConnectionString", "ConnectionString" },
    { "-B", "Databases" },
    { "--Databases", "Databases" },
    { "-a", "All" },
    { "-c", "Command"},
    { "--Command", "Command" },
    { "--Sql", "Command" }
});
builder.Build();

var connStr = builder.Configuration.GetValue<string>("ConnectionString") ?? throw new ApplicationException("没有配置数据库连接字符串");
var databases = builder.Configuration.GetValue<string>("Databases")?.Split(',').ToList() ?? throw new ApplicationException("没有配置数据库");
var all = builder.Configuration.GetValue<bool>("All");

if (args.Length > 1 && args[0] == "count")
{
    var perDb = await BatchDbExtensions.CountDatabasesAsync(connStr, databases);
    if (all)
        PrintPerDatabase(perDb);
    else
        PrintTotals(perDb);
}

if (args.Length > 1 && args[0] == "analyze")
{
    await BatchDbExtensions.AnalyzeAllAsync(connStr, databases);
}

// Prints one markdown table of counts per database.
void PrintPerDatabase(IDictionary<string, IDictionary<string, long>> perDb)
{
    foreach (var (dbName, counts) in perDb)
    {
        Console.WriteLine(dbName + ":");
        Console.WriteLine(counts.Select(pair => new { TABLE_NAME = pair.Key, COUNT = pair.Value }).ToMarkdownTable());
    }
}

// Sums per-table counts over every database and prints a single table.
void PrintTotals(IDictionary<string, IDictionary<string, long>> perDb)
{
    var totals = new Dictionary<string, long>();
    foreach (var (_, counts) in perDb)
    {
        foreach (var (table, num) in counts)
        {
            totals.AddOrUpdate(table, num, (key, existing) => existing + num);
        }
    }
    Console.WriteLine(totals.Select(pair => new { TABLE_NAME = pair.Key, COUNT = pair.Value }).ToMarkdownTable());
}

44
Mesdb.Cli/Schema/DB.cs Normal file
View File

@@ -0,0 +1,44 @@
using MesETL.Shared.Helper;
using MySqlConnector;
using Serilog;
namespace Mesdb.Cli.Schema;
/// <summary>
/// Root handle over a set of MySQL databases reachable through one base
/// connection string. Use <see cref="Create"/> / <see cref="CreateAsync"/>;
/// the constructor is private so instances always go through validation.
/// </summary>
public class DB
{
    /// <summary>Base connection string (without a specific database selected).</summary>
    public required string ConnectionString { get; init; }

    /// <summary>Databases that were successfully probed during creation.</summary>
    public required IReadOnlyList<Database> Databases { get; init; }

    /// <summary>
    /// Synchronous wrapper over <see cref="CreateAsync"/>. Uses
    /// GetAwaiter().GetResult() instead of .Result so a connection failure
    /// surfaces as the original exception rather than an AggregateException
    /// (the previous behavior logged and rethrew the wrapper).
    /// </summary>
    public static DB Create(string connStr, IEnumerable<string> dbNames)
        => CreateAsync(connStr, dbNames).GetAwaiter().GetResult();

    /// <summary>
    /// Probes each named database and builds the handle.
    /// </summary>
    /// <param name="connStr">Base connection string; the database name is swapped in per entry.</param>
    /// <param name="dbNames">Schema names to connect to.</param>
    /// <exception cref="Exception">Rethrown after logging when any database cannot be reached.</exception>
    public static async Task<DB> CreateAsync(string connStr, IEnumerable<string> dbNames)
    {
        var databases = new List<Database>();
        foreach (var dbName in dbNames)
        {
            var dbConnStr = new MySqlConnectionStringBuilder(connStr)
            {
                Database = dbName
            }.ConnectionString;
            try
            {
                // Connectivity probe: a throwaway statement that fails fast if
                // the server or schema is unreachable.
                _ = await DatabaseHelper.NonQueryAsync(dbConnStr, "SHOW DATABASES;");
                databases.Add(new Database(dbName, dbConnStr));
            }
            catch (Exception e)
            {
                Log.Logger.Fatal(e, "无法连接到数据库: {DbName} ", dbName);
                throw;
            }
        }
        return new DB
        {
            ConnectionString = connStr,
            Databases = databases
        };
    }

    private DB()
    {
    }
}

View File

@@ -0,0 +1,50 @@
using System.Data;
using MesETL.Shared.Helper;
using MySqlConnector;
namespace Mesdb.Cli.Schema;
/// <summary>
/// A single MySQL database: its connection string and the tables discovered
/// from information_schema at construction time.
/// </summary>
public class Database
{
    /// <summary>
    /// Lists the table names of <paramref name="dbName"/> via information_schema.
    /// </summary>
    /// <param name="dbName">Schema whose tables are listed.</param>
    /// <param name="connStr">Connection string used for the query.</param>
    public static async Task<Table[]> FetchTableAsync(string dbName, string connStr)
    {
        // The helper API exposes no parameter binding here, so at minimum
        // double up single quotes to keep a quote in the schema name from
        // breaking (or injecting into) the statement.
        var escapedName = dbName.Replace("'", "''");
        var tables = await DatabaseHelper.QueryTableAsync(connStr,
            $"""
             SELECT TABLE_NAME FROM information_schema.`TABLES` WHERE TABLE_SCHEMA = '{escapedName}';
             """);
        return tables.Tables[0].Rows.Cast<DataRow>()
            .Select(row => new Table { Name = row[0].ToString()! })
            .ToArray();
    }

    /// <summary>Schema name.</summary>
    public string Name { get; }

    /// <summary>Connection string with <see cref="Name"/> selected as the database.</summary>
    public string ConnectionString { get; }

    /// <summary>Tables discovered when this instance was constructed.</summary>
    public IReadOnlyList<Table> Tables { get; }

    public Database(string name, string connStr)
    {
        var trueConnStr = new MySqlConnectionStringBuilder(connStr)
        {
            Database = name
        }.ConnectionString;
        Name = name;
        ConnectionString = trueConnStr;
        // GetAwaiter().GetResult() instead of .Result: a failed fetch surfaces
        // the original exception, not an AggregateException. Still
        // sync-over-async — consider an async factory if this ever runs on a
        // synchronization-context host.
        Tables = FetchTableAsync(name, trueConnStr).GetAwaiter().GetResult();
    }

    /// <summary>Executes a non-query statement against this database.</summary>
    public Task ExecuteNonQueryAsync(string sql, CancellationToken cancellationToken = default)
    {
        return DatabaseHelper.NonQueryAsync(ConnectionString, sql, cancellationToken);
    }

    /// <summary>Executes a query and returns the full result set.</summary>
    public Task<DataSet> ExecuteQueryAsync(string sql, CancellationToken cancellationToken = default)
    {
        return DatabaseHelper.QueryTableAsync(ConnectionString, sql, cancellationToken);
    }

    /// <summary>Executes a query and returns the first column of the first row, or null.</summary>
    public Task<object?> ExecuteScalarAsync(string sql, CancellationToken cancellationToken = default)
    {
        return DatabaseHelper.QueryScalarAsync(ConnectionString, sql, cancellationToken);
    }
}

View File

@@ -0,0 +1,8 @@
namespace Mesdb.Cli.Schema;
/// <summary>
/// A table of a MySQL schema as discovered from information_schema.
/// Currently only carries the table name.
/// </summary>
public class Table
{
    /// <summary>Table name (TABLE_NAME from information_schema).</summary>
    public required string Name { get; init; }
}