This commit is contained in:
lindj 2024-01-12 16:50:37 +08:00
parent eab3695f53
commit 0984853c79
28 changed files with 1115 additions and 166 deletions

View File

@ -9,12 +9,24 @@
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<None Remove="appsettings.json" />
</ItemGroup>
<ItemGroup>
<Content Include="appsettings.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Caching.StackExchangeRedis" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" /> <PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="MySqlConnector" Version="2.3.3" /> <PackageReference Include="MySqlConnector" Version="2.3.3" />
<PackageReference Include="Serilog" Version="3.1.2-dev-02097" /> <PackageReference Include="Serilog" Version="3.1.2-dev-02097" />
<PackageReference Include="Serilog.Extensions.Hosting" Version="8.0.0" /> <PackageReference Include="Serilog.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" /> <PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
<PackageReference Include="ServiceStack.Text" Version="8.0.0" /> <PackageReference Include="ServiceStack.Text" Version="8.0.0" />
<PackageReference Include="ZstdSharp.Port" Version="0.7.4" />
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -1,4 +1,6 @@
namespace ConsoleApp2; using System.ComponentModel.Design;
namespace ConsoleApp2;
public class DataRecord public class DataRecord
@ -33,9 +35,10 @@ public class DataRecord
public string TableName { get; } public string TableName { get; }
public string? Database { get; set; } public string? Database { get; set; }
public int CompanyID { get; set; }
public DataRecord(string[] fields, string tableName, string[] headers) public DataRecord(string[] fields, string tableName, string[] headers, int companyID=0)
{ {
if (fields.Length != headers.Length) if (fields.Length != headers.Length)
throw new ArgumentException( throw new ArgumentException(
@ -45,6 +48,7 @@ public class DataRecord
Fields = fields; Fields = fields;
TableName = tableName; TableName = tableName;
Headers = headers; Headers = headers;
CompanyID = companyID;
} }
public string this[int index] public string this[int index]

View File

@ -1,4 +1,5 @@
using System.Text; using ConsoleApp2.Options;
using System.Text;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
namespace ConsoleApp2.Helpers; namespace ConsoleApp2.Helpers;
@ -11,9 +12,9 @@ public static partial class DumpDataHelper
private static partial Regex MatchBrackets(); private static partial Regex MatchBrackets();
public static async Task<string[]> GetCsvHeadersFromSqlFileAsync(string filePath) public static async Task<string[]> GetCsvHeadersFromSqlFileAsync(string txt)
{ {
var txt = await File.ReadAllTextAsync(filePath); //var txt = await File.ReadAllTextAsync(filePath);
var match = MatchBrackets().Match(txt); var match = MatchBrackets().Match(txt);
return ParseHeader(match.ValueSpan); return ParseHeader(match.ValueSpan);
@ -59,10 +60,10 @@ public static partial class DumpDataHelper
return filePath[(firstDotIdx+1)..secondDotIdx].ToString(); return filePath[(firstDotIdx+1)..secondDotIdx].ToString();
} }
public static async Task<string[]> GetCsvFileNamesFromSqlFileAsync(string filePath) public static async Task<string[]> GetCsvFileNamesFromSqlFileAsync(string txt,Regex regex)
{ {
var txt = await File.ReadAllTextAsync(filePath); //var txt = await File.ReadAllTextAsync(filePath);
var matches = MatchDatFile().Matches(txt); var matches = regex.Matches(txt);
return matches.Select(match => match.ValueSpan[1..^1].ToString()).ToArray(); return matches.Select(match => match.ValueSpan[1..^1].ToString()).ToArray();
} }

View File

@ -0,0 +1,31 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp2.Helpers
{
    /// <summary>
    /// Console helper that prompts the user for input and validates it.
    /// </summary>
    public static class ValidateConsole
    {
        /// <summary>
        /// Writes <paramref name="message"/> as a prompt and reads a line from the console.
        /// An empty or null line is accepted immediately so the caller can fall back to a
        /// default value (see the commented-out usages in Program.cs); a non-empty line is
        /// passed to <paramref name="converter"/> and the user is re-prompted until it
        /// returns true.
        /// </summary>
        /// <typeparam name="T">Unused; kept for source compatibility with existing callers.</typeparam>
        /// <param name="converter">Returns true when the entered text is acceptable.</param>
        /// <param name="message">Prompt text written before reading input.</param>
        public static void ValidateInput<T>(Func<string, bool> converter, string message)
        {
            Console.Write(message);
            string? input = Console.ReadLine();
            // BUGFIX: the original loop hit an unconditional `break;` after one retry, so a
            // second invalid entry was accepted without ever being re-validated. Loop until
            // the converter accepts the input; null/empty exits (caller uses its default).
            while (!string.IsNullOrEmpty(input))
            {
                if (converter(input)) break;
                Console.WriteLine($"输入的内容不合法,请重新输入!");
                input = Console.ReadLine();
            }
        }
    }
}

View File

@ -0,0 +1,13 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp2.HostedServices.Abstractions
{
    /// <summary>
    /// A source of <c>DataRecord</c>s (e.g. a CSV/ZST dump reader — see <c>CsvSource</c>).
    /// Disposable so implementations can release their underlying readers/streams.
    /// </summary>
    public interface IDataSource:IDisposable
    {
        /// <summary>
        /// Reads all records from the source, invoking <paramref name="action"/> once per
        /// record (the input service uses this to push records onto the producer queue).
        /// </summary>
        public Task DoEnqueue(Action<DataRecord> action);
    }
}

View File

@ -15,54 +15,52 @@ namespace ConsoleApp2.HostedServices;
public class InputService : IInputService public class InputService : IInputService
{ {
private readonly ILogger _logger; private readonly ILogger _logger;
private readonly IOptions<CsvOptions> _csvOptions; private readonly IOptions<DataInputOptions> _dataInputOptions;
private readonly IOptions<InputTableOptions> _tableOptions;
private readonly DataRecordQueue _producerQueue; private readonly DataRecordQueue _producerQueue;
private readonly ProcessContext _context; private readonly ProcessContext _context;
public InputService(ILogger<InputService> logger, public InputService(ILogger<InputService> logger,
IOptions<CsvOptions> csvOptions, IOptions<DataInputOptions> dataInputOptions,
[FromKeyedServices(ProcessStep.Producer)]DataRecordQueue producerQueue, IOptions<InputTableOptions> tableOptions,
ProcessContext context) [FromKeyedServices(ProcessStep.Producer)] DataRecordQueue producerQueue,
ProcessContext context)
{ {
_logger = logger; _logger = logger;
_csvOptions = csvOptions; _dataInputOptions = dataInputOptions;
_tableOptions = tableOptions;
_producerQueue = producerQueue; _producerQueue = producerQueue;
_context = context; _context = context;
} }
public async Task ExecuteAsync(CancellationToken cancellationToken) public async Task ExecuteAsync(CancellationToken cancellationToken)
{ {
var inputDir = _csvOptions.Value.InputDir; var inputDir = _dataInputOptions.Value.InputDir;
_logger.LogInformation("***** Csv input service start, working dir: {InputDir}, thread id: {ThreadId} *****", inputDir, Environment.CurrentManagedThreadId); _logger.LogInformation("***** Csv input service start, working dir: {InputDir}, thread id: {ThreadId} *****", inputDir, Environment.CurrentManagedThreadId);
var files = Directory.GetFiles(inputDir).Where(s => s.EndsWith(".sql") && !s.Contains("schema")).ToArray(); var files = Directory.GetFiles(inputDir);
if (files.Length == 0) if (files.Length == 0)
{ {
_logger.LogInformation("No sql files found in {InputDir}", inputDir); _logger.LogInformation("No source files found in {InputDir}", inputDir);
return; return;
} }
var count = 0;
foreach (var sqlPath in files) foreach (var tableName in _tableOptions.Value.TableInfoConfig.Keys)
{ {
_logger.LogInformation("Working sql file: {SqlPath}", sqlPath); _logger.LogInformation("Working table: {tableName}", tableName);
var headers = await DumpDataHelper.GetCsvHeadersFromSqlFileAsync(sqlPath); var source = _dataInputOptions.Value.CreateSource?.Invoke(tableName);
var csvFiles = await DumpDataHelper.GetCsvFileNamesFromSqlFileAsync(sqlPath); await source.DoEnqueue((record) =>
foreach (var csvFile in csvFiles)
{ {
var csvPath = Path.Combine(inputDir, csvFile); _context.AddInput();
// var source = new JsvSource(csvPath, headers, _logger); _producerQueue.Enqueue(record);
var source = new CsvSource(csvPath, headers, _csvOptions.Value.Delimiter, _csvOptions.Value.QuoteChar, _logger); count++;
while (await source.ReadAsync()) });
{ if (_context.GetExceptions().Count > 0)
_context.AddInput(); {
_producerQueue.Enqueue(source.Current); _logger.LogInformation("***** Csv input service is canceled *****");
if (cancellationToken.IsCancellationRequested) return;
return;
}
} }
_logger.LogInformation("table:'{tableName}' input completed", tableName);
_logger.LogInformation("File '{File}' input completed", Path.GetFileName(sqlPath));
} }
_context.CompleteInput(); _context.CompleteInput();

View File

@ -1,30 +1,73 @@
using ConsoleApp2.HostedServices.Abstractions; using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Services;
using Microsoft.Extensions.Hosting; using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using System.Threading.Tasks;
namespace ConsoleApp2.HostedServices; namespace ConsoleApp2.HostedServices;
public class MainHostedService : BackgroundService public class MainHostedService : BackgroundService
{ {
private readonly ILogger _logger;
private readonly IInputService _input; private readonly IInputService _input;
private readonly ITransformService _transform; private readonly ITransformService _transform;
private readonly IOutputService _output; private readonly IOutputService _output;
private readonly ProcessContext _context;
public MainHostedService(IInputService input, ITransformService transform, IOutputService output) public MainHostedService(ILogger<MainHostedService> logger, IInputService input, ITransformService transform, IOutputService output, ProcessContext context)
{ {
_logger = logger;
_input = input; _input = input;
_transform = transform; _transform = transform;
_output = output; _output = output;
_context = context;
} }
protected override async Task ExecuteAsync(CancellationToken stoppingToken) protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{ {
var tasks = new List<Task>()
var inputTask = Task.Factory.StartNew(async () =>
{
try
{
await _input.ExecuteAsync(stoppingToken);
}
catch (Exception ex)
{
_context.AddException(ex);
_logger.LogError("Exception occurred on inputService:{Message},{StackTrace}", ex.Message, ex.StackTrace);
}
});
var transformTask = Task.Factory.StartNew(async () =>
{ {
Task.Run(async () => await _input.ExecuteAsync(stoppingToken), stoppingToken), try
Task.Run(async () => await _transform.ExecuteAsync(stoppingToken), stoppingToken), {
Task.Run(async () => await _output.ExecuteAsync(stoppingToken), stoppingToken), await _transform.ExecuteAsync(stoppingToken);
}; }
await Task.WhenAll(tasks); catch (Exception ex)
{
_context.AddException(ex);
_logger.LogError("Exception occurred on transformService:{Message},{StackTrace}", ex.Message, ex.StackTrace);
}
});
var outputTask = Task.Factory.StartNew(async () =>
{
try
{
await _output.ExecuteAsync(stoppingToken);
}
catch (Exception ex)
{
_context.AddException(ex);
_logger.LogError("Exception occurred on outputService:{Message},{StackTrace}", ex.Message, ex.StackTrace);
}
});
// await Task.Run(async () => await _output.ExecuteAsync(stoppingToken), stoppingToken); // await Task.Run(async () => await _output.ExecuteAsync(stoppingToken), stoppingToken);
} }
} }

View File

@ -5,6 +5,8 @@ using ConsoleApp2.Services;
using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options; using Microsoft.Extensions.Options;
using MySqlConnector;
using System.Threading;
namespace ConsoleApp2.HostedServices; namespace ConsoleApp2.HostedServices;
@ -32,36 +34,40 @@ public class OutputService : IOutputService
_taskManager = taskManager; _taskManager = taskManager;
} }
public async Task ExecuteAsync(CancellationToken stoppingToken) public async Task ExecuteAsync(CancellationToken cancellationToken)
{ {
_logger.LogInformation("***** Mysql output service started *****"); _logger.LogInformation("***** Mysql output service started *****");
var count = 0;
var records = new List<DataRecord>(); _taskManager.CreateTasks(async () =>
while (!_context.IsTransformCompleted || _consumerQueue.Count > 0)
{ {
if (!_consumerQueue.TryDequeue(out var record)) continue; var records = new List<DataRecord>();
records.Add(record); while (!_context.IsTransformCompleted || _consumerQueue.Count > 0)
if (records.Count >= _options.Value.FlushCount)
{ {
var recordsCopy = records; if (!_consumerQueue.TryDequeue(out var record)) continue;
_taskManager.CreateTask(async () => await FlushAsync(recordsCopy), stoppingToken); records.Add(record);
records = []; count++;
//_logger.LogInformation(@"*****OutputCount: {count} *****",count);
if (records.Count >= _options.Value.FlushCount)
{
await FlushAsync(records);
records.Clear();
}
if (_context.GetExceptions().Count>0)
{
_logger.LogInformation("***** Csv output service is canceled *****");
return;
}
} }
if (_context.IsTransformCompleted && records.Count > 0)
if (_taskManager.TaskCount >= _options.Value.MaxTask)
{ {
await _taskManager.WaitAll(); await FlushAsync(records);
_taskManager.ClearTask(); records.Clear();
_context.CompleteOutput();
_logger.LogInformation("***** Mysql output service completed *****");
} }
} }, _options.Value.TaskCount);
await _taskManager.WaitAll(); await _taskManager.WaitAll();
await FlushAsync(records);
_context.CompleteOutput();
_logger.LogInformation("***** Mysql output service completed *****");
} }
private async Task FlushAsync(IEnumerable<DataRecord> records) private async Task FlushAsync(IEnumerable<DataRecord> records)
@ -69,15 +75,29 @@ public class OutputService : IOutputService
var count = 0; var count = 0;
await using var output = new MySqlDestination( await using var output = new MySqlDestination(
_options.Value.ConnectionString ?? throw new InvalidOperationException("Connection string is required"), _options.Value.ConnectionString ?? throw new InvalidOperationException("Connection string is required"),
_logger, true); _logger, _context,true);
//if (records == null || records.Count() == 0) return;
//var dbName = $"cferp_test_1";
//if (records != null && records.Count() > 0)
//{
// dbName = $"cferp_test_{records.FirstOrDefault()?.CompanyID}";
//}
//await using var output = new MySqlDestination(new MySqlConnectionStringBuilder
//{
// Server = "127.0.0.1",
// Port = 34309,
// Database = dbName,
// UserID = "root",
// Password = "123456",
// MaximumPoolSize = 50,
//}.ConnectionString, _logger,true);
foreach (var record in records) foreach (var record in records)
{ {
await output.WriteRecordAsync(record); await output.WriteRecordAsync(record);
count++; count++;
} }
await output.FlushAsync(_options.Value.MaxAllowedPacket);
await output.FlushAsync();
_context.AddOutput(count); _context.AddOutput(count);
} }
} }

View File

@ -49,6 +49,7 @@ public class TaskMonitorService : BackgroundService
bool endCheck = false; bool endCheck = false;
while (true) while (true)
{ {
if (_context.GetExceptions().Count>0) return;
EndCheck: EndCheck:
// var running = 0; // var running = 0;
// var error = 0; // var error = 0;

View File

@ -0,0 +1,83 @@
using ConsoleApp2.HostedServices.Abstractions;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using ConsoleApp2.Const;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using System.Reflection.PortableExecutable;
using System.Collections.Concurrent;
using ConsoleApp2.SimulationService;
namespace ConsoleApp2.HostedServices
{
    /// <summary>
    /// Simulated input service: generates synthetic "order_item" rows in memory instead of
    /// reading dump files, and pushes them onto the producer queue. Used for load-testing
    /// the transform/output stages of the pipeline.
    /// </summary>
    public class TestInputService : IInputService
    {
        private readonly ILogger _logger;
        private readonly IOptions<CsvOptions> _csvOptions;   // injected for parity with InputService; not read by the simulator
        private readonly DataRecordQueue _producerQueue;     // producer side of the pipeline
        private readonly ProcessContext _context;            // shared progress/exception context

        public TestInputService(ILogger<TestInputService> logger,
            IOptions<CsvOptions> csvOptions,
            [FromKeyedServices(ProcessStep.Producer)] DataRecordQueue producerQueue,
            ProcessContext context)
        {
            _logger = logger;
            _csvOptions = csvOptions;
            _producerQueue = producerQueue;
            _context = context;
        }

        /// <summary>
        /// Generates <c>dataCount</c> synthetic order_item records, rotating the shard key
        /// every <c>shareKeyInterval</c> rows and the company id after each flushed batch.
        /// </summary>
        public async Task ExecuteAsync(CancellationToken cancellationToken)
        {
            var tableName = "order_item";
            var headers = new string[] { "ID", "OrderNo", "ItemNo", "ItemType", "RoomID", "BoxID", "DataID", "PlanID", "PackageID", "Num", "CompanyID", "ShardKey" };
            var dataCount = 1200000000L;      // total simulated rows
            var tempCount = 80000;            // rows buffered before enqueueing a batch
            var tempRecords = new List<DataRecord>();
            short[] shareKeys = { 23040, 23070, 23100, 24000, 24040, 24070, 24100, 25000, 25040, 25070, 25100 };
            int[] companyIds = { 1, 2, 3, 4 };
            var sk = shareKeys.First();
            var companyID = companyIds.First();
            var shareKeyInterval = 20000;     // rows emitted per shard key before rotating
            var getShareKeyTimes = 0;
            var getCompanyIDTimes = 0;
            var shareKeyIntervalCount = 0;
            for (long i = 1; i <= dataCount; i++)
            {
                shareKeyIntervalCount++;
                if (shareKeyIntervalCount > shareKeyInterval)
                {
                    sk = DataHelper.GetShareKey(getShareKeyTimes);
                    getShareKeyTimes++;
                    shareKeyIntervalCount = 0;
                }
                var fields = new string[] { i.ToString(), "20220104020855", (220105981029 + i).ToString(), "1", "482278", "482279", "3768774", "0", "0", "1", companyID.ToString(), sk.ToString() };
                // BUGFIX: pass the rotating companyID (the original passed the typo'd local
                // `comanyID`, a constant 1), so DataRecord.CompanyID agrees with the
                // CompanyID value written into the field array above.
                var record = new DataRecord(fields, tableName, headers, companyID);
                tempRecords.Add(record);
                if (tempRecords.Count >= tempCount)
                {
                    foreach (var rc in tempRecords)
                    {
                        _context.AddInput();
                        _producerQueue.Enqueue(rc);
                        if (cancellationToken.IsCancellationRequested)
                            return;
                    }
                    tempRecords.Clear();
                    // Rotate the company id once per flushed batch.
                    companyID = DataHelper.GetCompanyId(getCompanyIDTimes);
                    getCompanyIDTimes++;
                }
            }
            _context.CompleteInput();
            _logger.LogInformation("***** Csv input service completed *****");
        }
    }
}

View File

@ -36,11 +36,15 @@ public class TransformService : ITransformService
public async Task ExecuteAsync(CancellationToken cancellationToken) public async Task ExecuteAsync(CancellationToken cancellationToken)
{ {
_logger.LogInformation("***** Data transform service started, thread id: {ThreadId} *****", Environment.CurrentManagedThreadId); _logger.LogInformation("***** Data transform service started, thread id: {ThreadId} *****", Environment.CurrentManagedThreadId);
while (!_context.IsInputCompleted || _producerQueue.Count > 0) while ((!_context.IsInputCompleted || _producerQueue.Count > 0))
{ {
if (_context.GetExceptions().Count > 0)
{
_logger.LogInformation("***** Csv transform service is canceled *****");
return;
}
// var dbOptions = _options.Value.DatabaseFilter(record); // var dbOptions = _options.Value.DatabaseFilter(record);
if (!_producerQueue.TryDequeue(out var record)) continue; if (!_producerQueue.TryDequeue(out var record)) continue;
record.Database = _options.Value.DatabaseFilter?.Invoke(record);
for (var i = 0; i < record.Fields.Length; i++) for (var i = 0; i < record.Fields.Length; i++)
{ {
@ -56,20 +60,41 @@ public class TransformService : ITransformService
switch (_options.Value.GetColumnType(record.TableName, record.Headers[i])) switch (_options.Value.GetColumnType(record.TableName, record.Headers[i]))
{ {
case ColumnType.Blob or ColumnType.Text: case ColumnType.Text:
field = string.IsNullOrEmpty(field) ? "''" : $"0x{field}";
field = string.IsNullOrEmpty(field) ? "''" : _options.Value.TransformBinary?.Invoke(field) ?? field; ;
break;
case ColumnType.Blob:
field = string.IsNullOrEmpty(field) ? "NULL" : $"0x{field}";
break; break;
default: default:
break; break;
} }
Escape: Escape:
record[i] = field; record[i] = field;
} }
//过滤不要的record
// TODO: 数据处理/过滤/复制 if (_options.Value.RecordFilter?.Invoke(record) == false) continue;
record.Database = _options.Value.DatabaseFilter?.Invoke(record);
//修改record
_options.Value.RecordModify?.Invoke(record);
//替换record
var replaceRecord = _options.Value.RecordReplace?.Invoke(record);
if (replaceRecord != null)
{
record = replaceRecord;
}
_consumerQueue.Enqueue(record); _consumerQueue.Enqueue(record);
//数据增加
var addRecords=_options.Value.RecordAdd?.Invoke(record);
if(addRecords != null)
{
foreach(var rc in addRecords)
{
_consumerQueue.Enqueue(rc);
}
}
_context.AddTransform(); _context.AddTransform();
} }

View File

@ -0,0 +1,19 @@
using System;
using System.ComponentModel;
using System.Configuration;
namespace ConsoleApp2.Options
{
    /// <summary>
    /// Options bound from the "CmdOptions" configuration section (see Program.cs).
    /// </summary>
    public class CommandOptions
    {
        /// <summary>Directory containing the MyDumper export files to import.</summary>
        public string InputDir { get; set; } = "./MyDumper";
        /// <summary>Number of parallel output (database write) tasks.</summary>
        public int TaskCount { get; set; } = 16;
        /// <summary>Records accumulated before each flush to the database.</summary>
        public int FlushCount { get; set; } = 20000;
        /// <summary>When true, binary fields are emitted with an "_utf8mb4" prefix (see DataTransformOptions.TransformBinary wiring).</summary>
        public bool Isutf8mb4 { get; set; } = true;
        /// <summary>Records whose ShardKey is older than this are filtered out by RecordFilter.</summary>
        public short OldestShardKey { get; set; } = 23010;
    }
}

View File

@ -5,7 +5,7 @@ public class CsvOptions
/// <summary> /// <summary>
/// MyDumper导出的CSV文件目录 /// MyDumper导出的CSV文件目录
/// </summary> /// </summary>
public string InputDir { get; set; } = "./"; //public string InputDir { get; set; } = "./";
/// <summary> /// <summary>
/// 字符串的包围符号,默认为双引号" /// 字符串的包围符号,默认为双引号"

View File

@ -0,0 +1,18 @@
using ConsoleApp2.Services;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection.PortableExecutable;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp2.Options
{
    // Supported input file formats.
    // NOTE(review): "JWT" looks like a typo (perhaps JSON or TXT was meant) — confirm before relying on it.
    public enum InputFileType { CSV, JWT, JSV }

    /// <summary>
    /// Options describing where input data comes from and how to open it.
    /// </summary>
    public class DataInputOptions
    {
        /// <summary>Directory the input files are read from.</summary>
        public string InputDir { get; set; } = "./";

        /// <summary>Factory that creates a record source for a given table name (wired up in Program.cs).</summary>
        public Func<string, CsvSource>? CreateSource { get; set; }
    }
}

View File

@ -1,4 +1,6 @@
namespace ConsoleApp2.Options; using StackExchange.Redis;
namespace ConsoleApp2.Options;
public enum ColumnType public enum ColumnType
{ {
@ -11,6 +13,13 @@ public class DataTransformOptions
{ {
public Func<DataRecord, string>? DatabaseFilter { get; set; } public Func<DataRecord, string>? DatabaseFilter { get; set; }
public Func<string, string>? TransformBinary { get; set; }//Binary转字符串方法
public Func<DataRecord, bool>? RecordFilter { get; set; }//数据过滤方法
public Action<DataRecord>? RecordModify { get; set; }//数据修改
public Func<DataRecord, DataRecord?>? RecordReplace { get; set; }//数据替换
public Func<DataRecord, IList<DataRecord>?>? RecordAdd { get; set; }//数据替换
/// <summary> /// <summary>
/// 配置导入数据的特殊列 /// 配置导入数据的特殊列
/// </summary> /// </summary>

View File

@ -7,11 +7,13 @@ public class DatabaseOutputOptions
/// </summary> /// </summary>
public string? ConnectionString { get; set; } public string? ConnectionString { get; set; }
/// <summary> /// <summary>
/// 输出服务的最大任务(Task)数 /// 输出服务的任务(Task)数
/// </summary> /// </summary>
public int MaxTask { get; set; } public int TaskCount { get; set; }
/// <summary> /// <summary>
/// 每个任务每次提交到数据库的记录数量每N条构建一次SQL语句 /// 每个任务每次提交到数据库的记录数量每N条构建一次SQL语句
/// </summary> /// </summary>
public int FlushCount { get; set; } public int FlushCount { get; set; }
public int MaxAllowedPacket { get; set; } = 32*1024*1024;
} }

View File

@ -0,0 +1,17 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp2.Options
{
    /// <summary>Per-table settings for the input stage.</summary>
    public class TableInfo
    {
        // Number of records to simulate for this table (translated from: 模拟的记录条数).
        public long SimulaRowCount { get; set; }
    }

    /// <summary>Holds the set of tables to process, keyed by table name.</summary>
    public class InputTableOptions
    {
        // Table name -> per-table info; InputService iterates these keys to drive the import.
        public Dictionary<string, TableInfo> TableInfoConfig { get; set; } = new();
    }
}

View File

@ -1,35 +1,227 @@
using ConsoleApp2.Const; using ConsoleApp2;
using ConsoleApp2.Const;
using ConsoleApp2.Helpers;
using ConsoleApp2.HostedServices; using ConsoleApp2.HostedServices;
using ConsoleApp2.HostedServices.Abstractions; using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options; using ConsoleApp2.Options;
using ConsoleApp2.Services; using ConsoleApp2.Services;
using ConsoleApp2.SimulationService;
using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting; using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using MySqlConnector; using MySqlConnector;
using Serilog; using Serilog;
using Serilog.Core;
using System.Reflection.PortableExecutable;
// 运行之前把Mysql max_allowed_packets 调大 // 运行之前把Mysql max_allowed_packets 调大
// 运行之前把process_step表的外键删掉 // 运行之前把process_step表的外键删掉
await RunProgram(); await RunProgram();
return; return;
async Task RunProgram() async Task RunProgram()
{ {
//var inputDir= "D:\\MyDumper";
//ValidateConsole.ValidateInput<string>((_inputDir) =>
//{
// if (Directory.Exists(_inputDir))
// {
// inputDir = _inputDir;
// return true;
// }
// else return false;
//}, "请输入读取csv文件的目录(默认为当前目录下MyDumper文件夹):");
//var maxTask = 16;
//ValidateConsole.ValidateInput<string>((_inputDir) =>
//{
// _ = int.TryParse(_inputDir.ToString(), out var _taskCount);
// if (_taskCount > 0) {
// maxTask = _taskCount;
// return true;
// }
// else return false;
//}, "请输入执行输出的线程数量(默认为16):");
//var flushCount = 2_0000;
//ValidateConsole.ValidateInput<string>((_inputDir) =>
//{
// _ = int.TryParse(_inputDir.ToString(), out var _flashCount);
// if (_flashCount > 0)
// {
// flushCount = _flashCount;
// return true;
// } else return false;
//}, "请输入单次插入的行数(默认为20000):");
ThreadPool.SetMaxThreads(200, 200); ThreadPool.SetMaxThreads(200, 200);
var host = Host.CreateApplicationBuilder(); var host = Host.CreateApplicationBuilder(args);
host.Configuration.AddCommandLine(args); var commandOptions = host.Configuration.GetSection("CmdOptions").Get<CommandOptions>() ?? new CommandOptions();
Console.WriteLine($"InputDir:{commandOptions?.InputDir}");
Console.WriteLine($"OutPutFlushCount:{commandOptions?.FlushCount}");
Console.WriteLine($"OutPutTaskCount:{commandOptions?.TaskCount}");
host.Services.Configure<InputTableOptions>(option =>
{
option.TableInfoConfig = new Dictionary<string, TableInfo>
{
//{"order_block_plan_item",new TableInfo{SimulaRowCount=136323566 }},//从order_item表查询然后程序插入
//{"order_package_item",new TableInfo{SimulaRowCount=52525224 }},//从order_item表查询然后程序插入
//{"order_patch_detail",new TableInfo{SimulaRowCount=10 }},//生产没有这个表,不处理
//{"machine",new TableInfo{SimulaRowCount=14655 }},
//{"order",new TableInfo{SimulaRowCount=5019216 }},
//{"order_block_plan",new TableInfo{SimulaRowCount=2725553 }},//CreateTime < 202301的删除
{"order_block_plan_result",new TableInfo{SimulaRowCount=1174096 }},
//{"order_box_block",new TableInfo{SimulaRowCount=29755672 }},
//{"order_data_block",new TableInfo{SimulaRowCount=731800334 }},
//{"order_data_goods",new TableInfo{SimulaRowCount=25803671 }},
//{"order_data_parts",new TableInfo{SimulaRowCount=468517543 }},
//{"order_item",new TableInfo{SimulaRowCount=1345520079 }},
//{"order_module",new TableInfo{SimulaRowCount=103325385 }},
//{"order_module_extra",new TableInfo{SimulaRowCount=54361321 }},
//{"order_module_item",new TableInfo{SimulaRowCount=69173339 }},
//{"order_package",new TableInfo{SimulaRowCount=16196195 }},
//{"order_process",new TableInfo{SimulaRowCount=3892685 }},//orderNo < 202301的
//{"order_process_step",new TableInfo{SimulaRowCount=8050349 }},//orderNo < 202301的删除
//{"order_process_step_item",new TableInfo{SimulaRowCount=14538058 }},//orderNo < 202301的删除
//{"order_scrap_board",new TableInfo{SimulaRowCount=123998 }},
//{"process_group",new TableInfo{SimulaRowCount=1253 }},
//{"process_info",new TableInfo{SimulaRowCount=7839 }},
//{"process_item_exp",new TableInfo{SimulaRowCount=28 }},
//{"process_schdule_capacity",new TableInfo{SimulaRowCount=39736 }},
//{"process_step_efficiency",new TableInfo{SimulaRowCount=8 }},
//{"report_template",new TableInfo{SimulaRowCount=7337 }},
//{"simple_package",new TableInfo{SimulaRowCount=130436 }},//orderNo < 202301的删除
//{"simple_plan_order",new TableInfo{SimulaRowCount=351470 }},//CreateTime < 202301的删除
//{"sys_config",new TableInfo{SimulaRowCount=2296 }},
//{"work_calendar",new TableInfo{SimulaRowCount=11 }},
//{"work_shift",new TableInfo{SimulaRowCount=59 }},
//{"work_time",new TableInfo{SimulaRowCount=62 }},
};
});
host.Services.Configure<CsvOptions>(option => host.Services.Configure<CsvOptions>(option =>
{ {
option.Delimiter = ","; option.Delimiter = ",";
option.QuoteChar = '"'; option.QuoteChar = '"';
option.InputDir = "D:/Dump";
}); });
host.Services.Configure<DataInputOptions>(options =>
{
options.InputDir = commandOptions.InputDir;
var _csvOptions = new CsvOptions { Delimiter = ",", QuoteChar = '"' };
options.CreateSource = (string tableName) =>
{
var source = new ZstSource(commandOptions.InputDir, tableName, _csvOptions.Delimiter, _csvOptions.QuoteChar);
return source;
};
});
host.Services.Configure<DataTransformOptions>(options => host.Services.Configure<DataTransformOptions>(options =>
{ {
options.DatabaseFilter = record => "cferp_test_1"; options.DatabaseFilter = record => "cferp_test";
options.TransformBinary = field => commandOptions != null && commandOptions.Isutf8mb4 ? $"_utf8mb4 0x{field}" : $"0x{field}";
//数据过滤
options.RecordFilter = record =>
{
var index = Array.IndexOf(record.Headers, "ShardKey");
if (index > -1)
{
var skString = record.Fields[index];
short.TryParse(skString, out var sk);
if (sk < commandOptions.OldestShardKey) return false;
}
if (record.TableName == "order_package")
{
var pkNoIndex = Array.IndexOf(record.Headers, "PakageNo");
if (pkNoIndex > -1)
{
var pkNo = record.Fields[pkNoIndex];
if (pkNo.Length <= 2) return false;
}
}
if (record.TableName == "order_block_plan")
{
var orderNosIndex = Array.IndexOf(record.Headers, "OrderNos");
if (orderNosIndex > -1)
{
var pkNo = record.Fields[orderNosIndex];
if (pkNo.Length <= 2) return false;
}
}
return true;
};
//数据修改
options.RecordModify = (record) =>
{
if (record.TableName == "order_process")//修改order_process.NextStepID的默认值为0
{
var nextStepIdIndex = Array.IndexOf(record.Headers, "NextStepID");
if (nextStepIdIndex > -1)
{
var idString = record.Fields[nextStepIdIndex];
if (idString == "\\N")
{
record.Fields[nextStepIdIndex] = "0";
}
}
}
};
//数据替换
options.RecordReplace = (record) =>
{
//删除数据源里simple_plan_order.ProcessState 字段和值
if (record.TableName == "simple_plan_order")//修改order_process.NextStepID的默认值为0
{
var nextStepIdIndex = Array.IndexOf(record.Headers, "ProcessState");
if (nextStepIdIndex > -1)
{
var headers = record.Headers.Where(t => t != "ProcessState").ToArray();
var fs = record.Fields.ToList();
fs.RemoveAt(nextStepIdIndex);
var fields = fs.ToArray();
return new DataRecord(fields, record.TableName, headers, record.CompanyID);
}
}
return null;
};
//数据生成
options.RecordAdd = (record) =>
{
var resultList = new List<DataRecord>();
if(record.TableName == "order_item")
{
var itemIDIndex = Array.IndexOf(record.Headers, "ItemID");
var shardKeyIndex = Array.IndexOf(record.Headers, "ShardKey");
var planIDIndex = Array.IndexOf(record.Headers, "PlanID");
var packageIDIndex = Array.IndexOf(record.Headers, "PackageID");
var companyIDIndex = Array.IndexOf(record.Headers, "CompanyID");
//resultList.Add(new DataRecord(
// new[] { "ItemID", "ShardKey", "PlanID","CompanyID" }, "order_block_plan_item",
// new[] { record.Fields[itemIDIndex], record.Fields[shardKeyIndex], record.Fields[planIDIndex], record.Fields[companyIDIndex] }));
//resultList.Add(
// new DataRecord(new[] { "ItemID", "ShardKey", "PackageID", "CompanyID" }, "order_package_item",
// new[] { record.Fields[itemIDIndex], record.Fields[shardKeyIndex], record.Fields[packageIDIndex], record.Fields[companyIDIndex] }));
}
return resultList;
};
options.ColumnTypeConfig = new() options.ColumnTypeConfig = new()
{ {
{ "simple_plan_order.PlaceData", ColumnType.Blob }, { "simple_plan_order.PlaceData", ColumnType.Blob },
@ -61,20 +253,19 @@ async Task RunProgram()
{ "order_block_plan.BlockInfo", ColumnType.Text }, { "order_block_plan.BlockInfo", ColumnType.Text },
}; };
}); });
host.Services.Configure<DatabaseOutputOptions>(options => host.Services.Configure<DatabaseOutputOptions>(options =>
{ {
options.ConnectionString = new MySqlConnectionStringBuilder options.ConnectionString = new MySqlConnectionStringBuilder
{ {
Server = "127.0.0.1", Server = "127.0.0.1",
Port = 33306, Port = 33309,
Database = "cferp_test_1", Database = "cferp_test",
UserID = "root", UserID = "root",
Password = "123456", Password = "123456",
MaximumPoolSize = 50, // 这个值应当小于 max_connections MaximumPoolSize = 50, // 这个值应当小于 max_connections
}.ConnectionString; }.ConnectionString;
options.MaxTask = 16; options.TaskCount = commandOptions.TaskCount;
options.FlushCount = 200; options.FlushCount = commandOptions.FlushCount;
}); });
host.Services.AddLogging(builder => host.Services.AddLogging(builder =>
{ {
@ -89,10 +280,13 @@ async Task RunProgram()
host.Services.AddHostedService<MainHostedService>(); host.Services.AddHostedService<MainHostedService>();
host.Services.AddHostedService<TaskMonitorService>(); host.Services.AddHostedService<TaskMonitorService>();
host.Services.AddSingleton<IInputService, InputService>(); host.Services.AddSingleton<IInputService, SimulationInputService>();
host.Services.AddSingleton<ITransformService, TransformService>(); host.Services.AddSingleton<ITransformService, TransformService>();
host.Services.AddSingleton<IOutputService, OutputService>(); host.Services.AddSingleton<IOutputService, OutputService>();
host.Services.AddStackExchangeRedisCache(options =>
{
options.Configuration = "localhost:6379";
});
var app = host.Build(); var app = host.Build();
await app.RunAsync(); await app.RunAsync();
} }

View File

@ -1,5 +1,7 @@
using System.Text; using System.Text;
using System.Text.RegularExpressions;
using ConsoleApp2.Helpers; using ConsoleApp2.Helpers;
using ConsoleApp2.HostedServices.Abstractions;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
namespace ConsoleApp2.Services; namespace ConsoleApp2.Services;
@ -7,45 +9,52 @@ namespace ConsoleApp2.Services;
/// <summary> /// <summary>
/// CSV文件读取 /// CSV文件读取
/// </summary> /// </summary>
public class CsvSource public class CsvSource:IDataSource
{ {
private readonly string _filePath; protected readonly string _inputDir;
private readonly StreamReader _reader; //protected readonly StreamReader _reader;
private readonly ILogger? _logger; private readonly ILogger? _logger;
private readonly string _tableName; protected readonly string _tableName;
protected string _sqlFilePath;
protected readonly string _sqlFileText;
public DataRecord Current { get; private set; } //public DataRecord Current { get; protected set; }
public string[]? Headers { get; } //public string[]? Headers { get; }
public string? CurrentRaw { get; private set; } public string? CurrentRaw { get; protected set; }
public string Delimiter { get; private set; } public string Delimiter { get; private set; }
public char QuoteChar { get; private set; } public char QuoteChar { get; private set; }
public CsvSource(string filePath, string[]? headers = null, string delimiter = ",", char quoteChar = '"', public CsvSource(string inputDir,string tableName,string delimiter = ",", char quoteChar = '"',
ILogger? logger = null) ILogger? logger = null)
{ {
_filePath = filePath; _inputDir = inputDir;
Headers = headers; _tableName = tableName;
//Headers = headers;
_logger = logger; _logger = logger;
Delimiter = delimiter; Delimiter = delimiter;
QuoteChar = quoteChar; QuoteChar = quoteChar;
var fs = File.OpenRead(filePath); //var fs = File.OpenRead(filePath);
_reader = new StreamReader(fs); //_reader = new StreamReader(fs);
_tableName = DumpDataHelper.GetTableName(filePath); //_tableName = DumpDataHelper.GetTableName(filePath);
string pattern = $"^.*\\.{tableName}\\..*\\.sql$";
_sqlFilePath = Directory.GetFiles(_inputDir).FirstOrDefault(s => Regex.Match(s, pattern).Success);
} }
public async ValueTask<bool> ReadAsync() //public virtual async ValueTask<bool> ReadAsync()
{ //{
var str = await _reader.ReadLineAsync(); // var str = await _reader.ReadLineAsync();
if (string.IsNullOrWhiteSpace(str)) // if (string.IsNullOrWhiteSpace(str))
return false; // return false;
CurrentRaw = str; // CurrentRaw = str;
var fields = ParseRow2(str, QuoteChar, Delimiter); // var fields = ParseRow2(str, QuoteChar, Delimiter);
Current = new DataRecord(fields, _tableName, Headers); // Current = new DataRecord(fields, _tableName, Headers);
return true; // return true;
} //}
public string[] ParseRow(string row, char quoteChar, string delimiter) public string[] ParseRow(string row, char quoteChar, string delimiter)
{ {
@ -136,4 +145,64 @@ public class CsvSource
result.Add(current.ToString()); result.Add(current.ToString());
return result.ToArray(); return result.ToArray();
} }
/// <summary>
/// Reads this table's .sql dump file and extracts the CSV column headers from it.
/// NOTE(review): assumes the constructor found a matching .sql file — _sqlFilePath
/// may be null when no file matched; confirm with callers.
/// </summary>
public virtual async Task<string[]> GetHeaders()
{
    var sqlText = await File.ReadAllTextAsync(_sqlFilePath);
    var headers = await DumpDataHelper.GetCsvHeadersFromSqlFileAsync(sqlText);
    return headers;
}
/// <summary>
/// Reads this table's .sql dump file and lists the '*.dat' data files it references.
/// </summary>
public virtual async Task<string[]> GetCsvFiles()
{
    var sqlText = await File.ReadAllTextAsync(_sqlFilePath);
    var datFilePattern = new Regex(@"'.+\.dat'");
    return await DumpDataHelper.GetCsvFileNamesFromSqlFileAsync(sqlText, datFilePattern);
}
/// <summary>
/// Streams every data file of this table, parses each line into a
/// <see cref="DataRecord"/> and hands it to <paramref name="action"/>.
/// </summary>
/// <param name="action">Callback invoked once per parsed record; may be null.</param>
public virtual async Task DoEnqueue(Action<DataRecord> action)
{
    var sourceFiles = await GetCsvFiles();
    // Headers come from the table's single .sql file and are identical for every
    // data file, so read them once instead of re-parsing the .sql file per file
    // (the previous code called GetHeaders() inside the loop).
    var headers = await GetHeaders();
    foreach (var file in sourceFiles)
    {
        var filePath = Path.Combine(_inputDir, file);
        using var fs = File.OpenRead(filePath);
        using var sr = new StreamReader(fs);
        while (!sr.EndOfStream)
        {
            var line = await sr.ReadLineAsync();
            if (string.IsNullOrEmpty(line))
                continue; // skip blank lines instead of parsing them into empty records
            var fields = ParseRow2(line, QuoteChar, Delimiter);
            var record = new DataRecord(fields, _tableName, headers);
            action?.Invoke(record);
        }
    }
}
/// <summary>
/// Reads a single sample record — the first line of the first data file —
/// or returns null when the table has no data files.
/// </summary>
public virtual async Task<DataRecord?> GetTestRecord()
{
    var sourceFiles = await GetCsvFiles();
    var file = sourceFiles.FirstOrDefault();
    if (file == null)
        return null;

    var headers = await GetHeaders();
    var filePath = Path.Combine(_inputDir, file);
    using var fs = File.OpenRead(filePath);
    using var sr = new StreamReader(fs);
    var line = await sr.ReadLineAsync();
    var fields = ParseRow2(line, QuoteChar, Delimiter);
    return new DataRecord(fields, _tableName, headers);
}
// Intentionally empty: file streams are opened and disposed per call now;
// the shared reader this used to dispose is commented out above.
public void Dispose()
{
    // _reader.Dispose();
}
} }

View File

@ -19,7 +19,7 @@ public class DataRecordQueue : IDisposable
public DataRecordQueue() public DataRecordQueue()
{ {
_queue = new BlockingCollection<DataRecord>(200_000); // 队列最长为20W条记录 _queue = new BlockingCollection<DataRecord>(2000_000); // 队列最长为20W条记录
} }
public bool TryDequeue([MaybeNullWhen(false)] out DataRecord record) public bool TryDequeue([MaybeNullWhen(false)] out DataRecord record)

View File

@ -1,4 +1,5 @@
using ConsoleApp2.Helpers; using ConsoleApp2.Helpers;
using ConsoleApp2.HostedServices.Abstractions;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using ServiceStack.Text; using ServiceStack.Text;
@ -8,9 +9,9 @@ namespace ConsoleApp2.Services;
/// 读取Jsv格式文件 /// 读取Jsv格式文件
/// </summary> /// </summary>
[Obsolete] [Obsolete]
public class JsvSource : IDisposable public class JsvSource:IDataSource
{ {
private readonly string _filePath; private readonly string _inputDir;
private readonly JsvStringSerializer _jsv; private readonly JsvStringSerializer _jsv;
private readonly StreamReader _reader; private readonly StreamReader _reader;
// ReSharper disable once PrivateFieldCanBeConvertedToLocalVariable // ReSharper disable once PrivateFieldCanBeConvertedToLocalVariable
@ -21,29 +22,22 @@ public class JsvSource : IDisposable
public string[]? Headers { get; } public string[]? Headers { get; }
public bool EndOfSource => _reader.EndOfStream; public bool EndOfSource => _reader.EndOfStream;
public JsvSource(string filePath, string[]? headers = null, ILogger? logger = null) public JsvSource(string inputDir,string tableName, ILogger? logger = null)
{ {
_filePath = filePath; _inputDir = inputDir;
_tableName = tableName;
_jsv = new JsvStringSerializer(); _jsv = new JsvStringSerializer();
_reader = new StreamReader(filePath); // _reader = new StreamReader(filePath);
Headers = headers; //Headers = headers;
_logger = logger; _logger = logger;
// _logger?.LogInformation("Reading file: {FilePath}", filePath); // _logger?.LogInformation("Reading file: {FilePath}", filePath);
_tableName = DumpDataHelper.GetTableName(filePath); //_tableName = DumpDataHelper.GetTableName(filePath);
} }
public async Task DoEnqueue(Action<DataRecord> action)
public async ValueTask<bool> ReadAsync()
{ {
var str = await _reader.ReadLineAsync();
if (string.IsNullOrEmpty(str))
return false;
var fields = _jsv.DeserializeFromString<string[]>(str);
Current = new DataRecord(fields, _tableName, Headers);
return true;
} }
public void Dispose() public void Dispose()
{ {
_reader.Dispose(); _reader.Dispose();
} }

View File

@ -2,6 +2,7 @@
using ConsoleApp2.Helpers; using ConsoleApp2.Helpers;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using MySqlConnector; using MySqlConnector;
using ServiceStack;
namespace ConsoleApp2.Services; namespace ConsoleApp2.Services;
@ -14,16 +15,21 @@ public class MySqlDestination : IDisposable, IAsyncDisposable
private readonly MySqlConnection _conn; private readonly MySqlConnection _conn;
private readonly ILogger _logger; private readonly ILogger _logger;
private readonly bool _prettyOutput; private readonly bool _prettyOutput;
private readonly int _maxAllowPacket;
public MySqlDestination(string connStr, ILogger logger, bool prettyOutput = false) private readonly ProcessContext _context;
private static StringBuilder recordSb = new StringBuilder();
public MySqlDestination(string connStr, ILogger logger, ProcessContext context,bool prettyOutput = false)
{ {
_conn = new MySqlConnection(connStr); _conn = new MySqlConnection(connStr);
_conn.Open(); _conn.Open();
_recordCache = new Dictionary<string, IList<DataRecord>>(); _recordCache = new Dictionary<string, IList<DataRecord>>();
_logger = logger; _logger = logger;
_context = context;
_prettyOutput = prettyOutput; _prettyOutput = prettyOutput;
}
}
public Task WriteRecordAsync(DataRecord record) public Task WriteRecordAsync(DataRecord record)
{ {
_recordCache.AddOrUpdate(record.TableName, [record], (key, value) => _recordCache.AddOrUpdate(record.TableName, [record], (key, value) =>
@ -42,74 +48,113 @@ public class MySqlDestination : IDisposable, IAsyncDisposable
} }
} }
public async Task FlushAsync() public async Task FlushAsync(int maxAllowPacket)
{ {
if (_recordCache.Count == 0) if (_recordCache.Count == 0)
return; return;
var cmd = _conn.CreateCommand(); //var cmd = _conn.CreateCommand();
cmd.CommandText = SerializeRecords(_recordCache, _prettyOutput); //cmd.CommandTimeout = 3 * 60;
try try
{ {
await cmd.ExecuteNonQueryAsync(); var excuseList = GetExcuseList(_recordCache, maxAllowPacket, _prettyOutput);
//foreach (var insertSql in excuseList)
//{
// //cmd.CommandText = insertSql;
// //await cmd.ExecuteNonQueryAsync();
// //_logger.LogInformation(@"do insert completed!size:{Length}", cmd.CommandText.Length);
//}
_recordCache.Clear(); _recordCache.Clear();
} }
catch (Exception e) catch (Exception e)
{ {
_logger.LogCritical(e, "Error when flushing records, sql: {Sql}", cmd.CommandText.Omit(1000)); //_logger.LogCritical(e, "Error when flushing records, sql: {Sql}", cmd.CommandText.Omit(1000));
_context.AddException(e);
throw; throw;
} }
finally finally
{ {
await cmd.DisposeAsync(); //await cmd.DisposeAsync();
} }
} }
public static string SerializeRecords(IDictionary<string, IList<DataRecord>> tableRecords, public static IList<string> GetExcuseList(IDictionary<string, IList<DataRecord>> tableRecords,int maxAllowPacket,
bool prettyOutput = false) bool prettyOutput = false)
{ {
var sb = new StringBuilder(); var resultList = new List<string>();
var headerSb = string.Empty;
//var recordSb = new StringBuilder();
recordSb.Clear();
foreach (var (tableName, records) in tableRecords) foreach (var (tableName, records) in tableRecords)
{ {
if (records.Count == 0) if (records.Count == 0)
continue; continue;
sb.Append($"INSERT INTO `{tableName}`("); headerSb=$"INSERT INTO `{tableName}`(";
for (var i = 0; i < records[0].Headers.Length; i++) for (var i = 0; i < records[0].Headers.Length; i++)
{ {
var header = records[0].Headers[i]; var header = records[0].Headers[i];
sb.Append($"`{header}`"); headerSb+=$"`{header}`";
if (i != records[0].Headers.Length - 1) if (i != records[0].Headers.Length - 1)
sb.Append(','); headerSb.Append(',');
} }
sb.Append(") VALUES "); headerSb+=") VALUES ";
if (prettyOutput) if (prettyOutput)
sb.AppendLine(); headerSb+="/r/n";
var sbList = new List<string>();
var currentLength = headerSb.Length;
for (var i = 0; i < records.Count; i++) for (var i = 0; i < records.Count; i++)
{ {
var record = records[i]; var record = records[i];
sb.Append('('); recordSb.Append('(');
for (var j = 0; j < record.Fields.Length; j++) for (var j = 0; j < record.Fields.Length; j++)
{ {
var field = record.Fields[j]; var field = record.Fields[j];
sb.Append(field); recordSb.Append(field);
if (j != record.Fields.Length - 1) if (j != record.Fields.Length - 1)
sb.Append(','); recordSb.Append(',');
} }
sb.Append(')'); recordSb.Append(')');
if (i != records.Count - 1) // not last field //if (i != records.Count - 1) // not last field
sb.Append(','); // recordSb.Append(',');
if (prettyOutput) sb.AppendLine(); if (prettyOutput) recordSb.AppendLine();
if (currentLength + recordSb.Length >= maxAllowPacket)
{
var insertSb = headerSb;
insertSb+=string.Join(",", sbList);
insertSb += ";";
resultList.Add(insertSb);
insertSb=String.Empty;
sbList.Clear();
currentLength = headerSb.Length;
sbList.Add(recordSb.ToString());
}
else
{
sbList.Add(recordSb.ToString());
}
currentLength += recordSb.Length;
recordSb.Clear();
} }
if (sbList.Count > 0)
sb.AppendLine(";"); {
var insertSb = headerSb.ToString();
insertSb += string.Join(",", sbList);
insertSb += ";";
resultList.Add(insertSb.ToString());
insertSb=string.Empty;
}
headerSb=string.Empty;
} }
return sb.ToString(); return resultList;
} }

View File

@ -8,6 +8,7 @@ public class ProcessContext
private int _inputCount; private int _inputCount;
private int _transformCount; private int _transformCount;
private int _outputCount; private int _outputCount;
private IList<Exception> _exceptionList = new List<Exception>();
public bool IsInputCompleted { get; private set; } public bool IsInputCompleted { get; private set; }
public bool IsTransformCompleted { get; private set; } public bool IsTransformCompleted { get; private set; }
public bool IsOutputCompleted { get; private set; } public bool IsOutputCompleted { get; private set; }
@ -29,7 +30,14 @@ public class ProcessContext
get => _outputCount; get => _outputCount;
private set => _outputCount = value; private set => _outputCount = value;
} }
// Records an exception raised by a pipeline stage so it can be inspected
// after the run via GetExceptions().
// NOTE(review): List<T> is not thread-safe; if multiple worker tasks call
// this concurrently, access needs a lock or a concurrent collection — confirm.
public void AddException(Exception ex)
{
    _exceptionList.Add(ex);
}
// Returns the live list of collected exceptions (not a snapshot); callers
// should treat the result as read-only.
public IList<Exception> GetExceptions()
{
    return _exceptionList;
}
public void CompleteInput() => IsInputCompleted = true; public void CompleteInput() => IsInputCompleted = true;
public void CompleteTransform() => IsTransformCompleted = true; public void CompleteTransform() => IsTransformCompleted = true;

View File

@ -27,7 +27,13 @@ public class TaskManager
_tasks.Add(task); _tasks.Add(task);
_logger.LogDebug("New task created"); _logger.LogDebug("New task created");
} }
/// <summary>
/// Creates <paramref name="taskCount"/> identical tasks, each running
/// <paramref name="func"/>. A non-positive count creates no tasks.
/// </summary>
public void CreateTasks<TResult>(Func<TResult> func,int taskCount, CancellationToken cancellationToken = default)
{
    for (int i = 0; i < taskCount; i++)
    {
        CreateTask(func, cancellationToken);
    }
}
public async Task WaitAll() public async Task WaitAll()
{ {
await Task.WhenAll(_tasks); await Task.WhenAll(_tasks);

View File

@ -0,0 +1,126 @@
using ConsoleApp2.Helpers;
using Microsoft.Extensions.Logging;
using System.IO;
using System.Text.RegularExpressions;
using ZstdSharp;
namespace ConsoleApp2.Services
{
/// <summary>
/// Reads zstd-compressed MyDumper output (*.sql.zst schema file plus
/// *.dat.zst data files) for a single table.
/// </summary>
public class ZstSource : CsvSource
{
    public ZstSource(string inputDir, string tableName, string delimiter = ",", char quoteChar = '"',
        ILogger? logger = null)
        // Forward the caller's arguments. The previous code used assignment
        // expressions in the base call (delimiter = ",", quoteChar = '"',
        // logger = null), which silently discarded whatever the caller passed.
        : base(inputDir, tableName, delimiter, quoteChar, logger)
    {
        // Locate the "<db>.<table>.<nnn>.sql.zst" schema file for this table
        // (dots escaped so "." is matched literally).
        string pattern = $"^.*\\.{tableName}\\..*\\.sql\\.zst$";
        _sqlFilePath = Directory.GetFiles(_inputDir).FirstOrDefault(s => Regex.Match(s, pattern).Success);
    }

    /// <summary>Decompresses a .zst file fully into memory and returns its text.</summary>
    private static async Task<string> DecompressFile(string filePath)
    {
        // The previous version leaked the MemoryStream/StreamReader; reading the
        // decompression stream directly also avoids the intermediate copy.
        using var input = File.OpenRead(filePath);
        using var decompress = new DecompressionStream(input);
        using var reader = new StreamReader(decompress);
        return await reader.ReadToEndAsync();
    }

    /// <summary>Extracts the CSV column headers from the compressed .sql file.</summary>
    public override async Task<string[]> GetHeaders()
    {
        var text = await DecompressFile(_sqlFilePath);
        return await DumpDataHelper.GetCsvHeadersFromSqlFileAsync(text);
    }

    /// <summary>Lists the '*.dat.zst' data files referenced by the compressed .sql file.</summary>
    public override async Task<string[]> GetCsvFiles()
    {
        var text = await DecompressFile(_sqlFilePath);
        return await DumpDataHelper.GetCsvFileNamesFromSqlFileAsync(text, new Regex(@"'.+\.dat\.zst'"));
    }

    /// <summary>
    /// Streams every compressed data file of this table, parses each line into
    /// a <see cref="DataRecord"/> and hands it to <paramref name="action"/>.
    /// </summary>
    public override async Task DoEnqueue(Action<DataRecord> action)
    {
        var sourceFiles = await GetCsvFiles();
        var headers = await GetHeaders(); // identical for every data file of the table
        foreach (var file in sourceFiles)
        {
            var filePath = Path.Combine(_inputDir, file);
            using var input = File.OpenRead(filePath);
            using var decompress = new DecompressionStream(input);
            using var reader = new StreamReader(decompress);
            while (!reader.EndOfStream)
            {
                var line = await reader.ReadLineAsync();
                if (string.IsNullOrEmpty(line))
                    continue; // skip blank lines instead of parsing empty records
                var fields = ParseRow2(line, QuoteChar, Delimiter);
                action?.Invoke(new DataRecord(fields, _tableName, headers));
            }
        }
    }

    /// <summary>
    /// Reads a single sample record (first line of the first data file), or
    /// null when the table has no data files or the first file is empty.
    /// </summary>
    public override async Task<DataRecord?> GetTestRecord()
    {
        var sourceFiles = await GetCsvFiles();
        var file = sourceFiles.FirstOrDefault();
        if (file == null)
            return null;

        var headers = await GetHeaders();
        var filePath = Path.Combine(_inputDir, file);
        using var input = File.OpenRead(filePath);
        using var decompress = new DecompressionStream(input);
        using var reader = new StreamReader(decompress);
        var line = await reader.ReadLineAsync();
        if (line == null)
            return null; // empty data file
        var fields = ParseRow2(line, QuoteChar, Delimiter);
        return new DataRecord(fields, _tableName, headers);
    }

    // Intentionally empty: every stream is scoped with `using` above.
    // NOTE(review): this hides the non-virtual CsvSource.Dispose.
    public void Dispose()
    {
    }
}
}

View File

@ -0,0 +1,28 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace ConsoleApp2.SimulationService
{
/// <summary>
/// Fixed value pools used when fabricating simulation data: shard keys and
/// company ids are handed out round-robin by index.
/// </summary>
public static partial class DataHelper
{
    public static short[] shareKeys = { 23000, 23040, 23070, 23100, 24000, 24040, 24070, 24100, 25000, 25040, 25070, 25100 };

    public static int[] companyIds = { 1, 2, 3, 4 };

    // Wraps the index around the pool length so any non-negative index maps
    // onto an element (index % length), restarting from 0 past the end.
    private static T PickCyclic<T>(T[] pool, int index) => pool[index % pool.Length];

    /// <summary>Returns the shard key for the given round-robin index.</summary>
    public static short GetShareKey(int index) => PickCyclic(shareKeys, index);

    /// <summary>Returns the company id for the given round-robin index.</summary>
    public static int GetCompanyId(int index) => PickCyclic(companyIds, index);
}
}

View File

@ -0,0 +1,173 @@
using ConsoleApp2.Const;
using ConsoleApp2.Helpers;
using ConsoleApp2.HostedServices;
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using System.Text.RegularExpressions;
namespace ConsoleApp2.SimulationService
{
/// <summary>
/// Generates simulated rows for every configured table by cloning one real
/// sample record per table and rewriting its identity/shard columns, then
/// pushes the rows onto the producer queue.
/// </summary>
public class SimulationInputService : IInputService
{
    private readonly ILogger _logger;
    private readonly IOptions<DataInputOptions> _dataInputOptions;
    private readonly IOptions<InputTableOptions> _tableOptions;
    private readonly DataRecordQueue _producerQueue;
    private readonly ProcessContext _context;

    public SimulationInputService(ILogger<InputService> logger,
        IOptions<DataInputOptions> dataInputOptions,
        IOptions<InputTableOptions> tableOptions,
        [FromKeyedServices(ProcessStep.Producer)] DataRecordQueue producerQueue,
        ProcessContext context)
    {
        _logger = logger;
        _dataInputOptions = dataInputOptions;
        _tableOptions = tableOptions;
        _producerQueue = producerQueue;
        _context = context;
    }

    public async Task ExecuteAsync(CancellationToken cancellationToken)
    {
        var inputDir = _dataInputOptions.Value.InputDir;
        _logger.LogInformation("***** simulation input service start, working dir: {InputDir}, thread id: {ThreadId} *****", inputDir, Environment.CurrentManagedThreadId);
        var files = Directory.GetFiles(inputDir);
        if (files.Length == 0)
        {
            _logger.LogInformation("No source files found in {InputDir}", inputDir);
            return;
        }
        foreach (var tableName in _tableOptions.Value.TableInfoConfig.Keys)
        {
            // Total number of rows to fabricate for the current table.
            var dataCount = _tableOptions.Value.TableInfoConfig[tableName].SimulaRowCount;
            var companyTotallCount = 1000;   // rows per company before rotating CompanyID
            var tempRecords = new List<DataRecord>();
            var sk = DataHelper.shareKeys.First();
            var companyID = DataHelper.companyIds.First();
            var shareKeyInterval = 20000;    // rows emitted per ShardKey value
            var getShareKeyTimes = 0;        // times a new ShardKey has been picked
            var getCompanyIDTimes = 0;       // times a new CompanyID has been picked
            var shareKeyIntervalCount = 0;

            // CreateSource is an optional factory delegate; skip the table when no
            // source or no sample row is available instead of dereferencing null
            // (the previous code called source.GetTestRecord() unguarded).
            var source = _dataInputOptions.Value.CreateSource?.Invoke(tableName);
            if (source is null)
            {
                _logger.LogWarning("No input source available for table '{TableName}', skipping", tableName);
                continue;
            }
            var testRecord = await source.GetTestRecord();
            if (testRecord is null)
            {
                _logger.LogWarning("No sample record found for table '{TableName}', skipping", tableName);
                continue;
            }

            for (long i = 1; i <= dataCount; i++)
            {
                // Rotate the shard key every shareKeyInterval rows.
                shareKeyIntervalCount++;
                if (shareKeyIntervalCount > shareKeyInterval)
                {
                    sk = DataHelper.GetShareKey(getShareKeyTimes);
                    getShareKeyTimes++;
                    shareKeyIntervalCount = 0;
                }

                // Clone the sample row so every generated record owns its fields.
                var fields = new string[testRecord.Fields.Length];
                Array.Copy(testRecord.Fields, fields, testRecord.Fields.Length);
                var record = new DataRecord(fields, testRecord.TableName, testRecord.Headers, companyID);

                // Rewrite identity and shard columns so generated rows are unique.
                TrySetField(record, "ID", i.ToString());
                if (record.TableName == "order_box_block")
                    TrySetField(record, "BoxID", i.ToString());
                if (record.TableName == "order_block_plan_item" || record.TableName == "order_package_item")
                    TrySetField(record, "ItemID", i.ToString());
                if (record.TableName == "order")
                    TrySetField(record, "OrderNo", i.ToString());
                TrySetField(record, "ShardKey", sk.ToString());

                tempRecords.Add(record);

                // Flush the current company's batch, then rotate to the next CompanyID.
                if (tempRecords.Count >= companyTotallCount || i >= dataCount - 1)
                {
                    foreach (var rc in tempRecords)
                    {
                        _context.AddInput();
                        _producerQueue.Enqueue(rc);
                        if (cancellationToken.IsCancellationRequested)
                            return;
                    }
                    tempRecords.Clear();
                    companyID = DataHelper.GetCompanyId(getCompanyIDTimes);
                    getCompanyIDTimes++;
                }
            }
            _logger.LogInformation("table:'{tableName}' simulation input completed", tableName);
        }
        _context.CompleteInput();
        _logger.LogInformation("***** Csv input service completed *****");
    }

    // Overwrites the value of the given column when the record contains it;
    // no-op otherwise. Replaces the repeated Contains + IndexOf double lookup.
    private static void TrySetField(DataRecord record, string header, string value)
    {
        var index = Array.IndexOf(record.Headers, header);
        if (index > -1)
            record.Fields[index] = value;
    }
}
}

View File

@ -0,0 +1,10 @@
{
"CmdOptions": {
"InputFileType": "CSV",
"InputDir": "D:/MyDumper-ZST",
"TaskCount": 1,
"FlushCount": 100,
"Isutf8mb4": true,
"OldestShardKey": 22000
}
}