This commit is contained in:
lindj 2024-01-12 16:50:37 +08:00
parent eab3695f53
commit 0984853c79
28 changed files with 1115 additions and 166 deletions

View File

@ -9,12 +9,24 @@
</PropertyGroup>
<ItemGroup>
<None Remove="appsettings.json" />
</ItemGroup>
<ItemGroup>
<Content Include="appsettings.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Caching.StackExchangeRedis" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="MySqlConnector" Version="2.3.3" />
<PackageReference Include="Serilog" Version="3.1.2-dev-02097" />
<PackageReference Include="Serilog.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
<PackageReference Include="ServiceStack.Text" Version="8.0.0" />
<PackageReference Include="ZstdSharp.Port" Version="0.7.4" />
</ItemGroup>
</Project>

View File

@ -1,4 +1,6 @@
namespace ConsoleApp2;
using System.ComponentModel.Design;
namespace ConsoleApp2;
public class DataRecord
@ -33,9 +35,10 @@ public class DataRecord
public string TableName { get; }
public string? Database { get; set; }
public int CompanyID { get; set; }
public DataRecord(string[] fields, string tableName, string[] headers)
public DataRecord(string[] fields, string tableName, string[] headers, int companyID=0)
{
if (fields.Length != headers.Length)
throw new ArgumentException(
@ -45,6 +48,7 @@ public class DataRecord
Fields = fields;
TableName = tableName;
Headers = headers;
CompanyID = companyID;
}
public string this[int index]

View File

@ -1,4 +1,5 @@
using System.Text;
using ConsoleApp2.Options;
using System.Text;
using System.Text.RegularExpressions;
namespace ConsoleApp2.Helpers;
@ -11,9 +12,9 @@ public static partial class DumpDataHelper
private static partial Regex MatchBrackets();
public static async Task<string[]> GetCsvHeadersFromSqlFileAsync(string filePath)
public static async Task<string[]> GetCsvHeadersFromSqlFileAsync(string txt)
{
var txt = await File.ReadAllTextAsync(filePath);
//var txt = await File.ReadAllTextAsync(filePath);
var match = MatchBrackets().Match(txt);
return ParseHeader(match.ValueSpan);
@ -59,10 +60,10 @@ public static partial class DumpDataHelper
return filePath[(firstDotIdx+1)..secondDotIdx].ToString();
}
public static async Task<string[]> GetCsvFileNamesFromSqlFileAsync(string filePath)
public static async Task<string[]> GetCsvFileNamesFromSqlFileAsync(string txt,Regex regex)
{
var txt = await File.ReadAllTextAsync(filePath);
var matches = MatchDatFile().Matches(txt);
//var txt = await File.ReadAllTextAsync(filePath);
var matches = regex.Matches(txt);
return matches.Select(match => match.ValueSpan[1..^1].ToString()).ToArray();
}

View File

@ -0,0 +1,31 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp2.Helpers
{
public static class ValidateConsole
{
public static void ValidateInput<T>(Func<string,bool> converter,string message)
{
Console.Write(message);
string ? input = Console.ReadLine();
while (true)
{
if (!string.IsNullOrEmpty(input))
{
var result = converter(input);
if (result == false)
{
Console.WriteLine($"输入的内容不合法,请重新输入!");
input = Console.ReadLine();
}
else break;
}
break;
}
}
}
}

View File

@ -0,0 +1,13 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp2.HostedServices.Abstractions
{
public interface IDataSource:IDisposable
{
public Task DoEnqueue(Action<DataRecord> action);
}
}

View File

@ -15,54 +15,52 @@ namespace ConsoleApp2.HostedServices;
public class InputService : IInputService
{
private readonly ILogger _logger;
private readonly IOptions<CsvOptions> _csvOptions;
private readonly IOptions<DataInputOptions> _dataInputOptions;
private readonly IOptions<InputTableOptions> _tableOptions;
private readonly DataRecordQueue _producerQueue;
private readonly ProcessContext _context;
public InputService(ILogger<InputService> logger,
IOptions<CsvOptions> csvOptions,
[FromKeyedServices(ProcessStep.Producer)]DataRecordQueue producerQueue,
ProcessContext context)
IOptions<DataInputOptions> dataInputOptions,
IOptions<InputTableOptions> tableOptions,
[FromKeyedServices(ProcessStep.Producer)] DataRecordQueue producerQueue,
ProcessContext context)
{
_logger = logger;
_csvOptions = csvOptions;
_dataInputOptions = dataInputOptions;
_tableOptions = tableOptions;
_producerQueue = producerQueue;
_context = context;
}
public async Task ExecuteAsync(CancellationToken cancellationToken)
{
var inputDir = _csvOptions.Value.InputDir;
var inputDir = _dataInputOptions.Value.InputDir;
_logger.LogInformation("***** Csv input service start, working dir: {InputDir}, thread id: {ThreadId} *****", inputDir, Environment.CurrentManagedThreadId);
var files = Directory.GetFiles(inputDir).Where(s => s.EndsWith(".sql") && !s.Contains("schema")).ToArray();
var files = Directory.GetFiles(inputDir);
if (files.Length == 0)
{
_logger.LogInformation("No sql files found in {InputDir}", inputDir);
_logger.LogInformation("No source files found in {InputDir}", inputDir);
return;
}
foreach (var sqlPath in files)
var count = 0;
foreach (var tableName in _tableOptions.Value.TableInfoConfig.Keys)
{
_logger.LogInformation("Working sql file: {SqlPath}", sqlPath);
var headers = await DumpDataHelper.GetCsvHeadersFromSqlFileAsync(sqlPath);
var csvFiles = await DumpDataHelper.GetCsvFileNamesFromSqlFileAsync(sqlPath);
foreach (var csvFile in csvFiles)
_logger.LogInformation("Working table: {tableName}", tableName);
var source = _dataInputOptions.Value.CreateSource?.Invoke(tableName);
await source.DoEnqueue((record) =>
{
var csvPath = Path.Combine(inputDir, csvFile);
// var source = new JsvSource(csvPath, headers, _logger);
var source = new CsvSource(csvPath, headers, _csvOptions.Value.Delimiter, _csvOptions.Value.QuoteChar, _logger);
_context.AddInput();
_producerQueue.Enqueue(record);
count++;
while (await source.ReadAsync())
{
_context.AddInput();
_producerQueue.Enqueue(source.Current);
if (cancellationToken.IsCancellationRequested)
return;
}
});
if (_context.GetExceptions().Count > 0)
{
_logger.LogInformation("***** Csv input service is canceled *****");
return;
}
_logger.LogInformation("File '{File}' input completed", Path.GetFileName(sqlPath));
_logger.LogInformation("table:'{tableName}' input completed", tableName);
}
_context.CompleteInput();

View File

@ -1,30 +1,73 @@
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Services;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using System.Threading.Tasks;
namespace ConsoleApp2.HostedServices;
public class MainHostedService : BackgroundService
{
private readonly ILogger _logger;
private readonly IInputService _input;
private readonly ITransformService _transform;
private readonly IOutputService _output;
private readonly ProcessContext _context;
public MainHostedService(IInputService input, ITransformService transform, IOutputService output)
public MainHostedService(ILogger<MainHostedService> logger, IInputService input, ITransformService transform, IOutputService output, ProcessContext context)
{
_logger = logger;
_input = input;
_transform = transform;
_output = output;
_context = context;
}
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
var tasks = new List<Task>()
var inputTask = Task.Factory.StartNew(async () =>
{
try
{
await _input.ExecuteAsync(stoppingToken);
}
catch (Exception ex)
{
_context.AddException(ex);
_logger.LogError("Exception occurred on inputService:{Message},{StackTrace}", ex.Message, ex.StackTrace);
}
});
var transformTask = Task.Factory.StartNew(async () =>
{
Task.Run(async () => await _input.ExecuteAsync(stoppingToken), stoppingToken),
Task.Run(async () => await _transform.ExecuteAsync(stoppingToken), stoppingToken),
Task.Run(async () => await _output.ExecuteAsync(stoppingToken), stoppingToken),
};
await Task.WhenAll(tasks);
try
{
await _transform.ExecuteAsync(stoppingToken);
}
catch (Exception ex)
{
_context.AddException(ex);
_logger.LogError("Exception occurred on transformService:{Message},{StackTrace}", ex.Message, ex.StackTrace);
}
});
var outputTask = Task.Factory.StartNew(async () =>
{
try
{
await _output.ExecuteAsync(stoppingToken);
}
catch (Exception ex)
{
_context.AddException(ex);
_logger.LogError("Exception occurred on outputService:{Message},{StackTrace}", ex.Message, ex.StackTrace);
}
});
// await Task.Run(async () => await _output.ExecuteAsync(stoppingToken), stoppingToken);
}
}

View File

@ -5,6 +5,8 @@ using ConsoleApp2.Services;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using MySqlConnector;
using System.Threading;
namespace ConsoleApp2.HostedServices;
@ -32,36 +34,40 @@ public class OutputService : IOutputService
_taskManager = taskManager;
}
public async Task ExecuteAsync(CancellationToken stoppingToken)
public async Task ExecuteAsync(CancellationToken cancellationToken)
{
_logger.LogInformation("***** Mysql output service started *****");
var records = new List<DataRecord>();
while (!_context.IsTransformCompleted || _consumerQueue.Count > 0)
var count = 0;
_taskManager.CreateTasks(async () =>
{
if (!_consumerQueue.TryDequeue(out var record)) continue;
records.Add(record);
if (records.Count >= _options.Value.FlushCount)
var records = new List<DataRecord>();
while (!_context.IsTransformCompleted || _consumerQueue.Count > 0)
{
var recordsCopy = records;
_taskManager.CreateTask(async () => await FlushAsync(recordsCopy), stoppingToken);
records = [];
if (!_consumerQueue.TryDequeue(out var record)) continue;
records.Add(record);
count++;
//_logger.LogInformation(@"*****OutputCount: {count} *****",count);
if (records.Count >= _options.Value.FlushCount)
{
await FlushAsync(records);
records.Clear();
}
if (_context.GetExceptions().Count>0)
{
_logger.LogInformation("***** Csv output service is canceled *****");
return;
}
}
if (_taskManager.TaskCount >= _options.Value.MaxTask)
if (_context.IsTransformCompleted && records.Count > 0)
{
await _taskManager.WaitAll();
_taskManager.ClearTask();
await FlushAsync(records);
records.Clear();
_context.CompleteOutput();
_logger.LogInformation("***** Mysql output service completed *****");
}
}
}, _options.Value.TaskCount);
await _taskManager.WaitAll();
await FlushAsync(records);
_context.CompleteOutput();
_logger.LogInformation("***** Mysql output service completed *****");
}
private async Task FlushAsync(IEnumerable<DataRecord> records)
@ -69,15 +75,29 @@ public class OutputService : IOutputService
var count = 0;
await using var output = new MySqlDestination(
_options.Value.ConnectionString ?? throw new InvalidOperationException("Connection string is required"),
_logger, true);
_logger, _context,true);
//if (records == null || records.Count() == 0) return;
//var dbName = $"cferp_test_1";
//if (records != null && records.Count() > 0)
//{
// dbName = $"cferp_test_{records.FirstOrDefault()?.CompanyID}";
//}
//await using var output = new MySqlDestination(new MySqlConnectionStringBuilder
//{
// Server = "127.0.0.1",
// Port = 34309,
// Database = dbName,
// UserID = "root",
// Password = "123456",
// MaximumPoolSize = 50,
//}.ConnectionString, _logger,true);
foreach (var record in records)
{
await output.WriteRecordAsync(record);
count++;
}
await output.FlushAsync();
await output.FlushAsync(_options.Value.MaxAllowedPacket);
_context.AddOutput(count);
}
}

View File

@ -49,6 +49,7 @@ public class TaskMonitorService : BackgroundService
bool endCheck = false;
while (true)
{
if (_context.GetExceptions().Count>0) return;
EndCheck:
// var running = 0;
// var error = 0;

View File

@ -0,0 +1,83 @@
using ConsoleApp2.HostedServices.Abstractions;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using ConsoleApp2.Const;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using System.Reflection.PortableExecutable;
using System.Collections.Concurrent;
using ConsoleApp2.SimulationService;
namespace ConsoleApp2.HostedServices
{
public class TestInputService : IInputService
{
private readonly ILogger _logger;
private readonly IOptions<CsvOptions> _csvOptions;
private readonly DataRecordQueue _producerQueue;
private readonly ProcessContext _context;
public TestInputService(ILogger<TestInputService> logger,
IOptions<CsvOptions> csvOptions,
[FromKeyedServices(ProcessStep.Producer)] DataRecordQueue producerQueue,
ProcessContext context)
{
_logger = logger;
_csvOptions = csvOptions;
_producerQueue = producerQueue;
_context = context;
}
public async Task ExecuteAsync(CancellationToken cancellationToken)
{
var tableName = "order_item";
var headers = new string[] { "ID","OrderNo","ItemNo","ItemType","RoomID","BoxID","DataID","PlanID","PackageID","Num","CompanyID","ShardKey" };
var dataCount = 1200000000L;
var tempCount = 80000;
var tempRecords=new List<DataRecord>();
var comanyID = 1;
short[] shareKeys = { 23040, 23070, 23100, 24000, 24040, 24070, 24100, 25000, 25040, 25070, 25100 };
int[] companyIds = { 1, 2, 3, 4 };
var sk = shareKeys.First();
var companyID = companyIds.First();
var shareKeyInterval = 20000;
var getShareKeyTimes = 0;
var getCompanyIDTimes = 0;
var shareKeyIntervalCount = 0;
for (long i = 1; i <= dataCount; i++)
{
shareKeyIntervalCount++;
if (shareKeyIntervalCount > shareKeyInterval) {
sk=DataHelper.GetShareKey(getShareKeyTimes);
getShareKeyTimes++;
shareKeyIntervalCount = 0;
}
var fields = new string[] { i.ToString(), "20220104020855", (220105981029 + i).ToString(), "1", "482278", "482279", "3768774", "0", "0", "1", companyID.ToString(), sk.ToString() };
var record = new DataRecord(fields, tableName, headers, comanyID);
tempRecords.Add(record);
if (tempRecords.Count >= tempCount)
{
foreach (var rc in tempRecords)
{
_context.AddInput();
_producerQueue.Enqueue(rc);
if (cancellationToken.IsCancellationRequested)
return;
}
tempRecords.Clear();
companyID = DataHelper. GetCompanyId(getCompanyIDTimes);
getCompanyIDTimes++;
}
}
_context.CompleteInput();
_logger.LogInformation("***** Csv input service completed *****");
}
}
}

View File

@ -36,12 +36,16 @@ public class TransformService : ITransformService
public async Task ExecuteAsync(CancellationToken cancellationToken)
{
_logger.LogInformation("***** Data transform service started, thread id: {ThreadId} *****", Environment.CurrentManagedThreadId);
while (!_context.IsInputCompleted || _producerQueue.Count > 0)
while ((!_context.IsInputCompleted || _producerQueue.Count > 0))
{
if (_context.GetExceptions().Count > 0)
{
_logger.LogInformation("***** Csv transform service is canceled *****");
return;
}
// var dbOptions = _options.Value.DatabaseFilter(record);
if (!_producerQueue.TryDequeue(out var record)) continue;
record.Database = _options.Value.DatabaseFilter?.Invoke(record);
for (var i = 0; i < record.Fields.Length; i++)
{
var field = record[i];
@ -56,20 +60,41 @@ public class TransformService : ITransformService
switch (_options.Value.GetColumnType(record.TableName, record.Headers[i]))
{
case ColumnType.Blob or ColumnType.Text:
field = string.IsNullOrEmpty(field) ? "''" : $"0x{field}";
case ColumnType.Text:
field = string.IsNullOrEmpty(field) ? "''" : _options.Value.TransformBinary?.Invoke(field) ?? field; ;
break;
case ColumnType.Blob:
field = string.IsNullOrEmpty(field) ? "NULL" : $"0x{field}";
break;
default:
break;
}
Escape:
Escape:
record[i] = field;
}
// TODO: 数据处理/过滤/复制
//过滤不要的record
if (_options.Value.RecordFilter?.Invoke(record) == false) continue;
record.Database = _options.Value.DatabaseFilter?.Invoke(record);
//修改record
_options.Value.RecordModify?.Invoke(record);
//替换record
var replaceRecord = _options.Value.RecordReplace?.Invoke(record);
if (replaceRecord != null)
{
record = replaceRecord;
}
_consumerQueue.Enqueue(record);
//数据增加
var addRecords=_options.Value.RecordAdd?.Invoke(record);
if(addRecords != null)
{
foreach(var rc in addRecords)
{
_consumerQueue.Enqueue(rc);
}
}
_context.AddTransform();
}

View File

@ -0,0 +1,19 @@
using System;
using System.ComponentModel;
using System.Configuration;
namespace ConsoleApp2.Options
{
public class CommandOptions
{
public string InputDir { get; set; } = "./MyDumper";
public int TaskCount { get; set; } = 16;
public int FlushCount { get; set; } = 20000;
public bool Isutf8mb4 { get; set; } = true;
public short OldestShardKey { get; set; } = 23010;
}
}

View File

@ -5,7 +5,7 @@ public class CsvOptions
/// <summary>
/// MyDumper导出的CSV文件目录
/// </summary>
public string InputDir { get; set; } = "./";
//public string InputDir { get; set; } = "./";
/// <summary>
/// 字符串的包围符号,默认为双引号"

View File

@ -0,0 +1,18 @@
using ConsoleApp2.Services;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection.PortableExecutable;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp2.Options
{
public enum InputFileType { CSV, JWT, JSV }
public class DataInputOptions
{
public string InputDir { get; set; } = "./";
public Func<string, CsvSource>? CreateSource { get; set; }
}
}

View File

@ -1,4 +1,6 @@
namespace ConsoleApp2.Options;
using StackExchange.Redis;
namespace ConsoleApp2.Options;
public enum ColumnType
{
@ -10,7 +12,14 @@ public enum ColumnType
public class DataTransformOptions
{
public Func<DataRecord, string>? DatabaseFilter { get; set; }
public Func<string, string>? TransformBinary { get; set; }//Binary转字符串方法
public Func<DataRecord, bool>? RecordFilter { get; set; }//数据过滤方法
public Action<DataRecord>? RecordModify { get; set; }//数据修改
public Func<DataRecord, DataRecord?>? RecordReplace { get; set; }//数据替换
public Func<DataRecord, IList<DataRecord>?>? RecordAdd { get; set; }//数据替换
/// <summary>
/// 配置导入数据的特殊列
/// </summary>

View File

@ -7,11 +7,13 @@ public class DatabaseOutputOptions
/// </summary>
public string? ConnectionString { get; set; }
/// <summary>
/// 输出服务的最大任务(Task)数
/// 输出服务的任务(Task)数
/// </summary>
public int MaxTask { get; set; }
public int TaskCount { get; set; }
/// <summary>
/// 每个任务每次提交到数据库的记录数量每N条构建一次SQL语句
/// </summary>
public int FlushCount { get; set; }
public int MaxAllowedPacket { get; set; } = 32*1024*1024;
}

View File

@ -0,0 +1,17 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp2.Options
{
public class TableInfo
{
public long SimulaRowCount { get; set; }//模拟的记录条数
}
public class InputTableOptions
{
public Dictionary<string, TableInfo> TableInfoConfig { get; set; } = new();
}
}

View File

@ -1,35 +1,227 @@
using ConsoleApp2.Const;
using ConsoleApp2;
using ConsoleApp2.Const;
using ConsoleApp2.Helpers;
using ConsoleApp2.HostedServices;
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using ConsoleApp2.SimulationService;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using MySqlConnector;
using Serilog;
using Serilog.Core;
using System.Reflection.PortableExecutable;
// 运行之前把Mysql max_allowed_packets 调大
// 运行之前把process_step表的外键删掉
await RunProgram();
return;
async Task RunProgram()
{
//var inputDir= "D:\\MyDumper";
//ValidateConsole.ValidateInput<string>((_inputDir) =>
//{
// if (Directory.Exists(_inputDir))
// {
// inputDir = _inputDir;
// return true;
// }
// else return false;
//}, "请输入读取csv文件的目录(默认为当前目录下MyDumper文件夹):");
//var maxTask = 16;
//ValidateConsole.ValidateInput<string>((_inputDir) =>
//{
// _ = int.TryParse(_inputDir.ToString(), out var _taskCount);
// if (_taskCount > 0) {
// maxTask = _taskCount;
// return true;
// }
// else return false;
//}, "请输入执行输出的线程数量(默认为16):");
//var flushCount = 2_0000;
//ValidateConsole.ValidateInput<string>((_inputDir) =>
//{
// _ = int.TryParse(_inputDir.ToString(), out var _flashCount);
// if (_flashCount > 0)
// {
// flushCount = _flashCount;
// return true;
// } else return false;
//}, "请输入单次插入的行数(默认为20000):");
ThreadPool.SetMaxThreads(200, 200);
var host = Host.CreateApplicationBuilder();
host.Configuration.AddCommandLine(args);
var host = Host.CreateApplicationBuilder(args);
var commandOptions = host.Configuration.GetSection("CmdOptions").Get<CommandOptions>() ?? new CommandOptions();
Console.WriteLine($"InputDir:{commandOptions?.InputDir}");
Console.WriteLine($"OutPutFlushCount:{commandOptions?.FlushCount}");
Console.WriteLine($"OutPutTaskCount:{commandOptions?.TaskCount}");
host.Services.Configure<InputTableOptions>(option =>
{
option.TableInfoConfig = new Dictionary<string, TableInfo>
{
//{"order_block_plan_item",new TableInfo{SimulaRowCount=136323566 }},//从order_item表查询然后程序插入
//{"order_package_item",new TableInfo{SimulaRowCount=52525224 }},//从order_item表查询然后程序插入
//{"order_patch_detail",new TableInfo{SimulaRowCount=10 }},//生产没有这个表,不处理
//{"machine",new TableInfo{SimulaRowCount=14655 }},
//{"order",new TableInfo{SimulaRowCount=5019216 }},
//{"order_block_plan",new TableInfo{SimulaRowCount=2725553 }},//CreateTime < 202301的删除
{"order_block_plan_result",new TableInfo{SimulaRowCount=1174096 }},
//{"order_box_block",new TableInfo{SimulaRowCount=29755672 }},
//{"order_data_block",new TableInfo{SimulaRowCount=731800334 }},
//{"order_data_goods",new TableInfo{SimulaRowCount=25803671 }},
//{"order_data_parts",new TableInfo{SimulaRowCount=468517543 }},
//{"order_item",new TableInfo{SimulaRowCount=1345520079 }},
//{"order_module",new TableInfo{SimulaRowCount=103325385 }},
//{"order_module_extra",new TableInfo{SimulaRowCount=54361321 }},
//{"order_module_item",new TableInfo{SimulaRowCount=69173339 }},
//{"order_package",new TableInfo{SimulaRowCount=16196195 }},
//{"order_process",new TableInfo{SimulaRowCount=3892685 }},//orderNo < 202301的
//{"order_process_step",new TableInfo{SimulaRowCount=8050349 }},//orderNo < 202301的删除
//{"order_process_step_item",new TableInfo{SimulaRowCount=14538058 }},//orderNo < 202301的删除
//{"order_scrap_board",new TableInfo{SimulaRowCount=123998 }},
//{"process_group",new TableInfo{SimulaRowCount=1253 }},
//{"process_info",new TableInfo{SimulaRowCount=7839 }},
//{"process_item_exp",new TableInfo{SimulaRowCount=28 }},
//{"process_schdule_capacity",new TableInfo{SimulaRowCount=39736 }},
//{"process_step_efficiency",new TableInfo{SimulaRowCount=8 }},
//{"report_template",new TableInfo{SimulaRowCount=7337 }},
//{"simple_package",new TableInfo{SimulaRowCount=130436 }},//orderNo < 202301的删除
//{"simple_plan_order",new TableInfo{SimulaRowCount=351470 }},//CreateTime < 202301的删除
//{"sys_config",new TableInfo{SimulaRowCount=2296 }},
//{"work_calendar",new TableInfo{SimulaRowCount=11 }},
//{"work_shift",new TableInfo{SimulaRowCount=59 }},
//{"work_time",new TableInfo{SimulaRowCount=62 }},
};
});
host.Services.Configure<CsvOptions>(option =>
{
option.Delimiter = ",";
option.QuoteChar = '"';
option.InputDir = "D:/Dump";
});
host.Services.Configure<DataInputOptions>(options =>
{
options.InputDir = commandOptions.InputDir;
var _csvOptions = new CsvOptions { Delimiter = ",", QuoteChar = '"' };
options.CreateSource = (string tableName) =>
{
var source = new ZstSource(commandOptions.InputDir, tableName, _csvOptions.Delimiter, _csvOptions.QuoteChar);
return source;
};
});
host.Services.Configure<DataTransformOptions>(options =>
{
options.DatabaseFilter = record => "cferp_test_1";
options.DatabaseFilter = record => "cferp_test";
options.TransformBinary = field => commandOptions != null && commandOptions.Isutf8mb4 ? $"_utf8mb4 0x{field}" : $"0x{field}";
//数据过滤
options.RecordFilter = record =>
{
var index = Array.IndexOf(record.Headers, "ShardKey");
if (index > -1)
{
var skString = record.Fields[index];
short.TryParse(skString, out var sk);
if (sk < commandOptions.OldestShardKey) return false;
}
if (record.TableName == "order_package")
{
var pkNoIndex = Array.IndexOf(record.Headers, "PakageNo");
if (pkNoIndex > -1)
{
var pkNo = record.Fields[pkNoIndex];
if (pkNo.Length <= 2) return false;
}
}
if (record.TableName == "order_block_plan")
{
var orderNosIndex = Array.IndexOf(record.Headers, "OrderNos");
if (orderNosIndex > -1)
{
var pkNo = record.Fields[orderNosIndex];
if (pkNo.Length <= 2) return false;
}
}
return true;
};
//数据修改
options.RecordModify = (record) =>
{
if (record.TableName == "order_process")//修改order_process.NextStepID的默认值为0
{
var nextStepIdIndex = Array.IndexOf(record.Headers, "NextStepID");
if (nextStepIdIndex > -1)
{
var idString = record.Fields[nextStepIdIndex];
if (idString == "\\N")
{
record.Fields[nextStepIdIndex] = "0";
}
}
}
};
//数据替换
options.RecordReplace = (record) =>
{
//删除数据源里simple_plan_order.ProcessState 字段和值
if (record.TableName == "simple_plan_order")//修改order_process.NextStepID的默认值为0
{
var nextStepIdIndex = Array.IndexOf(record.Headers, "ProcessState");
if (nextStepIdIndex > -1)
{
var headers = record.Headers.Where(t => t != "ProcessState").ToArray();
var fs = record.Fields.ToList();
fs.RemoveAt(nextStepIdIndex);
var fields = fs.ToArray();
return new DataRecord(fields, record.TableName, headers, record.CompanyID);
}
}
return null;
};
//数据生成
options.RecordAdd = (record) =>
{
var resultList = new List<DataRecord>();
if(record.TableName == "order_item")
{
var itemIDIndex = Array.IndexOf(record.Headers, "ItemID");
var shardKeyIndex = Array.IndexOf(record.Headers, "ShardKey");
var planIDIndex = Array.IndexOf(record.Headers, "PlanID");
var packageIDIndex = Array.IndexOf(record.Headers, "PackageID");
var companyIDIndex = Array.IndexOf(record.Headers, "CompanyID");
//resultList.Add(new DataRecord(
// new[] { "ItemID", "ShardKey", "PlanID","CompanyID" }, "order_block_plan_item",
// new[] { record.Fields[itemIDIndex], record.Fields[shardKeyIndex], record.Fields[planIDIndex], record.Fields[companyIDIndex] }));
//resultList.Add(
// new DataRecord(new[] { "ItemID", "ShardKey", "PackageID", "CompanyID" }, "order_package_item",
// new[] { record.Fields[itemIDIndex], record.Fields[shardKeyIndex], record.Fields[packageIDIndex], record.Fields[companyIDIndex] }));
}
return resultList;
};
options.ColumnTypeConfig = new()
{
{ "simple_plan_order.PlaceData", ColumnType.Blob },
@ -61,20 +253,19 @@ async Task RunProgram()
{ "order_block_plan.BlockInfo", ColumnType.Text },
};
});
host.Services.Configure<DatabaseOutputOptions>(options =>
{
options.ConnectionString = new MySqlConnectionStringBuilder
{
Server = "127.0.0.1",
Port = 33306,
Database = "cferp_test_1",
Port = 33309,
Database = "cferp_test",
UserID = "root",
Password = "123456",
MaximumPoolSize = 50, // 这个值应当小于 max_connections
}.ConnectionString;
options.MaxTask = 16;
options.FlushCount = 200;
options.TaskCount = commandOptions.TaskCount;
options.FlushCount = commandOptions.FlushCount;
});
host.Services.AddLogging(builder =>
{
@ -89,10 +280,13 @@ async Task RunProgram()
host.Services.AddHostedService<MainHostedService>();
host.Services.AddHostedService<TaskMonitorService>();
host.Services.AddSingleton<IInputService, InputService>();
host.Services.AddSingleton<IInputService, SimulationInputService>();
host.Services.AddSingleton<ITransformService, TransformService>();
host.Services.AddSingleton<IOutputService, OutputService>();
host.Services.AddStackExchangeRedisCache(options =>
{
options.Configuration = "localhost:6379";
});
var app = host.Build();
await app.RunAsync();
}

View File

@ -1,5 +1,7 @@
using System.Text;
using System.Text.RegularExpressions;
using ConsoleApp2.Helpers;
using ConsoleApp2.HostedServices.Abstractions;
using Microsoft.Extensions.Logging;
namespace ConsoleApp2.Services;
@ -7,45 +9,52 @@ namespace ConsoleApp2.Services;
/// <summary>
/// CSV文件读取
/// </summary>
public class CsvSource
public class CsvSource:IDataSource
{
private readonly string _filePath;
private readonly StreamReader _reader;
protected readonly string _inputDir;
//protected readonly StreamReader _reader;
private readonly ILogger? _logger;
private readonly string _tableName;
protected readonly string _tableName;
protected string _sqlFilePath;
protected readonly string _sqlFileText;
public DataRecord Current { get; private set; }
public string[]? Headers { get; }
public string? CurrentRaw { get; private set; }
//public DataRecord Current { get; protected set; }
//public string[]? Headers { get; }
public string? CurrentRaw { get; protected set; }
public string Delimiter { get; private set; }
public char QuoteChar { get; private set; }
public CsvSource(string filePath, string[]? headers = null, string delimiter = ",", char quoteChar = '"',
public CsvSource(string inputDir,string tableName,string delimiter = ",", char quoteChar = '"',
ILogger? logger = null)
{
_filePath = filePath;
Headers = headers;
_inputDir = inputDir;
_tableName = tableName;
//Headers = headers;
_logger = logger;
Delimiter = delimiter;
QuoteChar = quoteChar;
var fs = File.OpenRead(filePath);
_reader = new StreamReader(fs);
_tableName = DumpDataHelper.GetTableName(filePath);
//var fs = File.OpenRead(filePath);
//_reader = new StreamReader(fs);
//_tableName = DumpDataHelper.GetTableName(filePath);
string pattern = $"^.*\\.{tableName}\\..*\\.sql$";
_sqlFilePath = Directory.GetFiles(_inputDir).FirstOrDefault(s => Regex.Match(s, pattern).Success);
}
public async ValueTask<bool> ReadAsync()
{
var str = await _reader.ReadLineAsync();
if (string.IsNullOrWhiteSpace(str))
return false;
//public virtual async ValueTask<bool> ReadAsync()
//{
// var str = await _reader.ReadLineAsync();
// if (string.IsNullOrWhiteSpace(str))
// return false;
CurrentRaw = str;
// CurrentRaw = str;
var fields = ParseRow2(str, QuoteChar, Delimiter);
Current = new DataRecord(fields, _tableName, Headers);
return true;
}
// var fields = ParseRow2(str, QuoteChar, Delimiter);
// Current = new DataRecord(fields, _tableName, Headers);
// return true;
//}
public string[] ParseRow(string row, char quoteChar, string delimiter)
{
@ -136,4 +145,64 @@ public class CsvSource
result.Add(current.ToString());
return result.ToArray();
}
public virtual async Task<string[]> GetHeaders()
{
var text = await File.ReadAllTextAsync(_sqlFilePath);
return await DumpDataHelper.GetCsvHeadersFromSqlFileAsync(text);
}
public virtual async Task<string[]> GetCsvFiles()
{
var text= await File.ReadAllTextAsync(_sqlFilePath);
return await DumpDataHelper.GetCsvFileNamesFromSqlFileAsync(text,new Regex(@"'.+\.dat'"));
}
public virtual async Task DoEnqueue(Action<DataRecord> action)
{
var sourceFiles =await GetCsvFiles();
foreach (var file in sourceFiles)
{
var headers = await GetHeaders();
var filePath= Path.Combine(_inputDir, file);
using (var fs = File.OpenRead(filePath))
{
using (StreamReader sr = new StreamReader(fs))
{
while (!sr.EndOfStream)
{
var line = await sr.ReadLineAsync();
var fields = ParseRow2(line, QuoteChar, Delimiter);
var record = new DataRecord(fields, _tableName, headers);
action?.Invoke(record);
}
}
}
}
}
public virtual async Task<DataRecord?> GetTestRecord()
{
var sourceFiles = await GetCsvFiles();
var file = sourceFiles.FirstOrDefault();
if (file != null)
{
var headers = await GetHeaders();
var filePath = Path.Combine(_inputDir, file);
using (var fs = File.OpenRead(filePath))
{
using (StreamReader sr = new StreamReader(fs))
{
var line = await sr.ReadLineAsync();
var fields = ParseRow2(line, QuoteChar, Delimiter);
var record = new DataRecord(fields, _tableName, headers);
return record;
}
}
}
return null;
}
public void Dispose()
{
// _reader.Dispose();
}
}

View File

@ -19,7 +19,7 @@ public class DataRecordQueue : IDisposable
public DataRecordQueue()
{
_queue = new BlockingCollection<DataRecord>(200_000); // 队列最长为20W条记录
_queue = new BlockingCollection<DataRecord>(2000_000); // 队列最长为20W条记录
}
public bool TryDequeue([MaybeNullWhen(false)] out DataRecord record)

View File

@ -1,4 +1,5 @@
using ConsoleApp2.Helpers;
using ConsoleApp2.HostedServices.Abstractions;
using Microsoft.Extensions.Logging;
using ServiceStack.Text;
@ -8,9 +9,9 @@ namespace ConsoleApp2.Services;
/// 读取Jsv格式文件
/// </summary>
[Obsolete]
public class JsvSource : IDisposable
public class JsvSource:IDataSource
{
private readonly string _filePath;
private readonly string _inputDir;
private readonly JsvStringSerializer _jsv;
private readonly StreamReader _reader;
// ReSharper disable once PrivateFieldCanBeConvertedToLocalVariable
@ -21,29 +22,22 @@ public class JsvSource : IDisposable
public string[]? Headers { get; }
public bool EndOfSource => _reader.EndOfStream;
public JsvSource(string filePath, string[]? headers = null, ILogger? logger = null)
public JsvSource(string inputDir,string tableName, ILogger? logger = null)
{
_filePath = filePath;
_inputDir = inputDir;
_tableName = tableName;
_jsv = new JsvStringSerializer();
_reader = new StreamReader(filePath);
Headers = headers;
// _reader = new StreamReader(filePath);
//Headers = headers;
_logger = logger;
// _logger?.LogInformation("Reading file: {FilePath}", filePath);
_tableName = DumpDataHelper.GetTableName(filePath);
//_tableName = DumpDataHelper.GetTableName(filePath);
}
public async ValueTask<bool> ReadAsync()
public async Task DoEnqueue(Action<DataRecord> action)
{
var str = await _reader.ReadLineAsync();
if (string.IsNullOrEmpty(str))
return false;
var fields = _jsv.DeserializeFromString<string[]>(str);
Current = new DataRecord(fields, _tableName, Headers);
return true;
}
public void Dispose()
public void Dispose()
{
_reader.Dispose();
}

View File

@ -2,6 +2,7 @@
using ConsoleApp2.Helpers;
using Microsoft.Extensions.Logging;
using MySqlConnector;
using ServiceStack;
namespace ConsoleApp2.Services;
@ -14,16 +15,21 @@ public class MySqlDestination : IDisposable, IAsyncDisposable
private readonly MySqlConnection _conn;
private readonly ILogger _logger;
private readonly bool _prettyOutput;
public MySqlDestination(string connStr, ILogger logger, bool prettyOutput = false)
private readonly int _maxAllowPacket;
private readonly ProcessContext _context;
private static StringBuilder recordSb = new StringBuilder();
public MySqlDestination(string connStr, ILogger logger, ProcessContext context,bool prettyOutput = false)
{
_conn = new MySqlConnection(connStr);
_conn.Open();
_recordCache = new Dictionary<string, IList<DataRecord>>();
_logger = logger;
_context = context;
_prettyOutput = prettyOutput;
}
}
public Task WriteRecordAsync(DataRecord record)
{
_recordCache.AddOrUpdate(record.TableName, [record], (key, value) =>
@ -42,74 +48,113 @@ public class MySqlDestination : IDisposable, IAsyncDisposable
}
}
public async Task FlushAsync()
public async Task FlushAsync(int maxAllowPacket)
{
if (_recordCache.Count == 0)
return;
var cmd = _conn.CreateCommand();
cmd.CommandText = SerializeRecords(_recordCache, _prettyOutput);
//var cmd = _conn.CreateCommand();
//cmd.CommandTimeout = 3 * 60;
try
{
await cmd.ExecuteNonQueryAsync();
var excuseList = GetExcuseList(_recordCache, maxAllowPacket, _prettyOutput);
//foreach (var insertSql in excuseList)
//{
// //cmd.CommandText = insertSql;
// //await cmd.ExecuteNonQueryAsync();
// //_logger.LogInformation(@"do insert completed!size:{Length}", cmd.CommandText.Length);
//}
_recordCache.Clear();
}
catch (Exception e)
{
_logger.LogCritical(e, "Error when flushing records, sql: {Sql}", cmd.CommandText.Omit(1000));
//_logger.LogCritical(e, "Error when flushing records, sql: {Sql}", cmd.CommandText.Omit(1000));
_context.AddException(e);
throw;
}
finally
{
await cmd.DisposeAsync();
//await cmd.DisposeAsync();
}
}
public static string SerializeRecords(IDictionary<string, IList<DataRecord>> tableRecords,
bool prettyOutput = false)
public static IList<string> GetExcuseList(IDictionary<string, IList<DataRecord>> tableRecords,int maxAllowPacket,
bool prettyOutput = false)
{
var sb = new StringBuilder();
var resultList = new List<string>();
var headerSb = string.Empty;
//var recordSb = new StringBuilder();
recordSb.Clear();
foreach (var (tableName, records) in tableRecords)
{
if (records.Count == 0)
continue;
sb.Append($"INSERT INTO `{tableName}`(");
headerSb=$"INSERT INTO `{tableName}`(";
for (var i = 0; i < records[0].Headers.Length; i++)
{
var header = records[0].Headers[i];
sb.Append($"`{header}`");
headerSb+=$"`{header}`";
if (i != records[0].Headers.Length - 1)
sb.Append(',');
headerSb.Append(',');
}
sb.Append(") VALUES ");
headerSb+=") VALUES ";
if (prettyOutput)
sb.AppendLine();
headerSb+="/r/n";
var sbList = new List<string>();
var currentLength = headerSb.Length;
for (var i = 0; i < records.Count; i++)
{
var record = records[i];
sb.Append('(');
recordSb.Append('(');
for (var j = 0; j < record.Fields.Length; j++)
{
var field = record.Fields[j];
sb.Append(field);
recordSb.Append(field);
if (j != record.Fields.Length - 1)
sb.Append(',');
recordSb.Append(',');
}
sb.Append(')');
recordSb.Append(')');
if (i != records.Count - 1) // not last field
sb.Append(',');
if (prettyOutput) sb.AppendLine();
//if (i != records.Count - 1) // not last field
// recordSb.Append(',');
if (prettyOutput) recordSb.AppendLine();
if (currentLength + recordSb.Length >= maxAllowPacket)
{
var insertSb = headerSb;
insertSb+=string.Join(",", sbList);
insertSb += ";";
resultList.Add(insertSb);
insertSb=String.Empty;
sbList.Clear();
currentLength = headerSb.Length;
sbList.Add(recordSb.ToString());
}
else
{
sbList.Add(recordSb.ToString());
}
currentLength += recordSb.Length;
recordSb.Clear();
}
sb.AppendLine(";");
if (sbList.Count > 0)
{
var insertSb = headerSb.ToString();
insertSb += string.Join(",", sbList);
insertSb += ";";
resultList.Add(insertSb.ToString());
insertSb=string.Empty;
}
headerSb=string.Empty;
}
return sb.ToString();
return resultList;
}

View File

@ -8,6 +8,7 @@ public class ProcessContext
private int _inputCount;
private int _transformCount;
private int _outputCount;
private IList<Exception> _exceptionList = new List<Exception>();
public bool IsInputCompleted { get; private set; }
public bool IsTransformCompleted { get; private set; }
public bool IsOutputCompleted { get; private set; }
@ -29,7 +30,14 @@ public class ProcessContext
get => _outputCount;
private set => _outputCount = value;
}
public void AddException(Exception ex)
{
_exceptionList.Add(ex);
}
public IList<Exception> GetExceptions()
{
return _exceptionList;
}
public void CompleteInput() => IsInputCompleted = true;
public void CompleteTransform() => IsTransformCompleted = true;

View File

@ -27,7 +27,13 @@ public class TaskManager
_tasks.Add(task);
_logger.LogDebug("New task created");
}
public void CreateTasks<TResult>(Func<TResult> func,int taskCount, CancellationToken cancellationToken = default)
{
for (int i = 0; i < taskCount; i++)
{
CreateTask(func, cancellationToken);
}
}
public async Task WaitAll()
{
await Task.WhenAll(_tasks);

View File

@ -0,0 +1,126 @@
using ConsoleApp2.Helpers;
using Microsoft.Extensions.Logging;
using System.IO;
using System.Text.RegularExpressions;
using ZstdSharp;
namespace ConsoleApp2.Services
{
public class ZstSource : CsvSource
{
public ZstSource(string inputDir, string tableName, string delimiter = ",", char quoteChar = '"',
ILogger? logger = null) : base(inputDir, tableName, delimiter = ",", quoteChar = '"', logger = null)
{
//throw new Exception("aaa");
string pattern = $"^.*\\.{tableName}\\..*\\.sql.zst$";
_sqlFilePath = Directory.GetFiles(_inputDir).FirstOrDefault(s => Regex.Match(s, pattern).Success);
}
private async Task<string> DecompressFile(string filePath)
{
using (var input = File.OpenRead(filePath))
{
using (var decopress = new DecompressionStream(input))
{
var ms = new MemoryStream();
decopress.CopyTo(ms);
ms.Seek(0, SeekOrigin.Begin);
StreamReader reader = new StreamReader(ms);
var text = await reader.ReadToEndAsync();
return text;
}
}
}
public override async Task<string[]> GetHeaders()
{
var text = await DecompressFile(_sqlFilePath);
return await DumpDataHelper.GetCsvHeadersFromSqlFileAsync(text);
}
public override async Task<string[]> GetCsvFiles()
{
var text = await DecompressFile(_sqlFilePath);
return await DumpDataHelper.GetCsvFileNamesFromSqlFileAsync(text, new Regex(@"'.+\.dat.zst'"));
}
public override async Task DoEnqueue(Action<DataRecord> action)
{
var sourceFiles = await GetCsvFiles();
var headers = await GetHeaders();
foreach (var file in sourceFiles)
{
var filePath = Path.Combine(_inputDir, file);
using (var input = File.OpenRead(filePath))
{
using (var decopress = new DecompressionStream(input))
{
var ms = new MemoryStream();
decopress.CopyTo(ms);
ms.Seek(0, SeekOrigin.Begin);
StreamReader reader = new StreamReader(ms);
while (!reader.EndOfStream)
{
var line = await reader.ReadLineAsync();
var fields = ParseRow2(line, QuoteChar, Delimiter);
var record = new DataRecord(fields, _tableName, headers);
action?.Invoke(record);
}
}
}
//var headers = await GetHeaders();
//using (StreamReader sr = new StreamReader(file))
//{
// while (!sr.EndOfStream)
// {
// var line = await sr.ReadLineAsync();
// var fields = ParseRow2(line, QuoteChar, Delimiter);
// var record = new DataRecord(fields, _tableName, headers);
// action?.Invoke(record);
// }
//}
}
}
public override async Task<DataRecord?> GetTestRecord()
{
var sourceFiles = await GetCsvFiles();
var file = sourceFiles.FirstOrDefault();
if (file != null)
{
var headers = await GetHeaders();
var filePath = Path.Combine(_inputDir, file);
using (var input = File.OpenRead(filePath))
{
using (var decopress = new DecompressionStream(input))
{
var ms = new MemoryStream();
decopress.CopyTo(ms);
ms.Seek(0, SeekOrigin.Begin);
StreamReader reader = new StreamReader(ms);
var line = await reader.ReadLineAsync();
var fields = ParseRow2(line, QuoteChar, Delimiter);
var record = new DataRecord(fields, _tableName, headers);
return record;
}
}
//using (var fs = File.OpenRead(filePath))
//{
// using (StreamReader sr = new StreamReader(fs))
// {
// var line = await sr.ReadLineAsync();
// var fields = ParseRow2(line, QuoteChar, Delimiter);
// var record = new DataRecord(fields, _tableName, headers);
// return record;
// }
//}
}
return null;
}
public void Dispose()
{
//_reader.Dispose();
}
}
}

View File

@ -0,0 +1,28 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace ConsoleApp2.SimulationService
{
public static partial class DataHelper
{
public static short[] shareKeys = {23000, 23040, 23070, 23100, 24000, 24040, 24070, 24100, 25000, 25040, 25070, 25100 };
public static int[] companyIds = { 1, 2, 3, 4 };
private static T getArrayValue<T>(int index, T[] array)//按index取数据,超过数组长度,index从0开始再取
{
return array[index % array.Length];
}
public static short GetShareKey(int index)
{
return getArrayValue(index, shareKeys);
}
public static int GetCompanyId(int index)
{
return getArrayValue(index, companyIds);
}
}
}

View File

@ -0,0 +1,173 @@
using ConsoleApp2.Const;
using ConsoleApp2.Helpers;
using ConsoleApp2.HostedServices;
using ConsoleApp2.HostedServices.Abstractions;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using System.Text.RegularExpressions;
namespace ConsoleApp2.SimulationService
{
public class SimulationInputService : IInputService
{
private readonly ILogger _logger;
private readonly IOptions<DataInputOptions> _dataInputOptions;
private readonly IOptions<InputTableOptions> _tableOptions;
private readonly DataRecordQueue _producerQueue;
private readonly ProcessContext _context;
public SimulationInputService(ILogger<InputService> logger,
IOptions<DataInputOptions> dataInputOptions,
IOptions<InputTableOptions> tableOptions,
[FromKeyedServices(ProcessStep.Producer)] DataRecordQueue producerQueue,
ProcessContext context)
{
_logger = logger;
_dataInputOptions = dataInputOptions;
_tableOptions = tableOptions;
_producerQueue = producerQueue;
_context = context;
}
public async Task ExecuteAsync(CancellationToken cancellationToken)
{
var inputDir = _dataInputOptions.Value.InputDir;
_logger.LogInformation("***** simulation input service start, working dir: {InputDir}, thread id: {ThreadId} *****", inputDir, Environment.CurrentManagedThreadId);
var files = Directory.GetFiles(inputDir);
if (files.Length == 0)
{
_logger.LogInformation("No source files found in {InputDir}", inputDir);
return;
}
foreach (var tableName in _tableOptions.Value.TableInfoConfig.Keys)
{
//_logger.LogInformation("Working sql file: {SqlPath}", sqlPath);
//var headers = await DumpDataHelper.GetCsvHeadersFromSqlFileAsync(sqlPath);
//var sqlFileSource = _dataInputOptions.Value.CreateSource?.Invoke(sqlPath,null);
//var headers =await sqlFileSource?.GetHeaders();
//var csvFiles = await DumpDataHelper.GetCsvFileNamesFromSqlFileAsync(sqlPath);
//var csvFiles =await sqlFileSource?.GetCsvFiles();
//foreach (var csvFile in csvFiles)
//{
//var csvPath = Path.Combine(inputDir, csvFile);
//// var source = new JsvSource(csvPath, headers, _logger);
//var source = new CsvSource(csvPath, headers, _csvOptions.Value.Delimiter, _csvOptions.Value.QuoteChar, _logger);
//while (await source.ReadAsync())
//{
// _context.AddInput();
// _producerQueue.Enqueue(source.Current);
// if (cancellationToken.IsCancellationRequested)
// return;
//}
//var csvPath = Path.Combine(inputDir, csvFile);
//var tableName = DumpDataHelper.GetTableName(csvPath);
//var dataCount = 1200000000L;//当前表要生成的总数据量
var dataCount = _tableOptions.Value.TableInfoConfig[tableName].SimulaRowCount;//当前表要生成的总数据量
var companyTotallCount = 1000;//当前表每个公司生成的总数据量
var tempRecords = new List<DataRecord>();
var sk = DataHelper.shareKeys.First();
var companyID = DataHelper.companyIds.First();
var shareKeyInterval = 20000;//每个sharekey的数据量
var getShareKeyTimes = 0;//sharekey生成的次数,每生成一次改变sharekey的值
var getCompanyIDTimes = 0;//公司生成的次数,每生成一次改变companyID的值
var shareKeyIntervalCount = 0;
//CsvSource source;
//switch (_dataInputOptions.Value.FileType)
//{
// case InputFileType.CSV:
// source=new CsvSource(csvPath, headers, _csvOptions.Value.Delimiter, _csvOptions.Value.QuoteChar, _logger);
// break;
// case InputFileType.JWT:
// source = new JwtSource(csvPath, headers, _csvOptions.Value.Delimiter, _csvOptions.Value.QuoteChar, _logger);
// break;
// default: break;
//}
//var source = new JwtSource(csvPath, headers, _csvOptions.Value.Delimiter, _csvOptions.Value.QuoteChar, _logger);
var source = _dataInputOptions.Value.CreateSource?.Invoke(tableName);
var testRecord =await source.GetTestRecord();
for (long i = 1; i <= dataCount; i++)
{
shareKeyIntervalCount++;
if (shareKeyIntervalCount > shareKeyInterval)
{
sk = DataHelper.GetShareKey(getShareKeyTimes);
getShareKeyTimes++;
shareKeyIntervalCount = 0;
}
var fields = new string[testRecord.Fields.Length];
Array.Copy(testRecord.Fields, fields, testRecord.Fields.Length);
var record = new DataRecord(fields, testRecord.TableName, testRecord.Headers, companyID);
//更新record的ID、OrderNo,ShardKey值
if (record.Headers.Contains("ID"))
{
var index = Array.IndexOf(record.Headers, "ID");
if (index > -1)
{
record.Fields[index] = i.ToString();
}
}
if (record.TableName == "order_box_block" && record.Headers.Contains("BoxID"))
{
var index = Array.IndexOf(record.Headers, "BoxID");
if (index > -1)
{
record.Fields[index] = i.ToString();
}
}
if ((record.TableName == "order_block_plan_item" || record.TableName == "order_package_item") && record.Headers.Contains("ItemID"))
{
var index = Array.IndexOf(record.Headers, "ItemID");
if (index > -1)
{
record.Fields[index] = i.ToString();
}
}
if (record.TableName == "order" && record.Headers.Contains("OrderNo"))
{
var index = Array.IndexOf(record.Headers, "OrderNo");
if (index > -1)
{
record.Fields[index] = i.ToString();
}
}
if (record.Headers.Contains("ShardKey"))
{
var index = Array.IndexOf(record.Headers, "ShardKey");
if (index > -1)
{
record.Fields[index] = sk.ToString();
}
}
tempRecords.Add(record);
if (tempRecords.Count >= companyTotallCount || i >= dataCount - 1)
{
foreach (var rc in tempRecords)
{
_context.AddInput();
_producerQueue.Enqueue(rc);
if (cancellationToken.IsCancellationRequested)
return;
}
tempRecords.Clear();
companyID = DataHelper.GetCompanyId(getCompanyIDTimes);
getCompanyIDTimes++;
}
}
_logger.LogInformation("table:'{tableName}' simulation input completed", tableName);
//}
//_logger.LogInformation("File '{File}' input completed", Path.GetFileName(sqlPath));
}
_context.CompleteInput();
_logger.LogInformation("***** Csv input service completed *****");
}
}
}

View File

@ -0,0 +1,10 @@
{
"CmdOptions": {
"InputFileType": "CSV",
"InputDir": "D:/MyDumper-ZST",
"TaskCount": 1,
"FlushCount": 100,
"Isutf8mb4": true,
"OldestShardKey": 22000
}
}