Update
This commit is contained in:
106
ConsoleApp2/Services/ETL/CsvReader.cs
Normal file
106
ConsoleApp2/Services/ETL/CsvReader.cs
Normal file
@@ -0,0 +1,106 @@
|
||||
using System.Text;
|
||||
using ConsoleApp2.HostedServices.Abstractions;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace ConsoleApp2.Services.ETL;
|
||||
|
||||
/// <summary>
/// Reads delimiter-separated text line by line and exposes each line as a <see cref="DataRecord"/>.
/// </summary>
public class CsvReader : IDataReader
{
    /// <summary>Path of the source file; <c>null</c> when the reader was created from a stream.</summary>
    protected readonly string? FilePath;

    /// <summary>Text reader over the source. It is only materialised on the first read.</summary>
    protected readonly Lazy<StreamReader> Reader;

    protected readonly ILogger? Logger;
    protected readonly string TableName;

    // File stream opened by the file-path constructor. It is tracked separately from the lazy
    // reader so the handle is released even if no line was ever read (for example when a derived
    // class hides Reader and never touches this one).
    private readonly FileStream? _ownedStream;

    /// <summary>The record produced by the most recent successful <see cref="ReadAsync"/> call.</summary>
    public DataRecord Current { get; protected set; } = null!;

    /// <summary>Column names attached to every record produced by this reader.</summary>
    public string[] Headers { get; }

    /// <summary>The unparsed text of the most recently read line.</summary>
    public string? CurrentRaw { get; protected set; }

    /// <summary>Field delimiter. Only its first character is used by <see cref="ReadAsync"/>'s parsing.</summary>
    public string Delimiter { get; }

    /// <summary>Character that opens and closes a quoted section.</summary>
    public char QuoteChar { get; }

    /// <summary>
    /// Creates a reader over an existing stream. The stream is wrapped in a <see cref="StreamReader"/>
    /// on the first read.
    /// </summary>
    public CsvReader(Stream stream, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
        : this(tableName, headers, delimiter, quoteChar, logger)
    {
        Reader = new Lazy<StreamReader>(() => new StreamReader(stream));
    }

    /// <summary>
    /// Creates a reader over a file. The file is opened immediately, so a missing or inaccessible
    /// file fails here rather than on the first read.
    /// </summary>
    public CsvReader(string filePath, string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
        : this(tableName, headers, delimiter, quoteChar, logger)
    {
        var fs = File.OpenRead(filePath);
        FilePath = filePath;
        _ownedStream = fs;
        Reader = new Lazy<StreamReader>(() => new StreamReader(fs));
    }

    // Shared initialisation; the public constructors assign Reader afterwards.
    private CsvReader(string tableName, string[] headers, string delimiter = ",", char quoteChar = '"', ILogger? logger = null)
    {
        TableName = tableName;
        Headers = headers;
        Logger = logger;
        Delimiter = delimiter;
        QuoteChar = quoteChar;
        Reader = null!;
    }

    /// <summary>
    /// Reads the next line and parses it into <see cref="Current"/>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a record was read; <c>false</c> at the end of the input or when the next
    /// line is empty or whitespace-only.
    /// </returns>
    public virtual async ValueTask<bool> ReadAsync()
    {
        var line = await Reader.Value.ReadLineAsync();
        if (string.IsNullOrWhiteSpace(line))
            return false;

        CurrentRaw = line;

        var fields = ParseRow(line, QuoteChar, Delimiter);
        Current = new DataRecord(fields, TableName, Headers) { RawField = line };
        return true;
    }

    /// <summary>
    /// Splits one line into its fields.
    /// </summary>
    /// <param name="source">The raw line.</param>
    /// <param name="quoteChar">Character that toggles a quoted section; delimiters inside quotes do not split.</param>
    /// <param name="delimiter">Field delimiter; only the first character is compared.</param>
    /// <returns>
    /// The fields in order. Quote characters and backslashes are kept in the field text. A backslash
    /// prevents the character right after it from being treated as a quote or as another escape;
    /// it does not prevent a delimiter outside quotes from splitting.
    /// </returns>
    public string[] ParseRow(ReadOnlySpan<char> source, char quoteChar, string delimiter)
    {
        var fields = new List<string>();
        var current = new StringBuilder();
        var inQuotes = false;
        var escaped = false;

        foreach (var ch in source)
        {
            if (!escaped && ch == '\\')
            {
                // Remember the escape and keep the backslash in the output.
                escaped = true;
                current.Append('\\');
                continue;
            }

            if (!escaped && ch == quoteChar)
            {
                inQuotes = !inQuotes;
                current.Append(ch);
                continue;
            }

            if (!inQuotes && ch == delimiter[0])
            {
                fields.Add(current.ToString());
                current.Clear();
            }
            else
            {
                current.Append(ch);
            }

            escaped = false;
        }

        // The last field has no trailing delimiter.
        fields.Add(current.ToString());
        return fields.ToArray();
    }

    /// <summary>
    /// Releases the text reader (if it was created) and any file stream opened by this instance.
    /// </summary>
    public virtual void Dispose()
    {
        if (Reader.IsValueCreated)
            Reader.Value.Dispose();

        // Disposing an already-closed stream is a no-op, so this is safe after the line above.
        _ownedStream?.Dispose();
    }
}
|
46
ConsoleApp2/Services/ETL/DataReaderFactory.cs
Normal file
46
ConsoleApp2/Services/ETL/DataReaderFactory.cs
Normal file
@@ -0,0 +1,46 @@
|
||||
using ConsoleApp2.HostedServices.Abstractions;
|
||||
using ConsoleApp2.Options;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace ConsoleApp2.Services.ETL;
|
||||
|
||||
/// <summary>
/// Creates <see cref="IDataReader"/> instances for ZSTD sources: either a plain
/// <see cref="ZstReader"/> or, when mocking is enabled in <see cref="DataInputOptions"/>,
/// a <see cref="ZstMockReader"/>.
/// </summary>
public class DataReaderFactory
{
    private readonly ILogger<DataReaderFactory> _logger;
    private readonly IOptions<DataInputOptions> _options;

    public DataReaderFactory(ILogger<DataReaderFactory> logger, IOptions<DataInputOptions> options)
    {
        _logger = logger;
        _options = options;
    }

    /// <summary>
    /// Creates a reader for the given file.
    /// </summary>
    /// <param name="filePath">Path of the ZSTD file to read.</param>
    /// <param name="tableName">Table name attached to the records.</param>
    /// <param name="headers">Column names attached to the records.</param>
    /// <exception cref="ApplicationException">Mocking is enabled but no per-table mock configuration exists.</exception>
    public IDataReader CreateReader(string filePath, string tableName, string[] headers)
    {
        var options = _options.Value;

        if (options.UseMock)
        {
            if (options.TableMockConfig is null)
                throw new ApplicationException("未配置表模拟数据量级");

            _logger.LogDebug("***** Using {Type} data source *****", "ZSTD mock");

            var configured = options.TableMockConfig.GetValueOrDefault(tableName,
                new TableMockConfig { MockCount = 1, UseDeepCopy = false });

            // Build a per-call copy instead of writing the scaled count back into the configured
            // object: that object lives in the options, so mutating it would apply the multiplier
            // again on every subsequent call for the same table.
            var mockConfig = new TableMockConfig
            {
                MockCount = (long)Math.Ceiling(configured.MockCount * options.MockCountMultiplier),
                UseDeepCopy = configured.UseDeepCopy,
                AutoIncrementColumn = configured.AutoIncrementColumn,
            };

            return new ZstMockReader(mockConfig, filePath,
                tableName, headers, options.Delimiter, options.QuoteChar, _logger);
        }

        _logger.LogDebug("***** Using {Type} data source *****", "ZSTD");
        return new ZstReader(filePath, tableName, headers, options.Delimiter, options.QuoteChar, _logger);
    }
}
|
||||
|
||||
/// <summary>
/// Service-collection helpers for registering <see cref="DataReaderFactory"/>.
/// </summary>
public static class DataSourceFactoryExtensions
{
    /// <summary>
    /// Registers <see cref="DataReaderFactory"/> as a singleton.
    /// </summary>
    /// <param name="services">The collection to add the registration to.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection AddDataSourceFactory(this IServiceCollection services)
        => services.AddSingleton<DataReaderFactory>();
}
|
215
ConsoleApp2/Services/ETL/MySqlDestination.cs
Normal file
215
ConsoleApp2/Services/ETL/MySqlDestination.cs
Normal file
@@ -0,0 +1,215 @@
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using ConsoleApp2.Helpers;
|
||||
using ConsoleApp2.Options;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using MySqlConnector;
|
||||
|
||||
namespace ConsoleApp2.Services.ETL;
|
||||
|
||||
/// <summary>
/// MySQL export: buffers <see cref="DataRecord"/>s per table and writes them to the database
/// as batched INSERT statements.
/// </summary>
public partial class MySqlDestination : IDisposable, IAsyncDisposable
{
    // Records waiting to be flushed, keyed by table name.
    private readonly Dictionary<string, IList<DataRecord>> _recordCache;
    private readonly MySqlConnection _conn;
    private readonly ILogger _logger;
    private readonly IOptions<DatabaseOutputOptions> _options;
    private readonly ErrorRecorder.OutputErrorRecorder _outputErrorRecorder;
    private readonly ProcessContext _context;

    /// <summary>
    /// Creates the destination and opens the connection synchronously.
    /// </summary>
    public MySqlDestination(
        string connStr,
        ILogger logger,
        IOptions<DatabaseOutputOptions> options,
        ErrorRecorder.OutputErrorRecorder outputErrorRecorder,
        ProcessContext context)
    {
        _conn = new MySqlConnection(connStr);
        _conn.Open();
        _recordCache = new Dictionary<string, IList<DataRecord>>();
        _logger = logger;
        _options = options;
        _outputErrorRecorder = outputErrorRecorder;
        _context = context;
    }

    /// <summary>
    /// Adds a record to the in-memory cache of its table. Nothing is sent to the database
    /// until <see cref="FlushAsync"/> is called.
    /// </summary>
    public Task WriteRecordAsync(DataRecord record)
    {
        _recordCache.AddOrUpdate(record.TableName, [record], (_, value) =>
        {
            value.Add(record);
            return value;
        });
        return Task.CompletedTask;
    }

    /// <summary>
    /// Adds several records to the cache, one at a time.
    /// </summary>
    public async Task WriteRecordsAsync(IEnumerable<DataRecord> records)
    {
        foreach (var record in records)
        {
            await WriteRecordAsync(record);
        }
    }

    /// <summary>
    /// Serialises all cached records to SQL and executes the statements, then clears the cache.
    /// </summary>
    /// <param name="maxAllowPacket">Upper bound, in characters, for a single generated SQL batch.</param>
    /// <remarks>
    /// A failing statement is logged, recorded through the error recorder and the process context,
    /// and does not stop the remaining statements. An exception thrown while building the SQL
    /// (or while recording an error) is logged and rethrown; the cache is not cleared in that case.
    /// </remarks>
    public async Task FlushAsync(int maxAllowPacket)
    {
        if (_recordCache.Count == 0)
            return;

        var cmd = _conn.CreateCommand();
        cmd.CommandTimeout = 3 * 60;

        try
        {
            // Materialise first so that a serialisation failure happens before anything is executed.
            var excuseList = GetExcuseList(_recordCache, maxAllowPacket).ToList();
            foreach (var insertSql in excuseList)
            {
                cmd.CommandText = insertSql;
                try
                {
                    await cmd.ExecuteNonQueryAsync();
                }
                catch (Exception e)
                {
                    _logger.LogError(e, "插入数据库时发生错误, sql: {Sql}", cmd.CommandText.Omit(1000));
                    _context.AddException(e);

                    // Recover the table name from the statement so the error can be filed per table.
                    var match = MatchTableName().Match(cmd.CommandText);
                    if (match is { Success: true, Groups.Count: > 1 })
                    {
                        var tableName = match.Groups[1].Value;
                        await _outputErrorRecorder.LogErrorSqlAsync(cmd.CommandText, tableName, e);
                    }
                    else await _outputErrorRecorder.LogErrorSqlAsync(cmd.CommandText, e);
                }
            }

            _recordCache.Clear();
        }
        catch (Exception e)
        {
            _logger.LogError(e, "序列化记录时发生错误");
            throw;
        }
        finally
        {
            await cmd.DisposeAsync();
        }
    }

    [GeneratedRegex("INSERT INTO `([^`]+)`")]
    private static partial Regex MatchTableName();

    /// <summary>
    /// Lazily builds the SQL batches for the given records.
    /// </summary>
    /// <param name="tableRecords">Records grouped by table name. Tables with no records are skipped.</param>
    /// <param name="maxAllowPacket">
    /// Size limit for one batch, compared against the character count of the generated text.
    /// NOTE(review): the server limit is presumably in bytes, so non-ASCII content could still
    /// exceed it — confirm.
    /// </param>
    /// <returns>
    /// One or more multi-row INSERT batches per table. When adding a record would exceed
    /// <paramref name="maxAllowPacket"/>, the current batch is emitted and a new INSERT is started
    /// with that record. A batch always contains at least one record: a single record that is
    /// larger than the limit is emitted on its own rather than producing an empty
    /// <c>VALUES</c> clause and retrying forever.
    /// </returns>
    public IEnumerable<string> GetExcuseList(IDictionary<string, IList<DataRecord>> tableRecords,int maxAllowPacket)
    {
        var sb = new StringBuilder("SET AUTOCOMMIT = 1;\n");
        foreach (var (tableName, records) in tableRecords)
        {
            if (records.Count == 0)
                continue;

            // INSERT INTO ... VALUES >>>
            AppendInsertHeader(sb, tableName, records[0]);
            var hasRows = false;

            // ([FIELDS]), >>>
            for (var recordIdx = 0; recordIdx < records.Count; recordIdx++)
            {
                var recordSb = BuildValueTuple(records[recordIdx]);

                // If the batch is about to exceed the limit, emit it and start a new INSERT that
                // begins with the current record. The check is skipped while the batch is still
                // empty so that an oversized record cannot stall the iterator.
                // The constant 23 presumably reserves room for the batch trailer — TODO confirm.
                if (hasRows && sb.Length + recordSb.Length + 23 > maxAllowPacket)
                {
                    sb.Append(';').AppendLine();
                    sb.Append("SET AUTOCOMMIT = 1;");
                    yield return sb.ToString();
                    sb.Clear();

                    AppendInsertHeader(sb, tableName, records[0]);
                    hasRows = false;
                }

                if (hasRows)
                    sb.Append(',').AppendLine();
                hasRows = true;
                sb.Append(recordSb); // StringBuilder.Append(StringBuilder) does not allocate an intermediate string
            }

            sb.Append(';');
            sb.Append("COMMIT;");
            yield return sb.ToString();
            sb.Clear();
        }
    }

    // Appends "INSERT INTO `table`(`col1`,`col2`,...) VALUES " using the headers of the given record.
    private static void AppendInsertHeader(StringBuilder sb, string tableName, DataRecord first)
    {
        sb.Append($"INSERT INTO `{tableName}`(");
        for (var i = 0; i < first.Headers.Count; i++)
        {
            var header = first.Headers[i];
            sb.Append($"`{header}`");
            if (i != first.Headers.Count - 1)
                sb.Append(',');
        }

        sb.Append(") VALUES ");
    }

    // Builds the "(v1,v2,...)" tuple for one record, applying the per-column-type formatting.
    // Field text is written into the SQL as-is (no escaping is applied here), so the input is
    // assumed to be already SQL-safe — NOTE(review): confirm for any untrusted source.
    private StringBuilder BuildValueTuple(DataRecord record)
    {
        var recordSb = new StringBuilder();
        recordSb.Append('(');
        for (var fieldIdx = 0; fieldIdx < record.Fields.Count; fieldIdx++)
        {
            var field = record.Fields[fieldIdx];

            // Special columns are handled here.
            if (field == "\\N")
            {
                // "\N" is written as SQL NULL regardless of the column type.
                recordSb.Append("NULL");
            }
            else
            {
                switch (_options.Value.GetColumnType(record.TableName, record.Headers[fieldIdx]))
                {
                    case ColumnType.Text:
                        // Emitted as a hex literal, so the field is assumed to be hex digits.
                        if (string.IsNullOrEmpty(field))
                            recordSb.Append("''");
                        else recordSb.Append($"_utf8mb4 0x{field}");
                        break;
                    case ColumnType.Blob:
                        if (string.IsNullOrEmpty(field))
                            recordSb.Append("''");
                        else recordSb.Append($"0x{field}");
                        break;
                    case ColumnType.Json:
                        if (string.IsNullOrEmpty(field))
                            recordSb.Append("'[]'"); // JObject or JArray?
                        else if (_options.Value.TreatJsonAsHex)
                            recordSb.Append($"_utf8mb4 0x{field}");
                        else recordSb.Append(field); // no line break: keep the tuple on one line like every other branch
                        break;
                    case ColumnType.UnDefine:
                    default:
                        recordSb.Append(field);
                        break;
                }
            }

            if (fieldIdx != record.Fields.Count - 1)
                recordSb.Append(',');
        }

        recordSb.Append(')');
        return recordSb;
    }

    /// <summary>Closes and disposes the connection.</summary>
    public void Dispose()
    {
        _conn.Close();
        _conn.Dispose();
    }

    /// <summary>Asynchronously closes and disposes the connection.</summary>
    public async ValueTask DisposeAsync()
    {
        await _conn.CloseAsync();
        await _conn.DisposeAsync();
    }
}
|
64
ConsoleApp2/Services/ETL/ZstMockReader.cs
Normal file
64
ConsoleApp2/Services/ETL/ZstMockReader.cs
Normal file
@@ -0,0 +1,64 @@
|
||||
using ConsoleApp2.Options;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace ConsoleApp2.Services.ETL;
|
||||
|
||||
/// <summary>
/// Takes the first row of the supplied ZST source and replicates it into the configured
/// number of records.
/// </summary>
public class ZstMockReader : ZstReader
{
    // Number of records handed out so far.
    private long _currentCount;
    // Number of records to hand out in total.
    private readonly long _mockCount;
    // Record that every generated record is derived from; loaded on the first read.
    private DataRecord? _template;
    // When true each generated record is a fresh clone; otherwise the template instance is reused.
    private readonly bool _deepCopy;
    // Columns whose numeric value is increased by one for every generated record (deep copy only).
    private readonly string[]? _autoIncrementColumn;

    // Candidate values for the randomised "CompanyID" column.
    private static readonly IReadOnlyList<int> Range = [500, 1500, 2500];

    public ZstMockReader(TableMockConfig mockConfig, string filePath, string tableName, string[] headers, string delimiter = ",", char quoteChar = '\"', ILogger? logger = null) : base(filePath, tableName, headers, delimiter, quoteChar, logger)
    {
        _mockCount = mockConfig.MockCount;
        _deepCopy = mockConfig.UseDeepCopy;
        _autoIncrementColumn = mockConfig.AutoIncrementColumn;
    }

    public ZstMockReader(TableMockConfig mockConfig, Stream stream, string tableName, string[] headers, string delimiter = ",", char quoteChar = '\"', ILogger? logger = null) : base(stream, tableName, headers, delimiter, quoteChar, logger)
    {
        _mockCount = mockConfig.MockCount;
        _deepCopy = mockConfig.UseDeepCopy;
        _autoIncrementColumn = mockConfig.AutoIncrementColumn;
    }

    /// <summary>
    /// Produces the next mock record in <see cref="CsvReader.Current"/>.
    /// </summary>
    /// <returns>
    /// <c>true</c> while a record is available. The first call always returns the row read from
    /// the source; later calls return generated records until the configured count is reached.
    /// </returns>
    /// <exception cref="InvalidOperationException">The source contains no row to use as a template.</exception>
    /// <exception cref="ApplicationException">Cloning a record did not yield a <see cref="DataRecord"/>.</exception>
    public override async ValueTask<bool> ReadAsync()
    {
        if (_template is null)
        {
            if (!await base.ReadAsync())
                throw new InvalidOperationException("所提供的ZST源为空,无法生成模板数据");
            _template = Current.Clone() as DataRecord;
            if (_template is null)
                throw new ApplicationException("记录拷贝失败");
            _currentCount++;
            return true;
        }

        // Stop before producing a record the caller would never see. Returning false only after
        // assigning Current would drop the last record and yield one fewer than configured.
        if (_currentCount >= _mockCount)
            return false;

        if (_deepCopy)
        {
            Current = _template.Clone() as DataRecord ?? throw new ApplicationException("记录拷贝失败");
            if (_autoIncrementColumn is not null)
            {
                foreach (var column in _autoIncrementColumn)
                {
                    Current[column] = (Convert.ToInt64(Current[column]) + 1).ToString();
                    // The incremented record becomes the next template so values keep counting up.
                    _template = Current;
                }
            }

            Current["CompanyID"] = Range[Random.Shared.Next(0, Range.Count)].ToString(); // random CompanyID
        }
        else Current = _template;

        _currentCount++;
        return true;
    }
}
|
48
ConsoleApp2/Services/ETL/ZstReader.cs
Normal file
48
ConsoleApp2/Services/ETL/ZstReader.cs
Normal file
@@ -0,0 +1,48 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZstdSharp;
|
||||
|
||||
namespace ConsoleApp2.Services.ETL;
|
||||
|
||||
/// <summary>
/// Decompresses a ZST source and reads CSV data from it.
/// </summary>
public class ZstReader : CsvReader
{
    /// <summary>
    /// Text reader over the decompressed data. It hides the base field, which reads the
    /// compressed bytes and is therefore not used for line reading here.
    /// </summary>
    protected new readonly Lazy<StreamReader> Reader;

    // Tracked separately from the lazy reader so it can be released even when no line was read.
    private readonly DecompressionStream _decompressor;

    public ZstReader(string filePath, string tableName, string[] headers, string delimiter = ",", char quoteChar = '\"', ILogger? logger = null)
        : base(filePath, tableName, headers, delimiter, quoteChar, logger)
    {
        // The base constructor has already opened the file. Reuse that handle instead of opening
        // a second one; materialising the base reader here also means base.Dispose() closes the file.
        var fileStream = base.Reader.Value.BaseStream;
        var ds = new DecompressionStream(fileStream);
        _decompressor = ds;
        Reader = new Lazy<StreamReader>(() => new StreamReader(ds));
    }

    public ZstReader(Stream stream, string tableName, string[] headers, string delimiter = ",", char quoteChar = '\"', ILogger? logger = null)
        : base(stream, tableName, headers, delimiter, quoteChar, logger)
    {
        var ds = new DecompressionStream(stream);
        _decompressor = ds;
        Reader = new Lazy<StreamReader>(() => new StreamReader(ds));
    }

    /// <summary>
    /// Reads the next decompressed line and parses it into <see cref="CsvReader.Current"/>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a record was read; <c>false</c> at the end of the input or when the next
    /// line is empty or whitespace-only.
    /// </returns>
    public override async ValueTask<bool> ReadAsync()
    {
        // Same steps as the base implementation, but against the decompressing reader declared above.
        var line = await Reader.Value.ReadLineAsync();
        if (string.IsNullOrWhiteSpace(line))
            return false;

        CurrentRaw = line;

        var fields = ParseRow(line, QuoteChar, Delimiter);
        Current = new DataRecord(fields, TableName, Headers) { RawField = line };
        return true;
    }

    /// <summary>
    /// Releases the decompression stream and then the resources held by the base reader.
    /// </summary>
    public override void Dispose()
    {
        // Disposing the text reader also disposes the decompression stream it wraps; when the
        // reader was never created the decompression stream has to be released directly.
        if (Reader.IsValueCreated)
            Reader.Value.Dispose();
        else
            _decompressor.Dispose();

        // Release the underlying source last, after everything layered on top of it.
        base.Dispose();
    }
}
|
Reference in New Issue
Block a user