This commit is contained in:
陈梓阳 2023-12-28 15:18:03 +08:00
commit 6b88f5bd40
20 changed files with 1544 additions and 0 deletions

478
.gitignore vendored Normal file
View File

@ -0,0 +1,478 @@
# ---> JetBrains
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
.idea/
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
# ---> VisualStudio
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
# User-specific files
*.rsuser
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Mono auto generated files
mono_crash.*
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
[Ww][Ii][Nn]32/
[Aa][Rr][Mm]/
[Aa][Rr][Mm]64/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
[Ll]ogs/
# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# Visual Studio 2017 auto generated files
Generated\ Files/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUnit
*.VisualState.xml
TestResult.xml
nunit-*.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/
# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
# ASP.NET Scaffolding
ScaffoldingReadMe.txt
# StyleCop
StyleCopReport.xml
# Files built by Visual Studio
*_i.c
*_p.c
*_h.h
*.ilk
*.meta
*.obj
*.iobj
*.pch
*.pdb
*.ipdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*_wpftmp.csproj
*.log
*.tlog
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# Visual Studio Trace Files
*.e2e
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json
# Coverlet is a free, cross platform Code Coverage Tool
coverage*.json
coverage*.xml
coverage*.info
# Visual Studio code coverage results
*.coverage
*.coveragexml
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/
# NuGet Packages
*.nupkg
# NuGet Symbol Packages
*.snupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx
*.appxbundle
*.appxupload
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!?*.[Cc]ache/
# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs
# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
ServiceFabricBackup/
*.rptproj.bak
# SQL Server files
*.mdf
*.ldf
*.ndf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
*.rptproj.rsuser
*- [Bb]ackup.rdl
*- [Bb]ackup ([0-9]).rdl
*- [Bb]ackup ([0-9][0-9]).rdl
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw
# Visual Studio 6 auto-generated project file (contains which files were open etc.)
*.vbp
# Visual Studio 6 workspace and project file (working project files containing files to include in project)
*.dsw
*.dsp
# Visual Studio 6 technical files
*.ncb
*.aps
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# Paket dependency manager
.paket/paket.exe
paket-files/
# FAKE - F# Make
.fake/
# CodeRush personal settings
.cr/personal
# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Tabs Studio
*.tss
# Telerik's JustMock configuration file
*.jmconfig
# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
# OpenCover UI analysis results
OpenCover/
# Azure Stream Analytics local run output
ASALocalRun/
# MSBuild Binary and Structured Log
*.binlog
# NVidia Nsight GPU debugger configuration file
*.nvuser
# MFractors (Xamarin productivity tool) working folder
.mfractor/
# Local History for Visual Studio
.localhistory/
# Visual Studio History (VSHistory) files
.vshistory/
# BeatPulse healthcheck temp database
healthchecksdb
# Backup folder for Package Reference Convert tool in Visual Studio 2017
MigrationBackup/
# Ionide (cross platform F# VS Code tools) working folder
.ionide/
# Fody - auto-generated XML schema
FodyWeavers.xsd
# VS Code files for those working on multiple tools
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
# Local History for Visual Studio Code
.history/
# Windows Installer files from build outputs
*.cab
*.msi
*.msix
*.msm
*.msp
# JetBrains Rider
*.sln.iml

16
ConsoleApp2.sln Normal file
View File

@ -0,0 +1,16 @@

Microsoft Visual Studio Solution File, Format Version 12.00
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ConsoleApp2", "ConsoleApp2\ConsoleApp2.csproj", "{155E4B04-E88C-4BA4-AED2-B13E0A0432B5}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{155E4B04-E88C-4BA4-AED2-B13E0A0432B5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{155E4B04-E88C-4BA4-AED2-B13E0A0432B5}.Debug|Any CPU.Build.0 = Debug|Any CPU
{155E4B04-E88C-4BA4-AED2-B13E0A0432B5}.Release|Any CPU.ActiveCfg = Release|Any CPU
{155E4B04-E88C-4BA4-AED2-B13E0A0432B5}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<DockerDefaultTargetOS>Linux</DockerDefaultTargetOS>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="MySqlConnector" Version="2.3.3" />
<PackageReference Include="Serilog" Version="3.1.2-dev-02097" />
<PackageReference Include="Serilog.Extensions.Hosting" Version="8.0.0" />
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
<PackageReference Include="ServiceStack.Text" Version="8.0.0" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,76 @@
using System.Diagnostics;
using ConsoleApp2.Helpers;
using ConsoleApp2.Services;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace ConsoleApp2;
public class CsvConversion : BackgroundService
{
private readonly ILogger _logger;
private readonly IOptions<CsvOptions> _csvOptions;
private readonly DataTransformService _transform;
private readonly TaskManager _taskManager;
public CsvConversion(ILogger<CsvConversion> logger,
IOptions<CsvOptions> csvOptions,
DataTransformService transform,
TaskManager taskManager)
{
_logger = logger;
_csvOptions = csvOptions;
_transform = transform;
_taskManager = taskManager;
}
protected override async Task ExecuteAsync(CancellationToken cancellationToken)
{
var sw = Stopwatch.StartNew();
var inputDir = _csvOptions.Value.InputDir;
_logger.LogInformation("Working dir: {InputDir}", inputDir);
var files = Directory.GetFiles(inputDir).Where(s => s.EndsWith(".sql") && !s.Contains("schema")).ToArray();
if (files.Length == 0)
{
_logger.LogInformation("No sql files found in {InputDir}", inputDir);
return;
}
foreach(var sqlPath in files)
{
_logger.LogInformation("Working sql file: {SqlPath}", sqlPath);
var headers = await DumpDataHelper.GetCsvHeadersFromSqlFileAsync(sqlPath);
var csvFiles = await DumpDataHelper.GetCsvFileNamesFromSqlFileAsync(sqlPath);
var queue = new DataRecordQueue();
foreach (var csvFile in csvFiles)
{
var csvPath = Path.Combine(inputDir, csvFile);
var source = new JsvSource(csvPath, headers, _logger);
while (await source.ReadAsync())
{
queue.Enqueue(source.Current);
}
if (queue.Count > 200)
{
var queue1 = queue;
await _taskManager.CreateTask(async () => await _transform.ExecuteAsync(queue1, cancellationToken));
queue = new DataRecordQueue();
}
if (cancellationToken.IsCancellationRequested)
return;
}
_logger.LogInformation("File '{File}' input completed", Path.GetFileName(sqlPath));
}
_logger.LogInformation("***** Csv input service completed *****");
_logger.LogInformation("Elapsed: {Elapsed}", sw.Elapsed);
_taskManager.MainTaskCompleted = true;
}
}

View File

@ -0,0 +1,54 @@
namespace ConsoleApp2.Entities;
public class DataRecord
{
public static bool TryGetField(DataRecord record, string columnName, out string value)
{
value = string.Empty;
if (record.Headers is null)
throw new InvalidOperationException("Cannot get field when headers of a record have not been set.");
var idx = Array.IndexOf(record.Headers, columnName);
if (idx == -1)
return false;
value = record.Fields[idx];
return true;
}
public static string GetField(DataRecord record, string columnName)
{
if (record.Headers is null)
throw new InvalidOperationException("Headers have not been set.");
var idx = Array.IndexOf(record.Headers, columnName);
if (idx is -1)
throw new IndexOutOfRangeException("Column name not found in this record.");
return record.Fields[idx];
}
public string[] Fields { get; }
public string[]? Headers { get; }
public string TableName { get; }
public DataRecord(string[] fields, string tableName, string[]? headers = null)
{
if (headers is not null && fields.Length != headers.Length)
throw new ArgumentException(
$"The number of fields does not match the number of headers. Expected: {fields.Length} Got: {headers.Length}",
nameof(fields));
Fields = fields;
TableName = tableName;
Headers = headers;
}
public string this[int index] => Fields[index];
public string this[string columnName] => GetField(this, columnName);
public int Count => Fields.Length;
public bool TryGetField(string columnName, out string value) => TryGetField(this, columnName, out value);
}

View File

@ -0,0 +1,91 @@
using System.Text.RegularExpressions;
namespace ConsoleApp2.Helpers;
public static partial class DumpDataHelper
{
[GeneratedRegex(@"'.+\.dat'")]
private static partial Regex MatchDatFile();
[GeneratedRegex(@"\([^)]*\)")]
private static partial Regex MatchBrackets();
public static async Task<string[]> GetCsvHeadersFromSqlFileAsync(string filePath)
{
var txt = await File.ReadAllTextAsync(filePath);
var match = MatchBrackets().Match(txt);
return ParseHeader(match.ValueSpan);
}
private static string[] ParseHeader(ReadOnlySpan<char> headerStr)
{
headerStr = headerStr[1..^1];
Span<Range> ranges = stackalloc Range[50];
var count = headerStr.Split(ranges, ',');
var arr = new string[count];
for (var i = 0; i < count; i++)
{
arr[i] = headerStr[ranges[i]].Trim("@`").ToString(); // 消除列名的反引号,如果是变量则消除@
}
return arr;
}
public static string GetTableName(ReadOnlySpan<char> filePath)
{
filePath = filePath[(filePath.LastIndexOf('\\') + 1)..];
var firstDotIdx = -1;
var secondDotIdx = -1;
var times = 0;
for (var i = 0; i < filePath.Length; i++)
{
if (filePath[i] == '.')
{
++times;
if(times == 1)
firstDotIdx = i;
if (times == 2)
{
secondDotIdx = i;
break;
}
}
}
return filePath[(firstDotIdx+1)..secondDotIdx].ToString();
}
public static async Task<string[]> GetCsvFileNamesFromSqlFileAsync(string filePath)
{
var txt = await File.ReadAllTextAsync(filePath);
var matches = MatchDatFile().Matches(txt);
return matches.Select(match => match.ValueSpan[1..^1].ToString()).ToArray();
}
public static bool CheckHexField(string? str)
{
if (string.IsNullOrWhiteSpace(str))
return false;
if (str.StartsWith('\"'))
return false;
var isDigit = true;
foreach (var c in str)
{
if (!char.IsAsciiHexDigit(c))
return false;
if (!char.IsNumber(c))
isDigit = false;
}
if (isDigit)
return false;
return true;
}
}

View File

@ -0,0 +1,30 @@
namespace ConsoleApp2.Helpers;
public static class DictionaryExtensions
{
/// <summary>
/// 根据指定的键是否存在来添加或是更新字典
/// </summary>
/// <param name="this"></param>
/// <param name="key">指定的键</param>
/// <param name="addValue">如果指定的键不存在,则向字典添加该值</param>
/// <param name="updateFactory">如果指定的键存在,则根据该委托的返回值修改字典中对应的值</param>
/// <typeparam name="TKey"></typeparam>
/// <typeparam name="TValue"></typeparam>
/// <returns>添加或是修改后的值</returns>
public static TValue AddOrUpdate<TKey, TValue>(this IDictionary<TKey, TValue> @this, TKey key, TValue addValue,
Func<TKey, TValue, TValue> updateFactory)
{
if (!@this.TryGetValue(key, out var value))
{
@this.Add(key, addValue);
}
else
{
@this[key] = updateFactory(key, value);
}
return @this[key];
}
}

View File

@ -0,0 +1,47 @@
using System.Globalization;
using System.Text;
namespace ConsoleApp2.Helpers;
public static class StringExtensions
{
public static string Omit(this ReadOnlySpan<char> @this, int maxLength)
{
if (@this.Length > maxLength)
return @this[..maxLength].ToString() + "...";
return @this.ToString();
}
public static string Omit(this string @this, int maxLength) => Omit(@this.AsSpan(), maxLength);
public static string FromHex(ReadOnlySpan<char> hexString, Encoding? encoding = null)
{
encoding ??= Encoding.UTF8;
var realLength = 0;
for (var i = hexString.Length - 2; i >= 0; i -= 2)
{
var b = byte.Parse(hexString.Slice(i, 2), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
if (b != 0) //not NULL character
{
realLength = i + 2;
break;
}
}
var bytes = new byte[realLength / 2];
for (var i = 0; i < bytes.Length; i++)
{
bytes[i] = byte.Parse(hexString.Slice(i * 2, 2), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
}
return encoding.GetString(bytes);
}
public static bool CheckJsonHex(ReadOnlySpan<char> hexStr)
{
if (hexStr.Length < 2)
return false;
return FromHex(hexStr[..2]) is ['{'] or ['['];
}
}

View File

@ -0,0 +1,249 @@
using System.Security.Cryptography;
using System.Text;
namespace ConsoleApp2.Helpers;
public static class HashExtensions
{
/// <summary>
/// 计算32位MD5码
/// </summary>
/// <param name="word">字符串</param>
/// <param name="toUpper">返回哈希值格式 true英文大写false英文小写</param>
/// <returns></returns>
public static string ToMd5Hash(this string word, bool toUpper = true)
{
try
{
var MD5CSP = MD5.Create();
var bytValue = Encoding.UTF8.GetBytes(word);
var bytHash = MD5CSP.ComputeHash(bytValue);
MD5CSP.Clear();
//根据计算得到的Hash码翻译为MD5码
var sHash = "";
foreach (var t in bytHash)
{
long i = t / 16;
var sTemp = i > 9 ? ((char)(i - 10 + 0x41)).ToString() : ((char)(i + 0x30)).ToString();
i = t % 16;
if (i > 9)
{
sTemp += ((char)(i - 10 + 0x41)).ToString();
}
else
{
sTemp += ((char)(i + 0x30)).ToString();
}
sHash += sTemp;
}
//根据大小写规则决定返回的字符串
return toUpper ? sHash : sHash.ToLower();
}
catch (System.Exception ex)
{
throw new System.Exception(ex.Message);
}
}
public static string ToMd5Hash(this Stream stream, bool toUpper = true)
{
using var md5Hash = MD5.Create();
var bytes = md5Hash.ComputeHash(stream);
return ToHashString(bytes, toUpper);
}
/// <summary>
/// 计算SHA-1码
/// </summary>
/// <param name="word">字符串</param>
/// <param name="toUpper">返回哈希值格式 true英文大写false英文小写</param>
/// <returns></returns>
public static string ToSHA1Hash(this string word, bool toUpper = true)
{
try
{
var SHA1CSP = SHA1.Create();
var bytValue = Encoding.UTF8.GetBytes(word);
var bytHash = SHA1CSP.ComputeHash(bytValue);
SHA1CSP.Clear();
//根据计算得到的Hash码翻译为SHA-1码
var sHash = "";
foreach (var t in bytHash)
{
long i = t / 16;
var sTemp = i > 9 ? ((char)(i - 10 + 0x41)).ToString() : ((char)(i + 0x30)).ToString();
i = t % 16;
if (i > 9)
{
sTemp += ((char)(i - 10 + 0x41)).ToString();
}
else
{
sTemp += ((char)(i + 0x30)).ToString();
}
sHash += sTemp;
}
//根据大小写规则决定返回的字符串
return toUpper ? sHash : sHash.ToLower();
}
catch (System.Exception ex)
{
throw new System.Exception(ex.Message);
}
}
/// <summary>
/// 计算SHA-256码
/// </summary>
/// <param name="word">字符串</param>
/// <param name="toUpper">返回哈希值格式 true英文大写false英文小写</param>
/// <returns></returns>
public static string ToSHA256Hash(this string word, bool toUpper = true)
{
try
{
var SHA256CSP = SHA256.Create();
var bytValue = Encoding.UTF8.GetBytes(word);
var bytHash = SHA256CSP.ComputeHash(bytValue);
SHA256CSP.Clear();
//根据计算得到的Hash码翻译为SHA-1码
var sHash = "";
foreach (var t in bytHash)
{
long i = t / 16;
var sTemp = i > 9 ? ((char)(i - 10 + 0x41)).ToString() : ((char)(i + 0x30)).ToString();
i = t % 16;
if (i > 9)
{
sTemp += ((char)(i - 10 + 0x41)).ToString();
}
else
{
sTemp += ((char)(i + 0x30)).ToString();
}
sHash += sTemp;
}
//根据大小写规则决定返回的字符串
return toUpper ? sHash : sHash.ToLower();
}
catch (System.Exception ex)
{
throw new System.Exception(ex.Message);
}
}
/// <summary>
/// 计算SHA-256码
/// </summary>
/// <param name="stream"></param>
/// <param name="toUpper"></param>
/// <returns></returns>
public static string ToSHA256Hash(this Stream stream, bool toUpper = true)
{
using var sha256Hash = SHA256.Create();
var bytes = sha256Hash.ComputeHash(stream);
return ToHashString(bytes, toUpper);
}
/// <summary>
/// 计算SHA-384码
/// </summary>
/// <param name="word">字符串</param>
/// <param name="toUpper">返回哈希值格式 true英文大写false英文小写</param>
/// <returns></returns>
public static string ToSHA384Hash(this string word, bool toUpper = true)
{
try
{
var SHA384CSP = SHA384.Create();
var bytValue = Encoding.UTF8.GetBytes(word);
var bytHash = SHA384CSP.ComputeHash(bytValue);
SHA384CSP.Clear();
//根据计算得到的Hash码翻译为SHA-1码
var sHash = "";
foreach (var t in bytHash)
{
long i = t / 16;
var sTemp = i > 9 ? ((char)(i - 10 + 0x41)).ToString() : ((char)(i + 0x30)).ToString();
i = t % 16;
if (i > 9)
{
sTemp += ((char)(i - 10 + 0x41)).ToString();
}
else
{
sTemp += ((char)(i + 0x30)).ToString();
}
sHash += sTemp;
}
//根据大小写规则决定返回的字符串
return toUpper ? sHash : sHash.ToLower();
}
catch (System.Exception ex)
{
throw new System.Exception(ex.Message);
}
}
/// <summary>
/// 计算SHA-512码
/// </summary>
/// <param name="word">字符串</param>
/// <param name="toUpper">返回哈希值格式 true英文大写false英文小写</param>
/// <returns></returns>
public static string ToSHA512Hash(this string word, bool toUpper = true)
{
try
{
var SHA512CSP = SHA512.Create();
var bytValue = Encoding.UTF8.GetBytes(word);
var bytHash = SHA512CSP.ComputeHash(bytValue);
SHA512CSP.Clear();
//根据计算得到的Hash码翻译为SHA-1码
var sHash = "";
foreach (var t in bytHash)
{
long i = t / 16;
var sTemp = i > 9 ? ((char)(i - 10 + 0x41)).ToString() : ((char)(i + 0x30)).ToString();
i = t % 16;
if (i > 9)
{
sTemp += ((char)(i - 10 + 0x41)).ToString();
}
else
{
sTemp += ((char)(i + 0x30)).ToString();
}
sHash += sTemp;
}
//根据大小写规则决定返回的字符串
return toUpper ? sHash : sHash.ToLower();
}
catch (System.Exception ex)
{
throw new System.Exception(ex.Message);
}
}
private static string ToHashString(byte[] bytes, bool toUpper = true)
{
var builder = new StringBuilder();
foreach (var t in bytes)
{
builder.Append(t.ToString("x2"));
}
var str = builder.ToString();
return toUpper ? str.ToUpper() : str.ToLower();
}
}

48
ConsoleApp2/JsvSource.cs Normal file
View File

@ -0,0 +1,48 @@
using ConsoleApp2.Entities;
using ConsoleApp2.Helpers;
using Microsoft.Extensions.Logging;
using ServiceStack.Text;
namespace ConsoleApp2;
public class JsvSource : IDisposable
{
private readonly string _filePath;
private readonly JsvStringSerializer _jsv;
private readonly StreamReader _reader;
// ReSharper disable once PrivateFieldCanBeConvertedToLocalVariable
private readonly ILogger? _logger;
private readonly string _tableName;
public DataRecord Current { get; protected set; } = null!;
public string[]? Headers { get; }
public bool EndOfSource => _reader.EndOfStream;
public JsvSource(string filePath, string[]? headers = null, ILogger? logger = null)
{
_filePath = filePath;
_jsv = new JsvStringSerializer();
_reader = new StreamReader(filePath);
Headers = headers;
_logger = logger;
// _logger?.LogInformation("Reading file: {FilePath}", filePath);
_tableName = DumpDataHelper.GetTableName(filePath);
}
public async ValueTask<bool> ReadAsync()
{
var str = await _reader.ReadLineAsync();
if (string.IsNullOrEmpty(str))
return false;
var fields = _jsv.DeserializeFromString<string[]>(str);
if(Headers is not null && Headers.Length != fields.Length)
throw new InvalidDataException("解析的字段数与指定的列数不匹配");
Current = new DataRecord(fields, _tableName, Headers);
return true;
}
public void Dispose()
{
_reader.Dispose();
}
}

View File

@ -0,0 +1,128 @@
using System.Text;
using ConsoleApp2.Entities;
using ConsoleApp2.Helpers;
using Microsoft.Extensions.Logging;
using MySqlConnector;
namespace ConsoleApp2;
public class MySqlDestination : IDisposable, IAsyncDisposable
{
private readonly Dictionary<string, List<DataRecord>> _recordCache;
private readonly MySqlConnection _conn;
private readonly ILogger _logger;
public static int AddCount;
public MySqlDestination(string connStr, ILogger logger)
{
_conn = new MySqlConnection(connStr);
_conn.Open();
_recordCache = new Dictionary<string, List<DataRecord>>();
_logger = logger;
}
public Task WriteRecordAsync(DataRecord record)
{
_recordCache.AddOrUpdate(record.TableName, [record], (key, value) =>
{
value.Add(record);
Interlocked.Increment(ref AddCount);
return value;
});
return Task.CompletedTask;
}
public async Task WriteRecordsAsync(IEnumerable<DataRecord> records)
{
foreach (var record in records)
{
await WriteRecordAsync(record);
}
}
public async Task FlushAsync()
{
if (_recordCache.Count == 0)
return;
try
{
var cmd = _conn.CreateCommand();
var sb = new StringBuilder();
var count = 0;
foreach (var (tableName, records) in _recordCache)
{
if (records.Count == 0)
continue;
sb.Append($"INSERT INTO `{tableName}`(");
for (var i = 0; i < records[0].Headers.Length; i++)
{
var header = records[0].Headers[i];
sb.Append($"`{header}`");
if (i != records[0].Headers.Length - 1)
sb.Append(',');
}
sb.AppendLine(") VALUES");
for (var i = 0; i < records.Count; i++)
{
count++;
var record = records[i];
sb.Append('(');
for (var j = 0; j < record.Fields.Length; j++)
{
var field = record.Fields[j];
#region HandleFields
if (field == "\\N")
sb.Append("NULL");
else if (DumpDataHelper.CheckHexField(field)) // TODO: 性能消耗
{
if (StringExtensions.CheckJsonHex(field))
sb.Append($"UNHEX(\"{field}\")");
else
sb.Append($"\"{field}\"");
}
else
sb.Append($"\"{field}\"");
#endregion
if (j != record.Fields.Length - 1)
sb.Append(',');
}
sb.Append(')');
if (i != records.Count - 1) // not last field
sb.AppendLine(",");
}
sb.AppendLine(";\n");
}
cmd.CommandText = sb.ToString();
await cmd.ExecuteNonQueryAsync();
_recordCache.Clear();
}
catch (Exception e)
{
_logger.LogCritical(e, "Error when flushing records");
throw;
}
}
public void Dispose()
{
_conn.Dispose();
}
public async ValueTask DisposeAsync()
{
await _conn.DisposeAsync();
}
}

View File

@ -0,0 +1,28 @@
namespace ConsoleApp2;
public class CsvOptions
{
/// <summary>
/// The directory to input csv and sql file.
/// </summary>
public string InputDir { get; set; } = "./";
/// <summary>
/// The output directory.
/// </summary>
public string OutputDir { get; set; } = "./Output";
/// <summary>
/// The ASCII char that fields are enclosed by. Default is '"'.
/// </summary>
public char QuoteChar { get; set; } = '"';
/// <summary>
/// The ASCII char that fields are separated by. Default is ','.
/// </summary>
public char DelimiterChar { get; set; } = ',';
/// <summary>
/// The max number of threads to use.
/// </summary>
public int MaxThreads { get; set; } = 12;
}

View File

@ -0,0 +1,9 @@
using ConsoleApp2.Entities;
using ConsoleApp2.Options;
namespace ConsoleApp2;
public class DataTransformOptions
{
public Func<DataRecord, DatabaseOptions> DatabaseFilter { get; set; }
}

View File

@ -0,0 +1,3 @@
namespace ConsoleApp2.Options;
public record DatabaseOptions(string Host, uint Port, string Database, string User, string Password);

37
ConsoleApp2/Program.cs Normal file
View File

@ -0,0 +1,37 @@
using ConsoleApp2;
using ConsoleApp2.Options;
using ConsoleApp2.Services;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Serilog;
ThreadPool.SetMaxThreads(200, 200);
var host = Host.CreateApplicationBuilder();
host.Configuration.AddCommandLine(args);
host.Services.Configure<CsvOptions>(option =>
{
option.DelimiterChar = ',';
option.QuoteChar = '"';
option.InputDir = "D:/Dump/MyDumper-Csv";
option.OutputDir = "D:/DumpOutput";
option.MaxThreads = 12;
});
host.Services.Configure<DataTransformOptions>(options =>
{
var dbOption = new DatabaseOptions("localhost", 33306, "cferp_test_1", "root", "123456");
options.DatabaseFilter = record => dbOption;
});
host.Services.AddLogging(builder =>
{
builder.ClearProviders();
builder.AddSerilog(new LoggerConfiguration().WriteTo.Console().CreateLogger());
});
host.Services.AddHostedService<CsvConversion>();
host.Services.AddHostedService<TaskMonitorService>();
host.Services.AddSingleton<TaskManager>();
host.Services.AddSingleton<DatabaseOutputService>();
host.Services.AddSingleton<DataTransformService>();
var app = host.Build();
await app.RunAsync();

View File

@ -0,0 +1,60 @@
using System.Collections.Concurrent;
using System.Diagnostics.CodeAnalysis;
using ConsoleApp2.Entities;
using Microsoft.Extensions.Logging;
namespace ConsoleApp2.Services;
public class DataRecordQueue
{
/// <summary>
/// Indicate that the queue is completed adding.
/// </summary>
public bool IsCompletedAdding { get; private set; }
/// <summary>
/// Remark that the queue is completed for adding and empty;
/// </summary>
public bool IsCompleted => IsCompletedAdding && _queue.IsEmpty;
private readonly ConcurrentQueue<DataRecord> _queue;
public DataRecordQueue()
{
_queue = new ConcurrentQueue<DataRecord>();
}
public DataRecordQueue(IEnumerable<DataRecord> records)
{
_queue = new ConcurrentQueue<DataRecord>(records);
}
/// <inheritdoc cref="ConcurrentQueue{T}.Enqueue"/>
public void Enqueue(DataRecord item)
{
_queue.Enqueue(item);
}
/// <inheritdoc cref="ConcurrentQueue{T}.TryDequeue"/>
public bool TryDequeue([MaybeNullWhen(false)] out DataRecord result)
{
return _queue.TryDequeue(out result);
}
/// <inheritdoc cref="ConcurrentQueue{T}.TryPeek"/>
public bool TryPeek([MaybeNullWhen(false)] out DataRecord result)
{
return _queue.TryPeek(out result);
}
/// <inheritdoc cref="ConcurrentQueue{T}.Count"/>
public int Count => _queue.Count;
/// <inheritdoc cref="ConcurrentQueue{T}.IsEmpty"/>
public bool IsEmpty => _queue.IsEmpty;
/// <summary>
/// Indicate that the queue is completed adding.
/// </summary>
public void CompleteAdding() => IsCompletedAdding = true;
}

View File

@ -0,0 +1,46 @@
using ConsoleApp2.Entities;
using ConsoleApp2.Helpers;
using ConsoleApp2.Options;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace ConsoleApp2.Services;
public class DataTransformService
{
private readonly ILogger _logger;
private readonly TaskManager _taskManager;
private readonly DatabaseOutputService _output;
private readonly IOptions<DataTransformOptions> _options;
public DataTransformService(ILogger<DataTransformService> logger, TaskManager taskManager, DatabaseOutputService output, IOptions<DataTransformOptions> options)
{
_logger = logger;
_taskManager = taskManager;
_output = output;
_options = options;
}
public async Task ExecuteAsync(DataRecordQueue records, CancellationToken cancellationToken = default)
{
_logger.LogInformation("Start transforming data.");
var map = new Dictionary<DatabaseOptions, DataRecordQueue>();
while (records.TryDequeue(out var record))
{
var dbOptions = _options.Value.DatabaseFilter(record);
map.AddOrUpdate(dbOptions, new DataRecordQueue([record]), (options, queue) =>
{
queue.Enqueue(record);
return queue;
});
}
foreach (var (dbOptions, queue) in map)
{
await _taskManager.CreateTask(async () =>
{
await _output.ExecuteAsync(queue, dbOptions, cancellationToken);
});
}
}
}

View File

@ -0,0 +1,39 @@
using ConsoleApp2.Entities;
using ConsoleApp2.Options;
using Microsoft.Extensions.Logging;
using MySqlConnector;
namespace ConsoleApp2.Services;
public class DatabaseOutputService
{
private readonly ILogger _logger;
public DatabaseOutputService(ILogger<DatabaseOutputService> logger)
{
_logger = logger;
}
public async Task ExecuteAsync(DataRecordQueue records, DatabaseOptions options, CancellationToken stoppingToken = default)
{
var count = records.Count;
var output = new MySqlDestination(new MySqlConnectionStringBuilder()
{
Server = options.Host,
Port = options.Port,
Database = options.Database,
UserID = options.User,
Password = options.Password,
ConnectionTimeout = 120,
}.ConnectionString, _logger); // TODO: 加入DI
while (records.TryDequeue(out var record) && !stoppingToken.IsCancellationRequested)
{
await output.WriteRecordAsync(record);
}
await output.FlushAsync();
_logger.LogInformation("Flush {Count} records to database.", count);
}
}

View File

@ -0,0 +1,28 @@
using System.Collections.Concurrent;
using Microsoft.Extensions.Logging;
namespace ConsoleApp2.Services;
public class TaskManager
{
private readonly ConcurrentBag<Task> _tasks;
private readonly ILogger _logger;
public int RunningTaskCount => _tasks.Count(task => !task.IsCompleted);
public IReadOnlyCollection<Task> Tasks => _tasks;
public bool MainTaskCompleted { get; set; }
public TaskManager(ILogger<TaskManager> logger)
{
_tasks = new ConcurrentBag<Task>();
_logger = logger;
}
public Task<TResult> CreateTask<TResult>(Func<TResult> func)
{
var task = Task.Factory.StartNew(func);
_tasks.Add(task);
_logger.LogInformation("New task created.");
return task;
}
}

View File

@ -0,0 +1,57 @@
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
namespace ConsoleApp2.Services;
public class TaskMonitorService : BackgroundService
{
private readonly IHostApplicationLifetime _lifetime;
private readonly TaskManager _taskManager;
private readonly ILogger<TaskMonitorService> _logger;
public TaskMonitorService(IHostApplicationLifetime lifetime, TaskManager taskManager,
ILogger<TaskMonitorService> logger)
{
_lifetime = lifetime;
_taskManager = taskManager;
_logger = logger;
}
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
while (!_taskManager.MainTaskCompleted || _taskManager.RunningTaskCount != 0)
{
var running = 0;
var error = 0;
var completed = 0;
var canceled = 0;
foreach (var task in _taskManager.Tasks)
{
switch (task.Status)
{
case TaskStatus.Running:
running++;
break;
case TaskStatus.Canceled:
canceled++;
break;
case TaskStatus.Faulted:
error++;
break;
case TaskStatus.RanToCompletion:
completed++;
break;
default:
throw new ArgumentOutOfRangeException();
}
}
_logger.LogInformation(
"Task monitor: running: {Running}, error: {Error}, completed: {Completed}, canceled: {Canceled}",
running, error, completed, canceled);
await Task.Delay(2000);
}
_logger.LogInformation("***** All tasks completed *****");
}
}