MES-ETL/MesETL.App/Helpers/DumpDataHelper.cs

191 lines
6.1 KiB
C#
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System.Text.Json;
using System.Text.RegularExpressions;
using MesETL.App.HostedServices;
using Serilog;
using ZstdSharp;
namespace MesETL.App.Helpers;
public static partial class DumpDataHelper
{
/// <summary>
/// Source-generated regex matching a single-quoted token that ends in <c>.dat</c>, quotes included.
/// The quantifier is greedy, so on one line the match runs from the first quote to the last <c>.dat'</c>.
/// </summary>
[GeneratedRegex(@"'.+\.dat'")]
private static partial Regex MatchDatFile();
/// <summary>
/// Source-generated regex matching a parenthesised group that contains no closing parenthesis inside it.
/// Used by <c>GetCsvHeadersFromSqlFile</c> to locate the column list.
/// </summary>
[GeneratedRegex(@"\([^)]*\)")]
private static partial Regex MatchBrackets();
/// <summary>
/// Reads the column headers from the text of a SQL file exported by MyDumper.
/// </summary>
/// <remarks>
/// The first parenthesised group found in <paramref name="content"/> is treated as the
/// comma-separated column list. Each entry has surrounding whitespace removed, then any
/// leading/trailing backticks (column names) or <c>@</c> signs (variables) stripped.
/// </remarks>
/// <param name="content">Text of the SQL file.</param>
/// <returns>The column names in the order they appear in the list.</returns>
/// <exception cref="ArgumentException">No parenthesised group exists in <paramref name="content"/>.</exception>
public static string[] GetCsvHeadersFromSqlFile(string content)
{
    var match = MatchBrackets().Match(content);
    if (!match.Success)
        throw new ArgumentException("输入的SQL内容有误无法提取表头", nameof(content));

    // Drop the enclosing parentheses of the matched group.
    var columnList = match.ValueSpan[1..^1];
    var headers = new List<string>();
    foreach (var range in columnList.Split(','))
    {
        // Whitespace must go first: with a list such as "`a`, `b`" the quote characters
        // are not at the edge of the entry and would otherwise survive the second trim.
        headers.Add(columnList[range].Trim().Trim("@`").ToString());
    }

    return headers.ToArray();
}
/// <summary>
/// Extracts the table name from the file name of a MyDumper-exported CSV file: the text
/// between the first and the second dot of the file name
/// (for example <c>db.table.00000.dat</c> yields <c>table</c>).
/// </summary>
/// <param name="filePath">File name or full path; both <c>\</c> and <c>/</c> are accepted as directory separators.</param>
/// <returns>The segment between the first two dots of the file name.</returns>
/// <exception cref="ArgumentOutOfRangeException">The file name contains fewer than two dots.</exception>
[Obsolete("用ParseMyDumperFile替代")]
public static string GetTableNameFromCsvFileName(ReadOnlySpan<char> filePath)
{
    // Strip the directory part. Both separators are honoured so that a dotted directory
    // name in a '/'-separated path cannot be mistaken for part of the file name.
    var fileName = filePath[(filePath.LastIndexOfAny('\\', '/') + 1)..];

    var firstDotIdx = fileName.IndexOf('.');
    // Offset of the second dot, measured from the character right after the first dot;
    // this is also the length of the table-name segment.
    var segmentLength = firstDotIdx < 0 ? -1 : fileName[(firstDotIdx + 1)..].IndexOf('.');
    if (segmentLength < 0)
    {
        // Same exception type the out-of-range slice used to surface, now with a usable message.
        throw new ArgumentOutOfRangeException(nameof(filePath), "文件名中至少需要包含两个'.'才能解析表名");
    }

    return fileName.Slice(firstDotIdx + 1, segmentLength).ToString();
}
/// <summary>Kinds of MyDumper file recognised by <c>ParseMyDumperFile</c>, taken from the fourth dot-separated part of the file name.</summary>
public enum MyDumperFileType { Dat, Sql }
/// <summary>
/// Metadata parsed from the dot-separated file name of a MyDumper export file.
/// </summary>
/// <param name="Path">The path exactly as it was handed to the parser.</param>
/// <param name="Database">First part of the file name.</param>
/// <param name="TableName">Second part of the file name.</param>
/// <param name="Index">Third part of the file name, parsed as an integer.</param>
/// <param name="Type">File kind derived from the fourth part of the file name.</param>
public record MyDumperFileMeta(string Path, string Database, string TableName, int Index, MyDumperFileType Type);
/// <summary>
/// Parses the file name of a MyDumper export file of the form
/// <c>database.table.index.type[.more]</c> into its metadata.
/// </summary>
/// <param name="path">File name or full path; only the file-name part is inspected.</param>
/// <returns>The parsed metadata; <c>Path</c> carries <paramref name="path"/> unchanged.</returns>
/// <exception cref="ArgumentException">
/// The file name has fewer than four dot-separated parts, the third part is not an integer,
/// or the fourth part is neither <c>dat</c> nor <c>sql</c>.
/// </exception>
public static MyDumperFileMeta ParseMyDumperFile(ReadOnlySpan<char> path)
{
    const string paramName = nameof(path);
    var fullPath = path.ToString();
    var parts = Path.GetFileName(fullPath).Split('.');

    // Validate explicitly rather than relying on an index or format exception being caught.
    if (parts.Length < 4)
        throw NotMyDumperFile();

    MyDumperFileType? type = parts[3] switch
    {
        "dat" => MyDumperFileType.Dat,
        "sql" => MyDumperFileType.Sql,
        _ => null
    };
    if (type is null)
        throw NotMyDumperFile(new ArgumentException("不支持的MyDumper文件类型", paramName));

    // The index is machine-generated, so parse it independently of the current culture.
    if (!int.TryParse(parts[2], System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture, out var index))
        throw NotMyDumperFile();

    return new MyDumperFileMeta(fullPath, parts[0], parts[1], index, type.Value);

    // Builds the single exception type callers see for every kind of malformed name.
    ArgumentException NotMyDumperFile(Exception? inner = null) =>
        new($"此文件不是MyDumper导出的文件 {fullPath}", paramName, inner);
}
/// <summary>
/// Collects the CSV file names referenced in the text of a SQL file exported by MyDumper.
/// </summary>
/// <param name="txt">Text of the SQL file.</param>
/// <param name="regex">Pattern locating each file name; the first and last character of every match are discarded.</param>
/// <returns>An already-completed task holding the extracted names in match order.</returns>
public static Task<string[]> GetCsvFileNamesFromSqlFileAsync(string txt, Regex regex)
{
    var fileNames = new List<string>();
    foreach (Match hit in regex.Matches(txt))
    {
        // Cut one character off each end of the match (the enclosing delimiters).
        fileNames.Add(hit.ValueSpan[1..^1].ToString());
    }

    return Task.FromResult(fileNames.ToArray());
}
/// <summary>
/// Tells whether a string looks like a hexadecimal value: every character is an ASCII
/// hex digit and at least one of them is a letter.
/// </summary>
/// <param name="str">Candidate text; may be <c>null</c>.</param>
/// <returns>
/// <c>false</c> for null, empty or whitespace-only input, for input containing any
/// non-hex character, and for input made of decimal digits only; otherwise <c>true</c>.
/// </returns>
public static bool CheckHexField(string? str)
{
    if (string.IsNullOrWhiteSpace(str))
    {
        return false;
    }

    // A leading double quote needs no separate test: it is not a hex digit and is
    // rejected by the first character check below.
    var sawLetter = false;
    for (var i = 0; i < str.Length; i++)
    {
        var ch = str[i];
        if (!char.IsAsciiHexDigit(ch))
        {
            return false;
        }

        // Among hex digits, anything that is not 0-9 is one of a-f / A-F.
        sawLetter |= !char.IsAsciiDigit(ch);
    }

    // Purely numeric strings are deliberately not reported as hex.
    return sawLetter;
}
/// <summary>
/// Decompresses a Zstandard-compressed stream and returns the result as text.
/// </summary>
/// <remarks>
/// The text is decoded with the default encoding behaviour of <see cref="StreamReader"/>.
/// NOTE(review): whether <paramref name="stream"/> itself is closed depends on the
/// <c>DecompressionStream</c> default for leaving the inner stream open - callers that
/// own the stream should dispose it themselves.
/// </remarks>
/// <param name="stream">Readable stream positioned at the start of the compressed data.</param>
/// <returns>The entire decompressed content as a string.</returns>
public static async Task<string> DecompressZstAsStringAsync(Stream stream)
{
    await using var ds = new DecompressionStream(stream);
    // Dispose the reader as well; leaveOpen keeps the decompression stream's lifetime
    // owned solely by the declaration above.
    using var reader = new StreamReader(ds, leaveOpen: true);
    return await reader.ReadToEndAsync();
}
/// <summary>
/// File metadata builder for the file input service, for directories of MyDumper
/// exports compressed with Zstandard.
/// </summary>
/// <remarks>
/// Only files ending in <c>.dat.zst</c> are handled. The column headers are read from
/// the companion file with the same path but a <c>.sql.zst</c> suffix. Because the
/// companion is addressed by its exact path, at most one file can match, so no
/// "more than one SQL file" error can occur.
/// </remarks>
/// <param name="filePath">Path of the candidate data file.</param>
/// <returns>
/// The input description for the data file, or <c>null</c> when the file is not a
/// <c>.dat.zst</c> file or its companion SQL file does not exist.
/// </returns>
/// <exception cref="ArgumentException">The file name is not a valid MyDumper file name.</exception>
public static FileInputInfo? MyDumperFileInputMetaBuilder(string filePath)
{
    const string datSuffix = ".dat.zst";
    const string sqlSuffix = ".sql.zst";

    // Only data files are of interest; compare ordinally, this is a file extension.
    if (!filePath.EndsWith(datSuffix, StringComparison.Ordinal)) return null;

    var fileMeta = ParseMyDumperFile(filePath);

    // Swap the trailing suffix only, so a ".dat.zst" fragment elsewhere in the path is
    // left alone, and probe that one path instead of listing the whole directory for
    // every data file.
    var sqlFile = filePath[..^datSuffix.Length] + sqlSuffix;
    if (!File.Exists(sqlFile))
    {
        Log.Debug("{TableName}表的SQL文件不存在", fileMeta.TableName);
        return null;
    }

    // Own the file handle here so it is released as soon as the headers are read.
    using var sqlStream = File.OpenRead(sqlFile);
    // The builder signature is synchronous, so the decompression task is awaited by blocking.
    var headers = GetCsvHeadersFromSqlFile(DecompressZstAsStringAsync(sqlStream).Result);

    return new FileInputInfo
    {
        FileName = filePath,
        TableName = fileMeta.TableName,
        Headers = headers,
        Database = fileMeta.Database,
        Part = fileMeta.Index
    };
}
}