MES-ETL/ConsoleApp2/Helpers/DumpDataHelper.cs

125 lines
3.7 KiB
C#
Raw Normal View History

2024-01-29 09:29:16 +08:00
using System.Text.RegularExpressions;
using ZstdSharp;
2023-12-28 15:18:03 +08:00
namespace ConsoleApp2.Helpers;
public static partial class DumpDataHelper
{
/// <summary>
/// Source-generated regex matching a single-quoted token that ends in <c>.dat</c>:
/// a quote, one or more characters, the literal <c>.dat</c>, and a closing quote.
/// The match includes both quotes.
/// </summary>
[GeneratedRegex(@"'.+\.dat'")]
private static partial Regex MatchDatFile();
/// <summary>
/// Source-generated regex matching a parenthesised group that contains no <c>)</c> inside it.
/// The match includes both parentheses.
/// </summary>
[GeneratedRegex(@"\([^)]*\)")]
private static partial Regex MatchBrackets();
2024-01-29 09:29:16 +08:00
/// <summary>
/// Reads the column headers from the content of an SQL file exported by MyDumper.
/// </summary>
/// <param name="content">
/// Text of the SQL file. The first parenthesised group found in it is taken as the
/// comma-separated column list.
/// </param>
/// <returns>
/// The column names in their original order, with surrounding whitespace, enclosing
/// backticks and a leading <c>@</c> (user variable marker) removed.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="content"/> contains no parenthesised group.
/// </exception>
public static string[] GetCsvHeadersFromSqlFile(string content)
{
    var match = MatchBrackets().Match(content);
    if (!match.Success)
        throw new ArgumentException("输入的SQL内容有误无法提取表头", nameof(content));

    return ParseHeader(match.ValueSpan);

    static string[] ParseHeader(ReadOnlySpan<char> headerStr)
    {
        // Largest column count for which the range buffer is kept on the stack.
        const int StackLimit = 64;

        // Drop the enclosing parentheses captured by the regex.
        headerStr = headerStr[1..^1];

        // Size the buffer from the real number of separators. With a fixed-size buffer,
        // Split stores "everything that is left" in the last slot once it runs out of
        // room, which silently merges the trailing columns of wide tables into one entry.
        var capacity = headerStr.Count(',') + 1;
        Span<Range> ranges = capacity <= StackLimit
            ? stackalloc Range[capacity]
            : new Range[capacity];

        // TrimEntries strips whitespace around each entry so that a list written as
        // "`a`, `b`" still has its backticks removed below.
        var count = headerStr.Split(ranges, ',', StringSplitOptions.TrimEntries);
        var arr = new string[count];

        for (var i = 0; i < count; i++)
        {
            // Remove the backticks around a column name, or the '@' in front of a variable.
            arr[i] = headerStr[ranges[i]].Trim("@`").ToString();
        }

        return arr;
    }
}
2024-01-29 09:29:16 +08:00
/// <summary>
/// Extracts the table name from the name of a CSV data file exported by MyDumper.
/// </summary>
/// <param name="filePath">
/// File name or full path. Both <c>\</c> and <c>/</c> are accepted as directory
/// separators. The file name must contain at least two dots; the table name is the
/// text between the first and the second dot.
/// </param>
/// <returns>The file-name segment between the first and second dot.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The file name contains fewer than two dots.
/// </exception>
public static string GetTableNameFromCsvFileName(ReadOnlySpan<char> filePath)
{
    // Keep only the file name. LastIndexOfAny returns -1 when there is no separator,
    // so "+ 1" then slices from the start. Both separators are checked so that dots
    // inside directory names of '/'-style paths cannot be mistaken for file-name dots.
    var fileName = filePath[(filePath.LastIndexOfAny('\\', '/') + 1)..];

    var firstDotIdx = fileName.IndexOf('.');
    if (firstDotIdx >= 0)
    {
        var afterFirstDot = fileName[(firstDotIdx + 1)..];
        var secondDotIdx = afterFirstDot.IndexOf('.');
        if (secondDotIdx >= 0)
            return afterFirstDot[..secondDotIdx].ToString();
    }

    // Same exception type the unchecked slice used to raise, now with a usable message.
    throw new ArgumentOutOfRangeException(
        nameof(filePath),
        fileName.ToString(),
        "File name must contain at least two dots so that the table name can be located between them.");
}
2024-01-29 09:29:16 +08:00
/// <summary>
/// Collects the CSV file names referenced in the content of an SQL file exported by MyDumper.
/// </summary>
/// <param name="txt">Text of the SQL file.</param>
/// <param name="regex">
/// Pattern that locates each file name. Every match is expected to carry one delimiter
/// character at each end; the first and last character of a match are dropped.
/// </param>
/// <returns>
/// An already-completed task holding one entry per match, in match order.
/// </returns>
public static Task<string[]> GetCsvFileNamesFromSqlFileAsync(string txt, Regex regex)
{
    var found = regex.Matches(txt);
    var names = new string[found.Count];

    for (var i = 0; i < names.Length; i++)
    {
        // Strip the first and last character of the match (the surrounding delimiters).
        names[i] = found[i].ValueSpan[1..^1].ToString();
    }

    return Task.FromResult(names);
}
2024-01-29 09:29:16 +08:00
/// <summary>
/// Checks whether a string looks like a hexadecimal value.
/// </summary>
/// <param name="str">The text to inspect; may be <c>null</c>.</param>
/// <returns>
/// <c>true</c> when every character is an ASCII hex digit and at least one of them is
/// not a number character; <c>false</c> for null, empty or whitespace-only input, for
/// input starting with a double quote, and for input made of digits only.
/// </returns>
public static bool CheckHexField(string? str)
{
    if (string.IsNullOrWhiteSpace(str) || str.StartsWith('\"'))
        return false;

    var hasNonDigit = false;
    for (var i = 0; i < str.Length; i++)
    {
        var ch = str[i];
        if (!char.IsAsciiHexDigit(ch))
            return false;

        hasNonDigit |= !char.IsNumber(ch);
    }

    // A purely numeric string is deliberately not treated as hex.
    return hasNonDigit;
}
2024-01-29 09:29:16 +08:00
/// <summary>
/// Decompresses a Zstandard-compressed stream and returns its content as a string.
/// </summary>
/// <param name="stream">Stream positioned at the start of the compressed data.</param>
/// <returns>
/// The whole decompressed content, decoded by a <see cref="StreamReader"/> with its
/// default encoding settings.
/// </returns>
public static async Task<string> DecompressZstAsStringAsync(Stream stream)
{
    await using var ds = new DecompressionStream(stream);

    // Dispose the reader as well. leaveOpen keeps it from closing the decompression
    // stream synchronously, so that stream is disposed exactly once, asynchronously,
    // by the declaration above.
    using var reader = new StreamReader(ds, leaveOpen: true);
    return await reader.ReadToEndAsync();
}
2023-12-29 16:16:05 +08:00
2023-12-28 15:18:03 +08:00
}