2025迁移版本,多项规则修改

This commit is contained in:
2024-12-10 14:03:09 +08:00
parent dc239c776e
commit 0e28d639c1
34 changed files with 1075 additions and 564 deletions

View File

@@ -1,5 +1,7 @@
using System.Text.Json;
using System.Text.RegularExpressions;
using MesETL.App.HostedServices;
using Serilog;
using ZstdSharp;
namespace MesETL.App.Helpers;
@@ -27,16 +29,13 @@ public static partial class DumpDataHelper
// Turns a header list into an array of column names: drops the first and last
// character of headerStr, splits the remainder on ',', and trims '@' and '`'
// from both ends of every piece.
// NOTE(review): this listing is a diff rendering without +/- markers, so two variants
// of the body are interleaved below - one that splits into a fixed 50-slot Range
// buffer and one that collects into a List<string>. Presumably the former is the
// removed code and the latter its replacement; confirm against the repository.
string[] ParseHeader(ReadOnlySpan<char> headerStr)
{
// Drop the first and last character (presumably the enclosing parentheses - confirm against the caller).
headerStr = headerStr[1..^1];
// Buffer variant: split into a stack-allocated buffer of 50 ranges, then copy each slice into a string array.
Span<Range> ranges = stackalloc Range[50];
var count = headerStr.Split(ranges, ',');
var arr = new string[count];
for (var i = 0; i < count; i++)
// List variant: enumerate the ',' split and append each trimmed name to a growable list.
var headers = new List<string>();
foreach (var range in headerStr.Split(','))
{
arr[i] = headerStr[ranges[i]].Trim("@`").ToString(); // Strip the backticks around a column name; for a variable, strip the '@'
headers.Add(headerStr[range].Trim("@`").ToString()); // Strip the backticks around a column name; for a variable, strip the '@'
}
return arr;
return headers.ToArray();
}
}
@@ -45,6 +44,7 @@ public static partial class DumpDataHelper
/// </summary>
/// <param name="filePath"></param>
/// <returns></returns>
[Obsolete("用ParseMyDumperFile替代")]
public static string GetTableNameFromCsvFileName(ReadOnlySpan<char> filePath)
{
filePath = filePath[(filePath.LastIndexOf('\\') + 1)..];
@@ -68,6 +68,30 @@ public static partial class DumpDataHelper
return filePath[(firstDotIdx+1)..secondDotIdx].ToString();
}
/// <summary>
/// Kind of MyDumper export file, decided by the fourth dot-separated segment of the file name.
/// </summary>
public enum MyDumperFileType { Dat, Sql }

/// <summary>
/// Metadata decoded from the name of a MyDumper export file.
/// </summary>
/// <param name="Path">The path exactly as it was passed to <see cref="ParseMyDumperFile"/>.</param>
/// <param name="Database">First dot-separated segment of the file name.</param>
/// <param name="TableName">Second dot-separated segment of the file name.</param>
/// <param name="Index">Third dot-separated segment of the file name, parsed as an integer.</param>
/// <param name="Type">File kind derived from the fourth dot-separated segment.</param>
public record MyDumperFileMeta(string Path, string Database, string TableName, int Index, MyDumperFileType Type);

/// <summary>
/// Parses a file name shaped like <c>database.table.index.type[.more]</c> into a
/// <see cref="MyDumperFileMeta"/>. Only the file-name part of <paramref name="path"/> is inspected;
/// segments after the fourth (for example a trailing <c>zst</c>) are ignored.
/// </summary>
/// <param name="path">Path or bare file name of the exported file.</param>
/// <returns>The decoded metadata, with <see cref="MyDumperFileMeta.Path"/> set to <paramref name="path"/>.</returns>
/// <exception cref="ArgumentException">
/// The file name has fewer than four segments, the third segment is not an integer,
/// or the fourth segment is neither <c>dat</c> nor <c>sql</c>. The specific cause is the inner exception.
/// </exception>
public static MyDumperFileMeta ParseMyDumperFile(ReadOnlySpan<char> path)
{
    var fullPath = path.ToString();
    var parts = Path.GetFileName(path).ToString().Split('.');

    // Validate the shape up front instead of relying on an IndexOutOfRangeException.
    if (parts.Length < 4)
    {
        throw NotMyDumperFile(fullPath, nameof(path),
            new FormatException($"文件名至少需要4个以'.'分隔的部分,实际为{parts.Length}个"));
    }

    var type = parts[3] switch
    {
        "dat" => MyDumperFileType.Dat,
        "sql" => MyDumperFileType.Sql,
        _ => throw NotMyDumperFile(fullPath, nameof(path),
            new ArgumentException("不支持的MyDumper文件类型", nameof(path)))
    };

    // The index is machine-generated, so parse it independently of the current culture.
    if (!int.TryParse(parts[2], System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture, out var index))
    {
        throw NotMyDumperFile(fullPath, nameof(path),
            new FormatException($"分片序号不是有效的整数: {parts[2]}"));
    }

    return new MyDumperFileMeta(fullPath, parts[0], parts[1], index, type);

    // Builds the single outward-facing exception so every failure has the same message and parameter name.
    static ArgumentException NotMyDumperFile(string file, string paramName, Exception cause) =>
        new($"此文件不是MyDumper导出的文件 {file}", paramName, cause);
}
/// <summary>
/// 从MyDumper导出的SQL文件内容中读取CSV文件名
@@ -122,17 +146,46 @@ public static partial class DumpDataHelper
var reader = new StreamReader(ds);
return await reader.ReadToEndAsync();
}
// NOTE(review): this listing is a diff rendering without +/- markers. The IsJson
// signature directly below, and the JsonDocument.Parse / catch (JsonException) /
// "return true" / "return false" lines further down, appear to belong to a separate
// IsJson helper interleaved with MyDumperFileInputMetaBuilder. Presumably they are
// the removed side of the hunk - confirm against the repository.
public static bool IsJson(string str)
/// <summary>
/// File-metadata builder for the file input service, intended for MyDumper Zst export directories.
/// Builds a <c>FileInputInfo</c> for a <c>.dat.zst</c> file, reading its headers from the
/// <c>.sql.zst</c> file of the same name in the same directory.
/// </summary>
/// <param name="filePath">Path of the candidate file; anything not ending in <c>.dat.zst</c> is ignored.</param>
/// <returns>
/// The populated <c>FileInputInfo</c>, or <c>null</c> when the suffix does not match
/// or no matching <c>.sql.zst</c> file is found in the directory.
/// </returns>
/// <exception cref="ApplicationException">An InvalidOperationException was raised while locating or reading the SQL file.</exception>
/// <exception cref="ArgumentException">Propagated from ParseMyDumperFile when the file name cannot be parsed.</exception>
public static FileInputInfo? MyDumperFileInputMetaBuilder(string filePath)
{
// Only files with the .dat.zst suffix are considered
if (!filePath.EndsWith(".dat.zst")) return null;
var fileMeta = ParseMyDumperFile(filePath);
var inputDir = Path.GetDirectoryName(filePath);
string[]? headers;
try
{
JsonDocument.Parse(str);
return true;
// Look for the SQL file of the same table in the same directory
// (SingleOrDefault throws InvalidOperationException when more than one entry matches)
var sqlFile = Directory.GetFiles(inputDir!)
.SingleOrDefault(f => f.Equals(filePath.Replace(".dat.zst", ".sql.zst")));
if (sqlFile is null)
{
Log.Debug("{TableName}表的SQL文件不存在", fileMeta.TableName);
return null;
}
// NOTE(review): .Result blocks the calling thread until decompression finishes, and the
// stream from File.OpenRead is not disposed in this method - confirm that
// DecompressZstAsStringAsync closes it.
headers = GetCsvHeadersFromSqlFile(
DecompressZstAsStringAsync(File.OpenRead(sqlFile)).Result);
}
catch (JsonException)
catch (InvalidOperationException e)
{
return false;
// Reported as "more than one SQL file for this table"; the original exception is kept as the inner exception.
throw new ApplicationException($"目录下不止一个{fileMeta.TableName}表的SQL文件", e);
}
return new FileInputInfo
{
FileName = filePath,
TableName = fileMeta.TableName,
Headers = headers,
Database = fileMeta.Database,
Part = fileMeta.Index
};
}
}