using System.Text.Json;
using System.Text.RegularExpressions;
using MesETL.App.HostedServices;
using Serilog;
using ZstdSharp;

namespace MesETL.App.Helpers;

/// <summary>
/// Helpers for working with MyDumper export files: parsing file names,
/// extracting column headers from the accompanying SQL file, and reading
/// Zstandard-compressed content.
/// </summary>
public static partial class DumpDataHelper
{
    [GeneratedRegex(@"'.+\.dat'")]
    private static partial Regex MatchDatFile();

    [GeneratedRegex(@"\([^)]*\)")]
    private static partial Regex MatchBrackets();

    /// <summary>
    /// Reads the column headers from the content of a SQL file exported by MyDumper.
    /// The headers are taken from the first parenthesised, comma-separated list in the text.
    /// </summary>
    /// <param name="content">Text of the SQL file.</param>
    /// <returns>The column names, without surrounding backticks or a leading <c>@</c>.</returns>
    /// <exception cref="ArgumentException">No parenthesised list was found in <paramref name="content"/>.</exception>
    public static string[] GetCsvHeadersFromSqlFile(string content)
    {
        var match = MatchBrackets().Match(content);
        if (!match.Success)
        {
            throw new ArgumentException("输入的SQL内容有误,无法提取表头", nameof(content));
        }

        return ParseHeader(match.ValueSpan);

        static string[] ParseHeader(ReadOnlySpan<char> headerStr)
        {
            // Drop the enclosing parentheses.
            headerStr = headerStr[1..^1];
            var headers = new List<string>();
            foreach (var range in headerStr.Split(','))
            {
                // Trim whitespace first so that "(`a`, `b`)" does not leave a stray backtick,
                // then strip the backticks around column names and the '@' of variables.
                headers.Add(headerStr[range].Trim().Trim("@`").ToString());
            }

            return headers.ToArray();
        }
    }

    /// <summary>
    /// Extracts the table name from the file name of a CSV file exported by MyDumper,
    /// i.e. the text between the first and the second dot of the file name.
    /// </summary>
    /// <param name="filePath">File path; only <c>\</c> is recognised as a directory separator.</param>
    /// <returns>The segment between the first two dots of the file name.</returns>
    /// <remarks>
    /// The file name must contain at least two dots; otherwise slicing fails with an
    /// out-of-range exception because the second dot index stays at -1.
    /// </remarks>
    [Obsolete("用ParseMyDumperFile替代")]
    public static string GetTableNameFromCsvFileName(ReadOnlySpan<char> filePath)
    {
        // Keep only the part after the last backslash (the whole input when there is none).
        filePath = filePath[(filePath.LastIndexOf('\\') + 1)..];

        var firstDotIdx = -1;
        var secondDotIdx = -1;
        var times = 0;
        for (var i = 0; i < filePath.Length; i++)
        {
            if (filePath[i] != '.')
            {
                continue;
            }

            ++times;
            if (times == 1)
            {
                firstDotIdx = i;
            }
            else if (times == 2)
            {
                secondDotIdx = i;
                break;
            }
        }

        return filePath[(firstDotIdx + 1)..secondDotIdx].ToString();
    }

    /// <summary>Kind of a MyDumper export file, taken from the fourth dot-separated part of its name.</summary>
    public enum MyDumperFileType
    {
        Dat,
        Sql
    }

    /// <summary>Metadata parsed from the name of a MyDumper export file.</summary>
    /// <param name="Path">The path exactly as it was passed to the parser.</param>
    /// <param name="Database">First dot-separated part of the file name.</param>
    /// <param name="TableName">Second dot-separated part of the file name.</param>
    /// <param name="Index">Third dot-separated part of the file name, parsed as an integer.</param>
    /// <param name="Type">File type derived from the fourth dot-separated part.</param>
    public record MyDumperFileMeta(string Path, string Database, string TableName, int Index, MyDumperFileType Type);

    /// <summary>
    /// Parses a file name of the form <c>database.table.index.(dat|sql)[...]</c> into its metadata.
    /// </summary>
    /// <param name="path">Path of the file; only the file-name part is inspected.</param>
    /// <returns>The parsed metadata.</returns>
    /// <exception cref="ArgumentException">
    /// The file name does not have the expected shape. Any underlying failure
    /// (too few parts, non-numeric index, unsupported type) is attached as the inner exception.
    /// </exception>
    public static MyDumperFileMeta ParseMyDumperFile(ReadOnlySpan<char> path)
    {
        try
        {
            var fileName = Path.GetFileName(path).ToString();
            var parts = fileName.Split('.');
            var type = parts[3] switch
            {
                "dat" => MyDumperFileType.Dat,
                "sql" => MyDumperFileType.Sql,
                _ => throw new ArgumentException("不支持的MyDumper文件类型", nameof(path))
            };

            // The index is machine-generated, so parse it independently of the current culture.
            var index = int.Parse(parts[2], System.Globalization.CultureInfo.InvariantCulture);
            return new MyDumperFileMeta(path.ToString(), parts[0], parts[1], index, type);
        }
        catch (Exception e)
        {
            throw new ArgumentException($"此文件不是MyDumper导出的文件 {path}", nameof(path), e);
        }
    }

    /// <summary>
    /// Reads CSV file names from the content of a SQL file exported by MyDumper.
    /// </summary>
    /// <param name="txt">Text of the SQL file.</param>
    /// <param name="regex">Pattern whose matches are the file names including one delimiter character on each side.</param>
    /// <returns>
    /// An already-completed task holding every match with its first and last character removed.
    /// </returns>
    public static Task<string[]> GetCsvFileNamesFromSqlFileAsync(string txt, Regex regex)
    {
        var matches = regex.Matches(txt);
        var fileNames = matches
            .Select(match => match.ValueSpan[1..^1].ToString())
            .ToArray();
        return Task.FromResult(fileNames);
    }

    /// <summary>
    /// Checks whether a string looks like a hexadecimal value.
    /// </summary>
    /// <param name="str">The string to test.</param>
    /// <returns>
    /// <c>true</c> when every character is an ASCII hex digit and at least one of them
    /// is not a decimal digit; <c>false</c> for null, blank, quoted or purely numeric input.
    /// </returns>
    public static bool CheckHexField(string? str)
    {
        if (string.IsNullOrWhiteSpace(str) || str.StartsWith('\"'))
        {
            return false;
        }

        var hasHexLetter = false;
        foreach (var c in str)
        {
            if (!char.IsAsciiHexDigit(c))
            {
                return false;
            }

            if (!char.IsNumber(c))
            {
                hasHexLetter = true;
            }
        }

        // A value made only of decimal digits is treated as a number, not as hex.
        return hasHexLetter;
    }

    /// <summary>
    /// Decompresses a Zstandard stream and returns its content as a string.
    /// </summary>
    /// <param name="stream">The compressed input stream. The caller remains responsible for disposing it.</param>
    /// <returns>The decompressed text.</returns>
    public static async Task<string> DecompressZstAsStringAsync(Stream stream)
    {
        await using var ds = new DecompressionStream(stream);
        // leaveOpen: the decompression stream is disposed by its own 'await using' above.
        using var reader = new StreamReader(ds, leaveOpen: true);
        return await reader.ReadToEndAsync();
    }

    /// <summary>
    /// File metadata builder for the file input service when reading a MyDumper Zst export directory.
    /// </summary>
    /// <param name="filePath">Path of a candidate file.</param>
    /// <returns>
    /// The input metadata for a <c>.dat.zst</c> file, or <c>null</c> when the file has a different
    /// suffix or its sibling <c>.sql.zst</c> file does not exist.
    /// </returns>
    /// <exception cref="ArgumentException">The file name is not a MyDumper file name, or the SQL file has no header list.</exception>
    /// <exception cref="ApplicationException">More than one matching SQL file was found.</exception>
    public static FileInputInfo? MyDumperFileInputMetaBuilder(string filePath)
    {
        const string datSuffix = ".dat.zst";
        const string sqlSuffix = ".sql.zst";

        // Only files ending in .dat.zst are considered.
        if (!filePath.EndsWith(datSuffix, StringComparison.Ordinal))
        {
            return null;
        }

        var fileMeta = ParseMyDumperFile(filePath);
        var inputDir = Path.GetDirectoryName(filePath);

        // Swap only the trailing suffix; a plain Replace would also rewrite
        // ".dat.zst" occurring earlier in the path (e.g. in a directory name).
        var expectedSqlPath = filePath[..^datSuffix.Length] + sqlSuffix;

        // Look for the SQL file of the same table in the same directory.
        string? sqlFile;
        try
        {
            sqlFile = Directory.GetFiles(inputDir!)
                .SingleOrDefault(f => f.Equals(expectedSqlPath, StringComparison.Ordinal));
        }
        catch (InvalidOperationException e)
        {
            throw new ApplicationException($"目录下不止一个{fileMeta.TableName}表的SQL文件", e);
        }

        if (sqlFile is null)
        {
            Log.Debug("{TableName}表的SQL文件不存在", fileMeta.TableName);
            return null;
        }

        // Own the file stream here so the handle is released even if decompression fails.
        using var sqlStream = File.OpenRead(sqlFile);
        // NOTE(review): blocking on .Result is kept because this builder has a synchronous
        // signature; failures from the task therefore surface as AggregateException.
        var headers = GetCsvHeadersFromSqlFile(DecompressZstAsStringAsync(sqlStream).Result);

        return new FileInputInfo
        {
            FileName = filePath,
            TableName = fileMeta.TableName,
            Headers = headers,
            Database = fileMeta.Database,
            Part = fileMeta.Index
        };
    }
}