2024-02-06 15:37:21 +08:00
using MesETL.App.HostedServices.Abstractions ;
2024-02-02 17:14:41 +08:00
using MesETL.App.Options ;
using MesETL.App.Services ;
using MesETL.App.Services.ETL ;
2024-01-29 09:29:16 +08:00
using Microsoft.Extensions.DependencyInjection ;
using Microsoft.Extensions.Logging ;
using Microsoft.Extensions.Options ;
2024-02-02 17:14:41 +08:00
namespace MesETL.App.HostedServices ;
2024-01-29 09:29:16 +08:00
/// <summary>
/// Metadata describing one input file. In this file, instances are produced by the
/// configured <c>FileInputMetaBuilder</c> delegate and consumed by
/// <c>FileInputService</c>, which hands all three values to the reader factory.
/// </summary>
public record FileInputInfo
{
    /// <summary>File identifier passed to the reader factory when the file is opened.</summary>
    public required string FileName { get; init; }

    /// <summary>Name of the table whose records the file contains.</summary>
    public required string TableName { get; init; }

    /// <summary>Header names handed to the reader factory (presumably the column names — confirm).</summary>
    public required string[] Headers { get; init; }
}
/// <summary>
/// Kinds of input file. The enum is not referenced elsewhere in the visible code, so the
/// member descriptions below are inferred from the names only.
/// The explicit values match the implicit numbering (0, 1, 2).
/// </summary>
public enum FileInputType
{
    /// <summary>Presumably a CSV file produced by mydumper — confirm.</summary>
    MyDumperCsv = 0,

    /// <summary>Presumably a Zstandard-compressed mydumper file — confirm.</summary>
    MyDumperZst = 1,

    /// <summary>Presumably an error log used as input — confirm.</summary>
    ErrorLog = 2,
}
/// <summary>
/// Imports files from the configured input directory: every file is opened through the
/// <see cref="DataReaderFactory"/> and each record it yields is enqueued on the producer queue.
/// </summary>
public class FileInputService : IInputService
{
    private readonly ILogger _logger;
    private readonly DataRecordQueue _producerQueue;
    private readonly IOptions<DataInputOptions> _dataInputOptions;
    private readonly ProcessContext _context;
    private readonly DataReaderFactory _dataReaderFactory;

    /// <summary>
    /// Creates the service and stores its collaborators.
    /// </summary>
    /// <param name="logger">Logger used for progress messages.</param>
    /// <param name="dataInputOptions">Input options (directory, meta builder, table order, ignore list, callback).</param>
    /// <param name="context">Process-wide context that receives the input counters.</param>
    /// <param name="producerQueue">Queue resolved with the <c>Const.ConstVar.Producer</c> service key; records are enqueued here.</param>
    /// <param name="dataReaderFactory">Factory that creates a reader for each input file.</param>
    public FileInputService(ILogger<FileInputService> logger,
        IOptions<DataInputOptions> dataInputOptions,
        ProcessContext context,
        [FromKeyedServices(Const.ConstVar.Producer)] DataRecordQueue producerQueue,
        DataReaderFactory dataReaderFactory)
    {
        _logger = logger;
        _dataInputOptions = dataInputOptions;
        _context = context;
        _producerQueue = producerQueue;
        _dataReaderFactory = dataReaderFactory;
    }

    /// <summary>
    /// Reads every matching file in the input directory and enqueues its records.
    /// Per-table counts are reported to the context, the optional
    /// <c>OnTableInputCompleted</c> callback is invoked after each file, and the
    /// context is told that input is complete once all files have been read.
    /// </summary>
    /// <param name="cancellationToken">Checked before each file and before each record is enqueued.</param>
    /// <exception cref="ApplicationException">
    /// The input directory or the file-name-to-table-name delegate is not configured.
    /// </exception>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="cancellationToken"/> was cancelled while the import was running.
    /// </exception>
    public async Task ExecuteAsync(CancellationToken cancellationToken)
    {
        var options = _dataInputOptions.Value;

        // Message: "input directory is not configured".
        var inputDir = options.InputDir ?? throw new ApplicationException("未配置文件输入目录");
        _logger.LogInformation("***** Input service started, working directory: {InputDir} *****", inputDir);

        var trans = options.FileInputMetaBuilder;
        if (trans is null)
        {
            // Message: "file-name-to-table-name mapping delegate is not configured".
            throw new ApplicationException("未配置文件名-表名映射委托");
        }

        // The builder returns null for files it does not recognise; OfType drops those
        // and narrows the element type without a null-forgiving operator.
        FileInputInfo[] infoArr = Directory.GetFiles(inputDir)
            .Select(f => trans(f))
            .OfType<FileInputInfo>()
            .ToArray();

        var orderedInfo = GetFilesInOrder(infoArr).ToArray();

        _logger.LogInformation("***** {Count} files founded in directory, {OrderedCount} files is matched with configuration *****", infoArr.Length, orderedInfo.Length);
        foreach (var info in orderedInfo)
        {
            _logger.LogDebug("Table {TableName}: {FileName}", info.TableName, info.FileName);
        }

        foreach (var info in orderedInfo)
        {
            // Do not open another file once a stop has been requested.
            cancellationToken.ThrowIfCancellationRequested();

            _logger.LogInformation("Reading file: {FileName}, table: {TableName}", info.FileName, info.TableName);
            using var source = _dataReaderFactory.CreateReader(info.FileName, info.TableName, info.Headers);

            var count = 0;
            while (await source.ReadAsync())
            {
                // A single file can be large; check per record so cancellation is prompt.
                cancellationToken.ThrowIfCancellationRequested();

                var record = source.Current;
                await _producerQueue.EnqueueAsync(record);
                count++;
                _context.AddInput();
            }

            _context.AddTableInput(info.TableName, count);
            _logger.LogInformation("Input of table: '{TableName}' finished", info.TableName);
            options.OnTableInputCompleted?.Invoke(info.TableName);
        }

        _context.CompleteInput();
        _logger.LogInformation("***** Input service finished *****");
    }

    /// <summary>
    /// Returns the input files in the order given by the configured <c>TableOrder</c>,
    /// skipping tables listed in <c>TableIgnoreList</c>. When no order is configured the
    /// input array is returned unchanged.
    /// </summary>
    /// <param name="inputFiles">Files recognised by the meta builder.</param>
    /// <returns>The files to read, in processing order.</returns>
    private IEnumerable<FileInputInfo> GetFilesInOrder(FileInputInfo[] inputFiles)
    {
        var tableOrder = _dataInputOptions.Value.TableOrder;
        var ignoreTable = _dataInputOptions.Value.TableIgnoreList;

        // NOTE(review): the ignore list is not applied on this path — confirm that is intended.
        if (tableOrder is null or { Length: 0 })
        {
            return inputFiles;
        }

        return Yield();

        IEnumerable<FileInputInfo> Yield()
        {
            foreach (var tableName in tableOrder)
            {
                // NOTE(review): only the first file per table is returned; if a table can be
                // split across several files the remaining ones are skipped — confirm.
                var target = inputFiles.FirstOrDefault(f =>
                    f.TableName.Equals(tableName, StringComparison.OrdinalIgnoreCase));
                if (target is null)
                {
                    continue;
                }

                // Use the same case-insensitive comparison as the table match above, and
                // treat a missing ignore list as "nothing ignored".
                var ignored = ignoreTable is not null
                              && ignoreTable.Contains(target.TableName, StringComparer.OrdinalIgnoreCase);
                if (!ignored)
                {
                    yield return target;
                }
            }
        }
    }
}