2023-12-29 16:16:05 +08:00
using System.Text ;
using ConsoleApp2.Helpers ;
using Microsoft.Extensions.Logging ;
namespace ConsoleApp2.Services ;
2024-01-04 09:00:44 +08:00
/// <summary>
/// Reads delimited text records line by line from a CSV file.
/// </summary>
/// <remarks>
/// The instance owns the underlying file handle; call <see cref="Dispose"/> when finished.
/// </remarks>
public class CsvSource : IDisposable
{
    private readonly string _filePath;
    private readonly StreamReader _reader;
    private readonly ILogger? _logger;
    private readonly string _tableName;

    /// <summary>The record produced by the most recent successful <see cref="ReadAsync"/> call.</summary>
    public DataRecord Current { get; private set; }

    /// <summary>Column headers handed to every <see cref="DataRecord"/>; may be <c>null</c>.</summary>
    public string[]? Headers { get; }

    /// <summary>The unparsed text of the most recently read line.</summary>
    public string? CurrentRaw { get; private set; }

    /// <summary>The field delimiter used when splitting a line.</summary>
    public string Delimiter { get; private set; }

    /// <summary>The character that encloses quoted fields.</summary>
    public char QuoteChar { get; private set; }

    /// <summary>
    /// Opens <paramref name="filePath"/> for reading and resolves the table name from it
    /// via <c>DumpDataHelper.GetTableName</c>.
    /// </summary>
    /// <param name="filePath">Path of the CSV file to read.</param>
    /// <param name="headers">Optional column headers passed to each record.</param>
    /// <param name="delimiter">Field delimiter; defaults to a comma.</param>
    /// <param name="quoteChar">Quote character; defaults to a double quote.</param>
    /// <param name="logger">Optional logger.</param>
    public CsvSource(string filePath, string[]? headers = null, string delimiter = ",", char quoteChar = '"',
        ILogger? logger = null)
    {
        _filePath = filePath;
        Headers = headers;
        _logger = logger;
        Delimiter = delimiter;
        QuoteChar = quoteChar;

        _reader = new StreamReader(File.OpenRead(filePath));
        try
        {
            _tableName = DumpDataHelper.GetTableName(filePath);
        }
        catch
        {
            // Do not leak the file handle when the table name cannot be resolved.
            _reader.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Reads the next line, parses it with <see cref="ParseRow2"/> and stores the result in
    /// <see cref="Current"/> (the raw text goes to <see cref="CurrentRaw"/>).
    /// </summary>
    /// <returns>
    /// <c>true</c> when a record was read; <c>false</c> at end of file or when the line read is
    /// null, empty or whitespace only.
    /// </returns>
    /// <remarks>
    /// NOTE(review): a blank line in the middle of the file also returns <c>false</c>, so a caller
    /// that loops until <c>false</c> stops there - confirm this is intended.
    /// </remarks>
    public async ValueTask<bool> ReadAsync()
    {
        var line = await _reader.ReadLineAsync();
        if (string.IsNullOrWhiteSpace(line))
        {
            return false;
        }

        CurrentRaw = line;
        var fields = ParseRow2(line, QuoteChar, Delimiter);
        Current = new DataRecord(fields, _tableName, Headers);
        return true;
    }

    /// <summary>
    /// Splits <paramref name="row"/> on every occurrence of <paramref name="delimiter"/>,
    /// without honouring quoting.
    /// </summary>
    /// <param name="row">The line to split.</param>
    /// <param name="quoteChar">Quote character stripped from fields that are fully enclosed by it.</param>
    /// <param name="delimiter">Field delimiter (ordinal match).</param>
    /// <returns>The parsed fields.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="row"/> is null or empty, or <paramref name="delimiter"/> is null or empty.
    /// </exception>
    /// <remarks>
    /// Quoting is ignored while splitting, so the delimiter must be distinctive enough not to
    /// occur inside field content. Every field, including the first and the last, is handled the
    /// same way: a field of at least two characters that starts and ends with
    /// <paramref name="quoteChar"/> loses those two characters, and the literal <c>\N</c> becomes
    /// <c>NULL</c>. Afterwards, fields accepted by both <c>DumpDataHelper.CheckHexField</c> and
    /// <c>StringExtensions.CheckJsonHex</c> are replaced by <c>StringExtensions.FromHex</c>.
    /// </remarks>
    public string[] ParseRow(string row, char quoteChar, string delimiter)
    {
        if (string.IsNullOrEmpty(row))
        {
            throw new ArgumentException("The row is empty", nameof(row));
        }

        if (string.IsNullOrEmpty(delimiter))
        {
            // An empty delimiter would match at every position and never advance the scan.
            throw new ArgumentException("The delimiter must not be empty", nameof(delimiter));
        }

        var span = row.AsSpan();
        var separator = delimiter.AsSpan();
        var result = new List<string>();
        var start = 0;
        var position = 0;

        while (position < span.Length)
        {
            // A position holding the quote character is never treated as the start of a delimiter.
            var atDelimiter = span[position] != quoteChar
                              && span[position..].StartsWith(separator, StringComparison.Ordinal);
            if (!atDelimiter)
            {
                position++;
                continue;
            }

            result.Add(NormalizeField(span[start..position], quoteChar));

            // Skip the whole delimiter so none of its characters end up in the next field.
            position += separator.Length;
            start = position;
        }

        result.Add(NormalizeField(span[start..], quoteChar));

        for (var i = 0; i < result.Count; i++)
        {
            var field = result[i];
            if (DumpDataHelper.CheckHexField(field) && StringExtensions.CheckJsonHex(field))
            {
                result[i] = StringExtensions.FromHex(field);
            }
        }

        return result.ToArray();
    }

    /// <summary>
    /// Splits <paramref name="source"/> on <paramref name="delimiter"/>, ignoring delimiters that
    /// occur between unescaped quote characters.
    /// </summary>
    /// <param name="source">The line to split.</param>
    /// <param name="quoteChar">Quote character; an unescaped occurrence toggles the quoted state.</param>
    /// <param name="delimiter">Field delimiter; matched in full using ordinal comparison.</param>
    /// <returns>The fields, with quote characters and backslashes kept exactly as they appear.</returns>
    /// <exception cref="ArgumentException"><paramref name="delimiter"/> is null or empty.</exception>
    /// <remarks>
    /// A backslash marks the following character as escaped: that character is not treated as a
    /// quote and is not treated as a second backslash. An escaped delimiter outside quotes still
    /// splits the row.
    /// </remarks>
    public string[] ParseRow2(ReadOnlySpan<char> source, char quoteChar, string delimiter)
    {
        if (string.IsNullOrEmpty(delimiter))
        {
            throw new ArgumentException("The delimiter must not be empty", nameof(delimiter));
        }

        var separator = delimiter.AsSpan();
        var fields = new List<string>();
        var current = new StringBuilder();
        var inQuote = false;
        var escaped = false;

        for (var index = 0; index < source.Length; index++)
        {
            var ch = source[index];

            if (!escaped && ch == '\\')
            {
                escaped = true;
                current.Append(ch);
                continue;
            }

            if (!escaped && ch == quoteChar)
            {
                inQuote = !inQuote;
                current.Append(ch);
                continue;
            }

            if (!inQuote && source[index..].StartsWith(separator, StringComparison.Ordinal))
            {
                fields.Add(current.ToString());
                current.Clear();

                // Consume the remaining characters of a multi-character delimiter.
                index += separator.Length - 1;
            }
            else
            {
                current.Append(ch);
            }

            escaped = false;
        }

        fields.Add(current.ToString());
        return fields.ToArray();
    }

    /// <summary>
    /// Closes the reader and the file stream it wraps.
    /// </summary>
    public void Dispose()
    {
        _reader.Dispose();
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Strips one pair of enclosing quote characters, if present, and maps <c>\N</c> to <c>NULL</c>.
    /// </summary>
    private static string NormalizeField(ReadOnlySpan<char> raw, char quoteChar)
    {
        // Require at least two characters so a lone quote is not treated as both opener and closer.
        if (raw.Length >= 2 && raw[0] == quoteChar && raw[^1] == quoteChar)
        {
            raw = raw[1..^1];
        }

        var field = raw.ToString();
        return field == "\\N" ? "NULL" : field;
    }
}