|
// Adapted from PowerShell's Import-Csv implementation to be used in standalone C# projects. |
|
// https://github.com/PowerShell/PowerShell/blob/4838a8d5f6c95339d4b44c8b402165e69e6ff929/src/Microsoft.PowerShell.Commands.Utility/commands/utility/CsvCommands.cs |
|
using System; |
|
using System.Collections.Generic; |
|
using System.Globalization; |
|
using System.IO; |
|
using System.Linq.Expressions; |
|
using System.Management.Automation; |
|
using System.Reflection; |
|
using System.Text; |
|
|
|
namespace CsvUtils; |
|
|
|
public static class Parser |
|
{ |
|
// Initial capacity for the per-record field list.
private const int ValueCountGuestimate = 16;

// Initial capacity for the per-field character buffer.
private const int LineLengthGuestimate = 256;

// The three line terminators recognised by IsNewLine.
private const string CRLF = "\r\n";
private const string LF = "\n";
private const string CR = "\r";

// Property types that GetArgumentExpression converts via their static
// Parse(string, NumberStyles, IFormatProvider) overload.
private static readonly HashSet<Type> s_numericTypes =
[
    typeof(sbyte),
    typeof(byte),
    typeof(short),
    typeof(ushort),
    typeof(int),
    typeof(uint),
    typeof(long),
    typeof(ulong),
    typeof(float),
    typeof(double),
    typeof(decimal),
];
|
|
|
/// <summary>
/// Reads a comma-delimited file with the given encoding and maps each record
/// onto a new <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Row type; its public writable properties are matched to header names.</typeparam>
/// <param name="path">Path of the file to read.</param>
/// <param name="encoding">Text encoding used to decode the file.</param>
/// <returns>A lazily evaluated sequence with one <typeparamref name="T"/> per record.</returns>
/// <remarks>
/// The delimiter is passed positionally on purpose. A call of the form
/// <c>Read&lt;T&gt;(path, encoding: encoding)</c> binds back to this two-parameter
/// overload (it needs no default-argument substitution, so overload resolution
/// prefers it) and would recurse until the stack overflows.
/// </remarks>
public static IEnumerable<T> Read<T>(string path, Encoding encoding)
    where T : class, new()
    => Read<T>(path, ',', encoding);
|
|
|
/// <summary>
/// Lazily reads a delimited text file and maps each record onto a new
/// <typeparamref name="T"/> by assigning field values to the properties named
/// in the header record.
/// </summary>
/// <typeparam name="T">Row type; must have a public parameterless constructor.</typeparam>
/// <param name="path">Path of the file to read.</param>
/// <param name="delimiter">Field separator character; defaults to a comma.</param>
/// <param name="encoding">Text encoding; UTF-8 is used when <see langword="null"/>.</param>
/// <returns>
/// A deferred sequence of populated objects. The file is opened when enumeration
/// starts and closed when the enumerator is disposed.
/// </returns>
/// <remarks>
/// Records consisting of a single empty field (blank lines) are skipped. Fields
/// beyond the header width are ignored, and a record with fewer fields than the
/// header leaves the remaining properties at their default values.
/// </remarks>
public static IEnumerable<T> Read<T>(
    string path,
    char delimiter = ',',
    Encoding? encoding = null)
    where T : class, new()
{
    using StreamReader sr = new(path, encoding ?? Encoding.UTF8);
    string[] header = ReadHeader(sr, delimiter);
    List<string> values = new(ValueCountGuestimate);
    StringBuilder builder = new(LineLengthGuestimate);
    Action<T, string>[] setters = [.. GetSetters<T>(header)];

    while (true)
    {
        ParseNextRecord(sr, values, builder, delimiter);

        // An empty record is only produced once the stream is exhausted.
        if (values.Count == 0)
        {
            break;
        }

        // Blank line: a single empty field.
        if (values.Count == 1 && string.IsNullOrEmpty(values[0]))
        {
            continue;
        }

        T outobj = new();

        // Bound by both counts so that a short record does not index past the
        // end of the parsed field list.
        for (int i = 0; i < setters.Length && i < values.Count; i++)
        {
            setters[i](outobj, values[i]);
        }

        yield return outobj;
    }
}
|
|
|
/// <summary>
/// Builds, for each header name, a compiled delegate that converts a raw field
/// string and assigns it to the matching public instance property of
/// <typeparamref name="T"/> (matched case-insensitively).
/// </summary>
/// <typeparam name="T">Type whose properties are being bound.</typeparam>
/// <param name="properties">Header names, in column order.</param>
/// <returns>One setter delegate per header name, in the same order.</returns>
/// <exception cref="ArgumentNullException">A header name has no matching public instance property.</exception>
/// <exception cref="ArgumentException">The matching property is read-only or has no public setter.</exception>
private static IEnumerable<Action<T, string>> GetSetters<T>(string[] properties)
{
    Type type = typeof(T);

    foreach (string property in properties)
    {
        // Use the (paramName, message) overload: the single-string constructor
        // treats its argument as the parameter name, not as the message.
        PropertyInfo info = type.GetProperty(
            property,
            BindingFlags.IgnoreCase | BindingFlags.Instance | BindingFlags.Public)
            ?? throw new ArgumentNullException(
                nameof(properties),
                $"Property '{property}' not found on type '{type}'.");

        if (!info.CanWrite)
        {
            throw new ArgumentException($"Property '{property}' on '{type}' is read-only.");
        }

        // GetSetMethod() returns only a public setter; a non-public one yields null.
        MethodInfo setter = info.GetSetMethod()
            ?? throw new ArgumentException($"Setter for '{property}' is inaccessible.");

        // (target, value) => target.Property = Convert(value)
        ParameterExpression target = Expression.Parameter(type, "target");
        ParameterExpression value = Expression.Parameter(typeof(string), "value");
        MethodCallExpression body = Expression.Call(
            target,
            setter,
            GetArgumentExpression(info.PropertyType, value));

        yield return Expression
            .Lambda<Action<T, string>>(body, target, value)
            .Compile();
    }
}
|
|
|
/// <summary>
/// Produces the expression that converts the raw string field
/// <paramref name="value"/> into <paramref name="type"/>.
/// </summary>
/// <param name="type">Declared type of the destination property.</param>
/// <param name="value">Parameter expression holding the raw field text.</param>
/// <returns>
/// The parameter itself for <see cref="string"/> and <see cref="object"/>;
/// otherwise a call to the matching Parse method or, as a last resort, to
/// <c>LanguagePrimitives.ConvertTo</c>.
/// </returns>
/// <exception cref="InvalidOperationException">
/// A type listed as numeric has no Parse(string, NumberStyles, IFormatProvider) overload.
/// </exception>
private static Expression GetArgumentExpression(
    Type type,
    ParameterExpression value)
{
    // Strings (and object-typed properties) take the field text unchanged.
    if (type == typeof(string) || type == typeof(object))
    {
        return value;
    }

    // Numeric types: T.Parse(value, NumberStyles.Any, InvariantCulture).
    if (s_numericTypes.Contains(type))
    {
        Type[] signature = [typeof(string), typeof(NumberStyles), typeof(IFormatProvider)];
        MethodInfo? parse = type.GetMethod("Parse", signature);
        if (parse is null)
        {
            throw new InvalidOperationException($"No suitable Parse overload found on {type}");
        }

        ConstantExpression styles = Expression.Constant(NumberStyles.Any);
        ConstantExpression culture = Expression.Constant(CultureInfo.InvariantCulture);
        return Expression.Call(parse, value, styles, culture);
    }

    // DateTime.Parse(value, InvariantCulture, DateTimeStyles.None).
    if (type == typeof(DateTime))
    {
        return Expression.Call(
            typeof(DateTime),
            nameof(DateTime.Parse),
            null,
            value,
            Expression.Constant(CultureInfo.InvariantCulture),
            Expression.Constant(DateTimeStyles.None));
    }

    if (type == typeof(bool))
    {
        return Expression.Call(typeof(bool), nameof(bool.Parse), null, value);
    }

    if (type == typeof(Guid))
    {
        return Expression.Call(typeof(Guid), nameof(Guid.Parse), null, value);
    }

    // Everything else is delegated to the generic ConvertTo method, closed over the target type.
    return Expression.Call(
        typeof(LanguagePrimitives),
        nameof(LanguagePrimitives.ConvertTo),
        [type],
        value);
}
|
|
|
/// <summary>
/// Reports whether the next unread character of <paramref name="sr"/> equals
/// <paramref name="c"/>, without consuming it.
/// </summary>
/// <param name="sr">Reader to peek into.</param>
/// <param name="c">Character to compare against.</param>
/// <returns><see langword="false"/> when nothing can be peeked or the character differs.</returns>
private static bool NextCharIs(this StreamReader sr, char c)
    // Peek() yields -1 when no character is available; a char widened to int is
    // never negative, so a direct comparison covers that case too.
    => sr.Peek() == c;
|
|
|
/// <summary>
/// Reads records from the start of the stream until it finds the header record
/// and returns its column names.
/// </summary>
/// <param name="sr">Reader positioned at the start of the data.</param>
/// <param name="delimiter">Field separator character.</param>
/// <returns>
/// The header names with trailing empty names removed; empty when the stream
/// holds no header record.
/// </returns>
/// <remarks>
/// A record whose first field starts with "#Fields: " is taken as the header
/// with that prefix stripped. Any other record whose first field starts with
/// '#' is treated as a comment and skipped.
/// </remarks>
/// <exception cref="ArgumentOutOfRangeException">The header contains a duplicated name.</exception>
private static string[] ReadHeader(StreamReader sr, char delimiter)
{
    const string FieldsPrefix = "#Fields: ";

    List<string> values = new(ValueCountGuestimate);
    StringBuilder builder = new(LineLengthGuestimate);

    while (!sr.EndOfStream)
    {
        ParseNextRecord(sr, values, builder, delimiter);

        // Drop trailing empty column names, always keeping at least one entry.
        while (values.Count > 1 && values[^1] == string.Empty)
        {
            values.RemoveAt(values.Count - 1);
        }

        // Ordinal comparison: the prefix is a fixed marker, not linguistic text.
        if (values.Count != 0 && values[0].StartsWith(FieldsPrefix, StringComparison.Ordinal))
        {
            values[0] = values[0][FieldsPrefix.Length..];
            break;
        }

        if (values.Count != 0 && values[0].StartsWith('#'))
        {
            // Forget the comment so it is not mistaken for the header when it
            // is the last record in the stream.
            values.Clear();
            continue;
        }

        break;
    }

    ValidatePropertyNames(values);
    return [.. values];
}
|
|
|
/// <summary>
/// Verifies that no non-empty header name occurs more than once, comparing
/// names case-insensitively.
/// </summary>
/// <param name="names">Header names to check; empty names are ignored.</param>
/// <exception cref="ArgumentOutOfRangeException">A name appears more than once.</exception>
private static void ValidatePropertyNames(List<string> names)
{
    HashSet<string> headers = new(StringComparer.OrdinalIgnoreCase);

    foreach (string currentHeader in names)
    {
        if (string.IsNullOrEmpty(currentHeader))
        {
            continue;
        }

        if (!headers.Add(currentHeader))
        {
            // Use the (paramName, message) overload: the single-string constructor
            // treats its argument as the parameter name, not as the message.
            throw new ArgumentOutOfRangeException(
                nameof(names),
                $"'{currentHeader}' is duplicated in header.");
        }
    }
}
|
|
|
/// <summary>
/// Parses the next record from <paramref name="sr"/> into
/// <paramref name="result"/>, one entry per field.
/// </summary>
/// <param name="sr">Reader to consume characters from.</param>
/// <param name="result">Cleared on entry, then filled with the record's fields.</param>
/// <param name="current">Scratch buffer for the field being built; cleared on entry.</param>
/// <param name="delimiter">Field separator character.</param>
/// <remarks>
/// A field that starts with a double quote may contain delimiters, blanks and
/// line breaks; a doubled quote inside it stands for one literal quote.
/// Leading blanks of an unquoted field are skipped and its trailing blanks are
/// removed. <paramref name="result"/> is left empty only when the stream was
/// already exhausted or held nothing but blanks.
/// </remarks>
private static void ParseNextRecord(
    StreamReader sr,
    List<string> result,
    StringBuilder current,
    char delimiter)
{
    result.Clear();
    current.Clear();
    bool seenBeginQuote = false;

    // Reads the rest of the current field, stores it, and reports whether the
    // record ended with it.
    bool FinishField(bool eatTrailingBlanks)
    {
        bool recordEnded = false;
        ReadTillNextDelimiter(sr, current, ref recordEnded, eatTrailingBlanks, delimiter);
        result.Add(current.ToString());
        current.Clear();
        return recordEnded;
    }

    while (!sr.EndOfStream)
    {
        char ch = (char)sr.Read();

        if (ch == delimiter)
        {
            if (seenBeginQuote)
            {
                // Delimiters inside quotes are literal text.
                current.Append(ch);
                continue;
            }

            result.Add(current.ToString());
            current.Clear();
            continue;
        }

        if (ch == '"')
        {
            if (seenBeginQuote)
            {
                if (sr.NextCharIs('"'))
                {
                    // Doubled quote inside a quoted field: one literal quote.
                    sr.Read();
                    current.Append('"');
                    continue;
                }

                // Closing quote: take whatever follows up to the next delimiter.
                seenBeginQuote = false;
                if (FinishField(eatTrailingBlanks: true))
                {
                    break;
                }

                continue;
            }

            if (current.Length == 0)
            {
                // Quote at the start of a field opens a quoted field.
                seenBeginQuote = true;
                continue;
            }

            // Quote in the middle of an unquoted field is kept verbatim.
            current.Append(ch);
            if (FinishField(eatTrailingBlanks: false))
            {
                break;
            }

            continue;
        }

        if (ch == ' ' || ch == '\t')
        {
            if (seenBeginQuote)
            {
                current.Append(ch);
                continue;
            }

            // Leading blanks of an unquoted field are skipped.
            if (current.Length == 0)
            {
                continue;
            }

            bool endOfRecord = false;
            int lengthBeforeBlank = current.Length;
            current.Append(ch);
            ReadTillNextDelimiter(sr, current, ref endOfRecord, true, delimiter);

            // ReadTillNextDelimiter trims only the blanks it appended itself.
            // If nothing survives after the blank appended above, that blank
            // is trailing as well and must go too.
            if (current.Length == lengthBeforeBlank + 1)
            {
                current.Length = lengthBeforeBlank;
            }

            result.Add(current.ToString());
            current.Clear();

            if (endOfRecord)
            {
                break;
            }

            continue;
        }

        if (IsNewLine(sr, ch, out string newLine))
        {
            if (seenBeginQuote)
            {
                // Line breaks inside quotes belong to the field.
                current.Append(newLine);
                continue;
            }

            result.Add(current.ToString());
            current.Clear();
            break;
        }

        current.Append(ch);
    }

    // Flush a final field that was cut off by the end of the stream.
    if (current.Length != 0)
    {
        result.Add(current.ToString());
    }
}
|
|
|
/// <summary>
/// Appends characters from <paramref name="sr"/> to <paramref name="current"/>
/// until a delimiter, a line break or the end of the stream is reached. The
/// terminating delimiter or line break is consumed but not appended.
/// </summary>
/// <param name="sr">Reader to consume characters from.</param>
/// <param name="current">Buffer receiving the characters.</param>
/// <param name="endOfRecord">Set to <see langword="true"/> when a line break stopped the scan; otherwise left untouched.</param>
/// <param name="eatTrailingBlanks">When set, the run of spaces/tabs appended last by this call is removed again.</param>
/// <param name="delimiter">Field separator character.</param>
private static void ReadTillNextDelimiter(
    StreamReader sr,
    StringBuilder current,
    ref bool endOfRecord,
    bool eatTrailingBlanks,
    char delimiter)
{
    // Length of the blank run at the end of what this call has appended so far.
    int trailingBlanks = 0;

    while (!sr.EndOfStream)
    {
        char next = (char)sr.Read();

        if (next == delimiter)
        {
            break;
        }

        if (IsNewLine(sr, next, out _))
        {
            endOfRecord = true;
            break;
        }

        current.Append(next);

        bool isBlank = next is ' ' or '\t';
        trailingBlanks = isBlank ? trailingBlanks + 1 : 0;
    }

    if (eatTrailingBlanks && trailingBlanks > 0)
    {
        current.Length -= trailingBlanks;
    }
}
|
|
|
/// <summary>
/// Determines whether <paramref name="ch"/> begins a line terminator. For a
/// carriage return directly followed by a line feed, the line feed is consumed
/// from <paramref name="sr"/> as well.
/// </summary>
/// <param name="sr">Reader positioned just after <paramref name="ch"/>.</param>
/// <param name="ch">Character that was just read.</param>
/// <param name="newLine">The terminator text that was recognised, or an empty string when there was none.</param>
/// <returns><see langword="true"/> when a line terminator was recognised.</returns>
private static bool IsNewLine(
    StreamReader sr,
    char ch,
    out string newLine)
{
    switch (ch)
    {
        case '\n':
            newLine = LF;
            return true;

        case '\r':
            if (sr.NextCharIs('\n'))
            {
                // Consume the line feed that completes the CR LF pair.
                sr.Read();
                newLine = CRLF;
            }
            else
            {
                newLine = CR;
            }

            return true;

        default:
            newLine = string.Empty;
            return false;
    }
}
|
} |