Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 57 additions & 1 deletion RegExtract.Test/Usage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,62 @@ public class Usage
const string pattern_nested = "(((.)(.)(.)(.)(.)(.)(.)(.)(.)))";
const string pattern_named = "(?<n>(?<s>(?<a>.)(?<b>.)(?<c>.)(?<d>.)(?<e>.)(?<f>.)(?<g>.)(?<h>.)(?<i>.)))";

[Fact]
public void can_try_extract_to_tuple_using_extension()
{
    // Act: extract straight from the input string through the TryExtract extension method.
    bool succeeded = data.TryExtract<(int a, char b, string c, int d, char e, string f, int g, char h, string i)>(pattern, out var tuple);

    // Assert: the call reports success ...
    Assert.True(succeeded);

    // ... and every tuple element has the requested type and the expected value.
    Assert.IsType<int>(tuple.a);
    Assert.Equal(1, tuple.a);

    Assert.IsType<char>(tuple.b);
    Assert.Equal('2', tuple.b);

    Assert.IsType<string>(tuple.c);
    Assert.Equal("3", tuple.c);

    Assert.IsType<int>(tuple.d);
    Assert.Equal(4, tuple.d);

    Assert.IsType<char>(tuple.e);
    Assert.Equal('5', tuple.e);

    Assert.IsType<string>(tuple.f);
    Assert.Equal("6", tuple.f);

    Assert.IsType<int>(tuple.g);
    Assert.Equal(7, tuple.g);

    Assert.IsType<char>(tuple.h);
    Assert.Equal('8', tuple.h);

    Assert.IsType<string>(tuple.i);
    Assert.Equal("9", tuple.i);
}
[Fact]
public void can_try_extract_to_tuple()
{
    // Arrange: build the extraction plan explicitly from a Regex instance.
    var tuplePlan = ExtractionPlan<(int a, char b, string c, int d, char e, string f, int g, char h, string i)>.CreatePlan(new Regex(pattern));

    // Act
    bool succeeded = tuplePlan.TryExtract(data, out var tuple);

    // Assert: the call reports success ...
    Assert.True(succeeded);

    // ... and every tuple element has the requested type and the expected value.
    Assert.IsType<int>(tuple.a);
    Assert.Equal(1, tuple.a);

    Assert.IsType<char>(tuple.b);
    Assert.Equal('2', tuple.b);

    Assert.IsType<string>(tuple.c);
    Assert.Equal("3", tuple.c);

    Assert.IsType<int>(tuple.d);
    Assert.Equal(4, tuple.d);

    Assert.IsType<char>(tuple.e);
    Assert.Equal('5', tuple.e);

    Assert.IsType<string>(tuple.f);
    Assert.Equal("6", tuple.f);

    Assert.IsType<int>(tuple.g);
    Assert.Equal(7, tuple.g);

    Assert.IsType<char>(tuple.h);
    Assert.Equal('8', tuple.h);

    Assert.IsType<string>(tuple.i);
    Assert.Equal("9", tuple.i);
}

[Fact]
public void can_parse_lookbehind()
{
Expand Down Expand Up @@ -140,7 +196,7 @@ record PropertiesRecord
}

// Don't currently handle nested named captures, and I'm not sure we ever will.
//[Fact]
// [Fact]
//public void can_extract_named_capture_groups_to_properties()
//{
// PropertiesRecord? record = data.Extract<PropertiesRecord>(pattern_named);
Expand Down
22 changes: 22 additions & 0 deletions RegExtract/ExtractionPlan.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,28 @@ public T Extract(Match match)
return (T)Plan.Execute(match)!;
}

/// <summary>
/// Attempts to extract a <typeparamref name="T"/> from <paramref name="str"/> using this plan's regex.
/// </summary>
/// <param name="str">The input text to match.</param>
/// <param name="result">The extracted value on success; <c>default</c> otherwise.</param>
/// <returns>
/// <c>true</c> when the regex matched and the plan produced a value; <c>false</c> when the regex
/// did not match or the plan reported failure.
/// </returns>
public bool TryExtract(string str, out T result)
{
    result = default!;

    // When no tree is available, fall back to an empty-pattern match on the empty string.
    var match = _tree?.Regex.Match(str) ?? Regex.Match("", "");

    // Plan.TryExecute throws on an unsuccessful match, so test Success first: a Try method
    // should report "no match" through its return value rather than an exception.
    if (!match.Success || !Plan.TryExecute(match, out var temp))
    {
        return false;
    }

    result = (T)temp!;
    return true;
}

/// <summary>
/// Attempts to extract a <typeparamref name="T"/> from an already-evaluated <paramref name="match"/>.
/// </summary>
/// <param name="match">The regex match to read captures from.</param>
/// <param name="result">The extracted value on success; <c>default</c> otherwise.</param>
/// <returns>
/// <c>true</c> when <paramref name="match"/> succeeded and the plan produced a value;
/// <c>false</c> when the match was unsuccessful or the plan reported failure.
/// </returns>
public bool TryExtract(Match match, out T result)
{
    result = default!;

    // Plan.TryExecute throws on an unsuccessful match, so test Success first: a Try method
    // should report "no match" through its return value rather than an exception.
    if (!match.Success || !Plan.TryExecute(match, out var temp))
    {
        return false;
    }

    result = (T)temp!;
    return true;
}

static public ExtractionPlan<T> CreatePlan(Regex regex, RegExtractOptions reOptions= RegExtractOptions.None)
{
ExtractionPlan<T> plan = new ExtractionPlan<T>();
Expand Down
72 changes: 72 additions & 0 deletions RegExtract/ExtractionPlanNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,83 @@ internal virtual void Validate()
return;
}

/// <summary>
/// Default "try" construction: delegates to <c>Construct</c> and always reports success.
/// Any exception thrown by <c>Construct</c> propagates to the caller.
/// </summary>
internal virtual bool TryConstruct(Match match, Type type, (string Value, int Index, int Length) range, out object? result)
{
    object? constructed = Construct(match, type, range);
    result = constructed;
    return true;
}

/// <summary>
/// Base implementation; always throws <see cref="InvalidOperationException"/>.
/// </summary>
internal virtual object? Construct(Match match, Type type, (string Value, int Index, int Length) range) =>
    throw new InvalidOperationException("Can't construct a node based on base ExtractionPlanNode type.");

/// <summary>
/// Attempts to build this node's value from the captures of <c>groupName</c> that lie entirely
/// inside the window starting at <paramref name="captureStart"/> and spanning
/// <paramref name="captureLength"/> characters.
/// </summary>
/// <param name="match">The successful regex match to read captures from.</param>
/// <param name="captureStart">Start index of the enclosing capture window.</param>
/// <param name="captureLength">Length of the enclosing capture window.</param>
/// <param name="result">The constructed value (or collection of values) on success.</param>
/// <returns>
/// <c>true</c> when a value was produced; <c>false</c> when <c>TryConstruct</c> failed for a
/// capture, or when a non-collection class/nullable node has no capture inside the window.
/// </returns>
internal virtual bool TryExecute(Match match, int captureStart, int captureLength, out object? result)
{
    // Materialized once because the sequence is inspected several times below (Any/Last/foreach).
    var ranges = AsEnumerable(match.Groups[groupName].Captures)
        .Where(cap => cap.Index >= captureStart && cap.Index + cap.Length <= captureStart + captureLength)
        .Select(cap => (cap.Value, cap.Index, cap.Length))
        .ToList();
    Type innerType = IsNullable(type) ? type.GetGenericArguments().Single() : type;

    if (!IsCollection(type))
    {
        if (!ranges.Any())
        {
            if (type.IsClass || Nullable.GetUnderlyingType(type) != null)
            {
                // NOTE(review): a missing optional capture is reported as failure here; confirm
                // this is intended rather than "success with null".
                result = null;
                return false;
            }

            // NOTE(review): Convert.ChangeType(null, ...) throws InvalidCastException for value
            // types, so this path may throw instead of returning false - confirm intent.
            result = Convert.ChangeType(null, type);
            return true;
        }

        // When the group captured several times, the last capture wins.
        var lastRange = ranges.Last();
        if (!TryConstruct(match, innerType, lastRange, out result))
        {
            return false;
        }

        // NOTE(review): property values still use the throwing Execute path.
        foreach (var prop in propertyNodes)
        {
            result!.GetType().GetProperty(prop.groupName).GetSetMethod().Invoke(result, new[] { prop.Execute(match, lastRange.Index, lastRange.Length) });
        }

        return true;
    }

    // Collection: construct one item per capture and Add() it to a fresh instance of the collection type.
    result = null;
    var itemType = type.GetGenericArguments().Single();
    var vals = Activator.CreateInstance(type);
    var addMethod = type.GetMethod("Add");
    foreach (var range in ranges)
    {
        if (!TryConstruct(match, itemType, range, out var itemVal))
        {
            return false;
        }

        foreach (var prop in propertyNodes)
        {
            // The setter must target the freshly constructed item; 'result' is still null at this point.
            itemVal!.GetType().GetProperty(prop.groupName).GetSetMethod().Invoke(itemVal, new[] { prop.Execute(match, range.Index, range.Length) });
        }

        addMethod.Invoke(vals, new[] { itemVal });
    }

    result = vals;
    return true;
}

/// <summary>
/// Attempts to execute this node against a whole <paramref name="match"/>, using the span of
/// this node's own group as the capture window.
/// </summary>
/// <param name="match">The regex match to read captures from.</param>
/// <param name="result">The constructed value on success; <c>null</c> when the match was unsuccessful.</param>
/// <returns>
/// <c>false</c> when <paramref name="match"/> did not succeed; otherwise whatever the windowed
/// <c>TryExecute</c> overload reports.
/// </returns>
public bool TryExecute(Match match, out object? result)
{
    // A Try method reports "no match" through its return value instead of throwing.
    if (!match.Success)
    {
        result = null;
        return false;
    }

    var group = match.Groups[groupName];
    return TryExecute(match, group.Index, group.Length, out result);
}

internal virtual object? Execute(Match match, int captureStart, int captureLength)
{
object? result = null;
Expand Down
26 changes: 26 additions & 0 deletions RegExtract/ExtractionPlanNodeTypes.cs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ internal override void Validate()
internal record EnumParseNode(string groupName, Type type, ExtractionPlanNode[] constructorParams, ExtractionPlanNode[] propertySetters) :
ExtractionPlanNode(groupName, type, constructorParams, propertySetters)
{
/// <summary>
/// Attempts to parse the captured text as a member of the enum <paramref name="type"/>.
/// </summary>
/// <returns>
/// <c>true</c> with the parsed value in <paramref name="result"/>; <c>false</c> (and a
/// <c>null</c> result) when <c>Construct</c> rejects the captured text.
/// </returns>
internal override bool TryConstruct(Match match, Type type, (string Value, int Index, int Length) range, out object? result)
{
    try
    {
        result = Construct(match, type, range);
        return true;
    }
    // Enum.Parse signals an unparsable value with ArgumentException and an out-of-range
    // numeric value with OverflowException. They are caught here (rather than calling the
    // non-generic Enum.TryParse) because that overload is not available on every target framework.
    catch (Exception ex) when (ex is ArgumentException || ex is OverflowException)
    {
        result = null;
        return false;
    }
}
internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range)
{
return Enum.Parse(type, range.Value);
Expand Down Expand Up @@ -138,6 +143,27 @@ internal override void Validate()
internal record StaticParseMethodNode(string groupName, Type type, ExtractionPlanNode[] constructorParams, ExtractionPlanNode[] propertySetters) :
ExtractionPlanNode(groupName, type, constructorParams, propertySetters)
{
/// <summary>
/// Attempts to construct a value from the captured text. For types in the <c>System</c>
/// namespace that expose a public static <c>TryParse(string, out T)</c>, that method is
/// invoked; for every other type the call falls back to <c>Construct</c>.
/// </summary>
/// <returns>
/// The <c>TryParse</c> outcome when it is used; otherwise <c>true</c> once <c>Construct</c>
/// returns (exceptions from <c>Construct</c> propagate).
/// </returns>
internal override bool TryConstruct(Match match, Type type, (string Value, int Index, int Length) range, out object? result)
{
    // Unwrap collection and nullable wrappers to reach the type that is actually parsed.
    type = IsCollection(type) ? type.GetGenericArguments().Single() : type;
    type = IsNullable(type) ? type.GetGenericArguments().Single() : type;

    // MakeByRefType() builds the "out T" parameter type directly. Looking it up by name with
    // Type.GetType can return null for types outside the calling/core assembly.
    var tryParse = type.Namespace == "System"
        ? type.GetMethod(
            "TryParse",
            BindingFlags.Static | BindingFlags.Public,
            null,
            new Type[] { typeof(string), type.MakeByRefType() },
            null)
        : null;

    if (tryParse is null)
    {
        // No usable TryParse(string, out T): use the throwing construction path.
        result = Construct(match, type, range);
        return true;
    }

    // Slot 1 receives the out parameter value after the reflective call.
    var args = new object[] { range.Value, null! };
    var ok = (bool)tryParse.Invoke(null, args)!;
    result = args[1];
    return ok;
}
internal override object? Construct(Match match, Type type, (string Value, int Index, int Length) range)
{
type = IsCollection(type) ? type.GetGenericArguments().Single() : type;
Expand Down
78 changes: 77 additions & 1 deletion RegExtract/RegExtractExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,87 @@ public static IEnumerable<T> Extract<T>(this IEnumerable<string> str, RegExtract
var rx = GetRegexFromType(typeof(T));
return Extract<T>(str, rx, options);
}

/// <summary>
/// Lazily extracts a <typeparamref name="T"/> from each input string using <paramref name="rx"/>.
/// The plan is created once up front; matching and extraction run as the result is enumerated.
/// </summary>
public static IEnumerable<T> Extract<T>(this IEnumerable<string> str, Regex rx, RegExtractOptions options = RegExtractOptions.None)
{
    ExtractionPlan<T> extractionPlan = ExtractionPlan<T>.CreatePlan(rx, options);
    return from input in str
           select extractionPlan.Extract(rx.Match(input));
}

/// <summary>
/// Attempts to extract a <typeparamref name="T"/> from <paramref name="str"/> using the pattern
/// <paramref name="rx"/> with <see cref="RegexOptions.None"/>; forwards to the overload that
/// takes explicit <see cref="RegexOptions"/>.
/// </summary>
public static bool TryExtract<T>(this string str, string rx, out T result, RegExtractOptions options = RegExtractOptions.None) =>
    TryExtract<T>(str, rx, RegexOptions.None, out result, options);

/// <summary>
/// Attempts to extract a <typeparamref name="T"/> from <paramref name="str"/> using the pattern
/// <paramref name="rx"/> compiled with <paramref name="rxOptions"/>.
/// </summary>
/// <param name="str">The input text to match.</param>
/// <param name="rx">The regular expression pattern.</param>
/// <param name="rxOptions">Regex options used for both the plan and the match.</param>
/// <param name="result">Receives the value produced by the plan's <c>TryExtract</c>.</param>
/// <param name="options">Extraction options forwarded to <c>CreatePlan</c>.</param>
/// <returns>The value returned by the plan's <c>TryExtract</c>.</returns>
public static bool TryExtract<T>(this string str, string rx, RegexOptions rxOptions, out T result, RegExtractOptions options = RegExtractOptions.None)
{
    // One Regex instance backs both the plan and the match, so options that change group
    // layout (e.g. RegexOptions.ExplicitCapture) cannot make the two disagree.
    var regex = new Regex(rx, rxOptions);
    var plan = ExtractionPlan<T>.CreatePlan(regex, options);
    return plan.TryExtract(regex.Match(str), out result);
}

/// <summary>
/// Attempts to extract a <typeparamref name="T"/> from <paramref name="str"/> using the
/// supplied <paramref name="rx"/>.
/// </summary>
/// <param name="str">The input text to match.</param>
/// <param name="rx">The regex used for both the plan and the match.</param>
/// <param name="result">Receives the value produced by the plan's <c>TryExtract</c>.</param>
/// <param name="options">Extraction options forwarded to <c>CreatePlan</c>.</param>
/// <returns>The value returned by the plan's <c>TryExtract</c>.</returns>
public static bool TryExtract<T>(this string str, Regex rx, out T result, RegExtractOptions options = RegExtractOptions.None)
{
    var match = rx.Match(str);

    // Forward the caller's extraction options; they were previously accepted but never used.
    var plan = ExtractionPlan<T>.CreatePlan(rx, options);
    return plan.TryExtract(match, out result);
}

/// <summary>
/// Attempts to extract a <typeparamref name="T"/> from <paramref name="str"/> with a prebuilt
/// <paramref name="plan"/>; a thin wrapper over the plan's own <c>TryExtract</c>.
/// </summary>
public static bool TryExtract<T>(this string str, ExtractionPlan<T> plan, out T result) =>
    plan.TryExtract(str, out result);

/// <summary>
/// Attempts to extract a <typeparamref name="T"/> from <paramref name="str"/> using the regex
/// that <c>GetRegexFromType</c> supplies for <typeparamref name="T"/>.
/// </summary>
public static bool TryExtract<T>(this string str, out T result, RegExtractOptions options = RegExtractOptions.None)
{
    var typeRegex = GetRegexFromType(typeof(T));
    return TryExtract(str, typeRegex, out result, options);
}

/// <summary>
/// Sequence variant: applies the pattern <paramref name="rx"/> with
/// <see cref="RegexOptions.None"/> by forwarding to the overload that takes explicit
/// <see cref="RegexOptions"/>.
/// </summary>
public static bool TryExtract<T>(this IEnumerable<string> str, string rx, out IEnumerable<T> result, RegExtractOptions options = RegExtractOptions.None) =>
    TryExtract(str, rx, RegexOptions.None, out result, options);

/// <summary>
/// Sequence variant: builds a <see cref="Regex"/> from <paramref name="rx"/> and
/// <paramref name="rxOptions"/>, then forwards to the <see cref="Regex"/>-based overload.
/// </summary>
public static bool TryExtract<T>(this IEnumerable<string> str, string rx, RegexOptions rxOptions, out IEnumerable<T> result, RegExtractOptions options = RegExtractOptions.None)
{
    var compiled = new Regex(rx, rxOptions);
    return TryExtract(str, compiled, out result, options);
}

/// <summary>
/// Attempts to extract a <typeparamref name="T"/> from every input string with a prebuilt
/// <paramref name="plan"/>.
/// </summary>
/// <param name="str">The input strings.</param>
/// <param name="plan">The extraction plan applied to each input.</param>
/// <param name="result">
/// One entry per input, in order; inputs that failed to extract contribute <c>default</c>.
/// </param>
/// <returns><c>true</c> when every input was extracted; <c>false</c> when at least one failed.</returns>
public static bool TryExtract<T>(this IEnumerable<string> str, ExtractionPlan<T> plan, out IEnumerable<T> result)
{
    // Evaluated eagerly: with a deferred Select no extraction has run by the time this method
    // returns, so the success flag could never reflect the inputs.
    var extracted = new List<T>();
    var allSucceeded = true;
    foreach (var input in str)
    {
        if (plan.TryExtract(input, out var item))
        {
            extracted.Add(item);
        }
        else
        {
            allSucceeded = false;
            extracted.Add(default!); // keep positions aligned with the inputs
        }
    }

    result = extracted;
    return allSucceeded;
}

/// <summary>
/// Sequence variant: uses the regex that <c>GetRegexFromType</c> supplies for
/// <typeparamref name="T"/> and forwards to the matching overload.
/// </summary>
public static bool TryExtract<T>(this IEnumerable<string> str, out IEnumerable<T> result, RegExtractOptions options = RegExtractOptions.None)
{
    var typeRegex = GetRegexFromType(typeof(T));
    return TryExtract(str, typeRegex, out result, options);
}

/// <summary>
/// Attempts to extract a <typeparamref name="T"/> from every input string using
/// <paramref name="rx"/>.
/// </summary>
/// <param name="str">The input strings.</param>
/// <param name="rx">The regex used for both the plan and each match.</param>
/// <param name="result">
/// One entry per input, in order; inputs that failed to extract contribute <c>default</c>.
/// </param>
/// <param name="options">Extraction options forwarded to <c>CreatePlan</c>.</param>
/// <returns><c>true</c> when every input was extracted; <c>false</c> when at least one failed.</returns>
public static bool TryExtract<T>(this IEnumerable<string> str, Regex rx, out IEnumerable<T> result, RegExtractOptions options = RegExtractOptions.None)
{
    var plan = ExtractionPlan<T>.CreatePlan(rx, options);

    // Evaluated eagerly: with a deferred Select no extraction has run by the time this method
    // returns, so the success flag could never reflect the inputs.
    var extracted = new List<T>();
    var allSucceeded = true;
    foreach (var input in str)
    {
        if (plan.TryExtract(rx.Match(input), out var item))
        {
            extracted.Add(item);
        }
        else
        {
            allSucceeded = false;
            extracted.Add(default!); // keep positions aligned with the inputs
        }
    }

    result = extracted;
    return allSucceeded;
}
}
}