Skip to content

[RegexDiff X64] [stephentoub] Transform regex X| into X? #1310

@MihuBot

Description

@MihuBot

Job completed in 15 minutes 56 seconds (remote runner delay: 53 seconds).
dotnet/runtime#118087
Using arguments: regexdiff

ERROR: System.Exception: runtime/.dotnet/dotnet publish -o artifacts failed with exit code 1
   at Runner.JobBase.RunProcessAsync(String fileName, String arguments, List`1 output, String logPrefix, String workDir, Boolean checkExitCode, Func`2 processLogs, Boolean suppressOutputLogs, Boolean suppressStartingLog, ProcessPriorityClass priority, List`1 envVars, CancellationToken cancellationToken) in /home/runtime-utils/Runner/JobBase.cs:line 425
   at Runner.Jobs.RegexDiffJob.<>c__DisplayClass10_1.<<RunJitDiffAsync>b__5>d.MoveNext() in /home/runtime-utils/Runner/Jobs/RegexDiffJob.cs:line 608
--- End of stack trace from previous location ---
   at System.Threading.Tasks.Parallel.<>c__49`1.<<ForAsync>b__49_2>d.MoveNext()
--- End of stack trace from previous location ---
   at Runner.Jobs.RegexDiffJob.<>c__DisplayClass10_0.<<RunJitDiffAsync>g__GenerateRegexAssembliesAsync|1>d.MoveNext() in /home/runtime-utils/Runner/Jobs/RegexDiffJob.cs:line 557
--- End of stack trace from previous location ---
   at Runner.Jobs.RegexDiffJob.RunJitDiffAsync(KnownPattern[] knownPatterns, RegexEntry[] entries) in /home/runtime-utils/Runner/Jobs/RegexDiffJob.cs:line 523
   at Runner.Jobs.RegexDiffJob.RunJobCoreAsync() in /home/runtime-utils/Runner/Jobs/RegexDiffJob.cs:line 55
   at Runner.JobBase.RunJobAsync() in /home/runtime-utils/Runner/JobBase.cs:line 133

659 out of 18857 patterns have generated source code changes.

Examples of GeneratedRegex source diffs
"\\r\\n|\\r" (823 uses)
[GeneratedRegex("\\r\\n|\\r")]
  /// Explanation:<br/>
  /// <code>
  /// ○ Match '\r'.<br/>
-   /// ○ Match with 2 alternative expressions, atomically.<br/>
-   ///     ○ Match '\n'.<br/>
-   ///     ○ Match an empty string.<br/>
+   /// ○ Match '\n' atomically, optionally.<br/>
  /// </code>
  /// </remarks>
  [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "42.42.42.42")]
                      return false; // The input didn't match.
                  }
                  
-                   // Match with 2 alternative expressions, atomically.
+                   // Match '\n' atomically, optionally.
                  {
-                       int alternation_starting_pos = pos;
-                       
-                       // Branch 0
+                       if ((uint)slice.Length > (uint)1 && slice[1] == '\n')
                      {
-                           // Match '\n'.
-                           if ((uint)slice.Length < 2 || slice[1] != '\n')
-                           {
-                               goto AlternationBranch;
-                           }
-                           
-                           pos += 2;
-                           slice = inputSpan.Slice(pos);
-                           goto AlternationMatch;
-                           
-                           AlternationBranch:
-                           pos = alternation_starting_pos;
-                           slice = inputSpan.Slice(pos);
-                       }
-                       
-                       // Branch 1
-                       {
-                           
+                           slice = slice.Slice(1);
                          pos++;
-                           slice = inputSpan.Slice(pos);
                      }
-                       
-                       AlternationMatch:;
                  }
                  
                  // The input matched.
+                   pos++;
                  base.runtextpos = pos;
                  base.Capture(0, matchStart, pos);
                  return true;
"(http|https):\\/\\/[\\w\\-_]+(\\.[\\w\\-_]+)+" (479 uses)
[GeneratedRegex("(http|https):\\/\\/[\\w\\-_]+(\\.[\\w\\-_]+)+")]
  /// <code>
  /// ○ 1st capture group.<br/>
  ///     ○ Match the string "http".<br/>
-   ///     ○ Match with 2 alternative expressions.<br/>
-   ///         ○ Match an empty string.<br/>
-   ///         ○ Match 's'.<br/>
+   ///     ○ Match 's' atomically, optionally.<br/>
  /// ○ Match the string "://".<br/>
  /// ○ Match a character in the set [\-_\w] atomically at least once.<br/>
  /// ○ Loop greedily and atomically at least once.<br/>
                  int pos = base.runtextpos;
                  int matchStart = pos;
                  char ch;
-                   int alternation_branch = 0;
-                   int alternation_starting_capturepos = 0;
-                   int alternation_starting_pos = 0;
                  int capture_starting_pos = 0;
                  int loop_iteration = 0;
                  int stackpos = 0;
                  ReadOnlySpan<char> slice = inputSpan.Slice(pos);
                  
                  // 1st capture group.
-                   //{
+                   {
                      capture_starting_pos = pos;
                      
                      // Match the string "http".
                          return false; // The input didn't match.
                      }
                      
-                       // Match with 2 alternative expressions.
-                       //{
-                           alternation_starting_pos = pos;
-                           alternation_starting_capturepos = base.Crawlpos();
-                           
-                           // Branch 0
-                           //{
-                               
-                               alternation_branch = 0;
-                               pos += 4;
-                               slice = inputSpan.Slice(pos);
-                               goto AlternationMatch;
-                               
-                               AlternationBranch:
-                               pos = alternation_starting_pos;
-                               slice = inputSpan.Slice(pos);
-                               UncaptureUntil(alternation_starting_capturepos);
-                           //}
-                           
-                           // Branch 1
-                           //{
-                               // Match 's'.
-                               if ((uint)slice.Length < 5 || slice[4] != 's')
-                               {
-                                   UncaptureUntil(0);
-                                   return false; // The input didn't match.
-                               }
-                               
-                               alternation_branch = 1;
-                               pos += 5;
-                               slice = inputSpan.Slice(pos);
-                               goto AlternationMatch;
-                           //}
-                           
-                           AlternationBacktrack:
-                           if (Utilities.s_hasTimeout)
+                       // Match 's' atomically, optionally.
+                       {
+                           if ((uint)slice.Length > (uint)4 && slice[4] == 's')
                          {
-                               base.CheckTimeout();
+                               slice = slice.Slice(1);
+                               pos++;
                          }
-                           
-                           switch (alternation_branch)
-                           {
-                               case 0:
-                                   goto AlternationBranch;
-                               case 1:
-                                   UncaptureUntil(0);
-                                   return false; // The input didn't match.
-                           }
-                           
-                           AlternationMatch:;
-                       //}
+                       }
                      
+                       pos += 4;
+                       slice = inputSpan.Slice(pos);
                      base.Capture(1, capture_starting_pos, pos);
-                       
-                       goto CaptureSkipBacktrack;
-                       
-                       CaptureBacktrack:
-                       goto AlternationBacktrack;
-                       
-                       CaptureSkipBacktrack:;
-                   //}
+                   }
                  
                  // Match the string "://".
                  if (!slice.StartsWith("://"))
                  {
-                       goto CaptureBacktrack;
+                       UncaptureUntil(0);
+                       return false; // The input didn't match.
                  }
                  
                  // Match a character in the set [\-_\w] atomically at least once.
                      
                      if (iteration == 0)
                      {
-                           goto CaptureBacktrack;
+                           UncaptureUntil(0);
+                           return false; // The input didn't match.
                      }
                      
                      slice = slice.Slice(iteration);
                      if (--loop_iteration < 0)
                      {
                          // Unable to match the remainder of the expression after exhausting the loop.
-                           goto CaptureBacktrack;
+                           UncaptureUntil(0);
+                           return false; // The input didn't match.
                      }
                      pos = base.runstack![--stackpos];
                      UncaptureUntil(base.runstack![--stackpos]);
                      if (loop_iteration == 0)
                      {
                          // All possible iterations have matched, but it's below the required minimum of 1. Fail the loop.
-                           goto CaptureBacktrack;
+                           UncaptureUntil(0);
+                           return false; // The input didn't match.
                      }
                      
                      LoopEnd:
"(http|https):\\/\\/[\\w\\-_]+(\\.[\\w\\-_]+) ..." (479 uses)
[GeneratedRegex("(http|https):\\/\\/[\\w\\-_]+(\\.[\\w\\-_]+)+([\\w\\-\\.,/~\\+#]*)?")]
  /// <code>
  /// ○ 1st capture group.<br/>
  ///     ○ Match the string "http".<br/>
-   ///     ○ Match with 2 alternative expressions.<br/>
-   ///         ○ Match an empty string.<br/>
-   ///         ○ Match 's'.<br/>
+   ///     ○ Match 's' atomically, optionally.<br/>
  /// ○ Match the string "://".<br/>
  /// ○ Match a character in the set [\-_\w] atomically at least once.<br/>
  /// ○ Loop greedily at least once.<br/>
                  int pos = base.runtextpos;
                  int matchStart = pos;
                  char ch;
-                   int alternation_branch = 0;
-                   int alternation_starting_capturepos = 0;
-                   int alternation_starting_pos = 0;
                  int capture_starting_pos = 0;
                  int charloop_starting_pos = 0, charloop_ending_pos = 0;
                  int loop_iteration = 0;
                  ReadOnlySpan<char> slice = inputSpan.Slice(pos);
                  
                  // 1st capture group.
-                   //{
+                   {
                      capture_starting_pos = pos;
                      
                      // Match the string "http".
                          return false; // The input didn't match.
                      }
                      
-                       // Match with 2 alternative expressions.
-                       //{
-                           alternation_starting_pos = pos;
-                           alternation_starting_capturepos = base.Crawlpos();
-                           
-                           // Branch 0
-                           //{
-                               
-                               alternation_branch = 0;
-                               pos += 4;
-                               slice = inputSpan.Slice(pos);
-                               goto AlternationMatch;
-                               
-                               AlternationBranch:
-                               pos = alternation_starting_pos;
-                               slice = inputSpan.Slice(pos);
-                               UncaptureUntil(alternation_starting_capturepos);
-                           //}
-                           
-                           // Branch 1
-                           //{
-                               // Match 's'.
-                               if ((uint)slice.Length < 5 || slice[4] != 's')
-                               {
-                                   UncaptureUntil(0);
-                                   return false; // The input didn't match.
-                               }
-                               
-                               alternation_branch = 1;
-                               pos += 5;
-                               slice = inputSpan.Slice(pos);
-                               goto AlternationMatch;
-                           //}
-                           
-                           AlternationBacktrack:
-                           if (Utilities.s_hasTimeout)
+                       // Match 's' atomically, optionally.
+                       {
+                           if ((uint)slice.Length > (uint)4 && slice[4] == 's')
                          {
-                               base.CheckTimeout();
+                               slice = slice.Slice(1);
+                               pos++;
                          }
-                           
-                           switch (alternation_branch)
-                           {
-                               case 0:
-                                   goto AlternationBranch;
-                               case 1:
-                                   UncaptureUntil(0);
-                                   return false; // The input didn't match.
-                           }
-                           
-                           AlternationMatch:;
-                       //}
+                       }
                      
+                       pos += 4;
+                       slice = inputSpan.Slice(pos);
                      base.Capture(1, capture_starting_pos, pos);
-                       
-                       goto CaptureSkipBacktrack;
-                       
-                       CaptureBacktrack:
-                       goto AlternationBacktrack;
-                       
-                       CaptureSkipBacktrack:;
-                   //}
+                   }
                  
                  // Match the string "://".
                  if (!slice.StartsWith("://"))
                  {
-                       goto CaptureBacktrack;
+                       UncaptureUntil(0);
+                       return false; // The input didn't match.
                  }
                  
                  // Match a character in the set [\-_\w] atomically at least once.
                      
                      if (iteration == 0)
                      {
-                           goto CaptureBacktrack;
+                           UncaptureUntil(0);
+                           return false; // The input didn't match.
                      }
                      
                      slice = slice.Slice(iteration);
                          base.Capture(2, capture_starting_pos1, pos);
                          
                          Utilities.StackPush(ref base.runstack!, ref stackpos, capture_starting_pos1);
-                           goto CaptureSkipBacktrack1;
+                           goto CaptureSkipBacktrack;
                          
-                           CaptureBacktrack1:
+                           CaptureBacktrack:
                          capture_starting_pos1 = base.runstack![--stackpos];
                          goto CharLoopBacktrack;
                          
-                           CaptureSkipBacktrack1:;
+                           CaptureSkipBacktrack:;
                      //}
                      
                      
                      if (--loop_iteration < 0)
                      {
                          // Unable to match the remainder of the expression after exhausting the loop.
-                           goto CaptureBacktrack;
+                           UncaptureUntil(0);
+                           return false; // The input didn't match.
                      }
                      pos = base.runstack![--stackpos];
                      UncaptureUntil(base.runstack![--stackpos]);
                      if (loop_iteration == 0)
                      {
                          // No iterations have been matched to backtrack into. Fail the loop.
-                           goto CaptureBacktrack;
+                           UncaptureUntil(0);
+                           return false; // The input didn't match.
                      }
                      
                      goto LoopEnd;
                      if (loop_iteration == 0)
                      {
                          // No iterations of the loop remain to backtrack into. Fail the loop.
-                           goto CaptureBacktrack;
+                           UncaptureUntil(0);
+                           return false; // The input didn't match.
                      }
-                       goto CaptureBacktrack1;
+                       goto CaptureBacktrack;
                      LoopEnd:;
                  //}
"(http|https):\\/\\/[\\w\\-_]+(\\.[\\w\\-_]+) ..." (479 uses)
[GeneratedRegex("(http|https):\\/\\/[\\w\\-_]+(\\.[\\w\\-_]+)+([\\w\\-\\.,/~\\+#]*)?/")]
  /// <code>
  /// ○ 1st capture group.<br/>
  ///     ○ Match the string "http".<br/>
-   ///     ○ Match with 2 alternative expressions.<br/>
-   ///         ○ Match an empty string.<br/>
-   ///         ○ Match 's'.<br/>
+   ///     ○ Match 's' atomically, optionally.<br/>
  /// ○ Match the string "://".<br/>
  /// ○ Match a character in the set [\-_\w] atomically at least once.<br/>
  /// ○ Loop greedily at least once.<br/>
                  int pos = base.runtextpos;
                  int matchStart = pos;
                  char ch;
-                   int alternation_branch = 0;
-                   int alternation_starting_capturepos = 0;
-                   int alternation_starting_pos = 0;
                  int capture_starting_pos = 0;
                  int charloop_starting_pos = 0, charloop_ending_pos = 0;
                  int charloop_starting_pos1 = 0, charloop_ending_pos1 = 0;
                  ReadOnlySpan<char> slice = inputSpan.Slice(pos);
                  
                  // 1st capture group.
-                   //{
+                   {
                      capture_starting_pos = pos;
                      
                      // Match the string "http".
                          return false; // The input didn't match.
                      }
                      
-                       // Match with 2 alternative expressions.
-                       //{
-                           alternation_starting_pos = pos;
-                           alternation_starting_capturepos = base.Crawlpos();
-                           
-                           // Branch 0
-                           //{
-                               
-                               alternation_branch = 0;
-                               pos += 4;
-                               slice = inputSpan.Slice(pos);
-                               goto AlternationMatch;
-                               
-                               AlternationBranch:
-                               pos = alternation_starting_pos;
-                               slice = inputSpan.Slice(pos);
-                               UncaptureUntil(alternation_starting_capturepos);
-                           //}
-                           
-                           // Branch 1
-                           //{
-                               // Match 's'.
-                               if ((uint)slice.Length < 5 || slice[4] != 's')
-                               {
-                                   UncaptureUntil(0);
-                                   return false; // The input didn't match.
-                               }
-                               
-                               alternation_branch = 1;
-                               pos += 5;
-                               slice = inputSpan.Slice(pos);
-                               goto AlternationMatch;
-                           //}
-                           
-                           AlternationBacktrack:
-                           if (Utilities.s_hasTimeout)
+                       // Match 's' atomically, optionally.
+                       {
+                           if ((uint)slice.Length > (uint)4 && slice[4] == 's')
                          {
-                               base.CheckTimeout();
+                               slice = slice.Slice(1);
+                               pos++;
                          }
-                           
-                           switch (alternation_branch)
-                           {
-                               case 0:
-                                   goto AlternationBranch;
-                               case 1:
-                                   UncaptureUntil(0);
-                                   return false; // The input didn't match.
-                           }
-                           
-                           AlternationMatch:;
-                       //}
+                       }
                      
+                       pos += 4;
+                       slice = inputSpan.Slice(pos);
                      base.Capture(1, capture_starting_pos, pos);
-                       
-                       goto CaptureSkipBacktrack;
-                       
-                       CaptureBacktrack:
-                       goto AlternationBacktrack;
-                       
-                       CaptureSkipBacktrack:;
-                   //}
+                   }
                  
                  // Match the string "://".
                  if (!slice.StartsWith("://"))
                  {
-                       goto CaptureBacktrack;
+                       UncaptureUntil(0);
+                       return false; // The input didn't match.
                  }
                  
                  // Match a character in the set [\-_\w] atomically at least once.
                      
                      if (iteration == 0)
                      {
-                           goto CaptureBacktrack;
+                           UncaptureUntil(0);
+                           return false; // The input didn't match.
                      }
                      
                      slice = slice.Slice(iteration);
                          base.Capture(2, capture_starting_pos1, pos);
                          
                          Utilities.StackPush(ref base.runstack!, ref stackpos, capture_starting_pos1);
-                           goto CaptureSkipBacktrack1;
+                           goto CaptureSkipBacktrack;
                          
-                           CaptureBacktrack1:
+                           CaptureBacktrack:
                          capture_starting_pos1 = base.runstack![--stackpos];
                          goto CharLoopBacktrack;
                          
-                           CaptureSkipBacktrack1:;
+                           CaptureSkipBacktrack:;
                      //}
                      
                      
                      if (--loop_iteration < 0)
                      {
                          // Unable to match the remainder of the expression after exhausting the loop.
-                           goto CaptureBacktrack;
+                           UncaptureUntil(0);
+                           return false; // The input didn't match.
                      }
                      pos = base.runstack![--stackpos];
                      UncaptureUntil(base.runstack![--stackpos]);
                      if (loop_iteration == 0)
                      {
                          // No iterations have been matched to backtrack into. Fail the loop.
-                           goto CaptureBacktrack;
+                           UncaptureUntil(0);
+                           return false; // The input didn't match.
                      }
                      
                      goto LoopEnd;
                      if (loop_iteration == 0)
                      {
                          // No iterations of the loop remain to backtrack into. Fail the loop.
-                           goto CaptureBacktrack;
+                           UncaptureUntil(0);
+                           return false; // The input didn't match.
                      }
-                       goto CaptureBacktrack1;
+                       goto CaptureBacktrack;
                      LoopEnd:;
                  //}
                  
                          base.Capture(3, capture_starting_pos2, pos);
                          
                          Utilities.StackPush(ref base.runstack!, ref stackpos, capture_starting_pos2);
-                           goto CaptureSkipBacktrack2;
+                           goto CaptureSkipBacktrack1;
                          
-                           CaptureBacktrack2:
+                           CaptureBacktrack1:
                          capture_starting_pos2 = base.runstack![--stackpos];
                          goto CharLoopBacktrack1;
                          
-                           CaptureSkipBacktrack2:;
+                           CaptureSkipBacktrack1:;
                      //}
                      
                      
                          // No iterations of the loop remain to backtrack into. Fail the loop.
                          goto LoopBacktrack;
                      }
-                       goto CaptureBacktrack2;
+                       goto CaptureBacktrack1;
                      LoopEnd1:;
                  //}
"^ps_(?<major>1|2|3|4|5)_(?<minor>0|1|)$" (308 uses)
[GeneratedRegex("^ps_(?<major>1|2|3|4|5)_(?<minor>0|1|)$")]
  ///     ○ Match a character in the set [1-5].<br/>
  /// ○ Match '_'.<br/>
  /// ○ "minor" capture group.<br/>
-   ///     ○ Match with 2 alternative expressions.<br/>
-   ///         ○ Match a character in the set [01].<br/>
-   ///         ○ Match an empty string.<br/>
+   ///     ○ Match a character in the set [01] atomically, optionally.<br/>
  /// ○ Match if at the end of the string or if before an ending newline.<br/>
  /// </code>
  /// </remarks>
              {
                  int pos = base.runtextpos;
                  int matchStart = pos;
-                   int alternation_branch = 0;
-                   int alternation_starting_capturepos = 0;
-                   int alternation_starting_pos = 0;
                  int capture_starting_pos = 0;
                  int capture_starting_pos1 = 0;
                  ReadOnlySpan<char> slice = inputSpan.Slice(pos);
                  }
                  
                  // "minor" capture group.
-                   //{
+                   {
                      pos++;
                      slice = inputSpan.Slice(pos);
                      capture_starting_pos1 = pos;
                      
-                       // Match with 2 alternative expressions.
-                       //{
-                           alternation_starting_pos = pos;
-                           alternation_starting_capturepos = base.Crawlpos();
-                           
-                           // Branch 0
-                           //{
-                               // Match a character in the set [01].
-                               if (slice.IsEmpty || !char.IsBetween(slice[0], '0', '1'))
-                               {
-                                   goto AlternationBranch;
-                               }
-                               
-                               alternation_branch = 0;
+                       // Match a character in the set [01] atomically, optionally.
+                       {
+                           if (!slice.IsEmpty && char.IsBetween(slice[0], '0', '1'))
+                           {
+                               slice = slice.Slice(1);
                              pos++;
-                               slice = inputSpan.Slice(pos);
-                               goto AlternationMatch;
-                               
-                               AlternationBranch:
-                               pos = alternation_starting_pos;
-                               slice = inputSpan.Slice(pos);
-                               UncaptureUntil(alternation_starting_capturepos);
-                           //}
-                           
-                           // Branch 1
-                           //{
-                               
-                               alternation_branch = 1;
-                               goto AlternationMatch;
-                           //}
-                           
-                           AlternationBacktrack:
-                           if (Utilities.s_hasTimeout)
-                           {
-                               base.CheckTimeout();
                          }
-                           
-                           switch (alternation_branch)
-                           {
-                               case 0:
-                                   goto AlternationBranch;
-                               case 1:
-                                   UncaptureUntil(0);
-                                   return false; // The input didn't match.
-                           }
-                           
-                           AlternationMatch:;
-                       //}
+                       }
                      
                      base.Capture(2, capture_starting_pos1, pos);
-                       
-                       goto CaptureSkipBacktrack;
-                       
-                       CaptureBacktrack:
-                       goto AlternationBacktrack;
-                       
-                       CaptureSkipBacktrack:;
-                   //}
+                   }
                  
                  // Match if at the end of the string or if before an ending newline.
                  if (pos < inputSpan.Length - 1 || ((uint)pos < (uint)inputSpan.Length && inputSpan[pos] != '\n'))
                  {
-                       goto CaptureBacktrack;
+                       UncaptureUntil(0);
+                       return false; // The input didn't match.
                  }
                  
                  // The input matched.

For more diff examples, see https://gist.github.com/MihuBot/4cd3bca9db93aac0d58e2198755d7011

Sample source code for further analysis
// Local cache of the results file: the archive is only downloaded when this file does not exist yet.
const string JsonPath = "RegexResults-1310.json";
if (!File.Exists(JsonPath))
{
    // Own the HttpClient with a using declaration so its handler and connection are released
    // as soon as the download finishes. Locals are disposed in reverse declaration order, so the
    // archive and response stream are closed before the client.
    using var httpClient = new HttpClient();
    await using var archiveStream = await httpClient.GetStreamAsync("https://mihubot.xyz/r/E2uv-WNA");
    using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read);

    // First() throws InvalidOperationException if the archive has no "Results.json" entry.
    archive.Entries.First(e => e.Name == "Results.json").ExtractToFile(JsonPath);
}

// RegexEntry exposes value-tuple typed properties; System.Text.Json only binds the public fields
// of a ValueTuple when IncludeFields is enabled.
using FileStream jsonFileStream = File.OpenRead(JsonPath);
RegexEntry[] entries = JsonSerializer.Deserialize<RegexEntry[]>(jsonFileStream, new JsonSerializerOptions { IncludeFields = true })!;
Console.WriteLine($"Working with {entries.Length} patterns");



/// <summary>
/// A regex pattern together with the <see cref="RegexOptions"/> it is constructed with.
/// </summary>
/// <param name="Pattern">The regular expression pattern text.</param>
/// <param name="Options">The options associated with the pattern.</param>
/// <param name="Count">
/// NOTE(review): presumably the number of known usages of this pattern/options pair
/// (the report lists e.g. "823 uses") - confirm against the data producer.
/// </param>
record KnownPattern(string Pattern, RegexOptions Options, int Count);

/// <summary>
/// One element of the results JSON array that the sample script deserializes.
/// Property names and types form the JSON contract, so they must stay in sync with the file.
/// </summary>
sealed class RegexEntry
{
    /// <summary>The pattern (with options and count) this entry describes. Required.</summary>
    public required KnownPattern Regex { get; set; }

    /// <summary>
    /// Required source text. NOTE(review): presumably the generator output produced by the
    /// main branch - confirm.
    /// </summary>
    public required string MainSource { get; set; }

    /// <summary>
    /// Required source text. NOTE(review): presumably the generator output produced with the
    /// pull request applied - confirm.
    /// </summary>
    public required string PrSource { get; set; }

    /// <summary>
    /// Optional diff text; may be null. NOTE(review): presumably the complete diff between
    /// <see cref="MainSource"/> and <see cref="PrSource"/> - confirm.
    /// </summary>
    public string? FullDiff { get; set; }

    /// <summary>
    /// Optional diff text; may be null. NOTE(review): presumably a condensed form of
    /// <see cref="FullDiff"/> - confirm.
    /// </summary>
    public string? ShortDiff { get; set; }

    /// <summary>
    /// Optional array of (Name, Values) string pairs; may be null.
    /// NOTE(review): presumably the char-based SearchValues emitted for the pattern - confirm.
    /// </summary>
    public (string Name, string Values)[]? SearchValuesOfChar { get; set; }

    /// <summary>
    /// Optional array of (Values, ComparisonType) pairs; may be null.
    /// NOTE(review): presumably the string-based SearchValues emitted for the pattern - confirm.
    /// </summary>
    public (string[] Values, StringComparison ComparisonType)[]? SearchValuesOfString { get; set; }
}

Artifacts:

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions