Optimize integer for loop code gen #13573

albert-du · 2022-07-26T23:41:37Z

Optimizes IL code generation for `for i in n .. step .. m do` loops to avoid allocation.

Throws ArgumentException at runtime if step is zero.

Replicates the logic of OperatorIntrinsics.RangeInt32, with additional optimizations if the step is a non-zero compile-time constant.

Example:

let f n step m =
    for i in n..step..m do
        System.Console.WriteLine()

Before:

.method public static 
    void f (
        int32 n,
        int32 step,
        int32 m
    ) cil managed 
{
    .custom instance void [FSharp.Core]Microsoft.FSharp.Core.CompilationArgumentCountsAttribute::.ctor(int32[]) = (
        01 00 03 00 00 00 01 00 00 00 01 00 00 00 01 00
        00 00 00 00
    )
    // Method begins at RVA 0x2050
    // Code size 59 (0x3b)
    .maxstack 5
    .locals init (
        [0] class [System.Runtime]System.Collections.Generic.IEnumerable`1<int32>,
        [1] class [System.Runtime]System.Collections.Generic.IEnumerator`1<int32>,
        [2] int32,
        [3] class [System.Runtime]System.IDisposable
    )

    IL_0000: ldarg.0
    IL_0001: ldarg.1
    IL_0002: ldarg.2
    IL_0003: call class [System.Runtime]System.Collections.Generic.IEnumerable`1<int32> [FSharp.Core]Microsoft.FSharp.Core.Operators/OperatorIntrinsics::RangeInt32(int32, int32, int32)
    IL_0008: stloc.0
    IL_0009: ldloc.0
    IL_000a: callvirt instance class [System.Runtime]System.Collections.Generic.IEnumerator`1<!0> class [System.Runtime]System.Collections.Generic.IEnumerable`1<int32>::GetEnumerator()
    IL_000f: stloc.1
    .try
    {
        // loop start (head: IL_0010)
            IL_0010: ldloc.1
            IL_0011: callvirt instance bool [System.Runtime]System.Collections.IEnumerator::MoveNext()
            IL_0016: brfalse.s IL_0026

            IL_0018: ldloc.1
            IL_0019: callvirt instance !0 class [System.Runtime]System.Collections.Generic.IEnumerator`1<int32>::get_Current()
            IL_001e: stloc.2
            IL_001f: call void [System.Console]System.Console::WriteLine()
            IL_0024: br.s IL_0010
        // end loop

        IL_0026: leave.s IL_003a
    } // end .try
    finally
    {
        IL_0028: ldloc.1
        IL_0029: isinst [System.Runtime]System.IDisposable
        IL_002e: stloc.3
        IL_002f: ldloc.3
        IL_0030: brfalse.s IL_0039

        IL_0032: ldloc.3
        IL_0033: callvirt instance void [System.Runtime]System.IDisposable::Dispose()
        IL_0038: endfinally

        IL_0039: endfinally
    } // end handler

    IL_003a: ret
} // end of method _::f

public static void f(int n, int step, int m)
{
    IEnumerable<int> enumerable = Operators.OperatorIntrinsics.RangeInt32(n, step, m);
    IEnumerator<int> enumerator = enumerable.GetEnumerator();
    try
    {
        while (enumerator.MoveNext())
        {
            int current = enumerator.Current;
            Console.WriteLine();
        }
    }
    finally
    {
        IDisposable disposable = enumerator as IDisposable;
        if (disposable != null)
        {
            disposable.Dispose();
        }
    }
}

After:

.method public static 
    void f (
        int32 n,
        int32 step,
        int32 m
    ) cil managed 
{
    .custom instance void [FSharp.Core]Microsoft.FSharp.Core.CompilationArgumentCountsAttribute::.ctor(int32[]) = (
        01 00 03 00 00 00 01 00 00 00 01 00 00 00 01 00
        00 00 00 00
    )
    // Method begins at RVA 0x2050
    // Code size 53 (0x35)
    .maxstack 4
    .locals init (
        [0] int32,
        [1] int32,
        [2] int32
    )

    IL_0000: ldarg.0
    IL_0001: stloc.2
    IL_0002: ldarg.1
    IL_0003: stloc.1
    IL_0004: ldloc.1
    IL_0005: brtrue.s IL_0017

    IL_0007: ldstr "The step of a range cannot be zero."
    IL_000c: ldstr "step"
    IL_0011: newobj instance void [mscorlib]System.ArgumentException::.ctor(string, string)
    IL_0016: throw

    IL_0017: ldarg.2
    IL_0018: stloc.0
    IL_0019: br.s IL_0024
    // loop start (head: IL_0024)
        IL_001b: call void [mscorlib]System.Console::WriteLine()
        IL_0020: ldloc.2
        IL_0021: ldloc.1
        IL_0022: add
        IL_0023: stloc.2

        IL_0024: ldloc.2
        IL_0025: ldloc.0
        IL_0026: ble.s IL_002c

        IL_0028: ldloc.1
        IL_0029: ldc.i4.0
        IL_002a: bgt.s IL_0034

        IL_002c: ldloc.2
        IL_002d: ldloc.0
        IL_002e: bge.s IL_001b

        IL_0030: ldloc.1
        IL_0031: ldc.i4.0
        IL_0032: bge.s IL_001b
    // end loop

    IL_0034: ret
}

public static void f(int n, int step, int m)
{
    int num = n;
    if (step == 0)
    {
        throw new ArgumentException("The step of a range cannot be zero.", "step");
    }
    while ((num <= m || step <= 0) && (num >= m || step >= 0))
    {
        Console.WriteLine();
        num += step;
    }
}

Constant step optimization:

let f n m =
    for i in n..2..m do
        System.Console.WriteLine()

.method public static 
        void f (
            int32 n,
            int32 m
        ) cil managed 
    {
        .custom instance void [FSharp.Core]Microsoft.FSharp.Core.CompilationArgumentCountsAttribute::.ctor(int32[]) = (
            01 00 02 00 00 00 01 00 00 00 01 00 00 00 00 00
        )
        // Method begins at RVA 0x2050
        // Code size 24 (0x18)
        .maxstack 4
        .locals init (
            [0] int32,
            [1] int32
        )

        IL_0000: ldarg.0
        IL_0001: stloc.1
        IL_0002: ldarg.1
        IL_0003: stloc.0
        IL_0004: br.s IL_000f
        // loop start (head: IL_000f)
            IL_0006: call void [mscorlib]System.Console::WriteLine()
            IL_000b: ldloc.1
            IL_000c: ldc.i4.2
            IL_000d: add
            IL_000e: stloc.1

            IL_000f: ldloc.1
            IL_0010: ldarg.0
            IL_0011: blt.s IL_0017

            IL_0013: ldloc.1
            IL_0014: ldloc.0
            IL_0015: ble.s IL_0006
        // end loop

        IL_0017: ret
    }

public static void f(int n, int m)
{
    int num = n;
    while (num >= n && num <= m)
    {
        Console.WriteLine();
        num += 2;
    }
}

Benchmarks

Before:

Method	Start	Finish	Step	Mean	Error	StdDev	Code Size
VariableStep	100	1000	10	540.4 ns	12.26 ns	35.76 ns	465 B
ConstantStep	100	1000	10	528.2 ns	12.40 ns	36.58 ns	466 B
VariableStep	100	1000000	10	566,350.1 ns	13,115.83 ns	38,259.51 ns	465 B
ConstantStep	100	1000000	10	618,593.6 ns	16,760.01 ns	47,271.89 ns	466 B

After:

Method	Start	Finish	Step	Mean	Error	StdDev	Code Size
VariableStep	100	1000	10	108.83 ns	2.757 ns	7.775 ns	159 B
ConstantStep	100	1000	10	57.85 ns	1.701 ns	4.962 ns	60 B
VariableStep	100	1000000	10	100,057.40 ns	2,554.928 ns	7,452.844 ns	159 B
ConstantStep	100	1000000	10	33,615.63 ns	943.374 ns	2,766.752 ns	60 B

79.9% to 94.6% faster, while smaller and without allocation.

vzarytovskii · 2022-07-27T09:49:57Z

Nice. Can I please ask you to add some benchmarks, so we understand what are implications in runtime here?

Comparisons - before/after, comparison with normal loop, etc.

Also:

> Throws ArgumentException at runtime if step is zero.

What is the current behaviour? Also, it should produce a warning/error if the step is a constant known at compile time.

albert-du · 2022-07-28T06:22:07Z

@vzarytovskii I added benchmark results to the original pr message.

Throwing an error at runtime is behavior replicated from the RangeInt32 enumerator type that backed the `.. ..` syntax (see the SharpLab example). I can look into displaying a warning/error if the step is zero; however, it might be better to emit such a message for any usage of an integer range step, not just in a for loop.

vzarytovskii · 2022-07-28T16:47:35Z

> @vzarytovskii I added benchmark results to the original pr message.

Thanks!

> Throwing an error at runtime is behavior replicated from the RangeInt32 enumerator type that backed the `.. ..` syntax (see the SharpLab example). I can look into displaying a warning/error if the step is zero; however, it might be better to emit such a message for any usage of an integer range step, not just in a for loop.

Yep, agree, should be done separately for all such cases.

* adds new active pattern to match int32 range step for loops
* existing Expr structure changed, no longer backed by an enumerator
* updates test, existing check changed to int64 instead of int32

* even smaller and faster generated il code
* ran fantomas

albert-du · 2022-07-28T23:52:14Z

I've made further improvements to the codegen that result in smaller and faster IL. The disassembled IL, C#, and benchmarks have been updated.

albert-du · 2022-07-29T16:50:00Z

this is ready

abelbraaksma · 2022-08-03T23:46:52Z

> 79.8% faster, while smaller and without allocation.

Wow! I noticed you showed timings for a loop of 90x, did you see similar behavior with larger loops? Did you compare when there is slightly more code in the body, or some reference type so that JIT cannot optimize everything away? Just curious! These timings are amazing!

vzarytovskii

This LGTM, need one more approval @KevinRansom @dsyme @0101 @psfinaki

dsyme · 2022-08-17T10:28:36Z

@albert-du This is fantastic work

We need some more testing

We have to test loops near Int32.MaxValue. These can be really difficult to get right since adding the step causes the value to wrap around. It's quite possible what you have will have problems for these loops - this is the major reason why we haven't optimized more loops previously.
We have to test tasks containing these loops, since you've made changes in the resumable code state machines.
Please make additions to tests\walkthroughs\DebugStepping\TheBigFileOfDebugStepping.fsx, then compile that file (add a project file to that directory if you like and add it to the solution) and test the stepping and breakpoints for these loops

dsyme

See comments above

albert-du · 2022-08-22T20:29:47Z

@dsyme I added over/underflow checking to the constant-step optimized loops. Variable-step loops don't seem to suffer from this, as the logic was completely lifted from RangeInt32. Tests for under/overflow checking and tasks have been added. Debug stepping for loops looks good. The original comment has been updated with the newer code gen for constant-step loops.

albert-du · 2022-08-23T05:12:14Z

benchmarks updated

dsyme

Some further changes are going to be needed here to move the optimization out of checking and into Optimizer.fs or similar, to avoid additions to the quotation API and changes to quotation formats.

This is a little painful as the loop lowering code is placed most naturally in CheckExpressions.

Do you feel able to try the changes with guidance? I'd be happy to help you with them

dsyme · 2022-08-23T14:20:52Z

src/Compiler/Checking/QuotationTranslator.fs

            | FSharpForLoopUp -> QP.mkIntegerForLoop(ConvExpr cenv env lim0, ConvExpr cenv env lim1, ConvExpr cenv env body)
            | _ -> wfail(Error(FSComp.SR.crefQuotationsCantContainDescendingForLoops(), m))

+        | TOp.IntegerForLoop (_, _, _), [], [Expr.Lambda (_, _, _, [_], lim0, _, _);Expr.Lambda (_, _, _, [_], lim1, _, _);body; Expr.Lambda (_, _, _, [_], step, _, _)] ->


I'm concerned by the addition of these new quotation constructs, see above

dsyme · 2022-08-23T14:21:17Z

src/FSharp.Core/quotations.fsi

+    ///
+    /// <example-tbd></example-tbd>
+    [<CompiledName("ForIntegerRangeLoopWithStepPattern")>]
+    val (|ForIntegerRangeLoopWithStep|_|): input: Expr -> (Var * Expr * Expr * Expr * Expr) option


Optimizations shouldn't really cause additions to the quotation API. If we were to add something, we would need to add a corresponding quotation construction method, e.g. similar to

fsharp/src/FSharp.Core/quotations.fs

Line 2632 in 9978a14

static member ForIntegerRangeLoop(loopVariable, start: Expr, endExpr: Expr, body: Expr) =

dsyme · 2022-08-23T14:24:50Z

src/Compiler/Checking/CheckExpressions.fs

+
+        // optimize 'for i in n .. step .. m do'
+        | Expr.App(Expr.Val(vref, _, _), _, [ tyarg; stepTyarg ], [ startExpr; stepExpr; finishExpr ], _)
+            when valRefEq g vref g.range_step_op_vref && typeEquiv g tyarg g.int_ty && typeEquiv g stepTyarg g.int_ty ->


I think we may need to guard this via a language version switch or move this to an optimization phase (though that is painful). It changes the form of quotations for these constructs which is a breaking change.

Quoting these loops is rare but it's also more philosophical: we shouldn't be adding any optimizations which change the quoted form or require new additions to the quotations API. So optimizations should be in later phases.

albert-du · 2022-08-24T03:43:55Z

@dsyme I think I'd like to try making those changes, it'll be a good opportunity for me to understand more of the compiler

…fsharp into for-loop-optimization

charlesroddie · 2022-12-28T14:26:35Z

That's 400% to 1740% faster!

abelbraaksma · 2022-12-29T01:04:28Z

@albert-du please don’t let this go stale, it’s amazing work! 💯 🥇 If it gets behind ‘main’ for too long, the conflicts may become hard to solve. If you need some help with seeing this through, feel free to ping me, or anybody, on F# Slack.

albert-du · 2022-12-30T02:45:40Z

@abelbraaksma I've been busy with school the past few months but I fully intend to work more on this soon. Thank you so much for your support!

psfinaki · 2024-05-16T13:57:03Z

This was subsumed by @brianrourkeboll in his PR, closing this one.

albert-du added 3 commits July 26, 2022 16:14

Integer n .. step .. m for loop optimization

8b099b6

run fantomas

02bd0f9

Merge branch 'main' into for-loop-optimization

129e88a

add benchmark

86de1fd

albert-du added 3 commits July 28, 2022 14:09

update quotations

96fed0e

* adds new active pattern to match int32 range step for loops
* existing Expr structure changed, no longer backed by an enumerator
* updates test, existing check changed to int64 instead of int32

improve il generation

5b76d57

* even smaller and faster generated il code
* ran fantomas

Update SurfaceArea.fs

d771c5b

baseline tests

8889595

vzarytovskii requested review from KevinRansom and dsyme July 31, 2022 17:10

vzarytovskii previously approved these changes Aug 8, 2022

View reviewed changes

dsyme requested changes Aug 17, 2022

View reviewed changes

albert-du force-pushed the for-loop-optimization branch from 58dd212 to 65e6553 Compare August 21, 2022 18:02

albert-du added 6 commits August 21, 2022 11:03

test near integer max

65e6553

fix integer overflow with constant step for loops

aa39f2b

update baselines

606e15d

test for loops in task

c1c6bc4

Update TheBigFileOfDebugStepping.fsx

e1dc3c7

Merge branch 'main' into for-loop-optimization

2afc973

albert-du requested review from dsyme and removed request for KevinRansom August 22, 2022 20:29

new benchmarks

0b93643

dsyme requested changes Aug 23, 2022

View reviewed changes

albert-du added 4 commits August 24, 2022 13:18

move optimizations out of type checking

5d4441d

Merge branch 'main' into for-loop-optimization

90154f1

formatting

76f22df

Merge branch 'for-loop-optimization' of https://github.com/albert-du/…

eb206ff

…fsharp into for-loop-optimization

vzarytovskii dismissed their stale review via eb206ff October 5, 2022 16:11

vzarytovskii marked this pull request as draft August 25, 2023 10:31

vzarytovskii self-assigned this Nov 13, 2023

vzarytovskii removed their assignment Nov 22, 2023

brianrourkeboll mentioned this pull request Feb 20, 2024

Better integral range lowering: start..finish, start..step..finish #16650

Merged

3 tasks

psfinaki closed this May 16, 2024

brianrourkeboll mentioned this pull request May 26, 2024

Integral range optimizations in resumable code computation expressions #17253

Open

Optimize integer for loop code gen #13573

Optimize integer for loop code gen #13573

Conversation

albert-du commented Jul 26, 2022 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Benchmarks

Uh oh!

vzarytovskii commented Jul 27, 2022

Uh oh!

albert-du commented Jul 28, 2022

Uh oh!

vzarytovskii commented Jul 28, 2022

Uh oh!

albert-du commented Jul 28, 2022 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

albert-du commented Jul 29, 2022

Uh oh!

abelbraaksma commented Aug 3, 2022

Uh oh!

vzarytovskii left a comment

Choose a reason for hiding this comment

Uh oh!

dsyme commented Aug 17, 2022

Uh oh!

dsyme left a comment

Choose a reason for hiding this comment

Uh oh!

albert-du commented Aug 22, 2022 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

albert-du commented Aug 23, 2022

Uh oh!

dsyme left a comment

Choose a reason for hiding this comment

Uh oh!

dsyme Aug 23, 2022

Choose a reason for hiding this comment

Uh oh!

dsyme Aug 23, 2022

Choose a reason for hiding this comment

Uh oh!

dsyme Aug 23, 2022

Choose a reason for hiding this comment

Uh oh!

albert-du commented Aug 24, 2022

Uh oh!

charlesroddie commented Dec 28, 2022

Uh oh!

abelbraaksma commented Dec 29, 2022 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

albert-du commented Dec 30, 2022

Uh oh!

psfinaki commented May 16, 2024

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

6 participants

albert-du commented Jul 26, 2022 •

edited

Loading

albert-du commented Jul 28, 2022 •

edited

Loading

albert-du commented Aug 22, 2022 •

edited

Loading

abelbraaksma commented Dec 29, 2022 •

edited

Loading