Skip to content

Future::and_then and other adapters are not always zero cost #37930

@japaric

Description

@japaric

I expected these two code snippets to produce the same (or very similar) code
when fully optimized (release+LTO) but they don't:

// Snippet 1: two separate waits
let tx = busy_wait(tx.write(4));
busy_wait(tx.write(2));

// Snippet 2: a single wait on a chained future
busy_wait(tx.write(4).and_then(|tx| tx.write(2)));

The first (two separate busy_wait calls) is panic!-free but the second (the
and_then chain) is not; it still contains calls to
panic!("cannot poll a chained future twice") even though it never polls either
future after it has yielded its value.

STR (steps to reproduce)

// src/main.rs
#![feature(lang_items)]
#![feature(never_type)]
#![no_main]
#![no_std]

extern crate futures;

use core::{fmt, ptr};

use futures::{Async, Future};

// Entry point. Builds a `Tx`, chains two one-byte writes (4, then 2) with
// `and_then`, drives the chained future to completion with `busy_wait`, and
// then spins forever (the function never returns).
#[no_mangle]
pub fn _start() -> ! {
    let tx = Tx { _0: () };

    // This is the `and_then` variant under discussion: a single wait on the
    // chained future instead of two separate waits.
    busy_wait(tx.write(4).and_then(|tx| tx.write(2)));

    loop {}
}

// Transmitter handle. It carries no data (`_0` is a unit field); `Tx::write`
// consumes it and the resulting `Write` future yields it back on completion.
struct Tx {
    _0: (),
}

impl Tx {
    // Consumes the transmitter and returns a `Write` future that will send
    // `byte`. The transmitter is stored inside the future and handed back as
    // the future's item once the byte has been written.
    fn write(self, byte: u8) -> Write {
        Write {
            tx: Some(self),
            byte: byte,
        }
    }
}

// Future that sends a single byte and resolves to the `Tx` it was created from.
struct Write {
    // Byte to send.
    byte: u8,
    // Transmitter to hand back on completion. `poll` takes it out and only
    // puts it back when the write could not be performed yet, so it is `None`
    // once the future has resolved.
    tx: Option<Tx>,
}

impl Future for Write {
    // The error type is `!`: this future never produces an error.
    type Error = !;
    type Item = Tx;

    // Tries to send the byte once.
    //
    // Returns `Ok(Async::Ready(tx))` after writing the byte, or
    // `Ok(Async::NotReady)` if the flag read from address `0x0` is false.
    //
    // Panics with "cannot poll `write` twice" if called again after it has
    // returned `Ready`, because `self.tx` is left as `None` in that case.
    fn poll(&mut self) -> Result<Async<Tx>, !> {
        // The whole body is `unsafe` because of the volatile raw-pointer
        // accesses below.
        unsafe {
            // Move the transmitter out of `self`; it is restored in the
            // `NotReady` branch so that a later poll can succeed.
            let tx = self.tx.take().expect("cannot poll `write` twice");

            // NOTE `0x0` and `0x4` emulate memory mapped IO registers
            // Can we send data yet?
            if ptr::read_volatile(0x0 as *const bool) {
                // Send one byte of data
                ptr::write_volatile(0x4 as *mut u8, self.byte);
                Ok(Async::Ready(tx))
            } else {
                // Not ready yet: put the transmitter back for the next poll.
                self.tx = Some(tx);
                Ok(Async::NotReady)
            }
        }
    }
}

// Polls `f` in a tight loop until it yields `Async::Ready`, then returns the
// item. Any other poll result simply triggers another poll; since the error
// type is constrained to `!`, in practice that means `Async::NotReady`.
fn busy_wait<F>(mut f: F) -> F::Item
    where F: Future<Error = !>
{
    loop {
        if let Ok(Async::Ready(t)) = f.poll() {
            return t;
        }
    }
}

// `panic_fmt` lang item for this `no_std` binary. The message and line number
// are ignored; the file name is stored with a volatile write to address `0x8`
// and the function then loops forever.
#[lang = "panic_fmt"]
extern "C" fn panic_fmt(_fmt: fmt::Arguments,
                        _file: &'static str,
                        _line: u32)
                        -> ! {
    // HACK to keep the `_file` string in the final binary
    // (that is what lets `strings foo.bin` reveal which source file panics)
    unsafe { ptr::write_volatile(0x8 as *mut _, _file) }
    loop {}
}
$ head -n7 Cargo.toml
[dependencies.futures]
default-features = false
version = "0.1.3"

[profile.release]
lto = true
panic = "abort"

$ cargo rustc --release --verbose -- -C link-arg=-nostartfiles
$ objdump -Cd target/release/foo
00000000000002c0 <_start>:
 2c0:   b0 04                   mov    $0x4,%al
 2c2:   31 c9                   xor    %ecx,%ecx
 2c4:   66 66 66 2e 0f 1f 84    data16 data16 nopw %cs:0x0(%rax,%rax,1)
 2cb:   00 00 00 00 00
 2d0:   80 f9 01                cmp    $0x1,%cl
 2d3:   74 3b                   je     310 <_start+0x50>
 2d5:   66 66 2e 0f 1f 84 00    data16 nopw %cs:0x0(%rax,%rax,1)
 2dc:   00 00 00 00
 2e0:   f6 04 25 00 00 00 00    testb  $0x1,0x0
 2e7:   01
 2e8:   74 f6                   je     2e0 <_start+0x20>
 2ea:   88 04 25 04 00 00 00    mov    %al,0x4
 2f1:   84 c9                   test   %cl,%cl
 2f3:   75 3d                   jne    332 <_start+0x72>
 2f5:   b1 01                   mov    $0x1,%cl
 2f7:   b0 02                   mov    $0x2,%al
 2f9:   f6 04 25 00 00 00 00    testb  $0x1,0x0
 300:   01
 301:   74 cd                   je     2d0 <_start+0x10>
 303:   c6 04 25 04 00 00 00    movb   $0x2,0x4
 30a:   02
 30b:   eb 23                   jmp    330 <_start+0x70>
 30d:   0f 1f 00                nopl   (%rax)
 310:   f6 04 25 00 00 00 00    testb  $0x1,0x0
 317:   01
 318:   74 f6                   je     310 <_start+0x50>
 31a:   88 04 25 04 00 00 00    mov    %al,0x4
 321:   66 66 66 66 66 66 2e    data16 data16 data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
 328:   0f 1f 84 00 00 00 00
 32f:   00
 330:   eb fe                   jmp    330 <_start+0x70>
 332:   50                      push   %rax
 333:   e8 38 00 00 00          callq  370 <core::panicking::panic::h194ce5d68a8f28a1>
 338:   0f 1f 84 00 00 00 00    nopl   0x0(%rax,%rax,1)
 33f:   00

0000000000000340 <core::panicking::panic_fmt::h561c5ee168a3d2cb>:
(..)
$ objcopy -O binary target/release/foo foo.bin

$ strings foo.bin
(..)
/home/japaric/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-0.1.3/src/chain.rs

If the program is changed to the no-and_then variant, the disassembly looks
like this:

$ objdump -Cd target/release/foo
0000000000000280 <_start>:
 280:   f6 04 25 00 00 00 00    testb  $0x1,0x0
 287:   01
 288:   74 f6                   je     280 <_start>
 28a:   c6 04 25 04 00 00 00    movb   $0x4,0x4
 291:   04
 292:   66 66 66 66 66 2e 0f    data16 data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
 299:   1f 84 00 00 00 00 00
 2a0:   f6 04 25 00 00 00 00    testb  $0x1,0x0
 2a7:   01
 2a8:   74 f6                   je     2a0 <_start+0x20>
 2aa:   c6 04 25 04 00 00 00    movb   $0x2,0x4
 2b1:   02
 2b2:   66 66 66 66 66 2e 0f    data16 data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
 2b9:   1f 84 00 00 00 00 00
 2c0:   eb fe                   jmp    2c0 <_start+0x40>

There are no panic calls in it (neither core::panicking::panic nor panic_fmt).

Meta

$ rustc -V
rustc 1.15.0-nightly (43006fcea 2016-11-15)

I don't think this is a problem with the implementation of AndThen/Chain in
the futures crate, because I have tried re-implementing Future in a few
different ways (e.g. without error handling, or sprinkling #[inline]
everywhere), but none of that helps.

Code like this:

busy_wait(futures::done(Ok(42)).and_then(|x| futures::done(Ok(x))));

optimizes the same as the "split" version, and in that case LLVM evaluates the
expression at compile time and replaces it with 42. So LLVM can actually lower
and_then to panic!-free code; it just doesn't optimize this particular case
well (perhaps because of the volatile memory operations?).

join is another adapter that has the same issue.

cc @alexcrichton @eddyb

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-LLVM: Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
    A-codegen: Area: Code generation
    C-enhancement: Category: An issue proposing an enhancement or a PR with one.
    C-optimization: Category: An issue highlighting optimization opportunities or PRs implementing such
    I-slow: Issue: Problems and improvements with respect to performance of generated code.
    T-compiler: Relevant to the compiler team, which will review and decide on the PR/issue.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions