From 4d9e261778960504cfc53b2d859186f601490cac Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Tue, 19 Aug 2025 16:20:06 -0700 Subject: [PATCH 01/36] chore: add placeholder tco feature --- crates/vm/Cargo.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/vm/Cargo.toml b/crates/vm/Cargo.toml index 55d2e16030..2ece9e246e 100644 --- a/crates/vm/Cargo.toml +++ b/crates/vm/Cargo.toml @@ -68,6 +68,9 @@ basic-memory = [] # turns on stark-backend debugger in all proofs stark-debug = [] test-utils = ["openvm-stark-sdk"] +# Tail call optimizations. This requires nightly for the `become` keyword (https://github.com/rust-lang/rust/pull/144232) which will likely be stabilized in Rust 1.90 +# However `become` may still lead to compiler panics instead of runtime panics, so `tco` will remain a separate feature. +tco = [] # performance features: mimalloc = ["openvm-stark-backend/mimalloc"] jemalloc = ["openvm-stark-backend/jemalloc"] From 13cc6f202f32f558df534b43c1922870542dbe40 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Tue, 19 Aug 2025 21:41:15 -0700 Subject: [PATCH 02/36] feat: add macro to generate tco handler and update interpreter for tco --- Cargo.toml | 1 + crates/vm/Cargo.toml | 2 +- crates/vm/derive/Cargo.toml | 2 +- crates/vm/derive/src/lib.rs | 64 +++++++++- crates/vm/derive/src/tco.rs | 118 ++++++++++++++++++ crates/vm/src/arch/execution.rs | 34 ++++- crates/vm/src/arch/execution_mode/pure.rs | 6 - crates/vm/src/arch/interpreter.rs | 98 +++++++++++++++ crates/vm/src/arch/mod.rs | 2 + crates/vm/src/arch/state.rs | 2 + crates/vm/src/lib.rs | 3 + crates/vm/src/system/phantom/execution.rs | 17 +++ .../vm/src/system/public_values/execution.rs | 25 ++++ rust-toolchain.toml | 3 +- 14 files changed, 364 insertions(+), 13 deletions(-) create mode 100644 crates/vm/derive/src/tco.rs diff --git a/Cargo.toml b/Cargo.toml index 733294c63f..32cf2c83c9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -229,6 +229,7 @@ dashmap = "6.1.0" memmap2 = "0.9.5" libc = "0.2.175" tracing-subscriber = { version = "0.3.17", features = ["std", "env-filter"] } +paste = "1.0.15" # default-features = false for no_std for use in guest programs itertools = { version = "0.14.0", default-features = false } diff --git a/crates/vm/Cargo.toml b/crates/vm/Cargo.toml index 2ece9e246e..47083d236a 100644 --- a/crates/vm/Cargo.toml +++ b/crates/vm/Cargo.toml @@ -50,7 +50,7 @@ openvm-native-compiler.workspace = true openvm-rv32im-transpiler.workspace = true [features] -default = ["parallel", "jemalloc"] +default = ["parallel", "jemalloc", "tco"] parallel = [ "openvm-stark-backend/parallel", "dashmap/rayon", diff --git a/crates/vm/derive/Cargo.toml b/crates/vm/derive/Cargo.toml index d2d11dcc78..2fb38626fa 100644 --- a/crates/vm/derive/Cargo.toml +++ b/crates/vm/derive/Cargo.toml @@ -10,7 +10,7 @@ license.workspace = true proc-macro = true [dependencies] -syn = { version = "2.0", features = ["parsing"] } +syn = { version = "2.0", features = ["parsing", "full"] } quote = "1.0" proc-macro2 = "1.0" itertools = { workspace = true } diff --git a/crates/vm/derive/src/lib.rs b/crates/vm/derive/src/lib.rs index a43053e0cd..9eb04c6caa 100644 --- a/crates/vm/derive/src/lib.rs +++ b/crates/vm/derive/src/lib.rs @@ -9,6 +9,8 @@ use syn::{ GenericParam, Ident, Meta, Token, }; +mod tco; + #[proc_macro_derive(PreflightExecutor)] pub fn preflight_executor_derive(input: TokenStream) -> TokenStream { let ast: syn::DeriveInput = syn::parse(input).unwrap(); @@ -172,6 +174,18 @@ pub fn executor_derive(input: TokenStream) -> TokenStream { Ctx: ::openvm_circuit::arch::execution_mode::ExecutionCtxTrait, { self.0.pre_compute(pc, inst, data) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &::openvm_circuit::arch::instructions::instruction::Instruction, + data: &mut [u8], + ) -> Result<::openvm_circuit::arch::Handler, ::openvm_circuit::arch::StaticProgramError> + where + Ctx: ::openvm_circuit::arch::execution_mode::ExecutionCtxTrait, { + self.0.handler(pc, inst, data) + } } } .into() @@ -205,7 +219,7 @@ pub fn executor_derive(input: TokenStream) -> TokenStream { }); // Use full path ::openvm_circuit... so it can be used either within or outside the vm // crate. Assume F is already generic of the field. - let (pre_compute_size_arms, pre_compute_arms, where_predicates): (Vec<_>, Vec<_>, Vec<_>) = multiunzip(variants.iter().map(|(variant_name, field)| { + let (pre_compute_size_arms, pre_compute_arms, handler_arms, where_predicates): (Vec<_>, Vec<_>, Vec<_>, Vec<_>) = multiunzip(variants.iter().map(|(variant_name, field)| { let field_ty = &field.ty; let pre_compute_size_arm = quote! { #name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::Executor<#first_ty_generic>>::pre_compute_size(x) @@ -213,10 +227,13 @@ pub fn executor_derive(input: TokenStream) -> TokenStream { let pre_compute_arm = quote! { #name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::Executor<#first_ty_generic>>::pre_compute(x, pc, instruction, data) }; + let handler_arm = quote! { + #name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::Executor<#first_ty_generic>>::handler(x, pc, instruction, data) + }; let where_predicate = syn::parse_quote! { #field_ty: ::openvm_circuit::arch::Executor<#first_ty_generic> }; - (pre_compute_size_arm, pre_compute_arm, where_predicate) + (pre_compute_size_arm, pre_compute_arm, handler_arm, where_predicate) })); let where_clause = new_generics.make_where_clause(); for predicate in where_predicates { @@ -247,6 +264,20 @@ pub fn executor_derive(input: TokenStream) -> TokenStream { #(#pre_compute_arms,)* } } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + instruction: &::openvm_circuit::arch::instructions::instruction::Instruction, + data: &mut [u8], + ) -> Result<::openvm_circuit::arch::Handler, ::openvm_circuit::arch::StaticProgramError> + where + Ctx: ::openvm_circuit::arch::execution_mode::ExecutionCtxTrait, { + match self { + #(#handler_arms,)* + } + } } } .into() @@ -501,7 +532,7 @@ fn generate_config_traits_impl(name: &Ident, inner: &DataStruct) -> syn::Result< .iter() .filter(|f| f.attrs.iter().any(|attr| attr.path().is_ident("config"))) .exactly_one() - .clone() + .ok() .expect("Exactly one field must have the #[config] attribute"); let (source_name, source_name_upper) = gen_name_with_uppercase_idents(source_field.ident.as_ref().unwrap()); @@ -700,3 +731,30 @@ fn parse_executor_type( }) } } + +/// An attribute procedural macro for creating TCO (Tail Call Optimization) handlers. +/// +/// This macro generates a handler function that wraps an execute implementation +/// with tail call optimization using the `become` keyword. It extracts the generics +/// and where clauses from the original function. +/// +/// # Usage +/// +/// Place this attribute above a function definition: +/// ``` +/// #[create_tco_handler = "handler_name"] +/// unsafe fn execute_e1_impl( +/// pre_compute: &[u8], +/// state: &mut VmExecState, +/// ) where +/// CTX: ExecutionCtxTrait, +/// { +/// // function body +/// } +/// ``` +/// +/// This will generate a TCO handler function with the same generics and where clauses. +#[proc_macro_attribute] +pub fn create_tco_handler(_attr: TokenStream, item: TokenStream) -> TokenStream { + tco::tco_impl(item) +} diff --git a/crates/vm/derive/src/tco.rs b/crates/vm/derive/src/tco.rs new file mode 100644 index 0000000000..7321ab9cc3 --- /dev/null +++ b/crates/vm/derive/src/tco.rs @@ -0,0 +1,118 @@ +use proc_macro::TokenStream; +use quote::{format_ident, quote}; +use syn::{parse_macro_input, ItemFn}; + +/// Implementation of the TCO handler generation logic. +/// This is called from the proc macro attribute in lib.rs. +pub fn tco_impl(item: TokenStream) -> TokenStream { + // Parse the input function + let input_fn = parse_macro_input!(item as ItemFn); + + // Extract information from the function + let fn_name = &input_fn.sig.ident; + let generics = &input_fn.sig.generics; + let where_clause = &generics.where_clause; + + // Extract the first two generic type parameters (F and CTX) + let (f_type, ctx_type) = extract_f_and_ctx_types(generics); + // Derive new function name: + // If original ends with `_impl`, replace with `_tco_handler`, else append suffix. + let new_name_str = fn_name + .to_string() + .strip_suffix("_impl") + .map(|base| format!("{base}_tco_handler")) + .unwrap_or_else(|| format!("{fn_name}_tco_handler")); + let handler_name = format_ident!("{}", new_name_str); + + // Build the generic parameters for the handler, preserving all original generics + let handler_generics = generics.clone(); + + // Build the function call with all the generics + let generic_args = build_generic_args(generics); + let execute_call = if generic_args.is_empty() { + quote! { #fn_name(pre_compute, exec_state) } + } else { + quote! { #fn_name::<#(#generic_args),*>(pre_compute, exec_state) } + }; + + // Generate the TCO handler function + let handler_fn = quote! { + #[cfg(feature = "tco")] + #[inline(never)] + unsafe fn #handler_name #handler_generics ( + interpreter: &::openvm_circuit::arch::interpreter::InterpretedInstance<#f_type, #ctx_type>, + exec_state: &mut ::openvm_circuit::arch::VmExecState< + #f_type, + ::openvm_circuit::system::memory::online::GuestMemory, + #ctx_type, + >, + ) -> Result<(), ::openvm_circuit::arch::ExecutionError> + #where_clause + { + let pre_compute = interpreter.get_pre_compute(exec_state.pc); + #execute_call; + + if std::hint::unlikely(exec_state.exit_code.is_err()) { + return Err(::openvm_circuit::arch::ExecutionError::ExecStateError); + } + if std::hint::unlikely(exec_state.exit_code.as_ref().unwrap().is_some()) { + // terminate + return Ok(()); + } + // exec_state.pc should have been updated by execute_impl at this point + let next_handler = interpreter.get_handler(exec_state.pc)?; + become next_handler(interpreter, exec_state) + } + }; + + // Return both the original function and the new handler + let output = quote! { + #input_fn + + #handler_fn + }; + + TokenStream::from(output) +} + +fn extract_f_and_ctx_types(generics: &syn::Generics) -> (syn::Ident, syn::Ident) { + let mut type_params = generics.params.iter().filter_map(|param| { + if let syn::GenericParam::Type(type_param) = param { + Some(&type_param.ident) + } else { + None + } + }); + + let f_type = type_params + .next() + .expect("Function must have at least one type parameter (F)") + .clone(); + let ctx_type = type_params + .next() + .expect("Function must have at least two type parameters (F and CTX)") + .clone(); + + (f_type, ctx_type) +} + +fn build_generic_args(generics: &syn::Generics) -> Vec { + generics + .params + .iter() + .map(|param| match param { + syn::GenericParam::Type(type_param) => { + let ident = &type_param.ident; + quote! { #ident } + } + syn::GenericParam::Lifetime(lifetime) => { + let lifetime = &lifetime.lifetime; + quote! { #lifetime } + } + syn::GenericParam::Const(const_param) => { + let ident = &const_param.ident; + quote! { #ident } + } + }) + .collect() +} diff --git a/crates/vm/src/arch/execution.rs b/crates/vm/src/arch/execution.rs index 4e3f11804c..e859bea1e7 100644 --- a/crates/vm/src/arch/execution.rs +++ b/crates/vm/src/arch/execution.rs @@ -12,6 +12,8 @@ use serde::{Deserialize, Serialize}; use thiserror::Error; use super::{execution_mode::ExecutionCtxTrait, Streams, VmExecState}; +#[cfg(feature = "tco")] +use crate::arch::interpreter::InterpretedInstance; #[cfg(feature = "metrics")] use crate::metrics::VmMetrics; use crate::{ @@ -72,6 +74,9 @@ pub enum ExecutionError { Inventory(#[from] ExecutorInventoryError), #[error("static program error: {0}")] Static(#[from] StaticProgramError), + // Placeholder error type for tco + #[error("error in VmExecState")] + ExecStateError, } /// Errors in the program that can be statically analyzed before runtime. @@ -91,7 +96,20 @@ pub enum StaticProgramError { /// The `pre_compute: &[u8]` is a pre-computed buffer of data corresponding to a single instruction. /// The contents of `pre_compute` are determined from the program code as specified by the /// [Executor] and [MeteredExecutor] traits. -pub type ExecuteFunc = unsafe fn(&[u8], &mut VmExecState); +pub type ExecuteFunc = + unsafe fn(pre_compute: &[u8], exec_state: &mut VmExecState); + +/// Handler for tail call elimination. The `CTX` is assumed to contain pointers to the pre-computed +/// buffer and the function handler table. +/// +/// - `pre_compute_buf` is the starting pointer of the pre-computed buffer. +/// - `handlers` is the starting pointer of the table of function pointers of `Handler` type. The +/// pointer is typeless to avoid self-referential types. +#[cfg(feature = "tco")] +pub type Handler = unsafe fn( + interpreter: &InterpretedInstance, + exec_state: &mut VmExecState, +) -> Result<(), ExecutionError>; /// Trait for pure execution via a host interpreter. The trait methods provide the methods to /// pre-process the program code into function pointers which operate on `pre_compute` instruction @@ -108,6 +126,20 @@ pub trait Executor { ) -> Result, StaticProgramError> where Ctx: ExecutionCtxTrait; + + /// Returns a function pointer with tail call optimization. The handler function assumes that + /// the pre-compute buffer it receives is the populated `data`. + // NOTE: we could have used `pre_compute` above to populate `data`, but the implementations were + // simpler to keep `handler` entirely separate from `pre_compute`. + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait; } /// Trait for metered execution via a host interpreter. The trait methods provide the methods to diff --git a/crates/vm/src/arch/execution_mode/pure.rs b/crates/vm/src/arch/execution_mode/pure.rs index 176a8c8a2b..83001d7b64 100644 --- a/crates/vm/src/arch/execution_mode/pure.rs +++ b/crates/vm/src/arch/execution_mode/pure.rs @@ -19,12 +19,6 @@ impl ExecutionCtx { } } -impl Default for ExecutionCtx { - fn default() -> Self { - Self::new(None) - } -} - impl ExecutionCtxTrait for ExecutionCtx { #[inline(always)] fn on_memory_operation(&mut self, _address_space: u32, _ptr: u32, _size: u32) {} diff --git a/crates/vm/src/arch/interpreter.rs b/crates/vm/src/arch/interpreter.rs index 10bb981804..36d2ccaa44 100644 --- a/crates/vm/src/arch/interpreter.rs +++ b/crates/vm/src/arch/interpreter.rs @@ -15,6 +15,8 @@ use openvm_instructions::{ use openvm_stark_backend::p3_field::PrimeField32; use tracing::info_span; +#[cfg(feature = "tco")] +use crate::arch::Handler; use crate::{ arch::{ execution_mode::{ @@ -44,6 +46,11 @@ pub struct InterpretedInstance<'a, F, Ctx> { /// Instruction table of function pointers and pointers to the pre-computed buffer. Indexed by /// `pc_index = (pc - pc_base) / DEFAULT_PC_STEP`. pre_compute_insns: Vec>, + #[cfg(feature = "tco")] + pre_compute_max_size: usize, + /// Handler function pointers for tail call optimization. + #[cfg(feature = "tco")] + handlers: Vec>, // *const ()>, pc_base: u32, pc_start: u32, @@ -116,6 +123,30 @@ where let pc_base = program.pc_base; let pc_start = exe.pc_start; let init_memory = exe.init_memory.clone(); + #[cfg(feature = "tco")] + let handlers = program + .instructions_and_debug_infos + .iter() + .zip_eq(split_pre_compute_buf.iter_mut()) + .enumerate() + .map( + |(pc_idx, (inst_opt, pre_compute))| -> Result, StaticProgramError> { + if let Some((inst, _)) = inst_opt { + let pc = pc_base + pc_idx as u32 * DEFAULT_PC_STEP; + if get_system_opcode_handler::(inst, pre_compute).is_some() { + Ok(terminate_execute_e12_tco_handler) + } else { + // unwrap because get_pre_compute_instructions would have errored + // already on DisabledOperation + let executor = inventory.get_executor(inst.opcode).unwrap(); + executor.handler(pc, inst, *pre_compute) + } + } else { + Ok(unreachable_tco_handler) + } + }, + ) + .collect::, _>>()?; Ok(Self { system_config: inventory.config().clone(), @@ -124,8 +155,53 @@ where pc_base, pc_start, init_memory, + #[cfg(feature = "tco")] + pre_compute_max_size, + #[cfg(feature = "tco")] + handlers, }) } + + /// # Safety + /// - This function assumes that the `pc` is within program bounds - this should be the case if + /// the pc is checked to be in bounds before jumping to it. + /// - The returned slice may not be entirely initialized, but it is the job of each Executor to + /// initialize the parts of the buffer that the instruction handler will use. + #[cfg(feature = "tco")] + #[inline(always)] + pub fn get_pre_compute(&self, pc: u32) -> &[u8] { + let pc_idx = get_pc_index(self.pc_base, pc); + // SAFETY: + // - we assume that pc is in bounds + // - pre_compute_buf is allocated for pre_compute_max_size * program_len bytes, with each + // instruction getting pre_compute_max_size bytes + // - self.pre_compute_buf.ptr is non-null + // - initialization of the contents of the slice is the responsibility of each Executor + unsafe { + let ptr = self + .pre_compute_buf + .ptr + .add(pc_idx * self.pre_compute_max_size); + std::slice::from_raw_parts(ptr, self.pre_compute_max_size) + } + } + + #[cfg(feature = "tco")] + #[inline(always)] + pub fn get_handler(&self, pc: u32) -> Result, ExecutionError> { + let pc_idx = get_pc_index(self.pc_base, pc); + if std::hint::unlikely(pc_idx > self.handlers.len()) { + return Err(ExecutionError::PcOutOfBounds { + pc, + pc_base: self.pc_base, + program_len: self.handlers.len(), + }); + } + // SAFETY: + // - we checked above that pc_idx is within bounds + let handler = unsafe { self.handlers.get_unchecked(pc_idx) }; + Ok(*handler) + } } impl<'a, F, Ctx> InterpretedInstance<'a, F, Ctx> @@ -166,6 +242,10 @@ where pc_base, pc_start, init_memory, + #[cfg(feature = "tco")] + pre_compute_max_size, + #[cfg(feature = "tco")] + handlers: vec![], }) } } @@ -423,6 +503,7 @@ impl Drop for AlignedBuf { } } +#[inline(always)] unsafe fn terminate_execute_e12_impl( pre_compute: &[u8], vm_state: &mut VmExecState, @@ -432,6 +513,23 @@ unsafe fn terminate_execute_e12_impl( vm_state.exit_code = Ok(Some(pre_compute.exit_code)); } +#[cfg(feature = "tco")] +unsafe fn terminate_execute_e12_tco_handler( + interpreter: &InterpretedInstance, + vm_state: &mut VmExecState, +) -> Result<(), ExecutionError> { + let pre_compute = interpreter.get_pre_compute(vm_state.pc); + terminate_execute_e12_impl(pre_compute, vm_state); + Ok(()) +} +#[cfg(feature = "tco")] +unsafe fn unreachable_tco_handler( + _: &InterpretedInstance, + vm_state: &mut VmExecState, +) -> Result<(), ExecutionError> { + Err(ExecutionError::Unreachable(vm_state.pc)) +} + fn get_pre_compute_max_size>( program: &Program, inventory: &ExecutorInventory, diff --git a/crates/vm/src/arch/mod.rs b/crates/vm/src/arch/mod.rs index 974b86008e..402931e61c 100644 --- a/crates/vm/src/arch/mod.rs +++ b/crates/vm/src/arch/mod.rs @@ -30,6 +30,8 @@ pub use execution::*; pub use execution_mode::{ExecutionCtxTrait, MeteredExecutionCtxTrait}; pub use extensions::*; pub use integration_api::*; +#[cfg(feature = "tco")] +pub use openvm_circuit_derive::create_tco_handler; pub use openvm_instructions as instructions; pub use record_arena::*; pub use state::*; diff --git a/crates/vm/src/arch/state.rs b/crates/vm/src/arch/state.rs index a9c2ea3925..ae65844fa6 100644 --- a/crates/vm/src/arch/state.rs +++ b/crates/vm/src/arch/state.rs @@ -3,6 +3,7 @@ use std::{ ops::{Deref, DerefMut}, }; +use getset::WithSetters; use openvm_instructions::exe::SparseMemoryImage; use rand::{rngs::StdRng, SeedableRng}; @@ -94,6 +95,7 @@ impl VmState { /// The global state is generic in guest memory `MEM` and additional context `CTX`. /// The host state is execution context specific. // @dev: Do not confuse with `ExecutionState` struct. +#[derive(WithSetters)] pub struct VmExecState { /// Core VM state pub vm_state: VmState, diff --git a/crates/vm/src/lib.rs b/crates/vm/src/lib.rs index 271ea04b82..c4dfd35053 100644 --- a/crates/vm/src/lib.rs +++ b/crates/vm/src/lib.rs @@ -1,3 +1,6 @@ +#![cfg_attr(feature = "tco", allow(incomplete_features))] +#![cfg_attr(feature = "tco", feature(likely_unlikely))] +#![cfg_attr(feature = "tco", feature(explicit_tail_calls))] extern crate self as openvm_circuit; pub use openvm_circuit_derive as derive; diff --git a/crates/vm/src/system/phantom/execution.rs b/crates/vm/src/system/phantom/execution.rs index e7e1775052..0d71db54f1 100644 --- a/crates/vm/src/system/phantom/execution.rs +++ b/crates/vm/src/system/phantom/execution.rs @@ -7,6 +7,8 @@ use openvm_instructions::{ use openvm_stark_backend::p3_field::PrimeField32; use rand::rngs::StdRng; +#[cfg(feature = "tco")] +use crate::arch::{create_tco_handler, Handler}; use crate::{ arch::{ execution_mode::{ExecutionCtxTrait, MeteredExecutionCtxTrait}, @@ -53,6 +55,20 @@ where self.pre_compute_impl(inst, data); Ok(execute_e1_impl) } + #[cfg(feature = "tco")] + fn handler( + &self, + _pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut PhantomPreCompute = data.borrow_mut(); + self.pre_compute_impl(inst, data); + Ok(execute_e1_tco_handler) + } } pub(super) struct PhantomStateMut<'a, F> { @@ -85,6 +101,7 @@ unsafe fn execute_e12_impl( vm_state.instret += 1; } +#[cfg_attr(feature = "tco", create_tco_handler)] #[inline(always)] unsafe fn execute_e1_impl( pre_compute: &[u8], diff --git a/crates/vm/src/system/public_values/execution.rs b/crates/vm/src/system/public_values/execution.rs index 34c1f22ff0..19057a75b6 100644 --- a/crates/vm/src/system/public_values/execution.rs +++ b/crates/vm/src/system/public_values/execution.rs @@ -7,6 +7,8 @@ use openvm_instructions::{ use openvm_stark_backend::p3_field::PrimeField32; use super::PublicValuesExecutor; +#[cfg(feature = "tco")] +use crate::arch::{create_tco_handler, Handler}; use crate::{ arch::{ execution_mode::{ExecutionCtxTrait, MeteredExecutionCtxTrait}, @@ -87,6 +89,28 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + _pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut PublicValuesPreCompute = data.borrow_mut(); + let (b_is_imm, c_is_imm) = self.pre_compute_impl(inst, data); + + let fn_ptr = match (b_is_imm, c_is_imm) { + (true, true) => execute_e1_tco_handler::<_, _, true, true>, + (true, false) => execute_e1_tco_handler::<_, _, true, false>, + (false, true) => execute_e1_tco_handler::<_, _, false, true>, + (false, false) => execute_e1_tco_handler::<_, _, false, false>, + }; + Ok(fn_ptr) + } } impl MeteredExecutor for PublicValuesExecutor @@ -155,6 +179,7 @@ unsafe fn execute_e12_impl( pre_compute: &[u8], diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 8825102061..3828e8e42a 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,4 @@ [toolchain] -channel = "1.86.0" +channel = "nightly-2025-08-19" +# channel = "1.86.0" components = ["clippy", "rustfmt"] From fb3d997d93f1123d9c7b6e10625a3e25952e4678 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Tue, 19 Aug 2025 23:31:18 -0700 Subject: [PATCH 03/36] feat: rv32im tco without become keyword `become` keyword was causing some corruption and not passing references properly between calls. --- crates/vm/derive/src/tco.rs | 12 +++- crates/vm/src/arch/interpreter.rs | 59 +++++++++++++------ crates/vm/src/utils/stark_utils.rs | 2 + extensions/rv32im/circuit/Cargo.toml | 3 +- .../rv32im/circuit/src/auipc/execution.rs | 24 +++++--- .../rv32im/circuit/src/base_alu/execution.rs | 41 ++++++++++--- .../rv32im/circuit/src/branch_eq/execution.rs | 29 ++++++--- .../rv32im/circuit/src/branch_lt/execution.rs | 30 +++++++--- .../rv32im/circuit/src/divrem/execution.rs | 30 +++++++--- .../rv32im/circuit/src/hintstore/execution.rs | 25 +++++++- .../rv32im/circuit/src/jal_lui/execution.rs | 30 +++++++--- .../rv32im/circuit/src/jalr/execution.rs | 29 ++++++--- .../rv32im/circuit/src/less_than/execution.rs | 30 +++++++--- extensions/rv32im/circuit/src/lib.rs | 3 + .../circuit/src/load_sign_extend/execution.rs | 30 +++++++--- .../rv32im/circuit/src/loadstore/execution.rs | 39 ++++++++++++ .../rv32im/circuit/src/mul/execution.rs | 24 +++++--- .../rv32im/circuit/src/mulh/execution.rs | 29 ++++++--- .../rv32im/circuit/src/shift/execution.rs | 33 ++++++++--- extensions/rv32im/tests/Cargo.toml | 3 +- 20 files changed, 397 insertions(+), 108 deletions(-) diff --git a/crates/vm/derive/src/tco.rs b/crates/vm/derive/src/tco.rs index 7321ab9cc3..186db9e90b 100644 --- a/crates/vm/derive/src/tco.rs +++ b/crates/vm/derive/src/tco.rs @@ -49,19 +49,25 @@ pub fn tco_impl(item: TokenStream) -> TokenStream { ) -> Result<(), ::openvm_circuit::arch::ExecutionError> #where_clause { - let pre_compute = interpreter.get_pre_compute(exec_state.pc); + let pre_compute = interpreter.get_pre_compute(exec_state.vm_state.pc); #execute_call; if std::hint::unlikely(exec_state.exit_code.is_err()) { return Err(::openvm_circuit::arch::ExecutionError::ExecStateError); } if std::hint::unlikely(exec_state.exit_code.as_ref().unwrap().is_some()) { + #ctx_type::on_terminate(exec_state); // terminate return Ok(()); } + if #ctx_type::should_suspend(exec_state) { + return Ok(()); + } // exec_state.pc should have been updated by execute_impl at this point - let next_handler = interpreter.get_handler(exec_state.pc)?; - become next_handler(interpreter, exec_state) + let next_handler = interpreter.get_handler(exec_state.vm_state.pc)?; + + // The `become` keyword has a bug that is not re-passing the `interpreter`, `exec_state` references properly. But llvm seems to almost always guarantee tail call elimination when the function signature is the same as the current function. + next_handler(interpreter, exec_state) } }; diff --git a/crates/vm/src/arch/interpreter.rs b/crates/vm/src/arch/interpreter.rs index 36d2ccaa44..8bb46e4418 100644 --- a/crates/vm/src/arch/interpreter.rs +++ b/crates/vm/src/arch/interpreter.rs @@ -139,7 +139,7 @@ where // unwrap because get_pre_compute_instructions would have errored // already on DisabledOperation let executor = inventory.get_executor(inst.opcode).unwrap(); - executor.handler(pc, inst, *pre_compute) + executor.handler(pc, inst, pre_compute) } } else { Ok(unreachable_tco_handler) @@ -177,6 +177,9 @@ where // instruction getting pre_compute_max_size bytes // - self.pre_compute_buf.ptr is non-null // - initialization of the contents of the slice is the responsibility of each Executor + debug_assert!( + (pc_idx + 1) * self.pre_compute_max_size <= self.pre_compute_buf.layout.size() + ); unsafe { let ptr = self .pre_compute_buf @@ -190,7 +193,7 @@ where #[inline(always)] pub fn get_handler(&self, pc: u32) -> Result, ExecutionError> { let pc_idx = get_pc_index(self.pc_base, pc); - if std::hint::unlikely(pc_idx > self.handlers.len()) { + if std::hint::unlikely(pc_idx >= self.handlers.len()) { return Err(ExecutionError::PcOutOfBounds { pc, pc_base: self.pc_base, @@ -287,13 +290,38 @@ where ) -> Result, ExecutionError> { let ctx = ExecutionCtx::new(num_insns); let mut exec_state = VmExecState::new(from_state, ctx); - // Start execution - execute_with_metrics!( - "execute_e1", - self.pc_base, - &mut exec_state, - &self.pre_compute_insns - ); + + #[cfg(feature = "metrics")] + let start = std::time::Instant::now(); + #[cfg(feature = "metrics")] + let start_instret = exec_state.instret; + + #[cfg(not(feature = "tco"))] + unsafe { + execute_trampoline(self.pc_base, &mut exec_state, &self.pre_compute_insns); + } + #[cfg(feature = "tco")] + unsafe { + let handler = self.get_handler(exec_state.pc)?; + let res = handler(self, &mut exec_state); + if let Err(err) = res { + match err { + ExecutionError::ExecStateError => {} + _ => { + return Err(err); + } + } + } + } + + #[cfg(feature = "metrics")] + { + let elapsed = start.elapsed(); + let insns = exec_state.instret - start_instret; + tracing::info!("instructions_executed={insns}"); + metrics::counter!("execute_e1_insns").absolute(insns); + metrics::gauge!("execute_e1_insn_mi/s").set(insns as f64 / elapsed.as_micros() as f64); + } if num_insns.is_some() { check_exit_code(exec_state.exit_code)?; } else { @@ -410,15 +438,10 @@ fn split_pre_compute_buf<'a, F>( ) -> Vec<&'a mut [u8]> { let program_len = program.instructions_and_debug_infos.len(); let buf_len = program_len * pre_compute_max_size; - let mut pre_compute_buf_ptr = - unsafe { std::slice::from_raw_parts_mut(pre_compute_buf.ptr, buf_len) }; - let mut split_pre_compute_buf = Vec::with_capacity(program_len); - for _ in 0..program_len { - let (first, last) = pre_compute_buf_ptr.split_at_mut(pre_compute_max_size); - pre_compute_buf_ptr = last; - split_pre_compute_buf.push(first); - } - split_pre_compute_buf + let pre_compute_buf = unsafe { std::slice::from_raw_parts_mut(pre_compute_buf.ptr, buf_len) }; + pre_compute_buf + .chunks_exact_mut(pre_compute_max_size) + .collect() } /// Executes using function pointers with the trampoline (loop) approach. diff --git a/crates/vm/src/utils/stark_utils.rs b/crates/vm/src/utils/stark_utils.rs index de1a834d30..fdcdda2df5 100644 --- a/crates/vm/src/utils/stark_utils.rs +++ b/crates/vm/src/utils/stark_utils.rs @@ -114,6 +114,8 @@ where let exe = exe.into(); let input = input.into(); let metered_ctx = vm.build_metered_ctx(); + // TEMP: for testing + vm.interpreter(&exe)?.execute(input.clone(), None)?; let (segments, _) = vm .metered_interpreter(&exe)? .execute_metered(input.clone(), metered_ctx)?; diff --git a/extensions/rv32im/circuit/Cargo.toml b/extensions/rv32im/circuit/Cargo.toml index 9f6bbb6824..b073479fa8 100644 --- a/extensions/rv32im/circuit/Cargo.toml +++ b/extensions/rv32im/circuit/Cargo.toml @@ -33,9 +33,10 @@ openvm-circuit = { workspace = true, features = ["test-utils"] } test-case.workspace = true [features] -default = ["parallel", "jemalloc"] +default = ["parallel", "jemalloc", "tco"] parallel = ["openvm-circuit/parallel"] test-utils = ["openvm-circuit/test-utils", "dep:openvm-stark-sdk"] +tco = ["openvm-circuit/tco"] # performance features: mimalloc = ["openvm-circuit/mimalloc"] jemalloc = ["openvm-circuit/jemalloc"] diff --git a/extensions/rv32im/circuit/src/auipc/execution.rs b/extensions/rv32im/circuit/src/auipc/execution.rs index c9269613a1..c342f7f2bc 100644 --- a/extensions/rv32im/circuit/src/auipc/execution.rs +++ b/extensions/rv32im/circuit/src/auipc/execution.rs @@ -3,13 +3,7 @@ use std::{ mem::size_of, }; -use openvm_circuit::{ - arch::{ - E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait, - MeteredExecutor, StaticProgramError, VmExecState, - }, - system::memory::online::GuestMemory, -}; +use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives_derive::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_AS, @@ -66,6 +60,21 @@ where self.pre_compute_impl(pc, inst, data)?; Ok(execute_e1_impl) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut AuiPcPreCompute = data.borrow_mut(); + self.pre_compute_impl(pc, inst, data)?; + Ok(execute_e1_tco_handler) + } } impl MeteredExecutor for Rv32AuipcExecutor @@ -105,6 +114,7 @@ unsafe fn execute_e12_impl( vm_state.instret += 1; } +#[create_tco_handler] unsafe fn execute_e1_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/rv32im/circuit/src/base_alu/execution.rs b/extensions/rv32im/circuit/src/base_alu/execution.rs index acbbf12844..3ac1e92cd8 100644 --- a/extensions/rv32im/circuit/src/base_alu/execution.rs +++ b/extensions/rv32im/circuit/src/base_alu/execution.rs @@ -3,13 +3,7 @@ use std::{ mem::size_of, }; -use openvm_circuit::{ - arch::{ - E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait, - MeteredExecutor, StaticProgramError, VmExecState, - }, - system::memory::online::GuestMemory, -}; +use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives_derive::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, @@ -102,6 +96,38 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut BaseAluPreCompute = data.borrow_mut(); + let is_imm = self.pre_compute_impl(pc, inst, data)?; + let opcode = inst.opcode; + + let fn_ptr = match ( + is_imm, + BaseAluOpcode::from_usize(opcode.local_opcode_idx(self.offset)), + ) { + (true, BaseAluOpcode::ADD) => execute_e1_tco_handler::<_, _, true, AddOp>, + (false, BaseAluOpcode::ADD) => execute_e1_tco_handler::<_, _, false, AddOp>, + (true, BaseAluOpcode::SUB) => execute_e1_tco_handler::<_, _, true, SubOp>, + (false, BaseAluOpcode::SUB) => execute_e1_tco_handler::<_, _, false, SubOp>, + (true, BaseAluOpcode::XOR) => execute_e1_tco_handler::<_, _, true, XorOp>, + (false, BaseAluOpcode::XOR) => execute_e1_tco_handler::<_, _, false, XorOp>, + (true, BaseAluOpcode::OR) => execute_e1_tco_handler::<_, _, true, OrOp>, + (false, BaseAluOpcode::OR) => execute_e1_tco_handler::<_, _, false, OrOp>, + (true, BaseAluOpcode::AND) => execute_e1_tco_handler::<_, _, true, AndOp>, + (false, BaseAluOpcode::AND) => execute_e1_tco_handler::<_, _, false, AndOp>, + }; + Ok(fn_ptr) + } } impl MeteredExecutor @@ -174,6 +200,7 @@ unsafe fn execute_e12_impl< vm_state.instret += 1; } +#[create_tco_handler] #[inline(always)] unsafe fn execute_e1_impl< F: PrimeField32, diff --git a/extensions/rv32im/circuit/src/branch_eq/execution.rs b/extensions/rv32im/circuit/src/branch_eq/execution.rs index dba0d8cddb..620759731a 100644 --- a/extensions/rv32im/circuit/src/branch_eq/execution.rs +++ b/extensions/rv32im/circuit/src/branch_eq/execution.rs @@ -3,13 +3,7 @@ use std::{ mem::size_of, }; -use openvm_circuit::{ - arch::{ - E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait, - MeteredExecutor, StaticProgramError, VmExecState, - }, - system::memory::online::GuestMemory, -}; +use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives_derive::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_AS, LocalOpcode, @@ -84,6 +78,26 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut BranchEqualPreCompute = data.borrow_mut(); + let is_bne = self.pre_compute_impl(pc, inst, data)?; + let fn_ptr = if is_bne { + execute_e1_tco_handler::<_, _, true> + } else { + execute_e1_tco_handler::<_, _, false> + }; + Ok(fn_ptr) + } } impl MeteredExecutor for BranchEqualExecutor @@ -131,6 +145,7 @@ unsafe fn execute_e12_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/rv32im/circuit/src/branch_lt/execution.rs b/extensions/rv32im/circuit/src/branch_lt/execution.rs index 206a49e4a1..a25b24b147 100644 --- a/extensions/rv32im/circuit/src/branch_lt/execution.rs +++ b/extensions/rv32im/circuit/src/branch_lt/execution.rs @@ -3,13 +3,7 @@ use std::{ mem::size_of, }; -use openvm_circuit::{ - arch::{ - E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait, - MeteredExecutor, StaticProgramError, VmExecState, - }, - system::memory::online::GuestMemory, -}; +use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives_derive::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_AS, LocalOpcode, @@ -86,6 +80,27 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut BranchLePreCompute = data.borrow_mut(); + let local_opcode = self.pre_compute_impl(pc, inst, data)?; + let fn_ptr = match local_opcode { + BranchLessThanOpcode::BLT => execute_e1_tco_handler::<_, _, BltOp>, + BranchLessThanOpcode::BLTU => execute_e1_tco_handler::<_, _, BltuOp>, + BranchLessThanOpcode::BGE => execute_e1_tco_handler::<_, _, BgeOp>, + BranchLessThanOpcode::BGEU => execute_e1_tco_handler::<_, _, BgeuOp>, + }; + Ok(fn_ptr) + } } impl MeteredExecutor @@ -136,6 +151,7 @@ unsafe fn execute_e12_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/rv32im/circuit/src/divrem/execution.rs b/extensions/rv32im/circuit/src/divrem/execution.rs index dd87de540b..c057da312b 100644 --- a/extensions/rv32im/circuit/src/divrem/execution.rs +++ b/extensions/rv32im/circuit/src/divrem/execution.rs @@ -3,13 +3,7 @@ use std::{ mem::size_of, }; -use openvm_circuit::{ - arch::{ - E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait, - MeteredExecutor, StaticProgramError, VmExecState, - }, - system::memory::online::GuestMemory, -}; +use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives_derive::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, @@ -82,6 +76,27 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut DivRemPreCompute = data.borrow_mut(); + let local_opcode = self.pre_compute_impl(pc, inst, data)?; + let fn_ptr = match local_opcode { + DivRemOpcode::DIV => execute_e1_tco_handler::<_, _, DivOp>, + DivRemOpcode::DIVU => execute_e1_tco_handler::<_, _, DivuOp>, + DivRemOpcode::REM => execute_e1_tco_handler::<_, _, RemOp>, + DivRemOpcode::REMU => execute_e1_tco_handler::<_, _, RemuOp>, + }; + Ok(fn_ptr) + } } impl MeteredExecutor @@ -128,6 +143,7 @@ unsafe fn execute_e12_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/rv32im/circuit/src/hintstore/execution.rs b/extensions/rv32im/circuit/src/hintstore/execution.rs index 2e87cc9cd9..03e1fdb3ad 100644 --- a/extensions/rv32im/circuit/src/hintstore/execution.rs +++ b/extensions/rv32im/circuit/src/hintstore/execution.rs @@ -1,4 +1,7 @@ -use std::borrow::{Borrow, BorrowMut}; +use std::{ + borrow::{Borrow, BorrowMut}, + mem::size_of, +}; use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives_derive::AlignedBytesBorrow; @@ -80,6 +83,25 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut HintStorePreCompute = data.borrow_mut(); + let local_opcode = self.pre_compute_impl(pc, inst, pre_compute)?; + let fn_ptr = match local_opcode { + HINT_STOREW => execute_e1_tco_handler::<_, _, true>, + HINT_BUFFER => execute_e1_tco_handler::<_, _, false>, + }; + Ok(fn_ptr) + } } impl MeteredExecutor for Rv32HintStoreExecutor @@ -154,6 +176,7 @@ unsafe fn execute_e12_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/rv32im/circuit/src/jal_lui/execution.rs b/extensions/rv32im/circuit/src/jal_lui/execution.rs index 129fe32202..29859445f2 100644 --- a/extensions/rv32im/circuit/src/jal_lui/execution.rs +++ b/extensions/rv32im/circuit/src/jal_lui/execution.rs @@ -3,13 +3,7 @@ use std::{ mem::size_of, }; -use openvm_circuit::{ - arch::{ - E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait, - MeteredExecutor, StaticProgramError, VmExecState, - }, - system::memory::online::GuestMemory, -}; +use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives_derive::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_AS, LocalOpcode, @@ -74,6 +68,27 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + _pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut JalLuiPreCompute = data.borrow_mut(); + let (is_jal, enabled) = self.pre_compute_impl(inst, data)?; + let fn_ptr = match (is_jal, enabled) { + (true, true) => execute_e1_tco_handler::<_, _, true, true>, + (true, false) => execute_e1_tco_handler::<_, _, true, false>, + (false, true) => execute_e1_tco_handler::<_, _, false, true>, + (false, false) => execute_e1_tco_handler::<_, _, false, false>, + }; + Ok(fn_ptr) + } } impl MeteredExecutor for Rv32JalLuiExecutor @@ -138,6 +153,7 @@ unsafe fn execute_e12_impl< vm_state.instret += 1; } +#[create_tco_handler] unsafe fn execute_e1_impl< F: PrimeField32, CTX: ExecutionCtxTrait, diff --git a/extensions/rv32im/circuit/src/jalr/execution.rs b/extensions/rv32im/circuit/src/jalr/execution.rs index 8eb09de03c..8b1825e955 100644 --- a/extensions/rv32im/circuit/src/jalr/execution.rs +++ b/extensions/rv32im/circuit/src/jalr/execution.rs @@ -3,13 +3,7 @@ use std::{ mem::size_of, }; -use openvm_circuit::{ - arch::{ - E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait, - MeteredExecutor, StaticProgramError, VmExecState, - }, - system::memory::online::GuestMemory, -}; +use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives_derive::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, @@ -74,6 +68,26 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut JalrPreCompute = data.borrow_mut(); + let enabled = self.pre_compute_impl(pc, inst, data)?; + let fn_ptr = if enabled { + execute_e1_tco_handler::<_, _, true> + } else { + execute_e1_tco_handler::<_, _, false> + }; + Ok(fn_ptr) + } } impl MeteredExecutor for Rv32JalrExecutor @@ -126,6 +140,7 @@ unsafe fn execute_e12_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/rv32im/circuit/src/less_than/execution.rs b/extensions/rv32im/circuit/src/less_than/execution.rs index 16c11377e5..4674f54e78 100644 --- a/extensions/rv32im/circuit/src/less_than/execution.rs +++ b/extensions/rv32im/circuit/src/less_than/execution.rs @@ -3,13 +3,7 @@ use std::{ mem::size_of, }; -use openvm_circuit::{ - arch::{ - E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait, - MeteredExecutor, StaticProgramError, VmExecState, - }, - system::memory::online::GuestMemory, -}; +use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives_derive::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, @@ -98,6 +92,27 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut LessThanPreCompute = data.borrow_mut(); + let (is_imm, is_sltu) = self.pre_compute_impl(pc, inst, pre_compute)?; + let fn_ptr = match (is_imm, is_sltu) { + (true, true) => execute_e1_tco_handler::<_, _, true, true>, + (true, false) => execute_e1_tco_handler::<_, _, true, false>, + (false, true) => execute_e1_tco_handler::<_, _, false, true>, + (false, false) => execute_e1_tco_handler::<_, _, false, false>, + }; + Ok(fn_ptr) + } } impl MeteredExecutor @@ -160,6 +175,7 @@ unsafe fn execute_e12_impl< vm_state.instret += 1; } +#[create_tco_handler] unsafe fn execute_e1_impl< F: PrimeField32, CTX: ExecutionCtxTrait, diff --git a/extensions/rv32im/circuit/src/lib.rs b/extensions/rv32im/circuit/src/lib.rs index 6224c0450a..68c4d2a295 100644 --- a/extensions/rv32im/circuit/src/lib.rs +++ b/extensions/rv32im/circuit/src/lib.rs @@ -1,3 +1,6 @@ +#![cfg_attr(feature = "tco", allow(incomplete_features))] +#![cfg_attr(feature = "tco", feature(likely_unlikely))] +#![cfg_attr(feature = "tco", feature(explicit_tail_calls))] use openvm_circuit::{ arch::{ AirInventory, ChipInventoryError, InitFileGenerator, MatrixRecordArena, SystemConfig, diff --git a/extensions/rv32im/circuit/src/load_sign_extend/execution.rs b/extensions/rv32im/circuit/src/load_sign_extend/execution.rs index 43f11a33a7..0fcff81fe2 100644 --- a/extensions/rv32im/circuit/src/load_sign_extend/execution.rs +++ b/extensions/rv32im/circuit/src/load_sign_extend/execution.rs @@ -4,13 +4,7 @@ use std::{ mem::size_of, }; -use openvm_circuit::{ - arch::{ - E2PreCompute, ExecuteFunc, ExecutionCtxTrait, ExecutionError, Executor, - MeteredExecutionCtxTrait, MeteredExecutor, StaticProgramError, VmExecState, - }, - system::memory::{online::GuestMemory, POINTER_MAX_BITS}, -}; +use openvm_circuit::{arch::*, system::memory::{online::GuestMemory, POINTER_MAX_BITS}}; use openvm_circuit_primitives_derive::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, @@ -106,6 +100,27 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut LoadSignExtendPreCompute = data.borrow_mut(); + let (is_loadb, enabled) = self.pre_compute_impl(pc, inst, pre_compute)?; + let fn_ptr = match (is_loadb, enabled) { + (true, true) => execute_e1_tco_handler::<_, _, true, true>, + (true, false) => execute_e1_tco_handler::<_, _, true, false>, + (false, true) => execute_e1_tco_handler::<_, _, false, true>, + (false, false) => execute_e1_tco_handler::<_, _, false, false>, + }; + Ok(fn_ptr) + } } impl MeteredExecutor @@ -185,6 +200,7 @@ unsafe fn execute_e12_impl< vm_state.instret += 1; } +#[create_tco_handler] unsafe fn execute_e1_impl< F: PrimeField32, CTX: ExecutionCtxTrait, diff --git a/extensions/rv32im/circuit/src/loadstore/execution.rs b/extensions/rv32im/circuit/src/loadstore/execution.rs index 4d718c579e..5faec615a1 100644 --- a/extensions/rv32im/circuit/src/loadstore/execution.rs +++ b/extensions/rv32im/circuit/src/loadstore/execution.rs @@ -1,6 +1,7 @@ use std::{ borrow::{Borrow, BorrowMut}, fmt::Debug, + mem::size_of, }; use openvm_circuit::{ @@ -124,6 +125,43 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut LoadStorePreCompute = data.borrow_mut(); + let (local_opcode, enabled, is_native_store) = + self.pre_compute_impl(pc, inst, pre_compute)?; + let fn_ptr = match (local_opcode, enabled, is_native_store) { + (LOADW, true, _) => execute_e1_tco_handler::<_, _, U8, LoadWOp, true>, + (LOADW, false, _) => execute_e1_tco_handler::<_, _, U8, LoadWOp, false>, + (LOADHU, true, _) => execute_e1_tco_handler::<_, _, U8, LoadHUOp, true>, + (LOADHU, false, _) => execute_e1_tco_handler::<_, _, U8, LoadHUOp, false>, + (LOADBU, true, _) => execute_e1_tco_handler::<_, _, U8, LoadBUOp, true>, + (LOADBU, false, _) => execute_e1_tco_handler::<_, _, U8, LoadBUOp, false>, + (STOREW, true, false) => execute_e1_tco_handler::<_, _, U8, StoreWOp, true>, + (STOREW, false, false) => execute_e1_tco_handler::<_, _, U8, StoreWOp, false>, + (STOREW, true, true) => execute_e1_tco_handler::<_, _, F, StoreWOp, true>, + (STOREW, false, true) => execute_e1_tco_handler::<_, _, F, StoreWOp, false>, + (STOREH, true, false) => execute_e1_tco_handler::<_, _, U8, StoreHOp, true>, + (STOREH, false, false) => execute_e1_tco_handler::<_, _, U8, StoreHOp, false>, + (STOREH, true, true) => execute_e1_tco_handler::<_, _, F, StoreHOp, true>, + (STOREH, false, true) => execute_e1_tco_handler::<_, _, F, StoreHOp, false>, + (STOREB, true, false) => execute_e1_tco_handler::<_, _, U8, StoreBOp, true>, + (STOREB, false, false) => execute_e1_tco_handler::<_, _, U8, StoreBOp, false>, + (STOREB, true, true) => execute_e1_tco_handler::<_, _, F, StoreBOp, true>, + (STOREB, false, true) => execute_e1_tco_handler::<_, _, F, StoreBOp, false>, + (_, _, _) => unreachable!(), + }; + Ok(fn_ptr) + } } impl MeteredExecutor for LoadStoreExecutor @@ -226,6 +264,7 @@ unsafe fn execute_e12_impl< vm_state.instret += 1; } +#[create_tco_handler] unsafe fn execute_e1_impl< F: PrimeField32, CTX: ExecutionCtxTrait, diff --git a/extensions/rv32im/circuit/src/mul/execution.rs b/extensions/rv32im/circuit/src/mul/execution.rs index 73376d8f98..bff5384b62 100644 --- a/extensions/rv32im/circuit/src/mul/execution.rs +++ b/extensions/rv32im/circuit/src/mul/execution.rs @@ -3,13 +3,7 @@ use std::{ mem::size_of, }; -use openvm_circuit::{ - arch::{ - E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait, - MeteredExecutor, StaticProgramError, VmExecState, - }, - system::memory::online::GuestMemory, -}; +use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives_derive::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, @@ -75,6 +69,21 @@ where self.pre_compute_impl(pc, inst, pre_compute)?; Ok(execute_e1_impl) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut MultiPreCompute = data.borrow_mut(); + self.pre_compute_impl(pc, inst, pre_compute)?; + Ok(execute_e1_tco_handler) + } } impl MeteredExecutor @@ -121,6 +130,7 @@ unsafe fn execute_e12_impl( vm_state.instret += 1; } +#[create_tco_handler] unsafe fn execute_e1_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/rv32im/circuit/src/mulh/execution.rs b/extensions/rv32im/circuit/src/mulh/execution.rs index 1818a63080..d79815745e 100644 --- a/extensions/rv32im/circuit/src/mulh/execution.rs +++ b/extensions/rv32im/circuit/src/mulh/execution.rs @@ -3,13 +3,7 @@ use std::{ mem::size_of, }; -use openvm_circuit::{ - arch::{ - E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait, - MeteredExecutor, StaticProgramError, VmExecState, - }, - system::memory::online::GuestMemory, -}; +use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives_derive::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, @@ -74,6 +68,26 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + _pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut MulHPreCompute = data.borrow_mut(); + let local_opcode = self.pre_compute_impl(inst, pre_compute)?; + let fn_ptr = match local_opcode { + MulHOpcode::MULH => execute_e1_tco_handler::<_, _, MulHOp>, + MulHOpcode::MULHSU => execute_e1_tco_handler::<_, _, MulHSuOp>, + MulHOpcode::MULHU => execute_e1_tco_handler::<_, _, MulHUOp>, + }; + Ok(fn_ptr) + } } impl MeteredExecutor @@ -123,6 +137,7 @@ unsafe fn execute_e12_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/rv32im/circuit/src/shift/execution.rs b/extensions/rv32im/circuit/src/shift/execution.rs index b756f8b768..862c8ec71f 100644 --- a/extensions/rv32im/circuit/src/shift/execution.rs +++ b/extensions/rv32im/circuit/src/shift/execution.rs @@ -3,13 +3,7 @@ use std::{ mem::size_of, }; -use openvm_circuit::{ - arch::{ - E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait, - MeteredExecutor, StaticProgramError, VmExecState, - }, - system::memory::online::GuestMemory, -}; +use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives_derive::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, @@ -94,6 +88,30 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut ShiftPreCompute = data.borrow_mut(); + let (is_imm, shift_opcode) = self.pre_compute_impl(pc, inst, data)?; + // `d` is always expected to be RV32_REGISTER_AS. + let fn_ptr = match (is_imm, shift_opcode) { + (true, ShiftOpcode::SLL) => execute_e1_tco_handler::<_, _, true, SllOp>, + (false, ShiftOpcode::SLL) => execute_e1_tco_handler::<_, _, false, SllOp>, + (true, ShiftOpcode::SRL) => execute_e1_tco_handler::<_, _, true, SrlOp>, + (false, ShiftOpcode::SRL) => execute_e1_tco_handler::<_, _, false, SrlOp>, + (true, ShiftOpcode::SRA) => execute_e1_tco_handler::<_, _, true, SraOp>, + (false, ShiftOpcode::SRA) => execute_e1_tco_handler::<_, _, false, SraOp>, + }; + Ok(fn_ptr) + } } impl MeteredExecutor @@ -155,6 +173,7 @@ unsafe fn execute_e12_impl< state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP); } +#[create_tco_handler] unsafe fn execute_e1_impl< F: PrimeField32, CTX: ExecutionCtxTrait, diff --git a/extensions/rv32im/tests/Cargo.toml b/extensions/rv32im/tests/Cargo.toml index 45eb4c1654..372538f791 100644 --- a/extensions/rv32im/tests/Cargo.toml +++ b/extensions/rv32im/tests/Cargo.toml @@ -23,5 +23,6 @@ serde = { workspace = true, features = ["alloc"] } strum.workspace = true [features] -default = ["parallel"] +default = ["parallel", "tco"] parallel = ["openvm-circuit/parallel"] +tco = ["openvm-rv32im-circuit/tco"] From 0c935304d42acaff47df0ab81fb611440dd24539 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Tue, 19 Aug 2025 23:37:54 -0700 Subject: [PATCH 04/36] fmt --- extensions/rv32im/circuit/src/load_sign_extend/execution.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/extensions/rv32im/circuit/src/load_sign_extend/execution.rs b/extensions/rv32im/circuit/src/load_sign_extend/execution.rs index 0fcff81fe2..5521698a6a 100644 --- a/extensions/rv32im/circuit/src/load_sign_extend/execution.rs +++ b/extensions/rv32im/circuit/src/load_sign_extend/execution.rs @@ -4,7 +4,10 @@ use std::{ mem::size_of, }; -use openvm_circuit::{arch::*, system::memory::{online::GuestMemory, POINTER_MAX_BITS}}; +use openvm_circuit::{ + arch::*, + system::memory::{online::GuestMemory, POINTER_MAX_BITS}, +}; use openvm_circuit_primitives_derive::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, From f7fd1d79f1b57b9463b043ce5f60ef03b3913fe3 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 10:46:38 -0700 Subject: [PATCH 05/36] feat: tco for other extensions --- extensions/algebra/circuit/Cargo.toml | 5 + extensions/algebra/circuit/src/execution.rs | 94 +++++++++++++++++- extensions/algebra/circuit/src/lib.rs | 4 + .../algebra/circuit/src/modular_chip/is_eq.rs | 34 ++++++- extensions/bigint/circuit/Cargo.toml | 3 +- extensions/bigint/circuit/src/base_alu.rs | 23 +++++ extensions/bigint/circuit/src/branch_eq.rs | 20 ++++ extensions/bigint/circuit/src/branch_lt.rs | 22 +++++ extensions/bigint/circuit/src/less_than.rs | 20 ++++ extensions/bigint/circuit/src/lib.rs | 3 + extensions/bigint/circuit/src/mult.rs | 16 +++ extensions/bigint/circuit/src/shift.rs | 21 ++++ extensions/ecc/circuit/Cargo.toml | 5 + extensions/ecc/circuit/src/lib.rs | 4 + .../src/weierstrass_chip/add_ne/execution.rs | 98 ++++++++++++++++++- .../src/weierstrass_chip/double/execution.rs | 98 ++++++++++++++++++- extensions/keccak256/circuit/Cargo.toml | 3 +- extensions/keccak256/circuit/src/execution.rs | 21 +++- extensions/keccak256/circuit/src/lib.rs | 3 + extensions/native/circuit/Cargo.toml | 4 +- .../native/circuit/src/branch_eq/execution.rs | 30 +++++- .../native/circuit/src/castf/execution.rs | 22 ++++- .../circuit/src/field_arithmetic/execution.rs | 70 ++++++++++++- .../circuit/src/field_extension/execution.rs | 28 +++++- .../native/circuit/src/fri/execution.rs | 21 +++- extensions/native/circuit/src/lib.rs | 4 + extensions/pairing/circuit/Cargo.toml | 5 + extensions/pairing/circuit/src/lib.rs | 4 + extensions/sha256/circuit/Cargo.toml | 3 +- extensions/sha256/circuit/src/lib.rs | 4 + .../circuit/src/sha256_chip/execution.rs | 18 +++- 31 files changed, 683 insertions(+), 27 deletions(-) diff --git a/extensions/algebra/circuit/Cargo.toml b/extensions/algebra/circuit/Cargo.toml index 7d0eb389e6..0c2c7a0500 100644 --- a/extensions/algebra/circuit/Cargo.toml +++ b/extensions/algebra/circuit/Cargo.toml @@ -7,6 +7,11 @@ edition.workspace = true homepage.workspace = true repository.workspace = true +[features] +default = ["jemalloc", "tco"] +tco = ["openvm-circuit/tco"] +jemalloc = ["openvm-circuit/jemalloc"] + [dependencies] openvm-circuit-primitives = { workspace = true } openvm-circuit-primitives-derive = { workspace = true } diff --git a/extensions/algebra/circuit/src/execution.rs b/extensions/algebra/circuit/src/execution.rs index a99c4ba37b..dbfc794621 100644 --- a/extensions/algebra/circuit/src/execution.rs +++ b/extensions/algebra/circuit/src/execution.rs @@ -5,10 +5,7 @@ use std::{ use num_bigint::BigUint; use openvm_algebra_transpiler::{Fp2Opcode, Rv32ModularArithmeticOpcode}; -use openvm_circuit::{ - arch::*, - system::memory::{online::GuestMemory, POINTER_MAX_BITS}, -}; +use openvm_circuit::arch::*; use openvm_circuit_primitives::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, @@ -177,6 +174,94 @@ impl<'a, const BLOCKS: usize, const BLOCK_SIZE: usize, const IS_FP2: bool> impl Executor for FieldExprVecHeapExecutor { + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut FieldExpressionPreCompute = data.borrow_mut(); + + let op = self.pre_compute_impl(pc, inst, pre_compute)?; + + if let Some(op) = op { + let modulus = &pre_compute.expr.prime; + if IS_FP2 { + if let Some(field_type) = get_fp2_field_type(modulus) { + generate_fp2_dispatch!( + field_type, + op, + BLOCKS, + BLOCK_SIZE, + execute_e1_tco_handler, + [ + (BN254Coordinate, Add), + (BN254Coordinate, Sub), + (BN254Coordinate, Mul), + (BN254Coordinate, Div), + (BLS12_381Coordinate, Add), + (BLS12_381Coordinate, Sub), + (BLS12_381Coordinate, Mul), + (BLS12_381Coordinate, Div), + ] + ) + } else { + Ok(execute_e1_generic_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) + } + } else if let Some(field_type) = get_field_type(modulus) { + generate_field_dispatch!( + field_type, + op, + BLOCKS, + BLOCK_SIZE, + execute_e1_tco_handler, + [ + (K256Coordinate, Add), + (K256Coordinate, Sub), + (K256Coordinate, Mul), + (K256Coordinate, Div), + (K256Scalar, Add), + (K256Scalar, Sub), + (K256Scalar, Mul), + (K256Scalar, Div), + (P256Coordinate, Add), + (P256Coordinate, Sub), + (P256Coordinate, Mul), + (P256Coordinate, Div), + (P256Scalar, Add), + (P256Scalar, Sub), + (P256Scalar, Mul), + (P256Scalar, Div), + (BN254Coordinate, Add), + (BN254Coordinate, Sub), + (BN254Coordinate, Mul), + (BN254Coordinate, Div), + (BN254Scalar, Add), + (BN254Scalar, Sub), + (BN254Scalar, Mul), + (BN254Scalar, Div), + (BLS12_381Coordinate, Add), + (BLS12_381Coordinate, Sub), + (BLS12_381Coordinate, Mul), + (BLS12_381Coordinate, Div), + (BLS12_381Scalar, Add), + (BLS12_381Scalar, Sub), + (BLS12_381Scalar, Mul), + (BLS12_381Scalar, Div), + ] + ) + } else { + Ok(execute_e1_generic_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) + } + } else { + Ok(execute_e1_setup_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) + } + } + #[inline(always)] fn pre_compute_size(&self) -> usize { std::mem::size_of::() @@ -527,6 +612,7 @@ unsafe fn execute_e2_setup_impl< execute_e12_setup_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>(&pre_compute.data, vm_state); } +#[create_tco_handler] unsafe fn execute_e1_impl< F: PrimeField32, CTX: ExecutionCtxTrait, diff --git a/extensions/algebra/circuit/src/lib.rs b/extensions/algebra/circuit/src/lib.rs index b4e494c812..8edd6108d0 100644 --- a/extensions/algebra/circuit/src/lib.rs +++ b/extensions/algebra/circuit/src/lib.rs @@ -1,3 +1,7 @@ +#![cfg_attr(feature = "tco", allow(incomplete_features))] +#![cfg_attr(feature = "tco", feature(likely_unlikely))] +#![cfg_attr(feature = "tco", feature(explicit_tail_calls))] + use derive_more::derive::{Deref, DerefMut}; use openvm_circuit_derive::PreflightExecutor; use openvm_mod_circuit_builder::FieldExpressionExecutor; diff --git a/extensions/algebra/circuit/src/modular_chip/is_eq.rs b/extensions/algebra/circuit/src/modular_chip/is_eq.rs index 348fb22bd6..70b59ef7d4 100644 --- a/extensions/algebra/circuit/src/modular_chip/is_eq.rs +++ b/extensions/algebra/circuit/src/modular_chip/is_eq.rs @@ -7,10 +7,7 @@ use num_bigint::BigUint; use openvm_algebra_transpiler::Rv32ModularArithmeticOpcode; use openvm_circuit::{ arch::*, - system::memory::{ - online::{GuestMemory, TracingMemory}, - MemoryAuxColsFactory, POINTER_MAX_BITS, - }, + system::memory::{online::TracingMemory, MemoryAuxColsFactory}, }; use openvm_circuit_primitives::{ bigint::utils::big_uint_to_limbs, @@ -550,6 +547,34 @@ where Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + _opcode: u32, + _instruction: &Instruction, + _data: &mut [u8], + ) -> Result< + for<'a, 'b, 'c> unsafe fn( + &'a InterpretedInstance<'b, F, Ctx>, + &'c mut VmExecState, + ) -> Result<(), ExecutionError>, + StaticProgramError, + > + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut ModularIsEqualPreCompute = data.borrow_mut(); + + let is_setup = self.pre_compute_impl(pc, inst, pre_compute)?; + let fn_ptr = if is_setup { + execute_e1_tco_handler::<_, _, NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE, true> + } else { + execute_e1_tco_handler::<_, _, NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE, false> + }; + + Ok(fn_ptr) + } } impl @@ -584,6 +609,7 @@ where } } +#[create_tco_handler] unsafe fn execute_e1_impl< F: PrimeField32, CTX: ExecutionCtxTrait, diff --git a/extensions/bigint/circuit/Cargo.toml b/extensions/bigint/circuit/Cargo.toml index aa9114c34a..6816e2c612 100644 --- a/extensions/bigint/circuit/Cargo.toml +++ b/extensions/bigint/circuit/Cargo.toml @@ -33,9 +33,10 @@ test-case.workspace = true alloy-primitives = { version = "1.2.1" } [features] -default = ["parallel", "jemalloc"] +default = ["parallel", "jemalloc", "tco"] parallel = ["openvm-circuit/parallel"] test-utils = ["openvm-circuit/test-utils"] +tco = ["openvm-circuit/tco"] # performance features: mimalloc = ["openvm-circuit/mimalloc"] jemalloc = ["openvm-circuit/jemalloc"] diff --git a/extensions/bigint/circuit/src/base_alu.rs b/extensions/bigint/circuit/src/base_alu.rs index 6444f601e1..3379ec0df6 100644 --- a/extensions/bigint/circuit/src/base_alu.rs +++ b/extensions/bigint/circuit/src/base_alu.rs @@ -59,6 +59,28 @@ impl Executor for Rv32BaseAlu256Executor { }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut BaseAluPreCompute = data.borrow_mut(); + let local_opcode = self.pre_compute_impl(pc, inst, data)?; + let fn_ptr = match local_opcode { + BaseAluOpcode::ADD => execute_e1_tco_handler::<_, _, AddOp>, + BaseAluOpcode::SUB => execute_e1_tco_handler::<_, _, SubOp>, + BaseAluOpcode::XOR => execute_e1_tco_handler::<_, _, XorOp>, + BaseAluOpcode::OR => execute_e1_tco_handler::<_, _, OrOp>, + BaseAluOpcode::AND => execute_e1_tco_handler::<_, _, AndOp>, + }; + Ok(fn_ptr) + } } impl MeteredExecutor for Rv32BaseAlu256Executor { @@ -106,6 +128,7 @@ unsafe fn execute_e12_impl( vm_state.instret += 1; } +#[create_tco_handler] unsafe fn execute_e1_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/bigint/circuit/src/branch_eq.rs b/extensions/bigint/circuit/src/branch_eq.rs index eab11ae362..2fdd5472b2 100644 --- a/extensions/bigint/circuit/src/branch_eq.rs +++ b/extensions/bigint/circuit/src/branch_eq.rs @@ -54,6 +54,25 @@ impl Executor for Rv32BranchEqual256Executor { }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut BranchEqPreCompute = data.borrow_mut(); + let local_opcode = self.pre_compute_impl(pc, inst, data)?; + let fn_ptr = match local_opcode { + BranchEqualOpcode::BEQ => execute_e1_tco_handler::<_, _, false>, + BranchEqualOpcode::BNE => execute_e1_tco_handler::<_, _, true>, + }; + Ok(fn_ptr) + } } impl MeteredExecutor for Rv32BranchEqual256Executor { @@ -101,6 +120,7 @@ unsafe fn execute_e12_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/bigint/circuit/src/branch_lt.rs b/extensions/bigint/circuit/src/branch_lt.rs index 7a701d4812..771183d706 100644 --- a/extensions/bigint/circuit/src/branch_lt.rs +++ b/extensions/bigint/circuit/src/branch_lt.rs @@ -59,6 +59,27 @@ impl Executor for Rv32BranchLessThan256Executor { }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut BranchLtPreCompute = data.borrow_mut(); + let local_opcode = self.pre_compute_impl(pc, inst, data)?; + let fn_ptr = match local_opcode { + BranchLessThanOpcode::BLT => execute_e1_tco_handler::<_, _, BltOp>, + BranchLessThanOpcode::BLTU => execute_e1_tco_handler::<_, _, BltuOp>, + BranchLessThanOpcode::BGE => execute_e1_tco_handler::<_, _, BgeOp>, + BranchLessThanOpcode::BGEU => execute_e1_tco_handler::<_, _, BgeuOp>, + }; + Ok(fn_ptr) + } } impl MeteredExecutor for Rv32BranchLessThan256Executor { @@ -107,6 +128,7 @@ unsafe fn execute_e12_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/bigint/circuit/src/less_than.rs b/extensions/bigint/circuit/src/less_than.rs index e153a6221e..554039311d 100644 --- a/extensions/bigint/circuit/src/less_than.rs +++ b/extensions/bigint/circuit/src/less_than.rs @@ -54,6 +54,25 @@ impl Executor for Rv32LessThan256Executor { }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut LessThanPreCompute = data.borrow_mut(); + let local_opcode = self.pre_compute_impl(pc, inst, data)?; + let fn_ptr = match local_opcode { + LessThanOpcode::SLT => execute_e1_tco_handler::<_, _, false>, + LessThanOpcode::SLTU => execute_e1_tco_handler::<_, _, true>, + }; + Ok(fn_ptr) + } } impl MeteredExecutor for Rv32LessThan256Executor { @@ -105,6 +124,7 @@ unsafe fn execute_e12_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/bigint/circuit/src/lib.rs b/extensions/bigint/circuit/src/lib.rs index 0dd5a5b4d4..48a55a072b 100644 --- a/extensions/bigint/circuit/src/lib.rs +++ b/extensions/bigint/circuit/src/lib.rs @@ -1,3 +1,6 @@ +#![cfg_attr(feature = "tco", allow(incomplete_features))] +#![cfg_attr(feature = "tco", feature(likely_unlikely))] +#![cfg_attr(feature = "tco", feature(explicit_tail_calls))] use openvm_circuit::{ self, arch::{ diff --git a/extensions/bigint/circuit/src/mult.rs b/extensions/bigint/circuit/src/mult.rs index 566c049264..b34762fb26 100644 --- a/extensions/bigint/circuit/src/mult.rs +++ b/extensions/bigint/circuit/src/mult.rs @@ -50,6 +50,21 @@ impl Executor for Rv32Multiplication256Executor { self.pre_compute_impl(pc, inst, data)?; Ok(execute_e1_impl) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut MultPreCompute = data.borrow_mut(); + self.pre_compute_impl(pc, inst, data)?; + Ok(execute_e1_tco_handler) + } } impl MeteredExecutor for Rv32Multiplication256Executor { @@ -91,6 +106,7 @@ unsafe fn execute_e12_impl( vm_state.instret += 1; } +#[create_tco_handler] unsafe fn execute_e1_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/bigint/circuit/src/shift.rs b/extensions/bigint/circuit/src/shift.rs index 5b033ce832..aacc44c021 100644 --- a/extensions/bigint/circuit/src/shift.rs +++ b/extensions/bigint/circuit/src/shift.rs @@ -55,6 +55,26 @@ impl Executor for Rv32Shift256Executor { }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut ShiftPreCompute = data.borrow_mut(); + let local_opcode = self.pre_compute_impl(pc, inst, data)?; + let fn_ptr = match local_opcode { + ShiftOpcode::SLL => execute_e1_tco_handler::<_, _, SllOp>, + ShiftOpcode::SRA => execute_e1_tco_handler::<_, _, SraOp>, + ShiftOpcode::SRL => execute_e1_tco_handler::<_, _, SrlOp>, + }; + Ok(fn_ptr) + } } impl MeteredExecutor for Rv32Shift256Executor { @@ -100,6 +120,7 @@ unsafe fn execute_e12_impl vm_state.instret += 1; } +#[create_tco_handler] unsafe fn execute_e1_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/ecc/circuit/Cargo.toml b/extensions/ecc/circuit/Cargo.toml index a194b5ac5a..eb2b37ce9b 100644 --- a/extensions/ecc/circuit/Cargo.toml +++ b/extensions/ecc/circuit/Cargo.toml @@ -7,6 +7,11 @@ edition.workspace = true homepage.workspace = true repository.workspace = true +[features] +default = ["jemalloc", "tco"] +tco = ["openvm-circuit/tco"] +jemalloc = ["openvm-circuit/jemalloc"] + [dependencies] openvm-circuit-primitives = { workspace = true } openvm-circuit-derive = { workspace = true } diff --git a/extensions/ecc/circuit/src/lib.rs b/extensions/ecc/circuit/src/lib.rs index 9986dca696..4789d09187 100644 --- a/extensions/ecc/circuit/src/lib.rs +++ b/extensions/ecc/circuit/src/lib.rs @@ -1,3 +1,7 @@ +#![cfg_attr(feature = "tco", allow(incomplete_features))] +#![cfg_attr(feature = "tco", feature(likely_unlikely))] +#![cfg_attr(feature = "tco", feature(explicit_tail_calls))] + mod weierstrass_chip; pub use weierstrass_chip::*; diff --git a/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs b/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs index 5c63a05c41..b3dec5c01e 100644 --- a/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs +++ b/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs @@ -5,10 +5,7 @@ use std::{ use num_bigint::BigUint; use openvm_algebra_circuit::fields::{get_field_type, FieldType}; -use openvm_circuit::{ - arch::*, - system::memory::{online::GuestMemory, POINTER_MAX_BITS}, -}; +use openvm_circuit::arch::*; use openvm_circuit_primitives::AlignedBytesBorrow; use openvm_ecc_transpiler::Rv32WeierstrassOpcode; use openvm_instructions::{ @@ -95,6 +92,98 @@ impl<'a, const BLOCKS: usize, const BLOCK_SIZE: usize> EcAddNeExecutor Executor for EcAddNeExecutor { + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut EcAddNePreCompute = data.borrow_mut(); + + let is_setup = self.pre_compute_impl(pc, inst, pre_compute)?; + + if let Some(field_type) = { + let modulus = &pre_compute.expr.builder.prime; + get_field_type(modulus) + } { + match (is_setup, field_type) { + (true, FieldType::K256Coordinate) => Ok(execute_e1_tco_handler::< + _, + _, + BLOCKS, + BLOCK_SIZE, + { FieldType::K256Coordinate as u8 }, + true, + >), + (true, FieldType::P256Coordinate) => Ok(execute_e1_tco_handler::< + _, + _, + BLOCKS, + BLOCK_SIZE, + { FieldType::P256Coordinate as u8 }, + true, + >), + (true, FieldType::BN254Coordinate) => Ok(execute_e1_tco_handler::< + _, + _, + BLOCKS, + BLOCK_SIZE, + { FieldType::BN254Coordinate as u8 }, + true, + >), + (true, FieldType::BLS12_381Coordinate) => Ok(execute_e1_tco_handler::< + _, + _, + BLOCKS, + BLOCK_SIZE, + { FieldType::BLS12_381Coordinate as u8 }, + true, + >), + (false, FieldType::K256Coordinate) => Ok(execute_e1_tco_handler::< + _, + _, + BLOCKS, + BLOCK_SIZE, + { FieldType::K256Coordinate as u8 }, + false, + >), + (false, FieldType::P256Coordinate) => Ok(execute_e1_tco_handler::< + _, + _, + BLOCKS, + BLOCK_SIZE, + { FieldType::P256Coordinate as u8 }, + false, + >), + (false, FieldType::BN254Coordinate) => Ok(execute_e1_tco_handler::< + _, + _, + BLOCKS, + BLOCK_SIZE, + { FieldType::BN254Coordinate as u8 }, + false, + >), + (false, FieldType::BLS12_381Coordinate) => Ok(execute_e1_tco_handler::< + _, + _, + BLOCKS, + BLOCK_SIZE, + { FieldType::BLS12_381Coordinate as u8 }, + false, + >), + _ => panic!("Unsupported field type"), + } + } else if is_setup { + Ok(execute_e1_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, true>) + } else { + Ok(execute_e1_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, false>) + } + } + #[inline(always)] fn pre_compute_size(&self) -> usize { std::mem::size_of::() @@ -351,6 +440,7 @@ unsafe fn execute_e12_impl< vm_state.instret += 1; } +#[create_tco_handler] unsafe fn execute_e1_impl< F: PrimeField32, CTX: ExecutionCtxTrait, diff --git a/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs b/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs index 8e755aa6f7..abc5c47990 100644 --- a/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs +++ b/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs @@ -4,10 +4,7 @@ use std::{ }; use num_bigint::BigUint; -use openvm_circuit::{ - arch::*, - system::memory::{online::GuestMemory, POINTER_MAX_BITS}, -}; +use openvm_circuit::arch::*; use openvm_circuit_primitives::AlignedBytesBorrow; use openvm_ecc_transpiler::Rv32WeierstrassOpcode; use openvm_instructions::{ @@ -87,6 +84,98 @@ impl<'a, const BLOCKS: usize, const BLOCK_SIZE: usize> EcDoubleExecutor Executor for EcDoubleExecutor { + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut EcDoublePreCompute = data.borrow_mut(); + + let is_setup = self.pre_compute_impl(pc, inst, pre_compute)?; + + if let Some(curve_type) = { + let modulus = &pre_compute.expr.builder.prime; + let a_coeff = &pre_compute.expr.setup_values[0]; + get_curve_type(modulus, a_coeff) + } { + match (is_setup, curve_type) { + (true, CurveType::K256) => Ok(execute_e1_tco_handler::< + _, + _, + BLOCKS, + BLOCK_SIZE, + { CurveType::K256 as u8 }, + true, + >), + (true, CurveType::P256) => Ok(execute_e1_tco_handler::< + _, + _, + BLOCKS, + BLOCK_SIZE, + { CurveType::P256 as u8 }, + true, + >), + (true, CurveType::BN254) => Ok(execute_e1_tco_handler::< + _, + _, + BLOCKS, + BLOCK_SIZE, + { CurveType::BN254 as u8 }, + true, + >), + (true, CurveType::BLS12_381) => Ok(execute_e1_tco_handler::< + _, + _, + BLOCKS, + BLOCK_SIZE, + { CurveType::BLS12_381 as u8 }, + true, + >), + (false, CurveType::K256) => Ok(execute_e1_tco_handler::< + _, + _, + BLOCKS, + BLOCK_SIZE, + { CurveType::K256 as u8 }, + false, + >), + (false, CurveType::P256) => Ok(execute_e1_tco_handler::< + _, + _, + BLOCKS, + BLOCK_SIZE, + { CurveType::P256 as u8 }, + false, + >), + (false, CurveType::BN254) => Ok(execute_e1_tco_handler::< + _, + _, + BLOCKS, + BLOCK_SIZE, + { CurveType::BN254 as u8 }, + false, + >), + (false, CurveType::BLS12_381) => Ok(execute_e1_tco_handler::< + _, + _, + BLOCKS, + BLOCK_SIZE, + { CurveType::BLS12_381 as u8 }, + false, + >), + } + } else if is_setup { + Ok(execute_e1_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, true>) + } else { + Ok(execute_e1_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, false>) + } + } + #[inline(always)] fn pre_compute_size(&self) -> usize { std::mem::size_of::() @@ -318,6 +407,7 @@ unsafe fn execute_e12_impl< vm_state.instret += 1; } +#[create_tco_handler] unsafe fn execute_e1_impl< F: PrimeField32, CTX: ExecutionCtxTrait, diff --git a/extensions/keccak256/circuit/Cargo.toml b/extensions/keccak256/circuit/Cargo.toml index 2299a0599a..f50f7c1c07 100644 --- a/extensions/keccak256/circuit/Cargo.toml +++ b/extensions/keccak256/circuit/Cargo.toml @@ -34,9 +34,10 @@ openvm-circuit = { workspace = true, features = ["test-utils"] } hex.workspace = true [features] -default = ["parallel", "jemalloc"] +default = ["parallel", "jemalloc", "tco"] parallel = ["openvm-circuit/parallel"] test-utils = ["openvm-circuit/test-utils"] +tco = ["openvm-circuit/tco"] # performance features: mimalloc = ["openvm-circuit/mimalloc"] jemalloc = ["openvm-circuit/jemalloc"] diff --git a/extensions/keccak256/circuit/src/execution.rs b/extensions/keccak256/circuit/src/execution.rs index b095fec4c4..20772fd6de 100644 --- a/extensions/keccak256/circuit/src/execution.rs +++ b/extensions/keccak256/circuit/src/execution.rs @@ -1,4 +1,7 @@ -use std::borrow::{Borrow, BorrowMut}; +use std::{ + borrow::{Borrow, BorrowMut}, + mem::size_of, +}; use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives_derive::AlignedBytesBorrow; @@ -71,6 +74,21 @@ impl Executor for KeccakVmExecutor { self.pre_compute_impl(pc, inst, data)?; Ok(execute_e1_impl::<_, _>) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut KeccakPreCompute = data.borrow_mut(); + self.pre_compute_impl(pc, inst, data)?; + Ok(execute_e1_tco_handler) + } } impl MeteredExecutor for KeccakVmExecutor { @@ -134,6 +152,7 @@ unsafe fn execute_e12_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/keccak256/circuit/src/lib.rs b/extensions/keccak256/circuit/src/lib.rs index 13bd7b27db..c2ffb1a1e0 100644 --- a/extensions/keccak256/circuit/src/lib.rs +++ b/extensions/keccak256/circuit/src/lib.rs @@ -1,3 +1,6 @@ +#![cfg_attr(feature = "tco", allow(incomplete_features))] +#![cfg_attr(feature = "tco", feature(likely_unlikely))] +#![cfg_attr(feature = "tco", feature(explicit_tail_calls))] //! Stateful keccak256 hasher. Handles full keccak sponge (padding, absorb, keccak-f) on //! variable length inputs read from VM memory. diff --git a/extensions/native/circuit/Cargo.toml b/extensions/native/circuit/Cargo.toml index f9b9bd78c5..75856e110c 100644 --- a/extensions/native/circuit/Cargo.toml +++ b/extensions/native/circuit/Cargo.toml @@ -40,6 +40,8 @@ test-case = { workspace = true } test-log = { workspace = true } [features] -default = ["parallel"] +default = ["parallel", "jemalloc", "tco"] +tco = ["openvm-circuit/tco"] +jemalloc = ["openvm-circuit/jemalloc"] parallel = ["openvm-circuit/parallel"] test-utils = ["openvm-circuit/test-utils"] diff --git a/extensions/native/circuit/src/branch_eq/execution.rs b/extensions/native/circuit/src/branch_eq/execution.rs index bbd8051214..0e82060ac1 100644 --- a/extensions/native/circuit/src/branch_eq/execution.rs +++ b/extensions/native/circuit/src/branch_eq/execution.rs @@ -2,7 +2,6 @@ use std::borrow::{Borrow, BorrowMut}; use openvm_circuit::{ arch::*, - system::memory::online::GuestMemory, utils::{transmute_field_to_u32, transmute_u32_to_field}, }; use openvm_circuit_primitives::AlignedBytesBorrow; @@ -79,6 +78,34 @@ impl Executor for NativeBranchEqualExecutor where F: PrimeField32, { + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut NativeBranchEqualPreCompute = data.borrow_mut(); + + let (a_is_imm, b_is_imm, is_bne) = self.pre_compute_impl(pc, inst, pre_compute)?; + + let fn_ptr = match (a_is_imm, b_is_imm, is_bne) { + (true, true, true) => execute_e1_tco_handler::<_, _, true, true, true>, + (true, true, false) => execute_e1_tco_handler::<_, _, true, true, false>, + (true, false, true) => execute_e1_tco_handler::<_, _, true, false, true>, + (true, false, false) => execute_e1_tco_handler::<_, _, true, false, false>, + (false, true, true) => execute_e1_tco_handler::<_, _, false, true, true>, + (false, true, false) => execute_e1_tco_handler::<_, _, false, true, false>, + (false, false, true) => execute_e1_tco_handler::<_, _, false, false, true>, + (false, false, false) => execute_e1_tco_handler::<_, _, false, false, false>, + }; + + Ok(fn_ptr) + } + #[inline(always)] fn pre_compute_size(&self) -> usize { size_of::() @@ -177,6 +204,7 @@ unsafe fn execute_e12_impl< vm_state.instret += 1; } +#[create_tco_handler] unsafe fn execute_e1_impl< F: PrimeField32, CTX: ExecutionCtxTrait, diff --git a/extensions/native/circuit/src/castf/execution.rs b/extensions/native/circuit/src/castf/execution.rs index b477620e4a..cb67debd43 100644 --- a/extensions/native/circuit/src/castf/execution.rs +++ b/extensions/native/circuit/src/castf/execution.rs @@ -1,6 +1,6 @@ use std::borrow::{Borrow, BorrowMut}; -use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; +use openvm_circuit::arch::*; use openvm_circuit_primitives::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_MEMORY_AS, LocalOpcode, @@ -52,6 +52,25 @@ impl Executor for CastFCoreExecutor where F: PrimeField32, { + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut CastFPreCompute = data.borrow_mut(); + + self.pre_compute_impl(pc, inst, pre_compute)?; + + let fn_ptr = execute_e1_tco_handler::<_, _>; + + Ok(fn_ptr) + } + #[inline(always)] fn pre_compute_size(&self) -> usize { size_of::() @@ -102,6 +121,7 @@ where } } +#[create_tco_handler] unsafe fn execute_e1_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/native/circuit/src/field_arithmetic/execution.rs b/extensions/native/circuit/src/field_arithmetic/execution.rs index cac0770181..1100ca01c6 100644 --- a/extensions/native/circuit/src/field_arithmetic/execution.rs +++ b/extensions/native/circuit/src/field_arithmetic/execution.rs @@ -2,7 +2,6 @@ use std::borrow::{Borrow, BorrowMut}; use openvm_circuit::{ arch::*, - system::memory::online::GuestMemory, utils::{transmute_field_to_u32, transmute_u32_to_field}, }; use openvm_circuit_primitives::AlignedBytesBorrow; @@ -80,6 +79,74 @@ impl Executor for FieldArithmeticCoreExecutor where F: PrimeField32, { + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut FieldArithmeticPreCompute = data.borrow_mut(); + + let (a_is_imm, b_is_imm, local_opcode) = self.pre_compute_impl(pc, inst, pre_compute)?; + + let fn_ptr = match (local_opcode, a_is_imm, b_is_imm) { + (FieldArithmeticOpcode::ADD, true, true) => { + execute_e1_tco_handler::<_, _, true, true, { FieldArithmeticOpcode::ADD as u8 }> + } + (FieldArithmeticOpcode::ADD, true, false) => { + execute_e1_tco_handler::<_, _, true, false, { FieldArithmeticOpcode::ADD as u8 }> + } + (FieldArithmeticOpcode::ADD, false, true) => { + execute_e1_tco_handler::<_, _, false, true, { FieldArithmeticOpcode::ADD as u8 }> + } + (FieldArithmeticOpcode::ADD, false, false) => { + execute_e1_tco_handler::<_, _, false, false, { FieldArithmeticOpcode::ADD as u8 }> + } + (FieldArithmeticOpcode::SUB, true, true) => { + execute_e1_tco_handler::<_, _, true, true, { FieldArithmeticOpcode::SUB as u8 }> + } + (FieldArithmeticOpcode::SUB, true, false) => { + execute_e1_tco_handler::<_, _, true, false, { FieldArithmeticOpcode::SUB as u8 }> + } + (FieldArithmeticOpcode::SUB, false, true) => { + execute_e1_tco_handler::<_, _, false, true, { FieldArithmeticOpcode::SUB as u8 }> + } + (FieldArithmeticOpcode::SUB, false, false) => { + execute_e1_tco_handler::<_, _, false, false, { FieldArithmeticOpcode::SUB as u8 }> + } + (FieldArithmeticOpcode::MUL, true, true) => { + execute_e1_tco_handler::<_, _, true, true, { FieldArithmeticOpcode::MUL as u8 }> + } + (FieldArithmeticOpcode::MUL, true, false) => { + execute_e1_tco_handler::<_, _, true, false, { FieldArithmeticOpcode::MUL as u8 }> + } + (FieldArithmeticOpcode::MUL, false, true) => { + execute_e1_tco_handler::<_, _, false, true, { FieldArithmeticOpcode::MUL as u8 }> + } + (FieldArithmeticOpcode::MUL, false, false) => { + execute_e1_tco_handler::<_, _, false, false, { FieldArithmeticOpcode::MUL as u8 }> + } + (FieldArithmeticOpcode::DIV, true, true) => { + execute_e1_tco_handler::<_, _, true, true, { FieldArithmeticOpcode::DIV as u8 }> + } + (FieldArithmeticOpcode::DIV, true, false) => { + execute_e1_tco_handler::<_, _, true, false, { FieldArithmeticOpcode::DIV as u8 }> + } + (FieldArithmeticOpcode::DIV, false, true) => { + execute_e1_tco_handler::<_, _, false, true, { FieldArithmeticOpcode::DIV as u8 }> + } + (FieldArithmeticOpcode::DIV, false, false) => { + execute_e1_tco_handler::<_, _, false, false, { FieldArithmeticOpcode::DIV as u8 }> + } + }; + + Ok(fn_ptr) + } + #[inline(always)] fn pre_compute_size(&self) -> usize { size_of::() @@ -276,6 +343,7 @@ unsafe fn execute_e12_impl< vm_state.instret += 1; } +#[create_tco_handler] unsafe fn execute_e1_impl< F: PrimeField32, CTX: ExecutionCtxTrait, diff --git a/extensions/native/circuit/src/field_extension/execution.rs b/extensions/native/circuit/src/field_extension/execution.rs index 7b4802987e..0f4cd14895 100644 --- a/extensions/native/circuit/src/field_extension/execution.rs +++ b/extensions/native/circuit/src/field_extension/execution.rs @@ -1,6 +1,6 @@ use std::borrow::{Borrow, BorrowMut}; -use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; +use openvm_circuit::arch::*; use openvm_circuit_primitives::AlignedBytesBorrow; use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode}; use openvm_native_compiler::{conversion::AS, FieldExtensionOpcode}; @@ -61,6 +61,31 @@ impl Executor for FieldExtensionCoreExecutor where F: PrimeField32, { + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut FieldExtensionPreCompute = data.borrow_mut(); + + let opcode = self.pre_compute_impl(pc, inst, pre_compute)?; + + let fn_ptr = match opcode { + 0 => execute_e1_tco_handler::<_, _, 0>, // FE4ADD + 1 => execute_e1_tco_handler::<_, _, 1>, // FE4SUB + 2 => execute_e1_tco_handler::<_, _, 2>, // BBE4MUL + 3 => execute_e1_tco_handler::<_, _, 3>, // BBE4DIV + _ => panic!("Invalid field extension opcode: {opcode}"), + }; + + Ok(fn_ptr) + } + #[inline(always)] fn pre_compute_size(&self) -> usize { size_of::() @@ -145,6 +170,7 @@ unsafe fn execute_e12_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/native/circuit/src/fri/execution.rs b/extensions/native/circuit/src/fri/execution.rs index 7af4034ed9..1ce37b492a 100644 --- a/extensions/native/circuit/src/fri/execution.rs +++ b/extensions/native/circuit/src/fri/execution.rs @@ -1,6 +1,6 @@ use std::borrow::{Borrow, BorrowMut}; -use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; +use openvm_circuit::arch::*; use openvm_circuit_primitives::AlignedBytesBorrow; use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP}; use openvm_native_compiler::conversion::AS; @@ -66,6 +66,24 @@ impl Executor for FriReducedOpeningExecutor where F: PrimeField32, { + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut FriReducedOpeningPreCompute = data.borrow_mut(); + + self.pre_compute_impl(pc, inst, pre_compute)?; + + let fn_ptr = execute_e1_tco_handler; + Ok(fn_ptr) + } + #[inline(always)] fn pre_compute_size(&self) -> usize { size_of::() @@ -114,6 +132,7 @@ where } } +#[create_tco_handler] unsafe fn execute_e1_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/native/circuit/src/lib.rs b/extensions/native/circuit/src/lib.rs index 01c0d0ba5b..6c9e4b930d 100644 --- a/extensions/native/circuit/src/lib.rs +++ b/extensions/native/circuit/src/lib.rs @@ -1,3 +1,7 @@ +#![cfg_attr(feature = "tco", allow(incomplete_features))] +#![cfg_attr(feature = "tco", feature(likely_unlikely))] +#![cfg_attr(feature = "tco", feature(explicit_tail_calls))] + use openvm_circuit::{ arch::{ AirInventory, ChipInventoryError, InitFileGenerator, MatrixRecordArena, MemoryConfig, diff --git a/extensions/pairing/circuit/Cargo.toml b/extensions/pairing/circuit/Cargo.toml index a44afff0f8..6a8a2c296f 100644 --- a/extensions/pairing/circuit/Cargo.toml +++ b/extensions/pairing/circuit/Cargo.toml @@ -7,6 +7,11 @@ edition.workspace = true homepage.workspace = true repository.workspace = true +[features] +default = ["jemalloc", "tco"] +tco = ["openvm-circuit/tco"] +jemalloc = ["openvm-circuit/jemalloc"] + [dependencies] openvm-circuit-primitives = { workspace = true } openvm-circuit-derive = { workspace = true } diff --git a/extensions/pairing/circuit/src/lib.rs b/extensions/pairing/circuit/src/lib.rs index 7edefa5490..4207a41534 100644 --- a/extensions/pairing/circuit/src/lib.rs +++ b/extensions/pairing/circuit/src/lib.rs @@ -1,3 +1,7 @@ +#![cfg_attr(feature = "tco", allow(incomplete_features))] +#![cfg_attr(feature = "tco", feature(likely_unlikely))] +#![cfg_attr(feature = "tco", feature(explicit_tail_calls))] + pub use openvm_pairing_guest::{ bls12_381::{BLS12_381_COMPLEX_STRUCT_NAME, BLS12_381_ECC_STRUCT_NAME}, bn254::BN254_COMPLEX_STRUCT_NAME, diff --git a/extensions/sha256/circuit/Cargo.toml b/extensions/sha256/circuit/Cargo.toml index 413265b622..839b7a1e49 100644 --- a/extensions/sha256/circuit/Cargo.toml +++ b/extensions/sha256/circuit/Cargo.toml @@ -29,7 +29,8 @@ openvm-stark-sdk = { workspace = true } openvm-circuit = { workspace = true, features = ["test-utils"] } [features] -default = ["parallel", "jemalloc"] +default = ["parallel", "jemalloc", "tco"] +tco = ["openvm-circuit/tco"] parallel = ["openvm-circuit/parallel"] test-utils = ["openvm-circuit/test-utils"] # performance features: diff --git a/extensions/sha256/circuit/src/lib.rs b/extensions/sha256/circuit/src/lib.rs index 741cf3ec9d..484f89b234 100644 --- a/extensions/sha256/circuit/src/lib.rs +++ b/extensions/sha256/circuit/src/lib.rs @@ -1,3 +1,7 @@ +#![cfg_attr(feature = "tco", allow(incomplete_features))] +#![cfg_attr(feature = "tco", feature(likely_unlikely))] +#![cfg_attr(feature = "tco", feature(explicit_tail_calls))] + use std::result::Result; use openvm_circuit::{ diff --git a/extensions/sha256/circuit/src/sha256_chip/execution.rs b/extensions/sha256/circuit/src/sha256_chip/execution.rs index befbb25f41..9a31fbf9e4 100644 --- a/extensions/sha256/circuit/src/sha256_chip/execution.rs +++ b/extensions/sha256/circuit/src/sha256_chip/execution.rs @@ -1,6 +1,6 @@ use std::borrow::{Borrow, BorrowMut}; -use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; +use openvm_circuit::arch::*; use openvm_circuit_primitives::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, @@ -23,6 +23,21 @@ struct ShaPreCompute { } impl Executor for Sha256VmExecutor { + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let data: &mut ShaPreCompute = data.borrow_mut(); + self.pre_compute_impl(pc, inst, data)?; + Ok(execute_e1_tco_handler::<_, _>) + } + fn pre_compute_size(&self) -> usize { size_of::() } @@ -105,6 +120,7 @@ unsafe fn execute_e12_impl( pre_compute: &[u8], vm_state: &mut VmExecState, From f01ccd2bebca34b3aadea6c397ec1a4feefc150c Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 10:59:52 -0700 Subject: [PATCH 06/36] chore: update feature deps --- extensions/algebra/circuit/Cargo.toml | 2 +- extensions/bigint/circuit/Cargo.toml | 2 +- extensions/ecc/circuit/Cargo.toml | 2 +- extensions/keccak256/circuit/Cargo.toml | 2 +- extensions/native/circuit/Cargo.toml | 3 +-- extensions/sha256/circuit/Cargo.toml | 2 +- 6 files changed, 6 insertions(+), 7 deletions(-) diff --git a/extensions/algebra/circuit/Cargo.toml b/extensions/algebra/circuit/Cargo.toml index 0c2c7a0500..345c132d03 100644 --- a/extensions/algebra/circuit/Cargo.toml +++ b/extensions/algebra/circuit/Cargo.toml @@ -9,7 +9,7 @@ repository.workspace = true [features] default = ["jemalloc", "tco"] -tco = ["openvm-circuit/tco"] +tco = ["openvm-rv32im-circuit/tco"] jemalloc = ["openvm-circuit/jemalloc"] [dependencies] diff --git a/extensions/bigint/circuit/Cargo.toml b/extensions/bigint/circuit/Cargo.toml index 6816e2c612..94e8ad0fab 100644 --- a/extensions/bigint/circuit/Cargo.toml +++ b/extensions/bigint/circuit/Cargo.toml @@ -36,7 +36,7 @@ alloy-primitives = { version = "1.2.1" } default = ["parallel", "jemalloc", "tco"] parallel = ["openvm-circuit/parallel"] test-utils = ["openvm-circuit/test-utils"] -tco = ["openvm-circuit/tco"] +tco = ["openvm-rv32im-circuit/tco"] # performance features: mimalloc = ["openvm-circuit/mimalloc"] jemalloc = ["openvm-circuit/jemalloc"] diff --git a/extensions/ecc/circuit/Cargo.toml b/extensions/ecc/circuit/Cargo.toml index eb2b37ce9b..4962c1ca40 100644 --- a/extensions/ecc/circuit/Cargo.toml +++ b/extensions/ecc/circuit/Cargo.toml @@ -9,7 +9,7 @@ repository.workspace = true [features] default = ["jemalloc", "tco"] -tco = ["openvm-circuit/tco"] +tco = ["openvm-algebra-circuit/tco"] jemalloc = ["openvm-circuit/jemalloc"] [dependencies] diff --git a/extensions/keccak256/circuit/Cargo.toml b/extensions/keccak256/circuit/Cargo.toml index f50f7c1c07..177f055d73 100644 --- a/extensions/keccak256/circuit/Cargo.toml +++ b/extensions/keccak256/circuit/Cargo.toml @@ -37,7 +37,7 @@ hex.workspace = true default = ["parallel", "jemalloc", "tco"] parallel = ["openvm-circuit/parallel"] test-utils = ["openvm-circuit/test-utils"] -tco = ["openvm-circuit/tco"] +tco = ["openvm-rv32im-circuit/tco"] # performance features: mimalloc = ["openvm-circuit/mimalloc"] jemalloc = ["openvm-circuit/jemalloc"] diff --git a/extensions/native/circuit/Cargo.toml b/extensions/native/circuit/Cargo.toml index 75856e110c..da21b3b78f 100644 --- a/extensions/native/circuit/Cargo.toml +++ b/extensions/native/circuit/Cargo.toml @@ -20,7 +20,6 @@ openvm-rv32im-circuit = { workspace = true } openvm-rv32im-transpiler = { workspace = true } openvm-native-compiler = { workspace = true } - strum.workspace = true itertools.workspace = true derive-new.workspace = true @@ -41,7 +40,7 @@ test-log = { workspace = true } [features] default = ["parallel", "jemalloc", "tco"] -tco = ["openvm-circuit/tco"] +tco = ["openvm-rv32im-circuit/tco"] jemalloc = ["openvm-circuit/jemalloc"] parallel = ["openvm-circuit/parallel"] test-utils = ["openvm-circuit/test-utils"] diff --git a/extensions/sha256/circuit/Cargo.toml b/extensions/sha256/circuit/Cargo.toml index 839b7a1e49..e677dc87a3 100644 --- a/extensions/sha256/circuit/Cargo.toml +++ b/extensions/sha256/circuit/Cargo.toml @@ -30,7 +30,7 @@ openvm-circuit = { workspace = true, features = ["test-utils"] } [features] default = ["parallel", "jemalloc", "tco"] -tco = ["openvm-circuit/tco"] +tco = ["openvm-rv32im-circuit/tco"] parallel = ["openvm-circuit/parallel"] test-utils = ["openvm-circuit/test-utils"] # performance features: From 5a193a06b2e61251098a17e969539921ce39b86f Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 11:29:42 -0700 Subject: [PATCH 07/36] fixes --- crates/sdk/Cargo.toml | 13 ++++- crates/vm/src/arch/mod.rs | 2 +- extensions/algebra/circuit/src/execution.rs | 48 +++++++++++-------- .../algebra/circuit/src/modular_chip/is_eq.rs | 19 ++++---- .../src/weierstrass_chip/add_ne/execution.rs | 5 +- .../src/weierstrass_chip/double/execution.rs | 5 +- .../native/circuit/src/branch_eq/execution.rs | 1 + .../native/circuit/src/castf/execution.rs | 2 +- .../circuit/src/field_arithmetic/execution.rs | 1 + .../circuit/src/field_extension/execution.rs | 2 +- .../native/circuit/src/fri/execution.rs | 2 +- .../circuit/src/jal_rangecheck/execution.rs | 24 ++++++++++ .../native/circuit/src/loadstore/execution.rs | 25 ++++++++++ .../native/circuit/src/poseidon2/execution.rs | 29 +++++++++++ extensions/pairing/circuit/Cargo.toml | 2 +- .../circuit/src/sha256_chip/execution.rs | 2 +- 16 files changed, 141 insertions(+), 41 deletions(-) diff --git a/crates/sdk/Cargo.toml b/crates/sdk/Cargo.toml index 8e1bdd449a..e2a624d4c8 100644 --- a/crates/sdk/Cargo.toml +++ b/crates/sdk/Cargo.toml @@ -61,7 +61,7 @@ rrs-lib.workspace = true num-bigint.workspace = true [features] -default = ["parallel", "jemalloc"] +default = ["parallel", "jemalloc", "tco"] evm-prove = [ "openvm-continuations/static-verifier", "openvm-native-recursion/evm-prove", @@ -79,6 +79,17 @@ metrics = [ "openvm-native-recursion/metrics", "openvm-native-compiler/metrics", ] +tco = [ + "openvm-circuit/tco", + "openvm-rv32im-circuit/tco", + "openvm-native-circuit/tco", + "openvm-sha256-circuit/tco", + "openvm-keccak256-circuit/tco", + "openvm-bigint-circuit/tco", + "openvm-algebra-circuit/tco", + "openvm-ecc-circuit/tco", + "openvm-pairing-circuit/tco" +] # for guest profiling: perf-metrics = ["openvm-circuit/perf-metrics", "openvm-transpiler/function-span"] # turns on stark-backend debugger in all proofs diff --git a/crates/vm/src/arch/mod.rs b/crates/vm/src/arch/mod.rs index 402931e61c..545a463883 100644 --- a/crates/vm/src/arch/mod.rs +++ b/crates/vm/src/arch/mod.rs @@ -30,7 +30,7 @@ pub use execution::*; pub use execution_mode::{ExecutionCtxTrait, MeteredExecutionCtxTrait}; pub use extensions::*; pub use integration_api::*; -#[cfg(feature = "tco")] +pub use interpreter::InterpretedInstance; pub use openvm_circuit_derive::create_tco_handler; pub use openvm_instructions as instructions; pub use record_arena::*; diff --git a/extensions/algebra/circuit/src/execution.rs b/extensions/algebra/circuit/src/execution.rs index dbfc794621..aeb7fae242 100644 --- a/extensions/algebra/circuit/src/execution.rs +++ b/extensions/algebra/circuit/src/execution.rs @@ -5,7 +5,10 @@ use std::{ use num_bigint::BigUint; use openvm_algebra_transpiler::{Fp2Opcode, Rv32ModularArithmeticOpcode}; -use openvm_circuit::arch::*; +use openvm_circuit::{ + arch::*, + system::memory::{online::GuestMemory, POINTER_MAX_BITS}, +}; use openvm_circuit_primitives::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, @@ -174,13 +177,17 @@ impl<'a, const BLOCKS: usize, const BLOCK_SIZE: usize, const IS_FP2: bool> impl Executor for FieldExprVecHeapExecutor { - #[cfg(feature = "tco")] - fn handler( + #[inline(always)] + fn pre_compute_size(&self) -> usize { + std::mem::size_of::() + } + + fn pre_compute( &self, pc: u32, inst: &Instruction, data: &mut [u8], - ) -> Result, StaticProgramError> + ) -> Result, StaticProgramError> where Ctx: ExecutionCtxTrait, { @@ -197,7 +204,7 @@ impl) + Ok(execute_e1_generic_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) } } else if let Some(field_type) = get_field_type(modulus) { generate_field_dispatch!( @@ -218,7 +225,7 @@ impl) + Ok(execute_e1_generic_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) } } else { - Ok(execute_e1_setup_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) + Ok(execute_e1_setup_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) } } - #[inline(always)] - fn pre_compute_size(&self) -> usize { - std::mem::size_of::() - } - - fn pre_compute( + #[cfg(feature = "tco")] + fn handler( &self, pc: u32, inst: &Instruction, data: &mut [u8], - ) -> Result, StaticProgramError> + ) -> Result, StaticProgramError> where Ctx: ExecutionCtxTrait, { @@ -289,7 +292,7 @@ impl) + Ok(execute_e1_generic_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) } } else if let Some(field_type) = get_field_type(modulus) { generate_field_dispatch!( @@ -310,7 +313,7 @@ impl) + Ok(execute_e1_generic_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) } } else { - Ok(execute_e1_setup_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) + Ok(execute_e1_setup_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) } } } @@ -581,6 +584,7 @@ unsafe fn execute_e12_setup_impl< vm_state.instret += 1; } +#[create_tco_handler] unsafe fn execute_e1_setup_impl< F: PrimeField32, CTX: ExecutionCtxTrait, @@ -629,6 +633,7 @@ unsafe fn execute_e1_impl< execute_e12_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2, FIELD_TYPE, OP>(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl< F: PrimeField32, CTX: MeteredExecutionCtxTrait, @@ -651,6 +656,7 @@ unsafe fn execute_e2_impl< ); } +#[create_tco_handler] unsafe fn execute_e1_generic_impl< F: PrimeField32, CTX: ExecutionCtxTrait, diff --git a/extensions/algebra/circuit/src/modular_chip/is_eq.rs b/extensions/algebra/circuit/src/modular_chip/is_eq.rs index 70b59ef7d4..32fe3c4f94 100644 --- a/extensions/algebra/circuit/src/modular_chip/is_eq.rs +++ b/extensions/algebra/circuit/src/modular_chip/is_eq.rs @@ -7,7 +7,10 @@ use num_bigint::BigUint; use openvm_algebra_transpiler::Rv32ModularArithmeticOpcode; use openvm_circuit::{ arch::*, - system::memory::{online::TracingMemory, MemoryAuxColsFactory}, + system::memory::{ + online::{GuestMemory, TracingMemory}, + MemoryAuxColsFactory, POINTER_MAX_BITS, + }, }; use openvm_circuit_primitives::{ bigint::utils::big_uint_to_limbs, @@ -551,16 +554,10 @@ where #[cfg(feature = "tco")] fn handler( &self, - _opcode: u32, - _instruction: &Instruction, - _data: &mut [u8], - ) -> Result< - for<'a, 'b, 'c> unsafe fn( - &'a InterpretedInstance<'b, F, Ctx>, - &'c mut VmExecState, - ) -> Result<(), ExecutionError>, - StaticProgramError, - > + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> where Ctx: ExecutionCtxTrait, { diff --git a/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs b/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs index b3dec5c01e..9310bf970b 100644 --- a/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs +++ b/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs @@ -5,7 +5,10 @@ use std::{ use num_bigint::BigUint; use openvm_algebra_circuit::fields::{get_field_type, FieldType}; -use openvm_circuit::arch::*; +use openvm_circuit::{ + arch::*, + system::memory::{online::GuestMemory, POINTER_MAX_BITS}, +}; use openvm_circuit_primitives::AlignedBytesBorrow; use openvm_ecc_transpiler::Rv32WeierstrassOpcode; use openvm_instructions::{ diff --git a/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs b/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs index abc5c47990..b968c8a2a2 100644 --- a/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs +++ b/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs @@ -4,7 +4,10 @@ use std::{ }; use num_bigint::BigUint; -use openvm_circuit::arch::*; +use openvm_circuit::{ + arch::*, + system::memory::{online::GuestMemory, POINTER_MAX_BITS}, +}; use openvm_circuit_primitives::AlignedBytesBorrow; use openvm_ecc_transpiler::Rv32WeierstrassOpcode; use openvm_instructions::{ diff --git a/extensions/native/circuit/src/branch_eq/execution.rs b/extensions/native/circuit/src/branch_eq/execution.rs index 0e82060ac1..033630cecd 100644 --- a/extensions/native/circuit/src/branch_eq/execution.rs +++ b/extensions/native/circuit/src/branch_eq/execution.rs @@ -2,6 +2,7 @@ use std::borrow::{Borrow, BorrowMut}; use openvm_circuit::{ arch::*, + system::memory::online::GuestMemory, utils::{transmute_field_to_u32, transmute_u32_to_field}, }; use openvm_circuit_primitives::AlignedBytesBorrow; diff --git a/extensions/native/circuit/src/castf/execution.rs b/extensions/native/circuit/src/castf/execution.rs index cb67debd43..09a4e4a283 100644 --- a/extensions/native/circuit/src/castf/execution.rs +++ b/extensions/native/circuit/src/castf/execution.rs @@ -1,6 +1,6 @@ use std::borrow::{Borrow, BorrowMut}; -use openvm_circuit::arch::*; +use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_MEMORY_AS, LocalOpcode, diff --git a/extensions/native/circuit/src/field_arithmetic/execution.rs b/extensions/native/circuit/src/field_arithmetic/execution.rs index 1100ca01c6..b95b778d7a 100644 --- a/extensions/native/circuit/src/field_arithmetic/execution.rs +++ b/extensions/native/circuit/src/field_arithmetic/execution.rs @@ -2,6 +2,7 @@ use std::borrow::{Borrow, BorrowMut}; use openvm_circuit::{ arch::*, + system::memory::online::GuestMemory, utils::{transmute_field_to_u32, transmute_u32_to_field}, }; use openvm_circuit_primitives::AlignedBytesBorrow; diff --git a/extensions/native/circuit/src/field_extension/execution.rs b/extensions/native/circuit/src/field_extension/execution.rs index 0f4cd14895..49d38b88f7 100644 --- a/extensions/native/circuit/src/field_extension/execution.rs +++ b/extensions/native/circuit/src/field_extension/execution.rs @@ -1,6 +1,6 @@ use std::borrow::{Borrow, BorrowMut}; -use openvm_circuit::arch::*; +use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives::AlignedBytesBorrow; use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode}; use openvm_native_compiler::{conversion::AS, FieldExtensionOpcode}; diff --git a/extensions/native/circuit/src/fri/execution.rs b/extensions/native/circuit/src/fri/execution.rs index 1ce37b492a..6fe090955c 100644 --- a/extensions/native/circuit/src/fri/execution.rs +++ b/extensions/native/circuit/src/fri/execution.rs @@ -1,6 +1,6 @@ use std::borrow::{Borrow, BorrowMut}; -use openvm_circuit::arch::*; +use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives::AlignedBytesBorrow; use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP}; use openvm_native_compiler::conversion::AS; diff --git a/extensions/native/circuit/src/jal_rangecheck/execution.rs b/extensions/native/circuit/src/jal_rangecheck/execution.rs index f9cf17d7af..60a5691795 100644 --- a/extensions/native/circuit/src/jal_rangecheck/execution.rs +++ b/extensions/native/circuit/src/jal_rangecheck/execution.rs @@ -109,6 +109,28 @@ where Ok(execute_range_check_e1_impl) } } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> { + let &Instruction { opcode, .. } = inst; + + let is_jal = opcode == NativeJalOpcode::JAL.global_opcode(); + + if is_jal { + let jal_data: &mut JalPreCompute = data.borrow_mut(); + self.pre_compute_jal_impl(pc, inst, jal_data)?; + Ok(execute_jal_e1_tco_handler) + } else { + let range_check_data: &mut RangeCheckPreCompute = data.borrow_mut(); + self.pre_compute_range_check_impl(pc, inst, range_check_data)?; + Ok(execute_range_check_e1_tco_handler) + } + } } impl MeteredExecutor for JalRangeCheckExecutor @@ -190,6 +212,7 @@ unsafe fn execute_range_check_e12_impl( vm_state.instret += 1; } +#[create_tco_handler] unsafe fn execute_jal_e1_impl( pre_compute: &[u8], vm_state: &mut VmExecState, @@ -209,6 +232,7 @@ unsafe fn execute_jal_e2_impl( execute_jal_e12_impl(&pre_compute.data, vm_state); } +#[create_tco_handler] unsafe fn execute_range_check_e1_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/native/circuit/src/loadstore/execution.rs b/extensions/native/circuit/src/loadstore/execution.rs index a31efb831e..f94104d454 100644 --- a/extensions/native/circuit/src/loadstore/execution.rs +++ b/extensions/native/circuit/src/loadstore/execution.rs @@ -82,6 +82,28 @@ where Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> { + let pre_compute: &mut NativeLoadStorePreCompute = data.borrow_mut(); + + let local_opcode = self.pre_compute_impl(pc, inst, pre_compute)?; + + let fn_ptr = match local_opcode { + NativeLoadStoreOpcode::LOADW => execute_e1_loadw_tco_handler::, + NativeLoadStoreOpcode::STOREW => execute_e1_storew_tco_handler::, + NativeLoadStoreOpcode::HINT_STOREW => { + execute_e1_hint_storew_tco_handler:: + } + }; + + Ok(fn_ptr) + } } impl MeteredExecutor for NativeLoadStoreCoreExecutor @@ -116,6 +138,7 @@ where } } +#[create_tco_handler] unsafe fn execute_e1_loadw( pre_compute: &[u8], vm_state: &mut VmExecState, @@ -124,6 +147,7 @@ unsafe fn execute_e1_loadw(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e1_storew( pre_compute: &[u8], vm_state: &mut VmExecState, @@ -132,6 +156,7 @@ unsafe fn execute_e1_storew(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e1_hint_storew< F: PrimeField32, CTX: ExecutionCtxTrait, diff --git a/extensions/native/circuit/src/poseidon2/execution.rs b/extensions/native/circuit/src/poseidon2/execution.rs index 661d8e10cc..6ce150a036 100644 --- a/extensions/native/circuit/src/poseidon2/execution.rs +++ b/extensions/native/circuit/src/poseidon2/execution.rs @@ -173,6 +173,33 @@ impl Executor Ok(execute_verify_batch_e1_impl::<_, _, SBOX_REGISTERS>) } } + + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> { + let &Instruction { opcode, .. } = inst; + + let is_pos2 = opcode == PERM_POS2.global_opcode() || opcode == COMP_POS2.global_opcode(); + + if is_pos2 { + let pos2_data: &mut Pos2PreCompute = data.borrow_mut(); + self.pre_compute_pos2_impl(pc, inst, pos2_data)?; + if opcode == PERM_POS2.global_opcode() { + Ok(execute_pos2_e1_tco_handler::<_, _, SBOX_REGISTERS, true>) + } else { + Ok(execute_pos2_e1_tco_handler::<_, _, SBOX_REGISTERS, false>) + } + } else { + let verify_batch_data: &mut VerifyBatchPreCompute = + data.borrow_mut(); + self.pre_compute_verify_batch_impl(pc, inst, verify_batch_data)?; + Ok(execute_verify_batch_e1_tco_handler::<_, _, SBOX_REGISTERS>) + } + } } impl MeteredExecutor @@ -220,6 +247,7 @@ impl MeteredExecutor } } +#[create_tco_handler] unsafe fn execute_pos2_e1_impl< F: PrimeField32, CTX: ExecutionCtxTrait, @@ -250,6 +278,7 @@ unsafe fn execute_pos2_e2_impl< .on_height_change(pre_compute.chip_idx as usize, height); } +#[create_tco_handler] unsafe fn execute_verify_batch_e1_impl< F: PrimeField32, CTX: ExecutionCtxTrait, diff --git a/extensions/pairing/circuit/Cargo.toml b/extensions/pairing/circuit/Cargo.toml index 6a8a2c296f..4201c0fedd 100644 --- a/extensions/pairing/circuit/Cargo.toml +++ b/extensions/pairing/circuit/Cargo.toml @@ -9,7 +9,7 @@ repository.workspace = true [features] default = ["jemalloc", "tco"] -tco = ["openvm-circuit/tco"] +tco = ["openvm-rv32im-circuit/tco", "openvm-ecc-circuit/tco"] jemalloc = ["openvm-circuit/jemalloc"] [dependencies] diff --git a/extensions/sha256/circuit/src/sha256_chip/execution.rs b/extensions/sha256/circuit/src/sha256_chip/execution.rs index 9a31fbf9e4..33972ec1b1 100644 --- a/extensions/sha256/circuit/src/sha256_chip/execution.rs +++ b/extensions/sha256/circuit/src/sha256_chip/execution.rs @@ -1,6 +1,6 @@ use std::borrow::{Borrow, BorrowMut}; -use openvm_circuit::arch::*; +use openvm_circuit::{arch::*, system::memory::online::GuestMemory}; use openvm_circuit_primitives::AlignedBytesBorrow; use openvm_instructions::{ instruction::Instruction, From e6affb6ecf25152b73f221e6ce730e4f3cfafa99 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 14:47:25 -0700 Subject: [PATCH 08/36] feat: simplify the handler type without Result --- crates/vm/derive/src/lib.rs | 7 +++ crates/vm/derive/src/tco.rs | 21 ++++---- crates/vm/src/arch/execution.rs | 2 +- crates/vm/src/arch/interpreter.rs | 53 ++++++++++--------- crates/vm/src/lib.rs | 1 - crates/vm/src/system/phantom/execution.rs | 2 +- .../vm/src/system/public_values/execution.rs | 2 +- extensions/algebra/circuit/src/lib.rs | 1 - extensions/bigint/circuit/src/lib.rs | 1 - extensions/ecc/circuit/src/lib.rs | 1 - extensions/keccak256/circuit/src/lib.rs | 1 - extensions/native/circuit/src/lib.rs | 1 - extensions/pairing/circuit/src/lib.rs | 1 - extensions/rv32im/circuit/src/lib.rs | 1 - extensions/sha256/circuit/src/lib.rs | 1 - 15 files changed, 49 insertions(+), 47 deletions(-) diff --git a/crates/vm/derive/src/lib.rs b/crates/vm/derive/src/lib.rs index 9eb04c6caa..2ffc89c044 100644 --- a/crates/vm/derive/src/lib.rs +++ b/crates/vm/derive/src/lib.rs @@ -754,6 +754,13 @@ fn parse_executor_type( /// ``` /// /// This will generate a TCO handler function with the same generics and where clauses. +/// +/// # Safety +/// +/// Do not use this macro if your function wants to terminate execution without error with a +/// specific error code. The handler generated by this macro assumes that execution should continue +/// unless the execute_impl returns an error. This is done for performance to skip an exit code +/// check. #[proc_macro_attribute] pub fn create_tco_handler(_attr: TokenStream, item: TokenStream) -> TokenStream { tco::tco_impl(item) diff --git a/crates/vm/derive/src/tco.rs b/crates/vm/derive/src/tco.rs index 186db9e90b..1ec376e777 100644 --- a/crates/vm/derive/src/tco.rs +++ b/crates/vm/derive/src/tco.rs @@ -46,25 +46,26 @@ pub fn tco_impl(item: TokenStream) -> TokenStream { ::openvm_circuit::system::memory::online::GuestMemory, #ctx_type, >, - ) -> Result<(), ::openvm_circuit::arch::ExecutionError> + ) #where_clause { let pre_compute = interpreter.get_pre_compute(exec_state.vm_state.pc); #execute_call; - if std::hint::unlikely(exec_state.exit_code.is_err()) { - return Err(::openvm_circuit::arch::ExecutionError::ExecStateError); - } - if std::hint::unlikely(exec_state.exit_code.as_ref().unwrap().is_some()) { - #ctx_type::on_terminate(exec_state); - // terminate - return Ok(()); + if exec_state.exit_code.is_err() { + // stop execution + return; } if #ctx_type::should_suspend(exec_state) { - return Ok(()); + return; } // exec_state.pc should have been updated by execute_impl at this point - let next_handler = interpreter.get_handler(exec_state.vm_state.pc)?; + let next_handler = interpreter.get_handler(exec_state.vm_state.pc); + if next_handler.is_none() { + exec_state.exit_code = Err(interpreter.pc_out_of_bounds_err(exec_state.vm_state.pc)); + return; + } + let next_handler = next_handler.unwrap_unchecked(); // The `become` keyword has a bug that is not re-passing the `interpreter`, `exec_state` references properly. But llvm seems to almost always guarantee tail call elimination when the function signature is the same as the current function. next_handler(interpreter, exec_state) diff --git a/crates/vm/src/arch/execution.rs b/crates/vm/src/arch/execution.rs index e859bea1e7..97f3285ba9 100644 --- a/crates/vm/src/arch/execution.rs +++ b/crates/vm/src/arch/execution.rs @@ -109,7 +109,7 @@ pub type ExecuteFunc = pub type Handler = unsafe fn( interpreter: &InterpretedInstance, exec_state: &mut VmExecState, -) -> Result<(), ExecutionError>; +); /// Trait for pure execution via a host interpreter. The trait methods provide the methods to /// pre-process the program code into function pointers which operate on `pre_compute` instruction diff --git a/crates/vm/src/arch/interpreter.rs b/crates/vm/src/arch/interpreter.rs index 8bb46e4418..99728c94c7 100644 --- a/crates/vm/src/arch/interpreter.rs +++ b/crates/vm/src/arch/interpreter.rs @@ -189,21 +189,19 @@ where } } + pub fn pc_out_of_bounds_err(&self, pc: u32) -> ExecutionError { + ExecutionError::PcOutOfBounds { + pc, + pc_base: self.pc_base, + program_len: self.pre_compute_insns.len(), + } + } + #[cfg(feature = "tco")] #[inline(always)] - pub fn get_handler(&self, pc: u32) -> Result, ExecutionError> { + pub fn get_handler(&self, pc: u32) -> Option> { let pc_idx = get_pc_index(self.pc_base, pc); - if std::hint::unlikely(pc_idx >= self.handlers.len()) { - return Err(ExecutionError::PcOutOfBounds { - pc, - pc_base: self.pc_base, - program_len: self.handlers.len(), - }); - } - // SAFETY: - // - we checked above that pc_idx is within bounds - let handler = unsafe { self.handlers.get_unchecked(pc_idx) }; - Ok(*handler) + self.handlers.get(pc_idx).map(|x| *x) } } @@ -302,15 +300,21 @@ where } #[cfg(feature = "tco")] unsafe { - let handler = self.get_handler(exec_state.pc)?; - let res = handler(self, &mut exec_state); - if let Err(err) = res { - match err { - ExecutionError::ExecStateError => {} - _ => { - return Err(err); - } - } + let handler = self + .get_handler(exec_state.pc) + .ok_or(ExecutionError::PcOutOfBounds { + pc: exec_state.pc, + pc_base: self.pc_base, + program_len: self.handlers.len(), + })?; + handler(self, &mut exec_state); + + if exec_state + .exit_code + .as_ref() + .is_ok_and(|exit_code| exit_code.is_some()) + { + ExecutionCtx::on_terminate(&mut exec_state); } } @@ -540,17 +544,16 @@ unsafe fn terminate_execute_e12_impl( unsafe fn terminate_execute_e12_tco_handler( interpreter: &InterpretedInstance, vm_state: &mut VmExecState, -) -> Result<(), ExecutionError> { +) { let pre_compute = interpreter.get_pre_compute(vm_state.pc); terminate_execute_e12_impl(pre_compute, vm_state); - Ok(()) } #[cfg(feature = "tco")] unsafe fn unreachable_tco_handler( _: &InterpretedInstance, vm_state: &mut VmExecState, -) -> Result<(), ExecutionError> { - Err(ExecutionError::Unreachable(vm_state.pc)) +) { + vm_state.exit_code = Err(ExecutionError::Unreachable(vm_state.pc)); } fn get_pre_compute_max_size>( diff --git a/crates/vm/src/lib.rs b/crates/vm/src/lib.rs index c4dfd35053..138549fb70 100644 --- a/crates/vm/src/lib.rs +++ b/crates/vm/src/lib.rs @@ -1,5 +1,4 @@ #![cfg_attr(feature = "tco", allow(incomplete_features))] -#![cfg_attr(feature = "tco", feature(likely_unlikely))] #![cfg_attr(feature = "tco", feature(explicit_tail_calls))] extern crate self as openvm_circuit; diff --git a/crates/vm/src/system/phantom/execution.rs b/crates/vm/src/system/phantom/execution.rs index 0d71db54f1..56b8cc286f 100644 --- a/crates/vm/src/system/phantom/execution.rs +++ b/crates/vm/src/system/phantom/execution.rs @@ -101,7 +101,7 @@ unsafe fn execute_e12_impl( vm_state.instret += 1; } -#[cfg_attr(feature = "tco", create_tco_handler)] +#[create_tco_handler] #[inline(always)] unsafe fn execute_e1_impl( pre_compute: &[u8], diff --git a/crates/vm/src/system/public_values/execution.rs b/crates/vm/src/system/public_values/execution.rs index 19057a75b6..d976c02490 100644 --- a/crates/vm/src/system/public_values/execution.rs +++ b/crates/vm/src/system/public_values/execution.rs @@ -179,7 +179,7 @@ unsafe fn execute_e12_impl( pre_compute: &[u8], diff --git a/extensions/algebra/circuit/src/lib.rs b/extensions/algebra/circuit/src/lib.rs index 8edd6108d0..08a69c650a 100644 --- a/extensions/algebra/circuit/src/lib.rs +++ b/extensions/algebra/circuit/src/lib.rs @@ -1,5 +1,4 @@ #![cfg_attr(feature = "tco", allow(incomplete_features))] -#![cfg_attr(feature = "tco", feature(likely_unlikely))] #![cfg_attr(feature = "tco", feature(explicit_tail_calls))] use derive_more::derive::{Deref, DerefMut}; diff --git a/extensions/bigint/circuit/src/lib.rs b/extensions/bigint/circuit/src/lib.rs index 48a55a072b..0109a3f88e 100644 --- a/extensions/bigint/circuit/src/lib.rs +++ b/extensions/bigint/circuit/src/lib.rs @@ -1,5 +1,4 @@ #![cfg_attr(feature = "tco", allow(incomplete_features))] -#![cfg_attr(feature = "tco", feature(likely_unlikely))] #![cfg_attr(feature = "tco", feature(explicit_tail_calls))] use openvm_circuit::{ self, diff --git a/extensions/ecc/circuit/src/lib.rs b/extensions/ecc/circuit/src/lib.rs index 4789d09187..713088e864 100644 --- a/extensions/ecc/circuit/src/lib.rs +++ b/extensions/ecc/circuit/src/lib.rs @@ -1,5 +1,4 @@ #![cfg_attr(feature = "tco", allow(incomplete_features))] -#![cfg_attr(feature = "tco", feature(likely_unlikely))] #![cfg_attr(feature = "tco", feature(explicit_tail_calls))] mod weierstrass_chip; diff --git a/extensions/keccak256/circuit/src/lib.rs b/extensions/keccak256/circuit/src/lib.rs index c2ffb1a1e0..bc0b41026a 100644 --- a/extensions/keccak256/circuit/src/lib.rs +++ b/extensions/keccak256/circuit/src/lib.rs @@ -1,5 +1,4 @@ #![cfg_attr(feature = "tco", allow(incomplete_features))] -#![cfg_attr(feature = "tco", feature(likely_unlikely))] #![cfg_attr(feature = "tco", feature(explicit_tail_calls))] //! Stateful keccak256 hasher. Handles full keccak sponge (padding, absorb, keccak-f) on //! variable length inputs read from VM memory. diff --git a/extensions/native/circuit/src/lib.rs b/extensions/native/circuit/src/lib.rs index 6c9e4b930d..94f4a7d75f 100644 --- a/extensions/native/circuit/src/lib.rs +++ b/extensions/native/circuit/src/lib.rs @@ -1,5 +1,4 @@ #![cfg_attr(feature = "tco", allow(incomplete_features))] -#![cfg_attr(feature = "tco", feature(likely_unlikely))] #![cfg_attr(feature = "tco", feature(explicit_tail_calls))] use openvm_circuit::{ diff --git a/extensions/pairing/circuit/src/lib.rs b/extensions/pairing/circuit/src/lib.rs index 4207a41534..58e6527345 100644 --- a/extensions/pairing/circuit/src/lib.rs +++ b/extensions/pairing/circuit/src/lib.rs @@ -1,5 +1,4 @@ #![cfg_attr(feature = "tco", allow(incomplete_features))] -#![cfg_attr(feature = "tco", feature(likely_unlikely))] #![cfg_attr(feature = "tco", feature(explicit_tail_calls))] pub use openvm_pairing_guest::{ diff --git a/extensions/rv32im/circuit/src/lib.rs b/extensions/rv32im/circuit/src/lib.rs index 68c4d2a295..38a6c55747 100644 --- a/extensions/rv32im/circuit/src/lib.rs +++ b/extensions/rv32im/circuit/src/lib.rs @@ -1,5 +1,4 @@ #![cfg_attr(feature = "tco", allow(incomplete_features))] -#![cfg_attr(feature = "tco", feature(likely_unlikely))] #![cfg_attr(feature = "tco", feature(explicit_tail_calls))] use openvm_circuit::{ arch::{ diff --git a/extensions/sha256/circuit/src/lib.rs b/extensions/sha256/circuit/src/lib.rs index 484f89b234..7e1676702d 100644 --- a/extensions/sha256/circuit/src/lib.rs +++ b/extensions/sha256/circuit/src/lib.rs @@ -1,5 +1,4 @@ #![cfg_attr(feature = "tco", allow(incomplete_features))] -#![cfg_attr(feature = "tco", feature(likely_unlikely))] #![cfg_attr(feature = "tco", feature(explicit_tail_calls))] use std::result::Result; From 9d9aa8a8330515f2c427ee09ad2960ccce06bd5e Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 14:50:37 -0700 Subject: [PATCH 09/36] feat: try become keyword again --- crates/vm/derive/src/tco.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/vm/derive/src/tco.rs b/crates/vm/derive/src/tco.rs index 1ec376e777..5fa5b848f1 100644 --- a/crates/vm/derive/src/tco.rs +++ b/crates/vm/derive/src/tco.rs @@ -67,8 +67,7 @@ pub fn tco_impl(item: TokenStream) -> TokenStream { } let next_handler = next_handler.unwrap_unchecked(); - // The `become` keyword has a bug that is not re-passing the `interpreter`, `exec_state` references properly. But llvm seems to almost always guarantee tail call elimination when the function signature is the same as the current function. - next_handler(interpreter, exec_state) + become next_handler(interpreter, exec_state) } }; From 4c1de9d267d776fb4a1fcedd659a59324f8c1582 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:09:55 -0700 Subject: [PATCH 10/36] chore: propagate tco feature --- benchmarks/execute/Cargo.toml | 1 + benchmarks/prove/Cargo.toml | 1 + crates/cli/Cargo.toml | 1 + crates/vm/src/system/phantom/execution.rs | 3 ++- crates/vm/src/system/public_values/execution.rs | 3 ++- 5 files changed, 7 insertions(+), 2 deletions(-) diff --git a/benchmarks/execute/Cargo.toml b/benchmarks/execute/Cargo.toml index 76ca243c2d..bf690ab9ed 100644 --- a/benchmarks/execute/Cargo.toml +++ b/benchmarks/execute/Cargo.toml @@ -45,6 +45,7 @@ divan = { package = "codspeed-divan-compat", version = "3.0.2" } [features] default = ["jemalloc"] +tco = ["openvm-sdk/tco"] mimalloc = ["openvm-circuit/mimalloc"] jemalloc = ["openvm-circuit/jemalloc"] jemalloc-prof = ["openvm-circuit/jemalloc-prof"] diff --git a/benchmarks/prove/Cargo.toml b/benchmarks/prove/Cargo.toml index 88f0784e95..b7ee8ee3ac 100644 --- a/benchmarks/prove/Cargo.toml +++ b/benchmarks/prove/Cargo.toml @@ -35,6 +35,7 @@ metrics.workspace = true [features] default = ["parallel", "jemalloc", "metrics", "evm"] metrics = ["openvm-sdk/metrics"] +tco = ["openvm-sdk/tco"] perf-metrics = ["openvm-sdk/perf-metrics", "metrics"] stark-debug = ["openvm-sdk/stark-debug"] # runs leaf aggregation benchmarks: diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 4983992663..66363ecc0d 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -45,6 +45,7 @@ default = ["parallel", "jemalloc", "evm-verify", "metrics"] evm-prove = ["openvm-sdk/evm-prove"] evm-verify = ["evm-prove", "openvm-sdk/evm-verify"] metrics = ["openvm-sdk/metrics"] +tco = ["openvm-sdk/tco"] # for guest profiling: perf-metrics = ["openvm-sdk/perf-metrics", "metrics"] # performance features: diff --git a/crates/vm/src/system/phantom/execution.rs b/crates/vm/src/system/phantom/execution.rs index 56b8cc286f..a0fe5c7b2b 100644 --- a/crates/vm/src/system/phantom/execution.rs +++ b/crates/vm/src/system/phantom/execution.rs @@ -8,9 +8,10 @@ use openvm_stark_backend::p3_field::PrimeField32; use rand::rngs::StdRng; #[cfg(feature = "tco")] -use crate::arch::{create_tco_handler, Handler}; +use crate::arch::Handler; use crate::{ arch::{ + create_tco_handler, execution_mode::{ExecutionCtxTrait, MeteredExecutionCtxTrait}, E2PreCompute, ExecuteFunc, ExecutionError, Executor, MeteredExecutor, PhantomSubExecutor, StaticProgramError, Streams, VmExecState, diff --git a/crates/vm/src/system/public_values/execution.rs b/crates/vm/src/system/public_values/execution.rs index d976c02490..29ae9e7a5f 100644 --- a/crates/vm/src/system/public_values/execution.rs +++ b/crates/vm/src/system/public_values/execution.rs @@ -8,9 +8,10 @@ use openvm_stark_backend::p3_field::PrimeField32; use super::PublicValuesExecutor; #[cfg(feature = "tco")] -use crate::arch::{create_tco_handler, Handler}; +use crate::arch::Handler; use crate::{ arch::{ + create_tco_handler, execution_mode::{ExecutionCtxTrait, MeteredExecutionCtxTrait}, E2PreCompute, ExecuteFunc, Executor, MeteredExecutor, StaticProgramError, VmExecState, }, From 76aa23d87054b612d1d462324cd9361aa7f70e5c Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:52:27 -0700 Subject: [PATCH 11/36] feat: use custom macros to reduce code in ecc execution --- .../src/weierstrass_chip/add_ne/execution.rs | 225 +++------------- .../src/weierstrass_chip/double/execution.rs | 242 ++++-------------- 2 files changed, 91 insertions(+), 376 deletions(-) diff --git a/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs b/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs index 9310bf970b..f6dc9ea12a 100644 --- a/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs +++ b/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs @@ -92,29 +92,14 @@ impl<'a, const BLOCKS: usize, const BLOCK_SIZE: usize> EcAddNeExecutor Executor - for EcAddNeExecutor -{ - #[cfg(feature = "tco")] - fn handler( - &self, - pc: u32, - inst: &Instruction, - data: &mut [u8], - ) -> Result, StaticProgramError> - where - Ctx: ExecutionCtxTrait, - { - let pre_compute: &mut EcAddNePreCompute = data.borrow_mut(); - - let is_setup = self.pre_compute_impl(pc, inst, pre_compute)?; - +macro_rules! dispatch { + ($execute_impl:ident, $pre_compute:ident, $is_setup:ident) => { if let Some(field_type) = { - let modulus = &pre_compute.expr.builder.prime; + let modulus = &$pre_compute.expr.builder.prime; get_field_type(modulus) } { - match (is_setup, field_type) { - (true, FieldType::K256Coordinate) => Ok(execute_e1_tco_handler::< + match ($is_setup, field_type) { + (true, FieldType::K256Coordinate) => Ok($execute_impl::< _, _, BLOCKS, @@ -122,7 +107,7 @@ impl Executor { FieldType::K256Coordinate as u8 }, true, >), - (true, FieldType::P256Coordinate) => Ok(execute_e1_tco_handler::< + (true, FieldType::P256Coordinate) => Ok($execute_impl::< _, _, BLOCKS, @@ -130,7 +115,7 @@ impl Executor { FieldType::P256Coordinate as u8 }, true, >), - (true, FieldType::BN254Coordinate) => Ok(execute_e1_tco_handler::< + (true, FieldType::BN254Coordinate) => Ok($execute_impl::< _, _, BLOCKS, @@ -138,7 +123,7 @@ impl Executor { FieldType::BN254Coordinate as u8 }, true, >), - (true, FieldType::BLS12_381Coordinate) => Ok(execute_e1_tco_handler::< + (true, FieldType::BLS12_381Coordinate) => Ok($execute_impl::< _, _, BLOCKS, @@ -146,7 +131,7 @@ impl Executor { FieldType::BLS12_381Coordinate as u8 }, true, >), - (false, FieldType::K256Coordinate) => Ok(execute_e1_tco_handler::< + (false, FieldType::K256Coordinate) => Ok($execute_impl::< _, _, BLOCKS, @@ -154,7 +139,7 @@ impl Executor { FieldType::K256Coordinate as u8 }, false, >), - (false, FieldType::P256Coordinate) => Ok(execute_e1_tco_handler::< + (false, FieldType::P256Coordinate) => Ok($execute_impl::< _, _, BLOCKS, @@ -162,7 +147,7 @@ impl Executor { FieldType::P256Coordinate as u8 }, false, >), - (false, FieldType::BN254Coordinate) => Ok(execute_e1_tco_handler::< + (false, FieldType::BN254Coordinate) => Ok($execute_impl::< _, _, BLOCKS, @@ -170,7 +155,7 @@ impl Executor { FieldType::BN254Coordinate as u8 }, false, >), - (false, FieldType::BLS12_381Coordinate) => Ok(execute_e1_tco_handler::< + (false, FieldType::BLS12_381Coordinate) => Ok($execute_impl::< _, _, BLOCKS, @@ -180,13 +165,16 @@ impl Executor >), _ => panic!("Unsupported field type"), } - } else if is_setup { - Ok(execute_e1_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, true>) + } else if $is_setup { + Ok($execute_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, true>) } else { - Ok(execute_e1_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, false>) + Ok($execute_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, false>) } - } - + }; +} +impl Executor + for EcAddNeExecutor +{ #[inline(always)] fn pre_compute_size(&self) -> usize { std::mem::size_of::() @@ -202,85 +190,25 @@ impl Executor Ctx: ExecutionCtxTrait, { let pre_compute: &mut EcAddNePreCompute = data.borrow_mut(); + let is_setup = self.pre_compute_impl(pc, inst, pre_compute)?; + + dispatch!(execute_e1_impl, pre_compute, is_setup) + } + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut EcAddNePreCompute = data.borrow_mut(); let is_setup = self.pre_compute_impl(pc, inst, pre_compute)?; - if let Some(field_type) = { - let modulus = &pre_compute.expr.builder.prime; - get_field_type(modulus) - } { - match (is_setup, field_type) { - (true, FieldType::K256Coordinate) => Ok(execute_e1_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { FieldType::K256Coordinate as u8 }, - true, - >), - (true, FieldType::P256Coordinate) => Ok(execute_e1_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { FieldType::P256Coordinate as u8 }, - true, - >), - (true, FieldType::BN254Coordinate) => Ok(execute_e1_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { FieldType::BN254Coordinate as u8 }, - true, - >), - (true, FieldType::BLS12_381Coordinate) => Ok(execute_e1_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { FieldType::BLS12_381Coordinate as u8 }, - true, - >), - (false, FieldType::K256Coordinate) => Ok(execute_e1_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { FieldType::K256Coordinate as u8 }, - false, - >), - (false, FieldType::P256Coordinate) => Ok(execute_e1_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { FieldType::P256Coordinate as u8 }, - false, - >), - (false, FieldType::BN254Coordinate) => Ok(execute_e1_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { FieldType::BN254Coordinate as u8 }, - false, - >), - (false, FieldType::BLS12_381Coordinate) => Ok(execute_e1_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { FieldType::BLS12_381Coordinate as u8 }, - false, - >), - _ => panic!("Unsupported field type"), - } - } else if is_setup { - Ok(execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, true>) - } else { - Ok(execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, false>) - } + dispatch!(execute_e1_tco_handler, pre_compute, is_setup) } } @@ -305,84 +233,9 @@ impl MeteredExecu let pre_compute: &mut E2PreCompute = data.borrow_mut(); pre_compute.chip_idx = chip_idx as u32; - let is_setup = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; - - if let Some(field_type) = { - let modulus = &pre_compute.data.expr.builder.prime; - get_field_type(modulus) - } { - match (is_setup, field_type) { - (true, FieldType::K256Coordinate) => Ok(execute_e2_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { FieldType::K256Coordinate as u8 }, - true, - >), - (true, FieldType::P256Coordinate) => Ok(execute_e2_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { FieldType::P256Coordinate as u8 }, - true, - >), - (true, FieldType::BN254Coordinate) => Ok(execute_e2_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { FieldType::BN254Coordinate as u8 }, - true, - >), - (true, FieldType::BLS12_381Coordinate) => Ok(execute_e2_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { FieldType::BLS12_381Coordinate as u8 }, - true, - >), - (false, FieldType::K256Coordinate) => Ok(execute_e2_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { FieldType::K256Coordinate as u8 }, - false, - >), - (false, FieldType::P256Coordinate) => Ok(execute_e2_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { FieldType::P256Coordinate as u8 }, - false, - >), - (false, FieldType::BN254Coordinate) => Ok(execute_e2_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { FieldType::BN254Coordinate as u8 }, - false, - >), - (false, FieldType::BLS12_381Coordinate) => Ok(execute_e2_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { FieldType::BLS12_381Coordinate as u8 }, - false, - >), - _ => panic!("Unsupported field type"), - } - } else if is_setup { - Ok(execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, true>) - } else { - Ok(execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, false>) - } + let pre_compute_pure = &mut pre_compute.data; + let is_setup = self.pre_compute_impl(pc, inst, pre_compute_pure)?; + dispatch!(execute_e2_impl, pre_compute_pure, is_setup) } } diff --git a/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs b/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs index b968c8a2a2..6459ae8395 100644 --- a/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs +++ b/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs @@ -84,54 +84,24 @@ impl<'a, const BLOCKS: usize, const BLOCK_SIZE: usize> EcDoubleExecutor Executor - for EcDoubleExecutor -{ - #[cfg(feature = "tco")] - fn handler( - &self, - pc: u32, - inst: &Instruction, - data: &mut [u8], - ) -> Result, StaticProgramError> - where - Ctx: ExecutionCtxTrait, - { - let pre_compute: &mut EcDoublePreCompute = data.borrow_mut(); - - let is_setup = self.pre_compute_impl(pc, inst, pre_compute)?; - +macro_rules! dispatch { + ($execute_impl:ident,$pre_compute:ident,$is_setup:ident) => { if let Some(curve_type) = { - let modulus = &pre_compute.expr.builder.prime; - let a_coeff = &pre_compute.expr.setup_values[0]; + let modulus = &$pre_compute.expr.builder.prime; + let a_coeff = &$pre_compute.expr.setup_values[0]; get_curve_type(modulus, a_coeff) } { - match (is_setup, curve_type) { - (true, CurveType::K256) => Ok(execute_e1_tco_handler::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { CurveType::K256 as u8 }, - true, - >), - (true, CurveType::P256) => Ok(execute_e1_tco_handler::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { CurveType::P256 as u8 }, - true, - >), - (true, CurveType::BN254) => Ok(execute_e1_tco_handler::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { CurveType::BN254 as u8 }, - true, - >), - (true, CurveType::BLS12_381) => Ok(execute_e1_tco_handler::< + match ($is_setup, curve_type) { + (true, CurveType::K256) => { + Ok($execute_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::K256 as u8 }, true>) + } + (true, CurveType::P256) => { + Ok($execute_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::P256 as u8 }, true>) + } + (true, CurveType::BN254) => { + Ok($execute_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::BN254 as u8 }, true>) + } + (true, CurveType::BLS12_381) => Ok($execute_impl::< _, _, BLOCKS, @@ -139,31 +109,16 @@ impl Executor { CurveType::BLS12_381 as u8 }, true, >), - (false, CurveType::K256) => Ok(execute_e1_tco_handler::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { CurveType::K256 as u8 }, - false, - >), - (false, CurveType::P256) => Ok(execute_e1_tco_handler::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { CurveType::P256 as u8 }, - false, - >), - (false, CurveType::BN254) => Ok(execute_e1_tco_handler::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { CurveType::BN254 as u8 }, - false, - >), - (false, CurveType::BLS12_381) => Ok(execute_e1_tco_handler::< + (false, CurveType::K256) => { + Ok($execute_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::K256 as u8 }, false>) + } + (false, CurveType::P256) => { + Ok($execute_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::P256 as u8 }, false>) + } + (false, CurveType::BN254) => { + Ok($execute_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::BN254 as u8 }, false>) + } + (false, CurveType::BLS12_381) => Ok($execute_impl::< _, _, BLOCKS, @@ -172,13 +127,17 @@ impl Executor false, >), } - } else if is_setup { - Ok(execute_e1_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, true>) + } else if $is_setup { + Ok($execute_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, true>) } else { - Ok(execute_e1_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, false>) + Ok($execute_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, false>) } - } + }; +} +impl Executor + for EcDoubleExecutor +{ #[inline(always)] fn pre_compute_size(&self) -> usize { std::mem::size_of::() @@ -194,66 +153,25 @@ impl Executor Ctx: ExecutionCtxTrait, { let pre_compute: &mut EcDoublePreCompute = data.borrow_mut(); + let is_setup = self.pre_compute_impl(pc, inst, pre_compute)?; + + dispatch!(execute_e1_impl, pre_compute, is_setup) + } + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut EcDoublePreCompute = data.borrow_mut(); let is_setup = self.pre_compute_impl(pc, inst, pre_compute)?; - if let Some(curve_type) = { - let modulus = &pre_compute.expr.builder.prime; - let a_coeff = &pre_compute.expr.setup_values[0]; - get_curve_type(modulus, a_coeff) - } { - match (is_setup, curve_type) { - (true, CurveType::K256) => { - Ok(execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::K256 as u8 }, true>) - } - (true, CurveType::P256) => { - Ok(execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::P256 as u8 }, true>) - } - (true, CurveType::BN254) => { - Ok( - execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::BN254 as u8 }, true>, - ) - } - (true, CurveType::BLS12_381) => Ok(execute_e1_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { CurveType::BLS12_381 as u8 }, - true, - >), - (false, CurveType::K256) => { - Ok( - execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::K256 as u8 }, false>, - ) - } - (false, CurveType::P256) => { - Ok( - execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::P256 as u8 }, false>, - ) - } - (false, CurveType::BN254) => Ok(execute_e1_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { CurveType::BN254 as u8 }, - false, - >), - (false, CurveType::BLS12_381) => Ok(execute_e1_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { CurveType::BLS12_381 as u8 }, - false, - >), - } - } else if is_setup { - Ok(execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, true>) - } else { - Ok(execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, false>) - } + dispatch!(execute_e1_tco_handler, pre_compute, is_setup) } } @@ -277,66 +195,10 @@ impl MeteredExecu { let pre_compute: &mut E2PreCompute = data.borrow_mut(); pre_compute.chip_idx = chip_idx as u32; + let pre_compute_pure = &mut pre_compute.data; + let is_setup = self.pre_compute_impl(pc, inst, pre_compute_pure)?; - let is_setup = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; - - if let Some(curve_type) = { - let modulus = &pre_compute.data.expr.builder.prime; - let a_coeff = &pre_compute.data.expr.setup_values[0]; - get_curve_type(modulus, a_coeff) - } { - match (is_setup, curve_type) { - (true, CurveType::K256) => { - Ok(execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::K256 as u8 }, true>) - } - (true, CurveType::P256) => { - Ok(execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::P256 as u8 }, true>) - } - (true, CurveType::BN254) => { - Ok( - execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::BN254 as u8 }, true>, - ) - } - (true, CurveType::BLS12_381) => Ok(execute_e2_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { CurveType::BLS12_381 as u8 }, - true, - >), - (false, CurveType::K256) => { - Ok( - execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::K256 as u8 }, false>, - ) - } - (false, CurveType::P256) => { - Ok( - execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::P256 as u8 }, false>, - ) - } - (false, CurveType::BN254) => Ok(execute_e2_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { CurveType::BN254 as u8 }, - false, - >), - (false, CurveType::BLS12_381) => Ok(execute_e2_impl::< - _, - _, - BLOCKS, - BLOCK_SIZE, - { CurveType::BLS12_381 as u8 }, - false, - >), - } - } else if is_setup { - Ok(execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, true>) - } else { - Ok(execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, false>) - } + dispatch!(execute_e2_impl, pre_compute_pure, is_setup) } } From a82dfb2a9b7a01e38f1b5763ef1783b8c9cd9264 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 17:52:39 -0700 Subject: [PATCH 12/36] refactor: use local dispatch! macros to reduce code duplication --- .../vm/src/system/public_values/execution.rs | 35 ++-- .../native/circuit/src/branch_eq/execution.rs | 54 ++--- .../circuit/src/field_arithmetic/execution.rs | 184 ++++-------------- .../circuit/src/field_extension/execution.rs | 42 ++-- .../rv32im/circuit/src/divrem/execution.rs | 35 ++-- .../rv32im/circuit/src/shift/execution.rs | 43 ++-- 6 files changed, 120 insertions(+), 273 deletions(-) diff --git a/crates/vm/src/system/public_values/execution.rs b/crates/vm/src/system/public_values/execution.rs index 29ae9e7a5f..ed32e0936c 100644 --- a/crates/vm/src/system/public_values/execution.rs +++ b/crates/vm/src/system/public_values/execution.rs @@ -60,6 +60,17 @@ where } } +macro_rules! dispatch { + ($execute_impl:ident, $b_is_imm:ident, $c_is_imm:ident) => { + match ($b_is_imm, $c_is_imm) { + (true, true) => Ok($execute_impl::<_, _, true, true>), + (true, false) => Ok($execute_impl::<_, _, true, false>), + (false, true) => Ok($execute_impl::<_, _, false, true>), + (false, false) => Ok($execute_impl::<_, _, false, false>), + } + }; +} + impl Executor for PublicValuesExecutor where F: PrimeField32, @@ -82,13 +93,7 @@ where let data: &mut PublicValuesPreCompute = data.borrow_mut(); let (b_is_imm, c_is_imm) = self.pre_compute_impl(inst, data); - let fn_ptr = match (b_is_imm, c_is_imm) { - (true, true) => execute_e1_impl::<_, _, true, true>, - (true, false) => execute_e1_impl::<_, _, true, false>, - (false, true) => execute_e1_impl::<_, _, false, true>, - (false, false) => execute_e1_impl::<_, _, false, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, b_is_imm, c_is_imm) } #[cfg(feature = "tco")] @@ -104,13 +109,7 @@ where let data: &mut PublicValuesPreCompute = data.borrow_mut(); let (b_is_imm, c_is_imm) = self.pre_compute_impl(inst, data); - let fn_ptr = match (b_is_imm, c_is_imm) { - (true, true) => execute_e1_tco_handler::<_, _, true, true>, - (true, false) => execute_e1_tco_handler::<_, _, true, false>, - (false, true) => execute_e1_tco_handler::<_, _, false, true>, - (false, false) => execute_e1_tco_handler::<_, _, false, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, b_is_imm, c_is_imm) } } @@ -136,13 +135,7 @@ where data.chip_idx = chip_idx as u32; let (b_is_imm, c_is_imm) = self.pre_compute_impl(inst, &mut data.data); - let fn_ptr = match (b_is_imm, c_is_imm) { - (true, true) => execute_e2_impl::<_, _, true, true>, - (true, false) => execute_e2_impl::<_, _, true, false>, - (false, true) => execute_e2_impl::<_, _, false, true>, - (false, false) => execute_e2_impl::<_, _, false, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, b_is_imm, c_is_imm) } } diff --git a/extensions/native/circuit/src/branch_eq/execution.rs b/extensions/native/circuit/src/branch_eq/execution.rs index 033630cecd..18955608a7 100644 --- a/extensions/native/circuit/src/branch_eq/execution.rs +++ b/extensions/native/circuit/src/branch_eq/execution.rs @@ -75,6 +75,21 @@ impl NativeBranchEqualExecutor { } } +macro_rules! dispatch { + ($execute_impl:ident, $a_is_imm:ident, $b_is_imm:ident, $is_bne:ident) => { + match ($a_is_imm, $b_is_imm, $is_bne) { + (true, true, true) => Ok($execute_impl::<_, _, true, true, true>), + (true, true, false) => Ok($execute_impl::<_, _, true, true, false>), + (true, false, true) => Ok($execute_impl::<_, _, true, false, true>), + (true, false, false) => Ok($execute_impl::<_, _, true, false, false>), + (false, true, true) => Ok($execute_impl::<_, _, false, true, true>), + (false, true, false) => Ok($execute_impl::<_, _, false, true, false>), + (false, false, true) => Ok($execute_impl::<_, _, false, false, true>), + (false, false, false) => Ok($execute_impl::<_, _, false, false, false>), + } + }; +} + impl Executor for NativeBranchEqualExecutor where F: PrimeField32, @@ -93,18 +108,7 @@ where let (a_is_imm, b_is_imm, is_bne) = self.pre_compute_impl(pc, inst, pre_compute)?; - let fn_ptr = match (a_is_imm, b_is_imm, is_bne) { - (true, true, true) => execute_e1_tco_handler::<_, _, true, true, true>, - (true, true, false) => execute_e1_tco_handler::<_, _, true, true, false>, - (true, false, true) => execute_e1_tco_handler::<_, _, true, false, true>, - (true, false, false) => execute_e1_tco_handler::<_, _, true, false, false>, - (false, true, true) => execute_e1_tco_handler::<_, _, false, true, true>, - (false, true, false) => execute_e1_tco_handler::<_, _, false, true, false>, - (false, false, true) => execute_e1_tco_handler::<_, _, false, false, true>, - (false, false, false) => execute_e1_tco_handler::<_, _, false, false, false>, - }; - - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, a_is_imm, b_is_imm, is_bne) } #[inline(always)] @@ -123,18 +127,7 @@ where let (a_is_imm, b_is_imm, is_bne) = self.pre_compute_impl(pc, inst, pre_compute)?; - let fn_ptr = match (a_is_imm, b_is_imm, is_bne) { - (true, true, true) => execute_e1_impl::<_, _, true, true, true>, - (true, true, false) => execute_e1_impl::<_, _, true, true, false>, - (true, false, true) => execute_e1_impl::<_, _, true, false, true>, - (true, false, false) => execute_e1_impl::<_, _, true, false, false>, - (false, true, true) => execute_e1_impl::<_, _, false, true, true>, - (false, true, false) => execute_e1_impl::<_, _, false, true, false>, - (false, false, true) => execute_e1_impl::<_, _, false, false, true>, - (false, false, false) => execute_e1_impl::<_, _, false, false, false>, - }; - - Ok(fn_ptr) + dispatch!(execute_e1_impl, a_is_imm, b_is_imm, is_bne) } } @@ -161,18 +154,7 @@ where let (a_is_imm, b_is_imm, is_bne) = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; - let fn_ptr = match (a_is_imm, b_is_imm, is_bne) { - (true, true, true) => execute_e2_impl::<_, _, true, true, true>, - (true, true, false) => execute_e2_impl::<_, _, true, true, false>, - (true, false, true) => execute_e2_impl::<_, _, true, false, true>, - (true, false, false) => execute_e2_impl::<_, _, true, false, false>, - (false, true, true) => execute_e2_impl::<_, _, false, true, true>, - (false, true, false) => execute_e2_impl::<_, _, false, true, false>, - (false, false, true) => execute_e2_impl::<_, _, false, false, true>, - (false, false, false) => execute_e2_impl::<_, _, false, false, false>, - }; - - Ok(fn_ptr) + dispatch!(execute_e2_impl, a_is_imm, b_is_imm, is_bne) } } diff --git a/extensions/native/circuit/src/field_arithmetic/execution.rs b/extensions/native/circuit/src/field_arithmetic/execution.rs index b95b778d7a..b2263ee55d 100644 --- a/extensions/native/circuit/src/field_arithmetic/execution.rs +++ b/extensions/native/circuit/src/field_arithmetic/execution.rs @@ -76,76 +76,80 @@ impl FieldArithmeticCoreExecutor { } } -impl Executor for FieldArithmeticCoreExecutor -where - F: PrimeField32, -{ - #[cfg(feature = "tco")] - fn handler( - &self, - pc: u32, - inst: &Instruction, - data: &mut [u8], - ) -> Result, StaticProgramError> - where - Ctx: ExecutionCtxTrait, - { - let pre_compute: &mut FieldArithmeticPreCompute = data.borrow_mut(); - - let (a_is_imm, b_is_imm, local_opcode) = self.pre_compute_impl(pc, inst, pre_compute)?; - - let fn_ptr = match (local_opcode, a_is_imm, b_is_imm) { +macro_rules! dispatch { + ($execute_impl:ident, $local_opcode:ident, $a_is_imm:ident, $b_is_imm:ident) => { + match ($local_opcode, $a_is_imm, $b_is_imm) { (FieldArithmeticOpcode::ADD, true, true) => { - execute_e1_tco_handler::<_, _, true, true, { FieldArithmeticOpcode::ADD as u8 }> + Ok($execute_impl::<_, _, true, true, { FieldArithmeticOpcode::ADD as u8 }>) } (FieldArithmeticOpcode::ADD, true, false) => { - execute_e1_tco_handler::<_, _, true, false, { FieldArithmeticOpcode::ADD as u8 }> + Ok($execute_impl::<_, _, true, false, { FieldArithmeticOpcode::ADD as u8 }>) } (FieldArithmeticOpcode::ADD, false, true) => { - execute_e1_tco_handler::<_, _, false, true, { FieldArithmeticOpcode::ADD as u8 }> + Ok($execute_impl::<_, _, false, true, { FieldArithmeticOpcode::ADD as u8 }>) } (FieldArithmeticOpcode::ADD, false, false) => { - execute_e1_tco_handler::<_, _, false, false, { FieldArithmeticOpcode::ADD as u8 }> + Ok($execute_impl::<_, _, false, false, { FieldArithmeticOpcode::ADD as u8 }>) } (FieldArithmeticOpcode::SUB, true, true) => { - execute_e1_tco_handler::<_, _, true, true, { FieldArithmeticOpcode::SUB as u8 }> + Ok($execute_impl::<_, _, true, true, { FieldArithmeticOpcode::SUB as u8 }>) } (FieldArithmeticOpcode::SUB, true, false) => { - execute_e1_tco_handler::<_, _, true, false, { FieldArithmeticOpcode::SUB as u8 }> + Ok($execute_impl::<_, _, true, false, { FieldArithmeticOpcode::SUB as u8 }>) } (FieldArithmeticOpcode::SUB, false, true) => { - execute_e1_tco_handler::<_, _, false, true, { FieldArithmeticOpcode::SUB as u8 }> + Ok($execute_impl::<_, _, false, true, { FieldArithmeticOpcode::SUB as u8 }>) } (FieldArithmeticOpcode::SUB, false, false) => { - execute_e1_tco_handler::<_, _, false, false, { FieldArithmeticOpcode::SUB as u8 }> + Ok($execute_impl::<_, _, false, false, { FieldArithmeticOpcode::SUB as u8 }>) } (FieldArithmeticOpcode::MUL, true, true) => { - execute_e1_tco_handler::<_, _, true, true, { FieldArithmeticOpcode::MUL as u8 }> + Ok($execute_impl::<_, _, true, true, { FieldArithmeticOpcode::MUL as u8 }>) } (FieldArithmeticOpcode::MUL, true, false) => { - execute_e1_tco_handler::<_, _, true, false, { FieldArithmeticOpcode::MUL as u8 }> + Ok($execute_impl::<_, _, true, false, { FieldArithmeticOpcode::MUL as u8 }>) } (FieldArithmeticOpcode::MUL, false, true) => { - execute_e1_tco_handler::<_, _, false, true, { FieldArithmeticOpcode::MUL as u8 }> + Ok($execute_impl::<_, _, false, true, { FieldArithmeticOpcode::MUL as u8 }>) } (FieldArithmeticOpcode::MUL, false, false) => { - execute_e1_tco_handler::<_, _, false, false, { FieldArithmeticOpcode::MUL as u8 }> + Ok($execute_impl::<_, _, false, false, { FieldArithmeticOpcode::MUL as u8 }>) } (FieldArithmeticOpcode::DIV, true, true) => { - execute_e1_tco_handler::<_, _, true, true, { FieldArithmeticOpcode::DIV as u8 }> + Ok($execute_impl::<_, _, true, true, { FieldArithmeticOpcode::DIV as u8 }>) } (FieldArithmeticOpcode::DIV, true, false) => { - execute_e1_tco_handler::<_, _, true, false, { FieldArithmeticOpcode::DIV as u8 }> + Ok($execute_impl::<_, _, true, false, { FieldArithmeticOpcode::DIV as u8 }>) } (FieldArithmeticOpcode::DIV, false, true) => { - execute_e1_tco_handler::<_, _, false, true, { FieldArithmeticOpcode::DIV as u8 }> + Ok($execute_impl::<_, _, false, true, { FieldArithmeticOpcode::DIV as u8 }>) } (FieldArithmeticOpcode::DIV, false, false) => { - execute_e1_tco_handler::<_, _, false, false, { FieldArithmeticOpcode::DIV as u8 }> + Ok($execute_impl::<_, _, false, false, { FieldArithmeticOpcode::DIV as u8 }>) } - }; + } + }; +} + +impl Executor for FieldArithmeticCoreExecutor +where + F: PrimeField32, +{ + #[cfg(feature = "tco")] + fn handler( + &self, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: ExecutionCtxTrait, + { + let pre_compute: &mut FieldArithmeticPreCompute = data.borrow_mut(); - Ok(fn_ptr) + let (a_is_imm, b_is_imm, local_opcode) = self.pre_compute_impl(pc, inst, pre_compute)?; + + dispatch!(execute_e1_tco_handler, local_opcode, a_is_imm, b_is_imm) } #[inline(always)] @@ -164,58 +168,7 @@ where let (a_is_imm, b_is_imm, local_opcode) = self.pre_compute_impl(pc, inst, pre_compute)?; - let fn_ptr = match (local_opcode, a_is_imm, b_is_imm) { - (FieldArithmeticOpcode::ADD, true, true) => { - execute_e1_impl::<_, _, true, true, { FieldArithmeticOpcode::ADD as u8 }> - } - (FieldArithmeticOpcode::ADD, true, false) => { - execute_e1_impl::<_, _, true, false, { FieldArithmeticOpcode::ADD as u8 }> - } - (FieldArithmeticOpcode::ADD, false, true) => { - execute_e1_impl::<_, _, false, true, { FieldArithmeticOpcode::ADD as u8 }> - } - (FieldArithmeticOpcode::ADD, false, false) => { - execute_e1_impl::<_, _, false, false, { FieldArithmeticOpcode::ADD as u8 }> - } - (FieldArithmeticOpcode::SUB, true, true) => { - execute_e1_impl::<_, _, true, true, { FieldArithmeticOpcode::SUB as u8 }> - } - (FieldArithmeticOpcode::SUB, true, false) => { - execute_e1_impl::<_, _, true, false, { FieldArithmeticOpcode::SUB as u8 }> - } - (FieldArithmeticOpcode::SUB, false, true) => { - execute_e1_impl::<_, _, false, true, { FieldArithmeticOpcode::SUB as u8 }> - } - (FieldArithmeticOpcode::SUB, false, false) => { - execute_e1_impl::<_, _, false, false, { FieldArithmeticOpcode::SUB as u8 }> - } - (FieldArithmeticOpcode::MUL, true, true) => { - execute_e1_impl::<_, _, true, true, { FieldArithmeticOpcode::MUL as u8 }> - } - (FieldArithmeticOpcode::MUL, true, false) => { - execute_e1_impl::<_, _, true, false, { FieldArithmeticOpcode::MUL as u8 }> - } - (FieldArithmeticOpcode::MUL, false, true) => { - execute_e1_impl::<_, _, false, true, { FieldArithmeticOpcode::MUL as u8 }> - } - (FieldArithmeticOpcode::MUL, false, false) => { - execute_e1_impl::<_, _, false, false, { FieldArithmeticOpcode::MUL as u8 }> - } - (FieldArithmeticOpcode::DIV, true, true) => { - execute_e1_impl::<_, _, true, true, { FieldArithmeticOpcode::DIV as u8 }> - } - (FieldArithmeticOpcode::DIV, true, false) => { - execute_e1_impl::<_, _, true, false, { FieldArithmeticOpcode::DIV as u8 }> - } - (FieldArithmeticOpcode::DIV, false, true) => { - execute_e1_impl::<_, _, false, true, { FieldArithmeticOpcode::DIV as u8 }> - } - (FieldArithmeticOpcode::DIV, false, false) => { - execute_e1_impl::<_, _, false, false, { FieldArithmeticOpcode::DIV as u8 }> - } - }; - - Ok(fn_ptr) + dispatch!(execute_e1_impl, local_opcode, a_is_imm, b_is_imm) } } @@ -242,58 +195,7 @@ where let (a_is_imm, b_is_imm, local_opcode) = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; - let fn_ptr = match (local_opcode, a_is_imm, b_is_imm) { - (FieldArithmeticOpcode::ADD, true, true) => { - execute_e2_impl::<_, _, true, true, { FieldArithmeticOpcode::ADD as u8 }> - } - (FieldArithmeticOpcode::ADD, true, false) => { - execute_e2_impl::<_, _, true, false, { FieldArithmeticOpcode::ADD as u8 }> - } - (FieldArithmeticOpcode::ADD, false, true) => { - execute_e2_impl::<_, _, false, true, { FieldArithmeticOpcode::ADD as u8 }> - } - (FieldArithmeticOpcode::ADD, false, false) => { - execute_e2_impl::<_, _, false, false, { FieldArithmeticOpcode::ADD as u8 }> - } - (FieldArithmeticOpcode::SUB, true, true) => { - execute_e2_impl::<_, _, true, true, { FieldArithmeticOpcode::SUB as u8 }> - } - (FieldArithmeticOpcode::SUB, true, false) => { - execute_e2_impl::<_, _, true, false, { FieldArithmeticOpcode::SUB as u8 }> - } - (FieldArithmeticOpcode::SUB, false, true) => { - execute_e2_impl::<_, _, false, true, { FieldArithmeticOpcode::SUB as u8 }> - } - (FieldArithmeticOpcode::SUB, false, false) => { - execute_e2_impl::<_, _, false, false, { FieldArithmeticOpcode::SUB as u8 }> - } - (FieldArithmeticOpcode::MUL, true, true) => { - execute_e2_impl::<_, _, true, true, { FieldArithmeticOpcode::MUL as u8 }> - } - (FieldArithmeticOpcode::MUL, true, false) => { - execute_e2_impl::<_, _, true, false, { FieldArithmeticOpcode::MUL as u8 }> - } - (FieldArithmeticOpcode::MUL, false, true) => { - execute_e2_impl::<_, _, false, true, { FieldArithmeticOpcode::MUL as u8 }> - } - (FieldArithmeticOpcode::MUL, false, false) => { - execute_e2_impl::<_, _, false, false, { FieldArithmeticOpcode::MUL as u8 }> - } - (FieldArithmeticOpcode::DIV, true, true) => { - execute_e2_impl::<_, _, true, true, { FieldArithmeticOpcode::DIV as u8 }> - } - (FieldArithmeticOpcode::DIV, true, false) => { - execute_e2_impl::<_, _, true, false, { FieldArithmeticOpcode::DIV as u8 }> - } - (FieldArithmeticOpcode::DIV, false, true) => { - execute_e2_impl::<_, _, false, true, { FieldArithmeticOpcode::DIV as u8 }> - } - (FieldArithmeticOpcode::DIV, false, false) => { - execute_e2_impl::<_, _, false, false, { FieldArithmeticOpcode::DIV as u8 }> - } - }; - - Ok(fn_ptr) + dispatch!(execute_e2_impl, local_opcode, a_is_imm, b_is_imm) } } diff --git a/extensions/native/circuit/src/field_extension/execution.rs b/extensions/native/circuit/src/field_extension/execution.rs index 49d38b88f7..d3e706ce07 100644 --- a/extensions/native/circuit/src/field_extension/execution.rs +++ b/extensions/native/circuit/src/field_extension/execution.rs @@ -57,6 +57,18 @@ impl FieldExtensionCoreExecutor { } } +macro_rules! dispatch { + ($execute_impl:ident, $opcode:ident) => { + match $opcode { + 0 => Ok($execute_impl::<_, _, 0>), // FE4ADD + 1 => Ok($execute_impl::<_, _, 1>), // FE4SUB + 2 => Ok($execute_impl::<_, _, 2>), // BBE4MUL + 3 => Ok($execute_impl::<_, _, 3>), // BBE4DIV + _ => panic!("Invalid field extension opcode: {}", $opcode), + } + }; +} + impl Executor for FieldExtensionCoreExecutor where F: PrimeField32, @@ -75,15 +87,7 @@ where let opcode = self.pre_compute_impl(pc, inst, pre_compute)?; - let fn_ptr = match opcode { - 0 => execute_e1_tco_handler::<_, _, 0>, // FE4ADD - 1 => execute_e1_tco_handler::<_, _, 1>, // FE4SUB - 2 => execute_e1_tco_handler::<_, _, 2>, // BBE4MUL - 3 => execute_e1_tco_handler::<_, _, 3>, // BBE4DIV - _ => panic!("Invalid field extension opcode: {opcode}"), - }; - - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, opcode) } #[inline(always)] @@ -102,15 +106,7 @@ where let opcode = self.pre_compute_impl(pc, inst, pre_compute)?; - let fn_ptr = match opcode { - 0 => execute_e1_impl::<_, _, 0>, // FE4ADD - 1 => execute_e1_impl::<_, _, 1>, // FE4SUB - 2 => execute_e1_impl::<_, _, 2>, // BBE4MUL - 3 => execute_e1_impl::<_, _, 3>, // BBE4DIV - _ => panic!("Invalid field extension opcode: {opcode}"), - }; - - Ok(fn_ptr) + dispatch!(execute_e1_impl, opcode) } } @@ -136,15 +132,7 @@ where let opcode = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; - let fn_ptr = match opcode { - 0 => execute_e2_impl::<_, _, 0>, // FE4ADD - 1 => execute_e2_impl::<_, _, 1>, // FE4SUB - 2 => execute_e2_impl::<_, _, 2>, // BBE4MUL - 3 => execute_e2_impl::<_, _, 3>, // BBE4DIV - _ => panic!("Invalid field extension opcode: {opcode}"), - }; - - Ok(fn_ptr) + dispatch!(execute_e2_impl, opcode) } } diff --git a/extensions/rv32im/circuit/src/divrem/execution.rs b/extensions/rv32im/circuit/src/divrem/execution.rs index c057da312b..846431b3ad 100644 --- a/extensions/rv32im/circuit/src/divrem/execution.rs +++ b/extensions/rv32im/circuit/src/divrem/execution.rs @@ -49,6 +49,17 @@ impl DivRemExecutor { + match $local_opcode { + DivRemOpcode::DIV => Ok($execute_impl::<_, _, DivOp>), + DivRemOpcode::DIVU => Ok($execute_impl::<_, _, DivuOp>), + DivRemOpcode::REM => Ok($execute_impl::<_, _, RemOp>), + DivRemOpcode::REMU => Ok($execute_impl::<_, _, RemuOp>), + } + }; +} + impl Executor for DivRemExecutor where @@ -68,13 +79,7 @@ where ) -> Result, StaticProgramError> { let data: &mut DivRemPreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = match local_opcode { - DivRemOpcode::DIV => execute_e1_impl::<_, _, DivOp>, - DivRemOpcode::DIVU => execute_e1_impl::<_, _, DivuOp>, - DivRemOpcode::REM => execute_e1_impl::<_, _, RemOp>, - DivRemOpcode::REMU => execute_e1_impl::<_, _, RemuOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, local_opcode) } #[cfg(feature = "tco")] @@ -89,13 +94,7 @@ where { let data: &mut DivRemPreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = match local_opcode { - DivRemOpcode::DIV => execute_e1_tco_handler::<_, _, DivOp>, - DivRemOpcode::DIVU => execute_e1_tco_handler::<_, _, DivuOp>, - DivRemOpcode::REM => execute_e1_tco_handler::<_, _, RemOp>, - DivRemOpcode::REMU => execute_e1_tco_handler::<_, _, RemuOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, local_opcode) } } @@ -121,13 +120,7 @@ where let data: &mut E2PreCompute = data.borrow_mut(); data.chip_idx = chip_idx as u32; let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?; - let fn_ptr = match local_opcode { - DivRemOpcode::DIV => execute_e2_impl::<_, _, DivOp>, - DivRemOpcode::DIVU => execute_e2_impl::<_, _, DivuOp>, - DivRemOpcode::REM => execute_e2_impl::<_, _, RemOp>, - DivRemOpcode::REMU => execute_e2_impl::<_, _, RemuOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, local_opcode) } } diff --git a/extensions/rv32im/circuit/src/shift/execution.rs b/extensions/rv32im/circuit/src/shift/execution.rs index 862c8ec71f..ec627998bd 100644 --- a/extensions/rv32im/circuit/src/shift/execution.rs +++ b/extensions/rv32im/circuit/src/shift/execution.rs @@ -59,6 +59,19 @@ impl ShiftExecutor { + match ($is_imm, $shift_opcode) { + (true, ShiftOpcode::SLL) => Ok($execute_impl::<_, _, true, SllOp>), + (false, ShiftOpcode::SLL) => Ok($execute_impl::<_, _, false, SllOp>), + (true, ShiftOpcode::SRL) => Ok($execute_impl::<_, _, true, SrlOp>), + (false, ShiftOpcode::SRL) => Ok($execute_impl::<_, _, false, SrlOp>), + (true, ShiftOpcode::SRA) => Ok($execute_impl::<_, _, true, SraOp>), + (false, ShiftOpcode::SRA) => Ok($execute_impl::<_, _, false, SraOp>), + } + }; +} + impl Executor for ShiftExecutor where @@ -78,15 +91,7 @@ where let data: &mut ShiftPreCompute = data.borrow_mut(); let (is_imm, shift_opcode) = self.pre_compute_impl(pc, inst, data)?; // `d` is always expected to be RV32_REGISTER_AS. - let fn_ptr = match (is_imm, shift_opcode) { - (true, ShiftOpcode::SLL) => execute_e1_impl::<_, _, true, SllOp>, - (false, ShiftOpcode::SLL) => execute_e1_impl::<_, _, false, SllOp>, - (true, ShiftOpcode::SRL) => execute_e1_impl::<_, _, true, SrlOp>, - (false, ShiftOpcode::SRL) => execute_e1_impl::<_, _, false, SrlOp>, - (true, ShiftOpcode::SRA) => execute_e1_impl::<_, _, true, SraOp>, - (false, ShiftOpcode::SRA) => execute_e1_impl::<_, _, false, SraOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, is_imm, shift_opcode) } #[cfg(feature = "tco")] @@ -102,15 +107,7 @@ where let data: &mut ShiftPreCompute = data.borrow_mut(); let (is_imm, shift_opcode) = self.pre_compute_impl(pc, inst, data)?; // `d` is always expected to be RV32_REGISTER_AS. - let fn_ptr = match (is_imm, shift_opcode) { - (true, ShiftOpcode::SLL) => execute_e1_tco_handler::<_, _, true, SllOp>, - (false, ShiftOpcode::SLL) => execute_e1_tco_handler::<_, _, false, SllOp>, - (true, ShiftOpcode::SRL) => execute_e1_tco_handler::<_, _, true, SrlOp>, - (false, ShiftOpcode::SRL) => execute_e1_tco_handler::<_, _, false, SrlOp>, - (true, ShiftOpcode::SRA) => execute_e1_tco_handler::<_, _, true, SraOp>, - (false, ShiftOpcode::SRA) => execute_e1_tco_handler::<_, _, false, SraOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, is_imm, shift_opcode) } } @@ -135,15 +132,7 @@ where data.chip_idx = chip_idx as u32; let (is_imm, shift_opcode) = self.pre_compute_impl(pc, inst, &mut data.data)?; // `d` is always expected to be RV32_REGISTER_AS. - let fn_ptr = match (is_imm, shift_opcode) { - (true, ShiftOpcode::SLL) => execute_e2_impl::<_, _, true, SllOp>, - (false, ShiftOpcode::SLL) => execute_e2_impl::<_, _, false, SllOp>, - (true, ShiftOpcode::SRL) => execute_e2_impl::<_, _, true, SrlOp>, - (false, ShiftOpcode::SRL) => execute_e2_impl::<_, _, false, SrlOp>, - (true, ShiftOpcode::SRA) => execute_e2_impl::<_, _, true, SraOp>, - (false, ShiftOpcode::SRA) => execute_e2_impl::<_, _, false, SraOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, is_imm, shift_opcode) } } From 5c7b832ec558811ee8bf92543499d90549e46e84 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 18:06:19 -0700 Subject: [PATCH 13/36] refactor: fp2 dispatch --- extensions/algebra/circuit/src/execution.rs | 319 ++++++-------------- rust-toolchain.toml | 3 +- 2 files changed, 102 insertions(+), 220 deletions(-) diff --git a/extensions/algebra/circuit/src/execution.rs b/extensions/algebra/circuit/src/execution.rs index aeb7fae242..031ae929ce 100644 --- a/extensions/algebra/circuit/src/execution.rs +++ b/extensions/algebra/circuit/src/execution.rs @@ -74,6 +74,83 @@ macro_rules! generate_fp2_dispatch { }; } +macro_rules! dispatch { + ($execute_impl:ident,$execute_generic_impl:ident,$execute_setup_impl:ident,$pre_compute:ident,$op:ident) => { + if let Some(op) = $op { + let modulus = &$pre_compute.expr.prime; + if IS_FP2 { + if let Some(field_type) = get_fp2_field_type(modulus) { + generate_fp2_dispatch!( + field_type, + op, + BLOCKS, + BLOCK_SIZE, + $execute_impl, + [ + (BN254Coordinate, Add), + (BN254Coordinate, Sub), + (BN254Coordinate, Mul), + (BN254Coordinate, Div), + (BLS12_381Coordinate, Add), + (BLS12_381Coordinate, Sub), + (BLS12_381Coordinate, Mul), + (BLS12_381Coordinate, Div), + ] + ) + } else { + Ok($execute_generic_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) + } + } else if let Some(field_type) = get_field_type(modulus) { + generate_field_dispatch!( + field_type, + op, + BLOCKS, + BLOCK_SIZE, + $execute_impl, + [ + (K256Coordinate, Add), + (K256Coordinate, Sub), + (K256Coordinate, Mul), + (K256Coordinate, Div), + (K256Scalar, Add), + (K256Scalar, Sub), + (K256Scalar, Mul), + (K256Scalar, Div), + (P256Coordinate, Add), + (P256Coordinate, Sub), + (P256Coordinate, Mul), + (P256Coordinate, Div), + (P256Scalar, Add), + (P256Scalar, Sub), + (P256Scalar, Mul), + (P256Scalar, Div), + (BN254Coordinate, Add), + (BN254Coordinate, Sub), + (BN254Coordinate, Mul), + (BN254Coordinate, Div), + (BN254Scalar, Add), + (BN254Scalar, Sub), + (BN254Scalar, Mul), + (BN254Scalar, Div), + (BLS12_381Coordinate, Add), + (BLS12_381Coordinate, Sub), + (BLS12_381Coordinate, Mul), + (BLS12_381Coordinate, Div), + (BLS12_381Scalar, Add), + (BLS12_381Scalar, Sub), + (BLS12_381Scalar, Mul), + (BLS12_381Scalar, Div), + ] + ) + } else { + Ok($execute_generic_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) + } + } else { + Ok($execute_setup_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) + } + }; +} + #[derive(AlignedBytesBorrow, Clone)] #[repr(C)] struct FieldExpressionPreCompute<'a> { @@ -192,81 +269,15 @@ impl) - } - } else if let Some(field_type) = get_field_type(modulus) { - generate_field_dispatch!( - field_type, - op, - BLOCKS, - BLOCK_SIZE, - execute_e1_impl, - [ - (K256Coordinate, Add), - (K256Coordinate, Sub), - (K256Coordinate, Mul), - (K256Coordinate, Div), - (K256Scalar, Add), - (K256Scalar, Sub), - (K256Scalar, Mul), - (K256Scalar, Div), - (P256Coordinate, Add), - (P256Coordinate, Sub), - (P256Coordinate, Mul), - (P256Coordinate, Div), - (P256Scalar, Add), - (P256Scalar, Sub), - (P256Scalar, Mul), - (P256Scalar, Div), - (BN254Coordinate, Add), - (BN254Coordinate, Sub), - (BN254Coordinate, Mul), - (BN254Coordinate, Div), - (BN254Scalar, Add), - (BN254Scalar, Sub), - (BN254Scalar, Mul), - (BN254Scalar, Div), - (BLS12_381Coordinate, Add), - (BLS12_381Coordinate, Sub), - (BLS12_381Coordinate, Mul), - (BLS12_381Coordinate, Div), - (BLS12_381Scalar, Add), - (BLS12_381Scalar, Sub), - (BLS12_381Scalar, Mul), - (BLS12_381Scalar, Div), - ] - ) - } else { - Ok(execute_e1_generic_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) - } - } else { - Ok(execute_e1_setup_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) - } + dispatch!( + execute_e1_impl, + execute_e1_generic_impl, + execute_e1_setup_impl, + pre_compute, + op + ) } #[cfg(feature = "tco")] @@ -280,81 +291,15 @@ impl) - } - } else if let Some(field_type) = get_field_type(modulus) { - generate_field_dispatch!( - field_type, - op, - BLOCKS, - BLOCK_SIZE, - execute_e1_tco_handler, - [ - (K256Coordinate, Add), - (K256Coordinate, Sub), - (K256Coordinate, Mul), - (K256Coordinate, Div), - (K256Scalar, Add), - (K256Scalar, Sub), - (K256Scalar, Mul), - (K256Scalar, Div), - (P256Coordinate, Add), - (P256Coordinate, Sub), - (P256Coordinate, Mul), - (P256Coordinate, Div), - (P256Scalar, Add), - (P256Scalar, Sub), - (P256Scalar, Mul), - (P256Scalar, Div), - (BN254Coordinate, Add), - (BN254Coordinate, Sub), - (BN254Coordinate, Mul), - (BN254Coordinate, Div), - (BN254Scalar, Add), - (BN254Scalar, Sub), - (BN254Scalar, Mul), - (BN254Scalar, Div), - (BLS12_381Coordinate, Add), - (BLS12_381Coordinate, Sub), - (BLS12_381Coordinate, Mul), - (BLS12_381Coordinate, Div), - (BLS12_381Scalar, Add), - (BLS12_381Scalar, Sub), - (BLS12_381Scalar, Mul), - (BLS12_381Scalar, Div), - ] - ) - } else { - Ok(execute_e1_generic_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) - } - } else { - Ok(execute_e1_setup_tco_handler::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) - } + dispatch!( + execute_e1_tco_handler, + execute_e1_generic_tco_handler, + execute_e1_setup_tco_handler, + pre_compute, + op + ) } } @@ -379,80 +324,16 @@ impl = data.borrow_mut(); pre_compute.chip_idx = chip_idx as u32; - let op = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; + let pre_compute_pure = &mut pre_compute.data; + let op = self.pre_compute_impl(pc, inst, pre_compute_pure)?; - if let Some(op) = op { - let modulus = &pre_compute.data.expr.prime; - if IS_FP2 { - if let Some(field_type) = get_fp2_field_type(modulus) { - generate_fp2_dispatch!( - field_type, - op, - BLOCKS, - BLOCK_SIZE, - execute_e2_impl, - [ - (BN254Coordinate, Add), - (BN254Coordinate, Sub), - (BN254Coordinate, Mul), - (BN254Coordinate, Div), - (BLS12_381Coordinate, Add), - (BLS12_381Coordinate, Sub), - (BLS12_381Coordinate, Mul), - (BLS12_381Coordinate, Div), - ] - ) - } else { - Ok(execute_e2_generic_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) - } - } else if let Some(field_type) = get_field_type(modulus) { - generate_field_dispatch!( - field_type, - op, - BLOCKS, - BLOCK_SIZE, - execute_e2_impl, - [ - (K256Coordinate, Add), - (K256Coordinate, Sub), - (K256Coordinate, Mul), - (K256Coordinate, Div), - (K256Scalar, Add), - (K256Scalar, Sub), - (K256Scalar, Mul), - (K256Scalar, Div), - (P256Coordinate, Add), - (P256Coordinate, Sub), - (P256Coordinate, Mul), - (P256Coordinate, Div), - (P256Scalar, Add), - (P256Scalar, Sub), - (P256Scalar, Mul), - (P256Scalar, Div), - (BN254Coordinate, Add), - (BN254Coordinate, Sub), - (BN254Coordinate, Mul), - (BN254Coordinate, Div), - (BN254Scalar, Add), - (BN254Scalar, Sub), - (BN254Scalar, Mul), - (BN254Scalar, Div), - (BLS12_381Coordinate, Add), - (BLS12_381Coordinate, Sub), - (BLS12_381Coordinate, Mul), - (BLS12_381Coordinate, Div), - (BLS12_381Scalar, Add), - (BLS12_381Scalar, Sub), - (BLS12_381Scalar, Mul), - (BLS12_381Scalar, Div), - ] - ) - } else { - Ok(execute_e2_generic_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) - } - } else { - Ok(execute_e2_setup_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>) - } + dispatch!( + execute_e2_impl, + execute_e2_generic_impl, + execute_e2_setup_impl, + pre_compute_pure, + op + ) } } unsafe fn execute_e12_impl< diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 3828e8e42a..a3c5cc1709 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,4 +1,5 @@ [toolchain] -channel = "nightly-2025-08-19" # channel = "1.86.0" +# To use the "tco" feature, switch to Rust nightly: +channel = "nightly-2025-08-19" components = ["clippy", "rustfmt"] From 32c2dbdbb7872ebbd7323bc92b8f99460e1c1334 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:04:21 -0700 Subject: [PATCH 14/36] chore: update feature comment --- crates/vm/Cargo.toml | 4 +-- crates/vm/derive/src/lib.rs | 34 +++++++++++++++++-- crates/vm/src/arch/execution.rs | 16 +++++++++ crates/vm/src/arch/interpreter.rs | 28 ++++++++++++++- crates/vm/src/system/phantom/execution.rs | 18 ++++++++++ .../vm/src/system/public_values/execution.rs | 19 +++++++++++ 6 files changed, 114 insertions(+), 5 deletions(-) diff --git a/crates/vm/Cargo.toml b/crates/vm/Cargo.toml index 47083d236a..de7be1c6f3 100644 --- a/crates/vm/Cargo.toml +++ b/crates/vm/Cargo.toml @@ -68,8 +68,8 @@ basic-memory = [] # turns on stark-backend debugger in all proofs stark-debug = [] test-utils = ["openvm-stark-sdk"] -# Tail call optimizations. This requires nightly for the `become` keyword (https://github.com/rust-lang/rust/pull/144232) which will likely be stabilized in Rust 1.90 -# However `become` may still lead to compiler panics instead of runtime panics, so `tco` will remain a separate feature. +# Tail call optimizations. This requires nightly for the `become` keyword (https://github.com/rust-lang/rust/pull/144232). +# However tail call elimination is still an incomplete feature in Rust, so the `tco` feature remains experimental until then. tco = [] # performance features: mimalloc = ["openvm-stark-backend/mimalloc"] diff --git a/crates/vm/derive/src/lib.rs b/crates/vm/derive/src/lib.rs index 2ffc89c044..7b0dfa4eed 100644 --- a/crates/vm/derive/src/lib.rs +++ b/crates/vm/derive/src/lib.rs @@ -331,6 +331,18 @@ pub fn metered_executor_derive(input: TokenStream) -> TokenStream { Ctx: ::openvm_circuit::arch::execution_mode::MeteredExecutionCtxTrait, { self.0.metered_pre_compute(chip_idx, pc, inst, data) } + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &::openvm_circuit::arch::instructions::instruction::Instruction, + data: &mut [u8], + ) -> Result<::openvm_circuit::arch::Handler, ::openvm_circuit::arch::StaticProgramError> + where + Ctx: ::openvm_circuit::arch::execution_mode::MeteredExecutionCtxTrait, { + self.0.metered_handler(chip_idx, pc, inst, data) + } } } .into() @@ -364,7 +376,7 @@ pub fn metered_executor_derive(input: TokenStream) -> TokenStream { }); // Use full path ::openvm_circuit... so it can be used either within or outside the vm // crate. Assume F is already generic of the field. - let (pre_compute_size_arms, metered_pre_compute_arms, where_predicates): (Vec<_>, Vec<_>, Vec<_>) = multiunzip(variants.iter().map(|(variant_name, field)| { + let (pre_compute_size_arms, metered_pre_compute_arms, metered_handler_arms, where_predicates): (Vec<_>, Vec<_>, Vec<_>, Vec<_>) = multiunzip(variants.iter().map(|(variant_name, field)| { let field_ty = &field.ty; let pre_compute_size_arm = quote! { #name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::MeteredExecutor<#first_ty_generic>>::metered_pre_compute_size(x) @@ -372,10 +384,13 @@ pub fn metered_executor_derive(input: TokenStream) -> TokenStream { let metered_pre_compute_arm = quote! { #name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::MeteredExecutor<#first_ty_generic>>::metered_pre_compute(x, chip_idx, pc, instruction, data) }; + let metered_handler_arm = quote! { + #name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::MeteredExecutor<#first_ty_generic>>::metered_handler(x, chip_idx, pc, instruction, data) + }; let where_predicate = syn::parse_quote! { #field_ty: ::openvm_circuit::arch::MeteredExecutor<#first_ty_generic> }; - (pre_compute_size_arm, metered_pre_compute_arm, where_predicate) + (pre_compute_size_arm, metered_pre_compute_arm, metered_handler_arm, where_predicate) })); let where_clause = new_generics.make_where_clause(); for predicate in where_predicates { @@ -407,6 +422,21 @@ pub fn metered_executor_derive(input: TokenStream) -> TokenStream { #(#metered_pre_compute_arms,)* } } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + instruction: &::openvm_circuit::arch::instructions::instruction::Instruction, + data: &mut [u8], + ) -> Result<::openvm_circuit::arch::Handler, ::openvm_circuit::arch::StaticProgramError> + where + Ctx: ::openvm_circuit::arch::execution_mode::MeteredExecutionCtxTrait, { + match self { + #(#metered_handler_arms,)* + } + } } } .into() diff --git a/crates/vm/src/arch/execution.rs b/crates/vm/src/arch/execution.rs index 97f3285ba9..0eb67c8305 100644 --- a/crates/vm/src/arch/execution.rs +++ b/crates/vm/src/arch/execution.rs @@ -158,6 +158,22 @@ pub trait MeteredExecutor { ) -> Result, StaticProgramError> where Ctx: MeteredExecutionCtxTrait; + + /// Returns a function pointer with tail call optimization. The handler function assumes that + /// the pre-compute buffer it receives is the populated `data`. + // NOTE: we could have used `metered_pre_compute` above to populate `data`, but the + // implementations were simpler to keep `metered_handler` entirely separate from + // `metered_pre_compute`. + #[cfg(feature = "tco")] + fn metered_handler( + &self, + air_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait; } /// Trait for preflight execution via a host interpreter. The trait methods allow execution of diff --git a/crates/vm/src/arch/interpreter.rs b/crates/vm/src/arch/interpreter.rs index 99728c94c7..76f3321004 100644 --- a/crates/vm/src/arch/interpreter.rs +++ b/crates/vm/src/arch/interpreter.rs @@ -235,6 +235,32 @@ where let pc_base = program.pc_base; let pc_start = exe.pc_start; let init_memory = exe.init_memory.clone(); + #[cfg(feature = "tco")] + let handlers = program + .instructions_and_debug_infos + .iter() + .zip_eq(split_pre_compute_buf.iter_mut()) + .enumerate() + .map( + |(pc_idx, (inst_opt, pre_compute))| -> Result, StaticProgramError> { + if let Some((inst, _)) = inst_opt { + let pc = pc_base + pc_idx as u32 * DEFAULT_PC_STEP; + if get_system_opcode_handler::(inst, pre_compute).is_some() { + Ok(terminate_execute_e12_tco_handler) + } else { + // unwrap because get_pre_compute_instructions would have errored + // already on DisabledOperation + let executor_idx = inventory.instruction_lookup[&inst.opcode] as usize; + let executor = &inventory.executors[executor_idx]; + let air_idx = executor_idx_to_air_idx[executor_idx]; + executor.metered_handler(air_idx, pc, inst, pre_compute) + } + } else { + Ok(unreachable_tco_handler) + } + }, + ) + .collect::, _>>()?; Ok(Self { system_config: inventory.config().clone(), @@ -246,7 +272,7 @@ where #[cfg(feature = "tco")] pre_compute_max_size, #[cfg(feature = "tco")] - handlers: vec![], + handlers, }) } } diff --git a/crates/vm/src/system/phantom/execution.rs b/crates/vm/src/system/phantom/execution.rs index a0fe5c7b2b..155b5d5713 100644 --- a/crates/vm/src/system/phantom/execution.rs +++ b/crates/vm/src/system/phantom/execution.rs @@ -112,6 +112,7 @@ unsafe fn execute_e1_impl( execute_e12_impl(pre_compute, vm_state); } +#[create_tco_handler] #[inline(always)] unsafe fn execute_e2_impl( pre_compute: &[u8], @@ -207,4 +208,21 @@ where self.pre_compute_impl(inst, &mut e2_data.data); Ok(execute_e2_impl) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + _pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let e2_data: &mut E2PreCompute> = data.borrow_mut(); + e2_data.chip_idx = chip_idx as u32; + self.pre_compute_impl(inst, &mut e2_data.data); + Ok(execute_e2_tco_handler) + } } diff --git a/crates/vm/src/system/public_values/execution.rs b/crates/vm/src/system/public_values/execution.rs index ed32e0936c..dcc25b3bd9 100644 --- a/crates/vm/src/system/public_values/execution.rs +++ b/crates/vm/src/system/public_values/execution.rs @@ -137,6 +137,24 @@ where dispatch!(execute_e2_impl, b_is_imm, c_is_imm) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + _pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + let (b_is_imm, c_is_imm) = self.pre_compute_impl(inst, &mut data.data); + + dispatch!(execute_e2_tco_handler, b_is_imm, c_is_imm) + } } #[inline(always)] @@ -185,6 +203,7 @@ unsafe fn execute_e1_impl(pre_compute, state); } +#[create_tco_handler] #[inline(always)] unsafe fn execute_e2_impl( pre_compute: &[u8], From eee6856977ba4467e4286d2022b467f86781c676 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 18:12:57 -0700 Subject: [PATCH 15/36] feat: metered handler for algebra extension --- extensions/algebra/circuit/src/execution.rs | 27 +++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/extensions/algebra/circuit/src/execution.rs b/extensions/algebra/circuit/src/execution.rs index 031ae929ce..0243c5bd5f 100644 --- a/extensions/algebra/circuit/src/execution.rs +++ b/extensions/algebra/circuit/src/execution.rs @@ -335,6 +335,31 @@ impl( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let pre_compute: &mut E2PreCompute = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + + let pre_compute_pure = &mut pre_compute.data; + let op = self.pre_compute_impl(pc, inst, pre_compute_pure)?; + + dispatch!( + execute_e2_tco_handler, + execute_e2_generic_tco_handler, + execute_e2_setup_tco_handler, + pre_compute_pure, + op + ) + } } unsafe fn execute_e12_impl< F: PrimeField32, @@ -480,6 +505,7 @@ unsafe fn execute_e1_setup_impl< execute_e12_setup_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_setup_impl< F: PrimeField32, CTX: MeteredExecutionCtxTrait, @@ -552,6 +578,7 @@ unsafe fn execute_e1_generic_impl< execute_e12_generic_impl::<_, _, BLOCKS, BLOCK_SIZE>(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_generic_impl< F: PrimeField32, CTX: MeteredExecutionCtxTrait, From c0d4d8329ff4ec5a6cf57c44644f664178559b60 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 18:41:11 -0700 Subject: [PATCH 16/36] refactor: use local dispatch! macros to reduce code duplication --- .../src/weierstrass_chip/add_ne/execution.rs | 20 ++++++++++ .../src/weierstrass_chip/double/execution.rs | 20 ++++++++++ extensions/keccak256/circuit/src/execution.rs | 18 +++++++++ .../native/circuit/src/branch_eq/execution.rs | 19 +++++++++ .../native/circuit/src/castf/execution.rs | 20 ++++++++++ .../circuit/src/field_arithmetic/execution.rs | 19 +++++++++ .../circuit/src/field_extension/execution.rs | 18 +++++++++ .../native/circuit/src/fri/execution.rs | 19 +++++++++ .../circuit/src/jal_rangecheck/execution.rs | 30 ++++++++++++++ .../native/circuit/src/loadstore/execution.rs | 26 ++++++++++++ .../native/circuit/src/poseidon2/execution.rs | 36 +++++++++++++++++ .../rv32im/circuit/src/auipc/execution.rs | 18 +++++++++ .../rv32im/circuit/src/base_alu/execution.rs | 36 +++++++++++++++++ .../rv32im/circuit/src/branch_eq/execution.rs | 23 +++++++++++ .../rv32im/circuit/src/branch_lt/execution.rs | 24 +++++++++++ .../rv32im/circuit/src/divrem/execution.rs | 18 +++++++++ .../rv32im/circuit/src/hintstore/execution.rs | 22 ++++++++++ .../rv32im/circuit/src/jal_lui/execution.rs | 24 +++++++++++ .../rv32im/circuit/src/jalr/execution.rs | 23 +++++++++++ .../rv32im/circuit/src/less_than/execution.rs | 24 +++++++++++ .../circuit/src/load_sign_extend/execution.rs | 24 +++++++++++ .../rv32im/circuit/src/loadstore/execution.rs | 40 +++++++++++++++++++ .../rv32im/circuit/src/mul/execution.rs | 18 +++++++++ .../rv32im/circuit/src/mulh/execution.rs | 23 +++++++++++ .../rv32im/circuit/src/shift/execution.rs | 17 ++++++++ .../circuit/src/sha256_chip/execution.rs | 18 +++++++++ 26 files changed, 597 insertions(+) diff --git a/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs b/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs index f6dc9ea12a..a983321e10 100644 --- a/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs +++ b/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs @@ -237,6 +237,25 @@ impl MeteredExecu let is_setup = self.pre_compute_impl(pc, inst, pre_compute_pure)?; dispatch!(execute_e2_impl, pre_compute_pure, is_setup) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let pre_compute: &mut E2PreCompute = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + + let pre_compute_pure = &mut pre_compute.data; + let is_setup = self.pre_compute_impl(pc, inst, pre_compute_pure)?; + dispatch!(execute_e2_tco_handler, pre_compute_pure, is_setup) + } } unsafe fn execute_e12_impl< @@ -312,6 +331,7 @@ unsafe fn execute_e1_impl< execute_e12_impl::<_, _, BLOCKS, BLOCK_SIZE, FIELD_TYPE, IS_SETUP>(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl< F: PrimeField32, CTX: MeteredExecutionCtxTrait, diff --git a/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs b/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs index 6459ae8395..b6d569442a 100644 --- a/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs +++ b/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs @@ -200,6 +200,25 @@ impl MeteredExecu dispatch!(execute_e2_impl, pre_compute_pure, is_setup) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let pre_compute: &mut E2PreCompute = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + let pre_compute_pure = &mut pre_compute.data; + let is_setup = self.pre_compute_impl(pc, inst, pre_compute_pure)?; + + dispatch!(execute_e2_tco_handler, pre_compute_pure, is_setup) + } } unsafe fn execute_e12_impl< @@ -288,6 +307,7 @@ unsafe fn execute_e1_impl< execute_e12_impl::<_, _, BLOCKS, BLOCK_SIZE, CURVE_TYPE, IS_SETUP>(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl< F: PrimeField32, CTX: MeteredExecutionCtxTrait, diff --git a/extensions/keccak256/circuit/src/execution.rs b/extensions/keccak256/circuit/src/execution.rs index 20772fd6de..28d96f0a2a 100644 --- a/extensions/keccak256/circuit/src/execution.rs +++ b/extensions/keccak256/circuit/src/execution.rs @@ -111,6 +111,23 @@ impl MeteredExecutor for KeccakVmExecutor { self.pre_compute_impl(pc, inst, &mut data.data)?; Ok(execute_e2_impl::<_, _>) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + self.pre_compute_impl(pc, inst, &mut data.data)?; + Ok(execute_e2_tco_handler::<_, _>) + } } #[inline(always)] @@ -161,6 +178,7 @@ unsafe fn execute_e1_impl( execute_e12_impl::(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/native/circuit/src/branch_eq/execution.rs b/extensions/native/circuit/src/branch_eq/execution.rs index 18955608a7..b344e09d4e 100644 --- a/extensions/native/circuit/src/branch_eq/execution.rs +++ b/extensions/native/circuit/src/branch_eq/execution.rs @@ -156,6 +156,24 @@ where dispatch!(execute_e2_impl, a_is_imm, b_is_imm, is_bne) } + + #[cfg(feature = "tco")] + #[inline(always)] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> { + let pre_compute: &mut E2PreCompute = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + + let (a_is_imm, b_is_imm, is_bne) = + self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; + + dispatch!(execute_e2_tco_handler, a_is_imm, b_is_imm, is_bne) + } } #[inline(always)] @@ -202,6 +220,7 @@ unsafe fn execute_e1_impl< execute_e12_impl::<_, _, A_IS_IMM, B_IS_IMM, IS_NE>(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl< F: PrimeField32, CTX: MeteredExecutionCtxTrait, diff --git a/extensions/native/circuit/src/castf/execution.rs b/extensions/native/circuit/src/castf/execution.rs index 09a4e4a283..d5b2446952 100644 --- a/extensions/native/circuit/src/castf/execution.rs +++ b/extensions/native/circuit/src/castf/execution.rs @@ -119,6 +119,25 @@ where Ok(fn_ptr) } + + #[cfg(feature = "tco")] + #[inline(always)] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> { + let pre_compute: &mut E2PreCompute = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + + self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; + + let fn_ptr = execute_e2_tco_handler::<_, _>; + + Ok(fn_ptr) + } } #[create_tco_handler] @@ -130,6 +149,7 @@ unsafe fn execute_e1_impl( execute_e12_impl(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/native/circuit/src/field_arithmetic/execution.rs b/extensions/native/circuit/src/field_arithmetic/execution.rs index b2263ee55d..747d555e02 100644 --- a/extensions/native/circuit/src/field_arithmetic/execution.rs +++ b/extensions/native/circuit/src/field_arithmetic/execution.rs @@ -197,6 +197,24 @@ where dispatch!(execute_e2_impl, local_opcode, a_is_imm, b_is_imm) } + + #[cfg(feature = "tco")] + #[inline(always)] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> { + let pre_compute: &mut E2PreCompute = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + + let (a_is_imm, b_is_imm, local_opcode) = + self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; + + dispatch!(execute_e2_tco_handler, local_opcode, a_is_imm, b_is_imm) + } } #[inline(always)] @@ -261,6 +279,7 @@ unsafe fn execute_e1_impl< execute_e12_impl::(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl< F: PrimeField32, CTX: MeteredExecutionCtxTrait, diff --git a/extensions/native/circuit/src/field_extension/execution.rs b/extensions/native/circuit/src/field_extension/execution.rs index d3e706ce07..0c566cc24a 100644 --- a/extensions/native/circuit/src/field_extension/execution.rs +++ b/extensions/native/circuit/src/field_extension/execution.rs @@ -134,6 +134,23 @@ where dispatch!(execute_e2_impl, opcode) } + + #[cfg(feature = "tco")] + #[inline(always)] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> { + let pre_compute: &mut E2PreCompute = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + + let opcode = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; + + dispatch!(execute_e2_tco_handler, opcode) + } } #[inline(always)] @@ -167,6 +184,7 @@ unsafe fn execute_e1_impl(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/native/circuit/src/fri/execution.rs b/extensions/native/circuit/src/fri/execution.rs index 6fe090955c..4152b9c50c 100644 --- a/extensions/native/circuit/src/fri/execution.rs +++ b/extensions/native/circuit/src/fri/execution.rs @@ -130,6 +130,24 @@ where let fn_ptr = execute_e2_impl; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + #[inline(always)] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> { + let pre_compute: &mut E2PreCompute = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + + self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; + + let fn_ptr = execute_e2_tco_handler; + Ok(fn_ptr) + } } #[create_tco_handler] @@ -141,6 +159,7 @@ unsafe fn execute_e1_impl( execute_e12_impl(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/native/circuit/src/jal_rangecheck/execution.rs b/extensions/native/circuit/src/jal_rangecheck/execution.rs index 60a5691795..2ca2b0cfba 100644 --- a/extensions/native/circuit/src/jal_rangecheck/execution.rs +++ b/extensions/native/circuit/src/jal_rangecheck/execution.rs @@ -171,6 +171,34 @@ where Ok(execute_range_check_e2_impl) } } + + #[cfg(feature = "tco")] + #[inline(always)] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> { + let &Instruction { opcode, .. } = inst; + + let is_jal = opcode == NativeJalOpcode::JAL.global_opcode(); + + if is_jal { + let pre_compute: &mut E2PreCompute> = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + + self.pre_compute_jal_impl(pc, inst, &mut pre_compute.data)?; + Ok(execute_jal_e2_tco_handler) + } else { + let pre_compute: &mut E2PreCompute = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + + self.pre_compute_range_check_impl(pc, inst, &mut pre_compute.data)?; + Ok(execute_range_check_e2_tco_handler) + } + } } #[inline(always)] @@ -221,6 +249,7 @@ unsafe fn execute_jal_e1_impl( execute_jal_e12_impl(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_jal_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, @@ -241,6 +270,7 @@ unsafe fn execute_range_check_e1_impl( execute_range_check_e12_impl(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_range_check_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/native/circuit/src/loadstore/execution.rs b/extensions/native/circuit/src/loadstore/execution.rs index f94104d454..941dbe8bcd 100644 --- a/extensions/native/circuit/src/loadstore/execution.rs +++ b/extensions/native/circuit/src/loadstore/execution.rs @@ -136,6 +136,29 @@ where Ok(fn_ptr) } + + #[cfg(feature = "tco")] + #[inline(always)] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> { + let pre_compute: &mut E2PreCompute> = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + + let local_opcode = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; + + let fn_ptr = match local_opcode { + NativeLoadStoreOpcode::LOADW => execute_e2_loadw_tco_handler::, + NativeLoadStoreOpcode::STOREW => execute_e2_storew_tco_handler::, + NativeLoadStoreOpcode::HINT_STOREW => execute_e2_hint_storew_tco_handler::, + }; + + Ok(fn_ptr) + } } #[create_tco_handler] @@ -169,6 +192,7 @@ unsafe fn execute_e1_hint_storew< execute_e12_hint_storew::<_, _, NUM_CELLS>(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_loadw< F: PrimeField32, CTX: MeteredExecutionCtxTrait, @@ -184,6 +208,7 @@ unsafe fn execute_e2_loadw< execute_e12_loadw::<_, _, NUM_CELLS>(&pre_compute.data, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_storew< F: PrimeField32, CTX: MeteredExecutionCtxTrait, @@ -199,6 +224,7 @@ unsafe fn execute_e2_storew< execute_e12_storew::<_, _, NUM_CELLS>(&pre_compute.data, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_hint_storew< F: PrimeField32, CTX: MeteredExecutionCtxTrait, diff --git a/extensions/native/circuit/src/poseidon2/execution.rs b/extensions/native/circuit/src/poseidon2/execution.rs index 6ce150a036..bed8ac40cc 100644 --- a/extensions/native/circuit/src/poseidon2/execution.rs +++ b/extensions/native/circuit/src/poseidon2/execution.rs @@ -245,6 +245,40 @@ impl MeteredExecutor Ok(execute_verify_batch_e2_impl::<_, _, SBOX_REGISTERS>) } } + + #[cfg(feature = "tco")] + #[inline(always)] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> { + let &Instruction { opcode, .. } = inst; + + let is_pos2 = opcode == PERM_POS2.global_opcode() || opcode == COMP_POS2.global_opcode(); + + if is_pos2 { + let pre_compute: &mut E2PreCompute> = + data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + + self.pre_compute_pos2_impl(pc, inst, &mut pre_compute.data)?; + if opcode == PERM_POS2.global_opcode() { + Ok(execute_pos2_e2_tco_handler::<_, _, SBOX_REGISTERS, true>) + } else { + Ok(execute_pos2_e2_tco_handler::<_, _, SBOX_REGISTERS, false>) + } + } else { + let pre_compute: &mut E2PreCompute> = + data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + + self.pre_compute_verify_batch_impl(pc, inst, &mut pre_compute.data)?; + Ok(execute_verify_batch_e2_tco_handler::<_, _, SBOX_REGISTERS>) + } + } } #[create_tco_handler] @@ -261,6 +295,7 @@ unsafe fn execute_pos2_e1_impl< execute_pos2_e12_impl::<_, _, SBOX_REGISTERS, IS_PERM>(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_pos2_e2_impl< F: PrimeField32, CTX: MeteredExecutionCtxTrait, @@ -292,6 +327,7 @@ unsafe fn execute_verify_batch_e1_impl< execute_verify_batch_e12_impl::<_, _, SBOX_REGISTERS, true>(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_verify_batch_e2_impl< F: PrimeField32, CTX: MeteredExecutionCtxTrait, diff --git a/extensions/rv32im/circuit/src/auipc/execution.rs b/extensions/rv32im/circuit/src/auipc/execution.rs index c342f7f2bc..454172a06f 100644 --- a/extensions/rv32im/circuit/src/auipc/execution.rs +++ b/extensions/rv32im/circuit/src/auipc/execution.rs @@ -100,6 +100,23 @@ where self.pre_compute_impl(pc, inst, &mut data.data)?; Ok(execute_e2_impl) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + self.pre_compute_impl(pc, inst, &mut data.data)?; + Ok(execute_e2_tco_handler) + } } #[inline(always)] @@ -123,6 +140,7 @@ unsafe fn execute_e1_impl( execute_e12_impl(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/rv32im/circuit/src/base_alu/execution.rs b/extensions/rv32im/circuit/src/base_alu/execution.rs index 3ac1e92cd8..f737fc52f3 100644 --- a/extensions/rv32im/circuit/src/base_alu/execution.rs +++ b/extensions/rv32im/circuit/src/base_alu/execution.rs @@ -173,6 +173,41 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + #[inline(always)] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + let is_imm = self.pre_compute_impl(pc, inst, &mut data.data)?; + let opcode = inst.opcode; + + let fn_ptr = match ( + is_imm, + BaseAluOpcode::from_usize(opcode.local_opcode_idx(self.offset)), + ) { + (true, BaseAluOpcode::ADD) => execute_e2_tco_handler::<_, _, true, AddOp>, + (false, BaseAluOpcode::ADD) => execute_e2_tco_handler::<_, _, false, AddOp>, + (true, BaseAluOpcode::SUB) => execute_e2_tco_handler::<_, _, true, SubOp>, + (false, BaseAluOpcode::SUB) => execute_e2_tco_handler::<_, _, false, SubOp>, + (true, BaseAluOpcode::XOR) => execute_e2_tco_handler::<_, _, true, XorOp>, + (false, BaseAluOpcode::XOR) => execute_e2_tco_handler::<_, _, false, XorOp>, + (true, BaseAluOpcode::OR) => execute_e2_tco_handler::<_, _, true, OrOp>, + (false, BaseAluOpcode::OR) => execute_e2_tco_handler::<_, _, false, OrOp>, + (true, BaseAluOpcode::AND) => execute_e2_tco_handler::<_, _, true, AndOp>, + (false, BaseAluOpcode::AND) => execute_e2_tco_handler::<_, _, false, AndOp>, + }; + Ok(fn_ptr) + } } #[inline(always)] @@ -215,6 +250,7 @@ unsafe fn execute_e1_impl< execute_e12_impl::(pre_compute, vm_state); } +#[create_tco_handler] #[inline(always)] unsafe fn execute_e2_impl< F: PrimeField32, diff --git a/extensions/rv32im/circuit/src/branch_eq/execution.rs b/extensions/rv32im/circuit/src/branch_eq/execution.rs index 620759731a..3aa9be7ff4 100644 --- a/extensions/rv32im/circuit/src/branch_eq/execution.rs +++ b/extensions/rv32im/circuit/src/branch_eq/execution.rs @@ -128,6 +128,28 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + let is_bne = self.pre_compute_impl(pc, inst, &mut data.data)?; + let fn_ptr = if is_bne { + execute_e2_tco_handler::<_, _, true> + } else { + execute_e2_tco_handler::<_, _, false> + }; + Ok(fn_ptr) + } } #[inline(always)] @@ -154,6 +176,7 @@ unsafe fn execute_e1_impl(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/rv32im/circuit/src/branch_lt/execution.rs b/extensions/rv32im/circuit/src/branch_lt/execution.rs index a25b24b147..e85c913674 100644 --- a/extensions/rv32im/circuit/src/branch_lt/execution.rs +++ b/extensions/rv32im/circuit/src/branch_lt/execution.rs @@ -133,6 +133,29 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?; + let fn_ptr = match local_opcode { + BranchLessThanOpcode::BLT => execute_e2_tco_handler::<_, _, BltOp>, + BranchLessThanOpcode::BLTU => execute_e2_tco_handler::<_, _, BltuOp>, + BranchLessThanOpcode::BGE => execute_e2_tco_handler::<_, _, BgeOp>, + BranchLessThanOpcode::BGEU => execute_e2_tco_handler::<_, _, BgeuOp>, + }; + Ok(fn_ptr) + } } #[inline(always)] @@ -160,6 +183,7 @@ unsafe fn execute_e1_impl(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/rv32im/circuit/src/divrem/execution.rs b/extensions/rv32im/circuit/src/divrem/execution.rs index 846431b3ad..68280f6c2b 100644 --- a/extensions/rv32im/circuit/src/divrem/execution.rs +++ b/extensions/rv32im/circuit/src/divrem/execution.rs @@ -122,6 +122,23 @@ where let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?; dispatch!(execute_e2_impl, local_opcode) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?; + dispatch!(execute_e2_tco_handler, local_opcode) + } } unsafe fn execute_e12_impl( @@ -145,6 +162,7 @@ unsafe fn execute_e1_impl execute_e12_impl::(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/rv32im/circuit/src/hintstore/execution.rs b/extensions/rv32im/circuit/src/hintstore/execution.rs index 03e1fdb3ad..fa21859411 100644 --- a/extensions/rv32im/circuit/src/hintstore/execution.rs +++ b/extensions/rv32im/circuit/src/hintstore/execution.rs @@ -131,6 +131,27 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let pre_compute: &mut E2PreCompute = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + let local_opcode = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; + let fn_ptr = match local_opcode { + HINT_STOREW => execute_e2_tco_handler::<_, _, true>, + HINT_BUFFER => execute_e2_tco_handler::<_, _, false>, + }; + Ok(fn_ptr) + } } /// Return the number of used rows. @@ -185,6 +206,7 @@ unsafe fn execute_e1_impl(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl< F: PrimeField32, CTX: MeteredExecutionCtxTrait, diff --git a/extensions/rv32im/circuit/src/jal_lui/execution.rs b/extensions/rv32im/circuit/src/jal_lui/execution.rs index 29859445f2..ef6b4d67b8 100644 --- a/extensions/rv32im/circuit/src/jal_lui/execution.rs +++ b/extensions/rv32im/circuit/src/jal_lui/execution.rs @@ -120,6 +120,29 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + _pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + let (is_jal, enabled) = self.pre_compute_impl(inst, &mut data.data)?; + let fn_ptr = match (is_jal, enabled) { + (true, true) => execute_e2_tco_handler::<_, _, true, true>, + (true, false) => execute_e2_tco_handler::<_, _, true, false>, + (false, true) => execute_e2_tco_handler::<_, _, false, true>, + (false, false) => execute_e2_tco_handler::<_, _, false, false>, + }; + Ok(fn_ptr) + } } unsafe fn execute_e12_impl< @@ -167,6 +190,7 @@ unsafe fn execute_e1_impl< execute_e12_impl::(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl< F: PrimeField32, CTX: MeteredExecutionCtxTrait, diff --git a/extensions/rv32im/circuit/src/jalr/execution.rs b/extensions/rv32im/circuit/src/jalr/execution.rs index 8b1825e955..1942bbf828 100644 --- a/extensions/rv32im/circuit/src/jalr/execution.rs +++ b/extensions/rv32im/circuit/src/jalr/execution.rs @@ -118,6 +118,28 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + let enabled = self.pre_compute_impl(pc, inst, &mut data.data)?; + let fn_ptr = if enabled { + execute_e2_tco_handler::<_, _, true> + } else { + execute_e2_tco_handler::<_, _, false> + }; + Ok(fn_ptr) + } } #[inline(always)] @@ -149,6 +171,7 @@ unsafe fn execute_e1_impl(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/rv32im/circuit/src/less_than/execution.rs b/extensions/rv32im/circuit/src/less_than/execution.rs index 4674f54e78..c1d710c036 100644 --- a/extensions/rv32im/circuit/src/less_than/execution.rs +++ b/extensions/rv32im/circuit/src/less_than/execution.rs @@ -145,6 +145,29 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let pre_compute: &mut E2PreCompute = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + let (is_imm, is_sltu) = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; + let fn_ptr = match (is_imm, is_sltu) { + (true, true) => execute_e2_tco_handler::<_, _, true, true>, + (true, false) => execute_e2_tco_handler::<_, _, true, false>, + (false, true) => execute_e2_tco_handler::<_, _, false, true>, + (false, false) => execute_e2_tco_handler::<_, _, false, false>, + }; + Ok(fn_ptr) + } } unsafe fn execute_e12_impl< @@ -189,6 +212,7 @@ unsafe fn execute_e1_impl< execute_e12_impl::(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl< F: PrimeField32, CTX: MeteredExecutionCtxTrait, diff --git a/extensions/rv32im/circuit/src/load_sign_extend/execution.rs b/extensions/rv32im/circuit/src/load_sign_extend/execution.rs index 5521698a6a..9f7286f62d 100644 --- a/extensions/rv32im/circuit/src/load_sign_extend/execution.rs +++ b/extensions/rv32im/circuit/src/load_sign_extend/execution.rs @@ -156,6 +156,29 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let pre_compute: &mut E2PreCompute = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + let (is_loadb, enabled) = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; + let fn_ptr = match (is_loadb, enabled) { + (true, true) => execute_e2_tco_handler::<_, _, true, true>, + (true, false) => execute_e2_tco_handler::<_, _, true, false>, + (false, true) => execute_e2_tco_handler::<_, _, false, true>, + (false, false) => execute_e2_tco_handler::<_, _, false, false>, + }; + Ok(fn_ptr) + } } #[inline(always)] @@ -217,6 +240,7 @@ unsafe fn execute_e1_impl< execute_e12_impl::(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl< F: PrimeField32, CTX: MeteredExecutionCtxTrait, diff --git a/extensions/rv32im/circuit/src/loadstore/execution.rs b/extensions/rv32im/circuit/src/loadstore/execution.rs index 5faec615a1..ce20e2b967 100644 --- a/extensions/rv32im/circuit/src/loadstore/execution.rs +++ b/extensions/rv32im/circuit/src/loadstore/execution.rs @@ -209,6 +209,45 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let pre_compute: &mut E2PreCompute = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + let (local_opcode, enabled, is_native_store) = + self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; + let fn_ptr = match (local_opcode, enabled, is_native_store) { + (LOADW, true, _) => execute_e2_tco_handler::<_, _, U8, LoadWOp, true>, + (LOADW, false, _) => execute_e2_tco_handler::<_, _, U8, LoadWOp, false>, + (LOADHU, true, _) => execute_e2_tco_handler::<_, _, U8, LoadHUOp, true>, + (LOADHU, false, _) => execute_e2_tco_handler::<_, _, U8, LoadHUOp, false>, + (LOADBU, true, _) => execute_e2_tco_handler::<_, _, U8, LoadBUOp, true>, + (LOADBU, false, _) => execute_e2_tco_handler::<_, _, U8, LoadBUOp, false>, + (STOREW, true, false) => execute_e2_tco_handler::<_, _, U8, StoreWOp, true>, + (STOREW, false, false) => execute_e2_tco_handler::<_, _, U8, StoreWOp, false>, + (STOREW, true, true) => execute_e2_tco_handler::<_, _, F, StoreWOp, true>, + (STOREW, false, true) => execute_e2_tco_handler::<_, _, F, StoreWOp, false>, + (STOREH, true, false) => execute_e2_tco_handler::<_, _, U8, StoreHOp, true>, + (STOREH, false, false) => execute_e2_tco_handler::<_, _, U8, StoreHOp, false>, + (STOREH, true, true) => execute_e2_tco_handler::<_, _, F, StoreHOp, true>, + (STOREH, false, true) => execute_e2_tco_handler::<_, _, F, StoreHOp, false>, + (STOREB, true, false) => execute_e2_tco_handler::<_, _, U8, StoreBOp, true>, + (STOREB, false, false) => execute_e2_tco_handler::<_, _, U8, StoreBOp, false>, + (STOREB, true, true) => execute_e2_tco_handler::<_, _, F, StoreBOp, true>, + (STOREB, false, true) => execute_e2_tco_handler::<_, _, F, StoreBOp, false>, + (_, _, _) => unreachable!(), + }; + Ok(fn_ptr) + } } #[inline(always)] @@ -279,6 +318,7 @@ unsafe fn execute_e1_impl< execute_e12_impl::(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl< F: PrimeField32, CTX: MeteredExecutionCtxTrait, diff --git a/extensions/rv32im/circuit/src/mul/execution.rs b/extensions/rv32im/circuit/src/mul/execution.rs index bff5384b62..e254434780 100644 --- a/extensions/rv32im/circuit/src/mul/execution.rs +++ b/extensions/rv32im/circuit/src/mul/execution.rs @@ -110,6 +110,23 @@ where self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; Ok(execute_e2_impl) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let pre_compute: &mut E2PreCompute = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; + Ok(execute_e2_tco_handler) + } } #[inline(always)] @@ -139,6 +156,7 @@ unsafe fn execute_e1_impl( execute_e12_impl(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/rv32im/circuit/src/mulh/execution.rs b/extensions/rv32im/circuit/src/mulh/execution.rs index d79815745e..0310b6516c 100644 --- a/extensions/rv32im/circuit/src/mulh/execution.rs +++ b/extensions/rv32im/circuit/src/mulh/execution.rs @@ -119,6 +119,28 @@ where }; Ok(fn_ptr) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + _pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let pre_compute: &mut E2PreCompute = data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + let local_opcode = self.pre_compute_impl(inst, &mut pre_compute.data)?; + let fn_ptr = match local_opcode { + MulHOpcode::MULH => execute_e2_tco_handler::<_, _, MulHOp>, + MulHOpcode::MULHSU => execute_e2_tco_handler::<_, _, MulHSuOp>, + MulHOpcode::MULHU => execute_e2_tco_handler::<_, _, MulHUOp>, + }; + Ok(fn_ptr) + } } #[inline(always)] @@ -146,6 +168,7 @@ unsafe fn execute_e1_impl(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/rv32im/circuit/src/shift/execution.rs b/extensions/rv32im/circuit/src/shift/execution.rs index ec627998bd..2ef0d7ff35 100644 --- a/extensions/rv32im/circuit/src/shift/execution.rs +++ b/extensions/rv32im/circuit/src/shift/execution.rs @@ -134,6 +134,22 @@ where // `d` is always expected to be RV32_REGISTER_AS. dispatch!(execute_e2_impl, is_imm, shift_opcode) } + + #[cfg(feature = "tco")] + #[inline(always)] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + let (is_imm, shift_opcode) = self.pre_compute_impl(pc, inst, &mut data.data)?; + // `d` is always expected to be RV32_REGISTER_AS. + dispatch!(execute_e2_tco_handler, is_imm, shift_opcode) + } } unsafe fn execute_e12_impl< @@ -176,6 +192,7 @@ unsafe fn execute_e1_impl< execute_e12_impl::(pre_compute, state); } +#[create_tco_handler] unsafe fn execute_e2_impl< F: PrimeField32, CTX: MeteredExecutionCtxTrait, diff --git a/extensions/sha256/circuit/src/sha256_chip/execution.rs b/extensions/sha256/circuit/src/sha256_chip/execution.rs index 33972ec1b1..33b40a59c3 100644 --- a/extensions/sha256/circuit/src/sha256_chip/execution.rs +++ b/extensions/sha256/circuit/src/sha256_chip/execution.rs @@ -76,6 +76,23 @@ impl MeteredExecutor for Sha256VmExecutor { self.pre_compute_impl(pc, inst, &mut data.data)?; Ok(execute_e2_impl::<_, _>) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + self.pre_compute_impl(pc, inst, &mut data.data)?; + Ok(execute_e2_tco_handler::<_, _>) + } } unsafe fn execute_e12_impl( @@ -128,6 +145,7 @@ unsafe fn execute_e1_impl( let pre_compute: &ShaPreCompute = pre_compute.borrow(); execute_e12_impl::(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, From bae6afe1fb19a89d58562c4f4721ad7bff639d23 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 18:59:02 -0700 Subject: [PATCH 17/36] feat: run! macro for tco on pure+metered execution --- crates/vm/src/arch/interpreter.rs | 101 +++++++++++++---------------- crates/vm/src/utils/stark_utils.rs | 2 - 2 files changed, 45 insertions(+), 58 deletions(-) diff --git a/crates/vm/src/arch/interpreter.rs b/crates/vm/src/arch/interpreter.rs index 76f3321004..bc0b8fe27c 100644 --- a/crates/vm/src/arch/interpreter.rs +++ b/crates/vm/src/arch/interpreter.rs @@ -58,6 +58,7 @@ pub struct InterpretedInstance<'a, F, Ctx> { init_memory: SparseMemoryImage, } +#[cfg_attr(feature = "tco", allow(dead_code))] struct PreComputeInstruction<'a, F, Ctx> { pub handler: ExecuteFunc, pub pre_compute: &'a [u8], @@ -69,16 +70,51 @@ struct TerminatePreCompute { exit_code: u32, } -macro_rules! execute_with_metrics { - ($span:literal, $pc_base:expr, $exec_state:expr, $pre_compute_insts:expr) => {{ +macro_rules! run { + ($span:literal, $interpreter:ident, $exec_state:ident, $ctx:ident) => {{ #[cfg(feature = "metrics")] let start = std::time::Instant::now(); #[cfg(feature = "metrics")] let start_instret = $exec_state.instret; - info_span!($span).in_scope(|| unsafe { - execute_trampoline($pc_base, $exec_state, $pre_compute_insts); - }); + info_span!($span).in_scope(|| -> Result<(), ExecutionError> { + #[cfg(not(feature = "tco"))] + unsafe { + tracing::debug!("execute_trampoline"); + execute_trampoline( + $interpreter.pc_base, + &mut $exec_state, + &$interpreter.pre_compute_insns, + ); + } + #[cfg(feature = "tco")] + { + tracing::debug!("execute_tco"); + let handler = $interpreter.get_handler($exec_state.pc).ok_or( + ExecutionError::PcOutOfBounds { + pc: $exec_state.pc, + pc_base: $interpreter.pc_base, + program_len: $interpreter.handlers.len(), + }, + )?; + // SAFETY: + // - handler is generated by Executor, MeteredExecutor traits + // - it is the responsibility of each Executor to ensure handler is safe given a + // valid VM state + unsafe { + handler($interpreter, &mut $exec_state); + } + + if $exec_state + .exit_code + .as_ref() + .is_ok_and(|exit_code| exit_code.is_some()) + { + $ctx::on_terminate(&mut $exec_state); + } + } + Ok(()) + })?; #[cfg(feature = "metrics")] { @@ -201,7 +237,7 @@ where #[inline(always)] pub fn get_handler(&self, pc: u32) -> Option> { let pc_idx = get_pc_index(self.pc_base, pc); - self.handlers.get(pc_idx).map(|x| *x) + self.handlers.get(pc_idx).copied() } } @@ -314,44 +350,7 @@ where ) -> Result, ExecutionError> { let ctx = ExecutionCtx::new(num_insns); let mut exec_state = VmExecState::new(from_state, ctx); - - #[cfg(feature = "metrics")] - let start = std::time::Instant::now(); - #[cfg(feature = "metrics")] - let start_instret = exec_state.instret; - - #[cfg(not(feature = "tco"))] - unsafe { - execute_trampoline(self.pc_base, &mut exec_state, &self.pre_compute_insns); - } - #[cfg(feature = "tco")] - unsafe { - let handler = self - .get_handler(exec_state.pc) - .ok_or(ExecutionError::PcOutOfBounds { - pc: exec_state.pc, - pc_base: self.pc_base, - program_len: self.handlers.len(), - })?; - handler(self, &mut exec_state); - - if exec_state - .exit_code - .as_ref() - .is_ok_and(|exit_code| exit_code.is_some()) - { - ExecutionCtx::on_terminate(&mut exec_state); - } - } - - #[cfg(feature = "metrics")] - { - let elapsed = start.elapsed(); - let insns = exec_state.instret - start_instret; - tracing::info!("instructions_executed={insns}"); - metrics::counter!("execute_e1_insns").absolute(insns); - metrics::gauge!("execute_e1_insn_mi/s").set(insns as f64 / elapsed.as_micros() as f64); - } + run!("execute_e1", self, exec_state, ExecutionCtx); if num_insns.is_some() { check_exit_code(exec_state.exit_code)?; } else { @@ -398,12 +397,7 @@ where ) -> Result<(Vec, VmState), ExecutionError> { let mut exec_state = VmExecState::new(from_state, ctx); // Start execution - execute_with_metrics!( - "execute_metered", - self.pc_base, - &mut exec_state, - &self.pre_compute_insns - ); + run!("execute_metered", self, exec_state, MeteredCtx); check_termination(exec_state.exit_code)?; let VmExecState { vm_state, ctx, .. } = exec_state; Ok((ctx.into_segments(), vm_state)) @@ -443,12 +437,7 @@ where ) -> Result { let mut exec_state = VmExecState::new(from_state, ctx); // Start execution - execute_with_metrics!( - "execute_metered_cost", - self.pc_base, - &mut exec_state, - &self.pre_compute_insns - ); + run!("execute_metered_cost", self, exec_state, MeteredCostCtx); check_exit_code(exec_state.exit_code)?; let VmExecState { ctx, vm_state, .. } = exec_state; let output = MeteredCostExecutionOutput::new(vm_state.instret, ctx.cost); diff --git a/crates/vm/src/utils/stark_utils.rs b/crates/vm/src/utils/stark_utils.rs index fdcdda2df5..de1a834d30 100644 --- a/crates/vm/src/utils/stark_utils.rs +++ b/crates/vm/src/utils/stark_utils.rs @@ -114,8 +114,6 @@ where let exe = exe.into(); let input = input.into(); let metered_ctx = vm.build_metered_ctx(); - // TEMP: for testing - vm.interpreter(&exe)?.execute(input.clone(), None)?; let (segments, _) = vm .metered_interpreter(&exe)? .execute_metered(input.clone(), metered_ctx)?; From 6e2b0a28f9aa720021f9cd1adfa55970f77afbd7 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 19:00:31 -0700 Subject: [PATCH 18/36] chore: fmt --- extensions/native/circuit/src/loadstore/execution.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/extensions/native/circuit/src/loadstore/execution.rs b/extensions/native/circuit/src/loadstore/execution.rs index 941dbe8bcd..c9faac15a3 100644 --- a/extensions/native/circuit/src/loadstore/execution.rs +++ b/extensions/native/circuit/src/loadstore/execution.rs @@ -154,7 +154,9 @@ where let fn_ptr = match local_opcode { NativeLoadStoreOpcode::LOADW => execute_e2_loadw_tco_handler::, NativeLoadStoreOpcode::STOREW => execute_e2_storew_tco_handler::, - NativeLoadStoreOpcode::HINT_STOREW => execute_e2_hint_storew_tco_handler::, + NativeLoadStoreOpcode::HINT_STOREW => { + execute_e2_hint_storew_tco_handler:: + } }; Ok(fn_ptr) From 714c97a512d64f3babf5edfd54cde5dd31ab4d32 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 19:06:21 -0700 Subject: [PATCH 19/36] fix: missing handler for is_eq --- .../algebra/circuit/src/modular_chip/is_eq.rs | 50 +++++++++++-------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/extensions/algebra/circuit/src/modular_chip/is_eq.rs b/extensions/algebra/circuit/src/modular_chip/is_eq.rs index 32fe3c4f94..3a74aba763 100644 --- a/extensions/algebra/circuit/src/modular_chip/is_eq.rs +++ b/extensions/algebra/circuit/src/modular_chip/is_eq.rs @@ -523,6 +523,16 @@ impl { + Ok(if $is_setup { + $execute_impl::<_, _, NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE, true> + } else { + $execute_impl::<_, _, NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE, false> + }) + }; +} + impl Executor for VmModularIsEqualExecutor where @@ -540,15 +550,9 @@ where data: &mut [u8], ) -> Result, StaticProgramError> { let pre_compute: &mut ModularIsEqualPreCompute = data.borrow_mut(); - let is_setup = self.pre_compute_impl(pc, inst, pre_compute)?; - let fn_ptr = if is_setup { - execute_e1_impl::<_, _, NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE, true> - } else { - execute_e1_impl::<_, _, NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE, false> - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, is_setup) } #[cfg(feature = "tco")] @@ -562,15 +566,9 @@ where Ctx: ExecutionCtxTrait, { let pre_compute: &mut ModularIsEqualPreCompute = data.borrow_mut(); - let is_setup = self.pre_compute_impl(pc, inst, pre_compute)?; - let fn_ptr = if is_setup { - execute_e1_tco_handler::<_, _, NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE, true> - } else { - execute_e1_tco_handler::<_, _, NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE, false> - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, is_setup) } } @@ -596,13 +594,24 @@ where pre_compute.chip_idx = chip_idx as u32; let is_setup = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; - let fn_ptr = if is_setup { - execute_e2_impl::<_, _, NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE, true> - } else { - execute_e2_impl::<_, _, NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE, false> - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, is_setup) + } + + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> { + let pre_compute: &mut E2PreCompute> = + data.borrow_mut(); + pre_compute.chip_idx = chip_idx as u32; + + let is_setup = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; + + dispatch!(execute_e2_tco_handler, is_setup) } } @@ -626,6 +635,7 @@ unsafe fn execute_e1_impl< ); } +#[create_tco_handler] unsafe fn execute_e2_impl< F: PrimeField32, CTX: MeteredExecutionCtxTrait, From 8d3d06e2ba8bc06a37201662bf860a90579dae10 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 19:33:09 -0700 Subject: [PATCH 20/36] feat: bigint metered handler --- extensions/bigint/circuit/src/base_alu.rs | 61 +++++++----- extensions/bigint/circuit/src/branch_eq.rs | 45 ++++++--- extensions/bigint/circuit/src/branch_lt.rs | 53 ++++++---- extensions/bigint/circuit/src/less_than.rs | 45 ++++++--- extensions/bigint/circuit/src/mult.rs | 18 ++++ extensions/bigint/circuit/src/shift.rs | 49 ++++++---- .../rv32im/circuit/src/base_alu/execution.rs | 97 +++++-------------- .../rv32im/circuit/src/shift/execution.rs | 3 - 8 files changed, 204 insertions(+), 167 deletions(-) diff --git a/extensions/bigint/circuit/src/base_alu.rs b/extensions/bigint/circuit/src/base_alu.rs index 3379ec0df6..866c624e5d 100644 --- a/extensions/bigint/circuit/src/base_alu.rs +++ b/extensions/bigint/circuit/src/base_alu.rs @@ -34,6 +34,18 @@ struct BaseAluPreCompute { c: u8, } +macro_rules! dispatch { + ($execute_impl:ident, $local_opcode:ident) => { + Ok(match $local_opcode { + BaseAluOpcode::ADD => $execute_impl::<_, _, AddOp>, + BaseAluOpcode::SUB => $execute_impl::<_, _, SubOp>, + BaseAluOpcode::XOR => $execute_impl::<_, _, XorOp>, + BaseAluOpcode::OR => $execute_impl::<_, _, OrOp>, + BaseAluOpcode::AND => $execute_impl::<_, _, AndOp>, + }) + }; +} + impl Executor for Rv32BaseAlu256Executor { fn pre_compute_size(&self) -> usize { size_of::() @@ -50,14 +62,8 @@ impl Executor for Rv32BaseAlu256Executor { { let data: &mut BaseAluPreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = match local_opcode { - BaseAluOpcode::ADD => execute_e1_impl::<_, _, AddOp>, - BaseAluOpcode::SUB => execute_e1_impl::<_, _, SubOp>, - BaseAluOpcode::XOR => execute_e1_impl::<_, _, XorOp>, - BaseAluOpcode::OR => execute_e1_impl::<_, _, OrOp>, - BaseAluOpcode::AND => execute_e1_impl::<_, _, AndOp>, - }; - Ok(fn_ptr) + + dispatch!(execute_e1_impl, local_opcode) } #[cfg(feature = "tco")] @@ -72,14 +78,8 @@ impl Executor for Rv32BaseAlu256Executor { { let data: &mut BaseAluPreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = match local_opcode { - BaseAluOpcode::ADD => execute_e1_tco_handler::<_, _, AddOp>, - BaseAluOpcode::SUB => execute_e1_tco_handler::<_, _, SubOp>, - BaseAluOpcode::XOR => execute_e1_tco_handler::<_, _, XorOp>, - BaseAluOpcode::OR => execute_e1_tco_handler::<_, _, OrOp>, - BaseAluOpcode::AND => execute_e1_tco_handler::<_, _, AndOp>, - }; - Ok(fn_ptr) + + dispatch!(execute_e1_tco_handler, local_opcode) } } @@ -101,14 +101,26 @@ impl MeteredExecutor for Rv32BaseAlu256Executor { let data: &mut E2PreCompute = data.borrow_mut(); data.chip_idx = chip_idx as u32; let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?; - let fn_ptr = match local_opcode { - BaseAluOpcode::ADD => execute_e2_impl::<_, _, AddOp>, - BaseAluOpcode::SUB => execute_e2_impl::<_, _, SubOp>, - BaseAluOpcode::XOR => execute_e2_impl::<_, _, XorOp>, - BaseAluOpcode::OR => execute_e2_impl::<_, _, OrOp>, - BaseAluOpcode::AND => execute_e2_impl::<_, _, AndOp>, - }; - Ok(fn_ptr) + + dispatch!(execute_e2_impl, local_opcode) + } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?; + + dispatch!(execute_e2_tco_handler, local_opcode) } } @@ -137,6 +149,7 @@ unsafe fn execute_e1_impl( execute_e12_impl::(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/bigint/circuit/src/branch_eq.rs b/extensions/bigint/circuit/src/branch_eq.rs index 2fdd5472b2..2c4044b2f8 100644 --- a/extensions/bigint/circuit/src/branch_eq.rs +++ b/extensions/bigint/circuit/src/branch_eq.rs @@ -32,6 +32,15 @@ struct BranchEqPreCompute { b: u8, } +macro_rules! dispatch { + ($execute_impl:ident, $local_opcode:ident) => { + match $local_opcode { + BranchEqualOpcode::BEQ => Ok($execute_impl::<_, _, false>), + BranchEqualOpcode::BNE => Ok($execute_impl::<_, _, true>), + } + }; +} + impl Executor for Rv32BranchEqual256Executor { fn pre_compute_size(&self) -> usize { size_of::() @@ -48,11 +57,7 @@ impl Executor for Rv32BranchEqual256Executor { { let data: &mut BranchEqPreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = match local_opcode { - BranchEqualOpcode::BEQ => execute_e1_impl::<_, _, false>, - BranchEqualOpcode::BNE => execute_e1_impl::<_, _, true>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, local_opcode) } #[cfg(feature = "tco")] @@ -67,11 +72,7 @@ impl Executor for Rv32BranchEqual256Executor { { let data: &mut BranchEqPreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = match local_opcode { - BranchEqualOpcode::BEQ => execute_e1_tco_handler::<_, _, false>, - BranchEqualOpcode::BNE => execute_e1_tco_handler::<_, _, true>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, local_opcode) } } @@ -93,11 +94,24 @@ impl MeteredExecutor for Rv32BranchEqual256Executor { let data: &mut E2PreCompute = data.borrow_mut(); data.chip_idx = chip_idx as u32; let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?; - let fn_ptr = match local_opcode { - BranchEqualOpcode::BEQ => execute_e2_impl::<_, _, false>, - BranchEqualOpcode::BNE => execute_e2_impl::<_, _, true>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, local_opcode) + } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?; + dispatch!(execute_e2_tco_handler, local_opcode) } } @@ -129,6 +143,7 @@ unsafe fn execute_e1_impl(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/bigint/circuit/src/branch_lt.rs b/extensions/bigint/circuit/src/branch_lt.rs index 771183d706..b161fa0091 100644 --- a/extensions/bigint/circuit/src/branch_lt.rs +++ b/extensions/bigint/circuit/src/branch_lt.rs @@ -35,6 +35,17 @@ struct BranchLtPreCompute { b: u8, } +macro_rules! dispatch { + ($execute_impl:ident, $local_opcode:ident) => { + Ok(match $local_opcode { + BranchLessThanOpcode::BLT => $execute_impl::<_, _, BltOp>, + BranchLessThanOpcode::BLTU => $execute_impl::<_, _, BltuOp>, + BranchLessThanOpcode::BGE => $execute_impl::<_, _, BgeOp>, + BranchLessThanOpcode::BGEU => $execute_impl::<_, _, BgeuOp>, + }) + }; +} + impl Executor for Rv32BranchLessThan256Executor { fn pre_compute_size(&self) -> usize { size_of::() @@ -51,13 +62,7 @@ impl Executor for Rv32BranchLessThan256Executor { { let data: &mut BranchLtPreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = match local_opcode { - BranchLessThanOpcode::BLT => execute_e1_impl::<_, _, BltOp>, - BranchLessThanOpcode::BLTU => execute_e1_impl::<_, _, BltuOp>, - BranchLessThanOpcode::BGE => execute_e1_impl::<_, _, BgeOp>, - BranchLessThanOpcode::BGEU => execute_e1_impl::<_, _, BgeuOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, local_opcode) } #[cfg(feature = "tco")] @@ -72,13 +77,7 @@ impl Executor for Rv32BranchLessThan256Executor { { let data: &mut BranchLtPreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = match local_opcode { - BranchLessThanOpcode::BLT => execute_e1_tco_handler::<_, _, BltOp>, - BranchLessThanOpcode::BLTU => execute_e1_tco_handler::<_, _, BltuOp>, - BranchLessThanOpcode::BGE => execute_e1_tco_handler::<_, _, BgeOp>, - BranchLessThanOpcode::BGEU => execute_e1_tco_handler::<_, _, BgeuOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, local_opcode) } } @@ -100,13 +99,24 @@ impl MeteredExecutor for Rv32BranchLessThan256Executor { let data: &mut E2PreCompute = data.borrow_mut(); data.chip_idx = chip_idx as u32; let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?; - let fn_ptr = match local_opcode { - BranchLessThanOpcode::BLT => execute_e2_impl::<_, _, BltOp>, - BranchLessThanOpcode::BLTU => execute_e2_impl::<_, _, BltuOp>, - BranchLessThanOpcode::BGE => execute_e2_impl::<_, _, BgeOp>, - BranchLessThanOpcode::BGEU => execute_e2_impl::<_, _, BgeuOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, local_opcode) + } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?; + dispatch!(execute_e2_tco_handler, local_opcode) } } @@ -137,6 +147,7 @@ unsafe fn execute_e1_impl(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/bigint/circuit/src/less_than.rs b/extensions/bigint/circuit/src/less_than.rs index 554039311d..85bfd152ce 100644 --- a/extensions/bigint/circuit/src/less_than.rs +++ b/extensions/bigint/circuit/src/less_than.rs @@ -32,6 +32,15 @@ struct LessThanPreCompute { c: u8, } +macro_rules! dispatch { + ($execute_impl:ident, $local_opcode:ident) => { + Ok(match $local_opcode { + LessThanOpcode::SLT => $execute_impl::<_, _, false>, + LessThanOpcode::SLTU => $execute_impl::<_, _, true>, + }) + }; +} + impl Executor for Rv32LessThan256Executor { fn pre_compute_size(&self) -> usize { size_of::() @@ -48,11 +57,7 @@ impl Executor for Rv32LessThan256Executor { { let data: &mut LessThanPreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = match local_opcode { - LessThanOpcode::SLT => execute_e1_impl::<_, _, false>, - LessThanOpcode::SLTU => execute_e1_impl::<_, _, true>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, local_opcode) } #[cfg(feature = "tco")] @@ -67,11 +72,7 @@ impl Executor for Rv32LessThan256Executor { { let data: &mut LessThanPreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = match local_opcode { - LessThanOpcode::SLT => execute_e1_tco_handler::<_, _, false>, - LessThanOpcode::SLTU => execute_e1_tco_handler::<_, _, true>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, local_opcode) } } @@ -93,11 +94,24 @@ impl MeteredExecutor for Rv32LessThan256Executor { let data: &mut E2PreCompute = data.borrow_mut(); data.chip_idx = chip_idx as u32; let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?; - let fn_ptr = match local_opcode { - LessThanOpcode::SLT => execute_e2_impl::<_, _, false>, - LessThanOpcode::SLTU => execute_e2_impl::<_, _, true>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, local_opcode) + } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?; + dispatch!(execute_e2_tco_handler, local_opcode) } } @@ -133,6 +147,7 @@ unsafe fn execute_e1_impl(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/bigint/circuit/src/mult.rs b/extensions/bigint/circuit/src/mult.rs index b34762fb26..61d07a0fac 100644 --- a/extensions/bigint/circuit/src/mult.rs +++ b/extensions/bigint/circuit/src/mult.rs @@ -87,6 +87,23 @@ impl MeteredExecutor for Rv32Multiplication256Executor { self.pre_compute_impl(pc, inst, &mut data.data)?; Ok(execute_e2_impl) } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + self.pre_compute_impl(pc, inst, &mut data.data)?; + Ok(execute_e2_tco_handler) + } } #[inline(always)] @@ -115,6 +132,7 @@ unsafe fn execute_e1_impl( execute_e12_impl(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/bigint/circuit/src/shift.rs b/extensions/bigint/circuit/src/shift.rs index aacc44c021..71699d2f53 100644 --- a/extensions/bigint/circuit/src/shift.rs +++ b/extensions/bigint/circuit/src/shift.rs @@ -32,6 +32,16 @@ struct ShiftPreCompute { c: u8, } +macro_rules! dispatch { + ($execute_impl:ident, $local_opcode:ident) => { + Ok(match $local_opcode { + ShiftOpcode::SLL => $execute_impl::<_, _, SllOp>, + ShiftOpcode::SRA => $execute_impl::<_, _, SraOp>, + ShiftOpcode::SRL => $execute_impl::<_, _, SrlOp>, + }) + }; +} + impl Executor for Rv32Shift256Executor { fn pre_compute_size(&self) -> usize { size_of::() @@ -48,12 +58,7 @@ impl Executor for Rv32Shift256Executor { { let data: &mut ShiftPreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = match local_opcode { - ShiftOpcode::SLL => execute_e1_impl::<_, _, SllOp>, - ShiftOpcode::SRA => execute_e1_impl::<_, _, SraOp>, - ShiftOpcode::SRL => execute_e1_impl::<_, _, SrlOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, local_opcode) } #[cfg(feature = "tco")] @@ -68,12 +73,7 @@ impl Executor for Rv32Shift256Executor { { let data: &mut ShiftPreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = match local_opcode { - ShiftOpcode::SLL => execute_e1_tco_handler::<_, _, SllOp>, - ShiftOpcode::SRA => execute_e1_tco_handler::<_, _, SraOp>, - ShiftOpcode::SRL => execute_e1_tco_handler::<_, _, SrlOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, local_opcode) } } @@ -95,12 +95,24 @@ impl MeteredExecutor for Rv32Shift256Executor { let data: &mut E2PreCompute = data.borrow_mut(); data.chip_idx = chip_idx as u32; let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?; - let fn_ptr = match local_opcode { - ShiftOpcode::SLL => execute_e2_impl::<_, _, SllOp>, - ShiftOpcode::SRA => execute_e2_impl::<_, _, SraOp>, - ShiftOpcode::SRL => execute_e2_impl::<_, _, SrlOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, local_opcode) + } + + #[cfg(feature = "tco")] + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &Instruction, + data: &mut [u8], + ) -> Result, StaticProgramError> + where + Ctx: MeteredExecutionCtxTrait, + { + let data: &mut E2PreCompute = data.borrow_mut(); + data.chip_idx = chip_idx as u32; + let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?; + dispatch!(execute_e2_tco_handler, local_opcode) } } @@ -128,6 +140,7 @@ unsafe fn execute_e1_impl( let pre_compute: &ShiftPreCompute = pre_compute.borrow(); execute_e12_impl::(pre_compute, vm_state); } +#[create_tco_handler] unsafe fn execute_e2_impl( pre_compute: &[u8], vm_state: &mut VmExecState, diff --git a/extensions/rv32im/circuit/src/base_alu/execution.rs b/extensions/rv32im/circuit/src/base_alu/execution.rs index f737fc52f3..f2447e4f4e 100644 --- a/extensions/rv32im/circuit/src/base_alu/execution.rs +++ b/extensions/rv32im/circuit/src/base_alu/execution.rs @@ -55,6 +55,28 @@ impl BaseAluExecutor { + Ok( + match ( + $is_imm, + BaseAluOpcode::from_usize($opcode.local_opcode_idx($offset)), + ) { + (true, BaseAluOpcode::ADD) => $execute_impl::<_, _, true, AddOp>, + (false, BaseAluOpcode::ADD) => $execute_impl::<_, _, false, AddOp>, + (true, BaseAluOpcode::SUB) => $execute_impl::<_, _, true, SubOp>, + (false, BaseAluOpcode::SUB) => $execute_impl::<_, _, false, SubOp>, + (true, BaseAluOpcode::XOR) => $execute_impl::<_, _, true, XorOp>, + (false, BaseAluOpcode::XOR) => $execute_impl::<_, _, false, XorOp>, + (true, BaseAluOpcode::OR) => $execute_impl::<_, _, true, OrOp>, + (false, BaseAluOpcode::OR) => $execute_impl::<_, _, false, OrOp>, + (true, BaseAluOpcode::AND) => $execute_impl::<_, _, true, AndOp>, + (false, BaseAluOpcode::AND) => $execute_impl::<_, _, false, AndOp>, + }, + ) + }; +} + impl Executor for BaseAluExecutor where @@ -65,7 +87,6 @@ where size_of::() } - #[inline(always)] fn pre_compute( &self, pc: u32, @@ -77,24 +98,8 @@ where { let data: &mut BaseAluPreCompute = data.borrow_mut(); let is_imm = self.pre_compute_impl(pc, inst, data)?; - let opcode = inst.opcode; - let fn_ptr = match ( - is_imm, - BaseAluOpcode::from_usize(opcode.local_opcode_idx(self.offset)), - ) { - (true, BaseAluOpcode::ADD) => execute_e1_impl::<_, _, true, AddOp>, - (false, BaseAluOpcode::ADD) => execute_e1_impl::<_, _, false, AddOp>, - (true, BaseAluOpcode::SUB) => execute_e1_impl::<_, _, true, SubOp>, - (false, BaseAluOpcode::SUB) => execute_e1_impl::<_, _, false, SubOp>, - (true, BaseAluOpcode::XOR) => execute_e1_impl::<_, _, true, XorOp>, - (false, BaseAluOpcode::XOR) => execute_e1_impl::<_, _, false, XorOp>, - (true, BaseAluOpcode::OR) => execute_e1_impl::<_, _, true, OrOp>, - (false, BaseAluOpcode::OR) => execute_e1_impl::<_, _, false, OrOp>, - (true, BaseAluOpcode::AND) => execute_e1_impl::<_, _, true, AndOp>, - (false, BaseAluOpcode::AND) => execute_e1_impl::<_, _, false, AndOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, is_imm, inst.opcode, self.offset) } #[cfg(feature = "tco")] @@ -109,24 +114,8 @@ where { let data: &mut BaseAluPreCompute = data.borrow_mut(); let is_imm = self.pre_compute_impl(pc, inst, data)?; - let opcode = inst.opcode; - let fn_ptr = match ( - is_imm, - BaseAluOpcode::from_usize(opcode.local_opcode_idx(self.offset)), - ) { - (true, BaseAluOpcode::ADD) => execute_e1_tco_handler::<_, _, true, AddOp>, - (false, BaseAluOpcode::ADD) => execute_e1_tco_handler::<_, _, false, AddOp>, - (true, BaseAluOpcode::SUB) => execute_e1_tco_handler::<_, _, true, SubOp>, - (false, BaseAluOpcode::SUB) => execute_e1_tco_handler::<_, _, false, SubOp>, - (true, BaseAluOpcode::XOR) => execute_e1_tco_handler::<_, _, true, XorOp>, - (false, BaseAluOpcode::XOR) => execute_e1_tco_handler::<_, _, false, XorOp>, - (true, BaseAluOpcode::OR) => execute_e1_tco_handler::<_, _, true, OrOp>, - (false, BaseAluOpcode::OR) => execute_e1_tco_handler::<_, _, false, OrOp>, - (true, BaseAluOpcode::AND) => execute_e1_tco_handler::<_, _, true, AndOp>, - (false, BaseAluOpcode::AND) => execute_e1_tco_handler::<_, _, false, AndOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, is_imm, inst.opcode, self.offset) } } @@ -140,7 +129,6 @@ where size_of::>() } - #[inline(always)] fn metered_pre_compute( &self, chip_idx: usize, @@ -154,28 +142,11 @@ where let data: &mut E2PreCompute = data.borrow_mut(); data.chip_idx = chip_idx as u32; let is_imm = self.pre_compute_impl(pc, inst, &mut data.data)?; - let opcode = inst.opcode; - let fn_ptr = match ( - is_imm, - BaseAluOpcode::from_usize(opcode.local_opcode_idx(self.offset)), - ) { - (true, BaseAluOpcode::ADD) => execute_e2_impl::<_, _, true, AddOp>, - (false, BaseAluOpcode::ADD) => execute_e2_impl::<_, _, false, AddOp>, - (true, BaseAluOpcode::SUB) => execute_e2_impl::<_, _, true, SubOp>, - (false, BaseAluOpcode::SUB) => execute_e2_impl::<_, _, false, SubOp>, - (true, BaseAluOpcode::XOR) => execute_e2_impl::<_, _, true, XorOp>, - (false, BaseAluOpcode::XOR) => execute_e2_impl::<_, _, false, XorOp>, - (true, BaseAluOpcode::OR) => execute_e2_impl::<_, _, true, OrOp>, - (false, BaseAluOpcode::OR) => execute_e2_impl::<_, _, false, OrOp>, - (true, BaseAluOpcode::AND) => execute_e2_impl::<_, _, true, AndOp>, - (false, BaseAluOpcode::AND) => execute_e2_impl::<_, _, false, AndOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, is_imm, inst.opcode, self.offset) } #[cfg(feature = "tco")] - #[inline(always)] fn metered_handler( &self, chip_idx: usize, @@ -189,24 +160,8 @@ where let data: &mut E2PreCompute = data.borrow_mut(); data.chip_idx = chip_idx as u32; let is_imm = self.pre_compute_impl(pc, inst, &mut data.data)?; - let opcode = inst.opcode; - let fn_ptr = match ( - is_imm, - BaseAluOpcode::from_usize(opcode.local_opcode_idx(self.offset)), - ) { - (true, BaseAluOpcode::ADD) => execute_e2_tco_handler::<_, _, true, AddOp>, - (false, BaseAluOpcode::ADD) => execute_e2_tco_handler::<_, _, false, AddOp>, - (true, BaseAluOpcode::SUB) => execute_e2_tco_handler::<_, _, true, SubOp>, - (false, BaseAluOpcode::SUB) => execute_e2_tco_handler::<_, _, false, SubOp>, - (true, BaseAluOpcode::XOR) => execute_e2_tco_handler::<_, _, true, XorOp>, - (false, BaseAluOpcode::XOR) => execute_e2_tco_handler::<_, _, false, XorOp>, - (true, BaseAluOpcode::OR) => execute_e2_tco_handler::<_, _, true, OrOp>, - (false, BaseAluOpcode::OR) => execute_e2_tco_handler::<_, _, false, OrOp>, - (true, BaseAluOpcode::AND) => execute_e2_tco_handler::<_, _, true, AndOp>, - (false, BaseAluOpcode::AND) => execute_e2_tco_handler::<_, _, false, AndOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_tco_handler, is_imm, inst.opcode, self.offset) } } diff --git a/extensions/rv32im/circuit/src/shift/execution.rs b/extensions/rv32im/circuit/src/shift/execution.rs index 2ef0d7ff35..cacf3b9f2e 100644 --- a/extensions/rv32im/circuit/src/shift/execution.rs +++ b/extensions/rv32im/circuit/src/shift/execution.rs @@ -81,7 +81,6 @@ where size_of::() } - #[inline(always)] fn pre_compute( &self, pc: u32, @@ -120,7 +119,6 @@ where size_of::>() } - #[inline(always)] fn metered_pre_compute( &self, chip_idx: usize, @@ -136,7 +134,6 @@ where } #[cfg(feature = "tco")] - #[inline(always)] fn metered_handler( &self, chip_idx: usize, From 312f6d525229597901c2a573b09b2e3c9d4cfa5e Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 20:00:39 -0700 Subject: [PATCH 21/36] refactor: use dispatch! for rv32 executors --- .../rv32im/circuit/src/branch_eq/execution.rs | 38 ++---- .../rv32im/circuit/src/branch_lt/execution.rs | 43 ++---- .../rv32im/circuit/src/hintstore/execution.rs | 33 ++--- .../rv32im/circuit/src/jal_lui/execution.rs | 43 ++---- .../rv32im/circuit/src/jalr/execution.rs | 38 ++---- .../rv32im/circuit/src/less_than/execution.rs | 43 ++---- .../circuit/src/load_sign_extend/execution.rs | 43 ++---- .../rv32im/circuit/src/loadstore/execution.rs | 128 ++++++------------ .../rv32im/circuit/src/mulh/execution.rs | 38 ++---- 9 files changed, 155 insertions(+), 292 deletions(-) diff --git a/extensions/rv32im/circuit/src/branch_eq/execution.rs b/extensions/rv32im/circuit/src/branch_eq/execution.rs index 3aa9be7ff4..70b9dc4d67 100644 --- a/extensions/rv32im/circuit/src/branch_eq/execution.rs +++ b/extensions/rv32im/circuit/src/branch_eq/execution.rs @@ -53,6 +53,16 @@ impl BranchEqualExecutor { } } +macro_rules! dispatch { + ($execute_impl:ident, $is_bne:ident) => { + if $is_bne { + Ok($execute_impl::<_, _, true>) + } else { + Ok($execute_impl::<_, _, false>) + } + }; +} + impl Executor for BranchEqualExecutor where F: PrimeField32, @@ -71,12 +81,7 @@ where ) -> Result, StaticProgramError> { let data: &mut BranchEqualPreCompute = data.borrow_mut(); let is_bne = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = if is_bne { - execute_e1_impl::<_, _, true> - } else { - execute_e1_impl::<_, _, false> - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, is_bne) } #[cfg(feature = "tco")] @@ -91,12 +96,7 @@ where { let data: &mut BranchEqualPreCompute = data.borrow_mut(); let is_bne = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = if is_bne { - execute_e1_tco_handler::<_, _, true> - } else { - execute_e1_tco_handler::<_, _, false> - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, is_bne) } } @@ -121,12 +121,7 @@ where let data: &mut E2PreCompute = data.borrow_mut(); data.chip_idx = chip_idx as u32; let is_bne = self.pre_compute_impl(pc, inst, &mut data.data)?; - let fn_ptr = if is_bne { - execute_e2_impl::<_, _, true> - } else { - execute_e2_impl::<_, _, false> - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, is_bne) } #[cfg(feature = "tco")] @@ -143,12 +138,7 @@ where let data: &mut E2PreCompute = data.borrow_mut(); data.chip_idx = chip_idx as u32; let is_bne = self.pre_compute_impl(pc, inst, &mut data.data)?; - let fn_ptr = if is_bne { - execute_e2_tco_handler::<_, _, true> - } else { - execute_e2_tco_handler::<_, _, false> - }; - Ok(fn_ptr) + dispatch!(execute_e2_tco_handler, is_bne) } } diff --git a/extensions/rv32im/circuit/src/branch_lt/execution.rs b/extensions/rv32im/circuit/src/branch_lt/execution.rs index e85c913674..b555973030 100644 --- a/extensions/rv32im/circuit/src/branch_lt/execution.rs +++ b/extensions/rv32im/circuit/src/branch_lt/execution.rs @@ -21,6 +21,17 @@ struct BranchLePreCompute { b: u8, } +macro_rules! dispatch { + ($execute_impl:ident, $local_opcode:ident) => { + match $local_opcode { + BranchLessThanOpcode::BLT => Ok($execute_impl::<_, _, BltOp>), + BranchLessThanOpcode::BLTU => Ok($execute_impl::<_, _, BltuOp>), + BranchLessThanOpcode::BGE => Ok($execute_impl::<_, _, BgeOp>), + BranchLessThanOpcode::BGEU => Ok($execute_impl::<_, _, BgeuOp>), + } + }; +} + impl BranchLessThanExecutor { @@ -72,13 +83,7 @@ where ) -> Result, StaticProgramError> { let data: &mut BranchLePreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = match local_opcode { - BranchLessThanOpcode::BLT => execute_e1_impl::<_, _, BltOp>, - BranchLessThanOpcode::BLTU => execute_e1_impl::<_, _, BltuOp>, - BranchLessThanOpcode::BGE => execute_e1_impl::<_, _, BgeOp>, - BranchLessThanOpcode::BGEU => execute_e1_impl::<_, _, BgeuOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, local_opcode) } #[cfg(feature = "tco")] @@ -93,13 +98,7 @@ where { let data: &mut BranchLePreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = match local_opcode { - BranchLessThanOpcode::BLT => execute_e1_tco_handler::<_, _, BltOp>, - BranchLessThanOpcode::BLTU => execute_e1_tco_handler::<_, _, BltuOp>, - BranchLessThanOpcode::BGE => execute_e1_tco_handler::<_, _, BgeOp>, - BranchLessThanOpcode::BGEU => execute_e1_tco_handler::<_, _, BgeuOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, local_opcode) } } @@ -125,13 +124,7 @@ where let data: &mut E2PreCompute = data.borrow_mut(); data.chip_idx = chip_idx as u32; let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?; - let fn_ptr = match local_opcode { - BranchLessThanOpcode::BLT => execute_e2_impl::<_, _, BltOp>, - BranchLessThanOpcode::BLTU => execute_e2_impl::<_, _, BltuOp>, - BranchLessThanOpcode::BGE => execute_e2_impl::<_, _, BgeOp>, - BranchLessThanOpcode::BGEU => execute_e2_impl::<_, _, BgeuOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, local_opcode) } #[cfg(feature = "tco")] @@ -148,13 +141,7 @@ where let data: &mut E2PreCompute = data.borrow_mut(); data.chip_idx = chip_idx as u32; let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?; - let fn_ptr = match local_opcode { - BranchLessThanOpcode::BLT => execute_e2_tco_handler::<_, _, BltOp>, - BranchLessThanOpcode::BLTU => execute_e2_tco_handler::<_, _, BltuOp>, - BranchLessThanOpcode::BGE => execute_e2_tco_handler::<_, _, BgeOp>, - BranchLessThanOpcode::BGEU => execute_e2_tco_handler::<_, _, BgeuOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_tco_handler, local_opcode) } } diff --git a/extensions/rv32im/circuit/src/hintstore/execution.rs b/extensions/rv32im/circuit/src/hintstore/execution.rs index fa21859411..41ab992243 100644 --- a/extensions/rv32im/circuit/src/hintstore/execution.rs +++ b/extensions/rv32im/circuit/src/hintstore/execution.rs @@ -60,6 +60,15 @@ impl Rv32HintStoreExecutor { } } +macro_rules! dispatch { + ($execute_impl:ident, $local_opcode:ident) => { + match $local_opcode { + HINT_STOREW => Ok($execute_impl::<_, _, true>), + HINT_BUFFER => Ok($execute_impl::<_, _, false>), + } + }; +} + impl Executor for Rv32HintStoreExecutor where F: PrimeField32, @@ -77,11 +86,7 @@ where ) -> Result, StaticProgramError> { let pre_compute: &mut HintStorePreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(pc, inst, pre_compute)?; - let fn_ptr = match local_opcode { - HINT_STOREW => execute_e1_impl::<_, _, true>, - HINT_BUFFER => execute_e1_impl::<_, _, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, local_opcode) } #[cfg(feature = "tco")] @@ -96,11 +101,7 @@ where { let pre_compute: &mut HintStorePreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(pc, inst, pre_compute)?; - let fn_ptr = match local_opcode { - HINT_STOREW => execute_e1_tco_handler::<_, _, true>, - HINT_BUFFER => execute_e1_tco_handler::<_, _, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, local_opcode) } } @@ -125,11 +126,7 @@ where let pre_compute: &mut E2PreCompute = data.borrow_mut(); pre_compute.chip_idx = chip_idx as u32; let local_opcode = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; - let fn_ptr = match local_opcode { - HINT_STOREW => execute_e2_impl::<_, _, true>, - HINT_BUFFER => execute_e2_impl::<_, _, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, local_opcode) } #[cfg(feature = "tco")] @@ -146,11 +143,7 @@ where let pre_compute: &mut E2PreCompute = data.borrow_mut(); pre_compute.chip_idx = chip_idx as u32; let local_opcode = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; - let fn_ptr = match local_opcode { - HINT_STOREW => execute_e2_tco_handler::<_, _, true>, - HINT_BUFFER => execute_e2_tco_handler::<_, _, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_tco_handler, local_opcode) } } diff --git a/extensions/rv32im/circuit/src/jal_lui/execution.rs b/extensions/rv32im/circuit/src/jal_lui/execution.rs index ef6b4d67b8..6f61b5d0e4 100644 --- a/extensions/rv32im/circuit/src/jal_lui/execution.rs +++ b/extensions/rv32im/circuit/src/jal_lui/execution.rs @@ -43,6 +43,17 @@ impl Rv32JalLuiExecutor { } } +macro_rules! dispatch { + ($execute_impl:ident, $is_jal:ident, $enabled:ident) => { + match ($is_jal, $enabled) { + (true, true) => Ok($execute_impl::<_, _, true, true>), + (true, false) => Ok($execute_impl::<_, _, true, false>), + (false, true) => Ok($execute_impl::<_, _, false, true>), + (false, false) => Ok($execute_impl::<_, _, false, false>), + } + }; +} + impl Executor for Rv32JalLuiExecutor where F: PrimeField32, @@ -60,13 +71,7 @@ where ) -> Result, StaticProgramError> { let data: &mut JalLuiPreCompute = data.borrow_mut(); let (is_jal, enabled) = self.pre_compute_impl(inst, data)?; - let fn_ptr = match (is_jal, enabled) { - (true, true) => execute_e1_impl::<_, _, true, true>, - (true, false) => execute_e1_impl::<_, _, true, false>, - (false, true) => execute_e1_impl::<_, _, false, true>, - (false, false) => execute_e1_impl::<_, _, false, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, is_jal, enabled) } #[cfg(feature = "tco")] @@ -81,13 +86,7 @@ where { let data: &mut JalLuiPreCompute = data.borrow_mut(); let (is_jal, enabled) = self.pre_compute_impl(inst, data)?; - let fn_ptr = match (is_jal, enabled) { - (true, true) => execute_e1_tco_handler::<_, _, true, true>, - (true, false) => execute_e1_tco_handler::<_, _, true, false>, - (false, true) => execute_e1_tco_handler::<_, _, false, true>, - (false, false) => execute_e1_tco_handler::<_, _, false, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, is_jal, enabled) } } @@ -112,13 +111,7 @@ where let data: &mut E2PreCompute = data.borrow_mut(); data.chip_idx = chip_idx as u32; let (is_jal, enabled) = self.pre_compute_impl(inst, &mut data.data)?; - let fn_ptr = match (is_jal, enabled) { - (true, true) => execute_e2_impl::<_, _, true, true>, - (true, false) => execute_e2_impl::<_, _, true, false>, - (false, true) => execute_e2_impl::<_, _, false, true>, - (false, false) => execute_e2_impl::<_, _, false, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, is_jal, enabled) } #[cfg(feature = "tco")] @@ -135,13 +128,7 @@ where let data: &mut E2PreCompute = data.borrow_mut(); data.chip_idx = chip_idx as u32; let (is_jal, enabled) = self.pre_compute_impl(inst, &mut data.data)?; - let fn_ptr = match (is_jal, enabled) { - (true, true) => execute_e2_tco_handler::<_, _, true, true>, - (true, false) => execute_e2_tco_handler::<_, _, true, false>, - (false, true) => execute_e2_tco_handler::<_, _, false, true>, - (false, false) => execute_e2_tco_handler::<_, _, false, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_tco_handler, is_jal, enabled) } } diff --git a/extensions/rv32im/circuit/src/jalr/execution.rs b/extensions/rv32im/circuit/src/jalr/execution.rs index 1942bbf828..e84e200eec 100644 --- a/extensions/rv32im/circuit/src/jalr/execution.rs +++ b/extensions/rv32im/circuit/src/jalr/execution.rs @@ -44,6 +44,16 @@ impl Rv32JalrExecutor { } } +macro_rules! dispatch { + ($execute_impl:ident, $enabled:ident) => { + if $enabled { + Ok($execute_impl::<_, _, true>) + } else { + Ok($execute_impl::<_, _, false>) + } + }; +} + impl Executor for Rv32JalrExecutor where F: PrimeField32, @@ -61,12 +71,7 @@ where ) -> Result, StaticProgramError> { let data: &mut JalrPreCompute = data.borrow_mut(); let enabled = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = if enabled { - execute_e1_impl::<_, _, true> - } else { - execute_e1_impl::<_, _, false> - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, enabled) } #[cfg(feature = "tco")] @@ -81,12 +86,7 @@ where { let data: &mut JalrPreCompute = data.borrow_mut(); let enabled = self.pre_compute_impl(pc, inst, data)?; - let fn_ptr = if enabled { - execute_e1_tco_handler::<_, _, true> - } else { - execute_e1_tco_handler::<_, _, false> - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, enabled) } } @@ -111,12 +111,7 @@ where let data: &mut E2PreCompute = data.borrow_mut(); data.chip_idx = chip_idx as u32; let enabled = self.pre_compute_impl(pc, inst, &mut data.data)?; - let fn_ptr = if enabled { - execute_e2_impl::<_, _, true> - } else { - execute_e2_impl::<_, _, false> - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, enabled) } #[cfg(feature = "tco")] @@ -133,12 +128,7 @@ where let data: &mut E2PreCompute = data.borrow_mut(); data.chip_idx = chip_idx as u32; let enabled = self.pre_compute_impl(pc, inst, &mut data.data)?; - let fn_ptr = if enabled { - execute_e2_tco_handler::<_, _, true> - } else { - execute_e2_tco_handler::<_, _, false> - }; - Ok(fn_ptr) + dispatch!(execute_e2_tco_handler, enabled) } } diff --git a/extensions/rv32im/circuit/src/less_than/execution.rs b/extensions/rv32im/circuit/src/less_than/execution.rs index c1d710c036..7f3560f1e1 100644 --- a/extensions/rv32im/circuit/src/less_than/execution.rs +++ b/extensions/rv32im/circuit/src/less_than/execution.rs @@ -65,6 +65,17 @@ impl LessThanExecutor { + match ($is_imm, $is_sltu) { + (true, true) => Ok($execute_impl::<_, _, true, true>), + (true, false) => Ok($execute_impl::<_, _, true, false>), + (false, true) => Ok($execute_impl::<_, _, false, true>), + (false, false) => Ok($execute_impl::<_, _, false, false>), + } + }; +} + impl Executor for LessThanExecutor where @@ -84,13 +95,7 @@ where ) -> Result, StaticProgramError> { let pre_compute: &mut LessThanPreCompute = data.borrow_mut(); let (is_imm, is_sltu) = self.pre_compute_impl(pc, inst, pre_compute)?; - let fn_ptr = match (is_imm, is_sltu) { - (true, true) => execute_e1_impl::<_, _, true, true>, - (true, false) => execute_e1_impl::<_, _, true, false>, - (false, true) => execute_e1_impl::<_, _, false, true>, - (false, false) => execute_e1_impl::<_, _, false, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, is_imm, is_sltu) } #[cfg(feature = "tco")] @@ -105,13 +110,7 @@ where { let pre_compute: &mut LessThanPreCompute = data.borrow_mut(); let (is_imm, is_sltu) = self.pre_compute_impl(pc, inst, pre_compute)?; - let fn_ptr = match (is_imm, is_sltu) { - (true, true) => execute_e1_tco_handler::<_, _, true, true>, - (true, false) => execute_e1_tco_handler::<_, _, true, false>, - (false, true) => execute_e1_tco_handler::<_, _, false, true>, - (false, false) => execute_e1_tco_handler::<_, _, false, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, is_imm, is_sltu) } } @@ -137,13 +136,7 @@ where let pre_compute: &mut E2PreCompute = data.borrow_mut(); pre_compute.chip_idx = chip_idx as u32; let (is_imm, is_sltu) = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; - let fn_ptr = match (is_imm, is_sltu) { - (true, true) => execute_e2_impl::<_, _, true, true>, - (true, false) => execute_e2_impl::<_, _, true, false>, - (false, true) => execute_e2_impl::<_, _, false, true>, - (false, false) => execute_e2_impl::<_, _, false, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, is_imm, is_sltu) } #[cfg(feature = "tco")] @@ -160,13 +153,7 @@ where let pre_compute: &mut E2PreCompute = data.borrow_mut(); pre_compute.chip_idx = chip_idx as u32; let (is_imm, is_sltu) = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; - let fn_ptr = match (is_imm, is_sltu) { - (true, true) => execute_e2_tco_handler::<_, _, true, true>, - (true, false) => execute_e2_tco_handler::<_, _, true, false>, - (false, true) => execute_e2_tco_handler::<_, _, false, true>, - (false, false) => execute_e2_tco_handler::<_, _, false, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_tco_handler, is_imm, is_sltu) } } diff --git a/extensions/rv32im/circuit/src/load_sign_extend/execution.rs b/extensions/rv32im/circuit/src/load_sign_extend/execution.rs index 9f7286f62d..f8d5686c48 100644 --- a/extensions/rv32im/circuit/src/load_sign_extend/execution.rs +++ b/extensions/rv32im/circuit/src/load_sign_extend/execution.rs @@ -77,6 +77,17 @@ impl LoadSignExtendExecutor { + match ($is_loadb, $enabled) { + (true, true) => Ok($execute_impl::<_, _, true, true>), + (true, false) => Ok($execute_impl::<_, _, true, false>), + (false, true) => Ok($execute_impl::<_, _, false, true>), + (false, false) => Ok($execute_impl::<_, _, false, false>), + } + }; +} + impl Executor for LoadSignExtendExecutor where @@ -95,13 +106,7 @@ where ) -> Result, StaticProgramError> { let pre_compute: &mut LoadSignExtendPreCompute = data.borrow_mut(); let (is_loadb, enabled) = self.pre_compute_impl(pc, inst, pre_compute)?; - let fn_ptr = match (is_loadb, enabled) { - (true, true) => execute_e1_impl::<_, _, true, true>, - (true, false) => execute_e1_impl::<_, _, true, false>, - (false, true) => execute_e1_impl::<_, _, false, true>, - (false, false) => execute_e1_impl::<_, _, false, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, is_loadb, enabled) } #[cfg(feature = "tco")] @@ -116,13 +121,7 @@ where { let pre_compute: &mut LoadSignExtendPreCompute = data.borrow_mut(); let (is_loadb, enabled) = self.pre_compute_impl(pc, inst, pre_compute)?; - let fn_ptr = match (is_loadb, enabled) { - (true, true) => execute_e1_tco_handler::<_, _, true, true>, - (true, false) => execute_e1_tco_handler::<_, _, true, false>, - (false, true) => execute_e1_tco_handler::<_, _, false, true>, - (false, false) => execute_e1_tco_handler::<_, _, false, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, is_loadb, enabled) } } @@ -148,13 +147,7 @@ where let pre_compute: &mut E2PreCompute = data.borrow_mut(); pre_compute.chip_idx = chip_idx as u32; let (is_loadb, enabled) = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; - let fn_ptr = match (is_loadb, enabled) { - (true, true) => execute_e2_impl::<_, _, true, true>, - (true, false) => execute_e2_impl::<_, _, true, false>, - (false, true) => execute_e2_impl::<_, _, false, true>, - (false, false) => execute_e2_impl::<_, _, false, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, is_loadb, enabled) } #[cfg(feature = "tco")] @@ -171,13 +164,7 @@ where let pre_compute: &mut E2PreCompute = data.borrow_mut(); pre_compute.chip_idx = chip_idx as u32; let (is_loadb, enabled) = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; - let fn_ptr = match (is_loadb, enabled) { - (true, true) => execute_e2_tco_handler::<_, _, true, true>, - (true, false) => execute_e2_tco_handler::<_, _, true, false>, - (false, true) => execute_e2_tco_handler::<_, _, false, true>, - (false, false) => execute_e2_tco_handler::<_, _, false, false>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_tco_handler, is_loadb, enabled) } } diff --git a/extensions/rv32im/circuit/src/loadstore/execution.rs b/extensions/rv32im/circuit/src/loadstore/execution.rs index ce20e2b967..c79c9beb32 100644 --- a/extensions/rv32im/circuit/src/loadstore/execution.rs +++ b/extensions/rv32im/circuit/src/loadstore/execution.rs @@ -83,6 +83,32 @@ impl LoadStoreExecutor { } } +macro_rules! dispatch { + ($execute_impl:ident, $local_opcode:ident, $enabled:ident, $is_native_store:ident) => { + match ($local_opcode, $enabled, $is_native_store) { + (LOADW, true, _) => Ok($execute_impl::<_, _, U8, LoadWOp, true>), + (LOADW, false, _) => Ok($execute_impl::<_, _, U8, LoadWOp, false>), + (LOADHU, true, _) => Ok($execute_impl::<_, _, U8, LoadHUOp, true>), + (LOADHU, false, _) => Ok($execute_impl::<_, _, U8, LoadHUOp, false>), + (LOADBU, true, _) => Ok($execute_impl::<_, _, U8, LoadBUOp, true>), + (LOADBU, false, _) => Ok($execute_impl::<_, _, U8, LoadBUOp, false>), + (STOREW, true, false) => Ok($execute_impl::<_, _, U8, StoreWOp, true>), + (STOREW, false, false) => Ok($execute_impl::<_, _, U8, StoreWOp, false>), + (STOREW, true, true) => Ok($execute_impl::<_, _, F, StoreWOp, true>), + (STOREW, false, true) => Ok($execute_impl::<_, _, F, StoreWOp, false>), + (STOREH, true, false) => Ok($execute_impl::<_, _, U8, StoreHOp, true>), + (STOREH, false, false) => Ok($execute_impl::<_, _, U8, StoreHOp, false>), + (STOREH, true, true) => Ok($execute_impl::<_, _, F, StoreHOp, true>), + (STOREH, false, true) => Ok($execute_impl::<_, _, F, StoreHOp, false>), + (STOREB, true, false) => Ok($execute_impl::<_, _, U8, StoreBOp, true>), + (STOREB, false, false) => Ok($execute_impl::<_, _, U8, StoreBOp, false>), + (STOREB, true, true) => Ok($execute_impl::<_, _, F, StoreBOp, true>), + (STOREB, false, true) => Ok($execute_impl::<_, _, F, StoreBOp, false>), + (_, _, _) => unreachable!(), + } + }; +} + impl Executor for LoadStoreExecutor where F: PrimeField32, @@ -102,28 +128,7 @@ where let pre_compute: &mut LoadStorePreCompute = data.borrow_mut(); let (local_opcode, enabled, is_native_store) = self.pre_compute_impl(pc, inst, pre_compute)?; - let fn_ptr = match (local_opcode, enabled, is_native_store) { - (LOADW, true, _) => execute_e1_impl::<_, _, U8, LoadWOp, true>, - (LOADW, false, _) => execute_e1_impl::<_, _, U8, LoadWOp, false>, - (LOADHU, true, _) => execute_e1_impl::<_, _, U8, LoadHUOp, true>, - (LOADHU, false, _) => execute_e1_impl::<_, _, U8, LoadHUOp, false>, - (LOADBU, true, _) => execute_e1_impl::<_, _, U8, LoadBUOp, true>, - (LOADBU, false, _) => execute_e1_impl::<_, _, U8, LoadBUOp, false>, - (STOREW, true, false) => execute_e1_impl::<_, _, U8, StoreWOp, true>, - (STOREW, false, false) => execute_e1_impl::<_, _, U8, StoreWOp, false>, - (STOREW, true, true) => execute_e1_impl::<_, _, F, StoreWOp, true>, - (STOREW, false, true) => execute_e1_impl::<_, _, F, StoreWOp, false>, - (STOREH, true, false) => execute_e1_impl::<_, _, U8, StoreHOp, true>, - (STOREH, false, false) => execute_e1_impl::<_, _, U8, StoreHOp, false>, - (STOREH, true, true) => execute_e1_impl::<_, _, F, StoreHOp, true>, - (STOREH, false, true) => execute_e1_impl::<_, _, F, StoreHOp, false>, - (STOREB, true, false) => execute_e1_impl::<_, _, U8, StoreBOp, true>, - (STOREB, false, false) => execute_e1_impl::<_, _, U8, StoreBOp, false>, - (STOREB, true, true) => execute_e1_impl::<_, _, F, StoreBOp, true>, - (STOREB, false, true) => execute_e1_impl::<_, _, F, StoreBOp, false>, - (_, _, _) => unreachable!(), - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, local_opcode, enabled, is_native_store) } #[cfg(feature = "tco")] @@ -139,28 +144,12 @@ where let pre_compute: &mut LoadStorePreCompute = data.borrow_mut(); let (local_opcode, enabled, is_native_store) = self.pre_compute_impl(pc, inst, pre_compute)?; - let fn_ptr = match (local_opcode, enabled, is_native_store) { - (LOADW, true, _) => execute_e1_tco_handler::<_, _, U8, LoadWOp, true>, - (LOADW, false, _) => execute_e1_tco_handler::<_, _, U8, LoadWOp, false>, - (LOADHU, true, _) => execute_e1_tco_handler::<_, _, U8, LoadHUOp, true>, - (LOADHU, false, _) => execute_e1_tco_handler::<_, _, U8, LoadHUOp, false>, - (LOADBU, true, _) => execute_e1_tco_handler::<_, _, U8, LoadBUOp, true>, - (LOADBU, false, _) => execute_e1_tco_handler::<_, _, U8, LoadBUOp, false>, - (STOREW, true, false) => execute_e1_tco_handler::<_, _, U8, StoreWOp, true>, - (STOREW, false, false) => execute_e1_tco_handler::<_, _, U8, StoreWOp, false>, - (STOREW, true, true) => execute_e1_tco_handler::<_, _, F, StoreWOp, true>, - (STOREW, false, true) => execute_e1_tco_handler::<_, _, F, StoreWOp, false>, - (STOREH, true, false) => execute_e1_tco_handler::<_, _, U8, StoreHOp, true>, - (STOREH, false, false) => execute_e1_tco_handler::<_, _, U8, StoreHOp, false>, - (STOREH, true, true) => execute_e1_tco_handler::<_, _, F, StoreHOp, true>, - (STOREH, false, true) => execute_e1_tco_handler::<_, _, F, StoreHOp, false>, - (STOREB, true, false) => execute_e1_tco_handler::<_, _, U8, StoreBOp, true>, - (STOREB, false, false) => execute_e1_tco_handler::<_, _, U8, StoreBOp, false>, - (STOREB, true, true) => execute_e1_tco_handler::<_, _, F, StoreBOp, true>, - (STOREB, false, true) => execute_e1_tco_handler::<_, _, F, StoreBOp, false>, - (_, _, _) => unreachable!(), - }; - Ok(fn_ptr) + dispatch!( + execute_e1_tco_handler, + local_opcode, + enabled, + is_native_store + ) } } @@ -186,28 +175,7 @@ where pre_compute.chip_idx = chip_idx as u32; let (local_opcode, enabled, is_native_store) = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; - let fn_ptr = match (local_opcode, enabled, is_native_store) { - (LOADW, true, _) => execute_e2_impl::<_, _, U8, LoadWOp, true>, - (LOADW, false, _) => execute_e2_impl::<_, _, U8, LoadWOp, false>, - (LOADHU, true, _) => execute_e2_impl::<_, _, U8, LoadHUOp, true>, - (LOADHU, false, _) => execute_e2_impl::<_, _, U8, LoadHUOp, false>, - (LOADBU, true, _) => execute_e2_impl::<_, _, U8, LoadBUOp, true>, - (LOADBU, false, _) => execute_e2_impl::<_, _, U8, LoadBUOp, false>, - (STOREW, true, false) => execute_e2_impl::<_, _, U8, StoreWOp, true>, - (STOREW, false, false) => execute_e2_impl::<_, _, U8, StoreWOp, false>, - (STOREW, true, true) => execute_e2_impl::<_, _, F, StoreWOp, true>, - (STOREW, false, true) => execute_e2_impl::<_, _, F, StoreWOp, false>, - (STOREH, true, false) => execute_e2_impl::<_, _, U8, StoreHOp, true>, - (STOREH, false, false) => execute_e2_impl::<_, _, U8, StoreHOp, false>, - (STOREH, true, true) => execute_e2_impl::<_, _, F, StoreHOp, true>, - (STOREH, false, true) => execute_e2_impl::<_, _, F, StoreHOp, false>, - (STOREB, true, false) => execute_e2_impl::<_, _, U8, StoreBOp, true>, - (STOREB, false, false) => execute_e2_impl::<_, _, U8, StoreBOp, false>, - (STOREB, true, true) => execute_e2_impl::<_, _, F, StoreBOp, true>, - (STOREB, false, true) => execute_e2_impl::<_, _, F, StoreBOp, false>, - (_, _, _) => unreachable!(), - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, local_opcode, enabled, is_native_store) } #[cfg(feature = "tco")] @@ -225,28 +193,12 @@ where pre_compute.chip_idx = chip_idx as u32; let (local_opcode, enabled, is_native_store) = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?; - let fn_ptr = match (local_opcode, enabled, is_native_store) { - (LOADW, true, _) => execute_e2_tco_handler::<_, _, U8, LoadWOp, true>, - (LOADW, false, _) => execute_e2_tco_handler::<_, _, U8, LoadWOp, false>, - (LOADHU, true, _) => execute_e2_tco_handler::<_, _, U8, LoadHUOp, true>, - (LOADHU, false, _) => execute_e2_tco_handler::<_, _, U8, LoadHUOp, false>, - (LOADBU, true, _) => execute_e2_tco_handler::<_, _, U8, LoadBUOp, true>, - (LOADBU, false, _) => execute_e2_tco_handler::<_, _, U8, LoadBUOp, false>, - (STOREW, true, false) => execute_e2_tco_handler::<_, _, U8, StoreWOp, true>, - (STOREW, false, false) => execute_e2_tco_handler::<_, _, U8, StoreWOp, false>, - (STOREW, true, true) => execute_e2_tco_handler::<_, _, F, StoreWOp, true>, - (STOREW, false, true) => execute_e2_tco_handler::<_, _, F, StoreWOp, false>, - (STOREH, true, false) => execute_e2_tco_handler::<_, _, U8, StoreHOp, true>, - (STOREH, false, false) => execute_e2_tco_handler::<_, _, U8, StoreHOp, false>, - (STOREH, true, true) => execute_e2_tco_handler::<_, _, F, StoreHOp, true>, - (STOREH, false, true) => execute_e2_tco_handler::<_, _, F, StoreHOp, false>, - (STOREB, true, false) => execute_e2_tco_handler::<_, _, U8, StoreBOp, true>, - (STOREB, false, false) => execute_e2_tco_handler::<_, _, U8, StoreBOp, false>, - (STOREB, true, true) => execute_e2_tco_handler::<_, _, F, StoreBOp, true>, - (STOREB, false, true) => execute_e2_tco_handler::<_, _, F, StoreBOp, false>, - (_, _, _) => unreachable!(), - }; - Ok(fn_ptr) + dispatch!( + execute_e2_tco_handler, + local_opcode, + enabled, + is_native_store + ) } } diff --git a/extensions/rv32im/circuit/src/mulh/execution.rs b/extensions/rv32im/circuit/src/mulh/execution.rs index 0310b6516c..03f127e93c 100644 --- a/extensions/rv32im/circuit/src/mulh/execution.rs +++ b/extensions/rv32im/circuit/src/mulh/execution.rs @@ -42,6 +42,16 @@ impl MulHExecutor { + match $local_opcode { + MulHOpcode::MULH => Ok($execute_impl::<_, _, MulHOp>), + MulHOpcode::MULHSU => Ok($execute_impl::<_, _, MulHSuOp>), + MulHOpcode::MULHU => Ok($execute_impl::<_, _, MulHUOp>), + } + }; +} + impl Executor for MulHExecutor where @@ -61,12 +71,7 @@ where ) -> Result, StaticProgramError> { let pre_compute: &mut MulHPreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(inst, pre_compute)?; - let fn_ptr = match local_opcode { - MulHOpcode::MULH => execute_e1_impl::<_, _, MulHOp>, - MulHOpcode::MULHSU => execute_e1_impl::<_, _, MulHSuOp>, - MulHOpcode::MULHU => execute_e1_impl::<_, _, MulHUOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_impl, local_opcode) } #[cfg(feature = "tco")] @@ -81,12 +86,7 @@ where { let pre_compute: &mut MulHPreCompute = data.borrow_mut(); let local_opcode = self.pre_compute_impl(inst, pre_compute)?; - let fn_ptr = match local_opcode { - MulHOpcode::MULH => execute_e1_tco_handler::<_, _, MulHOp>, - MulHOpcode::MULHSU => execute_e1_tco_handler::<_, _, MulHSuOp>, - MulHOpcode::MULHU => execute_e1_tco_handler::<_, _, MulHUOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e1_tco_handler, local_opcode) } } @@ -112,12 +112,7 @@ where let pre_compute: &mut E2PreCompute = data.borrow_mut(); pre_compute.chip_idx = chip_idx as u32; let local_opcode = self.pre_compute_impl(inst, &mut pre_compute.data)?; - let fn_ptr = match local_opcode { - MulHOpcode::MULH => execute_e2_impl::<_, _, MulHOp>, - MulHOpcode::MULHSU => execute_e2_impl::<_, _, MulHSuOp>, - MulHOpcode::MULHU => execute_e2_impl::<_, _, MulHUOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_impl, local_opcode) } #[cfg(feature = "tco")] @@ -134,12 +129,7 @@ where let pre_compute: &mut E2PreCompute = data.borrow_mut(); pre_compute.chip_idx = chip_idx as u32; let local_opcode = self.pre_compute_impl(inst, &mut pre_compute.data)?; - let fn_ptr = match local_opcode { - MulHOpcode::MULH => execute_e2_tco_handler::<_, _, MulHOp>, - MulHOpcode::MULHSU => execute_e2_tco_handler::<_, _, MulHSuOp>, - MulHOpcode::MULHU => execute_e2_tco_handler::<_, _, MulHUOp>, - }; - Ok(fn_ptr) + dispatch!(execute_e2_tco_handler, local_opcode) } } From 9a2df6e43ec160db419dcdd891822ccd20c5f3d9 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 22:31:41 -0700 Subject: [PATCH 22/36] cleanup: turn off tco feature --- crates/cli/src/lib.rs | 3 +++ crates/sdk/Cargo.toml | 2 +- crates/sdk/src/lib.rs | 2 ++ crates/vm/Cargo.toml | 2 +- crates/vm/derive/Cargo.toml | 3 +++ crates/vm/derive/src/lib.rs | 12 ++++++++++-- crates/vm/derive/src/tco.rs | 3 +++ crates/vm/src/arch/interpreter.rs | 2 +- extensions/algebra/circuit/Cargo.toml | 9 ++++----- extensions/algebra/circuit/src/execution.rs | 1 + extensions/algebra/circuit/src/modular_chip/is_eq.rs | 1 + extensions/bigint/circuit/Cargo.toml | 2 +- extensions/ecc/circuit/Cargo.toml | 9 ++++----- extensions/ecc/tests/Cargo.toml | 1 + extensions/keccak256/circuit/Cargo.toml | 2 +- extensions/native/circuit/Cargo.toml | 2 +- extensions/native/circuit/src/branch_eq/execution.rs | 1 - extensions/native/circuit/src/castf/execution.rs | 1 - .../native/circuit/src/field_arithmetic/execution.rs | 1 - .../native/circuit/src/field_extension/execution.rs | 1 - extensions/native/circuit/src/fri/execution.rs | 1 - .../native/circuit/src/jal_rangecheck/execution.rs | 1 - extensions/native/circuit/src/loadstore/execution.rs | 1 - extensions/native/circuit/src/poseidon2/execution.rs | 1 - extensions/pairing/circuit/Cargo.toml | 9 ++++----- extensions/rv32im/circuit/Cargo.toml | 2 +- extensions/rv32im/tests/Cargo.toml | 2 +- extensions/sha256/circuit/Cargo.toml | 4 ++-- rust-toolchain.toml | 4 ++-- 29 files changed, 48 insertions(+), 37 deletions(-) diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index 1b58c45920..4516207ade 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -1,3 +1,6 @@ +#![cfg_attr(feature = "tco", allow(incomplete_features))] +#![cfg_attr(feature = "tco", feature(explicit_tail_calls))] + pub mod commands; pub mod default; pub mod input; diff --git a/crates/sdk/Cargo.toml b/crates/sdk/Cargo.toml index e2a624d4c8..f5d2f9b141 100644 --- a/crates/sdk/Cargo.toml +++ b/crates/sdk/Cargo.toml @@ -61,7 +61,7 @@ rrs-lib.workspace = true num-bigint.workspace = true [features] -default = ["parallel", "jemalloc", "tco"] +default = ["parallel", "jemalloc"] evm-prove = [ "openvm-continuations/static-verifier", "openvm-native-recursion/evm-prove", diff --git a/crates/sdk/src/lib.rs b/crates/sdk/src/lib.rs index 69ea7fc5d9..1df612540f 100644 --- a/crates/sdk/src/lib.rs +++ b/crates/sdk/src/lib.rs @@ -1,3 +1,5 @@ +#![cfg_attr(feature = "tco", allow(incomplete_features))] +#![cfg_attr(feature = "tco", feature(explicit_tail_calls))] use std::{ borrow::Borrow, fs::read, diff --git a/crates/vm/Cargo.toml b/crates/vm/Cargo.toml index de7be1c6f3..8e7517cf92 100644 --- a/crates/vm/Cargo.toml +++ b/crates/vm/Cargo.toml @@ -50,7 +50,7 @@ openvm-native-compiler.workspace = true openvm-rv32im-transpiler.workspace = true [features] -default = ["parallel", "jemalloc", "tco"] +default = ["parallel", "jemalloc"] parallel = [ "openvm-stark-backend/parallel", "dashmap/rayon", diff --git a/crates/vm/derive/Cargo.toml b/crates/vm/derive/Cargo.toml index 2fb38626fa..f3fd65e2e9 100644 --- a/crates/vm/derive/Cargo.toml +++ b/crates/vm/derive/Cargo.toml @@ -14,3 +14,6 @@ syn = { version = "2.0", features = ["parsing", "full"] } quote = "1.0" proc-macro2 = "1.0" itertools = { workspace = true } + +[features] +tco = [] diff --git a/crates/vm/derive/src/lib.rs b/crates/vm/derive/src/lib.rs index 7b0dfa4eed..d5ad943699 100644 --- a/crates/vm/derive/src/lib.rs +++ b/crates/vm/derive/src/lib.rs @@ -9,6 +9,7 @@ use syn::{ GenericParam, Ident, Meta, Token, }; +#[cfg(feature = "tco")] mod tco; #[proc_macro_derive(PreflightExecutor)] @@ -772,7 +773,7 @@ fn parse_executor_type( /// /// Place this attribute above a function definition: /// ``` -/// #[create_tco_handler = "handler_name"] +/// #[create_tco_handler] /// unsafe fn execute_e1_impl( /// pre_compute: &[u8], /// state: &mut VmExecState, @@ -793,5 +794,12 @@ fn parse_executor_type( /// check. #[proc_macro_attribute] pub fn create_tco_handler(_attr: TokenStream, item: TokenStream) -> TokenStream { - tco::tco_impl(item) + #[cfg(feature = "tco")] + { + tco::tco_impl(item) + } + #[cfg(not(feature = "tco"))] + { + item + } } diff --git a/crates/vm/derive/src/tco.rs b/crates/vm/derive/src/tco.rs index 5fa5b848f1..a7e4d1e593 100644 --- a/crates/vm/derive/src/tco.rs +++ b/crates/vm/derive/src/tco.rs @@ -67,6 +67,9 @@ pub fn tco_impl(item: TokenStream) -> TokenStream { } let next_handler = next_handler.unwrap_unchecked(); + // NOTE: `become` is a keyword that requires Rust Nightly. + // It is part of the explicit tail calls RFC: + // which is still incomplete. become next_handler(interpreter, exec_state) } }; diff --git a/crates/vm/src/arch/interpreter.rs b/crates/vm/src/arch/interpreter.rs index bc0b8fe27c..101fa9cc31 100644 --- a/crates/vm/src/arch/interpreter.rs +++ b/crates/vm/src/arch/interpreter.rs @@ -50,7 +50,7 @@ pub struct InterpretedInstance<'a, F, Ctx> { pre_compute_max_size: usize, /// Handler function pointers for tail call optimization. #[cfg(feature = "tco")] - handlers: Vec>, // *const ()>, + handlers: Vec>, pc_base: u32, pc_start: u32, diff --git a/extensions/algebra/circuit/Cargo.toml b/extensions/algebra/circuit/Cargo.toml index 345c132d03..4e4e7f7357 100644 --- a/extensions/algebra/circuit/Cargo.toml +++ b/extensions/algebra/circuit/Cargo.toml @@ -7,11 +7,6 @@ edition.workspace = true homepage.workspace = true repository.workspace = true -[features] -default = ["jemalloc", "tco"] -tco = ["openvm-rv32im-circuit/tco"] -jemalloc = ["openvm-circuit/jemalloc"] - [dependencies] openvm-circuit-primitives = { workspace = true } openvm-circuit-primitives-derive = { workspace = true } @@ -43,5 +38,9 @@ openvm-rv32-adapters = { workspace = true, features = ["test-utils"] } openvm-pairing-guest = { workspace = true, features = ["halo2curves"] } test-case = { workspace = true } +[features] +default = [] +tco = ["openvm-rv32im-circuit/tco"] + [package.metadata.cargo-shear] ignored = ["derive_more"] diff --git a/extensions/algebra/circuit/src/execution.rs b/extensions/algebra/circuit/src/execution.rs index 0243c5bd5f..e626e08f59 100644 --- a/extensions/algebra/circuit/src/execution.rs +++ b/extensions/algebra/circuit/src/execution.rs @@ -336,6 +336,7 @@ impl( &self, chip_idx: usize, diff --git a/extensions/algebra/circuit/src/modular_chip/is_eq.rs b/extensions/algebra/circuit/src/modular_chip/is_eq.rs index 3a74aba763..9569fac4c0 100644 --- a/extensions/algebra/circuit/src/modular_chip/is_eq.rs +++ b/extensions/algebra/circuit/src/modular_chip/is_eq.rs @@ -598,6 +598,7 @@ where dispatch!(execute_e2_impl, is_setup) } + #[cfg(feature = "tco")] fn metered_handler( &self, chip_idx: usize, diff --git a/extensions/bigint/circuit/Cargo.toml b/extensions/bigint/circuit/Cargo.toml index 94e8ad0fab..a745dd33cc 100644 --- a/extensions/bigint/circuit/Cargo.toml +++ b/extensions/bigint/circuit/Cargo.toml @@ -33,7 +33,7 @@ test-case.workspace = true alloy-primitives = { version = "1.2.1" } [features] -default = ["parallel", "jemalloc", "tco"] +default = ["parallel", "jemalloc"] parallel = ["openvm-circuit/parallel"] test-utils = ["openvm-circuit/test-utils"] tco = ["openvm-rv32im-circuit/tco"] diff --git a/extensions/ecc/circuit/Cargo.toml b/extensions/ecc/circuit/Cargo.toml index 4962c1ca40..c6ed2f14e1 100644 --- a/extensions/ecc/circuit/Cargo.toml +++ b/extensions/ecc/circuit/Cargo.toml @@ -7,11 +7,6 @@ edition.workspace = true homepage.workspace = true repository.workspace = true -[features] -default = ["jemalloc", "tco"] -tco = ["openvm-algebra-circuit/tco"] -jemalloc = ["openvm-circuit/jemalloc"] - [dependencies] openvm-circuit-primitives = { workspace = true } openvm-circuit-derive = { workspace = true } @@ -44,5 +39,9 @@ openvm-circuit = { workspace = true, features = ["test-utils"] } openvm-rv32-adapters = { workspace = true, features = ["test-utils"] } lazy_static = { workspace = true } +[features] +default = [] +tco = ["openvm-algebra-circuit/tco"] + [package.metadata.cargo-shear] ignored = ["rand"] diff --git a/extensions/ecc/tests/Cargo.toml b/extensions/ecc/tests/Cargo.toml index 5f90e77fa4..7ce8df032c 100644 --- a/extensions/ecc/tests/Cargo.toml +++ b/extensions/ecc/tests/Cargo.toml @@ -28,3 +28,4 @@ halo2curves-axiom = { workspace = true } [features] default = ["parallel"] parallel = ["openvm-circuit/parallel"] +tco = ["openvm-ecc-circuit/tco"] diff --git a/extensions/keccak256/circuit/Cargo.toml b/extensions/keccak256/circuit/Cargo.toml index 177f055d73..6b603a88f9 100644 --- a/extensions/keccak256/circuit/Cargo.toml +++ b/extensions/keccak256/circuit/Cargo.toml @@ -34,7 +34,7 @@ openvm-circuit = { workspace = true, features = ["test-utils"] } hex.workspace = true [features] -default = ["parallel", "jemalloc", "tco"] +default = ["parallel", "jemalloc"] parallel = ["openvm-circuit/parallel"] test-utils = ["openvm-circuit/test-utils"] tco = ["openvm-rv32im-circuit/tco"] diff --git a/extensions/native/circuit/Cargo.toml b/extensions/native/circuit/Cargo.toml index da21b3b78f..8661cc9091 100644 --- a/extensions/native/circuit/Cargo.toml +++ b/extensions/native/circuit/Cargo.toml @@ -39,7 +39,7 @@ test-case = { workspace = true } test-log = { workspace = true } [features] -default = ["parallel", "jemalloc", "tco"] +default = ["parallel", "jemalloc"] tco = ["openvm-rv32im-circuit/tco"] jemalloc = ["openvm-circuit/jemalloc"] parallel = ["openvm-circuit/parallel"] diff --git a/extensions/native/circuit/src/branch_eq/execution.rs b/extensions/native/circuit/src/branch_eq/execution.rs index b344e09d4e..003ac378bd 100644 --- a/extensions/native/circuit/src/branch_eq/execution.rs +++ b/extensions/native/circuit/src/branch_eq/execution.rs @@ -158,7 +158,6 @@ where } #[cfg(feature = "tco")] - #[inline(always)] fn metered_handler( &self, chip_idx: usize, diff --git a/extensions/native/circuit/src/castf/execution.rs b/extensions/native/circuit/src/castf/execution.rs index d5b2446952..99d52913ad 100644 --- a/extensions/native/circuit/src/castf/execution.rs +++ b/extensions/native/circuit/src/castf/execution.rs @@ -121,7 +121,6 @@ where } #[cfg(feature = "tco")] - #[inline(always)] fn metered_handler( &self, chip_idx: usize, diff --git a/extensions/native/circuit/src/field_arithmetic/execution.rs b/extensions/native/circuit/src/field_arithmetic/execution.rs index 747d555e02..38c6453763 100644 --- a/extensions/native/circuit/src/field_arithmetic/execution.rs +++ b/extensions/native/circuit/src/field_arithmetic/execution.rs @@ -199,7 +199,6 @@ where } #[cfg(feature = "tco")] - #[inline(always)] fn metered_handler( &self, chip_idx: usize, diff --git a/extensions/native/circuit/src/field_extension/execution.rs b/extensions/native/circuit/src/field_extension/execution.rs index 0c566cc24a..2752a05c44 100644 --- a/extensions/native/circuit/src/field_extension/execution.rs +++ b/extensions/native/circuit/src/field_extension/execution.rs @@ -136,7 +136,6 @@ where } #[cfg(feature = "tco")] - #[inline(always)] fn metered_handler( &self, chip_idx: usize, diff --git a/extensions/native/circuit/src/fri/execution.rs b/extensions/native/circuit/src/fri/execution.rs index 4152b9c50c..13297cc260 100644 --- a/extensions/native/circuit/src/fri/execution.rs +++ b/extensions/native/circuit/src/fri/execution.rs @@ -132,7 +132,6 @@ where } #[cfg(feature = "tco")] - #[inline(always)] fn metered_handler( &self, chip_idx: usize, diff --git a/extensions/native/circuit/src/jal_rangecheck/execution.rs b/extensions/native/circuit/src/jal_rangecheck/execution.rs index 2ca2b0cfba..296aa536af 100644 --- a/extensions/native/circuit/src/jal_rangecheck/execution.rs +++ b/extensions/native/circuit/src/jal_rangecheck/execution.rs @@ -173,7 +173,6 @@ where } #[cfg(feature = "tco")] - #[inline(always)] fn metered_handler( &self, chip_idx: usize, diff --git a/extensions/native/circuit/src/loadstore/execution.rs b/extensions/native/circuit/src/loadstore/execution.rs index c9faac15a3..b0b3e4b726 100644 --- a/extensions/native/circuit/src/loadstore/execution.rs +++ b/extensions/native/circuit/src/loadstore/execution.rs @@ -138,7 +138,6 @@ where } #[cfg(feature = "tco")] - #[inline(always)] fn metered_handler( &self, chip_idx: usize, diff --git a/extensions/native/circuit/src/poseidon2/execution.rs b/extensions/native/circuit/src/poseidon2/execution.rs index bed8ac40cc..d6739ff133 100644 --- a/extensions/native/circuit/src/poseidon2/execution.rs +++ b/extensions/native/circuit/src/poseidon2/execution.rs @@ -247,7 +247,6 @@ impl MeteredExecutor } #[cfg(feature = "tco")] - #[inline(always)] fn metered_handler( &self, chip_idx: usize, diff --git a/extensions/pairing/circuit/Cargo.toml b/extensions/pairing/circuit/Cargo.toml index 4201c0fedd..46565dd58e 100644 --- a/extensions/pairing/circuit/Cargo.toml +++ b/extensions/pairing/circuit/Cargo.toml @@ -7,11 +7,6 @@ edition.workspace = true homepage.workspace = true repository.workspace = true -[features] -default = ["jemalloc", "tco"] -tco = ["openvm-rv32im-circuit/tco", "openvm-ecc-circuit/tco"] -jemalloc = ["openvm-circuit/jemalloc"] - [dependencies] openvm-circuit-primitives = { workspace = true } openvm-circuit-derive = { workspace = true } @@ -54,3 +49,7 @@ openvm-pairing-guest = { workspace = true, features = [ "bls12_381", "bn254", ] } + +[features] +default = [] +tco = ["openvm-rv32im-circuit/tco", "openvm-ecc-circuit/tco"] diff --git a/extensions/rv32im/circuit/Cargo.toml b/extensions/rv32im/circuit/Cargo.toml index b073479fa8..1b6d021f35 100644 --- a/extensions/rv32im/circuit/Cargo.toml +++ b/extensions/rv32im/circuit/Cargo.toml @@ -33,7 +33,7 @@ openvm-circuit = { workspace = true, features = ["test-utils"] } test-case.workspace = true [features] -default = ["parallel", "jemalloc", "tco"] +default = ["parallel", "jemalloc"] parallel = ["openvm-circuit/parallel"] test-utils = ["openvm-circuit/test-utils", "dep:openvm-stark-sdk"] tco = ["openvm-circuit/tco"] diff --git a/extensions/rv32im/tests/Cargo.toml b/extensions/rv32im/tests/Cargo.toml index 372538f791..412c8cffb0 100644 --- a/extensions/rv32im/tests/Cargo.toml +++ b/extensions/rv32im/tests/Cargo.toml @@ -23,6 +23,6 @@ serde = { workspace = true, features = ["alloc"] } strum.workspace = true [features] -default = ["parallel", "tco"] +default = ["parallel"] parallel = ["openvm-circuit/parallel"] tco = ["openvm-rv32im-circuit/tco"] diff --git a/extensions/sha256/circuit/Cargo.toml b/extensions/sha256/circuit/Cargo.toml index e677dc87a3..5cdfb143c0 100644 --- a/extensions/sha256/circuit/Cargo.toml +++ b/extensions/sha256/circuit/Cargo.toml @@ -29,10 +29,10 @@ openvm-stark-sdk = { workspace = true } openvm-circuit = { workspace = true, features = ["test-utils"] } [features] -default = ["parallel", "jemalloc", "tco"] -tco = ["openvm-rv32im-circuit/tco"] +default = ["parallel", "jemalloc"] parallel = ["openvm-circuit/parallel"] test-utils = ["openvm-circuit/test-utils"] +tco = ["openvm-rv32im-circuit/tco"] # performance features: mimalloc = ["openvm-circuit/mimalloc"] jemalloc = ["openvm-circuit/jemalloc"] diff --git a/rust-toolchain.toml b/rust-toolchain.toml index a3c5cc1709..651e7fa7e6 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,5 +1,5 @@ [toolchain] -# channel = "1.86.0" +channel = "1.86.0" # To use the "tco" feature, switch to Rust nightly: -channel = "nightly-2025-08-19" +# channel = "nightly-2025-08-19" components = ["clippy", "rustfmt"] From 09e05c9643b081d3fb2de551e8a963d98a56a0ba Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 22:51:42 -0700 Subject: [PATCH 23/36] ci: switch benchmarks to use tco --- .github/workflows/benchmark-call.yml | 2 +- .github/workflows/benchmarks-execute.yml | 9 +++++---- ci/scripts/bench.py | 5 ++++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/benchmark-call.yml b/.github/workflows/benchmark-call.yml index 71aa9bc54a..7ad9687a01 100644 --- a/.github/workflows/benchmark-call.yml +++ b/.github/workflows/benchmark-call.yml @@ -107,7 +107,7 @@ on: env: S3_METRICS_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/github/metrics S3_FLAMEGRAPHS_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/github/flamegraphs - FEATURE_FLAGS: "metrics,parallel,nightly-features" + FEATURE_FLAGS: "metrics,parallel,nightly-features,tco" INPUT_ARGS: "" CARGO_NET_GIT_FETCH_WITH_CLI: "true" diff --git a/.github/workflows/benchmarks-execute.yml b/.github/workflows/benchmarks-execute.yml index 11f94fd411..17273fc7af 100644 --- a/.github/workflows/benchmarks-execute.yml +++ b/.github/workflows/benchmarks-execute.yml @@ -27,6 +27,7 @@ env: CARGO_TERM_COLOR: always S3_FIXTURES_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/fixtures JEMALLOC_SYS_WITH_MALLOC_CONF: "retain:true,background_thread:true,metadata_thp:always,thp:always,dirty_decay_ms:10000,muzzy_decay_ms:10000,abort_conf:true" + TOOLCHAIN: "+nightly-2025-08-19" jobs: codspeed-walltime-benchmarks: @@ -65,12 +66,12 @@ jobs: - name: Build benchmarks working-directory: benchmarks/execute - run: cargo codspeed build --profile maxperf + run: cargo $TOOLCHAIN codspeed build --profile maxperf - name: Run benchmarks uses: CodSpeedHQ/action@v3 with: working-directory: benchmarks/execute - run: cargo codspeed run + run: cargo $TOOLCHAIN codspeed run token: ${{ secrets.CODSPEED_TOKEN }} codspeed-instrumentation-benchmarks: @@ -110,10 +111,10 @@ jobs: - name: Build benchmarks working-directory: benchmarks/execute - run: cargo codspeed build + run: cargo $TOOLCHAIN codspeed build - name: Run benchmarks uses: CodSpeedHQ/action@v3 with: working-directory: benchmarks/execute - run: cargo codspeed run + run: cargo $TOOLCHAIN codspeed run token: ${{ secrets.CODSPEED_TOKEN }} diff --git a/ci/scripts/bench.py b/ci/scripts/bench.py index 9bf87f622f..8584999d48 100644 --- a/ci/scripts/bench.py +++ b/ci/scripts/bench.py @@ -15,9 +15,12 @@ def run_cargo_command( kzg_params_dir, profile="release" ): + toolchain = "+1.86" + if "tco" in feature_flags: + toolchain = "+nightly-2025-08-19" # Command to run (for best performance but slower builds, use --profile maxperf) command = [ - "cargo", "run", "--no-default-features", "-p", "openvm-benchmarks-prove", "--bin", bin_name, "--profile", profile, "--features", ",".join(feature_flags), "--" + "cargo", toolchain, "run", "--no-default-features", "-p", "openvm-benchmarks-prove", "--bin", bin_name, "--profile", profile, "--features", ",".join(feature_flags), "--" ] if app_log_blowup is not None: From fae54536248088c1e9a234d4a8df53bf84488e77 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 23:08:15 -0700 Subject: [PATCH 24/36] fix: proc-macro also needs tco feature --- crates/vm/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/vm/Cargo.toml b/crates/vm/Cargo.toml index 8e7517cf92..7cd7e2ca26 100644 --- a/crates/vm/Cargo.toml +++ b/crates/vm/Cargo.toml @@ -70,7 +70,7 @@ stark-debug = [] test-utils = ["openvm-stark-sdk"] # Tail call optimizations. This requires nightly for the `become` keyword (https://github.com/rust-lang/rust/pull/144232). # However tail call elimination is still an incomplete feature in Rust, so the `tco` feature remains experimental until then. -tco = [] +tco = ["openvm-circuit-derive/tco"] # performance features: mimalloc = ["openvm-stark-backend/mimalloc"] jemalloc = ["openvm-stark-backend/jemalloc"] From a4566a7beb7e9b2d11d6f45e8207429aaebcde60 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 23:14:21 -0700 Subject: [PATCH 25/36] chore: lint --- benchmarks/prove/Cargo.toml | 2 +- crates/vm/derive/src/lib.rs | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/benchmarks/prove/Cargo.toml b/benchmarks/prove/Cargo.toml index b7ee8ee3ac..786be53ae3 100644 --- a/benchmarks/prove/Cargo.toml +++ b/benchmarks/prove/Cargo.toml @@ -33,7 +33,7 @@ metrics.workspace = true [dev-dependencies] [features] -default = ["parallel", "jemalloc", "metrics", "evm"] +default = ["parallel", "jemalloc", "metrics"] metrics = ["openvm-sdk/metrics"] tco = ["openvm-sdk/tco"] perf-metrics = ["openvm-sdk/perf-metrics", "metrics"] diff --git a/crates/vm/derive/src/lib.rs b/crates/vm/derive/src/lib.rs index d5ad943699..5a252678bf 100644 --- a/crates/vm/derive/src/lib.rs +++ b/crates/vm/derive/src/lib.rs @@ -563,7 +563,6 @@ fn generate_config_traits_impl(name: &Ident, inner: &DataStruct) -> syn::Result< .iter() .filter(|f| f.attrs.iter().any(|attr| attr.path().is_ident("config"))) .exactly_one() - .ok() .expect("Exactly one field must have the #[config] attribute"); let (source_name, source_name_upper) = gen_name_with_uppercase_idents(source_field.ident.as_ref().unwrap()); From ce6981c131c9eb7c267adc98d81c641f19724953 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Wed, 20 Aug 2025 23:36:33 -0700 Subject: [PATCH 26/36] ci: benchmarks with tco feature --- .github/workflows/benchmarks-execute.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/benchmarks-execute.yml b/.github/workflows/benchmarks-execute.yml index 17273fc7af..739f68cc8c 100644 --- a/.github/workflows/benchmarks-execute.yml +++ b/.github/workflows/benchmarks-execute.yml @@ -2,8 +2,7 @@ name: "Execution benchmarks" on: push: - # TODO(ayush): remove after feat/new-execution is merged - branches: ["main", "feat/new-execution"] + branches: ["main"] pull_request: types: [opened, synchronize, reopened, labeled] branches: ["**"] @@ -66,12 +65,12 @@ jobs: - name: Build benchmarks working-directory: benchmarks/execute - run: cargo $TOOLCHAIN codspeed build --profile maxperf + run: cargo $TOOLCHAIN codspeed build --profile maxperf --features tco - name: Run benchmarks uses: CodSpeedHQ/action@v3 with: working-directory: benchmarks/execute - run: cargo $TOOLCHAIN codspeed run + run: cargo $TOOLCHAIN codspeed run --features tco token: ${{ secrets.CODSPEED_TOKEN }} codspeed-instrumentation-benchmarks: @@ -111,10 +110,10 @@ jobs: - name: Build benchmarks working-directory: benchmarks/execute - run: cargo $TOOLCHAIN codspeed build + run: cargo $TOOLCHAIN codspeed build --features tco - name: Run benchmarks uses: CodSpeedHQ/action@v3 with: working-directory: benchmarks/execute - run: cargo $TOOLCHAIN codspeed run + run: cargo $TOOLCHAIN codspeed run --features tco token: ${{ secrets.CODSPEED_TOKEN }} From 6759a94f98efbb7240e9d0e0b1847871ffdac1fb Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Thu, 21 Aug 2025 11:17:24 -0700 Subject: [PATCH 27/36] feat: use macro's "tco" feature --- crates/vm/derive/src/lib.rs | 146 +++++++++++++++++++++++------------- crates/vm/derive/src/tco.rs | 1 - 2 files changed, 93 insertions(+), 54 deletions(-) diff --git a/crates/vm/derive/src/lib.rs b/crates/vm/derive/src/lib.rs index 5a252678bf..33051b3532 100644 --- a/crates/vm/derive/src/lib.rs +++ b/crates/vm/derive/src/lib.rs @@ -158,6 +158,25 @@ pub fn executor_derive(input: TokenStream) -> TokenStream { where_clause .predicates .push(syn::parse_quote! { #inner_ty: ::openvm_circuit::arch::Executor }); + + // We use the macro's feature to decide whether to generate the impl or not. This avoids + // the target crate needing the "tco" feature defined. + #[cfg(feature = "tco")] + let handler = quote! { + fn handler( + &self, + pc: u32, + inst: &::openvm_circuit::arch::instructions::instruction::Instruction, + data: &mut [u8], + ) -> Result<::openvm_circuit::arch::Handler, ::openvm_circuit::arch::StaticProgramError> + where + Ctx: ::openvm_circuit::arch::execution_mode::ExecutionCtxTrait, { + self.0.handler(pc, inst, data) + } + }; + #[cfg(not(feature = "tco"))] + let handler = quote! {}; + quote! { impl #impl_generics ::openvm_circuit::arch::Executor for #name #ty_generics #where_clause { #[inline(always)] @@ -176,17 +195,7 @@ pub fn executor_derive(input: TokenStream) -> TokenStream { self.0.pre_compute(pc, inst, data) } - #[cfg(feature = "tco")] - fn handler( - &self, - pc: u32, - inst: &::openvm_circuit::arch::instructions::instruction::Instruction, - data: &mut [u8], - ) -> Result<::openvm_circuit::arch::Handler, ::openvm_circuit::arch::StaticProgramError> - where - Ctx: ::openvm_circuit::arch::execution_mode::ExecutionCtxTrait, { - self.0.handler(pc, inst, data) - } + #handler } } .into() @@ -220,7 +229,7 @@ pub fn executor_derive(input: TokenStream) -> TokenStream { }); // Use full path ::openvm_circuit... so it can be used either within or outside the vm // crate. Assume F is already generic of the field. - let (pre_compute_size_arms, pre_compute_arms, handler_arms, where_predicates): (Vec<_>, Vec<_>, Vec<_>, Vec<_>) = multiunzip(variants.iter().map(|(variant_name, field)| { + let (pre_compute_size_arms, pre_compute_arms, _handler_arms, where_predicates): (Vec<_>, Vec<_>, Vec<_>, Vec<_>) = multiunzip(variants.iter().map(|(variant_name, field)| { let field_ty = &field.ty; let pre_compute_size_arm = quote! { #name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::Executor<#first_ty_generic>>::pre_compute_size(x) @@ -240,6 +249,26 @@ pub fn executor_derive(input: TokenStream) -> TokenStream { for predicate in where_predicates { where_clause.predicates.push(predicate); } + // We use the macro's feature to decide whether to generate the impl or not. This avoids + // the target crate needing the "tco" feature defined. + #[cfg(feature = "tco")] + let handler = quote! { + fn handler( + &self, + pc: u32, + instruction: &::openvm_circuit::arch::instructions::instruction::Instruction, + data: &mut [u8], + ) -> Result<::openvm_circuit::arch::Handler, ::openvm_circuit::arch::StaticProgramError> + where + Ctx: ::openvm_circuit::arch::execution_mode::ExecutionCtxTrait, { + match self { + #(#_handler_arms,)* + } + } + }; + #[cfg(not(feature = "tco"))] + let handler = quote! {}; + // Don't use these ty_generics because it might have extra "F" let (impl_generics, _, where_clause) = new_generics.split_for_impl(); @@ -266,19 +295,7 @@ pub fn executor_derive(input: TokenStream) -> TokenStream { } } - #[cfg(feature = "tco")] - fn handler( - &self, - pc: u32, - instruction: &::openvm_circuit::arch::instructions::instruction::Instruction, - data: &mut [u8], - ) -> Result<::openvm_circuit::arch::Handler, ::openvm_circuit::arch::StaticProgramError> - where - Ctx: ::openvm_circuit::arch::execution_mode::ExecutionCtxTrait, { - match self { - #(#handler_arms,)* - } - } + #handler } } .into() @@ -314,6 +331,26 @@ pub fn metered_executor_derive(input: TokenStream) -> TokenStream { where_clause .predicates .push(syn::parse_quote! { #inner_ty: ::openvm_circuit::arch::MeteredExecutor }); + + // We use the macro's feature to decide whether to generate the impl or not. This avoids + // the target crate needing the "tco" feature defined. + #[cfg(feature = "tco")] + let metered_handler = quote! { + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + inst: &::openvm_circuit::arch::instructions::instruction::Instruction, + data: &mut [u8], + ) -> Result<::openvm_circuit::arch::Handler, ::openvm_circuit::arch::StaticProgramError> + where + Ctx: ::openvm_circuit::arch::execution_mode::MeteredExecutionCtxTrait, { + self.0.metered_handler(chip_idx, pc, inst, data) + } + }; + #[cfg(not(feature = "tco"))] + let metered_handler = quote! {}; + quote! { impl #impl_generics ::openvm_circuit::arch::MeteredExecutor for #name #ty_generics #where_clause { #[inline(always)] @@ -332,18 +369,7 @@ pub fn metered_executor_derive(input: TokenStream) -> TokenStream { Ctx: ::openvm_circuit::arch::execution_mode::MeteredExecutionCtxTrait, { self.0.metered_pre_compute(chip_idx, pc, inst, data) } - #[cfg(feature = "tco")] - fn metered_handler( - &self, - chip_idx: usize, - pc: u32, - inst: &::openvm_circuit::arch::instructions::instruction::Instruction, - data: &mut [u8], - ) -> Result<::openvm_circuit::arch::Handler, ::openvm_circuit::arch::StaticProgramError> - where - Ctx: ::openvm_circuit::arch::execution_mode::MeteredExecutionCtxTrait, { - self.0.metered_handler(chip_idx, pc, inst, data) - } + #metered_handler } } .into() @@ -377,7 +403,7 @@ pub fn metered_executor_derive(input: TokenStream) -> TokenStream { }); // Use full path ::openvm_circuit... so it can be used either within or outside the vm // crate. Assume F is already generic of the field. - let (pre_compute_size_arms, metered_pre_compute_arms, metered_handler_arms, where_predicates): (Vec<_>, Vec<_>, Vec<_>, Vec<_>) = multiunzip(variants.iter().map(|(variant_name, field)| { + let (pre_compute_size_arms, metered_pre_compute_arms, _metered_handler_arms, where_predicates): (Vec<_>, Vec<_>, Vec<_>, Vec<_>) = multiunzip(variants.iter().map(|(variant_name, field)| { let field_ty = &field.ty; let pre_compute_size_arm = quote! { #name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::MeteredExecutor<#first_ty_generic>>::metered_pre_compute_size(x) @@ -400,6 +426,28 @@ pub fn metered_executor_derive(input: TokenStream) -> TokenStream { // Don't use these ty_generics because it might have extra "F" let (impl_generics, _, where_clause) = new_generics.split_for_impl(); + // We use the macro's feature to decide whether to generate the impl or not. This avoids + // the target crate needing the "tco" feature defined. + #[cfg(feature = "tco")] + let metered_handler = quote! { + fn metered_handler( + &self, + chip_idx: usize, + pc: u32, + instruction: &::openvm_circuit::arch::instructions::instruction::Instruction, + data: &mut [u8], + ) -> Result<::openvm_circuit::arch::Handler, ::openvm_circuit::arch::StaticProgramError> + where + Ctx: ::openvm_circuit::arch::execution_mode::MeteredExecutionCtxTrait, + { + match self { + #(#_metered_handler_arms,)* + } + } + }; + #[cfg(not(feature = "tco"))] + let metered_handler = quote! {}; + quote! { impl #impl_generics ::openvm_circuit::arch::MeteredExecutor<#first_ty_generic> for #name #ty_generics #where_clause { #[inline(always)] @@ -424,20 +472,7 @@ pub fn metered_executor_derive(input: TokenStream) -> TokenStream { } } - #[cfg(feature = "tco")] - fn metered_handler( - &self, - chip_idx: usize, - pc: u32, - instruction: &::openvm_circuit::arch::instructions::instruction::Instruction, - data: &mut [u8], - ) -> Result<::openvm_circuit::arch::Handler, ::openvm_circuit::arch::StaticProgramError> - where - Ctx: ::openvm_circuit::arch::execution_mode::MeteredExecutionCtxTrait, { - match self { - #(#metered_handler_arms,)* - } - } + #metered_handler } } .into() @@ -563,7 +598,12 @@ fn generate_config_traits_impl(name: &Ident, inner: &DataStruct) -> syn::Result< .iter() .filter(|f| f.attrs.iter().any(|attr| attr.path().is_ident("config"))) .exactly_one() - .expect("Exactly one field must have the #[config] attribute"); + .map_err(|_| { + syn::Error::new( + name.span(), + "Exactly one field must have the #[config] attribute", + ) + })?; let (source_name, source_name_upper) = gen_name_with_uppercase_idents(source_field.ident.as_ref().unwrap()); diff --git a/crates/vm/derive/src/tco.rs b/crates/vm/derive/src/tco.rs index a7e4d1e593..b01d67b6a7 100644 --- a/crates/vm/derive/src/tco.rs +++ b/crates/vm/derive/src/tco.rs @@ -37,7 +37,6 @@ pub fn tco_impl(item: TokenStream) -> TokenStream { // Generate the TCO handler function let handler_fn = quote! { - #[cfg(feature = "tco")] #[inline(never)] unsafe fn #handler_name #handler_generics ( interpreter: &::openvm_circuit::arch::interpreter::InterpretedInstance<#f_type, #ctx_type>, From 68b12f502e683e6ed9538006635b09722fd27504 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Thu, 21 Aug 2025 11:36:19 -0700 Subject: [PATCH 28/36] chore: add instrumentation for VmState::initial --- crates/vm/src/arch/state.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/vm/src/arch/state.rs b/crates/vm/src/arch/state.rs index ae65844fa6..4dd93e5bb4 100644 --- a/crates/vm/src/arch/state.rs +++ b/crates/vm/src/arch/state.rs @@ -6,6 +6,7 @@ use std::{ use getset::WithSetters; use openvm_instructions::exe::SparseMemoryImage; use rand::{rngs::StdRng, SeedableRng}; +use tracing::instrument; use super::{create_memory_image, ExecutionError, Streams}; #[cfg(feature = "metrics")] @@ -54,6 +55,7 @@ impl VmState { } impl VmState { + #[instrument(name = "VmState::initial", level = "debug", skip_all)] pub fn initial( system_config: &SystemConfig, init_memory: &SparseMemoryImage, From d265fe9ed4c9d91eeae5f1fd893d8ae83006f1e2 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Thu, 21 Aug 2025 12:03:40 -0700 Subject: [PATCH 29/36] chore: cargo shear --- Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index fe477f1dbc..e34754d7b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -229,7 +229,6 @@ dashmap = "6.1.0" memmap2 = "0.9.5" libc = "0.2.175" tracing-subscriber = { version = "0.3.17", features = ["std", "env-filter"] } -paste = "1.0.15" # default-features = false for no_std for use in guest programs itertools = { version = "0.14.0", default-features = false } From 64c05716c683c8c78b6de995a5dbb6621724deaf Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Thu, 21 Aug 2025 15:11:51 -0700 Subject: [PATCH 30/36] fix: ci --- .github/workflows/benchmarks-execute.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/benchmarks-execute.yml b/.github/workflows/benchmarks-execute.yml index 68a7f96896..5108c7d48d 100644 --- a/.github/workflows/benchmarks-execute.yml +++ b/.github/workflows/benchmarks-execute.yml @@ -71,7 +71,7 @@ jobs: uses: CodSpeedHQ/action@v3 with: working-directory: benchmarks/execute - run: cargo $TOOLCHAIN codspeed run --features tco + run: cargo $TOOLCHAIN codspeed run token: ${{ secrets.CODSPEED_TOKEN }} codspeed-instrumentation-benchmarks: @@ -116,5 +116,5 @@ jobs: uses: CodSpeedHQ/action@v3 with: working-directory: benchmarks/execute - run: cargo $TOOLCHAIN codspeed run --features tco + run: cargo $TOOLCHAIN codspeed run token: ${{ secrets.CODSPEED_TOKEN }} From ce7c03724721fb8eef6e25ec990b9da1567bf742 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Thu, 21 Aug 2025 19:27:55 -0700 Subject: [PATCH 31/36] perf: remove pc_base from pc_idx calc --- crates/vm/derive/src/tco.rs | 4 +- crates/vm/src/arch/execution.rs | 8 +- crates/vm/src/arch/interpreter.rs | 119 ++++++++------------ crates/vm/src/arch/interpreter_preflight.rs | 28 ++--- 4 files changed, 70 insertions(+), 89 deletions(-) diff --git a/crates/vm/derive/src/tco.rs b/crates/vm/derive/src/tco.rs index b01d67b6a7..9019acd1e6 100644 --- a/crates/vm/derive/src/tco.rs +++ b/crates/vm/derive/src/tco.rs @@ -48,6 +48,8 @@ pub fn tco_impl(item: TokenStream) -> TokenStream { ) #where_clause { + use ::openvm_circuit::arch::ExecutionError; + let pre_compute = interpreter.get_pre_compute(exec_state.vm_state.pc); #execute_call; @@ -61,7 +63,7 @@ pub fn tco_impl(item: TokenStream) -> TokenStream { // exec_state.pc should have been updated by execute_impl at this point let next_handler = interpreter.get_handler(exec_state.vm_state.pc); if next_handler.is_none() { - exec_state.exit_code = Err(interpreter.pc_out_of_bounds_err(exec_state.vm_state.pc)); + exec_state.exit_code = Err(ExecutionError::PcOutOfBounds (exec_state.vm_state.pc)); return; } let next_handler = next_handler.unwrap_unchecked(); diff --git a/crates/vm/src/arch/execution.rs b/crates/vm/src/arch/execution.rs index 0eb67c8305..22aeaeeadd 100644 --- a/crates/vm/src/arch/execution.rs +++ b/crates/vm/src/arch/execution.rs @@ -28,12 +28,8 @@ use crate::{ pub enum ExecutionError { #[error("execution failed at pc {pc}, err: {msg}")] Fail { pc: u32, msg: &'static str }, - #[error("pc {pc} out of bounds for program of length {program_len}, with pc_base {pc_base}")] - PcOutOfBounds { - pc: u32, - pc_base: u32, - program_len: usize, - }, + #[error("pc {0} out of bounds")] + PcOutOfBounds(u32), #[error("unreachable instruction at pc {0}")] Unreachable(u32), #[error("at pc {pc}, opcode {opcode} was not enabled")] diff --git a/crates/vm/src/arch/interpreter.rs b/crates/vm/src/arch/interpreter.rs index 5ae8d36541..d1b160044f 100644 --- a/crates/vm/src/arch/interpreter.rs +++ b/crates/vm/src/arch/interpreter.rs @@ -1,6 +1,7 @@ use std::{ alloc::{alloc, dealloc, handle_alloc_error, Layout}, borrow::{Borrow, BorrowMut}, + iter::repeat_n, ptr::NonNull, }; @@ -44,7 +45,9 @@ pub struct InterpretedInstance<'a, F, Ctx> { #[allow(dead_code)] pre_compute_buf: AlignedBuf, /// Instruction table of function pointers and pointers to the pre-computed buffer. Indexed by - /// `pc_index = (pc - pc_base) / DEFAULT_PC_STEP`. + /// `pc_index = pc / DEFAULT_PC_STEP`. + /// SAFETY: The first `pc_base / DEFAULT_PC_STEP` entries will be unreachable. We do this to + /// avoid needing to subtract `pc_base` during runtime. pre_compute_insns: Vec>, #[cfg(feature = "tco")] pre_compute_max_size: usize, @@ -52,7 +55,6 @@ pub struct InterpretedInstance<'a, F, Ctx> { #[cfg(feature = "tco")] handlers: Vec>, - pc_base: u32, pc_start: u32, init_memory: SparseMemoryImage, @@ -84,22 +86,14 @@ macro_rules! run { #[cfg(not(feature = "tco"))] unsafe { tracing::debug!("execute_trampoline"); - execute_trampoline( - $interpreter.pc_base, - &mut $exec_state, - &$interpreter.pre_compute_insns, - ); + execute_trampoline(&mut $exec_state, &$interpreter.pre_compute_insns); } #[cfg(feature = "tco")] { tracing::debug!("execute_tco"); - let handler = $interpreter.get_handler($exec_state.pc).ok_or( - ExecutionError::PcOutOfBounds { - pc: $exec_state.pc, - pc_base: $interpreter.pc_base, - program_len: $interpreter.handlers.len(), - }, - )?; + let handler = $interpreter + .get_handler($exec_state.pc) + .ok_or(ExecutionError::PcOutOfBounds($exec_state.pc))?; // SAFETY: // - handler is generated by Executor, MeteredExecutor traits // - it is the responsibility of each Executor to ensure handler is safe given a @@ -151,7 +145,7 @@ where { let program = &exe.program; let pre_compute_max_size = get_pre_compute_max_size(program, inventory); - let mut pre_compute_buf = alloc_pre_compute_buf(program.len(), pre_compute_max_size); + let mut pre_compute_buf = alloc_pre_compute_buf(program, pre_compute_max_size); let mut split_pre_compute_buf = split_pre_compute_buf(program, &mut pre_compute_buf, pre_compute_max_size); let pre_compute_insns = get_pre_compute_instructions::( @@ -159,19 +153,17 @@ where inventory, &mut split_pre_compute_buf, )?; - let pc_base = program.pc_base; let pc_start = exe.pc_start; let init_memory = exe.init_memory.clone(); #[cfg(feature = "tco")] - let handlers = program - .instructions_and_debug_infos - .iter() + let handlers = repeat_n(&None, get_pc_index(program.pc_base)) + .chain(program.instructions_and_debug_infos.iter()) .zip_eq(split_pre_compute_buf.iter_mut()) .enumerate() .map( |(pc_idx, (inst_opt, pre_compute))| -> Result, StaticProgramError> { if let Some((inst, _)) = inst_opt { - let pc = pc_base + pc_idx as u32 * DEFAULT_PC_STEP; + let pc = pc_idx as u32 * DEFAULT_PC_STEP; if get_system_opcode_handler::(inst, pre_compute).is_some() { Ok(terminate_execute_e12_tco_handler) } else { @@ -191,7 +183,6 @@ where system_config: inventory.config().clone(), pre_compute_buf, pre_compute_insns, - pc_base, pc_start, init_memory, #[cfg(feature = "tco")] @@ -209,7 +200,7 @@ where #[cfg(feature = "tco")] #[inline(always)] pub fn get_pre_compute(&self, pc: u32) -> &[u8] { - let pc_idx = get_pc_index(self.pc_base, pc); + let pc_idx = get_pc_index(pc); // SAFETY: // - we assume that pc is in bounds // - pre_compute_buf is allocated for pre_compute_max_size * program_len bytes, with each @@ -228,18 +219,10 @@ where } } - pub fn pc_out_of_bounds_err(&self, pc: u32) -> ExecutionError { - ExecutionError::PcOutOfBounds { - pc, - pc_base: self.pc_base, - program_len: self.pre_compute_insns.len(), - } - } - #[cfg(feature = "tco")] #[inline(always)] pub fn get_handler(&self, pc: u32) -> Option> { - let pc_idx = get_pc_index(self.pc_base, pc); + let pc_idx = get_pc_index(pc); self.handlers.get(pc_idx).copied() } } @@ -261,7 +244,7 @@ where { let program = &exe.program; let pre_compute_max_size = get_metered_pre_compute_max_size(program, inventory); - let mut pre_compute_buf = alloc_pre_compute_buf(program.len(), pre_compute_max_size); + let mut pre_compute_buf = alloc_pre_compute_buf(program, pre_compute_max_size); let mut split_pre_compute_buf = split_pre_compute_buf(program, &mut pre_compute_buf, pre_compute_max_size); let pre_compute_insns = get_metered_pre_compute_instructions::( @@ -271,19 +254,17 @@ where &mut split_pre_compute_buf, )?; - let pc_base = program.pc_base; let pc_start = exe.pc_start; let init_memory = exe.init_memory.clone(); #[cfg(feature = "tco")] - let handlers = program - .instructions_and_debug_infos - .iter() + let handlers = repeat_n(&None, get_pc_index(program.pc_base)) + .chain(program.instructions_and_debug_infos.iter()) .zip_eq(split_pre_compute_buf.iter_mut()) .enumerate() .map( |(pc_idx, (inst_opt, pre_compute))| -> Result, StaticProgramError> { if let Some((inst, _)) = inst_opt { - let pc = pc_base + pc_idx as u32 * DEFAULT_PC_STEP; + let pc = pc_idx as u32 * DEFAULT_PC_STEP; if get_system_opcode_handler::(inst, pre_compute).is_some() { Ok(terminate_execute_e12_tco_handler) } else { @@ -305,7 +286,6 @@ where system_config: inventory.config().clone(), pre_compute_buf, pre_compute_insns, - pc_base, pc_start, init_memory, #[cfg(feature = "tco")] @@ -448,8 +428,10 @@ where } } -fn alloc_pre_compute_buf(program_len: usize, pre_compute_max_size: usize) -> AlignedBuf { - let buf_len = program_len * pre_compute_max_size; +fn alloc_pre_compute_buf(program: &Program, pre_compute_max_size: usize) -> AlignedBuf { + let base_idx = get_pc_index(program.pc_base); + let padded_program_len = base_idx + program.instructions_and_debug_infos.len(); + let buf_len = padded_program_len * pre_compute_max_size; AlignedBuf::uninit(buf_len, pre_compute_max_size) } @@ -458,8 +440,9 @@ fn split_pre_compute_buf<'a, F>( pre_compute_buf: &'a mut AlignedBuf, pre_compute_max_size: usize, ) -> Vec<&'a mut [u8]> { - let program_len = program.instructions_and_debug_infos.len(); - let buf_len = program_len * pre_compute_max_size; + let base_idx = get_pc_index(program.pc_base); + let padded_program_len = base_idx + program.instructions_and_debug_infos.len(); + let buf_len = padded_program_len * pre_compute_max_size; // SAFETY: // - pre_compute_buf.ptr was allocated with exactly buf_len bytes // - lifetime 'a ensures the returned slices don't outlive the AlignedBuf @@ -475,7 +458,6 @@ fn split_pre_compute_buf<'a, F>( /// The `fn_ptrs` pointer to pre-computed buffers that outlive this function. #[inline(always)] unsafe fn execute_trampoline( - pc_base: u32, vm_state: &mut VmExecState, fn_ptrs: &[PreComputeInstruction], ) { @@ -487,16 +469,12 @@ unsafe fn execute_trampoline( if Ctx::should_suspend(vm_state) { break; } - let pc_index = get_pc_index(pc_base, vm_state.pc); + let pc_index = get_pc_index(vm_state.pc); if let Some(inst) = fn_ptrs.get(pc_index) { // SAFETY: pre_compute assumed to live long enough unsafe { (inst.handler)(inst.pre_compute, vm_state) }; } else { - vm_state.exit_code = Err(ExecutionError::PcOutOfBounds { - pc: vm_state.pc, - pc_base, - program_len: fn_ptrs.len(), - }); + vm_state.exit_code = Err(ExecutionError::PcOutOfBounds(vm_state.pc)); } } if vm_state @@ -509,8 +487,8 @@ unsafe fn execute_trampoline( } #[inline(always)] -pub fn get_pc_index(pc_base: u32, pc: u32) -> usize { - ((pc - pc_base) / DEFAULT_PC_STEP) as usize +pub fn get_pc_index(pc: u32) -> usize { + (pc / DEFAULT_PC_STEP) as usize } /// Bytes allocated according to the given Layout @@ -647,15 +625,19 @@ where Ctx: ExecutionCtxTrait, E: Executor, { - program - .instructions_and_debug_infos - .iter() + let unreachable_handler: ExecuteFunc = |_, vm_state| { + vm_state.exit_code = Err(ExecutionError::Unreachable(vm_state.pc)); + }; + + repeat_n(&None, get_pc_index(program.pc_base)) + .chain(program.instructions_and_debug_infos.iter()) .zip_eq(pre_compute.iter_mut()) .enumerate() .map(|(i, (inst_opt, buf))| { - // SAFETY: we cast to raw pointer and then borrow to remove the lifetime. This is safe - // only in the current context because `buf` comes from `pre_compute_buf` which will - // outlive the returned `PreComputeInstruction`s. + // SAFETY: we cast to raw pointer and then borrow to remove the lifetime. This + // is safe only in the current context because `buf` comes + // from `pre_compute_buf` which will outlive the returned + // `PreComputeInstruction`s. let buf: &mut [u8] = unsafe { &mut *(*buf as *mut [u8]) }; let pre_inst = if let Some((inst, _)) = inst_opt { tracing::trace!("get_pre_compute_instruction {inst:?}"); @@ -679,9 +661,7 @@ where } else { // Dead instruction at this pc PreComputeInstruction { - handler: |_, vm_state| { - vm_state.exit_code = Err(ExecutionError::Unreachable(vm_state.pc)); - }, + handler: unreachable_handler, pre_compute: buf, } }; @@ -701,15 +681,18 @@ where Ctx: MeteredExecutionCtxTrait, E: MeteredExecutor, { - program - .instructions_and_debug_infos - .iter() + let unreachable_handler: ExecuteFunc = |_, vm_state| { + vm_state.exit_code = Err(ExecutionError::Unreachable(vm_state.pc)); + }; + repeat_n(&None, get_pc_index(program.pc_base)) + .chain(program.instructions_and_debug_infos.iter()) .zip_eq(pre_compute.iter_mut()) .enumerate() .map(|(i, (inst_opt, buf))| { - // SAFETY: we cast to raw pointer and then borrow to remove the lifetime. This is safe - // only in the current context because `buf` comes from `pre_compute_buf` which will - // outlive the returned `PreComputeInstruction`s. + // SAFETY: we cast to raw pointer and then borrow to remove the lifetime. This + // is safe only in the current context because `buf` comes + // from `pre_compute_buf` which will outlive the returned + // `PreComputeInstruction`s. let buf: &mut [u8] = unsafe { &mut *(*buf as *mut [u8]) }; let pre_inst = if let Some((inst, _)) = inst_opt { tracing::trace!("get_metered_pre_compute_instruction {inst:?}"); @@ -738,9 +721,7 @@ where } } else { PreComputeInstruction { - handler: |_, vm_state| { - vm_state.exit_code = Err(ExecutionError::Unreachable(vm_state.pc)); - }, + handler: unreachable_handler, pre_compute: buf, } }; diff --git a/crates/vm/src/arch/interpreter_preflight.rs b/crates/vm/src/arch/interpreter_preflight.rs index 7fb8006157..1b5530b2cb 100644 --- a/crates/vm/src/arch/interpreter_preflight.rs +++ b/crates/vm/src/arch/interpreter_preflight.rs @@ -1,4 +1,4 @@ -use std::sync::Arc; +use std::{iter::repeat_n, sync::Arc}; use openvm_instructions::{instruction::Instruction, program::Program, LocalOpcode, SystemOpcode}; use openvm_stark_backend::{ @@ -36,6 +36,7 @@ pub struct PreflightInterpretedInstance { } #[repr(C)] +#[derive(Clone)] pub struct PcEntry { // NOTE[jpw]: revisit storing only smaller `precompute` for better cache locality. Currently // VmOpcode is usize so align=8 and there are 7 u32 operands so we store ExecutorId(u32) after @@ -60,7 +61,10 @@ impl PreflightInterpretedInstance { return Err(StaticProgramError::TooManyExecutors); } let len = program.instructions_and_debug_infos.len(); - let mut pc_handler = Vec::with_capacity(len); + let pc_base = program.pc_base; + let base_idx = get_pc_index(pc_base); + let mut pc_handler = Vec::with_capacity(base_idx + len); + pc_handler.extend(repeat_n(PcEntry::undefined(), base_idx)); for insn_and_debug_info in &program.instructions_and_debug_infos { if let Some((insn, _)) = insn_and_debug_info { let insn = insn.clone(); @@ -86,9 +90,9 @@ impl PreflightInterpretedInstance { } Ok(Self { inventory, - execution_frequencies: vec![0u32; len], + execution_frequencies: vec![0u32; base_idx + len], + pc_base, pc_handler, - pc_base: program.pc_base, executor_idx_to_air_idx, }) } @@ -101,9 +105,11 @@ impl PreflightInterpretedInstance { where E: Send + Sync, { + let base_idx = get_pc_index(self.pc_base); self.pc_handler .par_iter() .enumerate() + .skip(base_idx) .filter(|(_, entry)| entry.is_some()) .map(|(i, _)| self.execution_frequencies[i]) .collect() @@ -157,15 +163,11 @@ impl PreflightInterpretedInstance { E: PreflightExecutor, { let pc = state.pc; - let pc_idx = get_pc_index(self.pc_base, pc); - let pc_entry = - self.pc_handler - .get(pc_idx) - .ok_or_else(|| ExecutionError::PcOutOfBounds { - pc, - pc_base: self.pc_base, - program_len: self.pc_handler.len(), - })?; + let pc_idx = get_pc_index(pc); + let pc_entry = self + .pc_handler + .get(pc_idx) + .ok_or_else(|| ExecutionError::PcOutOfBounds(pc))?; // SAFETY: `execution_frequencies` has the same length as `pc_handler` so `get_pc_entry` // already does the bounds check unsafe { From 63b013affdc0a0e18524a97df151462c77b54962 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Fri, 22 Aug 2025 14:26:29 -0700 Subject: [PATCH 32/36] fix: remove unused error --- crates/vm/src/arch/execution.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/crates/vm/src/arch/execution.rs b/crates/vm/src/arch/execution.rs index 22aeaeeadd..6bd3d3b90a 100644 --- a/crates/vm/src/arch/execution.rs +++ b/crates/vm/src/arch/execution.rs @@ -70,9 +70,6 @@ pub enum ExecutionError { Inventory(#[from] ExecutorInventoryError), #[error("static program error: {0}")] Static(#[from] StaticProgramError), - // Placeholder error type for tco - #[error("error in VmExecState")] - ExecStateError, } /// Errors in the program that can be statically analyzed before runtime. From 5f5138c05b959b70c06789c000fdbbcf81ce3573 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Fri, 22 Aug 2025 14:32:34 -0700 Subject: [PATCH 33/36] chore: don't keep pre_compute_insns when tco --- crates/vm/src/arch/interpreter.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crates/vm/src/arch/interpreter.rs b/crates/vm/src/arch/interpreter.rs index d1b160044f..67277d47b0 100644 --- a/crates/vm/src/arch/interpreter.rs +++ b/crates/vm/src/arch/interpreter.rs @@ -48,6 +48,7 @@ pub struct InterpretedInstance<'a, F, Ctx> { /// `pc_index = pc / DEFAULT_PC_STEP`. /// SAFETY: The first `pc_base / DEFAULT_PC_STEP` entries will be unreachable. We do this to /// avoid needing to subtract `pc_base` during runtime. + #[cfg(not(feature = "tco"))] pre_compute_insns: Vec>, #[cfg(feature = "tco")] pre_compute_max_size: usize, @@ -148,6 +149,7 @@ where let mut pre_compute_buf = alloc_pre_compute_buf(program, pre_compute_max_size); let mut split_pre_compute_buf = split_pre_compute_buf(program, &mut pre_compute_buf, pre_compute_max_size); + #[cfg_attr(feature = "tco", allow(unused_variables))] let pre_compute_insns = get_pre_compute_instructions::( program, inventory, @@ -182,6 +184,7 @@ where Ok(Self { system_config: inventory.config().clone(), pre_compute_buf, + #[cfg(not(feature = "tco"))] pre_compute_insns, pc_start, init_memory, @@ -247,6 +250,7 @@ where let mut pre_compute_buf = alloc_pre_compute_buf(program, pre_compute_max_size); let mut split_pre_compute_buf = split_pre_compute_buf(program, &mut pre_compute_buf, pre_compute_max_size); + #[cfg_attr(feature = "tco", allow(unused_variables))] let pre_compute_insns = get_metered_pre_compute_instructions::( program, inventory, @@ -285,6 +289,7 @@ where Ok(Self { system_config: inventory.config().clone(), pre_compute_buf, + #[cfg(not(feature = "tco"))] pre_compute_insns, pc_start, init_memory, From f46bf707b32bb62afba5b44f76c224a747e7ce6e Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Fri, 22 Aug 2025 14:33:03 -0700 Subject: [PATCH 34/36] chore: remove unused derive --- crates/vm/src/arch/state.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/vm/src/arch/state.rs b/crates/vm/src/arch/state.rs index dbf5bee3ec..611094ecfc 100644 --- a/crates/vm/src/arch/state.rs +++ b/crates/vm/src/arch/state.rs @@ -3,7 +3,6 @@ use std::{ ops::{Deref, DerefMut}, }; -use getset::WithSetters; use openvm_instructions::exe::SparseMemoryImage; use rand::{rngs::StdRng, SeedableRng}; use tracing::instrument; @@ -97,7 +96,6 @@ impl VmState { /// The global state is generic in guest memory `MEM` and additional context `CTX`. /// The host state is execution context specific. // @dev: Do not confuse with `ExecutionState` struct. -#[derive(WithSetters)] pub struct VmExecState { /// Core VM state pub vm_state: VmState, From 8a51077799d34711e3150089912360f8a942af74 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Fri, 22 Aug 2025 14:46:16 -0700 Subject: [PATCH 35/36] chore: dispatch! for poseidon2 --- .../native/circuit/src/poseidon2/execution.rs | 170 ++++++++++-------- 1 file changed, 93 insertions(+), 77 deletions(-) diff --git a/extensions/native/circuit/src/poseidon2/execution.rs b/extensions/native/circuit/src/poseidon2/execution.rs index d6739ff133..5dcc356c55 100644 --- a/extensions/native/circuit/src/poseidon2/execution.rs +++ b/extensions/native/circuit/src/poseidon2/execution.rs @@ -136,6 +136,33 @@ impl<'a, F: PrimeField32, const SBOX_REGISTERS: usize> NativePoseidon2Executor { + if $opcode == PERM_POS2.global_opcode() || $opcode == COMP_POS2.global_opcode() { + let pos2_data: &mut Pos2PreCompute = $data.borrow_mut(); + $executor.pre_compute_pos2_impl($pc, $inst, pos2_data)?; + if $opcode == PERM_POS2.global_opcode() { + Ok($execute_pos2_impl::<_, _, SBOX_REGISTERS, true>) + } else { + Ok($execute_pos2_impl::<_, _, SBOX_REGISTERS, false>) + } + } else { + let verify_batch_data: &mut VerifyBatchPreCompute = + $data.borrow_mut(); + $executor.pre_compute_verify_batch_impl($pc, $inst, verify_batch_data)?; + Ok($execute_verify_batch_impl::<_, _, SBOX_REGISTERS>) + } + }; +} + impl Executor for NativePoseidon2Executor { @@ -154,24 +181,15 @@ impl Executor inst: &Instruction, data: &mut [u8], ) -> Result, StaticProgramError> { - let &Instruction { opcode, .. } = inst; - - let is_pos2 = opcode == PERM_POS2.global_opcode() || opcode == COMP_POS2.global_opcode(); - - if is_pos2 { - let pos2_data: &mut Pos2PreCompute = data.borrow_mut(); - self.pre_compute_pos2_impl(pc, inst, pos2_data)?; - if opcode == PERM_POS2.global_opcode() { - Ok(execute_pos2_e1_impl::<_, _, SBOX_REGISTERS, true>) - } else { - Ok(execute_pos2_e1_impl::<_, _, SBOX_REGISTERS, false>) - } - } else { - let verify_batch_data: &mut VerifyBatchPreCompute = - data.borrow_mut(); - self.pre_compute_verify_batch_impl(pc, inst, verify_batch_data)?; - Ok(execute_verify_batch_e1_impl::<_, _, SBOX_REGISTERS>) - } + dispatch1!( + execute_pos2_e1_impl, + execute_verify_batch_e1_impl, + self, + inst.opcode, + pc, + inst, + data + ) } #[cfg(feature = "tco")] @@ -181,25 +199,49 @@ impl Executor inst: &Instruction, data: &mut [u8], ) -> Result, StaticProgramError> { - let &Instruction { opcode, .. } = inst; + dispatch1!( + execute_pos2_e1_tco_handler, + execute_verify_batch_e1_tco_handler, + self, + inst.opcode, + pc, + inst, + data + ) + } +} - let is_pos2 = opcode == PERM_POS2.global_opcode() || opcode == COMP_POS2.global_opcode(); +macro_rules! dispatch2 { + ( + $execute_pos2_impl:ident, + $execute_verify_batch_impl:ident, + $executor:ident, + $opcode:expr, + $chip_idx:ident, + $pc:ident, + $inst:ident, + $data:ident + ) => { + if $opcode == PERM_POS2.global_opcode() || $opcode == COMP_POS2.global_opcode() { + let pre_compute: &mut E2PreCompute> = + $data.borrow_mut(); + pre_compute.chip_idx = $chip_idx as u32; - if is_pos2 { - let pos2_data: &mut Pos2PreCompute = data.borrow_mut(); - self.pre_compute_pos2_impl(pc, inst, pos2_data)?; - if opcode == PERM_POS2.global_opcode() { - Ok(execute_pos2_e1_tco_handler::<_, _, SBOX_REGISTERS, true>) + $executor.pre_compute_pos2_impl($pc, $inst, &mut pre_compute.data)?; + if $opcode == PERM_POS2.global_opcode() { + Ok($execute_pos2_impl::<_, _, SBOX_REGISTERS, true>) } else { - Ok(execute_pos2_e1_tco_handler::<_, _, SBOX_REGISTERS, false>) + Ok($execute_pos2_impl::<_, _, SBOX_REGISTERS, false>) } } else { - let verify_batch_data: &mut VerifyBatchPreCompute = - data.borrow_mut(); - self.pre_compute_verify_batch_impl(pc, inst, verify_batch_data)?; - Ok(execute_verify_batch_e1_tco_handler::<_, _, SBOX_REGISTERS>) + let pre_compute: &mut E2PreCompute> = + $data.borrow_mut(); + pre_compute.chip_idx = $chip_idx as u32; + + $executor.pre_compute_verify_batch_impl($pc, $inst, &mut pre_compute.data)?; + Ok($execute_verify_batch_impl::<_, _, SBOX_REGISTERS>) } - } + }; } impl MeteredExecutor @@ -221,29 +263,16 @@ impl MeteredExecutor inst: &Instruction, data: &mut [u8], ) -> Result, StaticProgramError> { - let &Instruction { opcode, .. } = inst; - - let is_pos2 = opcode == PERM_POS2.global_opcode() || opcode == COMP_POS2.global_opcode(); - - if is_pos2 { - let pre_compute: &mut E2PreCompute> = - data.borrow_mut(); - pre_compute.chip_idx = chip_idx as u32; - - self.pre_compute_pos2_impl(pc, inst, &mut pre_compute.data)?; - if opcode == PERM_POS2.global_opcode() { - Ok(execute_pos2_e2_impl::<_, _, SBOX_REGISTERS, true>) - } else { - Ok(execute_pos2_e2_impl::<_, _, SBOX_REGISTERS, false>) - } - } else { - let pre_compute: &mut E2PreCompute> = - data.borrow_mut(); - pre_compute.chip_idx = chip_idx as u32; - - self.pre_compute_verify_batch_impl(pc, inst, &mut pre_compute.data)?; - Ok(execute_verify_batch_e2_impl::<_, _, SBOX_REGISTERS>) - } + dispatch2!( + execute_pos2_e2_impl, + execute_verify_batch_e2_impl, + self, + inst.opcode, + chip_idx, + pc, + inst, + data + ) } #[cfg(feature = "tco")] @@ -254,29 +283,16 @@ impl MeteredExecutor inst: &Instruction, data: &mut [u8], ) -> Result, StaticProgramError> { - let &Instruction { opcode, .. } = inst; - - let is_pos2 = opcode == PERM_POS2.global_opcode() || opcode == COMP_POS2.global_opcode(); - - if is_pos2 { - let pre_compute: &mut E2PreCompute> = - data.borrow_mut(); - pre_compute.chip_idx = chip_idx as u32; - - self.pre_compute_pos2_impl(pc, inst, &mut pre_compute.data)?; - if opcode == PERM_POS2.global_opcode() { - Ok(execute_pos2_e2_tco_handler::<_, _, SBOX_REGISTERS, true>) - } else { - Ok(execute_pos2_e2_tco_handler::<_, _, SBOX_REGISTERS, false>) - } - } else { - let pre_compute: &mut E2PreCompute> = - data.borrow_mut(); - pre_compute.chip_idx = chip_idx as u32; - - self.pre_compute_verify_batch_impl(pc, inst, &mut pre_compute.data)?; - Ok(execute_verify_batch_e2_tco_handler::<_, _, SBOX_REGISTERS>) - } + dispatch2!( + execute_pos2_e2_tco_handler, + execute_verify_batch_e2_tco_handler, + self, + inst.opcode, + chip_idx, + pc, + inst, + data + ) } } From aee66b1bc6b99853477651d4b8c140df313ff775 Mon Sep 17 00:00:00 2001 From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com> Date: Fri, 22 Aug 2025 14:48:47 -0700 Subject: [PATCH 36/36] chore: phantom lifetime --- crates/vm/src/arch/interpreter.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crates/vm/src/arch/interpreter.rs b/crates/vm/src/arch/interpreter.rs index 67277d47b0..504059c286 100644 --- a/crates/vm/src/arch/interpreter.rs +++ b/crates/vm/src/arch/interpreter.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "tco")] +use std::marker::PhantomData; use std::{ alloc::{alloc, dealloc, handle_alloc_error, Layout}, borrow::{Borrow, BorrowMut}, @@ -59,6 +61,8 @@ pub struct InterpretedInstance<'a, F, Ctx> { pc_start: u32, init_memory: SparseMemoryImage, + #[cfg(feature = "tco")] + phantom: PhantomData<&'a ()>, } #[cfg_attr(feature = "tco", allow(dead_code))] @@ -192,6 +196,8 @@ where pre_compute_max_size, #[cfg(feature = "tco")] handlers, + #[cfg(feature = "tco")] + phantom: PhantomData, }) } @@ -297,6 +303,8 @@ where pre_compute_max_size, #[cfg(feature = "tco")] handlers, + #[cfg(feature = "tco")] + phantom: PhantomData, }) } }