From 1f4fd3f86f9fb53391ad3f8e5c4f5e52e9624f8f Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Fri, 13 Aug 2021 15:20:00 +0300 Subject: [PATCH 1/2] [interp] Disable inlining if interpreter var limit is exceeded Currently, the interpreter has a 64k limit for the stack space to be used by vars. If we reach this limit, retry compilation with inlining disabled, since inlining can significantly increase the size of the method code. --- src/mono/mono/mini/interp/transform.c | 30 +++++++++++++++++++++------ src/mono/mono/mini/interp/transform.h | 3 +++ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index c677122e6a5f72..1ce7b2c490e2ff 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -2777,6 +2777,9 @@ interp_method_check_inlining (TransformData *td, MonoMethod *method, MonoMethodS { MonoMethodHeaderSummary header; + if (td->disable_inlining) + return FALSE; + if (method->flags & METHOD_ATTRIBUTE_REQSECOBJ) /* Used to mark methods containing StackCrawlMark locals */ return FALSE; @@ -9406,6 +9409,7 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG int i; TransformData transform_data; TransformData *td; + gboolean retry_compilation = FALSE; static gboolean verbose_method_inited; static char* verbose_method_name; @@ -9414,6 +9418,7 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG verbose_method_inited = TRUE; } +retry: memset (&transform_data, 0, sizeof(transform_data)); td = &transform_data; @@ -9437,6 +9442,8 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG td->seq_points = g_ptr_array_new (); td->verbose_level = mono_interp_traceopt; td->prof_coverage = mono_profiler_coverage_instrumentation_enabled (method); + if (retry_compilation) + td->disable_inlining = TRUE; rtm->data_items = td->data_items; if (td->prof_coverage) @@ -9484,12 +9491,21 
@@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG generate_compacted_code (td); if (td->total_locals_size >= G_MAXUINT16) { - char *name = mono_method_get_full_name (method); - char *msg = g_strdup_printf ("Unable to run method '%s': locals size too big.", name); - g_free (name); - mono_error_set_generic_error (error, "System", "InvalidProgramException", "%s", msg); - g_free (msg); - goto exit; + if (td->disable_inlining) { + char *name = mono_method_get_full_name (method); + char *msg = g_strdup_printf ("Unable to run method '%s': locals size too big.", name); + g_free (name); + mono_error_set_generic_error (error, "System", "InvalidProgramException", "%s", msg); + g_free (msg); + retry_compilation = FALSE; + goto exit; + } else { + // We give the method another chance to compile with inlining disabled + retry_compilation = TRUE; + goto exit; + } + } else { + retry_compilation = FALSE; } if (td->verbose_level) { @@ -9580,6 +9596,8 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG if (td->line_numbers) g_array_free (td->line_numbers, TRUE); mono_mempool_destroy (td->mempool); + if (retry_compilation) + goto retry; } gboolean diff --git a/src/mono/mono/mini/interp/transform.h b/src/mono/mono/mini/interp/transform.h index 8964a3ba92482b..c787b9d52f9226 100644 --- a/src/mono/mono/mini/interp/transform.h +++ b/src/mono/mono/mini/interp/transform.h @@ -207,6 +207,9 @@ typedef struct GList *dont_inline; int inline_depth; int has_localloc : 1; + // If method compilation fails due to certain limits being exceeded, we disable inlining + // and retry compilation. + int disable_inlining : 1; // If the current method (inlined_method) has the aggressive inlining attribute, we no longer // bail out of inlining when having to generate certain opcodes (like call, throw). 
int aggressive_inlining : 1; From 2d8af6e3e21e932fe39ae805c1d888e3976b7b8c Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Fri, 13 Aug 2021 15:59:20 +0300 Subject: [PATCH 2/2] [interp] Improve cprop speed by removing redundant memclear Instead of clearing the defs for all vars, clear only for the vars used within a basic block, as the jit does. Otherwise, for large methods with many locals and many bblocks, we end up spending most of the time clearing memory. Improves speed of cprop by 20-30%, while for huge methods it can improve it by orders of magnitude. --- src/mono/mono/mini/interp/transform.c | 86 +++++++++++++++++---------- 1 file changed, 53 insertions(+), 33 deletions(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 1ce7b2c490e2ff..9672b112a90098 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -8253,6 +8253,42 @@ cprop_sreg (TransformData *td, InterpInst *ins, int *psreg, LocalValue *local_de } } +static void +foreach_local_var (TransformData *td, InterpInst *ins, gpointer data, void (*callback)(TransformData*, int, gpointer)) +{ + int opcode = ins->opcode; + if (mono_interp_op_sregs [opcode]) { + for (int i = 0; i < mono_interp_op_sregs [opcode]; i++) { + int sreg = ins->sregs [i]; + + if (sreg == MINT_CALL_ARGS_SREG) { + int *call_args = ins->info.call_args; + if (call_args) { + int var = *call_args; + while (var != -1) { + callback (td, var, data); + call_args++; + var = *call_args; + } + } + } else { + callback (td, sreg, data); + } + } + } + + if (mono_interp_op_dregs [opcode]) + callback (td, ins->dreg, data); +} + +static void +clear_local_defs (TransformData *td, int var, void *data) +{ + LocalValue *local_defs = (LocalValue*) data; + local_defs [var].type = LOCAL_VALUE_NONE; + local_defs [var].ins = NULL; +} + static void interp_cprop (TransformData *td) { @@ -8277,8 +8313,8 @@ interp_cprop (TransformData *td) // Set cbb since we do some 
instruction inserting below td->cbb = bb; - // FIXME This is excessive. Remove this once we have SSA - memset (local_defs, 0, td->locals_size * sizeof (LocalValue)); + for (ins = bb->first_ins; ins != NULL; ins = ins->next) + foreach_local_var (td, ins, local_defs, clear_local_defs); if (td->verbose_level) g_print ("BB%d\n", bb->index); @@ -8908,34 +8944,6 @@ interp_optimize_code (TransformData *td) MONO_TIME_TRACK (mono_interp_stats.super_instructions_time, interp_super_instructions (td)); } -static void -foreach_local_var (TransformData *td, InterpInst *ins, int data, void (*callback)(TransformData*, int, int)) -{ - int opcode = ins->opcode; - if (mono_interp_op_sregs [opcode]) { - for (int i = 0; i < mono_interp_op_sregs [opcode]; i++) { - int sreg = ins->sregs [i]; - - if (sreg == MINT_CALL_ARGS_SREG) { - int *call_args = ins->info.call_args; - if (call_args) { - int var = *call_args; - while (var != -1) { - callback (td, var, data); - call_args++; - var = *call_args; - } - } - } else { - callback (td, sreg, data); - } - } - } - - if (mono_interp_op_dregs [opcode]) - callback (td, ins->dreg, data); -} - static void set_var_live_range (TransformData *td, int var, int ins_index) { @@ -8947,6 +8955,12 @@ set_var_live_range (TransformData *td, int var, int ins_index) td->locals [var].live_end = ins_index; } +static void +set_var_live_range_cb (TransformData *td, int var, gpointer data) +{ + set_var_live_range (td, var, (int)(gsize)data); +} + static void initialize_global_var (TransformData *td, int var, int bb_index) { @@ -8963,6 +8977,12 @@ initialize_global_var (TransformData *td, int var, int bb_index) alloc_global_var_offset (td, var); td->locals [var].flags |= INTERP_LOCAL_FLAG_GLOBAL; } +} + +static void +initialize_global_var_cb (TransformData *td, int var, gpointer data) +{ + initialize_global_var (td, var, (int)(gsize)data); } static void @@ -8987,7 +9007,7 @@ initialize_global_vars (TransformData *td) td->locals [var].flags |= INTERP_LOCAL_FLAG_GLOBAL; } 
} - foreach_local_var (td, ins, bb->index, initialize_global_var); + foreach_local_var (td, ins, (gpointer)(gsize)bb->index, initialize_global_var_cb); } } } @@ -9257,7 +9277,7 @@ interp_alloc_offsets (TransformData *td) // The arg of the call is no longer global *call_args = new_var; // Also update liveness for this instruction - foreach_local_var (td, new_inst, ins_index, set_var_live_range); + foreach_local_var (td, new_inst, (gpointer)(gsize)ins_index, set_var_live_range_cb); ins_index++; } } else { @@ -9295,7 +9315,7 @@ interp_alloc_offsets (TransformData *td) } } // Set live_start and live_end for every referenced local that is not global - foreach_local_var (td, ins, ins_index, set_var_live_range); + foreach_local_var (td, ins, (gpointer)(gsize)ins_index, set_var_live_range_cb); ins_index++; } gint32 current_offset = td->total_locals_size;