From 85c2f0d7689f05bb109d37bee68030c23a8a18d7 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Sep 2019 07:38:33 +0100 Subject: [PATCH 1/9] Move real_clinit_name declaration up No functional changes. This is so that we can avoid recomputing it in each loop. --- jbmc/src/java_bytecode/java_static_initializers.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jbmc/src/java_bytecode/java_static_initializers.cpp b/jbmc/src/java_bytecode/java_static_initializers.cpp index eae2a77a4d2..384a10c3e28 100644 --- a/jbmc/src/java_bytecode/java_static_initializers.cpp +++ b/jbmc/src/java_bytecode/java_static_initializers.cpp @@ -777,6 +777,7 @@ code_blockt get_user_specified_clinit_body( std::unordered_map &references) { jsont json; + const irep_idt &real_clinit_name = clinit_function_name(class_id); if( !static_values_file.empty() && !parse_json(static_values_file, message_handler, json) && json.is_object()) @@ -814,7 +815,7 @@ code_blockt get_user_specified_clinit_body( assign_from_json( value_pair.first, value_pair.second, - clinit_function_name(class_id), + real_clinit_name, body, symbol_table, needed_lazy_methods, @@ -825,7 +826,6 @@ code_blockt get_user_specified_clinit_body( } } } - const irep_idt &real_clinit_name = clinit_function_name(class_id); if(const auto clinit_func = symbol_table.lookup(real_clinit_name)) return code_blockt{{code_function_callt{clinit_func->symbol_expr()}}}; return code_blockt{}; From 6a61209d91d4d29ad56b8da512d3ae10604acb7c Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Sep 2019 07:50:49 +0100 Subject: [PATCH 2/9] Make get_user_specified_clinit_body take jsont arg This avoids reparsing the whole json file for each class that needs a clinit. Locally, on an example with a large json provided for --static-values, execution time went from 10s to 2s. --- .../src/java_bytecode/java_bytecode_language.cpp | 16 +++++++++++----- jbmc/src/java_bytecode/java_bytecode_language.h | 6 +++--- .../java_bytecode/java_static_initializers.cpp | 9 +++------ .../src/java_bytecode/java_static_initializers.h | 7 +++---- 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/jbmc/src/java_bytecode/java_bytecode_language.cpp b/jbmc/src/java_bytecode/java_bytecode_language.cpp index f63975719e5..9846c63bf31 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_language.cpp @@ -203,7 +203,13 @@ void java_bytecode_languaget::set_language_options(const optionst &options) java_cp_include_files=".*"; nondet_static = options.get_bool_option("nondet-static"); - static_values_file = options.get_option("static-values"); + if(options.is_set("static-values")) + { + const std::string filename = options.get_option("static-values"); + jsont tmp_json; + if(!parse_json(filename, *message_handler, tmp_json)) + static_values_json = std::move(tmp_json); + } ignore_manifest_main_class = options.get_bool_option("ignore-manifest-main-class"); @@ -853,7 +859,7 @@ bool java_bytecode_languaget::typecheck( symbol_table, synthetic_methods, threading_support, - !static_values_file.empty()); + static_values_json.has_value()); // Now incrementally elaborate methods // that are reachable from this entry point. @@ -1177,7 +1183,7 @@ bool java_bytecode_languaget::convert_single_method( function_id, symbol_table, nondet_static, - !static_values_file.empty(), + static_values_json.has_value(), object_factory_parameters, get_pointer_type_selector(), get_message_handler()); @@ -1186,7 +1192,7 @@ bool java_bytecode_languaget::convert_single_method( function_id, symbol_table, nondet_static, - !static_values_file.empty(), + static_values_json.has_value(), object_factory_parameters, get_pointer_type_selector(), get_message_handler()); @@ -1199,7 +1205,7 @@ bool java_bytecode_languaget::convert_single_method( class_name, "user_specified_clinit must be declared by a class."); writable_symbol.value = get_user_specified_clinit_body( *class_name, - static_values_file, + static_values_json, symbol_table, get_message_handler(), needed_lazy_methods, diff --git a/jbmc/src/java_bytecode/java_bytecode_language.h b/jbmc/src/java_bytecode/java_bytecode_language.h index 5a344a9be18..f0f3b5f1b68 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.h +++ b/jbmc/src/java_bytecode/java_bytecode_language.h @@ -236,10 +236,10 @@ class java_bytecode_languaget:public languaget java_string_library_preprocesst string_preprocess; std::string java_cp_include_files; bool nondet_static; - /// Path to a JSON file which contains initial values of static fields (right + /// JSON which contains initial values of static fields (right /// after the static initializer of the class was run). This is read from the - /// --static-values command-line option. - std::string static_values_file; + /// file specified by the --static-values command-line option. + optionalt static_values_json; // list of classes to force load even without reference from the entry point std::vector java_load_classes; diff --git a/jbmc/src/java_bytecode/java_static_initializers.cpp b/jbmc/src/java_bytecode/java_static_initializers.cpp index 384a10c3e28..2e5982b2679 100644 --- a/jbmc/src/java_bytecode/java_static_initializers.cpp +++ b/jbmc/src/java_bytecode/java_static_initializers.cpp @@ -769,20 +769,17 @@ code_ifthenelset get_clinit_wrapper_body( code_blockt get_user_specified_clinit_body( const irep_idt &class_id, - const std::string &static_values_file, + const optionalt &static_values_json, symbol_table_baset &symbol_table, message_handlert &message_handler, optionalt needed_lazy_methods, size_t max_user_array_length, std::unordered_map &references) { - jsont json; const irep_idt &real_clinit_name = clinit_function_name(class_id); - if( - !static_values_file.empty() && - !parse_json(static_values_file, message_handler, json) && json.is_object()) + if(static_values_json.has_value() && static_values_json->is_object()) { - const auto &json_object = to_json_object(json); + const auto &json_object = to_json_object(*static_values_json); const auto class_entry = json_object.find(id2string(strip_java_namespace_prefix(class_id))); if(class_entry != json_object.end()) diff --git a/jbmc/src/java_bytecode/java_static_initializers.h b/jbmc/src/java_bytecode/java_static_initializers.h index ef77a778381..96a4b7c5e4a 100644 --- a/jbmc/src/java_bytecode/java_static_initializers.h +++ b/jbmc/src/java_bytecode/java_static_initializers.h @@ -62,10 +62,9 @@ code_ifthenelset get_clinit_wrapper_body( /// assigned default values (zero or null). /// \param class_id: the id of the class to create a user_specified_clinit /// function body for. -/// \param static_values_file: input file containing values of static fields. +/// \param static_values_json: JSON object containing values of static fields. /// The format is expected to be a map whose keys are class names, and whose -/// values are maps from static field names to values. Currently only JSON -/// is supported as a file format. +/// values are maps from static field names to values. /// \param symbol_table: used to look up and create new symbols /// \param message_handler: used to log any errors with parsing the input file /// \param needed_lazy_methods: used to mark any runtime types given in the @@ -77,7 +76,7 @@ code_ifthenelset get_clinit_wrapper_body( /// \return the body of the user_specified_clinit function as a code block. code_blockt get_user_specified_clinit_body( const irep_idt &class_id, - const std::string &static_values_file, + const optionalt &static_values_json, symbol_table_baset &symbol_table, message_handlert &message_handler, optionalt needed_lazy_methods, From c0b681a815c7a725466e534ab8494db54a90d2fe Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Sep 2019 08:12:33 +0100 Subject: [PATCH 3/9] Ensure JSON file is JSON object just after parsing There is no reason to delay this check, and we can provide useful warning messages right at the start of the bytecode analysis. --- .../java_bytecode/java_bytecode_language.cpp | 17 +++++++++++++++-- jbmc/src/java_bytecode/java_bytecode_language.h | 2 +- .../java_bytecode/java_static_initializers.cpp | 11 +++++------ .../java_bytecode/java_static_initializers.h | 2 +- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/jbmc/src/java_bytecode/java_bytecode_language.cpp b/jbmc/src/java_bytecode/java_bytecode_language.cpp index 9846c63bf31..a6febf7996d 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_language.cpp @@ -207,8 +207,21 @@ void java_bytecode_languaget::set_language_options(const optionst &options) { const std::string filename = options.get_option("static-values"); jsont tmp_json; - if(!parse_json(filename, *message_handler, tmp_json)) - static_values_json = std::move(tmp_json); + if(parse_json(filename, *message_handler, tmp_json)) + { + warning() << "Provided JSON file for static-values cannot be parsed; it" + << " will be ignored." << messaget::eom; + } + else + { + if(!tmp_json.is_object()) + { + warning() << "Provided JSON file for static-values is not a JSON " + << "object; it will be ignored." << messaget::eom; + } + else + static_values_json = std::move(to_json_object(tmp_json)); + } } ignore_manifest_main_class = diff --git a/jbmc/src/java_bytecode/java_bytecode_language.h b/jbmc/src/java_bytecode/java_bytecode_language.h index f0f3b5f1b68..f3d47577006 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.h +++ b/jbmc/src/java_bytecode/java_bytecode_language.h @@ -239,7 +239,7 @@ class java_bytecode_languaget:public languaget /// JSON which contains initial values of static fields (right /// after the static initializer of the class was run). This is read from the /// file specified by the --static-values command-line option. - optionalt static_values_json; + optionalt static_values_json; // list of classes to force load even without reference from the entry point std::vector java_load_classes; diff --git a/jbmc/src/java_bytecode/java_static_initializers.cpp b/jbmc/src/java_bytecode/java_static_initializers.cpp index 2e5982b2679..2632d91e381 100644 --- a/jbmc/src/java_bytecode/java_static_initializers.cpp +++ b/jbmc/src/java_bytecode/java_static_initializers.cpp @@ -769,7 +769,7 @@ code_ifthenelset get_clinit_wrapper_body( code_blockt get_user_specified_clinit_body( const irep_idt &class_id, - const optionalt &static_values_json, + const optionalt &static_values_json, symbol_table_baset &symbol_table, message_handlert &message_handler, optionalt needed_lazy_methods, @@ -777,12 +777,11 @@ code_blockt get_user_specified_clinit_body( std::unordered_map &references) { const irep_idt &real_clinit_name = clinit_function_name(class_id); - if(static_values_json.has_value() && static_values_json->is_object()) + if(static_values_json.has_value()) { - const auto &json_object = to_json_object(*static_values_json); - const auto class_entry = - json_object.find(id2string(strip_java_namespace_prefix(class_id))); - if(class_entry != json_object.end()) + const auto class_entry = static_values_json->find( + id2string(strip_java_namespace_prefix(class_id))); + if(class_entry != static_values_json->end()) { const auto &class_json_value = class_entry->second; if(class_json_value.is_object()) diff --git a/jbmc/src/java_bytecode/java_static_initializers.h b/jbmc/src/java_bytecode/java_static_initializers.h index 96a4b7c5e4a..dc97b6bc7f2 100644 --- a/jbmc/src/java_bytecode/java_static_initializers.h +++ b/jbmc/src/java_bytecode/java_static_initializers.h @@ -76,7 +76,7 @@ code_ifthenelset get_clinit_wrapper_body( /// \return the body of the user_specified_clinit function as a code block. code_blockt get_user_specified_clinit_body( const irep_idt &class_id, - const optionalt &static_values_json, + const optionalt &static_values_json, symbol_table_baset &symbol_table, message_handlert &message_handler, optionalt needed_lazy_methods, From 84796413bd12e2050d575924ba4ffea06c88a569 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Sep 2019 08:18:49 +0100 Subject: [PATCH 4/9] Make get_user_specified_clinit_body json arg non optional No user specified static initializer should appear if the static-values is not set anyway. So this if condition can be replaced by an invariant at the call site. --- .../java_bytecode/java_bytecode_language.cpp | 4 +- .../java_static_initializers.cpp | 69 +++++++++---------- .../java_bytecode/java_static_initializers.h | 2 +- 3 files changed, 37 insertions(+), 38 deletions(-) diff --git a/jbmc/src/java_bytecode/java_bytecode_language.cpp b/jbmc/src/java_bytecode/java_bytecode_language.cpp index a6febf7996d..3d2fea1e064 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_language.cpp @@ -1216,9 +1216,11 @@ bool java_bytecode_languaget::convert_single_method( declaring_class(symbol_table.lookup_ref(function_id)); INVARIANT( class_name, "user_specified_clinit must be declared by a class."); + INVARIANT( + static_values_json.has_value(), "static-values JSON must be available"); writable_symbol.value = get_user_specified_clinit_body( *class_name, - static_values_json, + *static_values_json, symbol_table, get_message_handler(), needed_lazy_methods, diff --git a/jbmc/src/java_bytecode/java_static_initializers.cpp b/jbmc/src/java_bytecode/java_static_initializers.cpp index 2632d91e381..0a408c17b8c 100644 --- a/jbmc/src/java_bytecode/java_static_initializers.cpp +++ b/jbmc/src/java_bytecode/java_static_initializers.cpp @@ -769,7 +769,7 @@ code_ifthenelset get_clinit_wrapper_body( code_blockt get_user_specified_clinit_body( const irep_idt &class_id, - const optionalt &static_values_json, + const json_objectt &static_values_json, symbol_table_baset &symbol_table, message_handlert &message_handler, optionalt needed_lazy_methods, @@ -777,49 +777,46 @@ code_blockt get_user_specified_clinit_body( std::unordered_map &references) { const irep_idt &real_clinit_name = clinit_function_name(class_id); - if(static_values_json.has_value()) + const auto class_entry = + static_values_json.find(id2string(strip_java_namespace_prefix(class_id))); + if(class_entry != static_values_json.end()) { - const auto class_entry = static_values_json->find( - id2string(strip_java_namespace_prefix(class_id))); - if(class_entry != static_values_json->end()) + const auto &class_json_value = class_entry->second; + if(class_json_value.is_object()) { - const auto &class_json_value = class_entry->second; - if(class_json_value.is_object()) + const auto &class_json_object = to_json_object(class_json_value); + std::map static_field_values; + for(const auto &symbol_pair : symbol_table) { - const auto &class_json_object = to_json_object(class_json_value); - std::map static_field_values; - for(const auto &symbol_pair : symbol_table) + const symbolt &symbol = symbol_pair.second; + if( + declaring_class(symbol) && *declaring_class(symbol) == class_id && + symbol.is_static_lifetime) { - const symbolt &symbol = symbol_pair.second; - if( - declaring_class(symbol) && *declaring_class(symbol) == class_id && - symbol.is_static_lifetime) + const symbol_exprt &static_field_expr = symbol.symbol_expr(); + const auto &static_field_entry = + class_json_object.find(id2string(symbol.base_name)); + if(static_field_entry != class_json_object.end()) { - const symbol_exprt &static_field_expr = symbol.symbol_expr(); - const auto &static_field_entry = - class_json_object.find(id2string(symbol.base_name)); - if(static_field_entry != class_json_object.end()) - { - static_field_values.insert( - {static_field_expr, static_field_entry->second}); - } + static_field_values.insert( + {static_field_expr, static_field_entry->second}); } } - code_blockt body; - for(const auto &value_pair : static_field_values) - { - assign_from_json( - value_pair.first, - value_pair.second, - real_clinit_name, - body, - symbol_table, - needed_lazy_methods, - max_user_array_length, - references); - } - return body; } + code_blockt body; + for(const auto &value_pair : static_field_values) + { + assign_from_json( + value_pair.first, + value_pair.second, + real_clinit_name, + body, + symbol_table, + needed_lazy_methods, + max_user_array_length, + references); + } + return body; } } if(const auto clinit_func = symbol_table.lookup(real_clinit_name)) diff --git a/jbmc/src/java_bytecode/java_static_initializers.h b/jbmc/src/java_bytecode/java_static_initializers.h index dc97b6bc7f2..dfd301db0a5 100644 --- a/jbmc/src/java_bytecode/java_static_initializers.h +++ b/jbmc/src/java_bytecode/java_static_initializers.h @@ -76,7 +76,7 @@ code_ifthenelset get_clinit_wrapper_body( /// \return the body of the user_specified_clinit function as a code block. code_blockt get_user_specified_clinit_body( const irep_idt &class_id, - const optionalt &static_values_json, + const json_objectt &static_values_json, symbol_table_baset &symbol_table, message_handlert &message_handler, optionalt needed_lazy_methods, From 3ed8e342380240f5be6e9d8cc03f2b9c9775627b Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Sep 2019 08:24:54 +0100 Subject: [PATCH 5/9] Remove unused message handler This is no longer used now that the JSON parsing is done outside this function. --- jbmc/src/java_bytecode/java_bytecode_language.cpp | 1 - jbmc/src/java_bytecode/java_static_initializers.cpp | 1 - jbmc/src/java_bytecode/java_static_initializers.h | 2 -- 3 files changed, 4 deletions(-) diff --git a/jbmc/src/java_bytecode/java_bytecode_language.cpp b/jbmc/src/java_bytecode/java_bytecode_language.cpp index 3d2fea1e064..3eab473a22d 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_language.cpp @@ -1222,7 +1222,6 @@ bool java_bytecode_languaget::convert_single_method( *class_name, *static_values_json, symbol_table, - get_message_handler(), needed_lazy_methods, max_user_array_length, references); diff --git a/jbmc/src/java_bytecode/java_static_initializers.cpp b/jbmc/src/java_bytecode/java_static_initializers.cpp index 0a408c17b8c..e92fca78047 100644 --- a/jbmc/src/java_bytecode/java_static_initializers.cpp +++ b/jbmc/src/java_bytecode/java_static_initializers.cpp @@ -771,7 +771,6 @@ code_blockt get_user_specified_clinit_body( const irep_idt &class_id, const json_objectt &static_values_json, symbol_table_baset &symbol_table, - message_handlert &message_handler, optionalt needed_lazy_methods, size_t max_user_array_length, std::unordered_map &references) diff --git a/jbmc/src/java_bytecode/java_static_initializers.h b/jbmc/src/java_bytecode/java_static_initializers.h index dfd301db0a5..aa9f44f2ae0 100644 --- a/jbmc/src/java_bytecode/java_static_initializers.h +++ b/jbmc/src/java_bytecode/java_static_initializers.h @@ -66,7 +66,6 @@ code_ifthenelset get_clinit_wrapper_body( /// The format is expected to be a map whose keys are class names, and whose /// values are maps from static field names to values. /// \param symbol_table: used to look up and create new symbols -/// \param message_handler: used to log any errors with parsing the input file /// \param needed_lazy_methods: used to mark any runtime types given in the /// input file as needed /// \param max_user_array_length: maximum value for constant or variable length @@ -78,7 +77,6 @@ code_blockt get_user_specified_clinit_body( const irep_idt &class_id, const json_objectt &static_values_json, symbol_table_baset &symbol_table, - message_handlert &message_handler, optionalt needed_lazy_methods, size_t max_user_array_length, std::unordered_map &references); From 2d2ebca01c3457d6757c6c5677790114ff57bc3b Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Sep 2019 09:07:37 +0100 Subject: [PATCH 6/9] Add an equal_range utility function This is to make the equal_range method of multimap easier to use. The usage is illustrated in this commit by an example in java_static_initializers which allows use to use range-for. This will be also useful in following commits. --- jbmc/src/java_bytecode/java_static_initializers.cpp | 10 +++++----- src/util/range.h | 11 +++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/jbmc/src/java_bytecode/java_static_initializers.cpp b/jbmc/src/java_bytecode/java_static_initializers.cpp index e92fca78047..a9a90abc443 100644 --- a/jbmc/src/java_bytecode/java_static_initializers.cpp +++ b/jbmc/src/java_bytecode/java_static_initializers.cpp @@ -988,21 +988,21 @@ code_blockt stub_global_initializer_factoryt::get_stub_initializer_body( // class. Note this is the same invocation used in // java_static_lifetime_init. - auto class_globals = stub_globals_by_class.equal_range(*class_id); + auto class_globals = equal_range(stub_globals_by_class, *class_id); INVARIANT( - class_globals.first != class_globals.second, + !class_globals.empty(), "class with synthetic clinit should have at least one global to init"); java_object_factory_parameterst parameters = object_factory_parameters; parameters.function_id = function_id; - for(auto it = class_globals.first; it != class_globals.second; ++it) + for(const auto &pair : class_globals) { const symbol_exprt new_global_symbol = - symbol_table.lookup_ref(it->second).symbol_expr(); + symbol_table.lookup_ref(pair.second).symbol_expr(); parameters.min_null_tree_depth = - is_non_null_library_global(it->second) + is_non_null_library_global(pair.second) ? object_factory_parameters.min_null_tree_depth + 1 : object_factory_parameters.min_null_tree_depth; diff --git a/src/util/range.h b/src/util/range.h index 54501a23bae..c714cdb0464 100644 --- a/src/util/range.h +++ b/src/util/range.h @@ -533,4 +533,15 @@ auto make_range(containert &container) -> ranget container.begin(), container.end()); } +/// Utility function to make equal_range method of multimap easier to use by +/// returning a ranget object. For instance, we can write: +/// `for(auto value : equal_range(map, key).filter(...).map(...)) {...}`. +template +ranget +equal_range(const multimapt &multimap, const typename multimapt::key_type &key) +{ + auto iterator_pair = multimap.equal_range(key); + return make_range(iterator_pair.first, iterator_pair.second); +} + #endif // CPROVER_UTIL_RANGE_H From dd452877e76662953138799f468c2cfaec376e77 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Sep 2019 09:08:41 +0100 Subject: [PATCH 7/9] Extract a class_to_declared_symbols method This is an intermediary commit towards computing this map outside of the get_user_specified_clinit_body function, so that we can avoid going through the whole symbol map at each get_user_specified_clinit_body call. --- .../java_static_initializers.cpp | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/jbmc/src/java_bytecode/java_static_initializers.cpp b/jbmc/src/java_bytecode/java_static_initializers.cpp index a9a90abc443..39d951f71be 100644 --- a/jbmc/src/java_bytecode/java_static_initializers.cpp +++ b/jbmc/src/java_bytecode/java_static_initializers.cpp @@ -767,6 +767,20 @@ code_ifthenelset get_clinit_wrapper_body( return code_ifthenelset(std::move(check_already_run), std::move(init_body)); } +/// \return map associating classes to the symbols they declare +static std::unordered_multimap +class_to_declared_symbols(const symbol_tablet &symbol_table) +{ + std::unordered_multimap result; + for(const auto &symbol_pair : symbol_table) + { + const symbolt &symbol = symbol_pair.second; + if(optionalt declaring = declaring_class(symbol)) + result.emplace(*declaring, symbol); + } + return result; +} + code_blockt get_user_specified_clinit_body( const irep_idt &class_id, const json_objectt &static_values_json, @@ -785,12 +799,13 @@ code_blockt get_user_specified_clinit_body( { const auto &class_json_object = to_json_object(class_json_value); std::map static_field_values; - for(const auto &symbol_pair : symbol_table) + const auto class_to_declared_symbols_map = + class_to_declared_symbols(symbol_table); + for(const auto &class_symbol_pair : + equal_range(class_to_declared_symbols_map, class_id)) { - const symbolt &symbol = symbol_pair.second; - if( - declaring_class(symbol) && *declaring_class(symbol) == class_id && - symbol.is_static_lifetime) + const symbolt &symbol = class_symbol_pair.second; + if(symbol.is_static_lifetime) { const symbol_exprt &static_field_expr = symbol.symbol_expr(); const auto &static_field_entry = From 67a02e67fc258db337308db2645f5682167e604f Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Sep 2019 09:23:25 +0100 Subject: [PATCH 8/9] Make class_to_declared_symbols take multimap argument This is an intermediary commit that gets us closer to avoiding recomputing this map everytime. --- jbmc/src/java_bytecode/java_bytecode_language.cpp | 5 ++++- jbmc/src/java_bytecode/java_static_initializers.cpp | 8 ++++---- jbmc/src/java_bytecode/java_static_initializers.h | 10 +++++++++- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/jbmc/src/java_bytecode/java_bytecode_language.cpp b/jbmc/src/java_bytecode/java_bytecode_language.cpp index 3eab473a22d..e1e807a0e83 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_language.cpp @@ -1218,13 +1218,16 @@ bool java_bytecode_languaget::convert_single_method( class_name, "user_specified_clinit must be declared by a class."); INVARIANT( static_values_json.has_value(), "static-values JSON must be available"); + const auto class_to_declared_symbols_map = + class_to_declared_symbols(symbol_table); writable_symbol.value = get_user_specified_clinit_body( *class_name, *static_values_json, symbol_table, needed_lazy_methods, max_user_array_length, - references); + references, + class_to_declared_symbols_map); break; } case synthetic_method_typet::STUB_CLASS_STATIC_INITIALIZER: diff --git a/jbmc/src/java_bytecode/java_static_initializers.cpp b/jbmc/src/java_bytecode/java_static_initializers.cpp index 39d951f71be..97484ebba14 100644 --- a/jbmc/src/java_bytecode/java_static_initializers.cpp +++ b/jbmc/src/java_bytecode/java_static_initializers.cpp @@ -768,7 +768,7 @@ code_ifthenelset get_clinit_wrapper_body( } /// \return map associating classes to the symbols they declare -static std::unordered_multimap +std::unordered_multimap class_to_declared_symbols(const symbol_tablet &symbol_table) { std::unordered_multimap result; @@ -787,7 +787,9 @@ code_blockt get_user_specified_clinit_body( symbol_table_baset &symbol_table, optionalt needed_lazy_methods, size_t max_user_array_length, - std::unordered_map &references) + std::unordered_map &references, + const std::unordered_multimap + &class_to_declared_symbols_map) { const irep_idt &real_clinit_name = clinit_function_name(class_id); const auto class_entry = @@ -799,8 +801,6 @@ code_blockt get_user_specified_clinit_body( { const auto &class_json_object = to_json_object(class_json_value); std::map static_field_values; - const auto class_to_declared_symbols_map = - class_to_declared_symbols(symbol_table); for(const auto &class_symbol_pair : equal_range(class_to_declared_symbols_map, class_id)) { diff --git a/jbmc/src/java_bytecode/java_static_initializers.h b/jbmc/src/java_bytecode/java_static_initializers.h index aa9f44f2ae0..5171c99c6d2 100644 --- a/jbmc/src/java_bytecode/java_static_initializers.h +++ b/jbmc/src/java_bytecode/java_static_initializers.h @@ -52,6 +52,10 @@ code_ifthenelset get_clinit_wrapper_body( const select_pointer_typet &pointer_type_selector, message_handlert &message_handler); +/// \return map associating classes to the symbols they declare +std::unordered_multimap +class_to_declared_symbols(const symbol_tablet &symbol_table); + /// Create the body of a user_specified_clinit function for a given class, which /// includes assignments for all static fields of the class to values read from /// an input file. If the file could not be parsed or an entry for this class @@ -72,6 +76,8 @@ code_ifthenelset get_clinit_wrapper_body( /// arrays. Any arrays that were specified to be of nondeterministic length in /// the input file will be limited by this value. /// \param references: map to keep track of reference-equal objets. +/// \param class_to_declared_symbols_map: map classes to the symbols that +/// they declare. /// \return the body of the user_specified_clinit function as a code block. code_blockt get_user_specified_clinit_body( const irep_idt &class_id, @@ -79,7 +85,9 @@ code_blockt get_user_specified_clinit_body( symbol_table_baset &symbol_table, optionalt needed_lazy_methods, size_t max_user_array_length, - std::unordered_map &references); + std::unordered_map &references, + const std::unordered_multimap + &class_to_declared_symbols_map); class stub_global_initializer_factoryt { From 30bc2b662d19f5c76f5094cd58997ee048f6bf3d Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 3 Sep 2019 09:44:28 +0100 Subject: [PATCH 9/9] Declare a lazy_class_to_declared_symbols_mapt class This is to avoid computing the map several times and make sure it is never computed if it is not actually needed. --- jbmc/src/java_bytecode/README.md | 5 +- .../java_bytecode/java_bytecode_language.cpp | 49 +++++++++++++++---- .../java_bytecode/java_bytecode_language.h | 36 ++++++++++++-- 3 files changed, 75 insertions(+), 15 deletions(-) diff --git a/jbmc/src/java_bytecode/README.md b/jbmc/src/java_bytecode/README.md index 675a57b691e..9bdbaa6e495 100644 --- a/jbmc/src/java_bytecode/README.md +++ b/jbmc/src/java_bytecode/README.md @@ -674,10 +674,11 @@ determine which loading strategy to use. If [lazy_methods_mode](\ref java_bytecode_languaget::lazy_methods_mode) is \ref java_bytecode_languaget::LAZY_METHODS_MODE_EAGER then eager loading is used. Under eager loading -\ref java_bytecode_languaget::convert_single_method(const irep_idt &, symbol_table_baset &) +\ref java_bytecode_languaget::convert_single_method(const irep_idt &, symbol_table_baset &, lazy_class_to_declared_symbols_mapt &) is called once for each method listed in method_bytecode (described above). This then calls -\ref java_bytecode_languaget::convert_single_method(const irep_idt &, symbol_table_baset &, optionalt) +\ref java_bytecode_languaget::convert_single_method(const irep_idt &, symbol_table_baset &, optionalt, lazy_class_to_declared_symbols_mapt &); + without a ci_lazy_methods_neededt object, which calls \ref java_bytecode_convert_method, passing in the method parse tree. This in turn uses \ref java_bytecode_convert_methodt to turn the bytecode into symbols diff --git a/jbmc/src/java_bytecode/java_bytecode_language.cpp b/jbmc/src/java_bytecode/java_bytecode_language.cpp index e1e807a0e83..c5f46b549e7 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_language.cpp @@ -115,6 +115,23 @@ static prefix_filtert get_context(const optionst &options) return prefix_filtert(std::move(context_include), std::move(context_exclude)); } +std::unordered_multimap & +lazy_class_to_declared_symbols_mapt::get(const symbol_tablet &symbol_table) +{ + if(!initialized) + { + map = class_to_declared_symbols(symbol_table); + initialized = true; + } + return map; +} + +void lazy_class_to_declared_symbols_mapt::reinitialize() +{ + initialized = false; + map.clear(); +} + /// Consume options that are java bytecode specific. void java_bytecode_languaget::set_language_options(const optionst &options) { @@ -874,6 +891,8 @@ bool java_bytecode_languaget::typecheck( threading_support, static_values_json.has_value()); + lazy_class_to_declared_symbols_mapt class_to_declared_symbols; + // Now incrementally elaborate methods // that are reachable from this entry point. switch(lazy_methods_mode) @@ -895,18 +914,21 @@ bool java_bytecode_languaget::typecheck( for(const auto &function_id_and_type : synthetic_methods) { convert_single_method( - function_id_and_type.first, journalling_symbol_table); + function_id_and_type.first, + journalling_symbol_table, + class_to_declared_symbols); } // Convert all methods for which we have bytecode now for(const auto &method_sig : method_bytecode) { - convert_single_method(method_sig.first, journalling_symbol_table); + convert_single_method( + method_sig.first, journalling_symbol_table, class_to_declared_symbols); } // Now convert all newly added string methods for(const auto &fn_name : journalling_symbol_table.get_inserted()) { if(string_preprocess.implements_function(fn_name)) - convert_single_method(fn_name, symbol_table); + convert_single_method(fn_name, symbol_table, class_to_declared_symbols); } } break; @@ -1001,12 +1023,17 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( symbol_table_buildert symbol_table_builder = symbol_table_buildert::wrap(symbol_table); + lazy_class_to_declared_symbols_mapt class_to_declared_symbols; + const method_convertert method_converter = - [this, &symbol_table_builder]( + [this, &symbol_table_builder, &class_to_declared_symbols]( const irep_idt &function_id, ci_lazy_methods_neededt lazy_methods_needed) { return convert_single_method( - function_id, symbol_table_builder, std::move(lazy_methods_needed)); + function_id, + symbol_table_builder, + std::move(lazy_methods_needed), + class_to_declared_symbols); }; ci_lazy_methodst method_gather( @@ -1069,7 +1096,8 @@ void java_bytecode_languaget::convert_lazy_method( journalling_symbol_tablet symbol_table= journalling_symbol_tablet::wrap(symtab); - convert_single_method(function_id, symbol_table); + lazy_class_to_declared_symbols_mapt class_to_declared_symbols; + convert_single_method(function_id, symbol_table, class_to_declared_symbols); // Instrument runtime exceptions (unless symbol is a stub) if(symbol.value.is_not_nil()) @@ -1137,10 +1165,13 @@ static void notify_static_method_calls( /// \param symbol_table: global symbol table /// \param needed_lazy_methods: optionally a collection of needed methods to /// update with any methods touched during the conversion +/// \param class_to_declared_symbols: maps classes to the symbols that +/// they declare. bool java_bytecode_languaget::convert_single_method( const irep_idt &function_id, symbol_table_baset &symbol_table, - optionalt needed_lazy_methods) + optionalt needed_lazy_methods, + lazy_class_to_declared_symbols_mapt &class_to_declared_symbols) { // Do not convert if method is not in context if(method_in_context && !(*method_in_context)(id2string(function_id))) @@ -1218,8 +1249,6 @@ bool java_bytecode_languaget::convert_single_method( class_name, "user_specified_clinit must be declared by a class."); INVARIANT( static_values_json.has_value(), "static-values JSON must be available"); - const auto class_to_declared_symbols_map = - class_to_declared_symbols(symbol_table); writable_symbol.value = get_user_specified_clinit_body( *class_name, *static_values_json, @@ -1227,7 +1256,7 @@ bool java_bytecode_languaget::convert_single_method( needed_lazy_methods, max_user_array_length, references, - class_to_declared_symbols_map); + class_to_declared_symbols.get(symbol_table)); break; } case synthetic_method_typet::STUB_CLASS_STATIC_INITIALIZER: diff --git a/jbmc/src/java_bytecode/java_bytecode_language.h b/jbmc/src/java_bytecode/java_bytecode_language.h index f3d47577006..dacfdd80089 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.h +++ b/jbmc/src/java_bytecode/java_bytecode_language.h @@ -121,6 +121,31 @@ enum lazy_methods_modet LAZY_METHODS_MODE_EXTERNAL_DRIVER }; +/// Map classes to the symbols they declare but is only computed once it is +/// needed and the map is then kept. +/// Note that it only includes function and field symbols (and not for example, +/// local variables), these are produced in the convert-class phase. +/// Calling `get` before the symbol table is properly filled with these symbols, +/// would make later calls return an outdated map. The +/// lazy_class_to_declared_symbols_mapt would then need to be reinitialized. +/// Similarly if some transformation creates or deletes function or field +/// symbols in the symbol table, then the map would get out of date and would +/// need to be reinitialized. +class lazy_class_to_declared_symbols_mapt +{ +public: + lazy_class_to_declared_symbols_mapt() = default; + + std::unordered_multimap & + get(const symbol_tablet &symbol_table); + + void reinitialize(); + +private: + bool initialized = false; + std::unordered_multimap map; +}; + #define JAVA_CLASS_MODEL_SUFFIX "@class_model" class java_bytecode_languaget:public languaget @@ -205,15 +230,20 @@ class java_bytecode_languaget:public languaget protected: void convert_single_method( const irep_idt &function_id, - symbol_table_baset &symbol_table) + symbol_table_baset &symbol_table, + lazy_class_to_declared_symbols_mapt &class_to_declared_symbols) { convert_single_method( - function_id, symbol_table, optionalt()); + function_id, + symbol_table, + optionalt(), + class_to_declared_symbols); } bool convert_single_method( const irep_idt &function_id, symbol_table_baset &symbol_table, - optionalt needed_lazy_methods); + optionalt needed_lazy_methods, + lazy_class_to_declared_symbols_mapt &class_to_declared_symbols); bool do_ci_lazy_method_conversion(symbol_tablet &); const select_pointer_typet &get_pointer_type_selector() const;