From 7e5a0dd3399d280b55143bfe272eafe92019cf53 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 2 Jan 2025 18:53:12 +0100 Subject: [PATCH 01/89] Parse relocatable file (hacky handling of relocations) --- notes-TODO | 3 +++ src/analyse/ControlFlow.ml | 4 +-- src/analyse/Elf.ml | 2 +- src/bin/copySources.ml | 4 +-- src/dw/dw.ml | 2 +- src/elf/file.ml | 47 ++++++++++++++--------------------- src/elf/linksemRelocatable.ml | 39 +++++++++++++++++++++++++++++ src/elf/symTable.ml | 12 ++++----- src/elf/symTable.mli | 4 +-- src/elf/symbol.ml | 38 +++++++++++++++++----------- src/elf/symbol.mli | 13 +++++++--- 11 files changed, 109 insertions(+), 59 deletions(-) create mode 100644 notes-TODO create mode 100644 src/elf/linksemRelocatable.ml diff --git a/notes-TODO b/notes-TODO new file mode 100644 index 00000000..c8e943ae --- /dev/null +++ b/notes-TODO @@ -0,0 +1,3 @@ +Symbolic symbol table +- value of symbol?? (we don't have segments in relocatable files) +- can probably keep the same api, but addresses are symbolic \ No newline at end of file diff --git a/src/analyse/ControlFlow.ml b/src/analyse/ControlFlow.ml index 166a80c3..9d9628c0 100644 --- a/src/analyse/ControlFlow.ml +++ b/src/analyse/ControlFlow.ml @@ -132,14 +132,14 @@ let branch_table_target_addresses test filename_branch_table_option : (addr * ad (* pull out .rodata section from ELF *) let ((_, rodata_addr, bs) as _rodata : Dwarf.p_context * Nat_big_num.num * BytesSeq.t) = - Dwarf.extract_section_body test.elf_file ".rodata" false + Dwarf.extract_section_body_without_relocations test.elf_file ".rodata" false in (* chop into bytes *) let rodata_bytes : char array = BytesSeq.to_array bs in (* chop into 4-byte words - as needed for branch offset tables, though not for all other things in .rodata *) - let rodata_words : (natural * natural) list = Dwarf.words_of_byte_sequence rodata_addr bs [] in + let rodata_words : (natural * natural) list = Dwarf.words_of_rel_byte_sequence rodata_addr (Dwarf.rbs_no_reloc bs) [] in (*HACK*) let read_rodata_b addr = Elf_types_native_uint.natural_of_byte diff --git a/src/analyse/Elf.ml b/src/analyse/Elf.ml index 0be22494..4b4bfc9b 100644 --- a/src/analyse/Elf.ml +++ b/src/analyse/Elf.ml @@ -143,7 +143,7 @@ let parse_elf_file (filename : string) : test = segments in let ds = - match Dwarf.extract_dwarf_static (Elf_file.ELF_File_64 f1) with + match Dwarf.extract_dwarf_static (Elf_file.ELF_File_64 f1) Abi_aarch64_symbolic_relocation.aarch64_data_relocation_interpreter with | None -> fatal "%s" "extract_dwarf_static failed" | Some ds -> (* Debug.print_string2 (Dwarf.pp_analysed_location_data ds.Dwarf.ds_dwarf diff --git a/src/bin/copySources.ml b/src/bin/copySources.ml index d664698c..094d3257 100644 --- a/src/bin/copySources.ml +++ b/src/bin/copySources.ml @@ -84,8 +84,8 @@ let process_file () : unit = else Some (Byte_sequence.string_of_byte_sequence - (List.nth lnh.lnh_include_directories (dir - 1)))), - Byte_sequence.string_of_byte_sequence lnfe.lnfe_path )) + (rbs_unwrap (List.nth lnh.lnh_include_directories (dir - 1))))), + Byte_sequence.string_of_byte_sequence (rbs_unwrap lnfe.lnfe_path) )) lnh.lnh_file_entries in diff --git a/src/dw/dw.ml b/src/dw/dw.ml index 270c3576..af505b53 100644 --- a/src/dw/dw.ml +++ b/src/dw/dw.ml @@ -85,7 +85,7 @@ let of_elf (elf : Elf.File.t) = Arch.load_elf_arch elf; info "Extracting dwarf of %s" elf.filename; let ldwarf = - match Dwarf.extract_dwarf elf.linksem with + match Dwarf.extract_dwarf elf.linksem Abi_aarch64_symbolic_relocation.aarch64_data_relocation_interpreter with | Some d -> d | None -> dwarferror "Linksem extract_dwarf failed" in diff --git a/src/elf/file.ml b/src/elf/file.ml index 4b49c026..e3d8925d 100644 --- a/src/elf/file.ml +++ b/src/elf/file.ml @@ -105,49 +105,40 @@ let elferror fmt = Printf.ksprintf (fun s -> raise (ElfError s)) fmt let of_file (filename : string) = info "Loading ELF file %s" filename; (* parse the ELF file using linksem *) - let ( (elf_file : Elf_file.elf_file), + let bs = match Byte_sequence.acquire filename with + | Error.Fail s -> elferror "Linksem: Byte_sequence.acquire: %s" s + | Error.Success x -> x + in + let elf64_file = match Elf_file.read_elf64_file bs with + | Error.Fail s -> elferror "Linksem: read_elf64_file: %s" s + | Error.Success x -> x + in + let symbol_map = match LinksemRelocatable.get_elf64_file_global_symbol_init elf64_file with + | Error.Fail s -> elferror "LinksemRelocatable: get_elf64_file_global_symbol_init: %s" s + | Error.Success x -> x + in + (* let ( (elf_file : Elf_file.elf_file), (elf_epi : Sail_interface.executable_process_image), (symbol_map : Elf_file.global_symbol_init_info) ) = match Sail_interface.populate_and_obtain_global_symbol_init_info filename with | Error.Fail s -> elferror "Linksem: populate_and_obtain_global_symbol_init_info: %s" s | Error.Success x -> x - in + in *) (* Check this is a 64 bits ELF file *) - begin - match elf_file with - | Elf_file.ELF_File_32 _ -> elferror "32 bits elf files unsupported" - | _ -> () - end; - let (segments, entry, machine) = - match elf_epi with - | ELF_Class_32 _ -> elferror "32 bits elf file class unsupported" - | ELF_Class_64 (s, e, m) -> (s, e, m) - in - - (* Extract all the segments *) - let segments = - List.filter_map - (fun (seg, prov) -> if prov = Elf_file.FromELF then Some seg else None) - segments - in - let entry = Z.to_int entry in - let machine = machine_of_linksem machine in - debug "Loading ELF segments of %s" filename; - let segments = List.map Segment.of_linksem segments in - debug "Loaded ELF segments %t" - @@ Pp.top (Pp.list Pp.hex) - @@ List.map (fun x -> x.Segment.addr) segments; + let entry = Z.to_int elf64_file.elf64_file_header.elf64_entry in + let machine = machine_of_linksem elf64_file.elf64_file_header.elf64_machine in debug "Loading ELF symbols of %s" filename; - let symbols = SymTbl.of_linksem segments symbol_map in + let symbols = SymTbl.of_linksem symbol_map in debug "Adding .rodata section of %s" filename; (* We add the .rodata section seperately from the symbols because - it can contain non-symbol information such as string literals and constants used in branch-register target calculations - the range of the section is guaranteed to overlap with any symbols within it, and so not suitable to be stored in the [RngMap] *) + let elf_file = Elf_file.ELF_File_64 elf64_file in let rodata = let (_, addr, data) = - Dwarf.extract_section_body elf_file ".rodata" false + Dwarf.extract_section_body_without_relocations elf_file ".rodata" false (* `false' argument is for returning an empty byte-sequence if section is not found, instead of throwing an exception *) in diff --git a/src/elf/linksemRelocatable.ml b/src/elf/linksemRelocatable.ml new file mode 100644 index 00000000..82b4e035 --- /dev/null +++ b/src/elf/linksemRelocatable.ml @@ -0,0 +1,39 @@ +(* TODO header *) + +type sym_addr = string * Z.t + +(* Like in linksem, but address is section+offset, and with a writable flag *) +type symbol = string * (Z.t * Z.t * sym_addr * Byte_sequence_wrapper.byte_sequence * Z.t) * bool + +type global_symbol_init_info = symbol list + +open Elf_symbol_table +open Elf_interpreted_section + +let get_elf64_file_global_symbol_init f : global_symbol_init_info Error.error = + let secs = f.Elf_file.elf64_file_interpreted_sections in + Error.bind (Elf_file.get_elf64_file_symbol_table f) (fun (symtab, strtab) -> + List.filter_map ( + fun entry -> + let name = Uint32_wrapper.to_bigint entry.elf64_st_name in + let addr_offset = Uint64_wrapper.to_bigint entry.elf64_st_value in + let size = Uint64_wrapper.to_bigint entry.elf64_st_size in + let shndx = Uint32_wrapper.to_int entry.elf64_st_shndx in + let typ = Elf_symbol_table.extract_symbol_type entry.elf64_st_info in + let bnd = Elf_symbol_table.extract_symbol_binding entry.elf64_st_info in + Option.map ( + fun section -> + let addr = (section.elf64_section_name_as_string, addr_offset) in + let data = if Byte_sequence.length0 section.elf64_section_body = Z.zero then + Error.return (Byte_sequence.zeros size) + else + Byte_sequence.offset_and_cut addr_offset size section.elf64_section_body + in + Error.bind data (fun data -> + Error.bind (String_table.get_string_at name strtab) (fun str -> + let write = Elf_file.flag_is_set Elf_section_header_table.shf_write section.elf64_section_flags in + Error.return (str, (typ, size, addr, data, bnd), write) + )) + ) (List.nth_opt secs shndx) + ) symtab |> Error.mapM Fun.id + ) \ No newline at end of file diff --git a/src/elf/symTable.ml b/src/elf/symTable.ml index d1b2ecd7..8ef56b4d 100644 --- a/src/elf/symTable.ml +++ b/src/elf/symTable.ml @@ -59,7 +59,7 @@ type sym_offset = sym * int module RMap = RngMap.Make (Symbol) module SMap = Map.Make (String) -type linksem_t = Elf_file.global_symbol_init_info +type linksem_t = LinksemRelocatable.global_symbol_init_info type t = { by_name : sym SMap.t; by_addr : RMap.t } @@ -111,11 +111,11 @@ let of_position_string t s : sym_offset = if s = "" then raise Not_found; if s.[0] = '0' then of_addr_with_offset t (int_of_string s) else sym_offset_of_string t s -let of_linksem segments linksem_map = - let add_linksem_sym_to_map (map : t) (lsym : linksem_sym) = - if is_interesting_linksem lsym then add map (Symbol.of_linksem segments lsym) else map - in - List.fold_left add_linksem_sym_to_map empty linksem_map + let of_linksem linksem_map = + let add_linksem_sym_to_map (map : t) (lsym : linksem_sym) = + if is_interesting_linksem lsym then add map (Symbol.of_linksem lsym) else map + in + List.fold_left add_linksem_sym_to_map empty linksem_map let pp_raw st = RMap.bindings st.by_addr |> List.map (Pair.map Pp.ptr pp_raw) |> Pp.mapping "syms" diff --git a/src/elf/symTable.mli b/src/elf/symTable.mli index bcd57ca5..8cb35435 100644 --- a/src/elf/symTable.mli +++ b/src/elf/symTable.mli @@ -57,7 +57,7 @@ type linksem_sym = Symbol.linksem_t (** The type of a symbol with offset *) type sym_offset = sym * int -type linksem_t = Elf_file.global_symbol_init_info +type linksem_t = LinksemRelocatable.global_symbol_init_info (** The type of a symbol table. *) type t @@ -110,7 +110,7 @@ val of_position_string : t -> string -> sym_offset (** Extract the symbol from the linksem symbol representation. Need the segments for filling the missing symbol data *) -val of_linksem : Segment.t list -> linksem_t -> t +val of_linksem : linksem_t -> t (** Pretty print the table as a raw ocaml value *) val pp_raw : t -> Pp.document diff --git a/src/elf/symbol.ml b/src/elf/symbol.ml index 439dc0ec..d6e0baa3 100644 --- a/src/elf/symbol.ml +++ b/src/elf/symbol.ml @@ -42,27 +42,38 @@ (* *) (*==================================================================================*) +open Logs.Logger (struct + let str = __MODULE__ +end) + (* The documentation is in the mli file *) type typ = NOTYPE | OBJECT | FUNC | SECTION | FILE | UNKNOWN type linksem_typ = Z.t +(* TODO: move somewhere to reuse *) +type addr = { + section : string; + offset: int; +} + type t = { name : string; other_names : string list; typ : typ; + (* addr : addr; *) addr : int; size : int; writable : bool; data : BytesSeq.t; } -type linksem_t = string * (Z.t * Z.t * Z.t * BytesSeq.t option * Z.t) +type linksem_t = LinksemRelocatable.symbol let push_name s t = { t with other_names = s :: t.other_names } -let is_in t addr = t.addr <= addr && addr < t.addr + t.size +(* let is_in t addr = t.addr <= addr && addr < t.addr + t.size *) let len t = t.size @@ -75,7 +86,7 @@ let typ_of_linksem ltyp = | 4 -> FILE | _ -> UNKNOWN -let linksem_typ (_name, (typ, _size, _addr, _data, _)) = typ +let linksem_typ (_name, (typ, _size, _addr, _data, _), _) = typ (** [LoadingError(name,addr)] means that symbol [name] at [addr] could not be loaded *) exception LoadingError of string * int @@ -86,19 +97,17 @@ let _ = Some (Printf.sprintf "Symbol %s at 0x%x could not be loaded" name addr) | _ -> None) -let of_linksem segs (name, (typ, size, addr, data, _)) = +(* for debugging TODO remove *) +module SMap = Map.Make (String) +let locs = SMap.empty |> SMap.add ".text" 0 |> SMap.add ".data" 1000000 |> SMap.add ".eh_frame" 2000000 + +let of_linksem (name, (typ, size, addr, data, _), writable) = let typ = typ_of_linksem typ in let size = Z.to_int size in - let addr = Z.to_int addr in - let segment = - Option.value_fail (Segment.get_containing segs addr) "No segment contains symbol %s" name - in - let writable = segment.write in - let data = - data - |> Option.value_fun ~default:(fun () -> - Segment.get_addr (BytesSeq.getbs ~len:size) segment addr) - in + let section, offset = addr in + (* let addr = { section; offset = Z.to_int offset } in *) + let addr = SMap.find section locs + Z.to_int offset in + debug "Symbol %s at address %s+%d (using %d)" name section (Z.to_int offset) addr; { name; other_names = []; typ; size; addr; data; writable } let is_interesting = function OBJECT | FUNC -> true | _ -> false @@ -128,6 +137,7 @@ let pp_raw sym = ("name", !^(sym.name)); ("other names", separate nbspace (List.map string sym.other_names)); ("typ", pp_typ sym.typ); + (* ("addr", !^(sym.addr.section) ^^ !^"+" ^^ ptr sym.addr.offset); *) ("addr", ptr sym.addr); ("size", ptr sym.size); ("writable", bool sym.writable); diff --git a/src/elf/symbol.mli b/src/elf/symbol.mli index e557439f..51213545 100644 --- a/src/elf/symbol.mli +++ b/src/elf/symbol.mli @@ -55,12 +55,19 @@ type typ = NOTYPE | OBJECT | FUNC | SECTION | FILE | UNKNOWN type linksem_typ = Z.t +(* TODO: move somewhere to reuse *) +type addr = { + section : string; + offset: int; +} + (** The ELF symbol. This type guarantee the data exists contrary to linksem symbols (it may be all zeros though) *) type t = { name : string; other_names : string list; typ : typ; + (* addr : addr; *) addr : int; size : int; writable : bool; @@ -68,13 +75,13 @@ type t = { } (** The type of an ELF symbol in linksem. See {!of_linksem}*) -type linksem_t = string * (Z.t * Z.t * Z.t * BytesSeq.t option * Z.t) +type linksem_t = LinksemRelocatable.symbol (** Add a name to the other names list *) val push_name : string -> t -> t (** Check if an address is in a symbol *) -val is_in : t -> int -> bool +(* val is_in : t -> int -> bool *) (** For conformance with the {!Utils.RngMap.LenObject} module type *) val len : t -> int @@ -93,7 +100,7 @@ exception LoadingError of string * int May raise {!LoadingError} when the symbol has no data and the data cannot be found in the segments *) -val of_linksem : Segment.t list -> linksem_t -> t +val of_linksem : linksem_t -> t (** Tell if a symbol type is interesting for readDwarf purposes *) val is_interesting : typ -> bool From ecb6af3cd66655757460144e55ca559f2435a482 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Fri, 3 Jan 2025 18:20:24 +0100 Subject: [PATCH 02/89] Update config --- src/config/isla_aarch64.toml | 150 ++++++++--------------------------- 1 file changed, 35 insertions(+), 115 deletions(-) diff --git a/src/config/isla_aarch64.toml b/src/config/isla_aarch64.toml index cf209041..27d6e500 100644 --- a/src/config/isla_aarch64.toml +++ b/src/config/isla_aarch64.toml @@ -1,61 +1,24 @@ -#==================================================================================# -# BSD 2-Clause License # -# # -# Copyright (c) 2020-2021 Thibaut Pérami # -# Copyright (c) 2020-2021 Dhruv Makwana # -# Copyright (c) 2019-2021 Peter Sewell # -# All rights reserved. # -# # -# This software was developed by the University of Cambridge Computer # -# Laboratory as part of the Rigorous Engineering of Mainstream Systems # -# (REMS) project. # -# # -# This project has been partly funded by EPSRC grant EP/K008528/1. # -# This project has received funding from the European Research Council # -# (ERC) under the European Union's Horizon 2020 research and innovation # -# programme (grant agreement No 789108, ERC Advanced Grant ELVER). # -# This project has been partly funded by an EPSRC Doctoral Training studentship. # -# This project has been partly funded by Google. # -# # -# Redistribution and use in source and binary forms, with or without # -# modification, are permitted provided that the following conditions # -# are met: # -# 1. Redistributions of source code must retain the above copyright # -# notice, this list of conditions and the following disclaimer. # -# 2. Redistributions in binary form must reproduce the above copyright # -# notice, this list of conditions and the following disclaimer in # -# the documentation and/or other materials provided with the # -# distribution. # -# # -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' # -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED # -# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # -# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR # -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF # -# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT # -# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # -# SUCH DAMAGE. # -# # -#==================================================================================# - # This is a config file for the Sail generated from ARM-ASL -# It is copied from the Isla repo and should be synced periodically. -# It probably contains a lot of stuff that's not needed for read-dwarf. pc = "_PC" ifetch = "Read_ifetch" -read_exclusives = ["Read_exclusive", "Read_exclusive_acquire"] -write_exclusives = ["Write_exclusive", "Write_exclusive_release"] +in_program_order = ["sail_barrier"] # The assembler is used for assembling the code in litmus tests. We # assume it takes arguments like GNU as. -assembler = "aarch64-linux-gnu-as -march=armv8.3-a" # read-dwarf +[[toolchain]] +name = "macos-aarch64" +os = "macos" +arch = "aarch64" +assembler = "as --target=aarch64-unknown-linux-gnu" +objdump = "/opt/homebrew/opt/llvm/bin/llvm-objdump" +linker = "/opt/homebrew/opt/llvm/bin/ld.lld" + +[[toolchain]] +name = "default" +assembler = "aarch64-linux-gnu-as -march=armv8.1-a" objdump = "aarch64-linux-gnu-objdump" linker = "aarch64-linux-gnu-ld" @@ -84,6 +47,8 @@ stride = "0x10" [registers] ignore = [ + "_PC", + "__PC_changed", "SEE", "__unconditional", "__trickbox_enabled", @@ -91,25 +56,15 @@ ignore = [ "__v82_implemented", "__v83_implemented", "__v84_implemented", - "__v85_implemented", - "_GTEExtObsAccess", - "_GTEExtObsActive", - "_GTEExtObsAddress", - "_GTEExtObsCount", - "_GTEExtObsData", - "_GTEExtObsIndex", - "_GTEExtObsResult", - "_GTEExtObsResultIndex", - "_GTEExtObsResultIsAddress", - "_GTE_PPU_Access", - "_GTE_PPU_Address", - "_GTE_PPU_SizeEn" + "__v85_implemented" ] # These registers are set before any symbolic execution occurs [registers.defaults] -"__isla_monomorphize_reads" = false -"__isla_monomorphize_writes" = false +"__isla_vector_gpr" = false +"__isla_continue_on_see" = true +"__monomorphize_reads" = false +"__monomorphize_writes" = false "VBAR_EL1" = "0x0000000000000000" "VBAR_EL2" = "0x0000000000000000" # Causes CNTCV to be incremented every cycle if bit 0 is 1 @@ -117,6 +72,7 @@ ignore = [ # SSAdvance? "MDSCR_EL1" = "0x00000000" "InGuardedPage" = false +"__highest_el_aarch32" = false "__currentInstrLength" = 4 "_PendingPhysicalSE" = false "__CNTControlBase" = "0x0000000000000" @@ -129,19 +85,18 @@ ignore = [ "CFG_RMR_AA64" = "0b1" "CFG_RVBAR" = "0x0000000010300000" "CFG_ID_AA64PFR0_EL1_MPAM" = "0x1" -"CFG_ID_AA64PFR0_EL1_EL3" = "0x2" -"CFG_ID_AA64PFR0_EL1_EL2" = "0x2" -"CFG_ID_AA64PFR0_EL1_EL1" = "0x2" -"CFG_ID_AA64PFR0_EL1_EL0" = "0x2" +"CFG_ID_AA64PFR0_EL1_EL3" = "0x1" +"CFG_ID_AA64PFR0_EL1_EL2" = "0x1" +"CFG_ID_AA64PFR0_EL1_EL1" = "0x1" +"CFG_ID_AA64PFR0_EL1_EL0" = "0x1" # Need to investigate BTI extension. Guard pages cause problems with # memory accesses. "__v81_implemented" = true -"__v82_implemented" = true # read-dwarf -"__v83_implemented" = true # read-dwarf +"__v82_implemented" = false +"__v83_implemented" = false "__v84_implemented" = false "__v85_implemented" = false "__unpred_tsize_aborts" = true -"exclusive_never_fails" = true # read-dwarf # Trickbox has various features for debugging spec and running tests "__trickbox_enabled" = false "__tlb_enabled" = false @@ -173,12 +128,19 @@ ignore = [ "__crypto_sm3_implemented" = false "__crypto_sha512_implemented" = false "__crypto_sha3_implemented" = false +"_GTEExtObsAccess" = "[0x0000; 256]" +"_GTEExtObsAddress" = "[0x0000000000000000; 256]" +"_GTEExtObsData" = "[0x0000000000000000; 256]" +"_GTEExtObsResult" = "[0x0000000000000000; 256]" +"_GTE_PPU_SizeEn" = "[0x00000000; 6]" +"_GTE_PPU_Address" = "[0x0000000000000000; 6]" +"_GTE_PPU_Access" = "[0x00000000; 6]" # These registers are set during symbolic execution by the special builtin "reset_registers" [registers.reset] -# Bit 1 being set causes us to abort on unaligned accesses +# Bit 1 being unset allows unaligned accesses # Bit 26 being set allows cache-maintenance ops in EL0 -"SCTLR_EL1" = "0x0000000004000002" +"SCTLR_EL1" = "0x0000000004000000" # A map from register names that may appear in litmus files to Sail # register names @@ -245,45 +207,3 @@ ignore = [ "W28" = "R28" "W29" = "R29" "W30" = "R30" - -[reads] -Read_acquire = "A" -Read_exclusive_acquire = "A" - -[writes] -Write_release = "L" -Write_exclusive_release = "L" - -[cache_ops] -Cache_op_D_CVAU = "DC" -Cache_op_I_IVAU = "IC" -Cache_op_I_IALLU = "IC" - -# A mapping from Sail barrier_kinds for the spec to the names in cat -# memory models. -[barriers] -Barrier_DMB_SY = "DMB.SY" -Barrier_DMB_ST = "DMB.ST" -Barrier_DMB_LD = "DMB.LD" -Barrier_DMB_ISH = "DMB.ISH" -Barrier_DMB_ISHST = "DMB.ISHST" -Barrier_DMB_ISHLD = "DMB.ISHLD" -Barrier_DMB_NSH = "DMB.NSH" -Barrier_DMB_NSHST = "DMB.NSHST" -Barrier_DMB_NSHLD = "DMB.NSHLD" -Barrier_DMB_OSH = "DMB.OSH" -Barrier_DMB_OSHST = "DMB.OSHST" -Barrier_DMB_OSHLD = "DMB.OSHLD" -Barrier_DSB_SY = "DSB.SY" -Barrier_DSB_ST = "DSB.ST" -Barrier_DSB_LD = "DSB.LD" -Barrier_DSB_ISH = "DSB.ISH" -Barrier_DSB_ISHST = "DSB.ISHST" -Barrier_DSB_ISHLD = "DSB.ISHLD" -Barrier_DSB_NSH = "DSB.NSH" -Barrier_DSB_NSHST = "DSB.NSHST" -Barrier_DSB_NSHLD = "DSB.NSHLD" -Barrier_DSB_OSH = "DSB.OSH" -Barrier_DSB_OSHST = "DSB.OSHST" -Barrier_DSB_OSHLD = "DSB.OSHLD" -Barrier_ISB = "ISB" From 7514a8a69385deba347330a860e338d4ce078b1a Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sat, 4 Jan 2025 19:31:52 +0100 Subject: [PATCH 03/89] Symbolic symbol table --- src/elf/symTable.ml | 71 ++++++++++++++++++++++++++++++++++---------- src/elf/symTable.mli | 10 +++---- src/elf/symbol.ml | 19 ++++++------ src/elf/symbol.mli | 7 +++-- 4 files changed, 75 insertions(+), 32 deletions(-) diff --git a/src/elf/symTable.ml b/src/elf/symTable.ml index 8ef56b4d..dee03749 100644 --- a/src/elf/symTable.ml +++ b/src/elf/symTable.ml @@ -59,19 +59,57 @@ type sym_offset = sym * int module RMap = RngMap.Make (Symbol) module SMap = Map.Make (String) +module AddrMap = struct + type t = RMap.t SMap.t + + let add t addr sym = + SMap.update addr.section (fun old -> + let old = match old with + | None -> RMap.empty + | Some x -> x + in + Some (RMap.add old addr.offset sym) + ) t + + let update f t addr = + SMap.update addr.section (Option.map (fun x -> RMap.update f x addr.offset)) t + + let empty = SMap.empty + + let at t addr = + SMap.find addr.section t |> Fun.flip RMap.at addr.offset + + let at_opt t addr = + Option.bind (SMap.find_opt addr.section t) @@ Fun.flip RMap.at_opt addr.offset + + let at_off t addr = + SMap.find addr.section t |> Fun.flip RMap.at_off addr.offset + + let at_off_opt t addr = + Option.bind (SMap.find_opt addr.section t) @@ Fun.flip RMap.at_off_opt addr.offset + + let bindings t = + let sections = SMap.bindings t in + List.bind sections @@ fun (section, rmap) -> + let inner_bindings = RMap.bindings rmap in + List.map (fun (offset, sym) -> ({section; offset}, sym)) inner_bindings + + +end + type linksem_t = LinksemRelocatable.global_symbol_init_info -type t = { by_name : sym SMap.t; by_addr : RMap.t } +type t = { by_name : sym SMap.t; by_addr : AddrMap.t } -let empty = { by_name = SMap.empty; by_addr = RMap.empty } +let empty = { by_name = SMap.empty; by_addr = AddrMap.empty } let add t sym = let by_name = SMap.add sym.name sym t.by_name in - try { by_name; by_addr = RMap.add t.by_addr sym.addr sym } + try { by_name; by_addr = AddrMap.add t.by_addr sym.addr sym } with Invalid_argument _ -> let updated = ref false in let by_addr = - RMap.update + AddrMap.update (fun usym -> if usym.addr = sym.addr && usym.size = sym.size then begin updated := true; @@ -88,15 +126,15 @@ let of_name t name = let of_name_opt t name = SMap.find_opt name t.by_name -let of_addr t addr = RMap.at t.by_addr addr +let of_addr t addr = AddrMap.at t.by_addr addr -let of_addr_opt t addr = RMap.at_opt t.by_addr addr +let of_addr_opt t addr = AddrMap.at_opt t.by_addr addr -let of_addr_with_offset t addr = RMap.at_off t.by_addr addr +let of_addr_with_offset t addr = AddrMap.at_off t.by_addr addr -let of_addr_with_offset_opt t addr = RMap.at_off_opt t.by_addr addr +let of_addr_with_offset_opt t addr = AddrMap.at_off_opt t.by_addr addr -let to_addr_offset (sym, offset) = sym.addr + offset +let to_addr_offset (sym, offset) = { section = sym.addr.section; offset = sym.addr.offset + offset } let string_of_sym_offset ((sym, off) : sym_offset) = sym.name ^ "+" ^ string_of_int off @@ -109,15 +147,16 @@ let sym_offset_of_string t s : sym_offset = let of_position_string t s : sym_offset = let s = String.trim s in if s = "" then raise Not_found; - if s.[0] = '0' then of_addr_with_offset t (int_of_string s) else sym_offset_of_string t s + if s.[0] = '0' then raise Not_found (* no absolute addresses *) + else sym_offset_of_string t s - let of_linksem linksem_map = - let add_linksem_sym_to_map (map : t) (lsym : linksem_sym) = - if is_interesting_linksem lsym then add map (Symbol.of_linksem lsym) else map - in - List.fold_left add_linksem_sym_to_map empty linksem_map +let of_linksem linksem_map = + let add_linksem_sym_to_map (map : t) (lsym : linksem_sym) = + if is_interesting_linksem lsym then add map (Symbol.of_linksem lsym) else map + in + List.fold_left add_linksem_sym_to_map empty linksem_map -let pp_raw st = RMap.bindings st.by_addr |> List.map (Pair.map Pp.ptr pp_raw) |> Pp.mapping "syms" +let pp_raw st = AddrMap.bindings st.by_addr |> List.map (Pair.map pp_addr pp_raw) |> Pp.mapping "syms" let iter t f = SMap.iter (fun _ value -> f value) t.by_name diff --git a/src/elf/symTable.mli b/src/elf/symTable.mli index 8cb35435..0b919a8a 100644 --- a/src/elf/symTable.mli +++ b/src/elf/symTable.mli @@ -78,19 +78,19 @@ val of_name_opt : t -> string -> sym option (** Get the symbol owning that address. Not_found is raised if no symbol own that address.data See {!of_addr_opt} *) -val of_addr : t -> int -> sym +val of_addr : t -> Symbol.addr -> sym (** Get the symbol owning that address. None if no symbol own that address. See {!of_addr} *) -val of_addr_opt : t -> int -> sym option +val of_addr_opt : t -> Symbol.addr -> sym option (** Get a symbol with the offset that correspond to that address *) -val of_addr_with_offset : t -> int -> sym_offset +val of_addr_with_offset : t -> Symbol.addr -> sym_offset (** Get a symbol with the offset that correspond to that address *) -val of_addr_with_offset_opt : t -> int -> sym_offset option +val of_addr_with_offset_opt : t -> Symbol.addr -> sym_offset option (** Get back the raw address from a symbol+offset value *) -val to_addr_offset : sym_offset -> int +val to_addr_offset : sym_offset -> Symbol.addr (** Transform a symbol + offset into the corresponding string *) val string_of_sym_offset : sym_offset -> string diff --git a/src/elf/symbol.ml b/src/elf/symbol.ml index d6e0baa3..bcbd00ae 100644 --- a/src/elf/symbol.ml +++ b/src/elf/symbol.ml @@ -62,8 +62,8 @@ type t = { name : string; other_names : string list; typ : typ; - (* addr : addr; *) - addr : int; + addr : addr; + (* addr : int; *) size : int; writable : bool; data : BytesSeq.t; @@ -98,16 +98,15 @@ let _ = | _ -> None) (* for debugging TODO remove *) -module SMap = Map.Make (String) -let locs = SMap.empty |> SMap.add ".text" 0 |> SMap.add ".data" 1000000 |> SMap.add ".eh_frame" 2000000 +(* module SMap = Map.Make (String) +let locs = SMap.empty |> SMap.add ".text" 0 |> SMap.add ".data" 1000000 |> SMap.add ".eh_frame" 2000000 *) let of_linksem (name, (typ, size, addr, data, _), writable) = let typ = typ_of_linksem typ in let size = Z.to_int size in let section, offset = addr in - (* let addr = { section; offset = Z.to_int offset } in *) - let addr = SMap.find section locs + Z.to_int offset in - debug "Symbol %s at address %s+%d (using %d)" name section (Z.to_int offset) addr; + let addr = { section; offset = Z.to_int offset } in + (* let addr = SMap.find section locs + Z.to_int offset in *) { name; other_names = []; typ; size; addr; data; writable } let is_interesting = function OBJECT | FUNC -> true | _ -> false @@ -129,6 +128,8 @@ let pp_typ typ = | FILE -> "FILE" | UNKNOWN -> "UNKNOWN" +let pp_addr addr = Pp.(!^(addr.section) ^^ !^"+" ^^ ptr addr.offset) + let pp_raw sym = Pp.( !^"sym" @@ -137,8 +138,8 @@ let pp_raw sym = ("name", !^(sym.name)); ("other names", separate nbspace (List.map string sym.other_names)); ("typ", pp_typ sym.typ); - (* ("addr", !^(sym.addr.section) ^^ !^"+" ^^ ptr sym.addr.offset); *) - ("addr", ptr sym.addr); + ("addr", pp_addr sym.addr); + (* ("addr", ptr sym.addr); *) ("size", ptr sym.size); ("writable", bool sym.writable); ("data", BytesSeq.ppby ~by:4 sym.data); diff --git a/src/elf/symbol.mli b/src/elf/symbol.mli index 51213545..5b5e9e6f 100644 --- a/src/elf/symbol.mli +++ b/src/elf/symbol.mli @@ -67,8 +67,8 @@ type t = { name : string; other_names : string list; typ : typ; - (* addr : addr; *) - addr : int; + addr : addr; + (* addr : int; *) size : int; writable : bool; data : BytesSeq.t; @@ -117,5 +117,8 @@ val compare : t -> t -> int (** Pretty prints a symbol type *) val pp_typ : typ -> Pp.document +(** Pretty prints symbolic address *) +val pp_addr : addr -> Pp.document + (** Raw pretty printing of a symbol *) val pp_raw : t -> Pp.document From 58091d76021cef4260b7823208af12a57841c6bf Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sat, 4 Jan 2025 20:14:36 +0100 Subject: [PATCH 04/89] Convert to symbolic addresses to make things compile TODO complete the logic --- src/dw/func.ml | 3 ++- src/dw/loc.ml | 3 ++- src/state/base.ml | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/dw/func.ml b/src/dw/func.ml index 32f64174..e8738d24 100644 --- a/src/dw/func.ml +++ b/src/dw/func.ml @@ -140,7 +140,8 @@ let of_linksem (elf : Elf.File.t) (tenv : Ctype.env) (lfun : linksem_t) = | None -> ( match lfun.ss_entry_address with | Some a -> ( - match Elf.SymTable.of_addr_opt elf.symbols (Nat_big_num.to_int a) with + let addr = Elf.Symbol.{section = ".text"; offset = Nat_big_num.to_int a} in (* TODO this is wrong, need symbolic DWARF *) + match Elf.SymTable.of_addr_opt elf.symbols addr with | Some sym -> Some sym | None -> None ) diff --git a/src/dw/loc.ml b/src/dw/loc.ml index 7b194e3e..bcd4eda0 100644 --- a/src/dw/loc.ml +++ b/src/dw/loc.ml @@ -120,7 +120,8 @@ let of_linksem ?(amap = Arch.dwarf_reg_map ()) (elf : Elf.File.t) : linksem_t -> (* Global *) | [{ op_semantics = OpSem_lit; op_code = code; op_argument_values = [arg]; _ }] as ops when Z.to_int code = vDW_OP_addr -> ( - try Global (Elf.SymTable.of_addr_with_offset elf.symbols @@ int_of_oav arg) + let addr = Elf.Symbol.{ section = ".data"; offset = int_of_oav arg } in (* TODO this is wrong, need symbolic DWARF*) + try Global (Elf.SymTable.of_addr_with_offset elf.symbols @@ addr) with Not_found -> warn "Symbol at 0x%x not found in Loc.of_linksem" (int_of_oav arg); Dwarf ops diff --git a/src/state/base.ml b/src/state/base.ml index 52cad8a8..33d4de14 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -429,9 +429,10 @@ let read_from_rodata (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t option if not @@ ConcreteEval.is_concrete addr then None else let int_addr = ConcreteEval.eval addr |> Value.expect_bv |> BitVec.to_int in + let sym_addr = Elf.Symbol.{ section = ".rodata"; offset = int_addr } in (* TODO this is wrong *) let size = size |> Ast.Size.to_bits in try - let (sym, offset) = Elf.SymTable.of_addr_with_offset elf.symbols int_addr in + let (sym, offset) = Elf.SymTable.of_addr_with_offset elf.symbols sym_addr in if sym.writable then None else (* Assume little endian here *) From 03ce63b6e1f1a5764d59712afe884c637b4d78fd Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sat, 4 Jan 2025 20:16:54 +0100 Subject: [PATCH 05/89] DIsable typing --- src/trace/run.ml | 14 ++++++++------ src/trace/typer.ml | 4 ++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/trace/run.ml b/src/trace/run.ml index bd8eb38d..986d3b53 100644 --- a/src/trace/run.ml +++ b/src/trace/run.ml @@ -70,10 +70,11 @@ let expand ~(ctxt : ctxt) (exp : Base.exp) : State.exp = otherwise the type will be [None] *) let expand_tval ~(ctxt : ctxt) (exp : Base.exp) : State.tval = let sexp = expand ~ctxt exp in - if Ctxt.typing_enabled ~ctxt then + (* if Ctxt.typing_enabled ~ctxt then let ctyp = Typer.expr ~ctxt exp in { ctyp; exp = sexp } - else { ctyp = None; exp = sexp } + else *) + { ctyp = None; exp = sexp } (** Run the event. The modified state is the one inside [ctxt]. *) @@ -85,22 +86,23 @@ let event_mut ~(ctxt : ctxt) (event : Base.event) = | ReadMem { addr; value; size } -> let naddr = expand ~ctxt addr in let tval = - match ctxt.dwarf with + (* match ctxt.dwarf with | Some dwarf -> let ptrtype = Typer.expr ~ctxt addr in Typer.read ~dwarf ctxt.state ?ptrtype ~addr:naddr ~size - | None -> State.read_noprov ctxt.state ~addr:naddr ~size |> State.Tval.of_exp + | None -> *) + State.read_noprov ctxt.state ~addr:naddr ~size |> State.Tval.of_exp in HashVector.set ctxt.mem_reads value tval | WriteMem { addr; value; size } -> ( let naddr = expand ~ctxt addr in - match ctxt.dwarf with + (* match ctxt.dwarf with | Some dwarf -> let ptrtype = Typer.expr ~ctxt addr in debug "Typed write mem with ptr:%t" (Pp.top (Pp.opt Ctype.pp) ptrtype); let value = expand_tval ~ctxt value in Typer.write ~dwarf ctxt.state ?ptrtype ~addr:naddr ~size value - | None -> + | None -> *) let value = expand ~ctxt value in State.write_noprov ctxt.state ~addr:naddr ~size value ) diff --git a/src/trace/typer.ml b/src/trace/typer.ml index e83f3284..f7b0f6de 100644 --- a/src/trace/typer.ml +++ b/src/trace/typer.ml @@ -1,4 +1,4 @@ -(*==================================================================================*) +(* ================================================================================== (* BSD 2-Clause License *) (* *) (* Copyright (c) 2020-2021 Thibaut Pérami *) @@ -314,4 +314,4 @@ let write ~(dwarf : Dw.t) (s : State.t) ?(ptrtype : Ctype.t option) ~addr ~size State.write ~provenance s ~addr ~size value.exp | _ -> warn "Writing without provenance"; - State.write_noprov s ~addr ~size value.exp + State.write_noprov s ~addr ~size value.exp *) From 261484664121657870d1dfc17c0cd2e2cf7c66ce Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sun, 5 Jan 2025 11:34:06 +0100 Subject: [PATCH 06/89] Refactor (symbolic) address to separate module --- src/dw/func.ml | 2 +- src/dw/loc.ml | 2 +- src/elf/address.ml | 10 ++++++++++ src/elf/symTable.ml | 18 +++++++++--------- src/elf/symTable.mli | 10 +++++----- src/elf/symbol.ml | 15 +++------------ src/elf/symbol.mli | 11 +---------- src/state/base.ml | 2 +- 8 files changed, 31 insertions(+), 39 deletions(-) create mode 100644 src/elf/address.ml diff --git a/src/dw/func.ml b/src/dw/func.ml index e8738d24..18fdd676 100644 --- a/src/dw/func.ml +++ b/src/dw/func.ml @@ -140,7 +140,7 @@ let of_linksem (elf : Elf.File.t) (tenv : Ctype.env) (lfun : linksem_t) = | None -> ( match lfun.ss_entry_address with | Some a -> ( - let addr = Elf.Symbol.{section = ".text"; offset = Nat_big_num.to_int a} in (* TODO this is wrong, need symbolic DWARF *) + let addr = Elf.Address.{section = ".text"; offset = Nat_big_num.to_int a} in (* TODO this is wrong, need symbolic DWARF *) match Elf.SymTable.of_addr_opt elf.symbols addr with | Some sym -> Some sym | None -> None diff --git a/src/dw/loc.ml b/src/dw/loc.ml index bcd4eda0..e9aacff2 100644 --- a/src/dw/loc.ml +++ b/src/dw/loc.ml @@ -120,7 +120,7 @@ let of_linksem ?(amap = Arch.dwarf_reg_map ()) (elf : Elf.File.t) : linksem_t -> (* Global *) | [{ op_semantics = OpSem_lit; op_code = code; op_argument_values = [arg]; _ }] as ops when Z.to_int code = vDW_OP_addr -> ( - let addr = Elf.Symbol.{ section = ".data"; offset = int_of_oav arg } in (* TODO this is wrong, need symbolic DWARF*) + let addr = Elf.Address.{ section = ".data"; offset = int_of_oav arg } in (* TODO this is wrong, need symbolic DWARF*) try Global (Elf.SymTable.of_addr_with_offset elf.symbols @@ addr) with Not_found -> warn "Symbol at 0x%x not found in Loc.of_linksem" (int_of_oav arg); diff --git a/src/elf/address.ml b/src/elf/address.ml new file mode 100644 index 00000000..c9f3b66b --- /dev/null +++ b/src/elf/address.ml @@ -0,0 +1,10 @@ +type t = { + section : string; + offset: int; +} + +let pp addr = Pp.(!^(addr.section) ^^ !^"+" ^^ ptr addr.offset) + +let of_linksem (section, offset) = { section; offset = Z.to_int offset } + +let (+) addr offset = { section = addr.section; offset = addr.offset + offset } \ No newline at end of file diff --git a/src/elf/symTable.ml b/src/elf/symTable.ml index dee03749..f0ac45a1 100644 --- a/src/elf/symTable.ml +++ b/src/elf/symTable.ml @@ -62,7 +62,7 @@ module SMap = Map.Make (String) module AddrMap = struct type t = RMap.t SMap.t - let add t addr sym = + let add t (addr: Address.t) sym = SMap.update addr.section (fun old -> let old = match old with | None -> RMap.empty @@ -71,28 +71,28 @@ module AddrMap = struct Some (RMap.add old addr.offset sym) ) t - let update f t addr = + let update f t (addr: Address.t) = SMap.update addr.section (Option.map (fun x -> RMap.update f x addr.offset)) t let empty = SMap.empty - let at t addr = + let at t (addr: Address.t) = SMap.find addr.section t |> Fun.flip RMap.at addr.offset - let at_opt t addr = + let at_opt t (addr: Address.t) = Option.bind (SMap.find_opt addr.section t) @@ Fun.flip RMap.at_opt addr.offset - let at_off t addr = + let at_off t (addr: Address.t) = SMap.find addr.section t |> Fun.flip RMap.at_off addr.offset - let at_off_opt t addr = + let at_off_opt t (addr: Address.t) = Option.bind (SMap.find_opt addr.section t) @@ Fun.flip RMap.at_off_opt addr.offset let bindings t = let sections = SMap.bindings t in List.bind sections @@ fun (section, rmap) -> let inner_bindings = RMap.bindings rmap in - List.map (fun (offset, sym) -> ({section; offset}, sym)) inner_bindings + List.map (fun (offset, sym) -> (Address.{section; offset}, sym)) inner_bindings end @@ -134,7 +134,7 @@ let of_addr_with_offset t addr = AddrMap.at_off t.by_addr addr let of_addr_with_offset_opt t addr = AddrMap.at_off_opt t.by_addr addr -let to_addr_offset (sym, offset) = { section = sym.addr.section; offset = sym.addr.offset + offset } +let to_addr_offset (sym, offset) = Address.(sym.addr + offset) let string_of_sym_offset ((sym, off) : sym_offset) = sym.name ^ "+" ^ string_of_int off @@ -156,7 +156,7 @@ let of_linksem linksem_map = in List.fold_left add_linksem_sym_to_map empty linksem_map -let pp_raw st = AddrMap.bindings st.by_addr |> List.map (Pair.map pp_addr pp_raw) |> Pp.mapping "syms" +let pp_raw st = AddrMap.bindings st.by_addr |> List.map (Pair.map Address.pp pp_raw) |> Pp.mapping "syms" let iter t f = SMap.iter (fun _ value -> f value) t.by_name diff --git a/src/elf/symTable.mli b/src/elf/symTable.mli index 0b919a8a..18e7de50 100644 --- a/src/elf/symTable.mli +++ b/src/elf/symTable.mli @@ -78,19 +78,19 @@ val of_name_opt : t -> string -> sym option (** Get the symbol owning that address. Not_found is raised if no symbol own that address.data See {!of_addr_opt} *) -val of_addr : t -> Symbol.addr -> sym +val of_addr : t -> Address.t -> sym (** Get the symbol owning that address. None if no symbol own that address. See {!of_addr} *) -val of_addr_opt : t -> Symbol.addr -> sym option +val of_addr_opt : t -> Address.t -> sym option (** Get a symbol with the offset that correspond to that address *) -val of_addr_with_offset : t -> Symbol.addr -> sym_offset +val of_addr_with_offset : t -> Address.t -> sym_offset (** Get a symbol with the offset that correspond to that address *) -val of_addr_with_offset_opt : t -> Symbol.addr -> sym_offset option +val of_addr_with_offset_opt : t -> Address.t -> sym_offset option (** Get back the raw address from a symbol+offset value *) -val to_addr_offset : sym_offset -> Symbol.addr +val to_addr_offset : sym_offset -> Address.t (** Transform a symbol + offset into the corresponding string *) val string_of_sym_offset : sym_offset -> string diff --git a/src/elf/symbol.ml b/src/elf/symbol.ml index bcbd00ae..da1bf9e0 100644 --- a/src/elf/symbol.ml +++ b/src/elf/symbol.ml @@ -52,17 +52,11 @@ type typ = NOTYPE | OBJECT | FUNC | SECTION | FILE | UNKNOWN type linksem_typ = Z.t -(* TODO: move somewhere to reuse *) -type addr = { - section : string; - offset: int; -} - type t = { name : string; other_names : string list; typ : typ; - addr : addr; + addr : Address.t; (* addr : int; *) size : int; writable : bool; @@ -104,8 +98,7 @@ let locs = SMap.empty |> SMap.add ".text" 0 |> SMap.add ".data" 1000000 |> SMap. let of_linksem (name, (typ, size, addr, data, _), writable) = let typ = typ_of_linksem typ in let size = Z.to_int size in - let section, offset = addr in - let addr = { section; offset = Z.to_int offset } in + let addr = Address.of_linksem addr in (* let addr = SMap.find section locs + Z.to_int offset in *) { name; other_names = []; typ; size; addr; data; writable } @@ -128,8 +121,6 @@ let pp_typ typ = | FILE -> "FILE" | UNKNOWN -> "UNKNOWN" -let pp_addr addr = Pp.(!^(addr.section) ^^ !^"+" ^^ ptr addr.offset) - let pp_raw sym = Pp.( !^"sym" @@ -138,7 +129,7 @@ let pp_raw sym = ("name", !^(sym.name)); ("other names", separate nbspace (List.map string sym.other_names)); ("typ", pp_typ sym.typ); - ("addr", pp_addr sym.addr); + ("addr", Address.pp sym.addr); (* ("addr", ptr sym.addr); *) ("size", ptr sym.size); ("writable", bool sym.writable); diff --git a/src/elf/symbol.mli b/src/elf/symbol.mli index 5b5e9e6f..0e91fa11 100644 --- a/src/elf/symbol.mli +++ b/src/elf/symbol.mli @@ -55,19 +55,13 @@ type typ = NOTYPE | OBJECT | FUNC | SECTION | FILE | UNKNOWN type linksem_typ = Z.t -(* TODO: move somewhere to reuse *) -type addr = { - section : string; - offset: int; -} - (** The ELF symbol. This type guarantee the data exists contrary to linksem symbols (it may be all zeros though) *) type t = { name : string; other_names : string list; typ : typ; - addr : addr; + addr : Address.t; (* addr : int; *) size : int; writable : bool; @@ -117,8 +111,5 @@ val compare : t -> t -> int (** Pretty prints a symbol type *) val pp_typ : typ -> Pp.document -(** Pretty prints symbolic address *) -val pp_addr : addr -> Pp.document - (** Raw pretty printing of a symbol *) val pp_raw : t -> Pp.document diff --git a/src/state/base.ml b/src/state/base.ml index 33d4de14..353c6cfd 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -429,7 +429,7 @@ let read_from_rodata (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t option if not @@ ConcreteEval.is_concrete addr then None else let int_addr = ConcreteEval.eval addr |> Value.expect_bv |> BitVec.to_int in - let sym_addr = Elf.Symbol.{ section = ".rodata"; offset = int_addr } in (* TODO this is wrong *) + let sym_addr = Elf.Address.{ section = ".rodata"; offset = int_addr } in (* TODO this is wrong *) let size = size |> Ast.Size.to_bits in try let (sym, offset) = Elf.SymTable.of_addr_with_offset elf.symbols sym_addr in From fd2c3ff4483f9c59b8e24fd61ce8ac25fe273769 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sun, 5 Jan 2025 12:01:00 +0100 Subject: [PATCH 07/89] Symbolic runner (TODO parsing symbolic addresses from SMT) --- src/run/runner.ml | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/src/run/runner.ml b/src/run/runner.ml index 864dba24..760670f9 100644 --- a/src/run/runner.ml +++ b/src/run/runner.ml @@ -75,9 +75,9 @@ type slot = type t = { elf : Elf.File.t; dwarf : Dw.t option; - instrs : (int, slot) Hashtbl.t; (** Instruction cache *) + instrs : (Elf.Address.t, slot) Hashtbl.t; (** Instruction cache *) pc : Reg.t; - funcs : int Vec.t; (** Loaded functions by loading order *) + funcs : Elf.Address.t Vec.t; (** Loaded functions by loading order *) } let of_elf ?dwarf elf = @@ -100,36 +100,36 @@ let load_sym runner (sym : Elf.Symbol.t) = (fun code -> let (addr, instr_len) = let result = !addr and len = BytesSeq.length code in - addr := !addr + len; + addr := Elf.Address.(!addr + len); (result, len) in try let instr = Trace.Cache.get_instr code in if instr.traces = [] then begin - debug "Instruction at 0x%x in %s is loaded as special" addr sym.name; + debug "Instruction at %t in %s is loaded as special" (Pp.top Elf.Address.pp addr) sym.name; Hashtbl.add runner.instrs addr (Special instr_len) end else begin - debug "Instruction at 0x%x in %s is loaded as normal. Traces are:\n%t" addr sym.name + debug "Instruction at %t in %s is loaded as normal. Traces are:\n%t" (Pp.top Elf.Address.pp addr) sym.name Pp.(topi Trace.Instr.pp instr); Hashtbl.add runner.instrs addr (Normal instr) end with exn -> - warn "Could not convert isla trace of instruction at 0x%x in %s to Trace.t: %s\n%s" addr + warn "Could not convert isla trace of instruction at %t in %s to Trace.t: %s\n%s" (Pp.top Elf.Address.pp addr) runner.elf.filename (Printexc.to_string exn) (Printexc.get_backtrace ()); Hashtbl.add runner.instrs addr (IslaFail instr_len)) opcode_list (** Fetch an instruction, and return corresponding slot. *) -let fetch (runner : t) (pc : int) : slot = - debug "Fetching PC 0x%x" pc; +let fetch (runner : t) (pc : Elf.Address.t) : slot = + debug "Fetching PC %t" (Pp.top Elf.Address.pp pc); match Hashtbl.find_opt runner.instrs pc with | Some v -> v | None -> ( match Elf.SymTable.of_addr_opt runner.elf.symbols pc with | Some sym when sym.typ = Elf.Symbol.FUNC -> if Hashtbl.mem runner.instrs sym.addr then begin - warn "Tried to fetch in middle of instructions in %s at 0x%x" runner.elf.filename pc; + warn "Tried to fetch in middle of instructions in %s at %t" runner.elf.filename (Pp.top Elf.Address.pp pc); Hashtbl.add runner.instrs pc Nocode; Nocode end @@ -138,13 +138,13 @@ let fetch (runner : t) (pc : int) : slot = match Hashtbl.find_opt runner.instrs pc with | Some v -> v | None -> - warn "Tried to fetch in middle of instructions in %s at 0x%x" runner.elf.filename - pc; + warn "Tried to fetch in middle of instructions in %s at %t" runner.elf.filename + (Pp.top Elf.Address.pp pc); Hashtbl.add runner.instrs pc Nocode; Nocode end | _ -> - warn "Tried to fetch outside of normal code in %s at 0x%x" runner.elf.filename pc; + warn "Tried to fetch outside of normal code in %s at %t" runner.elf.filename (Pp.top Elf.Address.pp pc); Hashtbl.add runner.instrs pc Nocode; Nocode ) @@ -188,6 +188,7 @@ let skip runner state : State.t list = let pc_exp = State.get_reg_exp state runner.pc in try let pc = pc_exp |> Ast.expect_bits |> BitVec.to_int in + let pc = Elf.Address.{ section = ".text"; offset = pc } in (* TODO this is wrong, should get symbolic value from pc_exp *) match fetch runner pc with | Normal { traces = _; read = _; written = _; length; opcode = _ } |Special length @@ -195,7 +196,7 @@ let skip runner state : State.t list = let state = State.copy_if_locked state in State.bump_pc ~pc:runner.pc state length; [state] - | Nocode -> Raise.fail "Trying to skip 0x%x in %s: no code there" pc runner.elf.filename + | Nocode -> Raise.fail "Trying to skip %t in %s: no code there" (Pp.tos Elf.Address.pp pc) runner.elf.filename with exn -> err "Trying to skip instruction at %t in %s: Unexpected error" Pp.(top State.Exp.pp pc_exp) @@ -220,13 +221,14 @@ let run ?prelock runner state : State.t list = let pc_exp = State.get_reg_exp state runner.pc in try let pc = pc_exp |> Ast.expect_bits |> BitVec.to_int in + let pc = Elf.Address.{ section = ".text"; offset = pc } in (* TODO this is wrong, should get symbolic value from pc_exp *) match fetch runner pc with | Normal instr -> execute_normal ?prelock ~pc runner instr state | Special _ -> - Raise.fail "Special instruction at 0x%x in %s. unsupported for now" pc runner.elf.filename - | Nocode -> Raise.fail "Trying to run 0x%x in %s: no code there" pc runner.elf.filename + Raise.fail "Special instruction at %t in %s. unsupported for now" (Pp.tos Elf.Address.pp pc) runner.elf.filename + | Nocode -> Raise.fail "Trying to run %t in %s: no code there" (Pp.tos Elf.Address.pp pc) runner.elf.filename | IslaFail _ -> - Raise.fail "Trying to run 0x%x in %s: Isla pipeline failed on that instruction" pc + Raise.fail "Trying to run %t in %s: Isla pipeline failed on that instruction" (Pp.tos Elf.Address.pp pc) runner.elf.filename with exn -> err "Trying to run instruction at %t in %s: Unexpected error" @@ -257,4 +259,4 @@ let pp_slot = (** Dump instruction table *) let pp_instr (runner : t) = let open Pp in - hashtbl_sorted ~name:"Instructions" ~compare ptr pp_slot runner.instrs + hashtbl_sorted ~name:"Instructions" ~compare Elf.Address.pp pp_slot runner.instrs From d42ecf06eb6410fafa9d04ddff56a8e33c3ee9c5 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sun, 5 Jan 2025 16:06:53 +0100 Subject: [PATCH 08/89] Run relocatable file TODOS: - translations from/to SMT - relocations - translations from DWARF - provenance or concrete SP - simplify read addresses --- src/bin/dune | 4 ++-- src/bin/main.ml | 4 ++-- src/bin/main_riscv64.ml | 4 ++-- src/bin/readDwarf.ml | 2 +- src/dune | 2 ++ src/run/block.ml | 2 +- src/run/block_lib.ml | 10 ++++++++-- src/run/func.ml | 2 +- src/run/funcRD.ml | 12 ++++++------ src/state/base.ml | 13 +++++++++---- src/state/base.mli | 6 ++++-- 11 files changed, 38 insertions(+), 23 deletions(-) diff --git a/src/bin/dune b/src/bin/dune index f4c90590..fea23d27 100644 --- a/src/bin/dune +++ b/src/bin/dune @@ -4,7 +4,7 @@ (modules main) (flags (:standard -open Utils)) - (libraries config run utils sig_aarch64 other_cmds branchTable)) + (libraries config run utils sig_aarch64 other_cmds)) (executable (name main_riscv64) @@ -12,7 +12,7 @@ (modules main_riscv64) (flags (:standard -open Utils)) - (libraries config run utils sig_riscv64 other_cmds branchTable)) + (libraries config run utils sig_riscv64 other_cmds)) (library (name other_cmds) diff --git a/src/bin/main.ml b/src/bin/main.ml index fc17e98a..714c0fc2 100644 --- a/src/bin/main.ml +++ b/src/bin/main.ml @@ -71,9 +71,9 @@ let commands = Run.Func.command; Run.Instr.command; Run.Block.command; - Run.FuncRD.command; + (* Run.FuncRD.command; *) Other_cmds.CopySourcesCmd.command; - BranchTable.command; + (* BranchTable.command; *) ] let _ = Printexc.record_backtrace Config.enable_backtrace diff --git a/src/bin/main_riscv64.ml b/src/bin/main_riscv64.ml index 92a20d0f..50622cb7 100644 --- a/src/bin/main_riscv64.ml +++ b/src/bin/main_riscv64.ml @@ -71,9 +71,9 @@ let commands = Run.Func.command; Run.Instr.command; Run.Block.command; - Run.FuncRD.command; + (* Run.FuncRD.command; *) Other_cmds.CopySourcesCmd.command; - BranchTable.command; + (* BranchTable.command; *) ] let _ = Printexc.record_backtrace Config.enable_backtrace diff --git a/src/bin/readDwarf.ml b/src/bin/readDwarf.ml index 16d38882..957552f8 100644 --- a/src/bin/readDwarf.ml +++ b/src/bin/readDwarf.ml @@ -71,7 +71,7 @@ let commands = Run.Func.command; Run.Instr.command; Run.Block.command; - Run.FuncRD.command; + (* Run.FuncRD.command; *) CopySourcesCmd.command; ] diff --git a/src/dune b/src/dune index cdaa4d1c..b474cb2a 100644 --- a/src/dune +++ b/src/dune @@ -2,3 +2,5 @@ (release (flags (:standard -short-paths)))) + +(dirs :standard \ branchTable) \ No newline at end of file diff --git a/src/run/block.ml b/src/run/block.ml index 89d0c0e0..ed9f8ddb 100644 --- a/src/run/block.ml +++ b/src/run/block.ml @@ -109,7 +109,7 @@ let gen_block ((elf : Elf.File.t), (symoffset : Elf.SymTable.sym_offset)) len br let open Option in unlift_pair @@ let+ l = len in - (start, start + l) + (start, Elf.Address.(start + l)) in let endpred = Block_lib.gen_endpred ?min ?max ~brks () in Trace.Cache.start @@ Arch.get_isla_config (); diff --git a/src/run/block_lib.ml b/src/run/block_lib.ml index 25e5c528..ddb6c14d 100644 --- a/src/run/block_lib.ml +++ b/src/run/block_lib.ml @@ -56,7 +56,7 @@ open Logs.Logger (struct end) (** [endpred pc_exp] gives when to stop *) -type t = { runner : Runner.t; start : int; endpred : State.exp -> string option } +type t = { runner : Runner.t; start : Elf.Address.t; endpred : State.exp -> string option } (** Build a complex block starting from [start] in [sym] and ending when [endpred] says so. [endpred] is a predicate on the symbolic PC expression *) @@ -137,7 +137,7 @@ let run ?(every_instruction = false) ?relevant (b : t) (start : State.t) : label end in let state = State.copy start in - State.set_pc ~pc:pcreg state b.start; + State.set_pc_sym ~pc:pcreg state b.start; let rest = [run_from state] in State.Tree.{ state = start; data = Start; rest } @@ -148,6 +148,12 @@ let run ?(every_instruction = false) ?relevant (b : t) (start : State.t) : label - pc has be seen more than [loop] *) let gen_endpred ?min ?max ?loop ?(brks = []) () : State.exp -> string option = + (* HACK *) + (* TODO rewrite for symbolic pc *) + let min = Option.map (fun min -> min.Elf.Address.offset) min in + let max = Option.map (fun max -> max.Elf.Address.offset) max in + let brks = List.map (fun brks -> brks.Elf.Address.offset) brks in + (* *) let endnow fmt = Printf.ksprintf Option.some fmt in let pchtbl = Hashtbl.create 10 in let loop_str = diff --git a/src/run/func.ml b/src/run/func.ml index 3afdb1d5..d41d3c03 100644 --- a/src/run/func.ml +++ b/src/run/func.ml @@ -85,7 +85,7 @@ let get_state_tree ~elf:elfname ~name ?(dump = false) ?(entry = false) ?len ?(br let open Option in unlift_pair @@ let+ l = len in - (sym.addr, sym.addr + l) + (sym.addr, Elf.Address.(sym.addr + l)) in let endpred = Block_lib.gen_endpred ?min ?max ?loop ~brks () in let runner = Runner.of_dwarf dwarf in diff --git a/src/run/funcRD.ml b/src/run/funcRD.ml index 9e1dc449..374b7e9d 100644 --- a/src/run/funcRD.ml +++ b/src/run/funcRD.ml @@ -48,14 +48,14 @@ instructions.*) open Cmdliner -open Config.CommonOpt -open Fun +(* open Config.CommonOpt *) +(* open Fun *) open Logs.Logger (struct let str = __MODULE__ end) -let run_func_rd elfname name objdump_d branchtables breakpoints = +(* let run_func_rd elfname name objdump_d branchtables breakpoints = base "Running with rd %s in %s" name elfname; base "Loading %s" elfname; let dwarf = Dw.of_file elfname in @@ -142,7 +142,7 @@ let run_func_rd elfname name objdump_d branchtables breakpoints = |> List.iter (fun (msg, st, regs) -> base "At 0x%x, %s:\n%t" pc msg Pp.(topi (State.pp_partial ~regs) st)); print_string (print_analyse_instruction pc))) - runner.funcs + runner.funcs *) let elf = let doc = "ELF file from which to pull the code" in @@ -168,7 +168,7 @@ let breakpoints = in Arg.(value & opt_all string [] & info ["b"; "break"] ~docv:"POSITION" ~doc) -let term = +(* let term = Term.( CmdlinerHelper.func_options comopts run_func_rd $ elf $ func $ objdump_d $ branch_table $ breakpoints) @@ -181,4 +181,4 @@ let info = in Cmd.(info "run-func-rd" ~doc ~exits) -let command = (term, info) +let command = (term, info) *) diff --git a/src/state/base.ml b/src/state/base.ml index 353c6cfd..abd14f3f 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -317,7 +317,7 @@ type t = { However the symbolic execution should always be more concrete with it than without it *) fenv : Fragment.env; (** The memory type environment. See {!Fragment.env} *) - mutable last_pc : int; + mutable last_pc : Elf.Address.t; (** The PC of the instruction that lead into this state. The state should be right after that instruction. This has no semantic meaning as part of the state. It's just for helping knowing what comes from where *) @@ -354,7 +354,7 @@ let make ?elf () = mem = Mem.empty (); elf; fenv = Fragment.Env.make (); - last_pc = 0; + last_pc = Elf.Address.{ section = ".text"; offset = 0 }; (* TODO is this right? *) } in next_id := id + 1; @@ -498,6 +498,11 @@ let set_pc ~(pc : Reg.t) (s : t) (pcval : int) = let ctyp = Ctype.of_frag Ctype.Global ~offset:pcval ~constexpr:true in set_reg s pc @@ Tval.make ~ctyp exp +(* TODO *) +let set_pc_sym ~(pc : Reg.t) (s : t) (pcval : Elf.Address.t) = + set_pc ~pc s pcval.offset + + let bump_pc ~(pc : Reg.t) (s : t) (bump : int) = let pc_exp = get_reg_exp s pc in assert (ConcreteEval.is_concrete pc_exp); @@ -520,7 +525,7 @@ let pp s = [ ("id", Id.pp s.id); ("base_state", Option.fold ~none:!^"none" ~some:(fun s -> Id.pp s.id) s.base_state); - ("last_pc", ptr s.last_pc); + ("last_pc", Elf.Address.pp s.last_pc); ("regs", Reg.Map.pp Tval.pp s.regs); ("fenv", Fragment.Env.pp s.fenv); ("read_vars", Vec.ppi Tval.pp s.read_vars); @@ -537,7 +542,7 @@ let pp_partial ~regs s = [ ("id", Id.pp s.id |> some); ("base_state", Option.map (fun s -> Id.pp s.id) s.base_state); - ("last_pc", ptr s.last_pc |> some); + ("last_pc", Elf.Address.pp s.last_pc |> some); ( "regs", List.map (fun reg -> (Reg.pp reg, Reg.Map.get s.regs reg |> Tval.pp)) regs |> Pp.mapping "" |> some ); diff --git a/src/state/base.mli b/src/state/base.mli index 455943b0..05669eb6 100644 --- a/src/state/base.mli +++ b/src/state/base.mli @@ -292,7 +292,7 @@ type t = private { However the symbolic execution should always be more concrete with it than without it *) fenv : Fragment.env; (** The memory type environment. See {!Fragment.env} *) - mutable last_pc : int; + mutable last_pc : Elf.Address.t; (** The PC of the instruction that lead into this state. The state should be right after that instruction. This has no semantic meaning as part of the state. It's just for helping knowing what comes from where *) @@ -464,6 +464,8 @@ val update_reg_exp : t -> Reg.t -> (exp -> exp) -> unit (** Set the PC to a concrete value and keep its type appropriate *) val set_pc : pc:Reg.t -> t -> int -> unit +val set_pc_sym : pc:Reg.t -> t -> Elf.Address.t -> unit + (** Bump a concrete PC by a concrete bump (generally the size of a non-branching instruction *) val bump_pc : pc:Reg.t -> t -> int -> unit @@ -471,7 +473,7 @@ val bump_pc : pc:Reg.t -> t -> int -> unit val concretize_pc : pc:Reg.t -> t -> unit (** Set the [last_pc] of the state *) -val set_last_pc : t -> int -> unit +val set_last_pc : t -> Elf.Address.t -> unit (** {1 Pretty printing } *) From fbd7c07712dea148d651e81787c333bdf7249dc4 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Mon, 6 Jan 2025 22:56:36 +0100 Subject: [PATCH 09/89] Enable typer --- src/arch/aarch64/sig.ml | 2 +- src/arch/riscv64/sig.ml | 2 +- src/ctype/ctype.ml | 4 ++-- src/state/base.ml | 3 ++- src/trace/run.ml | 17 ++++++++++------- src/trace/typer.ml | 10 +++++----- 6 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/arch/aarch64/sig.ml b/src/arch/aarch64/sig.ml index 18b0b18c..af5bd091 100644 --- a/src/arch/aarch64/sig.ml +++ b/src/arch/aarch64/sig.ml @@ -331,7 +331,7 @@ let get_abi api = State.set_reg_type state sp (Ctype.of_frag ~provenance:stack_provenance @@ DynFragment stack_frag_id); State.set_reg state r30 - (State.Tval.of_var ~ctyp:(Ctype.of_frag_somewhere Ctype.Global) RetAddr); + (State.Tval.of_var ~ctyp:(Ctype.of_frag_somewhere (Ctype.Global ".text")) RetAddr); let sp_exp = State.Exp.of_reg state.id sp in (* Assert that Sp is 16 bytes aligned *) State.push_assert state Exp.Typed.(extract ~last:3 ~first:0 sp_exp = bits_int ~size:4 0); diff --git a/src/arch/riscv64/sig.ml b/src/arch/riscv64/sig.ml index 0e14e523..c999179e 100644 --- a/src/arch/riscv64/sig.ml +++ b/src/arch/riscv64/sig.ml @@ -289,7 +289,7 @@ let get_abi api = State.set_reg_type state sp (Ctype.of_frag ~provenance:stack_provenance @@ DynFragment stack_frag_id); State.set_reg state ra - (State.Tval.of_var ~ctyp:(Ctype.of_frag_somewhere Ctype.Global) RetAddr); + (State.Tval.of_var ~ctyp:(Ctype.of_frag_somewhere (Ctype.Global ".text")) RetAddr); let sp_exp = State.Exp.of_reg state.id sp in (* Assert that Sp is 16 bytes aligned *) State.push_assert state Exp.Typed.(extract ~last:3 ~first:0 sp_exp = bits_int ~size:4 0); diff --git a/src/ctype/ctype.ml b/src/ctype/ctype.ml index daa31632..fab17f6b 100644 --- a/src/ctype/ctype.ml +++ b/src/ctype/ctype.ml @@ -136,7 +136,7 @@ and fragment = | Single of t (** Single object: Only when accessing of a global variable *) | DynArray of t (** Generic C pointer, may point to multiple element of that type *) | DynFragment of int (** Writable fragment for memory whose type is changing dynamically *) - | Global + | Global of string (** The Global fragment that contains all the fixed ELF section .text, .data, .rodata, ... *) @@ -669,7 +669,7 @@ and pp_fragment frag = | DynArray t -> pp t ^^ !^"[]" | Unknown -> !^"unknown" | DynFragment i -> dprintf "frag %d" i - | Global -> !^"global" + | Global s -> !^"global " ^^ !^s and pp_offset = function | Const off when off = 0 -> empty diff --git a/src/state/base.ml b/src/state/base.ml index abd14f3f..466a4ca0 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -493,9 +493,10 @@ let get_reg_exp s reg = get_reg s reg |> Tval.exp let update_reg_exp (s : t) (reg : Reg.t) (f : exp -> exp) = Reg.Map.get s.regs reg |> Tval.map_exp f |> Reg.Map.set s.regs reg +(* TODO *) let set_pc ~(pc : Reg.t) (s : t) (pcval : int) = let exp = Typed.bits_int ~size:64 pcval in - let ctyp = Ctype.of_frag Ctype.Global ~offset:pcval ~constexpr:true in + let ctyp = Ctype.of_frag (Ctype.Global ".text") ~offset:pcval ~constexpr:true in set_reg s pc @@ Tval.make ~ctyp exp (* TODO *) diff --git a/src/trace/run.ml b/src/trace/run.ml index 986d3b53..9b4f820a 100644 --- a/src/trace/run.ml +++ b/src/trace/run.ml @@ -70,10 +70,10 @@ let expand ~(ctxt : ctxt) (exp : Base.exp) : State.exp = otherwise the type will be [None] *) let expand_tval ~(ctxt : ctxt) (exp : Base.exp) : State.tval = let sexp = expand ~ctxt exp in - (* if Ctxt.typing_enabled ~ctxt then + if Ctxt.typing_enabled ~ctxt then let ctyp = Typer.expr ~ctxt exp in { ctyp; exp = sexp } - else *) + else { ctyp = None; exp = sexp } (** Run the event. @@ -85,24 +85,27 @@ let event_mut ~(ctxt : ctxt) (event : Base.event) = | WriteReg { reg; value } -> Vec.add_one ctxt.reg_writes (reg, expand_tval ~ctxt value) | ReadMem { addr; value; size } -> let naddr = expand ~ctxt addr in + let ptrtype = Typer.expr ~ctxt addr in + debug "ptrtype: %t" Pp.(top (optional Ctype.pp) ptrtype); let tval = - (* match ctxt.dwarf with + match ctxt.dwarf with | Some dwarf -> - let ptrtype = Typer.expr ~ctxt addr in Typer.read ~dwarf ctxt.state ?ptrtype ~addr:naddr ~size - | None -> *) + | None -> State.read_noprov ctxt.state ~addr:naddr ~size |> State.Tval.of_exp in HashVector.set ctxt.mem_reads value tval | WriteMem { addr; value; size } -> ( let naddr = expand ~ctxt addr in - (* match ctxt.dwarf with + let ptrtype = Typer.expr ~ctxt addr in + debug "ptrtype: %t" Pp.(top (optional Ctype.pp) ptrtype); + match ctxt.dwarf with | Some dwarf -> let ptrtype = Typer.expr ~ctxt addr in debug "Typed write mem with ptr:%t" (Pp.top (Pp.opt Ctype.pp) ptrtype); let value = expand_tval ~ctxt value in Typer.write ~dwarf ctxt.state ?ptrtype ~addr:naddr ~size value - | None -> *) + | None -> let value = expand ~ctxt value in State.write_noprov ctxt.state ~addr:naddr ~size value ) diff --git a/src/trace/typer.ml b/src/trace/typer.ml index f7b0f6de..cf04c5af 100644 --- a/src/trace/typer.ml +++ b/src/trace/typer.ml @@ -1,4 +1,4 @@ -(* ================================================================================== +(* ================================================================================== *) (* BSD 2-Clause License *) (* *) (* Copyright (c) 2020-2021 Thibaut Pérami *) @@ -178,7 +178,7 @@ let manyop ~ctxt (m : Ast.manyop) (tvals : tval list) : Ctype.t option = match List.hd tvals with | { exp = Unop (Extract (_, _), _, _); - ctyp = Some ({ unqualified = Ptr { fragment = Global; offset; _ }; _ } as ctyp); + ctyp = Some ({ unqualified = Ptr { fragment = Global _; offset; _ }; _ } as ctyp); } -> ( match offset with | Somewhere -> Some ctyp @@ -254,8 +254,8 @@ let fragment_at ~(dwarf : Dw.t) ~fenv ~size (frag : Ctype.fragment) at : Ctype.t let frag = Fragment.Env.get fenv i in let* (typ, off) = Fragment.at_off_opt frag at in Ctype.type_at ~env ~size typ off - | Global -> ( - match Elf.SymTable.of_addr_with_offset_opt dwarf.elf.symbols at with + | Global s -> ( + match Elf.SymTable.of_addr_with_offset_opt dwarf.elf.symbols Elf.Address.{ section = s; offset = at } with | Some (sym, offset) -> ( match Hashtbl.find_opt dwarf.vars sym.name with | Some v -> Ctype.type_at ~env ~size v.ctype offset @@ -314,4 +314,4 @@ let write ~(dwarf : Dw.t) (s : State.t) ?(ptrtype : Ctype.t option) ~addr ~size State.write ~provenance s ~addr ~size value.exp | _ -> warn "Writing without provenance"; - State.write_noprov s ~addr ~size value.exp *) + State.write_noprov s ~addr ~size value.exp From 49ffe70846bfb4f421bae33839e86df701aec510 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Tue, 7 Jan 2025 12:17:45 +0100 Subject: [PATCH 10/89] Some debug prints --- src/trace/run.ml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/trace/run.ml b/src/trace/run.ml index 9b4f820a..f6eaf1c1 100644 --- a/src/trace/run.ml +++ b/src/trace/run.ml @@ -85,6 +85,7 @@ let event_mut ~(ctxt : ctxt) (event : Base.event) = | WriteReg { reg; value } -> Vec.add_one ctxt.reg_writes (reg, expand_tval ~ctxt value) | ReadMem { addr; value; size } -> let naddr = expand ~ctxt addr in + debug "naddr: %t" (Pp.top State.Exp.pp naddr); let ptrtype = Typer.expr ~ctxt addr in debug "ptrtype: %t" Pp.(top (optional Ctype.pp) ptrtype); let tval = @@ -97,6 +98,7 @@ let event_mut ~(ctxt : ctxt) (event : Base.event) = HashVector.set ctxt.mem_reads value tval | WriteMem { addr; value; size } -> ( let naddr = expand ~ctxt addr in + debug "naddr: %t" (Pp.top State.Exp.pp naddr); let ptrtype = Typer.expr ~ctxt addr in debug "ptrtype: %t" Pp.(top (optional Ctype.pp) ptrtype); match ctxt.dwarf with From 3f0498b721f3b52484981f27471678d6908cd543 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Tue, 7 Jan 2025 12:18:13 +0100 Subject: [PATCH 11/89] Mark potential bug --- src/trace/context.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trace/context.ml b/src/trace/context.ml index cd8b46be..bfbce294 100644 --- a/src/trace/context.ml +++ b/src/trace/context.ml @@ -67,7 +67,7 @@ let expand_var ~(ctxt : t) (v : Base.Var.t) (a : Ast.no Ast.ty) : State.exp = assert (Base.Var.ty v = a); match v with | Register reg -> State.get_reg_exp ctxt.state reg - | NonDet (i, _) | Read (i, _) -> (HashVector.get ctxt.mem_reads i).exp + | NonDet (i, _) | Read (i, _) -> (HashVector.get ctxt.mem_reads i).exp (* TODO is the NonDet case correct *) let map_var ~(ctxt : t) (v : Base.Var.t) (a : Ast.no Ast.ty) : State.var = assert (Base.Var.ty v = a); From e4485cdf2153781052343b85cac0f7c19a9537df Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Tue, 7 Jan 2025 12:21:48 +0100 Subject: [PATCH 12/89] Add TODO --- src/arch/aarch64/sig.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arch/aarch64/sig.ml b/src/arch/aarch64/sig.ml index af5bd091..151fce0b 100644 --- a/src/arch/aarch64/sig.ml +++ b/src/arch/aarch64/sig.ml @@ -331,7 +331,7 @@ let get_abi api = State.set_reg_type state sp (Ctype.of_frag ~provenance:stack_provenance @@ DynFragment stack_frag_id); State.set_reg state r30 - (State.Tval.of_var ~ctyp:(Ctype.of_frag_somewhere (Ctype.Global ".text")) RetAddr); + (State.Tval.of_var ~ctyp:(Ctype.of_frag_somewhere (Ctype.Global ".text")) RetAddr); (* TODO doesn't have to be .text *) let sp_exp = State.Exp.of_reg state.id sp in (* Assert that Sp is 16 bytes aligned *) State.push_assert state Exp.Typed.(extract ~last:3 ~first:0 sp_exp = bits_int ~size:4 0); From 4ef12a190f742efde18cadf01fd363cbf8caf8b4 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Tue, 7 Jan 2025 18:55:41 +0100 Subject: [PATCH 13/89] Implement opcodes with relocations --- src/isla/cache.ml | 35 ++++++++++++++++++++--------------- src/isla/server.ml | 7 +++++++ src/run/bb_lib.ml | 2 +- src/run/instr.ml | 2 +- src/run/runner.ml | 2 +- src/trace/cache.ml | 4 ++-- src/trace/instr.ml | 5 +++-- 7 files changed, 35 insertions(+), 22 deletions(-) diff --git a/src/isla/cache.ml b/src/isla/cache.ml index 038bbdd6..f7d5850f 100644 --- a/src/isla/cache.ml +++ b/src/isla/cache.ml @@ -77,45 +77,49 @@ type config = Server.config bit 0 to back -1 : The start of the data bit back -1: set *) module Opcode (*: Cache.Key *) = struct - type t = BytesSeq.t option + type t = Server.opcode option let equal a b = match (a, b) with | (None, None) -> true - | (Some bs, Some bs2) -> BytesSeq.equal bs bs2 + | (Some (bs, r1), Some (bs2, r2)) -> BytesSeq.equal bs bs2 && r1 = r2 | _ -> false + let small_enough bs rel_id = + BytesSeq.length bs < BytesSeq.int_bytes && rel_id < 16 + let hash = function | None -> 0 - | Some bs -> + | Some (bs, rel) -> let i = BytesSeq.getintle_ze bs 0 in let l = BytesSeq.length bs in - if l < BytesSeq.int_bytes then begin + let rel_id = Server.reloc_id rel in + if small_enough bs rel_id then begin assert (not @@ IntBits.get i IntBits.back); let res = IntBits.blit l 0 i (IntBits.back - 3) 3 in + let res = IntBits.blit rel_id 0 res (IntBits.back - 7) 4 in res end else IntBits.set i IntBits.back - let to_file file = function + let to_file _file = function | None -> () - | Some bs -> - if BytesSeq.length bs < BytesSeq.int_bytes then () + | Some (bs, rel) -> + let rel_id = Server.reloc_id rel in + if small_enough bs rel_id then () else - let keyfile = Utils.Cache.to_keyfile file in - Files.write_bin BytesSeq.output keyfile bs + Raise.todo() - let of_file hash file = + let of_file hash _file = if hash = 0 then None else if IntBits.get hash IntBits.back then - let keyfile = Utils.Cache.to_keyfile file in - Some (Files.read BytesSeq.input keyfile) + Raise.todo() else let data = IntBits.sub hash 0 (IntBits.back - 3) in let size = IntBits.sub hash (IntBits.back - 3) 3 in let b = Bytes.create size in Bits.unsafe_blit_of_int data 0 b 0 (size * 8); - Some (BytesSeq.of_bytes b) + Some (BytesSeq.of_bytes b, None) end (** Representation of trace lists on disk. @@ -216,13 +220,14 @@ let get_cache () = match !cache with Some cache -> cache | None -> failwith "Isla cache was not started" (** Get the traces of the opcode given. Use {!Server} if the value is not in the cache *) -let get_traces (opcode : BytesSeq.t) : Base.rtrc list = +let get_traces (opcode : Server.opcode) : Base.rtrc list = let (cache, config) = get_cache () in match IC.get_opt cache (Some opcode) with | Some trcs -> trcs | None -> ensure_started (); - let trcs = Server.request_bin_parsed opcode in + let raw_opcode, _ = opcode in (*TODO*) + let trcs = Server.request_bin_parsed raw_opcode in let ptrcs = Preprocess.preprocess config trcs in IC.add cache (Some opcode) ptrcs; ptrcs diff --git a/src/isla/server.ml b/src/isla/server.ml index 0fff247d..ec544bb7 100644 --- a/src/isla/server.ml +++ b/src/isla/server.ml @@ -68,6 +68,13 @@ type config = Config.t processor exception/fault) or not *) type trcs = (bool * Base.rtrc) list +type reloc = | + +let reloc_id: reloc option -> int = function +| None -> 0 + +type opcode = BytesSeq.t * reloc option + (** Bump when updating isla. TODO: move the version checking to allow a range of version. Also, right now the cache invalidation is based on diff --git a/src/run/bb_lib.ml b/src/run/bb_lib.ml index 083d07eb..477a9d98 100644 --- a/src/run/bb_lib.ml +++ b/src/run/bb_lib.ml @@ -80,7 +80,7 @@ let from_binary (code : BytesSeq.t) : t = "BB.from_binary: Multiple path instruction.\n\ If this is not a branching instruction, try `run-block --linear'." in - code |> Isla.Cache.get_traces |> get_normal + (code, None) |> Isla.Cache.get_traces |> get_normal (*TODO relocs *) in let main = code |> BytesSeq.to_listbs ~len:4 |> List.map process |> Array.of_list in { main } diff --git a/src/run/instr.ml b/src/run/instr.ml index 07a8dab8..4b8ea439 100644 --- a/src/run/instr.ml +++ b/src/run/instr.ml @@ -152,7 +152,7 @@ let get_traces instr isla_run dump_types : traces = Isla.Cache.start @@ Arch.get_isla_config (); (* I call Init.init manually to print the register types *) Init.init () |> ignore; - let rtraces = Isla.Cache.get_traces instr in + let rtraces = Isla.Cache.get_traces (instr, None) in (* TODO relocs *) List.iter (fun t -> Isla.Type.type_trc t |> ignore) rtraces; if dump_types then base "Register types:\n%t\n" (Pp.topi State.Reg.pp_index ()); if isla_run then IslaTraces rtraces else Traces (List.map Trace.of_isla rtraces) diff --git a/src/run/runner.ml b/src/run/runner.ml index 760670f9..ed65e3e2 100644 --- a/src/run/runner.ml +++ b/src/run/runner.ml @@ -104,7 +104,7 @@ let load_sym runner (sym : Elf.Symbol.t) = (result, len) in try - let instr = Trace.Cache.get_instr code in + let instr = Trace.Cache.get_instr (code, None) in (*TODO relocs*) if instr.traces = [] then begin debug "Instruction at %t in %s is loaded as special" (Pp.top Elf.Address.pp addr) sym.name; Hashtbl.add runner.instrs addr (Special instr_len) diff --git a/src/trace/cache.ml b/src/trace/cache.ml index 366f7bea..0f7d7e33 100644 --- a/src/trace/cache.ml +++ b/src/trace/cache.ml @@ -155,7 +155,7 @@ let get_cache () = match !cache with Some cache -> cache | None -> failwith "Trace cache was not started" (** Get the traces of the opcode given. Use {!Isla.Server} if the value is not in the cache *) -let get_traces (opcode : BytesSeq.t) : Base.t list = +let get_traces (opcode : Isla.Server.opcode) : Base.t list = let cache = get_cache () in match TC.get_opt cache (Some opcode) with | Some trcs -> trcs @@ -168,4 +168,4 @@ let get_traces (opcode : BytesSeq.t) : Base.t list = (** Get a full blown {!Instr} from the opcode, going through the whole Isla pipeline if necessary.*) -let get_instr (opcode : BytesSeq.t) : Instr.t = Instr.of_traces opcode @@ get_traces opcode +let get_instr (opcode : Isla.Server.opcode) : Instr.t = Instr.of_traces opcode @@ get_traces opcode diff --git a/src/trace/instr.ml b/src/trace/instr.ml index 3dd01196..4f0b54b9 100644 --- a/src/trace/instr.ml +++ b/src/trace/instr.ml @@ -66,7 +66,7 @@ type t = { length : int; (** Bytes length *) read : Reg.t list; written : Reg.t list; - opcode : BytesSeq.t; + opcode : Isla.Server.opcode; } let dedup_regs = List.sort_uniq State.Reg.compare @@ -101,7 +101,8 @@ let trace_meta_of_trace trace = (** Generate full instruction data from a list of traces *) let of_traces opcode traces = let traces = List.map trace_meta_of_trace traces in - let length = BytesSeq.length opcode in + let raw_opcode, _ = opcode in + let length = BytesSeq.length raw_opcode in let read = dedup_regs @@ List.concat_map (fun (tr : trace_meta) -> tr.read) traces in let written = dedup_regs @@ List.concat_map (fun (tr : trace_meta) -> tr.written) traces in { traces; length; read; written; opcode } From 8dd5040da133f682fed0f3f72fd7787e856ec82f Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Tue, 7 Jan 2025 23:58:58 +0100 Subject: [PATCH 14/89] [WIP] read relocations from linksem --- notes-TODO | 4 +++- src/elf/linksemRelocatable.ml | 45 +++++++++++++++++++++++++---------- src/elf/relocations.ml | 32 +++++++++++++++++++++++++ src/elf/symbol.ml | 17 +++++++++---- src/elf/symbol.mli | 9 +++++-- 5 files changed, 88 insertions(+), 19 deletions(-) create mode 100644 src/elf/relocations.ml diff --git a/notes-TODO b/notes-TODO index c8e943ae..66ca908d 100644 --- a/notes-TODO +++ b/notes-TODO @@ -1,3 +1,5 @@ Symbolic symbol table - value of symbol?? (we don't have segments in relocatable files) -- can probably keep the same api, but addresses are symbolic \ No newline at end of file +- can probably keep the same api, but addresses are symbolic + +Instruction fetch: is it sound? (rewriting .text) \ No newline at end of file diff --git a/src/elf/linksemRelocatable.ml b/src/elf/linksemRelocatable.ml index 82b4e035..2123d8f3 100644 --- a/src/elf/linksemRelocatable.ml +++ b/src/elf/linksemRelocatable.ml @@ -1,18 +1,40 @@ (* TODO header *) +module SMap = Map.Make (String) + type sym_addr = string * Z.t -(* Like in linksem, but address is section+offset, and with a writable flag *) -type symbol = string * (Z.t * Z.t * sym_addr * Byte_sequence_wrapper.byte_sequence * Z.t) * bool +type rels = + | AArch64 of (Z.t, Abi_aarch64_symbolic_relocation.aarch64_relocation_target Elf_symbolic.abstract_relocation) Pmap.map + +type sym_data = +Byte_sequence_wrapper.byte_sequence * rels + + +(* Like in linksem, but address is section+offset, data has relocations and with a writable flag *) +type symbol = string * (Z.t * Z.t * sym_addr * sym_data * Z.t) * bool type global_symbol_init_info = symbol list open Elf_symbol_table open Elf_interpreted_section -let get_elf64_file_global_symbol_init f : global_symbol_init_info Error.error = - let secs = f.Elf_file.elf64_file_interpreted_sections in - Error.bind (Elf_file.get_elf64_file_symbol_table f) (fun (symtab, strtab) -> +let get_elf64_file_global_symbol_init (f: Elf_file.elf64_file) : global_symbol_init_info Error.error = + let secs = f.elf64_file_interpreted_sections in + let machine = f.elf64_file_header.elf64_machine in + Error.bind (Elf_file.get_elf64_file_symbol_table f) @@ fun (symtab, strtab) -> + let rel_cache = ref SMap.empty in + let get_relocs section = + match SMap.find_opt section !rel_cache with + | Some rels -> rels + | None -> + if machine = Elf_header.elf_ma_aarch64 then + Error.bind + (Elf_symbolic.extract_elf64_relocations_for_section f Abi_aarch64_symbolic_relocation.abi_aarch64_relocation_to_abstract section) + @@ fun relocs -> Error.return (AArch64 relocs) + else + Error.fail @@ "machine not supported " ^ (Elf_header.string_of_elf_machine_architecture machine) + in List.filter_map ( fun entry -> let name = Uint32_wrapper.to_bigint entry.elf64_st_name in @@ -29,11 +51,10 @@ let get_elf64_file_global_symbol_init f : global_symbol_init_info Error.error = else Byte_sequence.offset_and_cut addr_offset size section.elf64_section_body in - Error.bind data (fun data -> - Error.bind (String_table.get_string_at name strtab) (fun str -> - let write = Elf_file.flag_is_set Elf_section_header_table.shf_write section.elf64_section_flags in - Error.return (str, (typ, size, addr, data, bnd), write) - )) + Error.bind (get_relocs section.elf64_section_name_as_string) @@ fun relocs -> + Error.bind data @@ fun data -> + Error.bind (String_table.get_string_at name strtab) @@ fun str -> + let write = Elf_file.flag_is_set Elf_section_header_table.shf_write section.elf64_section_flags in + Error.return (str, (typ, size, addr, (data, relocs), bnd), write) ) (List.nth_opt secs shndx) - ) symtab |> Error.mapM Fun.id - ) \ No newline at end of file + ) symtab |> Error.mapM Fun.id \ No newline at end of file diff --git a/src/elf/relocations.ml b/src/elf/relocations.ml new file mode 100644 index 00000000..e48871b7 --- /dev/null +++ b/src/elf/relocations.ml @@ -0,0 +1,32 @@ +module IMap = Map.Make (Int) + +type target = + AArch64 of Abi_aarch64_symbolic_relocation.aarch64_relocation_target + +type rel = { + target : target; + value : Elf_symbolic.symbolic_expression; +} + +type t = rel IMap.t + +type linksem_t = LinksemRelocatable.rels + +let of_linksem: linksem_t -> t = function +| LinksemRelocatable.AArch64 relocs -> + let add k Elf_symbolic.{ arel_value; arel_target } m = + IMap.add (Z.to_int k) { value = arel_value; target = AArch64 arel_target } m + in + Pmap.fold add relocs IMap.empty + +let sub rels off len = + rels + |> IMap.to_list + |> List.filter_map (fun (pos, rel) -> if off <= pos && pos < off + len then Some (pos-off, rel) else None) + |> IMap.of_list + +let pp rels = + if IMap.is_empty rels then + Pp.empty + else + Pp.string "(has relocations)" \ No newline at end of file diff --git a/src/elf/symbol.ml b/src/elf/symbol.ml index da1bf9e0..8bdce76a 100644 --- a/src/elf/symbol.ml +++ b/src/elf/symbol.ml @@ -52,6 +52,11 @@ type typ = NOTYPE | OBJECT | FUNC | SECTION | FILE | UNKNOWN type linksem_typ = Z.t +type data = { + data: BytesSeq.t; + relocations: Relocations.t +} + type t = { name : string; other_names : string list; @@ -60,7 +65,7 @@ type t = { (* addr : int; *) size : int; writable : bool; - data : BytesSeq.t; + data : data; } type linksem_t = LinksemRelocatable.symbol @@ -95,10 +100,11 @@ let _ = (* module SMap = Map.Make (String) let locs = SMap.empty |> SMap.add ".text" 0 |> SMap.add ".data" 1000000 |> SMap.add ".eh_frame" 2000000 *) -let of_linksem (name, (typ, size, addr, data, _), writable) = +let of_linksem (name, (typ, size, addr, (data, rels), _), writable) = let typ = typ_of_linksem typ in let size = Z.to_int size in let addr = Address.of_linksem addr in + let data = { data; relocations = Relocations.of_linksem rels } in (* let addr = SMap.find section locs + Z.to_int offset in *) { name; other_names = []; typ; size; addr; data; writable } @@ -106,7 +112,10 @@ let is_interesting = function OBJECT | FUNC -> true | _ -> false let is_interesting_linksem lsym = lsym |> linksem_typ |> typ_of_linksem |> is_interesting -let sub sym off len = BytesSeq.sub sym.data off len +let sub sym off len = { + data = BytesSeq.sub sym.data.data off len; + relocations = Relocations.sub sym.data.relocations off len; +} let compare s1 s2 = compare s1.addr s2.addr @@ -133,5 +142,5 @@ let pp_raw sym = (* ("addr", ptr sym.addr); *) ("size", ptr sym.size); ("writable", bool sym.writable); - ("data", BytesSeq.ppby ~by:4 sym.data); + ("data", BytesSeq.ppby ~by:4 sym.data.data ^^ Relocations.pp sym.data.relocations); ]) diff --git a/src/elf/symbol.mli b/src/elf/symbol.mli index 0e91fa11..3bc5803f 100644 --- a/src/elf/symbol.mli +++ b/src/elf/symbol.mli @@ -55,6 +55,11 @@ type typ = NOTYPE | OBJECT | FUNC | SECTION | FILE | UNKNOWN type linksem_typ = Z.t +type data = { + data: BytesSeq.t; + relocations: Relocations.t +} + (** The ELF symbol. This type guarantee the data exists contrary to linksem symbols (it may be all zeros though) *) type t = { @@ -65,7 +70,7 @@ type t = { (* addr : int; *) size : int; writable : bool; - data : BytesSeq.t; + data : data; } (** The type of an ELF symbol in linksem. See {!of_linksem}*) @@ -103,7 +108,7 @@ val is_interesting : typ -> bool val is_interesting_linksem : linksem_t -> bool (** Take the BytesSeq.t corresponding to the offset and length *) -val sub : t -> int -> int -> BytesSeq.t +val sub : t -> int -> int -> data (** Starting address comparison *) val compare : t -> t -> int From d11e65a696879db0234dc1440c39a7f1ceee4f72 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Wed, 8 Jan 2025 11:55:57 +0100 Subject: [PATCH 15/89] [WIP] relocations from linksem --- src/isla/server.ml | 8 +++++++- src/isla/test.ml | 2 +- src/run/BB.ml | 2 +- src/run/instr.ml | 2 +- src/run/runner.ml | 2 +- src/state/base.ml | 2 +- 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/isla/server.ml b/src/isla/server.ml index ec544bb7..6c093fe3 100644 --- a/src/isla/server.ml +++ b/src/isla/server.ml @@ -68,10 +68,16 @@ type config = Config.t processor exception/fault) or not *) type trcs = (bool * Base.rtrc) list -type reloc = | +type reloc = Elf.Relocations.target let reloc_id: reloc option -> int = function | None -> 0 +| Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data640) -> 1 +| Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data320) -> 2 +| Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADRP) -> 3 +| Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADD) -> 4 +| Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST) -> 5 +| Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL) -> 6 type opcode = BytesSeq.t * reloc option diff --git a/src/isla/test.ml b/src/isla/test.ml index 29b011de..c44343d6 100644 --- a/src/isla/test.ml +++ b/src/isla/test.ml @@ -166,7 +166,7 @@ let input imode (arg : string) : (string * string) Term.ret = try Elf.SymTable.of_position_string elf.symbols s with Not_found -> fail "The position %s could not be found in %s" s arg in - `Ok (filename, BytesSeq.to_string (BytesSeq.sub sym.data off 4)) + `Ok (filename, BytesSeq.to_string (BytesSeq.sub sym.data.data off 4)) (* TODO relocations *) let input_term = Term.(ret (const input $ imode_term $ arg)) diff --git a/src/run/BB.ml b/src/run/BB.ml index 0e3cab82..cdddee56 100644 --- a/src/run/BB.ml +++ b/src/run/BB.ml @@ -101,7 +101,7 @@ let get_code elfname symname len : BytesSeq.t = with Not_found -> fail "The symbol %s cannot found in %s" symname elfname in let len = match len with Some i -> i | None -> sym.size - off in - Elf.Symbol.sub sym off len + (Elf.Symbol.sub sym off len).data (*TODO relocations*) let code_term = Term.(CmdlinerHelper.func_options comopts get_code $ elf $ sym $ len) diff --git a/src/run/instr.ml b/src/run/instr.ml index 4b8ea439..efa790bc 100644 --- a/src/run/instr.ml +++ b/src/run/instr.ml @@ -140,7 +140,7 @@ let get_instr arch instr elfopt : BytesSeq.t = in debug "Got symbol:\n%t\n" (Pp.topi Elf.Symbol.pp_raw sym); let len = 4 (* TODO proper Instruction length system *) in - BytesSeq.sub sym.data off len + BytesSeq.sub sym.data.data off len (*TODO relocations*) let instr_term = Term.(CmdlinerHelper.func_options comopts get_instr $ arch $ instr $ elf) diff --git a/src/run/runner.ml b/src/run/runner.ml index ed65e3e2..74eabce5 100644 --- a/src/run/runner.ml +++ b/src/run/runner.ml @@ -94,7 +94,7 @@ let of_dwarf dwarf = of_elf ~dwarf dwarf.elf let load_sym runner (sym : Elf.Symbol.t) = info "Loading symbol %s in %s" sym.name runner.elf.filename; Vec.add_one runner.funcs sym.addr; - let opcode_list = Arch.split_into_instrs sym.data in + let opcode_list = Arch.split_into_instrs sym.data.data in (* TODO relocations *) let addr = ref sym.addr in List.iter (fun code -> diff --git a/src/state/base.ml b/src/state/base.ml index 466a4ca0..4cda529c 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -436,7 +436,7 @@ let read_from_rodata (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t option if sym.writable then None else (* Assume little endian here *) - let bv = BytesSeq.getbvle ~size sym.data offset in + let bv = BytesSeq.getbvle ~size sym.data.data offset in (* TODO relocations *) Some (Typed.bits bv) with Not_found -> let rodata = elf.rodata in From 8d1db3ee5e3bf77c726e5be0c70b4da24d2c38ce Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Wed, 8 Jan 2025 12:56:34 +0100 Subject: [PATCH 16/89] WIP instructions with relocations --- src/arch/aarch64/sig.ml | 13 ++++++++++++- src/arch/riscv64/sig.ml | 13 ++++++++++++- src/arch/sig.mli | 2 +- src/run/runner.ml | 5 +++-- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/arch/aarch64/sig.ml b/src/arch/aarch64/sig.ml index 151fce0b..658dab20 100644 --- a/src/arch/aarch64/sig.ml +++ b/src/arch/aarch64/sig.ml @@ -360,7 +360,18 @@ let assemble_to_elf instr = Sys.remove obj_file; elf_file -let split_into_instrs = BytesSeq.to_listbs ~len:4 +let split_into_instrs (data: Elf.Symbol.data) = + let module IMap = Elf.Relocations.IMap in + let rawdata = BytesSeq.to_listbs ~len:4 data.data in + List.mapi (fun pos bytes -> + let (_, rel, rest) = IMap.split pos data.relocations in + if Option.is_some @@ IMap.find_first_opt (fun i -> i < pos + 4) rest then + Raise.fail "Misaligned relocation"; + Elf.Symbol.{ + data = bytes; + relocations = rel |> Option.map (IMap.singleton 0) |> Option.value ~default:IMap.empty; + } + ) rawdata (** https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/RET--Return-from-subroutine- *) let is_ret code = diff --git a/src/arch/riscv64/sig.ml b/src/arch/riscv64/sig.ml index c999179e..4f661598 100644 --- a/src/arch/riscv64/sig.ml +++ b/src/arch/riscv64/sig.ml @@ -317,7 +317,18 @@ let assemble_to_elf instr = Sys.remove obj_file; elf_file -let split_into_instrs = BytesSeq.to_listbs ~len:4 +let split_into_instrs (data: Elf.Symbol.data) = + let module IMap = Elf.Relocations.IMap in + let rawdata = BytesSeq.to_listbs ~len:4 data.data in + List.mapi (fun pos bytes -> + let (_, rel, rest) = IMap.split pos data.relocations in + if Option.is_some @@ IMap.find_first_opt (fun i -> i < pos + 4) rest then + Raise.fail "Misaligned relocation"; + Elf.Symbol.{ + data = bytes; + relocations = rel |> Option.map (IMap.singleton 0) |> Option.value ~default:IMap.empty; + } + ) rawdata let is_ret code = assert (BytesSeq.length code = 4); diff --git a/src/arch/sig.mli b/src/arch/sig.mli index 13268696..a355e3e6 100644 --- a/src/arch/sig.mli +++ b/src/arch/sig.mli @@ -109,7 +109,7 @@ val sp : unit -> State.Reg.t val assemble_to_elf : string -> string (** Split a byte-sequence into a list of instructions. *) -val split_into_instrs : BytesSeq.t -> BytesSeq.t list +val split_into_instrs : Elf.Symbol.data -> Elf.Symbol.data list (** Tell if an instruction is a return instruction. *) val is_ret : BytesSeq.t -> bool diff --git a/src/run/runner.ml b/src/run/runner.ml index 74eabce5..c3a4e031 100644 --- a/src/run/runner.ml +++ b/src/run/runner.ml @@ -94,16 +94,17 @@ let of_dwarf dwarf = of_elf ~dwarf dwarf.elf let load_sym runner (sym : Elf.Symbol.t) = info "Loading symbol %s in %s" sym.name runner.elf.filename; Vec.add_one runner.funcs sym.addr; - let opcode_list = Arch.split_into_instrs sym.data.data in (* TODO relocations *) + let opcode_list = Arch.split_into_instrs sym.data in (* TODO relocations *) let addr = ref sym.addr in List.iter - (fun code -> + (fun Elf.Symbol.{ data = code; relocations } -> let (addr, instr_len) = let result = !addr and len = BytesSeq.length code in addr := Elf.Address.(!addr + len); (result, len) in try + let _reloc = Elf.Relocations.IMap.find_opt 0 relocations in let instr = Trace.Cache.get_instr (code, None) in (*TODO relocs*) if instr.traces = [] then begin debug "Instruction at %t in %s is loaded as special" (Pp.top Elf.Address.pp addr) sym.name; From 8722715f7fe2bf4a345f8688ca23116eb5c201b0 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 9 Jan 2025 10:57:15 +0100 Subject: [PATCH 17/89] WIP --- src/isla/base.ml | 20 +++++++++++++++ src/isla/cache.ml | 27 ++++++++----------- src/isla/preprocess.ml | 7 +++-- src/isla/preprocess.mli | 2 +- src/isla/server.ml | 57 +++++++++++++++++++++++++---------------- src/isla/test.ml | 2 +- src/run/bb_lib.ml | 7 ++--- src/run/instr.ml | 7 ++--- src/trace/base.ml | 3 ++- src/trace/cache.ml | 7 +++-- 10 files changed, 87 insertions(+), 52 deletions(-) diff --git a/src/isla/base.ml b/src/isla/base.ml index 4cb88d85..94f4788b 100644 --- a/src/isla/base.ml +++ b/src/isla/base.ml @@ -97,6 +97,8 @@ type rsmt = lrng smt (** The type of raw expressions out of the parser *) type rexp = lrng exp +type rtrcs = lrng trcs + (*****************************************************************************) (*****************************************************************************) (*****************************************************************************) @@ -173,6 +175,24 @@ let parse_trc_string ?(filename = "default") (s : string) : rtrc = let parse_trc_channel ?(filename = "default") (c : in_channel) : rtrc = parse_trc ~filename @@ Lexing.from_channel ~with_positions:true c +let parse_trcs = parse Parser.trcs_start + +let parse_trcs_string ?filename (s : string) : rtrcs = + parse_trcs ?filename @@ Lexing.from_string ~with_positions:true s + +let parse_trcs_channel ?filename (c : in_channel) : rtrcs = + parse_trcs ?filename @@ Lexing.from_channel ~with_positions:true c + +let parse_segments ?filename l = match parse_trcs ?filename l with +| TracesWithSegments (s, []) -> s +| _ -> raise (ParseError (l.lex_start_p, "Data is not SEGMENTS")) + +let parse_segments_string ?filename (s : string) : instruction_segments = + parse_segments ?filename @@ Lexing.from_string ~with_positions:true s + +let parse_segments_channel ?filename (c : in_channel) : instruction_segments = + parse_segments ?filename @@ Lexing.from_channel ~with_positions:true c + (*$R try let exp = parse_exp_string ~filename:"test" "v42" in diff --git a/src/isla/cache.ml b/src/isla/cache.ml index f7d5850f..47845f36 100644 --- a/src/isla/cache.ml +++ b/src/isla/cache.ml @@ -126,23 +126,14 @@ end It is just a list of traces separated by new lines *) module TraceList (*: Cache.Value *) = struct - type t = Base.rtrc list + type t = Base.rtrcs let to_file file (trcs : t) = - let output_trc ochannel trc = Pp.fprintln ochannel @@ Base.pp_trc trc in - let output_trcs = Files.output_list output_trc in - Files.write output_trcs file trcs + Files.write Pp.fprintln file (Base.pp_trcs trcs) let of_file file : t = - let num = ref 0 in - let input_trc ichannel = - let trc = Files.input_sexp ichannel in - let filename = Printf.sprintf "Trace %i of %s" !num file in - incr num; - Base.parse_trc_string ~filename trc - in - let input_trcs = Files.input_list input_trc in - Files.read input_trcs file + let filename = Printf.sprintf "Traces of %s" file in + Files.read Files.input_string file |> Base.parse_trcs_string ~filename end (** An epoch independant of the isla version, bump if you change the representation @@ -220,7 +211,7 @@ let get_cache () = match !cache with Some cache -> cache | None -> failwith "Isla cache was not started" (** Get the traces of the opcode given. Use {!Server} if the value is not in the cache *) -let get_traces (opcode : Server.opcode) : Base.rtrc list = +let get_traces (opcode : Server.opcode) : Base.rtrcs = let (cache, config) = get_cache () in match IC.get_opt cache (Some opcode) with | Some trcs -> trcs @@ -237,11 +228,13 @@ let get_traces (opcode : Server.opcode) : Base.rtrc list = let get_nop () : Base.rtrc = let (cache, _) = get_cache () in match IC.get_opt cache None with - | Some [trc] -> trc + | Some (Traces [trc]) -> trc + | Some (TracesWithSegments _) -> fatal "Corrupted cache, nop has segments" | Some _ -> fatal "Corrupted cache, nop hasn't exactly one trace" | None -> ensure_started (); - let trcs = Server.request_bin_parsed @@ Arch.nop () in + let (segs, trcs) = Server.request_bin_parsed @@ Arch.nop () in + assert (Option.is_none segs); let trc = List.assoc true trcs in - IC.add cache None [trc]; + IC.add cache None (Traces [trc]); trc diff --git a/src/isla/preprocess.ml b/src/isla/preprocess.ml index 0f187384..ec220f35 100644 --- a/src/isla/preprocess.ml +++ b/src/isla/preprocess.ml @@ -157,7 +157,7 @@ let simplify_trc (Trace events : rtrc) : rtrc = (1 + Counter.read new_variables); Trace (List.rev !res) -let preprocess (config : Server.config) (trcs : (bool * rtrc) list) : rtrc list = +let preprocess (config : Server.config) ((segs, trcs) : Server.trcs) : rtrcs = let preprocess_one (b, trc) = if not b then None else @@ -165,4 +165,7 @@ let preprocess (config : Server.config) (trcs : (bool * rtrc) list) : rtrc list let trc = simplify_trc trc in Some trc in - List.filter_map preprocess_one trcs + let trcs = List.filter_map preprocess_one trcs in + match segs with + | None -> Traces (trcs) + | Some segs -> TracesWithSegments (segs, trcs) diff --git a/src/isla/preprocess.mli b/src/isla/preprocess.mli index b78fc41d..2b4ae5be 100644 --- a/src/isla/preprocess.mli +++ b/src/isla/preprocess.mli @@ -72,4 +72,4 @@ val simplify_trc : Base.rtrc -> Base.rtrc (** Preprocess a group of traces, by removing useless registers (according to the config), removing initialisation code and simplifying with {!simplify_trc} *) -val preprocess : Server.config -> Server.trcs -> Base.rtrc list +val preprocess : Server.config -> Server.trcs -> Base.rtrcs diff --git a/src/isla/server.ml b/src/isla/server.ml index 6c093fe3..acec119a 100644 --- a/src/isla/server.ml +++ b/src/isla/server.ml @@ -66,7 +66,7 @@ type config = Config.t It is a list of traces, each with a flag telling if they are normal traces (no processor exception/fault) or not *) -type trcs = (bool * Base.rtrc) list +type trcs = Base.instruction_segments option * (bool * Base.rtrc) list type reloc = Elf.Relocations.target @@ -134,7 +134,7 @@ let raw_stop () = | None -> () (** This should match exactly with the Answer type in isla-client code *) -type basic_answer = Error | Version of string | StartTraces | Trace of bool * string | EndTraces +type basic_answer = Error | Version of string | StartTraces | Trace of bool * string | EndTraces | Segments of string (** Read an answer from isla-client. This must match exactly [write_answer] in [client.rs] in [isla] *) @@ -149,11 +149,12 @@ let read_basic_answer () = let s = Server.read_string serv in Trace (b, s) | 4 -> EndTraces + | 5 -> Segments (Server.read_string serv) | _ -> failwith "Unknown isla anwser" (** The interpreted answer. If the protocol is followed, then one request lead to exactly one answer of that type *) -type answer = Version of string | Traces of (bool * string) list +type answer = Version of string | Traces of (string option * (bool * string) list) (** Expect a version answer and fails if it is not the case *) let expect_version = function Version s -> s | _ -> failwith "expected version number from isla" @@ -163,11 +164,15 @@ let expect_traces = function Traces tl -> tl | _ -> failwith "expected traces fr (** Expect isla traces and fails if it is not the case, additionally parse them *) let expect_parsed_traces a : trcs = - a |> expect_traces - |> List.mapi (fun i (b, t) -> + let rsegs, rtrcs = expect_traces a in + let filename = Printf.sprintf "Isla call %d" !req_num in + let trcs = List.mapi (fun i (b, t) -> ( b, - let filename = Printf.sprintf "Isla call %d, trace %d" !req_num i in - Base.parse_trc_string ~filename t )) + let filename = filename ^ Printf.sprintf ", trace %d" i in + Base.parse_trc_string ~filename t )) rtrcs + in + let segs = Option.map (Base.parse_segments_string ~filename) rsegs in + segs, trcs (** When isla encounter a non fatal error with that specific request. This error is recoverable and the sever can accept other requests *) @@ -175,30 +180,38 @@ exception IslaError (** Read the answer from isla, block until full answer *) let read_answer () : answer = + let rec traces_seq () = + match read_basic_answer () with + | EndTraces -> Seq.Nil + | Trace (bool, s) -> Seq.Cons ((bool, s), traces_seq) + | Error -> raise IslaError + | _ -> failwith "isla protocol error: no EndTraces" + in match read_basic_answer () with | Error -> raise IslaError | Version s -> Version s + | Segments s -> ( + match read_basic_answer () with + | StartTraces -> Traces (Some s, List.of_seq traces_seq) + | _ -> failwith "segments not followed by traces" + ) | StartTraces -> - let rec seq () = - match read_basic_answer () with - | EndTraces -> Seq.Nil - | Trace (bool, s) -> Seq.Cons ((bool, s), seq) - | Error -> raise IslaError - | _ -> failwith "isla protocol error: no EndTraces" - in - Traces (List.of_seq seq) + Traces (None, List.of_seq traces_seq) | _ -> failwith "isla protocol error: Traces element before StartTraces" (** Answer pretty printer *) let pp_answer = function | Version s -> Pp.(prefix 2 1 !^"isla-client version:" !^s) - | Traces l -> - l - |> List.map (fun (b, t) -> - Pp.( - let bdoc = if b then !^"norm:" else !^"ex:" in - prefix 2 1 bdoc (string t))) - |> Pp.(separate (hardline ^^ hardline)) + | Traces (s, l) -> + Pp.( + optional string s + ^^ hardline + ^^ hardline + ^^ (l + |> List.map (fun (b, t) -> + let bdoc = if b then !^"norm:" else !^"ex:" in + prefix 2 1 bdoc (string t)) + |> separate (hardline ^^ hardline))) (** The type of a request to isla *) type request = TEXT_ASM of string | ASM of BytesSeq.t | VERSION | STOP diff --git a/src/isla/test.ml b/src/isla/test.ml index c44343d6..d1e07e4e 100644 --- a/src/isla/test.ml +++ b/src/isla/test.ml @@ -204,7 +204,7 @@ let isla_run isla_mode arch (filename, input) : string * string * Server.config start config; let msg : string = match request (isla_mode_to_request isla_mode input) with - | Traces l -> List.assoc true l + | Traces (_, l) -> List.assoc true l (* TODO segments *) | _ -> failwith "isla did not send back traces" in stop (); diff --git a/src/run/bb_lib.ml b/src/run/bb_lib.ml index 477a9d98..6790fe1a 100644 --- a/src/run/bb_lib.ml +++ b/src/run/bb_lib.ml @@ -64,8 +64,9 @@ type t = { main : trc array } Also does the typing of traces for register discovery. TODO Support variable length instructions *) -let from_binary (code : BytesSeq.t) : t = - let num = BytesSeq.length code / 4 in +let from_binary (_code : BytesSeq.t) : t = + Raise.todo() + (* let num = BytesSeq.length code / 4 in (* TODO fix fixed size instructions *) if BytesSeq.length code != num * 4 then failwith "BB.from_binary: The specified range cuts an instruction"; @@ -83,7 +84,7 @@ let from_binary (code : BytesSeq.t) : t = (code, None) |> Isla.Cache.get_traces |> get_normal (*TODO relocs *) in let main = code |> BytesSeq.to_listbs ~len:4 |> List.map process |> Array.of_list in - { main } + { main } *) (* Sequence of the second test: mpool.c:116.6 (mpool_fini) 40012240: 37000049 tbnz diff --git a/src/run/instr.ml b/src/run/instr.ml index efa790bc..0098d35b 100644 --- a/src/run/instr.ml +++ b/src/run/instr.ml @@ -148,14 +148,15 @@ let simp_trace_term = Term.(const ( || ) $ simp_trace $ simp) let simp_state_term = Term.(const ( || ) $ simp_state $ simp) -let get_traces instr isla_run dump_types : traces = - Isla.Cache.start @@ Arch.get_isla_config (); +let get_traces _instr _isla_run _dump_types : traces = + Raise.todo() + (* Isla.Cache.start @@ Arch.get_isla_config (); (* I call Init.init manually to print the register types *) Init.init () |> ignore; let rtraces = Isla.Cache.get_traces (instr, None) in (* TODO relocs *) List.iter (fun t -> Isla.Type.type_trc t |> ignore) rtraces; if dump_types then base "Register types:\n%t\n" (Pp.topi State.Reg.pp_index ()); - if isla_run then IslaTraces rtraces else Traces (List.map Trace.of_isla rtraces) + if isla_run then IslaTraces rtraces else Traces (List.map Trace.of_isla rtraces) *) let pre_traces_term = Term.(const get_traces $ instr_term $ isla_run $ reg_types) diff --git a/src/trace/base.ml b/src/trace/base.ml index eb0a88b0..891d3133 100644 --- a/src/trace/base.ml +++ b/src/trace/base.ml @@ -320,8 +320,9 @@ let events_of_isla ~written_registers ~read_counter ~(vc : value_context) : | AbstractCall _ -> [] | AbstractPrimop _ -> [] + (* TODO segments *) (** Top level function to convert an isla trace to one of this module *) -let of_isla (Trace events : Isla.rtrc) : t = +let of_isla (_segments: Isla.segment list) (Trace events : Isla.rtrc) : t = let written_registers = Hashtbl.create 10 in let read_counter = Counter.make 0 in let vc = HashVector.empty () in diff --git a/src/trace/cache.ml b/src/trace/cache.ml index 0f7d7e33..cd5e611b 100644 --- a/src/trace/cache.ml +++ b/src/trace/cache.ml @@ -160,8 +160,11 @@ let get_traces (opcode : Isla.Server.opcode) : Base.t list = match TC.get_opt cache (Some opcode) with | Some trcs -> trcs | None -> - let isla_traces = Isla.Cache.get_traces opcode in - let traces = List.map (tee (Isla.Type.type_trc %> ignore) %> Base.of_isla) isla_traces in + let segments, isla_traces = match Isla.Cache.get_traces opcode with + | Traces t -> [], t + | TracesWithSegments (Segments s, t) -> s, t + in + let traces = List.map (tee (Isla.Type.type_trc %> ignore) %> Base.of_isla segments) isla_traces in let straces = List.map Base.simplify traces in TC.add cache (Some opcode) straces; straces From 2d5379fb54038023bc3e9b9e8dd55d3d7aff78f5 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 9 Jan 2025 20:35:01 +0100 Subject: [PATCH 18/89] [WIP] symbolic traces --- src/arch/aarch64/sig.ml | 3 ++- src/elf/relocations.ml | 14 ++++++++++++- src/elf/symbol.ml | 2 +- src/isla/cache.ml | 5 ++--- src/isla/preprocess.ml | 20 +++++++++++++----- src/isla/preprocess.mli | 2 +- src/isla/server.ml | 34 +++++++++++++++++++++++++++--- src/isla/test.ml | 4 ++-- src/run/runner.ml | 8 ++++--- src/trace/base.ml | 46 +++++++++++++++++++++++++++++------------ src/trace/context.ml | 8 +------ src/trace/instr.ml | 2 +- src/trace/typer.ml | 1 + 13 files changed, 108 insertions(+), 41 deletions(-) diff --git a/src/arch/aarch64/sig.ml b/src/arch/aarch64/sig.ml index 658dab20..c46bb8b8 100644 --- a/src/arch/aarch64/sig.ml +++ b/src/arch/aarch64/sig.ml @@ -363,7 +363,8 @@ let assemble_to_elf instr = let split_into_instrs (data: Elf.Symbol.data) = let module IMap = Elf.Relocations.IMap in let rawdata = BytesSeq.to_listbs ~len:4 data.data in - List.mapi (fun pos bytes -> + List.mapi (fun i bytes -> + let pos = 4 * i in let (_, rel, rest) = IMap.split pos data.relocations in if Option.is_some @@ IMap.find_first_opt (fun i -> i < pos + 4) rest then Raise.fail "Misaligned relocation"; diff --git a/src/elf/relocations.ml b/src/elf/relocations.ml index e48871b7..b129d48f 100644 --- a/src/elf/relocations.ml +++ b/src/elf/relocations.ml @@ -25,8 +25,20 @@ let sub rels off len = |> List.filter_map (fun (pos, rel) -> if off <= pos && pos < off + len then Some (pos-off, rel) else None) |> IMap.of_list +let pp_rel rel = + let target = match rel.target with + | AArch64 Abi_aarch64_symbolic_relocation.Data640 -> "Data64" + | AArch64 Abi_aarch64_symbolic_relocation.Data320 -> "Data32" + | AArch64 Abi_aarch64_symbolic_relocation.ADD -> "ADD" + | AArch64 Abi_aarch64_symbolic_relocation.ADRP -> "ADRP" + | AArch64 Abi_aarch64_symbolic_relocation.CALL -> "CALL" + | AArch64 Abi_aarch64_symbolic_relocation.LDST -> "LDST" + in + let expr = Elf_symbolic.pp_sym_expr rel.value in + Pp.(!^target ^^ !^": " ^^ !^expr) + let pp rels = if IMap.is_empty rels then Pp.empty else - Pp.string "(has relocations)" \ No newline at end of file + Pp.(mapping "relocations" @@ List.map (fun (i, r) -> (hex i, pp_rel r)) (IMap.to_list rels)) \ No newline at end of file diff --git a/src/elf/symbol.ml b/src/elf/symbol.ml index 8bdce76a..263c184b 100644 --- a/src/elf/symbol.ml +++ b/src/elf/symbol.ml @@ -142,5 +142,5 @@ let pp_raw sym = (* ("addr", ptr sym.addr); *) ("size", ptr sym.size); ("writable", bool sym.writable); - ("data", BytesSeq.ppby ~by:4 sym.data.data ^^ Relocations.pp sym.data.relocations); + ("data", pair (BytesSeq.ppby ~by:4) Relocations.pp (sym.data.data, sym.data.relocations)); ]) diff --git a/src/isla/cache.ml b/src/isla/cache.ml index 47845f36..a44cc8f3 100644 --- a/src/isla/cache.ml +++ b/src/isla/cache.ml @@ -217,8 +217,7 @@ let get_traces (opcode : Server.opcode) : Base.rtrcs = | Some trcs -> trcs | None -> ensure_started (); - let raw_opcode, _ = opcode in (*TODO*) - let trcs = Server.request_bin_parsed raw_opcode in + let trcs = Server.request_bin_parsed opcode in let ptrcs = Preprocess.preprocess config trcs in IC.add cache (Some opcode) ptrcs; ptrcs @@ -233,7 +232,7 @@ let get_nop () : Base.rtrc = | Some _ -> fatal "Corrupted cache, nop hasn't exactly one trace" | None -> ensure_started (); - let (segs, trcs) = Server.request_bin_parsed @@ Arch.nop () in + let (segs, trcs) = Server.request_bin_parsed @@ (Arch.nop (), None) in assert (Option.is_none segs); let trc = List.assoc true trcs in IC.add cache None (Traces [trc]); diff --git a/src/isla/preprocess.ml b/src/isla/preprocess.ml index ec220f35..7aa0abd1 100644 --- a/src/isla/preprocess.ml +++ b/src/isla/preprocess.ml @@ -68,7 +68,7 @@ let expect_processed = function | _ -> Raise.fail "Variables should be processed at this point" (** Preprocess a single trace *) -let simplify_trc (Trace events : rtrc) : rtrc = +let simplify_trc ?(num_segments = 0) (Trace events : rtrc) : rtrc = (* Phase 1: Discover which variable are actually used *) let used = HashVector.empty () in let process_used event = @@ -90,7 +90,8 @@ let simplify_trc (Trace events : rtrc) : rtrc = not yet commited are inlined. Variables are also renumbered at the same time. *) let simplify_context = HashVector.empty () in - let new_variables = Counter.make 0 in + (* Segment variables should not be renamed (we assume that those are v0-v(num_segments-1)) *) + let new_variables = Counter.make num_segments in let res = ref [] in let push_event (d : revent) = res := d :: !res in let push_smt loc (d : rsmt) = push_event (Smt (d, loc)) in @@ -100,13 +101,13 @@ let simplify_trc (Trace events : rtrc) : rtrc = match HashVector.get simplify_context i with | Declared { ty; loc } -> debug "Commiting declared variable %d" i; - let new_val = Counter.get new_variables in + let new_val = if i < num_segments then i else Counter.get new_variables in HashVector.set simplify_context i (Processed new_val); push_smt loc (DeclareConst (new_val, ty)); new_val | Defined { exp; loc } -> debug "Commiting defined variable %d" i; - let new_val = Counter.get new_variables in + let new_val = if i < num_segments then i else Counter.get new_variables in HashVector.set simplify_context i (Processed new_val); debug "New id is %d" new_val; let new_exp = simplify_exp exp in @@ -158,11 +159,20 @@ let simplify_trc (Trace events : rtrc) : rtrc = Trace (List.rev !res) let preprocess (config : Server.config) ((segs, trcs) : Server.trcs) : rtrcs = + let num_segments = + segs + |> Option.map (fun (Segments s) -> + let x = List.length s in + assert (List.for_all (fun (Segment (_,_,v)) -> v < x) s); + x + ) + |> Option.value ~default:0 + in let preprocess_one (b, trc) = if not b then None else let trc = trc |> Manip.remove_init |> Manip.remove_ignored config.ignored_regs in - let trc = simplify_trc trc in + let trc = simplify_trc ~num_segments trc in Some trc in let trcs = List.filter_map preprocess_one trcs in diff --git a/src/isla/preprocess.mli b/src/isla/preprocess.mli index 2b4ae5be..73c8a70f 100644 --- a/src/isla/preprocess.mli +++ b/src/isla/preprocess.mli @@ -67,7 +67,7 @@ *) (** Simplify a simple trace by removing all useless variables *) -val simplify_trc : Base.rtrc -> Base.rtrc +val simplify_trc : ?num_segments:int -> Base.rtrc -> Base.rtrc (** Preprocess a group of traces, by removing useless registers (according to the config), removing initialisation code and simplifying with diff --git a/src/isla/server.ml b/src/isla/server.ml index acec119a..82bd8603 100644 --- a/src/isla/server.ml +++ b/src/isla/server.ml @@ -214,13 +214,41 @@ let pp_answer = function |> separate (hardline ^^ hardline))) (** The type of a request to isla *) -type request = TEXT_ASM of string | ASM of BytesSeq.t | VERSION | STOP +type request = TEXT_ASM of string | ASM of opcode | VERSION | STOP + +let pp_interpreted_opcode (b, r) = + match r with + | None -> Pp.(!^"#x" ^^ BytesSeq.ppint b) + | Some (Elf.Relocations.AArch64 rtype) -> + let bits = BytesSeq.getbvle ~size:32 b 0 in + Pp.( + match rtype with + | Abi_aarch64_symbolic_relocation.Data640 -> Raise.fail "64bit relocation not allowed for instruction" + | Abi_aarch64_symbolic_relocation.Data320 -> !^"x:32" + | Abi_aarch64_symbolic_relocation.ADRP -> + BitVec.pp_smt (BitVec.extract 31 31 bits) + ^^ !^" x0:2 " ^^ + BitVec.pp_smt (BitVec.extract 24 28 bits) + ^^ !^" x1:19 " ^^ + BitVec.pp_smt (BitVec.extract 0 4 bits) + | Abi_aarch64_symbolic_relocation.ADD -> + BitVec.pp_smt (BitVec.extract 22 31 bits) + ^^ !^" x0:12 " ^^ + BitVec.pp_smt (BitVec.extract 0 9 bits) + | Abi_aarch64_symbolic_relocation.LDST -> (* TODO different width loads, alignment *) + BitVec.pp_smt (BitVec.extract 22 31 bits) + ^^ !^" x0:10 " ^^ + BitVec.pp_smt (BitVec.extract 0 11 bits) + | Abi_aarch64_symbolic_relocation.CALL -> + BitVec.pp_smt (BitVec.extract 26 31 bits) + ^^ !^" x0:26 " + ) (** Convert a request into the string message expected by isla-client This should match the protocol *) let string_of_request = function | TEXT_ASM s -> Printf.sprintf "execute_asm %s" s - | ASM b -> Pp.(sprintc @@ !^"execute " ^^ BytesSeq.ppint b) + | ASM b -> Pp.(sprintc @@ !^"execute " ^^ pp_interpreted_opcode b) | VERSION -> "version" | STOP -> "stop" @@ -243,7 +271,7 @@ let request (req : request) : answer = req |> string_of_request |> string_reques This is the main entry point of this module. *) -let request_bin_parsed (bin : BytesSeq.t) : trcs = ASM bin |> request |> expect_parsed_traces +let request_bin_parsed (opcode : opcode) : trcs = ASM opcode |> request |> expect_parsed_traces (** Send a request without expecting any answer *) let send_request req = req |> string_of_request |> send_string_request diff --git a/src/isla/test.ml b/src/isla/test.ml index d1e07e4e..b7a06ad7 100644 --- a/src/isla/test.ml +++ b/src/isla/test.ml @@ -186,8 +186,8 @@ let isla_mode_term = let isla_mode_to_request imode input = match imode with | ASM -> Server.TEXT_ASM input - | HEX -> Server.ASM (BytesSeq.of_hex input) - | BIN -> Server.ASM (BytesSeq.of_string input) + | HEX -> Server.ASM (BytesSeq.of_hex input, None) (* TODO? *) + | BIN -> Server.ASM (BytesSeq.of_string input, None) | _ -> assert false (** Run isla and return a text trace with a filename diff --git a/src/run/runner.ml b/src/run/runner.ml index c3a4e031..dfc4ba73 100644 --- a/src/run/runner.ml +++ b/src/run/runner.ml @@ -94,7 +94,8 @@ let of_dwarf dwarf = of_elf ~dwarf dwarf.elf let load_sym runner (sym : Elf.Symbol.t) = info "Loading symbol %s in %s" sym.name runner.elf.filename; Vec.add_one runner.funcs sym.addr; - let opcode_list = Arch.split_into_instrs sym.data in (* TODO relocations *) + debug "Loding symbol %t" (Pp.top Elf.Symbol.pp_raw sym); + let opcode_list = Arch.split_into_instrs sym.data in let addr = ref sym.addr in List.iter (fun Elf.Symbol.{ data = code; relocations } -> @@ -103,9 +104,10 @@ let load_sym runner (sym : Elf.Symbol.t) = addr := Elf.Address.(!addr + len); (result, len) in + debug "Relocation at address %t: %t" (Pp.top Elf.Address.pp addr) (Pp.top Elf.Relocations.pp relocations); try - let _reloc = Elf.Relocations.IMap.find_opt 0 relocations in - let instr = Trace.Cache.get_instr (code, None) in (*TODO relocs*) + let reloc = Elf.Relocations.IMap.find_opt 0 relocations in + let instr = Trace.Cache.get_instr (code, Option.map (fun (x : Elf.Relocations.rel) -> x.target) reloc) in if instr.traces = [] then begin debug "Instruction at %t in %s is loaded as special" (Pp.top Elf.Address.pp addr) sym.name; Hashtbl.add runner.instrs addr (Special instr_len) diff --git a/src/trace/base.ml b/src/trace/base.ml index 891d3133..2419e7b7 100644 --- a/src/trace/base.ml +++ b/src/trace/base.ml @@ -93,6 +93,7 @@ module Var = struct | Register of Reg.t (** The value of the register at the beginning of the trace *) | Read of int * Ast.Size.t (** The result of that memory reading operation *) | NonDet of int * Ast.Size.t (** Variable representing non-determinism in the spec *) + | Segment of string * int (** Variable representing symbolic segment in the opcode *) (** Convert the variable to the string encoding. For parsing infractructure reason, the encoding must always contain at least one [:]. *) @@ -104,6 +105,8 @@ module Var = struct | NonDet (num, size) -> if size = Ast.Size.B64 then Printf.sprintf "nondet:%i" num else Printf.sprintf "nondet:%i:%dbits" num (Ast.Size.to_bits size) + | Segment (name, bits) -> + Printf.sprintf "segment:%s:%dbits" name bits (** Inverse of {!to_string} *) let of_string s = @@ -117,6 +120,9 @@ module Var = struct | ["nondet"; num; size] -> let size = Scanf.sscanf size "%dbits" Ast.Size.of_bits in NonDet (int_of_string num, size) + | ["segment"; name; bits] -> + let bits = Scanf.sscanf bits "%dbits" Fun.id in + Segment (name, bits) | _ -> Raise.inv_arg "%s is not a Base.Var.t" s (** Pretty prints the variable *) @@ -130,6 +136,7 @@ module Var = struct | Register reg -> Reg.reg_type reg | Read (_, size) -> Ast.Ty_BitVec (Ast.Size.to_bits size) | NonDet (_, size) -> Ast.Ty_BitVec (Ast.Size.to_bits size) + | Segment (_, bits) -> Ast.Ty_BitVec bits let of_reg reg = Register reg end @@ -248,17 +255,27 @@ let write_to_valu vc valu exp = match valu with Isla.(RegVal_Base (Val_Symbolic i)) -> HashVector.set vc i exp | _ -> () (** Convert an isla event to Trace events, most events are deleted *) -let events_of_isla ~written_registers ~read_counter ~(vc : value_context) : +let events_of_isla ~segments_map ~written_registers ~read_counter ~(vc : value_context) : Isla.revent -> event list = function | Smt (DeclareConst (i, ty), _) -> - ( try - match ty with - | Ty_BitVec ((8 | 16 | 32 | 64 | 128) as size) -> - HashVector.set vc i (Exp.of_var (Var.NonDet (i, Ast.Size.of_bits size))) - | Ty_BitVec _ | Ty_Bool | Ty_Enum _ | Ty_Array (_, _) -> - debug "Unimplemented: ignoring non-det variable %i of type %t" i - (Pp.top Isla.pp_ty ty) - with OfIslaError -> warn "not setting nondet:%d" i + ( match HashVector.get_opt segments_map i with + | Some (name, size) -> + let ty_match = match ty with + | Ty_BitVec sz -> size = sz + | _ -> false + in + if not ty_match then fatal "Variable type doesn't match instruction segment %t and %d" (Pp.top Isla.pp_ty ty) size + else + HashVector.set vc i (Exp.of_var (Var.Segment (name, size))) + | None -> + try + match ty with + | Ty_BitVec ((8 | 16 | 32 | 64 | 128) as size) -> + HashVector.set vc i (Exp.of_var (Var.NonDet (i, Ast.Size.of_bits size))) + | Ty_BitVec _ | Ty_Bool | Ty_Enum _ | Ty_Array (_, _) -> + debug "Unimplemented: ignoring non-det variable %i of type %t" i + (Pp.top Isla.pp_ty ty) + with OfIslaError -> warn "not setting nondet:%d" i ); [] | Smt (DefineConst (i, e), _) -> @@ -320,13 +337,16 @@ let events_of_isla ~written_registers ~read_counter ~(vc : value_context) : | AbstractCall _ -> [] | AbstractPrimop _ -> [] - (* TODO segments *) (** Top level function to convert an isla trace to one of this module *) -let of_isla (_segments: Isla.segment list) (Trace events : Isla.rtrc) : t = +let of_isla (segments: Isla.segment list) (Trace events : Isla.rtrc) : t = let written_registers = Hashtbl.create 10 in let read_counter = Counter.make 0 in let vc = HashVector.empty () in - List.concat_map (events_of_isla ~written_registers ~read_counter ~vc) events + let segments_map = HashVector.empty () in + List.iter (fun (Isla.Segment (name, size, var)) -> + HashVector.set segments_map var (name, size) + ) segments; + List.concat_map (events_of_isla ~segments_map ~written_registers ~read_counter ~vc) events (*****************************************************************************) (*****************************************************************************) @@ -346,7 +366,7 @@ let declare_non_det serv events = iter_var (function | Register _ | Read _ -> () - | NonDet _ as var -> + | NonDet _ | Segment _ as var -> if not @@ VarTbl.mem declared @@ var then begin Z3Tr.declare_var_always serv var; VarTbl.add declared var () diff --git a/src/trace/context.ml b/src/trace/context.ml index bfbce294..fb7a54a4 100644 --- a/src/trace/context.ml +++ b/src/trace/context.ml @@ -68,13 +68,7 @@ let expand_var ~(ctxt : t) (v : Base.Var.t) (a : Ast.no Ast.ty) : State.exp = match v with | Register reg -> State.get_reg_exp ctxt.state reg | NonDet (i, _) | Read (i, _) -> (HashVector.get ctxt.mem_reads i).exp (* TODO is the NonDet case correct *) - -let map_var ~(ctxt : t) (v : Base.Var.t) (a : Ast.no Ast.ty) : State.var = - assert (Base.Var.ty v = a); - match v with - | Register reg -> State.Var.Register (ctxt.state.id, reg) - | NonDet (i, size) -> State.Var.NonDet (i, size) - | Read (i, size) -> State.Var.ReadVar (ctxt.state.id, i, size) + | Segment (_name, size) -> Exp.Typed.bits (BitVec.zero ~size) (* TODO put the actual value there *) (** Tell if typing should enabled with this context *) let typing_enabled ~(ctxt : t) = ctxt.dwarf <> None diff --git a/src/trace/instr.ml b/src/trace/instr.ml index 4f0b54b9..25d1a9e5 100644 --- a/src/trace/instr.ml +++ b/src/trace/instr.ml @@ -81,7 +81,7 @@ let trace_meta_of_trace trace = let jump = ref None in let process_var = function | Base.Var.Register reg -> read := reg :: !read - | Base.Var.(Read _ | NonDet _) -> () + | Base.Var.(Read _ | NonDet _ | Segment _) -> () in let process_exp : Base.exp -> unit = Ast.Manip.exp_iter_var process_var in let process_event : Base.event -> unit = function diff --git a/src/trace/typer.ml b/src/trace/typer.ml index cf04c5af..238a17ef 100644 --- a/src/trace/typer.ml +++ b/src/trace/typer.ml @@ -203,6 +203,7 @@ let rec expr ~ctxt (exp : Base.exp) : Ctype.t option = | Var (Register reg, _) -> State.get_reg ctxt.state reg |> State.Tval.ctyp | Var (Read (r, _), _) -> HashVector.get ctxt.mem_reads r |> State.Tval.ctyp | Var (NonDet _, _) -> None + | Var (Segment _, _) -> None (* TODO? *) | Bits (bv, _) -> let size = BitVec.size bv in if size mod 8 = 0 || size = Arch.address_size then From 36f77417fbb3113ed4a056ceee4b2f36d66006d3 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Fri, 10 Jan 2025 15:20:45 +0100 Subject: [PATCH 19/89] Store segments with instruction --- src/elf/relocations.ml | 39 +++++++++++++++++++++++++++++++++++---- src/isla/server.ml | 15 +++++++++++++-- src/run/runner.ml | 4 ++-- src/trace/cache.ml | 5 ++++- src/trace/instr.ml | 27 ++++++++++++++++++++++----- 5 files changed, 76 insertions(+), 14 deletions(-) diff --git a/src/elf/relocations.ml b/src/elf/relocations.ml index b129d48f..b253f100 100644 --- a/src/elf/relocations.ml +++ b/src/elf/relocations.ml @@ -3,19 +3,39 @@ module IMap = Map.Make (Int) type target = AArch64 of Abi_aarch64_symbolic_relocation.aarch64_relocation_target +type binary_operation = Elf_symbolic.binary_operation + +type unary_operation = Elf_symbolic.unary_operation + +type exp = +| Section of string +| Const of int +| BinOp of (exp * binary_operation * exp) +| UnOp of (unary_operation * exp) +(* | AssertRange of (exp * int * int) *) +| Mask of (exp * int * int) + type rel = { target : target; - value : Elf_symbolic.symbolic_expression; + value : exp; } type t = rel IMap.t type linksem_t = LinksemRelocatable.rels +let rec exp_of_linksem = function +| Elf_symbolic.Section s -> Section s +| Elf_symbolic.Const x -> Const (Z.to_int x) +| Elf_symbolic.BinOp (x, op, y) -> BinOp (exp_of_linksem x, op, exp_of_linksem y) +| Elf_symbolic.UnOp (op, x) -> UnOp (op, exp_of_linksem x) +| Elf_symbolic.AssertRange (x, _, _) -> exp_of_linksem x (* TODO *) +| Elf_symbolic.Mask (x, a, b) -> Mask (exp_of_linksem x, Z.to_int a, Z.to_int b) + let of_linksem: linksem_t -> t = function | LinksemRelocatable.AArch64 relocs -> let add k Elf_symbolic.{ arel_value; arel_target } m = - IMap.add (Z.to_int k) { value = arel_value; target = AArch64 arel_target } m + IMap.add (Z.to_int k) { value = exp_of_linksem arel_value; target = AArch64 arel_target } m in Pmap.fold add relocs IMap.empty @@ -25,6 +45,17 @@ let sub rels off len = |> List.filter_map (fun (pos, rel) -> if off <= pos && pos < off + len then Some (pos-off, rel) else None) |> IMap.of_list +let rec pp_exp = Pp.( + function + | Section s -> !^s + | Const x -> int x + | BinOp (a, Add, b) -> !^"(" ^^ pp_exp a ^^ !^"+" ^^ pp_exp b ^^ !^")" + | BinOp (a, Sub, b) -> !^"(" ^^ pp_exp a ^^ !^"-" ^^ pp_exp b ^^ !^")" + | BinOp (a, And, b) -> !^"(" ^^ pp_exp a ^^ !^"&" ^^ pp_exp b ^^ !^")" + | UnOp (Not, b) -> !^"(" ^^ !^"~" ^^ pp_exp b ^^ !^")" + | Mask (x, a, b) -> pp_exp x ^^ !^"[" ^^ int a ^^ !^":" ^^ int b ^^ !^"]" +) + let pp_rel rel = let target = match rel.target with | AArch64 Abi_aarch64_symbolic_relocation.Data640 -> "Data64" @@ -34,8 +65,8 @@ let pp_rel rel = | AArch64 Abi_aarch64_symbolic_relocation.CALL -> "CALL" | AArch64 Abi_aarch64_symbolic_relocation.LDST -> "LDST" in - let expr = Elf_symbolic.pp_sym_expr rel.value in - Pp.(!^target ^^ !^": " ^^ !^expr) + let expr = pp_exp rel.value in + Pp.(!^target ^^ !^": " ^^ expr) let pp rels = if IMap.is_empty rels then diff --git a/src/isla/server.ml b/src/isla/server.ml index 82bd8603..a4fa0bb4 100644 --- a/src/isla/server.ml +++ b/src/isla/server.ml @@ -81,6 +81,8 @@ let reloc_id: reloc option -> int = function type opcode = BytesSeq.t * reloc option +type reloc_segment = string * (int * int) (* mapping the name of a segment to the range of the relocation value *) + (** Bump when updating isla. TODO: move the version checking to allow a range of version. Also, right now the cache invalidation is based on @@ -223,8 +225,8 @@ let pp_interpreted_opcode (b, r) = let bits = BytesSeq.getbvle ~size:32 b 0 in Pp.( match rtype with - | Abi_aarch64_symbolic_relocation.Data640 -> Raise.fail "64bit relocation not allowed for instruction" - | Abi_aarch64_symbolic_relocation.Data320 -> !^"x:32" + | Abi_aarch64_symbolic_relocation.Data640 -> fatal "Data64 relocation not allowed for instruction" + | Abi_aarch64_symbolic_relocation.Data320 -> fatal "Data32 relocation not allowed for instruction" | Abi_aarch64_symbolic_relocation.ADRP -> BitVec.pp_smt (BitVec.extract 31 31 bits) ^^ !^" x0:2 " ^^ @@ -244,6 +246,15 @@ let pp_interpreted_opcode (b, r) = ^^ !^" x0:26 " ) +(* for interpreting the segments *) +let segments_of_reloc: reloc -> reloc_segment list = function +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data640 -> fatal "invalid relocation for instructions (Data64)" +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data320 -> fatal "invalid relocation for instructions (Data32)" +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADRP -> ["x0", (0, 1); "x1", (2, 20)] (* or absolute? ["x0", (12, 13); "x1", (14, 32)] *) +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADD -> ["x0", (0, 11)] +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST -> ["x0", (0, 9)] (* TODO depends on load size *) (* or absolute? ["x0", (2, 11)] *) +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL -> ["x0", (0, 25)] (* or absolute? ["x0", (2, 27)] *) + (** Convert a request into the string message expected by isla-client This should match the protocol *) let string_of_request = function diff --git a/src/run/runner.ml b/src/run/runner.ml index dfc4ba73..d21193d3 100644 --- a/src/run/runner.ml +++ b/src/run/runner.ml @@ -107,7 +107,7 @@ let load_sym runner (sym : Elf.Symbol.t) = debug "Relocation at address %t: %t" (Pp.top Elf.Address.pp addr) (Pp.top Elf.Relocations.pp relocations); try let reloc = Elf.Relocations.IMap.find_opt 0 relocations in - let instr = Trace.Cache.get_instr (code, Option.map (fun (x : Elf.Relocations.rel) -> x.target) reloc) in + let instr = Trace.Cache.get_instr (code, reloc) in if instr.traces = [] then begin debug "Instruction at %t in %s is loaded as special" (Pp.top Elf.Address.pp addr) sym.name; Hashtbl.add runner.instrs addr (Special instr_len) @@ -193,7 +193,7 @@ let skip runner state : State.t list = let pc = pc_exp |> Ast.expect_bits |> BitVec.to_int in let pc = Elf.Address.{ section = ".text"; offset = pc } in (* TODO this is wrong, should get symbolic value from pc_exp *) match fetch runner pc with - | Normal { traces = _; read = _; written = _; length; opcode = _ } + | Normal { traces = _; read = _; written = _; length; opcode = _; segments = _ } |Special length |IslaFail length -> let state = State.copy_if_locked state in diff --git a/src/trace/cache.ml b/src/trace/cache.ml index cd5e611b..7cb5625d 100644 --- a/src/trace/cache.ml +++ b/src/trace/cache.ml @@ -171,4 +171,7 @@ let get_traces (opcode : Isla.Server.opcode) : Base.t list = (** Get a full blown {!Instr} from the opcode, going through the whole Isla pipeline if necessary.*) -let get_instr (opcode : Isla.Server.opcode) : Instr.t = Instr.of_traces opcode @@ get_traces opcode +let get_instr (opcode : BytesSeq.t * Elf.Relocations.rel option) : Instr.t = + let raw_opcode, reloc = opcode in + let reloc_target = Option.map (fun (x : Elf.Relocations.rel) -> x.target) reloc in + Instr.of_traces opcode @@ get_traces (raw_opcode, reloc_target) diff --git a/src/trace/instr.ml b/src/trace/instr.ml index 25d1a9e5..012273a5 100644 --- a/src/trace/instr.ml +++ b/src/trace/instr.ml @@ -60,13 +60,16 @@ type trace_meta = { written : Reg.t list; } +module SMap = Map.Make (String) + (** A full instruction representation *) type t = { traces : trace_meta list; length : int; (** Bytes length *) read : Reg.t list; written : Reg.t list; - opcode : Isla.Server.opcode; + opcode : BytesSeq.t; + segments: Elf.Relocations.exp SMap.t } let dedup_regs = List.sort_uniq State.Reg.compare @@ -99,17 +102,31 @@ let trace_meta_of_trace trace = { trace; jump_target = !jump; read = dedup_regs !read; written = dedup_regs !written } (** Generate full instruction data from a list of traces *) -let of_traces opcode traces = +let of_traces (opcode: BytesSeq.t * Elf.Relocations.rel option) traces = let traces = List.map trace_meta_of_trace traces in - let raw_opcode, _ = opcode in - let length = BytesSeq.length raw_opcode in + let opcode, reloc = opcode in + let length = BytesSeq.length opcode in let read = dedup_regs @@ List.concat_map (fun (tr : trace_meta) -> tr.read) traces in let written = dedup_regs @@ List.concat_map (fun (tr : trace_meta) -> tr.written) traces in - { traces; length; read; written; opcode } + + let segments = match reloc with + | None -> SMap.empty + | Some reloc -> + reloc.target + |> Isla.Server.segments_of_reloc + |> List.map (fun (v, (lo, hi)) -> (v, Elf.Relocations.Mask(reloc.value, hi, lo))) + |> SMap.of_list + in + { traces; length; read; written; opcode; segments } (** Pretty print the representation of an instruction *) let pp instr = let open Pp in + (prefix 4 1 !^"Segments:" @@ + separate_map hardline + (pair string Elf.Relocations.pp_exp) + (SMap.to_list instr.segments)) + ^^ hardline ^^ separate_mapi hardline (fun i trc -> prefix 4 1 (dprintf "Trace %d:" i) (Base.pp trc.trace)) instr.traces From 90b3ac8106d8819cea250fa74bfab1ffc7db052b Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Fri, 10 Jan 2025 16:24:28 +0100 Subject: [PATCH 20/89] WIP eval relocations --- src/run/runner.ml | 5 +++-- src/trace/context.ml | 20 +++++++++++++++++--- src/trace/run.ml | 12 ++++++------ 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/run/runner.ml b/src/run/runner.ml index d21193d3..a557708e 100644 --- a/src/run/runner.ml +++ b/src/run/runner.ml @@ -165,12 +165,13 @@ let fetch (runner : t) (pc : Elf.Address.t) : slot = let execute_normal ?(prelock = ignore) ~pc runner (instr : Trace.Instr.t) state = let dwarf = runner.dwarf in let next = instr.length in + let segments_map = instr.segments in let run_pure () = List.map (fun (trc : Trace.Instr.trace_meta) -> let nstate = State.copy state in State.set_last_pc nstate pc; - Trace.Run.trace_pc_mut ?dwarf ~next nstate trc.trace; + Trace.Run.trace_pc_mut ?dwarf ~segments_map ~next nstate trc.trace; nstate) instr.traces in @@ -178,7 +179,7 @@ let execute_normal ?(prelock = ignore) ~pc runner (instr : Trace.Instr.t) state match instr.traces with | [trc] -> State.set_last_pc state pc; - Trace.Run.trace_pc_mut ?dwarf ~next state trc.trace; + Trace.Run.trace_pc_mut ?dwarf ~segments_map ~next state trc.trace; [state] | _ -> prelock state; diff --git a/src/trace/context.ml b/src/trace/context.ml index fb7a54a4..01da1e50 100644 --- a/src/trace/context.ml +++ b/src/trace/context.ml @@ -48,19 +48,33 @@ should be added here *) +module SMap = Map.Make (String) + (** The context to run a trace *) type t = { reg_writes : (State.Reg.t * State.tval) Vec.t; (** Stores the delayed register writes *) mem_reads : State.tval HashVector.t; (** Stores the result of memory reads *) state : State.t; + segments : State.exp SMap.t; dwarf : Dw.t option; (** Optionally DWARF information. If present, typing is enabled *) } +let rec exp_of_relocation: Elf.Relocations.exp -> State.exp = + let f = exp_of_relocation in function + | Section _ -> Exp.Typed.bits (BitVec.zero ~size:64) (* TODO put the actual value there, size? *) + | Const x -> Exp.Typed.bits (BitVec.of_int x ~size:64) (* TODO size? *) + | BinOp (a, Add, b) -> Exp.Typed.(f a + f b) + | BinOp (a, Sub, b) -> Exp.Typed.(f a - f b) + | BinOp (a, And, b) -> Exp.Typed.manyop (AstGen.Ott.Bvmanyarith AstGen.Ott.Bvand) [f a; f b] + | UnOp (Not, b) -> Exp.Typed.unop AstGen.Ott.Bvnot (f b) + | Mask (x, last, first) -> Exp.Typed.extract ~last ~first (f x) + (** Build a {!context} from a state *) -let make_context ?dwarf state = +let make_context ?dwarf ?segments_map state = let reg_writes = Vec.empty () in let mem_reads = HashVector.empty () in - { state; reg_writes; mem_reads; dwarf } + let segments = segments_map |> Option.value ~default:SMap.empty |> SMap.map exp_of_relocation in + { state; reg_writes; mem_reads; dwarf; segments } (** Expand a Trace variable to a State expression, using the context *) let expand_var ~(ctxt : t) (v : Base.Var.t) (a : Ast.no Ast.ty) : State.exp = @@ -68,7 +82,7 @@ let expand_var ~(ctxt : t) (v : Base.Var.t) (a : Ast.no Ast.ty) : State.exp = match v with | Register reg -> State.get_reg_exp ctxt.state reg | NonDet (i, _) | Read (i, _) -> (HashVector.get ctxt.mem_reads i).exp (* TODO is the NonDet case correct *) - | Segment (_name, size) -> Exp.Typed.bits (BitVec.zero ~size) (* TODO put the actual value there *) + | Segment (name, _) -> SMap.find name ctxt.segments (* TODO put the actual value there *) (** Tell if typing should enabled with this context *) let typing_enabled ~(ctxt : t) = ctxt.dwarf <> None diff --git a/src/trace/run.ml b/src/trace/run.ml index f6eaf1c1..df944ca7 100644 --- a/src/trace/run.ml +++ b/src/trace/run.ml @@ -114,17 +114,17 @@ let event_mut ~(ctxt : ctxt) (event : Base.event) = | Assert exp -> State.push_assert ctxt.state (expand ~ctxt exp) (** Run a trace on the provided state by mutation. Enable typing if [dwarf] is provided *) -let trace_mut ?dwarf (state : State.t) (events : Base.t) : unit = +let trace_mut ?dwarf ?segments_map (state : State.t) (events : Base.t) : unit = assert (not @@ State.is_locked state); info "Running trace with typing %s" (if dwarf <> None then "on" else "off"); - let ctxt = Context.make_context ?dwarf state in + let ctxt = Context.make_context ?dwarf ?segments_map state in List.iter (event_mut ~ctxt) events; Vec.iter (fun (reg, tval) -> State.Reg.Map.set state.regs reg tval) ctxt.reg_writes (** Run a trace on the provided state by returning an updated copy.*) -let trace ?dwarf (start : State.t) (events : Base.t) : State.t = +let trace ?dwarf ?segments_map (start : State.t) (events : Base.t) : State.t = let state = State.copy start in - trace_mut ?dwarf state events; + trace_mut ?dwarf ?segments_map state events; State.lock state; state @@ -133,12 +133,12 @@ let trace ?dwarf (start : State.t) (events : Base.t) : State.t = Thus this function automatically handle moving the PC for fall-through instruction *) -let trace_pc_mut ?dwarf ~(next : int) (state : State.t) (events : Base.t) : unit = +let trace_pc_mut ?dwarf ?segments_map ~(next : int) (state : State.t) (events : Base.t) : unit = let pc = Arch.pc () in let rec is_touching_pc : Base.t -> bool = function | [] -> false | WriteReg { reg; _ } :: _ when reg = pc -> true | _ :: l -> is_touching_pc l in - trace_mut ?dwarf state events; + trace_mut ?dwarf ?segments_map state events; if is_touching_pc events then State.concretize_pc ~pc state else State.bump_pc ~pc state next From 34824b143fe367f9bdeaf3faf30f29f7e597eb64 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sun, 12 Jan 2025 12:14:57 +0100 Subject: [PATCH 21/89] Symbolic section addresses --- src/state/base.ml | 6 ++++++ src/state/base.mli | 2 ++ src/trace/context.ml | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/state/base.ml b/src/state/base.ml index 4cda529c..5b054155 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -76,6 +76,8 @@ module Var = struct | NonDet of int * Ast.Size.t (** Variable representing non-determinism in the spec. Can only be bit-vectors of size {8, 16, 32, 64} for now. *) + | Section of string + (** Symbolic base address of ELF section. Assume 64bit for now. *) let to_string = function | Register (state, reg) -> @@ -90,6 +92,7 @@ module Var = struct | NonDet (num, size) -> if size = Ast.Size.B64 then Printf.sprintf "nondet:%i" num else Printf.sprintf "nondet:%i:%dbits" num (Ast.Size.to_bits size) + | Section s -> "section:"^s let expect_register = function | Register (_, reg) -> reg @@ -125,6 +128,7 @@ module Var = struct | ["arg"; num] -> Arg (int_of_string num) | ["retarg"; ""] -> RetArg | ["retaddr"; ""] -> RetAddr + | ["section"; s] -> Section s | _ -> Raise.inv_arg "Invalid state variable: %s" s let of_reg id reg = Register (id, reg) @@ -138,6 +142,7 @@ module Var = struct | (RetArg, RetArg) -> true | (RetAddr, RetAddr) -> true | (NonDet (num, size), NonDet (num', size')) -> num = num' && size = size' + | (Section s, Section s') -> s = s' | _ -> false let hash = Hashtbl.hash @@ -153,6 +158,7 @@ module Var = struct | RetArg -> Ast.Ty_BitVec 64 | RetAddr -> Ast.Ty_BitVec 64 | NonDet (_, size) -> Ast.Ty_BitVec (Ast.Size.to_bits size) + | Section _ -> Ast.Ty_BitVec 64 end type var = Var.t diff --git a/src/state/base.mli b/src/state/base.mli index 05669eb6..b65853c3 100644 --- a/src/state/base.mli +++ b/src/state/base.mli @@ -113,6 +113,8 @@ module Var : sig | RetAddr (** The return address: The address to which a "return" instruction would jump. *) | NonDet of int * Ast.Size.t (** Variable representing non-determinism in the spec. Can only be a bit-vector for now. *) + | Section of string + (** Symbolic base address of ELF section. Assume 64bit for now. *) (** Convert the variable to the string encoding. For parsing infrastructure reason, the encoding must always contain at least one [:]. *) diff --git a/src/trace/context.ml b/src/trace/context.ml index 01da1e50..d3cbd143 100644 --- a/src/trace/context.ml +++ b/src/trace/context.ml @@ -61,7 +61,7 @@ type t = { let rec exp_of_relocation: Elf.Relocations.exp -> State.exp = let f = exp_of_relocation in function - | Section _ -> Exp.Typed.bits (BitVec.zero ~size:64) (* TODO put the actual value there, size? *) + | Section s -> State.Exp.of_var (State.Var.Section s) (* TODO put the actual value there, size? *) | Const x -> Exp.Typed.bits (BitVec.of_int x ~size:64) (* TODO size? *) | BinOp (a, Add, b) -> Exp.Typed.(f a + f b) | BinOp (a, Sub, b) -> Exp.Typed.(f a - f b) From 897e638fb6c98542d8f471dbb3c656983cc6d05c Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sun, 12 Jan 2025 12:41:23 +0100 Subject: [PATCH 22/89] Fix cache --- src/elf/relocations.ml | 19 +++++++++---------- src/isla/cache.ml | 9 +++++---- src/isla/server.ml | 10 ++++++++++ 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/elf/relocations.ml b/src/elf/relocations.ml index b253f100..8bf736ce 100644 --- a/src/elf/relocations.ml +++ b/src/elf/relocations.ml @@ -56,17 +56,16 @@ let rec pp_exp = Pp.( | Mask (x, a, b) -> pp_exp x ^^ !^"[" ^^ int a ^^ !^":" ^^ int b ^^ !^"]" ) +let pp_target = Pp.(function +| AArch64 Abi_aarch64_symbolic_relocation.Data640 -> !^"Data64" +| AArch64 Abi_aarch64_symbolic_relocation.Data320 -> !^"Data32" +| AArch64 Abi_aarch64_symbolic_relocation.ADD -> !^"ADD" +| AArch64 Abi_aarch64_symbolic_relocation.ADRP -> !^"ADRP" +| AArch64 Abi_aarch64_symbolic_relocation.CALL -> !^"CALL" +| AArch64 Abi_aarch64_symbolic_relocation.LDST -> !^"LDST") + let pp_rel rel = - let target = match rel.target with - | AArch64 Abi_aarch64_symbolic_relocation.Data640 -> "Data64" - | AArch64 Abi_aarch64_symbolic_relocation.Data320 -> "Data32" - | AArch64 Abi_aarch64_symbolic_relocation.ADD -> "ADD" - | AArch64 Abi_aarch64_symbolic_relocation.ADRP -> "ADRP" - | AArch64 Abi_aarch64_symbolic_relocation.CALL -> "CALL" - | AArch64 Abi_aarch64_symbolic_relocation.LDST -> "LDST" - in - let expr = pp_exp rel.value in - Pp.(!^target ^^ !^": " ^^ expr) + Pp.(pp_target rel.target ^^ !^": " ^^ pp_exp rel.value) let pp rels = if IMap.is_empty rels then diff --git a/src/isla/cache.ml b/src/isla/cache.ml index a44cc8f3..98d7dfa2 100644 --- a/src/isla/cache.ml +++ b/src/isla/cache.ml @@ -86,7 +86,7 @@ module Opcode (*: Cache.Key *) = struct | _ -> false let small_enough bs rel_id = - BytesSeq.length bs < BytesSeq.int_bytes && rel_id < 16 + BytesSeq.length bs < BytesSeq.int_bytes && rel_id < 8 let hash = function | None -> 0 @@ -97,7 +97,7 @@ module Opcode (*: Cache.Key *) = struct if small_enough bs rel_id then begin assert (not @@ IntBits.get i IntBits.back); let res = IntBits.blit l 0 i (IntBits.back - 3) 3 in - let res = IntBits.blit rel_id 0 res (IntBits.back - 7) 4 in + let res = IntBits.blit rel_id 0 res (IntBits.back - 6) 3 in res end else IntBits.set i IntBits.back @@ -115,11 +115,12 @@ module Opcode (*: Cache.Key *) = struct else if IntBits.get hash IntBits.back then Raise.todo() else - let data = IntBits.sub hash 0 (IntBits.back - 3) in + let data = IntBits.sub hash 0 (IntBits.back - 6) in + let reloc_id = IntBits.sub hash (IntBits.back - 6) 3 in let size = IntBits.sub hash (IntBits.back - 3) 3 in let b = Bytes.create size in Bits.unsafe_blit_of_int data 0 b 0 (size * 8); - Some (BytesSeq.of_bytes b, None) + Some (BytesSeq.of_bytes b, Server.reloc_of_id reloc_id) end (** Representation of trace lists on disk. diff --git a/src/isla/server.ml b/src/isla/server.ml index a4fa0bb4..b2ba008b 100644 --- a/src/isla/server.ml +++ b/src/isla/server.ml @@ -79,6 +79,16 @@ let reloc_id: reloc option -> int = function | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST) -> 5 | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL) -> 6 +let reloc_of_id: int -> reloc option = function +| 0 -> None +| 1 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data640) +| 2 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data320) +| 3 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADRP) +| 4 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADD) +| 5 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST) +| 6 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL) +| _ -> Raise.fail "invalid reloc id" + type opcode = BytesSeq.t * reloc option type reloc_segment = string * (int * int) (* mapping the name of a segment to the range of the relocation value *) From 88cb3402a5b97b7e8f11d472ce67266016a9b4fd Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Tue, 14 Jan 2025 15:32:11 +0100 Subject: [PATCH 23/89] Make PC symbolic in isla --- src/config/config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config/config.toml b/src/config/config.toml index 2497ed77..9c968490 100644 --- a/src/config/config.toml +++ b/src/config/config.toml @@ -66,7 +66,7 @@ toolchain = "aarch64-linux-gnu" arch-file = "../../aarch64.ir" # relative to the toml file arch-toml = "isla_aarch64.toml" # relative to the toml file linearize = ["ConditionHolds", "integer_conditional_select", "InterruptPending"] - other-opts = [] + other-opts = ["-R", "_PC=undefined:%bv64"] [archs.riscv64] toolchain = "riscv64-linux-gnu" From ebf0b274049993f5dab4423225507a6182d44cb2 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Tue, 14 Jan 2025 18:23:24 +0100 Subject: [PATCH 24/89] Symbolic PC --- src/elf/address.ml | 16 +++++++++++++++- src/run/block_lib.ml | 30 +++++++++++++----------------- src/run/runner.ml | 6 ++---- src/state/base.ml | 22 +++++++++++++++++----- src/state/base.mli | 2 ++ 5 files changed, 49 insertions(+), 27 deletions(-) diff --git a/src/elf/address.ml b/src/elf/address.ml index c9f3b66b..701ddd89 100644 --- a/src/elf/address.ml +++ b/src/elf/address.ml @@ -7,4 +7,18 @@ let pp addr = Pp.(!^(addr.section) ^^ !^"+" ^^ ptr addr.offset) let of_linksem (section, offset) = { section; offset = Z.to_int offset } -let (+) addr offset = { section = addr.section; offset = addr.offset + offset } \ No newline at end of file +let (+) addr offset = { section = addr.section; offset = addr.offset + offset } + +let compare f {section=s1; offset=o1} {section=s2; offset=o2} = + if s1 = s2 then + Some (f o1 o2) + else + None + +let (<) = compare (<) + +let (>) = compare (>) + +let (<=) = compare (<=) + +let (>=) = compare (>=) diff --git a/src/run/block_lib.ml b/src/run/block_lib.ml index ddb6c14d..2d548c75 100644 --- a/src/run/block_lib.ml +++ b/src/run/block_lib.ml @@ -107,7 +107,7 @@ let run ?(every_instruction = false) ?relevant (b : t) (start : State.t) : label State.lock state end; let states = - let pc = pc_exp |> Ast.expect_bits |> BitVec.to_int in + let pc = State.Exp.expect_sym_address pc_exp in if Option.fold ~none:true ~some:(Fun.flip Hashtbl.mem pc) relevant then ( info "Running pc %t" (Pp.top State.Exp.pp pc_exp); Runner.run ~prelock b.runner state @@ -148,12 +148,6 @@ let run ?(every_instruction = false) ?relevant (b : t) (start : State.t) : label - pc has be seen more than [loop] *) let gen_endpred ?min ?max ?loop ?(brks = []) () : State.exp -> string option = - (* HACK *) - (* TODO rewrite for symbolic pc *) - let min = Option.map (fun min -> min.Elf.Address.offset) min in - let max = Option.map (fun max -> max.Elf.Address.offset) max in - let brks = List.map (fun brks -> brks.Elf.Address.offset) brks in - (* *) let endnow fmt = Printf.ksprintf Option.some fmt in let pchtbl = Hashtbl.create 10 in let loop_str = @@ -164,21 +158,23 @@ let gen_endpred ?min ?max ?loop ?(brks = []) () : State.exp -> string option = | Some n -> Printf.sprintf "%d times" n | None -> "" in - function - | Ast.Bits (bv, _) -> ( - let pc = BitVec.to_int bv in - debug "enpred: Evaluating PC 0x%x" pc; + fun pc_exp -> + ( try + Some (State.Exp.expect_sym_address pc_exp) + with + _ -> None + ) |> Option.map (fun pc -> + debug "enpred: Evaluating PC %t" (Pp.top Elf.Address.pp pc); match (min, max, loop) with - | (Some min, _, _) when pc < min -> endnow "PC 0x%x was below min 0x%x" pc min - | (_, Some max, _) when pc >= max -> endnow "PC 0x%x was above max 0x%x" pc max - | _ when List.exists (( = ) pc) brks -> endnow "PC 0x%x hit a breakpoint" pc + | (Some min, _, _) when Elf.Address.(pc < min) <> Some false -> endnow "PC %t was below min %t" Pp.(tos Elf.Address.pp pc) Pp.(tos Elf.Address.pp min) + | (_, Some max, _) when Elf.Address.(pc >= max) <> Some false -> endnow "PC %t was above max %t" Pp.(tos Elf.Address.pp pc) Pp.(tos Elf.Address.pp max) + | _ when List.exists (( = ) pc) brks -> endnow "PC %t hit a breakpoint" Pp.(tos Elf.Address.pp pc) | (_, _, Some loop) -> let current_num = Hashtbl.find_opt pchtbl pc |> Option.value ~default:0 in - if current_num >= loop then endnow "PC 0x%x had been seen more than %s" pc loop_str + if current_num >= loop then endnow "PC %t had been seen more than %s" Pp.(tos Elf.Address.pp pc) loop_str else begin Hashtbl.replace pchtbl pc (current_num + 1); None end | _ -> None - ) - | exp -> endnow "PC %t is symbolic" Pp.(tos State.Exp.pp exp) + ) |> Option.value_fun ~default:(fun () -> endnow "PC %t is symbolic" Pp.(tos State.Exp.pp pc_exp)) diff --git a/src/run/runner.ml b/src/run/runner.ml index a557708e..fda103e4 100644 --- a/src/run/runner.ml +++ b/src/run/runner.ml @@ -191,8 +191,7 @@ let execute_normal ?(prelock = ignore) ~pc runner (instr : Trace.Instr.t) state let skip runner state : State.t list = let pc_exp = State.get_reg_exp state runner.pc in try - let pc = pc_exp |> Ast.expect_bits |> BitVec.to_int in - let pc = Elf.Address.{ section = ".text"; offset = pc } in (* TODO this is wrong, should get symbolic value from pc_exp *) + let pc = State.Exp.expect_sym_address pc_exp in match fetch runner pc with | Normal { traces = _; read = _; written = _; length; opcode = _; segments = _ } |Special length @@ -224,8 +223,7 @@ let skip runner state : State.t list = let run ?prelock runner state : State.t list = let pc_exp = State.get_reg_exp state runner.pc in try - let pc = pc_exp |> Ast.expect_bits |> BitVec.to_int in - let pc = Elf.Address.{ section = ".text"; offset = pc } in (* TODO this is wrong, should get symbolic value from pc_exp *) + let pc = State.Exp.expect_sym_address pc_exp in match fetch runner pc with | Normal instr -> execute_normal ?prelock ~pc runner instr state | Special _ -> diff --git a/src/state/base.ml b/src/state/base.ml index 5b054155..489bc5fc 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -172,6 +172,15 @@ module Exp = struct include Exp.Make (Var) let of_reg id reg = Var.of_reg id reg |> of_var + + let expect_sym_address exp = + let sym, conc = Exp.Sums.split_concrete exp in + let section = match sym with + | Some(Ast.Var (Var.Section s, _)) -> s + | _ -> Raise.fail "Expected symbolic Section base" + in + let offset = BitVec.to_int conc in + Elf.Address.{ section; offset } end type exp = Exp.t @@ -507,16 +516,19 @@ let set_pc ~(pc : Reg.t) (s : t) (pcval : int) = (* TODO *) let set_pc_sym ~(pc : Reg.t) (s : t) (pcval : Elf.Address.t) = - set_pc ~pc s pcval.offset + (* set_pc ~pc s pcval.offset *) + let exp = Typed.(var ~typ:(Ty_BitVec 64) (Var.Section pcval.section) + bits_int ~size:64 pcval.offset) in + let ctyp = Ctype.of_frag (Ctype.Global ".text") ~offset:pcval.offset ~constexpr:true in + set_reg s pc @@ Tval.make ~ctyp exp let bump_pc ~(pc : Reg.t) (s : t) (bump : int) = let pc_exp = get_reg_exp s pc in - assert (ConcreteEval.is_concrete pc_exp); - let old_pc = ConcreteEval.eval pc_exp |> Value.expect_bv |> BitVec.to_int in - let new_pc = old_pc + bump in - set_pc ~pc s new_pc + let old_pc = Exp.expect_sym_address pc_exp in + let new_pc = Elf.Address.(old_pc + bump) in + set_pc_sym ~pc s new_pc +(* TODO section + offset *) let concretize_pc ~(pc : Reg.t) (s : t) = let pc_exp = get_reg_exp s pc in try ConcreteEval.eval pc_exp |> Value.expect_bv |> BitVec.to_int |> set_pc ~pc s diff --git a/src/state/base.mli b/src/state/base.mli index b65853c3..bcba6c5f 100644 --- a/src/state/base.mli +++ b/src/state/base.mli @@ -158,6 +158,8 @@ module Exp : sig (** Create an expression from an register and a state id *) val of_reg : id -> Reg.t -> t + + val expect_sym_address : t -> Elf.Address.t end type exp = Exp.t From dde2e988439d0f7e2c5e4eca56c2a4619d67ebf0 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Wed, 15 Jan 2025 16:47:09 +0100 Subject: [PATCH 25/89] WIP --- src/exp/sums.ml | 20 ++++++++++++++++++++ src/state/base.ml | 5 +++++ src/trace/context.ml | 3 ++- src/utils/list.ml | 14 ++++++++++++++ 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/exp/sums.ml b/src/exp/sums.ml index 4d945032..6d843f5d 100644 --- a/src/exp/sums.ml +++ b/src/exp/sums.ml @@ -42,6 +42,10 @@ (* *) (*==================================================================================*) +open Logs.Logger (struct + let str = __MODULE__ +end) + (* The documentation is in the mli file *) let rec split = @@ -59,6 +63,20 @@ let rec split = let l' = split e' in let rl' = List.rev_map Typed.neg l' in List.rev_append rl' l + | Manyop (Concat, l, _) -> + let all_splits = List.map split l in + let defaults = List.map (fun e -> + let size = e |> Typed.get_type |> Typed.expect_bv in + Typed.bits_int ~size 0 + ) l in + let terms = List.transpose ~defaults all_splits in + List.map Typed.concat terms + | Unop (ZeroExtend m, e, _) -> + let l = split e in + List.map (Typed.unop (ZeroExtend m)) l + | Unop (SignExtend s, e, _) -> + let l = split e in + List.map (Typed.unop (SignExtend s)) l | e -> [e] let merge ~size l = if l = [] then Typed.zero ~size else Typed.sum l @@ -88,6 +106,8 @@ let smart_substract ~equal ~term exp = let split_concrete exp = let size = Typed.expect_bv (Typed.get_type exp) in let terms = split exp in + debug "Split:"; + List.iter (fun t -> debug "\t%t" Pp.(top (PpExp.pp_exp (fun _ -> !^"var")) t)) terms; let (symterms, concvals) = List.partition_map ConcreteEval.eval_if_concrete terms in let concbvs = List.map Value.expect_bv concvals in let concbv = List.fold_left BitVec.( + ) (BitVec.zero ~size) concbvs in diff --git a/src/state/base.ml b/src/state/base.ml index 489bc5fc..55606fc8 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -163,6 +163,8 @@ end type var = Var.t +module Z3St = Z3.Make (Var) + module Sums = Exp.Sums module Typed = Exp.Typed module ConcreteEval = Exp.ConcreteEval @@ -474,6 +476,9 @@ let read ~provenance ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t Option.value exp ~default:(Exp.of_var var) let read_noprov ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t = + (* let addr = Z3St.simplify_full addr in *) + let sym, conc = Sums.split_concrete addr in + debug "Address: %t + %t" Pp.(top (optional Exp.pp) sym) Pp.(top BitVec.pp_smt conc); if ConcreteEval.is_concrete addr || Vec.length s.mem.frags = 0 then read ~provenance:Ctype.Main ?ctyp s ~addr ~size else Raise.fail "Trying to access %t in state %d: No provenance info" Pp.(tos Exp.pp addr) s.id diff --git a/src/trace/context.ml b/src/trace/context.ml index d3cbd143..3ed8317d 100644 --- a/src/trace/context.ml +++ b/src/trace/context.ml @@ -82,7 +82,8 @@ let expand_var ~(ctxt : t) (v : Base.Var.t) (a : Ast.no Ast.ty) : State.exp = match v with | Register reg -> State.get_reg_exp ctxt.state reg | NonDet (i, _) | Read (i, _) -> (HashVector.get ctxt.mem_reads i).exp (* TODO is the NonDet case correct *) - | Segment (name, _) -> SMap.find name ctxt.segments (* TODO put the actual value there *) + | Segment (name, _) -> SMap.find name ctxt.segments (*TODO put the actual value there*) + (* | Segment (name, sz) -> Exp.Typed.extract ~first:0 ~last:(sz-1) (State.Exp.of_var (State.Var.Section name)) TODO put the actual value there *) (** Tell if typing should enabled with this context *) let typing_enabled ~(ctxt : t) = ctxt.dwarf <> None diff --git a/src/utils/list.ml b/src/utils/list.ml index 5eae6c30..6eb978ca 100644 --- a/src/utils/list.ml +++ b/src/utils/list.ml @@ -298,3 +298,17 @@ let prod l1 l2 = (** Monadic merge. [let* x = xl and* y = yl in ... = let* x= xl in let* y = yl in ...] *) let ( and* ) = prod + +let hd_opt = function +| [] -> None +| h :: _ -> Some h + +let rec transpose ~defaults l = + let first = map hd_opt l in + let rest = map (drop 1) l in + if for_all Stdlib.Option.is_none first then + [] + else + let t = transpose ~defaults rest in + let h = combine first defaults |> map (fun (value, default) -> Stdlib.Option.value ~default value) in + h :: t From 516f4f1d4a0e95fb7e5e92f0a0322e0585b277a8 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Wed, 15 Jan 2025 22:27:36 +0100 Subject: [PATCH 26/89] wip --- src/elf/relocations.ml | 34 +++++++++++++++------- src/isla/cache.ml | 25 ++++++++++++++-- src/isla/isla.ml | 1 + src/isla/relocation.ml | 44 ++++++++++++++++++++++++++++ src/isla/server.ml | 65 ++---------------------------------------- src/run/runner.ml | 8 +++--- src/trace/context.ml | 36 +++++++++++++++++++---- src/trace/instr.ml | 21 +++----------- src/trace/run.ml | 12 ++++---- 9 files changed, 138 insertions(+), 108 deletions(-) create mode 100644 src/isla/relocation.ml diff --git a/src/elf/relocations.ml b/src/elf/relocations.ml index 8bf736ce..e9295717 100644 --- a/src/elf/relocations.ml +++ b/src/elf/relocations.ml @@ -13,29 +13,41 @@ type exp = | BinOp of (exp * binary_operation * exp) | UnOp of (unary_operation * exp) (* | AssertRange of (exp * int * int) *) -| Mask of (exp * int * int) +(* | Mask of (exp * int * int) *) type rel = { target : target; value : exp; + range: (int64 * int64) option; + mask : int * int; } type t = rel IMap.t type linksem_t = LinksemRelocatable.rels -let rec exp_of_linksem = function -| Elf_symbolic.Section s -> Section s -| Elf_symbolic.Const x -> Const (Z.to_int x) -| Elf_symbolic.BinOp (x, op, y) -> BinOp (exp_of_linksem x, op, exp_of_linksem y) -| Elf_symbolic.UnOp (op, x) -> UnOp (op, exp_of_linksem x) -| Elf_symbolic.AssertRange (x, _, _) -> exp_of_linksem x (* TODO *) -| Elf_symbolic.Mask (x, a, b) -> Mask (exp_of_linksem x, Z.to_int a, Z.to_int b) +let exp_of_linksem = + let rec value_of_linksem = function + | Elf_symbolic.Section s -> Section s + | Elf_symbolic.Const x -> Const (Z.to_int x) + | Elf_symbolic.BinOp (x, op, y) -> BinOp (value_of_linksem x, op, value_of_linksem y) + | Elf_symbolic.UnOp (op, x) -> UnOp (op, value_of_linksem x) + | Elf_symbolic.AssertRange (_, _, _) -> Raise.fail "AssertRange should not occur in value expression" + | Elf_symbolic.Mask (_, _, _) -> Raise.fail "AssertRange should not occur in value expression" + in function + | Elf_symbolic.Mask (e, hi, lo) -> + let e, range = match e with + | Elf_symbolic.AssertRange (e, min, max) -> e, Some (Z.to_int64 min, Z.to_int64 max) + | e -> e, None + in + fun target -> {target; range; mask = (Z.to_int hi, Z.to_int lo); value = value_of_linksem e} + | _ -> Raise.fail "Expression does not have Mask in top level" + let of_linksem: linksem_t -> t = function | LinksemRelocatable.AArch64 relocs -> let add k Elf_symbolic.{ arel_value; arel_target } m = - IMap.add (Z.to_int k) { value = exp_of_linksem arel_value; target = AArch64 arel_target } m + IMap.add (Z.to_int k) (exp_of_linksem arel_value (AArch64 arel_target)) m in Pmap.fold add relocs IMap.empty @@ -53,7 +65,6 @@ let rec pp_exp = Pp.( | BinOp (a, Sub, b) -> !^"(" ^^ pp_exp a ^^ !^"-" ^^ pp_exp b ^^ !^")" | BinOp (a, And, b) -> !^"(" ^^ pp_exp a ^^ !^"&" ^^ pp_exp b ^^ !^")" | UnOp (Not, b) -> !^"(" ^^ !^"~" ^^ pp_exp b ^^ !^")" - | Mask (x, a, b) -> pp_exp x ^^ !^"[" ^^ int a ^^ !^":" ^^ int b ^^ !^"]" ) let pp_target = Pp.(function @@ -65,7 +76,8 @@ let pp_target = Pp.(function | AArch64 Abi_aarch64_symbolic_relocation.LDST -> !^"LDST") let pp_rel rel = - Pp.(pp_target rel.target ^^ !^": " ^^ pp_exp rel.value) + let hi, lo = rel.mask in + Pp.(pp_target rel.target ^^ !^": " ^^ pp_exp rel.value ^^ !^"[" ^^ int hi ^^ !^":" ^^ int lo ^^ !^"]") let pp rels = if IMap.is_empty rels then diff --git a/src/isla/cache.ml b/src/isla/cache.ml index 98d7dfa2..e5c95d72 100644 --- a/src/isla/cache.ml +++ b/src/isla/cache.ml @@ -79,6 +79,25 @@ type config = Server.config module Opcode (*: Cache.Key *) = struct type t = Server.opcode option + let reloc_id: Relocation.t option -> int = function + | None -> 0 + | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data640) -> 1 + | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data320) -> 2 + | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADRP) -> 3 + | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADD) -> 4 + | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST) -> 5 + | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL) -> 6 + + let reloc_of_id: int -> Relocation.t option = function + | 0 -> None + | 1 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data640) + | 2 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data320) + | 3 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADRP) + | 4 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADD) + | 5 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST) + | 6 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL) + | _ -> Raise.fail "invalid reloc id" + let equal a b = match (a, b) with | (None, None) -> true @@ -93,7 +112,7 @@ module Opcode (*: Cache.Key *) = struct | Some (bs, rel) -> let i = BytesSeq.getintle_ze bs 0 in let l = BytesSeq.length bs in - let rel_id = Server.reloc_id rel in + let rel_id = reloc_id rel in if small_enough bs rel_id then begin assert (not @@ IntBits.get i IntBits.back); let res = IntBits.blit l 0 i (IntBits.back - 3) 3 in @@ -105,7 +124,7 @@ module Opcode (*: Cache.Key *) = struct let to_file _file = function | None -> () | Some (bs, rel) -> - let rel_id = Server.reloc_id rel in + let rel_id = reloc_id rel in if small_enough bs rel_id then () else Raise.todo() @@ -120,7 +139,7 @@ module Opcode (*: Cache.Key *) = struct let size = IntBits.sub hash (IntBits.back - 3) 3 in let b = Bytes.create size in Bits.unsafe_blit_of_int data 0 b 0 (size * 8); - Some (BytesSeq.of_bytes b, Server.reloc_of_id reloc_id) + Some (BytesSeq.of_bytes b, reloc_of_id reloc_id) end (** Representation of trace lists on disk. diff --git a/src/isla/isla.ml b/src/isla/isla.ml index 5ed4bdf5..e61981e1 100644 --- a/src/isla/isla.ml +++ b/src/isla/isla.ml @@ -50,6 +50,7 @@ module Cache = Cache module Conv = Conv module Manip = Manip module Preprocess = Preprocess +module Relocation = Relocation module Run = Run module Server = Server module Test = Test diff --git a/src/isla/relocation.ml b/src/isla/relocation.ml new file mode 100644 index 00000000..21f7c268 --- /dev/null +++ b/src/isla/relocation.ml @@ -0,0 +1,44 @@ +open Logs.Logger (struct + let str = __MODULE__ +end) + +type t = Elf.Relocations.target + +type segment = string * (int * int) (* mapping the name of a segment to the range of the relocation value *) + +let pp_opcode_with_segments (b, r) = + match r with + | None -> Pp.(!^"#x" ^^ BytesSeq.ppint b) + | Some (Elf.Relocations.AArch64 rtype) -> + let bits = BytesSeq.getbvle ~size:32 b 0 in + Pp.( + match rtype with + | Abi_aarch64_symbolic_relocation.Data640 -> fatal "Data64 relocation not allowed for instruction" + | Abi_aarch64_symbolic_relocation.Data320 -> fatal "Data32 relocation not allowed for instruction" + | Abi_aarch64_symbolic_relocation.ADRP -> + BitVec.pp_smt (BitVec.extract 31 31 bits) + ^^ !^" x0:2 " ^^ + BitVec.pp_smt (BitVec.extract 24 28 bits) + ^^ !^" x1:19 " ^^ + BitVec.pp_smt (BitVec.extract 0 4 bits) + | Abi_aarch64_symbolic_relocation.ADD -> + BitVec.pp_smt (BitVec.extract 22 31 bits) + ^^ !^" x0:12 " ^^ + BitVec.pp_smt (BitVec.extract 0 9 bits) + | Abi_aarch64_symbolic_relocation.LDST -> (* TODO different width loads, alignment *) + BitVec.pp_smt (BitVec.extract 22 31 bits) + ^^ !^" x0:10 " ^^ + BitVec.pp_smt (BitVec.extract 0 11 bits) + | Abi_aarch64_symbolic_relocation.CALL -> + BitVec.pp_smt (BitVec.extract 26 31 bits) + ^^ !^" x0:26 " + ) + +(* for interpreting the segments *) +let segments_of_reloc: t -> segment list = function +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data640 -> fatal "invalid relocation for instructions (Data64)" +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data320 -> fatal "invalid relocation for instructions (Data32)" +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADRP -> ["x0", (0, 1); "x1", (2, 20)] (* or absolute? ["x0", (12, 13); "x1", (14, 32)] *) +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADD -> ["x0", (0, 11)] +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST -> ["x0", (0, 9)] (* TODO depends on load size *) (* or absolute? ["x0", (2, 11)] *) +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL -> ["x0", (0, 25)] (* or absolute? ["x0", (2, 27)] *) \ No newline at end of file diff --git a/src/isla/server.ml b/src/isla/server.ml index b2ba008b..e6f247ff 100644 --- a/src/isla/server.ml +++ b/src/isla/server.ml @@ -68,30 +68,8 @@ type config = Config.t processor exception/fault) or not *) type trcs = Base.instruction_segments option * (bool * Base.rtrc) list -type reloc = Elf.Relocations.target - -let reloc_id: reloc option -> int = function -| None -> 0 -| Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data640) -> 1 -| Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data320) -> 2 -| Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADRP) -> 3 -| Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADD) -> 4 -| Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST) -> 5 -| Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL) -> 6 - -let reloc_of_id: int -> reloc option = function -| 0 -> None -| 1 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data640) -| 2 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data320) -| 3 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADRP) -| 4 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADD) -| 5 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST) -| 6 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL) -| _ -> Raise.fail "invalid reloc id" - -type opcode = BytesSeq.t * reloc option - -type reloc_segment = string * (int * int) (* mapping the name of a segment to the range of the relocation value *) +type opcode = BytesSeq.t * Relocation.t option + (** Bump when updating isla. TODO: move the version checking to allow a range of version. @@ -228,48 +206,11 @@ let pp_answer = function (** The type of a request to isla *) type request = TEXT_ASM of string | ASM of opcode | VERSION | STOP -let pp_interpreted_opcode (b, r) = - match r with - | None -> Pp.(!^"#x" ^^ BytesSeq.ppint b) - | Some (Elf.Relocations.AArch64 rtype) -> - let bits = BytesSeq.getbvle ~size:32 b 0 in - Pp.( - match rtype with - | Abi_aarch64_symbolic_relocation.Data640 -> fatal "Data64 relocation not allowed for instruction" - | Abi_aarch64_symbolic_relocation.Data320 -> fatal "Data32 relocation not allowed for instruction" - | Abi_aarch64_symbolic_relocation.ADRP -> - BitVec.pp_smt (BitVec.extract 31 31 bits) - ^^ !^" x0:2 " ^^ - BitVec.pp_smt (BitVec.extract 24 28 bits) - ^^ !^" x1:19 " ^^ - BitVec.pp_smt (BitVec.extract 0 4 bits) - | Abi_aarch64_symbolic_relocation.ADD -> - BitVec.pp_smt (BitVec.extract 22 31 bits) - ^^ !^" x0:12 " ^^ - BitVec.pp_smt (BitVec.extract 0 9 bits) - | Abi_aarch64_symbolic_relocation.LDST -> (* TODO different width loads, alignment *) - BitVec.pp_smt (BitVec.extract 22 31 bits) - ^^ !^" x0:10 " ^^ - BitVec.pp_smt (BitVec.extract 0 11 bits) - | Abi_aarch64_symbolic_relocation.CALL -> - BitVec.pp_smt (BitVec.extract 26 31 bits) - ^^ !^" x0:26 " - ) - -(* for interpreting the segments *) -let segments_of_reloc: reloc -> reloc_segment list = function -| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data640 -> fatal "invalid relocation for instructions (Data64)" -| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data320 -> fatal "invalid relocation for instructions (Data32)" -| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADRP -> ["x0", (0, 1); "x1", (2, 20)] (* or absolute? ["x0", (12, 13); "x1", (14, 32)] *) -| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADD -> ["x0", (0, 11)] -| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST -> ["x0", (0, 9)] (* TODO depends on load size *) (* or absolute? ["x0", (2, 11)] *) -| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL -> ["x0", (0, 25)] (* or absolute? ["x0", (2, 27)] *) - (** Convert a request into the string message expected by isla-client This should match the protocol *) let string_of_request = function | TEXT_ASM s -> Printf.sprintf "execute_asm %s" s - | ASM b -> Pp.(sprintc @@ !^"execute " ^^ pp_interpreted_opcode b) + | ASM b -> Pp.(sprintc @@ !^"execute " ^^ Relocation.pp_opcode_with_segments b) | VERSION -> "version" | STOP -> "stop" diff --git a/src/run/runner.ml b/src/run/runner.ml index fda103e4..7ae5ff59 100644 --- a/src/run/runner.ml +++ b/src/run/runner.ml @@ -165,13 +165,13 @@ let fetch (runner : t) (pc : Elf.Address.t) : slot = let execute_normal ?(prelock = ignore) ~pc runner (instr : Trace.Instr.t) state = let dwarf = runner.dwarf in let next = instr.length in - let segments_map = instr.segments in + let relocation = instr.relocation in let run_pure () = List.map (fun (trc : Trace.Instr.trace_meta) -> let nstate = State.copy state in State.set_last_pc nstate pc; - Trace.Run.trace_pc_mut ?dwarf ~segments_map ~next nstate trc.trace; + Trace.Run.trace_pc_mut ?dwarf ?relocation ~next nstate trc.trace; nstate) instr.traces in @@ -179,7 +179,7 @@ let execute_normal ?(prelock = ignore) ~pc runner (instr : Trace.Instr.t) state match instr.traces with | [trc] -> State.set_last_pc state pc; - Trace.Run.trace_pc_mut ?dwarf ~segments_map ~next state trc.trace; + Trace.Run.trace_pc_mut ?dwarf ?relocation ~next state trc.trace; [state] | _ -> prelock state; @@ -193,7 +193,7 @@ let skip runner state : State.t list = try let pc = State.Exp.expect_sym_address pc_exp in match fetch runner pc with - | Normal { traces = _; read = _; written = _; length; opcode = _; segments = _ } + | Normal { traces = _; read = _; written = _; length; opcode = _; relocation = _ } |Special length |IslaFail length -> let state = State.copy_if_locked state in diff --git a/src/trace/context.ml b/src/trace/context.ml index 3ed8317d..d03b0732 100644 --- a/src/trace/context.ml +++ b/src/trace/context.ml @@ -48,6 +48,10 @@ should be added here *) +open Logs.Logger (struct + let str = __MODULE__ +end) + module SMap = Map.Make (String) (** The context to run a trace *) @@ -59,21 +63,43 @@ type t = { dwarf : Dw.t option; (** Optionally DWARF information. If present, typing is enabled *) } -let rec exp_of_relocation: Elf.Relocations.exp -> State.exp = - let f = exp_of_relocation in function +let rec exp_of_relocation_exp: Elf.Relocations.exp -> State.exp = + let f = exp_of_relocation_exp in function | Section s -> State.Exp.of_var (State.Var.Section s) (* TODO put the actual value there, size? *) | Const x -> Exp.Typed.bits (BitVec.of_int x ~size:64) (* TODO size? *) | BinOp (a, Add, b) -> Exp.Typed.(f a + f b) | BinOp (a, Sub, b) -> Exp.Typed.(f a - f b) | BinOp (a, And, b) -> Exp.Typed.manyop (AstGen.Ott.Bvmanyarith AstGen.Ott.Bvand) [f a; f b] | UnOp (Not, b) -> Exp.Typed.unop AstGen.Ott.Bvnot (f b) - | Mask (x, last, first) -> Exp.Typed.extract ~last ~first (f x) + (* | Mask (x, last, first) -> Exp.Typed.extract ~last ~first (f x) *) (** Build a {!context} from a state *) -let make_context ?dwarf ?segments_map state = +let make_context ?dwarf ?relocation state = let reg_writes = Vec.empty () in let mem_reads = HashVector.empty () in - let segments = segments_map |> Option.value ~default:SMap.empty |> SMap.map exp_of_relocation in + + let segments = relocation + |> Option.map (fun relocation -> + let open Elf.Relocations in + let value = exp_of_relocation_exp relocation.value in + Option.iter (fun (min, max) -> + let min = Exp.Typed.bits @@ BitVec.of_z ~size:64 @@ Z.of_int64 min in + let max = Exp.Typed.bits @@ BitVec.of_z ~size:64 @@ Z.of_int64 max in + let cond1 = Exp.Typed.(binop (Bvcomp Bvsle) min value) in + let cond2 = Exp.Typed.(binop (Bvcomp Bvslt) value max) in + let cond = Exp.Typed.(manyop And [cond1; cond2]) in + State.push_assert state cond; + ) relocation.range; + let (last, first) = relocation.mask in + let masked = Exp.Typed.extract ~first ~last value in + + relocation.target + |> Isla.Relocation.segments_of_reloc + |> SMap.of_list + |> SMap.map (fun (first, last) -> Exp.Typed.extract ~first ~last masked) + ) + |> Option.value ~default:SMap.empty + in { state; reg_writes; mem_reads; dwarf; segments } (** Expand a Trace variable to a State expression, using the context *) diff --git a/src/trace/instr.ml b/src/trace/instr.ml index 012273a5..e53848b1 100644 --- a/src/trace/instr.ml +++ b/src/trace/instr.ml @@ -69,7 +69,7 @@ type t = { read : Reg.t list; written : Reg.t list; opcode : BytesSeq.t; - segments: Elf.Relocations.exp SMap.t + relocation : Elf.Relocations.rel option; } let dedup_regs = List.sort_uniq State.Reg.compare @@ -102,31 +102,18 @@ let trace_meta_of_trace trace = { trace; jump_target = !jump; read = dedup_regs !read; written = dedup_regs !written } (** Generate full instruction data from a list of traces *) -let of_traces (opcode: BytesSeq.t * Elf.Relocations.rel option) traces = +let of_traces ((opcode: BytesSeq.t), (relocation: Elf.Relocations.rel option)) traces = let traces = List.map trace_meta_of_trace traces in - let opcode, reloc = opcode in let length = BytesSeq.length opcode in let read = dedup_regs @@ List.concat_map (fun (tr : trace_meta) -> tr.read) traces in let written = dedup_regs @@ List.concat_map (fun (tr : trace_meta) -> tr.written) traces in - let segments = match reloc with - | None -> SMap.empty - | Some reloc -> - reloc.target - |> Isla.Server.segments_of_reloc - |> List.map (fun (v, (lo, hi)) -> (v, Elf.Relocations.Mask(reloc.value, hi, lo))) - |> SMap.of_list - in - { traces; length; read; written; opcode; segments } + { traces; length; read; written; opcode; relocation } (** Pretty print the representation of an instruction *) let pp instr = let open Pp in - (prefix 4 1 !^"Segments:" @@ - separate_map hardline - (pair string Elf.Relocations.pp_exp) - (SMap.to_list instr.segments)) - ^^ hardline ^^ + !^"Relocation" ^^ optional Elf.Relocations.pp_rel instr.relocation ^^ hardline ^^ separate_mapi hardline (fun i trc -> prefix 4 1 (dprintf "Trace %d:" i) (Base.pp trc.trace)) instr.traces diff --git a/src/trace/run.ml b/src/trace/run.ml index df944ca7..45c9252d 100644 --- a/src/trace/run.ml +++ b/src/trace/run.ml @@ -114,17 +114,17 @@ let event_mut ~(ctxt : ctxt) (event : Base.event) = | Assert exp -> State.push_assert ctxt.state (expand ~ctxt exp) (** Run a trace on the provided state by mutation. Enable typing if [dwarf] is provided *) -let trace_mut ?dwarf ?segments_map (state : State.t) (events : Base.t) : unit = +let trace_mut ?dwarf ?relocation (state : State.t) (events : Base.t) : unit = assert (not @@ State.is_locked state); info "Running trace with typing %s" (if dwarf <> None then "on" else "off"); - let ctxt = Context.make_context ?dwarf ?segments_map state in + let ctxt = Context.make_context ?dwarf ?relocation state in List.iter (event_mut ~ctxt) events; Vec.iter (fun (reg, tval) -> State.Reg.Map.set state.regs reg tval) ctxt.reg_writes (** Run a trace on the provided state by returning an updated copy.*) -let trace ?dwarf ?segments_map (start : State.t) (events : Base.t) : State.t = +let trace ?dwarf ?relocation (start : State.t) (events : Base.t) : State.t = let state = State.copy start in - trace_mut ?dwarf ?segments_map state events; + trace_mut ?dwarf ?relocation state events; State.lock state; state @@ -133,12 +133,12 @@ let trace ?dwarf ?segments_map (start : State.t) (events : Base.t) : State.t = Thus this function automatically handle moving the PC for fall-through instruction *) -let trace_pc_mut ?dwarf ?segments_map ~(next : int) (state : State.t) (events : Base.t) : unit = +let trace_pc_mut ?dwarf ?relocation ~(next : int) (state : State.t) (events : Base.t) : unit = let pc = Arch.pc () in let rec is_touching_pc : Base.t -> bool = function | [] -> false | WriteReg { reg; _ } :: _ when reg = pc -> true | _ :: l -> is_touching_pc l in - trace_mut ?dwarf ?segments_map state events; + trace_mut ?dwarf ?relocation state events; if is_touching_pc events then State.concretize_pc ~pc state else State.bump_pc ~pc state next From 3832eb4c8ff55a6e251c7a29a6239ceeb10328ef Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Wed, 15 Jan 2025 22:28:17 +0100 Subject: [PATCH 27/89] todo --- notes-TODO | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/notes-TODO b/notes-TODO index 66ca908d..b1a58b67 100644 --- a/notes-TODO +++ b/notes-TODO @@ -2,4 +2,8 @@ Symbolic symbol table - value of symbol?? (we don't have segments in relocatable files) - can probably keep the same api, but addresses are symbolic -Instruction fetch: is it sound? (rewriting .text) \ No newline at end of file +Instruction fetch: is it sound? (rewriting .text) + +Z3 finding unique solution +- Get model -> assert not model -> check now it is unsat +- Need to extend the protocol probably \ No newline at end of file From 83414d1a9df9aacb3579cc3e0675ab1fba5326eb Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 16 Jan 2025 16:55:27 +0100 Subject: [PATCH 28/89] Smarter context full simplifier --- src/ast/manip.ml | 7 ++++ src/bin/main.ml | 1 + src/bin/readDwarf.ml | 1 + src/z3/z3.ml | 76 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 85 insertions(+) diff --git a/src/ast/manip.ml b/src/ast/manip.ml index 46a7ce9c..b21dffe7 100644 --- a/src/ast/manip.ml +++ b/src/ast/manip.ml @@ -350,3 +350,10 @@ let check_no_mem (e : ('a, 'v, 'b, 'm) exp) : bool = let expect_no_mem ?(handler = fun () -> failwith "Expected no mem") : ('a, 'v, 'b, 'm1) exp -> ('a, 'v, 'b, 'm2) exp = fun exp -> if check_no_mem exp then Obj.magic exp else handler () + + +let all_subterms e = + let rec recurse acc e = + e :: direct_exp_fold_left_exp recurse acc e + in + direct_exp_fold_left_exp recurse [] e \ No newline at end of file diff --git a/src/bin/main.ml b/src/bin/main.ml index 714c0fc2..132b8f4a 100644 --- a/src/bin/main.ml +++ b/src/bin/main.ml @@ -74,6 +74,7 @@ let commands = (* Run.FuncRD.command; *) Other_cmds.CopySourcesCmd.command; (* BranchTable.command; *) + Z3.Test.command; ] let _ = Printexc.record_backtrace Config.enable_backtrace diff --git a/src/bin/readDwarf.ml b/src/bin/readDwarf.ml index 957552f8..6be5927f 100644 --- a/src/bin/readDwarf.ml +++ b/src/bin/readDwarf.ml @@ -73,6 +73,7 @@ let commands = Run.Block.command; (* Run.FuncRD.command; *) CopySourcesCmd.command; + Z3.Test.command; ] let _ = Printexc.record_backtrace Config.enable_backtrace diff --git a/src/z3/z3.ml b/src/z3/z3.ml index 45321719..73733720 100644 --- a/src/z3/z3.ml +++ b/src/z3/z3.ml @@ -395,6 +395,10 @@ module type S = sig This results in two calls to the SMT solver. one with {!check} and one with {!check_sat} *) val check_both : server -> exp -> bool option + val simplify_subterms : server -> exp -> exp + + val simplify_subterms_decl : server -> declared:unit Htbl.t -> exp -> exp + (*****************************************************************************) (*****************************************************************************) (*****************************************************************************) @@ -415,6 +419,8 @@ module type S = sig (** Do a standalone check of whether the set of assertion is sat *) val check_sat_full : exp list -> bool option + + val simplify_subterms_full : ?hyps:exp list -> exp -> exp end module SimpContext = ContextCounter (struct @@ -516,4 +522,74 @@ module Make (Var : Var) : S with type var = Var.t = struct | _ -> ( match check_sat serv e with Some false as f -> f | _ -> None ) + + + let rec simplify_subterms serv (e : Exp.t) : Exp.t = + e |> Ast.Manip.all_subterms + |> List.find_opt (fun t -> + let et = Typed.get_type e in + let tt = Typed.get_type t in + Printf.printf "Types: %t, %t\n" Pp.(top Ast.pp_ty (Ast.Manip.ty_allow_mem et)) Pp.(top Ast.pp_ty (Ast.Manip.ty_allow_mem tt)); + Typed.get_type e = Typed.get_type t && + let result = check serv Typed.(e = t) in + Printf.printf "%t\n" Pp.(top (optional bool) result); + result = Some true + ) + |> Option.map (simplify_subterms serv) + |> Option.value_fun ~default:(fun () -> + Ast.Manip.direct_exp_map_exp (simplify_subterms serv) e + ) + + let simplify_subterms_decl serv ~declared (e : Exp.t) : Exp.t = + declare_vars serv ~declared e; + simplify_subterms serv e + + let simplify_subterms_full ?(hyps = []) e = + let serv = ensure_started_get () in + SimpContext.openc (); + let declared = Htbl.create 10 in + List.iter (send_assert_decl ~declared serv) hyps; + let res = simplify_subterms_decl ~declared serv e in + SimpContext.closec (); + res +end + +module Test = struct + module Var = struct + include String + + let pp = Pp.string + + let ty _ = Ast.Ty_BitVec 64 + + let of_string = Fun.id + end + + module Typed = Exp.Typed + module Exp = Exp.Make (Var) + + module Z3Test = Make (Var) + + let test () = + let x = Typed.var ~typ:(Ast.Ty_BitVec 64) "x" in + let bvint64 = Typed.bits_int ~size:64 in + let constr = Typed.(binop (Ast.Bvcomp Ast.Bvult) x (bvint64 16)) in + let exp = Typed.(concat [bits_int ~size:60 0; extract x ~first:0 ~last:3]) in + let simplified = Z3Test.simplify_subterms_full ~hyps:[constr] exp in + Printf.printf "original: %t\n" (Pp.top Exp.pp exp); + Printf.printf "simplified: %t\n" (Pp.top Exp.pp simplified); + + + open Cmdliner + open Config.CommonOpt + + + let term = Term.(CmdlinerHelper.func_options (config :: z3 :: comopts) test $ const ()) + + let info = + let doc = "" + in + Cmd.(info "z3-test" ~doc ~exits) + + let command = (term, info) end From 75185eed6e40f7921ed06f66425da84d6d0b05e3 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 16 Jan 2025 18:53:56 +0100 Subject: [PATCH 29/89] Simplify relocated addresses --- src/elf/relocations.ml | 20 ++++++++++++++------ src/state/base.ml | 13 ++++++++++++- src/state/base.mli | 4 ++++ src/trace/context.ml | 15 +++++++++------ 4 files changed, 39 insertions(+), 13 deletions(-) diff --git a/src/elf/relocations.ml b/src/elf/relocations.ml index e9295717..1a60a8bd 100644 --- a/src/elf/relocations.ml +++ b/src/elf/relocations.ml @@ -15,10 +15,14 @@ type exp = (* | AssertRange of (exp * int * int) *) (* | Mask of (exp * int * int) *) +type assertion = +| Range of int64 * int64 +| Alignment of int + type rel = { target : target; value : exp; - range: (int64 * int64) option; + assertions: assertion list; mask : int * int; } @@ -33,14 +37,18 @@ let exp_of_linksem = | Elf_symbolic.BinOp (x, op, y) -> BinOp (value_of_linksem x, op, value_of_linksem y) | Elf_symbolic.UnOp (op, x) -> UnOp (op, value_of_linksem x) | Elf_symbolic.AssertRange (_, _, _) -> Raise.fail "AssertRange should not occur in value expression" + | Elf_symbolic.AssertAlignment (_, _) -> Raise.fail "AssertAlignment should not occur in value expression" | Elf_symbolic.Mask (_, _, _) -> Raise.fail "AssertRange should not occur in value expression" in function | Elf_symbolic.Mask (e, hi, lo) -> - let e, range = match e with - | Elf_symbolic.AssertRange (e, min, max) -> e, Some (Z.to_int64 min, Z.to_int64 max) - | e -> e, None - in - fun target -> {target; range; mask = (Z.to_int hi, Z.to_int lo); value = value_of_linksem e} + let rec extract_asserts e = + match e with + | Elf_symbolic.AssertRange (e, min, max) -> let (e, a) = extract_asserts e in e, Range (Z.to_int64 min, Z.to_int64 max) :: a + | Elf_symbolic.AssertAlignment (e, bits) -> let (e, a) = extract_asserts e in e, Alignment (Z.to_int bits) :: a + | e -> e, [] + in + let e, assertions = extract_asserts e in + fun target -> {target; assertions; mask = (Z.to_int hi, Z.to_int lo); value = value_of_linksem e} | _ -> Raise.fail "Expression does not have Mask in top level" diff --git a/src/state/base.ml b/src/state/base.ml index 55606fc8..29f6ab80 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -326,6 +326,7 @@ type t = { mutable regs : Tval.t Reg.Map.t; (** The values and types of registers *) read_vars : Tval.t Vec.t; (** The results of reads made since base state *) mutable asserts : exp list; (** Only asserts since base_state *) + mutable relocation_asserts : exp list; (** Only asserts since base_state *) mem : Mem.t; elf : Elf.File.t option; (** Optionally an ELF file, this may be used when running instructions on @@ -368,6 +369,7 @@ let make ?elf () = regs = Reg.Map.init @@ Tval.of_reg id; read_vars = Vec.empty (); asserts = []; + relocation_asserts = []; mem = Mem.empty (); elf; fenv = Fragment.Env.make (); @@ -389,6 +391,7 @@ let copy ?elf state = regs = Reg.Map.copy state.regs; read_vars = Vec.empty (); asserts = (if locked then [] else state.asserts); + relocation_asserts = (if locked then [] else state.relocation_asserts); mem = (if locked then Mem.from state.mem else Mem.copy state.mem); elf = Option.(elf ||| state.elf); fenv = Fragment.Env.copy state.fenv; @@ -405,6 +408,13 @@ let push_assert (s : t) (e : exp) = assert (not @@ is_locked s); s.asserts <- e :: s.asserts +let push_relocation_assert (s : t) (e : exp) = + assert (not @@ is_locked s); + s.relocation_asserts <- e :: s.relocation_asserts + +let rec load_relocation_asserts (s : t) = + s.relocation_asserts @ (s.base_state |> Option.map load_relocation_asserts |> Option.value ~default:[]) + let set_asserts state asserts = assert (not @@ is_locked state); state.asserts <- asserts @@ -476,7 +486,8 @@ let read ~provenance ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t Option.value exp ~default:(Exp.of_var var) let read_noprov ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t = - (* let addr = Z3St.simplify_full addr in *) + let hyps = load_relocation_asserts s in + let addr = Z3St.simplify_subterms_full ~hyps addr in let sym, conc = Sums.split_concrete addr in debug "Address: %t + %t" Pp.(top (optional Exp.pp) sym) Pp.(top BitVec.pp_smt conc); if ConcreteEval.is_concrete addr || Vec.length s.mem.frags = 0 then diff --git a/src/state/base.mli b/src/state/base.mli index bcba6c5f..f4945039 100644 --- a/src/state/base.mli +++ b/src/state/base.mli @@ -288,6 +288,7 @@ type t = private { mutable regs : Tval.t Reg.Map.t; (** The values and types of registers *) read_vars : Tval.t Vec.t; (** The results of reads made since base state *) mutable asserts : exp list; (** Only asserts since base_state *) + mutable relocation_asserts : exp list; (** Only asserts since base_state *) mem : Mem.t; elf : Elf.File.t option; (** Optionally an ELF file, this may be used when running instructions on @@ -390,6 +391,9 @@ val copy_if_locked : ?elf:Elf.File.t -> t -> t (** Add an assertion to a state *) val push_assert : t -> exp -> unit +(** Add an assertion to a state *) +val push_relocation_assert : t -> exp -> unit + (** Set a state to be impossible (single [false] assert). *) val set_impossible : t -> unit diff --git a/src/trace/context.ml b/src/trace/context.ml index d03b0732..1734f041 100644 --- a/src/trace/context.ml +++ b/src/trace/context.ml @@ -82,22 +82,25 @@ let make_context ?dwarf ?relocation state = |> Option.map (fun relocation -> let open Elf.Relocations in let value = exp_of_relocation_exp relocation.value in - Option.iter (fun (min, max) -> + List.iter (function + | Range (min, max) -> let min = Exp.Typed.bits @@ BitVec.of_z ~size:64 @@ Z.of_int64 min in let max = Exp.Typed.bits @@ BitVec.of_z ~size:64 @@ Z.of_int64 max in let cond1 = Exp.Typed.(binop (Bvcomp Bvsle) min value) in let cond2 = Exp.Typed.(binop (Bvcomp Bvslt) value max) in - let cond = Exp.Typed.(manyop And [cond1; cond2]) in - State.push_assert state cond; - ) relocation.range; + State.push_relocation_assert state Exp.Typed.(manyop And [cond1; cond2]) + | Alignment b -> + let last = b-1 in + State.push_relocation_assert state Exp.Typed.(extract ~first:0 ~last value = bits_int ~size:b 0) + ) relocation.assertions; let (last, first) = relocation.mask in let masked = Exp.Typed.extract ~first ~last value in - + relocation.target |> Isla.Relocation.segments_of_reloc |> SMap.of_list |> SMap.map (fun (first, last) -> Exp.Typed.extract ~first ~last masked) - ) + ) |> Option.value ~default:SMap.empty in { state; reg_writes; mem_reads; dwarf; segments } From ad13b155bf14026cd8d1da92e7b67339833f533e Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Fri, 17 Jan 2025 16:15:45 +0100 Subject: [PATCH 30/89] Fix immediate encoding --- src/isla/relocation.ml | 4 ++-- src/state/base.ml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/isla/relocation.ml b/src/isla/relocation.ml index 21f7c268..3e3004ad 100644 --- a/src/isla/relocation.ml +++ b/src/isla/relocation.ml @@ -26,9 +26,9 @@ let pp_opcode_with_segments (b, r) = ^^ !^" x0:12 " ^^ BitVec.pp_smt (BitVec.extract 0 9 bits) | Abi_aarch64_symbolic_relocation.LDST -> (* TODO different width loads, alignment *) - BitVec.pp_smt (BitVec.extract 22 31 bits) + BitVec.pp_smt (BitVec.extract 20 31 bits) ^^ !^" x0:10 " ^^ - BitVec.pp_smt (BitVec.extract 0 11 bits) + BitVec.pp_smt (BitVec.extract 0 9 bits) | Abi_aarch64_symbolic_relocation.CALL -> BitVec.pp_smt (BitVec.extract 26 31 bits) ^^ !^" x0:26 " diff --git a/src/state/base.ml b/src/state/base.ml index 29f6ab80..cfcef658 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -488,6 +488,7 @@ let read ~provenance ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t let read_noprov ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t = let hyps = load_relocation_asserts s in let addr = Z3St.simplify_subterms_full ~hyps addr in + let addr = Z3St.simplify_full addr in let sym, conc = Sums.split_concrete addr in debug "Address: %t + %t" Pp.(top (optional Exp.pp) sym) Pp.(top BitVec.pp_smt conc); if ConcreteEval.is_concrete addr || Vec.length s.mem.frags = 0 then From 17424037276df13a5aedcc40c23ee9810bc5e1f6 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Fri, 17 Jan 2025 19:05:06 +0100 Subject: [PATCH 31/89] Write with section offset --- src/state/base.ml | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/src/state/base.ml b/src/state/base.ml index cfcef658..be7d6474 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -485,15 +485,45 @@ let read ~provenance ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t Option.iter (set_read s (Var.expect_readvar var)) exp; Option.value exp ~default:(Exp.of_var var) -let read_noprov ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t = +let address_to_exp ~(size : int) (addr : Elf.Address.t) = + let first = 0 in + let last = size - 1 in + Typed.( + extract ~last ~first + (Exp.of_var @@ Var.Section addr.section) + + + bits_int ~size addr.offset + ) + +let eval_address (s : t) (addr: Exp.t) : Elf.Address.t option = + let ctxt0 = function Var.Section _ -> Value.bv @@ BitVec.of_int ~size:64 0 | _ -> raise ConcreteEval.Symbolic in + let offset = addr |> ConcreteEval.eval ~ctxt:ctxt0 |> Value.expect_bv |> BitVec.to_int in + let sections = Hashtbl.create 10 in + Ast.Manip.exp_iter_var (function Var.Section s -> Hashtbl.add sections s () | _ -> ()) addr; + let hyps = load_relocation_asserts s in - let addr = Z3St.simplify_subterms_full ~hyps addr in - let addr = Z3St.simplify_full addr in - let sym, conc = Sums.split_concrete addr in - debug "Address: %t + %t" Pp.(top (optional Exp.pp) sym) Pp.(top BitVec.pp_smt conc); - if ConcreteEval.is_concrete addr || Vec.length s.mem.frags = 0 then - read ~provenance:Ctype.Main ?ctyp s ~addr ~size - else Raise.fail "Trying to access %t in state %d: No provenance info" Pp.(tos Exp.pp addr) s.id + let size = addr |> Typed.get_type |> Typed.expect_bv in + sections |> Hashtbl.to_seq_keys |> Seq.find_map (fun section -> + let address = Elf.Address.{ section; offset } in + let expression = address_to_exp ~size address in + if Z3St.check_full ~hyps Typed.(expression = addr) = Some true then + Some address + else + None + ) + + +let read_noprov ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t = + let elf_addr = eval_address s addr in + debug "Address: %t" Pp.(top (optional Elf.Address.pp) elf_addr); + match elf_addr with + | Some elf_addr -> + let addr_size = addr |> Typed.get_type |> Typed.expect_bv in + let addr = address_to_exp ~size:addr_size elf_addr in + read ~provenance:Ctype.Main ?ctyp s ~addr ~size + | None when Vec.length s.mem.frags = 0 -> + read ~provenance:Ctype.Main ?ctyp s ~addr ~size + | None -> Raise.fail "Trying to access %t in state %d: No provenance info" Pp.(tos Exp.pp addr) s.id let write ~provenance (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) (value : Exp.t) : unit = assert (not @@ is_locked s); From f3468d476bcb286c01421117508073a9ce8974bd Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Fri, 17 Jan 2025 19:19:19 +0100 Subject: [PATCH 32/89] Fix type of pc values in state tree --- src/run/block_lib.ml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/run/block_lib.ml b/src/run/block_lib.ml index 2d548c75..3082c0c1 100644 --- a/src/run/block_lib.ml +++ b/src/run/block_lib.ml @@ -67,14 +67,14 @@ type label = | Start (** Root node of the tree *) | End of string (** Lead node of the tree, the string describe which end condition has be triggered *) - | BranchAt of int (** A Branching node at a given PC *) - | NormalAt of int (** A normal instruction at PC. Exists only if [every_instruction] is true *) + | BranchAt of Elf.Address.t (** A Branching node at a given PC *) + | NormalAt of Elf.Address.t (** A normal instruction at PC. Exists only if [every_instruction] is true *) let label_to_string = function | Start -> "Start" | End s -> Printf.sprintf "End (%s)" s - | BranchAt pc -> Printf.sprintf "Branch at 0x%x" pc - | NormalAt pc -> Printf.sprintf "Normal at 0x%x" pc + | BranchAt pc -> Printf.sprintf "Branch at %t" Pp.(tos Elf.Address.pp pc) + | NormalAt pc -> Printf.sprintf "Normal at %t" Pp.(tos Elf.Address.pp pc) let pp_label label = label |> label_to_string |> Pp.string @@ -123,11 +123,11 @@ let run ?(every_instruction = false) ?relevant (b : t) (start : State.t) : label | [state] when not every_instruction -> run_from state | [nstate] when every_instruction -> let rest = [run_from nstate] in - { state; data = NormalAt (pc_exp |> Ast.expect_bits |> BitVec.to_int); rest } + { state; data = NormalAt (State.Exp.expect_sym_address pc_exp); rest } | states -> let rest = List.map run_from states in State.Tree. - { state; data = BranchAt (pc_exp |> Ast.expect_bits |> BitVec.to_int); rest } + { state; data = BranchAt (State.Exp.expect_sym_address pc_exp); rest } ) else begin info "Reached dead code at %t" (Pp.top State.Exp.pp pc_exp); @@ -162,7 +162,7 @@ let gen_endpred ?min ?max ?loop ?(brks = []) () : State.exp -> string option = ( try Some (State.Exp.expect_sym_address pc_exp) with - _ -> None + _ -> debug "PC is sus"; None ) |> Option.map (fun pc -> debug "enpred: Evaluating PC %t" (Pp.top Elf.Address.pp pc); match (min, max, loop) with From 8d417e6f6e3a2d5bcd7f217fd92ec6948fac8ace Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Fri, 17 Jan 2025 19:26:03 +0100 Subject: [PATCH 33/89] Symbolic write --- src/state/base.ml | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/state/base.ml b/src/state/base.ml index be7d6474..480b06d7 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -530,9 +530,16 @@ let write ~provenance (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) (value : Exp. Mem.write ~provenance s.mem ~addr ~size ~exp:value let write_noprov (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) (value : Exp.t) : unit = - if ConcreteEval.is_concrete addr || Vec.length s.mem.frags = 0 then - write ~provenance:Ctype.Main s ~addr ~size value - else Raise.fail "Trying to access %t in state %d: No provenance info" Pp.(tos Exp.pp addr) s.id + let elf_addr = eval_address s addr in + debug "Address: %t" Pp.(top (optional Elf.Address.pp) elf_addr); + match elf_addr with + | Some elf_addr -> + let addr_size = addr |> Typed.get_type |> Typed.expect_bv in + let addr = address_to_exp ~size:addr_size elf_addr in + write ~provenance:Ctype.Main s ~addr ~size value + | None when Vec.length s.mem.frags = 0 -> + write ~provenance:Ctype.Main s ~addr ~size value + | None -> Raise.fail "Trying to access %t in state %d: No provenance info" Pp.(tos Exp.pp addr) s.id let reset_reg (s : t) ?(ctyp : Ctype.t option) (reg : Reg.t) : unit = assert (not @@ is_locked s); From 9cb38f4a94902e472c3004ed2a66d559f7179962 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Mon, 20 Jan 2025 18:43:15 +0000 Subject: [PATCH 34/89] Separate fragments for sections --- src/run/func.ml | 2 +- src/state/base.ml | 71 +++++++++++++++++++++++++++++++++++----------- src/state/base.mli | 3 ++ 3 files changed, 59 insertions(+), 17 deletions(-) diff --git a/src/run/func.ml b/src/run/func.ml index d41d3c03..91167c8b 100644 --- a/src/run/func.ml +++ b/src/run/func.ml @@ -66,7 +66,7 @@ let no_run_prep ~elf:elfname ~name ~entry = let abi = Arch.get_abi api in Trace.Cache.start @@ Arch.get_isla_config (); base "Computing entry state"; - let start = Init.state () |> State.copy ~elf |> abi.init in + let start = Init.state () |> State.copy ~elf |> State.init_sections ~addr_size:Arch.address_size |> abi.init in if entry then base "Entry state:\n%t" (Pp.topi State.pp start); (dwarf, elf, func, start) diff --git a/src/state/base.ml b/src/state/base.ml index 480b06d7..542a3f61 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -216,6 +216,18 @@ end type tval = Tval.t +let section_to_exp ~(size : int) (section : string) = + Typed.extract ~last:(size-1) ~first:0 + (Exp.of_var @@ Var.Section section) + + +let address_to_exp ~(size : int) (addr : Elf.Address.t) = + Typed.( + section_to_exp ~size addr.section + + + bits_int ~size addr.offset + ) + module Mem = struct module Size = Ast.Size @@ -231,20 +243,28 @@ module Mem = struct In general the stack will be the fragment 0 but this is not guaranteed. Some execution contexts may even not have any stacks.*) - type t = { mutable main : Fragment.t; frags : (Exp.t * Fragment.t) Vec.t } + type t = { + mutable main : Fragment.t; + frags : (Exp.t * Fragment.t) Vec.t; + sections : (string, provenance) Hashtbl.t; (* mapping sections to their fragments *) + } (** Get the main fragment of memory *) - let get_main { main; frags = _ } = main + let get_main { main; frags = _; sections = _ } = main (** Empty memory, every address is unbound *) - let empty () = { main = Fragment.empty; frags = Vec.empty () } + let empty () = { main = Fragment.empty; frags = Vec.empty (); sections = Hashtbl.create 10 } (** Build a new memory from the old one by keeping the old one as a base *) let from mem = - { main = Fragment.from mem.main; frags = Vec.map (Pair.map Fun.id Fragment.from) mem.frags } + { + main = Fragment.from mem.main; + frags = Vec.map (Pair.map Fun.id Fragment.from) mem.frags; + sections = Hashtbl.copy mem.sections; + } (** Copy the memory so that it can be mutated separately *) - let copy mem = { main = mem.main; frags = Vec.copy mem.frags } + let copy mem = { main = mem.main; frags = Vec.copy mem.frags; sections = Hashtbl.copy mem.sections } (** Add a new fragment with the specified base *) let new_frag mem base = @@ -312,11 +332,28 @@ module Mem = struct Vec.ppi (fun (base, frag) -> Pp.infix 2 1 colon (Exp.pp base) (Fragment.pp_raw frag)) mem.frags ); + ("sections", hashtbl string Ctype.pp_provenance mem.sections) ] (** Check is this memory is empty which means all addresses are undefined *) let is_empty mem = Fragment.is_empty mem.main && Vec.for_all (Pair.for_all Fun.ctrue Fragment.is_empty) mem.frags + + + let create_section_frag ~addr_size mem section = + match Hashtbl.find_opt mem.sections section with + | Some prov -> + info "Fragment for section %s already exists" section; + prov + | None -> + let base = section_to_exp ~size:addr_size section in + let prov = new_frag mem base in + Hashtbl.replace mem.sections section prov; + prov + + let get_section_provenance mem section = + Hashtbl.find_opt mem.sections section + |> Option.value ~default:Ctype.Main end type t = { @@ -404,6 +441,15 @@ let copy ?elf state = let copy_if_locked ?elf state = if is_locked state then copy ?elf state else state +let init_sections ~addr_size state = + let state = copy_if_locked state in + let _ = Option.( + let+ elf = state.elf in + Elf.SymTable.iter elf.symbols @@ fun sym -> + let _ = Mem.create_section_frag ~addr_size state.mem sym.addr.section in () + ) in + state + let push_assert (s : t) (e : exp) = assert (not @@ is_locked s); s.asserts <- e :: s.asserts @@ -485,15 +531,6 @@ let read ~provenance ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t Option.iter (set_read s (Var.expect_readvar var)) exp; Option.value exp ~default:(Exp.of_var var) -let address_to_exp ~(size : int) (addr : Elf.Address.t) = - let first = 0 in - let last = size - 1 in - Typed.( - extract ~last ~first - (Exp.of_var @@ Var.Section addr.section) - + - bits_int ~size addr.offset - ) let eval_address (s : t) (addr: Exp.t) : Elf.Address.t option = let ctxt0 = function Var.Section _ -> Value.bv @@ BitVec.of_int ~size:64 0 | _ -> raise ConcreteEval.Symbolic in @@ -520,7 +557,8 @@ let read_noprov ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t = | Some elf_addr -> let addr_size = addr |> Typed.get_type |> Typed.expect_bv in let addr = address_to_exp ~size:addr_size elf_addr in - read ~provenance:Ctype.Main ?ctyp s ~addr ~size + let provenance = Mem.get_section_provenance s.mem elf_addr.section in + read ~provenance ?ctyp s ~addr ~size | None when Vec.length s.mem.frags = 0 -> read ~provenance:Ctype.Main ?ctyp s ~addr ~size | None -> Raise.fail "Trying to access %t in state %d: No provenance info" Pp.(tos Exp.pp addr) s.id @@ -536,7 +574,8 @@ let write_noprov (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) (value : Exp.t) : | Some elf_addr -> let addr_size = addr |> Typed.get_type |> Typed.expect_bv in let addr = address_to_exp ~size:addr_size elf_addr in - write ~provenance:Ctype.Main s ~addr ~size value + let provenance = Mem.get_section_provenance s.mem elf_addr.section in + write ~provenance s ~addr ~size value | None when Vec.length s.mem.frags = 0 -> write ~provenance:Ctype.Main s ~addr ~size value | None -> Raise.fail "Trying to access %t in state %d: No provenance info" Pp.(tos Exp.pp addr) s.id diff --git a/src/state/base.mli b/src/state/base.mli index f4945039..5e281f57 100644 --- a/src/state/base.mli +++ b/src/state/base.mli @@ -386,6 +386,9 @@ val copy : ?elf:Elf.File.t -> t -> t The returned state is always unlocked *) val copy_if_locked : ?elf:Elf.File.t -> t -> t +val init_sections : addr_size:int -> t -> t + + (** {1 State convenience manipulation } *) (** Add an assertion to a state *) From 7e10b1d9466a4f98d2f8d9d09a5e3467c7823fa3 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Mon, 20 Jan 2025 22:18:51 +0000 Subject: [PATCH 35/89] Initialise symvbols --- src/state/base.ml | 118 ++++++++++++++++++++++++++++++++++--------- src/state/base.mli | 16 ++++++ src/trace/context.ml | 33 ++---------- 3 files changed, 116 insertions(+), 51 deletions(-) diff --git a/src/state/base.ml b/src/state/base.ml index 542a3f61..80a1c2a8 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -183,6 +183,18 @@ module Exp = struct in let offset = BitVec.to_int conc in Elf.Address.{ section; offset } + + let of_section ~(size : int) (section : string) = + Typed.extract ~last:(size-1) ~first:0 + (of_var @@ Var.Section section) + + + let of_address ~(size : int) (addr : Elf.Address.t) = + Typed.( + of_section ~size addr.section + + + bits_int ~size addr.offset + ) end type exp = Exp.t @@ -216,17 +228,71 @@ end type tval = Tval.t -let section_to_exp ~(size : int) (section : string) = - Typed.extract ~last:(size-1) ~first:0 - (Exp.of_var @@ Var.Section section) - +module Relocation = struct + type t = { + value: Exp.t; + asserts: Exp.t list; + target: Elf.Relocations.target; + } -let address_to_exp ~(size : int) (addr : Elf.Address.t) = - Typed.( - section_to_exp ~size addr.section - + - bits_int ~size addr.offset - ) + let rec exp_of_relocation_exp: Elf.Relocations.exp -> exp = + let f = exp_of_relocation_exp in function + | Section s -> Exp.of_var (Var.Section s) (* TODO size? *) + | Const x -> Typed.bits (BitVec.of_int x ~size:64) (* TODO size? *) + | BinOp (a, Add, b) -> Typed.(f a + f b) + | BinOp (a, Sub, b) -> Typed.(f a - f b) + | BinOp (a, And, b) -> Typed.manyop (AstGen.Ott.Bvmanyarith AstGen.Ott.Bvand) [f a; f b] + | UnOp (Not, b) -> Typed.unop AstGen.Ott.Bvnot (f b) + + let of_elf (relocation: Elf.Relocations.rel) = + let open Elf.Relocations in + let value = exp_of_relocation_exp relocation.value in + let asserts = List.map (function + | Range (min, max) -> + let min = Typed.bits @@ BitVec.of_z ~size:64 @@ Z.of_int64 min in + let max = Typed.bits @@ BitVec.of_z ~size:64 @@ Z.of_int64 max in + let cond1 = Typed.(binop (Bvcomp Bvsle) min value) in + let cond2 = Typed.(binop (Bvcomp Bvslt) value max) in + Typed.(manyop And [cond1; cond2]) + | Alignment b -> + let last = b-1 in + Typed.(extract ~first:0 ~last value = bits_int ~size:b 0) + ) relocation.assertions in + let (last, first) = relocation.mask in + let value = Typed.extract ~first ~last value in + { value; asserts; target = relocation.target } + + module IMap = Map.Make (Int) + + let exp_of_data (data : Elf.Symbol.data) = + let size = 8 * (BytesSeq.length data.data) in + (* Assume little endian here *) + let bv = BytesSeq.getbvle ~size data.data 0 in + let exp = Typed.bits bv in + IMap.fold (fun offset rel (exp, asserts) -> + let relocation = of_elf rel in + let pos = 8 * offset in + let width = match relocation.target with + | AArch64 Abi_aarch64_symbolic_relocation.Data640 -> 64 + | AArch64 Abi_aarch64_symbolic_relocation.Data320 -> 32 + | _ -> Raise.fail "Unsopported relocation" + in + let before = if pos > 0 then + [Typed.extract ~first:0 ~last:(pos-1) exp] + else + [] + in + let after = if pos + width < size then + [Typed.extract ~first:(pos+width) ~last:(size-1) exp] + else + [] + in + ( + Typed.concat (before @ relocation.value :: after), + relocation.asserts @ asserts + ) + ) data.relocations (exp, []) +end module Mem = struct module Size = Ast.Size @@ -346,7 +412,7 @@ module Mem = struct info "Fragment for section %s already exists" section; prov | None -> - let base = section_to_exp ~size:addr_size section in + let base = Exp.of_section ~size:addr_size section in let prov = new_frag mem base in Hashtbl.replace mem.sections section prov; prov @@ -441,15 +507,6 @@ let copy ?elf state = let copy_if_locked ?elf state = if is_locked state then copy ?elf state else state -let init_sections ~addr_size state = - let state = copy_if_locked state in - let _ = Option.( - let+ elf = state.elf in - Elf.SymTable.iter elf.symbols @@ fun sym -> - let _ = Mem.create_section_frag ~addr_size state.mem sym.addr.section in () - ) in - state - let push_assert (s : t) (e : exp) = assert (not @@ is_locked s); s.asserts <- e :: s.asserts @@ -469,6 +526,21 @@ let set_impossible state = assert (not @@ is_locked state); state.asserts <- [Typed.false_] +let init_sections ~addr_size state = + let state = copy_if_locked state in + let _ = Option.( + let+ elf = state.elf in + Elf.SymTable.iter elf.symbols @@ fun sym -> + if sym.typ = Elf.Symbol.OBJECT then + let provenance = Mem.create_section_frag ~addr_size state.mem sym.addr.section in + let addr = Exp.of_address ~size:addr_size sym.addr in + let size = Ast.Size.of_bytes sym.size in + let (exp, asserts) = Relocation.exp_of_data sym.data in + Mem.write ~provenance state.mem ~addr ~size ~exp; + List.iter (push_relocation_assert state) asserts; + ) in + state + let map_mut_exp (f : exp -> exp) s : unit = assert (not @@ is_locked s); Reg.Map.map_mut_current (Tval.map_exp f) s.regs; @@ -542,7 +614,7 @@ let eval_address (s : t) (addr: Exp.t) : Elf.Address.t option = let size = addr |> Typed.get_type |> Typed.expect_bv in sections |> Hashtbl.to_seq_keys |> Seq.find_map (fun section -> let address = Elf.Address.{ section; offset } in - let expression = address_to_exp ~size address in + let expression = Exp.of_address ~size address in if Z3St.check_full ~hyps Typed.(expression = addr) = Some true then Some address else @@ -556,7 +628,7 @@ let read_noprov ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t = match elf_addr with | Some elf_addr -> let addr_size = addr |> Typed.get_type |> Typed.expect_bv in - let addr = address_to_exp ~size:addr_size elf_addr in + let addr = Exp.of_address ~size:addr_size elf_addr in let provenance = Mem.get_section_provenance s.mem elf_addr.section in read ~provenance ?ctyp s ~addr ~size | None when Vec.length s.mem.frags = 0 -> @@ -573,7 +645,7 @@ let write_noprov (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) (value : Exp.t) : match elf_addr with | Some elf_addr -> let addr_size = addr |> Typed.get_type |> Typed.expect_bv in - let addr = address_to_exp ~size:addr_size elf_addr in + let addr = Exp.of_address ~size:addr_size elf_addr in let provenance = Mem.get_section_provenance s.mem elf_addr.section in write ~provenance s ~addr ~size value | None when Vec.length s.mem.frags = 0 -> diff --git a/src/state/base.mli b/src/state/base.mli index 5e281f57..665b61a9 100644 --- a/src/state/base.mli +++ b/src/state/base.mli @@ -160,6 +160,10 @@ module Exp : sig val of_reg : id -> Reg.t -> t val expect_sym_address : t -> Elf.Address.t + + val of_section : size:int -> string -> t + + val of_address : size:int -> Elf.Address.t -> t end type exp = Exp.t @@ -193,6 +197,18 @@ end type tval = Tval.t +module Relocation : sig + type t = { + value: Exp.t; + asserts: Exp.t list; + target: Elf.Relocations.target; + } + + val of_elf : Elf.Relocations.rel -> t + + val exp_of_data : Elf.Symbol.data -> (exp * exp list) +end + (** {1 State memory management } *) (** This module manages the memory part of the state. diff --git a/src/trace/context.ml b/src/trace/context.ml index 1734f041..66a045ee 100644 --- a/src/trace/context.ml +++ b/src/trace/context.ml @@ -63,16 +63,6 @@ type t = { dwarf : Dw.t option; (** Optionally DWARF information. If present, typing is enabled *) } -let rec exp_of_relocation_exp: Elf.Relocations.exp -> State.exp = - let f = exp_of_relocation_exp in function - | Section s -> State.Exp.of_var (State.Var.Section s) (* TODO put the actual value there, size? *) - | Const x -> Exp.Typed.bits (BitVec.of_int x ~size:64) (* TODO size? *) - | BinOp (a, Add, b) -> Exp.Typed.(f a + f b) - | BinOp (a, Sub, b) -> Exp.Typed.(f a - f b) - | BinOp (a, And, b) -> Exp.Typed.manyop (AstGen.Ott.Bvmanyarith AstGen.Ott.Bvand) [f a; f b] - | UnOp (Not, b) -> Exp.Typed.unop AstGen.Ott.Bvnot (f b) - (* | Mask (x, last, first) -> Exp.Typed.extract ~last ~first (f x) *) - (** Build a {!context} from a state *) let make_context ?dwarf ?relocation state = let reg_writes = Vec.empty () in @@ -80,26 +70,13 @@ let make_context ?dwarf ?relocation state = let segments = relocation |> Option.map (fun relocation -> - let open Elf.Relocations in - let value = exp_of_relocation_exp relocation.value in - List.iter (function - | Range (min, max) -> - let min = Exp.Typed.bits @@ BitVec.of_z ~size:64 @@ Z.of_int64 min in - let max = Exp.Typed.bits @@ BitVec.of_z ~size:64 @@ Z.of_int64 max in - let cond1 = Exp.Typed.(binop (Bvcomp Bvsle) min value) in - let cond2 = Exp.Typed.(binop (Bvcomp Bvslt) value max) in - State.push_relocation_assert state Exp.Typed.(manyop And [cond1; cond2]) - | Alignment b -> - let last = b-1 in - State.push_relocation_assert state Exp.Typed.(extract ~first:0 ~last value = bits_int ~size:b 0) - ) relocation.assertions; - let (last, first) = relocation.mask in - let masked = Exp.Typed.extract ~first ~last value in - - relocation.target + let State.Relocation.{value;asserts;target} = State.Relocation.of_elf relocation in + List.iter (State.push_relocation_assert state) asserts; + + target |> Isla.Relocation.segments_of_reloc |> SMap.of_list - |> SMap.map (fun (first, last) -> Exp.Typed.extract ~first ~last masked) + |> SMap.map (fun (first, last) -> Exp.Typed.extract ~first ~last value) ) |> Option.value ~default:SMap.empty in From 9d5173e6d4a5e6f60ba8ef9ec624d744b26cf50f Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Mon, 20 Jan 2025 22:46:43 +0000 Subject: [PATCH 36/89] Fix loading relocations --- src/elf/linksemRelocatable.ml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/elf/linksemRelocatable.ml b/src/elf/linksemRelocatable.ml index 2123d8f3..d822c2fa 100644 --- a/src/elf/linksemRelocatable.ml +++ b/src/elf/linksemRelocatable.ml @@ -51,7 +51,20 @@ let get_elf64_file_global_symbol_init (f: Elf_file.elf64_file) : global_symbol_i else Byte_sequence.offset_and_cut addr_offset size section.elf64_section_body in - Error.bind (get_relocs section.elf64_section_name_as_string) @@ fun relocs -> + Error.bind (get_relocs section.elf64_section_name_as_string) @@ fun (AArch64 relocs) -> + let relocs = relocs + |> Pmap.bindings_list + |> List.fold_left (fun m (pos, r) -> + let sz = size in + let open Z in + let open Compare in + if pos >= addr_offset && pos < addr_offset + sz then + Pmap.add (pos - addr_offset) r m + else + m + ) (Pmap.empty Z.compare) + |> fun x -> AArch64 x + in Error.bind data @@ fun data -> Error.bind (String_table.get_string_at name strtab) @@ fun str -> let write = Elf_file.flag_is_set Elf_section_header_table.shf_write section.elf64_section_flags in From 04e3bc6765ca5571778ac2e6b71aff6bca4fc025 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Tue, 21 Jan 2025 11:35:25 +0000 Subject: [PATCH 37/89] Handle jumps --- src/state/base.ml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/state/base.ml b/src/state/base.ml index 80a1c2a8..b6918d03 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -606,7 +606,13 @@ let read ~provenance ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t let eval_address (s : t) (addr: Exp.t) : Elf.Address.t option = let ctxt0 = function Var.Section _ -> Value.bv @@ BitVec.of_int ~size:64 0 | _ -> raise ConcreteEval.Symbolic in - let offset = addr |> ConcreteEval.eval ~ctxt:ctxt0 |> Value.expect_bv |> BitVec.to_int in + let open Option in + let* offset_exp = try + Some (ConcreteEval.eval ~ctxt:ctxt0 addr) + with + ConcreteEval.Symbolic -> None + in + let offset = offset_exp |> Value.expect_bv |> BitVec.to_int in let sections = Hashtbl.create 10 in Ast.Manip.exp_iter_var (function Var.Section s -> Hashtbl.add sections s () | _ -> ()) addr; @@ -679,9 +685,7 @@ let set_pc ~(pc : Reg.t) (s : t) (pcval : int) = let ctyp = Ctype.of_frag (Ctype.Global ".text") ~offset:pcval ~constexpr:true in set_reg s pc @@ Tval.make ~ctyp exp -(* TODO *) let set_pc_sym ~(pc : Reg.t) (s : t) (pcval : Elf.Address.t) = - (* set_pc ~pc s pcval.offset *) let exp = Typed.(var ~typ:(Ty_BitVec 64) (Var.Section pcval.section) + bits_int ~size:64 pcval.offset) in let ctyp = Ctype.of_frag (Ctype.Global ".text") ~offset:pcval.offset ~constexpr:true in set_reg s pc @@ Tval.make ~ctyp exp @@ -693,11 +697,8 @@ let bump_pc ~(pc : Reg.t) (s : t) (bump : int) = let new_pc = Elf.Address.(old_pc + bump) in set_pc_sym ~pc s new_pc -(* TODO section + offset *) let concretize_pc ~(pc : Reg.t) (s : t) = - let pc_exp = get_reg_exp s pc in - try ConcreteEval.eval pc_exp |> Value.expect_bv |> BitVec.to_int |> set_pc ~pc s - with ConcreteEval.Symbolic -> () + pc |> get_reg_exp s |> eval_address s |> Option.iter (set_pc_sym ~pc s) let set_last_pc state pc = assert (not @@ is_locked state); From 646f6a50f5021a4fb2efcf34353debcfb74dc808 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sun, 26 Jan 2025 18:24:48 +0000 Subject: [PATCH 38/89] Convert dwarf from linksem --- src/ctype/ctype.ml | 34 +++++++++++++++++----------------- src/dw/addr.ml | 6 ++++++ src/dw/func.ml | 2 +- src/dw/loc.ml | 25 +++++++++++++------------ src/dw/var.ml | 15 +++++++++------ src/utils/sym.ml | 3 +++ 6 files changed, 49 insertions(+), 36 deletions(-) create mode 100644 src/dw/addr.ml create mode 100644 src/utils/sym.ml diff --git a/src/ctype/ctype.ml b/src/ctype/ctype.ml index fab17f6b..1c876f37 100644 --- a/src/ctype/ctype.ml +++ b/src/ctype/ctype.ml @@ -81,17 +81,17 @@ end) (*****************************************************************************) (** {1 DWARF constants } *) -let vDW_ATE_address = "DW_ATE_address" |> Dwarf.base_type_attribute_encode |> Z.to_int +let vDW_ATE_address = "DW_ATE_address" |> Dwarf.base_type_attribute_encode |> Sym.to_int -let vDW_ATE_boolean = "DW_ATE_boolean" |> Dwarf.base_type_attribute_encode |> Z.to_int +let vDW_ATE_boolean = "DW_ATE_boolean" |> Dwarf.base_type_attribute_encode |> Sym.to_int -let vDW_ATE_signed = "DW_ATE_signed" |> Dwarf.base_type_attribute_encode |> Z.to_int +let vDW_ATE_signed = "DW_ATE_signed" |> Dwarf.base_type_attribute_encode |> Sym.to_int -let vDW_ATE_signed_char = "DW_ATE_signed_char" |> Dwarf.base_type_attribute_encode |> Z.to_int +let vDW_ATE_signed_char = "DW_ATE_signed_char" |> Dwarf.base_type_attribute_encode |> Sym.to_int -let vDW_ATE_unsigned = "DW_ATE_unsigned" |> Dwarf.base_type_attribute_encode |> Z.to_int +let vDW_ATE_unsigned = "DW_ATE_unsigned" |> Dwarf.base_type_attribute_encode |> Sym.to_int -let vDW_ATE_unsigned_char = "DW_ATE_unsigned_char" |> Dwarf.base_type_attribute_encode |> Z.to_int +let vDW_ATE_unsigned_char = "DW_ATE_unsigned_char" |> Dwarf.base_type_attribute_encode |> Sym.to_int (*****************************************************************************) (*****************************************************************************) @@ -372,7 +372,7 @@ type conversion_context = { env : env; potential_link_name : string option } (** Get the id of a linksem [cupdie] *) let ids_of_cupdie ((cu, _, die) : Dwarf.cupdie) : cupdie_id = - (Z.to_int cu.cu_header.cuh_offset, Z.to_int die.die_offset) + (Sym.to_int cu.cu_header.cuh_offset, Sym.to_int die.die_offset) (** Pretty print the dwarf decl type @@ -380,7 +380,7 @@ let ids_of_cupdie ((cu, _, die) : Dwarf.cupdie) : cupdie_id = let pp_decl (d : Dwarf.decl) = Pp.dprintf "File %s, line %d" (Option.value d.decl_file ~default:"?") - (d.decl_line |> Option.map Z.to_int |> Option.value ~default:0) + (d.decl_line |> Option.map Sym.to_int |> Option.value ~default:0) (** This exception is raised when the type we are trying to reach must came from another translation unit or later in the current one. @@ -402,14 +402,14 @@ let expect_some_link = Option.value_fun ~default:(fun _ -> raise LinkError) Only integers, chars and bools supported. No floating points *) let base_type_of_linksem ?size ~encoding name = - let encoding = Z.to_int encoding in + let encoding = Sym.to_int encoding in if encoding = vDW_ATE_boolean then Cbool else if encoding = vDW_ATE_signed || encoding = vDW_ATE_unsigned then let size = Option.value_fail size "In Ctype.base_type_of_linksem: integer type %s do not have a size" name in - Cint { name; signed = encoding = vDW_ATE_signed; size = Z.to_int size; ischar = false } + Cint { name; signed = encoding = vDW_ATE_signed; size = Sym.to_int size; ischar = false } else if encoding = vDW_ATE_signed_char || encoding = vDW_ATE_unsigned_char then Cint { name; signed = encoding = vDW_ATE_signed_char; size = 1; ischar = true } else Raise.fail "In Ctype.base_of_linksem: encoding %x unknown" encoding @@ -428,7 +428,7 @@ let rec field_of_linksem ~cc ((_, fname, ltyp, offseto) : linksem_field) : field debug "Processed field %t" Pp.(top (opt string) fname); let offset = match offseto with - | Some offset -> Z.to_int offset + | Some offset -> Sym.to_int offset (* assume missing offsets are zero - perhaps should only occur for union members*) | None -> 0 in @@ -469,7 +469,7 @@ and[@warning "-16"] struct_type_of_linksem ?(force_complete = false) ~cc ~cupdie match Hashtbl.find cc.env.lenv (ids_of_cupdie cupdie) with | CT (CT_struct_union (_, Atk_structure, _, msize, _, Some members)) -> let lsize = expect_some_link msize in - let size = Z.to_int lsize in + let size = Sym.to_int lsize in let cc = { cc with potential_link_name = Some name } in let struc : struc = struc_of_linksem ~cc name size members in IdMap.seti cc.env.structs id struc; @@ -485,7 +485,7 @@ and[@warning "-16"] struct_type_of_linksem ?(force_complete = false) ~cc ~cupdie | CT (CT_struct_union (_, Atk_structure, _, msize, _, Some members)) -> let size = match msize with - | Some x -> Z.to_int x + | Some x -> Sym.to_int x | None -> warn "Struct %s doesn't have size" name; 0 @@ -503,7 +503,7 @@ and[@warning "-16"] struct_type_of_linksem ?(force_complete = false) ~cc ~cupdie and enum_of_linksem ~cc:_ name llabels : enum = let labels = Hashtbl.create 5 in - List.iter (fun (_, name, value) -> Hashtbl.add labels (Z.to_int value) name) llabels; + List.iter (fun (_, name, value) -> Hashtbl.add labels (Sym.to_int value) name) llabels; { name; labels } and enum_type_of_linksem ~cc ~cupdie ~mname ~decl : unqualified = @@ -529,13 +529,13 @@ and unqualified_of_linksem ?(force_complete = false) ~cc : linksem_t -> unqualif | CT (CT_pointer (_, Some t)) -> ptr @@ of_linksem_cc ~cc t | CT (CT_pointer (_, None)) -> voidstar | CT (CT_array (_, elem, l)) -> - Array { elem = of_linksem_cc ~cc elem; dims = List.map Fun.(fst %> Option.map Z.to_int) l } + Array { elem = of_linksem_cc ~cc elem; dims = List.map Fun.(fst %> Option.map Sym.to_int) l } | CT (CT_struct_union (cupdie, Atk_structure, mname, _, decl, _)) -> struct_type_of_linksem ~force_complete ~cc ~cupdie ~mname ~decl | CT (CT_struct_union (_, Atk_union, _, size, decl, _)) -> let size = match size with - | Some s -> Z.to_int s + | Some s -> Sym.to_int s | None -> warn "%t: Sizeless union defaulting to 8 for now" Pp.(top pp_decl decl); 8 @@ -619,7 +619,7 @@ let env_of_linksem (lenv : linksem_env) : env = Option.( let+! name = mname and+ size = msize in if not @@ IdMap.mem env.structs name then - IdMap.add env.structs name @@ incomplete_struct name (Z.to_int size) |> ignore) + IdMap.add env.structs name @@ incomplete_struct name (Sym.to_int size) |> ignore) | _ -> ()) lenv; (* Third phase: Add all the type to the result environement *) diff --git a/src/dw/addr.ml b/src/dw/addr.ml new file mode 100644 index 00000000..0604a6f7 --- /dev/null +++ b/src/dw/addr.ml @@ -0,0 +1,6 @@ +include Elf.Address + +let of_sym : Sym.t -> t = function +| Dwarf.Offset (section, offset) -> { section; offset = Z.to_int offset } +| _ -> Raise.fail "expected section+offset" + diff --git a/src/dw/func.ml b/src/dw/func.ml index 18fdd676..a4d5894f 100644 --- a/src/dw/func.ml +++ b/src/dw/func.ml @@ -140,7 +140,7 @@ let of_linksem (elf : Elf.File.t) (tenv : Ctype.env) (lfun : linksem_t) = | None -> ( match lfun.ss_entry_address with | Some a -> ( - let addr = Elf.Address.{section = ".text"; offset = Nat_big_num.to_int a} in (* TODO this is wrong, need symbolic DWARF *) + let addr = Addr.of_sym a in match Elf.SymTable.of_addr_opt elf.symbols addr with | Some sym -> Some sym | None -> None diff --git a/src/dw/loc.ml b/src/dw/loc.ml index e9aacff2..5d03061a 100644 --- a/src/dw/loc.ml +++ b/src/dw/loc.ml @@ -83,44 +83,45 @@ type linksem_t = dwop list let vDW_OP_addr : int = 0x03 (** The integer value of the DW_OP_reg0 constant in DWARF standard *) -let vDW_OP_reg0 : int = Z.to_int Dwarf.vDW_OP_reg0 +let vDW_OP_reg0 : int = Sym.to_int Dwarf.vDW_OP_reg0 (** The integer value of the DW_OP_breg0 constant in DWARF standard *) -let vDW_OP_breg0 : int = Z.to_int Dwarf.vDW_OP_breg0 +let vDW_OP_breg0 : int = Sym.to_int Dwarf.vDW_OP_breg0 (** Convert a linksem location description into a {!Loc.t} Very naive for now : If the list has a single element that we can translate directly, we do. Otherwise, we dump it into the {!t.Dwarf} constructor *) let of_linksem ?(amap = Arch.dwarf_reg_map ()) (elf : Elf.File.t) : linksem_t -> t = - let int_of_oav : Dwarf.operation_argument_value -> int = function - | OAV_natural n -> Z.to_int n - | OAV_integer i -> Z.to_int i - | _ -> failwith "Expected integer argument" + let sym_of_oav : Dwarf.operation_argument_value -> Sym.t = function + | OAV_natural n -> n + | OAV_integer i -> i + | _ -> failwith "Expected integer argument" in + let int_of_oav oav = oav |> sym_of_oav |> Sym.to_int in function (* Register *) | [({ op_semantics = OpSem_reg; _ } as op)] -> - let reg_num = Z.to_int op.op_code - vDW_OP_reg0 in + let reg_num = Sym.to_int op.op_code - vDW_OP_reg0 in if reg_num >= Array.length amap then failwith (Printf.sprintf "Loc.of_linksem: register number %d unknown, code %x, name %s" reg_num - (Z.to_int op.op_code) op.op_string) + (Sym.to_int op.op_code) op.op_string) else Register amap.(reg_num) (* RegisterOffset *) | [({ op_semantics = OpSem_breg; op_argument_values = [arg]; _ } as op)] -> - let reg_num = Z.to_int op.op_code - vDW_OP_breg0 in + let reg_num = Sym.to_int op.op_code - vDW_OP_breg0 in if reg_num >= Array.length amap then failwith (Printf.sprintf "Loc.of_linksem: register number %d unknown, code %x, name %s" reg_num - (Z.to_int op.op_code) op.op_string) + (Sym.to_int op.op_code) op.op_string) else RegisterOffset (amap.(reg_num), int_of_oav arg) (* StackFrame *) | [{ op_semantics = OpSem_fbreg; op_argument_values = [arg]; _ }] -> StackFrame (int_of_oav arg) (* Global *) | [{ op_semantics = OpSem_lit; op_code = code; op_argument_values = [arg]; _ }] as ops - when Z.to_int code = vDW_OP_addr -> ( - let addr = Elf.Address.{ section = ".data"; offset = int_of_oav arg } in (* TODO this is wrong, need symbolic DWARF*) + when Sym.to_int code = vDW_OP_addr -> ( + let addr = Addr.of_sym @@ sym_of_oav arg in try Global (Elf.SymTable.of_addr_with_offset elf.symbols @@ addr) with Not_found -> warn "Symbol at 0x%x not found in Loc.of_linksem" (int_of_oav arg); diff --git a/src/dw/var.ml b/src/dw/var.ml index 0e6d86d5..e453d61b 100644 --- a/src/dw/var.ml +++ b/src/dw/var.ml @@ -45,21 +45,24 @@ (** This module contain all the definition to handle local and global variables as defined in the DWARF information of the target file *) +type range = Addr.t * Addr.t option + (** Type of a DWARF variable *) -type t = { name : string; param : bool; ctype : Ctype.t; locs : ((int * int) * Loc.t) list } +type t = { name : string; param : bool; ctype : Ctype.t; locs : (range * Loc.t) list } (** Type of a DWARF variable in linksem *) type linksem_t = Dwarf.sdt_variable_or_formal_parameter (** Merge contiguous location lists *) let rec loc_merge = function - | ((a1, b1), d1) :: ((a2, b2), d2) :: l when b1 = a2 && Loc.compare d1 d2 = 0 -> + | ((a1, b1), d1) :: ((a2, b2), d2) :: l when b1 = Some a2 && Loc.compare d1 d2 = 0 -> loc_merge (((a1, b2), d1) :: l) | a :: l -> a :: loc_merge l | [] -> [] -(** Convert from Z.t to int, if there is an overflow, returns Int.max_int instead of throwing *) -let clamp_z z = try Z.to_int z with Z.Overflow when Z.compare z Z.zero > 0 -> Int.max_int +let end_addr_of_sym = function +| Dwarf.Absolute z when Z.compare z (Z.of_int Int.max_int) > 0 -> None +| x -> Some (Addr.of_sym x) (** Create a DWARF variable from its linksem counterpart *) let of_linksem (elf : Elf.File.t) (env : Ctype.env) (lvar : linksem_t) : t = @@ -72,7 +75,7 @@ let of_linksem (elf : Elf.File.t) (env : Ctype.env) (lvar : linksem_t) : t = in let locs = lvar.svfp_locations |> Option.value ~default:[] - |> List.map (fun (a, b, l) -> ((Z.to_int a, clamp_z b), Loc.of_linksem elf l)) + |> List.map (fun (a, b, l) -> ((Addr.of_sym a, end_addr_of_sym b), Loc.of_linksem elf l)) |> loc_merge in { name; param; ctype; locs } @@ -85,5 +88,5 @@ let pp_raw v = [ ("name", string v.name); ("ctype", Ctype.pp v.ctype); - ("locs", list (pair (pair ptr ptr) Loc.pp) v.locs); + ("locs", list (pair (pair Addr.pp (opt Addr.pp)) Loc.pp) v.locs); ]) diff --git a/src/utils/sym.ml b/src/utils/sym.ml new file mode 100644 index 00000000..bb2ecd56 --- /dev/null +++ b/src/utils/sym.ml @@ -0,0 +1,3 @@ +type t = Z.t Dwarf.sym0 + +let to_int x = Z.to_int @@ Dwarf.sym_unwrap x "to_int" \ No newline at end of file From 6f086f2896b91da265891ddd60d36eb3d85c5ef3 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sun, 26 Jan 2025 22:39:13 +0000 Subject: [PATCH 39/89] [wip] symbolic analyse --- src/analyse/CallGraph.ml | 8 ++-- src/analyse/ControlFlow.ml | 80 ++++++++++++++++---------------- src/analyse/ControlFlowPpDot.ml | 8 ++-- src/analyse/DwarfFrameInfo.ml | 6 +-- src/analyse/DwarfInliningInfo.ml | 6 +-- src/analyse/DwarfLineInfo.ml | 64 ++++++++++++------------- src/analyse/DwarfVarInfo.ml | 38 +++++++-------- src/analyse/Elf.ml | 4 +- src/analyse/ElfSymbols.ml | 4 +- src/analyse/Pp.ml | 10 ++-- src/analyse/QemuLog.ml | 4 +- src/analyse/Utils.ml | 9 +++- src/utils/sym.ml | 17 ++++++- 13 files changed, 139 insertions(+), 119 deletions(-) diff --git a/src/analyse/CallGraph.ml b/src/analyse/CallGraph.ml index e90602e1..026db808 100644 --- a/src/analyse/CallGraph.ml +++ b/src/analyse/CallGraph.ml @@ -60,7 +60,7 @@ type call_graph_node = addr * index * string list let mk_call_graph test (an : CollectedType.analysis) = let mask_addr x:natural = if !Globals.morello - then Nat_big_num.shift_left (Nat_big_num.shift_right x 1) 1 + then Sym.shift_left (Sym.shift_right x 1) 1 else x in (* take the nodes to be all the elf symbol addresses of stt_func symbol type (each with their list of elf symbol names) together @@ -110,7 +110,7 @@ let mk_call_graph test (an : CollectedType.analysis) = if not (List.exists - (function (a'', _) -> Nat_big_num.equal a' a'') + (function (a'', _) -> Sym.equal a' a'') elf_symbols) then Some (a', ["FROM BL:" ^ s']) else None) @@ -122,7 +122,7 @@ let mk_call_graph test (an : CollectedType.analysis) = match axs with | [] -> acc | (a, x) :: axs' -> - if not (List.exists (function (a', _) -> Nat_big_num.equal a a') acc) then + if not (List.exists (function (a', _) -> Sym.equal a a') acc) then dedup axs' ((a, x) :: acc) else dedup axs' acc in @@ -133,7 +133,7 @@ let mk_call_graph test (an : CollectedType.analysis) = List.sort (function | (a, _) -> ( - function (a', _) -> Nat_big_num.compare a a' + function (a', _) -> Sym.compare a a' )) (elf_symbols @ extra_bl_targets) in diff --git a/src/analyse/ControlFlow.ml b/src/analyse/ControlFlow.ml index 9d9628c0..89bc11f6 100644 --- a/src/analyse/ControlFlow.ml +++ b/src/analyse/ControlFlow.ml @@ -119,11 +119,11 @@ let branch_table_target_addresses test filename_branch_table_option : (addr * ad with | (a_br, a_table, n, shift, a_offset) -> Some - ( Nat_big_num.of_int a_br, - ( Nat_big_num.of_int a_table, - Nat_big_num.of_int n, + ( Sym.of_int a_br, + ( Sym.of_int a_table, + Sym.of_int n, shift, - Nat_big_num.of_int a_offset ) ) + Sym.of_int a_offset ) ) | exception _ -> fatal "couldn't parse branch table data file line: \"%s\"\n" s in List.filter_map parse_line (List.tl (Array.to_list lines)) @@ -131,7 +131,7 @@ let branch_table_target_addresses test filename_branch_table_option : (addr * ad in (* pull out .rodata section from ELF *) - let ((_, rodata_addr, bs) as _rodata : Dwarf.p_context * Nat_big_num.num * BytesSeq.t) = + let ((_, rodata_addr, bs) as _rodata : Dwarf.p_context * Sym.t * BytesSeq.t) = Dwarf.extract_section_body_without_relocations test.elf_file ".rodata" false in (* chop into bytes *) @@ -143,37 +143,37 @@ let branch_table_target_addresses test filename_branch_table_option : (addr * ad let read_rodata_b addr = Elf_types_native_uint.natural_of_byte - rodata_bytes.(Nat_big_num.to_int (Nat_big_num.sub addr rodata_addr)) + rodata_bytes.(Sym.to_int (Sym.sub addr rodata_addr)) in let read_rodata_h addr = - Nat_big_num.add (read_rodata_b addr) - (Nat_big_num.mul (Nat_big_num.of_int 256) - (read_rodata_b (Nat_big_num.add addr (Nat_big_num.of_int 1)))) + Sym.add (read_rodata_b addr) + (Sym.mul (Sym.of_int 256) + (read_rodata_b (Sym.add addr (Sym.of_int 1)))) in let sign_extend_W n = - let half = Nat_big_num.mul (Nat_big_num.of_int 65536) (Nat_big_num.of_int 32768) in - let whole = Nat_big_num.mul half (Nat_big_num.of_int 2) in - if Nat_big_num.greater_equal n half then Nat_big_num.sub n whole else n + let half = Sym.mul (Sym.of_int 65536) (Sym.of_int 32768) in + let whole = Sym.mul half (Sym.of_int 2) in + if Sym.greater_equal n half then Sym.sub n whole else n in let read_rodata_W addr = sign_extend_W - (Nat_big_num.add (read_rodata_b addr) - (Nat_big_num.add - (Nat_big_num.mul (Nat_big_num.of_int 256) - (read_rodata_b (Nat_big_num.add addr (Nat_big_num.of_int 1)))) - (Nat_big_num.add - (Nat_big_num.mul (Nat_big_num.of_int 65536) - (read_rodata_b (Nat_big_num.add addr (Nat_big_num.of_int 2)))) - (Nat_big_num.mul (Nat_big_num.of_int 16777216) - (read_rodata_b (Nat_big_num.add addr (Nat_big_num.of_int 3))))))) + (Sym.add (read_rodata_b addr) + (Sym.add + (Sym.mul (Sym.of_int 256) + (read_rodata_b (Sym.add addr (Sym.of_int 1)))) + (Sym.add + (Sym.mul (Sym.of_int 65536) + (read_rodata_b (Sym.add addr (Sym.of_int 2)))) + (Sym.mul (Sym.of_int 16777216) + (read_rodata_b (Sym.add addr (Sym.of_int 3))))))) in let rec natural_assoc_opt n nys = match nys with | [] -> None - | (n', y) :: nys' -> if Nat_big_num.equal n n' then Some y else natural_assoc_opt n nys' + | (n', y) :: nys' -> if Sym.equal n n' then Some y else natural_assoc_opt n nys' in (* this is the evaluator for a little stack-machine language used in the hafnium.branch-table files to describe the access pattern for each branch table *) @@ -187,8 +187,8 @@ let branch_table_target_addresses test filename_branch_table_option : (addr * ad h read two bytes from the branch table W read four byte from the branch table and sign-extend *) - let rec eval_shift_expression (shift : string) (a_table : Nat_big_num.num) - (a_offset : Nat_big_num.num) (i : Nat_big_num.num) (stack : Nat_big_num.num list) (pc : int) + let rec eval_shift_expression (shift : string) (a_table : Sym.t) + (a_offset : Sym.t) (i : Sym.t) (stack : Sym.t list) (pc : int) = if pc = String.length shift then match stack with @@ -205,8 +205,8 @@ let branch_table_target_addresses test filename_branch_table_option : (addr * ad match stack with | a :: stack' -> let a' = - Nat_big_num.mul a - (Nat_big_num.pow_int_positive 2 (Char.code command - Char.code '0')) + Sym.mul a + (Sym.pow_int_positive 2 (Char.code command - Char.code '0')) in eval_shift_expression shift a_table a_offset i (a' :: stack') (pc + 1) | _ -> fatal "eval_shift_expression shift empty stack" @@ -222,7 +222,7 @@ let branch_table_target_addresses test filename_branch_table_option : (addr * ad (* plus *) match stack with | a1 :: a2 :: stack' -> - let a' = Nat_big_num.add a1 a2 in + let a' = Sym.add a1 a2 in eval_shift_expression shift a_table a_offset i (a' :: stack') (pc + 1) | _ -> fatal "eval_shift_expression plus emptyish stack" else if command = 'b' then @@ -254,24 +254,24 @@ let branch_table_target_addresses test filename_branch_table_option : (addr * ad (function | (a_br, (a_table, size, shift, a_offset)) -> let rec f i = - if i > Nat_big_num.to_int size then [] + if i > Sym.to_int size then [] else let a_target = if shift = "2" then - let table_entry_addr = Nat_big_num.add a_table (Nat_big_num.of_int (4 * i)) in + let table_entry_addr = Sym.add a_table (Sym.of_int (4 * i)) in match natural_assoc_opt table_entry_addr rodata_words with | None -> fatal "no branch table entry for address %s\n" (pp_addr table_entry_addr) | Some table_entry -> let a_target = - Nat_big_num.modulus - (Nat_big_num.add a_table table_entry) - (Nat_big_num.pow_int_positive 2 32) + Sym.modulus + (Sym.add a_table table_entry) + (Sym.pow_int_positive 2 32) in (* that 32 is good for the sign-extended negative 32-bit offsets we see in the old hafnium-playground-src branch tables *) a_target - else eval_shift_expression shift a_table a_offset (Nat_big_num.of_int i) [] 0 + else eval_shift_expression shift a_table a_offset (Sym.of_int i) [] 0 in a_target :: f (i + 1) in @@ -289,10 +289,10 @@ let branch_table_target_addresses test filename_branch_table_option : (addr * ad let parse_addr (s : string) : natural = try - Scanf.sscanf s "0x%Lx" (fun i64 -> Nat_big_num.of_int64 i64) + Scanf.sscanf s "0x%Lx" (fun i64 -> Sym.of_int64 i64) with Scanf.Scan_failure _ -> - Scanf.sscanf s "%Lx" (fun i64 -> Nat_big_num.of_int64 i64) + Scanf.sscanf s "%Lx" (fun i64 -> Sym.of_int64 i64) let parse_target s = match Scanf.sscanf s " %s %s" (fun s1 s2 -> (s1, s2)) with @@ -360,7 +360,7 @@ let parse_control_flow_instruction s mnemonic s' : control_flow_insn = let targets_of_control_flow_insn_without_index branch_table_targets (addr : natural) (opcode_bytes : int list) (c : control_flow_insn) : (target_kind * addr * string) list = - let succ_addr = Nat_big_num.add addr (Nat_big_num.of_int (List.length opcode_bytes)) in + let succ_addr = Sym.add addr (Sym.of_int (List.length opcode_bytes)) in let targets = match c with | C_no_instruction -> [] @@ -465,7 +465,7 @@ let parse_objdump_line (s : string) : objdump_instruction option = if Str.string_match objdump_line_regexp s 0 then begin let addr_int64 = parse_hex_int64 (Str.matched_group 1 s) in - let addr = Nat_big_num.of_int64 addr_int64 in + let addr = Sym.of_int64 addr_int64 in let op = Str.matched_group 2 s in let op = strip_whitespace op in let opcode_byte_strings = @@ -497,7 +497,7 @@ let rec parse_objdump_lines arch lines (next_index : int) (last_address : natura match last_address with | None -> i :: parse_objdump_lines arch lines (next_index + 1) (Some addr) | Some last_address' -> - let last_address'' = Nat_big_num.add last_address' (Nat_big_num.of_int 4) in + let last_address'' = Sym.add last_address' (Sym.of_int 4) in if addr > last_address'' then (* fake up "missing" instructions for any gaps in the address space*) (*warn "gap in objdump instruction address sequence at %s" (pp_addr last_address'');*) @@ -602,11 +602,11 @@ let highlight c = (* highlight branch targets to earlier addresses*) let pp_target_addr_wrt (addr : natural) (c : control_flow_insn) (a : natural) = - (if highlight c && Nat_big_num.less a addr then "^" else "") ^ pp_addr a + (if highlight c && Sym.less a addr then "^" else "") ^ pp_addr a (* highlight branch come-froms from later addresses*) let pp_come_from_addr_wrt (addr : natural) (c : control_flow_insn) (a : natural) = - (if highlight c && Nat_big_num.greater a addr then "v" else "") ^ pp_addr a + (if highlight c && Sym.greater a addr then "v" else "") ^ pp_addr a (* let pp_branch_targets (xs : (addr * control_flow_insn * (target_kind * addr * int * string) list) list) diff --git a/src/analyse/ControlFlowPpDot.ml b/src/analyse/ControlFlowPpDot.ml index fc3f90ac..77375477 100644 --- a/src/analyse/ControlFlowPpDot.ml +++ b/src/analyse/ControlFlowPpDot.ml @@ -204,7 +204,7 @@ let inlining_stack_at_index (an : analysis) k = (function | sss' -> String.concat "\n---\n" - (List.map (Dwarf.pp_sdt_subroutine (Nat_big_num.of_int 0)) sss')) + (List.map (Dwarf.pp_sdt_subroutine (Sym.of_int 0)) sss')) maximal)); [] @@ -793,7 +793,7 @@ let mk_cfg test an visitedo node_name_prefix (recurse_flat : bool) (_inline_all let ((_comp_dir, _dir, _file) as ufe) = Dwarf.unpack_file_entry lnh lnr.lnr_file in - (ufe, Nat_big_num.to_int lnr.lnr_line)) + (ufe, Sym.to_int lnr.lnr_line)) an.line_info.(k)) in @@ -859,12 +859,12 @@ let mk_cfg test an visitedo node_name_prefix (recurse_flat : bool) (_inline_all match new_ss_O2_ambient_option with | None -> fatal "no call site for\n%s" - (Dwarf.pp_sdt_subroutine (Nat_big_num.of_int 0) ss_current) + (Dwarf.pp_sdt_subroutine (Sym.of_int 0) ss_current) | Some new_ss_O2_ambient -> ( match new_ss_O2_ambient.ss_call_site with | None -> fatal "no call site2 for\n%s" - (Dwarf.pp_sdt_subroutine (Nat_big_num.of_int 0) ss_current) + (Dwarf.pp_sdt_subroutine (Sym.of_int 0) ss_current) | Some (_ufe, line, _subprogram_name) -> line ) (*, diff --git a/src/analyse/DwarfFrameInfo.ml b/src/analyse/DwarfFrameInfo.ml index 2b41c478..991b295d 100644 --- a/src/analyse/DwarfFrameInfo.ml +++ b/src/analyse/DwarfFrameInfo.ml @@ -56,11 +56,11 @@ let aof ((a : natural), (_cfa : string), (_regs : (string * string) list)) = a let rec f (aof : 'b -> natural) (a : natural) (last : 'b option) (bs : 'b list) : 'b option = match (last, bs) with | (None, []) -> None - | (Some b', []) -> if Nat_big_num.greater_equal a (aof b') then Some b' else None + | (Some b', []) -> if Sym.greater_equal a (aof b') then Some b' else None | (None, b'' :: bs') -> f aof a (Some b'') bs' | (Some b', b'' :: bs') -> - if Nat_big_num.less a (aof b') then None - else if Nat_big_num.greater_equal a (aof b') && Nat_big_num.less a (aof b'') then Some b' + if Sym.less a (aof b') then None + else if Sym.greater_equal a (aof b') && Sym.less a (aof b'') then Some b' else f aof a (Some b'') bs' let mk_frame_info test instructions : diff --git a/src/analyse/DwarfInliningInfo.ml b/src/analyse/DwarfInliningInfo.ml index bc6a1903..832b4cac 100644 --- a/src/analyse/DwarfInliningInfo.ml +++ b/src/analyse/DwarfInliningInfo.ml @@ -68,7 +68,7 @@ let mk_inlining test sdt instructions = let addr = i.i_addr in let issr_still_current = List.filter - (function (_label, ((_n1, n2), (_m, _n), _is)) -> Nat_big_num.less addr n2) + (function (_label, ((_n1, n2), (_m, _n), _is)) -> Sym.less addr n2) issr_current in @@ -83,8 +83,8 @@ let mk_inlining test sdt instructions = let (issr_starting_here0, issr_rest') = find_first - (function ((_n1, n2), (_m, _n), _is) -> Nat_big_num.less_equal n2 addr) - (function ((n1, _n2), (_m, _n), _is) -> Nat_big_num.equal n1 addr) + (function ((_n1, n2), (_m, _n), _is) -> Sym.less_equal n2 addr) + (function ((n1, _n2), (_m, _n), _is) -> Sym.equal n1 addr) [] issr_rest in diff --git a/src/analyse/DwarfLineInfo.ml b/src/analyse/DwarfLineInfo.ml index cf126a41..b3561313 100644 --- a/src/analyse/DwarfLineInfo.ml +++ b/src/analyse/DwarfLineInfo.ml @@ -86,18 +86,18 @@ type evaluated_line_info_for_instruction = { (* line number sequences can overlap, and we have to walk through instructions (not addresses), so we simplify by splitting all of them into individual entries, sort them by first address, and then walk through them painting a per-instruction array. This is algorithmically a bit terrible, but seems to add only a couple of seconds to read-dwarf rd *) let pp_line_number_header_concise (lnh : Dwarf.line_number_header) : string = - "lnh offset = " ^ Dwarf.pphex lnh.lnh_offset ^ "\n" + "lnh offset = " ^ Dwarf.pphex_sym lnh.lnh_offset ^ "\n" (*^ ("dwarf_format = " ^ (pp_dwarf_format lnh.lnh_dwarf_format ^ ("\n" -^ ("unit_length = " ^ (Nat_big_num.to_string lnh.lnh_unit_length ^ ("\n" -^ ("version = " ^ (Nat_big_num.to_string lnh.lnh_version ^ ("\n" -^ ("header_length = " ^ (Nat_big_num.to_string lnh.lnh_header_length ^ ("\n" -^ ("minimum_instruction_length = " ^ (Nat_big_num.to_string lnh.lnh_minimum_instruction_length ^ ("\n" -^ ("maximum_operations_per_instruction = " ^ (Nat_big_num.to_string lnh.lnh_maximum_operations_per_instruction ^ ("\n" +^ ("unit_length = " ^ (Sym.to_string lnh.lnh_unit_length ^ ("\n" +^ ("version = " ^ (Sym.to_string lnh.lnh_version ^ ("\n" +^ ("header_length = " ^ (Sym.to_string lnh.lnh_header_length ^ ("\n" +^ ("minimum_instruction_length = " ^ (Sym.to_string lnh.lnh_minimum_instruction_length ^ ("\n" +^ ("maximum_operations_per_instruction = " ^ (Sym.to_string lnh.lnh_maximum_operations_per_instruction ^ ("\n" ^ ("default_is_stmt = " ^ (string_of_bool lnh.lnh_default_is_stmt ^ ("\n" -^ ("line_base = " ^ (Nat_big_num.to_string lnh.lnh_line_base ^ ("\n" -^ ("line_range = " ^ (Nat_big_num.to_string lnh.lnh_line_range ^ ("\n" -^ ("opcode_base = " ^ (Nat_big_num.to_string lnh.lnh_opcode_base ^ ("\n" +^ ("line_base = " ^ (Sym.to_string lnh.lnh_line_base ^ ("\n" +^ ("line_range = " ^ (Sym.to_string lnh.lnh_line_range ^ ("\n" +^ ("opcode_base = " ^ (Sym.to_string lnh.lnh_opcode_base ^ ("\n" ^ ("standard_opcode_lengths = " ^ (string_of_list instance_Show_Show_Num_natural_dict lnh.lnh_standard_opcode_lengths ^ ("\n" ^ ("comp_dir = " ^ (string_of_maybe @@ -136,7 +136,7 @@ let split_into_sequences | None -> fatal "split_into_sequences found sequence of length 0" in let last = lnr.lnr_address in - if Nat_big_num.equal first last then fatal "split_into_sequences found first=last" + if Sym.equal first last then fatal "split_into_sequences found first=last" else (); let elis = { @@ -160,8 +160,8 @@ let split_into_entries (s : evaluated_line_info_sequence) : evaluated_line_info_ { elie_first = l1.lnr_address; elie_last = - ( if Nat_big_num.equal l2.lnr_address l1.lnr_address then l1.lnr_address - else Nat_big_num.sub l2.lnr_address (Nat_big_num.of_int 1) + ( if Sym.equal l2.lnr_address l1.lnr_address then l1.lnr_address + else Sym.sub l2.lnr_address (Sym.of_int 1) ); elie_lnh = s.elis_lnh; elie_lnr = l1; @@ -176,9 +176,9 @@ let split_into_entries (s : evaluated_line_info_sequence) : evaluated_line_info_ let mk_line_info (eli: Dwarf.evaluated_line_info) instructions : evaluated_line_info_for_instruction option array = let sequences = List.flatten (List.map split_into_sequences eli) in - let compare_sequence s1 s2 = Nat_big_num.compare s1.elis_first s2.elis_first in + let compare_sequence s1 s2 = Sym.compare s1.elis_first s2.elis_first in let sequences_sorted = List.sort compare_sequence sequences in - (*let overlap_sequence s1 s2 = not( Nat_big_num.greater_equal s2.first s1.last || Nat_big_num.greater_equal s1.first s2.last) in*) + (*let overlap_sequence s1 s2 = not( Sym.greater_equal s2.first s1.last || Sym.greater_equal s1.first s2.last) in*) Printf.printf "mk_line_info\n%s" (String.concat "\n" (List.map pp_sequence_concise sequences_sorted)); @@ -189,12 +189,12 @@ let mk_line_info (eli: Dwarf.evaluated_line_info) instructions : evaluated_line_ let (discardable,remaining') = List.partition (function sequence -> - Nat_big_num.less_equal sequence.elis_last addr) + Sym.less_equal sequence.elis_last addr) remaining_sequences in let (sequences,remaining'') = List.partition (function sequence -> - Nat_big_num.less_equal sequence.elis_first addr) + Sym.less_equal sequence.elis_first addr) remaining' in (sequences,remaining'') in @@ -224,7 +224,7 @@ let mk_line_info (eli: Dwarf.evaluated_line_info) instructions : evaluated_line_ let addr = instructions.(k).i_addr in match remaining_lines with | l1::((l2::remaining_lines') as remaining_lines'') -> - if Nat_big_num.equal addr l1.lnr_address then + if Sym.equal addr l1.lnr_address then (* this instruction address exactly matches the first line of this sequence *) let elifi = { elifi_start = true; @@ -232,7 +232,7 @@ let mk_line_info (eli: Dwarf.evaluated_line_info) instructions : evaluated_line_ elifi_line = l1 } in elifis.(k) <- Some elifi; f active_sequence remaining_lines remaining_sequences (k+1) - else if Nat_big_num.less l1.lnr_address addr && Nat_big_num.less addr l2.lnr_address then + else if Sym.less l1.lnr_address addr && Sym.less addr l2.lnr_address then (* this instruction address is within the range of the first line, but not equal to it*) let elifi = { elifi_start = false; @@ -240,7 +240,7 @@ let mk_line_info (eli: Dwarf.evaluated_line_info) instructions : evaluated_line_ elifi_line = l1 } in elifis.(k) <- Some elifi; f active_sequence remaining_lines remaining_sequences (k+1) - else if Nat_big_num.greater_equal addr l2.lnr_address then + else if Sym.greater_equal addr l2.lnr_address then (* this instruction address is after the range of the first line *) if not(l2.lnr_end_sequence (* invariant: iff remaining'=[]*)) then (* there are more non-end lines left in this sequence: try again with the next *) @@ -268,11 +268,11 @@ let mk_line_info (eli : Dwarf.evaluated_line_info) instructions : let elifis = Array.make size [] in let sequences = List.flatten (List.map split_into_sequences eli) in - let compare_sequence s1 s2 = Nat_big_num.compare s1.elis_first s2.elis_first in + let compare_sequence s1 s2 = Sym.compare s1.elis_first s2.elis_first in let sequences_sorted = List.sort compare_sequence sequences in let entries = List.flatten (List.map split_into_entries sequences_sorted) in - let compare_entry e1 e2 = Nat_big_num.compare e1.elie_first e2.elie_first in + let compare_entry e1 e2 = Sym.compare e1.elie_first e2.elie_first in let entries_sorted = List.sort compare_entry entries in (*List.iter (function elie -> Printf.printf "%s" (pp_elie_concise elie)) entries_sorted;*) @@ -285,7 +285,7 @@ let mk_line_info (eli : Dwarf.evaluated_line_info) instructions : match remaining with | [] -> (acc, remaining) | elie :: remaining' -> - if Nat_big_num.less_equal elie.elie_first addr then + if Sym.less_equal elie.elie_first addr then mk_new_perhaps_relevant (elie :: acc) remaining' else (acc, remaining) in @@ -293,7 +293,7 @@ let mk_line_info (eli : Dwarf.evaluated_line_info) instructions : let (new_perhaps_relevant, remaining') = mk_new_perhaps_relevant [] remaining_entries in let addr_in elie = - Nat_big_num.less_equal elie.elie_first addr && Nat_big_num.less_equal addr elie.elie_last + Sym.less_equal elie.elie_first addr && Sym.less_equal addr elie.elie_last in let still_active_entries = @@ -305,7 +305,7 @@ let mk_line_info (eli : Dwarf.evaluated_line_info) instructions : (function | elie -> let elifi = - { elifi_start = Nat_big_num.equal addr elie.elie_first; elifi_entry = elie } + { elifi_start = Sym.equal addr elie.elie_first; elifi_entry = elie } in elifi) still_active_entries; @@ -443,14 +443,14 @@ let pp_dwarf_source_file_lines' m (ds : Dwarf.dwarf_static) (pp_actual_line : bo | Ascii -> s | Html -> "@" ^ s ^ "@ " in wrap_link m (subprogram_name ^ ":" - ^ Nat_big_num.to_string lnr.lnr_line + ^ Sym.to_string lnr.lnr_line ^ "." - ^ Nat_big_num.to_string lnr.lnr_column + ^ Sym.to_string lnr.lnr_column ^ " (" ^ file ^ ")" ) (* ^ (if elifi.elifi_start then "S" else "s")*) @@ -463,10 +463,10 @@ let pp_dwarf_source_file_lines' m (ds : Dwarf.dwarf_static) (pp_actual_line : bo ^ " " ^ if pp_actual_line then - let line = Nat_big_num.to_int lnr.lnr_line in + let line = Sym.to_int lnr.lnr_line in if line = 0 then "line 0" else - pp_source_line (source_line (comp_dir, dir, file) line) (Nat_big_num.to_int lnr.lnr_column) + pp_source_line (source_line (comp_dir, dir, file) line) (Sym.to_int lnr.lnr_column) else "" (* OLD source line number for O0/2 correlation @@ -479,11 +479,11 @@ let rec dwarf_source_file_line_numbers' test recursion_limit (a : natural) : match sls with | [] -> dwarf_source_file_line_numbers' test (recursion_limit - 1) - (Nat_big_num.sub a (Nat_big_num.of_int 4)) + (Sym.sub a (Sym.of_int 4)) | _ -> List.map (fun ((comp_dir, dir, file), n, lnr, subprogram_name) -> - (subprogram_name, Nat_big_num.to_int n)) + (subprogram_name, Sym.to_int n)) sls let dwarf_source_file_line_numbers test (a : natural) = @@ -507,7 +507,7 @@ let dwarf_source_file_line_numbers_by_index test line_info k : Dwarf.subprogram_at_line test.dwarf_static.ds_subprogram_line_extents ufe lnr.lnr_line ) in - (subprogram_name, Nat_big_num.to_int lnr.lnr_line)) + (subprogram_name, Sym.to_int lnr.lnr_line)) elifis) in match lines with [_] -> lines | [] -> lines | _ -> [] diff --git a/src/analyse/DwarfVarInfo.ml b/src/analyse/DwarfVarInfo.ml index bd0709f0..54e2964f 100644 --- a/src/analyse/DwarfVarInfo.ml +++ b/src/analyse/DwarfVarInfo.ml @@ -76,7 +76,7 @@ let pp_sdt_concise_variable_or_formal_parameter_main (level : int) ^ (match svfp.svfp_type with None -> "none" | Some t -> Dwarf.pp_type_info_deep t) ^ " " (*^ indent ^ "const_value:"*) - ^ (match svfp.svfp_const_value with None -> "" | Some v -> "const:" ^ Nat_big_num.to_string v) + ^ (match svfp.svfp_const_value with None -> "" | Some v -> "const:" ^ Sym.to_string v) ^ " " (*^ indent ^ "external:" ^ show svfp.svfp_external ^ "\n"*) @@ -90,7 +90,7 @@ let pp_sdt_concise_variable_or_formal_parameter (level : int) (is_params : bool) ^ match svfp.svfp_locations with | None -> "no locations\n" - | Some [loc] -> " " ^ Dwarf.pp_parsed_single_location_description (Nat_big_num.of_int 0) loc + | Some [loc] -> " " ^ Dwarf.pp_parsed_single_location_description (Z.of_int 0) loc | Some locs -> "\n" ^ String.concat "" @@ -99,7 +99,7 @@ let pp_sdt_concise_variable_or_formal_parameter (level : int) (is_params : bool) | loc -> "+" ^ Dwarf.pp_parsed_single_location_description - (Nat_big_num.of_int (level + 1)) + (Z.of_int (level + 1)) loc) locs) @@ -150,14 +150,14 @@ let rec locals_subroutine context (ss : Dwarf.sdt_subroutine) = ^ (indent (*^ "name:" ^*) ^ (pp_sdt_maybe ss.ss_name (fun name1 -> name1 ^ "\n") (* ^ indent ^ "cupdie:" ^ pp_cupdie3 ss.ss_cupdie ^ "\n"*) ^ (indent ^ ("kind:" ^ (((match ss.ss_kind with SSK_subprogram -> "subprogram" | SSK_inlined_subroutine -> "inlined subroutine" )) ^ ("\n" - ^ (indent ^ ("call site:" ^ (pp_sdt_maybe ss.ss_call_site (fun ud -> "\n" ^ (indent_level true (Nat_big_num.add level(Nat_big_num.of_int 1)) ^ (pp_ud ud ^ "\n"))) - ^ (indent ^ ("abstract origin:" ^ (pp_sdt_maybe ss.ss_abstract_origin (fun s -> "\n" ^ locals__subroutine (Nat_big_num.add level(Nat_big_num.of_int 1)) s) + ^ (indent ^ ("call site:" ^ (pp_sdt_maybe ss.ss_call_site (fun ud -> "\n" ^ (indent_level true (Sym.add level(Sym.of_int 1)) ^ (pp_ud ud ^ "\n"))) + ^ (indent ^ ("abstract origin:" ^ (pp_sdt_maybe ss.ss_abstract_origin (fun s -> "\n" ^ locals__subroutine (Sym.add level(Sym.of_int 1)) s) (* ^ indent ^ "type:" ^ pp_sdt_maybe ss.ss_type (fun typ -> pp_type_info_deep typ ^"\n" end)*) - ^ (indent ^ ("vars:" ^ (pp_sdt_list ss.ss_vars (pp_sdt_concise_variable_or_formal_parameter (Nat_big_num.add level(Nat_big_num.of_int 1))) - ^ (indent ^ ("unspecified_parameters:" ^ (pp_sdt_list ss.ss_unspecified_parameters (pp_sdt_unspecified_parameter (Nat_big_num.add level(Nat_big_num.of_int 1))) + ^ (indent ^ ("vars:" ^ (pp_sdt_list ss.ss_vars (pp_sdt_concise_variable_or_formal_parameter (Sym.add level(Sym.of_int 1))) + ^ (indent ^ ("unspecified_parameters:" ^ (pp_sdt_list ss.ss_unspecified_parameters (pp_sdt_unspecified_parameter (Sym.add level(Sym.of_int 1))) (* ^ indent ^ "pc ranges:" ^ pp_pc_ranges (level+1) ss.ss_pc_ranges*) - ^ (indent ^ ("subroutines:" ^ (pp_sdt_list ss.ss_subroutines (locals__subroutine (Nat_big_num.add level(Nat_big_num.of_int 1))) - ^ (indent ^ ("lexical_blocks:" ^ (pp_sdt_list ss.ss_lexical_blocks (locals__lexical_block (Nat_big_num.add level(Nat_big_num.of_int 1))) + ^ (indent ^ ("subroutines:" ^ (pp_sdt_list ss.ss_subroutines (locals__subroutine (Sym.add level(Sym.of_int 1))) + ^ (indent ^ ("lexical_blocks:" ^ (pp_sdt_list ss.ss_lexical_blocks (locals__lexical_block (Sym.add level(Sym.of_int 1))) (* ^ indent ^ "decl:" ^ pp_sdt_maybe ss.ss_decl (fun ((ufe,line) as ud) -> "\n" ^ indent_level true (level+1) ^ pp_ufe ufe ^ " " ^ show line ^ "\n" end)*) (* ^ indent ^ "noreturn:" ^ show ss.ss_noreturn ^ "\n"*) (* ^ indent ^ "external:" ^ show ss.ss_external ^"\n"*) @@ -172,10 +172,10 @@ and locals_lexical_block context (lb : Dwarf.sdt_lexical_block) = (* "" (* ^ indent ^ "cupdie:" ^ pp_cupdie3 lb.slb_cupdie ^ "\n"*) - ^ (indent ^ ("vars:" ^ (pp_sdt_list lb.slb_vars (pp_sdt_concise_variable_or_formal_parameter (Nat_big_num.add level(Nat_big_num.of_int 1))) + ^ (indent ^ ("vars:" ^ (pp_sdt_list lb.slb_vars (pp_sdt_concise_variable_or_formal_parameter (Sym.add level(Sym.of_int 1))) (* ^ indent ^ "pc ranges:" ^ pp_pc_ranges (level+1) lb.slb_pc_ranges*) - ^ (indent ^ ("subroutines :" ^ (pp_sdt_list lb.slb_subroutines (locals__subroutine (Nat_big_num.add level(Nat_big_num.of_int 1))) - ^ (indent ^ ("lexical_blocks:" ^ (pp_sdt_list lb.slb_lexical_blocks (locals__lexical_block (Nat_big_num.add level(Nat_big_num.of_int 1))) + ^ (indent ^ ("subroutines :" ^ (pp_sdt_list lb.slb_subroutines (locals__subroutine (Sym.add level(Sym.of_int 1))) + ^ (indent ^ ("lexical_blocks:" ^ (pp_sdt_list lb.slb_lexical_blocks (locals__lexical_block (Sym.add level(Sym.of_int 1))) ^ "\n")))))))))) *) @@ -190,8 +190,8 @@ let locals_compilation_unit context (cu : Dwarf.sdt_compilation_unit) = "" ^ (indent (*^ "name:" *) ^ (cu.scu_name ^ ("\n" (* ^ indent ^ "cupdie:" ^ pp_cupdie3 cu.scu_cupdie ^ "\n"*) - ^ (indent ^ ("vars:" ^ (pp_sdt_list cu.scu_vars (pp_sdt_concise_variable_or_formal_parameter (Nat_big_num.add level(Nat_big_num.of_int 1))) - ^ (indent ^ ("subroutines :" ^ pp_sdt_list cu.scu_subroutines (locals__subroutine (Nat_big_num.add level(Nat_big_num.of_int 1)))))))))))) + ^ (indent ^ ("vars:" ^ (pp_sdt_list cu.scu_vars (pp_sdt_concise_variable_or_formal_parameter (Sym.add level(Sym.of_int 1))) + ^ (indent ^ ("subroutines :" ^ pp_sdt_list cu.scu_subroutines (locals__subroutine (Sym.add level(Sym.of_int 1)))))))))))) *) let locals_dwarf (sdt_d : Dwarf.sdt_dwarf) : (Dwarf.sdt_variable_or_formal_parameter * string list) (*context*) list = @@ -246,7 +246,7 @@ let pp_ranged_var (prefix : string) (var : ranged_var) : string = let ((n1, n2, ops), (svfp, context)) = var in prefix ^ pp_sdt_concise_variable_or_formal_parameter_main 0 svfp - ^ (let s = Dwarf.pp_parsed_single_location_description (Nat_big_num.of_int 0) (n1, n2, ops) in + ^ (let s = Dwarf.pp_parsed_single_location_description (Sym.of_int 0) (n1, n2, ops) in String.sub s 0 (String.length s - 1)) (*hackish stripping of trailing \n from linksem - TODO: fix linksem interface*) ^ " " @@ -299,14 +299,14 @@ let mk_ranged_vars_at_instructions (sdt_d : Dwarf.sdt_dwarf) instructions : if k >= size then () else let addr = instructions.(k).i_addr in - if not (Nat_big_num.less addr_prev addr) then + if not (Sym.less addr_prev addr) then fatal "mk_ranged_vars_at_instructions found non-increasing address %s" (pp_addr addr); let (still_current, old) = - List.partition (function ((_, n2, _), _) -> Nat_big_num.less addr n2) prev + List.partition (function ((_, n2, _), _) -> Sym.less addr n2) prev in let (new', remaining') = partition_first - (function ((n1, _n2, _ops), _var) as _rv -> Nat_big_num.greater_equal addr n1) + (function ((n1, _n2, _ops), _var) as _rv -> Sym.greater_equal addr n1) remaining in (* TODO: do we need to drop any that have been totally skipped over? *) @@ -317,7 +317,7 @@ let mk_ranged_vars_at_instructions (sdt_d : Dwarf.sdt_dwarf) instructions : rvai_remaining.(k) <- remaining'; f addr current remaining' (k + 1) in - f (Nat_big_num.of_int (0 - 1)) [] locals_by_pc_ranges 0; + f (Sym.of_int (0 - 1)) [] locals_by_pc_ranges 0; { rvai_globals = globals_dwarf sdt_d; diff --git a/src/analyse/Elf.ml b/src/analyse/Elf.ml index 4b4bfc9b..0734c459 100644 --- a/src/analyse/Elf.ml +++ b/src/analyse/Elf.ml @@ -58,8 +58,8 @@ let pp_symbol_map (symbol_map : Elf_file.global_symbol_init_info) = String.concat "" (List.map (fun (name, (typ, _size, address, _mb, _binding)) -> - Printf.sprintf "**** name = %s address = %s typ = %d\n" name (pp_addr address) - (Nat_big_num.to_int typ)) + Printf.sprintf "**** name = %s address = %s typ = %d\n" name (pp_addr (Dwarf.Absolute address)) + (Sym.to_int (Dwarf.Absolute typ))) symbol_map) (*****************************************************************************) diff --git a/src/analyse/ElfSymbols.ml b/src/analyse/ElfSymbols.ml index e74f7aeb..83a526ea 100644 --- a/src/analyse/ElfSymbols.ml +++ b/src/analyse/ElfSymbols.ml @@ -54,7 +54,7 @@ open ControlFlowTypes let elf_symbols_of_address (test : test) (addr : natural) : string list = List.filter_map (fun (name, (_typ, _size, address, _mb, _binding)) -> - if address = addr then Some name else None) + if Dwarf.Absolute address = addr then Some name else None) test.symbol_map let mk_elf_symbols test instructions : string list array = @@ -62,5 +62,5 @@ let mk_elf_symbols test instructions : string list array = let address_of_elf_symbol test (s : string) : addr option = List.find_map - (fun (name, (_typ, _size, address, _mb, _binding)) -> if s = name then Some address else None) + (fun (name, (_typ, _size, address, _mb, _binding)) -> if s = name then Some (Dwarf.Absolute address) else None) test.symbol_map diff --git a/src/analyse/Pp.ml b/src/analyse/Pp.ml index d9c749c3..87ce1bec 100644 --- a/src/analyse/Pp.ml +++ b/src/analyse/Pp.ml @@ -574,7 +574,7 @@ let pp_instructions_ranged m test an (low, high) = Printf.printf "pp_instructions_ranged indices: low=%i high=%i \n" (an.index_of_address low) (an.index_of_address high); *) let index_low = an.index_of_address low in - let index_high = (an.index_of_address (Nat_big_num.sub high (Nat_big_num.of_int 4)))+1 in + let index_high = (an.index_of_address (Sym.sub high (Sym.of_int 4)))+1 in let rec subarray_map_to_list f a k k' = if k >= k' then [] else f k a.(k) :: subarray_map_to_list f a (k + 1) k' in @@ -619,7 +619,7 @@ let chunks_of_ranged_cu m test an filename_stem ((low, high), cu) = pp_abbreviations_table cu'.cu_abbreviations_table ); ( "die", ".debug_info die tree", - pp_die c cu'.cu_header d.d_str true (*indent*) (Nat_big_num.of_int 0) true cu'.cu_die ); + pp_die c cu'.cu_header d.d_str true (*indent*) (Sym.of_int 0) true cu'.cu_die ); ( "line", ".debug_line line number info", let lnp = line_number_program_of_compilation_unit d cu' in @@ -628,13 +628,13 @@ let chunks_of_ranged_cu m test an filename_stem ((low, high), cu) = ".debug_line evaluated line info", let lnrs = evaluated_line_info_of_compilation_unit d cu' ds.ds_evaluated_line_info in pp_line_number_registerss lnrs ); - ("sdt", "simple die tree", pp_sdt_compilation_unit (Nat_big_num.of_int 0) cu); + ("sdt", "simple die tree", pp_sdt_compilation_unit (Sym.of_int 0) cu); ( "sdt_globals", "simple die tree globals", - pp_sdt_globals_compilation_unit (Nat_big_num.of_int 0) cu ); + pp_sdt_globals_compilation_unit (Sym.of_int 0) cu ); ( "sdt_locals", "simple die tree locals", - pp_sdt_locals_compilation_unit (Nat_big_num.of_int 0) cu ); + pp_sdt_locals_compilation_unit (Sym.of_int 0) cu ); ("inlined", "inlined subroutine info", pp_inlined_subroutines ds iss); ( "inlined_by_range", "inlined subroutine info by range", diff --git a/src/analyse/QemuLog.ml b/src/analyse/QemuLog.ml index 9591bef5..e25a65c0 100644 --- a/src/analyse/QemuLog.ml +++ b/src/analyse/QemuLog.ml @@ -64,8 +64,8 @@ let read_qemu_log an filename_qemu_log : bool array = (* Printf.printf "%s " s;*) match Scanf.sscanf s "0x%x:%s" (fun addr _ -> addr) with | addr -> - (*Printf.printf "PARSED %s\n" (pp_addr (Nat_big_num.of_int addr));*) - Some (Nat_big_num.of_int addr) + (*Printf.printf "PARSED %s\n" (pp_addr (Sym.of_int addr));*) + Some (Sym.of_int addr) | exception _ -> (*Printf.printf "NOT\n";*) None in List.filter_map parse_line (Array.to_list lines) diff --git a/src/analyse/Utils.ml b/src/analyse/Utils.ml index cf316505..bcd5a717 100644 --- a/src/analyse/Utils.ml +++ b/src/analyse/Utils.ml @@ -49,13 +49,18 @@ open Logs.Logger (struct end) (** TODO: Maybe just use Z.t everywhere (it's shorter) *) -type natural = Nat_big_num.num +type natural = Sym.t (** machine address *) type addr = natural (* hackishly mask out bigint conversion failure *) -let pp_addr (a : natural) = try Ml_bindings.hex_string_of_big_int_pad8 a with Failure s -> let s' = "Failure: int64_of_big_int " ^ Nat_big_num.to_string a in (warn "pp_addr failure: %s" s); s'| e -> raise e +let pp_addr (a : natural) = + try + Dwarf.pp_sym Ml_bindings.hex_string_of_big_int_pad8 a + with + | Failure s -> let s' = "Failure: int64_of_big_int " ^ Sym.to_string a in (warn "pp_addr failure: %s" s); s' + | e -> raise e (** index into instruction-indexed arrays *) type index = int diff --git a/src/utils/sym.ml b/src/utils/sym.ml index bb2ecd56..3f584582 100644 --- a/src/utils/sym.ml +++ b/src/utils/sym.ml @@ -1,3 +1,18 @@ type t = Z.t Dwarf.sym0 -let to_int x = Z.to_int @@ Dwarf.sym_unwrap x "to_int" \ No newline at end of file +let to_int x = Z.to_int @@ Dwarf.sym_unwrap x "to_int" + +let of_int x = Dwarf.Absolute (Z.of_int x) + +let equal = Dwarf.sym_comp Nat_big_num.equal +let less = Dwarf.sym_comp Nat_big_num.less +let less_equal = Dwarf.sym_comp Nat_big_num.less_equal +let greater = Dwarf.sym_comp Nat_big_num.greater +let greater_equal = Dwarf.sym_comp Nat_big_num.greater_equal + +let to_string = Dwarf.pp_sym Z.to_string + +let sub x y = match (x, y) with +| (Dwarf.Offset (s, a), Dwarf.Offset (t, b)) when s = t -> Nat_big_num.sub a b +| (Dwarf.Absolute a, Dwarf.Absolute b) -> Nat_big_num.sub a b +| _ -> failwith "Can't compare" \ No newline at end of file From 14242979a366c234f8dd7fe3f598b12cbb3abe18 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Mon, 27 Jan 2025 13:46:23 +0000 Subject: [PATCH 40/89] [wip] symbolic analyse --- src/analyse/ControlFlow.ml | 2 +- src/analyse/ControlFlowPpDot.ml | 6 +- src/analyse/DwarfVarInfo.ml | 2 +- src/analyse/Elf.ml | 98 +++++++++++++++------------------ src/analyse/ElfSymbols.ml | 4 +- src/analyse/ElfTypes.ml | 3 +- src/analyse/Pp.ml | 8 +-- src/analyse/Symbols.ml | 70 +++++++++++++++++++++++ src/bin/copySources.ml | 2 +- src/elf/file.ml | 2 +- src/utils/sym.ml | 22 +++++++- 11 files changed, 148 insertions(+), 71 deletions(-) create mode 100644 src/analyse/Symbols.ml diff --git a/src/analyse/ControlFlow.ml b/src/analyse/ControlFlow.ml index 89bc11f6..578279cf 100644 --- a/src/analyse/ControlFlow.ml +++ b/src/analyse/ControlFlow.ml @@ -142,7 +142,7 @@ let branch_table_target_addresses test filename_branch_table_option : (addr * ad let rodata_words : (natural * natural) list = Dwarf.words_of_rel_byte_sequence rodata_addr (Dwarf.rbs_no_reloc bs) [] in (*HACK*) let read_rodata_b addr = - Elf_types_native_uint.natural_of_byte + Dwarf.sym_natural_of_byte rodata_bytes.(Sym.to_int (Sym.sub addr rodata_addr)) in let read_rodata_h addr = diff --git a/src/analyse/ControlFlowPpDot.ml b/src/analyse/ControlFlowPpDot.ml index 77375477..123c0c04 100644 --- a/src/analyse/ControlFlowPpDot.ml +++ b/src/analyse/ControlFlowPpDot.ml @@ -204,7 +204,7 @@ let inlining_stack_at_index (an : analysis) k = (function | sss' -> String.concat "\n---\n" - (List.map (Dwarf.pp_sdt_subroutine (Sym.of_int 0)) sss')) + (List.map (Dwarf.pp_sdt_subroutine (Nat_big_num.of_int 0)) sss')) maximal)); [] @@ -859,12 +859,12 @@ let mk_cfg test an visitedo node_name_prefix (recurse_flat : bool) (_inline_all match new_ss_O2_ambient_option with | None -> fatal "no call site for\n%s" - (Dwarf.pp_sdt_subroutine (Sym.of_int 0) ss_current) + (Dwarf.pp_sdt_subroutine (Nat_big_num.of_int 0) ss_current) | Some new_ss_O2_ambient -> ( match new_ss_O2_ambient.ss_call_site with | None -> fatal "no call site2 for\n%s" - (Dwarf.pp_sdt_subroutine (Sym.of_int 0) ss_current) + (Dwarf.pp_sdt_subroutine (Nat_big_num.of_int 0) ss_current) | Some (_ufe, line, _subprogram_name) -> line ) (*, diff --git a/src/analyse/DwarfVarInfo.ml b/src/analyse/DwarfVarInfo.ml index 54e2964f..144e48b5 100644 --- a/src/analyse/DwarfVarInfo.ml +++ b/src/analyse/DwarfVarInfo.ml @@ -246,7 +246,7 @@ let pp_ranged_var (prefix : string) (var : ranged_var) : string = let ((n1, n2, ops), (svfp, context)) = var in prefix ^ pp_sdt_concise_variable_or_formal_parameter_main 0 svfp - ^ (let s = Dwarf.pp_parsed_single_location_description (Sym.of_int 0) (n1, n2, ops) in + ^ (let s = Dwarf.pp_parsed_single_location_description (Nat_big_num.of_int 0) (n1, n2, ops) in String.sub s 0 (String.length s - 1)) (*hackish stripping of trailing \n from linksem - TODO: fix linksem interface*) ^ " " diff --git a/src/analyse/Elf.ml b/src/analyse/Elf.ml index 0734c459..5b3bb63c 100644 --- a/src/analyse/Elf.ml +++ b/src/analyse/Elf.ml @@ -69,20 +69,23 @@ let pp_symbol_map (symbol_map : Elf_file.global_symbol_init_info) = let parse_elf_file (filename : string) : test = (* call ELF analyser on file *) - let info = Sail_interface.populate_and_obtain_global_symbol_init_info filename in - - let ( (elf_file : Elf_file.elf_file), - (elf_epi : Sail_interface.executable_process_image), - (symbol_map : Elf_file.global_symbol_init_info) ) = - match info with - | Error.Fail s -> fatal "populate_and_obtain_global_symbol_init_info: %s" s + let bs = match Byte_sequence.acquire filename with + | Error.Fail s -> fatal "Linksem: Byte_sequence.acquire: %s" s | Error.Success x -> x in - - let f64 = - match elf_file with Elf_file.ELF_File_64 f -> f | _ -> raise (Failure "not Elf64") + let f64 = match Elf_file.read_elf64_file bs with + | Error.Fail s -> fatal "Linksem: read_elf64_file: %s" s + | Error.Success x -> x in + let symbol_map = match Symbols.get_elf64_file_global_symbol_init f64 with + | Error.Fail s -> fatal "LinksemRelocatable: get_elf64_file_global_symbol_init: %s" s + | Error.Success x -> x + in + + let elf_file = Elf_file.ELF_File_64 f64 in + let entry = f64.elf64_file_header.elf64_entry in + let machine = f64.elf64_file_header.elf64_machine in (* linksem main_elf --symbols looks ok for gcc and clang That uses Elf_file.read_elf64_file bs0 >>= fun f1 -> @@ -125,49 +128,38 @@ let parse_elf_file (filename : string) : test = *) (* Debug.print_string "elf segments etc\n";*) - match (elf_epi, elf_file) with - | (Sail_interface.ELF_Class_32 _, _) -> fatal "%s" "cannot handle ELF_Class_32" - | (_, Elf_file.ELF_File_32 _) -> fatal "%s" "cannot handle ELF_File_32" - | (Sail_interface.ELF_Class_64 (segments, e_entry, e_machine), Elf_file.ELF_File_64 f1) -> - (* architectures from linksem elf_header.lem *) - let arch = - if f64.elf64_file_header.elf64_machine = Elf_header.elf_ma_aarch64 then AArch64 - else if f64.elf64_file_header.elf64_machine = Elf_header.elf_ma_x86_64 then X86 - else fatal "unrecognised ELF file architecture" - in - - (* remove all the auto generated segments (they contain only 0s) *) - let segments = - Lem_list.mapMaybe - (fun (seg, prov) -> if prov = Elf_file.FromELF then Some seg else None) - segments - in - let ds = - match Dwarf.extract_dwarf_static (Elf_file.ELF_File_64 f1) Abi_aarch64_symbolic_relocation.aarch64_data_relocation_interpreter with - | None -> fatal "%s" "extract_dwarf_static failed" - | Some ds -> - (* Debug.print_string2 (Dwarf.pp_analysed_location_data ds.Dwarf.ds_dwarf - ds.Dwarf.ds_analysed_location_data); - Debug.print_string2 (Dwarf.pp_evaluated_frame_info - ds.Dwarf.ds_evaluated_frame_info);*) - ds - in - let dwarf_semi_pp_frame_info = - Dwarf.semi_pp_evaluated_frame_info ds.ds_evaluated_frame_info - in - let test = - { - elf_file; - arch; - symbol_map (*@ (symbols_for_stacks !Globals.elf_threads)*); - segments; - e_entry; - e_machine; - dwarf_static = ds; - dwarf_semi_pp_frame_info; - } - in - test + (* architectures from linksem elf_header.lem *) + let arch = + if f64.elf64_file_header.elf64_machine = Elf_header.elf_ma_aarch64 then AArch64 + else if f64.elf64_file_header.elf64_machine = Elf_header.elf_ma_x86_64 then X86 + else fatal "unrecognised ELF file architecture" + in + + let ds = + match Dwarf.extract_dwarf_static (Elf_file.ELF_File_64 f64) Abi_aarch64_symbolic_relocation.aarch64_data_relocation_interpreter with + | None -> fatal "%s" "extract_dwarf_static failed" + | Some ds -> + (* Debug.print_string2 (Dwarf.pp_analysed_location_data ds.Dwarf.ds_dwarf + ds.Dwarf.ds_analysed_location_data); + Debug.print_string2 (Dwarf.pp_evaluated_frame_info + ds.Dwarf.ds_evaluated_frame_info);*) + ds + in + let dwarf_semi_pp_frame_info = + Dwarf.semi_pp_evaluated_frame_info ds.ds_evaluated_frame_info + in + let test = + { + elf_file; + arch; + symbol_map (*@ (symbols_for_stacks !Globals.elf_threads)*); + e_entry = Dwarf.Absolute (entry); + e_machine = Dwarf.Absolute (machine); + dwarf_static = ds; + dwarf_semi_pp_frame_info; + } + in + test (*****************************************************************************) (** marshal and unmarshal test *) diff --git a/src/analyse/ElfSymbols.ml b/src/analyse/ElfSymbols.ml index 83a526ea..e74f7aeb 100644 --- a/src/analyse/ElfSymbols.ml +++ b/src/analyse/ElfSymbols.ml @@ -54,7 +54,7 @@ open ControlFlowTypes let elf_symbols_of_address (test : test) (addr : natural) : string list = List.filter_map (fun (name, (_typ, _size, address, _mb, _binding)) -> - if Dwarf.Absolute address = addr then Some name else None) + if address = addr then Some name else None) test.symbol_map let mk_elf_symbols test instructions : string list array = @@ -62,5 +62,5 @@ let mk_elf_symbols test instructions : string list array = let address_of_elf_symbol test (s : string) : addr option = List.find_map - (fun (name, (_typ, _size, address, _mb, _binding)) -> if s = name then Some (Dwarf.Absolute address) else None) + (fun (name, (_typ, _size, address, _mb, _binding)) -> if s = name then Some address else None) test.symbol_map diff --git a/src/analyse/ElfTypes.ml b/src/analyse/ElfTypes.ml index a0795eb0..005a7ba6 100644 --- a/src/analyse/ElfTypes.ml +++ b/src/analyse/ElfTypes.ml @@ -62,8 +62,7 @@ type architecture = type test = { elf_file : Elf_file.elf_file; arch : architecture; - symbol_map : Elf_file.global_symbol_init_info; - segments : Elf_interpreted_segment.elf64_interpreted_segment list; + symbol_map : Symbols.global_symbol_init_info; e_entry : natural; e_machine : natural; dwarf_static : Dwarf.dwarf_static; diff --git a/src/analyse/Pp.ml b/src/analyse/Pp.ml index 87ce1bec..9dbf2b7a 100644 --- a/src/analyse/Pp.ml +++ b/src/analyse/Pp.ml @@ -619,7 +619,7 @@ let chunks_of_ranged_cu m test an filename_stem ((low, high), cu) = pp_abbreviations_table cu'.cu_abbreviations_table ); ( "die", ".debug_info die tree", - pp_die c cu'.cu_header d.d_str true (*indent*) (Sym.of_int 0) true cu'.cu_die ); + pp_die c cu'.cu_header d.d_str true (*indent*) (Nat_big_num.of_int 0) true cu'.cu_die ); ( "line", ".debug_line line number info", let lnp = line_number_program_of_compilation_unit d cu' in @@ -628,13 +628,13 @@ let chunks_of_ranged_cu m test an filename_stem ((low, high), cu) = ".debug_line evaluated line info", let lnrs = evaluated_line_info_of_compilation_unit d cu' ds.ds_evaluated_line_info in pp_line_number_registerss lnrs ); - ("sdt", "simple die tree", pp_sdt_compilation_unit (Sym.of_int 0) cu); + ("sdt", "simple die tree", pp_sdt_compilation_unit (Nat_big_num.of_int 0) cu); ( "sdt_globals", "simple die tree globals", - pp_sdt_globals_compilation_unit (Sym.of_int 0) cu ); + pp_sdt_globals_compilation_unit (Nat_big_num.of_int 0) cu ); ( "sdt_locals", "simple die tree locals", - pp_sdt_locals_compilation_unit (Sym.of_int 0) cu ); + pp_sdt_locals_compilation_unit (Nat_big_num.of_int 0) cu ); ("inlined", "inlined subroutine info", pp_inlined_subroutines ds iss); ( "inlined_by_range", "inlined subroutine info by range", diff --git a/src/analyse/Symbols.ml b/src/analyse/Symbols.ml new file mode 100644 index 00000000..360ffbf6 --- /dev/null +++ b/src/analyse/Symbols.ml @@ -0,0 +1,70 @@ +(* TODO header *) + +module SMap = Map.Make (String) + +type rels = + | AArch64 of (Z.t, Abi_aarch64_symbolic_relocation.aarch64_relocation_target Elf_symbolic.abstract_relocation) Pmap.map + +type sym_data = +Byte_sequence_wrapper.byte_sequence * rels + + +(* Like in linksem, but address is section+offset, data has relocations and with a writable flag *) +type symbol = string * (Z.t * Z.t * Sym.t * sym_data * Z.t) + +type global_symbol_init_info = symbol list + +open Elf_symbol_table +open Elf_interpreted_section + +let get_elf64_file_global_symbol_init (f: Elf_file.elf64_file) : global_symbol_init_info Error.error = + let secs = f.elf64_file_interpreted_sections in + let machine = f.elf64_file_header.elf64_machine in + Error.bind (Elf_file.get_elf64_file_symbol_table f) @@ fun (symtab, strtab) -> + let rel_cache = ref SMap.empty in + let get_relocs section = + match SMap.find_opt section !rel_cache with + | Some rels -> rels + | None -> + if machine = Elf_header.elf_ma_aarch64 then + Error.bind + (Elf_symbolic.extract_elf64_relocations_for_section f Abi_aarch64_symbolic_relocation.abi_aarch64_relocation_to_abstract section) + @@ fun relocs -> Error.return (AArch64 relocs) + else + Error.fail @@ "machine not supported " ^ (Elf_header.string_of_elf_machine_architecture machine) + in + List.filter_map ( + fun entry -> + let name = Uint32_wrapper.to_bigint entry.elf64_st_name in + let addr_offset = Uint64_wrapper.to_bigint entry.elf64_st_value in + let size = Uint64_wrapper.to_bigint entry.elf64_st_size in + let shndx = Uint32_wrapper.to_int entry.elf64_st_shndx in + let typ = Elf_symbol_table.extract_symbol_type entry.elf64_st_info in + let bnd = Elf_symbol_table.extract_symbol_binding entry.elf64_st_info in + Option.map ( + fun section -> + let addr = Dwarf.Offset (section.elf64_section_name_as_string, addr_offset) in + let data = if Byte_sequence.length0 section.elf64_section_body = Z.zero then + Error.return (Byte_sequence.zeros size) + else + Byte_sequence.offset_and_cut addr_offset size section.elf64_section_body + in + Error.bind (get_relocs section.elf64_section_name_as_string) @@ fun (AArch64 relocs) -> + let relocs = relocs + |> Pmap.bindings_list + |> List.fold_left (fun m (pos, r) -> + let sz = size in + let open Z in + let open Compare in + if pos >= addr_offset && pos < addr_offset + sz then + Pmap.add (pos - addr_offset) r m + else + m + ) (Pmap.empty Z.compare) + |> fun x -> AArch64 x + in + Error.bind data @@ fun data -> + Error.bind (String_table.get_string_at name strtab) @@ fun str -> + Error.return (str, (typ, size, addr, (data, relocs), bnd)) + ) (List.nth_opt secs shndx) + ) symtab |> Error.mapM Fun.id \ No newline at end of file diff --git a/src/bin/copySources.ml b/src/bin/copySources.ml index 094d3257..dc30fd01 100644 --- a/src/bin/copySources.ml +++ b/src/bin/copySources.ml @@ -79,7 +79,7 @@ let process_file () : unit = (function | lnfe -> ( lnh.lnh_comp_dir, - (let dir = Nat_big_num.to_int lnfe.lnfe_directory_index in + (let dir = Sym.to_int lnfe.lnfe_directory_index in if dir = 0 then None else Some diff --git a/src/elf/file.ml b/src/elf/file.ml index e3d8925d..1b2f39fc 100644 --- a/src/elf/file.ml +++ b/src/elf/file.ml @@ -145,7 +145,7 @@ let of_file (filename : string) = Segment. { data; - addr = Nat_big_num.to_int addr; + addr = Sym.to_int addr; (* TODO *) size = BytesSeq.length data; read = true; write = false; diff --git a/src/utils/sym.ml b/src/utils/sym.ml index 3f584582..910c0b1c 100644 --- a/src/utils/sym.ml +++ b/src/utils/sym.ml @@ -3,16 +3,32 @@ type t = Z.t Dwarf.sym0 let to_int x = Z.to_int @@ Dwarf.sym_unwrap x "to_int" let of_int x = Dwarf.Absolute (Z.of_int x) +let of_int64 x = Dwarf.Absolute (Z.of_int64 x) let equal = Dwarf.sym_comp Nat_big_num.equal let less = Dwarf.sym_comp Nat_big_num.less let less_equal = Dwarf.sym_comp Nat_big_num.less_equal let greater = Dwarf.sym_comp Nat_big_num.greater let greater_equal = Dwarf.sym_comp Nat_big_num.greater_equal +let compare = Dwarf.sym_comp Nat_big_num.compare let to_string = Dwarf.pp_sym Z.to_string let sub x y = match (x, y) with -| (Dwarf.Offset (s, a), Dwarf.Offset (t, b)) when s = t -> Nat_big_num.sub a b -| (Dwarf.Absolute a, Dwarf.Absolute b) -> Nat_big_num.sub a b -| _ -> failwith "Can't compare" \ No newline at end of file +| (Dwarf.Offset (s, a), Dwarf.Offset (t, b)) when s = t -> Dwarf.Absolute (Nat_big_num.sub a b) +| (Dwarf.Absolute a, Dwarf.Absolute b) -> Dwarf.Absolute (Nat_big_num.sub a b) +| _ -> Dwarf.Unknown + +let add x y = match (x, y) with +| (Dwarf.Offset (s, a), Dwarf.Absolute b) -> Dwarf.Offset (s, Nat_big_num.add a b) +| (Dwarf.Absolute (a), Dwarf.Offset (s,b)) -> Dwarf.Offset (s, Nat_big_num.add a b) +| (Dwarf.Absolute a, Dwarf.Absolute b) -> Dwarf.Absolute (Nat_big_num.add a b) +| _ -> Dwarf.Unknown + +let mul = Dwarf.sym_map2 Nat_big_num.mul + +let pow_int_positive x y = Dwarf.Absolute (Nat_big_num.pow_int_positive x y) + +let shift_left x s = Dwarf.sym_map (fun x -> Nat_big_num.shift_left x s) x +let shift_right x s = Dwarf.sym_map (fun x -> Nat_big_num.shift_right x s) x +let modulus = Dwarf.sym_map2 Nat_big_num.modulus \ No newline at end of file From 1d1611ff45d7b1b3b76e788ee55a8bec8a5f25a5 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Mon, 27 Jan 2025 18:16:33 +0000 Subject: [PATCH 41/89] [wip] symbolic analyse --- src/analyse/ControlFlow.ml | 35 ++++++++++++++++++++++++----------- src/analyse/DwarfLineInfo.ml | 3 ++- src/analyse/Pp.ml | 7 +++---- src/utils/sym.ml | 24 ++++++++++++++++++++++-- 4 files changed, 51 insertions(+), 18 deletions(-) diff --git a/src/analyse/ControlFlow.ml b/src/analyse/ControlFlow.ml index 578279cf..2bcc4c4b 100644 --- a/src/analyse/ControlFlow.ml +++ b/src/analyse/ControlFlow.ml @@ -440,12 +440,21 @@ AArch64: let objdump_line_regexp = Str.regexp " *\\([0-9a-fA-F]+\\):[ \t]\\([0-9a-fA-F ]+\\)\t\\([^ \r\t\n]+\\) *\\(.*\\)$" +let section_start_line_regexp = + Str.regexp "Disassembly of section \\(.*\\):$" + type objdump_instruction = natural (*address*) * int list (*opcode bytes*) * string (*mnemonic*) * string (*args etc*) -let parse_objdump_line (s : string) : objdump_instruction option = +let parse_section_start s = + if Str.string_match section_start_line_regexp s 0 then + Some (Str.matched_group 1 s) + else + None + +let parse_objdump_line (s : string) : (int64 * int list * string * string) option = let parse_hex_int64 s' = try Scanf.sscanf s' "%Lx" (fun i64 -> i64) with _ -> fatal "cannot parse address in objdump line %s\n" s @@ -465,7 +474,6 @@ let parse_objdump_line (s : string) : objdump_instruction option = if Str.string_match objdump_line_regexp s 0 then begin let addr_int64 = parse_hex_int64 (Str.matched_group 1 s) in - let addr = Sym.of_int64 addr_int64 in let op = Str.matched_group 2 s in let op = strip_whitespace op in let opcode_byte_strings = @@ -477,7 +485,7 @@ let parse_objdump_line (s : string) : objdump_instruction option = let opcode_bytes = List.map parse_hex_int opcode_byte_strings in let mnemonic = Str.matched_group 3 s in let operands = Str.matched_group 4 s in - Some (addr, opcode_bytes, mnemonic, operands) + Some (addr_int64, opcode_bytes, mnemonic, operands) end else None @@ -486,30 +494,35 @@ let parse_objdump_lines arch lines : objdump_instruction list = List.filter_map (parse_objdump_line arch) (Array.to_list lines) *) -let rec parse_objdump_lines arch lines (next_index : int) (last_address : natural option) : +let with_symbolic_address (section: string) (addr, opcode_bytes, mnemonic, operands) : objdump_instruction = + (Dwarf.Offset (section, Nat_big_num.of_int64 addr), opcode_bytes, mnemonic, operands) + +let rec parse_objdump_lines arch lines (next_index : int) (last_address : int64 option) (section: string option) : objdump_instruction list = if next_index >= Array.length lines then [] else + let section = Option.fold ~none:section ~some:Option.some @@ parse_section_start lines.(next_index) in match parse_objdump_line lines.(next_index) with (* skip over unparseable lines *) - | None -> parse_objdump_lines arch lines (next_index + 1) last_address + | None -> parse_objdump_lines arch lines (next_index + 1) last_address section | Some ((addr, _opcode_bytes, _mnemonic, _operands) as i) -> ( + let mki = with_symbolic_address (Option.get section) in match last_address with - | None -> i :: parse_objdump_lines arch lines (next_index + 1) (Some addr) + | None -> mki i :: parse_objdump_lines arch lines (next_index + 1) (Some addr) section | Some last_address' -> - let last_address'' = Sym.add last_address' (Sym.of_int 4) in + let last_address'' = Int64.add last_address' (Int64.of_int 4) in if addr > last_address'' then (* fake up "missing" instructions for any gaps in the address space*) (*warn "gap in objdump instruction address sequence at %s" (pp_addr last_address'');*) - (last_address'', [], "missing", "") - :: parse_objdump_lines arch lines next_index (Some last_address'') - else i :: parse_objdump_lines arch lines (next_index + 1) (Some addr) + mki (last_address'', [], "missing", "") + :: parse_objdump_lines arch lines next_index (Some last_address'') section + else mki i :: parse_objdump_lines arch lines (next_index + 1) (Some addr) section ) let parse_objdump_file arch filename_objdump_d : objdump_instruction array = match read_file_lines filename_objdump_d with | Error s -> fatal "%s\ncouldn't read objdump-d file: \"%s\"\n" s filename_objdump_d - | Ok lines -> Array.of_list (parse_objdump_lines arch lines 0 None) + | Ok lines -> Array.of_list (parse_objdump_lines arch lines 0 None None) (*****************************************************************************) (** parse control-flow instruction asm from objdump and branch table data *) diff --git a/src/analyse/DwarfLineInfo.ml b/src/analyse/DwarfLineInfo.ml index b3561313..7cb6bf73 100644 --- a/src/analyse/DwarfLineInfo.ml +++ b/src/analyse/DwarfLineInfo.ml @@ -136,6 +136,7 @@ let split_into_sequences | None -> fatal "split_into_sequences found sequence of length 0" in let last = lnr.lnr_address in + (* print_endline (Dwarf.pphex_sym first ^ " " ^ Dwarf.pphex_sym last); *) if Sym.equal first last then fatal "split_into_sequences found first=last" else (); let elis = @@ -293,7 +294,7 @@ let mk_line_info (eli : Dwarf.evaluated_line_info) instructions : let (new_perhaps_relevant, remaining') = mk_new_perhaps_relevant [] remaining_entries in let addr_in elie = - Sym.less_equal elie.elie_first addr && Sym.less_equal addr elie.elie_last + Sym.in_range elie.elie_first elie.elie_last addr in let still_active_entries = diff --git a/src/analyse/Pp.ml b/src/analyse/Pp.ml index 9dbf2b7a..0bf41866 100644 --- a/src/analyse/Pp.ml +++ b/src/analyse/Pp.ml @@ -857,7 +857,6 @@ let pp_test_analysis m test an = call_graph ^ "* ************* transitive call graph **************\n" ^ transitive_call_graph | Html -> - "" - (* "\n* ************* instructions *****************\n" *) - (*pp_instruction_init (); - String.concat "" (Array.to_list (Array.mapi (pp_instruction m test an 0) an.instructions))*) + "\n* ************* instructions *****************\n" + ^ (pp_instruction_init (); + String.concat "" (Array.to_list (Array.mapi (pp_instruction m test an 0) an.instructions))) diff --git a/src/utils/sym.ml b/src/utils/sym.ml index 910c0b1c..839f5c96 100644 --- a/src/utils/sym.ml +++ b/src/utils/sym.ml @@ -1,12 +1,25 @@ type t = Z.t Dwarf.sym0 +let pp x = x |> Dwarf.pphex_sym |> Pp.string + let to_int x = Z.to_int @@ Dwarf.sym_unwrap x "to_int" let of_int x = Dwarf.Absolute (Z.of_int x) let of_int64 x = Dwarf.Absolute (Z.of_int64 x) let equal = Dwarf.sym_comp Nat_big_num.equal -let less = Dwarf.sym_comp Nat_big_num.less + +let max_addr = Z.(shift_left (of_int 1) 64 - (of_int 1)) + +let min_addr = Z.of_int 0 + +(* TODO very hacky *) +let less x y = match (x, y) with +| (Dwarf.Absolute x, Dwarf.Offset (_, y)) when Nat_big_num.less x y -> true +| (Dwarf.Absolute x, Dwarf.Offset (_,_)) when Nat_big_num.greater_equal x max_addr -> false +| (Dwarf.Offset (_,_), Dwarf.Absolute y) when Nat_big_num.less max_addr y -> true +| (Dwarf.Offset (_, x), Dwarf.Absolute y) when Nat_big_num.greater_equal x y -> false +| _ -> Dwarf.sym_comp Nat_big_num.less x y let less_equal = Dwarf.sym_comp Nat_big_num.less_equal let greater = Dwarf.sym_comp Nat_big_num.greater let greater_equal = Dwarf.sym_comp Nat_big_num.greater_equal @@ -16,6 +29,7 @@ let to_string = Dwarf.pp_sym Z.to_string let sub x y = match (x, y) with | (Dwarf.Offset (s, a), Dwarf.Offset (t, b)) when s = t -> Dwarf.Absolute (Nat_big_num.sub a b) +| (Dwarf.Offset (s, a), Dwarf.Absolute b) -> Dwarf.Offset (s, Nat_big_num.sub a b) | (Dwarf.Absolute a, Dwarf.Absolute b) -> Dwarf.Absolute (Nat_big_num.sub a b) | _ -> Dwarf.Unknown @@ -31,4 +45,10 @@ let pow_int_positive x y = Dwarf.Absolute (Nat_big_num.pow_int_positive x y) let shift_left x s = Dwarf.sym_map (fun x -> Nat_big_num.shift_left x s) x let shift_right x s = Dwarf.sym_map (fun x -> Nat_big_num.shift_right x s) x -let modulus = Dwarf.sym_map2 Nat_big_num.modulus \ No newline at end of file +let modulus = Dwarf.sym_map2 Nat_big_num.modulus + +let in_range first last x = match (first, last, x) with +| (Dwarf.Absolute f, Dwarf.Absolute l, Dwarf.Absolute x) -> Nat_big_num.less_equal f x && Nat_big_num.less_equal x l +| (Dwarf.Offset (s1, f), Dwarf.Offset (s2, l), Dwarf.Offset (s, x)) when s1 = s2 -> + s1 = s && Nat_big_num.less_equal f x && Nat_big_num.less_equal x l (* TODO kinda hacky *) +| _ -> Raise.fail "Can't determine if %t is in range [%t,%t]" (Pp.tos pp x) (Pp.tos pp first) (Pp.tos pp last) \ No newline at end of file From 04ee90ef6985ffc26f15d18f29611cd946146832 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Mon, 27 Jan 2025 23:53:21 +0000 Subject: [PATCH 42/89] Fix parsing objdump --- src/analyse/ControlFlow.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/analyse/ControlFlow.ml b/src/analyse/ControlFlow.ml index 2bcc4c4b..4c305098 100644 --- a/src/analyse/ControlFlow.ml +++ b/src/analyse/ControlFlow.ml @@ -291,7 +291,7 @@ let parse_addr (s : string) : natural = try Scanf.sscanf s "0x%Lx" (fun i64 -> Sym.of_int64 i64) with - Scanf.Scan_failure _ -> + (Scanf.Scan_failure _ | End_of_file) -> Scanf.sscanf s "%Lx" (fun i64 -> Sym.of_int64 i64) let parse_target s = From 5d010309d195e27241d7186e22a46c037e727012 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 30 Jan 2025 13:06:17 +0000 Subject: [PATCH 43/89] notes --- notes-TODO | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/notes-TODO b/notes-TODO index b1a58b67..95c382c0 100644 --- a/notes-TODO +++ b/notes-TODO @@ -6,4 +6,17 @@ Instruction fetch: is it sound? (rewriting .text) Z3 finding unique solution - Get model -> assert not model -> check now it is unsat -- Need to extend the protocol probably \ No newline at end of file +- Need to extend the protocol probably + +SIMREL +- state = (pc,M) (and registers TODO) +- (pc,M1) ~ (pc.M2) iff there is MT, such that + MT(A1,A2,sz) = T => M1[A1:A1+sz] =T M2[A2:A2+sz] + and respects dwarf at pc + and maybe some consistency of MT?? e.g. overlaping ranges +- relation (=T) defined using MT + - (=base_type) is equality + - (=struct) fieldwise + - (=*T): + A1 =*T A2 <=> MT(A1,A2,sz(T)) = T +- Hoare logic (or similar) with MT as variable (only read/write commands) From 65ebed5aca0933d92e983ca08b6fcab7e03d5db4 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 30 Jan 2025 13:10:22 +0000 Subject: [PATCH 44/89] set SCTLR_EL2 --- src/config/isla_aarch64.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/config/isla_aarch64.toml b/src/config/isla_aarch64.toml index 27d6e500..f762a682 100644 --- a/src/config/isla_aarch64.toml +++ b/src/config/isla_aarch64.toml @@ -141,6 +141,7 @@ ignore = [ # Bit 1 being unset allows unaligned accesses # Bit 26 being set allows cache-maintenance ops in EL0 "SCTLR_EL1" = "0x0000000004000000" +"SCTLR_EL2" = "0x0000000004000000" # A map from register names that may appear in litmus files to Sail # register names From d30207dc5a408771da53d1576d342e4cbe71cb21 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 30 Jan 2025 13:53:32 +0000 Subject: [PATCH 45/89] Make funcRD work --- src/bin/readDwarf.ml | 2 +- src/elf/address.ml | 2 ++ src/run/funcRD.ml | 38 +++++++++++++++++++++++--------------- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/bin/readDwarf.ml b/src/bin/readDwarf.ml index 6be5927f..53e15618 100644 --- a/src/bin/readDwarf.ml +++ b/src/bin/readDwarf.ml @@ -71,7 +71,7 @@ let commands = Run.Func.command; Run.Instr.command; Run.Block.command; - (* Run.FuncRD.command; *) + Run.FuncRD.command; CopySourcesCmd.command; Z3.Test.command; ] diff --git a/src/elf/address.ml b/src/elf/address.ml index 701ddd89..f332f31a 100644 --- a/src/elf/address.ml +++ b/src/elf/address.ml @@ -22,3 +22,5 @@ let (>) = compare (>) let (<=) = compare (<=) let (>=) = compare (>=) + +let to_sym {section; offset} = Dwarf.Offset (section, Z.of_int offset) \ No newline at end of file diff --git a/src/run/funcRD.ml b/src/run/funcRD.ml index 374b7e9d..f2d1fffb 100644 --- a/src/run/funcRD.ml +++ b/src/run/funcRD.ml @@ -48,14 +48,18 @@ instructions.*) open Cmdliner -(* open Config.CommonOpt *) -(* open Fun *) +open Config.CommonOpt +open Fun open Logs.Logger (struct let str = __MODULE__ end) -(* let run_func_rd elfname name objdump_d branchtables breakpoints = +let run_func_rd elfname name objdump_d branchtables breakpoints = + match Analyse.Utils.read_file_lines "src/analyse/html-preamble-insts.html" with + | Error _ -> () + | Ok lines -> Array.iter (function s -> Printf.printf "%s\n" s) lines + ; base "Running with rd %s in %s" name elfname; base "Loading %s" elfname; let dwarf = Dw.of_file elfname in @@ -70,13 +74,13 @@ end) let abi = Arch.get_abi api in Trace.Cache.start @@ Arch.get_isla_config (); base "Computing entry state"; - let start = Init.state () |> State.copy ~elf |> abi.init in + let start = Init.state () |> State.copy ~elf |> State.init_sections ~addr_size:Arch.address_size |> abi.init in base "Loading %s for Analyse" elfname; let analyse_test = Analyse.Elf.parse_elf_file elfname in base "Analysing %s for Analyse" elfname; let analyse_analysis = Analyse.Collected.mk_analysis analyse_test objdump_d branchtables in let print_analyse_instruction pc = - let pc = Z.of_int pc in + let pc = Elf.Address.to_sym pc in let index = analyse_analysis.index_of_address pc in let instr = analyse_analysis.instructions.(index) in Analyse.Pp.pp_instruction Analyse.Types.Html (*Ascii*) analyse_test analyse_analysis 0 index @@ -98,7 +102,7 @@ end) let tree = Block_lib.run ~every_instruction:true block start in base "Ended running, start pretty printing"; (* This table will contain the state diff to print at each pc with a message *) - let instr_data : (int, string * State.t * State.Reg.t list) Hashtbl.t = + let instr_data : (Elf.Address.t, string * State.t * State.Reg.t list) Hashtbl.t = Hashtbl.create 100 in let get_footprint pc = @@ -113,7 +117,7 @@ end) let last_pc = st.last_pc in let last_instr_f = get_footprint last_pc in let s = - if last_pc <> pc - 4 then Printf.sprintf "Coming from 0x%x: " last_pc else "" + if Elf.Address.(last_pc + 4 <> pc) then Printf.sprintf "Coming from %t: " Pp.(tos Elf.Address.pp last_pc) else "" in let regs = List.merge_uniq Stdlib.compare cur_instr_f last_instr_f in Hashtbl.add instr_data pc (Printf.sprintf "%sBefore branch" s, st, regs) @@ -122,27 +126,31 @@ end) let last_pc = st.last_pc in let last_instr_f = get_footprint last_pc in let s = - if last_pc <> pc - 4 then Printf.sprintf "Coming from 0x%x: " last_pc else "" + if Elf.Address.(last_pc + 4 <> pc) then Printf.sprintf "Coming from %t: " Pp.(tos Elf.Address.pp last_pc) else "" in let regs = List.merge_uniq Stdlib.compare cur_instr_f last_instr_f in Hashtbl.add instr_data pc (Printf.sprintf "%sNormal instruction" s, st, regs) | Block_lib.End s -> let last_pc = st.last_pc in let last_instr = Runner.expect_normal runner last_pc in - Hashtbl.add instr_data (st.last_pc + 4) + Hashtbl.add instr_data Elf.Address.(st.last_pc + 4) (Printf.sprintf "End because: %s" s, st, Trace.Instr.footprint last_instr)) tree; Vec.iter (fun funcaddr -> let sym = Elf.SymTable.of_addr elf.symbols funcaddr in Analyse.Pp.pp_instruction_init (); - Seq.iota_step_up ~start:funcaddr ~step:4 ~endi:(funcaddr + sym.size) - |> Seq.iter (fun pc -> + Seq.iota_step_up ~start:0 ~step:4 ~endi:sym.size + |> Seq.iter (fun pc_diff -> + let pc = Elf.Address.(funcaddr + pc_diff) in Hashtbl.find_all instr_data pc |> List.iter (fun (msg, st, regs) -> - base "At 0x%x, %s:\n%t" pc msg Pp.(topi (State.pp_partial ~regs) st)); + base "At %t, %s:\n%t" Pp.(top Elf.Address.pp pc) msg Pp.(topi (State.pp_partial ~regs) st)); print_string (print_analyse_instruction pc))) - runner.funcs *) + runner.funcs; + match Analyse.Utils.read_file_lines "src/analyse/html-postamble.html" with + | Error _ -> () + | Ok lines -> Array.iter (function s -> Printf.printf "%s\n" s) lines let elf = let doc = "ELF file from which to pull the code" in @@ -168,7 +176,7 @@ let breakpoints = in Arg.(value & opt_all string [] & info ["b"; "break"] ~docv:"POSITION" ~doc) -(* let term = +let term = Term.( CmdlinerHelper.func_options comopts run_func_rd $ elf $ func $ objdump_d $ branch_table $ breakpoints) @@ -181,4 +189,4 @@ let info = in Cmd.(info "run-func-rd" ~doc ~exits) -let command = (term, info) *) +let command = (term, info) From b0dc540d6795bc61c126918cc6ec204d97728430 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Fri, 31 Jan 2025 15:20:47 +0000 Subject: [PATCH 46/89] Fix init of large objects --- src/elf/file.ml | 1 + src/state/base.ml | 15 ++++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/elf/file.ml b/src/elf/file.ml index 1b2f39fc..d628ed8f 100644 --- a/src/elf/file.ml +++ b/src/elf/file.ml @@ -142,6 +142,7 @@ let of_file (filename : string) = (* `false' argument is for returning an empty byte-sequence if section is not found, instead of throwing an exception *) in + Printf.printf "%t" Pp.(top Sym.pp addr); Segment. { data; diff --git a/src/state/base.ml b/src/state/base.ml index b6918d03..96e35997 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -533,11 +533,16 @@ let init_sections ~addr_size state = Elf.SymTable.iter elf.symbols @@ fun sym -> if sym.typ = Elf.Symbol.OBJECT then let provenance = Mem.create_section_frag ~addr_size state.mem sym.addr.section in - let addr = Exp.of_address ~size:addr_size sym.addr in - let size = Ast.Size.of_bytes sym.size in - let (exp, asserts) = Relocation.exp_of_data sym.data in - Mem.write ~provenance state.mem ~addr ~size ~exp; - List.iter (push_relocation_assert state) asserts; + Seq.iota_step_up ~step:16 ~endi:sym.size + |> Seq.iter (fun off -> + let len = min 16 (sym.size - off) in + let data = Elf.Symbol.sub sym off len in + let addr = Exp.of_address ~size:addr_size Elf.Address.(sym.addr + off) in + let size = Ast.Size.of_bytes len in + let (exp, asserts) = Relocation.exp_of_data data in + Mem.write ~provenance state.mem ~addr ~size ~exp; + List.iter (push_relocation_assert state) asserts; + ) ) in state From bda07fb41125359631d1f18e07eb7be184c7cffc Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Fri, 31 Jan 2025 16:05:25 +0000 Subject: [PATCH 47/89] [wip] Run program --- src/bin/readDwarf.ml | 1 + src/run/relProg.ml | 109 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 src/run/relProg.ml diff --git a/src/bin/readDwarf.ml b/src/bin/readDwarf.ml index 53e15618..60a7749f 100644 --- a/src/bin/readDwarf.ml +++ b/src/bin/readDwarf.ml @@ -72,6 +72,7 @@ let commands = Run.Instr.command; Run.Block.command; Run.FuncRD.command; + Run.RelProg.command; CopySourcesCmd.command; Z3.Test.command; ] diff --git a/src/run/relProg.ml b/src/run/relProg.ml new file mode 100644 index 00000000..f2a3f202 --- /dev/null +++ b/src/run/relProg.ml @@ -0,0 +1,109 @@ +open Cmdliner +open Config.CommonOpt + +open Logs.Logger (struct + let str = __MODULE__ +end) + + +let run_prog elfname name objdump_d branchtables = + match Analyse.Utils.read_file_lines "src/analyse/html-preamble-insts.html" with + | Error _ -> () + | Ok lines -> Array.iter (function s -> Printf.printf "%s\n" s) lines + ; + base "Running with rd %s in %s" name elfname; + base "Loading %s" elfname; + let dwarf = Dw.of_file elfname in + let elf = dwarf.elf in + let func = + Dw.get_func_opt ~name dwarf + |> Option.value_fun ~default:(fun () -> fail "Function %s wasn't found in %s" name elfname) + in + let api = Dw.Func.get_api func in + base "API %t" (Pp.top Arch.pp_api api); + base "Loading ABI"; + let abi = Arch.get_abi api in + Trace.Cache.start @@ Arch.get_isla_config (); + base "Computing entry state"; + let start = Init.state () |> State.copy ~elf |> State.init_sections ~addr_size:Arch.address_size |> abi.init in + base "Loading %s for Analyse" elfname; + let analyse_test = Analyse.Elf.parse_elf_file elfname in + base "Analysing %s for Analyse" elfname; + let analyse_analysis = Analyse.Collected.mk_analysis analyse_test objdump_d branchtables in + let print_analyse_instruction pc = + let pc = Elf.Address.to_sym pc in + let index = analyse_analysis.index_of_address pc in + let instr = analyse_analysis.instructions.(index) in + Analyse.Pp.pp_instruction Analyse.Types.Html (*Ascii*) analyse_test analyse_analysis 0 index + instr + in + (* base "Entry state:\n%t" Pp.(topi State.pp start); *) + match func.sym with + | None -> fail "Function %s exists in DWARF data but do not have any code" name + | Some sym -> + let endpred = Block_lib.gen_endpred () in + let runner = Runner.of_dwarf dwarf in + let block = Block_lib.make ~runner ~start:sym.addr ~endpred in + base "Start running"; + let tree = Block_lib.run ~every_instruction:true block start in + base "Ended running, start pretty printing"; + (* This table will contain the state diff to print at each pc with a message *) + (* let instr_data : (Elf.Address.t, string * State.t * State.Reg.t list) Hashtbl.t = + Hashtbl.create 100 + in + let get_footprint pc = + Runner.get_normal_opt runner pc |> Option.fold ~none:[] ~some:Trace.Instr.footprint + in *) + State.Tree.iter + (fun a st -> + match a with + | Block_lib.Start -> () + | Block_lib.BranchAt pc -> + let last_pc = st.last_pc in + if Elf.Address.(last_pc + 4 <> pc) then + Printf.printf "\nJUMP from %t:\n " Pp.(top Elf.Address.pp last_pc); + print_string (print_analyse_instruction pc); + print_endline "BRANCH!"; + | Block_lib.NormalAt pc -> + let last_pc = st.last_pc in + if Elf.Address.(last_pc + 4 <> pc) then + Printf.printf "\nJUMP from %t:\n " Pp.(top Elf.Address.pp last_pc); + print_string (print_analyse_instruction pc); + | Block_lib.End _ -> ()) + tree; + match Analyse.Utils.read_file_lines "src/analyse/html-postamble.html" with + | Error _ -> () + | Ok lines -> Array.iter (function s -> Printf.printf "%s\n" s) lines + + +let elf = + let doc = "ELF file from which to pull the code" in + Arg.(required & pos 0 (some non_dir_file) None & info [] ~docv:"ELF_FILE" ~doc) + +let func = + let doc = "Symbol name of the function to run" in + Arg.(value & pos 1 string "main" & info [] ~docv:"FUNCTION" ~doc) + +let objdump_d = + let doc = "File containing result of objdump -d" in + Arg.(required & opt (some non_dir_file) None & info ["objdump-d"] ~docv:"OBJDUMP_FILE" ~doc) + +let branch_table = + let doc = "File containing branch table base addresses and sizes" in + Arg.( + (* required *) + value & opt (some non_dir_file) None & info ["branch-tables"] ~docv:"BRANCH_TABLES_FILE" ~doc) + +let term = + Term.( + CmdlinerHelper.func_options comopts run_prog + $ elf $ func $ objdump_d $ branch_table) + +let info = + let doc = + "Run main of relocatable file" + in + Cmd.(info "run-rel-prog" ~doc ~exits) + +let command = (term, info) + \ No newline at end of file From 8b30d350b9d940d568eaf518549f11ab45278cc1 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sat, 1 Feb 2025 19:38:24 +0000 Subject: [PATCH 48/89] Run and print debug info --- src/dw/loc.ml | 8 +++++++ src/run/relProg.ml | 59 +++++++++++++++++++++++++++++++++++++++++----- src/utils/sym.ml | 4 +++- 3 files changed, 64 insertions(+), 7 deletions(-) diff --git a/src/dw/loc.ml b/src/dw/loc.ml index 5d03061a..3ead2a51 100644 --- a/src/dw/loc.ml +++ b/src/dw/loc.ml @@ -72,6 +72,7 @@ type t = | RegisterOffset of State.Reg.t * int (** At register + offset address *) | StackFrame of int (** On the stackFrame with offset *) | Global of Elf.SymTable.sym_offset (** Global variable with an offset *) + | Const of Z.t | Dwarf of dwop list (** Uninterpreted dwarf location *) (** The type of a location in linksem format *) @@ -128,6 +129,9 @@ let of_linksem ?(amap = Arch.dwarf_reg_map ()) (elf : Elf.File.t) : linksem_t -> Dwarf ops ) (* Other *) + | [{ op_semantics = OpSem_lit; op_argument_values = [arg]; _ }; { op_semantics = OpSem_stack_value; _ }] -> + let value = Sym.to_z @@ sym_of_oav arg in + Const value | ops -> Dwarf ops (** Convert the location to a string. This is not reversible *) @@ -136,6 +140,7 @@ let to_string = function | RegisterOffset (reg, off) -> Printf.sprintf "[%s+%x]" (State.Reg.to_string reg) off | StackFrame off -> Printf.sprintf "[frame+%x]" off | Global symoff -> Elf.SymTable.string_of_sym_offset symoff + | Const x -> Z.to_string x | Dwarf ops -> Dwarf.pp_operations ops (** Compare two location. Loc.t is not compatible with polymorphic compare *) @@ -154,6 +159,9 @@ let compare l1 l2 = Pair.compare ~fst:Elf.Symbol.compare (sym1, off1) (sym2, off2) | (Global (_, _), _) -> -1 | (_, Global (_, _)) -> 1 + | (Const x, Const y) -> Z.compare x y + | (Const _, _) -> -1 + | (_, Const _) -> 1 | (Dwarf ops1, Dwarf ops2) -> compare ops1 ops2 (** Pretty-print the location *) diff --git a/src/run/relProg.ml b/src/run/relProg.ml index f2a3f202..62c3f8da 100644 --- a/src/run/relProg.ml +++ b/src/run/relProg.ml @@ -5,6 +5,51 @@ open Logs.Logger (struct let str = __MODULE__ end) +let pp_eval_loc sz st (loc: Dw.Loc.t) : PPrint.document = + let value = match loc with + | Register reg -> Some (State.get_reg_exp st reg) + | RegisterOffset (reg, off) -> + let r = State.get_reg_exp st reg in + Some (State.read_noprov st ~addr:Exp.Typed.(r + bits_int ~size:Arch.address_size off) ~size:(Ast.Size.of_bytes sz)) + | StackFrame _off -> + None + | Global symoff -> + let addr = Elf.SymTable.to_addr_offset symoff in + let addr = State.Exp.of_address ~size:Arch.address_size addr in + Some (State.read_noprov st ~addr ~size:(Ast.Size.of_bytes sz)) + | Const x -> Some(x |> BitVec.of_z ~size:(8*sz) |> Exp.Typed.bits) + | Dwarf _ops -> None in + Pp.optional State.Exp.pp value + +let printvars ~st ~(dwarf: Dw.t) pc = + let st = State.copy_if_locked st in + let pv vars = + Seq.iter (fun (v: Dw.Var.t) -> + let sz = Ctype.sizeof v.ctype in + match List.find_map (fun ((lo,hi), loc) -> Option.( + let open Elf.Address in + let* hi = hi in + let* over = lo <= pc in + let* under = pc < hi in + if over && under then + Some loc + else + None + )) v.locs with + | None -> () + | Some loc -> Printf.printf "%s = %t\n" v.name Pp.(top (pp_eval_loc sz st) loc); + ) + vars + in + pv (Hashtbl.to_seq_values dwarf.vars); + Hashtbl.iter (fun _ (fn:Dw.Func.t) -> + let rec pscope (scope:Dw.Func.scope) = + pv (List.to_seq scope.vars); + List.iter pscope scope.scopes + in + pscope fn.func.scope + ) dwarf.funcs + let run_prog elfname name objdump_d branchtables = match Analyse.Utils.read_file_lines "src/analyse/html-preamble-insts.html" with @@ -56,20 +101,22 @@ let run_prog elfname name objdump_d branchtables = in *) State.Tree.iter (fun a st -> - match a with + let last_pc = st.last_pc in + (match a with | Block_lib.Start -> () | Block_lib.BranchAt pc -> - let last_pc = st.last_pc in if Elf.Address.(last_pc + 4 <> pc) then - Printf.printf "\nJUMP from %t:\n " Pp.(top Elf.Address.pp last_pc); + Printf.printf "\nJUMP from %t:\n\n" Pp.(top Elf.Address.pp last_pc); + printvars ~st ~dwarf pc; print_string (print_analyse_instruction pc); print_endline "BRANCH!"; | Block_lib.NormalAt pc -> - let last_pc = st.last_pc in if Elf.Address.(last_pc + 4 <> pc) then - Printf.printf "\nJUMP from %t:\n " Pp.(top Elf.Address.pp last_pc); + Printf.printf "\nJUMP from %t:\n\n" Pp.(top Elf.Address.pp last_pc); + printvars ~st ~dwarf pc; print_string (print_analyse_instruction pc); - | Block_lib.End _ -> ()) + | Block_lib.End _ -> ()); + ) tree; match Analyse.Utils.read_file_lines "src/analyse/html-postamble.html" with | Error _ -> () diff --git a/src/utils/sym.ml b/src/utils/sym.ml index 839f5c96..1510a8b3 100644 --- a/src/utils/sym.ml +++ b/src/utils/sym.ml @@ -2,7 +2,9 @@ type t = Z.t Dwarf.sym0 let pp x = x |> Dwarf.pphex_sym |> Pp.string -let to_int x = Z.to_int @@ Dwarf.sym_unwrap x "to_int" +let to_z x = Dwarf.sym_unwrap x "to_z" + +let to_int x = Z.to_int @@ to_z x let of_int x = Dwarf.Absolute (Z.of_int x) let of_int64 x = Dwarf.Absolute (Z.of_int64 x) From 7c5fd5f45174f97ebf2092bd3d25389216248828 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sat, 1 Feb 2025 20:32:54 +0000 Subject: [PATCH 49/89] Fix ldst relocations --- src/elf/relocations.ml | 2 +- src/isla/cache.ml | 11 +++++------ src/isla/relocation.ml | 8 ++++---- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/elf/relocations.ml b/src/elf/relocations.ml index 1a60a8bd..66cdc831 100644 --- a/src/elf/relocations.ml +++ b/src/elf/relocations.ml @@ -81,7 +81,7 @@ let pp_target = Pp.(function | AArch64 Abi_aarch64_symbolic_relocation.ADD -> !^"ADD" | AArch64 Abi_aarch64_symbolic_relocation.ADRP -> !^"ADRP" | AArch64 Abi_aarch64_symbolic_relocation.CALL -> !^"CALL" -| AArch64 Abi_aarch64_symbolic_relocation.LDST -> !^"LDST") +| AArch64 Abi_aarch64_symbolic_relocation.LDST b -> !^"LDST" ^^ int (1 lsl b)) let pp_rel rel = let hi, lo = rel.mask in diff --git a/src/isla/cache.ml b/src/isla/cache.ml index e5c95d72..17a71ce6 100644 --- a/src/isla/cache.ml +++ b/src/isla/cache.ml @@ -85,8 +85,8 @@ module Opcode (*: Cache.Key *) = struct | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data320) -> 2 | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADRP) -> 3 | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADD) -> 4 - | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST) -> 5 - | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL) -> 6 + | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL) -> 5 + | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST b) -> 6 + b let reloc_of_id: int -> Relocation.t option = function | 0 -> None @@ -94,9 +94,8 @@ module Opcode (*: Cache.Key *) = struct | 2 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data320) | 3 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADRP) | 4 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADD) - | 5 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST) - | 6 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL) - | _ -> Raise.fail "invalid reloc id" + | 5 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL) + | x -> Some (Elf.Relocations.AArch64 (Abi_aarch64_symbolic_relocation.LDST (x-6))) let equal a b = match (a, b) with @@ -105,7 +104,7 @@ module Opcode (*: Cache.Key *) = struct | _ -> false let small_enough bs rel_id = - BytesSeq.length bs < BytesSeq.int_bytes && rel_id < 8 + BytesSeq.length bs < (BytesSeq.int_bytes-1) && rel_id < (8*256) let hash = function | None -> 0 diff --git a/src/isla/relocation.ml b/src/isla/relocation.ml index 3e3004ad..d8e17a5a 100644 --- a/src/isla/relocation.ml +++ b/src/isla/relocation.ml @@ -25,9 +25,9 @@ let pp_opcode_with_segments (b, r) = BitVec.pp_smt (BitVec.extract 22 31 bits) ^^ !^" x0:12 " ^^ BitVec.pp_smt (BitVec.extract 0 9 bits) - | Abi_aarch64_symbolic_relocation.LDST -> (* TODO different width loads, alignment *) - BitVec.pp_smt (BitVec.extract 20 31 bits) - ^^ !^" x0:10 " ^^ + | Abi_aarch64_symbolic_relocation.LDST b -> (* TODO different width loads, alignment *) + BitVec.pp_smt (BitVec.extract (22-b) 31 bits) + ^^ !^" x0:" ^^ int (12-b) ^^ !^" " ^^ BitVec.pp_smt (BitVec.extract 0 9 bits) | Abi_aarch64_symbolic_relocation.CALL -> BitVec.pp_smt (BitVec.extract 26 31 bits) @@ -40,5 +40,5 @@ let segments_of_reloc: t -> segment list = function | Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data320 -> fatal "invalid relocation for instructions (Data32)" | Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADRP -> ["x0", (0, 1); "x1", (2, 20)] (* or absolute? ["x0", (12, 13); "x1", (14, 32)] *) | Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADD -> ["x0", (0, 11)] -| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST -> ["x0", (0, 9)] (* TODO depends on load size *) (* or absolute? ["x0", (2, 11)] *) +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST b -> ["x0", (0, 11-b)] (* TODO depends on load size *) (* or absolute? ["x0", (2, 11)] *) | Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL -> ["x0", (0, 25)] (* or absolute? ["x0", (2, 27)] *) \ No newline at end of file From 84798b3f458e6f8e72f045a7db84dce05e8584f6 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Wed, 5 Feb 2025 16:07:48 +0000 Subject: [PATCH 50/89] Fix rngmap croping --- src/utils/rngMap.ml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/utils/rngMap.ml b/src/utils/rngMap.ml index 1e81ad21..42f872cb 100644 --- a/src/utils/rngMap.ml +++ b/src/utils/rngMap.ml @@ -303,14 +303,20 @@ module Make (Obj : LenObject) : S with type obj = Obj.t = struct let clear_crop t ~pos ~len ~crop = assert (len >= 0); (* Crop an possible object starting before the start but ending after the start. *) + let endp = pos + len in let t = match prev t (pos - 1) with | Some (addr, obj) when addr + Obj.len obj > pos -> + let objend = addr + Obj.len obj in + let t = if endp < objend then + IMap.add (pos + len) (crop ~pos:(endp - addr) ~len:(objend - endp) obj) t + else + t + in IMap.update addr (Option.map (crop ~pos:0 ~len:(pos - addr))) t | _ -> t in let seq = IMap.to_seq_from pos t in - let endp = pos + len in (* Remove all objects of the sequence from t until endp *) let rec remove_until t seq endp = match seq () with From 688d53968d96d30ea805fd7572ab40b8ca2b5eab Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Wed, 5 Feb 2025 16:22:25 +0000 Subject: [PATCH 51/89] Fix symbolic bytes sub --- src/state/symbolicBytes.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/state/symbolicBytes.ml b/src/state/symbolicBytes.ml index e9bf6836..2693b0b3 100644 --- a/src/state/symbolicBytes.ml +++ b/src/state/symbolicBytes.ml @@ -148,7 +148,7 @@ module Make (Var : Exp.Var) : S with type var = Var.t = struct || *) let next = pos + off_len in - let* rest = sub_list ~pos:next ~len:(len - next) sb in + let* rest = sub_list ~pos:next ~len:(len - off_len) sb in let nexp = Typed.extract ~last:((8 * elen) - 1) ~first:(8 * off) e in Some (nexp :: rest) else From a9b1c224e6c1224be6bacc4e673d7677a1bbf805 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Wed, 5 Feb 2025 16:28:18 +0000 Subject: [PATCH 52/89] Eval debug variable expressions --- src/run/relProg.ml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/run/relProg.ml b/src/run/relProg.ml index 62c3f8da..e0923831 100644 --- a/src/run/relProg.ml +++ b/src/run/relProg.ml @@ -19,7 +19,11 @@ let pp_eval_loc sz st (loc: Dw.Loc.t) : PPrint.document = Some (State.read_noprov st ~addr ~size:(Ast.Size.of_bytes sz)) | Const x -> Some(x |> BitVec.of_z ~size:(8*sz) |> Exp.Typed.bits) | Dwarf _ops -> None in - Pp.optional State.Exp.pp value + Pp.optional (fun value -> + match Exp.ConcreteEval.eval_if_concrete value with + | Some(value) -> Exp.Value.pp value + | None -> State.Exp.pp value + ) value let printvars ~st ~(dwarf: Dw.t) pc = let st = State.copy_if_locked st in From 02cf6019379bb7ca1cf3394c875610488ce86ad5 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Wed, 5 Feb 2025 17:32:13 +0000 Subject: [PATCH 53/89] Nicer print (needs testing) --- src/run/relProg.ml | 44 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/src/run/relProg.ml b/src/run/relProg.ml index e0923831..16a277b1 100644 --- a/src/run/relProg.ml +++ b/src/run/relProg.ml @@ -5,7 +5,42 @@ open Logs.Logger (struct let str = __MODULE__ end) -let pp_eval_loc sz st (loc: Dw.Loc.t) : PPrint.document = +let rec pp_array pp sz dims value = + match dims with + | [] -> pp value + | None::_ -> pp value + | Some d :: dims -> + let sz = sz / d in + Seq.iota d + |> Seq.map (fun x -> Exp.Typed.extract ~first:(8*x*sz) ~last:(8*(x+1)*sz-1) value) + |> List.of_seq + |> Pp.list (pp_array pp sz dims) + +let pp_typed ~(tenv: Ctype.env) ~(ctype: Ctype.t) ~pp (value: State.Exp.t) = + match ctype.unqualified with + | Machine _ -> pp value + | Cint _ -> pp value + | Cbool -> pp value + | Ptr _ -> pp value + | Struct { id; _ } -> + let s = IdMap.geti tenv.structs id in + Pp.( + Ctype.FieldMap.to_seq s.layout + |> Seq.map (fun (offset, (field:Ctype.field)) -> ( + opt string field.fname, + pp (Exp.Typed.extract ~first:(8*offset) ~last:(8*(offset + field.size)-1) value) + )) + |> List.of_seq + |> mapping s.name + ) + | Array { dims; _ } -> + let sz = Ctype.sizeof ctype in + pp_array pp sz dims value + | Enum _ -> pp value + | FuncPtr -> pp value + | Missing -> pp value + +let pp_eval_loc sz st ~(tenv: Ctype.env) ~(ctype: Ctype.t) (loc: Dw.Loc.t) : PPrint.document = let value = match loc with | Register reg -> Some (State.get_reg_exp st reg) | RegisterOffset (reg, off) -> @@ -19,11 +54,12 @@ let pp_eval_loc sz st (loc: Dw.Loc.t) : PPrint.document = Some (State.read_noprov st ~addr ~size:(Ast.Size.of_bytes sz)) | Const x -> Some(x |> BitVec.of_z ~size:(8*sz) |> Exp.Typed.bits) | Dwarf _ops -> None in - Pp.optional (fun value -> + let pp = fun value -> match Exp.ConcreteEval.eval_if_concrete value with | Some(value) -> Exp.Value.pp value | None -> State.Exp.pp value - ) value + in + Pp.optional (pp_typed ~tenv ~ctype ~pp) value let printvars ~st ~(dwarf: Dw.t) pc = let st = State.copy_if_locked st in @@ -41,7 +77,7 @@ let printvars ~st ~(dwarf: Dw.t) pc = None )) v.locs with | None -> () - | Some loc -> Printf.printf "%s = %t\n" v.name Pp.(top (pp_eval_loc sz st) loc); + | Some loc -> Printf.printf "%s = %t\n" v.name Pp.(top (pp_eval_loc sz st ~ctype:v.ctype ~tenv:dwarf.tenv) loc); ) vars in From 81d0deca0bf32fd0aff2d8ee3f6c839701aa637a Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sat, 15 Feb 2025 16:49:48 +0000 Subject: [PATCH 54/89] nicer print --- src/run/relProg.ml | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/run/relProg.ml b/src/run/relProg.ml index 16a277b1..6b140beb 100644 --- a/src/run/relProg.ml +++ b/src/run/relProg.ml @@ -40,18 +40,28 @@ let pp_typed ~(tenv: Ctype.env) ~(ctype: Ctype.t) ~pp (value: State.Exp.t) = | FuncPtr -> pp value | Missing -> pp value +let read_big st addr sz = + Seq.iota_step_up ~step:16 ~endi:sz + |> Seq.map (fun off -> + let addr = Exp.Typed.(addr + bits_int ~size:Arch.address_size off) in + let len = min 16 (sz - off) in + State.read_noprov st ~addr ~size:(Ast.Size.of_bytes len) + ) + |> List.of_seq + |> Exp.Typed.concat + let pp_eval_loc sz st ~(tenv: Ctype.env) ~(ctype: Ctype.t) (loc: Dw.Loc.t) : PPrint.document = let value = match loc with | Register reg -> Some (State.get_reg_exp st reg) | RegisterOffset (reg, off) -> let r = State.get_reg_exp st reg in - Some (State.read_noprov st ~addr:Exp.Typed.(r + bits_int ~size:Arch.address_size off) ~size:(Ast.Size.of_bytes sz)) + Some (read_big st Exp.Typed.(r + bits_int ~size:Arch.address_size off) sz) | StackFrame _off -> None | Global symoff -> let addr = Elf.SymTable.to_addr_offset symoff in let addr = State.Exp.of_address ~size:Arch.address_size addr in - Some (State.read_noprov st ~addr ~size:(Ast.Size.of_bytes sz)) + Some (read_big st addr sz) | Const x -> Some(x |> BitVec.of_z ~size:(8*sz) |> Exp.Typed.bits) | Dwarf _ops -> None in let pp = fun value -> @@ -62,6 +72,8 @@ let pp_eval_loc sz st ~(tenv: Ctype.env) ~(ctype: Ctype.t) (loc: Dw.Loc.t) : PPr Pp.optional (pp_typed ~tenv ~ctype ~pp) value let printvars ~st ~(dwarf: Dw.t) pc = + let out = ref "" in + let st = State.copy_if_locked st in let pv vars = Seq.iter (fun (v: Dw.Var.t) -> @@ -77,7 +89,7 @@ let printvars ~st ~(dwarf: Dw.t) pc = None )) v.locs with | None -> () - | Some loc -> Printf.printf "%s = %t\n" v.name Pp.(top (pp_eval_loc sz st ~ctype:v.ctype ~tenv:dwarf.tenv) loc); + | Some loc -> out := !out ^ Printf.sprintf "%s = %t\n" v.name Pp.(tos (pp_eval_loc sz st ~ctype:v.ctype ~tenv:dwarf.tenv) loc); ) vars in @@ -88,7 +100,8 @@ let printvars ~st ~(dwarf: Dw.t) pc = List.iter pscope scope.scopes in pscope fn.func.scope - ) dwarf.funcs + ) dwarf.funcs; + !out let run_prog elfname name objdump_d branchtables = @@ -147,13 +160,13 @@ let run_prog elfname name objdump_d branchtables = | Block_lib.BranchAt pc -> if Elf.Address.(last_pc + 4 <> pc) then Printf.printf "\nJUMP from %t:\n\n" Pp.(top Elf.Address.pp last_pc); - printvars ~st ~dwarf pc; + print_string @@ Analyse.Pp.css Analyse.Types.Html Render_vars @@ printvars ~st ~dwarf pc; print_string (print_analyse_instruction pc); print_endline "BRANCH!"; | Block_lib.NormalAt pc -> if Elf.Address.(last_pc + 4 <> pc) then Printf.printf "\nJUMP from %t:\n\n" Pp.(top Elf.Address.pp last_pc); - printvars ~st ~dwarf pc; + print_string @@ Analyse.Pp.css Analyse.Types.Html Render_vars @@ printvars ~st ~dwarf pc; print_string (print_analyse_instruction pc); | Block_lib.End _ -> ()); ) From 83f1d45c040870e7bc0f59a63dbe4b8fa6d291ef Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sat, 15 Feb 2025 23:45:16 +0000 Subject: [PATCH 55/89] Take state init function as arg in Run.Func --- src/run/func.ml | 10 +++++----- src/run/func.mli | 1 + src/state/base.ml | 11 +++++++++++ src/state/base.mli | 3 +++ 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/run/func.ml b/src/run/func.ml index 91167c8b..44ab9e46 100644 --- a/src/run/func.ml +++ b/src/run/func.ml @@ -52,7 +52,7 @@ open Logs.Logger (struct let str = __MODULE__ end) -let no_run_prep ~elf:elfname ~name ~entry = +let no_run_prep ~elf:elfname ~name ~entry ?(init = State.init_sections_symbolic) () = base "Running %s in %s" name elfname; let dwarf = Dw.of_file elfname in let elf = dwarf.elf in @@ -66,13 +66,13 @@ let no_run_prep ~elf:elfname ~name ~entry = let abi = Arch.get_abi api in Trace.Cache.start @@ Arch.get_isla_config (); base "Computing entry state"; - let start = Init.state () |> State.copy ~elf |> State.init_sections ~addr_size:Arch.address_size |> abi.init in + let start = Init.state () |> State.copy ~elf |> init |> abi.init in if entry then base "Entry state:\n%t" (Pp.topi State.pp start); (dwarf, elf, func, start) let get_state_tree ~elf:elfname ~name ?(dump = false) ?(entry = false) ?len ?(breakpoints = []) - ?loop ?tree_to_file () = - let (dwarf, elf, func, start) = no_run_prep ~elf:elfname ~name ~entry in + ?loop ?tree_to_file ?init () = + let (dwarf, elf, func, start) = no_run_prep ~elf:elfname ~name ~entry ?init () in match func.sym with | None -> fail "Function %s exists in DWARF data but does not have any code" name | Some sym -> @@ -104,7 +104,7 @@ let get_state_tree ~elf:elfname ~name ?(dump = false) ?(entry = false) ?len ?(br tree let cmd_func elfname name dump no_run entry len breakpoints loop tree_to_file = - if no_run then ignore @@ no_run_prep ~elf:elfname ~name ~entry + if no_run then ignore @@ no_run_prep ~elf:elfname ~name ~entry () else ignore @@ get_state_tree ~elf:elfname ~name ~dump ~entry ?len ~breakpoints ?loop ?tree_to_file () diff --git a/src/run/func.mli b/src/run/func.mli index 7f72d3f5..548f1f07 100644 --- a/src/run/func.mli +++ b/src/run/func.mli @@ -7,6 +7,7 @@ val get_state_tree : ?breakpoints:string list -> ?loop:int -> ?tree_to_file:string -> + ?init:(State.t -> State.t) -> unit -> Block_lib.label State.Tree.t diff --git a/src/state/base.ml b/src/state/base.ml index 96e35997..b214b4c3 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -546,6 +546,17 @@ let init_sections ~addr_size state = ) in state +let init_sections_symbolic state = + let state = copy_if_locked state in + let _ = Option.( + let+ elf = state.elf in + Elf.SymTable.iter elf.symbols @@ fun sym -> + if sym.typ = Elf.Symbol.OBJECT then + Hashtbl.replace state.mem.sections sym.addr.section Main + ) in + state + + let map_mut_exp (f : exp -> exp) s : unit = assert (not @@ is_locked s); Reg.Map.map_mut_current (Tval.map_exp f) s.regs; diff --git a/src/state/base.mli b/src/state/base.mli index 665b61a9..73f123db 100644 --- a/src/state/base.mli +++ b/src/state/base.mli @@ -404,6 +404,9 @@ val copy_if_locked : ?elf:Elf.File.t -> t -> t val init_sections : addr_size:int -> t -> t +(** Assigns all sections with global objects to Main fragment *) +val init_sections_symbolic : t -> t + (** {1 State convenience manipulation } *) From 7ad05907d0166c88f136b9700493e05e70378d0d Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sat, 15 Feb 2025 23:49:57 +0000 Subject: [PATCH 56/89] Make Sums.split sound We can guarantee that (a+b)[first:last] = a[first:last]+b[first:last] only when first=0 --- src/exp/sums.ml | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/src/exp/sums.ml b/src/exp/sums.ml index 6d843f5d..287f4936 100644 --- a/src/exp/sums.ml +++ b/src/exp/sums.ml @@ -52,7 +52,7 @@ let rec split = let open Ast in function | Manyop (Bvmanyarith Bvadd, l, _) -> List.concat_map split l - | Unop (Extract (last, first), e, _) -> + | Unop (Extract (last, (0 as first)), e, _) -> let l = split e in List.map (Typed.extract ~first ~last) l | Unop (Bvneg, e, _) -> @@ -63,20 +63,6 @@ let rec split = let l' = split e' in let rl' = List.rev_map Typed.neg l' in List.rev_append rl' l - | Manyop (Concat, l, _) -> - let all_splits = List.map split l in - let defaults = List.map (fun e -> - let size = e |> Typed.get_type |> Typed.expect_bv in - Typed.bits_int ~size 0 - ) l in - let terms = List.transpose ~defaults all_splits in - List.map Typed.concat terms - | Unop (ZeroExtend m, e, _) -> - let l = split e in - List.map (Typed.unop (ZeroExtend m)) l - | Unop (SignExtend s, e, _) -> - let l = split e in - List.map (Typed.unop (SignExtend s)) l | e -> [e] let merge ~size l = if l = [] then Typed.zero ~size else Typed.sum l From 94304463890893d7011d84053e2af613a45f17e1 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Tue, 25 Feb 2025 10:27:00 +0000 Subject: [PATCH 57/89] Context simplifier for relocation TODO: move all simplification here instead of read_noprov --- src/relsim/base.ml | 115 +++++++++++++++++++++++++++++++++++++++++++ src/state/base.ml | 1 + src/trace/context.ml | 19 +++++-- src/trace/run.ml | 13 +++-- src/z3/z3.ml | 4 -- 5 files changed, 138 insertions(+), 14 deletions(-) create mode 100644 src/relsim/base.ml diff --git a/src/relsim/base.ml b/src/relsim/base.ml new file mode 100644 index 00000000..43df0a71 --- /dev/null +++ b/src/relsim/base.ml @@ -0,0 +1,115 @@ +(* open Logs.Logger (struct + let str = __MODULE__ +end) + +module Sums = Exp.Sums +module Typed = Exp.Typed + +module Var = struct + (** The type of variables *) + type t = Left of State.var | Right of State.var + + let equal a b = match (a,b) with + | Left a, Left b -> State.Var.equal a b + | Right a, Right b -> State.Var.equal a b + | _ -> false + + let pp = function + | Left v -> Pp.(!^"L:" ^^ State.Var.pp v) + | Right v -> Pp.(!^"R:" ^^ State.Var.pp v) + + (** Get the type of the variable *) + let ty = function Left v | Right v -> State.Var.ty v + + let hash = Hashtbl.hash + + let of_string = State.Var.of_string (*TODO*) +end + +module Exp = struct + include Exp.Make (Var) + + let left : State.Exp.t -> t = Ast.Manip.exp_map_var (fun v -> Var.Left v) + + let right : State.Exp.t -> t = Ast.Manip.exp_map_var (fun v -> Var.Right v) +end + +type sem_type = + | Value of int + | Ptr of sem_type + +type mem_rel = (State.Exp.t * State.Exp.t * sem_type) list + +let rec sem_type_of_ctype Ctype.{unqualified; _} = + match unqualified with + | Machine b -> Value b + | Cint { size; _ } -> Value size + | Cbool -> Value 1 + | Ptr { fragment=Ctype.DynArray t; _ } -> Ptr (sem_type_of_ctype t) + | _ -> Raise.todo() + +let mem_rel_of_dwarf (dw: (Dw.Var.t * Dw.Var.t) list) : mem_rel = + List.map (fun ((v1: Dw.Var.t), (v2: Dw.Var.t)) -> + let addr1 = match v1.locs with + | [_, Global a] -> + a |> Elf.SymTable.to_addr_offset + |> State.Exp.of_address ~size:Arch.address_size + (* |> Ast.Manip.exp_map_var (fun x -> Var.Left x) *) + | _ -> Raise.todo() + in + let addr2 = match v2.locs with + | [_, Global a] -> + a |> Elf.SymTable.to_addr_offset + |> State.Exp.of_address ~size:Arch.address_size + (* |> Ast.Manip.exp_map_var (fun x -> Var.Right x) *) + | _ -> Raise.todo() + in + let stp = sem_type_of_ctype v1.ctype in + (addr1, addr2, stp) + ) dw + +type rel = mem_rel * Exp.t list + +type event = State.Mem.Fragment.Event.t +type block = State.Mem.Fragment.Block.t + +let type_at (mem_rel:mem_rel) (block1:block) (block2:block) = + List.find_map (fun (e1,e2,t) -> + let (sym1, off1) = Sums.split_concrete e1 in + let (sym2, off2) = Sums.split_concrete e2 in + if (BitVec.to_int off1 == block1.offset && BitVec.to_int off2 == block2.offset + && Option.equal State.Exp.equal sym1 block1.base + && Option.equal State.Exp.equal sym2 block2.base) + then + Some t + else + None + ) mem_rel + +module Z3sim = Z3.Make (Var) + +let update_rel ((mem, asserts):rel) (e1: event) (e2: event) : rel = + (* TODO check sizes *) + match e1, e2 with + | (Read (block1, v1), Read (block2, v2)) -> ( + match type_at mem block1 block2 with + | Some (Value _) -> mem, Typed.(Exp.of_var (Left v1) = Exp.of_var (Right v2))::asserts + | Some (Ptr t) -> (State.Exp.of_var v1, State.Exp.of_var v2, t)::mem, asserts + | None -> mem, asserts + ) + | (Write (block1, e1), Write (block2, e2)) -> ( + (match type_at mem block1 block2 with + | Some (Value _) -> Z3 + | Some (Ptr t) -> Raise.todo() (* Check (e1, e2, t) in mem *) + | None -> Raise.fail "simrel failed"); + mem, asserts + ) + | _ -> Raise.fail "simrel failed" + + +let verify (st1:State.t) (st2:State.t) (dw:(Dw.Var.t * Dw.Var.t) list) = + let mem_rel = mem_rel_of_dwarf dw in + Raise.todo() + + + *) diff --git a/src/state/base.ml b/src/state/base.ml index b214b4c3..eebb0ace 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -645,6 +645,7 @@ let eval_address (s : t) (addr: Exp.t) : Elf.Address.t option = let read_noprov ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t = + debug "Addr: %t" Pp.(top Exp.pp addr); let elf_addr = eval_address s addr in debug "Address: %t" Pp.(top (optional Elf.Address.pp) elf_addr); match elf_addr with diff --git a/src/trace/context.ml b/src/trace/context.ml index 66a045ee..f233abe9 100644 --- a/src/trace/context.ml +++ b/src/trace/context.ml @@ -60,6 +60,7 @@ type t = { mem_reads : State.tval HashVector.t; (** Stores the result of memory reads *) state : State.t; segments : State.exp SMap.t; + asserts: State.exp list; dwarf : Dw.t option; (** Optionally DWARF information. If present, typing is enabled *) } @@ -68,19 +69,20 @@ let make_context ?dwarf ?relocation state = let reg_writes = Vec.empty () in let mem_reads = HashVector.empty () in - let segments = relocation + let segments, asserts = relocation |> Option.map (fun relocation -> let State.Relocation.{value;asserts;target} = State.Relocation.of_elf relocation in List.iter (State.push_relocation_assert state) asserts; - target + (target |> Isla.Relocation.segments_of_reloc |> SMap.of_list - |> SMap.map (fun (first, last) -> Exp.Typed.extract ~first ~last value) + |> SMap.map (fun (first, last) -> Exp.Typed.extract ~first ~last value), + asserts) ) - |> Option.value ~default:SMap.empty + |> Option.value ~default:(SMap.empty, []) in - { state; reg_writes; mem_reads; dwarf; segments } + { state; reg_writes; mem_reads; dwarf; segments; asserts } (** Expand a Trace variable to a State expression, using the context *) let expand_var ~(ctxt : t) (v : Base.Var.t) (a : Ast.no Ast.ty) : State.exp = @@ -93,3 +95,10 @@ let expand_var ~(ctxt : t) (v : Base.Var.t) (a : Ast.no Ast.ty) : State.exp = (** Tell if typing should enabled with this context *) let typing_enabled ~(ctxt : t) = ctxt.dwarf <> None + +module Z3St = State.Simplify.Z3St + +let simplify ~(ctxt : t) (exp : State.exp) : State.exp = + exp + |> Z3St.simplify_subterms_full ~hyps:ctxt.asserts + |> Z3St.simplify_full \ No newline at end of file diff --git a/src/trace/run.ml b/src/trace/run.ml index 45c9252d..b0250aae 100644 --- a/src/trace/run.ml +++ b/src/trace/run.ml @@ -64,12 +64,15 @@ type ctxt = Ctxt.t let expand ~(ctxt : ctxt) (exp : Base.exp) : State.exp = Ast.Manip.exp_var_subst (Ctxt.expand_var ~ctxt) exp +let expand_simplify ~(ctxt : ctxt) (exp : Base.exp) : State.exp = + exp |> expand ~ctxt |> Context.simplify ~ctxt + (** Expand a Trace expression to a typed State expression, using the context. If the context enables typing, the expression will actually be typed, otherwise the type will be [None] *) let expand_tval ~(ctxt : ctxt) (exp : Base.exp) : State.tval = - let sexp = expand ~ctxt exp in + let sexp = expand_simplify ~ctxt exp in if Ctxt.typing_enabled ~ctxt then let ctyp = Typer.expr ~ctxt exp in { ctyp; exp = sexp } @@ -84,7 +87,7 @@ let event_mut ~(ctxt : ctxt) (event : Base.event) = match event with | WriteReg { reg; value } -> Vec.add_one ctxt.reg_writes (reg, expand_tval ~ctxt value) | ReadMem { addr; value; size } -> - let naddr = expand ~ctxt addr in + let naddr = expand_simplify ~ctxt addr in debug "naddr: %t" (Pp.top State.Exp.pp naddr); let ptrtype = Typer.expr ~ctxt addr in debug "ptrtype: %t" Pp.(top (optional Ctype.pp) ptrtype); @@ -97,7 +100,7 @@ let event_mut ~(ctxt : ctxt) (event : Base.event) = in HashVector.set ctxt.mem_reads value tval | WriteMem { addr; value; size } -> ( - let naddr = expand ~ctxt addr in + let naddr = expand_simplify ~ctxt addr in debug "naddr: %t" (Pp.top State.Exp.pp naddr); let ptrtype = Typer.expr ~ctxt addr in debug "ptrtype: %t" Pp.(top (optional Ctype.pp) ptrtype); @@ -108,10 +111,10 @@ let event_mut ~(ctxt : ctxt) (event : Base.event) = let value = expand_tval ~ctxt value in Typer.write ~dwarf ctxt.state ?ptrtype ~addr:naddr ~size value | None -> - let value = expand ~ctxt value in + let value = expand_simplify ~ctxt value in State.write_noprov ctxt.state ~addr:naddr ~size value ) - | Assert exp -> State.push_assert ctxt.state (expand ~ctxt exp) + | Assert exp -> State.push_assert ctxt.state (expand_simplify ~ctxt exp) (** Run a trace on the provided state by mutation. Enable typing if [dwarf] is provided *) let trace_mut ?dwarf ?relocation (state : State.t) (events : Base.t) : unit = diff --git a/src/z3/z3.ml b/src/z3/z3.ml index 73733720..52c31a95 100644 --- a/src/z3/z3.ml +++ b/src/z3/z3.ml @@ -527,12 +527,8 @@ module Make (Var : Var) : S with type var = Var.t = struct let rec simplify_subterms serv (e : Exp.t) : Exp.t = e |> Ast.Manip.all_subterms |> List.find_opt (fun t -> - let et = Typed.get_type e in - let tt = Typed.get_type t in - Printf.printf "Types: %t, %t\n" Pp.(top Ast.pp_ty (Ast.Manip.ty_allow_mem et)) Pp.(top Ast.pp_ty (Ast.Manip.ty_allow_mem tt)); Typed.get_type e = Typed.get_type t && let result = check serv Typed.(e = t) in - Printf.printf "%t\n" Pp.(top (optional bool) result); result = Some true ) |> Option.map (simplify_subterms serv) From 27b4dafaa7fa8399491c039e362e9005d1dee5b5 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Tue, 18 Mar 2025 21:16:13 +0000 Subject: [PATCH 58/89] Fix caching --- src/isla/cache.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/isla/cache.ml b/src/isla/cache.ml index 17a71ce6..be6f64e8 100644 --- a/src/isla/cache.ml +++ b/src/isla/cache.ml @@ -115,7 +115,7 @@ module Opcode (*: Cache.Key *) = struct if small_enough bs rel_id then begin assert (not @@ IntBits.get i IntBits.back); let res = IntBits.blit l 0 i (IntBits.back - 3) 3 in - let res = IntBits.blit rel_id 0 res (IntBits.back - 6) 3 in + let res = IntBits.blit rel_id 0 res (IntBits.back - 14) 11 in res end else IntBits.set i IntBits.back @@ -133,8 +133,8 @@ module Opcode (*: Cache.Key *) = struct else if IntBits.get hash IntBits.back then Raise.todo() else - let data = IntBits.sub hash 0 (IntBits.back - 6) in - let reloc_id = IntBits.sub hash (IntBits.back - 6) 3 in + let data = IntBits.sub hash 0 (IntBits.back - 14) in + let reloc_id = IntBits.sub hash (IntBits.back - 14) 11 in let size = IntBits.sub hash (IntBits.back - 3) 3 in let b = Bytes.create size in Bits.unsafe_blit_of_int data 0 b 0 (size * 8); From 65eb0ae18e2e0c4b1d2131fa32137ed217213469 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Tue, 18 Mar 2025 21:47:59 +0000 Subject: [PATCH 59/89] Refactor: use get_state_tree in run_prog --- src/run/func.ml | 4 +-- src/run/func.mli | 1 + src/run/relProg.ml | 77 +++++++++++++++++----------------------------- 3 files changed, 32 insertions(+), 50 deletions(-) diff --git a/src/run/func.ml b/src/run/func.ml index 44ab9e46..2c5968c4 100644 --- a/src/run/func.ml +++ b/src/run/func.ml @@ -71,7 +71,7 @@ let no_run_prep ~elf:elfname ~name ~entry ?(init = State.init_sections_symbolic) (dwarf, elf, func, start) let get_state_tree ~elf:elfname ~name ?(dump = false) ?(entry = false) ?len ?(breakpoints = []) - ?loop ?tree_to_file ?init () = + ?loop ?tree_to_file ?init ?every_instruction () = let (dwarf, elf, func, start) = no_run_prep ~elf:elfname ~name ~entry ?init () in match func.sym with | None -> fail "Function %s exists in DWARF data but does not have any code" name @@ -96,7 +96,7 @@ let get_state_tree ~elf:elfname ~name ?(dump = false) ?(entry = false) ?len ?(br base "Instructions:\n%t\n" (Pp.topi Runner.pp_instr runner) end; base "Start running"; - let tree = Block_lib.run block start in + let tree = Block_lib.run block start ?every_instruction in tree_to_file |> Option.iter (fun x -> Files.write_string x @@ Pp.tos (State.Tree.pp_all Block_lib.pp_label) tree ()); diff --git a/src/run/func.mli b/src/run/func.mli index 548f1f07..eac27b23 100644 --- a/src/run/func.mli +++ b/src/run/func.mli @@ -8,6 +8,7 @@ val get_state_tree : ?loop:int -> ?tree_to_file:string -> ?init:(State.t -> State.t) -> + ?every_instruction:bool -> unit -> Block_lib.label State.Tree.t diff --git a/src/run/relProg.ml b/src/run/relProg.ml index 6b140beb..e375db07 100644 --- a/src/run/relProg.ml +++ b/src/run/relProg.ml @@ -112,18 +112,6 @@ let run_prog elfname name objdump_d branchtables = base "Running with rd %s in %s" name elfname; base "Loading %s" elfname; let dwarf = Dw.of_file elfname in - let elf = dwarf.elf in - let func = - Dw.get_func_opt ~name dwarf - |> Option.value_fun ~default:(fun () -> fail "Function %s wasn't found in %s" name elfname) - in - let api = Dw.Func.get_api func in - base "API %t" (Pp.top Arch.pp_api api); - base "Loading ABI"; - let abi = Arch.get_abi api in - Trace.Cache.start @@ Arch.get_isla_config (); - base "Computing entry state"; - let start = Init.state () |> State.copy ~elf |> State.init_sections ~addr_size:Arch.address_size |> abi.init in base "Loading %s for Analyse" elfname; let analyse_test = Analyse.Elf.parse_elf_file elfname in base "Analysing %s for Analyse" elfname; @@ -135,42 +123,35 @@ let run_prog elfname name objdump_d branchtables = Analyse.Pp.pp_instruction Analyse.Types.Html (*Ascii*) analyse_test analyse_analysis 0 index instr in - (* base "Entry state:\n%t" Pp.(topi State.pp start); *) - match func.sym with - | None -> fail "Function %s exists in DWARF data but do not have any code" name - | Some sym -> - let endpred = Block_lib.gen_endpred () in - let runner = Runner.of_dwarf dwarf in - let block = Block_lib.make ~runner ~start:sym.addr ~endpred in - base "Start running"; - let tree = Block_lib.run ~every_instruction:true block start in - base "Ended running, start pretty printing"; - (* This table will contain the state diff to print at each pc with a message *) - (* let instr_data : (Elf.Address.t, string * State.t * State.Reg.t list) Hashtbl.t = - Hashtbl.create 100 - in - let get_footprint pc = - Runner.get_normal_opt runner pc |> Option.fold ~none:[] ~some:Trace.Instr.footprint - in *) - State.Tree.iter - (fun a st -> - let last_pc = st.last_pc in - (match a with - | Block_lib.Start -> () - | Block_lib.BranchAt pc -> - if Elf.Address.(last_pc + 4 <> pc) then - Printf.printf "\nJUMP from %t:\n\n" Pp.(top Elf.Address.pp last_pc); - print_string @@ Analyse.Pp.css Analyse.Types.Html Render_vars @@ printvars ~st ~dwarf pc; - print_string (print_analyse_instruction pc); - print_endline "BRANCH!"; - | Block_lib.NormalAt pc -> - if Elf.Address.(last_pc + 4 <> pc) then - Printf.printf "\nJUMP from %t:\n\n" Pp.(top Elf.Address.pp last_pc); - print_string @@ Analyse.Pp.css Analyse.Types.Html Render_vars @@ printvars ~st ~dwarf pc; - print_string (print_analyse_instruction pc); - | Block_lib.End _ -> ()); - ) - tree; + base "Start running"; + let tree = Func.get_state_tree ~elf:elfname ~name ~init:(State.init_sections ~addr_size:Arch.address_size) ~every_instruction:true () in + base "Ended running, start pretty printing"; + (* This table will contain the state diff to print at each pc with a message *) + (* let instr_data : (Elf.Address.t, string * State.t * State.Reg.t list) Hashtbl.t = + Hashtbl.create 100 + in + let get_footprint pc = + Runner.get_normal_opt runner pc |> Option.fold ~none:[] ~some:Trace.Instr.footprint + in *) + State.Tree.iter + (fun a st -> + let last_pc = st.last_pc in + (match a with + | Block_lib.Start -> () + | Block_lib.BranchAt pc -> + if Elf.Address.(last_pc + 4 <> pc) then + Printf.printf "\nJUMP from %t:\n\n" Pp.(top Elf.Address.pp last_pc); + print_string @@ Analyse.Pp.css Analyse.Types.Html Render_vars @@ printvars ~st ~dwarf pc; + print_string (print_analyse_instruction pc); + print_endline "BRANCH!"; + | Block_lib.NormalAt pc -> + if Elf.Address.(last_pc + 4 <> pc) then + Printf.printf "\nJUMP from %t:\n\n" Pp.(top Elf.Address.pp last_pc); + print_string @@ Analyse.Pp.css Analyse.Types.Html Render_vars @@ printvars ~st ~dwarf pc; + print_string (print_analyse_instruction pc); + | Block_lib.End _ -> ()); + ) + tree; match Analyse.Utils.read_file_lines "src/analyse/html-postamble.html" with | Error _ -> () | Ok lines -> Array.iter (function s -> Printf.printf "%s\n" s) lines From f025a7c84ab25917c9056892e7b9141b4432d99b Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Wed, 9 Apr 2025 09:55:00 +0100 Subject: [PATCH 60/89] Update for changes in linksem --- src/analyse/ControlFlow.ml | 5 +-- src/analyse/Elf.ml | 8 ++--- src/analyse/Symbols.ml | 4 +-- src/analyse/Utils.ml | 2 +- src/bin/copySources.ml | 4 +-- src/dw/addr.ml | 2 +- src/dw/var.ml | 2 +- src/elf/address.ml | 2 +- src/elf/linksemRelocatable.ml | 2 +- src/utils/sym.ml | 68 ++++++++++++++++------------------- 10 files changed, 46 insertions(+), 53 deletions(-) diff --git a/src/analyse/ControlFlow.ml b/src/analyse/ControlFlow.ml index 4c305098..bba61c63 100644 --- a/src/analyse/ControlFlow.ml +++ b/src/analyse/ControlFlow.ml @@ -139,7 +139,8 @@ let branch_table_target_addresses test filename_branch_table_option : (addr * ad (* chop into 4-byte words - as needed for branch offset tables, though not for all other things in .rodata *) - let rodata_words : (natural * natural) list = Dwarf.words_of_rel_byte_sequence rodata_addr (Dwarf.rbs_no_reloc bs) [] in (*HACK*) + let rodata_words : (natural * natural) list = + Dwarf.words_of_sym_byte_sequence rodata_addr (Dwarf_byte_sequence.sym_bs_construct bs (Pmap.empty Nat_big_num.compare)) [] in (*HACK*) let read_rodata_b addr = Dwarf.sym_natural_of_byte @@ -495,7 +496,7 @@ let parse_objdump_lines arch lines : objdump_instruction list = *) let with_symbolic_address (section: string) (addr, opcode_bytes, mnemonic, operands) : objdump_instruction = - (Dwarf.Offset (section, Nat_big_num.of_int64 addr), opcode_bytes, mnemonic, operands) + (Sym_ocaml.Num.Offset (section, Nat_big_num.of_int64 addr), opcode_bytes, mnemonic, operands) let rec parse_objdump_lines arch lines (next_index : int) (last_address : int64 option) (section: string option) : objdump_instruction list = diff --git a/src/analyse/Elf.ml b/src/analyse/Elf.ml index 5b3bb63c..574664ca 100644 --- a/src/analyse/Elf.ml +++ b/src/analyse/Elf.ml @@ -58,8 +58,8 @@ let pp_symbol_map (symbol_map : Elf_file.global_symbol_init_info) = String.concat "" (List.map (fun (name, (typ, _size, address, _mb, _binding)) -> - Printf.sprintf "**** name = %s address = %s typ = %d\n" name (pp_addr (Dwarf.Absolute address)) - (Sym.to_int (Dwarf.Absolute typ))) + Printf.sprintf "**** name = %s address = %s typ = %d\n" name (pp_addr (Sym_ocaml.Num.Absolute address)) + (Sym.to_int (Sym_ocaml.Num.Absolute typ))) symbol_map) (*****************************************************************************) @@ -153,8 +153,8 @@ let parse_elf_file (filename : string) : test = elf_file; arch; symbol_map (*@ (symbols_for_stacks !Globals.elf_threads)*); - e_entry = Dwarf.Absolute (entry); - e_machine = Dwarf.Absolute (machine); + e_entry = Sym_ocaml.Num.Absolute (entry); + e_machine = Sym_ocaml.Num.Absolute (machine); dwarf_static = ds; dwarf_semi_pp_frame_info; } diff --git a/src/analyse/Symbols.ml b/src/analyse/Symbols.ml index 360ffbf6..cc059ae4 100644 --- a/src/analyse/Symbols.ml +++ b/src/analyse/Symbols.ml @@ -28,7 +28,7 @@ let get_elf64_file_global_symbol_init (f: Elf_file.elf64_file) : global_symbol_i | None -> if machine = Elf_header.elf_ma_aarch64 then Error.bind - (Elf_symbolic.extract_elf64_relocations_for_section f Abi_aarch64_symbolic_relocation.abi_aarch64_relocation_to_abstract section) + (Elf_symbolic.extract_elf64_relocations_for_section f Abi_aarch64_symbolic_relocation.aarch64_relocation_interpreter section) @@ fun relocs -> Error.return (AArch64 relocs) else Error.fail @@ "machine not supported " ^ (Elf_header.string_of_elf_machine_architecture machine) @@ -43,7 +43,7 @@ let get_elf64_file_global_symbol_init (f: Elf_file.elf64_file) : global_symbol_i let bnd = Elf_symbol_table.extract_symbol_binding entry.elf64_st_info in Option.map ( fun section -> - let addr = Dwarf.Offset (section.elf64_section_name_as_string, addr_offset) in + let addr = Sym_ocaml.Num.Offset (section.elf64_section_name_as_string, addr_offset) in let data = if Byte_sequence.length0 section.elf64_section_body = Z.zero then Error.return (Byte_sequence.zeros size) else diff --git a/src/analyse/Utils.ml b/src/analyse/Utils.ml index bcd5a717..5d79072b 100644 --- a/src/analyse/Utils.ml +++ b/src/analyse/Utils.ml @@ -57,7 +57,7 @@ type addr = natural (* hackishly mask out bigint conversion failure *) let pp_addr (a : natural) = try - Dwarf.pp_sym Ml_bindings.hex_string_of_big_int_pad8 a + Sym_ocaml.Num.ppf Ml_bindings.hex_string_of_big_int_pad8 a with | Failure s -> let s' = "Failure: int64_of_big_int " ^ Sym.to_string a in (warn "pp_addr failure: %s" s); s' | e -> raise e diff --git a/src/bin/copySources.ml b/src/bin/copySources.ml index dc30fd01..ed6def6f 100644 --- a/src/bin/copySources.ml +++ b/src/bin/copySources.ml @@ -84,8 +84,8 @@ let process_file () : unit = else Some (Byte_sequence.string_of_byte_sequence - (rbs_unwrap (List.nth lnh.lnh_include_directories (dir - 1))))), - Byte_sequence.string_of_byte_sequence (rbs_unwrap lnfe.lnfe_path) )) + (Dwarf_byte_sequence.sym_bs_expect_const (List.nth lnh.lnh_include_directories (dir - 1))))), + Byte_sequence.string_of_byte_sequence (Dwarf_byte_sequence.sym_bs_expect_const lnfe.lnfe_path) )) lnh.lnh_file_entries in diff --git a/src/dw/addr.ml b/src/dw/addr.ml index 0604a6f7..201906bf 100644 --- a/src/dw/addr.ml +++ b/src/dw/addr.ml @@ -1,6 +1,6 @@ include Elf.Address let of_sym : Sym.t -> t = function -| Dwarf.Offset (section, offset) -> { section; offset = Z.to_int offset } +| Sym_ocaml.Num.Offset (section, offset) -> { section; offset = Z.to_int offset } | _ -> Raise.fail "expected section+offset" diff --git a/src/dw/var.ml b/src/dw/var.ml index e453d61b..b6010848 100644 --- a/src/dw/var.ml +++ b/src/dw/var.ml @@ -61,7 +61,7 @@ let rec loc_merge = function | [] -> [] let end_addr_of_sym = function -| Dwarf.Absolute z when Z.compare z (Z.of_int Int.max_int) > 0 -> None +| Sym_ocaml.Num.Absolute z when Z.compare z (Z.of_int Int.max_int) > 0 -> None | x -> Some (Addr.of_sym x) (** Create a DWARF variable from its linksem counterpart *) diff --git a/src/elf/address.ml b/src/elf/address.ml index f332f31a..33da9cd8 100644 --- a/src/elf/address.ml +++ b/src/elf/address.ml @@ -23,4 +23,4 @@ let (<=) = compare (<=) let (>=) = compare (>=) -let to_sym {section; offset} = Dwarf.Offset (section, Z.of_int offset) \ No newline at end of file +let to_sym {section; offset} = Sym_ocaml.Num.Offset (section, Z.of_int offset) \ No newline at end of file diff --git a/src/elf/linksemRelocatable.ml b/src/elf/linksemRelocatable.ml index d822c2fa..99998fb7 100644 --- a/src/elf/linksemRelocatable.ml +++ b/src/elf/linksemRelocatable.ml @@ -30,7 +30,7 @@ let get_elf64_file_global_symbol_init (f: Elf_file.elf64_file) : global_symbol_i | None -> if machine = Elf_header.elf_ma_aarch64 then Error.bind - (Elf_symbolic.extract_elf64_relocations_for_section f Abi_aarch64_symbolic_relocation.abi_aarch64_relocation_to_abstract section) + (Elf_symbolic.extract_elf64_relocations_for_section f Abi_aarch64_symbolic_relocation.aarch64_relocation_interpreter section) @@ fun relocs -> Error.return (AArch64 relocs) else Error.fail @@ "machine not supported " ^ (Elf_header.string_of_elf_machine_architecture machine) diff --git a/src/utils/sym.ml b/src/utils/sym.ml index 1510a8b3..42ef411d 100644 --- a/src/utils/sym.ml +++ b/src/utils/sym.ml @@ -1,15 +1,15 @@ -type t = Z.t Dwarf.sym0 +type t = Sym_ocaml.Num.t let pp x = x |> Dwarf.pphex_sym |> Pp.string -let to_z x = Dwarf.sym_unwrap x "to_z" +let to_z x = Sym_ocaml.Num.to_num x let to_int x = Z.to_int @@ to_z x -let of_int x = Dwarf.Absolute (Z.of_int x) -let of_int64 x = Dwarf.Absolute (Z.of_int64 x) +let of_int x = Sym_ocaml.Num.Absolute (Z.of_int x) +let of_int64 x = Sym_ocaml.Num.Absolute (Z.of_int64 x) -let equal = Dwarf.sym_comp Nat_big_num.equal +let equal = Sym_ocaml.Num.equal let max_addr = Z.(shift_left (of_int 1) 64 - (of_int 1)) @@ -17,40 +17,32 @@ let min_addr = Z.of_int 0 (* TODO very hacky *) let less x y = match (x, y) with -| (Dwarf.Absolute x, Dwarf.Offset (_, y)) when Nat_big_num.less x y -> true -| (Dwarf.Absolute x, Dwarf.Offset (_,_)) when Nat_big_num.greater_equal x max_addr -> false -| (Dwarf.Offset (_,_), Dwarf.Absolute y) when Nat_big_num.less max_addr y -> true -| (Dwarf.Offset (_, x), Dwarf.Absolute y) when Nat_big_num.greater_equal x y -> false -| _ -> Dwarf.sym_comp Nat_big_num.less x y -let less_equal = Dwarf.sym_comp Nat_big_num.less_equal -let greater = Dwarf.sym_comp Nat_big_num.greater -let greater_equal = Dwarf.sym_comp Nat_big_num.greater_equal -let compare = Dwarf.sym_comp Nat_big_num.compare - -let to_string = Dwarf.pp_sym Z.to_string - -let sub x y = match (x, y) with -| (Dwarf.Offset (s, a), Dwarf.Offset (t, b)) when s = t -> Dwarf.Absolute (Nat_big_num.sub a b) -| (Dwarf.Offset (s, a), Dwarf.Absolute b) -> Dwarf.Offset (s, Nat_big_num.sub a b) -| (Dwarf.Absolute a, Dwarf.Absolute b) -> Dwarf.Absolute (Nat_big_num.sub a b) -| _ -> Dwarf.Unknown - -let add x y = match (x, y) with -| (Dwarf.Offset (s, a), Dwarf.Absolute b) -> Dwarf.Offset (s, Nat_big_num.add a b) -| (Dwarf.Absolute (a), Dwarf.Offset (s,b)) -> Dwarf.Offset (s, Nat_big_num.add a b) -| (Dwarf.Absolute a, Dwarf.Absolute b) -> Dwarf.Absolute (Nat_big_num.add a b) -| _ -> Dwarf.Unknown - -let mul = Dwarf.sym_map2 Nat_big_num.mul - -let pow_int_positive x y = Dwarf.Absolute (Nat_big_num.pow_int_positive x y) - -let shift_left x s = Dwarf.sym_map (fun x -> Nat_big_num.shift_left x s) x -let shift_right x s = Dwarf.sym_map (fun x -> Nat_big_num.shift_right x s) x -let modulus = Dwarf.sym_map2 Nat_big_num.modulus +| (Sym_ocaml.Num.Absolute x, Sym_ocaml.Num.Offset (_, y)) when Nat_big_num.less x y -> true +| (Sym_ocaml.Num.Absolute x, Sym_ocaml.Num.Offset (_,_)) when Nat_big_num.greater_equal x max_addr -> false +| (Sym_ocaml.Num.Offset (_,_), Sym_ocaml.Num.Absolute y) when Nat_big_num.less max_addr y -> true +| (Sym_ocaml.Num.Offset (_, x), Sym_ocaml.Num.Absolute y) when Nat_big_num.greater_equal x y -> false +| _ -> Sym_ocaml.Num.comp Nat_big_num.less x y +let less_equal = Sym_ocaml.Num.less_equal +let greater = Sym_ocaml.Num.greater +let greater_equal = Sym_ocaml.Num.greater_equal +let compare = Sym_ocaml.Num.compare + +let to_string = Sym_ocaml.Num.ppf Z.to_string + +let sub = Sym_ocaml.Num.sub + +let add = Sym_ocaml.Num.add + +let mul = Sym_ocaml.Num.mul + +let pow_int_positive x y = Sym_ocaml.Num.Absolute (Nat_big_num.pow_int_positive x y) + +let shift_left x s = Sym_ocaml.Num.map (fun x -> Nat_big_num.shift_left x s) x +let shift_right x s = Sym_ocaml.Num.map (fun x -> Nat_big_num.shift_right x s) x +let modulus = Sym_ocaml.Num.map2 Nat_big_num.modulus let in_range first last x = match (first, last, x) with -| (Dwarf.Absolute f, Dwarf.Absolute l, Dwarf.Absolute x) -> Nat_big_num.less_equal f x && Nat_big_num.less_equal x l -| (Dwarf.Offset (s1, f), Dwarf.Offset (s2, l), Dwarf.Offset (s, x)) when s1 = s2 -> +| (Sym_ocaml.Num.Absolute f, Sym_ocaml.Num.Absolute l, Sym_ocaml.Num.Absolute x) -> Nat_big_num.less_equal f x && Nat_big_num.less_equal x l +| (Sym_ocaml.Num.Offset (s1, f), Sym_ocaml.Num.Offset (s2, l), Sym_ocaml.Num.Offset (s, x)) when s1 = s2 -> s1 = s && Nat_big_num.less_equal f x && Nat_big_num.less_equal x l (* TODO kinda hacky *) | _ -> Raise.fail "Can't determine if %t is in range [%t,%t]" (Pp.tos pp x) (Pp.tos pp first) (Pp.tos pp last) \ No newline at end of file From a212415f023939605ee7d93ee69a116e07e42d6d Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 10 Apr 2025 12:26:15 +0100 Subject: [PATCH 61/89] Test script --- src/bin/readDwarf.ml | 1 + src/run/block_lib.ml | 13 ++++---- src/run/func.ml | 9 ++++-- src/run/relProg.ml | 4 ++- src/run/testRelProg.ml | 70 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 88 insertions(+), 9 deletions(-) create mode 100644 src/run/testRelProg.ml diff --git a/src/bin/readDwarf.ml b/src/bin/readDwarf.ml index 60a7749f..577cac14 100644 --- a/src/bin/readDwarf.ml +++ b/src/bin/readDwarf.ml @@ -73,6 +73,7 @@ let commands = Run.Block.command; Run.FuncRD.command; Run.RelProg.command; + Run.TestRelProg.command; CopySourcesCmd.command; Z3.Test.command; ] diff --git a/src/run/block_lib.ml b/src/run/block_lib.ml index 3082c0c1..3b62215f 100644 --- a/src/run/block_lib.ml +++ b/src/run/block_lib.ml @@ -93,24 +93,25 @@ let run ?(every_instruction = false) ?relevant (b : t) (start : State.t) : label assert (State.is_locked start); let rec run_from state = let pc_exp = State.get_reg_exp state pcreg in + State.Simplify.ctxfull state; if State.is_possible state then match b.endpred pc_exp with | Some endmsg -> info "Stopped at pc %t because %s" (Pp.top State.Exp.pp pc_exp) endmsg; - State.Simplify.ctxfull state; + (* State.Simplify.ctxfull state; *) State.lock state; State.Tree.{ state; data = End endmsg; rest = [] } | None -> ( - let prelock state = State.Simplify.ctxfull state in + (* let prelock state = State.Simplify.ctxfull state in *) if every_instruction then begin - prelock state; + (* prelock state; *) State.lock state end; let states = let pc = State.Exp.expect_sym_address pc_exp in if Option.fold ~none:true ~some:(Fun.flip Hashtbl.mem pc) relevant then ( info "Running pc %t" (Pp.top State.Exp.pp pc_exp); - Runner.run ~prelock b.runner state + Runner.run ~prelock:ignore b.runner state ) else ( info "Skipping pc %t" (Pp.top State.Exp.pp pc_exp); @@ -131,7 +132,7 @@ let run ?(every_instruction = false) ?relevant (b : t) (start : State.t) : label ) else begin info "Reached dead code at %t" (Pp.top State.Exp.pp pc_exp); - State.Simplify.ctxfull state; + (* State.Simplify.ctxfull state; *) State.lock state; State.Tree.{ state; data = End "Reached dead code"; rest = [] } end @@ -162,7 +163,7 @@ let gen_endpred ?min ?max ?loop ?(brks = []) () : State.exp -> string option = ( try Some (State.Exp.expect_sym_address pc_exp) with - _ -> debug "PC is sus"; None + _ -> None ) |> Option.map (fun pc -> debug "enpred: Evaluating PC %t" (Pp.top Elf.Address.pp pc); match (min, max, loop) with diff --git a/src/run/func.ml b/src/run/func.ml index 2c5968c4..e733d880 100644 --- a/src/run/func.ml +++ b/src/run/func.ml @@ -46,7 +46,7 @@ open Cmdliner open Config.CommonOpt -open Fun +(* open Fun *) open Logs.Logger (struct let str = __MODULE__ @@ -78,7 +78,12 @@ let get_state_tree ~elf:elfname ~name ?(dump = false) ?(entry = false) ?len ?(br | Some sym -> let brks = List.map - (Elf.SymTable.of_position_string elf.symbols %> Elf.SymTable.to_addr_offset) + (fun x -> + if String.starts_with ~prefix:"UND" x then (*HACK for undefined symbol*) + Elf.Address.{ section=x; offset=0 } + else + x |> Elf.SymTable.of_position_string elf.symbols |> Elf.SymTable.to_addr_offset + ) breakpoints in let (min, max) = diff --git a/src/run/relProg.ml b/src/run/relProg.ml index e375db07..7bf90660 100644 --- a/src/run/relProg.ml +++ b/src/run/relProg.ml @@ -124,7 +124,9 @@ let run_prog elfname name objdump_d branchtables = instr in base "Start running"; - let tree = Func.get_state_tree ~elf:elfname ~name ~init:(State.init_sections ~addr_size:Arch.address_size) ~every_instruction:true () in + let tree = Func.get_state_tree ~elf:elfname ~name ~init:(State.init_sections ~addr_size:Arch.address_size) ~every_instruction:true () + ~breakpoints:["UND.abort"; "UND.exit"] + in base "Ended running, start pretty printing"; (* This table will contain the state diff to print at each pc with a message *) (* let instr_data : (Elf.Address.t, string * State.t * State.Reg.t list) Hashtbl.t = diff --git a/src/run/testRelProg.ml b/src/run/testRelProg.ml new file mode 100644 index 00000000..1a429422 --- /dev/null +++ b/src/run/testRelProg.ml @@ -0,0 +1,70 @@ +open Cmdliner +open Config.CommonOpt + +open Logs.Logger (struct + let str = __MODULE__ +end) + +let test return_register exit_register name = + let tree = Func.get_state_tree ~elf:name ~name:"main" ~init:(State.init_sections ~addr_size:Arch.address_size) ~every_instruction:false () + ~breakpoints:["UND.abort"; "UND.exit"] + in + let pc = Arch.pc () in + let ret = State.Reg.of_string return_register in + let ext = State.Reg.of_string exit_register in + State.Tree.iter (fun l st -> + if State.is_possible st then + match l with + | Block_lib.End _ -> ( + let pc_exp = State.get_reg_exp st pc in + let ret_exp = match (try + Some (State.Exp.expect_sym_address pc_exp) + with + _ -> None + ) with + | Some pc_addr -> + if pc_addr = Elf.Address.{ section="UND.abort"; offset=0 } then + fail "abort called from %t" (Pp.top Elf.Address.pp st.last_pc) + else if pc_addr <> Elf.Address.{ section="UND.exit"; offset=0 } then + fail "finished at weird address %t" (Pp.top Elf.Address.pp pc_addr) + else + State.get_reg_exp st ext + | None -> + State.get_reg_exp st ret (* Symbolic pc = returned from main *) + in + let ret_val = ret_exp |> Exp.ConcreteEval.eval |> Exp.Value.expect_bv |> BitVec.to_int in + if ret_val <> 0 then + fail "non-zero return code %d" ret_val; + ) + | _ -> () + ) tree; + base "Success" + +let elf = + let doc = "ELF file from which to pull the code" in + Arg.(required & pos 0 (some non_dir_file) None & info [] ~docv:"ELF_FILE" ~doc) + +let return_register = + let doc = "The name of the register containing the return value of main function" in + Arg.(value & opt string "R0" & info ["r"] ~docv:"RETURN_REGISTER" ~doc) + +let exit_register = + let doc = "The name of the register containing the argument to exit function" in + Arg.(value & opt string "R0" & info ["e"] ~docv:"EXIT_REGISTER" ~doc) + + +let term = + Term.( + CmdlinerHelper.func_options comopts test + $ return_register $ exit_register $ elf) + +let info = + let doc = + "Test run relocatable file\ + + Test succeeds if all possble outcomes result in the program exiting with\ + with code 0" + in + Cmd.(info "test-rel-prog" ~doc ~exits) + +let command = (term, info) From a77a87b2ea128fe8698fbf90d3599a7112a1537b Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 10 Apr 2025 12:36:59 +0100 Subject: [PATCH 62/89] Fix rodata --- src/elf/file.ml | 2 +- src/state/base.ml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/elf/file.ml b/src/elf/file.ml index d628ed8f..1b56d28f 100644 --- a/src/elf/file.ml +++ b/src/elf/file.ml @@ -146,7 +146,7 @@ let of_file (filename : string) = Segment. { data; - addr = Sym.to_int addr; (* TODO *) + addr = 0; (* Meaningless for relocatable files *) size = BytesSeq.length data; read = true; write = false; diff --git a/src/state/base.ml b/src/state/base.ml index eebb0ace..2aa43a6c 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -589,8 +589,7 @@ let read_from_rodata (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t option | Some elf -> ( if not @@ ConcreteEval.is_concrete addr then None else - let int_addr = ConcreteEval.eval addr |> Value.expect_bv |> BitVec.to_int in - let sym_addr = Elf.Address.{ section = ".rodata"; offset = int_addr } in (* TODO this is wrong *) + let sym_addr = Exp.expect_sym_address addr in let size = size |> Ast.Size.to_bits in try let (sym, offset) = Elf.SymTable.of_addr_with_offset elf.symbols sym_addr in @@ -600,13 +599,14 @@ let read_from_rodata (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t option let bv = BytesSeq.getbvle ~size sym.data.data offset in (* TODO relocations *) Some (Typed.bits bv) with Not_found -> + let int_addr = sym_addr.offset in let rodata = elf.rodata in - if rodata.addr <= int_addr && int_addr + size < rodata.addr + rodata.size then + if sym_addr.section = ".rodata" && rodata.addr <= int_addr && int_addr + size < rodata.addr + rodata.size then let bv = BytesSeq.getbvle ~size rodata.data (int_addr - rodata.addr) in (* Assume little endian here *) Some (Typed.bits bv) else ( - warn "Failed to find symbol or rodata at 0x%x" int_addr; + warn "Failed to find symbol or rodata at %t" (Pp.top Elf.Address.pp sym_addr); None ) ) From c5e2b56433f14a21acf90e87dae0da9dce6deb50 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 10 Apr 2025 13:12:56 +0100 Subject: [PATCH 63/89] Fix loading objects --- src/state/base.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/state/base.ml b/src/state/base.ml index 2aa43a6c..458a3114 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -531,11 +531,11 @@ let init_sections ~addr_size state = let _ = Option.( let+ elf = state.elf in Elf.SymTable.iter elf.symbols @@ fun sym -> + let len = List.find (fun x -> sym.size mod x = 0) [16;8;4;2;1] in if sym.typ = Elf.Symbol.OBJECT then let provenance = Mem.create_section_frag ~addr_size state.mem sym.addr.section in - Seq.iota_step_up ~step:16 ~endi:sym.size + Seq.iota_step_up ~step:len ~endi:sym.size |> Seq.iter (fun off -> - let len = min 16 (sym.size - off) in let data = Elf.Symbol.sub sym off len in let addr = Exp.of_address ~size:addr_size Elf.Address.(sym.addr + off) in let size = Ast.Size.of_bytes len in From 2bc6c015396547af9d59bb48f052ac0e011ca40a Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 10 Apr 2025 15:46:32 +0100 Subject: [PATCH 64/89] Fix reading rodata Also fixes a bug in read_from_rodata of comparing sizes in bits vs bytes --- src/run/testRelProg.ml | 1 + src/state/base.ml | 73 +++++++++++++++++++++--------------------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/run/testRelProg.ml b/src/run/testRelProg.ml index 1a429422..1f4b396d 100644 --- a/src/run/testRelProg.ml +++ b/src/run/testRelProg.ml @@ -9,6 +9,7 @@ let test return_register exit_register name = let tree = Func.get_state_tree ~elf:name ~name:"main" ~init:(State.init_sections ~addr_size:Arch.address_size) ~every_instruction:false () ~breakpoints:["UND.abort"; "UND.exit"] in + debug "%t" (Pp.top (State.Tree.pp_all Block_lib.pp_label) tree); let pc = Arch.pc () in let ret = State.Reg.of_string return_register in let ext = State.Reg.of_string exit_register in diff --git a/src/state/base.ml b/src/state/base.ml index 458a3114..2099b52b 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -583,43 +583,6 @@ let set_read (s : t) (read_num : int) (exp : Exp.t) = assert (Typed.get_type exp = Typed.get_type (Vec.get s.read_vars read_num |> Tval.exp)); Vec.update s.read_vars read_num @@ Tval.map_exp (Fun.const exp) -let read_from_rodata (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t option = - match s.elf with - | None -> None - | Some elf -> ( - if not @@ ConcreteEval.is_concrete addr then None - else - let sym_addr = Exp.expect_sym_address addr in - let size = size |> Ast.Size.to_bits in - try - let (sym, offset) = Elf.SymTable.of_addr_with_offset elf.symbols sym_addr in - if sym.writable then None - else - (* Assume little endian here *) - let bv = BytesSeq.getbvle ~size sym.data.data offset in (* TODO relocations *) - Some (Typed.bits bv) - with Not_found -> - let int_addr = sym_addr.offset in - let rodata = elf.rodata in - if sym_addr.section = ".rodata" && rodata.addr <= int_addr && int_addr + size < rodata.addr + rodata.size then - let bv = BytesSeq.getbvle ~size rodata.data (int_addr - rodata.addr) in - (* Assume little endian here *) - Some (Typed.bits bv) - else ( - warn "Failed to find symbol or rodata at %t" (Pp.top Elf.Address.pp sym_addr); - None - ) - ) - -let read ~provenance ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t = - assert (not @@ is_locked s); - let var = make_read ?ctyp s size in - let exp = Mem.read s.mem ~provenance ~var ~addr ~size in - let exp = if provenance = Main && exp = None then read_from_rodata ~addr ~size s else exp in - Option.iter (set_read s (Var.expect_readvar var)) exp; - Option.value exp ~default:(Exp.of_var var) - - let eval_address (s : t) (addr: Exp.t) : Elf.Address.t option = let ctxt0 = function Var.Section _ -> Value.bv @@ BitVec.of_int ~size:64 0 | _ -> raise ConcreteEval.Symbolic in let open Option in @@ -643,6 +606,42 @@ let eval_address (s : t) (addr: Exp.t) : Elf.Address.t option = None ) +let read_from_rodata (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t option = + debug "reading from rodata at address: %t" (Pp.top Exp.pp addr); + match s.elf with + | None -> None + | Some elf -> ( + Option.bind (eval_address s addr) @@ fun sym_addr -> + let size = size |> Ast.Size.to_bits in + try + let (sym, offset) = Elf.SymTable.of_addr_with_offset elf.symbols sym_addr in + if sym.writable then None + else ( + (* Assume little endian here *) + assert (Relocation.IMap.is_empty sym.data.relocations); + let bv = BytesSeq.getbvle ~size sym.data.data offset in (* TODO relocations *) + Some (Typed.bits bv) + ) + with Not_found -> + let int_addr = sym_addr.offset in + let rodata = elf.rodata in + if sym_addr.section = ".rodata" && rodata.addr <= int_addr && int_addr + size < rodata.addr + rodata.size * 8 then + let bv = BytesSeq.getbvle ~size rodata.data (int_addr - rodata.addr) in + (* Assume little endian here *) + Some (Typed.bits bv) + else ( + warn "Failed to find symbol or rodata at %t" (Pp.top Elf.Address.pp sym_addr); + None + ) + ) + +let read ~provenance ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t = + assert (not @@ is_locked s); + let var = make_read ?ctyp s size in + let exp = Mem.read s.mem ~provenance ~var ~addr ~size in + let exp = if provenance = Main && exp = None then read_from_rodata ~addr ~size s else exp in + Option.iter (set_read s (Var.expect_readvar var)) exp; + Option.value exp ~default:(Exp.of_var var) let read_noprov ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t = debug "Addr: %t" Pp.(top Exp.pp addr); From c1c5cc5bcd95adb4a6c8f3826bd1e8410c6a28e8 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 10 Apr 2025 16:34:17 +0100 Subject: [PATCH 65/89] Multiple rodata sections --- src/elf/file.ml | 44 ++++++++++++++++++++++++++++---------------- src/state/base.ml | 5 +++-- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/src/elf/file.ml b/src/elf/file.ml index 1b56d28f..6acffd47 100644 --- a/src/elf/file.ml +++ b/src/elf/file.ml @@ -75,6 +75,8 @@ let machine_to_string = function (** Pretty prints a {!machine} *) let pp_machine mach = mach |> machine_to_string |> Pp.string +module SMap = Map.Make(String) + (** The type containing all the information about an ELF file *) type t = { filename : string; (** The name on the file system. Useful for error messages *) @@ -84,7 +86,7 @@ type t = { (** The target architecture of the file; only used in [arch.ml, dumpSym.ml, dw.ml] *) linksem : Elf_file.elf_file; (** The original linksem structure for the file; only used in [dw.ml] *) - rodata : Segment.t; (** The read-only data section *) + rodata : Segment.t SMap.t; (** The read-only data sections *) } (** Error on Elf parsing *) @@ -137,21 +139,31 @@ let of_file (filename : string) = within it, and so not suitable to be stored in the [RngMap] *) let elf_file = Elf_file.ELF_File_64 elf64_file in let rodata = - let (_, addr, data) = - Dwarf.extract_section_body_without_relocations elf_file ".rodata" false - (* `false' argument is for returning an empty byte-sequence if - section is not found, instead of throwing an exception *) - in - Printf.printf "%t" Pp.(top Sym.pp addr); - Segment. - { - data; - addr = 0; (* Meaningless for relocatable files *) - size = BytesSeq.length data; - read = true; - write = false; - execute = false; - } + SMap.of_list @@ List.filter_map Option.(fun (section:Elf_interpreted_section.elf64_interpreted_section) -> + let+ sname = if String.starts_with ~prefix:".rodata" section.elf64_section_name_as_string then + Some section.elf64_section_name_as_string + else + None + in + let (_, addr, data) = + Dwarf.extract_section_body_without_relocations elf_file sname false + (* `false' argument is for returning an empty byte-sequence if + section is not found, instead of throwing an exception *) + in + Printf.printf "%t" Pp.(top Sym.pp addr); + ( + sname, + Segment. + { + data; + addr = 0; (* Meaningless for relocatable files *) + size = BytesSeq.length data; + read = true; + write = false; + execute = false; + } + ) + ) elf64_file.elf64_file_interpreted_sections in info "ELF file %s has been loaded" filename; { filename; symbols; entry; machine; linksem = elf_file; rodata } diff --git a/src/state/base.ml b/src/state/base.ml index 2099b52b..28f7d087 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -624,8 +624,9 @@ let read_from_rodata (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t option ) with Not_found -> let int_addr = sym_addr.offset in - let rodata = elf.rodata in - if sym_addr.section = ".rodata" && rodata.addr <= int_addr && int_addr + size < rodata.addr + rodata.size * 8 then + let open Option in + let* rodata = Elf.File.SMap.find_opt sym_addr.section elf.rodata in + if rodata.addr <= int_addr && int_addr + size < rodata.addr + rodata.size * 8 then let bv = BytesSeq.getbvle ~size rodata.data (int_addr - rodata.addr) in (* Assume little endian here *) Some (Typed.bits bv) From 1ed0c5ccff3561e97a70d71a4ea49fe002b8faa3 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 10 Apr 2025 17:45:41 +0100 Subject: [PATCH 66/89] Generate section address constraints --- src/run/func.ml | 2 +- src/state/base.ml | 10 +++++++++- src/state/base.mli | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/run/func.ml b/src/run/func.ml index e733d880..4ea1d385 100644 --- a/src/run/func.ml +++ b/src/run/func.ml @@ -52,7 +52,7 @@ open Logs.Logger (struct let str = __MODULE__ end) -let no_run_prep ~elf:elfname ~name ~entry ?(init = State.init_sections_symbolic) () = +let no_run_prep ~elf:elfname ~name ~entry ?(init = State.init_sections_symbolic ~addr_size:Arch.address_size) () = base "Running %s in %s" name elfname; let dwarf = Dw.of_file elfname in let elf = dwarf.elf in diff --git a/src/state/base.ml b/src/state/base.ml index 28f7d087..077f2ac4 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -531,6 +531,10 @@ let init_sections ~addr_size state = let _ = Option.( let+ elf = state.elf in Elf.SymTable.iter elf.symbols @@ fun sym -> + let max_section_addr = Int.shift_left 1 addr_size - sym.size - sym.addr.offset in + push_assert state Typed.( + comp Ast.Bvule (Exp.of_var (Var.Section sym.addr.section)) (bits_int ~size:64 max_section_addr) + ); let len = List.find (fun x -> sym.size mod x = 0) [16;8;4;2;1] in if sym.typ = Elf.Symbol.OBJECT then let provenance = Mem.create_section_frag ~addr_size state.mem sym.addr.section in @@ -546,11 +550,15 @@ let init_sections ~addr_size state = ) in state -let init_sections_symbolic state = +let init_sections_symbolic ~addr_size state = let state = copy_if_locked state in let _ = Option.( let+ elf = state.elf in Elf.SymTable.iter elf.symbols @@ fun sym -> + let max_section_addr = Int.shift_left 1 addr_size - sym.size - sym.addr.offset in + push_assert state Typed.( + comp Ast.Bvule (Exp.of_var (Var.Section sym.addr.section)) (bits_int ~size:64 max_section_addr) + ); if sym.typ = Elf.Symbol.OBJECT then Hashtbl.replace state.mem.sections sym.addr.section Main ) in diff --git a/src/state/base.mli b/src/state/base.mli index 73f123db..1877b17d 100644 --- a/src/state/base.mli +++ b/src/state/base.mli @@ -405,7 +405,7 @@ val copy_if_locked : ?elf:Elf.File.t -> t -> t val init_sections : addr_size:int -> t -> t (** Assigns all sections with global objects to Main fragment *) -val init_sections_symbolic : t -> t +val init_sections_symbolic : addr_size:int -> t -> t (** {1 State convenience manipulation } *) From 65a7e044b98ab53d6c686c30820688ec6e8dfef1 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Fri, 11 Apr 2025 10:05:16 +0100 Subject: [PATCH 67/89] Fix nondet --- src/state/base.ml | 7 +++++++ src/state/base.mli | 3 +++ src/trace/context.ml | 11 +++++++++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/state/base.ml b/src/state/base.ml index 077f2ac4..30dc867e 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -62,6 +62,8 @@ end type id = Id.t module Var = struct + let next_nondet = ref 0 + type t = | Register of Id.t * Reg.t (** The value of this register in this state *) | ReadVar of Id.t * int * Ast.Size.t @@ -159,6 +161,11 @@ module Var = struct | RetAddr -> Ast.Ty_BitVec 64 | NonDet (_, size) -> Ast.Ty_BitVec (Ast.Size.to_bits size) | Section _ -> Ast.Ty_BitVec 64 + + let new_nondet sz = + let v = NonDet (!next_nondet, sz) in + next_nondet := !next_nondet + 1; + v end type var = Var.t diff --git a/src/state/base.mli b/src/state/base.mli index 1877b17d..a7275a7e 100644 --- a/src/state/base.mli +++ b/src/state/base.mli @@ -145,6 +145,9 @@ module Var : sig (** Get the type of a variable *) val ty : t -> Reg.ty + + (** Get a fresh NonDet variable *) + val new_nondet : Ast.Size.t -> t end (** The type of variables *) diff --git a/src/trace/context.ml b/src/trace/context.ml index f233abe9..2fe350d4 100644 --- a/src/trace/context.ml +++ b/src/trace/context.ml @@ -58,6 +58,7 @@ module SMap = Map.Make (String) type t = { reg_writes : (State.Reg.t * State.tval) Vec.t; (** Stores the delayed register writes *) mem_reads : State.tval HashVector.t; (** Stores the result of memory reads *) + nondets : State.var HashVector.t; (** Stores the mapping of nondet variables *) state : State.t; segments : State.exp SMap.t; asserts: State.exp list; @@ -68,6 +69,7 @@ type t = { let make_context ?dwarf ?relocation state = let reg_writes = Vec.empty () in let mem_reads = HashVector.empty () in + let nondets = HashVector.empty () in let segments, asserts = relocation |> Option.map (fun relocation -> @@ -82,14 +84,19 @@ let make_context ?dwarf ?relocation state = ) |> Option.value ~default:(SMap.empty, []) in - { state; reg_writes; mem_reads; dwarf; segments; asserts } + { state; reg_writes; mem_reads; nondets; dwarf; segments; asserts } (** Expand a Trace variable to a State expression, using the context *) let expand_var ~(ctxt : t) (v : Base.Var.t) (a : Ast.no Ast.ty) : State.exp = assert (Base.Var.ty v = a); match v with | Register reg -> State.get_reg_exp ctxt.state reg - | NonDet (i, _) | Read (i, _) -> (HashVector.get ctxt.mem_reads i).exp (* TODO is the NonDet case correct *) + | NonDet (i, sz) -> HashVector.get_opt ctxt.nondets i + |> Option.value_fun ~default:(fun () -> + Fun.tee (HashVector.add ctxt.nondets i) (State.Var.new_nondet sz) + ) + |> State.Exp.of_var + | Read (i, _) -> (HashVector.get ctxt.mem_reads i).exp (* TODO is the NonDet case correct *) | Segment (name, _) -> SMap.find name ctxt.segments (*TODO put the actual value there*) (* | Segment (name, sz) -> Exp.Typed.extract ~first:0 ~last:(sz-1) (State.Exp.of_var (State.Var.Section name)) TODO put the actual value there *) From 9c760c08286ada49bafce5a3cd8c13dfd6726d34 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Fri, 11 Apr 2025 10:36:29 +0100 Subject: [PATCH 68/89] Fix symbolic bytes subranges Subranges were extracted as little and concatenated as big endian Now both are little endian --- src/state/symbolicBytes.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/state/symbolicBytes.ml b/src/state/symbolicBytes.ml index 2693b0b3..07a43622 100644 --- a/src/state/symbolicBytes.ml +++ b/src/state/symbolicBytes.ml @@ -126,6 +126,7 @@ module Make (Var : Exp.Var) : S with type var = Var.t = struct assert (pos + len <= elen); (Typed.extract ~last:((8 * (pos + len)) - 1) ~first:(8 * pos) e, len) + (* TODO should we care about endianness? *) (* Warning: This code is complicated because of all the indices. I tried to make diagrams to explain *) let sub ~pos ~len sb = @@ -166,7 +167,7 @@ module Make (Var : Exp.Var) : S with type var = Var.t = struct Some [Typed.extract ~last:((8 * taken_len) - 1) ~first:(8 * off) e] in let+ list = sub_list ~pos ~len sb in - Typed.concat list + Typed.concat @@ List.rev list let blit_exp exp ~pos ~len sb = assert (len > 0); From 63d3ebb1461f1b44ae43c4ea191520bb95d623b9 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Fri, 11 Apr 2025 13:23:58 +0100 Subject: [PATCH 69/89] Hack addres-size extract of constexpr --- src/trace/typer.ml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/trace/typer.ml b/src/trace/typer.ml index 238a17ef..25dc0a92 100644 --- a/src/trace/typer.ml +++ b/src/trace/typer.ml @@ -106,11 +106,13 @@ let unop (u : Ast.unop) tval : Ctype.t option = | Bvneg | Bvnot -> machine_of_size typ |> some | Extract (b, a) -> debug "Extracting from type %t" Pp.(top (opt Ctype.pp) tval.ctyp); - if (* HACK for adrp: a = 0 && b = Arch.address_size - 1 &&*) Ctype.is_ptr typ then tval.ctyp + if (* HACK for adrp: a = 0 && b = Arch.address_size - 1 &&*) Ctype.is_ptr typ then + tval.ctyp else let bitsize = b - a + 1 in let constexpr = typ.constexpr in - if bitsize mod 8 = 0 then Ctype.machine ~constexpr (bitsize / 8) |> some else None + if bitsize mod 8 = 0 || bitsize = Arch.address_size && constexpr then + Ctype.machine ~constexpr (bitsize / 8) |> some else None | ZeroExtend m | SignExtend m -> if m mod 8 = 0 then machine_of_size ~update:(m / 8) typ |> some else None From 34b6c323a705cce2ce1643724f0215202ac2f37a Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Fri, 11 Apr 2025 13:33:33 +0100 Subject: [PATCH 70/89] Testing: Warn about read variables --- src/run/testRelProg.ml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/run/testRelProg.ml b/src/run/testRelProg.ml index 1f4b396d..067e3799 100644 --- a/src/run/testRelProg.ml +++ b/src/run/testRelProg.ml @@ -14,6 +14,14 @@ let test return_register exit_register name = let ret = State.Reg.of_string return_register in let ext = State.Reg.of_string exit_register in State.Tree.iter (fun l st -> + let found_symread = ref false in + st.read_vars |> Vec.iter Fun.(State.Tval.exp %> Ast.Manip.exp_iter_var (fun v -> + match v with + | State.Var.ReadVar _ -> found_symread := true; warn "State contains symbolic read variable:\n %t" (Pp.top State.Var.pp v) + | _ -> () + )); + if !found_symread then + warn "State:\n%t" (Pp.top State.pp st); if State.is_possible st then match l with | Block_lib.End _ -> ( From d65c184a7642a5edffdf73f9893c5555386bc2ea Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Fri, 11 Apr 2025 14:33:31 +0100 Subject: [PATCH 71/89] More typer hacking New bits type for bit fragments that are not whole-byte size --- src/ctype/ctype.ml | 3 +++ src/run/relProg.ml | 1 + src/trace/typer.ml | 52 +++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/src/ctype/ctype.ml b/src/ctype/ctype.ml index 1c876f37..dbbe61f8 100644 --- a/src/ctype/ctype.ml +++ b/src/ctype/ctype.ml @@ -120,6 +120,7 @@ type unqualified = | Enum of { name : string; id : int } (** See {!env} for what the id refers to *) | FuncPtr (** Hack to accommodate PKVM *) | Missing (** Hack to accommodate PKVM *) + | Bits (** Hack to prevent losing type information when processing bitvectors with non-whole-byte sizes *) (** The internal representation of generalized C types *) and t = { @@ -337,6 +338,7 @@ let rec sizeof_unqualified = function | Array { elem; dims } -> let num = dims |> List.map (Option.value ~default:0) |> List.fold_left ( * ) 1 in num * sizeof elem + | Bits -> 0 (* Shouldn't use this value *) (** Give the size of an type. Need the environement. *) and sizeof t = sizeof_unqualified t.unqualified @@ -660,6 +662,7 @@ and pp_unqualified = function | Enum { name; _ } -> dprintf "Enum %s" name | FuncPtr -> dprintf "FuncPtr" | Missing -> dprintf "Missing" + | Bits -> dprintf "Bits" and pp_fragment frag = group diff --git a/src/run/relProg.ml b/src/run/relProg.ml index 7bf90660..7170b132 100644 --- a/src/run/relProg.ml +++ b/src/run/relProg.ml @@ -39,6 +39,7 @@ let pp_typed ~(tenv: Ctype.env) ~(ctype: Ctype.t) ~pp (value: State.Exp.t) = | Enum _ -> pp value | FuncPtr -> pp value | Missing -> pp value + | Bits -> pp value let read_big st addr sz = Seq.iota_step_up ~step:16 ~endi:sz diff --git a/src/trace/typer.ml b/src/trace/typer.ml index 25dc0a92..2dd8e1a2 100644 --- a/src/trace/typer.ml +++ b/src/trace/typer.ml @@ -98,6 +98,24 @@ let machine_of_size ?(update = 0) (typ : Ctype.t) : Ctype.t = let constexpr = typ.constexpr in Ctype.machine ~constexpr (size + update) +let is_const tval = + tval.ctyp + |> Option.map Ctype.is_constexpr + |> Option.value_fun ~default:(fun () -> Exp.ConcreteEval.is_concrete tval.exp) + +let constexpr_of_exp e = + let ty = Exp.Typed.get_type e in + if Exp.Typed.is_bv ty then + let bitsize = Exp.Typed.expect_bv ty in + Option.some @@ + if bitsize mod 8 = 0 || bitsize = Arch.address_size then + Ctype.machine ~constexpr:true (bitsize / 8) + else + Ctype.Bits |> Ctype.qual ~constexpr:true + else + None + + let unop (u : Ast.unop) tval : Ctype.t option = let open Option in let* typ = tval.ctyp in @@ -111,8 +129,7 @@ let unop (u : Ast.unop) tval : Ctype.t option = else let bitsize = b - a + 1 in let constexpr = typ.constexpr in - if bitsize mod 8 = 0 || bitsize = Arch.address_size && constexpr then - Ctype.machine ~constexpr (bitsize / 8) |> some else None + if bitsize mod 8 = 0 then Ctype.machine ~constexpr (bitsize / 8) |> some else None | ZeroExtend m | SignExtend m -> if m mod 8 = 0 then machine_of_size ~update:(m / 8) typ |> some else None @@ -214,12 +231,37 @@ let rec expr ~ctxt (exp : Base.exp) : Ctype.t option = | Bool _ -> None | Enum _ -> None | Vec _ -> None - | Unop (u, e, _) -> expr_tval ~ctxt e |> unop u + | Unop (u, e, _) -> + let tval = expr_tval ~ctxt e in + Option.( + unop u tval + ||| + if is_const tval then + constexpr_of_exp exp + else + None + ) | Binop (b, e, e', _) -> let te = expr_tval ~ctxt e in let te' = expr_tval ~ctxt e' in - binop ~ctxt b te te' - | Manyop (m, el, _) -> List.map (expr_tval ~ctxt) el |> manyop ~ctxt m + Option.( + binop ~ctxt b te te' + ||| + if is_const te && is_const te' then + constexpr_of_exp exp + else + None + ) + | Manyop (m, el, _) -> + let tvals = List.map (expr_tval ~ctxt) el in + Option.( + manyop ~ctxt m tvals + ||| + if List.for_all is_const tvals then + constexpr_of_exp exp + else + None + ) | Ite _ -> None | Bound _ -> . | Let _ -> . From af5e7fe5ab2b500d71fe742c961c6c73bba0e1ee Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Mon, 14 Apr 2025 10:18:22 +0100 Subject: [PATCH 72/89] Better section asserts --- src/elf/file.ml | 23 ++++++++++++++++++----- src/state/base.ml | 32 ++++++++++++++++++++++++-------- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/elf/file.ml b/src/elf/file.ml index 6acffd47..3dcc5291 100644 --- a/src/elf/file.ml +++ b/src/elf/file.ml @@ -77,6 +77,12 @@ let pp_machine mach = mach |> machine_to_string |> Pp.string module SMap = Map.Make(String) +type section = { + name : string; + size : int; + align : int; +} + (** The type containing all the information about an ELF file *) type t = { filename : string; (** The name on the file system. Useful for error messages *) @@ -87,6 +93,7 @@ type t = { linksem : Elf_file.elf_file; (** The original linksem structure for the file; only used in [dw.ml] *) rodata : Segment.t SMap.t; (** The read-only data sections *) + sections : section list; } (** Error on Elf parsing *) @@ -138,10 +145,16 @@ let of_file (filename : string) = - the range of the section is guaranteed to overlap with any symbols within it, and so not suitable to be stored in the [RngMap] *) let elf_file = Elf_file.ELF_File_64 elf64_file in + let sections = List.map (fun (s:Elf_interpreted_section.elf64_interpreted_section) -> { + name=s.elf64_section_name_as_string; + size=Z.to_int s.elf64_section_size; + align=Z.to_int s.elf64_section_align; + }) elf64_file.elf64_file_interpreted_sections + in let rodata = - SMap.of_list @@ List.filter_map Option.(fun (section:Elf_interpreted_section.elf64_interpreted_section) -> - let+ sname = if String.starts_with ~prefix:".rodata" section.elf64_section_name_as_string then - Some section.elf64_section_name_as_string + SMap.of_list @@ List.filter_map Option.(fun section -> + let+ sname = if String.starts_with ~prefix:".rodata" section.name then + Some section.name else None in @@ -163,7 +176,7 @@ let of_file (filename : string) = execute = false; } ) - ) elf64_file.elf64_file_interpreted_sections + ) sections in info "ELF file %s has been loaded" filename; - { filename; symbols; entry; machine; linksem = elf_file; rodata } + { filename; symbols; entry; machine; linksem = elf_file; rodata; sections } diff --git a/src/state/base.ml b/src/state/base.ml index 30dc867e..d6e93053 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -533,15 +533,34 @@ let set_impossible state = assert (not @@ is_locked state); state.asserts <- [Typed.false_] +let push_section_constraints ~addr_size state sections = + List.iter (fun (s:Elf.File.section) -> + let max_section_addr = Int.shift_left 1 addr_size - s.size in + let s_exp = (Exp.of_var (Var.Section s.name)) in + (* The whole section fits in memory *) + push_assert state Typed.(comp Ast.Bvule s_exp (bits_int ~size:64 max_section_addr)); + (* The load address cannot be 0 *) + push_assert state Typed.(not (s_exp = (bits_int ~size:64 0))); + if s.align > 1 then + let (align_pow, _) = Seq.ints 0 + |> Seq.drop_while (fun x -> Int.shift_left 1 x < s.align) + |> Seq.uncons + |> Option.get + in + if s.align = Int.shift_left 1 align_pow then + let last = align_pow - 1 in + (* Section address is aligned *) + push_assert state Typed.(extract ~first:0 ~last s_exp = zero ~size:align_pow) + else + warn "Section alignment is not a power of two: %d" s.align; + ) sections + let init_sections ~addr_size state = let state = copy_if_locked state in let _ = Option.( let+ elf = state.elf in + push_section_constraints ~addr_size state elf.sections; Elf.SymTable.iter elf.symbols @@ fun sym -> - let max_section_addr = Int.shift_left 1 addr_size - sym.size - sym.addr.offset in - push_assert state Typed.( - comp Ast.Bvule (Exp.of_var (Var.Section sym.addr.section)) (bits_int ~size:64 max_section_addr) - ); let len = List.find (fun x -> sym.size mod x = 0) [16;8;4;2;1] in if sym.typ = Elf.Symbol.OBJECT then let provenance = Mem.create_section_frag ~addr_size state.mem sym.addr.section in @@ -561,11 +580,8 @@ let init_sections_symbolic ~addr_size state = let state = copy_if_locked state in let _ = Option.( let+ elf = state.elf in + push_section_constraints ~addr_size state elf.sections; Elf.SymTable.iter elf.symbols @@ fun sym -> - let max_section_addr = Int.shift_left 1 addr_size - sym.size - sym.addr.offset in - push_assert state Typed.( - comp Ast.Bvule (Exp.of_var (Var.Section sym.addr.section)) (bits_int ~size:64 max_section_addr) - ); if sym.typ = Elf.Symbol.OBJECT then Hashtbl.replace state.mem.sections sym.addr.section Main ) in From 22996d3917b72d825e3ed5a70383493fa454cd6c Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Mon, 14 Apr 2025 18:32:05 +0100 Subject: [PATCH 73/89] Better logs --- src/run/relProg.ml | 19 +++++-- src/run/testRelProg.ml | 111 ++++++++++++++++++++++++++++------------- src/state/simplify.ml | 11 +++- 3 files changed, 99 insertions(+), 42 deletions(-) diff --git a/src/run/relProg.ml b/src/run/relProg.ml index 7170b132..bb7b2edf 100644 --- a/src/run/relProg.ml +++ b/src/run/relProg.ml @@ -41,12 +41,15 @@ let pp_typed ~(tenv: Ctype.env) ~(ctype: Ctype.t) ~pp (value: State.Exp.t) = | Missing -> pp value | Bits -> pp value -let read_big st addr sz = +let read_big ~prov st addr sz = + let addr = Exp.Typed.extract ~last:(Arch.address_size-1) ~first:0 addr in Seq.iota_step_up ~step:16 ~endi:sz |> Seq.map (fun off -> let addr = Exp.Typed.(addr + bits_int ~size:Arch.address_size off) in let len = min 16 (sz - off) in - State.read_noprov st ~addr ~size:(Ast.Size.of_bytes len) + match prov with + | None -> State.read_noprov st ~addr ~size:(Ast.Size.of_bytes len) + | Some p -> State.read ~provenance:p st ~addr:addr ~size:(Ast.Size.of_bytes len) ) |> List.of_seq |> Exp.Typed.concat @@ -55,14 +58,20 @@ let pp_eval_loc sz st ~(tenv: Ctype.env) ~(ctype: Ctype.t) (loc: Dw.Loc.t) : PPr let value = match loc with | Register reg -> Some (State.get_reg_exp st reg) | RegisterOffset (reg, off) -> - let r = State.get_reg_exp st reg in - Some (read_big st Exp.Typed.(r + bits_int ~size:Arch.address_size off) sz) + let r = State.get_reg st reg in + let open Ctype in + let prov = Option.bind r.ctyp (fun ctype -> + match ctype.unqualified with + | Ptr { provenance; _ } -> Some provenance + | _ -> None + ) in + Some (read_big ~prov st Exp.Typed.(r.exp + bits_int ~size:64 off) sz) | StackFrame _off -> None | Global symoff -> let addr = Elf.SymTable.to_addr_offset symoff in let addr = State.Exp.of_address ~size:Arch.address_size addr in - Some (read_big st addr sz) + Some (read_big ~prov:None st addr sz) | Const x -> Some(x |> BitVec.of_z ~size:(8*sz) |> Exp.Typed.bits) | Dwarf _ops -> None in let pp = fun value -> diff --git a/src/run/testRelProg.ml b/src/run/testRelProg.ml index 067e3799..3ac67b19 100644 --- a/src/run/testRelProg.ml +++ b/src/run/testRelProg.ml @@ -5,6 +5,75 @@ open Logs.Logger (struct let str = __MODULE__ end) +type err = { msg: string; asserts: State.Exp.t list } +type node_result = { all_fail: bool; errors: err list } +let node_result_of_result = function +| Ok () -> { all_fail=false; errors=[] } +| Error e -> { all_fail=true; errors=[e] } + +let rec process_tree ~pc ~ret ~ext (node:Block_lib.label State.Tree.t) = + let l = node.data in + let st = node.state in + + let found_symread = ref false in + st.read_vars |> Vec.iter Fun.(State.Tval.exp %> Ast.Manip.exp_iter_var (fun v -> + match v with + | State.Var.ReadVar _ -> found_symread := true; warn "State contains symbolic read variable:\n %t" (Pp.top State.Var.pp v) + | _ -> () + )); + if !found_symread then + warn "State:\n%t" (Pp.top State.pp st); + + if not (State.is_possible st) then + { all_fail=true; errors=[] } + else match l with + | Block_lib.End _ -> let result = ( + let pc_exp = State.get_reg_exp st pc in + let pc_addr = try + Some (State.Exp.expect_sym_address pc_exp) + with + _ -> None + in + let ret_exp = match pc_addr with + | Some pc_addr -> + if pc_addr = Elf.Address.{ section="UND.abort"; offset=0 } then + Result.error { + msg=Printf.sprintf "abort called from %t" (Pp.tos Elf.Address.pp st.last_pc); + asserts=st.asserts; + } + else if pc_addr <> Elf.Address.{ section="UND.exit"; offset=0 } then + Result.error { + msg=Printf.sprintf "finished at weird address %t" (Pp.tos Elf.Address.pp pc_addr); + asserts=st.asserts; + } + else + Result.ok (State.get_reg_exp st ext) + | None -> + Result.ok (State.get_reg_exp st ret) (* Symbolic pc = returned from main *) + in + Result.bind ret_exp @@ fun ret_exp -> + let ret_val = ret_exp |> Exp.ConcreteEval.eval |> Exp.Value.expect_bv |> BitVec.to_int in + if ret_val <> 0 then + Result.error { + msg=Printf.sprintf "non-zero return code %d" ret_val; + asserts=st.asserts + } + else + Result.ok () + ) in + node_result_of_result result + | _ -> + let results = List.map (process_tree ~pc ~ret ~ext) node.rest in + let all_errors = List.bind results (fun x -> x.errors) in + if List.for_all (fun x -> x.all_fail) results then { + all_fail=true; + errors=List.map (fun x -> {x with asserts=st.asserts}) all_errors; + } + else { + all_fail=false; + errors=all_errors; + } + let test return_register exit_register name = let tree = Func.get_state_tree ~elf:name ~name:"main" ~init:(State.init_sections ~addr_size:Arch.address_size) ~every_instruction:false () ~breakpoints:["UND.abort"; "UND.exit"] @@ -13,41 +82,13 @@ let test return_register exit_register name = let pc = Arch.pc () in let ret = State.Reg.of_string return_register in let ext = State.Reg.of_string exit_register in - State.Tree.iter (fun l st -> - let found_symread = ref false in - st.read_vars |> Vec.iter Fun.(State.Tval.exp %> Ast.Manip.exp_iter_var (fun v -> - match v with - | State.Var.ReadVar _ -> found_symread := true; warn "State contains symbolic read variable:\n %t" (Pp.top State.Var.pp v) - | _ -> () - )); - if !found_symread then - warn "State:\n%t" (Pp.top State.pp st); - if State.is_possible st then - match l with - | Block_lib.End _ -> ( - let pc_exp = State.get_reg_exp st pc in - let ret_exp = match (try - Some (State.Exp.expect_sym_address pc_exp) - with - _ -> None - ) with - | Some pc_addr -> - if pc_addr = Elf.Address.{ section="UND.abort"; offset=0 } then - fail "abort called from %t" (Pp.top Elf.Address.pp st.last_pc) - else if pc_addr <> Elf.Address.{ section="UND.exit"; offset=0 } then - fail "finished at weird address %t" (Pp.top Elf.Address.pp pc_addr) - else - State.get_reg_exp st ext - | None -> - State.get_reg_exp st ret (* Symbolic pc = returned from main *) - in - let ret_val = ret_exp |> Exp.ConcreteEval.eval |> Exp.Value.expect_bv |> BitVec.to_int in - if ret_val <> 0 then - fail "non-zero return code %d" ret_val; - ) - | _ -> () - ) tree; - base "Success" + let results = process_tree ~pc ~ret ~ext tree in + if List.is_empty results.errors then + base "Success" + else + fail "Some paths fail: %t" Pp.( + top (list (fun (e:err) -> !^(e.msg) ^^ !^" when " ^^ list State.Exp.pp e.asserts)) results.errors + ) let elf = let doc = "ELF file from which to pull the code" in diff --git a/src/state/simplify.ml b/src/state/simplify.ml index 3fee6e92..14088110 100644 --- a/src/state/simplify.ml +++ b/src/state/simplify.ml @@ -90,11 +90,18 @@ let ctxfull state = (fun e -> declare e; match Z3St.check_both serv e with - | Some true -> None + | Some true -> + debug "%t is redundant" (Pp.top Exp.pp e); + None | Some false -> found_false := true; + debug "%t is impossible" (Pp.top Exp.pp e); None - | None -> Some e) + | None -> + debug "%t is possible" (Pp.top Exp.pp e); + Z3St.send_assert serv e; + Some e + ) state.asserts in (* If state is impossible then it has a single assertion: false *) From 0cf6b06ff2ab5f52af4bfcc2f5227805f7b26ec9 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Mon, 14 Apr 2025 20:41:48 +0100 Subject: [PATCH 74/89] Fix provenance with section fragments --- src/state/base.ml | 50 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/src/state/base.ml b/src/state/base.ml index d6e93053..18c52de8 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -320,24 +320,26 @@ module Mem = struct mutable main : Fragment.t; frags : (Exp.t * Fragment.t) Vec.t; sections : (string, provenance) Hashtbl.t; (* mapping sections to their fragments *) + mutable allow_main : bool; (* HACK to prvent incorrectly assuming Main provenance when using section fragments *) } (** Get the main fragment of memory *) - let get_main { main; frags = _; sections = _ } = main + let get_main { main; _ } = main (** Empty memory, every address is unbound *) - let empty () = { main = Fragment.empty; frags = Vec.empty (); sections = Hashtbl.create 10 } + let empty () = { main = Fragment.empty; frags = Vec.empty (); sections = Hashtbl.create 10; allow_main = true } (** Build a new memory from the old one by keeping the old one as a base *) let from mem = { main = Fragment.from mem.main; frags = Vec.map (Pair.map Fun.id Fragment.from) mem.frags; - sections = Hashtbl.copy mem.sections; + sections = Hashtbl.copy mem.sections; + allow_main = mem.allow_main; } (** Copy the memory so that it can be mutated separately *) - let copy mem = { main = mem.main; frags = Vec.copy mem.frags; sections = Hashtbl.copy mem.sections } + let copy mem = { main = mem.main; frags = Vec.copy mem.frags; sections = Hashtbl.copy mem.sections; allow_main = mem.allow_main } (** Add a new fragment with the specified base *) let new_frag mem base = @@ -559,11 +561,13 @@ let init_sections ~addr_size state = let state = copy_if_locked state in let _ = Option.( let+ elf = state.elf in + state.mem.allow_main <- false; push_section_constraints ~addr_size state elf.sections; + List.iter (fun (x:Elf.File.section) -> Mem.create_section_frag ~addr_size state.mem x.name |> ignore) elf.sections; Elf.SymTable.iter elf.symbols @@ fun sym -> let len = List.find (fun x -> sym.size mod x = 0) [16;8;4;2;1] in if sym.typ = Elf.Symbol.OBJECT then - let provenance = Mem.create_section_frag ~addr_size state.mem sym.addr.section in + let provenance = Mem.get_section_provenance state.mem sym.addr.section in Seq.iota_step_up ~step:len ~endi:sym.size |> Seq.iter (fun off -> let data = Elf.Symbol.sub sym off len in @@ -667,15 +671,18 @@ let read_from_rodata (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t option ) ) -let read ~provenance ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t = +let rec read ~provenance ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t = assert (not @@ is_locked s); - let var = make_read ?ctyp s size in - let exp = Mem.read s.mem ~provenance ~var ~addr ~size in - let exp = if provenance = Main && exp = None then read_from_rodata ~addr ~size s else exp in - Option.iter (set_read s (Var.expect_readvar var)) exp; - Option.value exp ~default:(Exp.of_var var) - -let read_noprov ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t = + if provenance = Ctype.Main && not s.mem.allow_main then + read_noprov ?ctyp s ~addr ~size + else + let var = make_read ?ctyp s size in + let exp = Mem.read s.mem ~provenance ~var ~addr ~size in + let exp = if exp = None then read_from_rodata ~addr ~size s else exp in + Option.iter (set_read s (Var.expect_readvar var)) exp; + Option.value exp ~default:(Exp.of_var var) + +and read_noprov ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t = debug "Addr: %t" Pp.(top Exp.pp addr); let elf_addr = eval_address s addr in debug "Address: %t" Pp.(top (optional Elf.Address.pp) elf_addr); @@ -684,16 +691,23 @@ let read_noprov ?ctyp (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t = let addr_size = addr |> Typed.get_type |> Typed.expect_bv in let addr = Exp.of_address ~size:addr_size elf_addr in let provenance = Mem.get_section_provenance s.mem elf_addr.section in + if provenance = Ctype.Main && not s.mem.allow_main then + Raise.fail "Main fragment should not be used here"; read ~provenance ?ctyp s ~addr ~size | None when Vec.length s.mem.frags = 0 -> + if not s.mem.allow_main then + Raise.fail "Main fragment should not be used here"; read ~provenance:Ctype.Main ?ctyp s ~addr ~size | None -> Raise.fail "Trying to access %t in state %d: No provenance info" Pp.(tos Exp.pp addr) s.id -let write ~provenance (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) (value : Exp.t) : unit = +let rec write ~provenance (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) (value : Exp.t) : unit = assert (not @@ is_locked s); - Mem.write ~provenance s.mem ~addr ~size ~exp:value + if provenance = Ctype.Main && not s.mem.allow_main then + write_noprov s ~addr ~size value + else + Mem.write ~provenance s.mem ~addr ~size ~exp:value -let write_noprov (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) (value : Exp.t) : unit = +and write_noprov (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) (value : Exp.t) : unit = let elf_addr = eval_address s addr in debug "Address: %t" Pp.(top (optional Elf.Address.pp) elf_addr); match elf_addr with @@ -701,8 +715,12 @@ let write_noprov (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) (value : Exp.t) : let addr_size = addr |> Typed.get_type |> Typed.expect_bv in let addr = Exp.of_address ~size:addr_size elf_addr in let provenance = Mem.get_section_provenance s.mem elf_addr.section in + if provenance = Ctype.Main && not s.mem.allow_main then + Raise.fail "Main fragment should not be used here"; write ~provenance s ~addr ~size value | None when Vec.length s.mem.frags = 0 -> + if not s.mem.allow_main then + Raise.fail "Main fragment should not be used here"; write ~provenance:Ctype.Main s ~addr ~size value | None -> Raise.fail "Trying to access %t in state %d: No provenance info" Pp.(tos Exp.pp addr) s.id From 1f25a8fcdda7a1b930193e72587e925a4c0508f3 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 24 Apr 2025 13:20:54 +0100 Subject: [PATCH 75/89] Make fbreg locations work --- src/dw/var.ml | 15 +++++++++++-- src/run/relProg.ml | 52 ++++++++++++++++++++++++++++++++-------------- 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/src/dw/var.ml b/src/dw/var.ml index b6010848..a6a28c6c 100644 --- a/src/dw/var.ml +++ b/src/dw/var.ml @@ -48,7 +48,13 @@ type range = Addr.t * Addr.t option (** Type of a DWARF variable *) -type t = { name : string; param : bool; ctype : Ctype.t; locs : (range * Loc.t) list } +type t = { + name : string; + param : bool; + ctype : Ctype.t; + locs : (range * Loc.t) list; + locs_frame_base : (range * Loc.t) list; +} (** Type of a DWARF variable in linksem *) type linksem_t = Dwarf.sdt_variable_or_formal_parameter @@ -78,7 +84,12 @@ let of_linksem (elf : Elf.File.t) (env : Ctype.env) (lvar : linksem_t) : t = |> List.map (fun (a, b, l) -> ((Addr.of_sym a, end_addr_of_sym b), Loc.of_linksem elf l)) |> loc_merge in - { name; param; ctype; locs } + let locs_frame_base = + lvar.svfp_locations_frame_base |> Option.value ~default:[] + |> List.map (fun (a, b, l) -> ((Addr.of_sym a, end_addr_of_sym b), Loc.of_linksem elf l)) + |> loc_merge + in + { name; param; ctype; locs; locs_frame_base } (** Pretty print a variable *) let pp_raw v = diff --git a/src/run/relProg.ml b/src/run/relProg.ml index bb7b2edf..aeca7ac6 100644 --- a/src/run/relProg.ml +++ b/src/run/relProg.ml @@ -54,8 +54,8 @@ let read_big ~prov st addr sz = |> List.of_seq |> Exp.Typed.concat -let pp_eval_loc sz st ~(tenv: Ctype.env) ~(ctype: Ctype.t) (loc: Dw.Loc.t) : PPrint.document = - let value = match loc with +let eval_loc ?frame_value sz st (loc: Dw.Loc.t) : State.Exp.t option = + match loc with | Register reg -> Some (State.get_reg_exp st reg) | RegisterOffset (reg, off) -> let r = State.get_reg st reg in @@ -66,14 +66,40 @@ let pp_eval_loc sz st ~(tenv: Ctype.env) ~(ctype: Ctype.t) (loc: Dw.Loc.t) : PPr | _ -> None ) in Some (read_big ~prov st Exp.Typed.(r.exp + bits_int ~size:64 off) sz) - | StackFrame _off -> - None + | StackFrame off -> + (* This is a bit hacky, should instead extract the provenance from frame_value *) + let stack_provenance = Option.bind (State.get_reg st (Arch.sp())).ctyp (fun ctype -> + match ctype.unqualified with + | Ptr { provenance; _ } -> Some provenance + | _ -> None + ) in + + let open Option in + let+ frame_value = frame_value in + debug "Reading from %t" Pp.(top State.Exp.pp Exp.Typed.(frame_value + bits_int ~size:64 off)); + read_big ~prov:stack_provenance st Exp.Typed.(frame_value + bits_int ~size:64 off) sz | Global symoff -> let addr = Elf.SymTable.to_addr_offset symoff in let addr = State.Exp.of_address ~size:Arch.address_size addr in Some (read_big ~prov:None st addr sz) | Const x -> Some(x |> BitVec.of_z ~size:(8*sz) |> Exp.Typed.bits) - | Dwarf _ops -> None in + | Dwarf _ops -> None + +let eval_loc_from_list ?frame_value sz st pc locs= + let open Option in + let+ loc = List.find_map (fun ((lo,hi), loc) -> ( + let open Elf.Address in + let* hi = hi in + let* over = lo <= pc in + let* under = pc < hi in + if over && under then + Some loc + else + None + )) locs in + eval_loc ?frame_value sz st loc + +let pp_variable_value ~(tenv: Ctype.env) ~(ctype: Ctype.t) value = let pp = fun value -> match Exp.ConcreteEval.eval_if_concrete value with | Some(value) -> Exp.Value.pp value @@ -88,18 +114,12 @@ let printvars ~st ~(dwarf: Dw.t) pc = let pv vars = Seq.iter (fun (v: Dw.Var.t) -> let sz = Ctype.sizeof v.ctype in - match List.find_map (fun ((lo,hi), loc) -> Option.( - let open Elf.Address in - let* hi = hi in - let* over = lo <= pc in - let* under = pc < hi in - if over && under then - Some loc - else - None - )) v.locs with + let frame_value = eval_loc_from_list sz st pc v.locs_frame_base |> Option.join in + debug "Frame value %t" Pp.(top (optional State.Exp.pp) frame_value); + let value = eval_loc_from_list ?frame_value sz st pc v.locs in + match value with | None -> () - | Some loc -> out := !out ^ Printf.sprintf "%s = %t\n" v.name Pp.(tos (pp_eval_loc sz st ~ctype:v.ctype ~tenv:dwarf.tenv) loc); + | Some var_val -> out := !out ^ Printf.sprintf "%s = %t\n" v.name Pp.(tos (pp_variable_value ~ctype:v.ctype ~tenv:dwarf.tenv) var_val); ) vars in From 0ff8cc9510fc77ef1b9ab65496d4b31b5955b045 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sun, 27 Apr 2025 11:44:58 +0100 Subject: [PATCH 76/89] Fix read rodata --- src/state/base.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/state/base.ml b/src/state/base.ml index 18c52de8..ac324ec0 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -661,7 +661,7 @@ let read_from_rodata (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t option let int_addr = sym_addr.offset in let open Option in let* rodata = Elf.File.SMap.find_opt sym_addr.section elf.rodata in - if rodata.addr <= int_addr && int_addr + size < rodata.addr + rodata.size * 8 then + if rodata.addr <= int_addr && int_addr + size <= rodata.addr + rodata.size * 8 then let bv = BytesSeq.getbvle ~size rodata.data (int_addr - rodata.addr) in (* Assume little endian here *) Some (Typed.bits bv) From 38f9e1146b9a416a2a4735f95b0d989a0e64ed09 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sun, 27 Apr 2025 11:45:45 +0100 Subject: [PATCH 77/89] Fix printing execution --- src/run/relProg.ml | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/src/run/relProg.ml b/src/run/relProg.ml index aeca7ac6..5526eb21 100644 --- a/src/run/relProg.ml +++ b/src/run/relProg.ml @@ -165,25 +165,27 @@ let run_prog elfname name objdump_d branchtables = let get_footprint pc = Runner.get_normal_opt runner pc |> Option.fold ~none:[] ~some:Trace.Instr.footprint in *) - State.Tree.iter - (fun a st -> - let last_pc = st.last_pc in - (match a with - | Block_lib.Start -> () - | Block_lib.BranchAt pc -> - if Elf.Address.(last_pc + 4 <> pc) then - Printf.printf "\nJUMP from %t:\n\n" Pp.(top Elf.Address.pp last_pc); - print_string @@ Analyse.Pp.css Analyse.Types.Html Render_vars @@ printvars ~st ~dwarf pc; - print_string (print_analyse_instruction pc); - print_endline "BRANCH!"; - | Block_lib.NormalAt pc -> - if Elf.Address.(last_pc + 4 <> pc) then - Printf.printf "\nJUMP from %t:\n\n" Pp.(top Elf.Address.pp last_pc); - print_string @@ Analyse.Pp.css Analyse.Types.Html Render_vars @@ printvars ~st ~dwarf pc; - print_string (print_analyse_instruction pc); - | Block_lib.End _ -> ()); - ) - tree; + let rec iter (f:Block_lib.label State.Tree.t) = + let st = f.state in + let last_pc = st.last_pc in + (match f.data with + | Block_lib.Start -> () + | Block_lib.BranchAt pc | Block_lib.NormalAt pc -> + if Elf.Address.(last_pc + 4 <> pc) then + Printf.printf "\nJUMP from %t:\n\n" Pp.(top Elf.Address.pp last_pc); + print_string @@ Analyse.Pp.css Analyse.Types.Html Render_vars @@ printvars ~st ~dwarf pc; + print_string (print_analyse_instruction pc); + | Block_lib.End _ -> + print_string "END"; + ); + let succ = List.filter (fun (s:Block_lib.label State.Tree.t) -> + State.is_possible s.state + ) f.rest in + if List.length succ > 1 then + print_string "BRANCH!"; + List.iter iter succ + in + iter tree; match Analyse.Utils.read_file_lines "src/analyse/html-postamble.html" with | Error _ -> () | Ok lines -> Array.iter (function s -> Printf.printf "%s\n" s) lines From 19343af0ac199093efc34c49d4f4260d2a0ccf6a Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Wed, 30 Apr 2025 21:16:03 +0100 Subject: [PATCH 78/89] Nicer visualization --- src/run/relProg.ml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/run/relProg.ml b/src/run/relProg.ml index 5526eb21..a7f8d109 100644 --- a/src/run/relProg.ml +++ b/src/run/relProg.ml @@ -16,22 +16,22 @@ let rec pp_array pp sz dims value = |> List.of_seq |> Pp.list (pp_array pp sz dims) -let pp_typed ~(tenv: Ctype.env) ~(ctype: Ctype.t) ~pp (value: State.Exp.t) = +let pp_typed ~(tenv: Ctype.env) ~(ctype: Ctype.t) ~(pp : ?hex:bool -> _ -> _) (value: State.Exp.t) = match ctype.unqualified with | Machine _ -> pp value | Cint _ -> pp value | Cbool -> pp value - | Ptr _ -> pp value + | Ptr _ -> pp ~hex:true value | Struct { id; _ } -> let s = IdMap.geti tenv.structs id in Pp.( Ctype.FieldMap.to_seq s.layout |> Seq.map (fun (offset, (field:Ctype.field)) -> ( - opt string field.fname, + Option.value ~default:"?" field.fname, pp (Exp.Typed.extract ~first:(8*offset) ~last:(8*(offset + field.size)-1) value) )) |> List.of_seq - |> mapping s.name + |> record s.name ) | Array { dims; _ } -> let sz = Ctype.sizeof ctype in @@ -100,8 +100,9 @@ let eval_loc_from_list ?frame_value sz st pc locs= eval_loc ?frame_value sz st loc let pp_variable_value ~(tenv: Ctype.env) ~(ctype: Ctype.t) value = - let pp = fun value -> + let pp ?(hex=false) = fun value -> match Exp.ConcreteEval.eval_if_concrete value with + | Some(Exp.Value.Bv bv) when not hex -> Pp.int @@ BitVec.to_int bv | Some(value) -> Exp.Value.pp value | None -> State.Exp.pp value in From e66ab9ca456260dfb1b586b1c640661ef9b2ad2f Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 1 May 2025 23:34:17 +0100 Subject: [PATCH 79/89] Allow relocations in rodata and fix endianness problems --- src/elf/file.ml | 23 +++++++++++++--------- src/elf/linksemRelocatable.ml | 18 +++++++++-------- src/elf/segment.ml | 4 ++-- src/state/base.ml | 37 ++++++++++++++++++++++++----------- src/trace/run.ml | 2 ++ 5 files changed, 54 insertions(+), 30 deletions(-) diff --git a/src/elf/file.ml b/src/elf/file.ml index 3dcc5291..ecac2613 100644 --- a/src/elf/file.ml +++ b/src/elf/file.ml @@ -152,23 +152,28 @@ let of_file (filename : string) = }) elf64_file.elf64_file_interpreted_sections in let rodata = - SMap.of_list @@ List.filter_map Option.(fun section -> - let+ sname = if String.starts_with ~prefix:".rodata" section.name then - Some section.name + SMap.of_list @@ List.filter_map Option.(fun (section:Elf_interpreted_section.elf64_interpreted_section) -> + let+ sname = if String.starts_with ~prefix:".rodata" section.elf64_section_name_as_string then + Some section.elf64_section_name_as_string else None in - let (_, addr, data) = - Dwarf.extract_section_body_without_relocations elf_file sname false + let data = section.elf64_section_body in + Printf.printf "%t" Pp.(top BytesSeq.pp data); + let relocations = match LinksemRelocatable.get_relocations_for_section elf64_file sname with + | Error.Fail s -> elferror "LinksemRelocatable: get_relocations_for_section: %s" s + | Error.Success x -> Relocations.of_linksem x + in + (* let (_, addr, data) = + Dwarf.extract_section_body elf_file Abi_aarch64_symbolic_relocation.aarch64_data_relocation_interpreter sname false (* `false' argument is for returning an empty byte-sequence if section is not found, instead of throwing an exception *) - in - Printf.printf "%t" Pp.(top Sym.pp addr); + in *) ( sname, Segment. { - data; + data = (data, relocations); addr = 0; (* Meaningless for relocatable files *) size = BytesSeq.length data; read = true; @@ -176,7 +181,7 @@ let of_file (filename : string) = execute = false; } ) - ) sections + ) elf64_file.elf64_file_interpreted_sections in info "ELF file %s has been loaded" filename; { filename; symbols; entry; machine; linksem = elf_file; rodata; sections } diff --git a/src/elf/linksemRelocatable.ml b/src/elf/linksemRelocatable.ml index 99998fb7..eb0900d8 100644 --- a/src/elf/linksemRelocatable.ml +++ b/src/elf/linksemRelocatable.ml @@ -19,21 +19,23 @@ type global_symbol_init_info = symbol list open Elf_symbol_table open Elf_interpreted_section +let get_relocations_for_section (f:Elf_file.elf64_file) section = + let machine = f.elf64_file_header.elf64_machine in + if machine = Elf_header.elf_ma_aarch64 then + Error.bind + (Elf_symbolic.extract_elf64_relocations_for_section f Abi_aarch64_symbolic_relocation.aarch64_relocation_interpreter section) + @@ fun relocs -> Error.return (AArch64 relocs) + else + Error.fail @@ "machine not supported " ^ (Elf_header.string_of_elf_machine_architecture machine) + let get_elf64_file_global_symbol_init (f: Elf_file.elf64_file) : global_symbol_init_info Error.error = let secs = f.elf64_file_interpreted_sections in - let machine = f.elf64_file_header.elf64_machine in Error.bind (Elf_file.get_elf64_file_symbol_table f) @@ fun (symtab, strtab) -> let rel_cache = ref SMap.empty in let get_relocs section = match SMap.find_opt section !rel_cache with | Some rels -> rels - | None -> - if machine = Elf_header.elf_ma_aarch64 then - Error.bind - (Elf_symbolic.extract_elf64_relocations_for_section f Abi_aarch64_symbolic_relocation.aarch64_relocation_interpreter section) - @@ fun relocs -> Error.return (AArch64 relocs) - else - Error.fail @@ "machine not supported " ^ (Elf_header.string_of_elf_machine_architecture machine) + | None -> get_relocations_for_section f section in List.filter_map ( fun entry -> diff --git a/src/elf/segment.ml b/src/elf/segment.ml index faacf802..51b82009 100644 --- a/src/elf/segment.ml +++ b/src/elf/segment.ml @@ -51,7 +51,7 @@ (** The type of a segment *) type t = { - data : BytesSeq.t; + data : BytesSeq.t * Relocations.t; addr : int; (** The actual start address of the BytesSeq *) size : int; (** redundant with {!Utils.BytesSeq.length} data *) read : bool; @@ -66,7 +66,7 @@ let of_linksem (lseg : Elf_interpreted_segment.elf64_interpreted_segment) : t = BytesSeq.blit lseg.elf64_segment_body 0 bytes 0 (Z.to_int lseg.elf64_segment_size); let (read, write, execute) = lseg.elf64_segment_flags in { - data = BytesSeq.of_bytes bytes; + data = BytesSeq.of_bytes bytes, Relocations.IMap.empty; addr = Z.to_int lseg.elf64_segment_base; size; read; diff --git a/src/state/base.ml b/src/state/base.ml index ac324ec0..d1272de3 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -294,10 +294,12 @@ module Relocation = struct else [] in + let v, a = ( - Typed.concat (before @ relocation.value :: after), + Typed.concat (after @ relocation.value :: before), relocation.asserts @ asserts - ) + ) in + v,a ) data.relocations (exp, []) end @@ -647,24 +649,37 @@ let read_from_rodata (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t option | None -> None | Some elf -> ( Option.bind (eval_address s addr) @@ fun sym_addr -> - let size = size |> Ast.Size.to_bits in + let size = size |> Ast.Size.to_bytes in try let (sym, offset) = Elf.SymTable.of_addr_with_offset elf.symbols sym_addr in if sym.writable then None else ( - (* Assume little endian here *) - assert (Relocation.IMap.is_empty sym.data.relocations); - let bv = BytesSeq.getbvle ~size sym.data.data offset in (* TODO relocations *) - Some (Typed.bits bv) + let data = Elf.Symbol.sub sym offset size in + let value, asserts = Relocation.exp_of_data data in + + if not @@ List.is_empty asserts then + warn "Relocaiton assserts in .rodata ignored: %t" Pp.(top (list Exp.pp) asserts); + + Some value ) with Not_found -> let int_addr = sym_addr.offset in let open Option in let* rodata = Elf.File.SMap.find_opt sym_addr.section elf.rodata in - if rodata.addr <= int_addr && int_addr + size <= rodata.addr + rodata.size * 8 then - let bv = BytesSeq.getbvle ~size rodata.data (int_addr - rodata.addr) in - (* Assume little endian here *) - Some (Typed.bits bv) + if rodata.addr <= int_addr && int_addr + size <= rodata.addr + rodata.size then + let data, relocations = rodata.data in + let data = BytesSeq.sub data (int_addr - rodata.addr) size in + base "Addr offset: %d, size: %d" int_addr size; + base "All relocs: %t" (Pp.top Elf.Relocations.pp relocations); + let relocations = Elf.Relocations.sub relocations (int_addr - rodata.addr) size in + base "Sub relocs: %t" (Pp.top Elf.Relocations.pp relocations); + let value, asserts = Relocation.exp_of_data {data; relocations} in + base "Value: %t" (Pp.top Exp.pp value); + + if not @@ List.is_empty asserts then + warn "Relocaiton assserts in .rodata ignored: %t" Pp.(top (list Exp.pp) asserts); + + Some value else ( warn "Failed to find symbol or rodata at %t" (Pp.top Elf.Address.pp sym_addr); None diff --git a/src/trace/run.ml b/src/trace/run.ml index b0250aae..5298484f 100644 --- a/src/trace/run.ml +++ b/src/trace/run.ml @@ -98,6 +98,7 @@ let event_mut ~(ctxt : ctxt) (event : Base.event) = | None -> State.read_noprov ctxt.state ~addr:naddr ~size |> State.Tval.of_exp in + debug "read value: %t" Pp.(top State.Tval.pp tval); HashVector.set ctxt.mem_reads value tval | WriteMem { addr; value; size } -> ( let naddr = expand_simplify ~ctxt addr in @@ -109,6 +110,7 @@ let event_mut ~(ctxt : ctxt) (event : Base.event) = let ptrtype = Typer.expr ~ctxt addr in debug "Typed write mem with ptr:%t" (Pp.top (Pp.opt Ctype.pp) ptrtype); let value = expand_tval ~ctxt value in + debug "written value: %t" Pp.(top State.Tval.pp value); Typer.write ~dwarf ctxt.state ?ptrtype ~addr:naddr ~size value | None -> let value = expand_simplify ~ctxt value in From 0f9393ab4a535e6128151d8cfb0083b8eac94978 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Thu, 1 May 2025 23:51:09 +0100 Subject: [PATCH 80/89] Remove print --- src/elf/file.ml | 1 - src/state/base.ml | 4 ---- 2 files changed, 5 deletions(-) diff --git a/src/elf/file.ml b/src/elf/file.ml index ecac2613..91bef6fc 100644 --- a/src/elf/file.ml +++ b/src/elf/file.ml @@ -159,7 +159,6 @@ let of_file (filename : string) = None in let data = section.elf64_section_body in - Printf.printf "%t" Pp.(top BytesSeq.pp data); let relocations = match LinksemRelocatable.get_relocations_for_section elf64_file sname with | Error.Fail s -> elferror "LinksemRelocatable: get_relocations_for_section: %s" s | Error.Success x -> Relocations.of_linksem x diff --git a/src/state/base.ml b/src/state/base.ml index d1272de3..bec126eb 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -669,12 +669,8 @@ let read_from_rodata (s : t) ~(addr : Exp.t) ~(size : Mem.Size.t) : Exp.t option if rodata.addr <= int_addr && int_addr + size <= rodata.addr + rodata.size then let data, relocations = rodata.data in let data = BytesSeq.sub data (int_addr - rodata.addr) size in - base "Addr offset: %d, size: %d" int_addr size; - base "All relocs: %t" (Pp.top Elf.Relocations.pp relocations); let relocations = Elf.Relocations.sub relocations (int_addr - rodata.addr) size in - base "Sub relocs: %t" (Pp.top Elf.Relocations.pp relocations); let value, asserts = Relocation.exp_of_data {data; relocations} in - base "Value: %t" (Pp.top Exp.pp value); if not @@ List.is_empty asserts then warn "Relocaiton assserts in .rodata ignored: %t" Pp.(top (list Exp.pp) asserts); From 6524a9dd64f1deb62b4aa166ae27ab40ce5c3ae2 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Fri, 2 May 2025 00:01:06 +0100 Subject: [PATCH 81/89] Fix debug loc parsing --- src/dw/loc.ml | 13 ++++++++----- src/run/relProg.ml | 6 +++++- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/dw/loc.ml b/src/dw/loc.ml index 3ead2a51..a63fcc53 100644 --- a/src/dw/loc.ml +++ b/src/dw/loc.ml @@ -72,7 +72,7 @@ type t = | RegisterOffset of State.Reg.t * int (** At register + offset address *) | StackFrame of int (** On the stackFrame with offset *) | Global of Elf.SymTable.sym_offset (** Global variable with an offset *) - | Const of Z.t + | Const of Sym.t | Dwarf of dwop list (** Uninterpreted dwarf location *) (** The type of a location in linksem format *) @@ -125,12 +125,12 @@ let of_linksem ?(amap = Arch.dwarf_reg_map ()) (elf : Elf.File.t) : linksem_t -> let addr = Addr.of_sym @@ sym_of_oav arg in try Global (Elf.SymTable.of_addr_with_offset elf.symbols @@ addr) with Not_found -> - warn "Symbol at 0x%x not found in Loc.of_linksem" (int_of_oav arg); + warn "Symbol at %t not found in Loc.of_linksem" (Pp.top Sym.pp (sym_of_oav arg)); Dwarf ops ) (* Other *) | [{ op_semantics = OpSem_lit; op_argument_values = [arg]; _ }; { op_semantics = OpSem_stack_value; _ }] -> - let value = Sym.to_z @@ sym_of_oav arg in + let value = sym_of_oav arg in Const value | ops -> Dwarf ops @@ -140,7 +140,7 @@ let to_string = function | RegisterOffset (reg, off) -> Printf.sprintf "[%s+%x]" (State.Reg.to_string reg) off | StackFrame off -> Printf.sprintf "[frame+%x]" off | Global symoff -> Elf.SymTable.string_of_sym_offset symoff - | Const x -> Z.to_string x + | Const x -> Sym.to_string x | Dwarf ops -> Dwarf.pp_operations ops (** Compare two location. Loc.t is not compatible with polymorphic compare *) @@ -159,7 +159,10 @@ let compare l1 l2 = Pair.compare ~fst:Elf.Symbol.compare (sym1, off1) (sym2, off2) | (Global (_, _), _) -> -1 | (_, Global (_, _)) -> 1 - | (Const x, Const y) -> Z.compare x y + | (Const (Absolute x), Const (Absolute y)) -> Z.compare x y + | (Const (Offset(s,x)), Const (Offset(t,y))) -> Pair.compare ~snd:Z.compare (s,x) (t,y) + | (Const (Absolute _), Const (Offset _)) -> -1 + | (Const (Offset _), Const (Absolute _)) -> 1 | (Const _, _) -> -1 | (_, Const _) -> 1 | (Dwarf ops1, Dwarf ops2) -> compare ops1 ops2 diff --git a/src/run/relProg.ml b/src/run/relProg.ml index a7f8d109..20719885 100644 --- a/src/run/relProg.ml +++ b/src/run/relProg.ml @@ -82,7 +82,11 @@ let eval_loc ?frame_value sz st (loc: Dw.Loc.t) : State.Exp.t option = let addr = Elf.SymTable.to_addr_offset symoff in let addr = State.Exp.of_address ~size:Arch.address_size addr in Some (read_big ~prov:None st addr sz) - | Const x -> Some(x |> BitVec.of_z ~size:(8*sz) |> Exp.Typed.bits) + | Const x -> + Some (match x with + | Absolute x -> x |> BitVec.of_z ~size:(8*sz) |> Exp.Typed.bits + | Offset (s, o) -> State.Exp.of_address ~size:(8*sz) Elf.Address.{section=s; offset=Z.to_int o} + ) | Dwarf _ops -> None let eval_loc_from_list ?frame_value sz st pc locs= From 2feb3a6c6ace8bb0d1cbf00b9ffda0e9ab83338e Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Fri, 2 May 2025 12:52:28 +0100 Subject: [PATCH 82/89] Add sections non-overlap constraints --- src/elf/file.ml | 15 +++-- src/run/func.ml | 4 +- src/run/funcRD.ml | 2 +- src/run/relProg.ml | 2 +- src/run/testRelProg.ml | 2 +- src/state/base.ml | 139 ++++++++++++++++++++++++----------------- src/state/base.mli | 4 +- 7 files changed, 99 insertions(+), 69 deletions(-) diff --git a/src/elf/file.ml b/src/elf/file.ml index 91bef6fc..095ad835 100644 --- a/src/elf/file.ml +++ b/src/elf/file.ml @@ -145,11 +145,16 @@ let of_file (filename : string) = - the range of the section is guaranteed to overlap with any symbols within it, and so not suitable to be stored in the [RngMap] *) let elf_file = Elf_file.ELF_File_64 elf64_file in - let sections = List.map (fun (s:Elf_interpreted_section.elf64_interpreted_section) -> { - name=s.elf64_section_name_as_string; - size=Z.to_int s.elf64_section_size; - align=Z.to_int s.elf64_section_align; - }) elf64_file.elf64_file_interpreted_sections + let sections = List.filter_map (fun (s:Elf_interpreted_section.elf64_interpreted_section) -> + if Z.equal Z.zero (Z.logand s.elf64_section_flags Elf_section_header_table.shf_alloc) then + None + else + Some { + name=s.elf64_section_name_as_string; + size=Z.to_int s.elf64_section_size; + align=Z.to_int s.elf64_section_align; + } + ) elf64_file.elf64_file_interpreted_sections in let rodata = SMap.of_list @@ List.filter_map Option.(fun (section:Elf_interpreted_section.elf64_interpreted_section) -> diff --git a/src/run/func.ml b/src/run/func.ml index 4ea1d385..08ac6c2b 100644 --- a/src/run/func.ml +++ b/src/run/func.ml @@ -52,7 +52,7 @@ open Logs.Logger (struct let str = __MODULE__ end) -let no_run_prep ~elf:elfname ~name ~entry ?(init = State.init_sections_symbolic ~addr_size:Arch.address_size) () = +let no_run_prep ~elf:elfname ~name ~entry ?(init = State.init_sections_symbolic ~sp:Arch.sp ~addr_size:Arch.address_size) () = base "Running %s in %s" name elfname; let dwarf = Dw.of_file elfname in let elf = dwarf.elf in @@ -66,7 +66,7 @@ let no_run_prep ~elf:elfname ~name ~entry ?(init = State.init_sections_symbolic let abi = Arch.get_abi api in Trace.Cache.start @@ Arch.get_isla_config (); base "Computing entry state"; - let start = Init.state () |> State.copy ~elf |> init |> abi.init in + let start = Init.state () |> State.copy ~elf |> abi.init |> init in if entry then base "Entry state:\n%t" (Pp.topi State.pp start); (dwarf, elf, func, start) diff --git a/src/run/funcRD.ml b/src/run/funcRD.ml index f2d1fffb..052b50b9 100644 --- a/src/run/funcRD.ml +++ b/src/run/funcRD.ml @@ -74,7 +74,7 @@ let run_func_rd elfname name objdump_d branchtables breakpoints = let abi = Arch.get_abi api in Trace.Cache.start @@ Arch.get_isla_config (); base "Computing entry state"; - let start = Init.state () |> State.copy ~elf |> State.init_sections ~addr_size:Arch.address_size |> abi.init in + let start = Init.state () |> State.copy ~elf |> abi.init |> State.init_sections ~sp:Arch.sp ~addr_size:Arch.address_size in base "Loading %s for Analyse" elfname; let analyse_test = Analyse.Elf.parse_elf_file elfname in base "Analysing %s for Analyse" elfname; diff --git a/src/run/relProg.ml b/src/run/relProg.ml index 20719885..f4a0f354 100644 --- a/src/run/relProg.ml +++ b/src/run/relProg.ml @@ -159,7 +159,7 @@ let run_prog elfname name objdump_d branchtables = instr in base "Start running"; - let tree = Func.get_state_tree ~elf:elfname ~name ~init:(State.init_sections ~addr_size:Arch.address_size) ~every_instruction:true () + let tree = Func.get_state_tree ~elf:elfname ~name ~init:(State.init_sections ~sp:Arch.sp ~addr_size:Arch.address_size) ~every_instruction:true () ~breakpoints:["UND.abort"; "UND.exit"] in base "Ended running, start pretty printing"; diff --git a/src/run/testRelProg.ml b/src/run/testRelProg.ml index 3ac67b19..e9f15408 100644 --- a/src/run/testRelProg.ml +++ b/src/run/testRelProg.ml @@ -75,7 +75,7 @@ let rec process_tree ~pc ~ret ~ext (node:Block_lib.label State.Tree.t) = } let test return_register exit_register name = - let tree = Func.get_state_tree ~elf:name ~name:"main" ~init:(State.init_sections ~addr_size:Arch.address_size) ~every_instruction:false () + let tree = Func.get_state_tree ~elf:name ~name:"main" ~init:(State.init_sections ~sp:Arch.sp ~addr_size:Arch.address_size) ~every_instruction:false () ~breakpoints:["UND.abort"; "UND.exit"] in debug "%t" (Pp.top (State.Tree.pp_all Block_lib.pp_label) tree); diff --git a/src/state/base.ml b/src/state/base.ml index bec126eb..2f4c78ba 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -537,63 +537,6 @@ let set_impossible state = assert (not @@ is_locked state); state.asserts <- [Typed.false_] -let push_section_constraints ~addr_size state sections = - List.iter (fun (s:Elf.File.section) -> - let max_section_addr = Int.shift_left 1 addr_size - s.size in - let s_exp = (Exp.of_var (Var.Section s.name)) in - (* The whole section fits in memory *) - push_assert state Typed.(comp Ast.Bvule s_exp (bits_int ~size:64 max_section_addr)); - (* The load address cannot be 0 *) - push_assert state Typed.(not (s_exp = (bits_int ~size:64 0))); - if s.align > 1 then - let (align_pow, _) = Seq.ints 0 - |> Seq.drop_while (fun x -> Int.shift_left 1 x < s.align) - |> Seq.uncons - |> Option.get - in - if s.align = Int.shift_left 1 align_pow then - let last = align_pow - 1 in - (* Section address is aligned *) - push_assert state Typed.(extract ~first:0 ~last s_exp = zero ~size:align_pow) - else - warn "Section alignment is not a power of two: %d" s.align; - ) sections - -let init_sections ~addr_size state = - let state = copy_if_locked state in - let _ = Option.( - let+ elf = state.elf in - state.mem.allow_main <- false; - push_section_constraints ~addr_size state elf.sections; - List.iter (fun (x:Elf.File.section) -> Mem.create_section_frag ~addr_size state.mem x.name |> ignore) elf.sections; - Elf.SymTable.iter elf.symbols @@ fun sym -> - let len = List.find (fun x -> sym.size mod x = 0) [16;8;4;2;1] in - if sym.typ = Elf.Symbol.OBJECT then - let provenance = Mem.get_section_provenance state.mem sym.addr.section in - Seq.iota_step_up ~step:len ~endi:sym.size - |> Seq.iter (fun off -> - let data = Elf.Symbol.sub sym off len in - let addr = Exp.of_address ~size:addr_size Elf.Address.(sym.addr + off) in - let size = Ast.Size.of_bytes len in - let (exp, asserts) = Relocation.exp_of_data data in - Mem.write ~provenance state.mem ~addr ~size ~exp; - List.iter (push_relocation_assert state) asserts; - ) - ) in - state - -let init_sections_symbolic ~addr_size state = - let state = copy_if_locked state in - let _ = Option.( - let+ elf = state.elf in - push_section_constraints ~addr_size state elf.sections; - Elf.SymTable.iter elf.symbols @@ fun sym -> - if sym.typ = Elf.Symbol.OBJECT then - Hashtbl.replace state.mem.sections sym.addr.section Main - ) in - state - - let map_mut_exp (f : exp -> exp) s : unit = assert (not @@ is_locked s); Reg.Map.map_mut_current (Tval.map_exp f) s.regs; @@ -781,6 +724,88 @@ let set_last_pc state pc = assert (not @@ is_locked state); state.last_pc <- pc + +let push_section_constraints ~sp ~addr_size state sections = + let sp = sp () in + let rec f : Elf.File.section list -> unit = function + | [] -> () + | s::rest -> ( + let max_section_addr = Int.shift_left 1 addr_size - s.size in + let s_exp = (Exp.of_var (Var.Section s.name)) in + (* The whole section fits in memory *) + push_assert state Typed.(comp Ast.Bvule s_exp (bits_int ~size:64 max_section_addr)); + (* The load address cannot be 0 *) + push_assert state Typed.(not (s_exp = (bits_int ~size:64 0))); + if s.align > 1 then ( + let (align_pow, _) = Seq.ints 0 + |> Seq.drop_while (fun x -> Int.shift_left 1 x < s.align) + |> Seq.uncons + |> Option.get + in + if s.align = Int.shift_left 1 align_pow then + let last = align_pow - 1 in + (* Section address is aligned *) + push_assert state Typed.(extract ~first:0 ~last s_exp = zero ~size:align_pow) + else + warn "Section alignment is not a power of two: %d" s.align + ); + (* Sections don't overlap *) + let s_end = Typed.(s_exp + bits_int ~size:64 s.size) in (* we know this doesn't overflow thanks to the other constraints *) + List.iter (fun (s2:Elf.File.section) -> + let s2_exp = (Exp.of_var (Var.Section s2.name)) in + let s2_end = Typed.(s2_exp + bits_int ~size:64 s2.size) in + let order1 = Typed.(comp Ast.Bvule s_end s2_exp) in + let order2 = Typed.(comp Ast.Bvule s2_end s_exp) in + push_assert state Typed.(manyop Or [order1; order2]) + ) rest; + (* Doesn't overlap with stack *) + let stack_end = get_reg_exp state sp in + let stack_start = Typed.(stack_end - bits_int ~size:64 0x1000) in + let order1 = Typed.(comp Ast.Bvule s_end stack_start) in + let order2 = Typed.(comp Ast.Bvule stack_end s_exp) in + push_assert state Typed.(manyop Or [order1; order2]); + + f rest + ) + in + f sections + +let init_sections ~sp ~addr_size state = + let state = copy_if_locked state in + let _ = Option.( + let+ elf = state.elf in + state.mem.allow_main <- false; + push_section_constraints ~sp ~addr_size state elf.sections; + List.iter (fun (x:Elf.File.section) -> Mem.create_section_frag ~addr_size state.mem x.name |> ignore) elf.sections; + Elf.SymTable.iter elf.symbols @@ fun sym -> + let len = List.find (fun x -> sym.size mod x = 0) [16;8;4;2;1] in + if sym.typ = Elf.Symbol.OBJECT then + let provenance = Mem.get_section_provenance state.mem sym.addr.section in + Seq.iota_step_up ~step:len ~endi:sym.size + |> Seq.iter (fun off -> + let data = Elf.Symbol.sub sym off len in + let addr = Exp.of_address ~size:addr_size Elf.Address.(sym.addr + off) in + let size = Ast.Size.of_bytes len in + let (exp, asserts) = Relocation.exp_of_data data in + Mem.write ~provenance state.mem ~addr ~size ~exp; + List.iter (push_relocation_assert state) asserts; + ) + ) in + lock state; + state + +let init_sections_symbolic ~sp ~addr_size state = + let state = copy_if_locked state in + let _ = Option.( + let+ elf = state.elf in + push_section_constraints ~sp ~addr_size state elf.sections; + Elf.SymTable.iter elf.symbols @@ fun sym -> + if sym.typ = Elf.Symbol.OBJECT then + Hashtbl.replace state.mem.sections sym.addr.section Main + ) in + lock state; + state + let pp s = let open Pp in record "state" diff --git a/src/state/base.mli b/src/state/base.mli index a7275a7e..2d373fd4 100644 --- a/src/state/base.mli +++ b/src/state/base.mli @@ -405,10 +405,10 @@ val copy : ?elf:Elf.File.t -> t -> t The returned state is always unlocked *) val copy_if_locked : ?elf:Elf.File.t -> t -> t -val init_sections : addr_size:int -> t -> t +val init_sections : sp:(unit -> Reg.t) -> addr_size:int -> t -> t (** Assigns all sections with global objects to Main fragment *) -val init_sections_symbolic : addr_size:int -> t -> t +val init_sections_symbolic : sp:(unit -> Reg.t) -> addr_size:int -> t -> t (** {1 State convenience manipulation } *) From cfa543e7ffdd49dac31a95fb07a3474bcc2985fc Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sat, 3 May 2025 13:22:10 +0100 Subject: [PATCH 83/89] debug --- src/analyse/Symbols.ml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/analyse/Symbols.ml b/src/analyse/Symbols.ml index cc059ae4..6e44279b 100644 --- a/src/analyse/Symbols.ml +++ b/src/analyse/Symbols.ml @@ -1,5 +1,9 @@ (* TODO header *) +open Logs.Logger (struct + let str = __MODULE__ +end) + module SMap = Map.Make (String) type rels = @@ -24,12 +28,14 @@ let get_elf64_file_global_symbol_init (f: Elf_file.elf64_file) : global_symbol_i let rel_cache = ref SMap.empty in let get_relocs section = match SMap.find_opt section !rel_cache with - | Some rels -> rels + | Some rels -> Error.return rels | None -> if machine = Elf_header.elf_ma_aarch64 then Error.bind (Elf_symbolic.extract_elf64_relocations_for_section f Abi_aarch64_symbolic_relocation.aarch64_relocation_interpreter section) - @@ fun relocs -> Error.return (AArch64 relocs) + @@ fun relocs -> + rel_cache := SMap.add section (AArch64 relocs) !rel_cache; + Error.return (AArch64 relocs) else Error.fail @@ "machine not supported " ^ (Elf_header.string_of_elf_machine_architecture machine) in @@ -65,6 +71,7 @@ let get_elf64_file_global_symbol_init (f: Elf_file.elf64_file) : global_symbol_i in Error.bind data @@ fun data -> Error.bind (String_table.get_string_at name strtab) @@ fun str -> + debug "Processed %s\n" str; Error.return (str, (typ, size, addr, (data, relocs), bnd)) ) (List.nth_opt secs shndx) ) symtab |> Error.mapM Fun.id \ No newline at end of file From e9ca1854d9b3f77220c068cd2e3ca2e54b94003d Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sat, 3 May 2025 15:28:42 +0100 Subject: [PATCH 84/89] More relocation types --- src/elf/relocations.ml | 4 +++- src/isla/cache.ml | 9 +++++++-- src/isla/relocation.ml | 17 +++++++++++++---- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/elf/relocations.ml b/src/elf/relocations.ml index 66cdc831..9d2d2107 100644 --- a/src/elf/relocations.ml +++ b/src/elf/relocations.ml @@ -81,7 +81,9 @@ let pp_target = Pp.(function | AArch64 Abi_aarch64_symbolic_relocation.ADD -> !^"ADD" | AArch64 Abi_aarch64_symbolic_relocation.ADRP -> !^"ADRP" | AArch64 Abi_aarch64_symbolic_relocation.CALL -> !^"CALL" -| AArch64 Abi_aarch64_symbolic_relocation.LDST b -> !^"LDST" ^^ int (1 lsl b)) +| AArch64 Abi_aarch64_symbolic_relocation.LDST b -> !^"LDST" ^^ int (1 lsl b) +| AArch64 Abi_aarch64_symbolic_relocation.CONDBR -> !^"CONDBR" +| AArch64 Abi_aarch64_symbolic_relocation.B -> !^"B") let pp_rel rel = let hi, lo = rel.mask in diff --git a/src/isla/cache.ml b/src/isla/cache.ml index be6f64e8..1cf72492 100644 --- a/src/isla/cache.ml +++ b/src/isla/cache.ml @@ -86,7 +86,9 @@ module Opcode (*: Cache.Key *) = struct | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADRP) -> 3 | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADD) -> 4 | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL) -> 5 - | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST b) -> 6 + b + | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST b) -> assert (b < 5); 6 + b + | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CONDBR) -> 11 + | Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.B) -> 12 let reloc_of_id: int -> Relocation.t option = function | 0 -> None @@ -95,7 +97,10 @@ module Opcode (*: Cache.Key *) = struct | 3 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADRP) | 4 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADD) | 5 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL) - | x -> Some (Elf.Relocations.AArch64 (Abi_aarch64_symbolic_relocation.LDST (x-6))) + | x when x < 11 -> Some (Elf.Relocations.AArch64 (Abi_aarch64_symbolic_relocation.LDST (x-6))) + | 11 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CONDBR) + | 12 -> Some (Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.B) + | x -> fail "Invalid relocation id %d" x let equal a b = match (a, b) with diff --git a/src/isla/relocation.ml b/src/isla/relocation.ml index d8e17a5a..c73e231b 100644 --- a/src/isla/relocation.ml +++ b/src/isla/relocation.ml @@ -25,20 +25,29 @@ let pp_opcode_with_segments (b, r) = BitVec.pp_smt (BitVec.extract 22 31 bits) ^^ !^" x0:12 " ^^ BitVec.pp_smt (BitVec.extract 0 9 bits) - | Abi_aarch64_symbolic_relocation.LDST b -> (* TODO different width loads, alignment *) + | Abi_aarch64_symbolic_relocation.LDST b -> BitVec.pp_smt (BitVec.extract (22-b) 31 bits) ^^ !^" x0:" ^^ int (12-b) ^^ !^" " ^^ BitVec.pp_smt (BitVec.extract 0 9 bits) | Abi_aarch64_symbolic_relocation.CALL -> BitVec.pp_smt (BitVec.extract 26 31 bits) ^^ !^" x0:26 " + | Abi_aarch64_symbolic_relocation.CONDBR -> + BitVec.pp_smt (BitVec.extract 24 31 bits) + ^^ !^" x0:19 " + ^^ BitVec.pp_smt (BitVec.extract 0 4 bits) + | Abi_aarch64_symbolic_relocation.B -> + BitVec.pp_smt (BitVec.extract 26 31 bits) + ^^ !^" x0:26 " ) (* for interpreting the segments *) let segments_of_reloc: t -> segment list = function | Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data640 -> fatal "invalid relocation for instructions (Data64)" | Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.Data320 -> fatal "invalid relocation for instructions (Data32)" -| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADRP -> ["x0", (0, 1); "x1", (2, 20)] (* or absolute? ["x0", (12, 13); "x1", (14, 32)] *) +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADRP -> ["x0", (0, 1); "x1", (2, 20)] | Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.ADD -> ["x0", (0, 11)] -| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST b -> ["x0", (0, 11-b)] (* TODO depends on load size *) (* or absolute? ["x0", (2, 11)] *) -| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL -> ["x0", (0, 25)] (* or absolute? ["x0", (2, 27)] *) \ No newline at end of file +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.LDST b -> ["x0", (0, 11-b)] +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CALL -> ["x0", (0, 25)] +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.CONDBR -> ["x0", (0, 18)] +| Elf.Relocations.AArch64 Abi_aarch64_symbolic_relocation.B -> ["x0", (0, 25)] From d72240d5048d9bebbdd969c52a7fb264b25eb129 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sat, 3 May 2025 16:51:38 +0100 Subject: [PATCH 85/89] new relocation representation in linksem --- src/analyse/Symbols.ml | 2 +- src/elf/linksemRelocatable.ml | 2 +- src/elf/relocations.ml | 34 +++++++++++++++------------------- 3 files changed, 17 insertions(+), 21 deletions(-) diff --git a/src/analyse/Symbols.ml b/src/analyse/Symbols.ml index 6e44279b..d555ae9f 100644 --- a/src/analyse/Symbols.ml +++ b/src/analyse/Symbols.ml @@ -7,7 +7,7 @@ end) module SMap = Map.Make (String) type rels = - | AArch64 of (Z.t, Abi_aarch64_symbolic_relocation.aarch64_relocation_target Elf_symbolic.abstract_relocation) Pmap.map + | AArch64 of (Z.t, Abi_aarch64_symbolic_relocation.aarch64_relocation_target Elf_symbolic.universal_relocation) Pmap.map type sym_data = Byte_sequence_wrapper.byte_sequence * rels diff --git a/src/elf/linksemRelocatable.ml b/src/elf/linksemRelocatable.ml index eb0900d8..5f92e2e0 100644 --- a/src/elf/linksemRelocatable.ml +++ b/src/elf/linksemRelocatable.ml @@ -5,7 +5,7 @@ module SMap = Map.Make (String) type sym_addr = string * Z.t type rels = - | AArch64 of (Z.t, Abi_aarch64_symbolic_relocation.aarch64_relocation_target Elf_symbolic.abstract_relocation) Pmap.map + | AArch64 of (Z.t, Abi_aarch64_symbolic_relocation.aarch64_relocation_target Elf_symbolic.universal_relocation) Pmap.map type sym_data = Byte_sequence_wrapper.byte_sequence * rels diff --git a/src/elf/relocations.ml b/src/elf/relocations.ml index 9d2d2107..8021bde4 100644 --- a/src/elf/relocations.ml +++ b/src/elf/relocations.ml @@ -12,8 +12,6 @@ type exp = | Const of int | BinOp of (exp * binary_operation * exp) | UnOp of (unary_operation * exp) -(* | AssertRange of (exp * int * int) *) -(* | Mask of (exp * int * int) *) type assertion = | Range of int64 * int64 @@ -30,32 +28,30 @@ type t = rel IMap.t type linksem_t = LinksemRelocatable.rels -let exp_of_linksem = +let rel_of_aarch64_linksem Elf_symbolic.{rel_desc_value; rel_desc_checks; rel_desc_mask; rel_desc_target } = let rec value_of_linksem = function | Elf_symbolic.Section s -> Section s | Elf_symbolic.Const x -> Const (Z.to_int x) | Elf_symbolic.BinOp (x, op, y) -> BinOp (value_of_linksem x, op, value_of_linksem y) | Elf_symbolic.UnOp (op, x) -> UnOp (op, value_of_linksem x) - | Elf_symbolic.AssertRange (_, _, _) -> Raise.fail "AssertRange should not occur in value expression" - | Elf_symbolic.AssertAlignment (_, _) -> Raise.fail "AssertAlignment should not occur in value expression" - | Elf_symbolic.Mask (_, _, _) -> Raise.fail "AssertRange should not occur in value expression" - in function - | Elf_symbolic.Mask (e, hi, lo) -> - let rec extract_asserts e = - match e with - | Elf_symbolic.AssertRange (e, min, max) -> let (e, a) = extract_asserts e in e, Range (Z.to_int64 min, Z.to_int64 max) :: a - | Elf_symbolic.AssertAlignment (e, bits) -> let (e, a) = extract_asserts e in e, Alignment (Z.to_int bits) :: a - | e -> e, [] - in - let e, assertions = extract_asserts e in - fun target -> {target; assertions; mask = (Z.to_int hi, Z.to_int lo); value = value_of_linksem e} - | _ -> Raise.fail "Expression does not have Mask in top level" + in + let assertions = List.map (function + | Elf_symbolic.Overflow (min, max) -> Range (Z.to_int64 min, Z.to_int64 max) + | Elf_symbolic.Alignment (bits) -> Alignment (Z.to_int bits) + ) rel_desc_checks in + let hi, lo = rel_desc_mask in + { + target=AArch64 rel_desc_target; + assertions; + mask = (Z.to_int hi, Z.to_int lo); + value = value_of_linksem rel_desc_value + } let of_linksem: linksem_t -> t = function | LinksemRelocatable.AArch64 relocs -> - let add k Elf_symbolic.{ arel_value; arel_target } m = - IMap.add (Z.to_int k) (exp_of_linksem arel_value (AArch64 arel_target)) m + let add k rel m = + IMap.add (Z.to_int k) (rel_of_aarch64_linksem rel) m in Pmap.fold add relocs IMap.empty From f2f1d24bc3b1cce7923d13d44068f84c7482d76f Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sat, 3 May 2025 16:52:16 +0100 Subject: [PATCH 86/89] rename relocation assertions to checks --- src/elf/relocations.ml | 6 +++--- src/state/base.ml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/elf/relocations.ml b/src/elf/relocations.ml index 8021bde4..04117864 100644 --- a/src/elf/relocations.ml +++ b/src/elf/relocations.ml @@ -20,7 +20,7 @@ type assertion = type rel = { target : target; value : exp; - assertions: assertion list; + checks: assertion list; mask : int * int; } @@ -35,14 +35,14 @@ let rel_of_aarch64_linksem Elf_symbolic.{rel_desc_value; rel_desc_checks; rel_de | Elf_symbolic.BinOp (x, op, y) -> BinOp (value_of_linksem x, op, value_of_linksem y) | Elf_symbolic.UnOp (op, x) -> UnOp (op, value_of_linksem x) in - let assertions = List.map (function + let checks = List.map (function | Elf_symbolic.Overflow (min, max) -> Range (Z.to_int64 min, Z.to_int64 max) | Elf_symbolic.Alignment (bits) -> Alignment (Z.to_int bits) ) rel_desc_checks in let hi, lo = rel_desc_mask in { target=AArch64 rel_desc_target; - assertions; + checks; mask = (Z.to_int hi, Z.to_int lo); value = value_of_linksem rel_desc_value } diff --git a/src/state/base.ml b/src/state/base.ml index 2f4c78ba..2657fa7d 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -264,7 +264,7 @@ module Relocation = struct | Alignment b -> let last = b-1 in Typed.(extract ~first:0 ~last value = bits_int ~size:b 0) - ) relocation.assertions in + ) relocation.checks in let (last, first) = relocation.mask in let value = Typed.extract ~first ~last value in { value; asserts; target = relocation.target } From cd6e5a8bd46f95111d14faf55cf9e5914e3dda78 Mon Sep 17 00:00:00 2001 From: Matej Urban Date: Sat, 3 May 2025 16:54:02 +0100 Subject: [PATCH 87/89] rm notes-TODO --- notes-TODO | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 notes-TODO diff --git a/notes-TODO b/notes-TODO deleted file mode 100644 index 95c382c0..00000000 --- a/notes-TODO +++ /dev/null @@ -1,22 +0,0 @@ -Symbolic symbol table -- value of symbol?? (we don't have segments in relocatable files) -- can probably keep the same api, but addresses are symbolic - -Instruction fetch: is it sound? (rewriting .text) - -Z3 finding unique solution -- Get model -> assert not model -> check now it is unsat -- Need to extend the protocol probably - -SIMREL -- state = (pc,M) (and registers TODO) -- (pc,M1) ~ (pc.M2) iff there is MT, such that - MT(A1,A2,sz) = T => M1[A1:A1+sz] =T M2[A2:A2+sz] - and respects dwarf at pc - and maybe some consistency of MT?? e.g. overlaping ranges -- relation (=T) defined using MT - - (=base_type) is equality - - (=struct) fieldwise - - (=*T): - A1 =*T A2 <=> MT(A1,A2,sz(T)) = T -- Hoare logic (or similar) with MT as variable (only read/write commands) From 1fa15c3d83bba0e8b3b19a2c86e15a3cfd7ff485 Mon Sep 17 00:00:00 2001 From: maturvo <59334936+maturvo@users.noreply.github.com> Date: Tue, 13 May 2025 01:48:20 +0100 Subject: [PATCH 88/89] Simrel (#2) * wip * wip * Debug prints * wip * Refactor simrel * Global memory processing * fixes * better printing * cleanup --- src/bin/dune | 4 +- src/bin/readDwarf.ml | 1 + src/relsim/base.ml | 115 --------- src/relsim/dune | 6 + src/relsim/relsim.ml | 447 ++++++++++++++++++++++++++++++++++ src/state/base.ml | 4 + src/state/base.mli | 3 + src/state/reg.mli | 3 + src/state/symbolicFragment.ml | 2 + src/trace/context.ml | 5 +- src/utils/fullVec.ml | 5 + src/utils/fullVec.mli | 3 + 12 files changed, 480 insertions(+), 118 deletions(-) delete mode 100644 src/relsim/base.ml create mode 100644 src/relsim/dune create mode 100644 src/relsim/relsim.ml diff --git a/src/bin/dune b/src/bin/dune index fea23d27..02b51b85 100644 --- a/src/bin/dune +++ b/src/bin/dune @@ -4,7 +4,7 @@ (modules main) (flags (:standard -open Utils)) - (libraries config run utils sig_aarch64 other_cmds)) + (libraries config run utils sig_aarch64 other_cmds relsim)) (executable (name main_riscv64) @@ -20,4 +20,4 @@ (flags (:standard -open Utils)) (modules copySourcesCmd copySources dumpDwarf dumpSym readDwarf) - (libraries run utils config state trace)) + (libraries run utils config state trace relsim)) diff --git a/src/bin/readDwarf.ml b/src/bin/readDwarf.ml index 577cac14..9fe1ac33 100644 --- a/src/bin/readDwarf.ml +++ b/src/bin/readDwarf.ml @@ -76,6 +76,7 @@ let commands = Run.TestRelProg.command; CopySourcesCmd.command; Z3.Test.command; + Relsim.command; ] let _ = Printexc.record_backtrace Config.enable_backtrace diff --git a/src/relsim/base.ml b/src/relsim/base.ml deleted file mode 100644 index 43df0a71..00000000 --- a/src/relsim/base.ml +++ /dev/null @@ -1,115 +0,0 @@ -(* open Logs.Logger (struct - let str = __MODULE__ -end) - -module Sums = Exp.Sums -module Typed = Exp.Typed - -module Var = struct - (** The type of variables *) - type t = Left of State.var | Right of State.var - - let equal a b = match (a,b) with - | Left a, Left b -> State.Var.equal a b - | Right a, Right b -> State.Var.equal a b - | _ -> false - - let pp = function - | Left v -> Pp.(!^"L:" ^^ State.Var.pp v) - | Right v -> Pp.(!^"R:" ^^ State.Var.pp v) - - (** Get the type of the variable *) - let ty = function Left v | Right v -> State.Var.ty v - - let hash = Hashtbl.hash - - let of_string = State.Var.of_string (*TODO*) -end - -module Exp = struct - include Exp.Make (Var) - - let left : State.Exp.t -> t = Ast.Manip.exp_map_var (fun v -> Var.Left v) - - let right : State.Exp.t -> t = Ast.Manip.exp_map_var (fun v -> Var.Right v) -end - -type sem_type = - | Value of int - | Ptr of sem_type - -type mem_rel = (State.Exp.t * State.Exp.t * sem_type) list - -let rec sem_type_of_ctype Ctype.{unqualified; _} = - match unqualified with - | Machine b -> Value b - | Cint { size; _ } -> Value size - | Cbool -> Value 1 - | Ptr { fragment=Ctype.DynArray t; _ } -> Ptr (sem_type_of_ctype t) - | _ -> Raise.todo() - -let mem_rel_of_dwarf (dw: (Dw.Var.t * Dw.Var.t) list) : mem_rel = - List.map (fun ((v1: Dw.Var.t), (v2: Dw.Var.t)) -> - let addr1 = match v1.locs with - | [_, Global a] -> - a |> Elf.SymTable.to_addr_offset - |> State.Exp.of_address ~size:Arch.address_size - (* |> Ast.Manip.exp_map_var (fun x -> Var.Left x) *) - | _ -> Raise.todo() - in - let addr2 = match v2.locs with - | [_, Global a] -> - a |> Elf.SymTable.to_addr_offset - |> State.Exp.of_address ~size:Arch.address_size - (* |> Ast.Manip.exp_map_var (fun x -> Var.Right x) *) - | _ -> Raise.todo() - in - let stp = sem_type_of_ctype v1.ctype in - (addr1, addr2, stp) - ) dw - -type rel = mem_rel * Exp.t list - -type event = State.Mem.Fragment.Event.t -type block = State.Mem.Fragment.Block.t - -let type_at (mem_rel:mem_rel) (block1:block) (block2:block) = - List.find_map (fun (e1,e2,t) -> - let (sym1, off1) = Sums.split_concrete e1 in - let (sym2, off2) = Sums.split_concrete e2 in - if (BitVec.to_int off1 == block1.offset && BitVec.to_int off2 == block2.offset - && Option.equal State.Exp.equal sym1 block1.base - && Option.equal State.Exp.equal sym2 block2.base) - then - Some t - else - None - ) mem_rel - -module Z3sim = Z3.Make (Var) - -let update_rel ((mem, asserts):rel) (e1: event) (e2: event) : rel = - (* TODO check sizes *) - match e1, e2 with - | (Read (block1, v1), Read (block2, v2)) -> ( - match type_at mem block1 block2 with - | Some (Value _) -> mem, Typed.(Exp.of_var (Left v1) = Exp.of_var (Right v2))::asserts - | Some (Ptr t) -> (State.Exp.of_var v1, State.Exp.of_var v2, t)::mem, asserts - | None -> mem, asserts - ) - | (Write (block1, e1), Write (block2, e2)) -> ( - (match type_at mem block1 block2 with - | Some (Value _) -> Z3 - | Some (Ptr t) -> Raise.todo() (* Check (e1, e2, t) in mem *) - | None -> Raise.fail "simrel failed"); - mem, asserts - ) - | _ -> Raise.fail "simrel failed" - - -let verify (st1:State.t) (st2:State.t) (dw:(Dw.Var.t * Dw.Var.t) list) = - let mem_rel = mem_rel_of_dwarf dw in - Raise.todo() - - - *) diff --git a/src/relsim/dune b/src/relsim/dune new file mode 100644 index 00000000..2f6abe26 --- /dev/null +++ b/src/relsim/dune @@ -0,0 +1,6 @@ +(library + (name relsim) + (public_name read-dwarf.relsim) + (flags + (:standard -open Utils)) + (libraries utils ast state run z3)) \ No newline at end of file diff --git a/src/relsim/relsim.ml b/src/relsim/relsim.ml new file mode 100644 index 00000000..3146a1b4 --- /dev/null +++ b/src/relsim/relsim.ml @@ -0,0 +1,447 @@ +open Logs.Logger (struct + let str = __MODULE__ +end) + +open Cmdliner +open Config.CommonOpt + +module Sums = Exp.Sums +module Typed = Exp.Typed + +module Var = struct + type t = Left of State.var | Right of State.var + + let equal a b = match (a,b) with + | Left a, Left b -> State.Var.equal a b + | Right a, Right b -> State.Var.equal a b + | _ -> false + + let pp = function + | Left v -> Pp.(!^"L:" ^^ State.Var.pp v) + | Right v -> Pp.(!^"R:" ^^ State.Var.pp v) + + let ty = function Left v | Right v -> State.Var.ty v + + let hash = Hashtbl.hash + + let of_string s = + let v = State.Var.of_string @@ String.sub s 2 (String.length s - 2) in + match String.sub s 0 2 with + | "L:" -> Left v + | "R:" -> Right v + | _ -> Raise.inv_arg "Invalid variable: %s" s +end + +module Exp = struct + include Exp.Make (Var) + + let left : State.Exp.t -> t = Ast.Manip.exp_map_var (fun v -> Var.Left v) + + let right : State.Exp.t -> t = Ast.Manip.exp_map_var (fun v -> Var.Right v) +end + +module Z3sim = Z3.Make (Var) + +type sem_type = +| Value of int +| Ptr of sem_type + +type value_relation = +| Eq +| EqSection of string +| EqPage of string +| Indirect of sem_type + +let rec pp_sem_type = Pp.(function +| Value w -> !^"Val"^^(int w) +| Ptr typ -> (pp_sem_type typ)^^(!^"*") +) + +let pp_rel = Pp.(function +| Eq -> !^"Eq" +| EqSection s -> !^"EqSection " ^^ !^s +| EqPage s -> !^"EqPage " ^^ !^s +| Indirect typ -> !^"Indirect " ^^ (pp_sem_type typ) +) + +let rec sem_type_of_type (typ: Ctype.t) : sem_type = + match typ.unqualified with + | Ctype.Machine _ | Ctype.Cint _ | Ctype.Cbool | Ctype.Enum _ -> Value (Ctype.sizeof typ) + | Ptr { fragment=Ctype.DynArray typ'; _ } -> Ptr (sem_type_of_type typ') + | _ -> Raise.todo() + +let value_rel_for_type: Ctype.unqualified -> value_relation = function +| Ctype.Machine _ | Ctype.Cint _ | Ctype.Cbool | Ctype.Enum _ -> Eq +| Ptr { fragment=Ctype.Global s; _ } -> EqSection s +| Ptr { fragment=Ctype.DynFragment i; _ } -> EqSection ("Dyn_"^string_of_int i) +| Ptr { fragment=Ctype.DynArray typ'; _ } -> Indirect (sem_type_of_type typ') +| _ -> Raise.todo() + +exception SimulationFailure of string + +let fail_sim fmt = + let fail msg = raise(SimulationFailure msg) in + Printf.ksprintf fail fmt + +let pp_diff pre pp l r = + let open Pp in + surround 2 2 + pre + (!^"L: "^^pp l ^^ space ^^ !^"R: "^^pp r) + empty + +module ExpRel = struct + type t = State.exp * value_relation * State.exp + + let to_exp ((exp1, rel, exp2):t) = + let open Option in + let modify e = + match rel with + | Eq -> e |> some + | EqSection s -> Typed.(e - State.Exp.of_var (State.Var.Section s)) |> some + (* TODO this is probably wrong: *) + | EqPage s -> Typed.(e - concat [extract ~first:12 ~last:63 (State.Exp.of_var (State.Var.Section s)); bits_int ~size:12 0]) |> some + | Indirect _ -> None + in + let+ e1, e2 = lift_pair (modify exp1, modify exp2) in + Typed.((Exp.left e1) = (Exp.right e2)) + + let pp ((a, r, b):t) = + let open Pp in + pp_diff + (pp_rel r ^^ !^" between") + Exp.pp (Exp.left a) (Exp.right b) +end + +module RegRel = struct + type t = value_relation State.Reg.Map.t + + let special_regs = ["OSDLR_EL1"; "OSLSR_EL1"; "EDSCR"; "SCR_EL3"] + + let infer_from_types (s:State.t) = + State.Reg.Map.mapi (fun reg (r:State.Tval.t) -> + if List.exists ((=) (State.Reg.to_string reg)) special_regs then + Some Eq + else + Option.map (fun (r:Ctype.t) -> + value_rel_for_type r.unqualified + ) r.ctyp + ) s.regs + + let to_exp_rel (s1:State.t) (s2:State.t) reg_rel : ExpRel.t list = + let bindings = State.Reg.Map.bindings reg_rel in + List.filter_map (fun (reg, rel) -> + Option.map (fun rel -> + State.get_reg_exp s1 reg, rel, State.get_reg_exp s2 reg + ) rel + ) bindings +end + +module StackRel = struct + module RelMap = RngMap.Make (struct + type t = value_relation * int + let len (_, sz: t) = sz + end) + type t = RelMap.t + + type loc = { offset:int; size:int } + + module Event = State.Mem.Fragment.Event + + let loc_of_blocks (blk1:State.Mem.Fragment.Block.t) (blk2:State.Mem.Fragment.Block.t) = + if Option.is_some blk1.base || Option.is_some blk2.base then + Raise.todo(); + if blk1.offset != blk2.offset || blk1.size != blk2.size then + fail_sim "blocks don't match (%d, %t bytes) (%d, %t bytes)" + blk1.offset (Pp.tos Ast.Size.pp_bytes blk1.size) + blk2.offset (Pp.tos Ast.Size.pp_bytes blk2.size); + { offset=blk1.offset; size=Ast.Size.to_bytes blk1.size } + + let rel_at_loc stack loc = + let open Option in + let* ((rel, relsz), reloff) = RelMap.at_off_opt stack loc.offset in + if loc.size != relsz || reloff != 0 then + (warn "Size not matching"; None) + else + Some rel + + let clear_loc stack loc = + RelMap.clear stack ~pos:loc.offset ~len:loc.size + + let infer_from_types ~stack_frag (st1:State.t) = + let frag = Vec.get st1.fenv.frags stack_frag in + let stack = ref RelMap.empty in + State.Fragment.iteri (fun off ctype -> + stack := RelMap.add !stack off (value_rel_for_type ctype.unqualified, Ctype.len ctype) + ) frag; + !stack +end + +module GlobalRel = struct + type eq_pair = State.exp * State.exp * sem_type + + type t = eq_pair list + + let find ~hyps (rel:t) a1 a2 = + let check_one a1 a2 (a1', a2', typ) = + debug "%t %t %t %t" (Pp.top State.Exp.pp a1) (Pp.top State.Exp.pp a2) (Pp.top State.Exp.pp a1') (Pp.top State.Exp.pp a2'); + let equal = Z3sim.check_full ~hyps Typed.(manyop Ast.And [Exp.left a1= Exp.left a1'; Exp.right a2= Exp.right a2']) in + match equal with + | Some true -> Some typ + | _ -> None + in + List.find_map (check_one a1 a2) rel + + let add (rel:t) ((a1, a2, typ): eq_pair) = + (a1, a2, typ)::rel + + let check ~hyps (rel:t) ((a1, a2, typ): eq_pair) = + Option.map ((=) typ) (find ~hyps rel a1 a2) + + let rel_of_sem_type = function + | Ptr t -> Indirect t + | Value _ -> Eq +end + +let block_addr (blk : State.Mem.Fragment.Block.t) = + let ext = 64-Arch.address_size in + match blk.base with + | None -> Typed.bits_int ~size:64 blk.offset + | Some b -> Typed.(unop (Ast.ZeroExtend ext) b + bits_int ~size:64 blk.offset) + +let ptr_safety_asserts typ v = + let sz = match typ with + | Value x -> x + | Ptr _ -> 8 (*assume 64 bit pointers*) + in + let topbits = (64 - Arch.address_size) in + let small_enough = Typed.(extract ~first:Arch.address_size ~last:63 v = zero ~size:topbits) in + + let last = sz - 1 in + let aligned = Typed.(extract ~first:0 ~last v = zero ~size:sz) in + + [small_enough; aligned] + +module Context = struct + type t = { + asserts: Exp.t list; + stack: StackRel.t; + global: GlobalRel.t; + } + + module Event = State.Mem.Fragment.Event + + let add_expr_rel (ctxt:t) rel = + match rel with + | (v1, Indirect t, v2) -> + let safety1 = ptr_safety_asserts t v1 |> List.map Exp.left in + let safety2 = ptr_safety_asserts t v2 |> List.map Exp.right in + let nullptrs = Typed.((Exp.left v1 = zero ~size:64) = (Exp.right v2 = zero ~size:64)) in + { + asserts = safety1 @ safety2 @ nullptrs::ctxt.asserts; + global = GlobalRel.add ctxt.global (v1, v2, t); + stack = ctxt.stack; + } + | rel -> + let exp = Option.value_fail (ExpRel.to_exp rel) "Failed to convert relation to expression" in + { ctxt with asserts = exp::ctxt.asserts } + + let check_expr_rel (ctxt:t) rel = + match rel with + | (v1, Indirect t, v2) -> + GlobalRel.check ~hyps:ctxt.asserts ctxt.global (v1, v2, t) + | rel -> + let exp = Option.value_fail (ExpRel.to_exp rel) "Failed to convert relation to expression" in + Z3sim.check_full ~hyps:ctxt.asserts exp + + let process_stack_operation event1 event2 (ctxt: t) = + match event1, event2 with + | Event.Read (blk1, v1), Event.Read (blk2, v2) -> + let loc = StackRel.loc_of_blocks blk1 blk2 in + let rel = StackRel.rel_at_loc ctxt.stack loc in + ( match rel with + | Some rel -> add_expr_rel ctxt (State.Exp.of_var v1, rel, State.Exp.of_var v2) + | None -> (debug "No relation for stack read %t %t" (Pp.top Event.pp event1) (Pp.top Event.pp event2); ctxt) + ) + | Event.Write (blk1, _exp1), Event.Write (blk2, _exp2) -> + let loc = StackRel.loc_of_blocks blk1 blk2 in + { ctxt with stack = StackRel.clear_loc ctxt.stack loc} + | _ -> fail_sim "traces don't match %t %t" (Pp.tos Event.pp event1) (Pp.tos Event.pp event2) + + let process_global_operation event1 event2 (ctxt: t) = + match event1, event2 with + | Event.Read (blk1, v1), Event.Read (blk2, v2) -> + let addr1 = block_addr blk1 in + let addr2 = block_addr blk2 in + let typ = GlobalRel.find ~hyps:ctxt.asserts ctxt.global addr1 addr2 in + ( match typ with + | Some typ -> + let rel = GlobalRel.rel_of_sem_type typ in + add_expr_rel ctxt (State.Exp.of_var v1, rel, State.Exp.of_var v2) + | None -> (warn "No relation for global read %t %t" (Pp.top Event.pp event1) (Pp.top Event.pp event2); ctxt) + ) + | Event.Write (blk1, exp1), Event.Write (blk2, exp2) -> + let addr1 = block_addr blk1 in + let addr2 = block_addr blk2 in + let typ = GlobalRel.find ~hyps:ctxt.asserts ctxt.global addr1 addr2 in + ( match typ with + | Some typ -> + let rel = GlobalRel.rel_of_sem_type typ in + if check_expr_rel ctxt (exp1, rel, exp2) <> Some true then + fail_sim "Unable to verify %t" (Pp.tos ExpRel.pp (exp1, rel, exp2)) + | None -> fail_sim "Unable to determine target type for global write %t %t" (Pp.tos Event.pp event1) (Pp.tos Event.pp event2) + ); + ctxt + | _ -> fail_sim "traces don't match %t %t" (Pp.tos Event.pp event1) (Pp.tos Event.pp event2) + + let infer_from_types ~stack_frag ~(dwarf:Dw.t) (state: State.t) = + let stack = StackRel.infer_from_types state ~stack_frag in + + let regs = RegRel.infer_from_types state in + let register_rels = RegRel.to_exp_rel state state regs in + + let global_variable_rels = + Hashtbl.to_seq_values dwarf.vars + |> Seq.map (fun (v:Dw.Var.t) -> + let typ = sem_type_of_type v.ctype in + match v.locs with + | [_, Global addr] -> + let addr = Elf.SymTable.to_addr_offset addr in + let exp = State.Exp.of_address ~size:64 addr in + (exp, Indirect typ, exp) + | _ -> + Raise.fail "Weird location description for global variable: %t" (Pp.tos Dw.Var.pp_raw v); + ) + |> List.of_seq + in + + let ctxt = { stack; asserts=[]; global=[] } in + List.fold_left add_expr_rel ctxt (register_rels @ global_variable_rels) +end + +type simrel = (State.Id.t*State.Id.t, Context.t) Hashtbl.t + +let stack_prov = 0(* TODO determine stack_frag automatically *) +let stack_frag = 0(* TODO determine stack_frag automatically *) + +exception SimulationFailureWithContext of { + msg:string; + states: State.t * State.t; + ctxt: Context.t; +} + +let rec checksim ~(rel:simrel) (s1:State.t) (s2:State.t) = + Hashtbl.find_opt rel (s1.id, s2.id) + |> Option.value_fun ~default:(fun() -> + let bs1 = Option.value_fail s1.base_state "no base state" in + let bs2 = Option.value_fail s2.base_state "no base state" in + let prev_ctxt = checksim ~rel bs1 bs2 in + try + + (* Process stack trace *) + let ((_,mem1), (_,mem2)) = State.Mem.(get_frag s1.mem stack_prov, get_frag s2.mem stack_prov) in + let (trc1, trc2) = State.Mem.Fragment.(get_trace mem1, get_trace mem2) in + + let ctxt = List.fold_right2 Context.process_stack_operation trc1 trc2 prev_ctxt in + + (* Process global trace *) + let (mem1, mem2) = State.Mem.(get_main s1.mem, get_main s2.mem) in + let (trc1, trc2) = State.Mem.Fragment.(get_trace mem1, get_trace mem2) in + + let ctxt = List.fold_right2 Context.process_global_operation trc1 trc2 ctxt in + + + let asst1 = List.map Exp.left s1.asserts in + let asst2 = List.map Exp.right s2.asserts in + let impl1 = List.find_all Fun.(Z3sim.check_full ~hyps:(asst1@ctxt.asserts) %> flip Option.value_fail "TODO: Z3 failed" %> not) asst2 in + let impl2 = List.find_all Fun.(Z3sim.check_full ~hyps:(asst2@ctxt.asserts) %> flip Option.value_fail "TODO: Z3 failed" %> not) asst1 in + if not (List.is_empty impl1 && List.is_empty impl2) then + fail_sim "%t" Pp.(Fun.const @@ sprint @@ pp_diff + !^"States not equivalent on path conditions" + (list Exp.pp) + impl2 + impl1 + ); + + Hashtbl.add rel (s1.id, s2.id) ctxt; + ctxt + with + | SimulationFailure s -> raise @@ SimulationFailureWithContext { + msg=s; + states=(s1,s2); + ctxt=prev_ctxt; + } + ) + +let check_return_values ~(ret_reg) ~(ret_type:Ctype.t) ~(rel:simrel) (s1:State.t) (s2:State.t) = + let ctxt = Hashtbl.find rel (s1.id, s2.id) in + + let ret_val1 = State.get_reg_exp s1 ret_reg in + let ret_val2 = State.get_reg_exp s2 ret_reg in + let rel = value_rel_for_type ret_type.unqualified in + + if Context.check_expr_rel ctxt (ret_val1, rel, ret_val2) <> Some true then + raise @@ SimulationFailureWithContext { + msg=Printf.sprintf "Return values not equivalent +Condition: %t\n" (Pp.tos ExpRel.pp (ret_val1, rel, ret_val2)); + states=(s1,s2); + ctxt=ctxt; + } + + +let run elf name = + let dwarf = Dw.of_file elf in + let tree = Run.Func.get_state_tree ~elf ~name () in + let initial_state = tree.state in + + let initial_ctxt = Context.infer_from_types ~stack_frag ~dwarf initial_state in + + let simrel:simrel = Hashtbl.create 10 in + Hashtbl.add simrel (initial_state.id, initial_state.id) initial_ctxt; + + debug "%t" (Pp.top (State.Tree.pp_all Run.Block_lib.pp_label) tree); + + let ret = Option.( + let* func =Dw.get_func_opt ~name dwarf in + let+ typ = func.func.ret in + if Ctype.sizeof typ > 8 then + Raise.fail "unsupported return type %t" (Pp.tos Ctype.pp typ) + else + ((Arch.dwarf_reg_map()).(0), typ) + ) in + + try + State.Tree.prefix_iter (fun _ s -> + checksim ~rel:simrel s s |> ignore; + if State.get_reg_exp s (Arch.pc()) = State.Exp.of_var State.Var.RetAddr then + Option.iter (fun (ret_reg, ret_type) -> + check_return_values ~ret_reg ~ret_type ~rel:simrel s s; + ) ret + ) tree; + base "Simulation successful" + with + | SimulationFailureWithContext e -> + let st, _ = e.states in + debug "Failing state: %t" (Pp.top State.pp st); + base "Simulation failed:\n\n%s" e.msg + +let elf = + let doc = "ELF file from which to pull the code" in + Arg.(required & pos 0 (some non_dir_file) None & info [] ~docv:"ELF_FILE" ~doc) + +let func = + let doc = "Symbol name of the function to run" in + Arg.(value & pos 1 string "main" & info [] ~docv:"FUNCTION" ~doc) + +let term = + Term.( + CmdlinerHelper.func_options comopts run + $ elf $ func) + +let info = + let doc = + "Simulation relation on relocatable binary" + in + Cmd.(info "relsim" ~doc ~exits) + +let command = (term, info) diff --git a/src/state/base.ml b/src/state/base.ml index 2657fa7d..17659776 100644 --- a/src/state/base.ml +++ b/src/state/base.ml @@ -328,6 +328,10 @@ module Mem = struct (** Get the main fragment of memory *) let get_main { main; _ } = main + (** Get fragment *) + let get_frag mem i = + Vec.get mem.frags i + (** Empty memory, every address is unbound *) let empty () = { main = Fragment.empty; frags = Vec.empty (); sections = Hashtbl.create 10; allow_main = true } diff --git a/src/state/base.mli b/src/state/base.mli index 2d373fd4..73d6c370 100644 --- a/src/state/base.mli +++ b/src/state/base.mli @@ -259,6 +259,9 @@ module Mem : sig (** Get the main fragment of memory *) val get_main : t -> Fragment.t + + (** Get fragment *) + val get_frag : t -> int -> Exp.t * Fragment.t end (** {1 State type } *) diff --git a/src/state/reg.mli b/src/state/reg.mli index fd67c573..b9692051 100644 --- a/src/state/reg.mli +++ b/src/state/reg.mli @@ -216,6 +216,9 @@ module Map : sig (** Map the function all the registers (including future, not yet added ones) *) val map : ('a -> 'b) -> 'a t -> 'b t + (** Same as {!map} but with the index *) + val mapi : (reg -> 'a -> 'b) -> 'a t -> 'b t + (** Map the function on all the register by mutation (including future ones) *) val map_mut : ('a -> 'a) -> 'a t -> unit diff --git a/src/state/symbolicFragment.ml b/src/state/symbolicFragment.ml index dbc941fa..40ef78f2 100644 --- a/src/state/symbolicFragment.ml +++ b/src/state/symbolicFragment.ml @@ -98,6 +98,8 @@ module type S = sig type t = | Read of Block.t * var (** From [Block.t], read [var] *) | Write of Block.t * exp (** To [Block.t], write [exp] *) + + val pp : t -> Pp.document end (** The type of a memory fragment *) diff --git a/src/trace/context.ml b/src/trace/context.ml index 2fe350d4..59950dc2 100644 --- a/src/trace/context.ml +++ b/src/trace/context.ml @@ -106,6 +106,9 @@ let typing_enabled ~(ctxt : t) = ctxt.dwarf <> None module Z3St = State.Simplify.Z3St let simplify ~(ctxt : t) (exp : State.exp) : State.exp = + debug "Before simplification: %t" (Pp.top State.Exp.pp exp); + debug "Before simplification: %t" (Pp.top State.Exp.pp (Z3St.simplify_full exp)); exp |> Z3St.simplify_subterms_full ~hyps:ctxt.asserts - |> Z3St.simplify_full \ No newline at end of file + |> Z3St.simplify_full + |> Fun.tee (fun e -> debug "After simplification: %t" (Pp.top State.Exp.pp e)) \ No newline at end of file diff --git a/src/utils/fullVec.ml b/src/utils/fullVec.ml index f6bea2c1..998e2232 100644 --- a/src/utils/fullVec.ml +++ b/src/utils/fullVec.ml @@ -107,6 +107,11 @@ let map f fv = let gen = fv.gen %> f in { vec; gen } +let mapi f fv = + let vec = Vec.mapi f fv.vec in + let gen = (fun i -> fv.gen i |> f i) in + { vec; gen } + let map_mut f fv = Vec.map_mut f fv.vec; fv.gen <- fv.gen %> f diff --git a/src/utils/fullVec.mli b/src/utils/fullVec.mli index 51d9f079..9a67676f 100644 --- a/src/utils/fullVec.mli +++ b/src/utils/fullVec.mli @@ -89,6 +89,9 @@ val get_vec_until : 'a t -> int -> 'a Vec.t (** Map the function over the fullvec. Postcompose the map on the generator *) val map : ('a -> 'b) -> 'a t -> 'b t +(** Same as {!map} but with the index *) +val mapi : (int -> 'a -> 'b) -> 'a t -> 'b t + (** Map the function over the fullvector by mutation. Postcompose the map on the generator.contents Warning, a lot of {!map_mut} may make the generator big and slow. Maybe try to use {!set_after} to reset it when required.*) From 0d7f6b19051fc66d06a7cb8834e1e5598047f194 Mon Sep 17 00:00:00 2001 From: maturvo <59334936+maturvo@users.noreply.github.com> Date: Tue, 13 May 2025 01:53:03 +0100 Subject: [PATCH 89/89] Fix analyse (#3) --- src/analyse/CallGraph.ml | 6 +- src/analyse/ControlFlow.ml | 108 ++++++++++++++++++++------- src/analyse/ControlFlowTypes.ml | 1 + src/analyse/DwarfFrameInfo.ml | 7 +- src/analyse/DwarfInliningInfo.ml | 6 +- src/analyse/DwarfLineInfo.ml | 6 +- src/analyse/DwarfVarInfo.ml | 8 +- src/analyse/Pp.ml | 14 +++- src/analyse/html-preamble-insts.html | 3 + src/utils/sym.ml | 36 ++++++--- 10 files changed, 141 insertions(+), 54 deletions(-) diff --git a/src/analyse/CallGraph.ml b/src/analyse/CallGraph.ml index 026db808..1a070b26 100644 --- a/src/analyse/CallGraph.ml +++ b/src/analyse/CallGraph.ml @@ -110,7 +110,7 @@ let mk_call_graph test (an : CollectedType.analysis) = if not (List.exists - (function (a'', _) -> Sym.equal a' a'') + (function (a'', _) -> Sym.Ordered.equal a' a'') elf_symbols) then Some (a', ["FROM BL:" ^ s']) else None) @@ -122,7 +122,7 @@ let mk_call_graph test (an : CollectedType.analysis) = match axs with | [] -> acc | (a, x) :: axs' -> - if not (List.exists (function (a', _) -> Sym.equal a a') acc) then + if not (List.exists (function (a', _) -> Sym.Ordered.equal a a') acc) then dedup axs' ((a, x) :: acc) else dedup axs' acc in @@ -133,7 +133,7 @@ let mk_call_graph test (an : CollectedType.analysis) = List.sort (function | (a, _) -> ( - function (a', _) -> Sym.compare a a' + function (a', _) -> Sym.Ordered.compare a a' )) (elf_symbols @ extra_bl_targets) in diff --git a/src/analyse/ControlFlow.ml b/src/analyse/ControlFlow.ml index bba61c63..8eefb233 100644 --- a/src/analyse/ControlFlow.ml +++ b/src/analyse/ControlFlow.ml @@ -295,9 +295,9 @@ with (Scanf.Scan_failure _ | End_of_file) -> Scanf.sscanf s "%Lx" (fun i64 -> Sym.of_int64 i64) -let parse_target s = +let parse_target base s = match Scanf.sscanf s " %s %s" (fun s1 s2 -> (s1, s2)) with - | (s1, s2) -> Some (parse_addr s1, s2) + | (s1, s2) -> Some (Sym.add base (parse_addr s1), s2) | exception _ -> None let parse_drop_one s = @@ -309,7 +309,20 @@ let parse_drop_one s = | (_, s') -> Some s' | exception _ -> None -let parse_control_flow_instruction s mnemonic s' : control_flow_insn = +let parse_relocation_target symbol_map s = + let s, offset = match String.split_on_char '+' s with + | [s1; s2] -> + s1, Scanf.sscanf s2 "0x%x" Fun.id + | [s] -> s, 0 + | _ -> fatal "Unable to parse relocation target '%s'" s + in + let addr = List.find_map (fun (name, (_,_,addr,_,_)) -> if name = s then Some addr else None) symbol_map in + Option.map (Sym.add (Sym.of_int offset)) addr + +let parse_control_flow_instruction symbol_map base s mnemonic s' relocation : control_flow_insn = + let relocation_target = Option.bind relocation (fun (_typ, target) -> + Option.map (fun a -> (a, target)) (parse_relocation_target symbol_map target) + ) in (* Printf.printf "s=\"%s\" mnemonic=\"%s\" mnemonic chars=\"%s\" s'=\"%s\" "s mnemonic (String.concat "," (List.map (function c -> string_of_int (Char.code c)) (char_list_of_string mnemonic))) s';flush stdout;*) let c = if List.mem String.equal mnemonic [".word"] then C_no_instruction @@ -320,9 +333,9 @@ let parse_control_flow_instruction s mnemonic s' : control_flow_insn = (String.length mnemonic >= 2 && String.sub mnemonic 0 2 = "b.") || List.mem String.equal mnemonic ["b"; "bl"] then - match parse_target s' with - | None -> raise (Failure ("b./b/bl parse error for: \"" ^ s ^ "\"\n")) - | Some (a, s) -> + match parse_target base s', relocation_target with + | None, None -> raise (Failure ("b./b/bl parse error for: \"" ^ s ^ "\"\n")) + | _, Some(a, s) | Some (a, s), None -> if mnemonic = "b" then C_branch (a, s) else if mnemonic = "bl" then C_branch_and_link (a, s) else C_branch_cond (mnemonic, a, s) @@ -330,9 +343,9 @@ let parse_control_flow_instruction s mnemonic s' : control_flow_insn = match parse_drop_one s' with | None -> raise (Failure ("cbz/cbnz 1 parse error for: " ^ s ^ "\n")) | Some s' -> ( - match parse_target s' with - | None -> raise (Failure ("cbz/cbnz 2 parse error for: " ^ s ^ "\n")) - | Some (a, s) -> C_branch_cond (mnemonic, a, s) + match parse_target base s', relocation_target with + | None, None -> raise (Failure ("cbz/cbnz 2 parse error for: " ^ s ^ "\n")) + | _, Some(a, s) | Some (a, s), None -> C_branch_cond (mnemonic, a, s) ) else if List.mem String.equal mnemonic ["tbz"; "tbnz"] then match parse_drop_one s' with @@ -341,9 +354,9 @@ let parse_control_flow_instruction s mnemonic s' : control_flow_insn = match parse_drop_one s'' with | None -> raise (Failure ("tbz/tbnz 2 parse error for: " ^ s ^ "\n")) | Some s''' -> ( - match parse_target s''' with - | None -> raise (Failure ("tbz/tbnz 3 parse error for: " ^ s ^ "\n")) - | Some (a, s'''') -> + match parse_target base s''', relocation_target with + | None, None -> raise (Failure ("tbz/tbnz 3 parse error for: " ^ s ^ "\n")) + | _, Some(a, s'''') | Some (a, s''''), None -> (* Printf.printf "s=%s mnemonic=%s s'=%s s''=%s s'''=%s s''''=%s\n"s mnemonic s' s'' s''' s'''';*) C_branch_cond (mnemonic, a, s'''') ) @@ -438,14 +451,21 @@ AArch64: 10004: 52800129 mov w9, #0x9 // #9 *) +let relocation_regexp_string = "[ \t][0-9a-fA-F]+:[ \t]\\([0-9A-Z_]+\\)\t\\(.*\\)" + let objdump_line_regexp = - Str.regexp " *\\([0-9a-fA-F]+\\):[ \t]\\([0-9a-fA-F ]+\\)\t\\([^ \r\t\n]+\\) *\\(.*\\)$" + Str.regexp (" *\\([0-9a-fA-F]+\\):[ \t]\\([0-9a-fA-F ]+\\)\t\\([^ \r\t\n]+\\)[ \t]*\\([^:]*\\)\\(" ^ relocation_regexp_string ^ "\\)?$") let section_start_line_regexp = Str.regexp "Disassembly of section \\(.*\\):$" +type relocation = string * string + +type raw_objdump_instruction = + int64 (*address*) * int list (*opcode bytes*) * string (*mnemonic*) * string * relocation option + type objdump_instruction = - natural (*address*) * int list (*opcode bytes*) * string (*mnemonic*) * string + natural (*address*) * int list (*opcode bytes*) * string (*mnemonic*) * string * relocation option (*args etc*) @@ -455,7 +475,7 @@ let parse_section_start s = else None -let parse_objdump_line (s : string) : (int64 * int list * string * string) option = +let parse_objdump_line (s : string) : raw_objdump_instruction option = let parse_hex_int64 s' = try Scanf.sscanf s' "%Lx" (fun i64 -> i64) with _ -> fatal "cannot parse address in objdump line %s\n" s @@ -474,6 +494,7 @@ let parse_objdump_line (s : string) : (int64 * int list * string * string) optio in if Str.string_match objdump_line_regexp s 0 then begin + (* debug "matched line"; *) let addr_int64 = parse_hex_int64 (Str.matched_group 1 s) in let op = Str.matched_group 2 s in let op = strip_whitespace op in @@ -486,17 +507,43 @@ let parse_objdump_line (s : string) : (int64 * int list * string * string) optio let opcode_bytes = List.map parse_hex_int opcode_byte_strings in let mnemonic = Str.matched_group 3 s in let operands = Str.matched_group 4 s in - Some (addr_int64, opcode_bytes, mnemonic, operands) + let relocation = try + Some (Str.matched_group 6 s, Str.matched_group 7 s) + with + | Not_found -> None + in + Some (addr_int64, opcode_bytes, mnemonic, operands, relocation) end else None +(* let parse_objdump_relocation (s : string) : (string * string) option = + let parse_hex_int s' = + try Scanf.sscanf s' "%x" (fun i -> i) + with _ -> fatal "cannot parse relocation '%s' in objdump line %s\n" s' s + in + if Str.string_match objdump_line_regexp s 0 then + begin + let addr = Str.matched_group 1 s in + let op = Str.matched_group 2 s in + let op = strip_whitespace op in + let opcode_byte_strings = + [String.sub op 0 2; + String.sub op 2 2; + String.sub op 4 2; + String.sub op 6 2] + in + let opcode_bytes = List.map parse_hex_int opcode_byte_strings in + Some (addr, op) + end + else None *) + (* let parse_objdump_lines arch lines : objdump_instruction list = List.filter_map (parse_objdump_line arch) (Array.to_list lines) *) -let with_symbolic_address (section: string) (addr, opcode_bytes, mnemonic, operands) : objdump_instruction = - (Sym_ocaml.Num.Offset (section, Nat_big_num.of_int64 addr), opcode_bytes, mnemonic, operands) +let with_symbolic_address (section: string) (addr, opcode_bytes, mnemonic, operands, relocation) : objdump_instruction = + (Sym_ocaml.Num.Offset (section, Nat_big_num.of_int64 addr), opcode_bytes, mnemonic, operands, relocation) let rec parse_objdump_lines arch lines (next_index : int) (last_address : int64 option) (section: string option) : objdump_instruction list = @@ -506,7 +553,7 @@ let rec parse_objdump_lines arch lines (next_index : int) (last_address : int64 match parse_objdump_line lines.(next_index) with (* skip over unparseable lines *) | None -> parse_objdump_lines arch lines (next_index + 1) last_address section - | Some ((addr, _opcode_bytes, _mnemonic, _operands) as i) -> ( + | Some ((addr, _opcode_bytes, _mnemonic, _operands, _relocation) as i) -> ( let mki = with_symbolic_address (Option.get section) in match last_address with | None -> mki i :: parse_objdump_lines arch lines (next_index + 1) (Some addr) section @@ -515,7 +562,7 @@ let rec parse_objdump_lines arch lines (next_index : int) (last_address : int64 if addr > last_address'' then (* fake up "missing" instructions for any gaps in the address space*) (*warn "gap in objdump instruction address sequence at %s" (pp_addr last_address'');*) - mki (last_address'', [], "missing", "") + mki (last_address'', [], "missing", "", None) :: parse_objdump_lines arch lines next_index (Some last_address'') section else mki i :: parse_objdump_lines arch lines (next_index + 1) (Some addr) section ) @@ -546,7 +593,7 @@ let mk_instructions test filename_objdump_d filename_branch_table_option : Array.iteri (function | k -> ( - function (addr, _, _, _) -> Hashtbl.add tbl addr k + function (addr, _, _, _, _) -> Hashtbl.add tbl addr k )) objdump_instructions; ( (function @@ -563,9 +610,14 @@ let mk_instructions test filename_objdump_d filename_branch_table_option : let instructions = Array.map (function - | (addr, opcode_bytes, mnemonic, operands) -> + | (addr, opcode_bytes, mnemonic, operands, relocation) -> + (* a bit hacky *) + let base = match addr with + | Sym_ocaml.Num.Offset(s,_) -> Sym_ocaml.Num.section s + | Sym_ocaml.Num.Absolute(_) -> Sym.of_int 0 + in let c : control_flow_insn = - parse_control_flow_instruction ("objdump line " ^ pp_addr addr) mnemonic operands + parse_control_flow_instruction test.symbol_map base ("objdump line " ^ pp_addr addr) mnemonic operands relocation in let targets = @@ -579,12 +631,18 @@ let mk_instructions test filename_objdump_d filename_branch_table_option : i_operands = operands; i_control_flow = c; i_targets = targets; + i_relocation = relocation; }) objdump_instructions in let address_of_index k = instructions.(k).i_addr in + Array.sort + (fun i1 i2 -> + Sym.Ordered.compare (i1.i_addr) (i2.i_addr)) + instructions; + (instructions, index_of_address, index_option_of_address, address_of_index) (* pull out indirect branches *) @@ -616,11 +674,11 @@ let highlight c = (* highlight branch targets to earlier addresses*) let pp_target_addr_wrt (addr : natural) (c : control_flow_insn) (a : natural) = - (if highlight c && Sym.less a addr then "^" else "") ^ pp_addr a + (if highlight c && Sym.Ordered.less a addr then "^" else "") ^ pp_addr a (* highlight branch come-froms from later addresses*) let pp_come_from_addr_wrt (addr : natural) (c : control_flow_insn) (a : natural) = - (if highlight c && Sym.greater a addr then "v" else "") ^ pp_addr a + (if highlight c && Sym.Ordered.greater a addr then "v" else "") ^ pp_addr a (* let pp_branch_targets (xs : (addr * control_flow_insn * (target_kind * addr * int * string) list) list) diff --git a/src/analyse/ControlFlowTypes.ml b/src/analyse/ControlFlowTypes.ml index 97815632..4ecb695f 100644 --- a/src/analyse/ControlFlowTypes.ml +++ b/src/analyse/ControlFlowTypes.ml @@ -83,6 +83,7 @@ type instruction = { i_operands : string; i_control_flow : control_flow_insn; i_targets : target list; + i_relocation : (string * string) option; } type come_from = { diff --git a/src/analyse/DwarfFrameInfo.ml b/src/analyse/DwarfFrameInfo.ml index 991b295d..63da2fc1 100644 --- a/src/analyse/DwarfFrameInfo.ml +++ b/src/analyse/DwarfFrameInfo.ml @@ -53,14 +53,15 @@ open ControlFlowTypes let aof ((a : natural), (_cfa : string), (_regs : (string * string) list)) = a +(* TODO does Sym.Ordered work as we want? *) let rec f (aof : 'b -> natural) (a : natural) (last : 'b option) (bs : 'b list) : 'b option = match (last, bs) with | (None, []) -> None - | (Some b', []) -> if Sym.greater_equal a (aof b') then Some b' else None + | (Some b', []) -> if Sym.Ordered.greater_equal a (aof b') then Some b' else None | (None, b'' :: bs') -> f aof a (Some b'') bs' | (Some b', b'' :: bs') -> - if Sym.less a (aof b') then None - else if Sym.greater_equal a (aof b') && Sym.less a (aof b'') then Some b' + if Sym.Ordered.less a (aof b') then None + else if Sym.Ordered.greater_equal a (aof b') && Sym.Ordered.less a (aof b'') then Some b' else f aof a (Some b'') bs' let mk_frame_info test instructions : diff --git a/src/analyse/DwarfInliningInfo.ml b/src/analyse/DwarfInliningInfo.ml index 832b4cac..4180e17e 100644 --- a/src/analyse/DwarfInliningInfo.ml +++ b/src/analyse/DwarfInliningInfo.ml @@ -68,7 +68,7 @@ let mk_inlining test sdt instructions = let addr = i.i_addr in let issr_still_current = List.filter - (function (_label, ((_n1, n2), (_m, _n), _is)) -> Sym.less addr n2) + (function (_label, ((_n1, n2), (_m, _n), _is)) -> Sym.Ordered.less addr n2) issr_current in @@ -83,8 +83,8 @@ let mk_inlining test sdt instructions = let (issr_starting_here0, issr_rest') = find_first - (function ((_n1, n2), (_m, _n), _is) -> Sym.less_equal n2 addr) - (function ((n1, _n2), (_m, _n), _is) -> Sym.equal n1 addr) + (function ((_n1, n2), (_m, _n), _is) -> Sym.Ordered.less_equal n2 addr) + (function ((n1, _n2), (_m, _n), _is) -> Sym.Ordered.equal n1 addr) [] issr_rest in diff --git a/src/analyse/DwarfLineInfo.ml b/src/analyse/DwarfLineInfo.ml index 7cb6bf73..2cbf4fef 100644 --- a/src/analyse/DwarfLineInfo.ml +++ b/src/analyse/DwarfLineInfo.ml @@ -269,11 +269,11 @@ let mk_line_info (eli : Dwarf.evaluated_line_info) instructions : let elifis = Array.make size [] in let sequences = List.flatten (List.map split_into_sequences eli) in - let compare_sequence s1 s2 = Sym.compare s1.elis_first s2.elis_first in + let compare_sequence s1 s2 = Sym.Ordered.compare s1.elis_first s2.elis_first in let sequences_sorted = List.sort compare_sequence sequences in let entries = List.flatten (List.map split_into_entries sequences_sorted) in - let compare_entry e1 e2 = Sym.compare e1.elie_first e2.elie_first in + let compare_entry e1 e2 = Sym.Ordered.compare e1.elie_first e2.elie_first in let entries_sorted = List.sort compare_entry entries in (*List.iter (function elie -> Printf.printf "%s" (pp_elie_concise elie)) entries_sorted;*) @@ -286,7 +286,7 @@ let mk_line_info (eli : Dwarf.evaluated_line_info) instructions : match remaining with | [] -> (acc, remaining) | elie :: remaining' -> - if Sym.less_equal elie.elie_first addr then + if Sym.Ordered.less_equal elie.elie_first addr then mk_new_perhaps_relevant (elie :: acc) remaining' else (acc, remaining) in diff --git a/src/analyse/DwarfVarInfo.ml b/src/analyse/DwarfVarInfo.ml index 144e48b5..8e1ffbec 100644 --- a/src/analyse/DwarfVarInfo.ml +++ b/src/analyse/DwarfVarInfo.ml @@ -260,7 +260,7 @@ let pp_ranged_var (prefix : string) (var : ranged_var) : string = let pp_ranged_vars (prefix : string) (vars : ranged_var list) : string = String.concat "" (List.map (pp_ranged_var prefix) vars) -let compare_pc_ranges ((n1, _, _), _) ((n1', _, _), _) = compare n1 n1' +let compare_pc_ranges ((n1, _, _), _) ((n1', _, _), _) = Sym.Ordered.compare n1 n1' let local_by_pc_ranges (((svfp : Dwarf.sdt_variable_or_formal_parameter), _context) as var) : ranged_var list = @@ -299,14 +299,14 @@ let mk_ranged_vars_at_instructions (sdt_d : Dwarf.sdt_dwarf) instructions : if k >= size then () else let addr = instructions.(k).i_addr in - if not (Sym.less addr_prev addr) then + if not (Sym.Ordered.less addr_prev addr) then fatal "mk_ranged_vars_at_instructions found non-increasing address %s" (pp_addr addr); let (still_current, old) = - List.partition (function ((_, n2, _), _) -> Sym.less addr n2) prev + List.partition (function ((_, n2, _), _) -> Sym.Ordered.less addr n2) prev in let (new', remaining') = partition_first - (function ((n1, _n2, _ops), _var) as _rv -> Sym.greater_equal addr n1) + (function ((n1, _n2, _ops), _var) as _rv -> Sym.Ordered.greater_equal addr n1) remaining in (* TODO: do we need to drop any that have been totally skipped over? *) diff --git a/src/analyse/Pp.ml b/src/analyse/Pp.ml index 0bf41866..cc57b5f6 100644 --- a/src/analyse/Pp.ml +++ b/src/analyse/Pp.ml @@ -74,6 +74,7 @@ type render_kind = | Render_vars_old | Render_inlining | Render_ctrlflow + | Render_relocation let render_colour = function | Render_symbol_star -> "gold" @@ -86,6 +87,7 @@ let render_colour = function | Render_vars_old -> "grey" | Render_inlining -> "red" | Render_ctrlflow -> "white" + | Render_relocation -> "purple" let render_class_name = function | Render_symbol_star -> "symbol-star" @@ -98,6 +100,7 @@ let render_class_name = function | Render_vars_old -> "vars-old" | Render_inlining -> "inlining" | Render_ctrlflow -> "ctrlflow" + | Render_relocation -> "relocation" type html_idiom = HI_span | HI_pre | HI_classless_span | HI_font @@ -327,13 +330,20 @@ let pp_instruction m test an rendered_control_flow_common_prefix_end k i = (ControlFlowPpText.pp_glyphs rendered_control_flow_common_prefix_end an.rendered_control_flow.(k)) (* the address and (hex) instruction *) - ^ css m Render_instruction - (pp_addr addr ^ ": " + ^ css m Render_instruction ( + pp_addr addr ^ ": " ^ pp_opcode_bytes test.arch i.i_opcode (* the dissassembly from objdump *) ^ " " ^ i.i_mnemonic ^ "\t" ^ i.i_operands ) + ^ css m Render_relocation + (match i.i_relocation with + | None -> "" + | Some (typ, targ) -> + "\t" ^ typ ^ " " ^ targ + ) + (* the instruction's control flow *) (* any indirect-branch control flow from this instruction *) ^ css m Render_ctrlflow (begin diff --git a/src/analyse/html-preamble-insts.html b/src/analyse/html-preamble-insts.html index 36645dfd..3b8adb8b 100644 --- a/src/analyse/html-preamble-insts.html +++ b/src/analyse/html-preamble-insts.html @@ -81,6 +81,9 @@ .ctrlflow { color: white; } + .relocation { + color: mediumpurple; + } a:link { color: aqua; background-color: transparent; diff --git a/src/utils/sym.ml b/src/utils/sym.ml index 42ef411d..83421beb 100644 --- a/src/utils/sym.ml +++ b/src/utils/sym.ml @@ -16,16 +16,18 @@ let max_addr = Z.(shift_left (of_int 1) 64 - (of_int 1)) let min_addr = Z.of_int 0 (* TODO very hacky *) -let less x y = match (x, y) with -| (Sym_ocaml.Num.Absolute x, Sym_ocaml.Num.Offset (_, y)) when Nat_big_num.less x y -> true -| (Sym_ocaml.Num.Absolute x, Sym_ocaml.Num.Offset (_,_)) when Nat_big_num.greater_equal x max_addr -> false -| (Sym_ocaml.Num.Offset (_,_), Sym_ocaml.Num.Absolute y) when Nat_big_num.less max_addr y -> true -| (Sym_ocaml.Num.Offset (_, x), Sym_ocaml.Num.Absolute y) when Nat_big_num.greater_equal x y -> false -| _ -> Sym_ocaml.Num.comp Nat_big_num.less x y -let less_equal = Sym_ocaml.Num.less_equal -let greater = Sym_ocaml.Num.greater -let greater_equal = Sym_ocaml.Num.greater_equal -let compare = Sym_ocaml.Num.compare +let compare x y = match (x, y) with +| (Sym_ocaml.Num.Absolute x, Sym_ocaml.Num.Offset (_, y)) when Nat_big_num.less x y -> -1 +| (Sym_ocaml.Num.Absolute x, Sym_ocaml.Num.Offset (_, _)) when Nat_big_num.greater_equal x max_addr -> 1 +| (Sym_ocaml.Num.Offset (_, x), Sym_ocaml.Num.Absolute y) when Nat_big_num.less y x -> 1 +| (Sym_ocaml.Num.Offset (_, _), Sym_ocaml.Num.Absolute y) when Nat_big_num.greater_equal y max_addr -> -1 +| (x, y) -> Sym_ocaml.Num.compare x y + +let less x y = compare x y < 0 +let less_equal x y = compare x y <= 0 +(* let equal x y = compare x y = 0 *) +let greater x y = compare x y > 0 +let greater_equal x y = compare x y >= 0 let to_string = Sym_ocaml.Num.ppf Z.to_string @@ -45,4 +47,16 @@ let in_range first last x = match (first, last, x) with | (Sym_ocaml.Num.Absolute f, Sym_ocaml.Num.Absolute l, Sym_ocaml.Num.Absolute x) -> Nat_big_num.less_equal f x && Nat_big_num.less_equal x l | (Sym_ocaml.Num.Offset (s1, f), Sym_ocaml.Num.Offset (s2, l), Sym_ocaml.Num.Offset (s, x)) when s1 = s2 -> s1 = s && Nat_big_num.less_equal f x && Nat_big_num.less_equal x l (* TODO kinda hacky *) -| _ -> Raise.fail "Can't determine if %t is in range [%t,%t]" (Pp.tos pp x) (Pp.tos pp first) (Pp.tos pp last) \ No newline at end of file +| _ -> Raise.fail "Can't determine if %t is in range [%t,%t]" (Pp.tos pp x) (Pp.tos pp first) (Pp.tos pp last) + +module Ordered = struct + let compare x y = match (x, y) with + | (Sym_ocaml.Num.Offset (s1, _x), Sym_ocaml.Num.Offset (s2, _y)) when s1 <> s2 -> String.compare s1 s2 + | (x, y) -> compare x y + + let less_equal x y = compare x y <= 0 + let less x y = compare x y < 0 + let greater x y = compare x y > 0 + let greater_equal x y = compare x y >= 0 + let equal x y = compare x y = 0 +end \ No newline at end of file