From ad0535d0a566824c469742e0c9584271c1f48a1b Mon Sep 17 00:00:00 2001 From: pjuftring Date: Sat, 30 Apr 2016 22:34:42 +0200 Subject: [PATCH 1/6] Write integer of_string from scratch While #265 did the job of supporting unsigned integers, it certainly was no nice implementation. This version adds the same features, i.e. allowing signed integers (as defined in the lexer) and larger unsigned values, but is actually written from scratch and doesn't rely on the builtin of_string anymore. This PR should close #225 if it is accepted. --- ml-proto/spec/int.ml | 51 +++++++++++++++++++++++++++++++-- ml-proto/test/int_literals.wast | 12 ++++++++ 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/ml-proto/spec/int.ml b/ml-proto/spec/int.ml index 51a353ca9e..a522020210 100644 --- a/ml-proto/spec/int.ml +++ b/ml-proto/spec/int.ml @@ -8,6 +8,7 @@ sig val zero : t val one : t val minus_one : t + val neg : t -> t val shift_left : t -> int -> t val shift_right : t -> int -> t val logand : t -> t -> t @@ -21,7 +22,6 @@ sig val shift_right_logical : t -> int -> t val of_int : int -> t val to_int : t -> int - val of_string : string -> t val to_string : t -> string val bitwidth : int @@ -208,7 +208,54 @@ struct let ge_s x y = x >= y let ge_u x y = cmp_u x (>=) y - let of_string = Rep.of_string + (* This implementation allows leading signs and unsigned values *) + let of_string x = + let len = String.length x in + let power_of_two n = (Rep.shift_left Rep.one n) in + let ten = (Rep.of_int 10) in + let parse_hexdigit c = + int_of_char c - + if '0' <= c && '9' >= c then 0x30 + else if 'a' <= c && 'f' >= c then (0x61 - 10) + else if 'A' <= c && 'F' >= c then (0x41 - 10) + else assert false + in + let parse_hex offset sign = + let num = ref Rep.zero in + for i = offset to (len - 1) do + assert (lt_u !num (power_of_two (Rep.bitwidth - 4))); + num := Rep.logor (Rep.shift_left !num 4) (Rep.of_int (parse_hexdigit x.[i])); + done; + assert (sign || (le_u !num (power_of_two (Rep.bitwidth - 1)))); + !num + in + let parse_dec offset sign = + let max_upper, max_lower = + if sign then + divrem_u Rep.minus_one ten + else + divrem_u (power_of_two (Rep.bitwidth - 1)) ten + in + let num = ref Rep.zero in + for i = offset to (len - 1) do + assert ('0' <= x.[i] && '9' >= x.[i]); + let new_digit = (Rep.of_int (int_of_char x.[i] - 0x30)) in + assert ((lt_u !num max_upper) || ((!num = max_upper) && (le_u new_digit max_lower))); + num := (Rep.add (Rep.mul !num ten) new_digit) + done; + !num + in + let parse_int offset sign = + if offset + 3 <= len && (String.sub x offset 2) = "0x" then + parse_hex (offset + 2) sign + else + parse_dec offset sign + in + match x.[0] with + | '+' -> parse_int 1 true + | '-' -> Rep.neg (parse_int 1 false) + | _ -> parse_int 0 true + let to_string = Rep.to_string let of_int = Rep.of_int diff --git a/ml-proto/test/int_literals.wast b/ml-proto/test/int_literals.wast index a60adb439d..bc545992dc 100644 --- a/ml-proto/test/int_literals.wast +++ b/ml-proto/test/int_literals.wast @@ -8,6 +8,8 @@ (func $i32.inc_smin (result i32) (return (i32.add (i32.const -0x80000000) (i32.const 1)))) (func $i32.neg_zero (result i32) (return (i32.const -0x0))) (func $i32.not_octal (result i32) (return (i32.const 010))) + (func $i32.unsigned_decimal (result i32) (return (i32.const 4294967295))) + (func $i32.plus_sign (result i32) (return (i32.const +42))) (func $i64.test (result i64) (return (i64.const 0x0CABBA6E0ba66a6e))) (func $i64.umax (result i64) (return (i64.const 0xffffffffffffffff))) @@ -18,6 +20,8 @@ (func $i64.inc_smin (result i64) (return (i64.add (i64.const -0x8000000000000000) (i64.const 1)))) (func $i64.neg_zero (result i64) (return (i64.const -0x0))) (func $i64.not_octal (result i64) (return (i64.const 010))) + (func $i64.unsigned_decimal (result i64) (return (i64.const 18446744073709551615))) + (func $i64.plus_sign (result i64) (return (i64.const +42))) (export "i32.test" $i32.test) (export "i32.umax" $i32.umax) @@ -28,6 +32,8 @@ (export "i32.inc_smin" $i32.inc_smin) (export "i32.neg_zero" $i32.neg_zero) (export "i32.not_octal" $i32.not_octal) + (export "i32.unsigned_decimal" $i32.unsigned_decimal) + (export "i32.plus_sign" $i32.plus_sign) (export "i64.test" $i64.test) (export "i64.umax" $i64.umax) @@ -38,6 +44,8 @@ (export "i64.inc_smin" $i64.inc_smin) (export "i64.neg_zero" $i64.neg_zero) (export "i64.not_octal" $i64.not_octal) + (export "i64.unsigned_decimal" $i64.unsigned_decimal) + (export "i64.plus_sign" $i64.plus_sign) ) (assert_return (invoke "i32.test") (i32.const 195940365)) @@ -49,6 +57,8 @@ (assert_return (invoke "i32.inc_smin") (i32.const -2147483647)) (assert_return (invoke "i32.neg_zero") (i32.const 0)) (assert_return (invoke "i32.not_octal") (i32.const 10)) +(assert_return (invoke "i32.unsigned_decimal") (i32.const -1)) +(assert_return (invoke "i32.plus_sign") (i32.const 42)) (assert_return (invoke "i64.test") (i64.const 913028331277281902)) (assert_return (invoke "i64.umax") (i64.const -1)) @@ -59,3 +69,5 @@ (assert_return (invoke "i64.inc_smin") (i64.const -9223372036854775807)) (assert_return (invoke "i64.neg_zero") (i64.const 0)) (assert_return (invoke "i64.not_octal") (i64.const 10)) +(assert_return (invoke "i64.unsigned_decimal") (i64.const -1)) +(assert_return (invoke "i64.plus_sign") (i64.const 42)) From b7bbb2225614aa2164a8de60340897e9f821f73d Mon Sep 17 00:00:00 2001 From: pjuftring Date: Tue, 3 May 2016 21:47:16 +0200 Subject: [PATCH 2/6] Remove parentheses and catch asserts --- ml-proto/spec/int.ml | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/ml-proto/spec/int.ml b/ml-proto/spec/int.ml index a522020210..dc1c4e6fc8 100644 --- a/ml-proto/spec/int.ml +++ b/ml-proto/spec/int.ml @@ -211,18 +211,18 @@ struct (* This implementation allows leading signs and unsigned values *) let of_string x = let len = String.length x in - let power_of_two n = (Rep.shift_left Rep.one n) in - let ten = (Rep.of_int 10) in + let power_of_two n = Rep.shift_left Rep.one n in + let ten = Rep.of_int 10 in let parse_hexdigit c = int_of_char c - if '0' <= c && '9' >= c then 0x30 - else if 'a' <= c && 'f' >= c then (0x61 - 10) - else if 'A' <= c && 'F' >= c then (0x41 - 10) + else if 'a' <= c && 'f' >= c then 0x61 - 10 + else if 'A' <= c && 'F' >= c then 0x41 - 10 else assert false in let parse_hex offset sign = let num = ref Rep.zero in - for i = offset to (len - 1) do + for i = offset to len - 1 do assert (lt_u !num (power_of_two (Rep.bitwidth - 4))); num := Rep.logor (Rep.shift_left !num 4) (Rep.of_int (parse_hexdigit x.[i])); done; @@ -237,24 +237,26 @@ struct divrem_u (power_of_two (Rep.bitwidth - 1)) ten in let num = ref Rep.zero in - for i = offset to (len - 1) do + for i = offset to len - 1 do assert ('0' <= x.[i] && '9' >= x.[i]); - let new_digit = (Rep.of_int (int_of_char x.[i] - 0x30)) in + let new_digit = Rep.of_int (int_of_char x.[i] - 0x30) in assert ((lt_u !num max_upper) || ((!num = max_upper) && (le_u new_digit max_lower))); - num := (Rep.add (Rep.mul !num ten) new_digit) + num := Rep.add (Rep.mul !num ten) new_digit done; !num in let parse_int offset sign = - if offset + 3 <= len && (String.sub x offset 2) = "0x" then + if offset + 3 <= len && String.sub x offset 2 = "0x" then parse_hex (offset + 2) sign else parse_dec offset sign in - match x.[0] with - | '+' -> parse_int 1 true - | '-' -> Rep.neg (parse_int 1 false) - | _ -> parse_int 0 true + try + match x.[0] with + | '+' -> parse_int 1 true + | '-' -> Rep.neg (parse_int 1 false) + | _ -> parse_int 0 true + with _ -> failwith "int_to_string" let to_string = Rep.to_string From 8c0dbca3ab0a0cdc704264f708b2d6f9fabf5fe0 Mon Sep 17 00:00:00 2001 From: pjuftring Date: Wed, 4 May 2016 20:52:41 +0200 Subject: [PATCH 3/6] Replace assert with failwith --- ml-proto/spec/int.ml | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/ml-proto/spec/int.ml b/ml-proto/spec/int.ml index dc1c4e6fc8..4cdbdfff2d 100644 --- a/ml-proto/spec/int.ml +++ b/ml-proto/spec/int.ml @@ -213,34 +213,35 @@ struct let len = String.length x in let power_of_two n = Rep.shift_left Rep.one n in let ten = Rep.of_int 10 in + let fail () = failwith "int_of_string" in let parse_hexdigit c = int_of_char c - if '0' <= c && '9' >= c then 0x30 else if 'a' <= c && 'f' >= c then 0x61 - 10 else if 'A' <= c && 'F' >= c then 0x41 - 10 - else assert false + else fail () in let parse_hex offset sign = let num = ref Rep.zero in for i = offset to len - 1 do - assert (lt_u !num (power_of_two (Rep.bitwidth - 4))); + if ge_u !num (power_of_two (Rep.bitwidth - 4)) then fail (); num := Rep.logor (Rep.shift_left !num 4) (Rep.of_int (parse_hexdigit x.[i])); done; - assert (sign || (le_u !num (power_of_two (Rep.bitwidth - 1)))); + if sign && gt_u !num (power_of_two (Rep.bitwidth - 1)) then fail (); !num in let parse_dec offset sign = let max_upper, max_lower = if sign then - divrem_u Rep.minus_one ten - else divrem_u (power_of_two (Rep.bitwidth - 1)) ten + else + divrem_u Rep.minus_one ten in let num = ref Rep.zero in for i = offset to len - 1 do - assert ('0' <= x.[i] && '9' >= x.[i]); + if '0' > x.[i] || '9' < x.[i] then fail (); let new_digit = Rep.of_int (int_of_char x.[i] - 0x30) in - assert ((lt_u !num max_upper) || ((!num = max_upper) && (le_u new_digit max_lower))); + if gt_u !num max_upper || (!num = max_upper && gt_u new_digit max_lower) then fail (); num := Rep.add (Rep.mul !num ten) new_digit done; !num @@ -251,12 +252,10 @@ struct else parse_dec offset sign in - try - match x.[0] with - | '+' -> parse_int 1 true - | '-' -> Rep.neg (parse_int 1 false) - | _ -> parse_int 0 true - with _ -> failwith "int_to_string" + match x.[0] with + | '+' -> parse_int 1 false + | '-' -> Rep.neg (parse_int 1 true) + | _ -> parse_int 0 false let to_string = Rep.to_string From 88a0e682f3644714fb8dbd8911330ad835e29ebd Mon Sep 17 00:00:00 2001 From: pjuftring Date: Fri, 20 May 2016 13:12:59 +0200 Subject: [PATCH 4/6] Address comments --- ml-proto/spec/int.ml | 76 +++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 40 deletions(-) diff --git a/ml-proto/spec/int.ml b/ml-proto/spec/int.ml index 4cdbdfff2d..7885dcde5c 100644 --- a/ml-proto/spec/int.ml +++ b/ml-proto/spec/int.ml @@ -103,6 +103,8 @@ struct let to_bits x = x let zero = Rep.zero + let ten = Rep.of_int 10 + let max_upper, max_lower = divrem_u Rep.minus_one ten (* add, sub, and mul are sign-agnostic and do not trap on overflow. *) let add = Rep.add @@ -208,54 +210,48 @@ struct let ge_s x y = x >= y let ge_u x y = cmp_u x (>=) y + let parse_hexdigit c = + int_of_char c - + if '0' <= c && '9' >= c then 0x30 + else if 'a' <= c && 'f' >= c then 0x61 - 10 + else if 'A' <= c && 'F' >= c then 0x41 - 10 + else failwith "unexpected digit" + + let neg_safe x = + if lt_s (sub x Rep.one) Rep.minus_one then raise Numerics.IntegerOverflow; + Rep.neg x + (* This implementation allows leading signs and unsigned values *) let of_string x = + let open Rep in let len = String.length x in - let power_of_two n = Rep.shift_left Rep.one n in - let ten = Rep.of_int 10 in - let fail () = failwith "int_of_string" in - let parse_hexdigit c = - int_of_char c - - if '0' <= c && '9' >= c then 0x30 - else if 'a' <= c && 'f' >= c then 0x61 - 10 - else if 'A' <= c && 'F' >= c then 0x41 - 10 - else fail () - in - let parse_hex offset sign = - let num = ref Rep.zero in - for i = offset to len - 1 do - if ge_u !num (power_of_two (Rep.bitwidth - 4)) then fail (); - num := Rep.logor (Rep.shift_left !num 4) (Rep.of_int (parse_hexdigit x.[i])); - done; - if sign && gt_u !num (power_of_two (Rep.bitwidth - 1)) then fail (); - !num + let require b = if not b then failwith "of_int" in + let rec parse_hex i num = + if i = len then num + else begin + require (le_u num (shr_u minus_one (of_int 4))); + parse_hex (i + 1) (logor (shift_left num 4) (of_int (parse_hexdigit x.[i]))) + end in - let parse_dec offset sign = - let max_upper, max_lower = - if sign then - divrem_u (power_of_two (Rep.bitwidth - 1)) ten - else - divrem_u Rep.minus_one ten - in - let num = ref Rep.zero in - for i = offset to len - 1 do - if '0' > x.[i] || '9' < x.[i] then fail (); - let new_digit = Rep.of_int (int_of_char x.[i] - 0x30) in - if gt_u !num max_upper || (!num = max_upper && gt_u new_digit max_lower) then fail (); - num := Rep.add (Rep.mul !num ten) new_digit - done; - !num + let rec parse_dec i num = + if i = len then num + else begin + require ('0' <= x.[i] && '9' >= x.[i]); + let new_digit = of_int (int_of_char x.[i] - 0x30) in + require (le_u num max_upper && (num <> max_upper || le_u new_digit max_lower)); + parse_dec (i + 1) (add (mul num ten) new_digit) + end in - let parse_int offset sign = - if offset + 3 <= len && String.sub x offset 2 = "0x" then - parse_hex (offset + 2) sign + let parse_int i = + if i + 3 <= len && x.[i] = '0' && x.[i + 1] = 'x' then + parse_hex (i + 2) zero else - parse_dec offset sign + parse_dec i zero in match x.[0] with - | '+' -> parse_int 1 false - | '-' -> Rep.neg (parse_int 1 true) - | _ -> parse_int 0 false + | '+' -> parse_int 1 + | '-' -> neg_safe (parse_int 1) + | _ -> parse_int 0 let to_string = Rep.to_string From 70251134e411bff77f5ef589d9e156b1b1e7cff2 Mon Sep 17 00:00:00 2001 From: pjuftring Date: Fri, 17 Jun 2016 15:34:02 +0200 Subject: [PATCH 5/6] Add tests and hoist even more --- ml-proto/spec/int.ml | 30 +++++++++++-------- .../test/of_string-overflow-hex-u32.fail.wast | 1 + .../test/of_string-overflow-hex-u64.fail.wast | 1 + .../test/of_string-overflow-s32.fail.wast | 1 + .../test/of_string-overflow-s64.fail.wast | 1 + .../test/of_string-overflow-u32.fail.wast | 1 + .../test/of_string-overflow-u64.fail.wast | 1 + 7 files changed, 23 insertions(+), 13 deletions(-) create mode 100644 ml-proto/test/of_string-overflow-hex-u32.fail.wast create mode 100644 ml-proto/test/of_string-overflow-hex-u64.fail.wast create mode 100644 ml-proto/test/of_string-overflow-s32.fail.wast create mode 100644 ml-proto/test/of_string-overflow-s64.fail.wast create mode 100644 ml-proto/test/of_string-overflow-u32.fail.wast create mode 100644 ml-proto/test/of_string-overflow-u64.fail.wast diff --git a/ml-proto/spec/int.ml b/ml-proto/spec/int.ml index 7885dcde5c..2fc0aaad8e 100644 --- a/ml-proto/spec/int.ml +++ b/ml-proto/spec/int.ml @@ -210,22 +210,22 @@ struct let ge_s x y = x >= y let ge_u x y = cmp_u x (>=) y - let parse_hexdigit c = - int_of_char c - - if '0' <= c && '9' >= c then 0x30 - else if 'a' <= c && 'f' >= c then 0x61 - 10 - else if 'A' <= c && 'F' >= c then 0x41 - 10 - else failwith "unexpected digit" + let parse_hexdigit = function + | '0' .. '9' as c -> Char.code c - Char.code '0' + | 'a' .. 'f' as c -> 0xa + Char.code c - Char.code 'a' + | 'A' .. 'F' as c -> 0xa + Char.code c - Char.code 'A' + | _ -> failwith "unexpected digit" - let neg_safe x = - if lt_s (sub x Rep.one) Rep.minus_one then raise Numerics.IntegerOverflow; - Rep.neg x + let parse_decdigit c = + if '0' > c || '9' < c then failwith "unexpected digit"; + Rep.of_int (int_of_char c - Char.code '0') + + let require b = if not b then failwith "of_string" (* This implementation allows leading signs and unsigned values *) let of_string x = let open Rep in let len = String.length x in - let require b = if not b then failwith "of_int" in let rec parse_hex i num = if i = len then num else begin @@ -236,8 +236,7 @@ struct let rec parse_dec i num = if i = len then num else begin - require ('0' <= x.[i] && '9' >= x.[i]); - let new_digit = of_int (int_of_char x.[i] - 0x30) in + let new_digit = parse_decdigit x.[i] in require (le_u num max_upper && (num <> max_upper || le_u new_digit max_lower)); parse_dec (i + 1) (add (mul num ten) new_digit) end @@ -250,7 +249,12 @@ struct in match x.[0] with | '+' -> parse_int 1 - | '-' -> neg_safe (parse_int 1) + | '-' -> + begin + let y = (parse_int 1) in + require (ge_s (sub y one) minus_one); + neg y + end | _ -> parse_int 0 let to_string = Rep.to_string diff --git a/ml-proto/test/of_string-overflow-hex-u32.fail.wast b/ml-proto/test/of_string-overflow-hex-u32.fail.wast new file mode 100644 index 0000000000..1323a1f0b0 --- /dev/null +++ b/ml-proto/test/of_string-overflow-hex-u32.fail.wast @@ -0,0 +1 @@ +(module (func (i32.const 0x100000000))) diff --git a/ml-proto/test/of_string-overflow-hex-u64.fail.wast b/ml-proto/test/of_string-overflow-hex-u64.fail.wast new file mode 100644 index 0000000000..d13d6e4b46 --- /dev/null +++ b/ml-proto/test/of_string-overflow-hex-u64.fail.wast @@ -0,0 +1 @@ +(module (func (i64.const 0x10000000000000000))) diff --git a/ml-proto/test/of_string-overflow-s32.fail.wast b/ml-proto/test/of_string-overflow-s32.fail.wast new file mode 100644 index 0000000000..4dda960273 --- /dev/null +++ b/ml-proto/test/of_string-overflow-s32.fail.wast @@ -0,0 +1 @@ +(module (func (i32.const -2147483649))) diff --git a/ml-proto/test/of_string-overflow-s64.fail.wast b/ml-proto/test/of_string-overflow-s64.fail.wast new file mode 100644 index 0000000000..1034b0b001 --- /dev/null +++ b/ml-proto/test/of_string-overflow-s64.fail.wast @@ -0,0 +1 @@ +(module (func (i64.const -9223372036854775809))) diff --git a/ml-proto/test/of_string-overflow-u32.fail.wast b/ml-proto/test/of_string-overflow-u32.fail.wast new file mode 100644 index 0000000000..8f226177bd --- /dev/null +++ b/ml-proto/test/of_string-overflow-u32.fail.wast @@ -0,0 +1 @@ +(module (func (i32.const 4294967296))) diff --git a/ml-proto/test/of_string-overflow-u64.fail.wast b/ml-proto/test/of_string-overflow-u64.fail.wast new file mode 100644 index 0000000000..cfa2c46b58 --- /dev/null +++ b/ml-proto/test/of_string-overflow-u64.fail.wast @@ -0,0 +1 @@ +(module (func (i64.const 18446744073709551616))) From b5e2a35eada55a1f689bd77679af2f2199e2c535 Mon Sep 17 00:00:00 2001 From: pjuftring Date: Mon, 20 Jun 2016 20:51:55 +0200 Subject: [PATCH 6/6] Rename error messages and remove 'begin' --- ml-proto/spec/int.ml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/ml-proto/spec/int.ml b/ml-proto/spec/int.ml index 2fc0aaad8e..f53a18cc88 100644 --- a/ml-proto/spec/int.ml +++ b/ml-proto/spec/int.ml @@ -214,10 +214,10 @@ struct | '0' .. '9' as c -> Char.code c - Char.code '0' | 'a' .. 'f' as c -> 0xa + Char.code c - Char.code 'a' | 'A' .. 'F' as c -> 0xa + Char.code c - Char.code 'A' - | _ -> failwith "unexpected digit" + | _ -> failwith "of_string" let parse_decdigit c = - if '0' > c || '9' < c then failwith "unexpected digit"; + if '0' > c || '9' < c then failwith "of_string"; Rep.of_int (int_of_char c - Char.code '0') let require b = if not b then failwith "of_string" @@ -250,11 +250,9 @@ struct match x.[0] with | '+' -> parse_int 1 | '-' -> - begin - let y = (parse_int 1) in - require (ge_s (sub y one) minus_one); - neg y - end + let y = (parse_int 1) in + require (ge_s (sub y one) minus_one); + neg y | _ -> parse_int 0 let to_string = Rep.to_string