From 5c834d8c84106283b5e1053b6ed72959a4a6eb6c Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 17 Mar 2021 19:35:42 -0500 Subject: [PATCH 1/3] Constrain the allowed set of language features in 'compute' functions. --- setup.cfg | 2 + setup.py | 8 +- src/vector/compute/spatial/costheta.py | 4 +- src/vector/compute/spatial/cottheta.py | 4 +- src/vector/compute/spatial/eta.py | 6 +- src/vector/compute/spatial/z.py | 4 +- tests/test_compute_features.py | 330 +++++++++++++++++++++++++ 7 files changed, 348 insertions(+), 10 deletions(-) create mode 100644 tests/test_compute_features.py diff --git a/setup.cfg b/setup.cfg index 8c279a90..5bf65731 100644 --- a/setup.cfg +++ b/setup.cfg @@ -45,6 +45,8 @@ where = src addopts = -vv -rs -Wd testpaths = tests +markers = + slow [tool:isort] profile = black diff --git a/setup.py b/setup.py index 092ec376..4959b8d6 100644 --- a/setup.py +++ b/setup.py @@ -13,8 +13,14 @@ 'numba>=0.50; python_version>="3" and python_version<"3.9"', "scikit-hep-testdata>=0.2.0", "pytest>=4.6", + "uncompyle6>=3.7.4", + "spark-parser>=1.8.9", + ], + "test": [ + "pytest>=4.6", + "uncompyle6>=3.7.4", + "spark-parser>=1.8.9", ], - "test": ["pytest>=4.6"], } extras["all"] = sum(extras.values(), []) diff --git a/src/vector/compute/spatial/costheta.py b/src/vector/compute/spatial/costheta.py index 3d012cdd..c5522edf 100644 --- a/src/vector/compute/spatial/costheta.py +++ b/src/vector/compute/spatial/costheta.py @@ -19,7 +19,7 @@ def xy_z(lib, x, y, z): - return lib.nan_to_num(z / mag.xy_z(lib, x, y, z), 1.0) + return lib.nan_to_num(z / mag.xy_z(lib, x, y, z), nan=1.0) def xy_theta(lib, x, y, theta): @@ -31,7 +31,7 @@ def xy_eta(lib, x, y, eta): def rhophi_z(lib, rho, phi, z): - return lib.nan_to_num(z / mag.rhophi_z(lib, rho, phi, z), 1.0) + return lib.nan_to_num(z / mag.rhophi_z(lib, rho, phi, z), nan=1.0) def rhophi_theta(lib, rho, phi, theta): diff --git a/src/vector/compute/spatial/cottheta.py b/src/vector/compute/spatial/cottheta.py index e8917104..ab9e9707 100644 --- a/src/vector/compute/spatial/cottheta.py +++ b/src/vector/compute/spatial/cottheta.py @@ -20,7 +20,7 @@ def xy_z(lib, x, y, z): - return lib.nan_to_num(z / rho.xy(lib, x, y), lib.inf) + return lib.nan_to_num(z / rho.xy(lib, x, y), nan=lib.inf) def xy_theta(lib, x, y, theta): @@ -32,7 +32,7 @@ def xy_eta(lib, x, y, eta): def rhophi_z(lib, rho, phi, z): - return lib.nan_to_num(z / rho, lib.inf) + return lib.nan_to_num(z / rho, nan=lib.inf) def rhophi_theta(lib, rho, phi, theta): diff --git a/src/vector/compute/spatial/eta.py b/src/vector/compute/spatial/eta.py index 26bfb893..a0c3019e 100644 --- a/src/vector/compute/spatial/eta.py +++ b/src/vector/compute/spatial/eta.py @@ -18,11 +18,11 @@ def xy_z(lib, x, y, z): - return lib.nan_to_num(lib.arctanh(z / lib.sqrt(x ** 2 + y ** 2 + z ** 2)), 0.0) + return lib.nan_to_num(lib.arctanh(z / lib.sqrt(x ** 2 + y ** 2 + z ** 2)), nan=0.0) def xy_theta(lib, x, y, theta): - return lib.nan_to_num(-lib.log(lib.tan(0.5 * theta)), 0.0) + return lib.nan_to_num(-lib.log(lib.tan(0.5 * theta)), nan=0.0) def xy_eta(lib, x, y, eta): @@ -30,7 +30,7 @@ def xy_eta(lib, x, y, eta): def rhophi_z(lib, rho, phi, z): - return lib.nan_to_num(lib.arctanh(z / lib.sqrt(rho ** 2 + z ** 2)), 0.0) + return lib.nan_to_num(lib.arctanh(z / lib.sqrt(rho ** 2 + z ** 2)), nan=0.0) def rhophi_theta(lib, rho, phi, theta): diff --git a/src/vector/compute/spatial/z.py b/src/vector/compute/spatial/z.py index 0a6a9267..3e4c4749 100644 --- a/src/vector/compute/spatial/z.py +++ b/src/vector/compute/spatial/z.py @@ -23,7 +23,7 @@ def xy_z(lib, x, y, z): def xy_theta(lib, x, y, theta): - return lib.nan_to_num(rho.xy(lib, x, y) / lib.tan(theta), 0.0) + return lib.nan_to_num(rho.xy(lib, x, y) / lib.tan(theta), nan=0.0) def xy_eta(lib, x, y, eta): @@ -35,7 +35,7 @@ def rhophi_z(lib, rho, phi, z): def rhophi_theta(lib, rho, phi, theta): - return lib.nan_to_num(rho / lib.tan(theta), 0.0) + return lib.nan_to_num(rho / lib.tan(theta), nan=0.0) def rhophi_eta(lib, rho, phi, eta): diff --git a/tests/test_compute_features.py b/tests/test_compute_features.py new file mode 100644 index 00000000..5d0bee8a --- /dev/null +++ b/tests/test_compute_features.py @@ -0,0 +1,330 @@ +# Copyright (c) 2019-2021, Jonas Eschle, Jim Pivarski, Eduardo Rodrigues, and Henry Schreiner. +# +# Distributed under the 3-clause BSD license, see accompanying file LICENSE +# or https://github.com/scikit-hep/vector for details. + +import collections +import inspect +import sys + +import pytest +import spark_parser +import uncompyle6 + +import vector.compute.lorentz +import vector.compute.planar +import vector.compute.spatial + +Context = collections.namedtuple("Context", ["name", "closure"]) + + +functions = dict( + [ + ( + f'{y.__name__}({", ".join(repr(v) if isinstance(v, str) else v.__name__ for v in w)})', + z[0], + ) + for x, y in inspect.getmembers( + vector.compute.planar, predicate=inspect.ismodule + ) + if hasattr(y, "dispatch_map") + for w, z in y.dispatch_map.items() + ] + + [ + ( + f'{y.__name__}({", ".join(repr(v) if isinstance(v, str) else v.__name__ for v in w)})', + z[0], + ) + for x, y in inspect.getmembers( + vector.compute.spatial, predicate=inspect.ismodule + ) + if hasattr(y, "dispatch_map") + for w, z in y.dispatch_map.items() + ] + + [ + ( + f'{y.__name__}({", ".join(repr(v) if isinstance(v, str) else v.__name__ for v in w)})', + z[0], + ) + for x, y in inspect.getmembers( + vector.compute.lorentz, predicate=inspect.ismodule + ) + if hasattr(y, "dispatch_map") + for w, z in y.dispatch_map.items() + ] +) + + +@pytest.mark.slow +@pytest.mark.parametrize("signature", functions.keys()) +def test(signature): + analyze_function(functions[signature]) + + +def analyze_function(function): + if function not in analyze_function.done: + # print(function.__module__ + "." + function.__name__) + + closure = dict(function.__globals__) + if function.__closure__ is not None: + for var, cell in zip(function.__code__.co_freevars, function.__closure__): + try: + closure[var] = cell.cell_contents + except ValueError: + pass # the cell has not been filled yet, so ignore it + + analyze_code(function.__code__, Context(function.__name__, closure)) + analyze_function.done.add(function) + + +analyze_function.done = set() + + +def analyze_code(code, context): + # this block is all uncompyle6 + python_version = float(sys.version[0:3]) + is_pypy = "__pypy__" in sys.builtin_module_names + parser = uncompyle6.parser.get_python_parser( + python_version, + debug_parser=dict(spark_parser.DEFAULT_DEBUG), + compile_mode="exec", + is_pypy=is_pypy, + ) + scanner = uncompyle6.scanner.get_scanner(python_version, is_pypy=is_pypy) + tokens, customize = scanner.ingest(code, code_objects={}, show_asm=False) + parsed = uncompyle6.parser.parse(parser, tokens, customize, code) + + # now the disassembled bytecodes have been parsed into a tree for us to walk + analyze_body(parsed, context) + + +def analyze_body(node, context): + assert node.kind == "stmts" + assert len(node) >= 1 + + for statement in node[:-1]: + analyze_assignment(statement, context) + analyze_return(node[-1], context) + + +def analyze_assignment(node, context): + assert node.kind == "sstmt" + assert len(node) == 1 + + assert ( + node[0].kind == "assign" + ), "only assignments and a final 'return' are allowed (and not tuple-assignment)" + assert len(node[0]) == 2 + assert node[0][1].kind == "store" + + if node[0][1][0].kind == "STORE_FAST": + analyze_expression(expr(node[0][0]), context) + + elif node[0][1][0].kind == "unpack": + assert len(node[0][1][0]) >= 2 + assert node[0][1][0][0].kind.startswith("UNPACK_SEQUENCE") + for item in node[0][1][0][1:]: + assert item.kind == "store" + assert len(item) == 1 + assert item[0].kind == "STORE_FAST" + + else: + print(node[0][1][0]) + raise AssertionError("what is this?") + + +def expr(node): + assert node.kind == "expr" + assert len(node) == 1 + return node[0] + + +def is_pi(node): + return ( + node.kind == "attribute" + and len(node) == 2 + and expr(node[0]).kind == "LOAD_FAST" + and expr(node[0]).attr == "lib" + and node[1].kind == "LOAD_ATTR" + and node[1].attr == "pi" + ) + + +def is_nan_to_num(node): + if node.kind == "call_kw36" and len(node) >= 3: + function = expr(node[0]) + return ( + function.kind == "attribute" + and expr(function[0]).attr == "lib" + and function[1].attr == "nan_to_num" + ) + else: + return False + + +def analyze_return(node, context): + assert node.kind == "sstmt" + assert len(node) == 1 + + assert node[0].kind == "return", "compute function must end with a 'return'" + assert len(node[0]) == 2 + assert node[0][0].kind == "ret_expr" + assert len(node[0][0]) == 1 + expr(node[0][0][0]) + assert node[0][1].kind == "RETURN_VALUE" + + if node[0][0][0][0].kind == "tuple": + assert len(node[0][0][0][0]) >= 2, "returning an empty tuple?" + assert node[0][0][0][0][-1].kind.startswith("BUILD_TUPLE") + for item in node[0][0][0][0][:-1]: + analyze_expression(expr(item), context) + + else: + analyze_expression(node[0][0][0][0], context) + + +def analyze_expression(node, context): + if node.kind == "LOAD_FAST": + # Don't bother checking to see if this variable has been defined. + # Unit checks test that if the coverage is complete. + pass + + elif node.kind == "LOAD_CONST": + assert isinstance(node.attr, (int, float)) + + elif is_pi(node): + pass + + elif node.kind == "unary_op": + assert len(node) == 2 + analyze_expression(expr(node[0]), context) + assert node[1].kind == "unary_operator" + assert len(node[1]) == 1 + analyze_unary_operator(node[1][0], context) + + elif node.kind == "bin_op": + assert len(node) == 3 + analyze_expression(expr(node[0]), context) + analyze_expression(expr(node[1]), context) + assert node[2].kind == "binary_operator" + assert len(node[2]) == 1 + analyze_binary_operator(node[2][0], context) + + elif node.kind == "compare": + assert len(node) == 1 + assert node[0].kind == "compare_single", "only do single comparisons" + assert len(node[0]) == 3 + analyze_expression(expr(node[0][0]), context) + analyze_expression(expr(node[0][1]), context) + assert node[0][2].kind == "COMPARE_OP" + assert ( + node[0][2].attr in allowed_comparisons + ), f"add {repr(node[0][2].attr)} to allowed_comparisons" + + elif node.kind == "call": + assert len(node) >= 2 + + assert node[-1].kind.startswith("CALL_METHOD") or node[-1].kind.startswith( + "CALL_FUNCTION" + ) + analyze_callable(expr(node[0]), context) + + for argument in node[1:-1]: + assert argument.kind == "pos_arg", "only positional arguments" + analyze_expression(expr(argument[0]), context) + + elif is_nan_to_num(node): + analyze_expression(expr(node[1]), context) + + else: + print(node) + raise AssertionError("what is this?") + + +def analyze_unary_operator(node, context): + assert ( + node.kind in allowed_unary_operators + ), f"add {repr(node.kind)} to allowed_unary_operators" + + +def analyze_binary_operator(node, context): + assert ( + node.kind in allowed_binary_operators + ), f"add {repr(node.kind)} to allowed_binary_operators" + + +def analyze_callable(node, context): + if node.kind == "attribute37": + assert len(node) == 2 + module = expr(node[0]) + assert module.kind == "LOAD_FAST" or module.kind == "LOAD_GLOBAL" + assert node[1].kind == "LOAD_METHOD" + + if module.attr == "lib": + assert ( + node[1].attr in allowed_lib_functions + ), f"add {repr(node[1].attr)} to allowed_lib_functions" + + else: + module_name = ".".join( + context.closure.get(module.attr).__name__.split(".")[:-1] + ) + assert module_name in ( + "vector.compute.planar", + "vector.compute.spatial", + "vector.compute.lorentz", + ) + + elif node.kind == "LOAD_GLOBAL" or node.kind == "LOAD_DEREF": + function = context.closure.get(node.attr) + assert ( + function is not None + ), f"unrecognized function in scope: {repr(node.attr)}" + analyze_function(function) + + else: + print(node) + raise AssertionError("what is this?") + + +allowed_unary_operators = [ + "UNARY_NEGATIVE", +] + +allowed_binary_operators = [ + "BINARY_ADD", + "BINARY_SUBTRACT", + "BINARY_MULTIPLY", + "BINARY_TRUE_DIVIDE", + "BINARY_MODULO", + "BINARY_POWER", + "BINARY_AND", +] + +allowed_comparisons = [ + "==", + "!=", + "<", + ">", +] + +allowed_lib_functions = [ + "absolute", + "sign", + "copysign", + "maximum", + "sqrt", + "exp", + "log", + "sin", + "cos", + "tan", + "arcsin", + "arccos", + "arctan", + "arctan2", + "sinh", + "cosh", + "arctanh", + "isclose", +] From bbfe782a9586647ee744d4eedeb9397d966b3368 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 17 Mar 2021 19:46:06 -0500 Subject: [PATCH 2/3] Only run that test on Python 3.8. --- .github/workflows/ci.yml | 4 ++++ setup.py | 4 ---- tests/test_compute_features.py | 13 +++++++++++-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 04c66cc6..69f61849 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,6 +25,10 @@ jobs: with: python-version: ${{ matrix.python-version }} + - name: Check compute features only on Python 3.8 + if: "matrix.python-version == 3.8" + run: python -m pip install uncompyle6 spark-parser + - name: Requirements check run: python -m pip list diff --git a/setup.py b/setup.py index 4959b8d6..ab1d3fbb 100644 --- a/setup.py +++ b/setup.py @@ -13,13 +13,9 @@ 'numba>=0.50; python_version>="3" and python_version<"3.9"', "scikit-hep-testdata>=0.2.0", "pytest>=4.6", - "uncompyle6>=3.7.4", - "spark-parser>=1.8.9", ], "test": [ "pytest>=4.6", - "uncompyle6>=3.7.4", - "spark-parser>=1.8.9", ], } diff --git a/tests/test_compute_features.py b/tests/test_compute_features.py index 5d0bee8a..86c13889 100644 --- a/tests/test_compute_features.py +++ b/tests/test_compute_features.py @@ -8,13 +8,16 @@ import sys import pytest -import spark_parser -import uncompyle6 import vector.compute.lorentz import vector.compute.planar import vector.compute.spatial + +uncompyle6 = pytest.importorskip("uncompyle6") +spark_parser = pytest.importorskip("spark_parser") + + Context = collections.namedtuple("Context", ["name", "closure"]) @@ -61,6 +64,12 @@ def test(signature): analyze_function(functions[signature]) +# def test(): +# for signature, function in functions.items(): +# print(signature) +# analyze_function(function) + + def analyze_function(function): if function not in analyze_function.done: # print(function.__module__ + "." + function.__name__) From e01fd861baa727b923671f4e6d0563457243c8d6 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 17 Mar 2021 19:46:26 -0500 Subject: [PATCH 3/3] Black. --- tests/test_compute_features.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_compute_features.py b/tests/test_compute_features.py index 86c13889..e23c4bbe 100644 --- a/tests/test_compute_features.py +++ b/tests/test_compute_features.py @@ -13,7 +13,6 @@ import vector.compute.planar import vector.compute.spatial - uncompyle6 = pytest.importorskip("uncompyle6") spark_parser = pytest.importorskip("spark_parser")