diff --git a/manim/__main__.py b/manim/__main__.py
index 14d524e060..4952d415ed 100644
--- a/manim/__main__.py
+++ b/manim/__main__.py
@@ -111,13 +111,16 @@ def get_scene_classes_from_module(module):
 def get_module(file_name):
     if file_name == "-":
+        # Since this feature is used for rapid testing, using Scene Caching would be a
+        # hindrance in this case.
+        file_writer_config["disable_caching"] = True
         module = types.ModuleType("input_scenes")
         logger.info(
             "Enter the animation's code & end with an EOF (CTRL+D on Linux/Unix, CTRL+Z on Windows):"
         )
         code = sys.stdin.read()
         if not code.startswith("from manim import"):
-            logger.warn(
+            logger.warning(
                 "Didn't find an import statement for Manim. Importing automatically..."
             )
             code = "from manim import *\n" + code
diff --git a/manim/scene/scene.py b/manim/scene/scene.py
index 1ca43e09a1..834139a477 100644
--- a/manim/scene/scene.py
+++ b/manim/scene/scene.py
@@ -805,7 +805,7 @@ def wrapper(self, *args, **kwargs):
         if not file_writer_config["disable_caching"]:
             mobjects_on_scene = self.get_mobjects()
             hash_play = get_hash_from_play_call(
-                self.camera, animations, mobjects_on_scene
+                self, self.camera, animations, mobjects_on_scene
             )
             self.play_hashes_list.append(hash_play)
             if self.file_writer.is_already_cached(hash_play):
@@ -836,7 +836,7 @@ def wrapper(self, duration=DEFAULT_WAIT_TIME, stop_condition=None):
         self.revert_to_original_skipping_status()
         if not file_writer_config["disable_caching"]:
             hash_wait = get_hash_from_wait_call(
-                self.camera, duration, stop_condition, self.get_mobjects()
+                self, self.camera, duration, stop_condition, self.get_mobjects()
             )
             self.play_hashes_list.append(hash_wait)
             if self.file_writer.is_already_cached(hash_wait):
diff --git a/manim/scene/scene_file_writer.py b/manim/scene/scene_file_writer.py
index eb9e8b7289..e5762291e5 100644
--- a/manim/scene/scene_file_writer.py
+++ b/manim/scene/scene_file_writer.py
@@ -446,7 +446,7 @@ def close_movie_pipe(self):
         shutil.move(
             self.temp_partial_movie_file_path, self.partial_movie_file_path,
         )
-        logger.debug(
+        logger.info(
             f"Animation {self.scene.num_plays} : Partial movie file written in {self.partial_movie_file_path}"
         )
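Note: both scene.py hunks thread the scene itself into the hash calls as a new first
argument; manim/utils/hashing.py below uses it only to seed the already-processed
registry. For readers skimming the patch, the caching gate these hunks live in looks
roughly like the following paraphrase. The tail after the is_already_cached check is
truncated in the hunk above, so the last comment is a summary, not the patched code:

    # Paraphrase of the wrapper shown in the first scene.py hunk (not the patch itself).
    def wrapper(self, *args, **kwargs):
        if not file_writer_config["disable_caching"]:
            mobjects_on_scene = self.get_mobjects()
            hash_play = get_hash_from_play_call(
                self, self.camera, animations, mobjects_on_scene
            )
            self.play_hashes_list.append(hash_play)
            if self.file_writer.is_already_cached(hash_play):
                # The hunk is cut off here; at this point the scene skips
                # re-rendering and reuses the cached partial movie file.
                ...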
diff --git a/manim/utils/hashing.py b/manim/utils/hashing.py
index 4946f6f881..f93020e8f9 100644
--- a/manim/utils/hashing.py
+++ b/manim/utils/hashing.py
@@ -2,12 +2,14 @@
 import zlib
 import inspect
 import copy
-import dis
 import numpy as np
-from types import ModuleType
+from types import ModuleType, MappingProxyType, FunctionType, MethodType
+from time import perf_counter
 
 from .. import logger
 
+ALREADY_PROCESSED_ID = {}
+
 
 class CustomEncoder(json.JSONEncoder):
     def default(self, obj):
@@ -30,56 +32,132 @@ def default(self, obj):
             Python object that JSON encoder will recognize
 
         """
-        if inspect.isfunction(obj) and not isinstance(obj, ModuleType):
+        if not (isinstance(obj, ModuleType)) and isinstance(
+            obj, (MethodType, FunctionType)
+        ):
             cvars = inspect.getclosurevars(obj)
             cvardict = {**copy.copy(cvars.globals), **copy.copy(cvars.nonlocals)}
             for i in list(cvardict):
                 # NOTE : All module-type objects are removed, because otherwise the encoder throws a ValueError: Circular reference detected. TODO
                 if isinstance(cvardict[i], ModuleType):
                     del cvardict[i]
-            return {"code": inspect.getsource(obj), "nonlocals": cvardict}
+            return self._check_iterable(
+                {"code": inspect.getsource(obj), "nonlocals": cvardict}
+            )
         elif isinstance(obj, np.ndarray):
-            return list(obj)
+            if obj.size > 1000:
+                obj = np.resize(obj, (100, 100))
+                return f"TRUNCATED ARRAY: {repr(obj)}"
+            # We return the repr, not a list, to prevent the JSONEncoder from iterating over it.
+            return repr(obj)
         elif hasattr(obj, "__dict__"):
             temp = getattr(obj, "__dict__")
-            return self._encode_dict(temp)
+            # MappingProxy is not supported by the JSONEncoder.
+            if isinstance(temp, MappingProxyType):
+                return dict(temp)
+            return self._check_iterable(temp)
         elif isinstance(obj, np.uint8):
             return int(obj)
-        try:
-            return json.JSONEncoder.default(self, obj)
-        except TypeError:
-            # This is used when the user enters an unknown type in CONFIG. Rather than throwing an error, we transform
-            # it into a string "Unsupported type for hashing" so that it won't affect the hash.
-            return "Unsupported type for hashing"
-
-    def _encode_dict(self, obj):
-        """Clean dicts to be serialized : As dict keys must be of the type (str, int, float, bool), we have to change them when they are not of the right type.
-        To do that, if one is not of the good type we turn it into its hash using the same
-        method as all the objects here.
+
+        return f"Unsupported type for serializing -> {str(type(obj))}"
+
+    def _handle_already_processed(self, obj):
+        """Check whether an object has already been processed, using the id of the object.
+
+        This prevents the mechanism from handling an object several times, and thereby guards against circular references.
 
         Parameters
         ----------
         obj : Any
-            The obj to be cleaned.
+            The obj to check.
 
         Returns
         -------
         Any
-            The object cleaned following the processus above.
+            The string "already_processed" if the object has been processed, otherwise obj.
         """
+        global ALREADY_PROCESSED_ID
+        if id(obj) in ALREADY_PROCESSED_ID:
+            return "already_processed"
+        if not isinstance(obj, (str, int, bool, float)):
+            ALREADY_PROCESSED_ID[id(obj)] = obj
+        return obj
 
-        def key_to_hash(key):
-            if not isinstance(key, (str, int, float, bool)) and key is not None:
-                # print('called')
-                return zlib.crc32(json.dumps(key, cls=CustomEncoder).encode())
-            return key
+    def _check_iterable(self, iterable):
+        """Check every iterable that will go through the JSONEncoder for circular references, as well as for keys of the wrong format.
 
-        if isinstance(obj, dict):
-            return {key_to_hash(k): self._encode_dict(v) for k, v in obj.items()}
-        return obj
+        If a key with a bad format is found (i.e. not an int, string, or float), it gets replaced by its hash, using the same process implemented here.
+        If a circular reference is found within the iterable, it is replaced by the string "already_processed".
+
+        Parameters
+        ----------
+        iterable : Iterable[Any]
+            The iterable to check.
+        """
+
+        def _key_to_hash(key):
+            return zlib.crc32(json.dumps(key, cls=CustomEncoder).encode())
+
+        def _iter_check_list(lst):
+            # We have to make a copy, as we don't want to touch the original list.
+            # A deepcopy isn't necessary, as the check is already recursive.
+            lst_copy = copy.copy(lst)
+            for i, el in enumerate(lst):
+                if not isinstance(lst, tuple):
+                    lst_copy[i] = self._handle_already_processed(
+                        el
+                    )  # ISSUE here, because of the copy.
+                if isinstance(el, (list, tuple)):
+                    lst_copy[i] = _iter_check_list(el)
+                elif isinstance(el, dict):
+                    lst_copy[i] = _iter_check_dict(el)
+            return lst_copy
+
+        def _iter_check_dict(dct):
+            # We have to make a copy, as we don't want to touch the original dict.
+            # A deepcopy isn't necessary, as the check is already recursive.
+            dct_copy = copy.copy(dct)
+            for k, v in dct.items():
+                dct_copy[k] = self._handle_already_processed(v)
+                # We check that k is of the right format (i.e. supported by JSON).
+                if not isinstance(k, (str, int, float, bool)) and k is not None:
+                    k_new = _key_to_hash(k)
+                    # We delete the value coupled with the old key, as the value is now coupled with the new key.
+                    dct_copy[k_new] = dct_copy[k]
+                    del dct_copy[k]
+                else:
+                    k_new = k
+                if isinstance(v, dict):
+                    dct_copy[k_new] = _iter_check_dict(v)
+                elif isinstance(v, (list, tuple)):
+                    dct_copy[k_new] = _iter_check_list(v)
+            return dct_copy
+
+        if isinstance(iterable, (list, tuple)):
+            return _iter_check_list(iterable)
+        elif isinstance(iterable, dict):
+            return _iter_check_dict(iterable)
 
     def encode(self, obj):
-        return super().encode(self._encode_dict(obj))
+        """Overriding of :meth:`JSONEncoder.encode`, to run our own pre-processing.
+
+        Parameters
+        ----------
+        obj : Any
+            The object to encode in JSON.
+
+        Returns
+        -------
+        :class:`str`
+            The object encoded with the standard JSON process.
+        """
+        # We need to mark the first object to enter the process as already processed,
+        # as afterwards only objects that come from iterables will be marked as such.
+        global ALREADY_PROCESSED_ID
+        ALREADY_PROCESSED_ID[id(obj)] = obj
+        if isinstance(obj, (dict, list, tuple)):
+            return super().encode(self._check_iterable(obj))
+        return super().encode(obj)
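The hunk above replaces the old `_encode_dict` key-cleaning with a two-part mechanism:
a module-level registry of object ids (ALREADY_PROCESSED_ID) plus recursive
`_check_iterable` passes. The essential idea, isolated into a self-contained sketch —
the names mirror the patch, but this is a simplification, not the patched code:

    import json

    SEEN = {}

    class CircularSafeEncoder(json.JSONEncoder):
        def default(self, obj):
            # Any non-primitive falls through to here; serialize its __dict__,
            # replacing objects we have already visited by a marker string.
            if id(obj) in SEEN:
                return "already_processed"
            SEEN[id(obj)] = obj
            return vars(obj)

    class Node:
        def __init__(self):
            self.me = self  # deliberate circular reference

    SEEN.clear()
    print(json.dumps(Node(), cls=CircularSafeEncoder))
    # -> {"me": "already_processed"}

Tracking id()s rather than object equality is what keeps this cheap, but it also
explains the reset discipline in the hashing functions below: ids are only unique
among objects alive at the same time, so the registry must not outlive one hash call.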
@@ -120,11 +198,16 @@ def get_camera_dict_for_hashing(camera_object):
     return camera_object_dict
 
 
-def get_hash_from_play_call(camera_object, animations_list, current_mobjects_list):
+def get_hash_from_play_call(
+    scene_object, camera_object, animations_list, current_mobjects_list
+):
     """Take the list of animations and a list of mobjects and output their hashes. This is meant to be used for `scene.play` function.
 
     Parameters
     -----------
+    scene_object : :class:`~.Scene`
+        The scene object.
+
     camera_object : :class:`~.Camera`
         The camera object used in the scene.
 
@@ -139,30 +222,44 @@ def get_hash_from_play_call(camera_object, animations_list, current_mobjects_lis
     :class:`str`
         A string concatenation of the respective hashes of `camera_object`, `animations_list` and `current_mobjects_list`, separated by `_`.
     """
+    logger.debug("Hashing ...")
+    global ALREADY_PROCESSED_ID
+    # We add the scene object to ALREADY_PROCESSED_ID, as we don't want to process it: pretty much all of its attributes will be processed sooner or later anyway (in one of the three hashes).
+    ALREADY_PROCESSED_ID = {id(scene_object): scene_object}
+    t_start = perf_counter()
     camera_json = get_json(get_camera_dict_for_hashing(camera_object))
-    animations_list_json = [
-        get_json(x) for x in sorted(animations_list, key=lambda obj: str(obj))
-    ]
+    animations_list_json = [get_json(x) for x in sorted(animations_list, key=str)]
     current_mobjects_list_json = [
-        get_json(x) for x in sorted(current_mobjects_list, key=lambda obj: str(obj))
+        get_json(x) for x in sorted(current_mobjects_list, key=str)
     ]
     hash_camera, hash_animations, hash_current_mobjects = [
         zlib.crc32(repr(json_val).encode())
         for json_val in [camera_json, animations_list_json, current_mobjects_list_json]
     ]
+    t_end = perf_counter()
+    logger.debug("Hashing done in {:.5f} s.".format(t_end - t_start))
+    # This resets ALREADY_PROCESSED_ID, as the whole hashing process is finished.
+    ALREADY_PROCESSED_ID = {}
     return "{}_{}_{}".format(hash_camera, hash_animations, hash_current_mobjects)
 
 
 def get_hash_from_wait_call(
-    camera_object, wait_time, stop_condition_function, current_mobjects_list
+    scene_object,
+    camera_object,
+    wait_time,
+    stop_condition_function,
+    current_mobjects_list,
 ):
     """Take a wait time, a boolean function as a stop condition and a list of mobjects, and then output their individual hashes. This is meant to be used for `scene.wait` function.
 
     Parameters
     -----------
+    scene_object : :class:`~.Scene`
+        The scene object.
+
+    camera_object : :class:`~.Camera`
+        The camera object.
 
     wait_time : :class:`float`
         The time to wait
-
     stop_condition_function : Callable[[...], bool]
         Boolean function used as a stop_condition in `wait`.
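The three digests are plain CRC32 checksums joined with `_`; CRC32 is not
cryptographic, but it is fast, and that is adequate for a local cache key. A
self-contained illustration of the shape of one component, using a made-up camera
dict (hypothetical attributes, not Manim's real camera state):

    import json
    import zlib

    camera_json = json.dumps({"pixel_height": 480, "pixel_width": 854})
    print(zlib.crc32(repr(camera_json).encode()))  # an unsigned 32-bit int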
@@ -171,21 +268,32 @@ def get_hash_from_wait_call(
     :class:`str`
         A concatenation of the respective hashes of `animations_list and `current_mobjects_list`, separated by `_`.
     """
+    logger.debug("Hashing ...")
+    t_start = perf_counter()
+    global ALREADY_PROCESSED_ID
+    # We add the scene object to ALREADY_PROCESSED_ID, as we don't want to process it: pretty much all of its attributes will be processed sooner or later anyway (in one of the three hashes).
+    ALREADY_PROCESSED_ID = {id(scene_object): scene_object}
     camera_json = get_json(get_camera_dict_for_hashing(camera_object))
     current_mobjects_list_json = [
-        get_json(x) for x in sorted(current_mobjects_list, key=lambda obj: str(obj))
+        get_json(x) for x in sorted(current_mobjects_list, key=str)
     ]
     hash_current_mobjects = zlib.crc32(repr(current_mobjects_list_json).encode())
     hash_camera = zlib.crc32(repr(camera_json).encode())
     if stop_condition_function is not None:
         hash_function = zlib.crc32(get_json(stop_condition_function).encode())
+        # This resets ALREADY_PROCESSED_ID, as the whole hashing process is finished.
+        ALREADY_PROCESSED_ID = {}
+        t_end = perf_counter()
+        logger.debug("Hashing done in {:.5f} s.".format(t_end - t_start))
         return "{}_{}{}_{}".format(
             hash_camera,
             str(wait_time).replace(".", "-"),
             hash_function,
             hash_current_mobjects,
         )
-    else:
-        return "{}_{}_{}".format(
-            hash_camera, str(wait_time).replace(".", "-"), hash_current_mobjects
-        )
+    ALREADY_PROCESSED_ID = {}
+    t_end = perf_counter()
+    logger.debug("Hashing done in {:.5f} s.".format(t_end - t_start))
+    return "{}_{}_{}".format(
+        hash_camera, str(wait_time).replace(".", "-"), hash_current_mobjects
+    )
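The epilogue (reset the registry, stop the timer, log the duration) is duplicated in
both branches above, and again in get_hash_from_play_call. A possible follow-up
refactor — not part of this patch — would guarantee the reset with try/finally:

    # Hypothetical refactor sketch (not in this patch).
    def _hash_with_clean_registry(compute_hash):
        global ALREADY_PROCESSED_ID
        try:
            return compute_hash()
        finally:
            # The registry is cleared even if hashing raises.
            ALREADY_PROCESSED_ID = {}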
diff --git a/tests/test_hashing.py b/tests/test_hashing.py
new file mode 100644
index 0000000000..aa295415ee
--- /dev/null
+++ b/tests/test_hashing.py
@@ -0,0 +1,110 @@
+import manim.utils.hashing as hashing
+
+import json
+
+
+def test_JSON_basic():
+    o = {"test": 1, 2: 4, 3: 2.0}
+    o_serialized = hashing.get_json(o)
+    assert isinstance(o_serialized, str)
+    assert o_serialized == str({"test": 1, "2": 4, "3": 2.0}).replace("'", '"')
+
+
+def test_JSON_with_object():
+    class Obj:
+        def __init__(self, a):
+            self.a = a
+            self.b = 3.0
+            self.c = [1, 2, "test", ["nested list"]]
+            self.d = {2: 3, "2": "salut"}
+
+    o = Obj(2)
+    o_serialized = hashing.get_json(o)
+    assert (
+        str(o_serialized)
+        == '{"a": 2, "b": 3.0, "c": [1, 2, "test", ["nested list"]], "d": {"2": 3, "2": "salut"}}'
+    )
+
+
+def test_JSON_with_function():
+    def test(uhu):
+        uhu += 2
+        return uhu
+
+    o_serialized = hashing.get_json(test)
+    dict_o = json.loads(o_serialized)
+    assert "code" in dict_o
+    assert "nonlocals" in dict_o
+    assert (
+        str(o_serialized)
+        == r'{"code": "    def test(uhu):\n        uhu += 2\n        return uhu\n", "nonlocals": {}}'
+    )
+
+
+def test_JSON_with_function_and_external_val():
+    external = 2
+
+    def test(uhu):
+        uhu += external
+        return uhu
+
+    o_ser = hashing.get_json(test)
+    external = 3
+    o_ser2 = hashing.get_json(test)
+    assert json.loads(o_ser2)["nonlocals"] == {"external": 3}
+    assert o_ser != o_ser2
+
+
+def test_JSON_with_method():
+    class A:
+        def __init__(self):
+            self.a = self.method
+            self.b = 3
+
+        def method(self, b):
+            b += 3
+            return b
+
+    o_ser = hashing.get_json(A())
+    dict_o = json.loads(o_ser)
+    assert dict_o["a"]["nonlocals"] == {}
+
+
+def test_JSON_with_wrong_keys():
+    def test():
+        return 3
+
+    class Test:
+        def __init__(self):
+            self.a = 2
+
+    a = {(1, 2): 3}
+    b = {Test(): 3}
+    c = {test: 3}
+
+    for el in (a, b, c):
+        o_ser = hashing.get_json(el)
+        dict_o = json.loads(o_ser)
+        # Check that this is an int (it means that the key has been hashed).
+        assert int(list(dict_o.keys())[0])
+
+
+def test_JSON_with_circular_references():
+    B = {1: 2}
+
+    class A:
+        def __init__(self):
+            self.b = B
+
+    B["circular_ref"] = A()
+    o_ser = hashing.get_json(B)
+    dict_o = json.loads(o_ser)
+    assert dict_o["circular_ref"]["b"]["circular_ref"] == "already_processed"
+
+
+def test_JSON_with_big_np_array():
+    import numpy as np
+
+    a = np.zeros((1000, 1000))
+    o_ser = hashing.get_json(a)
+    assert "TRUNCATED ARRAY" in o_ser
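The new test module exercises get_json directly, with no scene rendering involved,
so it can be run in isolation (assuming the repository's usual pytest setup):

    pytest tests/test_hashing.py -v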
diff --git a/tests/test_logging/expected.txt b/tests/test_logging/expected.txt
index f0920138e3..1f56f982ea 100644
--- a/tests/test_logging/expected.txt
+++ b/tests/test_logging/expected.txt
@@ -1,7 +1,13 @@
 DEBUG Read configuration files: config.py:
-DEBUG Animation : Partial movie file written in scene_file_writer.py:
-DEBUG Animation : Partial movie file written in scene_file_writer.py:
-DEBUG Animation : Partial movie file written in scene_file_writer.py:
+DEBUG Hashing ... hashing.py:
+DEBUG Hashing done in . s. hashing.py:
+INFO Animation : Partial movie file written in scene_file_writer.py:
+DEBUG Hashing ... hashing.py:
+DEBUG Hashing done in . s. hashing.py:
+INFO Animation : Partial movie file written in scene_file_writer.py:
+DEBUG Hashing ... hashing.py:
+DEBUG Hashing done in . s. hashing.py:
+INFO Animation : Partial movie file written in scene_file_writer.py:
 INFO scene_file_writer.py: File ready at