
Commit 5dd7bfe

Authored by Hugues Devimeux (huguesdevimeux), with Aathish04, leotrs, and PgBiel
Miscellaneous Scene-Caching Optimisations and Bugfixes. (#315)
* fix 301/scene-caching optimization
* added nested dict with circular references support + some docs
* optimization: now it ignores the scene object
* fix #320
* modified verbosity level
* added copy error handling
* fixed test_logging (maybe)
* captain black
* Various improvements and optimizations, mostly about copy handling.
* yes sir I run black I swear sir
* ahem, typo
* (hopefully) fixed logging test
* Suggestion from the great @aathis
* Disable Scene Caching if `-` is filename. Fix #348
* Import logger from proper place.
* Update expected log file
* added mappingproxy support
* fixed bug related to keys of the wrong format. Now manim really changes the key to its hash (wasn't really the case before).
* added large np array handling
* added message when using truncated array
* smolfix
* added unit test for hashing.py
* Fix a typo. Co-authored-by: Leo Torres <[email protected]>
* Assign suggestions from @PgBiel
* suggestion from @leotrs
* Apply suggestions from code review. Co-authored-by: Pg Biel <[email protected]>
* fixed tests
* improved code organization
* NO COLON NO COLON NO NO COLON NO LOCON NO COLON. Co-authored-by: Pg Biel <[email protected]>
* NO COLON NO COLON NO NO COLON NO LOCON NO COLON. Co-authored-by: Pg Biel <[email protected]>

Co-authored-by: Aathish Sivasubrahmanian <[email protected]>
Co-authored-by: Hugues Devimeux <[email protected]>
Co-authored-by: Leo Torres <[email protected]>
Co-authored-by: Pg Biel <[email protected]>
1 parent 9824463 commit 5dd7bfe

File tree: 6 files changed (+274, −47 lines)


manim/__main__.py

Lines changed: 4 additions & 1 deletion

@@ -111,13 +111,16 @@ def get_scene_classes_from_module(module):
 
 def get_module(file_name):
     if file_name == "-":
+        # Since this feature is used for rapid testing, using Scene Caching would be a
+        # hindrance in this case.
+        file_writer_config["disable_caching"] = True
         module = types.ModuleType("input_scenes")
         logger.info(
             "Enter the animation's code & end with an EOF (CTRL+D on Linux/Unix, CTRL+Z on Windows):"
         )
         code = sys.stdin.read()
         if not code.startswith("from manim import"):
-            logger.warn(
+            logger.warning(
                 "Didn't find an import statement for Manim. Importing automatically..."
             )
             code = "from manim import *\n" + code

manim/scene/scene.py

Lines changed: 2 additions & 2 deletions

@@ -805,7 +805,7 @@ def wrapper(self, *args, **kwargs):
         if not file_writer_config["disable_caching"]:
             mobjects_on_scene = self.get_mobjects()
             hash_play = get_hash_from_play_call(
-                self.camera, animations, mobjects_on_scene
+                self, self.camera, animations, mobjects_on_scene
             )
             self.play_hashes_list.append(hash_play)
             if self.file_writer.is_already_cached(hash_play):
@@ -836,7 +836,7 @@ def wrapper(self, duration=DEFAULT_WAIT_TIME, stop_condition=None):
         self.revert_to_original_skipping_status()
         if not file_writer_config["disable_caching"]:
             hash_wait = get_hash_from_wait_call(
-                self.camera, duration, stop_condition, self.get_mobjects()
+                self, self.camera, duration, stop_condition, self.get_mobjects()
             )
             self.play_hashes_list.append(hash_wait)
             if self.file_writer.is_already_cached(hash_wait):
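The two hunks above thread the scene object (`self`) into the hashing helpers; the surrounding wrapper logic (visible in the context lines) is a straightforward hash-keyed cache: compute a hash for the call, and skip rendering when a partial movie file for that hash already exists. A toy sketch of that pattern, with hypothetical names (not Manim's actual classes):

```python
import os
import zlib

CACHE_DIR = "partial_movie_files"  # hypothetical cache directory for this sketch

def is_already_cached(hash_key: str) -> bool:
    # Mirrors the role of file_writer.is_already_cached in the diff.
    return os.path.exists(os.path.join(CACHE_DIR, f"{hash_key}.mp4"))

def play(animation_repr: str) -> None:
    hash_play = str(zlib.crc32(animation_repr.encode()))
    if is_already_cached(hash_play):
        print(f"Animation {hash_play}: reusing cached partial movie file.")
        return
    print(f"Animation {hash_play}: rendering...")  # the expensive work happens here
```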

manim/scene/scene_file_writer.py

Lines changed: 1 addition & 1 deletion

@@ -446,7 +446,7 @@ def close_movie_pipe(self):
         shutil.move(
             self.temp_partial_movie_file_path, self.partial_movie_file_path,
         )
-        logger.debug(
+        logger.info(
             f"Animation {self.scene.num_plays} : Partial movie file written in {self.partial_movie_file_path}"
         )
 
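The `logger.debug` → `logger.info` change corresponds to the "modified verbosity level" item in the commit message: at the default INFO level, debug records are filtered out, so the "partial movie file written" message was previously invisible. A quick illustration:

```python
import logging

logging.basicConfig(level=logging.INFO)  # a common default verbosity
log = logging.getLogger("demo")

log.debug("hidden: below the INFO threshold")
log.info("shown: at or above the INFO threshold")
```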

manim/utils/hashing.py

Lines changed: 148 additions & 40 deletions

@@ -2,12 +2,14 @@
 import zlib
 import inspect
 import copy
-import dis
 import numpy as np
-from types import ModuleType
+from types import ModuleType, MappingProxyType, FunctionType, MethodType
+from time import perf_counter
 
 from .. import logger
 
+ALREADY_PROCESSED_ID = {}
+
 
 class CustomEncoder(json.JSONEncoder):
     def default(self, obj):
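The new module-level `ALREADY_PROCESSED_ID` registry exists because `json.dumps` refuses self-referencing containers with `ValueError: Circular reference detected` (the error named in the NOTE comment in the next hunk). Tracking `id()`s lets the encoder substitute a sentinel string for anything it has already seen. A minimal sketch of the failure and the workaround, independent of Manim:

```python
import json

d = {"name": "scene"}
d["self"] = d  # circular reference

try:
    json.dumps(d)
except ValueError as e:
    print(e)  # Circular reference detected

# Replace already-seen containers with a sentinel before encoding,
# which is what _check_iterable below does via ALREADY_PROCESSED_ID.
seen = {id(d): d}
safe = {k: ("already_processed" if id(v) in seen else v) for k, v in d.items()}
print(json.dumps(safe))  # {"name": "scene", "self": "already_processed"}
```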
@@ -30,56 +32,132 @@ def default(self, obj):
         Python object that JSON encoder will recognize
 
         """
-        if inspect.isfunction(obj) and not isinstance(obj, ModuleType):
+        if not (isinstance(obj, ModuleType)) and isinstance(
+            obj, (MethodType, FunctionType)
+        ):
             cvars = inspect.getclosurevars(obj)
             cvardict = {**copy.copy(cvars.globals), **copy.copy(cvars.nonlocals)}
             for i in list(cvardict):
                 # NOTE : All module types objects are removed, because otherwise it throws ValueError: Circular reference detected if not. TODO
                 if isinstance(cvardict[i], ModuleType):
                     del cvardict[i]
-            return {"code": inspect.getsource(obj), "nonlocals": cvardict}
+            return self._check_iterable(
+                {"code": inspect.getsource(obj), "nonlocals": cvardict}
+            )
         elif isinstance(obj, np.ndarray):
-            return list(obj)
+            if obj.size > 1000:
+                obj = np.resize(obj, (100, 100))
+                return f"TRUNCATED ARRAY: {repr(obj)}"
+            # We return the repr and not a list to avoid the JSONEncoder iterating over it.
+            return repr(obj)
         elif hasattr(obj, "__dict__"):
             temp = getattr(obj, "__dict__")
-            return self._encode_dict(temp)
+            # MappingProxy is not supported by the JSON encoder
+            if isinstance(temp, MappingProxyType):
+                return dict(temp)
+            return self._check_iterable(temp)
         elif isinstance(obj, np.uint8):
             return int(obj)
-        try:
-            return json.JSONEncoder.default(self, obj)
-        except TypeError:
-            # This is used when the user enters an unknown type in CONFIG. Rather than throwing an error, we transform
-            # it into a string "Unsupported type for hashing" so that it won't affect the hash.
-            return "Unsupported type for hashing"
-
-    def _encode_dict(self, obj):
-        """Clean dicts to be serialized : As dict keys must be of the type (str, int, float, bool), we have to change them when they are not of the right type.
-        To do that, if one is not of the good type we turn it into its hash using the same
-        method as all the objects here.
+
+        return f"Unsupported type for serializing -> {str(type(obj))}"
+
+    def _handle_already_processed(self, obj):
+        """Handle if an object has been already processed by checking the id of the object.
+
+        This prevents the mechanism from handling an object several times, and is used to prevent any circular reference.
 
         Parameters
         ----------
         obj : Any
-            The obj to be cleaned.
+            The obj to check.
 
         Returns
         -------
         Any
-            The object cleaned following the processus above.
+            "already_processed" string if it has been processed, otherwise obj.
         """
+        global ALREADY_PROCESSED_ID
+        if id(obj) in ALREADY_PROCESSED_ID:
+            return "already_processed"
+        if not isinstance(obj, (str, int, bool, float)):
+            ALREADY_PROCESSED_ID[id(obj)] = obj
+        return obj
 
-        def key_to_hash(key):
-            if not isinstance(key, (str, int, float, bool)) and key is not None:
-                # print('called')
-                return zlib.crc32(json.dumps(key, cls=CustomEncoder).encode())
-            return key
+    def _check_iterable(self, iterable):
+        """Check for circular references in each iterable that will go through the JSONEncoder, as well as keys of the wrong format.
 
-        if isinstance(obj, dict):
-            return {key_to_hash(k): self._encode_dict(v) for k, v in obj.items()}
-        return obj
+        If a key with a bad format is found (i.e. not an int, string, or float), it gets replaced by its hash using the same process implemented here.
+        If a circular reference is found within the iterable, it will be replaced by the string "already processed".
+
+        Parameters
+        ----------
+        iterable : Iterable[Any]
+            The iterable to check.
+        """
+
+        def _key_to_hash(key):
+            return zlib.crc32(json.dumps(key, cls=CustomEncoder).encode())
+
+        def _iter_check_list(lst):
+            # We have to make a copy, as we don't want to touch the original list
+            # A deepcopy isn't necessary as it is already recursive.
+            lst_copy = copy.copy(lst)
+            for i, el in enumerate(lst):
+                if not isinstance(lst, tuple):
+                    lst_copy[i] = self._handle_already_processed(
+                        el
+                    )  # ISSUE here, because of copy.
+                if isinstance(el, (list, tuple)):
+                    lst_copy[i] = _iter_check_list(el)
+                elif isinstance(el, dict):
+                    lst_copy[i] = _iter_check_dict(el)
+            return lst_copy
+
+        def _iter_check_dict(dct):
+            # We have to make a copy, as we don't want to touch the original dict
+            # A deepcopy isn't necessary as it is already recursive.
+            dct_copy = copy.copy(dct)
+            for k, v in dct.items():
+                dct_copy[k] = self._handle_already_processed(v)
+                # We check if the key is of the right format (supported by JSON)
+                if not isinstance(k, (str, int, float, bool)) and k is not None:
+                    k_new = _key_to_hash(k)
+                    # We delete the value coupled with the old key, as the value is now coupled with the new key.
+                    dct_copy[k_new] = dct_copy[k]
+                    del dct_copy[k]
+                else:
+                    k_new = k
+                if isinstance(v, dict):
+                    dct_copy[k_new] = _iter_check_dict(v)
+                elif isinstance(v, (list, tuple)):
+                    dct_copy[k_new] = _iter_check_list(v)
+            return dct_copy
+
+        if isinstance(iterable, (list, tuple)):
+            return _iter_check_list(iterable)
+        elif isinstance(iterable, dict):
+            return _iter_check_dict(iterable)
 
     def encode(self, obj):
-        return super().encode(self._encode_dict(obj))
+        """Overriding of :meth:`JSONEncoder.encode`, to make our own process.
+
+        Parameters
+        ----------
+        obj: Any
+            The object to encode in JSON.
+
+        Returns
+        -------
+        :class:`str`
+            The object encoded with the standard json process.
+        """
+        # We need to mark as already processed the first object to go in the process,
+        # as after, only objects that come from iterables will be marked as such.
+        global ALREADY_PROCESSED_ID
+        ALREADY_PROCESSED_ID[id(obj)] = obj
+        if isinstance(obj, (dict, list, tuple)):
+            return super().encode(self._check_iterable(obj))
+        return super().encode(obj)
 
 
 def get_json(obj):
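Two behaviours from the hunk above are worth seeing in isolation: dict keys that JSON cannot serialize are replaced by the crc32 of their JSON dump (`_key_to_hash`), and numpy arrays with more than 1000 elements are resized to 100×100 and stringified rather than fully serialized. A standalone sketch using plain `json.dumps` in place of the `CustomEncoder`:

```python
import json
import zlib

import numpy as np

def key_to_hash(key):
    # Same idea as _key_to_hash, but with the default encoder.
    return zlib.crc32(json.dumps(key).encode())

d = {(1, 2): "value"}  # a tuple key is not a valid JSON key
cleaned = {
    key_to_hash(k) if not isinstance(k, (str, int, float, bool)) else k: v
    for k, v in d.items()
}
print(json.dumps(cleaned))  # {"<crc32 of [1, 2]>": "value"}

arr = np.ones(2000)
if arr.size > 1000:  # the same threshold the encoder uses
    arr = np.resize(arr, (100, 100))
    print(f"TRUNCATED ARRAY: {repr(arr)[:40]}...")
```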
@@ -120,11 +198,16 @@ def get_camera_dict_for_hashing(camera_object):
     return camera_object_dict
 
 
-def get_hash_from_play_call(camera_object, animations_list, current_mobjects_list):
+def get_hash_from_play_call(
+    scene_object, camera_object, animations_list, current_mobjects_list
+):
     """Take the list of animations and a list of mobjects and output their hashes. This is meant to be used for the `scene.play` function.
 
     Parameters
     -----------
+    scene_object : :class:`~.Scene`
+        The scene object.
+
     camera_object : :class:`~.Camera`
         The camera object used in the scene.
 
@@ -139,30 +222,44 @@ def get_hash_from_play_call(
     :class:`str`
         A string concatenation of the respective hashes of `camera_object`, `animations_list` and `current_mobjects_list`, separated by `_`.
     """
+    logger.debug("Hashing ...")
+    global ALREADY_PROCESSED_ID
+    # We add the scene object to ALREADY_PROCESSED_ID, as we don't want to process it because pretty much all of its attributes will be sooner or later processed (in one of the three hashes).
+    ALREADY_PROCESSED_ID = {id(scene_object): scene_object}
+    t_start = perf_counter()
     camera_json = get_json(get_camera_dict_for_hashing(camera_object))
-    animations_list_json = [
-        get_json(x) for x in sorted(animations_list, key=lambda obj: str(obj))
-    ]
+    animations_list_json = [get_json(x) for x in sorted(animations_list, key=str)]
     current_mobjects_list_json = [
-        get_json(x) for x in sorted(current_mobjects_list, key=lambda obj: str(obj))
+        get_json(x) for x in sorted(current_mobjects_list, key=str)
     ]
     hash_camera, hash_animations, hash_current_mobjects = [
         zlib.crc32(repr(json_val).encode())
         for json_val in [camera_json, animations_list_json, current_mobjects_list_json]
     ]
+    t_end = perf_counter()
+    logger.debug("Hashing done in {:.5f} s.".format(t_end - t_start))
+    # This resets ALREADY_PROCESSED_ID, as the whole hashing process is finished.
+    ALREADY_PROCESSED_ID = {}
     return "{}_{}_{}".format(hash_camera, hash_animations, hash_current_mobjects)
 
 
 def get_hash_from_wait_call(
-    camera_object, wait_time, stop_condition_function, current_mobjects_list
+    scene_object,
+    camera_object,
+    wait_time,
+    stop_condition_function,
+    current_mobjects_list,
 ):
     """Take a wait time, a boolean function as a stop condition and a list of mobjects, and then output their individual hashes. This is meant to be used for the `scene.wait` function.
 
     Parameters
     -----------
+    scene_object : :class:`~.Scene`
+        The scene object.
+
+    camera_object : :class:`~.Camera`
+        The camera object.
     wait_time : :class:`float`
         The time to wait
-
     stop_condition_function : Callable[[...], bool]
         Boolean function used as a stop_condition in `wait`.
 
@@ -171,21 +268,32 @@ def get_hash_from_wait_call(
     :class:`str`
         A concatenation of the respective hashes of `animations_list` and `current_mobjects_list`, separated by `_`.
     """
+    logger.debug("Hashing ...")
+    t_start = perf_counter()
+    global ALREADY_PROCESSED_ID
+    # We add the scene object to ALREADY_PROCESSED_ID, as we don't want to process it because pretty much all of its attributes will be sooner or later processed (in one of the three hashes).
+    ALREADY_PROCESSED_ID = {id(scene_object): scene_object}
     camera_json = get_json(get_camera_dict_for_hashing(camera_object))
     current_mobjects_list_json = [
-        get_json(x) for x in sorted(current_mobjects_list, key=lambda obj: str(obj))
+        get_json(x) for x in sorted(current_mobjects_list, key=str)
     ]
     hash_current_mobjects = zlib.crc32(repr(current_mobjects_list_json).encode())
     hash_camera = zlib.crc32(repr(camera_json).encode())
     if stop_condition_function is not None:
         hash_function = zlib.crc32(get_json(stop_condition_function).encode())
+        # This resets ALREADY_PROCESSED_ID, as the whole hashing process is finished.
+        ALREADY_PROCESSED_ID = {}
+        t_end = perf_counter()
+        logger.debug("Hashing done in {:.5f} s.".format(t_end - t_start))
         return "{}_{}{}_{}".format(
             hash_camera,
             str(wait_time).replace(".", "-"),
             hash_function,
             hash_current_mobjects,
         )
-    else:
-        return "{}_{}_{}".format(
-            hash_camera, str(wait_time).replace(".", "-"), hash_current_mobjects
-        )
+    ALREADY_PROCESSED_ID = {}
+    t_end = perf_counter()
+    logger.debug("Hashing done in {:.5f} s.".format(t_end - t_start))
+    return "{}_{}_{}".format(
+        hash_camera, str(wait_time).replace(".", "-"), hash_current_mobjects
+    )
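Putting it together, the cache keys these functions return are plain underscore-joined strings; the wait variant embeds the wait time with `.` swapped for `-` so the key stays filename-safe, plus the stop-condition hash when one is given. A quick illustration with made-up hash values:

```python
hash_camera, hash_animations, hash_current_mobjects = 123, 456, 789

play_key = "{}_{}_{}".format(hash_camera, hash_animations, hash_current_mobjects)
print(play_key)  # 123_456_789

wait_time = 1.5
wait_key = "{}_{}_{}".format(
    hash_camera, str(wait_time).replace(".", "-"), hash_current_mobjects
)
print(wait_key)  # 123_1-5_789
```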
