Skip to content

Commit aa8ea5d

Browse files
committed
test
1 parent 2f4afae commit aa8ea5d

File tree

3 files changed

+28
-6
lines changed

3 files changed

+28
-6
lines changed

examples/apps/flux_demo.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,14 +135,16 @@ def forward_loop(mod):
135135
pipe.transformer = trt_gm
136136
seed = 42
137137
image = pipe(
138-
["Beach and Kids"],
138+
[
139+
"enchanted winter forest, soft diffuse light on a snow-filled day, serene nature scene, the forest is illuminated by the snow"
140+
],
139141
output_type="pil",
140-
num_inference_steps=20,
142+
num_inference_steps=30,
141143
num_images_per_prompt=batch_size,
142144
generator=torch.Generator("cuda").manual_seed(seed),
143145
).images
144146
print(f"generated {len(image)} images")
145-
image[0].save("beach_kids.png")
147+
image[0].save("forest.png")
146148

147149
torch.cuda.empty_cache()
148150

py/torch_tensorrt/dynamo/lowering/passes/constant_folding.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,11 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
106106
import modelopt.torch.quantization as mtq
107107

108108
assert torch.ops.tensorrt.quantize_op.default
109+
assert torch.ops.tensorrt.dynamic_block_quantize_op.default
109110
self.quantization_ops.add(torch.ops.tensorrt.quantize_op.default)
111+
self.quantization_ops.add(
112+
torch.ops.tensorrt.dynamic_block_quantize_op.default
113+
)
110114
except Exception as e:
111115
pass
112116

tools/perf/Flux/flux_perf.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,24 @@
99
from flux_demo import compile_model
1010

1111

12+
def profile(pipe, prompt, inference_step, batch_size=1):
13+
print(f"Running torch profiler with {inference_step=} {batch_size=}")
14+
with torch.profiler.profile(
15+
activities=[torch.profiler.ProfilerActivity.CUDA],
16+
record_shapes=True,
17+
profile_memory=True,
18+
with_stack=True,
19+
) as prof:
20+
with torch.profiler.record_function("model_inference"):
21+
pipe(
22+
prompt,
23+
output_type="pil",
24+
num_inference_steps=inference_step,
25+
num_images_per_prompt=batch_size,
26+
).images
27+
print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=100))
28+
29+
1230
def benchmark(pipe, prompt, inference_step, batch_size=1, iterations=1):
1331
print(f"Running warmup with {batch_size=} {inference_step=} iterations=10")
1432
# warmup
@@ -41,9 +59,6 @@ def benchmark(pipe, prompt, inference_step, batch_size=1, iterations=1):
4159
"Average Latency Per Step:",
4260
(end - start) / inference_step / iterations / batch_size,
4361
)
44-
45-
# run the perf tool
46-
print(f"Running cudart perf tool with {inference_step=} {batch_size=}")
4762
return
4863

4964

@@ -52,6 +67,7 @@ def main(args):
5267
pipe, backbone, trt_gm = compile_model(args)
5368

5469
benchmark(pipe, ["Test"], 20, batch_size=args.max_batch_size, iterations=3)
70+
# profile(pipe, ["enchanted winter forest, soft diffuse light on a snow-filled day, serene nature scene, the forest is illuminated by the snow"], 20, batch_size=args.max_batch_size)
5571

5672

5773
if __name__ == "__main__":

0 commit comments

Comments (0)