Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 61380f6

Browse files
committed
dev: add benchmark and graph scripts
1 parent 43a411e commit 61380f6

File tree

2 files changed

+68
-0
lines changed

2 files changed

+68
-0
lines changed

dev/benchmark.sh

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/bin/bash
2+
3+
# Run the unittest cases in tests/test_database_types.py whose names match
# the pattern given as $1 (forwarded to unittest's -k option).
#
# N_SAMPLES, N_THREADS and LOG_LEVEL are taken from the caller's environment
# when set, otherwise they default to 1000000, 16 and "info". BENCHMARK=1 is
# always set for the test process.
# NOTE(review): presumably BENCHMARK=1 is what makes the tests emit the
# benchmark_*.jsonl files that dev/graph.py reads -- confirm in the test suite.
run_test() {
  N_SAMPLES=${N_SAMPLES:-1000000} N_THREADS=${N_THREADS:-16} LOG_LEVEL=${LOG_LEVEL:-info} BENCHMARK=1 \
    poetry run python3 -m unittest tests/test_database_types.py -v -k "$1"
}
7+
8+
# Test-name patterns to benchmark, executed in the order listed.
benchmarks=(
  "postgresql_int_mysql_int"
  "mysql_int_mysql_int"
  "postgresql_int_postgresql_int"
  "postgresql_ts6_n_tz_mysql_ts0"
  "postgresql_ts6_n_tz_snowflake_ts9"
  "postgresql_int_presto_int"
  "postgresql_int_redshift_int"
  "postgresql_int_snowflake_int"
  # "postgresql_int_bigquery_int"
  "snowflake_int_snowflake_int"
)

for benchmark in "${benchmarks[@]}"; do
  run_test "$benchmark"
done

# Render the graphs once every benchmark has been run.
poetry run python dev/graph.py

dev/graph.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import pandas as pd
2+
import plotly.graph_objects as go
3+
from data_diff.utils import number_to_human
4+
import glob
5+
6+
# (legend label, timing column, text colour) for each bar series in a chart.
BAR_SERIES = [
    ("count(*)", "count_max_sec", "blue"),
    ("data-diff (checksum)", "checksum_sec", "red"),
    ("Download and compare †", "download_sec", "green"),
]


def group_by_row_count(rows):
    """Group benchmark results by the value of their "rows" column.

    Iterating the result yields ``(n_rows, group)`` pairs in which ``n_rows``
    is a scalar. The grouper is deliberately the bare column name: a
    one-element list (``["rows"]``) makes pandas >= 2.0 yield 1-tuples such as
    ``(1000000,)`` as keys, which would then leak into the chart title and the
    image file name.

    Args:
        rows: DataFrame with a "rows" column.

    Returns:
        The pandas groupby object for the "rows" column.
    """
    return rows.groupby("rows")


# One pass per benchmark result file in the current directory; each file
# produces one PNG per distinct row count it contains.
for benchmark_file in glob.glob("benchmark_*.jsonl"):
    rows = pd.read_json(benchmark_file, lines=True)
    # Flag tests whose name mentions one of these databases, so the sort
    # below places unflagged (False) tests before flagged (True) ones.
    rows["cloud"] = rows["test"].str.match(r".*(snowflake|redshift|presto|bigquery)")
    # The sha is the text between the first "_" and the following "."
    # of the file name, i.e. benchmark_<sha>.jsonl.
    sha = benchmark_file.split("_")[1].split(".")[0]
    print(f"Generating graphs from {benchmark_file}..")

    for n_rows, group in group_by_row_count(rows):
        human_rows = number_to_human(n_rows)
        image_path = f"benchmark_{sha}_{human_rows}.png"
        print(f"\t rows: {human_rows}, image: {image_path}")

        # Keep a single result per test label, then order the bars.
        r = group.drop_duplicates(subset=["name_human"])
        r = r.sort_values(by=["cloud", "source_type", "target_type", "name_human"])

        fig = go.Figure(
            data=[
                go.Bar(
                    name=label,
                    x=r["name_human"],
                    y=r[column],
                    text=r[column],
                    textfont=dict(color=color),
                )
                for label, column, color in BAR_SERIES
            ]
        )
        fig.update_layout(title=f"data-diff {human_rows} rows, sha: {sha}")
        # Print each bar's value above it with one decimal place.
        fig.update_traces(texttemplate="%{text:.1f}", textposition="outside")
        fig.update_layout(uniformtext_minsize=2, uniformtext_mode="hide")
        fig.update_yaxes(title="Time")
        fig.write_image(image_path, scale=2)

0 commit comments

Comments
 (0)