Skip to content

Commit c04e4a0

Browse files
authored
Merge pull request #30 from InfuseAI/feature/upgrade-dbt-16
[Feature] Bump to dbt 1.6 and support new metric
2 parents fd5aa1a + 1f75528 commit c04e4a0

18 files changed

+74
-247
lines changed

.piperider/config.yml

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,6 @@
11
dataSources: []
22
dbt:
33
projectDir: .
4-
tag: 'piperider'
5-
6-
profiler:
7-
table:
8-
# the maximum row count to profile. (Default unlimited)
9-
# limit: 1000000
10-
duplicateRows: true
114

125
telemetry:
136
id: f3373c578173414fb4af8574d1a9725f

Makefile

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,10 @@
11
all: fetch load transform report piperider
22

3-
fetch_gh:
4-
@python -m git_repo_analytics.fetch_github_api
5-
@echo
6-
73
clone_repos:
84
@python -m git_repo_analytics.clone_repos
95
@echo
106

11-
fetch: clone_repos fetch_gh
7+
fetch: clone_repos
128

139
load:
1410
@python -m git_repo_analytics.load
@@ -28,6 +24,5 @@ piperider:
2824
@piperider run -o data/piperider --open
2925
@echo
3026

31-
3227
clean:
33-
@rm -rf data
28+
@rm -rf data

README.md

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,7 @@ This is a demo project for [PipeRider](https://github.com/InfuseAI/piperider). I
2929
make load
3030
```
3131
32-
The file `git_repo.duckdb` is generated
33-
34-
> Note: If the rate limit is exceeded, you can get a higher rate limit by making authenticated requests. <br/> To do so, set the `GITHUB_TOKEN` environment variable to your token value: <br/> `export GITHUB_TOKEN=XXXX`<br/> Check out the [GitHub documentation](https://docs.github.com/rest/overview/resources-in-the-rest-api#rate-limiting) for more details.
32+
The file `git_repo.duckdb` is generated under `./data`
3533
3634
1. Run dbt
3735
```
@@ -53,5 +51,4 @@ make
5351
5452
# Screenshots
5553
56-
![](assets/screenshot3.png)
57-
![](assets/screenshot4.png)
54+
![](assets/screenshot5.png)

assets/screenshot5.png

423 KB
Loading

dbt_project.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,6 @@ models:
3131
git_repo_analytics:
3232
staging:
3333
+tags: [piperider]
34-
intermediate:
35-
+materialized: table
36-
+tags: [piperider]
3734
marts:
3835
+materialized: table
3936
+tags: [piperider]

git_repo_analytics/fetch_github_api.py

Lines changed: 0 additions & 71 deletions
This file was deleted.

git_repo_analytics/gen_report.py

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,39 +16,28 @@ def generate_table():
1616
{'selector': 'td',
1717
'props': [('padding', '5px')]}]
1818

19-
df = conn.query('SELECT * FROM repos').fetchdf()
19+
df = conn.query(
20+
'SELECT count(distinct(author)) as contributors, count(distinct(hash)) as commits '
21+
'FROM stg_commits '
22+
'GROUP BY repo'
23+
).fetchdf()
2024
df = df.fillna('')
2125
df = df.replace({pd.NaT: None})
2226
styled_table = df.style.set_table_styles(css)
2327

24-
styled_table = styled_table.format({'repo': lambda url: f'<a href="https://github.com/{url}">{url}</a>',
25-
'homepage': lambda url: f'<a href="{url}">{url}</a>'})
28+
styled_table = styled_table.format({'repo': lambda url: f'<a href="https://github.com/{url}">{url}</a>'})
2629
html_table = styled_table.to_html(index=False)
2730
return html_table
2831

2932

30-
def generate_chart(path):
31-
with duckdb.connect(database='data/git_repo.duckdb') as conn:
32-
df = conn.query('SELECT * FROM commit_weekly').fetchdf()
33-
fig, ax = plt.subplots()
34-
35-
for key, grp in df.groupby('repo'):
36-
ax = grp.plot(ax=ax, kind='line', x='date_week', y='total_commits', label=key)
37-
plt.legend(loc='best')
38-
plt.savefig(path)
39-
40-
4133
def generate():
4234
html_table = generate_table()
4335
os.makedirs('data/report', exist_ok=True)
44-
generate_chart('data/report/weekly_commits.png')
4536
html_path = 'data/report/index.html'
4637
with open(html_path, 'w') as f:
4738
f.write('<html><body>')
4839
f.write('<h2>Git repo analytics</h2>')
4940
f.write(html_table)
50-
f.write('<h3>Weekly commits</h3>')
51-
f.write("<img src='weekly_commits.png'>")
5241
f.write('</body></html>')
5342

5443
abs_html_path = os.path.abspath(html_path)

git_repo_analytics/load.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -69,23 +69,6 @@ def commit(c):
6969
conn.executemany("INSERT INTO raw_commits VALUES(?, ?, ?, ?, ?, ?, ?)", map(commit, commits))
7070

7171

72-
def load_github(conn):
73-
print("Loading github data...")
74-
75-
conn.execute(
76-
'''
77-
create table raw_repos as
78-
select * from 'data/gh/*/repo.csv';
79-
'''
80-
)
81-
conn.execute(
82-
'''
83-
create table raw_contributers as
84-
select * from 'data/gh/*/contributers.csv';
85-
'''
86-
)
87-
88-
8972
if __name__ == '__main__':
9073
fname = 'data/git_repo.duckdb'
9174

@@ -94,4 +77,3 @@ def load_github(conn):
9477

9578
with duckdb.connect(database=fname) as conn:
9679
load_repos(conn)
97-
load_github(conn)

models/marts/commit_weekly.sql

Lines changed: 0 additions & 9 deletions
This file was deleted.

models/marts/metrics.yml

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,45 @@
11
version: 2
22

3+
semantic_models:
4+
- name: commits
5+
model: ref('stg_commits')
6+
description: commit information from git repo's raw commit
7+
defaults:
8+
agg_time_dimension: datetime
9+
10+
entities:
11+
- name: repo
12+
type: primary
13+
14+
dimensions:
15+
- name: datetime
16+
type: time
17+
type_params:
18+
time_granularity: day
19+
20+
measures:
21+
- name: total_commits
22+
description: "The total number of commits in the repo"
23+
agg: count_distinct
24+
expr: hash
25+
26+
- name: active_authors
27+
description: "The total number of active authors in the repo"
28+
agg: count_distinct
29+
expr: author
30+
331
metrics:
432

533
- name: total_commits
34+
description: "The total number of commits in the repo"
35+
type: simple
636
label: Total commits
7-
model: ref('stg_commits')
8-
9-
calculation_method: count_distinct
10-
expression: hash
11-
12-
timestamp: datetime
13-
time_grains: [ day, week, month, quarter, year, all_time ]
14-
dimensions: [ repo ]
15-
16-
tags:
17-
- piperider
37+
type_params:
38+
measure: total_commits
1839

1940
- name: active_authors
41+
description: "The total number of active authors in the repo"
42+
type: simple
2043
label: Active authors
21-
model: ref('stg_commits')
22-
23-
calculation_method: count_distinct
24-
expression: author
25-
26-
timestamp: datetime
27-
time_grains: [ day, week, month ]
28-
dimensions: [ repo ]
29-
30-
tags:
31-
- piperider
44+
type_params:
45+
measure: active_authors

0 commit comments

Comments
 (0)