Changes from all commits
.github/workflows/regenerate_dashboard.yml (2 changes: 1 addition & 1 deletion)

@@ -31,7 +31,7 @@ jobs:
 
       - name: "Regenerate the dashboard webpages"
         run: |
-          python3 ./dashboard.py "all-open-PRs-1.json" "all-open-PRs-2.json" > ./index-old.html
+          python3 ./dashboard.py
           rm *.json
 
       - name: Commit and push changes
.github/workflows/update_metadata.yml (2 changes: 1 addition & 1 deletion)

@@ -78,7 +78,7 @@ jobs:
       - name: "Regenerate the dashboard webpages"
         if: ${{ !cancelled() }} && (steps.download-json.outcome == 'success')
         run: |
-          python3 ./dashboard.py "all-open-PRs-1.json" "all-open-PRs-2.json" > ./index-old.html
+          python3 ./dashboard.py
           rm *.json
 
      - name: Commit and push changes
ARCHITECTURE.md (2 changes: 1 addition & 1 deletion)

@@ -106,7 +106,7 @@ There are several levels at which this project can be tested. Currently, there a
 
 TODO: there are more generated HTML files now; recommend copying the folder instead...
 - changes to just `dashboard.py` can be tested using the JSON files in the `test` directory: run the following from the `test` directory.
-  `python3 ../dashboard.py all-open-PRs-1.json all-open-PRs-2.json`.
+  `python3 ../dashboard.py`.
   This creates two webpages named `on_the_queue.html` and `index.html`, overwriting any previous files named thus.
   You can run this command before and after your changes and compare the resulting files (using `diff` or a similar tool). Because of the overwriting, take care to copy e.g. the old version of the output to a different file before running the tool again.
   (The output file needs to be in the top-level directory in order for the styling to work.)
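
Note: the before-and-after comparison described in the ARCHITECTURE.md excerpt above can be scripted. The following is a minimal sketch, assuming the `test` directory layout mentioned there and reusing the `index-old.html` name from the old workflow; it is not part of the repository.

    # Sketch: regenerate the dashboard before and after a change and diff the output.
    # Assumes dashboard.py writes index.html into the current directory, as the
    # ARCHITECTURE.md excerpt above describes.
    import difflib
    import shutil
    import subprocess

    subprocess.run(["python3", "../dashboard.py"], check=True)
    shutil.copy("index.html", "index-old.html")  # set the old output aside before it is overwritten

    # ... apply your changes to dashboard.py, then regenerate ...
    subprocess.run(["python3", "../dashboard.py"], check=True)

    with open("index-old.html") as old, open("index.html") as new:
        diff = difflib.unified_diff(old.readlines(), new.readlines(),
                                    fromfile="index-old.html", tofile="index.html")
    print("".join(diff), end="")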
compute_dashboard_prs.py (26 changes: 6 additions & 20 deletions)

@@ -115,11 +115,9 @@ class AggregatePRInfo(NamedTuple):
     first_on_queue: Tuple[DataStatus, datetime | None] | None
     total_queue_time: TotalQueueTime | None
 
-# Missing aggregate information will be replaced by this default item.
-PLACEHOLDER_AGGREGATE_INFO = AggregatePRInfo(
-    False, CIStatus.Missing, "master", "leanprover-community", "open", datetime.now(timezone.utc),
-    "unknown", "unknown title", [], -1, -1, -1, [], [], None, None, None, None,
-)
+    @staticmethod
+    def toBasicPRInformation(self, number: int) -> BasicPRInformation:
+        return BasicPRInformation(number, self.author, self.title, infer_pr_url(number), self.labels, self.last_updated)
 
 
 # Parse the contents |data| of an aggregate json file into a dictionary pr number -> AggregatePRInfo.
@@ -195,7 +193,6 @@ def toLabel(name: str) -> Label:
 # (`BasicPRInformation` is not hashable, hence cannot be used as a dictionary key.)
 # 'aggregate_info' contains aggregate information about each PR's CI state,
 # draft status and base branch (and more, which we do not use).
-# If no detailed information was available for a given PR number, 'None' is returned.
 def compute_pr_statusses(aggregate_info: dict[int, AggregatePRInfo], prs: List[BasicPRInformation]) -> dict[int, PRStatus]:
     def determine_status(aggregate_info: AggregatePRInfo) -> PRStatus:
         # Ignore all "other" labels, which are not relevant for this anyway.
@@ -204,7 +201,7 @@ def determine_status(aggregate_info: AggregatePRInfo) -> PRStatus:
         state = PRState(labels, aggregate_info.CI_status, aggregate_info.is_draft, from_fork)
         return determine_PR_status(datetime.now(timezone.utc), state)
 
-    return {info.number: determine_status(aggregate_info[info.number] or PLACEHOLDER_AGGREGATE_INFO) for info in prs}
+    return {info.number: determine_status(aggregate_info[info.number]) for info in prs}
 
 
 # Does a PR have a given label?
@@ -391,16 +388,13 @@ def has_topic_label(pr: BasicPRInformation) -> bool:
     return (with_bad_title, prs_without_topic_label, prs_with_contradictory_labels)
 
 
-# use_aggregate_queue: if True, determine the review queue (and everything depending on it)
-# from the aggregate data and not queue.json
 def determine_pr_dashboards(
     all_open_prs: List[BasicPRInformation],
     nondraft_PRs: List[BasicPRInformation],
     base_branch: dict[int, str],
     prs_from_fork: List[BasicPRInformation],
     CI_status: dict[int, CIStatus],
-    aggregate_info: dict[int, AggregatePRInfo],
-    use_aggregate_queue: bool,
+    aggregate_info: dict[int, AggregatePRInfo]
 ) -> dict[Dashboard, List[BasicPRInformation]]:
     approved = [pr for pr in nondraft_PRs if aggregate_info[pr.number].approvals]
     prs_to_list: dict[Dashboard, List[BasicPRInformation]] = dict()
@@ -445,15 +439,7 @@ def determine_pr_dashboards(
     foo = [pr for pr in interesting_CI if base_branch[pr.number] == "master" and pr not in prs_from_fork]
     prs_to_list[Dashboard.InessentialCIFails] = prs_without_any_label(foo, other_labels + ["merge-conflict"])
 
-    queue_prs2 = None
-    with open("queue.json", "r") as queuefile:
-        queue_prs2 = _extract_prs(json.load(queuefile))
-    queue_pr_numbers2 = [pr.number for pr in queue_prs2]
-    msg = "comparing this page's review dashboard (left) with the Github #queue (right)"
-    if my_assert_eq(msg, [pr.number for pr in queue_prs], queue_pr_numbers2):
-        print("Review dashboard and #queue match, hooray!", file=sys.stderr)
-
-    prs_to_list[Dashboard.Queue] = queue_prs if use_aggregate_queue else queue_prs2
+    prs_to_list[Dashboard.Queue] = queue_prs
     queue = prs_to_list[Dashboard.Queue]
     prs_to_list[Dashboard.QueueNewContributor] = prs_with_label(queue, "new-contributor")
     prs_to_list[Dashboard.QueueEasy] = prs_with_label(queue, "easy")
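
Note on the new helper: `toBasicPRInformation` is declared as a `@staticmethod` whose first parameter is named `self`, so the `AggregatePRInfo` value is passed in explicitly and the method is called through the class. A minimal sketch of that calling convention, using a hypothetical toy class rather than the real one:

    from typing import NamedTuple

    class Point(NamedTuple):  # hypothetical stand-in for AggregatePRInfo
        x: int
        y: int

        @staticmethod
        def describe(self, label: str) -> str:
            # A @staticmethod receives no implicit instance; 'self' here is an
            # ordinary parameter, so the instance is passed in explicitly.
            return f"{label}: ({self.x}, {self.y})"

    p = Point(1, 2)
    # Called through the class, mirroring AggregatePRInfo.toBasicPRInformation(data, number):
    print(Point.describe(p, "example"))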
dashboard.py (44 changes: 11 additions & 33 deletions)

@@ -1,8 +1,7 @@
 #!/usr/bin/env python3
 
-# This script accepts json files as command line arguments and displays the data in an HTML dashboard.
-# It assumes that for each PR N which should appear in some dashboard,
-# there is a file N.json in the `data` directory, which contains all necessary detailed information about that PR.
+# This script uses the aggregate information in processed_data/all_open_prs.json
+# to generate several HTML webpages on an HTML dashboard.
 
 import json
 import sys
@@ -16,7 +15,7 @@
 from ci_status import CIStatus
 from classify_pr_state import PRState, PRStatus
 from compute_dashboard_prs import (AggregatePRInfo, BasicPRInformation, Label, DataStatus, LastStatusChange, TotalQueueTime,
-    PLACEHOLDER_AGGREGATE_INFO, compute_pr_statusses, determine_pr_dashboards, infer_pr_url, link_to, parse_aggregate_file, gather_pr_statistics, _extract_prs)
+    compute_pr_statusses, determine_pr_dashboards, infer_pr_url, link_to, parse_aggregate_file, gather_pr_statistics, _extract_prs)
 from mathlib_dashboards import Dashboard, short_description, long_description, getIdTitle
 from util import my_assert_eq, format_delta, timedelta_tryParse, relativedelta_tryParse
@@ -27,26 +26,14 @@
 class JSONInputData(NamedTuple):
     # All aggregate information stored for every open PR.
     aggregate_info: dict[int, AggregatePRInfo]
-    # Information about all open PRs
-    all_open_prs: List[BasicPRInformation]
 
 
-# Validate the command-line arguments and try to read all data passed in via JSON files.
-# Any number of JSON files passed in is fine; we interpret them all as containing open PRs.
 def read_json_files() -> JSONInputData:
-    if len(sys.argv) == 1:
-        print("error: need to pass in some JSON files with open PRs")
-        sys.exit(1)
-    all_open_prs = []
-    for i in range(1, len(sys.argv)):
-        with open(sys.argv[i]) as prfile:
-            open_prs = _extract_prs(json.load(prfile))
-            if len(open_prs) >= 900:
-                print(f"warning: file {sys.argv[i]} contains at least 900 PRs: the REST API will never return more than 1000 PRs. Please split the list into more files as necessary.", file=sys.stderr)
-            all_open_prs.extend(open_prs)
     with open(path.join("processed_data", "open_pr_data.json"), "r") as f:
         aggregate_info = parse_aggregate_file(json.load(f))
-    return JSONInputData(aggregate_info, all_open_prs)
+    return JSONInputData(aggregate_info)
 
 
 ### Helper methods: writing HTML code for various parts of the generated webpage ###
@@ -866,18 +853,11 @@ def main() -> None:
     # Populate basic information from the input data: splitting into draft and non-draft PRs
     # (mostly, we only use the latter); extract separate dictionaries for CI status and base branch.
 
-    # NB. We handle missing metadata by adding "default" values for its aggregate data
-    # (ready for review, open, against master, failing CI and just updated now).
-    aggregate_info = input_data.aggregate_info.copy()
-    for pr in input_data.all_open_prs:
-        if pr.number not in input_data.aggregate_info:
-            print(f"warning: found no aggregate information for PR {pr.number}; filling in defaults", file=sys.stderr)
-            aggregate_info[pr.number] = PLACEHOLDER_AGGREGATE_INFO
-    draft_PRs = [pr for pr in input_data.all_open_prs if aggregate_info[pr.number].is_draft]
-    nondraft_PRs = [pr for pr in input_data.all_open_prs if not aggregate_info[pr.number].is_draft]
-
-    # The only exception is for the "on the queue" page,
-    # which points out missing information explicitly, hence is passed the non-filled in data.
+    aggregate_info = input_data.aggregate_info
+    all_open_prs = [AggregatePRInfo.toBasicPRInformation(data, number) for (number, data) in aggregate_info.items()]
+    draft_PRs = [pr for pr in all_open_prs if aggregate_info[pr.number].is_draft]
+    nondraft_PRs = [pr for pr in all_open_prs if not aggregate_info[pr.number].is_draft]
 
     CI_status: dict[int, CIStatus] = dict()
     for pr in nondraft_PRs:
         if pr.number in input_data.aggregate_info:
@@ -888,12 +868,10 @@ def main() -> None:
     for pr in nondraft_PRs:
         base_branch[pr.number] = aggregate_info[pr.number].base_branch
     prs_from_fork = [pr for pr in nondraft_PRs if aggregate_info[pr.number].head_repo != "leanprover-community"]
-    all_pr_status = compute_pr_statusses(aggregate_info, input_data.all_open_prs)
+    all_pr_status = compute_pr_statusses(aggregate_info, all_open_prs)
     write_on_the_queue_page(all_pr_status, aggregate_info, nondraft_PRs, prs_from_fork, CI_status, base_branch)
 
-    # TODO: try to enable |use_aggregate_queue| 'queue_prs' again, once all the root causes
-    # for PRs getting 'dropped' by 'gather_stats.sh' are found and fixed.
-    prs_to_list = determine_pr_dashboards(input_data.all_open_prs, nondraft_PRs, base_branch, prs_from_fork, CI_status, aggregate_info, False)
+    prs_to_list = determine_pr_dashboards(all_open_prs, nondraft_PRs, base_branch, prs_from_fork, CI_status, aggregate_info)
 
     # FUTURE: can this time be displayed in the local time zone of the user viewing this page?
     updated = datetime.now(timezone.utc).strftime("%B %d, %Y at %H:%M UTC")
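
The simplified pipeline now reads a single aggregate file and derives everything else from it. A condensed sketch of the new flow (the path and function names match the diff above; this is not the full `main()`):

    import json
    from os import path

    from compute_dashboard_prs import AggregatePRInfo, parse_aggregate_file

    with open(path.join("processed_data", "open_pr_data.json"), "r") as f:
        aggregate_info = parse_aggregate_file(json.load(f))

    # Every open PR is derived from the aggregate data itself...
    all_open_prs = [AggregatePRInfo.toBasicPRInformation(data, number)
                    for (number, data) in aggregate_info.items()]
    # ...so the draft/non-draft split no longer needs placeholder entries.
    nondraft_PRs = [pr for pr in all_open_prs if not aggregate_info[pr.number].is_draft]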
docs/Overview.md (2 changes: 1 addition & 1 deletion)

@@ -13,7 +13,7 @@ This is not difficult, but requires changes in multiple stages.
 1. Edit `process.py`, change the `get_aggregate_data` function to extract the data you want.
    Pay attention to the fact that there is "basic" and "full" PR info; not all information might be available in both files.
 2. Run `process.py` locally to update the generated file; commit the changes (except for the changed time-stamp). This step is optional; you can also just push the previous step and let CI perform this update.
-3. Once step 2 is complete, edit the definition of `AggregatePRInfo` in `dashboard.py` to include your new data field. Update `PLACEHOLDER_AGGREGATE_INFO` as well. Update `read_json_files` to parse this field as well.
+3. Once step 2 is complete, edit the definition of `AggregatePRInfo` in `dashboard.py` to include your new data field. Update `read_json_files` to parse this field as well.
 4. Congratulations, now you have made some new metadata available to the dashboard processing. (For making use of this, see the previous bullet point for changing the dashboard.)

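As an illustration of step 3, extending `AggregatePRInfo` amounts to adding one field to the NamedTuple and parsing it from the JSON. The sketch below uses a trimmed-down record and a made-up field name `review_count`; the real `AggregatePRInfo` has many more fields.

    from typing import NamedTuple

    class MiniAggregatePRInfo(NamedTuple):  # trimmed-down stand-in for AggregatePRInfo
        title: str
        review_count: int  # the newly added, hypothetical field

    def parse_mini_entry(entry: dict) -> MiniAggregatePRInfo:
        # Tolerate older data files that predate the new field.
        return MiniAggregatePRInfo(entry["title"], entry.get("review_count", 0))

    print(parse_mini_entry({"title": "feat: some PR"}))  # review_count defaults to 0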
generate_assigment_page.py (4 changes: 1 addition & 3 deletions)

@@ -36,12 +36,10 @@
 
 # Assumes the aggregate data is correct: no cross-filling in of placeholder data.
 def compute_pr_list_from_aggregate_data_only(aggregate_data: dict[int, AggregatePRInfo]) -> dict[Dashboard, List[BasicPRInformation]]:
-    all_open_prs: List[BasicPRInformation] = []
     nondraft_PRs: List[BasicPRInformation] = []
     for number, data in aggregate_data.items():
         if data.state == "open":
             info = BasicPRInformation(number, data.author, data.title, infer_pr_url(number), data.labels, data.last_updated)
-            all_open_prs.append(info)
             if not data.is_draft:
                 nondraft_PRs.append(info)
     CI_status: dict[int, CIStatus] = dict()
@@ -54,7 +52,7 @@ def compute_pr_list_from_aggregate_data_only(aggregate_data: dict[int, Aggregate
     for pr in nondraft_PRs:
         base_branch[pr.number] = aggregate_data[pr.number].base_branch
     prs_from_fork = [pr for pr in nondraft_PRs if aggregate_data[pr.number].head_repo != "leanprover-community"]
-    return determine_pr_dashboards(all_open_prs, nondraft_PRs, base_branch, prs_from_fork, CI_status, aggregate_data, True)
+    return determine_pr_dashboards(all_open_prs, nondraft_PRs, base_branch, prs_from_fork, CI_status, aggregate_data)
 
 
 class ReviewerInfo(NamedTuple):
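
For completeness, a sketch of how this entry point can be driven from the aggregate file; the open() path mirrors dashboard.py, and the rest follows the signature shown above.

    import json
    from os import path

    from compute_dashboard_prs import parse_aggregate_file
    from generate_assigment_page import compute_pr_list_from_aggregate_data_only

    with open(path.join("processed_data", "open_pr_data.json"), "r") as f:
        aggregate_data = parse_aggregate_file(json.load(f))

    prs_to_list = compute_pr_list_from_aggregate_data_only(aggregate_data)
    for dashboard, prs in prs_to_list.items():
        print(dashboard, len(prs))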