Skip to content

Commit 1b18e98

Browse files
authored
[Github] Improve formatting of PR diffs in bot notifications (#66118)
* This avoids pinging folks on every issue when they were pinged on Bugzilla eons ago * Avoid formatting bugs when there is HTML in the issue description * Truncate the list of files and the diff independently of each other. This avoids cutting a file line in two and cutting in the middle of HTML markup. This is a fringe case, but it does happen when people accidentally push weird branches that conflict on all the files.
1 parent 4e970d7 commit 1b18e98

File tree

1 file changed

+40
-16
lines changed

1 file changed

+40
-16
lines changed

llvm/utils/git/github-automation.py

Lines changed: 40 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,16 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]:
4747
return None
4848

4949

50+
def escape_description(str):
    """Neutralize HTML, user mentions and issue references in a description.

    The result is meant to be embedded in a bot comment without rendering
    raw HTML, notifying users, or cross-linking issues.

    Args:
        str: The issue or pull request description. May be None or empty
            when no description was provided. (The parameter name shadows
            the builtin; it is kept so existing callers keep working.)

    Returns:
        The escaped description, or an empty string for a missing one.
    """
    # A missing description would otherwise crash inside html.escape().
    if not str:
        return ""

    # https://github.com/github/markup/issues/1168#issuecomment-494946168
    # quote=False: only '&', '<' and '>' are escaped, quotes are left alone.
    text = html.escape(str, False)
    # '@' followed by alphanum is a user name; an empty HTML comment after
    # the '@' breaks the mention without changing the rendered text.
    text = re.sub(r"@(?=\w+)", "@<!-- -->", text)
    # '#' followed by digits is considered an issue number
    text = re.sub(r"#(?=\d+\s)", "#<!-- -->", text)
    return text
58+
59+
5060
class IssueSubscriber:
5161
@property
5262
def team_name(self) -> str:
@@ -67,12 +77,15 @@ def run(self) -> bool:
6777
if team.slug == "issue-subscribers-good-first-issue":
6878
comment = "{}\n".format(beginner_comment)
6979

70-
comment = (
71-
f"@llvm/{team.slug}"
72-
+ "\n\n<details>\n"
73-
+ f"{self.issue.body}\n"
74-
+ "</details>"
75-
)
80+
body = escape_description(self.issue.body)
81+
82+
comment = f"""
83+
@llvm/{team.slug}
84+
85+
<details>
86+
{body}
87+
</details>
88+
"""
7689

7790
self.issue.create_comment(comment)
7891
return True
@@ -113,6 +126,11 @@ def run(self) -> bool:
113126
print(f"couldn't find team named {self.team_name}")
114127
return False
115128

129+
# GitHub limits comments to 65,536 characters, let's limit the diff
130+
# and the file list to 20kB each.
131+
STAT_LIMIT = 20 * 1024
132+
DIFF_LIMIT = 20 * 1024
133+
116134
# Get statistics for each file
117135
diff_stats = f"{self.pr.changed_files} Files Affected:\n\n"
118136
for file in self.pr.get_files():
@@ -125,35 +143,41 @@ def run(self) -> bool:
125143
if file.status == "renamed":
126144
print(f"(from {file.previous_filename})")
127145
diff_stats += "\n"
128-
diff_stats += "\n"
146+
if len(diff_stats) > STAT_LIMIT:
147+
break
129148

130149
# Get the diff
131150
try:
132-
patch = html.escape(requests.get(self.pr.diff_url).text)
151+
patch = requests.get(self.pr.diff_url).text
133152
except:
134153
patch = ""
135-
diff_stats += "\n<pre>\n" + html.escape(patch)
136154

137-
# GitHub limits comments to 65,536 characters, let's limit the diff to 20kB.
138-
DIFF_LIMIT = 20 * 1024
139155
patch_link = f"Full diff: {self.pr.diff_url}\n"
140156
if len(patch) > DIFF_LIMIT:
141157
patch_link = f"\nPatch is {human_readable_size(len(patch))}, truncated to {human_readable_size(DIFF_LIMIT)} below, full version: {self.pr.diff_url}\n"
142-
diff_stats = diff_stats[0:DIFF_LIMIT] + "...\n<truncated>\n"
143-
diff_stats += "</pre>"
158+
patch = patch[0:DIFF_LIMIT] + "...\n[truncated]\n"
144159
team_mention = "@llvm/{}".format(team.slug)
145160

146-
body = self.pr.body
161+
body = escape_description(self.pr.body)
162+
# Note: the comment is in markdown and the code below
163+
# is sensitive to line breaks
147164
comment = f"""
148165
{self.COMMENT_TAG}
149166
{team_mention}
150-
167+
151168
<details>
152169
<summary>Changes</summary>
170+
153171
{body}
154-
--
172+
---
155173
{patch_link}
174+
156175
{diff_stats}
176+
177+
``````````diff
178+
{patch}
179+
``````````
180+
157181
</details>
158182
"""
159183

0 commit comments

Comments
 (0)