From bc345e870abde717cc637c96e50d9bf7ceda87c7 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 1 Oct 2025 11:53:53 +0000 Subject: [PATCH 1/3] Update gemini_cookbook.py with checks from google.py --- .../tools/nblint/style/gemini_cookbook.py | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/tools/tensorflow_docs/tools/nblint/style/gemini_cookbook.py b/tools/tensorflow_docs/tools/nblint/style/gemini_cookbook.py index b34c8193c1..e6ce9ced8d 100644 --- a/tools/tensorflow_docs/tools/nblint/style/gemini_cookbook.py +++ b/tools/tensorflow_docs/tools/nblint/style/gemini_cookbook.py @@ -43,6 +43,27 @@ from tensorflow_docs.tools.nblint.decorator import Options +def search_wordlist(wordlist, src_str): + """Search for wordlist entries in text and return a dict of found items. + + Args: + wordlist: Dict of word entries and recommendations to search in string. + src_str: String to search for word entries. + + Returns: + A dict that is a subset of entries from `wordlist` found in `src_str`. + """ + found_words = {} + for word in wordlist: + # Whole words not touching '/'; lookarounds also match at the text edges. + if re.search(rf"(?<!/)\b{re.escape(word)}\b(?!/)", src_str, re.IGNORECASE): + alt_word = wordlist[word] + if not alt_word: + alt_word = "n/a" + found_words[word] = alt_word + return found_words + + # Acceptable copyright heading for notebooks following this style. copyrights_re = [ r"Copyright 20[1-9][0-9] The TensorFlow\s.*?\s?Authors", @@ -339,3 +360,48 @@ def button_r1_extra(args): ) else: return True + + +# Non-exhaustive list: {word: alt-word} (Use False if alt not provided.) 
+_SECOND_PERSON_WORDLIST = {"we": "you", "we're": "you are"} + + +@lint( + message="Prefer second person instead of first person: https://developers.google.com/style/person", + cond=Options.Cond.ALL) +def second_person(args): + """Test for first person usage in doc and recommend second person.""" + found_words = search_wordlist(_SECOND_PERSON_WORDLIST, args["cell_source"]) + if found_words: + words = ", ".join([f"{word} => {alt}" for word, alt in found_words.items()]) + fail( + f"Prefer second person instead of first person. Found: {words} in" + f" {args['cell_source']}" + ) + else: + return True + + +# Non-exhaustive list: {word: alt-word} (Use False if alt not provided.) +_INCLUSIVE_WORDLIST = { + "blacklist": "blocked", + "whitelist": "allowed", + "master": "primary", + "slave": "replica", +} + + +@lint( + message="Use inclusive language: https://developers.google.com/style/inclusive-documentation", + cond=Options.Cond.ALL) +def inclusive_language(args): + """Test for words found in inclusive wordlist and recommend alternatives.""" + found_words = search_wordlist(_INCLUSIVE_WORDLIST, args["cell_source"]) + if found_words: + words = ", ".join([f"{word} => {alt}" for word, alt in found_words.items()]) + fail( + f"Use inclusive language where possible and accurate. 
Found: {words} in" + f" {args['cell_source']}" + ) + else: + return True From f0b04695e10a35f9061de0d886c6e1f7b04de377 Mon Sep 17 00:00:00 2001 From: Mark McDonald Date: Mon, 13 Oct 2025 15:08:29 +0800 Subject: [PATCH 2/3] Run internal Python formatter --- .../tools/nblint/style/gemini_cookbook.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tools/tensorflow_docs/tools/nblint/style/gemini_cookbook.py b/tools/tensorflow_docs/tools/nblint/style/gemini_cookbook.py index e6ce9ced8d..15404acb96 100644 --- a/tools/tensorflow_docs/tools/nblint/style/gemini_cookbook.py +++ b/tools/tensorflow_docs/tools/nblint/style/gemini_cookbook.py @@ -367,8 +367,12 @@ def button_r1_extra(args): @lint( - message="Prefer second person instead of first person: https://developers.google.com/style/person", - cond=Options.Cond.ALL) + message=( + "Prefer second person instead of first person:" + " https://developers.google.com/style/person" + ), + cond=Options.Cond.ALL, +) def second_person(args): """Test for first person usage in doc and recommend second person.""" found_words = search_wordlist(_SECOND_PERSON_WORDLIST, args["cell_source"]) @@ -392,8 +396,12 @@ def second_person(args): @lint( - message="Use inclusive language: https://developers.google.com/style/inclusive-documentation", - cond=Options.Cond.ALL) + message=( + "Use inclusive language:" + " https://developers.google.com/style/inclusive-documentation" + ), + cond=Options.Cond.ALL, +) def inclusive_language(args): """Test for words found in inclusive wordlist and recommend alternatives.""" found_words = search_wordlist(_INCLUSIVE_WORDLIST, args["cell_source"]) From eb21989ecd55ff124bf1f4283b7bac349c0de99f Mon Sep 17 00:00:00 2001 From: Mark McDonald Date: Mon, 13 Oct 2025 17:29:53 +0800 Subject: [PATCH 3/3] pyformat again --- .../tools/nblint/style/gemini_cookbook.py | 52 ++++++++++++------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git 
a/tools/tensorflow_docs/tools/nblint/style/gemini_cookbook.py b/tools/tensorflow_docs/tools/nblint/style/gemini_cookbook.py index 15404acb96..1390de65c2 100644 --- a/tools/tensorflow_docs/tools/nblint/style/gemini_cookbook.py +++ b/tools/tensorflow_docs/tools/nblint/style/gemini_cookbook.py @@ -84,7 +84,8 @@ def copyright_check(args): @lint( message="Apache license cell is required", scope=Options.Scope.CODE, - cond=Options.Cond.ANY) + cond=Options.Cond.ANY, +) def license_check(args): if license_re.search(args["cell_source"]): return True @@ -126,8 +127,10 @@ def get_arg_or_fail(user_args, arg_name, arg_fmt): return user_args.get(arg_name) else: fail( - f"Requires user-argument '{arg_name}': nblint --arg={arg_name}:{arg_fmt} ...", - always_show=True) + f"Requires user-argument '{arg_name}': nblint" + f" --arg={arg_name}:{arg_fmt} ...", + always_show=True, + ) def split_doc_path(filepath): @@ -157,8 +160,8 @@ def split_doc_path(filepath): def split_path_on_dir(fp, dirname, offset=1): parts = fp.parts idx = parts.index(dirname) - docs_dir = pathlib.Path(*parts[idx:idx + offset]) - rel_path = fp.relative_to(*parts[:idx + offset]) + docs_dir = pathlib.Path(*parts[idx : idx + offset]) + rel_path = fp.relative_to(*parts[: idx + offset]) return docs_dir, rel_path if "site" in fp_full.parts: @@ -180,7 +183,8 @@ def split_path_on_dir(fp, dirname, offset=1): @lint( message="Missing or malformed URL in Colab button.", scope=Options.Scope.TEXT, - cond=Options.Cond.ANY) + cond=Options.Cond.ANY, +) def button_colab(args): """Test that the URL in the Colab button matches the file path.""" cell_source = args["cell_source"] @@ -201,13 +205,15 @@ def button_colab(args): fail( f"Colab button URL doesn't match: {this_url}", fix=fix.regex_between_groups_replace_all, - fix_args=[r"(href.*)http.*?(\\\".*colab_logo_32px.png)", this_url]) + fix_args=[r"(href.*)http.*?(\\\".*colab_logo_32px.png)", this_url], + ) @lint( message="Missing or malformed URL in Download button.", 
scope=Options.Scope.TEXT, - cond=Options.Cond.ANY) + cond=Options.Cond.ANY, +) def button_download(args): """Test that the URL in the Download button matches the file path.""" cell_source = args["cell_source"] @@ -224,7 +230,8 @@ def button_download(args): this_url = urllib.parse.urljoin( "https://storage.googleapis.com", - str(f"tensorflow_docs/{repo_name}" / docs_dir / rel_path)) + str(f"tensorflow_docs/{repo_name}" / docs_dir / rel_path), + ) if is_button_cell_re.search(cell_source) and cell_source.find(this_url) != -1: return True @@ -232,13 +239,15 @@ def button_download(args): fail( f"Download button URL doesn't match: {this_url}", fix=fix.regex_between_groups_replace_all, - fix_args=[r"(href.*)http.*?(\\\".*download_logo_32px.png)", this_url]) + fix_args=[r"(href.*)http.*?(\\\".*download_logo_32px.png)", this_url], + ) @lint( message="Missing or malformed URL in GitHub button.", scope=Options.Scope.TEXT, - cond=Options.Cond.ANY) + cond=Options.Cond.ANY, +) def button_github(args): """Test that the URL in the GitHub button matches the file path.""" cell_source = args["cell_source"] @@ -259,13 +268,15 @@ def button_github(args): fail( f"GitHub button URL doesn't match: {this_url}", fix=fix.regex_between_groups_replace_all, - fix_args=[r"(href.*)http.*?(\\\".*GitHub-Mark-32px.png)", this_url]) + fix_args=[r"(href.*)http.*?(\\\".*GitHub-Mark-32px.png)", this_url], + ) @lint( message="Missing or malformed URL in 'View on' button.", scope=Options.Scope.TEXT, - cond=Options.Cond.ANY) + cond=Options.Cond.ANY, +) def button_website(args): """Test that the website URL in the 'View on' button matches the file path. 
@@ -310,7 +321,8 @@ def button_website(args): @lint( message="Missing or malformed URL in 'TFHub' button.", scope=Options.Scope.TEXT, - cond=Options.Cond.ANY) + cond=Options.Cond.ANY, +) def button_hub(args): """Notebooks that mention tfhub.dev should have a TFHub button.""" cell_source = args["cell_source"] @@ -333,7 +345,8 @@ def button_hub(args): @lint( message="Remove extra buttons from TF 1.x docs.", scope=Options.Scope.TEXT, - cond=Options.Cond.ALL) + cond=Options.Cond.ALL, +) def button_r1_extra(args): """The r1/ docs should not have website or download buttons.""" cell_source = args["cell_source"] @@ -353,10 +366,13 @@ def button_r1_extra(args): base_url = "https://www.tensorflow.org/" # Look for button URLs that shouldn't be there.. - if (re.search(f"{base_url}/(?!images)", cell_source) or - cell_source.find(download_url) != -1): + if ( + re.search(f"{base_url}/(?!images)", cell_source) + or cell_source.find(download_url) != -1 + ): fail( - "Remove the 'View on' and 'Download notebook' buttons since r1/ docs are not published." + "Remove the 'View on' and 'Download notebook' buttons since r1/ docs" + " are not published." ) else: return True