|
9 | 9 | from git import Repo |
10 | 10 | from urllib.parse import urlparse |
11 | 11 | from bs4 import BeautifulSoup |
| 12 | +from domain_filter import is_validurl, is_valid_package |
| 13 | + |
12 | 14 |
|
13 | 15 | GITHUB_API = "https://api.github.com" |
14 | 16 | GITHUB_TOKEN = os.getenv("GH_TOKEN") |
|
50 | 52 |
|
51 | 53 |
|
52 | 54 | def check_package_url(name, lang): |
53 | | - name = name.strip() |
54 | | - ban_list = { |
55 | | - "host", |
56 | | - "port", |
57 | | - "design", |
58 | | - "pretty", |
59 | | - "performance", |
60 | | - "value", |
61 | | - "index", |
62 | | - "main", |
63 | | - "default", |
64 | | - "debug", |
65 | | - "error", |
66 | | - "message", |
67 | | - "json", |
68 | | - "config", |
69 | | - "release", |
70 | | - "object", |
71 | | - "input", |
72 | | - "output", |
73 | | - "none", |
74 | | - "true", |
75 | | - "false", |
76 | | - "null", |
77 | | - } |
78 | | - if ( |
79 | | - not name |
80 | | - or len(name) < 2 |
81 | | - or name.lower() in ban_list |
82 | | - or name.strip().isdigit() |
83 | | - or re.fullmatch(r"[-_.]+", name) |
84 | | - or re.match(r"^[A-Z0-9_]{3,}$", name) |
85 | | - or re.search(r"[^a-zA-Z0-9_\-]", name) |
86 | | - or name.startswith("-") |
87 | | - or name.endswith("-") |
88 | | - or name.count("-") > 3 |
89 | | - ): |
90 | | - return name, "INVALID" |
91 | | - |
92 | 55 | url = PACKAGE_REGISTRIES[lang](name) |
93 | 56 | try: |
94 | 57 | r = requests.get(url, timeout=6) |
@@ -185,15 +148,16 @@ def extract_urls_and_packages(repo_path): |
185 | 148 | print(f"[+] Found {len(raw_urls)} URLs in {full_path}") |
186 | 149 | for u in raw_urls: |
187 | 150 | print(f" URL: {u}") |
188 | | - findings["urls"].update(raw_urls) |
| 151 | + filtered = [u for u in raw_urls if is_validurl(u)] |
| 152 | + findings["urls"].update(filtered) |
189 | 153 | declared = extract_declared_packages(full_path) |
190 | 154 | for k in declared: |
191 | | - findings["packages"][k].update(declared[k]) |
| 155 | + cleaned = {p for p in declared[k] if is_valid_package(p)} |
| 156 | + findings["packages"][k].update(cleaned) |
192 | 157 | except Exception as e: |
193 | 158 | print(f"[!] Error reading {full_path}: {e}") |
194 | 159 | return findings |
195 | | - |
196 | | - |
| 160 | +# Organization-level functions (e.g. get_repos) start here |
197 | 161 | def get_repos(org): |
198 | 162 | repos = [] |
199 | 163 | page = 1 |
@@ -252,7 +216,9 @@ def write_output(org, findings): |
252 | 216 | for lang, pkgs in findings["packages"].items(): |
253 | 217 | f.write(f"\n==== {lang.upper()} Packages (Status) ====\n") |
254 | 218 | pkg_futures = [ |
255 | | - executor.submit(check_package_url, p, lang) for p in pkgs |
| 219 | + executor.submit(check_package_url, p, lang) |
| 220 | + for p in pkgs |
| 221 | + if is_valid_package(p) |
256 | 222 | ] |
257 | 223 | for pf in concurrent.futures.as_completed(pkg_futures): |
258 | 224 | name, status = pf.result() |
|
0 commit comments