From 58afe29d681e71230fc18bf3ec2602766b080f2d Mon Sep 17 00:00:00 2001
From: Raghav Verma
Date: Tue, 15 Aug 2023 16:41:05 +0000
Subject: [PATCH] Added MV3 parsing, updated extension permissions, and properly formatted code

---
 README.md                               |   33 +-
 docker-compose.yaml.example             |   17 +-
 tarnish-worker/configs/permissions.json |  212 +-
 tarnish-worker/tasks.py                 | 2928 ++++++++++++++---------
 4 files changed, 1899 insertions(+), 1291 deletions(-)

diff --git a/README.md b/README.md
index 13816d6..551867c 100644
--- a/README.md
+++ b/README.md
@@ -54,4 +54,35 @@ Pulls any Chrome extension from a provided Chrome webstore link.
 * Download the original extension.
 * Download a beautified version of the extension (auto prettified HTML and JavaScript).
 * Automatic caching of scan results, running an extension scan will take a good amount of time the first time you run it. However the second time, assuming the extension hasn’t been updated, will be almost instant due to the results being cached.
-Linkable Report URLs, easily link someone else to an extension report generated by tarnish.
\ No newline at end of file
+Linkable Report URLs, easily link someone else to an extension report generated by tarnish.
+
+## Running tarnish with local disk storage instead of S3
+
+### Prerequisite Steps:
+
+1. Set host.docker.internal to 127.0.0.1 in /etc/hosts. Do this on a
+   separate line from localhost, otherwise it will get overwritten:
+
+```bash
+127.0.0.1 localhost
+127.0.0.1 host.docker.internal
+```
+
+2. Run an upload/download server on your host. The S3 client function is
+   configured to use the `multipart/form-data` upload format and has only
+   been tested with [updog](https://github.com/sc0tfree/updog).
+   - Run `python3 auto_tarnish ` to set up the folders and files in the
+     directory you are running updog from.
+   - Update `server_dir` in docker-compose.yaml to the directory this
+     server is running from. This wouldn't be needed with a better
+     upload/download server.
+3. Run a local Redis instance (e.g. `docker run --name my-redis -p 6379:6379
+   -d redis`).
+4. Ignore CORS errors in your browser. This can be done with a browser
+   extension (e.g. [CORS Everywhere](https://addons.mozilla.org/en-US/firefox/addon/cors-everywhere/))
+   or a command-line flag (e.g. `google-chrome --disable-web-security`).
+   (Again, this would be solved by a better upload/download server.)
+
+To run:
+
+1. Start your local server on port 8080.
+2. `./start.sh`
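+
+For reference, everything the worker exchanges with this local server is plain
+HTTP: in local mode it rewrites its webstore METADATA_URL/DOWNLOAD_URL
+constants to point at host.docker.internal:8080, and it pushes scan artifacts
+up as a single `multipart/form-data` POST (see `upload_to_s3()` in
+`tarnish-worker/tasks.py`). A minimal sketch of that upload, assuming updog's
+`/upload` endpoint and the extra `path` form field used there (your server
+must accept that field):
+
+```python
+import os
+import requests
+
+SERVER = "http://host.docker.internal:8080/"
+
+def upload_local(remote_path, body, server_dir):
+    # One multipart POST: the file contents, plus the destination
+    # directory on the host (mirrors upload_to_s3()'s local branch).
+    files = {
+        "file": (remote_path.split("/")[-1], body),
+        "path": (None, os.path.join(server_dir, "uploads",
+                                    "/".join(remote_path.split("/")[:-1]))),
+    }
+    requests.post(SERVER + "upload", files=files)
+```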
diff --git a/docker-compose.yaml.example b/docker-compose.yaml.example
index 0202cdc..2c5f7dc 100644
--- a/docker-compose.yaml.example
+++ b/docker-compose.yaml.example
@@ -3,19 +3,26 @@ services:
   tarnishserver:
     build: ./tarnish-server/
     environment:
-      - redis_backend=redis://REDIS_HOSTNAME:6379/0
+      - redis_backend=redis://host.docker.internal:6379/0
       - aws_secret_key=REPLACE_ME_AWS_SECRET_KEY_WITH_S3_WRITE_PERMS
       - aws_access_key=REPLACE_ME_AWS_ACCESS_KEY_WITH_S3_WRITE_PERMS
       - extension_s3_bucket=REPLACE_ME_WITH_EXTENSIONS_S3
-      - debugging=false
-      - frontend_origin=https://your-web-origin.com
+      - debugging=true
+      - local=true
+      - frontend_origin=http://localhost:8001
     ports:
       - "80:80"
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
   tarnishworker:
     build: ./tarnish-worker/
     environment:
-      - redis_backend=redis://REDIS_HOSTNAME:6379/0
+      - redis_backend=redis://host.docker.internal:6379/0
       - aws_secret_key=REPLACE_ME_AWS_SECRET_KEY_WITH_S3_WRITE_PERMS
       - aws_access_key=REPLACE_ME_AWS_ACCESS_KEY_WITH_S3_WRITE_PERMS
       - extension_s3_bucket=REPLACE_ME_WITH_EXTENSIONS_S3
-      - frontend_origin=https://your-web-origin.com
+      - local=true
+      - frontend_origin=http://localhost:8001
+      - server_dir=REPLACE_ME_WITH_UPDOG_DIRECTORY
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
diff --git a/tarnish-worker/configs/permissions.json b/tarnish-worker/configs/permissions.json
index d121fa3..0642278 100644
--- a/tarnish-worker/configs/permissions.json
+++ b/tarnish-worker/configs/permissions.json
@@ -1,97 +1,117 @@
 {
-    "version": "1.0.0",
-    "permissions_metadata": {
-        "bookmarks": {
-            "warning_text": "Read and modify your bookmarks",
-            "notes": "The bookmarks permission is required by the chrome.bookmarks module."
-        },
-        "history": {
-            "warning_text": "Read and modify your browsing history",
-            "notes": "The history permission is required by chrome.history.
The topSites permission is required by chrome.topSites." - }, - "topSites": { - "warning_text": "Read and modify your browsing history", - "notes": "The history permission is required by chrome.history.
The topSites permission is required by chrome.topSites." - }, - "tabs": { - "warning_text": "Access your browsing activity", - "notes": "The tabs permission is required by the chrome.tabs and chrome.windows modules.
The webNavigation permission is required by the chrome.webNavigation module." - }, - "webNavigation": { - "warning_text": "Access your browsing activity", - "notes": "The tabs permission is required by the chrome.tabs and chrome.windows modules.
The webNavigation permission is required by the chrome.webNavigation module." - }, - "contentSettings": { - "warning_text": "Manipulate settings that specify whether websites can use features such as cookies, JavaScript, plugins, geolocation, microphone, camera etc.", - "notes": "The contentSettings permission is required by chrome.contentSettings." - }, - "debugger": { - "warning_text": "Read and modify all your data on all websites you visit", - "notes": "" - }, - "pageCapture": { - "warning_text": "Read and modify all your data on all websites you visit", - "notes": "" - }, - "proxy": { - "warning_text": "Read and modify all your data on all websites you visit", - "notes": "" - }, - "devtools_page": { - "warning_text": "Read and modify all your data on all websites you visit", - "notes": "" - }, - "http://*/*": { - "warning_text": "Read and modify all your data on all websites you visit", - "notes": "" - }, - "https://*/*": { - "warning_text": "Read and modify all your data on all websites you visit", - "notes": "" - }, - "*://*/*": { - "warning_text": "Read and modify all your data on all websites you visit", - "notes": "" - }, - "": { - "warning_text": "Read and modify all your data on all websites you visit", - "notes": "" - }, - "http://*/": { - "warning_text": "Read and modify all your data on all websites you visit", - "notes": "" - }, - "https://*/": { - "warning_text": "Read and modify all your data on all websites you visit", - "notes": "" - }, - "management": { - "warning_text": "Manage your apps, extensions, and themes", - "notes": "The management permission is required by the chrome.management module." - }, - "mdns": { - "warning_text": "Discover devices on your local network", - "notes": "The mdns permission is required by the chrome.mdns module." - }, - "geolocation": { - "warning_text": "Detect your physical location", - "notes": "Allows the extension to use the proposed HTML5 geolocation API without prompting the user for permission." - }, - "clipboardRead": { - "warning_text": "Access data you copy and paste", - "notes": "Allows the extension to use the following editing commands with document.execCommand(): copy, cut" - }, - "privacy": { - "warning_text": "Manipulate privacy-related settings", - "notes": "The privacy permission is required by the chrome.privacy module." - }, - "signedInDevices": { - "warning_text": "Access the list of your signed-in devices", - "notes": "The signedInDevices permission is required by the chrome.signedInDevices module." - }, - "ttsEngine": { - "warning_text": "Access all text spoken using synthesized speech", - "notes": "The ttsEngine permission is required by the chrome.ttsEngine module." 
-        }
-    }
-}
\ No newline at end of file
+    "version": "1.0.0",
+    "permissions_metadata": {
+        "storage": {
+            "warning_text": "Store and read extension data on your device",
+            "notes": "The storage permission is required by the chrome.storage API."
+        },
+        "background": {
+            "warning_text": "Run in the background even while the extension is not in use",
+            "notes": "The background permission makes Chrome start up early and shut down late, so the extension's background context can keep running."
+        },
+        "identity": {
+            "warning_text": "Access your identity information and OAuth tokens",
+            "notes": "The identity permission is required by the chrome.identity API."
+        },
+        "webRequest": {
+            "warning_text": "Observe your network requests",
+            "notes": "The webRequest permission is required by the chrome.webRequest API."
+        },
+        "webRequestBlocking": {
+            "warning_text": "Block or modify your network requests",
+            "notes": "The webRequestBlocking permission is required to use the chrome.webRequest API in blocking mode; there is no separate chrome.webRequestBlocking API."
+        },
+        "bookmarks": {
+            "warning_text": "Read and modify your bookmarks",
+            "notes": "The bookmarks permission is required by the chrome.bookmarks module."
+        },
+        "history": {
+            "warning_text": "Read and modify your browsing history",
+            "notes": "The history permission is required by chrome.history.
The topSites permission is required by chrome.topSites." + }, + "topSites": { + "warning_text": "Read and modify your browsing history", + "notes": "The history permission is required by chrome.history.
The topSites permission is required by chrome.topSites." + }, + "tabs": { + "warning_text": "Access your browsing activity", + "notes": "The tabs permission is required by the chrome.tabs and chrome.windows modules.
The webNavigation permission is required by the chrome.webNavigation module." + }, + "webNavigation": { + "warning_text": "Access your browsing activity", + "notes": "The tabs permission is required by the chrome.tabs and chrome.windows modules.
The webNavigation permission is required by the chrome.webNavigation module." + }, + "contentSettings": { + "warning_text": "Manipulate settings that specify whether websites can use features such as cookies, JavaScript, plugins, geolocation, microphone, camera etc.", + "notes": "The contentSettings permission is required by chrome.contentSettings." + }, + "debugger": { + "warning_text": "Read and modify all your data on all websites you visit", + "notes": "" + }, + "pageCapture": { + "warning_text": "Read and modify all your data on all websites you visit", + "notes": "" + }, + "proxy": { + "warning_text": "Read and modify all your data on all websites you visit", + "notes": "" + }, + "devtools_page": { + "warning_text": "Read and modify all your data on all websites you visit", + "notes": "" + }, + "http://*/*": { + "warning_text": "Read and modify all your data on all websites you visit", + "notes": "" + }, + "https://*/*": { + "warning_text": "Read and modify all your data on all websites you visit", + "notes": "" + }, + "*://*/*": { + "warning_text": "Read and modify all your data on all websites you visit", + "notes": "" + }, + "": { + "warning_text": "Read and modify all your data on all websites you visit", + "notes": "" + }, + "http://*/": { + "warning_text": "Read and modify all your data on all websites you visit", + "notes": "" + }, + "https://*/": { + "warning_text": "Read and modify all your data on all websites you visit", + "notes": "" + }, + "management": { + "warning_text": "Manage your apps, extensions, and themes", + "notes": "The management permission is required by the chrome.management module." + }, + "mdns": { + "warning_text": "Discover devices on your local network", + "notes": "The mdns permission is required by the chrome.mdns module." + }, + "geolocation": { + "warning_text": "Detect your physical location", + "notes": "Allows the extension to use the proposed HTML5 geolocation API without prompting the user for permission." + }, + "clipboardRead": { + "warning_text": "Access data you copy and paste", + "notes": "Allows the extension to use the following editing commands with document.execCommand(): copy, cut" + }, + "privacy": { + "warning_text": "Manipulate privacy-related settings", + "notes": "The privacy permission is required by the chrome.privacy module." + }, + "signedInDevices": { + "warning_text": "Access the list of your signed-in devices", + "notes": "The signedInDevices permission is required by the chrome.signedInDevices module." + }, + "ttsEngine": { + "warning_text": "Access all text spoken using synthesized speech", + "notes": "The ttsEngine permission is required by the chrome.ttsEngine module." 
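
Each `permissions_metadata` entry maps a permission name (or host pattern) to the warning text surfaced in a report. A scan can then join an extension manifest against this table; a minimal sketch of such a lookup, assuming this file's layout (the helper name is illustrative, and note that MV3 manifests list host patterns under `host_permissions` rather than `permissions`):

```python
import json

def warnings_for_manifest(manifest, path="tarnish-worker/configs/permissions.json"):
    """Map a manifest's requested permissions to human-readable warnings."""
    with open(path) as f:
        metadata = json.load(f)["permissions_metadata"]
    requested = list(manifest.get("permissions", []))
    # Manifest V3 moved host match patterns out of "permissions".
    if manifest.get("manifest_version") == 3:
        requested += manifest.get("host_permissions", [])
    return {
        perm: metadata[perm]["warning_text"]
        for perm in requested
        if perm in metadata
    }
```
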
+ } + } +} diff --git a/tarnish-worker/tasks.py b/tarnish-worker/tasks.py index 9a81b60..abd66d8 100644 --- a/tarnish-worker/tasks.py +++ b/tarnish-worker/tasks.py @@ -27,106 +27,151 @@ from botocore.exceptions import ClientError from distutils.version import LooseVersion, StrictVersion -reload( sys ) -sys.setdefaultencoding( "utf8" ) +reload(sys) +sys.setdefaultencoding("utf8") S3_CLIENT = boto3.client( "s3", - aws_access_key_id=os.environ.get( "aws_access_key" ), - aws_secret_access_key=os.environ.get( "aws_secret_key" ), + aws_access_key_id=os.environ.get("aws_access_key"), + aws_secret_access_key=os.environ.get("aws_secret_key"), ) +DOCKER_HOST = "http://host.docker.internal:8080/" +METADATA_URL = "https://chrome.google.com/webstore/detail/extension-name/" +DOWNLOAD_URL = "https://clients2.google.com/service/update2/crx?response=redirect&prodversion=49.0&x=id%3D~~~~%26installsource%3Dondemand%26uc" + +if os.environ.get("local"): + METADATA_URL = DOCKER_HOST + "metadata/" + DOWNLOAD_URL = DOCKER_HOST + "crx/~~~~" + # Taken from https://stackoverflow.com/questions/2319019/using-regex-to-remove-comments-from-source-files def remove_comments(string): - pattern = r"(\".*?(? - '''), - ( "raw.githubusercontent.com", """ + """, + ), + ( + "raw.githubusercontent.com", + """ This is a hostname of which anyone can upload content. This host is used when viewing uploaded Github repo files in "raw". Example: https://github.com/mandatoryprogrammer/sonar.js/blob/master/sonar.js -> https://raw.githubusercontent.com/mandatoryprogrammer/sonar.js/master/sonar.js - """ ), - ( "github.io", """ + """, + ), + ( + "github.io", + """ This is a shared hostname of which anyone can upload content. This domain for Github pages (https://pages.github.com/) which allows you to host content on github.io via repo commits. - """ ), - ( "*.s3.amazonaws.com", """ + """, + ), + ( + "*.s3.amazonaws.com", + """ This is a shared hostname of which anyone can upload content. Any user can add content to this host via Amazon AWS's S3 offering (https://aws.amazon.com/s3/). - """ ), - ( "*.cloudfront.com", """ + """, + ), + ( + "*.cloudfront.com", + """ This is a shared hostname of which anyone can upload content. Any user can add content to this host via Amazon's Cloudfront CDN offering (https://aws.amazon.com/cloudfront/). - """ ), - ( "*.herokuapp.com", """ + """, + ), + ( + "*.herokuapp.com", + """ This is a shared hostname of which anyone can upload content. Any user can add content to this host via Heroku's app offering (https://www.heroku.com/platform). - """ ), - ( "dl.dropboxusercontent.com", """ + """, + ), + ( + "dl.dropboxusercontent.com", + """ This is a shared hostname of which anyone can upload content. Any user can add content to this host via uploading content to their Dropbox account (https://www.dropbox.com/) and getting the web download link for it. - """ ), - ( "*.appspot.com", """ + """, + ), + ( + "*.appspot.com", + """ This is a shared hostname of which anyone can upload content. Any user can add content to this host via creating a Google AppEngine app (https://cloud.google.com/appengine/). - """ ), - ( "*.googleusercontent.com", """ + """, + ), + ( + "*.googleusercontent.com", + """ This is a shared hostname of which anyone can upload content. Any user can add content to this host via uploading to various Google services. - """ ), - ( "cdn.jsdelivr.net", """ + """, + ), + ( + "cdn.jsdelivr.net", + """ This is a shared hostname of which anyone can upload content. 
Any user can add content to this host via uploading a package to npm (https://www.npmjs.com/) which will then be proxy hosted on this host (https://www.jsdelivr.com/features). - """ ), - ( "cdnjs.cloudflare.com", """ + """, + ), + ( + "cdnjs.cloudflare.com", + """ This host serves old version of the Angular library. Hosts that serve old Angular libraries can be used to bypass Content Security Policy (CSP) in ways similar to the following: @@ -134,8 +179,11 @@ def get_json_from_file( filename, should_remove_comments=False ): More information about older Angular version sandboxing (or lack of) and various escapes can be read about here: http://blog.portswigger.net/2017/05/dom-based-angularjs-sandbox-escapes.html - """ ), - ( "code.angularjs.org", """ + """, + ), + ( + "code.angularjs.org", + """ This host serves old version of the Angular library. Hosts that serve old Angular libraries can be used to bypass Content Security Policy (CSP) in ways similar to the following: @@ -143,927 +191,1297 @@ def get_json_from_file( filename, should_remove_comments=False ): More information about older Angular version sandboxing (or lack of) and various escapes can be read about here: http://blog.portswigger.net/2017/05/dom-based-angularjs-sandbox-escapes.html - """ ), - ( "d.yimg.com", """ + """, + ), + ( + "d.yimg.com", + """ This host contains a JSONP endpoint which can be used to bypass Content Security Policy (CSP): - """ ), - ( "www.linkedin.com", """ + """, + ), + ( + "www.linkedin.com", + """ This host contains a JSONP endpoint which can be used to bypass Content Security Policy (CSP): - """ ), - ( "*.wikipedia.org", """ + """, + ), + ( + "*.wikipedia.org", + """ This host contains a JSONP endpoint which can be used to bypass Content Security Policy (CSP): - """ ), - #( "", """ - #""" ), - ] + """, + ), + # ( "", """ + # """ ), + ] } # First load all JSON files into a tree structure for future use CHROME_DOC_DIR = "./chromium-docs/" -def get_chrome_doc_dict( path_to_docs_dir ): - """ - Grab the JSON files in Chromium source and parse them into a tree for autodoc - https://github.com/chromium/chromium/tree/master/chrome/common/extensions/api - """ - chrome_doc_dict = {} - for root, dirnames, filenames in os.walk( path_to_docs_dir ): - for filename in filenames: - if filename.endswith( ".json" ) and not filename.startswith( "_" ): - file_path = os.path.join( root, filename ) - api_function_list = get_json_from_file( - file_path, - True - ) - - for api_function in api_function_list: - chrome_doc_dict[ api_function[ "namespace" ] ] = api_function - - return chrome_doc_dict - -def get_api_call_targets( chrome_doc_dict ): - """ - Now generate a list of target strings - - [ - { - "match_string": "", - "comment": "", - } - ] - """ - api_call_targets = [] - for api_name, api_data in chrome_doc_dict.iteritems(): - if "functions" in api_data and not "Private" in api_name: - combined_list = [] - - if "events" in api_data: - combined_list = combined_list + api_data[ "events" ] - if "functions" in api_data: - combined_list = combined_list + api_data[ "functions" ] - - for function_data in combined_list: - comment_data = "{{WHITESPAE_PLACEHOLDER}}// chrome." + api_name + "." 
+ function_data[ "name" ] + "(" - - if "parameters" in function_data: - parameter_list = [] - for parameter in function_data[ "parameters" ]: - parameter_list.append( parameter[ "name" ] ) - - comment_data += ", ".join( parameter_list ) - comment_data += ")\n" - - for parameter in function_data[ "parameters" ]: - if "type" in parameter: - comment_data += "{{WHITESPAE_PLACEHOLDER}}// @param " + parameter[ "type" ] + " {" + parameter[ "name" ] + "} " - else: - comment_data += "{{WHITESPAE_PLACEHOLDER}}// @param unknown {" + parameter[ "name" ] + "} " - - if "description" in parameter: - comment_data += parameter[ "description" ] - - comment_data += "\n" - - if "type" in parameter and parameter[ "type" ] == "object" and "properties" in parameter: - for object_name, object_value in parameter[ "properties" ].iteritems(): - comment_data += "{{WHITESPAE_PLACEHOLDER}}// -> @property " - if "type" in parameter: - comment_data += "{" + parameter[ "type" ] + "} " - else: - comment_data += "{unknown} " - - comment_data += object_name + " " - - if "description" in object_value: - comment_data += object_value[ "description" ] - comment_data += "\n" - - if "type" in parameter and parameter[ "type" ] == "function" and "parameters" in parameter: - for function_value in parameter[ "parameters" ]: - comment_data += "{{WHITESPAE_PLACEHOLDER}}// -> @argument " - - if "type" in function_value: - comment_data += "{" + parameter[ "type" ] + "} " - else: - comment_data += "{unknown} " - - comment_data += function_value[ "name" ] + " " - - if "description" in function_value: - comment_data += function_value[ "description" ] - - comment_data += "\n" - - else: - comment_data += ")\n" - - if "description" in function_data: - comment_data += "{{WHITESPAE_PLACEHOLDER}}// Description: " + re.sub( "<[^<]+?>", "", function_data[ "description" ] ) + "\n" - - comment_data += "{{WHITESPAE_PLACEHOLDER}}// https://developer.chrome.com/extensions/" + api_name + "#method-" + function_data[ "name" ] - - api_call_targets.append({ - "match_string": "chrome." + api_name + "." + function_data[ "name" ] + "(", - "comment": comment_data, - }) - - return api_call_targets - -CHROME_DOC_LIST = get_chrome_doc_dict( - CHROME_DOC_DIR -) +def get_chrome_doc_dict(path_to_docs_dir): + """Grab the JSON files in Chromium source and parse them into a tree for autodoc + + https://github.com/chromium/chromium/tree/master/chrome/common/extensions/api + """ + chrome_doc_dict = {} + for root, dirnames, filenames in os.walk(path_to_docs_dir): + for filename in filenames: + if filename.endswith(".json") and not filename.startswith("_"): + file_path = os.path.join(root, filename) + api_function_list = get_json_from_file(file_path, True) + + for api_function in api_function_list: + chrome_doc_dict[api_function["namespace"]] = api_function + + return chrome_doc_dict + + +def get_api_call_targets(chrome_doc_dict): + """Now generate a list of target strings + + [ + { + "match_string": "", + "comment": "", + } + ] + """ + api_call_targets = [] + for api_name, api_data in chrome_doc_dict.iteritems(): + if "functions" in api_data and not "Private" in api_name: + combined_list = [] + + if "events" in api_data: + combined_list = combined_list + api_data["events"] + if "functions" in api_data: + combined_list = combined_list + api_data["functions"] + + for function_data in combined_list: + comment_data = ( + "{{WHITESPAE_PLACEHOLDER}}// chrome." + + api_name + + "." 
+ + function_data["name"] + + "(" + ) + + if "parameters" in function_data: + parameter_list = [] + for parameter in function_data["parameters"]: + parameter_list.append(parameter["name"]) + + comment_data += ", ".join(parameter_list) + comment_data += ")\n" + + for parameter in function_data["parameters"]: + if "type" in parameter: + comment_data += ( + "{{WHITESPAE_PLACEHOLDER}}// @param " + + parameter["type"] + + " {" + + parameter["name"] + + "} " + ) + else: + comment_data += ( + "{{WHITESPAE_PLACEHOLDER}}// @param unknown {" + + parameter["name"] + + "} " + ) + + if "description" in parameter: + comment_data += parameter["description"] + + comment_data += "\n" + + if ( + "type" in parameter + and parameter["type"] == "object" + and "properties" in parameter + ): + for object_name, object_value in parameter[ + "properties" + ].iteritems(): + comment_data += "{{WHITESPAE_PLACEHOLDER}}// -> @property " + if "type" in parameter: + comment_data += "{" + parameter["type"] + "} " + else: + comment_data += "{unknown} " + + comment_data += object_name + " " + + if "description" in object_value: + comment_data += object_value["description"] + + comment_data += "\n" + + if ( + "type" in parameter + and parameter["type"] == "function" + and "parameters" in parameter + ): + for function_value in parameter["parameters"]: + comment_data += "{{WHITESPAE_PLACEHOLDER}}// -> @argument " + + if "type" in function_value: + comment_data += "{" + parameter["type"] + "} " + else: + comment_data += "{unknown} " + + comment_data += function_value["name"] + " " + + if "description" in function_value: + comment_data += function_value["description"] + + comment_data += "\n" + + else: + comment_data += ")\n" + + if "description" in function_data: + comment_data += ( + "{{WHITESPAE_PLACEHOLDER}}// Description: " + + re.sub("<[^<]+?>", "", function_data["description"]) + + "\n" + ) + + comment_data += ( + "{{WHITESPAE_PLACEHOLDER}}//" + " https://developer.chrome.com/extensions/" + + api_name + + "#method-" + + function_data["name"] + ) + + api_call_targets.append({ + "match_string": ( + "chrome." + api_name + "." + function_data["name"] + "(" + ), + "comment": comment_data, + }) + + return api_call_targets + + +CHROME_DOC_LIST = get_chrome_doc_dict(CHROME_DOC_DIR) # Now convert the tree into a call target list -API_CALL_TARGETS = get_api_call_targets( CHROME_DOC_LIST ) - -class RetireJS( object ): - """ - Scan a given JavaScript file for Retire.js matches. - """ - def __init__( self, definitions ): - cleaned_definitions = {} - - # Clean up dirty definitions - for definition_name, definition_value in definitions.iteritems(): - is_useful = True - if not "vulnerabilities" in definition_value or len( definition_value[ "vulnerabilities" ] ) == 0: - is_useful = False - if is_useful: - cleaned_definitions[ definition_name ] = definition_value - - self.definitions = cleaned_definitions - - def regex_version_match( self, definition_name, regex_list, target_string ): - """ - Check a given target string for a version match, return a list of matches - and their respective versions. 
- """ - matching_definitions = [] - for filecontent_matcher in regex_list : - matcher_parts = filecontent_matcher.split( "(§§version§§)" ) - filecontent_matcher = filecontent_matcher.replace( "(§§version§§)", "[a-z0-9\.\-]+" ) - match = re.search( filecontent_matcher, target_string ) - if match: - version_match = str( match.group() ) - for matcher_part in matcher_parts: - matcher_match = re.search( matcher_part, version_match ) - if matcher_match: - version_match = version_match.replace( str( matcher_match.group() ), "" ) - - matching_definitions.append({ - "definition_name": definition_name, - "version": version_match - }) - - return matching_definitions - - def get_libraries( self, filename, file_data ): - """ - Find libraries and their versions and return a list of match(s): - - [{ - "definition_name": "jquery", - "version": "1.1.1" - }] - """ - matching_definitions = [] - - # In this first iteration we simply attempt to extract version numbers - for definition_name, definition_value in self.definitions.iteritems(): - # File contents match - if "filecontent" in definition_value[ "extractors" ]: - filecontent_matches = self.regex_version_match( - definition_name, - definition_value[ "extractors" ][ "filecontent" ], - file_data - ) - matching_definitions = filecontent_matches + matching_definitions - - # URI name match - if "uri" in definition_value[ "extractors" ]: - uri_matches = self.regex_version_match( - definition_name, - definition_value[ "extractors" ][ "uri" ], - file_data - ) - matching_definitions = uri_matches + matching_definitions - - # Filename - if "filename" in definition_value[ "extractors" ]: - filename_matches = self.regex_version_match( - definition_name, - definition_value[ "extractors" ][ "filename" ], - file_data - ) - matching_definitions = filename_matches + matching_definitions - - # Hash matching - if "hashes" in definition_value[ "extractors" ]: - hasher = hashlib.sha1() - hasher.update( file_data ) - js_hash = hasher.hexdigest() - if js_hash in definition_value[ "extractors" ][ "hashes" ]: - matching_definitions.append({ - "definition_name": definition_name, - "version": definition_value[ "extractors" ][ "hashes" ][ js_hash ] - }) - - # De-duplicate matches via hashing - match_hash = {} - for matching_definition in matching_definitions: - match_hash[ matching_definition[ "definition_name" ] + matching_definition[ "version" ] ] = { - "definition_name": matching_definition[ "definition_name" ], - "version": matching_definition[ "version" ] - } - - matching_definitions = [] - - for key, value in match_hash.iteritems(): - matching_definitions.append( value ) - - return matching_definitions - - def check_file( self, filename, file_data ): - """ - Check a given file - @filename: Name of the file - @file_data: Contents of the JavaScript - """ - matching_definitions = self.get_libraries( - filename, - file_data - ) - - vulnerability_match_hash = {} - vulnerability_match = [] - - for matching_definition in matching_definitions: - vulnerabilities = self.definitions[ matching_definition[ "definition_name" ] ][ "vulnerabilities" ] - - for vulnerability in vulnerabilities: - match = False - if matching_definition[ "version" ].strip() == "": - match = False - elif "atOrAbove" in vulnerability and "below" in vulnerability: - if LooseVersion( matching_definition[ "version" ] ) >= LooseVersion( vulnerability[ "atOrAbove" ] ) and LooseVersion( matching_definition[ "version" ] ) < LooseVersion( vulnerability[ "below" ] ): - match = True - elif "above" in vulnerability and 
"below" in vulnerability: - if LooseVersion( matching_definition[ "version" ] ) > LooseVersion( vulnerability[ "above" ] ) and LooseVersion( matching_definition[ "version" ] ) < LooseVersion( vulnerability[ "below" ] ): - match = True - elif "below" in vulnerability: - if LooseVersion( matching_definition[ "version" ] ) < LooseVersion( vulnerability[ "below" ] ): - match = True - elif "above" in vulnerability: - if LooseVersion( matching_definition[ "version" ] ) > LooseVersion( vulnerability[ "above" ] ): - match = True - elif "atOrAbove" in vulnerability: - if LooseVersion( matching_definition[ "version" ] ) >= LooseVersion( vulnerability[ "atOrAbove" ] ): - match = True - elif "atOrBelow" in vulnerability: - if LooseVersion( matching_definition[ "version" ] ) <= LooseVersion( vulnerability[ "atOrBelow" ] ): - match = True - - if match: - vulnerability_match_hash[ matching_definition[ "definition_name" ] + matching_definition[ "version" ] ] = { - "version": matching_definition[ "version" ], - "definition_name": matching_definition[ "definition_name" ], - "vulnerability": vulnerability - } - - # De-duplicate - for key, value in vulnerability_match_hash.iteritems(): - vulnerability_match.append( - value - ) +API_CALL_TARGETS = get_api_call_targets(CHROME_DOC_LIST) + + +class RetireJS(object): + """Scan a given JavaScript file for Retire.js matches.""" + + def __init__(self, definitions): + cleaned_definitions = {} + + # Clean up dirty definitions + for definition_name, definition_value in definitions.iteritems(): + is_useful = True + if ( + not "vulnerabilities" in definition_value + or len(definition_value["vulnerabilities"]) == 0 + ): + is_useful = False + if is_useful: + cleaned_definitions[definition_name] = definition_value + + self.definitions = cleaned_definitions + + def regex_version_match(self, definition_name, regex_list, target_string): + """Check a given target string for a version match, return a list of matches + + and their respective versions. 
+ """ + matching_definitions = [] + for filecontent_matcher in regex_list: + matcher_parts = filecontent_matcher.split("(§§version§§)") + filecontent_matcher = filecontent_matcher.replace( + "(§§version§§)", "[a-z0-9\.\-]+" + ) + match = re.search(filecontent_matcher, target_string) + if match: + version_match = str(match.group()) + for matcher_part in matcher_parts: + matcher_match = re.search(matcher_part, version_match) + if matcher_match: + version_match = version_match.replace( + str(matcher_match.group()), "" + ) + + matching_definitions.append( + {"definition_name": definition_name, "version": version_match} + ) + + return matching_definitions + + def get_libraries(self, filename, file_data): + """Find libraries and their versions and return a list of match(s): + + [{ + "definition_name": "jquery", + "version": "1.1.1" + }] + """ + matching_definitions = [] + + # In this first iteration we simply attempt to extract version numbers + for definition_name, definition_value in self.definitions.iteritems(): + # File contents match + if "filecontent" in definition_value["extractors"]: + filecontent_matches = self.regex_version_match( + definition_name, + definition_value["extractors"]["filecontent"], + file_data, + ) + matching_definitions = filecontent_matches + matching_definitions + + # URI name match + if "uri" in definition_value["extractors"]: + uri_matches = self.regex_version_match( + definition_name, definition_value["extractors"]["uri"], file_data + ) + matching_definitions = uri_matches + matching_definitions + + # Filename + if "filename" in definition_value["extractors"]: + filename_matches = self.regex_version_match( + definition_name, + definition_value["extractors"]["filename"], + file_data, + ) + matching_definitions = filename_matches + matching_definitions + + # Hash matching + if "hashes" in definition_value["extractors"]: + hasher = hashlib.sha1() + hasher.update(file_data) + js_hash = hasher.hexdigest() + if js_hash in definition_value["extractors"]["hashes"]: + matching_definitions.append({ + "definition_name": definition_name, + "version": definition_value["extractors"]["hashes"][js_hash], + }) + + # De-duplicate matches via hashing + match_hash = {} + for matching_definition in matching_definitions: + match_hash[ + matching_definition["definition_name"] + + matching_definition["version"] + ] = { + "definition_name": matching_definition["definition_name"], + "version": matching_definition["version"], + } + + matching_definitions = [] + + for key, value in match_hash.iteritems(): + matching_definitions.append(value) + + return matching_definitions + + def check_file(self, filename, file_data): + """Check a given file + + @filename: Name of the file + @file_data: Contents of the JavaScript + """ + matching_definitions = self.get_libraries(filename, file_data) + + vulnerability_match_hash = {} + vulnerability_match = [] + + for matching_definition in matching_definitions: + vulnerabilities = self.definitions[ + matching_definition["definition_name"] + ]["vulnerabilities"] + + for vulnerability in vulnerabilities: + match = False + if matching_definition["version"].strip() == "": + match = False + elif "atOrAbove" in vulnerability and "below" in vulnerability: + if LooseVersion(matching_definition["version"]) >= LooseVersion( + vulnerability["atOrAbove"] + ) and LooseVersion(matching_definition["version"]) < LooseVersion( + vulnerability["below"] + ): + match = True + elif "above" in vulnerability and "below" in vulnerability: + if 
LooseVersion(matching_definition["version"]) > LooseVersion( + vulnerability["above"] + ) and LooseVersion(matching_definition["version"]) < LooseVersion( + vulnerability["below"] + ): + match = True + elif "below" in vulnerability: + if LooseVersion(matching_definition["version"]) < LooseVersion( + vulnerability["below"] + ): + match = True + elif "above" in vulnerability: + if LooseVersion(matching_definition["version"]) > LooseVersion( + vulnerability["above"] + ): + match = True + elif "atOrAbove" in vulnerability: + if LooseVersion(matching_definition["version"]) >= LooseVersion( + vulnerability["atOrAbove"] + ): + match = True + elif "atOrBelow" in vulnerability: + if LooseVersion(matching_definition["version"]) <= LooseVersion( + vulnerability["atOrBelow"] + ): + match = True + + if match: + vulnerability_match_hash[ + matching_definition["definition_name"] + + matching_definition["version"] + ] = { + "version": matching_definition["version"], + "definition_name": matching_definition["definition_name"], + "vulnerability": vulnerability, + } + + # De-duplicate + for key, value in vulnerability_match_hash.iteritems(): + vulnerability_match.append(value) + + return vulnerability_match + + +RETIRE_JS = RetireJS(RETIRE_JS_DEFINITIONS) + + +def prettify_json(input_dict): + p = json.dumps(input_dict, sort_keys=True, indent=4, separators=(",", ": ")) + print("Prettified json: " + p) + return p + + +def pprint(input_dict): + print( + json.dumps(input_dict, sort_keys=True, indent=4, separators=(",", ": ")) + ) + + +def upload_to_s3(content_type, remote_path, body): + object_exists = True + + if (os.environ.get("local")) and os.environ.get("server_dir"): + """if content_type == "application/json": + + r = requests.post(DOCKER_HOST + "upload", json=body) + print("Upload resp: " + r.text) + elif content_type == "application/zip": + print("Received upload for " + remote_path + ", with body: " + body) + r = requests.post(DOCKER_HOST + "upload", files= {'file': + {remote_path, body}}) + print("Upload resp: " + r.text) + else: + exit("Unknown content_type: " + content_type) + """ + proxies = { + "http": "http://host.docker.internal:8088", + "https": "http://host.docker.internal:8088", + } + print("Received upload for " + remote_path + ", with body: " + body[:10]) + files = { + "file": (remote_path.split("/")[-1], body), + "path": ( + None, + os.path.join( + os.environ.get("server_dir"), + "uploads", + "/".join(remote_path.split("/")[:-1]), + ), + ), + } + r = requests.post(DOCKER_HOST + "upload", files=files) # , proxies=proxies) + # print("Upload resp: " + r.text) + print("Upload finished!") + return + + try: + response = S3_CLIENT.head_object( + Bucket=os.environ.get("extension_s3_bucket"), Key=remote_path + ) + except ClientError as e: + if int(e.response["Error"]["Code"]) == 404: + object_exists = False + + if object_exists: + print("It already exists, not uploading...") + return os.environ.get("extension_s3_bucket") + "/" + remote_path + + print( + "Uploading to: " + + os.environ.get("extension_s3_bucket") + + "/" + + remote_path + ) + S3_CLIENT.put_object( + ACL="public-read", + ContentType=content_type, + Bucket=os.environ.get("extension_s3_bucket"), + Key=remote_path, + Body=body, + ) + print("Upload finished!") + return os.environ.get("extension_s3_bucket") + "/" + remote_path - return vulnerability_match - -RETIRE_JS = RetireJS( RETIRE_JS_DEFINITIONS ) - -def prettify_json( input_dict ): - return json.dumps( input_dict, sort_keys=True, indent=4, separators=( ",", ": " ) ) - -def pprint( 
input_dict ): - print( json.dumps( input_dict, sort_keys=True, indent=4, separators=( ",", ": " ) ) ) - -def upload_to_s3( content_type, remote_path, body ): - object_exists = True - try: - response = S3_CLIENT.head_object( - Bucket=os.environ.get( "extension_s3_bucket" ), - Key=remote_path - ) - except ClientError as e: - if int( e.response["Error"]["Code"] ) == 404: - object_exists = False - - if object_exists: - print( "It already exists, not uploading..." ) - return os.environ.get( "extension_s3_bucket" ) + "/" + remote_path - - print( "Uploading to: " + os.environ.get( "extension_s3_bucket" ) + "/" + remote_path ) - S3_CLIENT.put_object( - ACL="public-read", - ContentType=content_type, - Bucket=os.environ.get( "extension_s3_bucket" ), - Key=remote_path, - Body=body - ) - print( "Upload finished!" ) - return os.environ.get( "extension_s3_bucket" ) + "/" + remote_path @app.task( - name="tarnishworker.tasks.get_chrome_extension_metadata", - time_limit=( 30 * 1 ), # Don't wait more then 30 minutes. + name="tarnishworker.tasks.get_chrome_extension_metadata", + time_limit=(30 * 1), # Don't wait more then 30 minutes. ) -def get_chrome_extension_metadata( extension_id ): - """ - Get Chrome extension metadata from the Chrome store. - """ - return_metadata = {} - - headers = { - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:49.0) Gecko/20100101 Firefox/49.0", - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", - "Accept-Language": "en-US,en;q=0.5", - "Accept-Encoding": "gzip, deflate, br", - "X-Same-Domain": "1", - "Content-Type": "application/x-www-form-urlencoded;charset=utf-8", - "Referer": "https://chrome.google.com/", - } - - try: - response = requests.get( - "https://chrome.google.com/webstore/detail/extension-name/" + extension_id + "?hl=en", - headers=headers, - timeout=( 15 ), - ) - except: - raise self.retry() - - soup = BeautifulSoup( - response.text, - "html.parser" - ) - - version_element = soup.find( - "meta", - { "itemprop": "version" } - ) - return_metadata[ "version" ] = str( version_element.get( "content" ) ) - - name_element = soup.find( - "meta", - { "itemprop": "name" } - ) - return_metadata[ "name" ] = str( name_element.get( "content" ) ) - - url_element = soup.find( - "meta", - { "itemprop": "url" } - ) - return_metadata[ "url" ] = str( url_element.get( "content" ) ) - - image_element = soup.find( - "meta", - { "itemprop": "image" } - ) - return_metadata[ "image" ] = str( image_element.get( "content" ) ) - - download_count_element = soup.find( - "meta", - { "itemprop": "interactionCount" } - ) - return_metadata[ "download_count" ] = int( - str( - download_count_element.get( "content" ) - ).replace( - "UserDownloads:", - "" - ).replace( - ",", - "" - ).replace( - "+", - "" - ) - ) - - os_element = soup.find( - "meta", - { "itemprop": "operatingSystem" } - ) - return_metadata[ "os" ] = str( os_element.get( "content" ) ) - - rating_element = soup.find( - "meta", - { "itemprop": "ratingValue" } - ) - return_metadata[ "rating" ] = float( rating_element.get( "content" ) ) - - rating_count_element = soup.find( - "meta", - { "itemprop": "ratingCount" } - ) - return_metadata[ "rating_count" ] = int( rating_count_element.get( "content" ) ) - - description_element = soup.find( - "div", - { "itemprop": "description" } - ) - return_metadata[ "short_description" ] = str( description_element.text ) - - return return_metadata +def get_chrome_extension_metadata(extension_id): + """Get Chrome extension metadata from the Chrome store.""" + 
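+    # In local mode METADATA_URL points at the stub server from the README
+    # steps; it only needs to return a page carrying the same itemprop
+    # <meta> tags the webstore renders, e.g. (illustrative):
+    #
+    #   <meta itemprop="version" content="1.2.3">
+    #   <meta itemprop="name" content="Example Extension">
+    #   <meta itemprop="interactionCount" content="UserDownloads:1,000+">
+    #
+    # The scraping below depends only on itemprop attributes like these.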
print("Getting extension metadata with METADATA_URL: " + METADATA_URL) + return_metadata = {} + + headers = { + "User-Agent": ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:49.0)" + " Gecko/20100101 Firefox/49.0" + ), + "Accept": ( + "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" + ), + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + "X-Same-Domain": "1", + "Content-Type": "application/x-www-form-urlencoded;charset=utf-8", + "Referer": "https://chrome.google.com/", + } + + try: + response = requests.get( + METADATA_URL + extension_id + "?hl=en", + headers=headers, + timeout=(15), + ) + except: + print("Hitting exception") + raise self.retry() + + soup = BeautifulSoup(response.text, "html.parser") + + version_element = soup.find("meta", {"itemprop": "version"}) + return_metadata["version"] = str(version_element.get("content")) + + name_element = soup.find("meta", {"itemprop": "name"}) + return_metadata["name"] = str(name_element.get("content")) + + url_element = soup.find("meta", {"itemprop": "url"}) + return_metadata["url"] = str(url_element.get("content")) + + image_element = soup.find("meta", {"itemprop": "image"}) + return_metadata["image"] = str(image_element.get("content")) + + download_count_element = soup.find("meta", {"itemprop": "interactionCount"}) + return_metadata["download_count"] = int( + str(download_count_element.get("content")) + .replace("UserDownloads:", "") + .replace(",", "") + .replace("+", "") + ) + + os_element = soup.find("meta", {"itemprop": "operatingSystem"}) + return_metadata["os"] = str(os_element.get("content")) + + rating_element = soup.find("meta", {"itemprop": "ratingValue"}) + return_metadata["rating"] = ( + -1 if not rating_element else float(rating_element.get("content")) + ) + + rating_count_element = soup.find("meta", {"itemprop": "ratingCount"}) + return_metadata["rating_count"] = ( + -1 + if not rating_count_element + else int(rating_count_element.get("content")) + ) + + description_element = soup.find("div", {"itemprop": "description"}) + return_metadata["short_description"] = str(description_element.text) + + return return_metadata def get_uuid(): - return str( uuid.uuid4() ) - -def pprint( input_dict ): - print( json.dumps(input_dict, sort_keys=True, indent=4, separators=(',', ': ')) ) - -def beautified_js( input_js ): - options = jsbeautifier.default_options() - options.indent_size = 4 - return jsbeautifier.beautify( - input_js, - options - ) - -def ends_in_ext_list( target_string, ext_list ): - for ext in ext_list: - if target_string.endswith( ext ): - return True - - return False - -def get_csp_report( csp_object ): - """ - Much of this is taken from: https://github.com/moloch--/CSP-Bypass/ - Credits to moloch--, he can't hang but he can code :) - - return_data = [ - { - "name": "", - "description": "", - "risk": "", - } - ] - """ - return_data = [] - - """ Checks the current CSP header for unsafe content sources """ - for directive in [SCRIPT_SRC]: - if UNSAFE_EVAL in csp_object[directive]: - return_data.append({ - "name": "Unsafe Eval", - "description": "Extension allows unsafe evaluation of JavaScript via eval().", - "risk": "high" - }) - if UNSAFE_INLINE in csp_object[directive]: - return_data.append({ - "name": "Unsafe Inline", - "description": "Extension allows unsafe evaluation of JavaScript via inline