diff --git a/cycode/cli/apps/report/sbom/path/path_command.py b/cycode/cli/apps/report/sbom/path/path_command.py index 9c839b08..61c9ddb7 100644 --- a/cycode/cli/apps/report/sbom/path/path_command.py +++ b/cycode/cli/apps/report/sbom/path/path_command.py @@ -12,6 +12,7 @@ from cycode.cli.files_collector.zip_documents import zip_documents from cycode.cli.utils.get_api_client import get_report_cycode_client from cycode.cli.utils.progress_bar import SbomReportProgressBarSection +from cycode.cli.utils.scan_utils import is_cycodeignore_allowed_by_scan_config from cycode.cli.utils.sentry import add_breadcrumb @@ -37,7 +38,11 @@ def path_command( try: documents = get_relevant_documents( - progress_bar, SbomReportProgressBarSection.PREPARE_LOCAL_FILES, consts.SCA_SCAN_TYPE, (str(path),) + progress_bar, + SbomReportProgressBarSection.PREPARE_LOCAL_FILES, + consts.SCA_SCAN_TYPE, + (str(path),), + is_cycodeignore_allowed=is_cycodeignore_allowed_by_scan_config(ctx), ) # TODO(MarshalX): combine perform_pre_scan_documents_actions with get_relevant_document. 
# unhardcode usage of context in perform_pre_scan_documents_actions diff --git a/cycode/cli/apps/scan/code_scanner.py b/cycode/cli/apps/scan/code_scanner.py index ad6a6e3e..5b4c3e78 100644 --- a/cycode/cli/apps/scan/code_scanner.py +++ b/cycode/cli/apps/scan/code_scanner.py @@ -23,7 +23,11 @@ from cycode.cli.models import CliError, Document, LocalScanResult from cycode.cli.utils.progress_bar import ScanProgressBarSection from cycode.cli.utils.scan_batch import run_parallel_batched_scan -from cycode.cli.utils.scan_utils import generate_unique_scan_id, set_issue_detected_by_scan_results +from cycode.cli.utils.scan_utils import ( + generate_unique_scan_id, + is_cycodeignore_allowed_by_scan_config, + set_issue_detected_by_scan_results, +) from cycode.cyclient.models import ZippedFileScanResult from cycode.logger import get_logger @@ -42,7 +46,13 @@ def scan_disk_files(ctx: typer.Context, paths: tuple[str, ...]) -> None: progress_bar = ctx.obj['progress_bar'] try: - documents = get_relevant_documents(progress_bar, ScanProgressBarSection.PREPARE_LOCAL_FILES, scan_type, paths) + documents = get_relevant_documents( + progress_bar, + ScanProgressBarSection.PREPARE_LOCAL_FILES, + scan_type, + paths, + is_cycodeignore_allowed=is_cycodeignore_allowed_by_scan_config(ctx), + ) add_sca_dependencies_tree_documents_if_needed(ctx, scan_type, documents) scan_documents(ctx, documents, get_scan_parameters(ctx, paths)) except Exception as e: diff --git a/cycode/cli/apps/scan/commit_range_scanner.py b/cycode/cli/apps/scan/commit_range_scanner.py index 3abd2940..335531c2 100644 --- a/cycode/cli/apps/scan/commit_range_scanner.py +++ b/cycode/cli/apps/scan/commit_range_scanner.py @@ -29,6 +29,7 @@ parse_commit_range_sast, parse_commit_range_sca, ) +from cycode.cli.files_collector.documents_walk_ignore import filter_documents_with_cycodeignore from cycode.cli.files_collector.file_excluder import excluder from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip from 
cycode.cli.files_collector.sca.sca_file_collector import ( @@ -40,7 +41,11 @@ from cycode.cli.utils.git_proxy import git_proxy from cycode.cli.utils.path_utils import get_path_by_os from cycode.cli.utils.progress_bar import ScanProgressBarSection -from cycode.cli.utils.scan_utils import generate_unique_scan_id, set_issue_detected_by_scan_results +from cycode.cli.utils.scan_utils import ( + generate_unique_scan_id, + is_cycodeignore_allowed_by_scan_config, + set_issue_detected_by_scan_results, +) from cycode.cyclient.models import ZippedFileScanResult from cycode.logger import get_logger @@ -189,6 +194,12 @@ def _scan_sca_commit_range(ctx: typer.Context, repo_path: str, commit_range: str from_commit_documents = excluder.exclude_irrelevant_documents_to_scan(consts.SCA_SCAN_TYPE, from_commit_documents) to_commit_documents = excluder.exclude_irrelevant_documents_to_scan(consts.SCA_SCAN_TYPE, to_commit_documents) + is_cycodeignore_allowed = is_cycodeignore_allowed_by_scan_config(ctx) + from_commit_documents = filter_documents_with_cycodeignore( + from_commit_documents, repo_path, is_cycodeignore_allowed + ) + to_commit_documents = filter_documents_with_cycodeignore(to_commit_documents, repo_path, is_cycodeignore_allowed) + perform_sca_pre_commit_range_scan_actions( repo_path, from_commit_documents, from_commit_rev, to_commit_documents, to_commit_rev ) @@ -204,6 +215,11 @@ def _scan_secret_commit_range( consts.SECRET_SCAN_TYPE, commit_diff_documents_to_scan ) + is_cycodeignore_allowed = is_cycodeignore_allowed_by_scan_config(ctx) + diff_documents_to_scan = filter_documents_with_cycodeignore( + diff_documents_to_scan, repo_path, is_cycodeignore_allowed + ) + scan_documents( ctx, diff_documents_to_scan, get_scan_parameters(ctx, (repo_path,)), is_git_diff=True, is_commit_range=True ) @@ -221,9 +237,14 @@ def _scan_sast_commit_range(ctx: typer.Context, repo_path: str, commit_range: st to_commit_rev, reverse_diff=False, ) + commit_documents = 
excluder.exclude_irrelevant_documents_to_scan(consts.SAST_SCAN_TYPE, commit_documents) diff_documents = excluder.exclude_irrelevant_documents_to_scan(consts.SAST_SCAN_TYPE, diff_documents) + is_cycodeignore_allowed = is_cycodeignore_allowed_by_scan_config(ctx) + commit_documents = filter_documents_with_cycodeignore(commit_documents, repo_path, is_cycodeignore_allowed) + diff_documents = filter_documents_with_cycodeignore(diff_documents, repo_path, is_cycodeignore_allowed) + _scan_commit_range_documents(ctx, commit_documents, diff_documents, scan_parameters=scan_parameters) @@ -254,11 +275,18 @@ def _scan_sca_pre_commit(ctx: typer.Context, repo_path: str) -> None: progress_bar_section=ScanProgressBarSection.PREPARE_LOCAL_FILES, repo_path=repo_path, ) + git_head_documents = excluder.exclude_irrelevant_documents_to_scan(consts.SCA_SCAN_TYPE, git_head_documents) pre_committed_documents = excluder.exclude_irrelevant_documents_to_scan( consts.SCA_SCAN_TYPE, pre_committed_documents ) + is_cycodeignore_allowed = is_cycodeignore_allowed_by_scan_config(ctx) + git_head_documents = filter_documents_with_cycodeignore(git_head_documents, repo_path, is_cycodeignore_allowed) + pre_committed_documents = filter_documents_with_cycodeignore( + pre_committed_documents, repo_path, is_cycodeignore_allowed + ) + perform_sca_pre_hook_range_scan_actions(repo_path, git_head_documents, pre_committed_documents) _scan_commit_range_documents( @@ -288,8 +316,12 @@ def _scan_secret_pre_commit(ctx: typer.Context, repo_path: str) -> None: is_git_diff_format=True, ) ) + documents_to_scan = excluder.exclude_irrelevant_documents_to_scan(consts.SECRET_SCAN_TYPE, documents_to_scan) + is_cycodeignore_allowed = is_cycodeignore_allowed_by_scan_config(ctx) + documents_to_scan = filter_documents_with_cycodeignore(documents_to_scan, repo_path, is_cycodeignore_allowed) + scan_documents(ctx, documents_to_scan, get_scan_parameters(ctx), is_git_diff=True) @@ -301,11 +333,18 @@ def _scan_sast_pre_commit(ctx: 
typer.Context, repo_path: str, **_) -> None: progress_bar_section=ScanProgressBarSection.PREPARE_LOCAL_FILES, repo_path=repo_path, ) + pre_committed_documents = excluder.exclude_irrelevant_documents_to_scan( consts.SAST_SCAN_TYPE, pre_committed_documents ) diff_documents = excluder.exclude_irrelevant_documents_to_scan(consts.SAST_SCAN_TYPE, diff_documents) + is_cycodeignore_allowed = is_cycodeignore_allowed_by_scan_config(ctx) + pre_committed_documents = filter_documents_with_cycodeignore( + pre_committed_documents, repo_path, is_cycodeignore_allowed + ) + diff_documents = filter_documents_with_cycodeignore(diff_documents, repo_path, is_cycodeignore_allowed) + _scan_commit_range_documents(ctx, pre_committed_documents, diff_documents, scan_parameters=scan_parameters) diff --git a/cycode/cli/apps/scan/repository/repository_command.py b/cycode/cli/apps/scan/repository/repository_command.py index 6fc77bee..9692ccc4 100644 --- a/cycode/cli/apps/scan/repository/repository_command.py +++ b/cycode/cli/apps/scan/repository/repository_command.py @@ -8,6 +8,7 @@ from cycode.cli.apps.scan.code_scanner import scan_documents from cycode.cli.apps.scan.scan_parameters import get_scan_parameters from cycode.cli.exceptions.handle_scan_errors import handle_scan_exception +from cycode.cli.files_collector.documents_walk_ignore import filter_documents_with_cycodeignore from cycode.cli.files_collector.file_excluder import excluder from cycode.cli.files_collector.repository_documents import get_git_repository_tree_file_entries from cycode.cli.files_collector.sca.sca_file_collector import add_sca_dependencies_tree_documents_if_needed @@ -15,6 +16,7 @@ from cycode.cli.models import Document from cycode.cli.utils.path_utils import get_path_by_os from cycode.cli.utils.progress_bar import ScanProgressBarSection +from cycode.cli.utils.scan_utils import is_cycodeignore_allowed_by_scan_config from cycode.cli.utils.sentry import add_breadcrumb @@ -60,6 +62,9 @@ def repository_command( 
documents_to_scan = excluder.exclude_irrelevant_documents_to_scan(scan_type, documents_to_scan) + is_cycodeignore_allowed = is_cycodeignore_allowed_by_scan_config(ctx) + documents_to_scan = filter_documents_with_cycodeignore(documents_to_scan, str(path), is_cycodeignore_allowed) + add_sca_dependencies_tree_documents_if_needed(ctx, scan_type, documents_to_scan) logger.debug('Found all relevant files for scanning %s', {'path': path, 'branch': branch}) diff --git a/cycode/cli/apps/scan/scan_command.py b/cycode/cli/apps/scan/scan_command.py index dda94876..2eb51f12 100644 --- a/cycode/cli/apps/scan/scan_command.py +++ b/cycode/cli/apps/scan/scan_command.py @@ -1,9 +1,11 @@ +import os from pathlib import Path from typing import Annotated, Optional import click import typer +from cycode.cli.apps.scan.remote_url_resolver import _try_get_git_remote_url from cycode.cli.cli_types import ExportTypeOption, ScanTypeOption, ScaScanTypeOption, SeverityOption from cycode.cli.consts import ( ISSUE_DETECTED_STATUS_CODE, @@ -161,10 +163,15 @@ def scan_command( scan_client = get_scan_cycode_client(ctx) ctx.obj['client'] = scan_client - remote_scan_config = scan_client.get_scan_configuration_safe(scan_type) + # Get remote URL from current working directory + remote_url = _try_get_git_remote_url(os.getcwd()) + + remote_scan_config = scan_client.get_scan_configuration_safe(scan_type, remote_url) if remote_scan_config: excluder.apply_scan_config(str(scan_type), remote_scan_config) + ctx.obj['scan_config'] = remote_scan_config + if export_type and export_file: console_printer = ctx.obj['console_printer'] console_printer.enable_recording(export_type, export_file) diff --git a/cycode/cli/consts.py b/cycode/cli/consts.py index 7384e33e..1b1497bd 100644 --- a/cycode/cli/consts.py +++ b/cycode/cli/consts.py @@ -17,6 +17,8 @@ IAC_SCAN_SUPPORTED_FILE_EXTENSIONS = ('.tf', '.tf.json', '.json', '.yaml', '.yml', '.dockerfile', '.containerfile') IAC_SCAN_SUPPORTED_FILE_PREFIXES = ('dockerfile', 
'containerfile') +CYCODEIGNORE_FILENAME = '.cycodeignore' + SECRET_SCAN_FILE_EXTENSIONS_TO_IGNORE = ( '.DS_Store', '.bmp', diff --git a/cycode/cli/files_collector/documents_walk_ignore.py b/cycode/cli/files_collector/documents_walk_ignore.py new file mode 100644 index 00000000..5a4dbe6d --- /dev/null +++ b/cycode/cli/files_collector/documents_walk_ignore.py @@ -0,0 +1,124 @@ +import os +from typing import TYPE_CHECKING + +from cycode.cli import consts +from cycode.cli.logger import get_logger +from cycode.cli.utils.ignore_utils import IgnoreFilterManager + +if TYPE_CHECKING: + from cycode.cli.models import Document + +logger = get_logger('Documents Ignores') + + +def _get_cycodeignore_path(repo_path: str) -> str: + """Get the path to .cycodeignore file in the repository root.""" + return os.path.join(repo_path, consts.CYCODEIGNORE_FILENAME) + + +def _create_ignore_filter_manager(repo_path: str, cycodeignore_path: str) -> IgnoreFilterManager: + """Create IgnoreFilterManager with .cycodeignore file.""" + return IgnoreFilterManager.build( + path=repo_path, + global_ignore_file_paths=[cycodeignore_path], + global_patterns=[], + ) + + +def _log_ignored_files(repo_path: str, dirpath: str, ignored_dirnames: list[str], ignored_filenames: list[str]) -> None: + """Log ignored files for debugging (similar to walk_ignore function).""" + rel_dirpath = '' if dirpath == repo_path else os.path.relpath(dirpath, repo_path) + display_dir = rel_dirpath or '.' 
+ + for is_dir, names in ( + (True, ignored_dirnames), + (False, ignored_filenames), + ): + for name in names: + full_path = os.path.join(repo_path, display_dir, name) + if is_dir: + full_path = os.path.join(full_path, '*') + logger.debug('Ignoring match %s', full_path) + + +def _build_allowed_paths_set(ignore_filter_manager: IgnoreFilterManager, repo_path: str) -> set[str]: + """Build set of allowed file paths using walk_with_ignored.""" + allowed_paths = set() + + for dirpath, _dirnames, filenames, ignored_dirnames, ignored_filenames in ignore_filter_manager.walk_with_ignored(): + _log_ignored_files(repo_path, dirpath, ignored_dirnames, ignored_filenames) + + for filename in filenames: + file_path = os.path.join(dirpath, filename) + allowed_paths.add(file_path) + + return allowed_paths + + +def _get_document_check_path(document: 'Document', repo_path: str) -> str: + """Get the normalized absolute path for a document to check against allowed paths.""" + check_path = document.absolute_path + if not check_path: + check_path = document.path if os.path.isabs(document.path) else os.path.join(repo_path, document.path) + + return os.path.normpath(check_path) + + +def _filter_documents_by_allowed_paths( + documents: list['Document'], allowed_paths: set[str], repo_path: str +) -> list['Document']: + """Filter documents by checking if their paths are in the allowed set.""" + filtered_documents = [] + + for document in documents: + try: + check_path = _get_document_check_path(document, repo_path) + + if check_path in allowed_paths: + filtered_documents.append(document) + else: + relative_path = os.path.relpath(check_path, repo_path) + logger.debug('Filtered out document due to .cycodeignore: %s', relative_path) + except Exception as e: + logger.debug('Error processing document %s: %s', document.path, e) + filtered_documents.append(document) + + return filtered_documents + + +def filter_documents_with_cycodeignore( + documents: list['Document'], repo_path: str, 
is_cycodeignore_allowed: bool = True +) -> list['Document']: + """Filter documents based on .cycodeignore patterns. + + This function uses .cycodeignore file in the repository root to filter out + documents whose paths match any of those patterns. + + Args: + documents: List of Document objects to filter + repo_path: Path to the repository root + is_cycodeignore_allowed: Whether .cycodeignore filtering is allowed by scan configuration + + Returns: + List of Document objects that don't match any .cycodeignore patterns + """ + if not is_cycodeignore_allowed: + logger.debug('.cycodeignore filtering is not allowed by scan configuration') + return documents + + cycodeignore_path = _get_cycodeignore_path(repo_path) + + if not os.path.exists(cycodeignore_path): + logger.debug('.cycodeignore file does not exist in the repository root') + return documents + + logger.info('Using %s for filtering documents', cycodeignore_path) + + ignore_filter_manager = _create_ignore_filter_manager(repo_path, cycodeignore_path) + + allowed_paths = _build_allowed_paths_set(ignore_filter_manager, repo_path) + + filtered_documents = _filter_documents_by_allowed_paths(documents, allowed_paths, repo_path) + + logger.debug('Filtered %d documents using .cycodeignore patterns', len(documents) - len(filtered_documents)) + return filtered_documents diff --git a/cycode/cli/files_collector/path_documents.py b/cycode/cli/files_collector/path_documents.py index 73cd0768..142c63bf 100644 --- a/cycode/cli/files_collector/path_documents.py +++ b/cycode/cli/files_collector/path_documents.py @@ -1,4 +1,5 @@ import os +from collections.abc import Generator from typing import TYPE_CHECKING from cycode.cli.files_collector.file_excluder import excluder @@ -17,10 +18,18 @@ from cycode.cli.utils.progress_bar import BaseProgressBar, ProgressBarSection -def _get_all_existing_files_in_directory(path: str, *, walk_with_ignore_patterns: bool = True) -> list[str]: +def _get_all_existing_files_in_directory( + path: str, 
*, walk_with_ignore_patterns: bool = True, is_cycodeignore_allowed: bool = True +) -> list[str]: files: list[str] = [] - walk_func = walk_ignore if walk_with_ignore_patterns else os.walk + if walk_with_ignore_patterns: + + def walk_func(path: str) -> Generator[tuple[str, list[str], list[str]], None, None]: + return walk_ignore(path, is_cycodeignore_allowed=is_cycodeignore_allowed) + else: + walk_func = os.walk + for root, _, filenames in walk_func(path): for filename in filenames: files.append(os.path.join(root, filename)) @@ -28,7 +37,7 @@ def _get_all_existing_files_in_directory(path: str, *, walk_with_ignore_patterns return files -def _get_relevant_files_in_path(path: str) -> list[str]: +def _get_relevant_files_in_path(path: str, *, is_cycodeignore_allowed: bool = True) -> list[str]: absolute_path = get_absolute_path(path) if not os.path.isfile(absolute_path) and not os.path.isdir(absolute_path): @@ -37,16 +46,21 @@ def _get_relevant_files_in_path(path: str) -> list[str]: if os.path.isfile(absolute_path): return [absolute_path] - file_paths = _get_all_existing_files_in_directory(absolute_path) + file_paths = _get_all_existing_files_in_directory(absolute_path, is_cycodeignore_allowed=is_cycodeignore_allowed) return [file_path for file_path in file_paths if os.path.isfile(file_path)] def _get_relevant_files( - progress_bar: 'BaseProgressBar', progress_bar_section: 'ProgressBarSection', scan_type: str, paths: tuple[str, ...] 
+ progress_bar: 'BaseProgressBar', + progress_bar_section: 'ProgressBarSection', + scan_type: str, + paths: tuple[str, ...], + *, + is_cycodeignore_allowed: bool = True, ) -> list[str]: all_files_to_scan = [] for path in paths: - all_files_to_scan.extend(_get_relevant_files_in_path(path)) + all_files_to_scan.extend(_get_relevant_files_in_path(path, is_cycodeignore_allowed=is_cycodeignore_allowed)) # we are double the progress bar section length because we are going to process the files twice # first time to get the file list with respect of excluded patterns (excluding takes seconds to execute) @@ -94,8 +108,11 @@ def get_relevant_documents( paths: tuple[str, ...], *, is_git_diff: bool = False, + is_cycodeignore_allowed: bool = True, ) -> list[Document]: - relevant_files = _get_relevant_files(progress_bar, progress_bar_section, scan_type, paths) + relevant_files = _get_relevant_files( + progress_bar, progress_bar_section, scan_type, paths, is_cycodeignore_allowed=is_cycodeignore_allowed + ) documents: list[Document] = [] for file in relevant_files: diff --git a/cycode/cli/files_collector/walk_ignore.py b/cycode/cli/files_collector/walk_ignore.py index fb723109..0c9d53a3 100644 --- a/cycode/cli/files_collector/walk_ignore.py +++ b/cycode/cli/files_collector/walk_ignore.py @@ -1,6 +1,7 @@ import os from collections.abc import Generator, Iterable +from cycode.cli import consts from cycode.cli.logger import get_logger from cycode.cli.utils.ignore_utils import IgnoreFilterManager @@ -8,7 +9,6 @@ _SUPPORTED_IGNORE_PATTERN_FILES = { '.gitignore', - '.cycodeignore', } _DEFAULT_GLOBAL_IGNORE_PATTERNS = [ '.git', @@ -25,11 +25,17 @@ def _walk_to_top(path: str) -> Iterable[str]: yield path # Include the top-level directory -def _collect_top_level_ignore_files(path: str) -> list[str]: +def _collect_top_level_ignore_files(path: str, *, is_cycodeignore_allowed: bool = True) -> list[str]: ignore_files = [] top_paths = reversed(list(_walk_to_top(path))) # we must reverse it to 
make top levels more prioritized + + supported_files = set(_SUPPORTED_IGNORE_PATTERN_FILES) + if is_cycodeignore_allowed: + supported_files.add(consts.CYCODEIGNORE_FILENAME) + logger.debug('.cycodeignore files included due to scan configuration') + for dir_path in top_paths: - for ignore_file in _SUPPORTED_IGNORE_PATTERN_FILES: + for ignore_file in supported_files: ignore_file_path = os.path.join(dir_path, ignore_file) if os.path.exists(ignore_file_path): logger.debug('Reading top level ignore file: %s', ignore_file_path) @@ -37,10 +43,13 @@ def _collect_top_level_ignore_files(path: str) -> list[str]: return ignore_files -def walk_ignore(path: str) -> Generator[tuple[str, list[str], list[str]], None, None]: +def walk_ignore( + path: str, *, is_cycodeignore_allowed: bool = True +) -> Generator[tuple[str, list[str], list[str]], None, None]: + ignore_file_paths = _collect_top_level_ignore_files(path, is_cycodeignore_allowed=is_cycodeignore_allowed) ignore_filter_manager = IgnoreFilterManager.build( path=path, - global_ignore_file_paths=_collect_top_level_ignore_files(path), + global_ignore_file_paths=ignore_file_paths, global_patterns=_DEFAULT_GLOBAL_IGNORE_PATTERNS, ) for dirpath, dirnames, filenames, ignored_dirnames, ignored_filenames in ignore_filter_manager.walk_with_ignored(): diff --git a/cycode/cli/utils/scan_utils.py b/cycode/cli/utils/scan_utils.py index 57586b51..1332a7cf 100644 --- a/cycode/cli/utils/scan_utils.py +++ b/cycode/cli/utils/scan_utils.py @@ -1,11 +1,12 @@ import os -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional from uuid import UUID, uuid4 import typer if TYPE_CHECKING: from cycode.cli.models import LocalScanResult + from cycode.cyclient.models import ScanConfiguration def set_issue_detected(ctx: typer.Context, issue_detected: bool) -> None: @@ -22,6 +23,11 @@ def is_scan_failed(ctx: typer.Context) -> bool: return did_fail or issue_detected +def is_cycodeignore_allowed_by_scan_config(ctx: typer.Context) -> bool: 
+ scan_config: Optional[ScanConfiguration] = ctx.obj.get('scan_config') + return scan_config.is_cycode_ignore_allowed if scan_config else True + + def generate_unique_scan_id() -> UUID: if 'PYTEST_TEST_UNIQUE_ID' in os.environ: return UUID(os.environ['PYTEST_TEST_UNIQUE_ID']) diff --git a/cycode/cyclient/models.py b/cycode/cyclient/models.py index fa952985..f6419645 100644 --- a/cycode/cyclient/models.py +++ b/cycode/cyclient/models.py @@ -505,6 +505,7 @@ def build_dto(self, data: dict[str, Any], **_) -> 'SupportedModulesPreferences': @dataclass class ScanConfiguration: scannable_extensions: list[str] + is_cycode_ignore_allowed: bool class ScanConfigurationSchema(Schema): @@ -512,6 +513,7 @@ class Meta: unknown = EXCLUDE scannable_extensions = fields.List(fields.String(), allow_none=True) + is_cycode_ignore_allowed = fields.Boolean(load_default=True) @post_load def build_dto(self, data: dict[str, Any], **_) -> 'ScanConfiguration': diff --git a/cycode/cyclient/scan_client.py b/cycode/cyclient/scan_client.py index 6ddce8d5..4f2debca 100644 --- a/cycode/cyclient/scan_client.py +++ b/cycode/cyclient/scan_client.py @@ -280,16 +280,23 @@ def get_scan_configuration_path(self, scan_type: str) -> str: correct_scan_type = self.scan_config.get_async_scan_type(scan_type) return f'{self.get_scan_service_url_path(scan_type)}/{correct_scan_type}/configuration' - def get_scan_configuration(self, scan_type: str) -> models.ScanConfiguration: + def get_scan_configuration(self, scan_type: str, remote_url: Optional[str] = None) -> models.ScanConfiguration: + params = {} + if remote_url: + params['remote_url'] = remote_url + response = self.scan_cycode_client.get( url_path=self.get_scan_configuration_path(scan_type), + params=params, hide_response_content_log=self._hide_response_log, ) return models.ScanConfigurationSchema().load(response.json()) - def get_scan_configuration_safe(self, scan_type: str) -> Optional['models.ScanConfiguration']: + def get_scan_configuration_safe( + self, 
scan_type: str, remote_url: Optional[str] = None + ) -> Optional['models.ScanConfiguration']: try: - return self.get_scan_configuration(scan_type) + return self.get_scan_configuration(scan_type, remote_url) except RequestHttpError as e: if e.status_code == 404: logger.debug( diff --git a/tests/cli/files_collector/test_documents_walk_ignore.py b/tests/cli/files_collector/test_documents_walk_ignore.py new file mode 100644 index 00000000..b92cb96e --- /dev/null +++ b/tests/cli/files_collector/test_documents_walk_ignore.py @@ -0,0 +1,430 @@ +import os +from os.path import normpath +from typing import TYPE_CHECKING + +from cycode.cli.files_collector.documents_walk_ignore import ( + _build_allowed_paths_set, + _create_ignore_filter_manager, + _filter_documents_by_allowed_paths, + _get_cycodeignore_path, + _get_document_check_path, + filter_documents_with_cycodeignore, +) +from cycode.cli.models import Document + +if TYPE_CHECKING: + from pyfakefs.fake_filesystem import FakeFilesystem + + +# we are using normpath() in every test to provide multi-platform support + + +def _create_mocked_file_structure(fs: 'FakeFilesystem') -> None: + """Create a mock file structure for testing.""" + fs.create_dir('/home/user/project') + fs.create_dir('/home/user/.git') + + fs.create_dir('/home/user/project/.cycode') + fs.create_file('/home/user/project/.cycode/config.yaml') + fs.create_dir('/home/user/project/.git') + fs.create_file('/home/user/project/.git/HEAD') + + # Create .cycodeignore with patterns + fs.create_file('/home/user/project/.cycodeignore', contents='*.pyc\n*.log\nbuild/\n# comment line\n\n') + + # Create test files that should be filtered + fs.create_file('/home/user/project/ignored.pyc') + fs.create_file('/home/user/project/ignored.log') + fs.create_file('/home/user/project/presented.txt') + fs.create_file('/home/user/project/presented.py') + + # Create build directory with files (should be ignored) + fs.create_dir('/home/user/project/build') + 
fs.create_file('/home/user/project/build/output.js') + fs.create_file('/home/user/project/build/bundle.css') + + # Create subdirectory + fs.create_dir('/home/user/project/src') + fs.create_file('/home/user/project/src/main.py') + fs.create_file('/home/user/project/src/debug.log') # should be ignored + fs.create_file('/home/user/project/src/temp.pyc') # should be ignored + + +def _create_test_documents(repo_path: str) -> list[Document]: + """Create test Document objects for the mocked file structure.""" + documents = [] + + # Files in root + documents.append( + Document( + path='ignored.pyc', + content='# compiled python', + absolute_path=normpath(os.path.join(repo_path, 'ignored.pyc')), + ) + ) + documents.append( + Document( + path='ignored.log', content='log content', absolute_path=normpath(os.path.join(repo_path, 'ignored.log')) + ) + ) + documents.append( + Document( + path='presented.txt', + content='text content', + absolute_path=normpath(os.path.join(repo_path, 'presented.txt')), + ) + ) + documents.append( + Document( + path='presented.py', + content='print("hello")', + absolute_path=normpath(os.path.join(repo_path, 'presented.py')), + ) + ) + + # Files in build directory (should be ignored) + documents.append( + Document( + path='build/output.js', + content='console.log("build");', + absolute_path=normpath(os.path.join(repo_path, 'build/output.js')), + ) + ) + documents.append( + Document( + path='build/bundle.css', + content='body { color: red; }', + absolute_path=normpath(os.path.join(repo_path, 'build/bundle.css')), + ) + ) + + # Files in src directory + documents.append( + Document( + path='src/main.py', + content='def main(): pass', + absolute_path=normpath(os.path.join(repo_path, 'src/main.py')), + ) + ) + documents.append( + Document( + path='src/debug.log', content='debug info', absolute_path=normpath(os.path.join(repo_path, 'src/debug.log')) + ) + ) + documents.append( + Document( + path='src/temp.pyc', content='compiled', 
absolute_path=normpath(os.path.join(repo_path, 'src/temp.pyc')) + ) + ) + + return documents + + +def test_get_cycodeignore_path() -> None: + """Test _get_cycodeignore_path helper function.""" + repo_path = normpath('/home/user/project') + expected = normpath('/home/user/project/.cycodeignore') + result = _get_cycodeignore_path(repo_path) + assert result == expected + + +def test_create_ignore_filter_manager(fs: 'FakeFilesystem') -> None: + """Test _create_ignore_filter_manager helper function.""" + _create_mocked_file_structure(fs) + + repo_path = normpath('/home/user/project') + cycodeignore_path = normpath('/home/user/project/.cycodeignore') + + manager = _create_ignore_filter_manager(repo_path, cycodeignore_path) + assert manager is not None + + # Test that it can walk the directory + walked_dirs = list(manager.walk_with_ignored()) + assert len(walked_dirs) > 0 + + +def test_get_document_check_path() -> None: + """Test _get_document_check_path helper function.""" + repo_path = normpath('/home/user/project') + + # Test document with absolute_path + doc_with_abs = Document( + path='src/main.py', content='code', absolute_path=normpath('/home/user/project/src/main.py') + ) + result = _get_document_check_path(doc_with_abs, repo_path) + assert result == normpath('/home/user/project/src/main.py') + + # Test document without absolute_path but with absolute path + doc_abs_path = Document(path=normpath('/home/user/project/src/main.py'), content='code') + result = _get_document_check_path(doc_abs_path, repo_path) + assert result == normpath('/home/user/project/src/main.py') + + # Test document with relative path + doc_rel_path = Document(path='src/main.py', content='code') + result = _get_document_check_path(doc_rel_path, repo_path) + assert result == normpath('/home/user/project/src/main.py') + + +def test_build_allowed_paths_set(fs: 'FakeFilesystem') -> None: + """Test _build_allowed_paths_set helper function.""" + _create_mocked_file_structure(fs) + + repo_path = 
normpath('/home/user/project') + cycodeignore_path = normpath('/home/user/project/.cycodeignore') + + manager = _create_ignore_filter_manager(repo_path, cycodeignore_path) + allowed_paths = _build_allowed_paths_set(manager, repo_path) + + # Check that allowed files are in the set + assert normpath('/home/user/project/presented.txt') in allowed_paths + assert normpath('/home/user/project/presented.py') in allowed_paths + assert normpath('/home/user/project/src/main.py') in allowed_paths + assert normpath('/home/user/project/.cycodeignore') in allowed_paths + + # Check that ignored files are NOT in the set + assert normpath('/home/user/project/ignored.pyc') not in allowed_paths + assert normpath('/home/user/project/ignored.log') not in allowed_paths + assert normpath('/home/user/project/src/debug.log') not in allowed_paths + assert normpath('/home/user/project/src/temp.pyc') not in allowed_paths + assert normpath('/home/user/project/build/output.js') not in allowed_paths + assert normpath('/home/user/project/build/bundle.css') not in allowed_paths + + +def test_filter_documents_by_allowed_paths() -> None: + """Test _filter_documents_by_allowed_paths helper function.""" + repo_path = normpath('/home/user/project') + + # Create test documents + documents = [ + Document(path='allowed.txt', content='content', absolute_path=normpath('/home/user/project/allowed.txt')), + Document(path='ignored.txt', content='content', absolute_path=normpath('/home/user/project/ignored.txt')), + ] + + # Create allowed paths set (only allow first document) + allowed_paths = {normpath('/home/user/project/allowed.txt')} + + result = _filter_documents_by_allowed_paths(documents, allowed_paths, repo_path) + + assert len(result) == 1 + assert result[0].path == 'allowed.txt' + + +def test_filter_documents_with_cycodeignore_no_ignore_file(fs: 'FakeFilesystem') -> None: + """Test filtering when no .cycodeignore file exists.""" + # Create structure without .cycodeignore + 
fs.create_dir('/home/user/project') + fs.create_file('/home/user/project/file1.py') + fs.create_file('/home/user/project/file2.log') + + repo_path = normpath('/home/user/project') + documents = [ + Document(path='file1.py', content='code'), + Document(path='file2.log', content='log'), + ] + + result = filter_documents_with_cycodeignore(documents, repo_path, is_cycodeignore_allowed=True) + + # Should return all documents since no .cycodeignore exists + assert len(result) == 2 + assert result == documents + + +def test_filter_documents_with_cycodeignore_basic_filtering(fs: 'FakeFilesystem') -> None: + """Test basic document filtering with .cycodeignore.""" + _create_mocked_file_structure(fs) + + repo_path = normpath('/home/user/project') + documents = _create_test_documents(repo_path) + + result = filter_documents_with_cycodeignore(documents, repo_path, is_cycodeignore_allowed=True) + + # Count expected results: should exclude *.pyc, *.log, and build/* files + expected_files = { + 'presented.txt', + 'presented.py', + 'src/main.py', + } + + result_files = {doc.path for doc in result} + assert result_files == expected_files + + # Verify specific exclusions + excluded_files = {doc.path for doc in documents if doc not in result} + assert 'ignored.pyc' in excluded_files + assert 'ignored.log' in excluded_files + assert 'src/debug.log' in excluded_files + assert 'src/temp.pyc' in excluded_files + assert 'build/output.js' in excluded_files + assert 'build/bundle.css' in excluded_files + + +def test_filter_documents_with_cycodeignore_relative_paths(fs: 'FakeFilesystem') -> None: + """Test filtering documents with relative paths (no absolute_path set).""" + _create_mocked_file_structure(fs) + + repo_path = normpath('/home/user/project') + + # Create documents without absolute_path + documents = [ + Document(path='presented.py', content='code'), + Document(path='ignored.pyc', content='compiled'), + Document(path='src/main.py', content='code'), + Document(path='src/debug.log', 
content='log'), + ] + + result = filter_documents_with_cycodeignore(documents, repo_path, is_cycodeignore_allowed=True) + + # Should filter out .pyc and .log files + expected_files = {'presented.py', 'src/main.py'} + result_files = {doc.path for doc in result} + assert result_files == expected_files + + +def test_filter_documents_with_cycodeignore_absolute_paths(fs: 'FakeFilesystem') -> None: + """Test filtering documents with absolute paths in path field.""" + _create_mocked_file_structure(fs) + + repo_path = normpath('/home/user/project') + + # Create documents with absolute paths in path field + documents = [ + Document(path=normpath('/home/user/project/presented.py'), content='code'), + Document(path=normpath('/home/user/project/ignored.pyc'), content='compiled'), + Document(path=normpath('/home/user/project/src/main.py'), content='code'), + Document(path=normpath('/home/user/project/src/debug.log'), content='log'), + ] + + result = filter_documents_with_cycodeignore(documents, repo_path, is_cycodeignore_allowed=True) + + # Should filter out .pyc and .log files + expected_files = {normpath('/home/user/project/presented.py'), normpath('/home/user/project/src/main.py')} + result_files = {doc.path for doc in result} + assert result_files == expected_files + + +def test_filter_documents_with_cycodeignore_empty_file(fs: 'FakeFilesystem') -> None: + """Test filtering with empty .cycodeignore file.""" + fs.create_dir('/home/user/project') + fs.create_file('/home/user/project/.cycodeignore', contents='') # empty file + fs.create_file('/home/user/project/file1.py') + fs.create_file('/home/user/project/file2.log') + + repo_path = normpath('/home/user/project') + documents = [ + Document(path='file1.py', content='code'), + Document(path='file2.log', content='log'), + ] + + result = filter_documents_with_cycodeignore(documents, repo_path, is_cycodeignore_allowed=True) + + # Should return all documents since .cycodeignore is empty + assert len(result) == 2 + + +def 
test_filter_documents_with_cycodeignore_comments_only(fs: 'FakeFilesystem') -> None: + """Test filtering with .cycodeignore file containing only comments and empty lines.""" + fs.create_dir('/home/user/project') + fs.create_file('/home/user/project/.cycodeignore', contents='# Just comments\n\n# More comments\n') + fs.create_file('/home/user/project/file1.py') + fs.create_file('/home/user/project/file2.log') + + repo_path = normpath('/home/user/project') + documents = [ + Document(path='file1.py', content='code'), + Document(path='file2.log', content='log'), + ] + + result = filter_documents_with_cycodeignore(documents, repo_path, is_cycodeignore_allowed=True) + + # Should return all documents since no real ignore patterns + assert len(result) == 2 + + +def test_filter_documents_with_cycodeignore_error_handling() -> None: + """Test that all documents are returned when the repo path does not exist.""" + # Use non-existent repo path + repo_path = normpath('/non/existent/path') + + documents = [ + Document(path='file1.py', content='code'), + Document(path='file2.txt', content='content'), + ] + + # Should return all documents since .cycodeignore doesn't exist + result = filter_documents_with_cycodeignore(documents, repo_path, is_cycodeignore_allowed=True) + assert len(result) == 2 + + +def test_filter_documents_with_cycodeignore_complex_patterns(fs: 'FakeFilesystem') -> None: + """Test filtering with complex ignore patterns.""" + fs.create_dir('/home/user/project') + + # Create .cycodeignore with various pattern types + cycodeignore_content = """ +# Ignore specific files +config.json +secrets.key + +# Ignore file patterns +*.tmp +*.cache + +# Ignore directories +logs/ +temp/ + +# Ignore files in specific directories +tests/*.pyc +""" + fs.create_file('/home/user/project/.cycodeignore', contents=cycodeignore_content) + + # Create test files + test_files = [ + 'config.json', # ignored + 'secrets.key', # ignored + 'app.py', # allowed + 'file.tmp', # ignored + 'data.cache', # ignored +
'logs/app.log', # ignored (directory) + 'temp/file.txt', # ignored (directory) + 'tests/test.pyc', # ignored (pattern in directory) + 'tests/test.py', # allowed + 'src/main.py', # allowed + ] + + for file_path in test_files: + full_path = normpath(os.path.join('/home/user/project', file_path)) + os.makedirs(os.path.dirname(full_path), exist_ok=True) + fs.create_file(full_path) + + repo_path = normpath('/home/user/project') + documents = [Document(path=f, content='content') for f in test_files] + + result = filter_documents_with_cycodeignore(documents, repo_path, is_cycodeignore_allowed=True) + + # Should only allow: app.py, tests/test.py, src/main.py + expected_files = {'app.py', 'tests/test.py', 'src/main.py'} + result_files = {doc.path for doc in result} + assert result_files == expected_files + + +def test_filter_documents_with_cycodeignore_not_allowed(fs: 'FakeFilesystem') -> None: + """Test that filtering is skipped when is_cycodeignore_allowed is False.""" + _create_mocked_file_structure(fs) + + repo_path = normpath('/home/user/project') + documents = _create_test_documents(repo_path) + + # With filtering disabled, should return all documents + result = filter_documents_with_cycodeignore(documents, repo_path, is_cycodeignore_allowed=False) + + # Should return all documents without filtering + assert len(result) == len(documents) + assert {doc.path for doc in result} == {doc.path for doc in documents} + + # Verify that files that would normally be filtered are still present + result_files = {doc.path for doc in result} + assert 'ignored.pyc' in result_files + assert 'ignored.log' in result_files + assert 'build/output.js' in result_files + assert 'src/debug.log' in result_files