From cb09381f99773757f4b67d592bd90c22c208616b Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 8 Nov 2025 20:48:57 +0000 Subject: [PATCH 1/2] Fix HuggingFace cache path resolution on Windows - Expand tilde (~) in cache_dir path using os.path.expanduser() - Convert to absolute path using os.path.abspath() - Fixes WinError 3 when HuggingFace tries to resolve symlinks with relative paths - Applied fix to both code-detection and code-analysis modules Resolves issue where cache_dir defaulting to "~/.cache/huggingface/hub" was not properly expanded, causing symlink resolution failures on Windows. --- code-analysis/baselines/utils/preprocessing.py | 3 ++- code-detection/baselines/utils/preprocessing.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/code-analysis/baselines/utils/preprocessing.py b/code-analysis/baselines/utils/preprocessing.py index e2b6fc7..9f14d96 100644 --- a/code-analysis/baselines/utils/preprocessing.py +++ b/code-analysis/baselines/utils/preprocessing.py @@ -13,7 +13,8 @@ def preprocess_and_save(args): - cache_dir = args.cache_dir + # Expand ~ and convert to absolute path to avoid Windows symlink issues + cache_dir = os.path.abspath(os.path.expanduser(args.cache_dir)) os.environ["XDG_CACHE_HOME"] = cache_dir if not os.path.exists(cache_dir): os.makedirs(cache_dir) diff --git a/code-detection/baselines/utils/preprocessing.py b/code-detection/baselines/utils/preprocessing.py index e2b6fc7..9f14d96 100644 --- a/code-detection/baselines/utils/preprocessing.py +++ b/code-detection/baselines/utils/preprocessing.py @@ -13,7 +13,8 @@ def preprocess_and_save(args): - cache_dir = args.cache_dir + # Expand ~ and convert to absolute path to avoid Windows symlink issues + cache_dir = os.path.abspath(os.path.expanduser(args.cache_dir)) os.environ["XDG_CACHE_HOME"] = cache_dir if not os.path.exists(cache_dir): os.makedirs(cache_dir) From 39739e66084eab0144aeb3a12dc8dd63cd1936d0 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 8 Nov 2025 20:52:23 +0000 Subject: [PATCH 2/2] Add comprehensive dependencies to requirements.txt Added missing packages and version constraints: - huggingface-hub: Better cache handling (fixes Windows path issues) - sentencepiece: Required for many tokenizers - protobuf: Model serialization support - safetensors: Modern model weight format - prettytable: Used in analyze_naturalness.py Added minimum version constraints for all packages to ensure compatibility: - PyTorch >= 2.0.0 - Transformers >= 4.30.0 - Other packages with appropriate minimum versions This ensures the project can run on fresh installations without missing dependencies. --- requirements.txt | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0af96ed..9bc034f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,26 +1,35 @@ # Core deep learning and AI libraries -torch -transformers -accelerate -datasets -openai +torch>=2.0.0 +transformers>=4.30.0 +accelerate>=0.20.0 +datasets>=2.12.0 +openai>=0.27.0 + +# HuggingFace ecosystem +huggingface-hub>=0.15.0 +sentencepiece>=0.1.99 +protobuf>=3.20.0 +safetensors>=0.3.0 # Scientific computing -numpy -scipy -scikit-learn +numpy>=1.24.0 +scipy>=1.10.0 +scikit-learn>=1.2.0 # Data processing and analysis -pandas -matplotlib -seaborn +pandas>=2.0.0 +matplotlib>=3.7.0 +seaborn>=0.12.0 # Progress bars and logging -tqdm -loguru +tqdm>=4.65.0 +loguru>=0.7.0 # Code parsing -tree-sitter +tree-sitter>=0.20.0 # Web requests (for API calls) -requests +requests>=2.28.0 + +# Table formatting +prettytable>=3.7.0