diff --git a/code-analysis/baselines/utils/preprocessing.py b/code-analysis/baselines/utils/preprocessing.py index e2b6fc7..9f14d96 100644 --- a/code-analysis/baselines/utils/preprocessing.py +++ b/code-analysis/baselines/utils/preprocessing.py @@ -13,7 +13,8 @@ def preprocess_and_save(args): - cache_dir = args.cache_dir + # Expand ~ and convert to absolute path to avoid Windows symlink issues + cache_dir = os.path.abspath(os.path.expanduser(args.cache_dir)) os.environ["XDG_CACHE_HOME"] = cache_dir if not os.path.exists(cache_dir): os.makedirs(cache_dir) diff --git a/code-detection/baselines/utils/preprocessing.py b/code-detection/baselines/utils/preprocessing.py index e2b6fc7..9f14d96 100644 --- a/code-detection/baselines/utils/preprocessing.py +++ b/code-detection/baselines/utils/preprocessing.py @@ -13,7 +13,8 @@ def preprocess_and_save(args): - cache_dir = args.cache_dir + # Expand ~ and convert to absolute path to avoid Windows symlink issues + cache_dir = os.path.abspath(os.path.expanduser(args.cache_dir)) os.environ["XDG_CACHE_HOME"] = cache_dir if not os.path.exists(cache_dir): os.makedirs(cache_dir) diff --git a/requirements.txt b/requirements.txt index 0af96ed..9bc034f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,26 +1,35 @@ # Core deep learning and AI libraries -torch -transformers -accelerate -datasets -openai +torch>=2.0.0 +transformers>=4.30.0 +accelerate>=0.20.0 +datasets>=2.12.0 +openai>=0.27.0 + +# HuggingFace ecosystem +huggingface-hub>=0.15.0 +sentencepiece>=0.1.99 +protobuf>=3.20.0 +safetensors>=0.3.0 # Scientific computing -numpy -scipy -scikit-learn +numpy>=1.24.0 +scipy>=1.10.0 +scikit-learn>=1.2.0 # Data processing and analysis -pandas -matplotlib -seaborn +pandas>=2.0.0 +matplotlib>=3.7.0 +seaborn>=0.12.0 # Progress bars and logging -tqdm -loguru +tqdm>=4.65.0 +loguru>=0.7.0 # Code parsing -tree-sitter +tree-sitter>=0.20.0 # Web requests (for API calls) -requests +requests>=2.28.0 + +# Table formatting +prettytable>=3.7.0