Skip to content

Commit 1f68504

Browse files
authored
Merge pull request #408 from JohT/feature/support-venv
Support Python's built-in module venv for virtual environments additionally to Conda
2 parents 5e67563 + 9f90997 commit 1f68504

17 files changed

+311
-53
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
name: Check Python venv virtual environment
2+
3+
on:
4+
pull_request:
5+
branches:
6+
- main
7+
# Only watch changes related to Python virtual environment venv
8+
paths:
9+
- 'requirements.txt'
10+
- 'scripts/activatePythonEnvironment.sh'
11+
- '.github/workflows/internal-check-python-venv-support.yml' # or when this file changed
12+
13+
jobs:
14+
check-python-venv-environment:
15+
runs-on: ubuntu-22.04
16+
strategy:
17+
matrix:
18+
include:
19+
- python: 3.12
20+
21+
steps:
22+
- name: Checkout GIT Repository
23+
uses: actions/checkout@v4
24+
25+
- name: (Python Setup) Use version ${{ matrix.python }} with venv environment management module
26+
uses: actions/setup-python@v5
27+
with:
28+
python-version: ${{ matrix.python }}
29+
cache: 'pip'
30+
31+
- name: Activate virtual environment using venv and check if the required packages were installed
32+
env:
33+
USE_VIRTUAL_PYTHON_ENVIRONMENT_VENV: "true"
34+
# 1. Run the script under test to create, activate and install the virtual environment
35+
# 2a. Run pip in dry-run mode without installing or resolving dependencies
36+
# 2b. Suppress all pip output (stderr)
37+
# 2c. Check if pip *would install* anything using grep
38+
# 2d. If there are missing dependencies and the environment is incomplete, return 1 (indicates all requirements already satisfied)
39+
run: |
40+
./scripts/activatePythonEnvironment.sh
41+
pip install --dry-run --no-deps --requirement "./requirements.txt" 2>/dev/null | grep -q "Would install" || return 1

.github/workflows/public-analyze-code-graph.yml

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ on:
6161
required: false
6262
type: string
6363
default: 'true'
64+
use-venv_virtual_python_environment:
65+
description: >
66+
Use venv for virtual Python environments instead of Conda ("true") or not ("false", default).
67+
required: false
68+
type: string
69+
default: 'false'
6470
outputs:
6571
uploaded-analysis-results:
6672
description: >
@@ -103,16 +109,26 @@ jobs:
103109

104110
# "Setup Python" can be skipped if jupyter notebook analysis-results aren't needed
105111
- name: (Python Setup) Use version ${{ matrix.python }} with Conda package manager Miniforge
112+
if: inputs.use-venv_virtual_python_environment == 'false'
106113
id: prepare-conda-environment
107114
uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3
108115
with:
109116
python-version: ${{ matrix.python }}
110117
miniforge-version: ${{ matrix.miniforge }}
111118
activate-environment: codegraph
112-
environment-file: ./jupyter/environment.yml
119+
environment-file: ./conda-environment.yml
113120
auto-activate-base: false
114121
show-channel-urls: true
122+
123+
- name: (Python Setup) Use version ${{ matrix.python }} with venv environment management module
124+
if: inputs.use-venv_virtual_python_environment == 'true'
125+
uses: actions/setup-python@v5
126+
with:
127+
python-version: ${{ matrix.python }}
128+
cache: 'pip'
129+
115130
- name: (Python Setup) Conda environment info
131+
if: inputs.use-venv_virtual_python_environment == 'false'
116132
shell: bash -el {0}
117133
run: |
118134
conda info
@@ -168,6 +184,7 @@ jobs:
168184
ENABLE_JUPYTER_NOTEBOOK_PDF_GENERATION: ${{ inputs.jupyter-pdf }}
169185
IMPORT_GIT_LOG_DATA_IF_SOURCE_IS_PRESENT: "" # Options: "none", "aggregated", "full". default = "plugin" or ""
170186
PREPARE_CONDA_ENVIRONMENT: "false" # Had already been done in step with id "prepare-conda-environment".
187+
USE_VIRTUAL_PYTHON_ENVIRONMENT_VENV: ${{ inputs.use-venv_virtual_python_environment }}
171188
run: |
172189
TYPESCRIPT_SCAN_HEAP_MEMORY=${{ inputs.typescript-scan-heap-memory }} ./../../scripts/analysis/analyze.sh ${{ inputs.analysis-arguments }}
173190

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ __pycache__/
9898

9999
# Python environments
100100
.conda
101+
.venv/
102+
*.pyc
101103

102104
# Optuna (and other) Database data
103105
*.db

COMMANDS.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -389,23 +389,23 @@ Here is an example on how to use [executeJupyterNotebook.sh](./scripts/executeJu
389389
conda activate codegraph
390390
```
391391

392-
or by using the environment file [codegraph-environment.yml](./jupyter/environment.yml):
392+
or by using the codegraph environment file [conda-environment.yml](./conda-environment.yml):
393393

394394
```shell
395-
conda env create --file ./jupyter/environment.yml
395+
conda env create --file ./conda-environment.yml
396396
conda activate codegraph
397397
```
398398

399-
- Export full environment.yml
399+
- Export full conda-environment.yml
400400

401401
```shell
402-
conda env export --name codegraph > full-codegraph-environment.yml
402+
conda env export --name codegraph > full-codegraph-conda-environment.yml
403403
```
404404

405-
- Export only explicit environment.yml
405+
- Export only explicit conda-environment.yml
406406

407407
```shell
408-
conda env export --from-history --name codegraph | grep -v "^prefix: " > explicit-codegraph-environment.yml
408+
conda env export --from-history --name codegraph | grep -v "^prefix: " > explicit-codegraph-conda-environment.yml
409409
```
410410

411411
### Executing Jupyter Notebooks with nbconvert

GETTING_STARTED.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,13 +84,13 @@ Use these optional command line options as needed:
8484
./../../scripts/analysis/analyze.sh --report Csv
8585
```
8686
87-
- Jupyter notebook reports when Python and Conda are installed (and Chromium Browser for PDF generation):
87+
- Jupyter notebook reports when Python and Conda (or venv) are installed (and Chromium Browser for PDF generation):
8888
8989
```shell
9090
./../../scripts/analysis/analyze.sh --report Jupyter
9191
```
9292
93-
- Python reports when Python and Conda are installed (without Chromium Browser for PDF generation):
93+
- Python reports when Python and Conda (or venv) are installed (without Chromium Browser for PDF generation):
9494
9595
```shell
9696
./../../scripts/analysis/analyze.sh --report Python
@@ -102,7 +102,7 @@ Use these optional command line options as needed:
102102
./../../scripts/analysis/analyze.sh --report Visualization
103103
```
104104
105-
- All reports with Python, Conda, Node.js and npm installed:
105+
- All reports with Python, Conda (or venv), Node.js and npm installed:
106106
107107
```shell
108108
./../../scripts/analysis/analyze.sh

README.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,10 @@ Here are some fully automated graph visualizations utilizing [GraphViz](https://
8686

8787
### Additional Prerequisites for Python and Jupyter Notebooks
8888

89-
- Python is required for Jupyter Notebook reports.
90-
- A conda package manager like [Miniconda](https://docs.conda.io/projects/miniconda/en/latest) or [Anaconda](https://www.anaconda.com/download)(Recommended for Windows) is required for Jupyter Notebook reports.
89+
- Python is required for Jupyter Notebook and Python reports.
90+
- Either [Conda](https://docs.conda.io) or Python's built-in module [venv](https://docs.python.org/3/library/venv.html) is required as the environment manager.
91+
- For Conda, use for example [Miniconda](https://docs.conda.io/projects/miniconda/en/latest) or [Anaconda](https://www.anaconda.com/download) (recommended for Windows).
92+
- To use venv, no additional installation is needed. For that the environment variable `USE_VIRTUAL_PYTHON_ENVIRONMENT_VENV` needs to be set to `'true'`.
9193
- Chromium will automatically be downloaded if needed for Jupyter Notebook PDF reports generation.
9294

9395
### Additional Prerequisites for Graph Visualization
@@ -131,13 +133,14 @@ The [Code Structure Analysis Pipeline](./.github/workflows/internal-java-code-an
131133
- [Checkout GIT Repository](https://github.com/actions/checkout)
132134
- [Setup Java](https://github.com/actions/setup-java)
133135
- [Setup Python with Conda](https://github.com/conda-incubator/setup-miniconda) package manager [Mambaforge](https://github.com/conda-forge/miniforge#mambaforge)
136+
- [Setup Python with venv](https://docs.python.org/3/library/venv.html)
134137
- Download artifacts and optionally source code that contain the code to be analyzed [scripts/downloader](./scripts/downloader)
135138
- Setup [Neo4j](https://neo4j.com) Graph Database ([analysis.sh](./scripts/analysis/analyze.sh))
136139
- Setup [jQAssistant](https://jqassistant.github.io/jqassistant/current) for Java and [Typescript](https://github.com/jqassistant-plugin/jqassistant-typescript-plugin) analysis ([analysis.sh](./scripts/analysis/analyze.sh))
137140
- Start [Neo4j](https://neo4j.com) Graph Database ([analysis.sh](./scripts/analysis/analyze.sh))
138141
- Generate CSV Reports [scripts/reports](./scripts/reports) using the command line JSON parser [jq](https://jqlang.github.io/jq)
139142
- Uses [Neo4j Graph Data Science](https://neo4j.com/product/graph-data-science) for community detection, centrality, similarity, node embeddings and topological sort ([analysis.sh](./scripts/analysis/analyze.sh))
140-
- Generate [Jupyter Notebook](https://jupyter.org) reports using these libraries specified in the [environment.yml](./jupyter/environment.yml):
143+
- Generate [Jupyter Notebook](https://jupyter.org) reports using these libraries specified in the [conda-environment.yml](./conda-environment.yml):
141144
- [Python](https://www.python.org)
142145
- [jupyter](https://jupyter.org)
143146
- [matplotlib](https://matplotlib.org)
File renamed without changes.

domains/anomaly-detection/anomalyDetectionPython.sh

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,24 @@ source "${SCRIPTS_DIR}/executeQueryFunctions.sh"
6060
# Define functions to create and delete Graph Projections like "createUndirectedDependencyProjection"
6161
source "${SCRIPTS_DIR}/projectionFunctions.sh"
6262

63+
# Define functions (like is_csv_column_greater_zero) to parse CSV format strings from Cypher query results.
64+
source "${SCRIPTS_DIR}/parseCsvFunctions.sh"
65+
66+
is_sufficient_data_available() {
67+
language=$( extractQueryParameter "projection_language" "${@}" )
68+
nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
69+
70+
query_result=$( execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetectionNodeCount.cypher" "${@}" )
71+
node_count=$(get_csv_column_value "${query_result}" "node_count")
72+
if [ "${node_count}" -lt 15 ]; then
73+
echo "anomalyDetectionPipeline: Warning: Skipping anomaly detection. Only ${node_count} ${language} ${nodeLabel} nodes. At least 15 required."
74+
false
75+
else
76+
echo "anomalyDetectionPipeline: Info: Running anomaly detection with ${node_count} ${language} ${nodeLabel} nodes."
77+
true
78+
fi
79+
}
80+
6381
# Query or recalculate features.
6482
#
6583
# Required Parameters:
@@ -158,30 +176,38 @@ EMBEDDING_PROPERTY="embedding_property=embeddingsFastRandomProjectionTunedForClu
158176

159177
# -- Java Artifact Node Embeddings -------------------------------
160178

161-
if createUndirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight" "${PROJECTION_LANGUAGE}=Java"; then
162-
createDirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection-directed" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight" "${PROJECTION_LANGUAGE}=Java"
163-
anomaly_detection_python_reports "${ALGORITHM_PROJECTION}=artifact-anomaly-detection" "${ALGORITHM_NODE}=Artifact" "${ALGORITHM_WEIGHT}=weight" "${ALGORITHM_LANGUAGE}=Java" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
179+
if is_sufficient_data_available "${ALGORITHM_NODE}=Artifact" "${ALGORITHM_WEIGHT}=weight"; then
180+
if createUndirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight" "${PROJECTION_LANGUAGE}=Java"; then
181+
createDirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection-directed" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight" "${PROJECTION_LANGUAGE}=Java"
182+
anomaly_detection_python_reports "${ALGORITHM_PROJECTION}=artifact-anomaly-detection" "${ALGORITHM_NODE}=Artifact" "${ALGORITHM_WEIGHT}=weight" "${ALGORITHM_LANGUAGE}=Java" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
183+
fi
164184
fi
165185

166186
# -- Java Package Node Embeddings --------------------------------
167187

168-
if createUndirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces" "${PROJECTION_LANGUAGE}=Java"; then
169-
createDirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection-directed" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces" "${PROJECTION_LANGUAGE}=Java"
170-
anomaly_detection_python_reports "${ALGORITHM_PROJECTION}=package-anomaly-detection" "${ALGORITHM_NODE}=Package" "${ALGORITHM_WEIGHT}=weight25PercentInterfaces" "${ALGORITHM_LANGUAGE}=Java" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
188+
if is_sufficient_data_available "${ALGORITHM_NODE}=Package" "${ALGORITHM_WEIGHT}=weight25PercentInterfaces"; then
189+
if createUndirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces" "${PROJECTION_LANGUAGE}=Java"; then
190+
createDirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection-directed" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces" "${PROJECTION_LANGUAGE}=Java"
191+
anomaly_detection_python_reports "${ALGORITHM_PROJECTION}=package-anomaly-detection" "${ALGORITHM_NODE}=Package" "${ALGORITHM_WEIGHT}=weight25PercentInterfaces" "${ALGORITHM_LANGUAGE}=Java" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
192+
fi
171193
fi
172194

173195
# -- Java Type Node Embeddings -----------------------------------
174196

175-
if createUndirectedJavaTypeDependencyProjection "${PROJECTION_NAME}=type-anomaly-detection"; then
176-
createDirectedJavaTypeDependencyProjection "${PROJECTION_NAME}=type-anomaly-detection-directed"
177-
anomaly_detection_python_reports "${ALGORITHM_PROJECTION}=type-anomaly-detection" "${ALGORITHM_NODE}=Type" "${ALGORITHM_WEIGHT}=weight" "${ALGORITHM_LANGUAGE}=Java" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
197+
if is_sufficient_data_available "${ALGORITHM_NODE}=Type" "${ALGORITHM_WEIGHT}=weight"; then
198+
if createUndirectedJavaTypeDependencyProjection "${PROJECTION_NAME}=type-anomaly-detection"; then
199+
createDirectedJavaTypeDependencyProjection "${PROJECTION_NAME}=type-anomaly-detection-directed"
200+
anomaly_detection_python_reports "${ALGORITHM_PROJECTION}=type-anomaly-detection" "${ALGORITHM_NODE}=Type" "${ALGORITHM_WEIGHT}=weight" "${ALGORITHM_LANGUAGE}=Java" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
201+
fi
178202
fi
179203

180204
# -- Typescript Module Node Embeddings ---------------------------
181205

182-
if createUndirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight" "${PROJECTION_LANGUAGE}=Typescript"; then
183-
createDirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding-directed" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight" "${PROJECTION_LANGUAGE}=Typescript"
184-
anomaly_detection_python_reports "${ALGORITHM_PROJECTION}=typescript-module-embedding" "${ALGORITHM_NODE}=Module" "${ALGORITHM_WEIGHT}=lowCouplingElement25PercentWeight" "${ALGORITHM_LANGUAGE}=Typescript" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
206+
if is_sufficient_data_available "${ALGORITHM_NODE}=Module" "${ALGORITHM_WEIGHT}=lowCouplingElement25PercentWeight"; then
207+
if createUndirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight" "${PROJECTION_LANGUAGE}=Typescript"; then
208+
createDirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding-directed" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight" "${PROJECTION_LANGUAGE}=Typescript"
209+
anomaly_detection_python_reports "${ALGORITHM_PROJECTION}=typescript-module-embedding" "${ALGORITHM_NODE}=Module" "${ALGORITHM_WEIGHT}=lowCouplingElement25PercentWeight" "${ALGORITHM_LANGUAGE}=Typescript" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
210+
fi
185211
fi
186212

187213
# ---------------------------------------------------------------
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Count the number of nodes with dependencies. Variables: dependencies_projection_node, dependencies_projection_weight_property
2+
3+
MATCH (source)-[dependency:DEPENDS_ON]->(target)
4+
WHERE $projection_node_label IN labels(source)
5+
AND $projection_node_label IN labels(target)
6+
AND $projection_weight_property IN keys(dependency)
7+
WITH collect(DISTINCT source.name) AS sources
8+
,collect(DISTINCT target.name) AS targets
9+
UNWIND sources + targets AS source_or_target
10+
RETURN count(DISTINCT source_or_target) AS node_count

domains/anomaly-detection/tunedAnomalyDetectionExplained.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -384,10 +384,11 @@ def no_anomalies(cls):
384384

385385
def tune_anomaly_detection_models(
386386
feature_matrix: np.ndarray,
387+
parameters: Parameters,
387388
contamination: float | typing.Literal["auto"] = 0.05,
388389
random_seed: int = 42,
389390
number_of_trials: int = 25,
390-
optimization_timeout_in_seconds: int = 60
391+
optimization_timeout_in_seconds: int = 50
391392
) -> AnomalyDetectionResults:
392393
"""
393394
Tunes both Isolation Forest and a proxy Random Forest using Optuna, maximizing the F1 score
@@ -464,7 +465,7 @@ def objective(trial) -> float:
464465

465466
# Print the number of samples and features in the feature matrix
466467
n_samples = feature_matrix.shape[0]
467-
print(f"tunedAnomalyDetectionExplained: Tuned Anomaly Detection: Number of samples: {n_samples}, Number of features: {feature_matrix.shape[1]}, Number of trials: {number_of_trials}")
468+
print(f"tunedAnomalyDetectionExplained: Tuning Anomaly Detection: Number of samples: {n_samples}, Number of features: {feature_matrix.shape[1]}, Number of trials: {number_of_trials}")
468469

469470
# Run Optuna optimization
470471
study = create_study(direction="maximize", sampler=TPESampler(seed=random_seed), study_name="AnomalyDetection_Tuning")
@@ -480,7 +481,12 @@ def objective(trial) -> float:
480481
study.enqueue_trial({'isolation_max_samples': 0.10015063610944819, 'isolation_n_estimators': 329, 'proxy_n_estimators': 314, 'proxy_max_depth': 8})
481482

482483
study.optimize(objective, n_trials=number_of_trials, timeout=optimization_timeout_in_seconds)
483-
output_optuna_tuning_results(study, study.study_name)
484+
485+
# Output tuning results
486+
print(f"Best Isolation & Random Forest parameters for {parameters.get_plot_prefix()} after {len(study.trials)}/{number_of_trials} trials with best #{study.best_trial.number} (Optuna):", study.best_params)
487+
488+
if parameters.is_verbose():
489+
output_optuna_tuning_results(study, study.study_name)
484490

485491
if np.isclose(study.best_value, 0.0, rtol=1e-09, atol=1e-09):
486492
red = "\x1b[31;20m"
@@ -869,7 +875,7 @@ def add_top_shap_features_to_anomalies(
869875
features_prepared = np.hstack([features_standardized, node_embeddings_reduced])
870876
feature_names = list(features_to_standardize) + [f'nodeEmbeddingPCA_{i}' for i in range(node_embeddings_reduced.shape[1])]
871877

872-
anomaly_detection_results = tune_anomaly_detection_models(features_prepared)
878+
anomaly_detection_results = tune_anomaly_detection_models(features_prepared, parameters)
873879
if anomaly_detection_results.is_empty():
874880
sys.exit(0)
875881

0 commit comments

Comments
 (0)