Skip to content

Correct modin samples #768

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Mar 4, 2022
Merged
34 changes: 23 additions & 11 deletions AI-and-Analytics/End-to-end-Workloads/Census/census_modin.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Import basic python modules"
"Import basic Python modules and disable warnings to avoid cluttering the output"
]
},
{
Expand All @@ -66,7 +66,10 @@
"outputs": [],
"source": [
"import os\n",
"import numpy as np"
"import numpy as np\n",
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
Expand All @@ -90,11 +93,20 @@
},
"outputs": [],
"source": [
"#import modin.pandas as pd\n",
"os.environ[\"MODIN_ENGINE\"] = \"native\"\n",
"os.environ[\"MODIN_BACKEND\"] = \"omnisci\"\n",
"os.environ[\"MODIN_EXPERIMENTAL\"] = \"True\"\n",
"import modin.pandas as pd"
"#import pandas as pd\n",
"import modin.pandas as pd\n",
"\n",
"import modin.config as cfg\n",
"from packaging import version\n",
"import modin\n",
"\n",
"cfg.IsExperimental.put(\"True\")\n",
"cfg.Engine.put('native')\n",
"# Since modin 0.12.0, the OmniSci engine activation process has changed slightly\n",
"if version.parse(modin.__version__) <= version.parse('0.11.3'):\n",
" cfg.Backend.put('omnisci')\n",
"else:\n",
" cfg.StorageFormat.put('omnisci')\n"
]
},
{
Expand Down Expand Up @@ -148,7 +160,7 @@
},
"outputs": [],
"source": [
"df = pd.read_csv('ipums_education2income_1970-2010.csv.gz', compression=\"gzip\", nrows=10000)"
"df = pd.read_csv('ipums_education2income_1970-2010.csv.gz')"
]
},
{
Expand Down Expand Up @@ -183,9 +195,9 @@
"df = df[keep_cols]\n",
"\n",
"# clean up samples with invalid income, education, etc.\n",
"df = df.query(\"INCTOT != 9999999\")\n",
"df = df.query(\"EDUC != -1\")\n",
"df = df.query(\"EDUCD != -1\")\n",
"df = df[df[\"INCTOT\"] != 9999999]\n",
"df = df[df[\"EDUC\"] != -1]\n",
"df = df[df[\"EDUCD\"] != -1]\n",
"\n",
"# normalize income for inflation\n",
"df[\"INCTOT\"] = df[\"INCTOT\"] * df[\"CPI99\"]\n",
Expand Down
23 changes: 13 additions & 10 deletions AI-and-Analytics/End-to-end-Workloads/Census/sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,19 @@
"targetDevice": ["CPU"],
"ciTests": {
"linux": [
{
"env": ["source activate base"],
"steps": [
"conda create -y -n intel-aikit-modin intel-aikit-modin -c intel",
"conda activate intel-aikit-modin",
"conda install -y runipy",
"pip install opencensus",
"runipy census_modin.ipynb"
]
}
{
"env": [],
"id": "Intel_Modin_E2E_py",
"steps": [
"set -e # Terminate the script on first error",
"source $(conda info --base)/etc/profile.d/conda.sh # Bypassing conda's inability to activate environments inside a bash script: https://github.com/conda/conda/issues/7980",
"conda create -y -n intel-aikit-modin intel-aikit-modin -c intel",
"conda activate intel-aikit-modin",
"conda install -y jupyter # Installing 'jupyter' for extended abilities to execute the notebook",
"pip install opencensus # Installing 'opencensus' for extended abilities to execute the notebook",
"jupyter nbconvert --to notebook --execute census_modin.ipynb"
]
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,17 @@ source activate intel-aikit-modin

### Activate conda environment Without Root Access (Optional)

By default, the Intel® oneAPI AI Analytics toolkit is installed in the `oneapi` folder, which requires root privileges to manage it. If you would like to bypass using root access to manage your conda environment, then you can clone your desired conda environment using the following command:
By default, the Intel® oneAPI AI Analytics toolkit is installed in the `oneapi` folder, which requires root privileges to manage it. If you would like to bypass using root access to manage your conda environment, then you can install the Intel® Distribution of Modin* Python environment with the following commands:

#### Linux

```
conda create --name user-intel-aikit-modin --clone intel-aikit-modin
conda create -y -n modin-conda-forge -c conda-forge modin-all
conda install -y -n modin-conda-forge -c conda-forge matplotlib
```

Then activate your conda environment with the following command:

```
source activate user-intel-aikit-modin
conda activate modin-conda-forge
```


Expand All @@ -87,7 +87,7 @@ source activate user-intel-aikit-modin
Launch Jupyter Notebook in the directory housing the code example:

```
conda install jupyter nb_conda_kernels
conda install jupyter nb_conda_kernels -c conda-forge -y
```

#### View in Jupyter Notebook
Expand Down