diff --git a/sagemaker-pipelines/step-decorator/bedrock-examples/config.yaml b/sagemaker-pipelines/step-decorator/bedrock-examples/config.yaml new file mode 100644 index 0000000000..a13d031716 --- /dev/null +++ b/sagemaker-pipelines/step-decorator/bedrock-examples/config.yaml @@ -0,0 +1,18 @@ +SchemaVersion: '1.0' +SageMaker: + PythonSDK: + Modules: + RemoteFunction: + # role arn is not required if in SageMaker Notebook instance or SageMaker Studio + # Uncomment the following line and replace with the right execution role if in a local IDE + # RoleArn: + InstanceType: ml.c5.2xlarge + Dependencies: ./requirements.txt + IncludeLocalWorkDir: true + CustomFileFilter: + IgnoreNamePatterns: # files or directories to ignore + - "*.ipynb" # all notebook files + + Pipeline: + RoleArn: 'arn:aws:iam::095351214964:role/service-role/AmazonSageMaker-ExecutionRole-20200130T133110' + diff --git a/sagemaker-pipelines/step-decorator/bedrock-examples/fine_tune_bedrock_step_decorator.ipynb b/sagemaker-pipelines/step-decorator/bedrock-examples/fine_tune_bedrock_step_decorator.ipynb new file mode 100644 index 0000000000..6e225e9cd5 --- /dev/null +++ b/sagemaker-pipelines/step-decorator/bedrock-examples/fine_tune_bedrock_step_decorator.ipynb @@ -0,0 +1,1892 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Using the SageMaker @step decorator feature to convert Python functions for creating a custom Bedrock model into a SageMaker pipeline.\n", + "\n", + "---\n", + "\n", + "This notebook's CI test result for us-west-2 is as follows. CI test results in other regions can be found at the end of the notebook.\n", + "\n", + "![This us-west-2 badge failed to load. 
Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-west-2/sagemaker-pipelines|step-decorator|bedrock-examples|fine_tune_bedrock_step_decorator.ipynb)\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "> *This notebook has been tested with the **`Python 3`** kernel in SageMaker Studio (JupyterLab version).*\n", + "\n", + "We will fine tune the [Amazon Titan Text Lite](https://docs.aws.amazon.com/bedrock/latest/userguide/titan-text-models.html) model provided by Amazon Bedrock for a summarization use case. It uses a dataset from CNN that includes news articles and their summaries. The dataset called [cnn_dailymail v3.0](https://huggingface.co/datasets/cnn_dailymail) is available from Hugging Face. \n", + "\n", + "A *config.yaml* file can be found in the same folder as this notebook. This file includes properties that are passed to the @step decorator.\n", + "\n", + "
\n", + "Warning: The last section in this notebook does the clean up by removing the resources created during fine tuning and testing. That includes the Bedrock provisioned throughput which is needed to access the fine tuned custom model. Note that you will continue to incur AWS charges, unless you run the cleanup step.\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: botocore>=1.31.57 in /opt/conda/lib/python3.10/site-packages (from -r requirements.txt (line 1)) (1.34.84)\n", + "Requirement already satisfied: boto3>=1.28.57 in /opt/conda/lib/python3.10/site-packages (from -r requirements.txt (line 2)) (1.34.84)\n", + "Collecting sagemaker<3,>=v2.211.0 (from -r requirements.txt (line 3))\n", + " Downloading sagemaker-2.215.0-py3-none-any.whl.metadata (14 kB)\n", + "Requirement already satisfied: typing_extensions in /opt/conda/lib/python3.10/site-packages (from -r requirements.txt (line 4)) (4.5.0)\n", + "Requirement already satisfied: pypdf in /opt/conda/lib/python3.10/site-packages (from -r requirements.txt (line 5)) (4.2.0)\n", + "Requirement already satisfied: ipywidgets==7.7.2 in /opt/conda/lib/python3.10/site-packages (from -r requirements.txt (line 7)) (7.7.2)\n", + "Requirement already satisfied: jsonlines in /opt/conda/lib/python3.10/site-packages (from -r requirements.txt (line 8)) (4.0.0)\n", + "Requirement already satisfied: datasets==2.15.0 in /opt/conda/lib/python3.10/site-packages (from -r requirements.txt (line 9)) (2.15.0)\n", + "Requirement already satisfied: pandas==2.1.3 in /opt/conda/lib/python3.10/site-packages (from -r requirements.txt (line 10)) (2.1.3)\n", + "Requirement already satisfied: ipykernel>=4.5.1 in /opt/conda/lib/python3.10/site-packages (from ipywidgets==7.7.2->-r requirements.txt (line 7)) (6.29.3)\n", + "Requirement already satisfied: ipython-genutils~=0.2.0 in /opt/conda/lib/python3.10/site-packages (from ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.2.0)\n", + "Requirement already satisfied: traitlets>=4.3.1 in /opt/conda/lib/python3.10/site-packages (from ipywidgets==7.7.2->-r requirements.txt (line 7)) (5.14.1)\n", + "Requirement already satisfied: 
widgetsnbextension~=3.6.0 in /opt/conda/lib/python3.10/site-packages (from ipywidgets==7.7.2->-r requirements.txt (line 7)) (3.6.6)\n", + "Requirement already satisfied: ipython>=4.0.0 in /opt/conda/lib/python3.10/site-packages (from ipywidgets==7.7.2->-r requirements.txt (line 7)) (8.21.0)\n", + "Requirement already satisfied: jupyterlab-widgets<3,>=1.0.0 in /opt/conda/lib/python3.10/site-packages (from ipywidgets==7.7.2->-r requirements.txt (line 7)) (1.1.7)\n", + "Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from datasets==2.15.0->-r requirements.txt (line 9)) (1.26.4)\n", + "Requirement already satisfied: pyarrow>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets==2.15.0->-r requirements.txt (line 9)) (12.0.1)\n", + "Requirement already satisfied: pyarrow-hotfix in /opt/conda/lib/python3.10/site-packages (from datasets==2.15.0->-r requirements.txt (line 9)) (0.6)\n", + "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from datasets==2.15.0->-r requirements.txt (line 9)) (0.3.7)\n", + "Requirement already satisfied: requests>=2.19.0 in /opt/conda/lib/python3.10/site-packages (from datasets==2.15.0->-r requirements.txt (line 9)) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /opt/conda/lib/python3.10/site-packages (from datasets==2.15.0->-r requirements.txt (line 9)) (4.66.2)\n", + "Requirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets==2.15.0->-r requirements.txt (line 9)) (3.4.1)\n", + "Requirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets==2.15.0->-r requirements.txt (line 9)) (0.70.15)\n", + "Requirement already satisfied: fsspec<=2023.10.0,>=2023.1.0 in /opt/conda/lib/python3.10/site-packages (from fsspec[http]<=2023.10.0,>=2023.1.0->datasets==2.15.0->-r requirements.txt (line 9)) (2023.6.0)\n", + "Requirement already satisfied: aiohttp in 
/opt/conda/lib/python3.10/site-packages (from datasets==2.15.0->-r requirements.txt (line 9)) (3.9.3)\n", + "Requirement already satisfied: huggingface-hub>=0.18.0 in /opt/conda/lib/python3.10/site-packages (from datasets==2.15.0->-r requirements.txt (line 9)) (0.21.1)\n", + "Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from datasets==2.15.0->-r requirements.txt (line 9)) (23.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from datasets==2.15.0->-r requirements.txt (line 9)) (6.0.1)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas==2.1.3->-r requirements.txt (line 10)) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas==2.1.3->-r requirements.txt (line 10)) (2023.3)\n", + "Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas==2.1.3->-r requirements.txt (line 10)) (2024.1)\n", + "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/conda/lib/python3.10/site-packages (from botocore>=1.31.57->-r requirements.txt (line 1)) (1.0.1)\n", + "Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /opt/conda/lib/python3.10/site-packages (from botocore>=1.31.57->-r requirements.txt (line 1)) (1.26.18)\n", + "Requirement already satisfied: s3transfer<0.11.0,>=0.10.0 in /opt/conda/lib/python3.10/site-packages (from boto3>=1.28.57->-r requirements.txt (line 2)) (0.10.1)\n", + "Requirement already satisfied: attrs<24,>=23.1.0 in /opt/conda/lib/python3.10/site-packages (from sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (23.2.0)\n", + "Requirement already satisfied: cloudpickle==2.2.1 in /opt/conda/lib/python3.10/site-packages (from sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (2.2.1)\n", + "Requirement already satisfied: google-pasta in /opt/conda/lib/python3.10/site-packages (from 
sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (0.2.0)\n", + "Requirement already satisfied: protobuf<5.0,>=3.12 in /opt/conda/lib/python3.10/site-packages (from sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (4.21.12)\n", + "Requirement already satisfied: smdebug-rulesconfig==1.0.1 in /opt/conda/lib/python3.10/site-packages (from sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (1.0.1)\n", + "Requirement already satisfied: importlib-metadata<7.0,>=1.4.0 in /opt/conda/lib/python3.10/site-packages (from sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (6.10.0)\n", + "Requirement already satisfied: pathos in /opt/conda/lib/python3.10/site-packages (from sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (0.3.1)\n", + "Requirement already satisfied: schema in /opt/conda/lib/python3.10/site-packages (from sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (0.7.5)\n", + "Requirement already satisfied: jsonschema in /opt/conda/lib/python3.10/site-packages (from sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (4.17.3)\n", + "Requirement already satisfied: platformdirs in /opt/conda/lib/python3.10/site-packages (from sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (4.2.0)\n", + "Requirement already satisfied: tblib<4,>=1.7.0 in /opt/conda/lib/python3.10/site-packages (from sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (1.7.0)\n", + "Collecting docker (from sagemaker<3,>=v2.211.0->-r requirements.txt (line 3))\n", + " Using cached docker-7.0.0-py3-none-any.whl.metadata (3.5 kB)\n", + "Requirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (5.9.8)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets==2.15.0->-r requirements.txt (line 9)) (1.3.1)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from 
aiohttp->datasets==2.15.0->-r requirements.txt (line 9)) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets==2.15.0->-r requirements.txt (line 9)) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets==2.15.0->-r requirements.txt (line 9)) (1.9.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets==2.15.0->-r requirements.txt (line 9)) (4.0.3)\n", + "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.18.0->datasets==2.15.0->-r requirements.txt (line 9)) (3.13.1)\n", + "Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.10/site-packages (from importlib-metadata<7.0,>=1.4.0->sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (3.17.0)\n", + "Requirement already satisfied: comm>=0.1.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.2.1)\n", + "Requirement already satisfied: debugpy>=1.6.5 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2->-r requirements.txt (line 7)) (1.8.1)\n", + "Requirement already satisfied: jupyter-client>=6.1.12 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2->-r requirements.txt (line 7)) (8.6.0)\n", + "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2->-r requirements.txt (line 7)) (5.7.1)\n", + "Requirement already satisfied: matplotlib-inline>=0.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.1.6)\n", + "Requirement already satisfied: nest-asyncio in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2->-r 
requirements.txt (line 7)) (1.6.0)\n", + "Requirement already satisfied: pyzmq>=24 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2->-r requirements.txt (line 7)) (25.1.2)\n", + "Requirement already satisfied: tornado>=6.1 in /opt/conda/lib/python3.10/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2->-r requirements.txt (line 7)) (6.4)\n", + "Requirement already satisfied: decorator in /opt/conda/lib/python3.10/site-packages (from ipython>=4.0.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (5.1.1)\n", + "Requirement already satisfied: jedi>=0.16 in /opt/conda/lib/python3.10/site-packages (from ipython>=4.0.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.19.1)\n", + "Requirement already satisfied: prompt-toolkit<3.1.0,>=3.0.41 in /opt/conda/lib/python3.10/site-packages (from ipython>=4.0.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (3.0.42)\n", + "Requirement already satisfied: pygments>=2.4.0 in /opt/conda/lib/python3.10/site-packages (from ipython>=4.0.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (2.17.2)\n", + "Requirement already satisfied: stack-data in /opt/conda/lib/python3.10/site-packages (from ipython>=4.0.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.6.2)\n", + "Requirement already satisfied: exceptiongroup in /opt/conda/lib/python3.10/site-packages (from ipython>=4.0.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (1.2.0)\n", + "Requirement already satisfied: pexpect>4.3 in /opt/conda/lib/python3.10/site-packages (from ipython>=4.0.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (4.9.0)\n", + "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas==2.1.3->-r requirements.txt (line 10)) (1.16.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets==2.15.0->-r requirements.txt (line 9)) (3.3.2)\n", + "Requirement already 
satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets==2.15.0->-r requirements.txt (line 9)) (3.6)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets==2.15.0->-r requirements.txt (line 9)) (2024.2.2)\n", + "Requirement already satisfied: notebook>=4.4.1 in /opt/conda/lib/python3.10/site-packages (from widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (7.1.1)\n", + "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /opt/conda/lib/python3.10/site-packages (from jsonschema->sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (0.20.0)\n", + "Requirement already satisfied: ppft>=1.7.6.7 in /opt/conda/lib/python3.10/site-packages (from pathos->sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (1.7.6.8)\n", + "Requirement already satisfied: pox>=0.3.3 in /opt/conda/lib/python3.10/site-packages (from pathos->sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (0.3.4)\n", + "Requirement already satisfied: contextlib2>=0.5.5 in /opt/conda/lib/python3.10/site-packages (from schema->sagemaker<3,>=v2.211.0->-r requirements.txt (line 3)) (21.6.0)\n", + "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /opt/conda/lib/python3.10/site-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.8.3)\n", + "Requirement already satisfied: jupyter-server<3,>=2.4.0 in /opt/conda/lib/python3.10/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (2.10.0)\n", + "Requirement already satisfied: jupyterlab-server<3,>=2.22.1 in /opt/conda/lib/python3.10/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (2.24.0)\n", + "Requirement already satisfied: jupyterlab<4.2,>=4.1.1 in /opt/conda/lib/python3.10/site-packages (from 
notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (4.1.2)\n", + "Requirement already satisfied: notebook-shim<0.3,>=0.2 in /opt/conda/lib/python3.10/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.2.4)\n", + "Requirement already satisfied: ptyprocess>=0.5 in /opt/conda/lib/python3.10/site-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.7.0)\n", + "Requirement already satisfied: wcwidth in /opt/conda/lib/python3.10/site-packages (from prompt-toolkit<3.1.0,>=3.0.41->ipython>=4.0.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.2.13)\n", + "Requirement already satisfied: executing>=1.2.0 in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=4.0.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (2.0.1)\n", + "Requirement already satisfied: asttokens>=2.1.0 in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=4.0.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (2.4.1)\n", + "Requirement already satisfied: pure-eval in /opt/conda/lib/python3.10/site-packages (from stack-data->ipython>=4.0.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.2.2)\n", + "Requirement already satisfied: anyio>=3.1.0 in /opt/conda/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (3.7.1)\n", + "Requirement already satisfied: argon2-cffi in /opt/conda/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (23.1.0)\n", + "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (3.1.3)\n", + "Requirement already satisfied: jupyter-events>=0.6.0 in 
/opt/conda/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.6.3)\n", + "Requirement already satisfied: jupyter-server-terminals in /opt/conda/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.5.2)\n", + "Requirement already satisfied: nbconvert>=6.4.4 in /opt/conda/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (7.16.1)\n", + "Requirement already satisfied: nbformat>=5.3.0 in /opt/conda/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (5.9.2)\n", + "Requirement already satisfied: overrides in /opt/conda/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (7.7.0)\n", + "Requirement already satisfied: prometheus-client in /opt/conda/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.20.0)\n", + "Requirement already satisfied: send2trash>=1.8.2 in /opt/conda/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (1.8.2)\n", + "Requirement already satisfied: terminado>=0.8.3 in /opt/conda/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.18.0)\n", + "Requirement already satisfied: websocket-client in /opt/conda/lib/python3.10/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r 
requirements.txt (line 7)) (1.7.0)\n", + "Requirement already satisfied: async-lru>=1.0.0 in /opt/conda/lib/python3.10/site-packages (from jupyterlab<4.2,>=4.1.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (2.0.4)\n", + "Requirement already satisfied: httpx>=0.25.0 in /opt/conda/lib/python3.10/site-packages (from jupyterlab<4.2,>=4.1.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.27.0)\n", + "Requirement already satisfied: jupyter-lsp>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from jupyterlab<4.2,>=4.1.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (2.2.3)\n", + "Requirement already satisfied: tomli in /opt/conda/lib/python3.10/site-packages (from jupyterlab<4.2,>=4.1.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (2.0.1)\n", + "Requirement already satisfied: babel>=2.10 in /opt/conda/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (2.14.0)\n", + "Requirement already satisfied: json5>=0.9.0 in /opt/conda/lib/python3.10/site-packages (from jupyterlab-server<3,>=2.22.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.9.17)\n", + "Requirement already satisfied: sniffio>=1.1 in /opt/conda/lib/python3.10/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (1.3.1)\n", + "Requirement already satisfied: httpcore==1.* in /opt/conda/lib/python3.10/site-packages (from httpx>=0.25.0->jupyterlab<4.2,>=4.1.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (1.0.4)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /opt/conda/lib/python3.10/site-packages (from 
httpcore==1.*->httpx>=0.25.0->jupyterlab<4.2,>=4.1.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.14.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (2.1.5)\n", + "Requirement already satisfied: python-json-logger>=2.0.4 in /opt/conda/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (2.0.7)\n", + "Requirement already satisfied: rfc3339-validator in /opt/conda/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.1.4)\n", + "Requirement already satisfied: rfc3986-validator>=0.1.1 in /opt/conda/lib/python3.10/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.1.1)\n", + "Requirement already satisfied: beautifulsoup4 in /opt/conda/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (4.12.3)\n", + "Requirement already satisfied: bleach!=5.0.0 in /opt/conda/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (6.1.0)\n", + "Requirement already satisfied: defusedxml in /opt/conda/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.7.1)\n", + "Requirement already satisfied: jupyterlab-pygments in 
/opt/conda/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.3.0)\n", + "Requirement already satisfied: mistune<4,>=2.0.3 in /opt/conda/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (3.0.2)\n", + "Requirement already satisfied: nbclient>=0.5.0 in /opt/conda/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.8.0)\n", + "Requirement already satisfied: pandocfilters>=1.4.1 in /opt/conda/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (1.5.0)\n", + "Requirement already satisfied: tinycss2 in /opt/conda/lib/python3.10/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (1.2.1)\n", + "Requirement already satisfied: fastjsonschema in /opt/conda/lib/python3.10/site-packages (from nbformat>=5.3.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (2.19.1)\n", + "Requirement already satisfied: argon2-cffi-bindings in /opt/conda/lib/python3.10/site-packages (from argon2-cffi->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (21.2.0)\n", + "Requirement already satisfied: webencodings in /opt/conda/lib/python3.10/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (0.5.1)\n", + "Requirement already satisfied: fqdn in 
/opt/conda/lib/python3.10/site-packages (from jsonschema[format-nongpl]>=3.2.0->jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (1.5.1)\n", + "Requirement already satisfied: isoduration in /opt/conda/lib/python3.10/site-packages (from jsonschema[format-nongpl]>=3.2.0->jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (20.11.0)\n", + "Requirement already satisfied: jsonpointer>1.13 in /opt/conda/lib/python3.10/site-packages (from jsonschema[format-nongpl]>=3.2.0->jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (2.4)\n", + "Requirement already satisfied: uri-template in /opt/conda/lib/python3.10/site-packages (from jsonschema[format-nongpl]>=3.2.0->jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (1.3.0)\n", + "Requirement already satisfied: webcolors>=1.11 in /opt/conda/lib/python3.10/site-packages (from jsonschema[format-nongpl]>=3.2.0->jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (1.13)\n", + "Requirement already satisfied: cffi>=1.0.1 in /opt/conda/lib/python3.10/site-packages (from argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (1.16.0)\n", + "Requirement already satisfied: soupsieve>1.2 in /opt/conda/lib/python3.10/site-packages (from beautifulsoup4->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (2.5)\n", + "Requirement already satisfied: pycparser in /opt/conda/lib/python3.10/site-packages (from 
cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (2.21)\n", + "Requirement already satisfied: arrow>=0.15.0 in /opt/conda/lib/python3.10/site-packages (from isoduration->jsonschema[format-nongpl]>=3.2.0->jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (1.3.0)\n", + "Requirement already satisfied: types-python-dateutil>=2.8.10 in /opt/conda/lib/python3.10/site-packages (from arrow>=0.15.0->isoduration->jsonschema[format-nongpl]>=3.2.0->jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2->-r requirements.txt (line 7)) (2.8.19.20240106)\n", + "Downloading sagemaker-2.215.0-py3-none-any.whl (1.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m68.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached docker-7.0.0-py3-none-any.whl (147 kB)\n", + "Installing collected packages: docker, sagemaker\n", + " Attempting uninstall: sagemaker\n", + " Found existing installation: sagemaker 2.198.1\n", + " Uninstalling sagemaker-2.198.1:\n", + " Successfully uninstalled sagemaker-2.198.1\n", + "Successfully installed docker-7.0.0 sagemaker-2.215.0\n" + ] + } + ], + "source": [ + "!pip install -r requirements.txt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# restart kernel for the packages installed above to take effect\n", + "from IPython.core.display import HTML\n", + "\n", + "HTML(\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + 
{ + "name": "stdout", + "output_type": "stream", + "text": [ + "sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml\n", + "sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml\n" + ] + } + ], + "source": [ + "from datasets import load_dataset\n", + "from itertools import islice\n", + "import pandas as pd\n", + "import sagemaker\n", + "import jsonlines\n", + "import warnings\n", + "\n", + "warnings.filterwarnings(\"ignore\")\n", + "import json\n", + "import os\n", + "import sys\n", + "import boto3\n", + "import time\n", + "import pprint\n", + "import random\n", + "import yaml\n", + "from sagemaker.workflow.function_step import step\n", + "from sagemaker.workflow.parameters import ParameterString\n", + "from sagemaker.workflow.pipeline import Pipeline\n", + "from datetime import datetime\n", + "from botocore.exceptions import ClientError" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Set path to config file \"config.yaml\"\n", + "# The config.yaml file contains the arguments that are passed to the step decorator functions.\n", + "os.environ[\"SAGEMAKER_USER_CONFIG_OVERRIDE\"] = os.getcwd()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "1. This notebook uses the default S3 bucket for the user. The default Amazon S3 bucket follows the naming pattern s3://sagemaker-{Region}-{your-account-id}. It is automatically created if it does not exist.\n", + "\n", + "2. This notebook uses the default IAM role for the user. If your Studio user role does not have AWS administrator access, you will need to add the necessary permissions to the role. 
# Let's look at the contents of config.yaml.
# The properties in config.yaml are passed into the @step function.
# Print the contents of config.yaml.
instruction = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

instruction:

Summarize the news article provided below.

input:

"""


def add_prompt_to_data(dataset):
    """Convert raw article/summary rows into prompt/completion records.

    Each row of ``dataset`` is read through its "article" and "highlights"
    keys. One dict is produced per row, in the same order:

    * "prompt": the module-level ``instruction`` text followed by the article.
    * "completion": the summary, prefixed with ``response:`` and a blank line.

    Returns:
        A new list of dicts; the input rows are not modified.
    """
    return [
        {
            "prompt": instruction + row["article"],
            "completion": "response:\n\n" + row["highlights"],
        }
        for row in dataset
    ]
# Restrict the number of rows and row length
def reduce_dataset_size(data, max_row_length, max_rows):
    """Return a random selection of rows that fit within a length budget.

    Args:
        data: Iterable of dicts with string values under "prompt" and
            "completion".
        max_row_length: Largest allowed combined length of prompt plus
            completion. The length is taken with ``len`` on the strings, so it
            is a number of characters, not words or tokens.
        max_rows: Maximum number of rows to keep.

    Returns:
        A new list with at most ``max_rows`` of the rows that fit, in shuffled
        order. The shuffle is unseeded, so the selection differs between runs.
    """
    datapoints = [
        datapoint
        for datapoint in data
        if len(datapoint["prompt"] + datapoint["completion"]) <= max_row_length
    ]
    # Shuffle before truncating so the kept rows are a random selection rather
    # than simply the first rows of the dataset.
    random.shuffle(datapoints)
    datapoints = datapoints[:max_rows]
    print(f"\nData set size: {len(datapoints)}")

    return datapoints


#### Define step for splitting the dataset into training, validation, and testing
# Keep only rows whose prompt plus completion is at most 3000 characters long.
# We also select 100 rows for training, 10 for validation, and 5 for testing
# to keep computation costs low for this example
@step(
    name="data-split-step",
    keep_alive_period_in_seconds=300,
)
def data_split(step_load_result: tuple) -> tuple:
    """Down-sample the train, validation and test sets produced by the load step.

    Args:
        step_load_result: Tuple whose items 0, 1 and 2 are the training,
            validation and test rows respectively.

    Returns:
        A ``(train_lines, validation_lines, test_lines)`` tuple holding at
        most 100, 10 and 5 rows respectively.
    """
    train_lines = reduce_dataset_size(step_load_result[0], 3000, 100)
    validation_lines = reduce_dataset_size(step_load_result[1], 3000, 10)
    test_lines = reduce_dataset_size(step_load_result[2], 3000, 5)

    # Label each count with the split it actually reports.
    print(f"\nNumber of training rows: {len(train_lines)}")
    print(f"\nNumber of validation rows: {len(validation_lines)}")
    print(f"\nNumber of test rows: {len(test_lines)}")

    return train_lines, validation_lines, test_lines
#### Define step for custom training the model
@step(
    name="model-training-step",
    keep_alive_period_in_seconds=300,
)
def train(
    custom_model_name: str, training_job_name: str, step_data_upload_to_s3_result: tuple
) -> str:
    """Fine-tune the Titan Text Lite base model with a Bedrock customization job.

    The function starts the job, polls its status once a minute until it
    reaches a terminal state, and then looks up the resulting custom model.

    Args:
        custom_model_name: Name given to the custom model that the job creates.
        training_job_name: Name of the model customization job; also used as
            the identifier when polling the job.
        step_data_upload_to_s3_result: Tuple of S3 URIs; item 0 is the training
            file and item 1 is the validation file.

    Returns:
        The ARN of the custom model.

    Raises:
        RuntimeError: If the customization job finishes in any state other
            than "Completed".
    """
    # Define the hyperparameters for fine-tuning Titan text model
    hyper_parameters = {
        "epochCount": "2",
        "batchSize": "1",
        "learningRate": "0.00003",
    }

    # Specify your data path for training, validation(optional) and output
    training_data_config = {"s3Uri": step_data_upload_to_s3_result[0]}
    print(f"Training data config: {training_data_config}")

    validation_data_config = {"validators": [{"s3Uri": step_data_upload_to_s3_result[1]}]}
    print(f"Validation data config: {validation_data_config}")

    # bucket_name and role_arn are not parameters: they are read from the
    # notebook-level variables of the same names.
    output_data_config = {
        "s3Uri": f"s3://{bucket_name}/fine-tuning-datasets/outputs/output-{custom_model_name}"
    }

    bedrock = boto3.client(service_name="bedrock")

    print("Start training....")

    # Create the customization job
    training_job_response = bedrock.create_model_customization_job(
        customizationType="FINE_TUNING",
        jobName=training_job_name,
        customModelName=custom_model_name,
        roleArn=role_arn,
        baseModelIdentifier="amazon.titan-text-lite-v1:0:4k",
        hyperParameters=hyper_parameters,
        trainingDataConfig=training_data_config,
        validationDataConfig=validation_data_config,
        outputDataConfig=output_data_config,
    )
    print(training_job_response)

    fine_tune_job = bedrock.get_model_customization_job(jobIdentifier=training_job_name)
    print(fine_tune_job["status"])

    # "Stopping" is still a transitional state, so keep polling until the job
    # is Completed, Failed or Stopped.
    while fine_tune_job["status"] in ("InProgress", "Stopping"):
        time.sleep(60)
        fine_tune_job = bedrock.get_model_customization_job(jobIdentifier=training_job_name)
        print(fine_tune_job["status"])

    pprint.pp(fine_tune_job)

    # Fail the pipeline step with the real cause instead of letting the custom
    # model lookup below fail for a model that was never created.
    if fine_tune_job["status"] != "Completed":
        raise RuntimeError(
            f"Model customization job {training_job_name} ended with status "
            f"{fine_tune_job['status']}: "
            f"{fine_tune_job.get('failureMessage', 'no failure message returned')}"
        )

    output_job_name = "model-customization-job-" + fine_tune_job["jobArn"].split("/")[-1]
    print(f"output_job_name: {output_job_name}")

    model_id = bedrock.get_custom_model(modelIdentifier=custom_model_name)["modelArn"]

    print(f"Model id: {model_id}")
    return model_id
#### Create the SageMaker pipeline
# You can see the multi-step directed acyclic graph (DAG) in the Studio UI as a pipeline
#
# Each call below passes the result of the previous call into the next one;
# this chaining is what links the six steps together.

pipeline_name = "bedrock-fine-tune-pipeline"

# The timestamp is evaluated once, when this cell runs, so the three names
# below stay fixed until the cell is executed again.
# NOTE(review): Bedrock presumably requires job and model names to be unique,
# so re-run this cell before starting another execution - confirm.
ts = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
custom_model_name = f"finetuned-model-{ts}"
training_job_name = f"model-finetune-job-{ts}"
provisioned_model_name = f"summarization-model-{ts}"

# Pipeline parameters: the dataset name and version handed to the data-load step.
param1 = ParameterString(name="ds_name", default_value="cnn_dailymail")
param2 = ParameterString(name="ds_version", default_value="3.0.0")

data_load_response = data_load(param1, param2)

data_split_response = data_split(data_load_response)

data_upload_to_s3_response = data_upload_to_s3(data_split_response, bucket_name)

train_response = train(custom_model_name, training_job_name, data_upload_to_s3_response)

create_prov_thruput_response = create_prov_thruput(train_response, provisioned_model_name)

test_model_response = test_model(create_prov_thruput_response)

# Only the last step is listed here; the earlier steps are linked to it through
# the results passed between the calls above.
pipeline = Pipeline(name=pipeline_name, steps=[test_model_response], parameters=[param1, param2])
s3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/model-testing-step/2024-04-12-21-36-35-895/arguments\n", + "2024-04-12 21:36:37,851 sagemaker.remote_function INFO Copied dependencies file at './requirements.txt' to '/tmp/tmpv41q6gtg/requirements.txt'\n", + "2024-04-12 21:36:37,912 sagemaker.remote_function INFO Successfully uploaded dependencies and pre execution scripts to 's3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/model-testing-step/2024-04-12-21-36-35-895/pre_exec_script_and_dependencies'\n", + "2024-04-12 21:36:38,000 sagemaker.remote_function INFO Copied user workspace to '/tmp/tmpse97qmlu/temp_workspace/sagemaker_remote_function_workspace'\n", + "2024-04-12 21:36:38,946 sagemaker.remote_function INFO Successfully created workdir archive at '/tmp/tmpse97qmlu/workspace.zip'\n", + "2024-04-12 21:36:39,124 sagemaker.remote_function INFO Successfully uploaded workdir to 's3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/sm_rf_user_ws/2024-04-12-21-36-35-895/workspace.zip'\n", + "WARNING:sagemaker.workflow.utilities:Popping out 'TrainingJobName' from the pipeline definition by default since it will be overridden at pipeline execution time. 
Please utilize the PipelineDefinitionConfig to persist this field in the pipeline definition if desired.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.Dependencies\n", + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.IncludeLocalWorkDir\n", + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.CustomFileFilter.IgnoreNamePatterns\n", + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.InstanceType\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-04-12 21:36:40,298 sagemaker.remote_function INFO Uploading serialized function code to s3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/create-provisioned-throughput-step/2024-04-12-21-36-35-895/function\n", + "2024-04-12 21:36:40,411 sagemaker.remote_function INFO Uploading serialized function arguments to s3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/create-provisioned-throughput-step/2024-04-12-21-36-35-895/arguments\n", + "2024-04-12 21:36:40,487 sagemaker.remote_function INFO Copied dependencies file at './requirements.txt' to '/tmp/tmpnj3veih_/requirements.txt'\n", + "2024-04-12 21:36:40,519 sagemaker.remote_function INFO Successfully uploaded dependencies and pre execution scripts to 's3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/create-provisioned-throughput-step/2024-04-12-21-36-35-895/pre_exec_script_and_dependencies'\n", + "WARNING:sagemaker.workflow.utilities:Popping out 'TrainingJobName' from the pipeline definition by default since it will be overridden at pipeline execution time. 
Please utilize the PipelineDefinitionConfig to persist this field in the pipeline definition if desired.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.Dependencies\n", + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.IncludeLocalWorkDir\n", + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.CustomFileFilter.IgnoreNamePatterns\n", + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.InstanceType\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-04-12 21:36:41,695 sagemaker.remote_function INFO Uploading serialized function code to s3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/model-training-step/2024-04-12-21-36-35-895/function\n", + "2024-04-12 21:36:41,792 sagemaker.remote_function INFO Uploading serialized function arguments to s3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/model-training-step/2024-04-12-21-36-35-895/arguments\n", + "2024-04-12 21:36:41,912 sagemaker.remote_function INFO Copied dependencies file at './requirements.txt' to '/tmp/tmpzxpgiqlm/requirements.txt'\n", + "2024-04-12 21:36:41,983 sagemaker.remote_function INFO Successfully uploaded dependencies and pre execution scripts to 's3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/model-training-step/2024-04-12-21-36-35-895/pre_exec_script_and_dependencies'\n", + "WARNING:sagemaker.workflow.utilities:Popping out 'TrainingJobName' from the pipeline definition by default since it will be overridden at pipeline execution time. 
Please utilize the PipelineDefinitionConfig to persist this field in the pipeline definition if desired.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.Dependencies\n", + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.IncludeLocalWorkDir\n", + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.CustomFileFilter.IgnoreNamePatterns\n", + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.InstanceType\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-04-12 21:36:43,162 sagemaker.remote_function INFO Uploading serialized function code to s3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/data-upload-to-s3-step/2024-04-12-21-36-35-895/function\n", + "2024-04-12 21:36:43,346 sagemaker.remote_function INFO Uploading serialized function arguments to s3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/data-upload-to-s3-step/2024-04-12-21-36-35-895/arguments\n", + "2024-04-12 21:36:43,465 sagemaker.remote_function INFO Copied dependencies file at './requirements.txt' to '/tmp/tmp7ujlj15s/requirements.txt'\n", + "2024-04-12 21:36:43,528 sagemaker.remote_function INFO Successfully uploaded dependencies and pre execution scripts to 's3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/data-upload-to-s3-step/2024-04-12-21-36-35-895/pre_exec_script_and_dependencies'\n", + "WARNING:sagemaker.workflow.utilities:Popping out 'TrainingJobName' from the pipeline definition by default since it will be overridden at pipeline execution time. 
Please utilize the PipelineDefinitionConfig to persist this field in the pipeline definition if desired.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.Dependencies\n", + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.IncludeLocalWorkDir\n", + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.CustomFileFilter.IgnoreNamePatterns\n", + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.InstanceType\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-04-12 21:36:44,700 sagemaker.remote_function INFO Uploading serialized function code to s3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/data-split-step/2024-04-12-21-36-35-895/function\n", + "2024-04-12 21:36:44,781 sagemaker.remote_function INFO Uploading serialized function arguments to s3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/data-split-step/2024-04-12-21-36-35-895/arguments\n", + "2024-04-12 21:36:44,891 sagemaker.remote_function INFO Copied dependencies file at './requirements.txt' to '/tmp/tmp9p4gw5b6/requirements.txt'\n", + "2024-04-12 21:36:44,919 sagemaker.remote_function INFO Successfully uploaded dependencies and pre execution scripts to 's3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/data-split-step/2024-04-12-21-36-35-895/pre_exec_script_and_dependencies'\n", + "WARNING:sagemaker.workflow.utilities:Popping out 'TrainingJobName' from the pipeline definition by default since it will be overridden at pipeline execution time. 
Please utilize the PipelineDefinitionConfig to persist this field in the pipeline definition if desired.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.Dependencies\n", + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.IncludeLocalWorkDir\n", + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.CustomFileFilter.IgnoreNamePatterns\n", + "sagemaker.config INFO - Applied value from config key = SageMaker.PythonSDK.Modules.RemoteFunction.InstanceType\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-04-12 21:36:46,092 sagemaker.remote_function INFO Uploading serialized function code to s3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/data-load-step/2024-04-12-21-36-35-895/function\n", + "2024-04-12 21:36:46,213 sagemaker.remote_function INFO Uploading serialized function arguments to s3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/data-load-step/2024-04-12-21-36-35-895/arguments\n", + "2024-04-12 21:36:46,292 sagemaker.remote_function INFO Copied dependencies file at './requirements.txt' to '/tmp/tmpx3pmqpqv/requirements.txt'\n", + "2024-04-12 21:36:46,319 sagemaker.remote_function INFO Successfully uploaded dependencies and pre execution scripts to 's3://sagemaker-us-east-1-095351214964/bedrock-fine-tune-pipeline/data-load-step/2024-04-12-21-36-35-895/pre_exec_script_and_dependencies'\n", + "WARNING:sagemaker.workflow.utilities:Popping out 'TrainingJobName' from the pipeline definition by default since it will be overridden at pipeline execution time. 
Please utilize the PipelineDefinitionConfig to persist this field in the pipeline definition if desired.\n" + ] + }, + { + "data": { + "text/plain": [ + "{'PipelineArn': 'arn:aws:sagemaker:us-east-1:095351214964:pipeline/bedrock-fine-tune-pipeline',\n", + " 'ResponseMetadata': {'RequestId': '8de6e516-fdbf-4d34-bc19-4b61a6cb6474',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': '8de6e516-fdbf-4d34-bc19-4b61a6cb6474',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '94',\n", + " 'date': 'Fri, 12 Apr 2024 21:36:46 GMT'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline.upsert(role_arn)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "execution = pipeline.start()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'PipelineArn': 'arn:aws:sagemaker:us-east-1:095351214964:pipeline/bedrock-fine-tune-pipeline',\n", + " 'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:095351214964:pipeline/bedrock-fine-tune-pipeline/execution/l040kjgtiq4n',\n", + " 'PipelineExecutionDisplayName': 'execution-1712957806959',\n", + " 'PipelineExecutionStatus': 'Executing',\n", + " 'CreationTime': datetime.datetime(2024, 4, 12, 21, 36, 46, 908000, tzinfo=tzlocal()),\n", + " 'LastModifiedTime': datetime.datetime(2024, 4, 12, 21, 36, 46, 908000, tzinfo=tzlocal()),\n", + " 'CreatedBy': {'UserProfileArn': 'arn:aws:sagemaker:us-east-1:095351214964:user-profile/d-ndkfwlyrojeq/blog',\n", + " 'UserProfileName': 'blog',\n", + " 'DomainId': 'd-ndkfwlyrojeq',\n", + " 'IamIdentity': {'Arn': 'arn:aws:sts::095351214964:assumed-role/AmazonSageMaker-ExecutionRole-20200130T133110/SageMaker',\n", + " 'PrincipalId': 'AROARMM3ACN2NE2XC3HPY:SageMaker'}},\n", + " 'LastModifiedBy': 
{'UserProfileArn': 'arn:aws:sagemaker:us-east-1:095351214964:user-profile/d-ndkfwlyrojeq/blog',\n", + " 'UserProfileName': 'blog',\n", + " 'DomainId': 'd-ndkfwlyrojeq',\n", + " 'IamIdentity': {'Arn': 'arn:aws:sts::095351214964:assumed-role/AmazonSageMaker-ExecutionRole-20200130T133110/SageMaker',\n", + " 'PrincipalId': 'AROARMM3ACN2NE2XC3HPY:SageMaker'}},\n", + " 'ResponseMetadata': {'RequestId': '36b7812f-9de8-4686-9066-107fcda06bee',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': '36b7812f-9de8-4686-9066-107fcda06bee',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '1041',\n", + " 'date': 'Fri, 12 Apr 2024 21:36:46 GMT'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "execution.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 1.44 s, sys: 87.4 ms, total: 1.53 s\n", + "Wall time: 1h 31min 17s\n" + ] + } + ], + "source": [ + "%%time\n", + "execution.wait(delay=60, max_attempts=250)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'StepName': 'model-testing-step',\n", + " 'StepDisplayName': '__main__.test_model',\n", + " 'StartTime': datetime.datetime(2024, 4, 12, 23, 4, 43, 688000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2024, 4, 12, 23, 7, 33, 776000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:095351214964:training-job/pipelines-l040kjgtiq4n-model-testing-step-pr4gGsj2Rt'}},\n", + " 'AttemptCount': 1},\n", + " {'StepName': 'create-provisioned-throughput-step',\n", + " 'StepDisplayName': '__main__.create_prov_thruput',\n", + " 'StartTime': 
datetime.datetime(2024, 4, 12, 22, 49, 35, 654000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2024, 4, 12, 23, 4, 42, 774000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:095351214964:training-job/pipelines-l040kjgtiq4n-create-provisioned-t-xDN4wVqlsC'}},\n", + " 'AttemptCount': 1},\n", + " {'StepName': 'model-training-step',\n", + " 'StepDisplayName': '__main__.train',\n", + " 'StartTime': datetime.datetime(2024, 4, 12, 21, 46, 28, 754000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2024, 4, 12, 22, 49, 34, 878000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:095351214964:training-job/pipelines-l040kjgtiq4n-model-training-step-Kc1rJEbgzv'}},\n", + " 'AttemptCount': 1},\n", + " {'StepName': 'data-upload-to-s3-step',\n", + " 'StepDisplayName': '__main__.data_upload_to_s3',\n", + " 'StartTime': datetime.datetime(2024, 4, 12, 21, 43, 39, 142000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2024, 4, 12, 21, 46, 27, 822000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:095351214964:training-job/pipelines-l040kjgtiq4n-data-upload-to-s3-st-eQlNAJKWnc'}},\n", + " 'AttemptCount': 1},\n", + " {'StepName': 'data-split-step',\n", + " 'StepDisplayName': '__main__.data_split',\n", + " 'StartTime': datetime.datetime(2024, 4, 12, 21, 40, 37, 342000, tzinfo=tzlocal()),\n", + " 'EndTime': datetime.datetime(2024, 4, 12, 21, 43, 38, 277000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:095351214964:training-job/pipelines-l040kjgtiq4n-data-split-step-68JtnNtXxn'}},\n", + " 'AttemptCount': 1},\n", + " {'StepName': 'data-load-step',\n", + " 'StartTime': datetime.datetime(2024, 4, 12, 21, 36, 48, 342000, tzinfo=tzlocal()),\n", + " 
'EndTime': datetime.datetime(2024, 4, 12, 21, 40, 34, 16000, tzinfo=tzlocal()),\n", + " 'StepStatus': 'Succeeded',\n", + " 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:095351214964:training-job/pipelines-l040kjgtiq4n-data-load-step-cYwRdw1Qg1'}},\n", + " 'AttemptCount': 1}]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "execution.list_steps()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('\\n Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\\n\\ninstruction:\\n\\nSummarize the news article provided below.\\n\\ninput:\\n\\n(CNN)Remains of up to nearly 400 unaccounted for service members tied to the USS Oklahoma at Pearl Harbor will be exhumed this year, the Defense Department announced Tuesday. The hope is that most of the battleship\\'s sailors and Marines can be identified. \"The secretary of defense and I will work tirelessly to ensure your loved one\\'s remains will be recovered, identified, and returned to you as expeditiously as possible, and we will do so with dignity, respect and care,\" Deputy Secretary of Defense Bob Work said in a statement. \"While not all families will receive an individual identification, we will strive to provide resolution to as many families as possible.\" The USS Oklahoma sank when it was hit by torpedoes on December 7, 1941, during the Japanese attack on Pearl Harbor. A total of 429 sailors and Marines on the ship were killed. Thirty-five crew members were positively identified and buried in the years immediately after the attack, according to the Defense Department. By 1950, all unidentified remains were laid to rest as unknowns at the National Memorial Cemetery of the Pacific. 
In 2003, five more service members were identified, with the help of historical evidence from Pearl Harbor survivor Ray Emory, 93. Emory, a native of Peoria, Illinois, was serving as a seaman first class on the light cruiser USS Honolulu that fateful day. After the war, Emory worked in Washington state before moving to Hawaii about 30 years ago. The retiree made it his mission to ensure graves are properly identified. \"It\\'s something I looked forward to for a long time,\" he told CNN about Tuesday\\'s announcement. Speaking by phone from Honolulu, Emory said that proper identification means a lot to the families of those who lost loved ones -- and to him. Next of kin were being notified starting Tuesday. Service members who are identified will be returned to their families for burial, with full military honors. WWII pilot, 99, reunited with historic C-47 plane . CNN\\'s Phil Gast contributed to this report.\\n ', '\\nThe USS Oklahoma sank during the Japanese attack on Pearl Harbor on December 7, 1941.\\nThe battleship was hit by torpedoes, killing 429 sailors and Marines.\\nThe Defense Department says it will work to identify as many of the remains as possible.')\n" + ] + } + ], + "source": [ + "print(execution.result(step_name=\"model-testing-step\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cleanup\n", + "Delete the resources that were created to stop incurring charges." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Provisoned throughput deleted for model: arn:aws:bedrock:us-east-1:095351214964:provisioned-model/fj8dou88yq5q\n", + "Custom model arn:aws:bedrock:us-east-1:095351214964:custom-model/amazon.titan-text-lite-v1:0:4k/2zefi5rp4ez1 deleted.\n" + ] + } + ], + "source": [ + "bedrock = boto3.client(service_name=\"bedrock\")\n", + "\n", + "# delete Bedrock provisioned throughput\n", + "provisioned_model_id = execution.result(step_name=\"create-provisioned-throughput-step\")\n", + "try:\n", + " bedrock.delete_provisioned_model_throughput(provisionedModelId=provisioned_model_id)\n", + "except ClientError as e:\n", + " print(e.response[\"Error\"][\"Code\"])\n", + "\n", + "print(f\"Provisoned throughput deleted for model: {provisioned_model_id}\")\n", + "\n", + "# delete the custom model\n", + "custom_model_id = execution.result(step_name=\"model-training-step\")\n", + "try:\n", + " bedrock.delete_custom_model(modelIdentifier=custom_model_id)\n", + "except ClientError as e:\n", + " print(e.response[\"Error\"][\"Code\"])\n", + "\n", + "print(f\"Custom model {custom_model_id} deleted.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ResponseMetadata': {'RequestId': '4a830460-d5d5-48bd-94fa-729f0b5dbfcd',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'server': 'Server',\n", + " 'date': 'Fri, 12 Apr 2024 23:08:07 GMT',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '2',\n", + " 'connection': 'keep-alive',\n", + " 'x-amzn-requestid': '4a830460-d5d5-48bd-94fa-729f0b5dbfcd'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# delete the SSM parameter\n", + "ssm_client = 
boto3.client(\"ssm\")\n", + "ssm_client.delete_parameter(Name=\"s3_test_uri\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:sagemaker.workflow.pipeline:If triggers have been setup for this target, they will become orphaned.You will need to clean them up manually via the CLI or EventBridge console.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Deleted pipeline arn:aws:sagemaker:us-east-1:095351214964:pipeline/bedrock-fine-tune-pipeline\n" + ] + } + ], + "source": [ + "# Delete the SageMaker pipeline\n", + "response = pipeline.delete()\n", + "print(f'Deleted pipeline {response[\"PipelineArn\"]}')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Objects in Bucket sagemaker-us-east-1-095351214964 have been deleted.\n" + ] + } + ], + "source": [ + "# delete objects in S3\n", + "def delete_objects_with_prefix(bucket_name, prefix):\n", + " s3 = boto3.client(\"s3\")\n", + "\n", + " response = s3.list_objects_v2(Bucket=bucket_name, Delimiter=\"/\", Prefix=prefix)\n", + "\n", + " if \"Contents\" in response:\n", + " contents = response[\"Contents\"]\n", + " for obj in contents:\n", + " s3.delete_object(Bucket=bucket_name, Key=obj[\"Key\"])\n", + "\n", + " while response[\"IsTruncated\"]:\n", + " response = s3.list_objects_v2(\n", + " Bucket=bucket_name,\n", + " Delimiter=\"/\",\n", + " Prefix=prefix,\n", + " ContinuationToken=response[\"NextContinuationToken\"],\n", + " )\n", + " if \"Contents\" in response:\n", + " contents = response[\"Contents\"]\n", + " for obj in contents:\n", + " s3.delete_object(Bucket=bucket_name, Key=obj[\"Key\"])\n", + "\n", + "\n", + "delete_objects_with_prefix(bucket_name, \"fine-tuning-datasets\")\n", + "delete_objects_with_prefix(bucket_name, pipeline_name)\n", + "\n", + 
"print(f\"Objects in Bucket {bucket_name} have been deleted.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Notebook CI Test Results\n", + "\n", + "This notebook was tested in multiple regions. The test results are as follows, except for us-west-2 which is shown at the top of the notebook.\n", + "\n", + "\n", + "![This us-east-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-east-1/sagemaker-pipelines|step-decorator|bedrock-examples|fine_tune_bedrock_step_decorator.ipynb)\n", + "\n", + "![This us-east-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-east-2/sagemaker-pipelines|step-decorator|bedrock-examples|fine_tune_bedrock_step_decorator.ipynb)\n", + "\n", + "![This us-west-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/us-west-1/sagemaker-pipelines|step-decorator|bedrock-examples|fine_tune_bedrock_step_decorator.ipynb)\n", + "\n", + "![This ca-central-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ca-central-1/sagemaker-pipelines|step-decorator|bedrock-examples|fine_tune_bedrock_step_decorator.ipynb)\n", + "\n", + "![This sa-east-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/sa-east-1/sagemaker-pipelines|step-decorator|bedrock-examples|fine_tune_bedrock_step_decorator.ipynb)\n", + "\n", + "![This eu-west-1 badge failed to load. 
Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-1/sagemaker-pipelines|step-decorator|bedrock-examples|fine_tune_bedrock_step_decorator.ipynb)\n", + "\n", + "![This eu-west-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-2/sagemaker-pipelines|step-decorator|bedrock-examples|fine_tune_bedrock_step_decorator.ipynb)\n", + "\n", + "![This eu-west-3 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-west-3/sagemaker-pipelines|step-decorator|bedrock-examples|fine_tune_bedrock_step_decorator.ipynb)\n", + "\n", + "![This eu-central-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-central-1/sagemaker-pipelines|step-decorator|bedrock-examples|fine_tune_bedrock_step_decorator.ipynb)\n", + "\n", + "![This eu-north-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/eu-north-1/sagemaker-pipelines|step-decorator|bedrock-examples|fine_tune_bedrock_step_decorator.ipynb)\n", + "\n", + "![This ap-southeast-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-southeast-1/sagemaker-pipelines|step-decorator|bedrock-examples|fine_tune_bedrock_step_decorator.ipynb)\n", + "\n", + "![This ap-southeast-2 badge failed to load. 
Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-southeast-2/sagemaker-pipelines|step-decorator|bedrock-examples|fine_tune_bedrock_step_decorator.ipynb)\n", + "\n", + "![This ap-northeast-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-northeast-1/sagemaker-pipelines|step-decorator|bedrock-examples|fine_tune_bedrock_step_decorator.ipynb)\n", + "\n", + "![This ap-northeast-2 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-northeast-2/sagemaker-pipelines|step-decorator|bedrock-examples|fine_tune_bedrock_step_decorator.ipynb)\n", + "\n", + "![This ap-south-1 badge failed to load. Check your device's internet connectivity, otherwise the service is currently unavailable](https://prod.us-west-2.tcx-beacon.docs.aws.dev/sagemaker-nb/ap-south-1/sagemaker-pipelines|step-decorator|bedrock-examples|fine_tune_bedrock_step_decorator.ipynb)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "availableInstances": [ + { + "_defaultOrder": 0, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.t3.medium", + "vcpuNum": 2 + }, + { + "_defaultOrder": 1, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.t3.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 2, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.t3.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 3, + "_isFastLaunch": 
false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.t3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 4, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 5, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 6, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 7, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 8, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 9, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 10, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 11, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 12, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5d.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 13, + "_isFastLaunch": false, + "category": "General 
purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5d.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 14, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5d.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 15, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5d.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 16, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5d.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 17, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5d.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 18, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5d.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 19, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 20, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": true, + "memoryGiB": 0, + "name": "ml.geospatial.interactive", + "supportedImageNames": [ + "sagemaker-geospatial-v1-0" + ], + "vcpuNum": 0 + }, + { + "_defaultOrder": 21, + "_isFastLaunch": true, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.c5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 22, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.c5.xlarge", + "vcpuNum": 4 + }, + { + 
"_defaultOrder": 23, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.c5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 24, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.c5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 25, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 72, + "name": "ml.c5.9xlarge", + "vcpuNum": 36 + }, + { + "_defaultOrder": 26, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 96, + "name": "ml.c5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 27, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 144, + "name": "ml.c5.18xlarge", + "vcpuNum": 72 + }, + { + "_defaultOrder": 28, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.c5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 29, + "_isFastLaunch": true, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g4dn.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 30, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g4dn.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 31, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g4dn.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 32, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g4dn.8xlarge", + 
"vcpuNum": 32 + }, + { + "_defaultOrder": 33, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g4dn.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 34, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g4dn.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 35, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 61, + "name": "ml.p3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 36, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 244, + "name": "ml.p3.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 37, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 488, + "name": "ml.p3.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 38, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.p3dn.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 39, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.r5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 40, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.r5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 41, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.r5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 42, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, 
+ "name": "ml.r5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 43, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.r5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 44, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.r5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 45, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.r5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 46, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.r5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 47, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 48, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 49, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 50, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 51, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 52, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + 
"hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 53, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.g5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 54, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.g5.48xlarge", + "vcpuNum": 192 + }, + { + "_defaultOrder": 55, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 56, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4de.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 57, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.trn1.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 58, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.trn1.32xlarge", + "vcpuNum": 128 + }, + { + "_defaultOrder": 59, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.trn1n.32xlarge", + "vcpuNum": 128 + } + ], + "instance_type": "ml.t3.medium", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 
+} diff --git a/sagemaker-pipelines/step-decorator/bedrock-examples/requirements.txt b/sagemaker-pipelines/step-decorator/bedrock-examples/requirements.txt new file mode 100644 index 0000000000..09c8abbc3b --- /dev/null +++ b/sagemaker-pipelines/step-decorator/bedrock-examples/requirements.txt @@ -0,0 +1,10 @@ +botocore>=1.31.57 +boto3>=1.28.57 +sagemaker>=2.198.1,<3 +typing_extensions +pypdf +# urllib3==2.1.0 +ipywidgets==7.7.2 +jsonlines +datasets==2.15.0 +pandas==2.1.3 \ No newline at end of file