Commit 924f52e

Add benchmarking folder with common config set ups
1 parent 9286c12 commit 924f52e

7 files changed: +269 -0 lines changed

benchmarking/Chart.yaml

Lines changed: 5 additions & 0 deletions

apiVersion: v2
name: precise-prefix-cache-aware
description: A Helm chart for precise-prefix-cache-aware benchmarking
version: 0.1.0
appVersion: "1.0"

benchmarking/README.md

Lines changed: 71 additions & 0 deletions

# Benchmarking Helm Chart

This Helm chart deploys the `inference-perf` benchmarking tool. This guide walks you through deploying a basic benchmarking job. By default, the configuration uses the `shareGPT` dataset.

## Prerequisites

Before you begin, ensure you have the following:

* **Helm 3+**: [Installation Guide](https://helm.sh/docs/intro/install/)
* **Kubernetes Cluster**: Access to a Kubernetes cluster
* **Gateway Deployed**: Your inference server/gateway must be deployed and accessible within the cluster.

**Hugging Face Token Secret**

The benchmark requires a Hugging Face token to pull models. Create a Kubernetes Secret named `hf-token` (or a custom name you provide) in your target namespace, containing your Hugging Face token.

To create this secret:

```bash
export _HF_TOKEN='<YOUR_HF_TOKEN>'
kubectl create secret generic hf-token --from-literal=token=$_HF_TOKEN
```
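
Optionally, verify that the Secret exists with the expected key before installing the chart (assuming the default `hf-token` name used above):

```bash
# The output should list a "token" entry under the Data section
kubectl describe secret hf-token
```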

## Deployment

To deploy the benchmarking chart:

```bash
export IP='<YOUR_IP>'
export PORT='<YOUR_PORT>'
helm install benchmark . -f benchmark-values.yaml \
  --set hfTokenSecret.name=hf-token \
  --set hfTokenSecret.key=token \
  --set "config.server.base_url=http://${IP}:${PORT}"
```

**Parameters to customize:**

* `benchmark`: The Helm release name used in the install command above; choose a unique name for this deployment.
* `hfTokenSecret.name`: The name of the Kubernetes Secret containing your Hugging Face token (default: `hf-token`).
* `hfTokenSecret.key`: The key in that Secret pointing to the Hugging Face token (default: `token`).
* `config.server.base_url`: The base URL (IP and port) of your inference server.
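
Once installed, the chart runs the benchmark as a Kubernetes Job. A quick way to follow its progress is to watch the Job and stream its pod logs via the `app=inference-perf` label this chart sets; the commands below assume the defaults above, so adjust them if you changed the release name or labels:

```bash
# Check whether the benchmark Job has completed
kubectl get jobs -l app=inference-perf

# Stream the benchmark output while it runs
kubectl logs -f -l app=inference-perf
```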

### Storage Parameters

The following shows how to add storage options to the config.
By default, reports are saved to local storage; however, the pod is deleted once the inference-perf job completes, so reports written there do not persist.

```yaml
storage:
  local_storage:
    path: "reports-{timestamp}"        # Local directory path
    report_file_prefix: null           # Optional filename prefix
  google_cloud_storage:                # Optional GCS configuration
    bucket_name: "your-bucket-name"    # Required GCS bucket
    path: "reports-{timestamp}"        # Optional path prefix
    report_file_prefix: null           # Optional filename prefix
  simple_storage_service:
    bucket_name: "your-bucket-name"    # Required S3 bucket
    path: "reports-{timestamp}"        # Optional path prefix
    report_file_prefix: null           # Optional filename prefix
```
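
As an alternative to editing the values file, a single storage field can also be overridden on the command line with Helm's `--set` flag, in addition to the flags shown in the Deployment section (the bucket name here is a placeholder):

```bash
helm install benchmark . -f benchmark-values.yaml \
  --set config.storage.google_cloud_storage.bucket_name=your-bucket-name
```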

## Uninstalling the Chart

To uninstall the deployed chart (using the release name from the Deployment section):

```bash
helm uninstall benchmark
```

benchmarking/benchmark-values.yaml

Lines changed: 50 additions & 0 deletions

# High-Cache Configuration
job:
  image: "quay.io/inference-perf/inference-perf:latest"
  memory: "8G"

logLevel: INFO

hfTokenSecret:
  name: hf-token
  key: token

config:
  load:
    type: constant
    interval: 15
    stages:
      - rate: 10
        duration: 20
      - rate: 20
        duration: 20
      - rate: 30
        duration: 20
  api:
    type: completion
    streaming: true
  server:
    type: vllm
    model_name: meta-llama/Llama-3.1-8B-Instruct
    base_url: http://0.0.0.0:8000
    ignore_eos: true
  tokenizer:
    pretrained_model_name_or_path: meta-llama/Llama-3.1-8B-Instruct
  data:
    type: shareGPT
  storage:
    google_cloud_storage:
      bucket_name: "inference-perf-results"
      report_file_prefix: benchmark
  metrics:
    type: prometheus
    prometheus:
      google_managed: true
  report:
    request_lifecycle:
      summary: true
      per_stage: true
      per_request: true
    prometheus:
      summary: true
      per_stage: true
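
To preview how these values render into the Job and ConfigMap before installing, a dry render can help. Note that the chart's ConfigMap template uses Helm's `lookup` function, which only queries the cluster during a real install (or a server-side dry run in recent Helm versions), so a plain `helm template` leaves the tokenizer token unset:

```bash
# Render locally without contacting the cluster (lookup returns empty here)
helm template benchmark . -f benchmark-values.yaml

# Render against the cluster so lookup can resolve the hf-token Secret
helm install benchmark . -f benchmark-values.yaml --dry-run=server
```
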
Lines changed: 72 additions & 0 deletions

{{/*
Expand the name of the chart.
*/}}
{{- define "benchmarking.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "benchmarking.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "benchmarking.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "benchmarking.labels" -}}
helm.sh/chart: {{ include "benchmarking.chart" . }}
{{ include "benchmarking.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "benchmarking.selectorLabels" -}}
app.kubernetes.io/name: {{ include "benchmarking.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Config Mount Path
*/}}
{{- define "benchmarking.configMount" -}}
{{- print "/etc/inference-perf" -}}
{{- end }}

{{/*
Hugging Face Secret Name
*/}}
{{- define "benchmarking.hfSecret" -}}
{{- printf "%s-hf-secret" (include "benchmarking.fullname" .) -}}
{{- end }}

{{/*
Hugging Face Secret Key
*/}}
{{- define "benchmarking.hfKey" -}}
{{- print "token" -}}
{{- end }}
Lines changed: 17 additions & 0 deletions

apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "benchmarking.fullname" . }}-config
  labels:
    {{- include "benchmarking.labels" . | nindent 4 }}
data:
  config.yaml: |
    {{- $config := .Values.config | deepCopy -}}
    {{- if .Values.hfTokenSecret -}}
    {{- $secret := lookup "v1" "Secret" .Release.Namespace .Values.hfTokenSecret.name -}}
    {{- if $secret -}}
    {{- $secretToken := index $secret.data .Values.hfTokenSecret.key | b64dec -}}
    {{- $_ := set $config.tokenizer "token" $secretToken -}}
    {{- end -}}
    {{- end -}}
    {{- toYaml $config | nindent 4 }}

benchmarking/templates/job.yaml

Lines changed: 43 additions & 0 deletions

apiVersion: batch/v1
kind: Job
metadata:
  name: {{ include "benchmarking.fullname" . }}-job
  labels:
    {{- include "benchmarking.labels" . | nindent 4 }}
    app: inference-perf
spec:
  template:
    metadata:
      labels:
        {{- include "benchmarking.selectorLabels" . | nindent 8 }}
        app: inference-perf
    spec:
      restartPolicy: Never
      containers:
        - name: inference-perf-container
          image: {{ .Values.job.image }}
          command: ["inference-perf"]
          args:
            - "--config_file"
            - "{{ include "benchmarking.configMount" . }}/config.yaml"
            - "--log-level"
            - {{ .Values.logLevel }}
          env:
            {{- if .Values.hfToken }}
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: {{ include "benchmarking.hfSecret" . }}
                  key: {{ include "benchmarking.hfKey" . }}
            {{- end }}
          volumeMounts:
            - name: config-volume
              mountPath: {{ include "benchmarking.configMount" . }}
              readOnly: true
          resources:
            requests:
              memory: {{ .Values.job.memory }}
      volumes:
        - name: config-volume
          configMap:
            name: {{ include "benchmarking.fullname" . }}-config

benchmarking/templates/secret.yaml

Lines changed: 11 additions & 0 deletions

{{- if .Values.hfToken -}}
apiVersion: v1
kind: Secret
metadata:
  name: {{ include "benchmarking.hfSecret" . }}
  labels:
    {{- include "benchmarking.labels" . | nindent 4 }}
type: Opaque
data:
  token: {{ .Values.hfToken | b64enc }}
{{- end }}
