From 8ea9de78be8c91a40b5acf33de7d97811ab95136 Mon Sep 17 00:00:00 2001 From: Ee Durbin Date: Sun, 9 Apr 2023 21:14:12 -0400 Subject: [PATCH 1/5] configure datadog logging endpoint --- terraform/config.tf | 4 ++++ terraform/file-hosting/fastly-service.tf | 7 +++++++ terraform/file-hosting/main.tf | 1 + terraform/main.tf | 2 ++ 4 files changed, 14 insertions(+) diff --git a/terraform/config.tf b/terraform/config.tf index e4dc6b26..72657117 100644 --- a/terraform/config.tf +++ b/terraform/config.tf @@ -38,6 +38,10 @@ variable "test_pypi_warehouse_token" { type = string sensitive = true } +variable "datadog_token" { + type = string + sensitive = true +} terraform { cloud { diff --git a/terraform/file-hosting/fastly-service.tf b/terraform/file-hosting/fastly-service.tf index 79cc4ee1..ac1aeac6 100644 --- a/terraform/file-hosting/fastly-service.tf +++ b/terraform/file-hosting/fastly-service.tf @@ -201,6 +201,13 @@ resource "fastly_service_vcl" "files" { response_condition = "Never" } + logging_datadog { + name = "DataDog Log" + token = var.datadog_token + placement = "none" + response_condition = "Never" + } + logging_s3 { name = "S3 Error Logs" diff --git a/terraform/file-hosting/main.tf b/terraform/file-hosting/main.tf index 32c62389..15240db8 100644 --- a/terraform/file-hosting/main.tf +++ b/terraform/file-hosting/main.tf @@ -11,6 +11,7 @@ variable "aws_access_key_id" { type = string } variable "aws_secret_access_key" { type = string } variable "gcs_access_key_id" { type = string } variable "gcs_secret_access_key" { type = string } +variable "datadog_token" { type = string } variable "fastly_endpoints" { type = map(any) } variable "domain_map" { type = map(any) } diff --git a/terraform/main.tf b/terraform/main.tf index 1dca4892..afdacf19 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -149,6 +149,7 @@ module "file-hosting" { files_bucket = "pypi-files" mirror = "mirror.dub1.pypi.io" s3_logging_keys = var.fastly_s3_logging + datadog_token = var.datadog_token 
aws_access_key_id = var.aws_access_key_id aws_secret_access_key = var.aws_secret_access_key @@ -177,6 +178,7 @@ module "test-file-hosting" { files_bucket = "pypi-files-staging" mirror = "test-mirror.dub1.pypi.io" s3_logging_keys = var.fastly_s3_logging + datadog_token = var.datadog_token aws_access_key_id = var.aws_access_key_id aws_secret_access_key = var.aws_secret_access_key From c2696f97d3a0c72904f88640dd6edf5a523e4477 Mon Sep 17 00:00:00 2001 From: Ee Durbin Date: Mon, 10 Apr 2023 06:01:13 -0400 Subject: [PATCH 2/5] log to datadog when packages are served from fallback --- terraform/file-hosting/fastly-service.tf | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/terraform/file-hosting/fastly-service.tf b/terraform/file-hosting/fastly-service.tf index ac1aeac6..2e49995c 100644 --- a/terraform/file-hosting/fastly-service.tf +++ b/terraform/file-hosting/fastly-service.tf @@ -204,8 +204,7 @@ resource "fastly_service_vcl" "files" { logging_datadog { name = "DataDog Log" token = var.datadog_token - placement = "none" - response_condition = "Never" + response_condition = "Package Served From Fallback" } logging_s3 { @@ -257,6 +256,12 @@ resource "fastly_service_vcl" "files" { type = "RESPONSE" statement = "req.http.Fastly-Client-IP == \"127.0.0.1\" && req.http.Fastly-Client-IP != \"127.0.0.1\"" } + + condition { + name = "Package Served From Fallback" + type = "RESPONSE" + statement = "req.restarts > 0 && req.url ~ \"^/packages/[a-f0-9]{2}/[a-f0-9]{2}/[a-f0-9]{60}/\" && http_status_matches(resp.status, \"200\")" + } } resource "aws_route53_record" "files" { From 7a00e2eb3af55eca0e2f82efdc2ebcb32ed93fe8 Mon Sep 17 00:00:00 2001 From: Ee Durbin Date: Mon, 10 Apr 2023 06:01:32 -0400 Subject: [PATCH 3/5] configure B2 auth --- terraform/file-hosting/fastly-service.tf | 22 +++++++++++++++++++++- terraform/file-hosting/vcl/files.vcl | 22 +++++++++++++++++++--- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git 
a/terraform/file-hosting/fastly-service.tf b/terraform/file-hosting/fastly-service.tf index 2e49995c..9ce4f50c 100644 --- a/terraform/file-hosting/fastly-service.tf +++ b/terraform/file-hosting/fastly-service.tf @@ -95,12 +95,32 @@ resource "fastly_service_vcl" "files" { error_threshold = 5 } + backend { + name = "B2" + auto_loadbalance = false + shield = "iad-va-us" + + request_condition = "NeverReq" + healthcheck = "B2 Health" + + address = "${var.files_bucket}.s3.us-east-005.backblazeb2.com" + port = 443 + use_ssl = true + ssl_cert_hostname = "${var.files_bucket}.s3.us-east-005.backblazeb2.com" + ssl_sni_hostname = "${var.files_bucket}.s3.us-east-005.backblazeb2.com" + + connect_timeout = 5000 + first_byte_timeout = 60000 + between_bytes_timeout = 15000 + error_threshold = 5 + } + backend { name = "S3" auto_loadbalance = false - request_condition = "NeverReq" shield = "bfi-wa-us" + request_condition = "NeverReq" healthcheck = "S3 Health" address = "${var.files_bucket}.s3.amazonaws.com" diff --git a/terraform/file-hosting/vcl/files.vcl b/terraform/file-hosting/vcl/files.vcl index bc6c371d..a3c717f9 100644 --- a/terraform/file-hosting/vcl/files.vcl +++ b/terraform/file-hosting/vcl/files.vcl @@ -79,9 +79,24 @@ sub vcl_recv { set req.http.Fastly-Force-Shield = "1"; } - # Requests that are for an *actual* file get disaptched to Amazon S3 instead of - # to our typical backends. We need to setup the request to correctly access - # S3 and to authorize ourselves to S3. + # Requests that are for an *actual* file get dispatched to object storage instead of + # to our typical backends. + + # If our file request is being dispatched to B2, we need to setup the request to correctly + # access B2 and to authorize ourselves to B2 with S3 compatible auth. 
+ if (req.backend == F_B2 && req.url ~ "^/packages/[a-f0-9]{2}/[a-f0-9]{2}/[a-f0-9]{60}/") { + # Setup our environment to better match what B2's S3-compatible API expects/needs + set req.http.Host = var.B2-Bucket-Name ".s3.us-east-005.backblazeb2.com"; + set req.http.Date = now; + set req.url = regsuball(req.url, "\+", urlencode("+")); + + # Compute the Authorization header that B2 requires to be able to + # access the files stored there. + set req.http.Authorization = "AWS " var.B2-Application-Key-ID ":" digest.hmac_sha1_base64(var.B2-Application-Key, "GET" LF LF LF req.http.Date LF "/" var.B2-Bucket-Name req.url.path); + } + + # If our file request is being dispatched to S3, we need to setup the request to correctly + # access S3 and to authorize ourselves to S3. if (req.backend == F_S3 && req.url ~ "^/packages/[a-f0-9]{2}/[a-f0-9]{2}/[a-f0-9]{60}/") { # Setup our environment to better match what S3 expects/needs set req.http.Host = var.S3-Bucket-Name ".s3.amazonaws.com"; @@ -92,6 +107,7 @@ sub vcl_recv { # access the files stored there. set req.http.Authorization = "AWS " var.AWS-Access-Key-ID ":" digest.hmac_sha1_base64(var.AWS-Secret-Access-Key, "GET" LF LF LF req.http.Date LF "/" var.S3-Bucket-Name req.url.path); } + # If our file request is being dispatched to GCS, setup the request to correctly # access GCS and authorize ourselves with GCS interoperability credentials. 
if (req.backend == GCS && req.url ~ "^/packages/[a-f0-9]{2}/[a-f0-9]{2}/[a-f0-9]{60}/") { From a48de6a0eb26fee1ceba188b465ece1a3d733edf Mon Sep 17 00:00:00 2001 From: Ee Durbin Date: Mon, 10 Apr 2023 06:09:46 -0400 Subject: [PATCH 4/5] configure, but do not put into service the S3 Archive backend --- terraform/file-hosting/fastly-service.tf | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/terraform/file-hosting/fastly-service.tf b/terraform/file-hosting/fastly-service.tf index 9ce4f50c..2cf37461 100644 --- a/terraform/file-hosting/fastly-service.tf +++ b/terraform/file-hosting/fastly-service.tf @@ -115,6 +115,26 @@ resource "fastly_service_vcl" "files" { error_threshold = 5 } + backend { + name = "S3_Archive" + auto_loadbalance = false + shield = "bfi-wa-us" + + request_condition = "NeverReq" + healthcheck = "S3 Health" + + address = "${var.files_bucket}-archive.s3.amazonaws.com" + port = 443 + use_ssl = true + ssl_cert_hostname = "${var.files_bucket}-archive.s3.amazonaws.com" + ssl_sni_hostname = "${var.files_bucket}-archive.s3.amazonaws.com" + + connect_timeout = 5000 + first_byte_timeout = 60000 + between_bytes_timeout = 15000 + error_threshold = 5 + } + backend { name = "S3" auto_loadbalance = false From 40dfd91fa2e2fd910bb109f5683bb34befb8d2d0 Mon Sep 17 00:00:00 2001 From: Ee Durbin Date: Mon, 10 Apr 2023 06:12:24 -0400 Subject: [PATCH 5/5] serve from b2, fallback to gcs --- terraform/file-hosting/fastly-service.tf | 32 ++++++++++++------------ terraform/file-hosting/vcl/files.vcl | 8 +++--- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/terraform/file-hosting/fastly-service.tf b/terraform/file-hosting/fastly-service.tf index 2cf37461..531b5295 100644 --- a/terraform/file-hosting/fastly-service.tf +++ b/terraform/file-hosting/fastly-service.tf @@ -76,18 +76,18 @@ resource "fastly_service_vcl" "files" { } backend { - name = "GCS" + name = "B2" auto_loadbalance = false - shield = "bfi-wa-us" + shield = "iad-va-us" 
request_condition = "Package File" - healthcheck = "GCS Health" + healthcheck = "B2 Health" - address = "${var.files_bucket}.storage.googleapis.com" - port = 443 - use_ssl = true - ssl_cert_hostname = "${var.files_bucket}.storage.googleapis.com" - ssl_sni_hostname = "${var.files_bucket}.storage.googleapis.com" + address = "${var.files_bucket}.s3.us-east-005.backblazeb2.com" + port = 443 + use_ssl = true + ssl_cert_hostname = "${var.files_bucket}.s3.us-east-005.backblazeb2.com" + ssl_sni_hostname = "${var.files_bucket}.s3.us-east-005.backblazeb2.com" connect_timeout = 5000 first_byte_timeout = 60000 @@ -96,18 +96,18 @@ resource "fastly_service_vcl" "files" { } backend { - name = "B2" + name = "GCS" auto_loadbalance = false - shield = "iad-va-us" + shield = "bfi-wa-us" request_condition = "NeverReq" - healthcheck = "B2 Health" + healthcheck = "GCS Health" - address = "${var.files_bucket}.s3.us-east-005.backblazeb2.com" - port = 443 - use_ssl = true - ssl_cert_hostname = "${var.files_bucket}.s3.us-east-005.backblazeb2.com" - ssl_sni_hostname = "${var.files_bucket}.s3.us-east-005.backblazeb2.com" + address = "${var.files_bucket}.storage.googleapis.com" + port = 443 + use_ssl = true + ssl_cert_hostname = "${var.files_bucket}.storage.googleapis.com" + ssl_sni_hostname = "${var.files_bucket}.storage.googleapis.com" connect_timeout = 5000 first_byte_timeout = 60000 diff --git a/terraform/file-hosting/vcl/files.vcl b/terraform/file-hosting/vcl/files.vcl index a3c717f9..74160ec2 100644 --- a/terraform/file-hosting/vcl/files.vcl +++ b/terraform/file-hosting/vcl/files.vcl @@ -75,7 +75,7 @@ sub vcl_recv { # Change our backend to S3 to look for the file there, re-enable clustering and continue # https://www.slideshare.net/Fastly/advanced-vcl-how-to-use-restart if (req.restarts > 0 && req.url ~ "^/packages/[a-f0-9]{2}/[a-f0-9]{2}/[a-f0-9]{60}/") { - set req.backend = F_S3; + set req.backend = F_GCS; set req.http.Fastly-Force-Shield = "1"; } @@ -142,9 +142,9 @@ sub vcl_fetch { set 
beresp.cacheable = true; } - # If we successfully got a 404 response from GCS for a Package URL restart - # to check S3 for the file! - if (req.restarts == 0 && req.backend == GCS && req.url ~ "^/packages/[a-f0-9]{2}/[a-f0-9]{2}/[a-f0-9]{60}/" && http_status_matches(beresp.status, "404")) { + # If we successfully got a 404 response from B2 for a Package URL restart + # to check GCS for the file! + if (req.restarts == 0 && req.backend == B2 && req.url ~ "^/packages/[a-f0-9]{2}/[a-f0-9]{2}/[a-f0-9]{60}/" && http_status_matches(beresp.status, "404")) { restart; }