diff --git a/terraform/config.tf b/terraform/config.tf
index e4dc6b26..72657117 100644
--- a/terraform/config.tf
+++ b/terraform/config.tf
@@ -38,6 +38,10 @@ variable "test_pypi_warehouse_token" {
   type      = string
   sensitive = true
 }
+variable "datadog_token" {
+  type      = string
+  sensitive = true
+}
 
 terraform {
   cloud {
diff --git a/terraform/file-hosting/fastly-service.tf b/terraform/file-hosting/fastly-service.tf
index 79cc4ee1..531b5295 100644
--- a/terraform/file-hosting/fastly-service.tf
+++ b/terraform/file-hosting/fastly-service.tf
@@ -75,12 +75,32 @@ resource "fastly_service_vcl" "files" {
     error_threshold       = 5
   }
 
+  backend {
+    name             = "B2"
+    auto_loadbalance = false
+    shield           = "iad-va-us"
+
+    request_condition = "Package File"
+    healthcheck       = "B2 Health"
+
+    address           = "${var.files_bucket}.s3.us-east-005.backblazeb2.com"
+    port              = 443
+    use_ssl           = true
+    ssl_cert_hostname = "${var.files_bucket}.s3.us-east-005.backblazeb2.com"
+    ssl_sni_hostname  = "${var.files_bucket}.s3.us-east-005.backblazeb2.com"
+
+    connect_timeout       = 5000
+    first_byte_timeout    = 60000
+    between_bytes_timeout = 15000
+    error_threshold       = 5
+  }
+
   backend {
     name             = "GCS"
     auto_loadbalance = false
     shield           = "bfi-wa-us"
 
-    request_condition = "Package File"
+    request_condition = "NeverReq"
     healthcheck       = "GCS Health"
 
     address = "${var.files_bucket}.storage.googleapis.com"
@@ -96,11 +116,31 @@ resource "fastly_service_vcl" "files" {
   }
 
   backend {
-    name             = "S3"
+    name             = "S3_Archive"
     auto_loadbalance = false
+    shield           = "bfi-wa-us"
 
+    request_condition = "NeverReq"
+    healthcheck       = "S3 Health"
+
+    address           = "${var.files_bucket}-archive.s3.amazonaws.com"
+    port              = 443
+    use_ssl           = true
+    ssl_cert_hostname = "${var.files_bucket}-archive.s3.amazonaws.com"
+    ssl_sni_hostname  = "${var.files_bucket}-archive.s3.amazonaws.com"
+
+    connect_timeout       = 5000
+    first_byte_timeout    = 60000
+    between_bytes_timeout = 15000
+    error_threshold       = 5
+  }
+
+  backend {
+    name             = "S3"
+    auto_loadbalance = false
     shield           = "bfi-wa-us"
 
+    request_condition = "NeverReq"
     healthcheck       = "S3 Health"
 
     address = "${var.files_bucket}.s3.amazonaws.com"
@@ -201,6 +241,12 @@ resource "fastly_service_vcl" "files" {
     response_condition = "Never"
   }
 
+  logging_datadog {
+    name               = "DataDog Log"
+    token              = var.datadog_token
+    response_condition = "Package Served From Fallback"
+  }
+
   logging_s3 {
     name = "S3 Error Logs"
 
@@ -250,6 +296,12 @@ resource "fastly_service_vcl" "files" {
     type      = "RESPONSE"
     statement = "req.http.Fastly-Client-IP == \"127.0.0.1\" && req.http.Fastly-Client-IP != \"127.0.0.1\""
   }
+
+  condition {
+    name      = "Package Served From Fallback"
+    type      = "RESPONSE"
+    statement = "req.restarts > 0 && req.backend == F_GCS && req.url ~ \"^/packages/[a-f0-9]{2}/[a-f0-9]{2}/[a-f0-9]{60}/\" && http_status_matches(beresp.status, \"200\")"
+  }
 }
 
 resource "aws_route53_record" "files" {
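Note: the "NeverReq" request condition referenced by the GCS, S3_Archive, and S3 backends above parks those backends so Fastly never routes to them on its own; they remain reachable only when the VCL explicitly sets req.backend after a restart. The condition itself is not defined in this diff and presumably already exists in fastly-service.tf. Judging from the always-false "Never" RESPONSE condition shown above, it likely looks something like this (hypothetical sketch, inside the same fastly_service_vcl resource):

  condition {
    name      = "NeverReq"
    type      = "REQUEST"
    statement = "req.http.Fastly-Client-IP == \"127.0.0.1\" && req.http.Fastly-Client-IP != \"127.0.0.1\""
  }

The self-contradictory statement can never evaluate to true, so a backend guarded by it is never selected by a request on its own.
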
diff --git a/terraform/file-hosting/main.tf b/terraform/file-hosting/main.tf
index 32c62389..15240db8 100644
--- a/terraform/file-hosting/main.tf
+++ b/terraform/file-hosting/main.tf
@@ -11,6 +11,7 @@ variable "aws_access_key_id" { type = string }
 variable "aws_secret_access_key" { type = string }
 variable "gcs_access_key_id" { type = string }
 variable "gcs_secret_access_key" { type = string }
+variable "datadog_token" { type = string }
 variable "fastly_endpoints" { type = map(any) }
 variable "domain_map" { type = map(any) }
 
diff --git a/terraform/file-hosting/vcl/files.vcl b/terraform/file-hosting/vcl/files.vcl
index bc6c371d..74160ec2 100644
--- a/terraform/file-hosting/vcl/files.vcl
+++ b/terraform/file-hosting/vcl/files.vcl
@@ -75,13 +75,28 @@ sub vcl_recv {
-    # Change our backend to S3 to look for the file there, re-enable clustering and continue
+    # Change our backend to GCS to look for the file there, re-enable clustering and continue
     # https://www.slideshare.net/Fastly/advanced-vcl-how-to-use-restart
     if (req.restarts > 0 && req.url ~ "^/packages/[a-f0-9]{2}/[a-f0-9]{2}/[a-f0-9]{60}/") {
-        set req.backend = F_S3;
+        set req.backend = F_GCS;
         set req.http.Fastly-Force-Shield = "1";
     }
 
-    # Requests that are for an *actual* file get disaptched to Amazon S3 instead of
-    # to our typical backends. We need to setup the request to correctly access
-    # S3 and to authorize ourselves to S3.
+    # Requests that are for an *actual* file get dispatched to object storage instead of
+    # to our typical backends.
+
+    # If our file request is being dispatched to B2, we need to setup the request to correctly
+    # access B2 and to authorize ourselves to B2 with S3-compatible auth.
+    if (req.backend == F_B2 && req.url ~ "^/packages/[a-f0-9]{2}/[a-f0-9]{2}/[a-f0-9]{60}/") {
+        # Setup our environment to better match what S3 expects/needs
+        set req.http.Host = var.B2-Bucket-Name ".s3.us-east-005.backblazeb2.com";
+        set req.http.Date = now;
+        set req.url = regsuball(req.url, "\+", urlencode("+"));
+
+        # Compute the Authorization header that B2 requires to be able to
+        # access the files stored there.
+        set req.http.Authorization = "AWS " var.B2-Application-Key-ID ":" digest.hmac_sha1_base64(var.B2-Application-Key, "GET" LF LF LF req.http.Date LF "/" var.B2-Bucket-Name req.url.path);
+    }
+
+    # If our file request is being dispatched to S3, we need to setup the request to correctly
+    # access S3 and to authorize ourselves to S3.
     if (req.backend == F_S3 && req.url ~ "^/packages/[a-f0-9]{2}/[a-f0-9]{2}/[a-f0-9]{60}/") {
         # Setup our environment to better match what S3 expects/needs
         set req.http.Host = var.S3-Bucket-Name ".s3.amazonaws.com";
@@ -92,6 +107,7 @@ sub vcl_recv {
         # access the files stored there.
         set req.http.Authorization = "AWS " var.AWS-Access-Key-ID ":" digest.hmac_sha1_base64(var.AWS-Secret-Access-Key, "GET" LF LF LF req.http.Date LF "/" var.S3-Bucket-Name req.url.path);
     }
+
     # If our file request is being dispatched to GCS, setup the request to correctly
     # access GCS and authorize ourselves with GCS interoperability credentials.
     if (req.backend == F_GCS && req.url ~ "^/packages/[a-f0-9]{2}/[a-f0-9]{2}/[a-f0-9]{60}/") {
@@ -126,9 +142,9 @@ sub vcl_fetch {
         set beresp.cacheable = true;
     }
 
-    # If we successfully got a 404 response from GCS for a Package URL restart
-    # to check S3 for the file!
-    if (req.restarts == 0 && req.backend == F_GCS && req.url ~ "^/packages/[a-f0-9]{2}/[a-f0-9]{2}/[a-f0-9]{60}/" && http_status_matches(beresp.status, "404")) {
+    # If we successfully got a 404 response from B2 for a Package URL restart
+    # to check GCS for the file!
+    if (req.restarts == 0 && req.backend == F_B2 && req.url ~ "^/packages/[a-f0-9]{2}/[a-f0-9]{2}/[a-f0-9]{60}/" && http_status_matches(beresp.status, "404")) {
         restart;
     }
 
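Note: the Authorization headers built in vcl_recv above are AWS signature version 2: an HMAC-SHA1 over the string-to-sign "VERB\nContent-MD5\nContent-Type\nDate\nCanonicalizedResource", base64-encoded and prefixed with the key ID (the Content-MD5 and Content-Type fields are empty for these GETs, hence the bare LFs). A decomposed sketch of the same computation for the B2 branch, using a hypothetical VCL local variable in place of the patch's single expression:

  sub vcl_recv {
    # Hypothetical decomposition of the B2 Authorization header built above.
    declare local var.string_to_sign STRING;
    # GET, empty Content-MD5, empty Content-Type, the Date header, then /bucket/key
    set var.string_to_sign = "GET" LF LF LF req.http.Date LF "/" var.B2-Bucket-Name req.url.path;
    set req.http.Authorization = "AWS " var.B2-Application-Key-ID ":" digest.hmac_sha1_base64(var.B2-Application-Key, var.string_to_sign);
  }

The patch reuses this same legacy V2 scheme against B2's S3-compatible endpoint, which is why the B2 and S3 branches differ only in hostname, bucket name, and credentials.
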
diff --git a/terraform/main.tf b/terraform/main.tf
index 1dca4892..afdacf19 100644
--- a/terraform/main.tf
+++ b/terraform/main.tf
@@ -149,6 +149,7 @@ module "file-hosting" {
   files_bucket    = "pypi-files"
   mirror          = "mirror.dub1.pypi.io"
   s3_logging_keys = var.fastly_s3_logging
+  datadog_token   = var.datadog_token
 
   aws_access_key_id     = var.aws_access_key_id
   aws_secret_access_key = var.aws_secret_access_key
@@ -177,6 +178,7 @@ module "test-file-hosting" {
   files_bucket    = "pypi-files-staging"
   mirror          = "test-mirror.dub1.pypi.io"
   s3_logging_keys = var.fastly_s3_logging
+  datadog_token   = var.datadog_token
 
   aws_access_key_id     = var.aws_access_key_id
   aws_secret_access_key = var.aws_secret_access_key
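Note: the new root-level datadog_token variable is declared sensitive and has no default, so applying this change requires supplying a value out of band, for example as a Terraform Cloud workspace variable (config.tf uses the cloud backend) or in a local, uncommitted tfvars file. A hypothetical placeholder:

  # terraform.tfvars (hypothetical, not committed); the value is the Datadog
  # API key that the logging_datadog endpoint authenticates with.
  datadog_token = "0123456789abcdef0123456789abcdef"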