From 6ecf011b59e133afe8375c59d442538a1a59a6f3 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Tue, 20 Dec 2022 10:03:37 +0100 Subject: [PATCH 01/39] asdasd --- src/sentry/conf/server.py | 6 ++ .../dynamic_sampling/adjustment_models.py | 40 ++++++++ .../dynamic_sampling/prioritise_projects.py | 92 +++++++++++++++++++ src/sentry/dynamic_sampling/tasks.py | 19 ++++ .../test_adjusments_models.py | 8 ++ .../test_prioritise_projects.py | 8 ++ 6 files changed, 173 insertions(+) create mode 100644 src/sentry/dynamic_sampling/adjustment_models.py create mode 100644 src/sentry/dynamic_sampling/prioritise_projects.py create mode 100644 src/sentry/dynamic_sampling/tasks.py create mode 100644 tests/sentry/dynamic_sampling/test_adjusments_models.py create mode 100644 tests/sentry/dynamic_sampling/test_prioritise_projects.py diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py index 8a9342c947776c..c980705572c049 100644 --- a/src/sentry/conf/server.py +++ b/src/sentry/conf/server.py @@ -846,6 +846,12 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): "schedule": timedelta(hours=1), "options": {"expires": 3600}, }, + "dynamic-sampling-project-biases": { + "task": "sentry.dynamic_sampling.tasks.prioritize_by_project", + "schedule": timedelta(hours=1), + # TODO: (andrii) adjust defaults + "options": {"expires": 3600}, + }, } BGTASKS = { diff --git a/src/sentry/dynamic_sampling/adjustment_models.py b/src/sentry/dynamic_sampling/adjustment_models.py new file mode 100644 index 00000000000000..be22539ea97848 --- /dev/null +++ b/src/sentry/dynamic_sampling/adjustment_models.py @@ -0,0 +1,40 @@ +import statistics +from dataclasses import dataclass, field +from operator import itemgetter +from typing import List + + +@dataclass +class Project: + id: int + total: int + # sample_rate: float + + +@dataclass +class AdjustedModel: + projects: List[Project] + fidelity_rate: float + + def adjust_sample_rates(self): + # Step 1: sort + sorted_ = sorted(self.projects, reverse=True, key=itemgetter("total")) + + # Step 2: find avg + avg = statistics.mean(sorted_) + + # Step 3: + # Find upper bound + + # One maximum adjustment 1 up to 4 + min_element = sorted_[0] + + max_ = min_element / self.fidelity_rate + adjustments_ceiling_p4 = min((avg - min_element), min_element / self.fidelity_rate) + + d2 = adjustments_ceiling_p4 - sorted_[1] + total_adjusment = d2 + d1 = abs(avg - sorted_[1]) + d11 = d1 / d1 * total_adjusment + + return [d11, d2] diff --git a/src/sentry/dynamic_sampling/prioritise_projects.py b/src/sentry/dynamic_sampling/prioritise_projects.py new file mode 100644 index 00000000000000..6c7208a1420f88 --- /dev/null +++ b/src/sentry/dynamic_sampling/prioritise_projects.py @@ -0,0 +1,92 @@ +import logging +import time +from collections import defaultdict +from datetime import datetime, timedelta +from typing import Mapping, Sequence + +from snuba_sdk import ( + Column, + Condition, + Direction, + Entity, + Function, + Granularity, + Op, + OrderBy, + Query, + Request, +) + +from sentry.release_health.release_monitor.base import BaseReleaseMonitorBackend, Totals +from sentry.sentry_metrics import indexer +from sentry.sentry_metrics.configuration import UseCaseKey +from sentry.sentry_metrics.indexer.strings import SESSION_METRIC_NAMES +from sentry.sentry_metrics.utils import resolve_tag_key +from sentry.snuba.dataset import Dataset, EntityKey +from sentry.snuba.metrics.naming_layer.mri import SessionMRI +from sentry.utils import metrics +from sentry.utils.snuba import raw_snql_query + +logger = 
logging.getLogger(__name__) +MAX_SECONDS = 60 +CHUNK_SIZE = 1000 + + +def fetch_projects_with_total_volumes() -> Mapping[int, Sequence[int]]: + # TODO: (andrii) include only "disconnected" projects or independent in tracing context + aggregated_projects = defaultdict(list) + start_time = time.time() + offset = 0 + while (time.time() - start_time) < MAX_SECONDS: + query = ( + Query( + match=Entity(EntityKey.OrgMetricsCounters.value), + select=[ + Column("org_id"), + Column("project_id"), + ], + groupby=[Column("org_id"), Column("project_id")], + where=[ + Condition(Column("timestamp"), Op.GTE, datetime.utcnow() - timedelta(hours=6)), + Condition(Column("timestamp"), Op.LT, datetime.utcnow()), + Condition( + Column("metric_id"), + Op.EQ, + SESSION_METRIC_NAMES[SessionMRI.SESSION.value], + ), + ], + granularity=Granularity(3600), + orderby=[ + OrderBy(Column("org_id"), Direction.ASC), + OrderBy(Column("project_id"), Direction.ASC), + ], + ) + .set_limit(CHUNK_SIZE + 1) + .set_offset(offset) + ) + request = Request(dataset=Dataset.Metrics.value, app_id="dynamic_sampling", query=query) + data = raw_snql_query( + # TODO: replace to new referrer + request, + referrer="dynamic_sampling.fetch_projects_with_recent_sessions", + )["data"] + count = len(data) + more_results = count > CHUNK_SIZE + offset += CHUNK_SIZE + + if more_results: + data = data[:-1] + + for row in data: + aggregated_projects[row["org_id"]].append(row["project_id"]) + + if not more_results: + break + + else: + logger.error( + "", + extra={"offset": offset}, + ) + + return aggregated_projects diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py new file mode 100644 index 00000000000000..e709dd54ce3063 --- /dev/null +++ b/src/sentry/dynamic_sampling/tasks.py @@ -0,0 +1,19 @@ +import logging + +from sentry.tasks.base import instrumented_task + +CHUNK_SIZE = 1000 +MAX_SECONDS = 60 + +logger = logging.getLogger("sentry.tasks.dynamic_sampling") + + +@instrumented_task( + name="sentry.dynamic_sampling.tasks.foo", + queue="releasemonitor", + default_retry_delay=5, + max_retries=5, +) # type: ignore +def foo(**kwargs) -> None: + for org_id, project_ids in fetch_projects_with_total_volumes().items(): + process_projects_with_sessions.delay(org_id, project_ids) diff --git a/tests/sentry/dynamic_sampling/test_adjusments_models.py b/tests/sentry/dynamic_sampling/test_adjusments_models.py new file mode 100644 index 00000000000000..e7a0838004a0fc --- /dev/null +++ b/tests/sentry/dynamic_sampling/test_adjusments_models.py @@ -0,0 +1,8 @@ +from sentry.dynamic_sampling.adjustment_models import AdjustedModel, Project + + +def test_adjust_sample_rates(): + p1 = Project(id=1, total=8) + p2 = Project(id=2, total=240) + p = AdjustedModel(projects=[p1, p2], fidelity_rate=0.04) + assert p.adjust_sample_rates() == [116, 132] diff --git a/tests/sentry/dynamic_sampling/test_prioritise_projects.py b/tests/sentry/dynamic_sampling/test_prioritise_projects.py new file mode 100644 index 00000000000000..856d0a01aff2b8 --- /dev/null +++ b/tests/sentry/dynamic_sampling/test_prioritise_projects.py @@ -0,0 +1,8 @@ +from sentry.dynamic_sampling.prioritise_projects import fetch_projects_with_total_volumes + + +def test_prioritize_projects(): + results = fetch_projects_with_total_volumes() + 1 == 1 + + From 64ca9098eedf7ce3f830def83ccc2d97a41719c0 Mon Sep 17 00:00:00 2001 From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com> Date: Mon, 9 Jan 2023 09:45:17 +0000 Subject: [PATCH 02/39] style(lint): Auto commit lint 
changes --- tests/sentry/dynamic_sampling/test_prioritise_projects.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/sentry/dynamic_sampling/test_prioritise_projects.py b/tests/sentry/dynamic_sampling/test_prioritise_projects.py index 856d0a01aff2b8..e2b3008e4ad5e7 100644 --- a/tests/sentry/dynamic_sampling/test_prioritise_projects.py +++ b/tests/sentry/dynamic_sampling/test_prioritise_projects.py @@ -2,7 +2,5 @@ def test_prioritize_projects(): - results = fetch_projects_with_total_volumes() - 1 == 1 - - + results = fetch_projects_with_total_volumes() + 1 == 1 From 7ed9ebca34c94bc6bbd173f3f0e8f0d0ee8d4c1e Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Mon, 16 Jan 2023 09:03:37 +0100 Subject: [PATCH 03/39] refactor a bit --- src/sentry/conf/server.py | 1 + .../dynamic_sampling/prioritise_projects.py | 32 ++++++------------- src/sentry/dynamic_sampling/tasks.py | 27 +++++++++++++--- src/sentry/relay/config/__init__.py | 1 + src/sentry/snuba/metrics/naming_layer/mri.py | 1 + tests/sentry/dynamic_sampling/test_tasks.py | 16 ++++++++++ 6 files changed, 50 insertions(+), 28 deletions(-) create mode 100644 tests/sentry/dynamic_sampling/test_tasks.py diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py index c980705572c049..57fbcf2e445aca 100644 --- a/src/sentry/conf/server.py +++ b/src/sentry/conf/server.py @@ -611,6 +611,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): "sentry.tasks.user_report", "sentry.profiles.task", "sentry.release_health.tasks", + "sentry.dynamic_sampling.tasks", "sentry.utils.suspect_resolutions.get_suspect_resolutions", "sentry.utils.suspect_resolutions_releases.get_suspect_resolutions_releases", "sentry.tasks.derive_code_mappings", diff --git a/src/sentry/dynamic_sampling/prioritise_projects.py b/src/sentry/dynamic_sampling/prioritise_projects.py index 6c7208a1420f88..47abe40f229339 100644 --- a/src/sentry/dynamic_sampling/prioritise_projects.py +++ b/src/sentry/dynamic_sampling/prioritise_projects.py @@ -4,27 +4,11 @@ from datetime import datetime, timedelta from typing import Mapping, Sequence -from snuba_sdk import ( - Column, - Condition, - Direction, - Entity, - Function, - Granularity, - Op, - OrderBy, - Query, - Request, -) +from snuba_sdk import Column, Condition, Direction, Entity, Granularity, Op, OrderBy, Query, Request -from sentry.release_health.release_monitor.base import BaseReleaseMonitorBackend, Totals -from sentry.sentry_metrics import indexer -from sentry.sentry_metrics.configuration import UseCaseKey -from sentry.sentry_metrics.indexer.strings import SESSION_METRIC_NAMES -from sentry.sentry_metrics.utils import resolve_tag_key +from sentry.sentry_metrics.indexer.strings import TRANSACTION_METRICS_NAMES from sentry.snuba.dataset import Dataset, EntityKey -from sentry.snuba.metrics.naming_layer.mri import SessionMRI -from sentry.utils import metrics +from sentry.snuba.metrics.naming_layer.mri import TransactionMRI from sentry.utils.snuba import raw_snql_query logger = logging.getLogger(__name__) @@ -33,7 +17,6 @@ def fetch_projects_with_total_volumes() -> Mapping[int, Sequence[int]]: - # TODO: (andrii) include only "disconnected" projects or independent in tracing context aggregated_projects = defaultdict(list) start_time = time.time() offset = 0 @@ -52,7 +35,7 @@ def fetch_projects_with_total_volumes() -> Mapping[int, Sequence[int]]: Condition( Column("metric_id"), Op.EQ, - SESSION_METRIC_NAMES[SessionMRI.SESSION.value], + TRANSACTION_METRICS_NAMES[TransactionMRI.COUNT_PER_ROOT_PROJECT.value], ), ], 
granularity=Granularity(3600), @@ -66,9 +49,8 @@ def fetch_projects_with_total_volumes() -> Mapping[int, Sequence[int]]: ) request = Request(dataset=Dataset.Metrics.value, app_id="dynamic_sampling", query=query) data = raw_snql_query( - # TODO: replace to new referrer request, - referrer="dynamic_sampling.fetch_projects_with_recent_sessions", + referrer="dynamic_sampling.fetch_projects_with_total_volumes", )["data"] count = len(data) more_results = count > CHUNK_SIZE @@ -90,3 +72,7 @@ def fetch_projects_with_total_volumes() -> Mapping[int, Sequence[int]]: ) return aggregated_projects + + +def process_projects_with_total_volumes(project_ids): + ... diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py index e709dd54ce3063..e541c734e321fb 100644 --- a/src/sentry/dynamic_sampling/tasks.py +++ b/src/sentry/dynamic_sampling/tasks.py @@ -1,19 +1,36 @@ import logging +from sentry.dynamic_sampling.prioritise_projects import fetch_projects_with_total_volumes from sentry.tasks.base import instrumented_task CHUNK_SIZE = 1000 MAX_SECONDS = 60 -logger = logging.getLogger("sentry.tasks.dynamic_sampling") +logger = logging.getLogger(__name__) @instrumented_task( - name="sentry.dynamic_sampling.tasks.foo", - queue="releasemonitor", + name="sentry.dynamic_sampling.tasks.prioritise_projects", + queue="dynamicsampling", default_retry_delay=5, max_retries=5, ) # type: ignore -def foo(**kwargs) -> None: +def prioritise_projects(**kwargs) -> None: for org_id, project_ids in fetch_projects_with_total_volumes().items(): - process_projects_with_sessions.delay(org_id, project_ids) + process_projects_sample_rates.delay(org_id, project_ids) + + +@instrumented_task( + name="sentry.dynamic_sampling.process_projects_sample_rates", + queue="dynamicsampling", + default_retry_delay=5, + max_retries=5, +) # type: ignore +def process_projects_sample_rates(org_id, project_ids) -> None: + """ + Takes a single org id and a list of project ids + """ + ... 
+ + # Get adjusted sample rate via adjustment model + # diff --git a/src/sentry/relay/config/__init__.py b/src/sentry/relay/config/__init__.py index e31ac182eaceab..b8ba7b79f23c45 100644 --- a/src/sentry/relay/config/__init__.py +++ b/src/sentry/relay/config/__init__.py @@ -497,6 +497,7 @@ def _filter_option_to_config_setting(flt: _FilterSpec, setting: str) -> Mapping[ [ "s:transactions/user@none", "d:transactions/duration@millisecond", + "c:transactions/count_per_root_project@none", ] ) diff --git a/src/sentry/snuba/metrics/naming_layer/mri.py b/src/sentry/snuba/metrics/naming_layer/mri.py index ba1c784f5f3648..39f72f346eef13 100644 --- a/src/sentry/snuba/metrics/naming_layer/mri.py +++ b/src/sentry/snuba/metrics/naming_layer/mri.py @@ -83,6 +83,7 @@ class TransactionMRI(Enum): # Ingested USER = "s:transactions/user@none" DURATION = "d:transactions/duration@millisecond" + COUNT_PER_ROOT_PROJECT = "c:transactions/count_per_root_project@none" MEASUREMENTS_FCP = "d:transactions/measurements.fcp@millisecond" MEASUREMENTS_LCP = "d:transactions/measurements.lcp@millisecond" MEASUREMENTS_APP_START_COLD = "d:transactions/measurements.app_start_cold@millisecond" diff --git a/tests/sentry/dynamic_sampling/test_tasks.py b/tests/sentry/dynamic_sampling/test_tasks.py new file mode 100644 index 00000000000000..e1fa3a023abbe5 --- /dev/null +++ b/tests/sentry/dynamic_sampling/test_tasks.py @@ -0,0 +1,16 @@ +import pytest + +from sentry.dynamic_sampling.tasks import fetch_projects_with_total_volumes + + +@pytest.mark.django_db +def test_simple(default_project): + test_data = [ + { + "org_id": [default_project.organization.id], + "project_id": [default_project.id], + }, + ] + assert 1 == 1 + _ = test_data + fetch_projects_with_total_volumes() From de4b755e9dc740b4827632a29ad985a76b20f49c Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Mon, 16 Jan 2023 11:25:07 +0100 Subject: [PATCH 04/39] add more tests --- src/sentry/conf/server.py | 1 - .../{ => models}/adjustment_models.py | 14 ++++++++------ src/sentry/dynamic_sampling/tasks.py | 1 + .../dynamic_sampling/test_adjusments_models.py | 16 ++++++++++------ 4 files changed, 19 insertions(+), 13 deletions(-) rename src/sentry/dynamic_sampling/{ => models}/adjustment_models.py (68%) diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py index 57fbcf2e445aca..85dc545eea0bfe 100644 --- a/src/sentry/conf/server.py +++ b/src/sentry/conf/server.py @@ -850,7 +850,6 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): "dynamic-sampling-project-biases": { "task": "sentry.dynamic_sampling.tasks.prioritize_by_project", "schedule": timedelta(hours=1), - # TODO: (andrii) adjust defaults "options": {"expires": 3600}, }, } diff --git a/src/sentry/dynamic_sampling/adjustment_models.py b/src/sentry/dynamic_sampling/models/adjustment_models.py similarity index 68% rename from src/sentry/dynamic_sampling/adjustment_models.py rename to src/sentry/dynamic_sampling/models/adjustment_models.py index be22539ea97848..a2813d374d5338 100644 --- a/src/sentry/dynamic_sampling/adjustment_models.py +++ b/src/sentry/dynamic_sampling/models/adjustment_models.py @@ -1,6 +1,6 @@ import statistics -from dataclasses import dataclass, field -from operator import itemgetter +from dataclasses import dataclass +from operator import attrgetter from typing import List @@ -17,20 +17,22 @@ class AdjustedModel: fidelity_rate: float def adjust_sample_rates(self): + if len(self.projects) < 2: + return self.projects + # Step 1: sort - sorted_ = sorted(self.projects, reverse=True, 
key=itemgetter("total")) + sorted_ = list(map(attrgetter("total"), self.projects)) # Step 2: find avg avg = statistics.mean(sorted_) # Step 3: # Find upper bound - # One maximum adjustment 1 up to 4 - min_element = sorted_[0] + min_element = min(sorted_) max_ = min_element / self.fidelity_rate - adjustments_ceiling_p4 = min((avg - min_element), min_element / self.fidelity_rate) + adjustments_ceiling_p4 = min((avg - min_element), max_) d2 = adjustments_ceiling_p4 - sorted_[1] total_adjusment = d2 diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py index e541c734e321fb..6d69e7a638f28a 100644 --- a/src/sentry/dynamic_sampling/tasks.py +++ b/src/sentry/dynamic_sampling/tasks.py @@ -34,3 +34,4 @@ def process_projects_sample_rates(org_id, project_ids) -> None: # Get adjusted sample rate via adjustment model # + # prioritize_projects.delay(org_id, project_ids) diff --git a/tests/sentry/dynamic_sampling/test_adjusments_models.py b/tests/sentry/dynamic_sampling/test_adjusments_models.py index e7a0838004a0fc..ae736e9ddf5e30 100644 --- a/tests/sentry/dynamic_sampling/test_adjusments_models.py +++ b/tests/sentry/dynamic_sampling/test_adjusments_models.py @@ -1,8 +1,12 @@ -from sentry.dynamic_sampling.adjustment_models import AdjustedModel, Project +from sentry.dynamic_sampling.models.adjustment_models import AdjustedModel +from sentry.dynamic_sampling.models.adjustment_models import Project as P -def test_adjust_sample_rates(): - p1 = Project(id=1, total=8) - p2 = Project(id=2, total=240) - p = AdjustedModel(projects=[p1, p2], fidelity_rate=0.04) - assert p.adjust_sample_rates() == [116, 132] +def test_adjust_sample_rates_org_wo_projects(): + p = AdjustedModel(projects=[], fidelity_rate=0.04) + assert p.adjust_sample_rates() == [] + + +def test_adjust_sample_rates_org_with_single_project(): + p = AdjustedModel(projects=[P(id=1, total=10)], fidelity_rate=0.04) + assert p.adjust_sample_rates() == [P(id=1, total=10)] From 089b2a8f905b3d174047b912cfae01cea05fd23d Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Tue, 20 Dec 2022 10:03:37 +0100 Subject: [PATCH 05/39] asdasd --- src/sentry/conf/server.py | 4 ++ .../dynamic_sampling/adjustment_models.py | 40 +++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 src/sentry/dynamic_sampling/adjustment_models.py diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py index 85dc545eea0bfe..475b6988b3b043 100644 --- a/src/sentry/conf/server.py +++ b/src/sentry/conf/server.py @@ -1153,6 +1153,8 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): "organizations:invite-members": True, # Enable rate limits for inviting members. "organizations:invite-members-rate-limits": True, + # Enable new issue actions on issue details + "organizations:issue-actions-v2": True, # Enable new issue alert "issue owners" fallback "organizations:issue-alert-fallback-targeting": False, # Enable removing issue from issue list if action taken. 
@@ -1264,6 +1266,8 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): "organizations:onboarding-remove-multiselect-platform": False, # Enable ANR rates in project details page "organizations:anr-rate": False, + # Enable deobfuscating exception values in Java issues + "organizations:java-exception-value-deobfuscation": False, # Enable tag improvements in the issue details page "organizations:issue-details-tag-improvements": False, # Enable the release details performance section diff --git a/src/sentry/dynamic_sampling/adjustment_models.py b/src/sentry/dynamic_sampling/adjustment_models.py new file mode 100644 index 00000000000000..be22539ea97848 --- /dev/null +++ b/src/sentry/dynamic_sampling/adjustment_models.py @@ -0,0 +1,40 @@ +import statistics +from dataclasses import dataclass, field +from operator import itemgetter +from typing import List + + +@dataclass +class Project: + id: int + total: int + # sample_rate: float + + +@dataclass +class AdjustedModel: + projects: List[Project] + fidelity_rate: float + + def adjust_sample_rates(self): + # Step 1: sort + sorted_ = sorted(self.projects, reverse=True, key=itemgetter("total")) + + # Step 2: find avg + avg = statistics.mean(sorted_) + + # Step 3: + # Find upper bound + + # One maximum adjustment 1 up to 4 + min_element = sorted_[0] + + max_ = min_element / self.fidelity_rate + adjustments_ceiling_p4 = min((avg - min_element), min_element / self.fidelity_rate) + + d2 = adjustments_ceiling_p4 - sorted_[1] + total_adjusment = d2 + d1 = abs(avg - sorted_[1]) + d11 = d1 / d1 * total_adjusment + + return [d11, d2] From f54e9c3d14da12a3ac6a8d16d67dd3d642f80ba8 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Mon, 16 Jan 2023 11:25:07 +0100 Subject: [PATCH 06/39] add more tests --- .../dynamic_sampling/adjustment_models.py | 40 ------------------- 1 file changed, 40 deletions(-) delete mode 100644 src/sentry/dynamic_sampling/adjustment_models.py diff --git a/src/sentry/dynamic_sampling/adjustment_models.py b/src/sentry/dynamic_sampling/adjustment_models.py deleted file mode 100644 index be22539ea97848..00000000000000 --- a/src/sentry/dynamic_sampling/adjustment_models.py +++ /dev/null @@ -1,40 +0,0 @@ -import statistics -from dataclasses import dataclass, field -from operator import itemgetter -from typing import List - - -@dataclass -class Project: - id: int - total: int - # sample_rate: float - - -@dataclass -class AdjustedModel: - projects: List[Project] - fidelity_rate: float - - def adjust_sample_rates(self): - # Step 1: sort - sorted_ = sorted(self.projects, reverse=True, key=itemgetter("total")) - - # Step 2: find avg - avg = statistics.mean(sorted_) - - # Step 3: - # Find upper bound - - # One maximum adjustment 1 up to 4 - min_element = sorted_[0] - - max_ = min_element / self.fidelity_rate - adjustments_ceiling_p4 = min((avg - min_element), min_element / self.fidelity_rate) - - d2 = adjustments_ceiling_p4 - sorted_[1] - total_adjusment = d2 - d1 = abs(avg - sorted_[1]) - d11 = d1 / d1 * total_adjusment - - return [d11, d2] From cda6808afa48ca0162a0f3db327691b3e7c84dd0 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Thu, 19 Jan 2023 08:59:07 +0100 Subject: [PATCH 07/39] fixup! 
--- src/sentry/conf/server.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py index 475b6988b3b043..3bdb69b4e9f65e 100644 --- a/src/sentry/conf/server.py +++ b/src/sentry/conf/server.py @@ -664,6 +664,10 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): "releasemonitor", routing_key="releasemonitor", ), + Queue( + "dynamicsampling", + routing_key="dynamicsampling", + ), Queue("incidents", routing_key="incidents"), Queue("incident_snapshots", routing_key="incident_snapshots"), Queue("incidents", routing_key="incidents"), @@ -1092,7 +1096,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): # Try to derive normalization rules by clustering transaction names. "organizations:transaction-name-clusterer": False, # Sanitize transaction names in the ingestion pipeline. - "organizations:transaction-name-sanitization": False, # DEPRECATED + "organizations:transaction-name-sanitization": False, # Extraction metrics for transactions during ingestion. "organizations:transaction-metrics-extraction": False, # True if release-health related queries should be run against both From 7bba77df3b5bc5359283ee23fd40d9819bb45524 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Mon, 6 Feb 2023 10:10:29 +0100 Subject: [PATCH 08/39] refactor test --- .../models/adjustment_models.py | 49 ++++++++++++------- .../test_adjusments_models.py | 20 ++++++-- 2 files changed, 48 insertions(+), 21 deletions(-) diff --git a/src/sentry/dynamic_sampling/models/adjustment_models.py b/src/sentry/dynamic_sampling/models/adjustment_models.py index a2813d374d5338..7b347854186678 100644 --- a/src/sentry/dynamic_sampling/models/adjustment_models.py +++ b/src/sentry/dynamic_sampling/models/adjustment_models.py @@ -7,8 +7,10 @@ @dataclass class Project: id: int - total: int - # sample_rate: float + count_per_root: int + blended_sample_rate: float + new_count_per_root: int = 0 + new_sample_rate: float = 0.0 @dataclass @@ -16,27 +18,38 @@ class AdjustedModel: projects: List[Project] fidelity_rate: float + @property def adjust_sample_rates(self): if len(self.projects) < 2: return self.projects - # Step 1: sort - sorted_ = list(map(attrgetter("total"), self.projects)) + # Step 1: sort projects by count per root project + sorted_projects = list(sorted(self.projects, key=attrgetter("count_per_root"))) # Step 2: find avg - avg = statistics.mean(sorted_) + average = statistics.mean([p.count_per_root for p in sorted_projects]) # Step 3: - # Find upper bound - # One maximum adjustment 1 up to 4 - min_element = min(sorted_) - - max_ = min_element / self.fidelity_rate - adjustments_ceiling_p4 = min((avg - min_element), max_) - - d2 = adjustments_ceiling_p4 - sorted_[1] - total_adjusment = d2 - d1 = abs(avg - sorted_[1]) - d11 = d1 / d1 * total_adjusment - - return [d11, d2] + # IF len % 2 == 0 + left_split = sorted_projects[: len(sorted_projects) // 2] + right_split = reversed(sorted_projects[len(sorted_projects) // 2 :]) + + new_left = [] + new_right = [] + coefficient = 1 + for left, right in zip(left_split, right_split): + # We can't increase sample rate more than 1.0, so we calculate upper bound count + # based on project fidelity_rate + # Find an absolute difference + diff = coefficient * min( + (average - left.count_per_root), + ((left.count_per_root / self.fidelity_rate) - left.count_per_root), + ) + left.new_count_per_root = left.count_per_root + diff + right.new_count_per_root = right.count_per_root - diff + new_left.append(left) + new_right.append(right) + # This opinionated 
`coefficient` reduces adjustment on every step + coefficient = diff / left.new_count_per_root + + return [new_left, new_right] diff --git a/tests/sentry/dynamic_sampling/test_adjusments_models.py b/tests/sentry/dynamic_sampling/test_adjusments_models.py index ae736e9ddf5e30..49fd42f0716095 100644 --- a/tests/sentry/dynamic_sampling/test_adjusments_models.py +++ b/tests/sentry/dynamic_sampling/test_adjusments_models.py @@ -4,9 +4,23 @@ def test_adjust_sample_rates_org_wo_projects(): p = AdjustedModel(projects=[], fidelity_rate=0.04) - assert p.adjust_sample_rates() == [] + assert p.adjust_sample_rates == [] def test_adjust_sample_rates_org_with_single_project(): - p = AdjustedModel(projects=[P(id=1, total=10)], fidelity_rate=0.04) - assert p.adjust_sample_rates() == [P(id=1, total=10)] + p = AdjustedModel( + projects=[P(id=1, count_per_root=10, blended_sample_rate=0.04)], fidelity_rate=0.04 + ) + assert p.adjust_sample_rates == [P(id=1, count_per_root=10, blended_sample_rate=0.04)] + + +def test_adjust_sample_rates_org_with_few_projects(): + projects = [ + P(id=1, count_per_root=9, blended_sample_rate=0.04), + P(id=2, count_per_root=7, blended_sample_rate=0.04), + P(id=3, count_per_root=3, blended_sample_rate=0.04), + P(id=4, count_per_root=1, blended_sample_rate=0.04), + ] + p = AdjustedModel(projects=projects, fidelity_rate=0.25) + + assert p.adjust_sample_rates == [P(id=1, count_per_root=10, blended_sample_rate=0.04)] From ed948a0101ea49c6cdebdc08a6b9e15e5ccd6623 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Mon, 6 Feb 2023 10:46:03 +0100 Subject: [PATCH 09/39] refactor test --- .../models/adjustment_models.py | 8 ++++---- .../dynamic_sampling/test_adjusments_models.py | 17 ++++++++++++----- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/sentry/dynamic_sampling/models/adjustment_models.py b/src/sentry/dynamic_sampling/models/adjustment_models.py index 7b347854186678..be4e66a2a3d5cd 100644 --- a/src/sentry/dynamic_sampling/models/adjustment_models.py +++ b/src/sentry/dynamic_sampling/models/adjustment_models.py @@ -7,9 +7,9 @@ @dataclass class Project: id: int - count_per_root: int + count_per_root: float blended_sample_rate: float - new_count_per_root: int = 0 + new_count_per_root: float = 0.0 new_sample_rate: float = 0.0 @@ -32,7 +32,7 @@ def adjust_sample_rates(self): # Step 3: # IF len % 2 == 0 left_split = sorted_projects[: len(sorted_projects) // 2] - right_split = reversed(sorted_projects[len(sorted_projects) // 2 :]) + right_split = reversed(sorted_projects[len(sorted_projects) // 2 : len(sorted_projects)]) new_left = [] new_right = [] @@ -52,4 +52,4 @@ def adjust_sample_rates(self): # This opinionated `coefficient` reduces adjustment on every step coefficient = diff / left.new_count_per_root - return [new_left, new_right] + return [*new_right, *reversed(new_left)] diff --git a/tests/sentry/dynamic_sampling/test_adjusments_models.py b/tests/sentry/dynamic_sampling/test_adjusments_models.py index 49fd42f0716095..2531a4e0885497 100644 --- a/tests/sentry/dynamic_sampling/test_adjusments_models.py +++ b/tests/sentry/dynamic_sampling/test_adjusments_models.py @@ -16,11 +16,18 @@ def test_adjust_sample_rates_org_with_single_project(): def test_adjust_sample_rates_org_with_few_projects(): projects = [ - P(id=1, count_per_root=9, blended_sample_rate=0.04), - P(id=2, count_per_root=7, blended_sample_rate=0.04), - P(id=3, count_per_root=3, blended_sample_rate=0.04), - P(id=4, count_per_root=1, blended_sample_rate=0.04), + P(id=1, count_per_root=9.0, 
blended_sample_rate=0.04), + P(id=2, count_per_root=7.0, blended_sample_rate=0.04), + P(id=3, count_per_root=3.0, blended_sample_rate=0.04), + P(id=4, count_per_root=1.0, blended_sample_rate=0.04), ] p = AdjustedModel(projects=projects, fidelity_rate=0.25) - assert p.adjust_sample_rates == [P(id=1, count_per_root=10, blended_sample_rate=0.04)] + expected_projects = [ + P(id=1, count_per_root=9.0, new_count_per_root=6.0, blended_sample_rate=0.04), + P(id=2, count_per_root=7.0, new_count_per_root=5.5, blended_sample_rate=0.04), + P(id=3, count_per_root=3.0, new_count_per_root=4.5, blended_sample_rate=0.04), + P(id=4, count_per_root=1.0, new_count_per_root=4.0, blended_sample_rate=0.04), + ] + + assert p.adjust_sample_rates == expected_projects From 5965ae5eeb6edd6b6f53c2e2f73ec60a1ac77b04 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Tue, 7 Feb 2023 13:37:21 +0100 Subject: [PATCH 10/39] fix tests --- src/sentry/dynamic_sampling/prioritise_projects.py | 8 +++++--- src/sentry/dynamic_sampling/tasks.py | 2 +- src/sentry/snuba/dataset.py | 1 + .../sentry/dynamic_sampling/test_prioritise_projects.py | 9 +++++++-- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/sentry/dynamic_sampling/prioritise_projects.py b/src/sentry/dynamic_sampling/prioritise_projects.py index 47abe40f229339..00ba6ffe9f60fb 100644 --- a/src/sentry/dynamic_sampling/prioritise_projects.py +++ b/src/sentry/dynamic_sampling/prioritise_projects.py @@ -23,7 +23,7 @@ def fetch_projects_with_total_volumes() -> Mapping[int, Sequence[int]]: while (time.time() - start_time) < MAX_SECONDS: query = ( Query( - match=Entity(EntityKey.OrgMetricsCounters.value), + match=Entity(EntityKey.GenericMetricsCounters.value), select=[ Column("org_id"), Column("project_id"), @@ -47,10 +47,12 @@ def fetch_projects_with_total_volumes() -> Mapping[int, Sequence[int]]: .set_limit(CHUNK_SIZE + 1) .set_offset(offset) ) - request = Request(dataset=Dataset.Metrics.value, app_id="dynamic_sampling", query=query) + request = Request( + dataset=Dataset.PerformanceMetrics.value, app_id="dynamic_sampling", query=query + ) data = raw_snql_query( request, - referrer="dynamic_sampling.fetch_projects_with_total_volumes", + referrer="dynamic_sampling.fetch_projects_with_count_per_root_total_volumes", )["data"] count = len(data) more_results = count > CHUNK_SIZE diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py index 6d69e7a638f28a..99a4cbc486465d 100644 --- a/src/sentry/dynamic_sampling/tasks.py +++ b/src/sentry/dynamic_sampling/tasks.py @@ -34,4 +34,4 @@ def process_projects_sample_rates(org_id, project_ids) -> None: # Get adjusted sample rate via adjustment model # - # prioritize_projects.delay(org_id, project_ids) + # prioritise_projects.delay(org_id, project_ids) diff --git a/src/sentry/snuba/dataset.py b/src/sentry/snuba/dataset.py index 47014bd3d99d3b..45a4f460d7bae7 100644 --- a/src/sentry/snuba/dataset.py +++ b/src/sentry/snuba/dataset.py @@ -28,4 +28,5 @@ class EntityKey(Enum): MetricsDistributions = "metrics_distributions" GenericMetricsDistributions = "generic_metrics_distributions" GenericMetricsSets = "generic_metrics_sets" + GenericMetricsCounters = "generic_metrics_counters" IssuePlatform = "search_issues" diff --git a/tests/sentry/dynamic_sampling/test_prioritise_projects.py b/tests/sentry/dynamic_sampling/test_prioritise_projects.py index e2b3008e4ad5e7..99e5ac65ba7b89 100644 --- a/tests/sentry/dynamic_sampling/test_prioritise_projects.py +++ 
b/tests/sentry/dynamic_sampling/test_prioritise_projects.py @@ -1,6 +1,11 @@ +import pytest + from sentry.dynamic_sampling.prioritise_projects import fetch_projects_with_total_volumes +from sentry.testutils.factories import Factories +@pytest.mark.django_db def test_prioritize_projects(): - results = fetch_projects_with_total_volumes() - 1 == 1 + organization = Factories.create_organization(name="test-org") + Factories.create_project(organization=organization) + fetch_projects_with_total_volumes() From 98b6511dee53ff15287f5ab0f31b92857ff802b6 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Tue, 7 Feb 2023 13:43:55 +0100 Subject: [PATCH 11/39] ignore test before snuba PR is ready --- tests/sentry/dynamic_sampling/test_prioritise_projects.py | 6 +++++- tests/sentry/dynamic_sampling/test_tasks.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/sentry/dynamic_sampling/test_prioritise_projects.py b/tests/sentry/dynamic_sampling/test_prioritise_projects.py index 99e5ac65ba7b89..8b6f58863b40c0 100644 --- a/tests/sentry/dynamic_sampling/test_prioritise_projects.py +++ b/tests/sentry/dynamic_sampling/test_prioritise_projects.py @@ -2,10 +2,14 @@ from sentry.dynamic_sampling.prioritise_projects import fetch_projects_with_total_volumes from sentry.testutils.factories import Factories +from sentry.utils.snuba import SnubaError @pytest.mark.django_db def test_prioritize_projects(): organization = Factories.create_organization(name="test-org") Factories.create_project(organization=organization) - fetch_projects_with_total_volumes() + # TODO (andrii): remove it when snuba PR is ready + # https://github.com/getsentry/snuba/pull/3708 + with pytest.raises(SnubaError): + fetch_projects_with_total_volumes() diff --git a/tests/sentry/dynamic_sampling/test_tasks.py b/tests/sentry/dynamic_sampling/test_tasks.py index e1fa3a023abbe5..7090f5567bba00 100644 --- a/tests/sentry/dynamic_sampling/test_tasks.py +++ b/tests/sentry/dynamic_sampling/test_tasks.py @@ -1,6 +1,7 @@ import pytest from sentry.dynamic_sampling.tasks import fetch_projects_with_total_volumes +from sentry.utils.snuba import SnubaError @pytest.mark.django_db @@ -13,4 +14,7 @@ def test_simple(default_project): ] assert 1 == 1 _ = test_data - fetch_projects_with_total_volumes() + # TODO (andrii): remove it when snuba PR is ready + # https://github.com/getsentry/snuba/pull/3708 + with pytest.raises(SnubaError): + fetch_projects_with_total_volumes() From db1491c2d301f1d1006201d5edf3cab9f7aeb691 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Sun, 12 Feb 2023 18:28:16 +0100 Subject: [PATCH 12/39] add more tests --- src/sentry/conf/server.py | 9 ++-- .../dynamic_sampling/prioritise_projects.py | 25 ++++++++--- src/sentry/snuba/dataset.py | 1 + .../test_prioritise_projects.py | 43 +++++++++++++------ tests/sentry/dynamic_sampling/test_tasks.py | 16 +++---- 5 files changed, 59 insertions(+), 35 deletions(-) diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py index 3bdb69b4e9f65e..ff761786f6bdc4 100644 --- a/src/sentry/conf/server.py +++ b/src/sentry/conf/server.py @@ -853,7 +853,8 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): }, "dynamic-sampling-project-biases": { "task": "sentry.dynamic_sampling.tasks.prioritize_by_project", - "schedule": timedelta(hours=1), + # Run daily at 08:00 + "schedule": crontab(hour=8, minute=0), "options": {"expires": 3600}, }, } @@ -1096,7 +1097,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): # Try to derive normalization rules by clustering transaction names. 
"organizations:transaction-name-clusterer": False, # Sanitize transaction names in the ingestion pipeline. - "organizations:transaction-name-sanitization": False, + "organizations:transaction-name-sanitization": False, # DEPRECATED # Extraction metrics for transactions during ingestion. "organizations:transaction-metrics-extraction": False, # True if release-health related queries should be run against both @@ -1157,8 +1158,6 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): "organizations:invite-members": True, # Enable rate limits for inviting members. "organizations:invite-members-rate-limits": True, - # Enable new issue actions on issue details - "organizations:issue-actions-v2": True, # Enable new issue alert "issue owners" fallback "organizations:issue-alert-fallback-targeting": False, # Enable removing issue from issue list if action taken. @@ -1270,8 +1269,6 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): "organizations:onboarding-remove-multiselect-platform": False, # Enable ANR rates in project details page "organizations:anr-rate": False, - # Enable deobfuscating exception values in Java issues - "organizations:java-exception-value-deobfuscation": False, # Enable tag improvements in the issue details page "organizations:issue-details-tag-improvements": False, # Enable the release details performance section diff --git a/src/sentry/dynamic_sampling/prioritise_projects.py b/src/sentry/dynamic_sampling/prioritise_projects.py index 00ba6ffe9f60fb..f9c27e0a2aff41 100644 --- a/src/sentry/dynamic_sampling/prioritise_projects.py +++ b/src/sentry/dynamic_sampling/prioritise_projects.py @@ -4,7 +4,18 @@ from datetime import datetime, timedelta from typing import Mapping, Sequence -from snuba_sdk import Column, Condition, Direction, Entity, Granularity, Op, OrderBy, Query, Request +from snuba_sdk import ( + Column, + Condition, + Direction, + Entity, + Function, + Granularity, + Op, + OrderBy, + Query, + Request, +) from sentry.sentry_metrics.indexer.strings import TRANSACTION_METRICS_NAMES from sentry.snuba.dataset import Dataset, EntityKey @@ -17,14 +28,18 @@ def fetch_projects_with_total_volumes() -> Mapping[int, Sequence[int]]: + """ + This function fetch with pagination orgs and projects with count per root project + """ aggregated_projects = defaultdict(list) start_time = time.time() offset = 0 while (time.time() - start_time) < MAX_SECONDS: query = ( Query( - match=Entity(EntityKey.GenericMetricsCounters.value), + match=Entity(EntityKey.GenericOrgMetricsCounters.value), select=[ + Function("sum", [Column("value")], "root_count_value"), Column("org_id"), Column("project_id"), ], @@ -62,7 +77,7 @@ def fetch_projects_with_total_volumes() -> Mapping[int, Sequence[int]]: data = data[:-1] for row in data: - aggregated_projects[row["org_id"]].append(row["project_id"]) + aggregated_projects[row["org_id"]].append({row["project_id"]: row["root_count_value"]}) if not more_results: break @@ -74,7 +89,3 @@ def fetch_projects_with_total_volumes() -> Mapping[int, Sequence[int]]: ) return aggregated_projects - - -def process_projects_with_total_volumes(project_ids): - ... 
diff --git a/src/sentry/snuba/dataset.py b/src/sentry/snuba/dataset.py index 45a4f460d7bae7..f0656ca629b589 100644 --- a/src/sentry/snuba/dataset.py +++ b/src/sentry/snuba/dataset.py @@ -29,4 +29,5 @@ class EntityKey(Enum): GenericMetricsDistributions = "generic_metrics_distributions" GenericMetricsSets = "generic_metrics_sets" GenericMetricsCounters = "generic_metrics_counters" + GenericOrgMetricsCounters = "generic_org_metrics_counters" IssuePlatform = "search_issues" diff --git a/tests/sentry/dynamic_sampling/test_prioritise_projects.py b/tests/sentry/dynamic_sampling/test_prioritise_projects.py index 8b6f58863b40c0..abf227d613af32 100644 --- a/tests/sentry/dynamic_sampling/test_prioritise_projects.py +++ b/tests/sentry/dynamic_sampling/test_prioritise_projects.py @@ -1,15 +1,32 @@ -import pytest +from datetime import datetime, timezone + +from freezegun import freeze_time from sentry.dynamic_sampling.prioritise_projects import fetch_projects_with_total_volumes -from sentry.testutils.factories import Factories -from sentry.utils.snuba import SnubaError - - -@pytest.mark.django_db -def test_prioritize_projects(): - organization = Factories.create_organization(name="test-org") - Factories.create_project(organization=organization) - # TODO (andrii): remove it when snuba PR is ready - # https://github.com/getsentry/snuba/pull/3708 - with pytest.raises(SnubaError): - fetch_projects_with_total_volumes() +from sentry.snuba.metrics import TransactionMRI +from sentry.testutils import BaseMetricsLayerTestCase, SnubaTestCase, TestCase + +MOCK_DATETIME = datetime(2023, 8, 7, 0, 0, 0, tzinfo=timezone.utc) + + +@freeze_time(MOCK_DATETIME) +class PrioritiseProjectsSnubaQueryTest(BaseMetricsLayerTestCase, TestCase, SnubaTestCase): + @property + def now(self): + return MOCK_DATETIME + + def test_simple_one_org_one_project(self): + org1 = self.create_organization("test-org") + p1 = self.create_project(organization=org1) + + self.store_performance_metric( + name=TransactionMRI.COUNT_PER_ROOT_PROJECT.value, + tags={"transaction": "foo_transaction"}, + hours_before_now=1, + value=1, + project_id=p1.id, + org_id=org1.id, + ) + + results = fetch_projects_with_total_volumes() + assert results[org1.id] == [{p1.id: 1.0}] diff --git a/tests/sentry/dynamic_sampling/test_tasks.py b/tests/sentry/dynamic_sampling/test_tasks.py index 7090f5567bba00..6d9a1262e1dba2 100644 --- a/tests/sentry/dynamic_sampling/test_tasks.py +++ b/tests/sentry/dynamic_sampling/test_tasks.py @@ -1,20 +1,18 @@ +from collections import defaultdict + import pytest from sentry.dynamic_sampling.tasks import fetch_projects_with_total_volumes -from sentry.utils.snuba import SnubaError @pytest.mark.django_db -def test_simple(default_project): - test_data = [ +def test_simple_no_data(default_project): + _ = [ { "org_id": [default_project.organization.id], "project_id": [default_project.id], }, ] - assert 1 == 1 - _ = test_data - # TODO (andrii): remove it when snuba PR is ready - # https://github.com/getsentry/snuba/pull/3708 - with pytest.raises(SnubaError): - fetch_projects_with_total_volumes() + project_volumes_total = fetch_projects_with_total_volumes() + + assert project_volumes_total == defaultdict(list) From 36cdb1ec7049bf37a99e260bb175c2566ba4574e Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Mon, 13 Feb 2023 10:15:32 +0100 Subject: [PATCH 13/39] add more tests --- .../models/adjustment_models.py | 27 +++++++++++++++---- .../test_adjusments_models.py | 17 ++++++++++++ 2 files changed, 39 insertions(+), 5 deletions(-) diff --git 
a/src/sentry/dynamic_sampling/models/adjustment_models.py b/src/sentry/dynamic_sampling/models/adjustment_models.py index be4e66a2a3d5cd..f47f002667e6d6 100644 --- a/src/sentry/dynamic_sampling/models/adjustment_models.py +++ b/src/sentry/dynamic_sampling/models/adjustment_models.py @@ -15,6 +15,10 @@ class Project: @dataclass class AdjustedModel: + """ + Model which can adjust sample rate per project inside ORG based on this new counter metric from Relay. + """ + projects: List[Project] fidelity_rate: float @@ -26,13 +30,21 @@ def adjust_sample_rates(self): # Step 1: sort projects by count per root project sorted_projects = list(sorted(self.projects, key=attrgetter("count_per_root"))) - # Step 2: find avg + # Step 2: find average project average = statistics.mean([p.count_per_root for p in sorted_projects]) # Step 3: - # IF len % 2 == 0 - left_split = sorted_projects[: len(sorted_projects) // 2] - right_split = reversed(sorted_projects[len(sorted_projects) // 2 : len(sorted_projects)]) + if len(sorted_projects) % 2 == 0: + left_split = sorted_projects[: len(sorted_projects) // 2] + right_split = reversed( + sorted_projects[len(sorted_projects) // 2 : len(sorted_projects)] + ) + else: + left_split = sorted_projects[: len(sorted_projects) // 2] + # ignore middle element, since we don't have capacity to balance it + right_split = reversed( + sorted_projects[(len(sorted_projects) // 2) + 1 : len(sorted_projects)] + ) new_left = [] new_right = [] @@ -52,4 +64,9 @@ def adjust_sample_rates(self): # This opinionated `coefficient` reduces adjustment on every step coefficient = diff / left.new_count_per_root - return [*new_right, *reversed(new_left)] + if len(sorted_projects) % 2 == 0: + return [*new_right, *reversed(new_left)] + else: + mid_element = sorted_projects[len(sorted_projects) // 2] + mid_element.new_count_per_root = mid_element.count_per_root + return [*new_right, mid_element, *reversed(new_left)] diff --git a/tests/sentry/dynamic_sampling/test_adjusments_models.py b/tests/sentry/dynamic_sampling/test_adjusments_models.py index 2531a4e0885497..9e75e2a171341d 100644 --- a/tests/sentry/dynamic_sampling/test_adjusments_models.py +++ b/tests/sentry/dynamic_sampling/test_adjusments_models.py @@ -31,3 +31,20 @@ def test_adjust_sample_rates_org_with_few_projects(): ] assert p.adjust_sample_rates == expected_projects + + +def test_adjust_sample_rates_org_with_even_num_projects(): + projects = [ + P(id=1, count_per_root=8.0, blended_sample_rate=0.04), + P(id=2, count_per_root=7.0, blended_sample_rate=0.04), + P(id=3, count_per_root=3.0, blended_sample_rate=0.04), + ] + p = AdjustedModel(projects=projects, fidelity_rate=0.25) + + expected_projects = [ + P(id=1, count_per_root=8.0, new_count_per_root=5.0, blended_sample_rate=0.04), + P(id=2, count_per_root=7.0, new_count_per_root=7.0, blended_sample_rate=0.04), + P(id=3, count_per_root=3.0, new_count_per_root=6.0, blended_sample_rate=0.04), + ] + + assert p.adjust_sample_rates == expected_projects From 8b1340b2408a052877e748980a1154cb7e67c3cb Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Tue, 14 Feb 2023 12:11:12 +0100 Subject: [PATCH 14/39] adressed code review --- .../models/adjustment_models.py | 27 ++++++--------- src/sentry/dynamic_sampling/tasks.py | 34 ++++++++++++++----- .../test_adjusments_models.py | 27 ++++++++++++--- 3 files changed, 59 insertions(+), 29 deletions(-) diff --git a/src/sentry/dynamic_sampling/models/adjustment_models.py b/src/sentry/dynamic_sampling/models/adjustment_models.py index 
f47f002667e6d6..6b1c88bda4fd08 100644 --- a/src/sentry/dynamic_sampling/models/adjustment_models.py +++ b/src/sentry/dynamic_sampling/models/adjustment_models.py @@ -1,6 +1,5 @@ import statistics from dataclasses import dataclass -from operator import attrgetter from typing import List @@ -20,35 +19,29 @@ class AdjustedModel: """ projects: List[Project] - fidelity_rate: float + fidelity_rate: float = 0.4 # TODO: discuss this constant - @property - def adjust_sample_rates(self): + def adjust_sample_rates(self) -> List[Project]: if len(self.projects) < 2: return self.projects # Step 1: sort projects by count per root project - sorted_projects = list(sorted(self.projects, key=attrgetter("count_per_root"))) + sorted_projects = list(sorted(self.projects, key=lambda x: (x.count_per_root, -x.id))) # Step 2: find average project average = statistics.mean([p.count_per_root for p in sorted_projects]) # Step 3: - if len(sorted_projects) % 2 == 0: - left_split = sorted_projects[: len(sorted_projects) // 2] - right_split = reversed( - sorted_projects[len(sorted_projects) // 2 : len(sorted_projects)] - ) - else: - left_split = sorted_projects[: len(sorted_projects) // 2] - # ignore middle element, since we don't have capacity to balance it - right_split = reversed( - sorted_projects[(len(sorted_projects) // 2) + 1 : len(sorted_projects)] - ) + num_projects = len(sorted_projects) + half = num_projects // 2 + odd_one = num_projects % 2 + left_split = sorted_projects[:half] + # ignore middle element, since we don't have capacity to balance it + right_split = reversed(sorted_projects[half + odd_one : num_projects]) new_left = [] new_right = [] - coefficient = 1 + coefficient = 1.0 for left, right in zip(left_split, right_split): # We can't increase sample rate more than 1.0, so we calculate upper bound count # based on project fidelity_rate diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py index 99a4cbc486465d..04cf60dcdd7f48 100644 --- a/src/sentry/dynamic_sampling/tasks.py +++ b/src/sentry/dynamic_sampling/tasks.py @@ -1,7 +1,12 @@ import logging +from sentry import features +from sentry.dynamic_sampling.models.adjustment_models import AdjustedModel +from sentry.dynamic_sampling.models.adjustment_models import Project as DSProject from sentry.dynamic_sampling.prioritise_projects import fetch_projects_with_total_volumes +from sentry.models import Organization from sentry.tasks.base import instrumented_task +from sentry.utils import metrics CHUNK_SIZE = 1000 MAX_SECONDS = 60 @@ -16,8 +21,10 @@ max_retries=5, ) # type: ignore def prioritise_projects(**kwargs) -> None: - for org_id, project_ids in fetch_projects_with_total_volumes().items(): - process_projects_sample_rates.delay(org_id, project_ids) + metrics.incr("sentry.tasks.dynamic_sampling.prioritise_projects.start", sample_rate=1.0) + with metrics.timer("sentry.tasks.dynamic_sampling.prioritise_projects", sample_rate=1.0): + for org_id, project_id_with_count_per_root in fetch_projects_with_total_volumes().items(): + process_projects_sample_rates.delay(org_id, project_id_with_count_per_root) @instrumented_task( @@ -26,12 +33,23 @@ def prioritise_projects(**kwargs) -> None: default_retry_delay=5, max_retries=5, ) # type: ignore -def process_projects_sample_rates(org_id, project_ids) -> None: +def process_projects_sample_rates(organization_id, project_id_with_count_per_root) -> None: """ Takes a single org id and a list of project ids """ - ... 
- - # Get adjusted sample rate via adjustment model - # - # prioritise_projects.delay(org_id, project_ids) + organization = Organization.objects.get_from_cache(id=organization_id) + # Check if feature flag is enabled: + if features.has("organizations:ds-prioritise-by-project-bias", organization): + with metrics.timer("sentry.tasks.dynamic_sampling.process_projects_sample_rates.core"): + adjust_sample_rates(organization_id, project_id_with_count_per_root) + + +def adjust_sample_rates(org_id, project_id_with_count_per_root): + projects = [] + for project_id, count_per_root in project_id_with_count_per_root: + projects.append( + DSProject(id=project_id, count_per_root=count_per_root, blended_sample_rate=0.0) + ) + model = AdjustedModel(projects=projects) + new_rates = model.adjust_sample_rates() + _ = new_rates diff --git a/tests/sentry/dynamic_sampling/test_adjusments_models.py b/tests/sentry/dynamic_sampling/test_adjusments_models.py index 9e75e2a171341d..1e6fec8121a489 100644 --- a/tests/sentry/dynamic_sampling/test_adjusments_models.py +++ b/tests/sentry/dynamic_sampling/test_adjusments_models.py @@ -4,14 +4,14 @@ def test_adjust_sample_rates_org_wo_projects(): p = AdjustedModel(projects=[], fidelity_rate=0.04) - assert p.adjust_sample_rates == [] + assert p.adjust_sample_rates() == [] def test_adjust_sample_rates_org_with_single_project(): p = AdjustedModel( projects=[P(id=1, count_per_root=10, blended_sample_rate=0.04)], fidelity_rate=0.04 ) - assert p.adjust_sample_rates == [P(id=1, count_per_root=10, blended_sample_rate=0.04)] + assert p.adjust_sample_rates() == [P(id=1, count_per_root=10, blended_sample_rate=0.04)] def test_adjust_sample_rates_org_with_few_projects(): @@ -30,7 +30,7 @@ def test_adjust_sample_rates_org_with_few_projects(): P(id=4, count_per_root=1.0, new_count_per_root=4.0, blended_sample_rate=0.04), ] - assert p.adjust_sample_rates == expected_projects + assert p.adjust_sample_rates() == expected_projects def test_adjust_sample_rates_org_with_even_num_projects(): @@ -47,4 +47,23 @@ def test_adjust_sample_rates_org_with_even_num_projects(): P(id=3, count_per_root=3.0, new_count_per_root=6.0, blended_sample_rate=0.04), ] - assert p.adjust_sample_rates == expected_projects + assert p.adjust_sample_rates() == expected_projects + + +def test_adjust_sample_rates_org_with_same_counts_projects(): + projects = [ + P(id=1, count_per_root=9.0, blended_sample_rate=0.04), + P(id=2, count_per_root=6.0, blended_sample_rate=0.04), + P(id=3, count_per_root=6.0, blended_sample_rate=0.04), + P(id=4, count_per_root=1.0, blended_sample_rate=0.04), + ] + p = AdjustedModel(projects=projects, fidelity_rate=0.25) + + expected_projects = [ + P(id=1, count_per_root=9.0, new_count_per_root=6.0, blended_sample_rate=0.04), + P(id=2, count_per_root=6.0, new_count_per_root=6.375, blended_sample_rate=0.04), + P(id=3, count_per_root=6.0, new_count_per_root=5.625, blended_sample_rate=0.04), + P(id=4, count_per_root=1.0, new_count_per_root=4.0, blended_sample_rate=0.04), + ] + + assert p.adjust_sample_rates() == expected_projects From 0f72607a9fc8f90f215154d75820959c2adea27f Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Tue, 14 Feb 2023 12:15:26 +0100 Subject: [PATCH 15/39] adressed code review --- src/sentry/dynamic_sampling/models/adjustment_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/dynamic_sampling/models/adjustment_models.py b/src/sentry/dynamic_sampling/models/adjustment_models.py index 6b1c88bda4fd08..521110ead544b5 100644 --- 
a/src/sentry/dynamic_sampling/models/adjustment_models.py +++ b/src/sentry/dynamic_sampling/models/adjustment_models.py @@ -60,6 +60,6 @@ def adjust_sample_rates(self) -> List[Project]: if len(sorted_projects) % 2 == 0: return [*new_right, *reversed(new_left)] else: - mid_element = sorted_projects[len(sorted_projects) // 2] + mid_element = sorted_projects[half] if odd_one else [] mid_element.new_count_per_root = mid_element.count_per_root return [*new_right, mid_element, *reversed(new_left)] From 0cfbc82c64ff730afd75c584080ea4485bd6de43 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Tue, 14 Feb 2023 14:25:40 +0100 Subject: [PATCH 16/39] add more tests --- .../models/adjustment_models.py | 5 ++++- .../test_adjusments_models.py | 21 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/sentry/dynamic_sampling/models/adjustment_models.py b/src/sentry/dynamic_sampling/models/adjustment_models.py index 521110ead544b5..abb2a2df885e27 100644 --- a/src/sentry/dynamic_sampling/models/adjustment_models.py +++ b/src/sentry/dynamic_sampling/models/adjustment_models.py @@ -46,9 +46,12 @@ def adjust_sample_rates(self) -> List[Project]: # We can't increase sample rate more than 1.0, so we calculate upper bound count # based on project fidelity_rate # Find an absolute difference + + # Max possible counter if sample rate would be 1.0 + max_possible_count = left.count_per_root / self.fidelity_rate diff = coefficient * min( (average - left.count_per_root), - ((left.count_per_root / self.fidelity_rate) - left.count_per_root), + (max_possible_count - left.count_per_root), ) left.new_count_per_root = left.count_per_root + diff right.new_count_per_root = right.count_per_root - diff diff --git a/tests/sentry/dynamic_sampling/test_adjusments_models.py b/tests/sentry/dynamic_sampling/test_adjusments_models.py index 1e6fec8121a489..a5c085136acba8 100644 --- a/tests/sentry/dynamic_sampling/test_adjusments_models.py +++ b/tests/sentry/dynamic_sampling/test_adjusments_models.py @@ -1,3 +1,5 @@ +from operator import attrgetter + from sentry.dynamic_sampling.models.adjustment_models import AdjustedModel from sentry.dynamic_sampling.models.adjustment_models import Project as P @@ -67,3 +69,22 @@ def test_adjust_sample_rates_org_with_same_counts_projects(): ] assert p.adjust_sample_rates() == expected_projects + + +def test_adjust_sample_rates_org_with_counts_projects(): + projects = [ + P(id=1, count_per_root=2.0, blended_sample_rate=0.04), + P(id=2, count_per_root=10.0, blended_sample_rate=0.04), + P(id=3, count_per_root=10.0, blended_sample_rate=0.04), + P(id=4, count_per_root=10.0, blended_sample_rate=0.04), + ] + p = AdjustedModel(projects=projects, fidelity_rate=0.25) + + expected_projects = [ + P(id=1, count_per_root=2.0, new_count_per_root=8.0, blended_sample_rate=0.04), + P(id=2, count_per_root=10.0, new_count_per_root=4.0, blended_sample_rate=0.04), + P(id=3, count_per_root=10.0, new_count_per_root=11.5, blended_sample_rate=0.04), + P(id=4, count_per_root=10.0, new_count_per_root=8.5, blended_sample_rate=0.04), + ] + + assert sorted(p.adjust_sample_rates(), key=attrgetter("id")) == expected_projects From cfa011cf825a233944a98476be647ee359419df7 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Wed, 15 Feb 2023 10:57:49 +0100 Subject: [PATCH 17/39] add more tests --- src/sentry/conf/server.py | 4 ++-- .../dynamic_sampling/models/adjustment_models.py | 9 ++++++--- src/sentry/dynamic_sampling/prioritise_projects.py | 2 +- src/sentry/dynamic_sampling/tasks.py | 11 
++++++++--- .../MONOLITH/with_metrics.pysnap | 1 + .../REGION/with_metrics.pysnap | 1 + 6 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py index ff761786f6bdc4..b8ea844ae65540 100644 --- a/src/sentry/conf/server.py +++ b/src/sentry/conf/server.py @@ -851,8 +851,8 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): "schedule": timedelta(hours=1), "options": {"expires": 3600}, }, - "dynamic-sampling-project-biases": { - "task": "sentry.dynamic_sampling.tasks.prioritize_by_project", + "dynamic-sampling-prioritize-projects": { + "task": "sentry.dynamic_sampling.tasks.prioritise_projects", # Run daily at 08:00 "schedule": crontab(hour=8, minute=0), "options": {"expires": 3600}, diff --git a/src/sentry/dynamic_sampling/models/adjustment_models.py b/src/sentry/dynamic_sampling/models/adjustment_models.py index abb2a2df885e27..4ecb6d6b81ef4d 100644 --- a/src/sentry/dynamic_sampling/models/adjustment_models.py +++ b/src/sentry/dynamic_sampling/models/adjustment_models.py @@ -63,6 +63,9 @@ def adjust_sample_rates(self) -> List[Project]: if len(sorted_projects) % 2 == 0: return [*new_right, *reversed(new_left)] else: - mid_element = sorted_projects[half] if odd_one else [] - mid_element.new_count_per_root = mid_element.count_per_root - return [*new_right, mid_element, *reversed(new_left)] + if odd_one: + mid_elements = [sorted_projects[half]] + mid_elements[0].new_count_per_root = mid_elements[0].count_per_root + else: + mid_elements = [] + return [*new_right, *mid_elements, *reversed(new_left)] diff --git a/src/sentry/dynamic_sampling/prioritise_projects.py b/src/sentry/dynamic_sampling/prioritise_projects.py index f9c27e0a2aff41..2397176d9fbbd5 100644 --- a/src/sentry/dynamic_sampling/prioritise_projects.py +++ b/src/sentry/dynamic_sampling/prioritise_projects.py @@ -27,7 +27,7 @@ CHUNK_SIZE = 1000 -def fetch_projects_with_total_volumes() -> Mapping[int, Sequence[int]]: +def fetch_projects_with_total_volumes() -> Mapping[int, Sequence[Mapping[int, int]]]: """ This function fetch with pagination orgs and projects with count per root project """ diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py index 04cf60dcdd7f48..06b1605b2e2738 100644 --- a/src/sentry/dynamic_sampling/tasks.py +++ b/src/sentry/dynamic_sampling/tasks.py @@ -1,4 +1,5 @@ import logging +from typing import Mapping, Sequence from sentry import features from sentry.dynamic_sampling.models.adjustment_models import AdjustedModel @@ -20,7 +21,7 @@ default_retry_delay=5, max_retries=5, ) # type: ignore -def prioritise_projects(**kwargs) -> None: +def prioritise_projects() -> None: metrics.incr("sentry.tasks.dynamic_sampling.prioritise_projects.start", sample_rate=1.0) with metrics.timer("sentry.tasks.dynamic_sampling.prioritise_projects", sample_rate=1.0): for org_id, project_id_with_count_per_root in fetch_projects_with_total_volumes().items(): @@ -33,7 +34,9 @@ def prioritise_projects(**kwargs) -> None: default_retry_delay=5, max_retries=5, ) # type: ignore -def process_projects_sample_rates(organization_id, project_id_with_count_per_root) -> None: +def process_projects_sample_rates( + organization_id: int, project_id_with_count_per_root: Sequence[Mapping[int, int]] +) -> None: """ Takes a single org id and a list of project ids """ @@ -44,7 +47,9 @@ def process_projects_sample_rates(organization_id, project_id_with_count_per_roo adjust_sample_rates(organization_id, project_id_with_count_per_root) -def adjust_sample_rates(org_id, 
project_id_with_count_per_root): +def adjust_sample_rates( + org_id: int, project_id_with_count_per_root: Sequence[Mapping[int, int]] +) -> None: projects = [] for project_id, count_per_root in project_id_with_count_per_root: projects.append( diff --git a/tests/sentry/relay/snapshots/test_config/test_project_config_with_breakdown/MONOLITH/with_metrics.pysnap b/tests/sentry/relay/snapshots/test_config/test_project_config_with_breakdown/MONOLITH/with_metrics.pysnap index b418948bfa707a..15dcff86085662 100644 --- a/tests/sentry/relay/snapshots/test_config/test_project_config_with_breakdown/MONOLITH/with_metrics.pysnap +++ b/tests/sentry/relay/snapshots/test_config/test_project_config_with_breakdown/MONOLITH/with_metrics.pysnap @@ -921,6 +921,7 @@ transactionMetrics: limit: 10 extractCustomTags: [] extractMetrics: + - c:transactions/count_per_root_project@none - d:transactions/duration@millisecond - s:transactions/user@none - d:transactions/measurements.app_start_cold@millisecond diff --git a/tests/sentry/relay/snapshots/test_config/test_project_config_with_breakdown/REGION/with_metrics.pysnap b/tests/sentry/relay/snapshots/test_config/test_project_config_with_breakdown/REGION/with_metrics.pysnap index b57d061b7f1d58..dafd78e0813794 100644 --- a/tests/sentry/relay/snapshots/test_config/test_project_config_with_breakdown/REGION/with_metrics.pysnap +++ b/tests/sentry/relay/snapshots/test_config/test_project_config_with_breakdown/REGION/with_metrics.pysnap @@ -921,6 +921,7 @@ transactionMetrics: limit: 10 extractCustomTags: [] extractMetrics: + - c:transactions/count_per_root_project@none - d:transactions/duration@millisecond - s:transactions/user@none - d:transactions/measurements.app_start_cold@millisecond From 10af365b231e28e9b6f14c1db47636dc2a1483c0 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Thu, 16 Feb 2023 09:45:04 +0100 Subject: [PATCH 18/39] add more tests --- src/sentry/dynamic_sampling/tasks.py | 41 +++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py index 06b1605b2e2738..033d7857a59399 100644 --- a/src/sentry/dynamic_sampling/tasks.py +++ b/src/sentry/dynamic_sampling/tasks.py @@ -1,5 +1,7 @@ import logging -from typing import Mapping, Sequence +from typing import Any, Mapping, Sequence + +from django.conf import settings from sentry import features from sentry.dynamic_sampling.models.adjustment_models import AdjustedModel @@ -7,13 +9,25 @@ from sentry.dynamic_sampling.prioritise_projects import fetch_projects_with_total_volumes from sentry.models import Organization from sentry.tasks.base import instrumented_task -from sentry.utils import metrics +from sentry.tasks.relay import schedule_invalidate_project_config +from sentry.utils import metrics, redis CHUNK_SIZE = 1000 MAX_SECONDS = 60 logger = logging.getLogger(__name__) +REDIS_KEY_TEMPLATE = "ds::b:o:{org_id}:p:{project_id}:" + + +def get_redis_client_for_ds() -> Any: + cluster_key = getattr(settings, "SENTRY_DYNAMIC_SAMPLING_RULES_REDIS_CLUSTER", "default") + return redis.redis_clusters.get(cluster_key) + + +def _generate_cache_key_for_prioritise_projects_bias(org_id, project_id) -> str: + return f"ds::o:{org_id}:p:{project_id}:prioritise_projects" + @instrumented_task( name="sentry.dynamic_sampling.tasks.prioritise_projects", @@ -50,11 +64,30 @@ def process_projects_sample_rates( def adjust_sample_rates( org_id: int, project_id_with_count_per_root: Sequence[Mapping[int, int]] ) -> None: + """ + This 
function applies the model and adjusts the sample rate per project in the org,
+    stores it in the DS redis cluster, and then invalidates the project config
+    so relay can reread it; we'll inject the stored rate from the redis cache.
+    """
     projects = []
     for project_id, count_per_root in project_id_with_count_per_root:
         projects.append(
             DSProject(id=project_id, count_per_root=count_per_root, blended_sample_rate=0.0)
         )
     model = AdjustedModel(projects=projects)
-    new_rates = model.adjust_sample_rates()
-    _ = new_rates
+    ds_projects = model.adjust_sample_rates()
+
+    redis_client = get_redis_client_for_ds()
+    for ds_project in ds_projects:
+        # TODO: Check that sample rate between 0 < sample_rate < 1.0
+        # hash, key, value
+        redis_client.set(
+            _generate_cache_key_for_prioritise_projects_bias(
+                project_id=ds_project.id, org_id=org_id
+            ),
+            ds_project.new_sample_rate,  # redis stores it as a string
+            60 * 60 * 24,
+        )
+        schedule_invalidate_project_config(
+            project_id=ds_project.id, trigger="dynamic_sampling_prioritise_project_bias"
+        )

From 6a0822dcb3e53dde4c48b169ed6d995d3d6d4d4e Mon Sep 17 00:00:00 2001
From: Andrii Soldatenko
Date: Thu, 16 Feb 2023 15:18:21 +0100
Subject: [PATCH 19/39] add more tests

---
 .../rules/biases/uniform_bias.py              |  5 +++-
 .../rules/helpers/latest_releases.py          | 10 ++------
 .../rules/helpers/prioritise_project.py       | 21 +++++++++++++++++
 .../dynamic_sampling/rules/helpers/utils.py   | 10 ++++++++
 src/sentry/dynamic_sampling/tasks.py          | 23 ++++----------------
 5 files changed, 42 insertions(+), 27 deletions(-)
 create mode 100644 src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py
 create mode 100644 src/sentry/dynamic_sampling/rules/helpers/utils.py

diff --git a/src/sentry/dynamic_sampling/rules/biases/uniform_bias.py b/src/sentry/dynamic_sampling/rules/biases/uniform_bias.py
index b2c7d21bf7d9a7..10f4ce9d85edbe 100644
--- a/src/sentry/dynamic_sampling/rules/biases/uniform_bias.py
+++ b/src/sentry/dynamic_sampling/rules/biases/uniform_bias.py
@@ -7,6 +7,7 @@
     BiasParams,
     BiasRulesGenerator,
 )
+from sentry.dynamic_sampling.rules.helpers.prioritise_project import get_cached_sample_rate
 from sentry.dynamic_sampling.rules.utils import RESERVED_IDS, PolymorphicRule, RuleType


@@ -14,7 +15,9 @@ class UniformDataProvider(BiasDataProvider):
     def get_bias_data(self, bias_params: BiasParams) -> BiasData:
         return {
             "id": RESERVED_IDS[RuleType.UNIFORM_RULE],
-            "sampleRate": bias_params.base_sample_rate,
+            "sampleRate": get_cached_sample_rate(
+                bias_params.project, default_samplerate=bias_params.base_sample_rate
+            ),
         }
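The helper wired in above closes the loop: the celery task stores the adjusted rate, and rule generation reads it back when building the uniform rule. A condensed sketch of that read path, simplified from prioritise_project.py as added in this patch (redis returns the value as a string; the explicit float() parse only lands in the mypy fix a patch later):

```python
# Condensed sketch of rules/helpers/prioritise_project.py from this patch.
def get_cached_sample_rate(project, default_samplerate=None):
    redis_client = get_redis_client_for_ds()
    cache_key = f"ds::o:{project.organization.id}:p:{project.id}:prioritise_projects"
    cached_sample_rate = redis_client.get(name=cache_key)
    # redis hands back a string (or None when the task has not run yet)
    return float(cached_sample_rate) if cached_sample_rate else default_samplerate
```

diff --git a/src/sentry/dynamic_sampling/rules/helpers/latest_releases.py b/src/sentry/dynamic_sampling/rules/helpers/latest_releases.py
index 313fa63f9d4cef..15350e05c7a84b 100644
--- a/src/sentry/dynamic_sampling/rules/helpers/latest_releases.py
+++ b/src/sentry/dynamic_sampling/rules/helpers/latest_releases.py
@@ -2,15 +2,14 @@
 from collections import namedtuple
 from dataclasses import dataclass, field
 from datetime import datetime
-from typing import Any, Callable, Dict, List, Optional, Tuple
+from typing import Callable, Dict, List, Optional, Tuple

-from django.conf import settings
 from pytz import UTC

 from sentry.dynamic_sampling.rules.helpers.time_to_adoptions import Platform
+from sentry.dynamic_sampling.rules.helpers.utils import get_redis_client_for_ds
 from sentry.dynamic_sampling.rules.utils import BOOSTED_RELEASES_LIMIT
 from sentry.models import Project, Release

 ENVIRONMENT_SEPARATOR = ":e:"
 BOOSTED_RELEASE_CACHE_KEY_REGEX = re.compile(
 )

-def 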
get_redis_client_for_ds() -> Any: - cluster_key = getattr(settings, "SENTRY_DYNAMIC_SAMPLING_RULES_REDIS_CLUSTER", "default") - return redis.redis_clusters.get(cluster_key) - - def _get_environment_cache_key(environment: Optional[str]) -> str: return f"{ENVIRONMENT_SEPARATOR}{environment}" if environment else "" diff --git a/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py b/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py new file mode 100644 index 00000000000000..47827399c935ee --- /dev/null +++ b/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py @@ -0,0 +1,21 @@ +from typing import TYPE_CHECKING + +from sentry.dynamic_sampling.rules.helpers.utils import get_redis_client_for_ds + +if TYPE_CHECKING: + from sentry.models import Project + + +def _generate_cache_key(org_id, project_id) -> str: + return f"ds::o:{org_id}:p:{project_id}:prioritise_projects" + + +def get_cached_sample_rate(project: "Project", default_samplerate=None): + """ + This function returns cached sample rate from prioritise by project + celery task or fallback to None + """ + redis_client = get_redis_client_for_ds() + cache_key = _generate_cache_key(project.organization.id, project.id) + cached_sample_rate = redis_client.get(name=cache_key) + return cached_sample_rate if cached_sample_rate else default_samplerate diff --git a/src/sentry/dynamic_sampling/rules/helpers/utils.py b/src/sentry/dynamic_sampling/rules/helpers/utils.py new file mode 100644 index 00000000000000..e00ebf57b0002c --- /dev/null +++ b/src/sentry/dynamic_sampling/rules/helpers/utils.py @@ -0,0 +1,10 @@ +from typing import Any + +from django.conf import settings + +from sentry.utils import redis + + +def get_redis_client_for_ds() -> Any: + cluster_key = getattr(settings, "SENTRY_DYNAMIC_SAMPLING_RULES_REDIS_CLUSTER", "default") + return redis.redis_clusters.get(cluster_key) diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py index 033d7857a59399..afaa999aab0414 100644 --- a/src/sentry/dynamic_sampling/tasks.py +++ b/src/sentry/dynamic_sampling/tasks.py @@ -1,33 +1,22 @@ import logging -from typing import Any, Mapping, Sequence - -from django.conf import settings +from typing import Mapping, Sequence from sentry import features from sentry.dynamic_sampling.models.adjustment_models import AdjustedModel from sentry.dynamic_sampling.models.adjustment_models import Project as DSProject from sentry.dynamic_sampling.prioritise_projects import fetch_projects_with_total_volumes +from sentry.dynamic_sampling.rules.helpers.prioritise_project import _generate_cache_key +from sentry.dynamic_sampling.rules.helpers.utils import get_redis_client_for_ds from sentry.models import Organization from sentry.tasks.base import instrumented_task from sentry.tasks.relay import schedule_invalidate_project_config -from sentry.utils import metrics, redis +from sentry.utils import metrics CHUNK_SIZE = 1000 MAX_SECONDS = 60 logger = logging.getLogger(__name__) -REDIS_KEY_TEMPLATE = "ds::b:o:{org_id}:p:{project_id}:" - - -def get_redis_client_for_ds() -> Any: - cluster_key = getattr(settings, "SENTRY_DYNAMIC_SAMPLING_RULES_REDIS_CLUSTER", "default") - return redis.redis_clusters.get(cluster_key) - - -def _generate_cache_key_for_prioritise_projects_bias(org_id, project_id) -> str: - return f"ds::o:{org_id}:p:{project_id}:prioritise_projects" - @instrumented_task( name="sentry.dynamic_sampling.tasks.prioritise_projects", @@ -82,9 +71,7 @@ def adjust_sample_rates( # TODO: Check that sample rate between 0 < 
sample_rate < 1.0 # hash, key, value redis_client.set( - _generate_cache_key_for_prioritise_projects_bias( - project_id=ds_project.id, org_id=org_id - ), + _generate_cache_key(project_id=ds_project.id, org_id=org_id), ds_project.new_sample_rate, # redis stores is as string 60 * 60 * 24, ) From 2e017e37757915768055ff00bbb054fb2fb272bb Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Mon, 20 Feb 2023 09:06:57 +0100 Subject: [PATCH 20/39] fix mypy --- src/sentry/dynamic_sampling/__init__.py | 2 +- .../dynamic_sampling/rules/biases/uniform_bias.py | 2 +- .../rules/helpers/prioritise_project.py | 11 +++++++---- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/sentry/dynamic_sampling/__init__.py b/src/sentry/dynamic_sampling/__init__.py index 848ec9e37cd0b0..5f175ff3779dfe 100644 --- a/src/sentry/dynamic_sampling/__init__.py +++ b/src/sentry/dynamic_sampling/__init__.py @@ -11,9 +11,9 @@ LatestReleaseBias, LatestReleaseParams, ProjectBoostedReleases, - get_redis_client_for_ds, ) from .rules.helpers.time_to_adoptions import LATEST_RELEASE_TTAS, Platform +from .rules.helpers.utils import get_redis_client_for_ds from .rules.logging import should_log_rules_change from .rules.utils import ( BOOSTED_KEY_TRANSACTION_LIMIT, diff --git a/src/sentry/dynamic_sampling/rules/biases/uniform_bias.py b/src/sentry/dynamic_sampling/rules/biases/uniform_bias.py index 10f4ce9d85edbe..2d8a9fb1453003 100644 --- a/src/sentry/dynamic_sampling/rules/biases/uniform_bias.py +++ b/src/sentry/dynamic_sampling/rules/biases/uniform_bias.py @@ -16,7 +16,7 @@ def get_bias_data(self, bias_params: BiasParams) -> BiasData: return { "id": RESERVED_IDS[RuleType.UNIFORM_RULE], "sampleRate": get_cached_sample_rate( - bias_params.project, default_samplerate=bias_params.base_sample_rate + bias_params.project, default_sample_rate=bias_params.base_sample_rate ), } diff --git a/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py b/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py index 47827399c935ee..71e468c022ad65 100644 --- a/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py +++ b/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py @@ -6,16 +6,19 @@ from sentry.models import Project -def _generate_cache_key(org_id, project_id) -> str: +def _generate_cache_key(org_id: int, project_id: int) -> str: return f"ds::o:{org_id}:p:{project_id}:prioritise_projects" -def get_cached_sample_rate(project: "Project", default_samplerate=None): +def get_cached_sample_rate(project: "Project", default_sample_rate: float) -> float: """ This function returns cached sample rate from prioritise by project celery task or fallback to None """ redis_client = get_redis_client_for_ds() cache_key = _generate_cache_key(project.organization.id, project.id) - cached_sample_rate = redis_client.get(name=cache_key) - return cached_sample_rate if cached_sample_rate else default_samplerate + try: + cached_sample_rate = float(redis_client.get(name=cache_key)) + except (TypeError, ValueError): + cached_sample_rate = None + return cached_sample_rate if cached_sample_rate else default_sample_rate From fb7414a03b3c14dbd1c49428c8ac408f925ffa9c Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Mon, 20 Feb 2023 17:02:42 +0100 Subject: [PATCH 21/39] add more tests --- .../models/adjustment_models.py | 18 +- .../dynamic_sampling/prioritise_projects.py | 4 +- src/sentry/dynamic_sampling/rules/base.py | 3 +- .../rules/biases/uniform_bias.py | 5 +- src/sentry/dynamic_sampling/tasks.py | 20 +- 
 .../test_adjusments_models.py                 | 172 ++++++++++++++----
 .../test_prioritise_projects.py               |   2 +-
 tests/sentry/dynamic_sampling/test_tasks.py   |  79 ++++++--
 8 files changed, 231 insertions(+), 72 deletions(-)

diff --git a/src/sentry/dynamic_sampling/models/adjustment_models.py b/src/sentry/dynamic_sampling/models/adjustment_models.py
index 4ecb6d6b81ef4d..f5194b97a105f5 100644
--- a/src/sentry/dynamic_sampling/models/adjustment_models.py
+++ b/src/sentry/dynamic_sampling/models/adjustment_models.py
@@ -4,7 +4,7 @@


 @dataclass
-class Project:
+class DSProject:
     id: int
     count_per_root: float
     blended_sample_rate: float
@@ -18,11 +18,15 @@ class AdjustedModel:
     Model which can adjust sample rate per project inside ORG based on
     this new counter metric from Relay.
     """
-    projects: List[Project]
+    projects: List[DSProject]
+    # Right now we are not using this constant
     fidelity_rate: float = 0.4  # TODO: discuss this constant

-    def adjust_sample_rates(self) -> List[Project]:
+    def adjust_sample_rates(self) -> List[DSProject]:
         if len(self.projects) < 2:
+            # When we have only one project we keep its sample rate unchanged
+            if len(self.projects) == 1:
+                self.projects[0].new_sample_rate = self.projects[0].blended_sample_rate
             return self.projects

         # Step 1: sort projects by count per root project
@@ -48,13 +52,19 @@ def adjust_sample_rates(self) -> List[Project]:
             # We can't increase sample rate more than 1.0, so we calculate upper bound count
             # based on project fidelity_rate
             # Find an absolute difference

             # Max possible counter if sample rate would be 1.0
-            max_possible_count = left.count_per_root / self.fidelity_rate
+            max_possible_count = left.count_per_root / left.blended_sample_rate
             diff = coefficient * min(
                 (average - left.count_per_root),
                 (max_possible_count - left.count_per_root),
             )
             left.new_count_per_root = left.count_per_root + diff
+            left.new_sample_rate = left.blended_sample_rate * (
+                left.new_count_per_root / left.count_per_root
+            )
             right.new_count_per_root = right.count_per_root - diff
+            right.new_sample_rate = right.blended_sample_rate * (
+                right.new_count_per_root / right.count_per_root
+            )
             new_left.append(left)
             new_right.append(right)
             # This opinionated `coefficient` reduces adjustment on every step

diff --git a/src/sentry/dynamic_sampling/prioritise_projects.py b/src/sentry/dynamic_sampling/prioritise_projects.py
index 2397176d9fbbd5..3bf1cc7289d70f 100644
--- a/src/sentry/dynamic_sampling/prioritise_projects.py
+++ b/src/sentry/dynamic_sampling/prioritise_projects.py
@@ -27,7 +27,7 @@
 CHUNK_SIZE = 1000


-def fetch_projects_with_total_volumes() -> Mapping[int, Sequence[Mapping[int, int]]]:
+def fetch_projects_with_total_volumes() -> Mapping[int, Sequence[Sequence[int]]]:
     """
     This function fetch with pagination orgs and projects with count per root project
     """
@@ -77,7 +77,7 @@ def fetch_projects_with_total_volumes() -> Mapping[int, Sequence[Mapping[int, in
         data = data[:-1]

     for row in data:
-        aggregated_projects[row["org_id"]].append({row["project_id"]: row["root_count_value"]})
+        aggregated_projects[row["org_id"]].append((row["project_id"], row["root_count_value"]))

     if not more_results:
         break
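The new new_sample_rate field turns the adjusted counts directly into per-project rates. A worked example for the smallest project in the (9, 7, 3, 1) scenario used by the updated tests, at a blended rate of 0.25 (a by-hand check of the formula above, not library code):

```python
count_per_root, blended_sample_rate = 1.0, 0.25
average = (9.0 + 7.0 + 3.0 + 1.0) / 4  # 5.0
# Upper bound: the count this project would send at a 1.0 sample rate.
max_possible_count = count_per_root / blended_sample_rate  # 4.0
diff = min(average - count_per_root, max_possible_count - count_per_root)  # 3.0
new_count_per_root = count_per_root + diff  # 4.0
new_sample_rate = blended_sample_rate * (new_count_per_root / count_per_root)
assert new_sample_rate == 1.0  # the low-volume project is boosted to full sampling
```

diff --git a/src/sentry/dynamic_sampling/rules/base.py b/src/sentry/dynamic_sampling/rules/base.py
index 246ab92ecc8fea..453e75bf5d69fc 100644
--- a/src/sentry/dynamic_sampling/rules/base.py
+++ b/src/sentry/dynamic_sampling/rules/base.py
@@ -5,6 +5,7 @@
 from sentry import quotas
 from sentry.dynamic_sampling.rules.biases.base import Bias, BiasParams
 from sentry.dynamic_sampling.rules.combine import get_relay_biases_combinator
+from sentry.dynamic_sampling.rules.helpers.prioritise_project import get_cached_sample_rate
 from 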
sentry.dynamic_sampling.rules.logging import log_rules from sentry.dynamic_sampling.rules.utils import PolymorphicRule, RuleType, get_enabled_user_biases from sentry.models import Project @@ -18,7 +19,7 @@ def get_guarded_blended_sample_rate(project: Project) -> float: if sample_rate is None: raise Exception("get_blended_sample_rate returns none") - return float(sample_rate) + return get_cached_sample_rate(project, default_sample_rate=float(sample_rate)) def _get_rules_of_enabled_biases( diff --git a/src/sentry/dynamic_sampling/rules/biases/uniform_bias.py b/src/sentry/dynamic_sampling/rules/biases/uniform_bias.py index 2d8a9fb1453003..b2c7d21bf7d9a7 100644 --- a/src/sentry/dynamic_sampling/rules/biases/uniform_bias.py +++ b/src/sentry/dynamic_sampling/rules/biases/uniform_bias.py @@ -7,7 +7,6 @@ BiasParams, BiasRulesGenerator, ) -from sentry.dynamic_sampling.rules.helpers.prioritise_project import get_cached_sample_rate from sentry.dynamic_sampling.rules.utils import RESERVED_IDS, PolymorphicRule, RuleType @@ -15,9 +14,7 @@ class UniformDataProvider(BiasDataProvider): def get_bias_data(self, bias_params: BiasParams) -> BiasData: return { "id": RESERVED_IDS[RuleType.UNIFORM_RULE], - "sampleRate": get_cached_sample_rate( - bias_params.project, default_sample_rate=bias_params.base_sample_rate - ), + "sampleRate": bias_params.base_sample_rate, } diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py index afaa999aab0414..42fccdb864de0e 100644 --- a/src/sentry/dynamic_sampling/tasks.py +++ b/src/sentry/dynamic_sampling/tasks.py @@ -1,13 +1,13 @@ import logging -from typing import Mapping, Sequence +from typing import Sequence -from sentry import features +from sentry import features, quotas from sentry.dynamic_sampling.models.adjustment_models import AdjustedModel -from sentry.dynamic_sampling.models.adjustment_models import Project as DSProject +from sentry.dynamic_sampling.models.adjustment_models import DSProject as DSProject from sentry.dynamic_sampling.prioritise_projects import fetch_projects_with_total_volumes from sentry.dynamic_sampling.rules.helpers.prioritise_project import _generate_cache_key from sentry.dynamic_sampling.rules.helpers.utils import get_redis_client_for_ds -from sentry.models import Organization +from sentry.models import Organization, Project from sentry.tasks.base import instrumented_task from sentry.tasks.relay import schedule_invalidate_project_config from sentry.utils import metrics @@ -28,7 +28,7 @@ def prioritise_projects() -> None: metrics.incr("sentry.tasks.dynamic_sampling.prioritise_projects.start", sample_rate=1.0) with metrics.timer("sentry.tasks.dynamic_sampling.prioritise_projects", sample_rate=1.0): for org_id, project_id_with_count_per_root in fetch_projects_with_total_volumes().items(): - process_projects_sample_rates.delay(org_id, project_id_with_count_per_root) + process_projects_sample_rates(org_id, project_id_with_count_per_root) @instrumented_task( @@ -38,7 +38,7 @@ def prioritise_projects() -> None: max_retries=5, ) # type: ignore def process_projects_sample_rates( - organization_id: int, project_id_with_count_per_root: Sequence[Mapping[int, int]] + organization_id: int, project_id_with_count_per_root: Sequence[Sequence[int]] ) -> None: """ Takes a single org id and a list of project ids @@ -51,7 +51,7 @@ def process_projects_sample_rates( def adjust_sample_rates( - org_id: int, project_id_with_count_per_root: Sequence[Mapping[int, int]] + org_id: int, project_id_with_count_per_root: Sequence[Sequence[int]] ) 
-> None: """ This function apply model and adjust sample rate per project in org @@ -60,8 +60,12 @@ def adjust_sample_rates( """ projects = [] for project_id, count_per_root in project_id_with_count_per_root: + project = Project.objects.get_from_cache(id=project_id) + sample_rate = quotas.get_blended_sample_rate(project) + if sample_rate is None: + continue projects.append( - DSProject(id=project_id, count_per_root=count_per_root, blended_sample_rate=0.0) + DSProject(id=project_id, count_per_root=count_per_root, blended_sample_rate=sample_rate) ) model = AdjustedModel(projects=projects) ds_projects = model.adjust_sample_rates() diff --git a/tests/sentry/dynamic_sampling/test_adjusments_models.py b/tests/sentry/dynamic_sampling/test_adjusments_models.py index a5c085136acba8..4e25eef82dcf1d 100644 --- a/tests/sentry/dynamic_sampling/test_adjusments_models.py +++ b/tests/sentry/dynamic_sampling/test_adjusments_models.py @@ -1,35 +1,61 @@ from operator import attrgetter +import pytest + from sentry.dynamic_sampling.models.adjustment_models import AdjustedModel -from sentry.dynamic_sampling.models.adjustment_models import Project as P +from sentry.dynamic_sampling.models.adjustment_models import DSProject as P def test_adjust_sample_rates_org_wo_projects(): - p = AdjustedModel(projects=[], fidelity_rate=0.04) + p = AdjustedModel(projects=[]) assert p.adjust_sample_rates() == [] def test_adjust_sample_rates_org_with_single_project(): - p = AdjustedModel( - projects=[P(id=1, count_per_root=10, blended_sample_rate=0.04)], fidelity_rate=0.04 - ) - assert p.adjust_sample_rates() == [P(id=1, count_per_root=10, blended_sample_rate=0.04)] + p = AdjustedModel(projects=[P(id=1, count_per_root=10, blended_sample_rate=0.4)]) + assert p.adjust_sample_rates() == [ + P(id=1, count_per_root=10, blended_sample_rate=0.4, new_sample_rate=0.4) + ] def test_adjust_sample_rates_org_with_few_projects(): projects = [ - P(id=1, count_per_root=9.0, blended_sample_rate=0.04), - P(id=2, count_per_root=7.0, blended_sample_rate=0.04), - P(id=3, count_per_root=3.0, blended_sample_rate=0.04), - P(id=4, count_per_root=1.0, blended_sample_rate=0.04), + P(id=1, count_per_root=9.0, blended_sample_rate=0.25), + P(id=2, count_per_root=7.0, blended_sample_rate=0.25), + P(id=3, count_per_root=3.0, blended_sample_rate=0.25), + P(id=4, count_per_root=1.0, blended_sample_rate=0.25), ] - p = AdjustedModel(projects=projects, fidelity_rate=0.25) + p = AdjustedModel(projects=projects) expected_projects = [ - P(id=1, count_per_root=9.0, new_count_per_root=6.0, blended_sample_rate=0.04), - P(id=2, count_per_root=7.0, new_count_per_root=5.5, blended_sample_rate=0.04), - P(id=3, count_per_root=3.0, new_count_per_root=4.5, blended_sample_rate=0.04), - P(id=4, count_per_root=1.0, new_count_per_root=4.0, blended_sample_rate=0.04), + P( + id=1, + count_per_root=9.0, + new_count_per_root=6.0, + blended_sample_rate=0.25, + new_sample_rate=pytest.approx(0.16666666666666666), + ), + P( + id=2, + count_per_root=7.0, + new_count_per_root=5.5, + blended_sample_rate=0.25, + new_sample_rate=pytest.approx(0.19642857142857142), + ), + P( + id=3, + count_per_root=3.0, + new_count_per_root=4.5, + blended_sample_rate=0.25, + new_sample_rate=0.375, + ), + P( + id=4, + count_per_root=1.0, + new_count_per_root=4.0, + blended_sample_rate=0.25, + new_sample_rate=1.0, + ), ] assert p.adjust_sample_rates() == expected_projects @@ -37,16 +63,34 @@ def test_adjust_sample_rates_org_with_few_projects(): def test_adjust_sample_rates_org_with_even_num_projects(): projects = 
[ - P(id=1, count_per_root=8.0, blended_sample_rate=0.04), - P(id=2, count_per_root=7.0, blended_sample_rate=0.04), - P(id=3, count_per_root=3.0, blended_sample_rate=0.04), + P(id=1, count_per_root=8.0, blended_sample_rate=0.25), + P(id=2, count_per_root=7.0, blended_sample_rate=0.25), + P(id=3, count_per_root=3.0, blended_sample_rate=0.25), ] - p = AdjustedModel(projects=projects, fidelity_rate=0.25) + p = AdjustedModel(projects=projects) expected_projects = [ - P(id=1, count_per_root=8.0, new_count_per_root=5.0, blended_sample_rate=0.04), - P(id=2, count_per_root=7.0, new_count_per_root=7.0, blended_sample_rate=0.04), - P(id=3, count_per_root=3.0, new_count_per_root=6.0, blended_sample_rate=0.04), + P( + id=1, + count_per_root=8.0, + new_count_per_root=5.0, + blended_sample_rate=0.25, + new_sample_rate=0.15625, + ), + P( + id=2, + count_per_root=7.0, + new_count_per_root=7.0, + blended_sample_rate=0.25, + new_sample_rate=0.0, + ), + P( + id=3, + count_per_root=3.0, + new_count_per_root=6.0, + blended_sample_rate=0.25, + new_sample_rate=0.5, + ), ] assert p.adjust_sample_rates() == expected_projects @@ -54,18 +98,42 @@ def test_adjust_sample_rates_org_with_even_num_projects(): def test_adjust_sample_rates_org_with_same_counts_projects(): projects = [ - P(id=1, count_per_root=9.0, blended_sample_rate=0.04), - P(id=2, count_per_root=6.0, blended_sample_rate=0.04), - P(id=3, count_per_root=6.0, blended_sample_rate=0.04), - P(id=4, count_per_root=1.0, blended_sample_rate=0.04), + P(id=1, count_per_root=9.0, blended_sample_rate=0.25), + P(id=2, count_per_root=6.0, blended_sample_rate=0.25), + P(id=3, count_per_root=6.0, blended_sample_rate=0.25), + P(id=4, count_per_root=1.0, blended_sample_rate=0.25), ] - p = AdjustedModel(projects=projects, fidelity_rate=0.25) + p = AdjustedModel(projects=projects) expected_projects = [ - P(id=1, count_per_root=9.0, new_count_per_root=6.0, blended_sample_rate=0.04), - P(id=2, count_per_root=6.0, new_count_per_root=6.375, blended_sample_rate=0.04), - P(id=3, count_per_root=6.0, new_count_per_root=5.625, blended_sample_rate=0.04), - P(id=4, count_per_root=1.0, new_count_per_root=4.0, blended_sample_rate=0.04), + P( + id=1, + count_per_root=9.0, + new_count_per_root=6.0, + blended_sample_rate=0.25, + new_sample_rate=pytest.approx(0.16666666666666666), + ), + P( + id=2, + count_per_root=6.0, + new_count_per_root=6.375, + blended_sample_rate=0.25, + new_sample_rate=0.265625, + ), + P( + id=3, + count_per_root=6.0, + new_count_per_root=5.625, + blended_sample_rate=0.25, + new_sample_rate=0.234375, + ), + P( + id=4, + count_per_root=1.0, + new_count_per_root=4.0, + blended_sample_rate=0.25, + new_sample_rate=1.0, + ), ] assert p.adjust_sample_rates() == expected_projects @@ -73,18 +141,42 @@ def test_adjust_sample_rates_org_with_same_counts_projects(): def test_adjust_sample_rates_org_with_counts_projects(): projects = [ - P(id=1, count_per_root=2.0, blended_sample_rate=0.04), - P(id=2, count_per_root=10.0, blended_sample_rate=0.04), - P(id=3, count_per_root=10.0, blended_sample_rate=0.04), - P(id=4, count_per_root=10.0, blended_sample_rate=0.04), + P(id=1, count_per_root=2.0, blended_sample_rate=0.25), + P(id=2, count_per_root=10.0, blended_sample_rate=0.25), + P(id=3, count_per_root=10.0, blended_sample_rate=0.25), + P(id=4, count_per_root=10.0, blended_sample_rate=0.25), ] - p = AdjustedModel(projects=projects, fidelity_rate=0.25) + p = AdjustedModel(projects=projects) expected_projects = [ - P(id=1, count_per_root=2.0, new_count_per_root=8.0, 
blended_sample_rate=0.04), - P(id=2, count_per_root=10.0, new_count_per_root=4.0, blended_sample_rate=0.04), - P(id=3, count_per_root=10.0, new_count_per_root=11.5, blended_sample_rate=0.04), - P(id=4, count_per_root=10.0, new_count_per_root=8.5, blended_sample_rate=0.04), + P( + id=1, + count_per_root=2.0, + new_count_per_root=8.0, + blended_sample_rate=0.25, + new_sample_rate=1.0, + ), + P( + id=2, + count_per_root=10.0, + new_count_per_root=4.0, + blended_sample_rate=0.25, + new_sample_rate=0.1, + ), + P( + id=3, + count_per_root=10.0, + new_count_per_root=11.5, + blended_sample_rate=0.25, + new_sample_rate=0.2875, + ), + P( + id=4, + count_per_root=10.0, + new_count_per_root=8.5, + blended_sample_rate=0.25, + new_sample_rate=0.2125, + ), ] assert sorted(p.adjust_sample_rates(), key=attrgetter("id")) == expected_projects diff --git a/tests/sentry/dynamic_sampling/test_prioritise_projects.py b/tests/sentry/dynamic_sampling/test_prioritise_projects.py index abf227d613af32..5e3c91d8734651 100644 --- a/tests/sentry/dynamic_sampling/test_prioritise_projects.py +++ b/tests/sentry/dynamic_sampling/test_prioritise_projects.py @@ -29,4 +29,4 @@ def test_simple_one_org_one_project(self): ) results = fetch_projects_with_total_volumes() - assert results[org1.id] == [{p1.id: 1.0}] + assert results[org1.id] == [(p1.id, 1.0)] diff --git a/tests/sentry/dynamic_sampling/test_tasks.py b/tests/sentry/dynamic_sampling/test_tasks.py index 6d9a1262e1dba2..925cbc515cdbab 100644 --- a/tests/sentry/dynamic_sampling/test_tasks.py +++ b/tests/sentry/dynamic_sampling/test_tasks.py @@ -1,18 +1,73 @@ -from collections import defaultdict +from datetime import datetime +from unittest.mock import patch import pytest +from django.utils import timezone +from freezegun import freeze_time -from sentry.dynamic_sampling.tasks import fetch_projects_with_total_volumes +from sentry.dynamic_sampling import generate_rules +from sentry.dynamic_sampling.tasks import prioritise_projects +from sentry.snuba.metrics import TransactionMRI +from sentry.testutils import BaseMetricsLayerTestCase, SnubaTestCase, TestCase +from sentry.testutils.helpers import Feature +MOCK_DATETIME = datetime(2023, 8, 7, 0, 0, 0, tzinfo=timezone.utc) -@pytest.mark.django_db -def test_simple_no_data(default_project): - _ = [ - { - "org_id": [default_project.organization.id], - "project_id": [default_project.id], - }, - ] - project_volumes_total = fetch_projects_with_total_volumes() - assert project_volumes_total == defaultdict(list) +@freeze_time(MOCK_DATETIME) +class TestPrioritiseProjectsTask(BaseMetricsLayerTestCase, TestCase, SnubaTestCase): + @property + def now(self): + return MOCK_DATETIME + + def create_project_and_add_metrics(self, name, count, org): + # Create 4 projects + proj = self.create_project(name=name, organization=org) + + # disable all biases + proj.update_option( + "sentry:dynamic_sampling_biases", + [ + {"id": "boostEnvironments", "active": False}, + {"id": "ignoreHealthChecks", "active": False}, + {"id": "boostLatestRelease", "active": False}, + {"id": "boostKeyTransactions", "active": False}, + ], + ) + # Store performance metrics for proj A + self.store_performance_metric( + name=TransactionMRI.COUNT_PER_ROOT_PROJECT.value, + tags={"transaction": "foo_transaction"}, + hours_before_now=1, + value=count, + project_id=proj.id, + org_id=org.id, + ) + return proj + + @patch("sentry.dynamic_sampling.rules.base.quotas.get_blended_sample_rate") + def test_prioritise_projects_simple(self, get_blended_sample_rate): + 
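+        # The expected rates asserted below follow from the adjustment model:
+        # counts (9, 7, 3, 1) average to 5 transactions per project, a boost is
+        # capped at count / blended_rate (full sampling), and each new rate is
+        # 0.25 * new_count / old_count, giving 1/6, ~0.1964, 0.375 and 1.0.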
get_blended_sample_rate.return_value = 0.25 + # Create a org + test_org = self.create_organization(name="sample-org") + + # Create 4 projects + proj_a = self.create_project_and_add_metrics("a", 9, test_org) + proj_b = self.create_project_and_add_metrics("b", 7, test_org) + proj_c = self.create_project_and_add_metrics("c", 3, test_org) + proj_d = self.create_project_and_add_metrics("d", 1, test_org) + + with Feature({"organizations:ds-prioritise-by-project-bias": True}): + prioritise_projects() + + # we expect only uniform rule + assert generate_rules(proj_a)[0]["samplingValue"] == { + "type": "sampleRate", + "value": pytest.approx(0.16666666666666666), + } + assert generate_rules(proj_b)[0]["samplingValue"] == { + "type": "sampleRate", + "value": pytest.approx(0.19642857142857142), + } + assert generate_rules(proj_c)[0]["samplingValue"] == {"type": "sampleRate", "value": 0.375} + assert generate_rules(proj_d)[0]["samplingValue"] == {"type": "sampleRate", "value": 1.0} From 4ad92cf944ba501cc3254cd8f03ddefb7cc87661 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Mon, 20 Feb 2023 17:17:45 +0100 Subject: [PATCH 22/39] remove second utils.py --- src/sentry/dynamic_sampling/__init__.py | 2 +- .../dynamic_sampling/rules/helpers/latest_releases.py | 3 +-- .../rules/helpers/prioritise_project.py | 2 +- src/sentry/dynamic_sampling/rules/helpers/utils.py | 10 ---------- src/sentry/dynamic_sampling/rules/utils.py | 9 ++++++++- src/sentry/dynamic_sampling/tasks.py | 2 +- 6 files changed, 12 insertions(+), 16 deletions(-) delete mode 100644 src/sentry/dynamic_sampling/rules/helpers/utils.py diff --git a/src/sentry/dynamic_sampling/__init__.py b/src/sentry/dynamic_sampling/__init__.py index 5f175ff3779dfe..0fee9f3a048807 100644 --- a/src/sentry/dynamic_sampling/__init__.py +++ b/src/sentry/dynamic_sampling/__init__.py @@ -13,7 +13,6 @@ ProjectBoostedReleases, ) from .rules.helpers.time_to_adoptions import LATEST_RELEASE_TTAS, Platform -from .rules.helpers.utils import get_redis_client_for_ds from .rules.logging import should_log_rules_change from .rules.utils import ( BOOSTED_KEY_TRANSACTION_LIMIT, @@ -21,6 +20,7 @@ RESERVED_IDS, RuleType, get_enabled_user_biases, + get_redis_client_for_ds, get_rule_hash, get_supported_biases_ids, get_user_biases, diff --git a/src/sentry/dynamic_sampling/rules/helpers/latest_releases.py b/src/sentry/dynamic_sampling/rules/helpers/latest_releases.py index 15350e05c7a84b..6033cbf2db6d6e 100644 --- a/src/sentry/dynamic_sampling/rules/helpers/latest_releases.py +++ b/src/sentry/dynamic_sampling/rules/helpers/latest_releases.py @@ -7,8 +7,7 @@ from pytz import UTC from sentry.dynamic_sampling.rules.helpers.time_to_adoptions import Platform -from sentry.dynamic_sampling.rules.helpers.utils import get_redis_client_for_ds -from sentry.dynamic_sampling.rules.utils import BOOSTED_RELEASES_LIMIT +from sentry.dynamic_sampling.rules.utils import BOOSTED_RELEASES_LIMIT, get_redis_client_for_ds from sentry.models import Project, Release ENVIRONMENT_SEPARATOR = ":e:" diff --git a/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py b/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py index 71e468c022ad65..71a2a6ed3411dd 100644 --- a/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py +++ b/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py @@ -1,6 +1,6 @@ from typing import TYPE_CHECKING -from sentry.dynamic_sampling.rules.helpers.utils import get_redis_client_for_ds +from sentry.dynamic_sampling.rules.utils import 
get_redis_client_for_ds if TYPE_CHECKING: from sentry.models import Project diff --git a/src/sentry/dynamic_sampling/rules/helpers/utils.py b/src/sentry/dynamic_sampling/rules/helpers/utils.py deleted file mode 100644 index e00ebf57b0002c..00000000000000 --- a/src/sentry/dynamic_sampling/rules/helpers/utils.py +++ /dev/null @@ -1,10 +0,0 @@ -from typing import Any - -from django.conf import settings - -from sentry.utils import redis - - -def get_redis_client_for_ds() -> Any: - cluster_key = getattr(settings, "SENTRY_DYNAMIC_SAMPLING_RULES_REDIS_CLUSTER", "default") - return redis.redis_clusters.get(cluster_key) diff --git a/src/sentry/dynamic_sampling/rules/utils.py b/src/sentry/dynamic_sampling/rules/utils.py index 1f752655f845df..882a2add957c2b 100644 --- a/src/sentry/dynamic_sampling/rules/utils.py +++ b/src/sentry/dynamic_sampling/rules/utils.py @@ -1,7 +1,9 @@ from enum import Enum from typing import Any, Dict, List, Literal, Optional, Set, Tuple, TypedDict, Union -from sentry.utils import json +from django.conf import settings + +from sentry.utils import json, redis BOOSTED_RELEASES_LIMIT = 10 BOOSTED_KEY_TRANSACTION_LIMIT = 10 @@ -180,3 +182,8 @@ def apply_dynamic_factor(base_sample_rate: float, x: float) -> float: ) return float(x / x**base_sample_rate) + + +def get_redis_client_for_ds() -> Any: + cluster_key = getattr(settings, "SENTRY_DYNAMIC_SAMPLING_RULES_REDIS_CLUSTER", "default") + return redis.redis_clusters.get(cluster_key) diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py index 42fccdb864de0e..d716fa62266ccc 100644 --- a/src/sentry/dynamic_sampling/tasks.py +++ b/src/sentry/dynamic_sampling/tasks.py @@ -6,7 +6,7 @@ from sentry.dynamic_sampling.models.adjustment_models import DSProject as DSProject from sentry.dynamic_sampling.prioritise_projects import fetch_projects_with_total_volumes from sentry.dynamic_sampling.rules.helpers.prioritise_project import _generate_cache_key -from sentry.dynamic_sampling.rules.helpers.utils import get_redis_client_for_ds +from sentry.dynamic_sampling.rules.utils import get_redis_client_for_ds from sentry.models import Organization, Project from sentry.tasks.base import instrumented_task from sentry.tasks.relay import schedule_invalidate_project_config From 74876cfbda734b762ce7f45f3d9fbc8143390d51 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Mon, 20 Feb 2023 17:19:21 +0100 Subject: [PATCH 23/39] rename to projects_with_tx_count --- src/sentry/dynamic_sampling/tasks.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py index d716fa62266ccc..a55a07683ccf42 100644 --- a/src/sentry/dynamic_sampling/tasks.py +++ b/src/sentry/dynamic_sampling/tasks.py @@ -27,8 +27,8 @@ def prioritise_projects() -> None: metrics.incr("sentry.tasks.dynamic_sampling.prioritise_projects.start", sample_rate=1.0) with metrics.timer("sentry.tasks.dynamic_sampling.prioritise_projects", sample_rate=1.0): - for org_id, project_id_with_count_per_root in fetch_projects_with_total_volumes().items(): - process_projects_sample_rates(org_id, project_id_with_count_per_root) + for org_id, projects_with_tx_count in fetch_projects_with_total_volumes().items(): + process_projects_sample_rates(org_id, projects_with_tx_count) @instrumented_task( @@ -38,7 +38,7 @@ def prioritise_projects() -> None: max_retries=5, ) # type: ignore def process_projects_sample_rates( - organization_id: int, project_id_with_count_per_root: 
Sequence[Sequence[int]] + organization_id: int, projects_with_tx_count: Sequence[Sequence[int]] ) -> None: """ Takes a single org id and a list of project ids @@ -47,19 +47,17 @@ def process_projects_sample_rates( # Check if feature flag is enabled: if features.has("organizations:ds-prioritise-by-project-bias", organization): with metrics.timer("sentry.tasks.dynamic_sampling.process_projects_sample_rates.core"): - adjust_sample_rates(organization_id, project_id_with_count_per_root) + adjust_sample_rates(organization_id, projects_with_tx_count) -def adjust_sample_rates( - org_id: int, project_id_with_count_per_root: Sequence[Sequence[int]] -) -> None: +def adjust_sample_rates(org_id: int, projects_with_tx_count: Sequence[Sequence[int]]) -> None: """ This function apply model and adjust sample rate per project in org and store it in DS redis cluster, then we invalidate project config so relay can reread it, and we'll inject it from redis cache. """ projects = [] - for project_id, count_per_root in project_id_with_count_per_root: + for project_id, count_per_root in projects_with_tx_count: project = Project.objects.get_from_cache(id=project_id) sample_rate = quotas.get_blended_sample_rate(project) if sample_rate is None: From 86e8bbeafcd7266cfb6931a98e38e32cf92dda7a Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Tue, 21 Feb 2023 09:42:40 +0100 Subject: [PATCH 24/39] fix MOCK_DATETIME --- tests/sentry/dynamic_sampling/test_prioritise_projects.py | 7 +++++-- tests/sentry/dynamic_sampling/test_tasks.py | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/sentry/dynamic_sampling/test_prioritise_projects.py b/tests/sentry/dynamic_sampling/test_prioritise_projects.py index 5e3c91d8734651..f72a829b6ad141 100644 --- a/tests/sentry/dynamic_sampling/test_prioritise_projects.py +++ b/tests/sentry/dynamic_sampling/test_prioritise_projects.py @@ -1,12 +1,15 @@ -from datetime import datetime, timezone +from datetime import timedelta +from django.utils import timezone from freezegun import freeze_time from sentry.dynamic_sampling.prioritise_projects import fetch_projects_with_total_volumes from sentry.snuba.metrics import TransactionMRI from sentry.testutils import BaseMetricsLayerTestCase, SnubaTestCase, TestCase -MOCK_DATETIME = datetime(2023, 8, 7, 0, 0, 0, tzinfo=timezone.utc) +MOCK_DATETIME = (timezone.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 +) @freeze_time(MOCK_DATETIME) diff --git a/tests/sentry/dynamic_sampling/test_tasks.py b/tests/sentry/dynamic_sampling/test_tasks.py index 925cbc515cdbab..fea9d92e8e9277 100644 --- a/tests/sentry/dynamic_sampling/test_tasks.py +++ b/tests/sentry/dynamic_sampling/test_tasks.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import timedelta from unittest.mock import patch import pytest @@ -11,7 +11,9 @@ from sentry.testutils import BaseMetricsLayerTestCase, SnubaTestCase, TestCase from sentry.testutils.helpers import Feature -MOCK_DATETIME = datetime(2023, 8, 7, 0, 0, 0, tzinfo=timezone.utc) +MOCK_DATETIME = (timezone.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 +) @freeze_time(MOCK_DATETIME) From eb970b5cbe1974ad18cb881db1e4a44b894c9c92 Mon Sep 17 00:00:00 2001 From: Nar Saynorath Date: Tue, 21 Feb 2023 10:03:46 -0500 Subject: [PATCH 25/39] chore(view-hierarchy): Add proguard file size to span (#44826) Tracking the file size so we can understand its impact on timing From baef1cfa19ad72dcc016410eb99cfb80aa57a683 Mon Sep 17 00:00:00 2001 From: 
Andrii Soldatenko Date: Tue, 21 Feb 2023 16:49:27 +0100 Subject: [PATCH 26/39] switch to redis HSET, rename test --- src/sentry/dynamic_sampling/rules/base.py | 6 ++-- .../rules/helpers/prioritise_project.py | 10 +++--- src/sentry/dynamic_sampling/tasks.py | 7 ++-- .../test_adjusments_models.py | 2 +- .../dynamic_sampling/test_generate_rules.py | 33 ++++++++++--------- 5 files changed, 31 insertions(+), 27 deletions(-) diff --git a/src/sentry/dynamic_sampling/rules/base.py b/src/sentry/dynamic_sampling/rules/base.py index 453e75bf5d69fc..e8165c5de10966 100644 --- a/src/sentry/dynamic_sampling/rules/base.py +++ b/src/sentry/dynamic_sampling/rules/base.py @@ -5,7 +5,9 @@ from sentry import quotas from sentry.dynamic_sampling.rules.biases.base import Bias, BiasParams from sentry.dynamic_sampling.rules.combine import get_relay_biases_combinator -from sentry.dynamic_sampling.rules.helpers.prioritise_project import get_cached_sample_rate +from sentry.dynamic_sampling.rules.helpers.prioritise_project import ( + get_prioritise_by_project_sample_rate, +) from sentry.dynamic_sampling.rules.logging import log_rules from sentry.dynamic_sampling.rules.utils import PolymorphicRule, RuleType, get_enabled_user_biases from sentry.models import Project @@ -19,7 +21,7 @@ def get_guarded_blended_sample_rate(project: Project) -> float: if sample_rate is None: raise Exception("get_blended_sample_rate returns none") - return get_cached_sample_rate(project, default_sample_rate=float(sample_rate)) + return get_prioritise_by_project_sample_rate(project, default_sample_rate=float(sample_rate)) def _get_rules_of_enabled_biases( diff --git a/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py b/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py index 71a2a6ed3411dd..85cab068e2fd0f 100644 --- a/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py +++ b/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py @@ -6,19 +6,19 @@ from sentry.models import Project -def _generate_cache_key(org_id: int, project_id: int) -> str: - return f"ds::o:{org_id}:p:{project_id}:prioritise_projects" +def _generate_cache_key(org_id: int) -> str: + return f"ds::o:{org_id}:prioritise_projects" -def get_cached_sample_rate(project: "Project", default_sample_rate: float) -> float: +def get_prioritise_by_project_sample_rate(project: "Project", default_sample_rate: float) -> float: """ This function returns cached sample rate from prioritise by project celery task or fallback to None """ redis_client = get_redis_client_for_ds() - cache_key = _generate_cache_key(project.organization.id, project.id) + cache_key = _generate_cache_key(project.organization.id) try: - cached_sample_rate = float(redis_client.get(name=cache_key)) + cached_sample_rate = float(redis_client.hget(cache_key, project.id)) except (TypeError, ValueError): cached_sample_rate = None return cached_sample_rate if cached_sample_rate else default_sample_rate diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py index a55a07683ccf42..f42207593316fa 100644 --- a/src/sentry/dynamic_sampling/tasks.py +++ b/src/sentry/dynamic_sampling/tasks.py @@ -70,12 +70,11 @@ def adjust_sample_rates(org_id: int, projects_with_tx_count: Sequence[Sequence[i redis_client = get_redis_client_for_ds() for ds_project in ds_projects: - # TODO: Check that sample rate between 0 < sample_rate < 1.0 # hash, key, value - redis_client.set( - _generate_cache_key(project_id=ds_project.id, org_id=org_id), + redis_client.hset( + 
_generate_cache_key(org_id=org_id), + ds_project.id, ds_project.new_sample_rate, # redis stores is as string - 60 * 60 * 24, ) schedule_invalidate_project_config( project_id=ds_project.id, trigger="dynamic_sampling_prioritise_project_bias" diff --git a/tests/sentry/dynamic_sampling/test_adjusments_models.py b/tests/sentry/dynamic_sampling/test_adjusments_models.py index 4e25eef82dcf1d..49c7659de2bc2a 100644 --- a/tests/sentry/dynamic_sampling/test_adjusments_models.py +++ b/tests/sentry/dynamic_sampling/test_adjusments_models.py @@ -6,7 +6,7 @@ from sentry.dynamic_sampling.models.adjustment_models import DSProject as P -def test_adjust_sample_rates_org_wo_projects(): +def test_adjust_sample_rates_org_with_no_projects(): p = AdjustedModel(projects=[]) assert p.adjust_sample_rates() == [] diff --git a/tests/sentry/dynamic_sampling/test_generate_rules.py b/tests/sentry/dynamic_sampling/test_generate_rules.py index 71a9ea3143d306..9bcb6435c9ab22 100644 --- a/tests/sentry/dynamic_sampling/test_generate_rules.py +++ b/tests/sentry/dynamic_sampling/test_generate_rules.py @@ -86,18 +86,16 @@ def test_generate_rules_return_only_uniform_if_sample_rate_is_100_and_other_rule ) +@pytest.mark.django_db @patch("sentry.dynamic_sampling.rules.base.get_enabled_user_biases") @patch("sentry.dynamic_sampling.rules.base.quotas.get_blended_sample_rate") def test_generate_rules_return_uniform_rules_with_rate( - get_blended_sample_rate, get_enabled_user_biases + get_blended_sample_rate, get_enabled_user_biases, default_project ): # it means no enabled user biases get_enabled_user_biases.return_value = {} get_blended_sample_rate.return_value = 0.1 - # since we mock get_blended_sample_rate function - # no need to create real project in DB - fake_project = MagicMock() - assert generate_rules(fake_project) == [ + assert generate_rules(default_project) == [ { "active": True, "condition": {"inner": [], "op": "and"}, @@ -106,12 +104,11 @@ def test_generate_rules_return_uniform_rules_with_rate( "type": "trace", } ] - get_blended_sample_rate.assert_called_with(fake_project) get_enabled_user_biases.assert_called_with( - fake_project.get_option("sentry:dynamic_sampling_biases", None) + default_project.get_option("sentry:dynamic_sampling_biases", None) ) validate_sampling_configuration( - json.dumps({"rules": [], "rulesV2": generate_rules(fake_project)}) + json.dumps({"rules": [], "rulesV2": generate_rules(default_project)}) ) @@ -353,15 +350,22 @@ def test_generate_rules_return_uniform_rules_and_key_transaction_rule_with_many_ ) +@pytest.mark.django_db @patch("sentry.dynamic_sampling.rules.base.quotas.get_blended_sample_rate") def test_generate_rules_return_uniform_rule_with_100_rate_and_without_env_rule( - get_blended_sample_rate, + get_blended_sample_rate, default_project ): get_blended_sample_rate.return_value = 1.0 - # since we mock get_blended_sample_rate function - # no need to create real project in DB - fake_project = MagicMock() - assert generate_rules(fake_project) == [ + default_project.update_option( + "sentry:dynamic_sampling_biases", + [ + {"id": "boostEnvironments", "active": False}, + {"id": "ignoreHealthChecks", "active": False}, + {"id": "boostLatestRelease", "active": False}, + {"id": "boostKeyTransactions", "active": False}, + ], + ) + assert generate_rules(default_project) == [ { "active": True, "condition": {"inner": [], "op": "and"}, @@ -370,9 +374,8 @@ def test_generate_rules_return_uniform_rule_with_100_rate_and_without_env_rule( "type": "trace", }, ] - 
get_blended_sample_rate.assert_called_with(fake_project) validate_sampling_configuration( - json.dumps({"rules": [], "rulesV2": generate_rules(fake_project)}) + json.dumps({"rules": [], "rulesV2": generate_rules(default_project)}) ) From e8f066fb6daded5406683cf4f557247f06d64a59 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Tue, 21 Feb 2023 17:21:22 +0100 Subject: [PATCH 27/39] undo incorrect change --- src/sentry/dynamic_sampling/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py index f42207593316fa..5bcc874064e106 100644 --- a/src/sentry/dynamic_sampling/tasks.py +++ b/src/sentry/dynamic_sampling/tasks.py @@ -28,7 +28,7 @@ def prioritise_projects() -> None: metrics.incr("sentry.tasks.dynamic_sampling.prioritise_projects.start", sample_rate=1.0) with metrics.timer("sentry.tasks.dynamic_sampling.prioritise_projects", sample_rate=1.0): for org_id, projects_with_tx_count in fetch_projects_with_total_volumes().items(): - process_projects_sample_rates(org_id, projects_with_tx_count) + process_projects_sample_rates.delay(org_id, projects_with_tx_count) @instrumented_task( From a4af4f40d5b1e9a12d097791c051007a3be9e8ae Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Wed, 22 Feb 2023 09:09:06 +0100 Subject: [PATCH 28/39] add pexpire key --- src/sentry/dynamic_sampling/tasks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py index 5bcc874064e106..4e5abbb63daa71 100644 --- a/src/sentry/dynamic_sampling/tasks.py +++ b/src/sentry/dynamic_sampling/tasks.py @@ -14,6 +14,7 @@ CHUNK_SIZE = 1000 MAX_SECONDS = 60 +CACHE_KEY_TTL = 24 * 60 * 60 * 1000 logger = logging.getLogger(__name__) @@ -71,11 +72,13 @@ def adjust_sample_rates(org_id: int, projects_with_tx_count: Sequence[Sequence[i redis_client = get_redis_client_for_ds() for ds_project in ds_projects: # hash, key, value + cache_key = _generate_cache_key(org_id=org_id) redis_client.hset( - _generate_cache_key(org_id=org_id), + cache_key, ds_project.id, ds_project.new_sample_rate, # redis stores is as string ) + redis_client.pexpire(cache_key, CACHE_KEY_TTL) schedule_invalidate_project_config( project_id=ds_project.id, trigger="dynamic_sampling_prioritise_project_bias" ) From a5a81c02c976296846ec846d7af7d5b6b6634c87 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Thu, 23 Feb 2023 12:12:49 +0100 Subject: [PATCH 29/39] refactor celery task --- .../dynamic_sampling/prioritise_projects.py | 5 ++-- src/sentry/dynamic_sampling/rules/utils.py | 4 +++ src/sentry/dynamic_sampling/tasks.py | 26 +++++++++++++------ tests/sentry/dynamic_sampling/test_tasks.py | 6 ++--- 4 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/sentry/dynamic_sampling/prioritise_projects.py b/src/sentry/dynamic_sampling/prioritise_projects.py index 3bf1cc7289d70f..3e593aa5facc4d 100644 --- a/src/sentry/dynamic_sampling/prioritise_projects.py +++ b/src/sentry/dynamic_sampling/prioritise_projects.py @@ -2,7 +2,7 @@ import time from collections import defaultdict from datetime import datetime, timedelta -from typing import Mapping, Sequence +from typing import Mapping, Sequence, Tuple from snuba_sdk import ( Column, @@ -17,6 +17,7 @@ Request, ) +from sentry.dynamic_sampling.rules.utils import OrganizationId, ProjectId from sentry.sentry_metrics.indexer.strings import TRANSACTION_METRICS_NAMES from sentry.snuba.dataset import Dataset, EntityKey from 
sentry.snuba.metrics.naming_layer.mri import TransactionMRI @@ -27,7 +28,7 @@ CHUNK_SIZE = 1000 -def fetch_projects_with_total_volumes() -> Mapping[int, Sequence[Sequence[int]]]: +def fetch_projects_with_total_volumes() -> Mapping[OrganizationId, Sequence[Tuple[ProjectId, int]]]: """ This function fetch with pagination orgs and projects with count per root project """ diff --git a/src/sentry/dynamic_sampling/rules/utils.py b/src/sentry/dynamic_sampling/rules/utils.py index 882a2add957c2b..56861cc584ae31 100644 --- a/src/sentry/dynamic_sampling/rules/utils.py +++ b/src/sentry/dynamic_sampling/rules/utils.py @@ -16,6 +16,10 @@ IGNORE_HEALTH_CHECKS_FACTOR = 5 +ProjectId = int +OrganizationId = int + + class ActivatableBias(TypedDict): """ A bias that can be activated, where activated means that the bias is enabled. diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py index 4e5abbb63daa71..13a49d65740efc 100644 --- a/src/sentry/dynamic_sampling/tasks.py +++ b/src/sentry/dynamic_sampling/tasks.py @@ -1,12 +1,12 @@ import logging -from typing import Sequence +from typing import Sequence, Tuple from sentry import features, quotas from sentry.dynamic_sampling.models.adjustment_models import AdjustedModel from sentry.dynamic_sampling.models.adjustment_models import DSProject as DSProject from sentry.dynamic_sampling.prioritise_projects import fetch_projects_with_total_volumes from sentry.dynamic_sampling.rules.helpers.prioritise_project import _generate_cache_key -from sentry.dynamic_sampling.rules.utils import get_redis_client_for_ds +from sentry.dynamic_sampling.rules.utils import OrganizationId, ProjectId, get_redis_client_for_ds from sentry.models import Organization, Project from sentry.tasks.base import instrumented_task from sentry.tasks.relay import schedule_invalidate_project_config @@ -39,33 +39,43 @@ def prioritise_projects() -> None: max_retries=5, ) # type: ignore def process_projects_sample_rates( - organization_id: int, projects_with_tx_count: Sequence[Sequence[int]] + org_id: OrganizationId, projects_with_tx_count: Sequence[Tuple[ProjectId, int]] ) -> None: """ Takes a single org id and a list of project ids """ - organization = Organization.objects.get_from_cache(id=organization_id) + organization = Organization.objects.get_from_cache(id=org_id) # Check if feature flag is enabled: if features.has("organizations:ds-prioritise-by-project-bias", organization): with metrics.timer("sentry.tasks.dynamic_sampling.process_projects_sample_rates.core"): - adjust_sample_rates(organization_id, projects_with_tx_count) + adjust_sample_rates(org_id, projects_with_tx_count) -def adjust_sample_rates(org_id: int, projects_with_tx_count: Sequence[Sequence[int]]) -> None: +def adjust_sample_rates( + org_id: int, projects_with_tx_count: Sequence[Tuple[ProjectId, int]] +) -> None: """ This function apply model and adjust sample rate per project in org and store it in DS redis cluster, then we invalidate project config so relay can reread it, and we'll inject it from redis cache. 
""" projects = [] + project_ids_with_counts = {} for project_id, count_per_root in projects_with_tx_count: - project = Project.objects.get_from_cache(id=project_id) + project_ids_with_counts[project_id] = count_per_root + + for project in Project.objects.get_many_from_cache(project_ids_with_counts.keys()): sample_rate = quotas.get_blended_sample_rate(project) if sample_rate is None: continue projects.append( - DSProject(id=project_id, count_per_root=count_per_root, blended_sample_rate=sample_rate) + DSProject( + id=project.id, + count_per_root=project_ids_with_counts[project.id], + blended_sample_rate=sample_rate, + ) ) + model = AdjustedModel(projects=projects) ds_projects = model.adjust_sample_rates() diff --git a/tests/sentry/dynamic_sampling/test_tasks.py b/tests/sentry/dynamic_sampling/test_tasks.py index fea9d92e8e9277..e0818caa1f9a67 100644 --- a/tests/sentry/dynamic_sampling/test_tasks.py +++ b/tests/sentry/dynamic_sampling/test_tasks.py @@ -9,7 +9,6 @@ from sentry.dynamic_sampling.tasks import prioritise_projects from sentry.snuba.metrics import TransactionMRI from sentry.testutils import BaseMetricsLayerTestCase, SnubaTestCase, TestCase -from sentry.testutils.helpers import Feature MOCK_DATETIME = (timezone.now() - timedelta(days=1)).replace( hour=0, minute=0, second=0, microsecond=0 @@ -59,8 +58,9 @@ def test_prioritise_projects_simple(self, get_blended_sample_rate): proj_c = self.create_project_and_add_metrics("c", 3, test_org) proj_d = self.create_project_and_add_metrics("d", 1, test_org) - with Feature({"organizations:ds-prioritise-by-project-bias": True}): - prioritise_projects() + with self.feature({"organizations:ds-prioritise-by-project-bias": True}): + with self.tasks(): + prioritise_projects() # we expect only uniform rule assert generate_rules(proj_a)[0]["samplingValue"] == { From 441d6a5be5eeefff097ab177e122db69f4c60b24 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Thu, 23 Feb 2023 12:22:09 +0100 Subject: [PATCH 30/39] add timeouts to celery tasks --- src/sentry/dynamic_sampling/tasks.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py index 13a49d65740efc..7b0e8328cd8ba5 100644 --- a/src/sentry/dynamic_sampling/tasks.py +++ b/src/sentry/dynamic_sampling/tasks.py @@ -24,6 +24,8 @@ queue="dynamicsampling", default_retry_delay=5, max_retries=5, + soft_time_limit=2 * 60 * 60, # 2hours + time_limit=2 * 60 * 60 + 5, ) # type: ignore def prioritise_projects() -> None: metrics.incr("sentry.tasks.dynamic_sampling.prioritise_projects.start", sample_rate=1.0) @@ -37,6 +39,8 @@ def prioritise_projects() -> None: queue="dynamicsampling", default_retry_delay=5, max_retries=5, + soft_time_limit=25 * 60, # 25 mins + time_limit=2 * 60 + 5, ) # type: ignore def process_projects_sample_rates( org_id: OrganizationId, projects_with_tx_count: Sequence[Tuple[ProjectId, int]] From 49ec08706bd9b092365c55ed2f451c3b94080d85 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Thu, 23 Feb 2023 14:27:26 +0100 Subject: [PATCH 31/39] refactor using redis pipelines --- src/sentry/dynamic_sampling/tasks.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py index 7b0e8328cd8ba5..a19d5a6c6b6c73 100644 --- a/src/sentry/dynamic_sampling/tasks.py +++ b/src/sentry/dynamic_sampling/tasks.py @@ -84,15 +84,17 @@ def adjust_sample_rates( ds_projects = model.adjust_sample_rates() redis_client = 
From 49ec08706bd9b092365c55ed2f451c3b94080d85 Mon Sep 17 00:00:00 2001
From: Andrii Soldatenko
Date: Thu, 23 Feb 2023 14:27:26 +0100
Subject: [PATCH 31/39] refactor using redis pipelines

---
 src/sentry/dynamic_sampling/tasks.py | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py
index 7b0e8328cd8ba5..a19d5a6c6b6c73 100644
--- a/src/sentry/dynamic_sampling/tasks.py
+++ b/src/sentry/dynamic_sampling/tasks.py
@@ -84,15 +84,17 @@ def adjust_sample_rates(
     ds_projects = model.adjust_sample_rates()

     redis_client = get_redis_client_for_ds()
-    for ds_project in ds_projects:
-        # hash, key, value
-        cache_key = _generate_cache_key(org_id=org_id)
-        redis_client.hset(
-            cache_key,
-            ds_project.id,
-            ds_project.new_sample_rate,  # redis stores it as a string
-        )
-        redis_client.pexpire(cache_key, CACHE_KEY_TTL)
-        schedule_invalidate_project_config(
-            project_id=ds_project.id, trigger="dynamic_sampling_prioritise_project_bias"
-        )
+    with redis_client.pipeline(transaction=False) as pipeline:
+        for ds_project in ds_projects:
+            # hash, key, value
+            cache_key = _generate_cache_key(org_id=org_id)
+            pipeline.hset(
+                cache_key,
+                ds_project.id,
+                ds_project.new_sample_rate,  # redis stores it as a string
+            )
+            pipeline.pexpire(cache_key, CACHE_KEY_TTL)
+            schedule_invalidate_project_config(
+                project_id=ds_project.id, trigger="dynamic_sampling_prioritise_project_bias"
+            )
+        pipeline.execute()
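The pipeline introduced above buffers each hset/pexpire pair client-side and flushes them
with a single execute(), so 2xN network round trips become one; transaction=False skips the
MULTI/EXEC wrapper, which is fine here because the writes are independent. (Note that
schedule_invalidate_project_config is an ordinary function call and is not deferred by the
pipeline.) A toy comparison in plain redis-py, with hypothetical data:

    import redis

    r = redis.Redis()
    rates = {"42": 0.5, "43": 0.04}  # hypothetical project_id -> sample rate

    # Without a pipeline: one network round trip per command.
    for project_id, rate in rates.items():
        r.hset("org:1234", project_id, rate)

    # With a pipeline: commands are buffered and sent in one batch.
    with r.pipeline(transaction=False) as pipe:
        for project_id, rate in rates.items():
            pipe.hset("org:1234", project_id, rate)
        pipe.pexpire("org:1234", 24 * 60 * 60 * 1000)
        replies = pipe.execute()  # list of replies, in command order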
From 6019c9bc4ae1bc1cbb2e4df512ef821eef0b067b Mon Sep 17 00:00:00 2001
From: Andrii Soldatenko
Date: Thu, 23 Feb 2023 17:16:10 +0100
Subject: [PATCH 32/39] remove "c:transactions/count_per_root_project@none" from relay list

---
 src/sentry/relay/config/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/sentry/relay/config/__init__.py b/src/sentry/relay/config/__init__.py
index b8ba7b79f23c45..e31ac182eaceab 100644
--- a/src/sentry/relay/config/__init__.py
+++ b/src/sentry/relay/config/__init__.py
@@ -497,7 +497,6 @@ def _filter_option_to_config_setting(flt: _FilterSpec, setting: str) -> Mapping[
     [
         "s:transactions/user@none",
         "d:transactions/duration@millisecond",
-        "c:transactions/count_per_root_project@none",
     ]
 )

From bbdd98531cf709b69db4bf1808f537572b64a06f Mon Sep 17 00:00:00 2001
From: Andrii Soldatenko
Date: Fri, 24 Feb 2023 13:41:54 +0100
Subject: [PATCH 33/39] add option to filter the number of orgs

---
 src/sentry/dynamic_sampling/prioritise_projects.py       | 3 +++
 src/sentry/options/defaults.py                           | 1 +
 .../sentry/dynamic_sampling/test_prioritise_projects.py  | 6 ++++--
 tests/sentry/dynamic_sampling/test_tasks.py              | 9 ++++++---
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/sentry/dynamic_sampling/prioritise_projects.py b/src/sentry/dynamic_sampling/prioritise_projects.py
index 3e593aa5facc4d..51fd1348da75cb 100644
--- a/src/sentry/dynamic_sampling/prioritise_projects.py
+++ b/src/sentry/dynamic_sampling/prioritise_projects.py
@@ -17,6 +17,7 @@
     Request,
 )

+from sentry import options
 from sentry.dynamic_sampling.rules.utils import OrganizationId, ProjectId
 from sentry.sentry_metrics.indexer.strings import TRANSACTION_METRICS_NAMES
 from sentry.snuba.dataset import Dataset, EntityKey
@@ -35,6 +36,7 @@ def fetch_projects_with_total_volumes() -> Mapping[OrganizationId, Sequence[Tupl
     aggregated_projects = defaultdict(list)
     start_time = time.time()
     offset = 0
+    sample_rate = int(options.get("dynamic-sampling.prioritise_projects.sample_rate") * 100)
     while (time.time() - start_time) < MAX_SECONDS:
         query = (
             Query(
@@ -46,6 +48,7 @@
                 ],
                 groupby=[Column("org_id"), Column("project_id")],
                 where=[
+                    Condition(Function("modulo", [Column("org_id"), 100]), Op.LT, sample_rate),
                     Condition(Column("timestamp"), Op.GTE, datetime.utcnow() - timedelta(hours=6)),
                     Condition(Column("timestamp"), Op.LT, datetime.utcnow()),
                     Condition(

diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py
index 49722477d00cf1..afa19f16427a44 100644
--- a/src/sentry/options/defaults.py
+++ b/src/sentry/options/defaults.py
@@ -606,6 +606,7 @@
 # System-wide option that observes latest releases on transactions and caches these values to be used later in
 # project config computation. This is a temporary option to monitor the performance of this feature.
 register("dynamic-sampling:boost-latest-release", default=False)
+register("dynamic-sampling.prioritise_projects.sample_rate", default=0.0)

 # Killswitch for deriving code mappings
 register("post_process.derive-code-mappings", default=True)

diff --git a/tests/sentry/dynamic_sampling/test_prioritise_projects.py b/tests/sentry/dynamic_sampling/test_prioritise_projects.py
index f72a829b6ad141..86fc51187e446f 100644
--- a/tests/sentry/dynamic_sampling/test_prioritise_projects.py
+++ b/tests/sentry/dynamic_sampling/test_prioritise_projects.py
@@ -30,6 +30,8 @@ def test_simple_one_org_one_project(self):
             project_id=p1.id,
             org_id=org1.id,
         )
-
-        results = fetch_projects_with_total_volumes()
+        with self.settings(
+            SENTRY_OPTIONS={"dynamic-sampling.prioritise_projects.sample_rate": 1.0}
+        ):
+            results = fetch_projects_with_total_volumes()
         assert results[org1.id] == [(p1.id, 1.0)]

diff --git a/tests/sentry/dynamic_sampling/test_tasks.py b/tests/sentry/dynamic_sampling/test_tasks.py
index e0818caa1f9a67..7ef7a4c8259596 100644
--- a/tests/sentry/dynamic_sampling/test_tasks.py
+++ b/tests/sentry/dynamic_sampling/test_tasks.py
@@ -58,9 +58,12 @@ def test_prioritise_projects_simple(self, get_blended_sample_rate):
         proj_c = self.create_project_and_add_metrics("c", 3, test_org)
         proj_d = self.create_project_and_add_metrics("d", 1, test_org)

-        with self.feature({"organizations:ds-prioritise-by-project-bias": True}):
-            with self.tasks():
-                prioritise_projects()
+        with self.settings(
+            SENTRY_OPTIONS={"dynamic-sampling.prioritise_projects.sample_rate": 1.0}
+        ):
+            with self.feature({"organizations:ds-prioritise-by-project-bias": True}):
+                with self.tasks():
+                    prioritise_projects()

         # we expect only uniform rule
         assert generate_rules(proj_a)[0]["samplingValue"] == {
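The new where-clause condition samples whole organizations deterministically: modulo(org_id,
100) maps every org to a stable bucket in 0-99, and only buckets below int(option_value * 100)
pass, so the 0.0 default selects no orgs and 1.0 selects all of them. The same predicate in
plain Python (a sketch of the logic, not code from the diff):

    def org_is_sampled(org_id: int, option_value: float) -> bool:
        # Mirrors Condition(Function("modulo", [Column("org_id"), 100]), Op.LT, sample_rate)
        sample_rate = int(option_value * 100)
        return org_id % 100 < sample_rate

    assert not org_is_sampled(1234, 0.0)  # default: nothing passes
    assert org_is_sampled(1234, 1.0)      # full rollout: everything passes
    # At 0.25 only buckets 0-24 pass, stably per org id.
    assert org_is_sampled(2410, 0.25) and not org_is_sampled(2450, 0.25)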
diff --git a/tests/sentry/dynamic_sampling/test_tasks.py b/tests/sentry/dynamic_sampling/test_tasks.py
index 7ef7a4c8259596..28e675d60798f6 100644
--- a/tests/sentry/dynamic_sampling/test_tasks.py
+++ b/tests/sentry/dynamic_sampling/test_tasks.py
@@ -58,9 +58,7 @@ def test_prioritise_projects_simple(self, get_blended_sample_rate):
         proj_c = self.create_project_and_add_metrics("c", 3, test_org)
         proj_d = self.create_project_and_add_metrics("d", 1, test_org)

-        with self.settings(
-            SENTRY_OPTIONS={"dynamic-sampling.prioritise_projects.sample_rate": 1.0}
-        ):
+        with self.options({"dynamic-sampling.prioritise_projects.sample_rate": 1.0}):
             with self.feature({"organizations:ds-prioritise-by-project-bias": True}):
                 with self.tasks():
                     prioritise_projects()

From c4f28ae729e47cebaa715caff966edce88b5191c Mon Sep 17 00:00:00 2001
From: Andrii Soldatenko
Date: Mon, 27 Feb 2023 09:18:08 +0100
Subject: [PATCH 35/39] fix tests

---
 .../MONOLITH/with_metrics.pysnap | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/sentry/relay/snapshots/test_config/test_project_config_with_breakdown/MONOLITH/with_metrics.pysnap b/tests/sentry/relay/snapshots/test_config/test_project_config_with_breakdown/MONOLITH/with_metrics.pysnap
index 15dcff86085662..b418948bfa707a 100644
--- a/tests/sentry/relay/snapshots/test_config/test_project_config_with_breakdown/MONOLITH/with_metrics.pysnap
+++ b/tests/sentry/relay/snapshots/test_config/test_project_config_with_breakdown/MONOLITH/with_metrics.pysnap
@@ -921,7 +921,6 @@ transactionMetrics:
     limit: 10
   extractCustomTags: []
   extractMetrics:
-  - c:transactions/count_per_root_project@none
   - d:transactions/duration@millisecond
   - s:transactions/user@none
   - d:transactions/measurements.app_start_cold@millisecond
Column("metric_id"), - Op.EQ, - TRANSACTION_METRICS_NAMES[TransactionMRI.COUNT_PER_ROOT_PROJECT.value], - ), + Condition(Column("metric_id"), Op.EQ, metric_id), ], granularity=Granularity(3600), orderby=[ diff --git a/src/sentry/sentry_metrics/indexer/__init__.py b/src/sentry/sentry_metrics/indexer/__init__.py index 1b325d4a6c00ba..56e7298564c65b 100644 --- a/src/sentry/sentry_metrics/indexer/__init__.py +++ b/src/sentry/sentry_metrics/indexer/__init__.py @@ -18,3 +18,4 @@ record = StringIndexer().record resolve = StringIndexer().resolve reverse_resolve = StringIndexer().reverse_resolve + resolve_shared_org = StringIndexer().resolve_shared_org diff --git a/src/sentry/sentry_metrics/indexer/base.py b/src/sentry/sentry_metrics/indexer/base.py index 24d980949bb3b9..1c99375bc04953 100644 --- a/src/sentry/sentry_metrics/indexer/base.py +++ b/src/sentry/sentry_metrics/indexer/base.py @@ -197,7 +197,14 @@ class StringIndexer(Service): Check `sentry.snuba.metrics` for convenience functions. """ - __all__ = ("record", "resolve", "reverse_resolve", "bulk_record") + __all__ = ( + "record", + "resolve", + "reverse_resolve", + "bulk_record", + "resolve_shared_org", + "reverse_shared_org_resolve", + ) def bulk_record( self, use_case_id: UseCaseKey, org_strings: Mapping[int, Set[str]] From b3ce62e3f74c67e33a9e6e374a30e73fc758413c Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Mon, 27 Feb 2023 11:36:10 +0100 Subject: [PATCH 37/39] fix tests --- .../REGION/with_metrics.pysnap | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/sentry/relay/snapshots/test_config/test_project_config_with_breakdown/REGION/with_metrics.pysnap b/tests/sentry/relay/snapshots/test_config/test_project_config_with_breakdown/REGION/with_metrics.pysnap index dafd78e0813794..b57d061b7f1d58 100644 --- a/tests/sentry/relay/snapshots/test_config/test_project_config_with_breakdown/REGION/with_metrics.pysnap +++ b/tests/sentry/relay/snapshots/test_config/test_project_config_with_breakdown/REGION/with_metrics.pysnap @@ -921,7 +921,6 @@ transactionMetrics: limit: 10 extractCustomTags: [] extractMetrics: - - c:transactions/count_per_root_project@none - d:transactions/duration@millisecond - s:transactions/user@none - d:transactions/measurements.app_start_cold@millisecond From 08b8fa540b0c8a1c45897eaa0858acf074223a48 Mon Sep 17 00:00:00 2001 From: Andrii Soldatenko Date: Mon, 27 Feb 2023 16:08:28 +0100 Subject: [PATCH 38/39] update referrer for prioritise by project bias --- src/sentry/dynamic_sampling/prioritise_projects.py | 3 ++- src/sentry/snuba/referrer.py | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/sentry/dynamic_sampling/prioritise_projects.py b/src/sentry/dynamic_sampling/prioritise_projects.py index accae52a038808..3fe7678b6cb002 100644 --- a/src/sentry/dynamic_sampling/prioritise_projects.py +++ b/src/sentry/dynamic_sampling/prioritise_projects.py @@ -22,6 +22,7 @@ from sentry.sentry_metrics import indexer from sentry.snuba.dataset import Dataset, EntityKey from sentry.snuba.metrics.naming_layer.mri import TransactionMRI +from sentry.snuba.referrer import Referrer from sentry.utils.snuba import raw_snql_query logger = logging.getLogger(__name__) @@ -68,7 +69,7 @@ def fetch_projects_with_total_volumes() -> Mapping[OrganizationId, Sequence[Tupl ) data = raw_snql_query( request, - referrer="dynamic_sampling.fetch_projects_with_count_per_root_total_volumes", + referrer=Referrer.DYNAMIC_SAMPLING_DISTRIBUTION_FETCH_PROJECTS_WITH_COUNT_PER_ROOT.value, )["data"] count = len(data) more_results = count > 
From 08b8fa540b0c8a1c45897eaa0858acf074223a48 Mon Sep 17 00:00:00 2001
From: Andrii Soldatenko
Date: Mon, 27 Feb 2023 16:08:28 +0100
Subject: [PATCH 38/39] update referrer for prioritise by project bias

---
 src/sentry/dynamic_sampling/prioritise_projects.py | 3 ++-
 src/sentry/snuba/referrer.py                       | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/sentry/dynamic_sampling/prioritise_projects.py b/src/sentry/dynamic_sampling/prioritise_projects.py
index accae52a038808..3fe7678b6cb002 100644
--- a/src/sentry/dynamic_sampling/prioritise_projects.py
+++ b/src/sentry/dynamic_sampling/prioritise_projects.py
@@ -22,6 +22,7 @@
 from sentry.sentry_metrics import indexer
 from sentry.snuba.dataset import Dataset, EntityKey
 from sentry.snuba.metrics.naming_layer.mri import TransactionMRI
+from sentry.snuba.referrer import Referrer
 from sentry.utils.snuba import raw_snql_query

 logger = logging.getLogger(__name__)
@@ -68,7 +69,7 @@
         )
         data = raw_snql_query(
             request,
-            referrer="dynamic_sampling.fetch_projects_with_count_per_root_total_volumes",
+            referrer=Referrer.DYNAMIC_SAMPLING_DISTRIBUTION_FETCH_PROJECTS_WITH_COUNT_PER_ROOT.value,
         )["data"]
         count = len(data)
         more_results = count > CHUNK_SIZE

diff --git a/src/sentry/snuba/referrer.py b/src/sentry/snuba/referrer.py
index cac27799ada2d3..cbbd4400291b6c 100644
--- a/src/sentry/snuba/referrer.py
+++ b/src/sentry/snuba/referrer.py
@@ -349,6 +349,9 @@ class ReferrerBase(Enum):
     DYNAMIC_SAMPLING_DISTRIBUTION_GET_MOST_RECENT_DAY_WITH_TRANSACTIONS = (
         "dynamic-sampling.distribution.get-most-recent-day-with-transactions"
     )
+    DYNAMIC_SAMPLING_DISTRIBUTION_FETCH_PROJECTS_WITH_COUNT_PER_ROOT = (
+        "dynamic_sampling.distribution.fetch_projects_with_count_per_root_total_volumes"
+    )
     EVENTSTORE_GET_EVENT_BY_ID_NODESTORE = "eventstore.get_event_by_id_nodestore"
     EVENTSTORE_GET_EVENTS = "eventstore.get_events"
     EVENTSTORE_GET_NEXT_OR_PREV_EVENT_ID = "eventstore.get_next_or_prev_event_id"

From 382f62ee7f029b4ac540560d756d00f7cf9a6983 Mon Sep 17 00:00:00 2001
From: Andrii Soldatenko
Date: Tue, 28 Feb 2023 13:05:54 +0100
Subject: [PATCH 39/39] run prioritise_projects cron job every hour

---
 src/sentry/conf/server.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py
index 0c235468f09493..0622941272bafb 100644
--- a/src/sentry/conf/server.py
+++ b/src/sentry/conf/server.py
@@ -853,8 +853,8 @@ def SOCIAL_AUTH_DEFAULT_USERNAME():
     },
     "dynamic-sampling-prioritize-projects": {
         "task": "sentry.dynamic_sampling.tasks.prioritise_projects",
-        # Run daily at 08:00
-        "schedule": crontab(hour=8, minute=0),
+        # Run every hour, on the hour
+        "schedule": crontab(minute=0),
         "options": {"expires": 3600},
     },
 }
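For clarity on the schedule change: crontab(minute=0) fires at the top of every hour, where
the old crontab(hour=8, minute=0) matched only 08:00; the unchanged "expires": 3600 option
then drops any queued run that has not started before the next one is due, so hourly runs
cannot pile up. A small sketch of the two schedules (assuming celery is importable):

    from celery.schedules import crontab

    # Before: once a day, at 08:00.
    daily = crontab(hour=8, minute=0)

    # After: at minute 0 of every hour (00:00, 01:00, ..., 23:00).
    hourly = crontab(minute=0)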