diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py index 3324f0e5068200..0622941272bafb 100644 --- a/src/sentry/conf/server.py +++ b/src/sentry/conf/server.py @@ -611,6 +611,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): "sentry.tasks.user_report", "sentry.profiles.task", "sentry.release_health.tasks", + "sentry.dynamic_sampling.tasks", "sentry.utils.suspect_resolutions.get_suspect_resolutions", "sentry.utils.suspect_resolutions_releases.get_suspect_resolutions_releases", "sentry.tasks.derive_code_mappings", @@ -663,6 +664,10 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): "releasemonitor", routing_key="releasemonitor", ), + Queue( + "dynamicsampling", + routing_key="dynamicsampling", + ), Queue("incidents", routing_key="incidents"), Queue("incident_snapshots", routing_key="incident_snapshots"), Queue("incidents", routing_key="incidents"), @@ -846,6 +851,12 @@ def SOCIAL_AUTH_DEFAULT_USERNAME(): "schedule": timedelta(hours=1), "options": {"expires": 3600}, }, + "dynamic-sampling-prioritize-projects": { + "task": "sentry.dynamic_sampling.tasks.prioritise_projects", + # Run job every 1 hour + "schedule": crontab(minute=0), + "options": {"expires": 3600}, + }, } BGTASKS = { diff --git a/src/sentry/dynamic_sampling/__init__.py b/src/sentry/dynamic_sampling/__init__.py index 848ec9e37cd0b0..0fee9f3a048807 100644 --- a/src/sentry/dynamic_sampling/__init__.py +++ b/src/sentry/dynamic_sampling/__init__.py @@ -11,7 +11,6 @@ LatestReleaseBias, LatestReleaseParams, ProjectBoostedReleases, - get_redis_client_for_ds, ) from .rules.helpers.time_to_adoptions import LATEST_RELEASE_TTAS, Platform from .rules.logging import should_log_rules_change @@ -21,6 +20,7 @@ RESERVED_IDS, RuleType, get_enabled_user_biases, + get_redis_client_for_ds, get_rule_hash, get_supported_biases_ids, get_user_biases, diff --git a/src/sentry/dynamic_sampling/models/adjustment_models.py b/src/sentry/dynamic_sampling/models/adjustment_models.py new file mode 100644 index 00000000000000..f5194b97a105f5 --- /dev/null +++ b/src/sentry/dynamic_sampling/models/adjustment_models.py @@ -0,0 +1,81 @@ +import statistics +from dataclasses import dataclass +from typing import List + + +@dataclass +class DSProject: + id: int + count_per_root: float + blended_sample_rate: float + new_count_per_root: float = 0.0 + new_sample_rate: float = 0.0 + + +@dataclass +class AdjustedModel: + """ + Model which can adjust sample rate per project inside ORG based on this new counter metric from Relay. 
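+
+    Worked illustration (numbers mirror the accompanying unit tests, so this is
+    descriptive rather than a contract): four projects with root counts 9, 7, 3
+    and 1 at a blended sample rate of 0.25 are pulled towards the mean count of
+    5, giving new counts 6.0, 5.5, 4.5 and 4.0 and new sample rates of roughly
+    0.167, 0.196, 0.375 and 1.0 respectively.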
+ """ + + projects: List[DSProject] + # Right now we are not using this constant + fidelity_rate: float = 0.4 # TODO: discuss this constant + + def adjust_sample_rates(self) -> List[DSProject]: + if len(self.projects) < 2: + # When we have one project we just remind sample rates + if len(self.projects) == 1: + self.projects[0].new_sample_rate = self.projects[0].blended_sample_rate + return self.projects + + # Step 1: sort projects by count per root project + sorted_projects = list(sorted(self.projects, key=lambda x: (x.count_per_root, -x.id))) + + # Step 2: find average project + average = statistics.mean([p.count_per_root for p in sorted_projects]) + + # Step 3: + num_projects = len(sorted_projects) + half = num_projects // 2 + odd_one = num_projects % 2 + left_split = sorted_projects[:half] + # ignore middle element, since we don't have capacity to balance it + right_split = reversed(sorted_projects[half + odd_one : num_projects]) + + new_left = [] + new_right = [] + coefficient = 1.0 + for left, right in zip(left_split, right_split): + # We can't increase sample rate more than 1.0, so we calculate upper bound count + # based on project fidelity_rate + # Find an absolute difference + + # Max possible counter if sample rate would be 1.0 + max_possible_count = left.count_per_root / left.blended_sample_rate + diff = coefficient * min( + (average - left.count_per_root), + (max_possible_count - left.count_per_root), + ) + left.new_count_per_root = left.count_per_root + diff + left.new_sample_rate = left.blended_sample_rate * ( + left.new_count_per_root / left.count_per_root + ) + right.new_count_per_root = right.count_per_root - diff + right.new_sample_rate = right.blended_sample_rate * ( + right.new_count_per_root / right.count_per_root + ) + new_left.append(left) + new_right.append(right) + # This opinionated `coefficient` reduces adjustment on every step + coefficient = diff / left.new_count_per_root + + if len(sorted_projects) % 2 == 0: + return [*new_right, *reversed(new_left)] + else: + if odd_one: + mid_elements = [sorted_projects[half]] + mid_elements[0].new_count_per_root = mid_elements[0].count_per_root + else: + mid_elements = [] + return [*new_right, *mid_elements, *reversed(new_left)] diff --git a/src/sentry/dynamic_sampling/prioritise_projects.py b/src/sentry/dynamic_sampling/prioritise_projects.py new file mode 100644 index 00000000000000..3fe7678b6cb002 --- /dev/null +++ b/src/sentry/dynamic_sampling/prioritise_projects.py @@ -0,0 +1,93 @@ +import logging +import time +from collections import defaultdict +from datetime import datetime, timedelta +from typing import Mapping, Sequence, Tuple + +from snuba_sdk import ( + Column, + Condition, + Direction, + Entity, + Function, + Granularity, + Op, + OrderBy, + Query, + Request, +) + +from sentry import options +from sentry.dynamic_sampling.rules.utils import OrganizationId, ProjectId +from sentry.sentry_metrics import indexer +from sentry.snuba.dataset import Dataset, EntityKey +from sentry.snuba.metrics.naming_layer.mri import TransactionMRI +from sentry.snuba.referrer import Referrer +from sentry.utils.snuba import raw_snql_query + +logger = logging.getLogger(__name__) +MAX_SECONDS = 60 +CHUNK_SIZE = 1000 + + +def fetch_projects_with_total_volumes() -> Mapping[OrganizationId, Sequence[Tuple[ProjectId, int]]]: + """ + This function fetch with pagination orgs and projects with count per root project + """ + aggregated_projects = defaultdict(list) + start_time = time.time() + offset = 0 + sample_rate = 
int(options.get("dynamic-sampling.prioritise_projects.sample_rate") * 100) + metric_id = indexer.resolve_shared_org(str(TransactionMRI.COUNT_PER_ROOT_PROJECT.value)) + while (time.time() - start_time) < MAX_SECONDS: + query = ( + Query( + match=Entity(EntityKey.GenericOrgMetricsCounters.value), + select=[ + Function("sum", [Column("value")], "root_count_value"), + Column("org_id"), + Column("project_id"), + ], + groupby=[Column("org_id"), Column("project_id")], + where=[ + Condition(Function("modulo", [Column("org_id"), 100]), Op.LT, sample_rate), + Condition(Column("timestamp"), Op.GTE, datetime.utcnow() - timedelta(hours=6)), + Condition(Column("timestamp"), Op.LT, datetime.utcnow()), + Condition(Column("metric_id"), Op.EQ, metric_id), + ], + granularity=Granularity(3600), + orderby=[ + OrderBy(Column("org_id"), Direction.ASC), + OrderBy(Column("project_id"), Direction.ASC), + ], + ) + .set_limit(CHUNK_SIZE + 1) + .set_offset(offset) + ) + request = Request( + dataset=Dataset.PerformanceMetrics.value, app_id="dynamic_sampling", query=query + ) + data = raw_snql_query( + request, + referrer=Referrer.DYNAMIC_SAMPLING_DISTRIBUTION_FETCH_PROJECTS_WITH_COUNT_PER_ROOT.value, + )["data"] + count = len(data) + more_results = count > CHUNK_SIZE + offset += CHUNK_SIZE + + if more_results: + data = data[:-1] + + for row in data: + aggregated_projects[row["org_id"]].append((row["project_id"], row["root_count_value"])) + + if not more_results: + break + + else: + logger.error( + "", + extra={"offset": offset}, + ) + + return aggregated_projects diff --git a/src/sentry/dynamic_sampling/rules/base.py b/src/sentry/dynamic_sampling/rules/base.py index 246ab92ecc8fea..e8165c5de10966 100644 --- a/src/sentry/dynamic_sampling/rules/base.py +++ b/src/sentry/dynamic_sampling/rules/base.py @@ -5,6 +5,9 @@ from sentry import quotas from sentry.dynamic_sampling.rules.biases.base import Bias, BiasParams from sentry.dynamic_sampling.rules.combine import get_relay_biases_combinator +from sentry.dynamic_sampling.rules.helpers.prioritise_project import ( + get_prioritise_by_project_sample_rate, +) from sentry.dynamic_sampling.rules.logging import log_rules from sentry.dynamic_sampling.rules.utils import PolymorphicRule, RuleType, get_enabled_user_biases from sentry.models import Project @@ -18,7 +21,7 @@ def get_guarded_blended_sample_rate(project: Project) -> float: if sample_rate is None: raise Exception("get_blended_sample_rate returns none") - return float(sample_rate) + return get_prioritise_by_project_sample_rate(project, default_sample_rate=float(sample_rate)) def _get_rules_of_enabled_biases( diff --git a/src/sentry/dynamic_sampling/rules/helpers/latest_releases.py b/src/sentry/dynamic_sampling/rules/helpers/latest_releases.py index 313fa63f9d4cef..6033cbf2db6d6e 100644 --- a/src/sentry/dynamic_sampling/rules/helpers/latest_releases.py +++ b/src/sentry/dynamic_sampling/rules/helpers/latest_releases.py @@ -2,15 +2,13 @@ from collections import namedtuple from dataclasses import dataclass, field from datetime import datetime -from typing import Any, Callable, Dict, List, Optional, Tuple +from typing import Callable, Dict, List, Optional, Tuple -from django.conf import settings from pytz import UTC from sentry.dynamic_sampling.rules.helpers.time_to_adoptions import Platform -from sentry.dynamic_sampling.rules.utils import BOOSTED_RELEASES_LIMIT +from sentry.dynamic_sampling.rules.utils import BOOSTED_RELEASES_LIMIT, get_redis_client_for_ds from sentry.models import Project, Release -from sentry.utils import redis 
ENVIRONMENT_SEPARATOR = ":e:" BOOSTED_RELEASE_CACHE_KEY_REGEX = re.compile( @@ -18,11 +16,6 @@ ) -def get_redis_client_for_ds() -> Any: - cluster_key = getattr(settings, "SENTRY_DYNAMIC_SAMPLING_RULES_REDIS_CLUSTER", "default") - return redis.redis_clusters.get(cluster_key) - - def _get_environment_cache_key(environment: Optional[str]) -> str: return f"{ENVIRONMENT_SEPARATOR}{environment}" if environment else "" diff --git a/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py b/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py new file mode 100644 index 00000000000000..85cab068e2fd0f --- /dev/null +++ b/src/sentry/dynamic_sampling/rules/helpers/prioritise_project.py @@ -0,0 +1,24 @@ +from typing import TYPE_CHECKING + +from sentry.dynamic_sampling.rules.utils import get_redis_client_for_ds + +if TYPE_CHECKING: + from sentry.models import Project + + +def _generate_cache_key(org_id: int) -> str: + return f"ds::o:{org_id}:prioritise_projects" + + +def get_prioritise_by_project_sample_rate(project: "Project", default_sample_rate: float) -> float: + """ + This function returns cached sample rate from prioritise by project + celery task or fallback to None + """ + redis_client = get_redis_client_for_ds() + cache_key = _generate_cache_key(project.organization.id) + try: + cached_sample_rate = float(redis_client.hget(cache_key, project.id)) + except (TypeError, ValueError): + cached_sample_rate = None + return cached_sample_rate if cached_sample_rate else default_sample_rate diff --git a/src/sentry/dynamic_sampling/rules/utils.py b/src/sentry/dynamic_sampling/rules/utils.py index 1f752655f845df..56861cc584ae31 100644 --- a/src/sentry/dynamic_sampling/rules/utils.py +++ b/src/sentry/dynamic_sampling/rules/utils.py @@ -1,7 +1,9 @@ from enum import Enum from typing import Any, Dict, List, Literal, Optional, Set, Tuple, TypedDict, Union -from sentry.utils import json +from django.conf import settings + +from sentry.utils import json, redis BOOSTED_RELEASES_LIMIT = 10 BOOSTED_KEY_TRANSACTION_LIMIT = 10 @@ -14,6 +16,10 @@ IGNORE_HEALTH_CHECKS_FACTOR = 5 +ProjectId = int +OrganizationId = int + + class ActivatableBias(TypedDict): """ A bias that can be activated, where activated means that the bias is enabled. 
@@ -180,3 +186,8 @@ def apply_dynamic_factor(base_sample_rate: float, x: float) -> float: ) return float(x / x**base_sample_rate) + + +def get_redis_client_for_ds() -> Any: + cluster_key = getattr(settings, "SENTRY_DYNAMIC_SAMPLING_RULES_REDIS_CLUSTER", "default") + return redis.redis_clusters.get(cluster_key) diff --git a/src/sentry/dynamic_sampling/tasks.py b/src/sentry/dynamic_sampling/tasks.py new file mode 100644 index 00000000000000..a19d5a6c6b6c73 --- /dev/null +++ b/src/sentry/dynamic_sampling/tasks.py @@ -0,0 +1,100 @@ +import logging +from typing import Sequence, Tuple + +from sentry import features, quotas +from sentry.dynamic_sampling.models.adjustment_models import AdjustedModel +from sentry.dynamic_sampling.models.adjustment_models import DSProject as DSProject +from sentry.dynamic_sampling.prioritise_projects import fetch_projects_with_total_volumes +from sentry.dynamic_sampling.rules.helpers.prioritise_project import _generate_cache_key +from sentry.dynamic_sampling.rules.utils import OrganizationId, ProjectId, get_redis_client_for_ds +from sentry.models import Organization, Project +from sentry.tasks.base import instrumented_task +from sentry.tasks.relay import schedule_invalidate_project_config +from sentry.utils import metrics + +CHUNK_SIZE = 1000 +MAX_SECONDS = 60 +CACHE_KEY_TTL = 24 * 60 * 60 * 1000 + +logger = logging.getLogger(__name__) + + +@instrumented_task( + name="sentry.dynamic_sampling.tasks.prioritise_projects", + queue="dynamicsampling", + default_retry_delay=5, + max_retries=5, + soft_time_limit=2 * 60 * 60, # 2hours + time_limit=2 * 60 * 60 + 5, +) # type: ignore +def prioritise_projects() -> None: + metrics.incr("sentry.tasks.dynamic_sampling.prioritise_projects.start", sample_rate=1.0) + with metrics.timer("sentry.tasks.dynamic_sampling.prioritise_projects", sample_rate=1.0): + for org_id, projects_with_tx_count in fetch_projects_with_total_volumes().items(): + process_projects_sample_rates.delay(org_id, projects_with_tx_count) + + +@instrumented_task( + name="sentry.dynamic_sampling.process_projects_sample_rates", + queue="dynamicsampling", + default_retry_delay=5, + max_retries=5, + soft_time_limit=25 * 60, # 25 mins + time_limit=2 * 60 + 5, +) # type: ignore +def process_projects_sample_rates( + org_id: OrganizationId, projects_with_tx_count: Sequence[Tuple[ProjectId, int]] +) -> None: + """ + Takes a single org id and a list of project ids + """ + organization = Organization.objects.get_from_cache(id=org_id) + # Check if feature flag is enabled: + if features.has("organizations:ds-prioritise-by-project-bias", organization): + with metrics.timer("sentry.tasks.dynamic_sampling.process_projects_sample_rates.core"): + adjust_sample_rates(org_id, projects_with_tx_count) + + +def adjust_sample_rates( + org_id: int, projects_with_tx_count: Sequence[Tuple[ProjectId, int]] +) -> None: + """ + This function apply model and adjust sample rate per project in org + and store it in DS redis cluster, then we invalidate project config + so relay can reread it, and we'll inject it from redis cache. 
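+
+    Storage sketch (per _generate_cache_key and CACHE_KEY_TTL): one Redis hash
+    per org, "ds::o:{org_id}:prioritise_projects", with the project id as field
+    and the new sample rate as value, expiring after 24 hours.
+    get_prioritise_by_project_sample_rate reads this hash when
+    get_guarded_blended_sample_rate builds the project config rules.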
+ """ + projects = [] + project_ids_with_counts = {} + for project_id, count_per_root in projects_with_tx_count: + project_ids_with_counts[project_id] = count_per_root + + for project in Project.objects.get_many_from_cache(project_ids_with_counts.keys()): + sample_rate = quotas.get_blended_sample_rate(project) + if sample_rate is None: + continue + projects.append( + DSProject( + id=project.id, + count_per_root=project_ids_with_counts[project.id], + blended_sample_rate=sample_rate, + ) + ) + + model = AdjustedModel(projects=projects) + ds_projects = model.adjust_sample_rates() + + redis_client = get_redis_client_for_ds() + with redis_client.pipeline(transaction=False) as pipeline: + for ds_project in ds_projects: + # hash, key, value + cache_key = _generate_cache_key(org_id=org_id) + pipeline.hset( + cache_key, + ds_project.id, + ds_project.new_sample_rate, # redis stores is as string + ) + pipeline.pexpire(cache_key, CACHE_KEY_TTL) + schedule_invalidate_project_config( + project_id=ds_project.id, trigger="dynamic_sampling_prioritise_project_bias" + ) + pipeline.execute() diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py index 49722477d00cf1..afa19f16427a44 100644 --- a/src/sentry/options/defaults.py +++ b/src/sentry/options/defaults.py @@ -606,6 +606,7 @@ # System-wide options that observes latest releases on transactions and caches these values to be used later in # project config computation. This is temporary option to monitor the performance of this feature. register("dynamic-sampling:boost-latest-release", default=False) +register("dynamic-sampling.prioritise_projects.sample_rate", default=0.0) # Killswitch for deriving code mappings register("post_process.derive-code-mappings", default=True) diff --git a/src/sentry/sentry_metrics/indexer/__init__.py b/src/sentry/sentry_metrics/indexer/__init__.py index 1b325d4a6c00ba..56e7298564c65b 100644 --- a/src/sentry/sentry_metrics/indexer/__init__.py +++ b/src/sentry/sentry_metrics/indexer/__init__.py @@ -18,3 +18,4 @@ record = StringIndexer().record resolve = StringIndexer().resolve reverse_resolve = StringIndexer().reverse_resolve + resolve_shared_org = StringIndexer().resolve_shared_org diff --git a/src/sentry/sentry_metrics/indexer/base.py b/src/sentry/sentry_metrics/indexer/base.py index 24d980949bb3b9..1c99375bc04953 100644 --- a/src/sentry/sentry_metrics/indexer/base.py +++ b/src/sentry/sentry_metrics/indexer/base.py @@ -197,7 +197,14 @@ class StringIndexer(Service): Check `sentry.snuba.metrics` for convenience functions. 
""" - __all__ = ("record", "resolve", "reverse_resolve", "bulk_record") + __all__ = ( + "record", + "resolve", + "reverse_resolve", + "bulk_record", + "resolve_shared_org", + "reverse_shared_org_resolve", + ) def bulk_record( self, use_case_id: UseCaseKey, org_strings: Mapping[int, Set[str]] diff --git a/src/sentry/snuba/dataset.py b/src/sentry/snuba/dataset.py index 511e0eac50d9c6..fe8a9d67a3570f 100644 --- a/src/sentry/snuba/dataset.py +++ b/src/sentry/snuba/dataset.py @@ -29,4 +29,6 @@ class EntityKey(Enum): MetricsDistributions = "metrics_distributions" GenericMetricsDistributions = "generic_metrics_distributions" GenericMetricsSets = "generic_metrics_sets" + GenericMetricsCounters = "generic_metrics_counters" + GenericOrgMetricsCounters = "generic_org_metrics_counters" IssuePlatform = "search_issues" diff --git a/src/sentry/snuba/metrics/naming_layer/mri.py b/src/sentry/snuba/metrics/naming_layer/mri.py index ba1c784f5f3648..39f72f346eef13 100644 --- a/src/sentry/snuba/metrics/naming_layer/mri.py +++ b/src/sentry/snuba/metrics/naming_layer/mri.py @@ -83,6 +83,7 @@ class TransactionMRI(Enum): # Ingested USER = "s:transactions/user@none" DURATION = "d:transactions/duration@millisecond" + COUNT_PER_ROOT_PROJECT = "c:transactions/count_per_root_project@none" MEASUREMENTS_FCP = "d:transactions/measurements.fcp@millisecond" MEASUREMENTS_LCP = "d:transactions/measurements.lcp@millisecond" MEASUREMENTS_APP_START_COLD = "d:transactions/measurements.app_start_cold@millisecond" diff --git a/src/sentry/snuba/referrer.py b/src/sentry/snuba/referrer.py index cac27799ada2d3..cbbd4400291b6c 100644 --- a/src/sentry/snuba/referrer.py +++ b/src/sentry/snuba/referrer.py @@ -349,6 +349,9 @@ class ReferrerBase(Enum): DYNAMIC_SAMPLING_DISTRIBUTION_GET_MOST_RECENT_DAY_WITH_TRANSACTIONS = ( "dynamic-sampling.distribution.get-most-recent-day-with-transactions" ) + DYNAMIC_SAMPLING_DISTRIBUTION_FETCH_PROJECTS_WITH_COUNT_PER_ROOT = ( + "dynamic_sampling.distribution.fetch_projects_with_count_per_root_total_volumes" + ) EVENTSTORE_GET_EVENT_BY_ID_NODESTORE = "eventstore.get_event_by_id_nodestore" EVENTSTORE_GET_EVENTS = "eventstore.get_events" EVENTSTORE_GET_NEXT_OR_PREV_EVENT_ID = "eventstore.get_next_or_prev_event_id" diff --git a/tests/sentry/dynamic_sampling/test_adjusments_models.py b/tests/sentry/dynamic_sampling/test_adjusments_models.py new file mode 100644 index 00000000000000..49c7659de2bc2a --- /dev/null +++ b/tests/sentry/dynamic_sampling/test_adjusments_models.py @@ -0,0 +1,182 @@ +from operator import attrgetter + +import pytest + +from sentry.dynamic_sampling.models.adjustment_models import AdjustedModel +from sentry.dynamic_sampling.models.adjustment_models import DSProject as P + + +def test_adjust_sample_rates_org_with_no_projects(): + p = AdjustedModel(projects=[]) + assert p.adjust_sample_rates() == [] + + +def test_adjust_sample_rates_org_with_single_project(): + p = AdjustedModel(projects=[P(id=1, count_per_root=10, blended_sample_rate=0.4)]) + assert p.adjust_sample_rates() == [ + P(id=1, count_per_root=10, blended_sample_rate=0.4, new_sample_rate=0.4) + ] + + +def test_adjust_sample_rates_org_with_few_projects(): + projects = [ + P(id=1, count_per_root=9.0, blended_sample_rate=0.25), + P(id=2, count_per_root=7.0, blended_sample_rate=0.25), + P(id=3, count_per_root=3.0, blended_sample_rate=0.25), + P(id=4, count_per_root=1.0, blended_sample_rate=0.25), + ] + p = AdjustedModel(projects=projects) + + expected_projects = [ + P( + id=1, + count_per_root=9.0, + new_count_per_root=6.0, + 
blended_sample_rate=0.25, + new_sample_rate=pytest.approx(0.16666666666666666), + ), + P( + id=2, + count_per_root=7.0, + new_count_per_root=5.5, + blended_sample_rate=0.25, + new_sample_rate=pytest.approx(0.19642857142857142), + ), + P( + id=3, + count_per_root=3.0, + new_count_per_root=4.5, + blended_sample_rate=0.25, + new_sample_rate=0.375, + ), + P( + id=4, + count_per_root=1.0, + new_count_per_root=4.0, + blended_sample_rate=0.25, + new_sample_rate=1.0, + ), + ] + + assert p.adjust_sample_rates() == expected_projects + + +def test_adjust_sample_rates_org_with_even_num_projects(): + projects = [ + P(id=1, count_per_root=8.0, blended_sample_rate=0.25), + P(id=2, count_per_root=7.0, blended_sample_rate=0.25), + P(id=3, count_per_root=3.0, blended_sample_rate=0.25), + ] + p = AdjustedModel(projects=projects) + + expected_projects = [ + P( + id=1, + count_per_root=8.0, + new_count_per_root=5.0, + blended_sample_rate=0.25, + new_sample_rate=0.15625, + ), + P( + id=2, + count_per_root=7.0, + new_count_per_root=7.0, + blended_sample_rate=0.25, + new_sample_rate=0.0, + ), + P( + id=3, + count_per_root=3.0, + new_count_per_root=6.0, + blended_sample_rate=0.25, + new_sample_rate=0.5, + ), + ] + + assert p.adjust_sample_rates() == expected_projects + + +def test_adjust_sample_rates_org_with_same_counts_projects(): + projects = [ + P(id=1, count_per_root=9.0, blended_sample_rate=0.25), + P(id=2, count_per_root=6.0, blended_sample_rate=0.25), + P(id=3, count_per_root=6.0, blended_sample_rate=0.25), + P(id=4, count_per_root=1.0, blended_sample_rate=0.25), + ] + p = AdjustedModel(projects=projects) + + expected_projects = [ + P( + id=1, + count_per_root=9.0, + new_count_per_root=6.0, + blended_sample_rate=0.25, + new_sample_rate=pytest.approx(0.16666666666666666), + ), + P( + id=2, + count_per_root=6.0, + new_count_per_root=6.375, + blended_sample_rate=0.25, + new_sample_rate=0.265625, + ), + P( + id=3, + count_per_root=6.0, + new_count_per_root=5.625, + blended_sample_rate=0.25, + new_sample_rate=0.234375, + ), + P( + id=4, + count_per_root=1.0, + new_count_per_root=4.0, + blended_sample_rate=0.25, + new_sample_rate=1.0, + ), + ] + + assert p.adjust_sample_rates() == expected_projects + + +def test_adjust_sample_rates_org_with_counts_projects(): + projects = [ + P(id=1, count_per_root=2.0, blended_sample_rate=0.25), + P(id=2, count_per_root=10.0, blended_sample_rate=0.25), + P(id=3, count_per_root=10.0, blended_sample_rate=0.25), + P(id=4, count_per_root=10.0, blended_sample_rate=0.25), + ] + p = AdjustedModel(projects=projects) + + expected_projects = [ + P( + id=1, + count_per_root=2.0, + new_count_per_root=8.0, + blended_sample_rate=0.25, + new_sample_rate=1.0, + ), + P( + id=2, + count_per_root=10.0, + new_count_per_root=4.0, + blended_sample_rate=0.25, + new_sample_rate=0.1, + ), + P( + id=3, + count_per_root=10.0, + new_count_per_root=11.5, + blended_sample_rate=0.25, + new_sample_rate=0.2875, + ), + P( + id=4, + count_per_root=10.0, + new_count_per_root=8.5, + blended_sample_rate=0.25, + new_sample_rate=0.2125, + ), + ] + + assert sorted(p.adjust_sample_rates(), key=attrgetter("id")) == expected_projects diff --git a/tests/sentry/dynamic_sampling/test_generate_rules.py b/tests/sentry/dynamic_sampling/test_generate_rules.py index 71a9ea3143d306..9bcb6435c9ab22 100644 --- a/tests/sentry/dynamic_sampling/test_generate_rules.py +++ b/tests/sentry/dynamic_sampling/test_generate_rules.py @@ -86,18 +86,16 @@ def test_generate_rules_return_only_uniform_if_sample_rate_is_100_and_other_rule ) 
+@pytest.mark.django_db @patch("sentry.dynamic_sampling.rules.base.get_enabled_user_biases") @patch("sentry.dynamic_sampling.rules.base.quotas.get_blended_sample_rate") def test_generate_rules_return_uniform_rules_with_rate( - get_blended_sample_rate, get_enabled_user_biases + get_blended_sample_rate, get_enabled_user_biases, default_project ): # it means no enabled user biases get_enabled_user_biases.return_value = {} get_blended_sample_rate.return_value = 0.1 - # since we mock get_blended_sample_rate function - # no need to create real project in DB - fake_project = MagicMock() - assert generate_rules(fake_project) == [ + assert generate_rules(default_project) == [ { "active": True, "condition": {"inner": [], "op": "and"}, @@ -106,12 +104,11 @@ def test_generate_rules_return_uniform_rules_with_rate( "type": "trace", } ] - get_blended_sample_rate.assert_called_with(fake_project) get_enabled_user_biases.assert_called_with( - fake_project.get_option("sentry:dynamic_sampling_biases", None) + default_project.get_option("sentry:dynamic_sampling_biases", None) ) validate_sampling_configuration( - json.dumps({"rules": [], "rulesV2": generate_rules(fake_project)}) + json.dumps({"rules": [], "rulesV2": generate_rules(default_project)}) ) @@ -353,15 +350,22 @@ def test_generate_rules_return_uniform_rules_and_key_transaction_rule_with_many_ ) +@pytest.mark.django_db @patch("sentry.dynamic_sampling.rules.base.quotas.get_blended_sample_rate") def test_generate_rules_return_uniform_rule_with_100_rate_and_without_env_rule( - get_blended_sample_rate, + get_blended_sample_rate, default_project ): get_blended_sample_rate.return_value = 1.0 - # since we mock get_blended_sample_rate function - # no need to create real project in DB - fake_project = MagicMock() - assert generate_rules(fake_project) == [ + default_project.update_option( + "sentry:dynamic_sampling_biases", + [ + {"id": "boostEnvironments", "active": False}, + {"id": "ignoreHealthChecks", "active": False}, + {"id": "boostLatestRelease", "active": False}, + {"id": "boostKeyTransactions", "active": False}, + ], + ) + assert generate_rules(default_project) == [ { "active": True, "condition": {"inner": [], "op": "and"}, @@ -370,9 +374,8 @@ def test_generate_rules_return_uniform_rule_with_100_rate_and_without_env_rule( "type": "trace", }, ] - get_blended_sample_rate.assert_called_with(fake_project) validate_sampling_configuration( - json.dumps({"rules": [], "rulesV2": generate_rules(fake_project)}) + json.dumps({"rules": [], "rulesV2": generate_rules(default_project)}) ) diff --git a/tests/sentry/dynamic_sampling/test_prioritise_projects.py b/tests/sentry/dynamic_sampling/test_prioritise_projects.py new file mode 100644 index 00000000000000..5cf5f25dc6423d --- /dev/null +++ b/tests/sentry/dynamic_sampling/test_prioritise_projects.py @@ -0,0 +1,52 @@ +from datetime import timedelta + +from django.utils import timezone +from freezegun import freeze_time + +from sentry.dynamic_sampling.prioritise_projects import fetch_projects_with_total_volumes +from sentry.snuba.metrics import TransactionMRI +from sentry.testutils import BaseMetricsLayerTestCase, SnubaTestCase, TestCase + +MOCK_DATETIME = (timezone.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 +) + + +@freeze_time(MOCK_DATETIME) +class PrioritiseProjectsSnubaQueryTest(BaseMetricsLayerTestCase, TestCase, SnubaTestCase): + @property + def now(self): + return MOCK_DATETIME + + def test_simple_one_org_one_project(self): + org1 = self.create_organization("test-org") + p1 = 
self.create_project(organization=org1) + + self.store_performance_metric( + name=TransactionMRI.COUNT_PER_ROOT_PROJECT.value, + tags={"transaction": "foo_transaction"}, + hours_before_now=1, + value=1, + project_id=p1.id, + org_id=org1.id, + ) + with self.options({"dynamic-sampling.prioritise_projects.sample_rate": 1.0}): + results = fetch_projects_with_total_volumes() + assert results[org1.id] == [(p1.id, 1.0)] + + def test_simple_one_org_one_project_but_filtered_by_option(self): + org1 = self.create_organization("test-org2") + p1 = self.create_project(organization=org1) + + self.store_performance_metric( + name=TransactionMRI.COUNT_PER_ROOT_PROJECT.value, + tags={"transaction": "foo_transaction2"}, + hours_before_now=1, + value=1, + project_id=p1.id, + org_id=org1.id, + ) + with self.options({"dynamic-sampling.prioritise_projects.sample_rate": 0}): + results = fetch_projects_with_total_volumes() + # No data because rate is too small + assert results[org1.id] == [] diff --git a/tests/sentry/dynamic_sampling/test_tasks.py b/tests/sentry/dynamic_sampling/test_tasks.py new file mode 100644 index 00000000000000..28e675d60798f6 --- /dev/null +++ b/tests/sentry/dynamic_sampling/test_tasks.py @@ -0,0 +1,76 @@ +from datetime import timedelta +from unittest.mock import patch + +import pytest +from django.utils import timezone +from freezegun import freeze_time + +from sentry.dynamic_sampling import generate_rules +from sentry.dynamic_sampling.tasks import prioritise_projects +from sentry.snuba.metrics import TransactionMRI +from sentry.testutils import BaseMetricsLayerTestCase, SnubaTestCase, TestCase + +MOCK_DATETIME = (timezone.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 +) + + +@freeze_time(MOCK_DATETIME) +class TestPrioritiseProjectsTask(BaseMetricsLayerTestCase, TestCase, SnubaTestCase): + @property + def now(self): + return MOCK_DATETIME + + def create_project_and_add_metrics(self, name, count, org): + # Create 4 projects + proj = self.create_project(name=name, organization=org) + + # disable all biases + proj.update_option( + "sentry:dynamic_sampling_biases", + [ + {"id": "boostEnvironments", "active": False}, + {"id": "ignoreHealthChecks", "active": False}, + {"id": "boostLatestRelease", "active": False}, + {"id": "boostKeyTransactions", "active": False}, + ], + ) + # Store performance metrics for proj A + self.store_performance_metric( + name=TransactionMRI.COUNT_PER_ROOT_PROJECT.value, + tags={"transaction": "foo_transaction"}, + hours_before_now=1, + value=count, + project_id=proj.id, + org_id=org.id, + ) + return proj + + @patch("sentry.dynamic_sampling.rules.base.quotas.get_blended_sample_rate") + def test_prioritise_projects_simple(self, get_blended_sample_rate): + get_blended_sample_rate.return_value = 0.25 + # Create a org + test_org = self.create_organization(name="sample-org") + + # Create 4 projects + proj_a = self.create_project_and_add_metrics("a", 9, test_org) + proj_b = self.create_project_and_add_metrics("b", 7, test_org) + proj_c = self.create_project_and_add_metrics("c", 3, test_org) + proj_d = self.create_project_and_add_metrics("d", 1, test_org) + + with self.options({"dynamic-sampling.prioritise_projects.sample_rate": 1.0}): + with self.feature({"organizations:ds-prioritise-by-project-bias": True}): + with self.tasks(): + prioritise_projects() + + # we expect only uniform rule + assert generate_rules(proj_a)[0]["samplingValue"] == { + "type": "sampleRate", + "value": pytest.approx(0.16666666666666666), + } + assert 
generate_rules(proj_b)[0]["samplingValue"] == { + "type": "sampleRate", + "value": pytest.approx(0.19642857142857142), + } + assert generate_rules(proj_c)[0]["samplingValue"] == {"type": "sampleRate", "value": 0.375} + assert generate_rules(proj_d)[0]["samplingValue"] == {"type": "sampleRate", "value": 1.0}
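
How the pieces above fit together, as a rough end-to-end sketch. This is not part of the patch: it calls the Celery task bodies synchronously and assumes a configured Sentry environment with the generic metrics dataset and the dynamic sampling Redis cluster available.

# Illustrative sketch only (reviewer note), not shipped code.
from sentry.dynamic_sampling.prioritise_projects import fetch_projects_with_total_volumes
from sentry.dynamic_sampling.rules.helpers.prioritise_project import (
    get_prioritise_by_project_sample_rate,
)
from sentry.dynamic_sampling.tasks import process_projects_sample_rates

# 1. Hourly entry point: sample orgs via "dynamic-sampling.prioritise_projects.sample_rate"
#    and sum c:transactions/count_per_root_project@none per (org, project) over the last 6h.
volumes = fetch_projects_with_total_volumes()  # {org_id: [(project_id, root_count), ...]}

# 2. Per org: run AdjustedModel, write the new rates into the DS Redis hash and
#    invalidate project configs (gated on "organizations:ds-prioritise-by-project-bias").
for org_id, projects_with_tx_count in volumes.items():
    process_projects_sample_rates(org_id, projects_with_tx_count)

# 3. Project-config generation then prefers the cached per-project rate and falls
#    back to the blended rate when nothing is cached for the project, e.g.
#    get_prioritise_by_project_sample_rate(project, default_sample_rate=blended_rate).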