Skip to content

Commit 0ee6c61

Browse files
committed
fix(backup): Fix query logic for dangling model exports
There are a small number of models that have no unambiguous direct connection to their relocation scope's root model - these are called "dangling" models. The key factor that defines them, and makes them difficult to handle, is that we cannot use our "query already exported foreign keys" filtering methodology to select only the models relevant to our export targets, because these models have no foreign keys that connect them back to the root of that target. For example, `TimeSeriesSnapshot` has no foreign keys at all, see: https://tinyurl.com/27z4x6tk. In cases like the one above, we ended up exporting ALL of the `TimeSeriesSnapshot`s in the database - clearly a very bad outcome when we only want to export those related to a specific org! A better approach is to define custom filtering logic for these models, thereby enabling them to use "adjacent" models in the model graph to select only models that we care about for a given export. In the example above, we query all `Incident`s filtered down by our previous exports to get a sneak-peek at the set of `IncidentSnapshot`s (a set that is currently empty due to going in reverse dependency order), then use that information to work backwards to grab the `TimeSeriesSnapshot`s we need. The upshot is that this commit introduces a generic method for constructing filtered queries for a specific model, the overridable `query_for_relocation_export`. Issue: getsentry/team-ospo#203
1 parent 11fcf1d commit 0ee6c61

File tree

13 files changed

+357
-74
lines changed

13 files changed

+357
-74
lines changed

fixtures/backup/model_dependencies/detailed.json

Lines changed: 229 additions & 0 deletions
Large diffs are not rendered by default.

fixtures/backup/model_dependencies/sorted.json

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
"sentry.file",
1414
"sentry.broadcast",
1515
"sentry.deletedorganization",
16-
"sentry.email",
1716
"sentry.organization",
1817
"sentry.organizationmapping",
1918
"sentry.identityprovider",
@@ -40,12 +39,10 @@
4039
"sentry.controltombstone",
4140
"sentry.projecttransactionthresholdoverride",
4241
"sentry.projecttransactionthreshold",
43-
"sentry.useremail",
4442
"sentry.userip",
4543
"sentry.userpermission",
4644
"sentry.userrole",
4745
"sentry.userroleuser",
48-
"sentry.timeseriessnapshot",
4946
"sentry.discoversavedquery",
5047
"sentry.monitor",
5148
"sentry.monitorlocation",
@@ -80,6 +77,7 @@
8077
"sentry.eventattachment",
8178
"sentry.environment",
8279
"sentry.environmentproject",
80+
"sentry.email",
8381
"sentry.customdynamicsamplingrule",
8482
"sentry.customdynamicsamplingruleproject",
8583
"sentry.distribution",
@@ -188,6 +186,7 @@
188186
"sentry.rulefirehistory",
189187
"sentry.servicehook",
190188
"sentry.teamreplica",
189+
"sentry.useremail",
191190
"sentry.userreport",
192191
"sentry.notificationaction",
193192
"sentry.alertrule",
@@ -213,9 +212,10 @@
213212
"sentry.dashboardwidgetquery",
214213
"sentry.sentryappavatar",
215214
"sentry.pendingincidentsnapshot",
216-
"sentry.incidentsnapshot",
215+
"sentry.timeseriessnapshot",
217216
"sentry.incidentactivity",
218217
"sentry.incidentsubscription",
219218
"sentry.incidenttrigger",
220-
"sentry.monitorincident"
219+
"sentry.monitorincident",
220+
"sentry.incidentsnapshot"
221221
]

fixtures/backup/model_dependencies/truncate.json

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
"sentry_file",
1414
"sentry_broadcast",
1515
"sentry_deletedorganization",
16-
"sentry_email",
1716
"sentry_organization",
1817
"sentry_organizationmapping",
1918
"sentry_identityprovider",
@@ -40,12 +39,10 @@
4039
"sentry_controltombstone",
4140
"sentry_projecttransactionthresholdoverride",
4241
"sentry_projecttransactionthreshold",
43-
"sentry_useremail",
4442
"sentry_userip",
4543
"sentry_userpermission",
4644
"sentry_userrole",
4745
"sentry_userrole_users",
48-
"sentry_timeseriessnapshot",
4946
"sentry_discoversavedquery",
5047
"sentry_monitor",
5148
"sentry_monitorlocation",
@@ -80,6 +77,7 @@
8077
"sentry_eventattachment",
8178
"sentry_environment",
8279
"sentry_environmentproject",
80+
"sentry_email",
8381
"sentry_customdynamicsamplingrule",
8482
"sentry_customdynamicsamplingruleproject",
8583
"sentry_distribution",
@@ -188,6 +186,7 @@
188186
"sentry_rulefirehistory",
189187
"sentry_servicehook",
190188
"sentry_teamreplica",
189+
"sentry_useremail",
191190
"sentry_userreport",
192191
"sentry_notificationaction",
193192
"sentry_alertrule",
@@ -213,9 +212,10 @@
213212
"sentry_dashboardwidgetquery",
214213
"sentry_sentryappavatar",
215214
"sentry_pendingincidentsnapshot",
216-
"sentry_incidentsnapshot",
215+
"sentry_timeseriessnapshot",
217216
"sentry_incidentactivity",
218217
"sentry_incidentsubscription",
219218
"sentry_incidenttrigger",
220-
"sentry_monitorincident"
219+
"sentry_monitorincident",
220+
"sentry_incidentsnapshot"
221221
]

src/sentry/backup/dependencies.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ class ModelRelations:
129129
dangling: Optional[bool]
130130
foreign_keys: dict[str, ForeignField]
131131
model: Type[models.base.Model]
132+
relocation_dependencies: set[Type[models.base.Model]]
132133
relocation_scope: RelocationScope | set[RelocationScope]
133134
silos: list[SiloMode]
134135
table_name: str
@@ -421,6 +422,8 @@ def dependencies() -> dict[NormalizedModelName, ModelRelations]:
421422
dangling=None,
422423
foreign_keys=foreign_keys,
423424
model=model,
425+
# We'll fill this in after the entire dictionary is populated.
426+
relocation_dependencies=set(),
424427
relocation_scope=getattr(model, "__relocation_scope__", RelocationScope.Excluded),
425428
silos=list(
426429
getattr(model._meta, "silo_limit", ModelSiloLimit(SiloMode.MONOLITH)).modes
@@ -491,9 +494,12 @@ def resolve_dangling(seen: Set[NormalizedModelName], model_name: NormalizedModel
491494
seen.remove(model_name)
492495
return model_relations.dangling
493496

494-
for model_name in model_dependencies_dict.keys():
495-
if model_name not in relocation_root_models:
496-
resolve_dangling(set(), model_name)
497+
for model_name, model_relations in model_dependencies_dict.items():
498+
resolve_dangling(set(), model_name)
499+
model_relations.relocation_dependencies = {
500+
model_dependencies_dict[NormalizedModelName(rd)].model
501+
for rd in getattr(model_relations.model, "__relocation_dependencies__", set())
502+
}
497503

498504
return model_dependencies_dict
499505

@@ -524,7 +530,7 @@ def sorted_dependencies() -> list[Type[models.base.Model]]:
524530
changed = False
525531
while model_dependencies_dict:
526532
model_deps = model_dependencies_dict.pop()
527-
deps = model_deps.flatten()
533+
deps = model_deps.flatten().union(model_deps.relocation_dependencies)
528534
model = model_deps.model
529535

530536
# If all of the models in the dependency list are either already

src/sentry/backup/exports.py

Lines changed: 16 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,9 @@ def _export(
5454
"""
5555

5656
# Import here to prevent circular module resolutions.
57-
from sentry.models.email import Email
5857
from sentry.models.organization import Organization
5958
from sentry.models.organizationmember import OrganizationMember
6059
from sentry.models.user import User
61-
from sentry.models.useremail import UserEmail
6260

6361
allowed_relocation_scopes = scope.value
6462
pk_map = PrimaryKeyMap()
@@ -67,32 +65,27 @@ def _export(
6765
filters = []
6866
if filter_by is not None:
6967
filters.append(filter_by)
70-
user_pks = []
71-
7268
if filter_by.model == Organization:
73-
org_pks = [o.pk for o in Organization.objects.filter(slug__in=filter_by.values)]
74-
user_pks = [
75-
o.user_id
76-
for o in OrganizationMember.objects.filter(organization_id__in=set(org_pks))
77-
]
78-
filters.append(Filter(User, "pk", set(user_pks)))
79-
elif filter_by.model == User:
80-
if filter_by.field == "pk":
81-
user_pks = [u.pk for u in User.objects.filter(id__in=filter_by.values)]
82-
elif filter_by.field == "username":
83-
user_pks = [u.pk for u in User.objects.filter(username__in=filter_by.values)]
84-
else:
69+
if filter_by.field not in {"pk", "id", "slug"}:
8570
raise ValueError(
86-
"Filter arguments must only apply to `User`'s `pk`' or `username` fields"
71+
"Filter arguments must only apply to `Organization`'s `slug` field"
72+
)
73+
74+
org_pks = set(
75+
Organization.objects.filter(slug__in=filter_by.values).values_list("id", flat=True)
76+
)
77+
user_pks = set(
78+
OrganizationMember.objects.filter(organization_id__in=org_pks).values_list(
79+
"user_id", flat=True
8780
)
81+
)
82+
filters.append(Filter(User, "pk", set(user_pks)))
83+
elif filter_by.model == User:
84+
if filter_by.field not in {"pk", "id", "username"}:
85+
raise ValueError("Filter arguments must only apply to `User`'s `username` field")
8886
else:
8987
raise ValueError("Filter arguments must only apply to `Organization` or `User` models")
9088

91-
# `Sentry.Email` models don't have any explicit dependencies on `Sentry.User`, so we need to
92-
# find them manually via `UserEmail`.
93-
emails = [ue.email for ue in UserEmail.objects.filter(user__in=user_pks)]
94-
filters.append(Filter(Email, "email", set(emails)))
95-
9689
def filter_objects(queryset_iterator):
9790
# Intercept each value from the queryset iterator, ensure that it has the correct relocation
9891
# scope and that all of its dependencies have already been exported. If they have, store it
@@ -128,17 +121,12 @@ def filter_objects(queryset_iterator):
128121

129122
def yield_objects():
130123
from sentry.db.models.base import BaseModel
131-
from sentry.models.team import Team
132-
133-
deps = dependencies()
134124

135125
# Collate the objects to be serialized.
136126
for model in sorted_dependencies():
137127
if not issubclass(model, BaseModel):
138128
continue
139129

140-
model_name = get_model_name(model)
141-
model_relations = deps[model_name]
142130
possible_relocation_scopes = model.get_possible_relocation_scopes()
143131
includable = possible_relocation_scopes & allowed_relocation_scopes
144132
if not includable or model._meta.proxy:
@@ -155,27 +143,7 @@ def yield_objects():
155143
if f.model == model:
156144
query[f.field + "__in"] = f.values
157145
q &= Q(**query)
158-
159-
# TODO: actor refactor. Remove this conditional. For now, we do no filtering on
160-
# teams.
161-
if model_name != get_model_name(Team):
162-
# Create a filter for each possible FK reference to constrain the amount of data
163-
# being sent over from the database. We only want models where every FK field
164-
# references into a model whose PK we've already exported (or `NULL`, if the FK
165-
# field is nullable).
166-
for field_name, foreign_field in model_relations.foreign_keys.items():
167-
foreign_field_model_name = get_model_name(foreign_field.model)
168-
matched_fks = set(pk_map.get_pks(foreign_field_model_name))
169-
matched_fks_query = dict()
170-
if len(matched_fks) > 0:
171-
matched_fks_query[field_name + "__in"] = matched_fks
172-
173-
if foreign_field.nullable:
174-
match_on_null_query = dict()
175-
match_on_null_query[field_name + "__isnull"] = True
176-
q &= Q(**matched_fks_query) | Q(**match_on_null_query)
177-
else:
178-
q &= Q(**matched_fks_query)
146+
q = model.query_for_relocation_export(q, pk_map)
179147

180148
pk_name = model._meta.pk.name # type: ignore
181149
queryset = model._base_manager.filter(q).order_by(pk_name)

src/sentry/db/models/base.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ class Meta:
4848
abstract = True
4949

5050
__relocation_scope__: RelocationScope | set[RelocationScope]
51+
__relocation_dependencies__: set[str]
5152

5253
objects: BaseManager[Self] = BaseManager()
5354

@@ -134,6 +135,32 @@ def get_possible_relocation_scopes(cls) -> set[RelocationScope]:
134135
else {cls.__relocation_scope__}
135136
)
136137

138+
@classmethod
139+
def query_for_relocation_export(cls, q: models.Q, pk_map: PrimaryKeyMap) -> models.Q:
140+
""" """
141+
142+
model_name = get_model_name(cls)
143+
model_relations = dependencies()[model_name]
144+
145+
# Create a filter for each possible FK reference to constrain the amount of data being sent
146+
# over from the database. We only want models where every FK field references into a model
147+
# whose PK we've already exported (or `NULL`, if the FK field is nullable).
148+
for field_name, foreign_field in model_relations.foreign_keys.items():
149+
foreign_field_model_name = get_model_name(foreign_field.model)
150+
matched_fks = set(pk_map.get_pks(foreign_field_model_name))
151+
matched_fks_query = dict()
152+
if len(matched_fks) > 0:
153+
matched_fks_query[field_name + "__in"] = matched_fks
154+
155+
if foreign_field.nullable:
156+
match_on_null_query = dict()
157+
match_on_null_query[field_name + "__isnull"] = True
158+
q &= models.Q(**matched_fks_query) | models.Q(**match_on_null_query)
159+
else:
160+
q &= models.Q(**matched_fks_query)
161+
162+
return q
163+
137164
def normalize_before_relocation_import(
138165
self, pk_map: PrimaryKeyMap, _s: ImportScope, _f: ImportFlags
139166
) -> int | None:

src/sentry/incidents/models.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from django.db.models.signals import post_delete, post_save
1212
from django.utils import timezone
1313

14-
from sentry.backup.dependencies import PrimaryKeyMap
14+
from sentry.backup.dependencies import PrimaryKeyMap, get_model_name
1515
from sentry.backup.helpers import ImportFlags
1616
from sentry.backup.scopes import ImportScope, RelocationScope
1717
from sentry.db.models import (
@@ -260,6 +260,7 @@ class Meta:
260260
@region_silo_only_model
261261
class TimeSeriesSnapshot(Model):
262262
__relocation_scope__ = RelocationScope.Organization
263+
__relocation_dependencies__ = {"sentry.Incident"}
263264

264265
start = models.DateTimeField()
265266
end = models.DateTimeField()
@@ -271,6 +272,14 @@ class Meta:
271272
app_label = "sentry"
272273
db_table = "sentry_timeseriessnapshot"
273274

275+
@classmethod
276+
def query_for_relocation_export(cls, q: models.Q, pk_map: PrimaryKeyMap) -> models.Q:
277+
pks = IncidentSnapshot.objects.filter(
278+
incident__in=pk_map.get_pks(get_model_name(Incident))
279+
).values_list("event_stats_snapshot_id", flat=True)
280+
281+
return q & models.Q(pk__in=pks)
282+
274283

275284
class IncidentActivityType(Enum):
276285
CREATED = 1

src/sentry/models/actor.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from django.forms import model_to_dict
1111
from rest_framework import serializers
1212

13-
from sentry.backup.dependencies import ImportKind
13+
from sentry.backup.dependencies import ImportKind, PrimaryKeyMap
1414
from sentry.backup.helpers import ImportFlags
1515
from sentry.backup.scopes import ImportScope, RelocationScope
1616
from sentry.db.models import Model, region_silo_only_model
@@ -144,6 +144,13 @@ def get_actor_identifier(self):
144144
# essentially forwards request to ActorTuple.get_actor_identifier
145145
return self.get_actor_tuple().get_actor_identifier()
146146

147+
@classmethod
148+
def query_for_relocation_export(cls, q: models.Q, pk_map: PrimaryKeyMap) -> models.Q:
149+
# Actors that can have both their `user` and `team` value set to null. Exclude such actors # from the export.
150+
q = super().query_for_relocation_export(q, pk_map)
151+
152+
return q & ~models.Q(team__isnull=True, user_id__isnull=True)
153+
147154
# TODO(hybrid-cloud): actor refactor. Remove this method when done.
148155
def write_relocation_import(
149156
self, scope: ImportScope, flags: ImportFlags

src/sentry/models/email.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from django.utils import timezone
88
from django.utils.translation import gettext_lazy as _
99

10-
from sentry.backup.dependencies import ImportKind
10+
from sentry.backup.dependencies import ImportKind, PrimaryKeyMap, get_model_name
1111
from sentry.backup.helpers import ImportFlags
1212
from sentry.backup.scopes import ImportScope, RelocationScope
1313
from sentry.db.models import CIEmailField, Model, control_silo_only_model, sane_repr
@@ -21,6 +21,7 @@ class Email(Model):
2121
"""
2222

2323
__relocation_scope__ = RelocationScope.User
24+
__relocation_dependencies__ = {"sentry.User"}
2425

2526
email = CIEmailField(_("email address"), unique=True, max_length=75)
2627
date_added = models.DateTimeField(default=timezone.now)
@@ -31,6 +32,19 @@ class Meta:
3132

3233
__repr__ = sane_repr("email")
3334

35+
@classmethod
36+
def query_for_relocation_export(cls, q: models.Q, pk_map: PrimaryKeyMap) -> models.Q:
37+
from sentry.models.user import User
38+
from sentry.models.useremail import UserEmail
39+
40+
# `Sentry.Email` models don't have any explicit dependencies on `Sentry.User`, so we need to
41+
# find them manually via `UserEmail`.
42+
emails = UserEmail.objects.filter(
43+
user_id__in=pk_map.get_pks(get_model_name(User))
44+
).values_list("email", flat=True)
45+
46+
return q & models.Q(email__in=emails)
47+
3448
def write_relocation_import(
3549
self, _s: ImportScope, _f: ImportFlags
3650
) -> Optional[Tuple[int, ImportKind]]:

src/sentry/models/team.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,12 @@ def get_member_actor_ids(self):
336336

337337
return owner_ids
338338

339+
# TODO(hybrid-cloud): actor refactor. Remove this method when done. For now, we do no filtering
340+
# on teams.
341+
@classmethod
342+
def query_for_relocation_export(cls, q: models.Q, _: PrimaryKeyMap) -> models.Q:
343+
return q
344+
339345
# TODO(hybrid-cloud): actor refactor. Remove this method when done.
340346
def normalize_before_relocation_import(
341347
self, pk_map: PrimaryKeyMap, scope: ImportScope, flags: ImportFlags

0 commit comments

Comments
 (0)