diff --git a/specifyweb/frontend/js_src/lib/components/DataModel/resourceApi.ts b/specifyweb/frontend/js_src/lib/components/DataModel/resourceApi.ts index 5b86e18681d..5d903228d79 100644 --- a/specifyweb/frontend/js_src/lib/components/DataModel/resourceApi.ts +++ b/specifyweb/frontend/js_src/lib/components/DataModel/resourceApi.ts @@ -69,7 +69,11 @@ function eventHandlerForToMany(related, field) { switch (event) { case 'saverequired': { this.handleChanged(); - if (related.models?.[0]?.specifyTable?.name !== 'CollectionRelationship') {this.trigger.apply(this, args)} + if ( + related.models?.[0]?.specifyTable?.name !== 'CollectionRelationship' + ) { + this.trigger.apply(this, args); + } break; } case 'change': diff --git a/specifyweb/specify/geo_time.py b/specifyweb/specify/geo_time.py index c9831ef886e..406a27043d6 100644 --- a/specifyweb/specify/geo_time.py +++ b/specifyweb/specify/geo_time.py @@ -1,8 +1,12 @@ +import logging +import os from typing import List, Set +from django.db import connection from django.db.models import Case, FloatField, F, Q, Value, When from django.db.models.functions import Coalesce, Greatest, Least, Cast -from sqlalchemy import func, literal, or_, and_, exists +from sqlalchemy import select, union_all, func, cast, DECIMAL, case, or_, and_, String, join from sqlalchemy.orm import aliased +from decimal import Decimal from specifyweb.specify.models import ( Absoluteage, @@ -10,10 +14,22 @@ Geologictimeperiod, Collectionobject, Paleocontext, - Collectingevent, +) +from specifyweb.specify.utils import log_sqlalchemy_query +from specifyweb.stored_queries.models import ( + AbsoluteAge, + RelativeAge, + GeologicTimePeriod, + CollectionObject, + PaleoContext, + CollectingEvent, Locality, ) -from specifyweb.stored_queries import models as sq_models + +logger = logging.getLogger(__name__) + +GEO_TIME_QUERY_IMPLEMENTATION = os.getenv('GEO_TIME_QUERY_IMPLEMENTATION', 'sqlalchemy') # 'django' or 'sqlalchemy' +GEO_TIME_QUERY_SQL_TYPE = os.getenv('GEO_TIME_QUERY_SQL_TYPE', 'meta') # 'modify' or 'raw', or 'filter', or 'meta' # Table paths from CollectionObject to Absoluteage or GeologicTimePeriod: # - collectionobject->absoluteage @@ -335,19 +351,19 @@ def query_co_in_time_range_with_joins( end_time = float(end_time) # Build the absolute age filters - absolute_start_filter = sq_models.Absoluteage.absoluteAge >= ( - start_time - sq_models.Absoluteage.ageUncertainty + absolute_start_filter = Absoluteage.absoluteAge >= ( + start_time - Absoluteage.ageUncertainty ) - absolute_end_filter = sq_models.Absoluteage.absoluteAge <= ( - end_time + sq_models.Absoluteage.ageUncertainty + absolute_end_filter = Absoluteage.absoluteAge <= ( + end_time + Absoluteage.ageUncertainty ) # Build the geologic time period filters - chrono_start_filter = sq_models.GeologicTimePeriod.startPeriod >= ( - start_time - sq_models.GeologicTimePeriod.startUncertainty + chrono_start_filter = GeologicTimePeriod.startPeriod >= ( + start_time - GeologicTimePeriod.startUncertainty ) - chrono_end_filter = sq_models.GeologicTimePeriod.endPeriod <= ( - end_time + sq_models.GeologicTimePeriod.endUncertainty + chrono_end_filter = GeologicTimePeriod.endPeriod <= ( + end_time + GeologicTimePeriod.endUncertainty ) if require_full_overlap: @@ -359,61 +375,61 @@ def query_co_in_time_range_with_joins( # AbsoluteAge query absolute_query = query.join( - sq_models.Absoluteage, - sq_models.CollectionObject.collectionObjectId == sq_models.Absoluteage.collectionObjectId, + Absoluteage, + CollectionObject.collectionObjectId == Absoluteage.collectionObjectId, ).filter(absolute_overlap_filter) # RelativeAge query chrono_query = query.join( - sq_models.RelativeAge, - sq_models.CollectionObject.collectionObjectId == sq_models.RelativeAge.collectionObjectId, + RelativeAge, + CollectionObject.collectionObjectId == RelativeAge.collectionObjectId, ).join( - sq_models.GeologicTimePeriod, - sq_models.RelativeAge.ageNameId == sq_models.GeologicTimePeriod.geologicTimePeriodId, + GeologicTimePeriod, + RelativeAge.ageNameId == GeologicTimePeriod.geologicTimePeriodId, ).filter(chrono_overlap_filter) # PaleoContext via CollectionObject paleocontext_query1 = query.join( - sq_models.PaleoContext, - sq_models.CollectionObject.paleoContextId == sq_models.PaleoContext.paleoContextId, + PaleoContext, + CollectionObject.paleoContextId == PaleoContext.paleoContextId, isouter=True, ).join( - sq_models.GeologicTimePeriod, - sq_models.PaleoContext.chronosStratId == sq_models.GeologicTimePeriod.geologicTimePeriodId, + GeologicTimePeriod, + PaleoContext.chronosStratId == GeologicTimePeriod.geologicTimePeriodId, isouter=True, ).filter(chrono_overlap_filter) # PaleoContext via CollectingEvent paleocontext_query2 = query.join( - sq_models.CollectingEvent, - sq_models.CollectionObject.collectingEventId == sq_models.CollectingEvent.collectingEventId, + CollectingEvent, + CollectionObject.collectingEventId == CollectingEvent.collectingEventId, isouter=True, ).join( - sq_models.PaleoContext, - sq_models.CollectingEvent.paleoContextId == sq_models.PaleoContext.paleoContextId, + PaleoContext, + CollectingEvent.paleoContextId == PaleoContext.paleoContextId, isouter=True, ).join( - sq_models.GeologicTimePeriod, - sq_models.PaleoContext.chronosStratId == sq_models.GeologicTimePeriod.geologicTimePeriodId, + GeologicTimePeriod, + PaleoContext.chronosStratId == GeologicTimePeriod.geologicTimePeriodId, isouter=True, ).filter(chrono_overlap_filter) # PaleoContext via CollectingEvent's Locality paleocontext_query3 = query.join( - sq_models.CollectingEvent, - sq_models.CollectionObject.collectingEventId == sq_models.CollectingEvent.collectingEventId, + CollectingEvent, + CollectionObject.collectingEventId == CollectingEvent.collectingEventId, isouter=True, ).join( - sq_models.Locality, - sq_models.CollectingEvent.localityId == sq_models.Locality.localityId, + Locality, + CollectingEvent.localityId == Locality.localityId, isouter=True, ).join( - sq_models.PaleoContext, - sq_models.Locality.paleoContextId == sq_models.PaleoContext.paleoContextId, + PaleoContext, + Locality.paleoContextId == PaleoContext.paleoContextId, isouter=True, ).join( - sq_models.GeologicTimePeriod, - sq_models.PaleoContext.chronosStratId == sq_models.GeologicTimePeriod.geologicTimePeriodId, + GeologicTimePeriod, + PaleoContext.chronosStratId == GeologicTimePeriod.geologicTimePeriodId, isouter=True, ).filter(chrono_overlap_filter) @@ -439,9 +455,7 @@ def query_co_in_time_margin( end_time = time - uncertainty return query_co_in_time_range_with_joins(query, start_time, end_time, require_full_overlap) -def query_co_in_time_period( - query, time_period_name: str, require_full_overlap: bool = False -): +def query_co_in_time_period(query, time_period_name: str, require_full_overlap: bool = False): """ Modify the given SQLAlchemy query to include filters that select collection objects overlapping with the given geologic time period. @@ -452,7 +466,7 @@ def query_co_in_time_period( :return: A new query with the additional filters applied. """ time_period = ( - sq_models.GeologicTimePeriod.query.filter_by(name=time_period_name).first() + GeologicTimePeriod.query.filter_by(name=time_period_name).first() ) if not time_period: return query.filter(False) # Returns an empty query @@ -461,120 +475,510 @@ def query_co_in_time_period( end_time = time_period.endPeriod return query_co_in_time_range_with_joins(query, start_time, end_time, require_full_overlap) -def query_co_in_time_range(query, start_time, end_time, require_full_overlap=False, session=None): +def modify_query_add_age_range(query, start_time: float, end_time: float, require_full_overlap: bool = False): """ - Filter the given SQLAlchemy query of CollectionObject to include only those that overlap with the given time range. - - :param query: An SQLAlchemy query on CollectionObject. - :param start_time: The start time of the range. - :param end_time: The end time of the range. - :param require_full_overlap: If True, only collections that fully overlap with the range are returned, otherwise partial overlap is used. - :param session: The SQLAlchemy session. - :return: A filtered SQLAlchemy query. + Given an existing SQLAlchemy query whose base entity is CollectionObject, + this function adds an inner join to an aggregated subquery that computes, + for each collection object (by its CollectionObjectID), the minimum end period and + maximum start period (aggregated from three sources: AbsoluteAge, RelativeAge, and PaleoContext). """ - # Validate time range - if start_time > end_time: - raise ValueError("start_time must be less than or equal to end_time") + # Helper functions to build SQL expressions for the relative and paleo subqueries + def build_relative_expr(is_start, a, aend, r): + """ + Build the start (if is_start is True) or end expression for the RelativeAge subquery. + When require_full_overlap is True, the expression uses uncertainty adjustments in one direction, + and when False, in the opposite direction. + """ + if require_full_overlap: + if is_start: + base_expr = ( + cast(a.startPeriod, DECIMAL(10, 6)) + - func.coalesce(a.startUncertainty, 0) + - func.coalesce(r.ageUncertainty, 0) + ) + alt_expr = ( + cast(aend.startPeriod, DECIMAL(10, 6)) + - func.coalesce(aend.startUncertainty, 0) + - func.coalesce(r.ageUncertainty, 0) + ) + expr_func = func.greatest + else: + base_expr = ( + cast(a.endPeriod, DECIMAL(10, 6)) + + func.coalesce(a.endUncertainty, 0) + + func.coalesce(r.ageUncertainty, 0) + ) + alt_expr = ( + cast(aend.endPeriod, DECIMAL(10, 6)) + + func.coalesce(aend.endUncertainty, 0) + + func.coalesce(r.ageUncertainty, 0) + ) + expr_func = func.least + else: + if is_start: + base_expr = ( + cast(a.startPeriod, DECIMAL(10, 6)) + + func.coalesce(a.startUncertainty, 0) + + func.coalesce(r.ageUncertainty, 0) + ) + alt_expr = ( + cast(aend.startPeriod, DECIMAL(10, 6)) + + func.coalesce(aend.startUncertainty, 0) + + func.coalesce(r.ageUncertainty, 0) + ) + expr_func = func.greatest + else: + base_expr = ( + cast(a.endPeriod, DECIMAL(10, 6)) + - func.coalesce(a.endUncertainty, 0) + - func.coalesce(r.ageUncertainty, 0) + ) + alt_expr = ( + cast(aend.endPeriod, DECIMAL(10, 6)) + - func.coalesce(aend.endUncertainty, 0) + - func.coalesce(r.ageUncertainty, 0) + ) + expr_func = func.least + return case( + [(r.AgeNameEndID != None, expr_func(base_expr, alt_expr))], + else_=base_expr + ) - # Build filters for Absoluteage - absolute_start_filter = Absoluteage.absoluteage >= (start_time + Absoluteage.ageuncertainty) - absolute_end_filter = Absoluteage.absoluteage <= (end_time - Absoluteage.ageuncertainty) + def build_paleo_expr(is_start, cs, csend, p): + """ + Build the start (if is_start is True) or end expression for the PaleoContext subquery. + The uncertainty adjustment and use of greatest/least depend on require_full_overlap. + """ + if require_full_overlap: + if is_start: + base_expr = cast(cs.startPeriod, DECIMAL(10, 6)) - func.coalesce(cs.startUncertainty, 0) + alt_expr = cast(csend.startPeriod, DECIMAL(10, 6)) - func.coalesce(csend.startUncertainty, 0) + expr_func = func.greatest + else: + base_expr = cast(cs.endPeriod, DECIMAL(10, 6)) + func.coalesce(cs.endUncertainty, 0) + alt_expr = cast(csend.endPeriod, DECIMAL(10, 6)) + func.coalesce(csend.endUncertainty, 0) + expr_func = func.least + else: + if is_start: + base_expr = cast(cs.startPeriod, DECIMAL(10, 6)) + func.coalesce(cs.startUncertainty, 0) + alt_expr = cast(csend.startPeriod, DECIMAL(10, 6)) + func.coalesce(csend.startUncertainty, 0) + expr_func = func.least + else: + base_expr = cast(cs.endPeriod, DECIMAL(10, 6)) - func.coalesce(cs.endUncertainty, 0) + alt_expr = cast(csend.endPeriod, DECIMAL(10, 6)) - func.coalesce(csend.endUncertainty, 0) + expr_func = func.greatest + return case([(p.ChronosStratEndID != None, expr_func(base_expr, alt_expr))], else_=base_expr) + + # Build the AbsoluteAge subquery + abs_sel = select([ + AbsoluteAge.CollectionObjectID.label("coid"), + ( + cast(AbsoluteAge.absoluteAge, DECIMAL(10, 6)) + - func.coalesce(AbsoluteAge.ageUncertainty, 0) + ).label("startperiod"), + ( + cast(AbsoluteAge.absoluteAge, DECIMAL(10, 6)) + + func.coalesce(AbsoluteAge.ageUncertainty, 0) + ).label("endperiod") + ]).where( + and_( + (cast(AbsoluteAge.absoluteAge, DECIMAL(10, 6)) - func.coalesce(AbsoluteAge.ageUncertainty, 0)) <= start_time, + (cast(AbsoluteAge.absoluteAge, DECIMAL(10, 6)) + func.coalesce(AbsoluteAge.ageUncertainty, 0)) >= end_time + ) + ) - if require_full_overlap: - absolute_overlap_filter = and_(absolute_start_filter, absolute_end_filter) - else: - absolute_overlap_filter = or_(absolute_start_filter, absolute_end_filter) + # Build the RelativeAge subquery + ra = aliased(RelativeAge, name="ra") + csa = aliased(GeologicTimePeriod, name="csa") + csaend = aliased(GeologicTimePeriod, name="csaend") - # Query Absoluteage to get collectionobject_ids - absolute_co_ids_subquery = ( - session.query(Absoluteage.collectionobject_id) - .filter(absolute_overlap_filter) - ).subquery() + rel_start_expr = build_relative_expr(is_start=True, a=csa, aend=csaend, r=ra) + rel_end_expr = build_relative_expr(is_start=False, a=csa, aend=csaend, r=ra) - # Build filters for Geologictimeperiod - chrono_start_filter = Geologictimeperiod.startperiod >= (start_time + Geologictimeperiod.startuncertainty) - chrono_end_filter = Geologictimeperiod.endperiod <= (end_time - Geologictimeperiod.enduncertainty) + rel_join = join( + ra, csa, ra.AgeNameID == csa.geologicTimePeriodId + ).outerjoin( + csaend, ra.AgeNameEndID == csaend.geologicTimePeriodId + ) + rel_sel = select([ + ra.CollectionObjectID.label("coid"), + rel_start_expr.label("startperiod"), + rel_end_expr.label("endperiod") + ]).select_from(rel_join).where( + and_( + csa.startPeriod != None, + csa.endPeriod != None, + csa.startPeriod >= csa.endPeriod, + or_( + ra.AgeNameEndID == None, + and_( + csaend.startPeriod != None, + csaend.endPeriod != None, + csaend.startPeriod >= csaend.endPeriod + ) + ), + rel_start_expr <= start_time, + rel_end_expr >= end_time + ) + ) - if require_full_overlap: - chrono_overlap_filter = and_(chrono_start_filter, chrono_end_filter) - else: - chrono_overlap_filter = or_(chrono_start_filter, chrono_end_filter) + # Build the PaleoContext subquery + co = aliased(CollectionObject, name="co") + ce = aliased(CollectingEvent, name="ce") + loc = aliased(Locality, name="loc") + pc = aliased(PaleoContext, name="pc") + cs = aliased(GeologicTimePeriod, name="cs") + csend = aliased(GeologicTimePeriod, name="csend") - # Get collectionobject_ids via Agename - # Assuming Agename has a relationship to CollectionObject - relative_agename_co_ids_subquery = ( - session.query(sq_models.Agename.collectionobject_id) - .join(Geologictimeperiod, sq_models.Agename.geologictimeperiod_id == Geologictimeperiod.id) - .filter(chrono_overlap_filter) - ).subquery() - - # Get collectionobject_ids via Agenameend - relative_agenameend_co_ids_subquery = ( - session.query(sq_models.Agenameend.collectionobject_id) - .join(Geologictimeperiod, sq_models.Agenameend.geologictimeperiod_id == Geologictimeperiod.id) - .filter(chrono_overlap_filter) - ).subquery() - - # Union of the two - relative_age_co_ids_subquery = ( - session.query(relative_agename_co_ids_subquery.c.collectionobject_id) - .union( - session.query(relative_agenameend_co_ids_subquery.c.collectionobject_id) - ) - ).subquery() + paleo_start_expr = build_paleo_expr(is_start=True, cs=cs, csend=csend, p=pc) + paleo_end_expr = build_paleo_expr(is_start=False, cs=cs, csend=csend, p=pc) - # Build filters for Paleocontext - paleocontext_start_filter = or_( - Paleocontext.startperiod >= (start_time + Paleocontext.startuncertainty), - sq_models.Paleocontextend.startperiod >= (start_time + sq_models.Paleocontextend.startuncertainty) + join_structure = join( + co, ce, co.CollectingEventID == ce.collectingEventId, isouter=True ) - - paleocontext_end_filter = or_( - Paleocontext.endperiod <= (end_time - Paleocontext.enduncertainty), - sq_models.Paleocontextend.endperiod <= (end_time - sq_models.Paleocontextend.enduncertainty) + join_structure = join( + join_structure, loc, ce.LocalityID == loc.localityId, isouter=True + ) + join_structure = join( + join_structure, + pc, + or_( + co.PaleoContextID == pc.paleoContextId, + ce.PaleoContextID == pc.paleoContextId, + loc.PaleoContextID == pc.paleoContextId + ), + isouter=True + ) + join_structure = join( + join_structure, cs, pc.ChronosStratID == cs.geologicTimePeriodId, isouter=True + ) + join_structure = join( + join_structure, csend, pc.ChronosStratEndID == csend.geologicTimePeriodId, isouter=True ) + paleo_sel = select([ + co.collectionObjectId.label("coid"), + paleo_start_expr.label("startperiod"), + paleo_end_expr.label("endperiod") + ]).select_from(join_structure).where( + and_( + pc.paleoContextId != None, + cs.startPeriod != None, + cs.endPeriod != None, + cs.startPeriod >= cs.endPeriod, + or_( + pc.ChronosStratEndID == None, + and_( + csend.startPeriod != None, + csend.endPeriod != None, + csend.startPeriod >= csend.endPeriod + ) + ), + paleo_start_expr <= start_time, + paleo_end_expr >= end_time + ) + ).distinct() + + # Union the three subqueries and aggregate + union_subq = union_all(abs_sel, rel_sel, paleo_sel).alias("unioned") + agg_subq = select([ + union_subq.c.coid, + func.min(union_subq.c.endperiod).label("min_end_period"), + func.max(union_subq.c.startperiod).label("max_start_period") + ]).group_by(union_subq.c.coid).alias("agg_subq") + + # Build the formatted "age" column expression. + age_expr = func.concat_ws( + " - ", + func.ifnull(func.regexp_replace(cast(agg_subq.c.max_start_period, String), "\\.(0+)$", ""), ""), + func.ifnull(func.regexp_replace(cast(agg_subq.c.min_end_period, String), "\\.(0+)$", ""), "") + ).label("age") + + # Modify the incoming query by joining the aggregated subquery + base_entity = query.column_descriptions[0]["entity"] # The base entity is CollectionObject + new_query = query.join(agg_subq, base_entity.collectionObjectId == agg_subq.c.coid) + new_query = new_query.add_columns(age_expr) + return new_query + +def geo_time_query(start_time: float, end_time: float, require_full_overlap: bool = False, query = None): + """ + Search for collection object IDs that overlap with the given time range. + Based on settings, choose the appropriate implementation. + + :param start_time: The start time (older time) of the range. + :param end_time: The end time (younger time) of the range. + :param require_full_overlap: If True, only collections that fully overlap with the range are returned. + :param query: The existing SQLAlchemy query on CollectionObject. + :return: A new query with the additional filters applied. + """ + if GEO_TIME_QUERY_IMPLEMENTATION == 'django': + return search_co_ids_in_time_range(start_time, end_time, require_full_overlap) + elif GEO_TIME_QUERY_IMPLEMENTATION == 'sqlalchemy': + if GEO_TIME_QUERY_SQL_TYPE == 'modify': + return modify_query_add_age_range(query, start_time, end_time, require_full_overlap) + elif GEO_TIME_QUERY_SQL_TYPE == 'meta': + return modify_query_add_meta_age_range(query, start_time, end_time, require_full_overlap) + elif GEO_TIME_QUERY_SQL_TYPE == 'filter': + return query_co_in_time_range_with_joins(query, start_time, end_time, require_full_overlap) + +def geo_time_period_query(time_period_name: str, require_full_overlap: bool = False, query = None): + """ + Query for collection object IDs that overlap with the given geologic time period + + :param time_period_name: The name of the time period. + :param require_full_overlap: If True, only collections that fully overlap with the range are returned. + :param query: The existing SQLAlchemy query on CollectionObject. + :return: A new query with the additional filters applied. + """ + time_period = Geologictimeperiod.objects.filter(name=time_period_name).first() + if not time_period: + return set() + start_time = time_period.startperiod + end_time = time_period.endperiod + # start_time += Decimal(time_period.startuncertainty) if time_period.startuncertainty else Decimal('0.1') + # end_time += Decimal(time_period.enduncertainty) if time_period.enduncertainty else Decimal('0.1') + if start_time is None: + start_time = 13800 # max start time, 13800 is the age of the Universe + if end_time is None: + end_time = 0 + + return geo_time_query(start_time, end_time, require_full_overlap, query) + +def modify_query_add_meta_age_range(query, start_time, end_time, require_full_overlap=False): + """ + Given an existing SQLAlchemy query (whose base is CollectionObject), + add an inner join to an aggregated subquery that calculates, per CollectionObject, + the maximum start period and minimum end period (with their uncertainties) + from nine different age sources, and then applies an age range filter. + """ + aa = aliased(AbsoluteAge, name="aa") + ra = aliased(RelativeAge, name="ra") + co = aliased(CollectionObject, name="co") + ce = aliased(CollectingEvent, name="ce") + loc = aliased(Locality, name="loc") + pc = aliased(PaleoContext, name="pc") + gtp_ra = aliased(GeologicTimePeriod, name="gtp_ra_agename") + gtp_ra_end = aliased(GeologicTimePeriod, name="gtp_ra_agenameend") + gtp_pc = aliased(GeologicTimePeriod, name="gtp_pc_chronostrat") + gtp_pc_end = aliased(GeologicTimePeriod, name="gtp_pc_chronostratend") + gtp_ce = aliased(GeologicTimePeriod, name="gtp_ce_pc_chronostrat") + gtp_ce_end = aliased(GeologicTimePeriod, name="gtp_ce_pc_chronostratend") + gtp_ce_loc = aliased(GeologicTimePeriod, name="gtp_ce_loc_pc_chronostrat") + gtp_ce_loc_end = aliased(GeologicTimePeriod, name="gtp_ce_loc_pc_chronostratend") + + # Build the nine source subqueries (AllAgeData) + # 1. Absolute Age + co_aa_join = join(co, aa, aa.CollectionObjectID == co.collectionObjectId) + abs_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + aa.absoluteAge.label("StartPeriod"), + aa.absoluteAge.label("EndPeriod"), + aa.ageUncertainty.label("StartUncertainty"), + aa.ageUncertainty.label("EndUncertainty") + ]).select_from(co_aa_join) + + # 2. Relative Age – AgeName + co_ra_join = join(co, ra, ra.CollectionObjectID == co.collectionObjectId) + co_ra_gtp_ra_join = join(co_ra_join, gtp_ra, ra.AgeNameID == gtp_ra.geologicTimePeriodId) + rel_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + gtp_ra.startPeriod.label("StartPeriod"), + gtp_ra.endPeriod.label("EndPeriod"), + gtp_ra.startUncertainty.label("StartUncertainty"), + gtp_ra.endUncertainty.label("EndUncertainty") + ]).select_from(co_ra_gtp_ra_join) + + # 3. Relative Age – AgeNameEnd + co_ra_join = join(co, ra, ra.CollectionObjectID == co.collectionObjectId) + co_ra_gtp_ra_end_join = join(co_ra_join, gtp_ra_end, ra.AgeNameEndID == gtp_ra_end.geologicTimePeriodId) + rel_end_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + gtp_ra_end.startPeriod.label("StartPeriod"), + gtp_ra_end.endPeriod.label("EndPeriod"), + gtp_ra_end.startUncertainty.label("StartUncertainty"), + gtp_ra_end.endUncertainty.label("EndUncertainty") + ]).select_from(co_ra_gtp_ra_end_join) + + # 4. PaleoContext – Chronostrat + co_pc_join = join(co, pc, co.PaleoContextID == pc.paleoContextId) + co_pc_gtp_pc_join = join(co_pc_join, gtp_pc, pc.ChronosStratID == gtp_pc.geologicTimePeriodId) + pc_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + gtp_pc.startPeriod.label("StartPeriod"), + gtp_pc.endPeriod.label("EndPeriod"), + gtp_pc.startUncertainty.label("StartUncertainty"), + gtp_pc.endUncertainty.label("EndUncertainty") + ]).select_from(co_pc_gtp_pc_join) + + # 5. PaleoContext – ChronostratEnd + co_pc_join = join(co, pc, co.PaleoContextID == pc.paleoContextId) + co_pc_gtp_pc_end_join = join(co_pc_join, gtp_pc_end, pc.ChronosStratEndID == gtp_pc_end.geologicTimePeriodId) + pc_end_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + gtp_pc_end.startPeriod.label("StartPeriod"), + gtp_pc_end.endPeriod.label("EndPeriod"), + gtp_pc_end.startUncertainty.label("StartUncertainty"), + gtp_pc_end.endUncertainty.label("EndUncertainty") + ]).select_from(co_pc_gtp_pc_end_join) + + # 6. CollectingEvent – PaleoContext Chronostrat + co_ce_join = join(co, ce, co.CollectingEventID == ce.collectingEventId) + co_ce_pc_join = join(co_ce_join, pc, ce.PaleoContextID == pc.paleoContextId) + co_ce_pc_gtp_ce_join = join(co_ce_pc_join, gtp_ce, pc.ChronosStratID == gtp_ce.geologicTimePeriodId) + ce_pc_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + gtp_ce.startPeriod.label("StartPeriod"), + gtp_ce.endPeriod.label("EndPeriod"), + gtp_ce.startUncertainty.label("StartUncertainty"), + gtp_ce.endUncertainty.label("EndUncertainty") + ]).select_from(co_ce_pc_gtp_ce_join) + + # 7. CollectingEvent – PaleoContext ChronostratEnd + co_ce_join = join(co, ce, co.CollectingEventID == ce.collectingEventId) + co_ce_pc_join = join(co_ce_join, pc, ce.PaleoContextID == pc.paleoContextId) + co_ce_pc_gtp_ce_end_join = join(co_ce_pc_join, gtp_ce_end, pc.ChronosStratEndID == gtp_ce_end.geologicTimePeriodId) + ce_pc_end_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + gtp_ce_end.startPeriod.label("StartPeriod"), + gtp_ce_end.endPeriod.label("EndPeriod"), + gtp_ce_end.startUncertainty.label("StartUncertainty"), + gtp_ce_end.endUncertainty.label("EndUncertainty") + ]).select_from(co_ce_pc_gtp_ce_end_join) + + # 8. CollectingEvent Locality – PaleoContext Chronostrat + co_ce_join = join(co, ce, co.CollectingEventID == ce.collectingEventId) + co_ce_loc_join = join(co_ce_join, loc, ce.LocalityID == loc.localityId) + co_ce_loc_pc_join = join(co_ce_loc_join, pc, loc.PaleoContextID == pc.paleoContextId) + co_ce_loc_gtp_ce_loc_join = join(co_ce_loc_pc_join, + gtp_ce_loc, + pc.ChronosStratID == gtp_ce_loc.geologicTimePeriodId) + ce_loc_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + gtp_ce_loc.startPeriod.label("StartPeriod"), + gtp_ce_loc.endPeriod.label("EndPeriod"), + gtp_ce_loc.startUncertainty.label("StartUncertainty"), + gtp_ce_loc.endUncertainty.label("EndUncertainty") + ]).select_from(co_ce_loc_gtp_ce_loc_join) + + # 9. CollectingEvent Locality – PaleoContext ChronostratEnd + co_ce_join = join(co, ce, co.CollectingEventID == ce.collectingEventId) + co_ce_loc_join = join(co_ce_join, loc, ce.LocalityID == loc.localityId) + co_ce_loc_pc_join = join(co_ce_loc_join, pc, loc.PaleoContextID == pc.paleoContextId) + co_ce_loc_gtp_ce_loc_end_join = join(co_ce_loc_pc_join, + gtp_ce_loc_end, + pc.ChronosStratEndID == gtp_ce_loc_end.geologicTimePeriodId) + ce_loc_end_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + gtp_ce_loc_end.startPeriod.label("StartPeriod"), + gtp_ce_loc_end.endPeriod.label("EndPeriod"), + gtp_ce_loc_end.startUncertainty.label("StartUncertainty"), + gtp_ce_loc_end.endUncertainty.label("EndUncertainty") + ]).select_from(co_ce_loc_gtp_ce_loc_end_join) + + # Union all nine subqueries into the AllAgeData CTE. + all_age_data = union_all( + abs_query, rel_query, rel_end_query, pc_query, pc_end_query, + ce_pc_query, ce_pc_end_query, ce_loc_query, ce_loc_end_query + ).alias("AllAgeData") + + # Create the RankedAges CTE using window functions. + ranked_ages = select([ + all_age_data.c.CollectionObjectID, + all_age_data.c.StartPeriod, + all_age_data.c.EndPeriod, + all_age_data.c.StartUncertainty, + all_age_data.c.EndUncertainty, + func.max(all_age_data.c.StartPeriod).over( + partition_by=all_age_data.c.CollectionObjectID + ).label("MaxStartPeriod"), + func.min(all_age_data.c.EndPeriod).over( + partition_by=all_age_data.c.CollectionObjectID + ).label("MinEndPeriod") + ]).alias("RankedAges") + + # Final aggregation: pick the uncertainties for the extreme values. + start_uncertainty_case = func.coalesce( + func.max( + case([(ranked_ages.c.StartPeriod == ranked_ages.c.MaxStartPeriod, + ranked_ages.c.StartUncertainty)]) + ), 0 + ) + end_uncertainty_case = func.coalesce( + func.max( + case([(ranked_ages.c.EndPeriod == ranked_ages.c.MinEndPeriod, + ranked_ages.c.EndUncertainty)]) + ), 0 + ) + + # Build the filter condition based on require_full_overlap. if require_full_overlap: - paleocontext_overlap_filter = and_(paleocontext_start_filter, paleocontext_end_filter) + filter_condition = and_( + ranked_ages.c.MaxStartPeriod - start_uncertainty_case <= start_time, + ranked_ages.c.MinEndPeriod + end_uncertainty_case >= end_time + ) + # filter_condition = or_( + # and_( + # ranked_ages.c.MaxStartPeriod - start_uncertainty_case <= start_time, + # ranked_ages.c.MinEndPeriod + end_uncertainty_case >= end_time + # ), + # and_( + # ranked_ages.c.MaxStartPeriod + start_uncertainty_case >= start_time, + # ranked_ages.c.MinEndPeriod - end_uncertainty_case <= end_time + # ) + # ) else: - paleocontext_overlap_filter = or_(paleocontext_start_filter, paleocontext_end_filter) - - # Get matching Paleocontext IDs - matching_paleocontext_ids_subquery = ( - session.query(Paleocontext.id) - .filter(paleocontext_overlap_filter) - ).subquery() - - # Get collectionobject IDs where Paleocontext matches - paleocontext_co_ids_subquery = ( - session.query(sq_models.CollectionObject.id) - .outerjoin(sq_models.CollectionObject.paleocontext) - .outerjoin(sq_models.CollectionObject.collectingevent) - .outerjoin(sq_models.Collectingevent.paleocontext) - .outerjoin(sq_models.Collectingevent.locality) - .outerjoin(sq_models.Locality.paleocontext) - .filter( - or_( - sq_models.CollectionObject.paleocontext_id.in_(matching_paleocontext_ids_subquery), - sq_models.Collectingevent.paleocontext_id.in_(matching_paleocontext_ids_subquery), - sq_models.Locality.paleocontext_id.in_(matching_paleocontext_ids_subquery) + filter_condition = or_( + and_( + ranked_ages.c.MaxStartPeriod - start_uncertainty_case <= start_time, + ranked_ages.c.MaxStartPeriod + start_uncertainty_case >= end_time + ), + and_( + ranked_ages.c.MinEndPeriod - end_uncertainty_case <= start_time, + ranked_ages.c.MinEndPeriod + end_uncertainty_case >= end_time + ), + and_( + ranked_ages.c.MaxStartPeriod + start_uncertainty_case >= start_time, + ranked_ages.c.MinEndPeriod - end_uncertainty_case <= end_time ) ) - ).subquery() - - # Union all collectionobject IDs - total_co_ids_subquery = ( - session.query(absolute_co_ids_subquery.c.collectionobject_id) - .union( - session.query(relative_age_co_ids_subquery.c.collectionobject_id), - session.query(paleocontext_co_ids_subquery.c.id) + + final_age_subq = select([ + ranked_ages.c.CollectionObjectID, + ranked_ages.c.MaxStartPeriod, + ranked_ages.c.MinEndPeriod, + start_uncertainty_case.label("StartUncertaintyForMaxStartPeriod"), + end_uncertainty_case.label("EndUncertaintyForMinEndPeriod") + ]).group_by( + ranked_ages.c.CollectionObjectID, + ranked_ages.c.MaxStartPeriod, + ranked_ages.c.MinEndPeriod + ).having( + and_( + ranked_ages.c.MaxStartPeriod != None, + ranked_ages.c.MinEndPeriod != None, + filter_condition ) - ).subquery() - - # Filter the original query - filtered_query = query.filter(sq_models.CollectionObject.id.in_( - session.query(total_co_ids_subquery.c.collectionobject_id) - )) - - return filtered_query + ).alias("co_age_filter") + + # Join the aggregated subquery onto the incoming CollectionObject query. + new_query = query.join(final_age_subq, CollectionObject.collectionObjectId == final_age_subq.c.CollectionObjectID) + + # Add a formatted age expression column (for example, concatenating max and min). + age_expr = func.concat_ws( + " - ", + func.regexp_replace( + func.regexp_replace(cast(final_age_subq.c.MaxStartPeriod, String), + r'(\.[0-9]*[1-9])0+$', r'\1'), + r'\.0+$', '' + ), + func.regexp_replace( + func.regexp_replace(cast(final_age_subq.c.MinEndPeriod, String), + r'(\.[0-9]*[1-9])0+$', r'\1'), + r'\.0+$', '' + ) + ).label("age") + new_query = new_query.add_columns(age_expr) + + log_sqlalchemy_query(new_query) + return new_query diff --git a/specifyweb/stored_queries/execution.py b/specifyweb/stored_queries/execution.py index de94d1048ef..95368c42b8c 100644 --- a/specifyweb/stored_queries/execution.py +++ b/specifyweb/stored_queries/execution.py @@ -631,6 +631,8 @@ def build_query(session, collection, user, tableid, field_specs, sort_type = SORT_TYPES[fs.sort_type] query, field, predicate = fs.add_to_query(query, formatauditobjs=formatauditobjs) + if field is None: + continue if fs.display: formatted_field = query.objectformatter.fieldformat(fs, field) query = query.add_columns(formatted_field) diff --git a/specifyweb/stored_queries/query_ops.py b/specifyweb/stored_queries/query_ops.py index 0d299e1a777..145bd10dd57 100644 --- a/specifyweb/stored_queries/query_ops.py +++ b/specifyweb/stored_queries/query_ops.py @@ -1,8 +1,19 @@ from collections import namedtuple import re import sqlalchemy - -from specifyweb.specify.geo_time import search_co_ids_in_time_range, query_co_in_time_range, query_co_in_time_range_with_joins, search_co_ids_in_time_period +from sqlalchemy.orm.query import Query + +# from specifyweb.specify.geo_time import ( +# modify_query_add_age_range, +# query_co_ids_in_time_period, +# search_co_ids_in_time_range, +# query_co_in_time_range, +# query_co_in_time_range_with_joins, +# search_co_ids_in_time_period, +# # search_co_ids_in_time_range_mysql, +# search_co_ids_in_time_range_mysql_with_age_range, +# ) +from specifyweb.specify.geo_time import geo_time_query, geo_time_period_query from specifyweb.specify.uiformatters import CNNField, FormatMismatch @@ -114,28 +125,19 @@ def op_startswith(self, field, value): else: return field.like(value + "%") - def op_age_range_set(self, field, value, is_strict=False): - values = [self.format(v.strip()) for v in value.split(',')[:2]] - start_time, end_time = float(values[0]), float(values[1]) - co_ids = search_co_ids_in_time_range(start_time, end_time, require_full_overlap=is_strict) - return field.in_(co_ids) - - def op_age_range_query(self, field, value, query, is_strict=False): - values = [self.format(v.strip()) for v in value.split(',')[:2]] - start_time, end_time = float(values[0]), float(values[1]) - return query_co_in_time_range(query.query, start_time, end_time, session=None, require_full_overlap=is_strict) - - def op_age_range_query_joins(self, field, value, query, is_strict=False): + def op_age_range(self, field, value, query, is_strict=False): values = [self.format(v.strip()) for v in value.split(',')[:2]] start_time, end_time = float(values[0]), float(values[1]) - return query_co_in_time_range_with_joins(query.query, start_time, end_time, session=None, require_full_overlap=is_strict) - - def op_age_range(self, field, value, query, is_strict=False): - # Choose implementation of age range filtering - return self.op_age_range_set(field, value, is_strict) - # return self.op_age_range_query(field, value, query, is_strict) - # return self.op_age_range_query_joins(field, value, query=query, is_strict=is_strict) + geo_time_co_ids = geo_time_query(start_time, end_time, require_full_overlap=is_strict, query=query.query) + if isinstance(geo_time_co_ids, Query): + return geo_time_co_ids + else: + return field.in_(geo_time_co_ids) def op_age_period(self, field, value, query, is_strict=False): time_period_name = value - return field.in_(search_co_ids_in_time_period(time_period_name, require_full_overlap=is_strict)) \ No newline at end of file + geo_time_co_ids = geo_time_period_query(time_period_name, require_full_overlap=is_strict, query=query.query) + if isinstance(geo_time_co_ids, Query): + return geo_time_co_ids + else: + return field.in_(geo_time_co_ids) diff --git a/specifyweb/stored_queries/queryfieldspec.py b/specifyweb/stored_queries/queryfieldspec.py index feab5828fc4..c9c9c1e779e 100644 --- a/specifyweb/stored_queries/queryfieldspec.py +++ b/specifyweb/stored_queries/queryfieldspec.py @@ -5,10 +5,12 @@ from typing import NamedTuple, Optional, Tuple from sqlalchemy import sql +from sqlalchemy.orm.query import Query from specifyweb.specify.load_datamodel import Field, Table from specifyweb.specify.models import datamodel from specifyweb.specify.uiformatters import get_uiformatter +from specifyweb.stored_queries.query_construct import QueryConstruct # from specifyweb.specify.geo_time import query_co_in_time_range from . import models from .query_ops import QueryOps @@ -228,6 +230,10 @@ def apply_filter(self, query, orm_field, field, table, value=None, op_num=None, # new_query = op(orm_field, value, query, is_strict=strict) # query = query._replace(query=new_query) # f = None + if isinstance(f, Query): + query = query._replace(query=f) + query = query.reset_joinpoint() + return query, None, None else: f = op(orm_field, value) predicate = sql.not_(f) if negate else f