From aa9779955447b4bef59d805f61301593985aa5cd Mon Sep 17 00:00:00 2001 From: alec_dev Date: Wed, 5 Feb 2025 14:04:43 -0600 Subject: [PATCH 01/14] mysql implementation of search_co_ids_in_time_range --- specifyweb/specify/geo_time.py | 178 +++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) diff --git a/specifyweb/specify/geo_time.py b/specifyweb/specify/geo_time.py index c9831ef886e..47dd4a856f1 100644 --- a/specifyweb/specify/geo_time.py +++ b/specifyweb/specify/geo_time.py @@ -1,4 +1,5 @@ from typing import List, Set +from django.db import connection from django.db.models import Case, FloatField, F, Q, Value, When from django.db.models.functions import Coalesce, Greatest, Least, Cast from sqlalchemy import func, literal, or_, and_, exists @@ -578,3 +579,180 @@ def query_co_in_time_range(query, start_time, end_time, require_full_overlap=Fal )) return filtered_query + + +def search_co_ids_in_time_range_mysql( + start_time: float, end_time: float, require_full_overlap: bool = False +) -> set: + """ + Returns the collection object IDs that overlap the given time range by executing + a single MySQL query which unions three subqueries (absolute, relative, and paleocontext ages). + + Note: This example assumes the following table names and columns (which correspond + roughly to the Django models and fields): + + - absoluteage: columns absoluteage, ageuncertainty, collectionobject_id + - relativeage: columns ageuncertainty, collectionobject_id, agename_id, agenameend_id + - agename: columns id, startperiod, endperiod, startuncertainty, enduncertainty + - paleocontext: columns id, chronosstrat_id, chronosstratend_id + - chronosstrat: columns id, startperiod, endperiod, startuncertainty, enduncertainty + - collectionobject: columns id, paleocontext_id, collectingevent_id + - collectingevent: columns id, paleocontext_id, locality_id + - locality: columns id, paleocontext_id + + In addition, “valid” chronostrat filters are implemented by requiring that + startperiod/endperiod are not NULL and that startperiod >= endperiod. + + (Adjust the table/column names as needed for your schema.) + """ + + # For the relative-age subquery, we need to compute the “adjusted time” + # conditions. The Django code uses different annotations for full vs. + # partial overlap. In MySQL we can “inline” these computations. + if require_full_overlap: + rel_time_condition = f"""( + IF(r.agenameendid IS NOT NULL, + GREATEST( + CAST(a.startperiod AS DECIMAL(10,6)) - COALESCE(a.startuncertainty, 0) - COALESCE(r.ageuncertainty, 0), + CAST(aend.startperiod AS DECIMAL(10,6)) - COALESCE(aend.startuncertainty, 0) - COALESCE(r.ageuncertainty, 0) + ), + CAST(a.startperiod AS DECIMAL(10,6)) - COALESCE(a.startuncertainty, 0) - COALESCE(r.ageuncertainty, 0) + ) <= {start_time} + AND + IF(r.agenameendid IS NOT NULL, + LEAST( + CAST(a.endperiod AS DECIMAL(10,6)) + COALESCE(a.enduncertainty, 0) + COALESCE(r.ageuncertainty, 0), + CAST(aend.endperiod AS DECIMAL(10,6)) + COALESCE(aend.enduncertainty, 0) + COALESCE(r.ageuncertainty, 0) + ), + CAST(a.endperiod AS DECIMAL(10,6)) + COALESCE(a.enduncertainty, 0) + COALESCE(r.ageuncertainty, 0) + ) >= {end_time} +)""" + + paleo_time_condition = f"""( + IF(p.chronosstratendid IS NOT NULL, + GREATEST( + CAST(cs.startperiod AS DECIMAL(10,6)) - COALESCE(cs.startuncertainty, 0), + CAST(csend.startperiod AS DECIMAL(10,6)) - COALESCE(csend.startuncertainty, 0) + ), + CAST(cs.startperiod AS DECIMAL(10,6)) - COALESCE(cs.startuncertainty, 0) + ) <= {start_time} + AND + IF(p.chronosstratendid IS NOT NULL, + LEAST( + CAST(cs.endperiod AS DECIMAL(10,6)) + COALESCE(cs.enduncertainty, 0), + CAST(csend.endperiod AS DECIMAL(10,6)) + COALESCE(csend.enduncertainty, 0) + ), + CAST(cs.endperiod AS DECIMAL(10,6)) + COALESCE(cs.enduncertainty, 0) + ) >= {end_time} +)""" + else: + rel_time_condition = f"""( + IF(r.agenameendid IS NOT NULL, + LEAST( + CAST(a.endperiod AS DECIMAL(10,6)) - COALESCE(a.enduncertainty, 0) - COALESCE(r.ageuncertainty, 0), + CAST(aend.endperiod AS DECIMAL(10,6)) - COALESCE(aend.enduncertainty, 0) - COALESCE(r.ageuncertainty, 0) + ), + CAST(a.endperiod AS DECIMAL(10,6)) - COALESCE(a.enduncertainty, 0) - COALESCE(r.ageuncertainty, 0) + ) <= {start_time} + AND + IF(r.agenameendid IS NOT NULL, + GREATEST( + CAST(a.startperiod AS DECIMAL(10,6)) + COALESCE(a.startuncertainty, 0) + COALESCE(r.ageuncertainty, 0), + CAST(aend.startperiod AS DECIMAL(10,6)) + COALESCE(aend.startuncertainty, 0) + COALESCE(r.ageuncertainty, 0) + ), + CAST(a.startperiod AS DECIMAL(10,6)) + COALESCE(a.startuncertainty, 0) + COALESCE(r.ageuncertainty, 0) + ) >= {end_time} +)""" + + paleo_time_condition = f"""( + IF(p.chronosstratendid IS NOT NULL, + LEAST( + CAST(cs.endperiod AS DECIMAL(10,6)) - COALESCE(cs.enduncertainty, 0), + CAST(csend.endperiod AS DECIMAL(10,6)) - COALESCE(csend.enduncertainty, 0) + ), + CAST(cs.endperiod AS DECIMAL(10,6)) - COALESCE(cs.enduncertainty, 0) + ) <= {start_time} + AND + IF(p.chronosstratendid IS NOT NULL, + GREATEST( + CAST(cs.startperiod AS DECIMAL(10,6)) + COALESCE(cs.startuncertainty, 0), + CAST(csend.startperiod AS DECIMAL(10,6)) + COALESCE(csend.startuncertainty, 0) + ), + CAST(cs.startperiod AS DECIMAL(10,6)) + COALESCE(cs.startuncertainty, 0) + ) >= {end_time} +)""" + + # Build the complete union query + co_id_query = f""" + SELECT DISTINCT coid FROM ( + -- Absolute ages subquery: + SELECT collectionobjectid AS coid + FROM absoluteage + WHERE (CAST(absoluteage AS DECIMAL(10,6)) - COALESCE(ageuncertainty, 0)) <= {start_time} + AND (CAST(absoluteage AS DECIMAL(10,6)) + COALESCE(ageuncertainty, 0)) >= {end_time} + + UNION + + -- Relative ages subquery: + SELECT r.collectionobjectid AS coid + FROM relativeage r + JOIN geologictimeperiod a ON r.agenameid = a.geologictimeperiodid + LEFT JOIN geologictimeperiod aend ON r.agenameendid = aend.geologictimeperiodid + WHERE a.startperiod IS NOT NULL + AND a.endperiod IS NOT NULL + AND a.startperiod >= a.endperiod + -- Validity condition for agenameend (if present) + AND (r.agenameendid IS NULL OR (aend.startperiod IS NOT NULL AND aend.endperiod IS NOT NULL AND aend.startperiod >= aend.endperiod)) + AND {rel_time_condition} + + UNION + + -- Paleocontext subquery: fetch collectionobject IDs where either + -- the collectionobject, its collecting event, or its locality links to a paleocontext + -- meeting the required conditions. + SELECT c.collectionobjectid AS coid + FROM collectionobject c + LEFT JOIN collectingevent ce ON c.collectingeventid = ce.collectingeventid + LEFT JOIN locality l ON ce.localityid = l.localityid + WHERE c.paleocontextid IN ( + SELECT p.paleocontextid + FROM paleocontext p + JOIN geologictimeperiod cs ON p.chronosstratid = cs.geologictimeperiodid + LEFT JOIN geologictimeperiod csend ON p.chronosstratendid = csend.geologictimeperiodid + WHERE cs.startperiod IS NOT NULL + AND cs.endperiod IS NOT NULL + AND cs.startperiod >= cs.endperiod + AND (p.chronosstratendid IS NULL OR (csend.startperiod IS NOT NULL AND csend.endperiod IS NOT NULL AND csend.startperiod >= csend.endperiod)) + AND {paleo_time_condition} + ) + OR ce.paleocontextid IN ( + SELECT p.paleocontextid + FROM paleocontext p + JOIN geologictimeperiod cs ON p.chronosstratid = cs.geologictimeperiodid + LEFT JOIN geologictimeperiod csend ON p.chronosstratendid = csend.geologictimeperiodid + WHERE cs.startperiod IS NOT NULL + AND cs.endperiod IS NOT NULL + AND cs.startperiod >= cs.endperiod + AND (p.chronosstratendid IS NULL OR (csend.startperiod IS NOT NULL AND csend.endperiod IS NOT NULL AND csend.startperiod >= csend.endperiod)) + AND {paleo_time_condition} + ) + OR l.paleocontextid IN ( + SELECT p.paleocontextid + FROM paleocontext p + JOIN geologictimeperiod cs ON p.chronosstratid = cs.geologictimeperiodid + LEFT JOIN geologictimeperiod csend ON p.chronosstratendid = csend.geologictimeperiodid + WHERE cs.startperiod IS NOT NULL + AND cs.endperiod IS NOT NULL + AND cs.startperiod >= cs.endperiod + AND (p.chronosstratendid IS NULL OR (csend.startperiod IS NOT NULL AND csend.endperiod IS NOT NULL AND csend.startperiod >= csend.endperiod)) + AND {paleo_time_condition} + ) + ) AS unioned; + """ + + # print(co_id_query) + with connection.cursor() as cursor: + cursor.execute(co_id_query) + rows = cursor.fetchall() + co_ids = {row[0] for row in rows if row[0] is not None} + return co_ids From 3a4bdb44c7599d94501956a93c3ed188c23fb7ec Mon Sep 17 00:00:00 2001 From: alec_dev Date: Fri, 7 Feb 2025 01:55:38 -0600 Subject: [PATCH 02/14] working modify_query_add_age_range, TODO: cleanup --- specifyweb/specify/geo_time.py | 394 ++++++++++++++++++-- specifyweb/stored_queries/queryfieldspec.py | 6 + 2 files changed, 377 insertions(+), 23 deletions(-) diff --git a/specifyweb/specify/geo_time.py b/specifyweb/specify/geo_time.py index 47dd4a856f1..c856c960798 100644 --- a/specifyweb/specify/geo_time.py +++ b/specifyweb/specify/geo_time.py @@ -1,8 +1,9 @@ +import logging from typing import List, Set from django.db import connection from django.db.models import Case, FloatField, F, Q, Value, When from django.db.models.functions import Coalesce, Greatest, Least, Cast -from sqlalchemy import func, literal, or_, and_, exists +from sqlalchemy import select, union_all, func, cast, DECIMAL, case, or_, and_, String, join from sqlalchemy.orm import aliased from specifyweb.specify.models import ( @@ -16,6 +17,8 @@ ) from specifyweb.stored_queries import models as sq_models +logger = logging.getLogger(__name__) + # Table paths from CollectionObject to Absoluteage or GeologicTimePeriod: # - collectionobject->absoluteage # - collectionobject->relativeage->chronostrat @@ -580,30 +583,12 @@ def query_co_in_time_range(query, start_time, end_time, require_full_overlap=Fal return filtered_query - def search_co_ids_in_time_range_mysql( start_time: float, end_time: float, require_full_overlap: bool = False ) -> set: """ Returns the collection object IDs that overlap the given time range by executing a single MySQL query which unions three subqueries (absolute, relative, and paleocontext ages). - - Note: This example assumes the following table names and columns (which correspond - roughly to the Django models and fields): - - - absoluteage: columns absoluteage, ageuncertainty, collectionobject_id - - relativeage: columns ageuncertainty, collectionobject_id, agename_id, agenameend_id - - agename: columns id, startperiod, endperiod, startuncertainty, enduncertainty - - paleocontext: columns id, chronosstrat_id, chronosstratend_id - - chronosstrat: columns id, startperiod, endperiod, startuncertainty, enduncertainty - - collectionobject: columns id, paleocontext_id, collectingevent_id - - collectingevent: columns id, paleocontext_id, locality_id - - locality: columns id, paleocontext_id - - In addition, “valid” chronostrat filters are implemented by requiring that - startperiod/endperiod are not NULL and that startperiod >= endperiod. - - (Adjust the table/column names as needed for your schema.) """ # For the relative-age subquery, we need to compute the “adjusted time” @@ -664,22 +649,30 @@ def search_co_ids_in_time_range_mysql( ) >= {end_time} )""" - paleo_time_condition = f"""( + paleo_start_time = f""" IF(p.chronosstratendid IS NOT NULL, LEAST( CAST(cs.endperiod AS DECIMAL(10,6)) - COALESCE(cs.enduncertainty, 0), CAST(csend.endperiod AS DECIMAL(10,6)) - COALESCE(csend.enduncertainty, 0) ), CAST(cs.endperiod AS DECIMAL(10,6)) - COALESCE(cs.enduncertainty, 0) - ) <= {start_time} - AND + ) +""" + paleo_end_time = f""" IF(p.chronosstratendid IS NOT NULL, GREATEST( CAST(cs.startperiod AS DECIMAL(10,6)) + COALESCE(cs.startuncertainty, 0), CAST(csend.startperiod AS DECIMAL(10,6)) + COALESCE(csend.startuncertainty, 0) ), CAST(cs.startperiod AS DECIMAL(10,6)) + COALESCE(cs.startuncertainty, 0) - ) >= {end_time} + ) +""" + paleo_start_time_condition = f"{paleo_start_time} <= {start_time}" + paleo_end_time_condition = f"{paleo_end_time} <= {end_time}" + paleo_time_condition = f"""( + {paleo_start_time_condition} + AND + {paleo_end_time_condition} )""" # Build the complete union query @@ -756,3 +749,358 @@ def search_co_ids_in_time_range_mysql( rows = cursor.fetchall() co_ids = {row[0] for row in rows if row[0] is not None} return co_ids + +def search_co_ids_in_time_range_mysql_with_age_range( + start_time: float, end_time: float, require_full_overlap: bool = False +) -> list: + """ + Returns a list of tuples (coid, min_end_period, max_start_period) for collection objects + that overlap the given time range by executing a single MySQL query which unions three subqueries: + - Absolute ages, + - Relative ages, and + - Paleocontext ages. + """ + + # Build filtering conditions + if require_full_overlap: + # Relative ages query expressions + rel_start_expr = ( + "IF(r.agenameendid IS NOT NULL, " + " GREATEST( " + " CAST(a.startperiod AS DECIMAL(10,6)) - COALESCE(a.startuncertainty, 0) - COALESCE(r.ageuncertainty, 0), " + " CAST(aend.startperiod AS DECIMAL(10,6)) - COALESCE(aend.startuncertainty, 0) - COALESCE(r.ageuncertainty, 0) " + " ), " + " CAST(a.startperiod AS DECIMAL(10,6)) - COALESCE(a.startuncertainty, 0) - COALESCE(r.ageuncertainty, 0) " + ")" + ) + rel_end_expr = ( + "IF(r.agenameendid IS NOT NULL, " + " LEAST( " + " CAST(a.endperiod AS DECIMAL(10,6)) + COALESCE(a.enduncertainty, 0) + COALESCE(r.ageuncertainty, 0), " + " CAST(aend.endperiod AS DECIMAL(10,6)) + COALESCE(aend.enduncertainty, 0) + COALESCE(r.ageuncertainty, 0) " + " ), " + " CAST(a.endperiod AS DECIMAL(10,6)) + COALESCE(a.enduncertainty, 0) + COALESCE(r.ageuncertainty, 0) " + ")" + ) + rel_start_time_condition = f"{rel_start_expr} <= {start_time}" + rel_end_time_condition = f"{rel_end_expr} >= {end_time}" + rel_time_condition = f"({rel_start_time_condition} AND {rel_end_time_condition})" + + # Paleocontext query expressions + paleo_start_expr = ( + "IF(p.chronosstratendid IS NOT NULL, " + " GREATEST( " + " CAST(cs.startperiod AS DECIMAL(10,6)) - COALESCE(cs.startuncertainty, 0), " + " CAST(csend.startperiod AS DECIMAL(10,6)) - COALESCE(csend.startuncertainty, 0) " + " ), " + " CAST(cs.startperiod AS DECIMAL(10,6)) - COALESCE(cs.startuncertainty, 0) " + ")" + ) + paleo_end_expr = ( + "IF(p.chronosstratendid IS NOT NULL, " + " LEAST( " + " CAST(cs.endperiod AS DECIMAL(10,6)) + COALESCE(cs.enduncertainty, 0), " + " CAST(csend.endperiod AS DECIMAL(10,6)) + COALESCE(csend.enduncertainty, 0) " + " ), " + " CAST(cs.endperiod AS DECIMAL(10,6)) + COALESCE(cs.enduncertainty, 0) " + ")" + ) + paleo_start_time_condition = f"{paleo_start_expr} <= {start_time}" + paleo_end_time_condition = f"{paleo_end_expr} >= {end_time}" + paleo_time_condition = f"({paleo_start_time_condition} AND {paleo_end_time_condition})" + else: + # Relative ages: alternate expressions for partial overlap. + rel_start_expr = ( + "IF(r.agenameendid IS NOT NULL, " + " GREATEST( " + " CAST(a.startperiod AS DECIMAL(10,6)) + COALESCE(a.startuncertainty, 0) + COALESCE(r.ageuncertainty, 0), " + " CAST(aend.startperiod AS DECIMAL(10,6)) + COALESCE(aend.startuncertainty, 0) + COALESCE(r.ageuncertainty, 0) " + " ), " + " CAST(a.startperiod AS DECIMAL(10,6)) + COALESCE(a.startuncertainty, 0) + COALESCE(r.ageuncertainty, 0) " + ")" + ) + rel_end_expr = ( + "IF(r.agenameendid IS NOT NULL, " + " LEAST( " + " CAST(a.endperiod AS DECIMAL(10,6)) - COALESCE(a.enduncertainty, 0) - COALESCE(r.ageuncertainty, 0), " + " CAST(aend.endperiod AS DECIMAL(10,6)) - COALESCE(aend.enduncertainty, 0) - COALESCE(r.ageuncertainty, 0) " + " ), " + " CAST(a.endperiod AS DECIMAL(10,6)) - COALESCE(a.enduncertainty, 0) - COALESCE(r.ageuncertainty, 0) " + ")" + ) + rel_start_time_condition = f"{rel_start_expr} <= {start_time}" + rel_end_time_condition = f"{rel_end_expr} >= {end_time}" + rel_time_condition = f"({rel_start_time_condition} AND {rel_end_time_condition})" + + # Paleocontext: alternate expressions. + paleo_start_expr = ( + "IF(p.chronosstratendid IS NOT NULL, " + " LEAST( " + " CAST(cs.startperiod AS DECIMAL(10,6)) + COALESCE(cs.startuncertainty, 0), " + " CAST(csend.startperiod AS DECIMAL(10,6)) + COALESCE(csend.startuncertainty, 0) " + " ), " + " CAST(cs.startperiod AS DECIMAL(10,6)) + COALESCE(cs.startuncertainty, 0) " + ")" + ) + paleo_end_expr = ( + "IF(p.chronosstratendid IS NOT NULL, " + " GREATEST( " + " CAST(cs.endperiod AS DECIMAL(10,6)) - COALESCE(cs.enduncertainty, 0), " + " CAST(csend.endperiod AS DECIMAL(10,6)) - COALESCE(csend.enduncertainty, 0) " + " ), " + " CAST(cs.endperiod AS DECIMAL(10,6)) - COALESCE(cs.enduncertainty, 0) " + ")" + ) + paleo_start_time_condition = f"{paleo_start_expr} <= {start_time}" + paleo_end_time_condition = f"{paleo_end_expr} >= {end_time}" + paleo_time_condition = f"({paleo_start_time_condition} AND {paleo_end_time_condition})" + + # Build the complete union query. + co_id_query = ( + "SELECT coid, " + "-- MIN(endperiod) AS min_end_period, " + "-- MAX(startperiod) AS max_start_period " + "FROM (" + "SELECT collectionobjectid AS coid, " + "CAST(absoluteage AS DECIMAL(10,6)) - COALESCE(ageuncertainty, 0) AS startperiod, " + "CAST(absoluteage AS DECIMAL(10,6)) + COALESCE(ageuncertainty, 0) AS endperiod " + "FROM absoluteage " + f"WHERE (CAST(absoluteage AS DECIMAL(10,6)) - COALESCE(ageuncertainty, 0)) <= {start_time} " + f"AND (CAST(absoluteage AS DECIMAL(10,6)) + COALESCE(ageuncertainty, 0)) >= {end_time} " + "UNION " + "SELECT r.collectionobjectid AS coid, " + f"{rel_start_expr} AS startperiod, " + f"{rel_end_expr} AS endperiod " + "FROM relativeage r " + "JOIN geologictimeperiod a ON r.agenameid = a.geologictimeperiodid " + "LEFT JOIN geologictimeperiod aend ON r.agenameendid = aend.geologictimeperiodid " + "WHERE a.startperiod IS NOT NULL " + "AND a.endperiod IS NOT NULL " + "AND a.startperiod >= a.endperiod " + "AND (r.agenameendid IS NULL OR (aend.startperiod IS NOT NULL AND aend.endperiod IS NOT NULL AND aend.startperiod >= aend.endperiod)) " + f"AND {rel_time_condition} " + "UNION " + "SELECT DISTINCT c.collectionobjectid AS coid, " + f"{paleo_start_expr} AS startperiod, " + f"{paleo_end_expr} AS endperiod " + "FROM collectionobject c " + "LEFT JOIN collectingevent ce ON c.collectingeventid = ce.collectingeventid " + "LEFT JOIN locality l ON ce.localityid = l.localityid " + "LEFT JOIN paleocontext p ON (c.paleocontextid = p.paleocontextid OR ce.paleocontextid = p.paleocontextid OR l.paleocontextid = p.paleocontextid) " + "LEFT JOIN geologictimeperiod cs ON p.chronosstratid = cs.geologictimeperiodid " + "LEFT JOIN geologictimeperiod csend ON p.chronosstratendid = csend.geologictimeperiodid " + "WHERE p.paleocontextid IS NOT NULL " + "AND cs.startperiod IS NOT NULL " + "AND cs.endperiod IS NOT NULL " + "AND cs.startperiod >= cs.endperiod " + "AND (p.chronosstratendid IS NULL OR (csend.startperiod IS NOT NULL AND csend.endperiod IS NOT NULL AND csend.startperiod >= csend.endperiod)) " + f"AND {paleo_time_condition} " + ") AS unioned " + "GROUP BY coid;" + ) + + logger.debug(co_id_query) + + with connection.cursor() as cursor: + cursor.execute(co_id_query) + rows = cursor.fetchall() + return rows + + +def modify_query_add_age_range(query, start_time: float, end_time: float, require_full_overlap: bool = False): + """ + Given an existing SQLAlchemy query whose base entity is Collectionobject, + this function adds an inner join to an aggregated subquery that computes, + for each collection object (by its CollectionObjectID), the minimum end period and + maximum start period (aggregated from three sources: AbsoluteAge, RelativeAge, and Paleocontext). + """ + + AbsoluteAge = sq_models.AbsoluteAge + RelativeAge = sq_models.RelativeAge + GeologicTimePeriod = sq_models.GeologicTimePeriod + Paleocontext = sq_models.PaleoContext + Collectingevent = sq_models.CollectingEvent + Locality = sq_models.Locality + Collectionobject = sq_models.CollectionObject + + # Build the three subqueries. + # --- AbsoluteAge subquery --- + abs_sel = select([ + AbsoluteAge.CollectionObjectID.label("coid"), + (cast(AbsoluteAge.absoluteAge, DECIMAL(10,6)) - func.coalesce(AbsoluteAge.ageUncertainty, 0)).label("startperiod"), + (cast(AbsoluteAge.absoluteAge, DECIMAL(10,6)) + func.coalesce(AbsoluteAge.ageUncertainty, 0)).label("endperiod") + ]).where( + and_( + (cast(AbsoluteAge.absoluteAge, DECIMAL(10,6)) - func.coalesce(AbsoluteAge.ageUncertainty, 0)) <= start_time, + (cast(AbsoluteAge.absoluteAge, DECIMAL(10,6)) + func.coalesce(AbsoluteAge.ageUncertainty, 0)) >= end_time + ) + ) + + # --- RelativeAge subquery --- + r = aliased(RelativeAge, name="r") + a = aliased(GeologicTimePeriod, name="a") + aend = aliased(GeologicTimePeriod, name="aend") + if require_full_overlap: + rel_start_expr = case( + [(r.AgeNameEndID != None, + func.greatest( + cast(a.startPeriod, DECIMAL(10,6)) - func.coalesce(a.startUncertainty, 0) - func.coalesce(r.ageUncertainty, 0), + cast(aend.startPeriod, DECIMAL(10,6)) - func.coalesce(aend.startUncertainty, 0) - func.coalesce(r.ageUncertainty, 0) + ))], + else_= cast(a.startPeriod, DECIMAL(10,6)) - func.coalesce(a.startUncertainty, 0) - func.coalesce(r.ageUncertainty, 0) + ) + rel_end_expr = case( + [(r.AgeNameEndID != None, + func.least( + cast(a.endPeriod, DECIMAL(10,6)) + func.coalesce(a.endUncertainty, 0) + func.coalesce(r.ageUncertainty, 0), + cast(aend.endPeriod, DECIMAL(10,6)) + func.coalesce(aend.endUncertainty, 0) + func.coalesce(r.ageUncertainty, 0) + ))], + else_= cast(a.endPeriod, DECIMAL(10,6)) + func.coalesce(a.endUncertainty, 0) + func.coalesce(r.ageUncertainty, 0) + ) + else: + rel_start_expr = case( + [(r.AgeNameEndID != None, + func.greatest( + cast(a.startPeriod, DECIMAL(10,6)) + func.coalesce(a.startUncertainty, 0) + func.coalesce(r.ageUncertainty, 0), + cast(aend.startPeriod, DECIMAL(10,6)) + func.coalesce(aend.startUncertainty, 0) + func.coalesce(r.ageUncertainty, 0) + ))], + else_= cast(a.startPeriod, DECIMAL(10,6)) + func.coalesce(a.startUncertainty, 0) + func.coalesce(r.ageUncertainty, 0) + ) + rel_end_expr = case( + [(r.AgeNameEndID != None, + func.least( + cast(a.endPeriod, DECIMAL(10,6)) - func.coalesce(a.endUncertainty, 0) - func.coalesce(r.ageUncertainty, 0), + cast(aend.endPeriod, DECIMAL(10,6)) - func.coalesce(aend.endUncertainty, 0) - func.coalesce(r.ageUncertainty, 0) + ))], + else_= cast(a.endPeriod, DECIMAL(10,6)) - func.coalesce(a.endUncertainty, 0) - func.coalesce(r.ageUncertainty, 0) + ) + + rel_join = join(r, a, r.AgeNameID == a.geologicTimePeriodId).outerjoin(aend, r.AgeNameEndID == aend.geologicTimePeriodId) + rel_sel = select([ + r.CollectionObjectID.label("coid"), + rel_start_expr.label("startperiod"), + rel_end_expr.label("endperiod") + ]).select_from(rel_join).where( + and_( + a.startPeriod != None, + a.endPeriod != None, + a.startPeriod >= a.endPeriod, + or_( + r.AgeNameEndID == None, + and_(aend.startPeriod != None, aend.endPeriod != None, aend.startPeriod >= aend.endPeriod) + ), + rel_start_expr <= start_time, + rel_end_expr >= end_time + ) + ) + + # --- Paleocontext subquery --- + c = aliased(Collectionobject, name="c") + ce = aliased(Collectingevent, name="ce") + l = aliased(Locality, name="l") + p = aliased(Paleocontext, name="p") + cs = aliased(GeologicTimePeriod, name="cs") + csend = aliased(GeologicTimePeriod, name="csend") + if require_full_overlap: + paleo_start_expr = case( + [(p.ChronosStratEndID != None, + func.greatest( + cast(cs.startPeriod, DECIMAL(10,6)) - func.coalesce(cs.startUncertainty, 0), + cast(csend.startPeriod, DECIMAL(10,6)) - func.coalesce(csend.startUncertainty, 0) + ))], + else_= cast(cs.startPeriod, DECIMAL(10,6)) - func.coalesce(cs.startUncertainty, 0) + ) + paleo_end_expr = case( + [(p.ChronosStratEndID != None, + func.least( + cast(cs.endPeriod, DECIMAL(10,6)) + func.coalesce(cs.endUncertainty, 0), + cast(csend.endPeriod, DECIMAL(10,6)) + func.coalesce(csend.endUncertainty, 0) + ))], + else_= cast(cs.endPeriod, DECIMAL(10,6)) + func.coalesce(cs.endUncertainty, 0) + ) + else: + paleo_start_expr = case( + [(p.ChronosStratEndID != None, + func.least( + cast(cs.startPeriod, DECIMAL(10,6)) + func.coalesce(cs.startUncertainty, 0), + cast(csend.startPeriod, DECIMAL(10,6)) + func.coalesce(csend.startUncertainty, 0) + ))], + else_= cast(cs.startPeriod, DECIMAL(10,6)) + func.coalesce(cs.startUncertainty, 0) + ) + paleo_end_expr = case( + [(p.ChronosStratEndID != None, + func.greatest( + cast(cs.endPeriod, DECIMAL(10,6)) - func.coalesce(cs.endUncertainty, 0), + cast(csend.endPeriod, DECIMAL(10,6)) - func.coalesce(csend.endUncertainty, 0) + ))], + else_= cast(cs.endPeriod, DECIMAL(10,6)) - func.coalesce(cs.endUncertainty, 0) + ) + + join_structure = join(c, ce, c.CollectingEventID == ce.collectingEventId, isouter=True) + join_structure = join(join_structure, l, ce.LocalityID == l.localityId, isouter=True) + join_structure = join(join_structure, p, or_( + c.PaleoContextID == p.paleoContextId, + ce.PaleoContextID == p.paleoContextId, + l.PaleoContextID == p.paleoContextId + ), isouter=True) + join_structure = join(join_structure, cs, p.ChronosStratID == cs.geologicTimePeriodId, isouter=True) + join_structure = join(join_structure, csend, p.ChronosStratEndID == csend.geologicTimePeriodId, isouter=True) + + paleo_sel = select([ + c.collectionObjectId.label("coid"), + paleo_start_expr.label("startperiod"), + paleo_end_expr.label("endperiod") + ]).select_from(join_structure).where( + and_( + p.paleoContextId != None, + cs.startPeriod != None, + cs.endPeriod != None, + cs.startPeriod >= cs.endPeriod, + or_( + p.ChronosStratEndID == None, + and_(csend.startPeriod != None, csend.endPeriod != None, csend.startPeriod >= csend.endPeriod) + ), + paleo_start_expr <= start_time, + paleo_end_expr >= end_time + ) + ).distinct() + + # Union the three subqueries and aggregate. + union_subq = union_all(abs_sel, rel_sel, paleo_sel).alias("unioned") + agg_subq = select([ + union_subq.c.coid, + func.min(union_subq.c.endperiod).label("min_end_period"), + func.max(union_subq.c.startperiod).label("max_start_period") + ]).group_by(union_subq.c.coid).alias("agg_subq") + + # Build the formatted "age" column expression. + age_expr = func.concat_ws( + " - ", + func.ifnull(func.regexp_replace(cast(agg_subq.c.max_start_period, String), "\\.(0+)$", ""), ""), + func.ifnull(func.regexp_replace(cast(agg_subq.c.min_end_period, String), "\\.(0+)$", ""), "") + ).label("age") + + # Modify the incoming query by joining the aggregated subquery. + base_entity = query.column_descriptions[0]["entity"] # The base entity is CollectionObject + new_query = query.join(agg_subq, base_entity.collectionObjectId == agg_subq.c.coid) + new_query = new_query.add_columns(age_expr) + return new_query + +def query_co_ids_in_time_period(query, time_period_name: str, require_full_overlap: bool = False) -> Set[int]: + """ + Query for collection object IDs that overlap with the given geologic time period. + + :param time_period_name: The name of the time period. + :param require_full_overlap: If True, only collections that fully overlap with the range are returned. + :return: A set of collection object IDs. + """ + time_period = Geologictimeperiod.objects.filter(name=time_period_name).first() + if not time_period: + return set() + start_time = time_period.startperiod + end_time = time_period.endperiod + if start_time is None: + start_time = 13800 + if end_time is None: + end_time = 0 + return modify_query_add_age_range(query, start_time, end_time, require_full_overlap) diff --git a/specifyweb/stored_queries/queryfieldspec.py b/specifyweb/stored_queries/queryfieldspec.py index feab5828fc4..c9c9c1e779e 100644 --- a/specifyweb/stored_queries/queryfieldspec.py +++ b/specifyweb/stored_queries/queryfieldspec.py @@ -5,10 +5,12 @@ from typing import NamedTuple, Optional, Tuple from sqlalchemy import sql +from sqlalchemy.orm.query import Query from specifyweb.specify.load_datamodel import Field, Table from specifyweb.specify.models import datamodel from specifyweb.specify.uiformatters import get_uiformatter +from specifyweb.stored_queries.query_construct import QueryConstruct # from specifyweb.specify.geo_time import query_co_in_time_range from . import models from .query_ops import QueryOps @@ -228,6 +230,10 @@ def apply_filter(self, query, orm_field, field, table, value=None, op_num=None, # new_query = op(orm_field, value, query, is_strict=strict) # query = query._replace(query=new_query) # f = None + if isinstance(f, Query): + query = query._replace(query=f) + query = query.reset_joinpoint() + return query, None, None else: f = op(orm_field, value) predicate = sql.not_(f) if negate else f From 0de54fa2208254ff42de62f6167d611155941e5d Mon Sep 17 00:00:00 2001 From: alec_dev Date: Fri, 7 Feb 2025 09:04:21 -0600 Subject: [PATCH 03/14] forgot to add this file in last commit --- specifyweb/stored_queries/execution.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/specifyweb/stored_queries/execution.py b/specifyweb/stored_queries/execution.py index de94d1048ef..95368c42b8c 100644 --- a/specifyweb/stored_queries/execution.py +++ b/specifyweb/stored_queries/execution.py @@ -631,6 +631,8 @@ def build_query(session, collection, user, tableid, field_specs, sort_type = SORT_TYPES[fs.sort_type] query, field, predicate = fs.add_to_query(query, formatauditobjs=formatauditobjs) + if field is None: + continue if fs.display: formatted_field = query.objectformatter.fieldformat(fs, field) query = query.add_columns(formatted_field) From 002cf5abf8a34080a699533a5b2cd3011e2a6671 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Fri, 7 Feb 2025 09:10:11 -0600 Subject: [PATCH 04/14] this file got left out too --- specifyweb/stored_queries/query_ops.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/specifyweb/stored_queries/query_ops.py b/specifyweb/stored_queries/query_ops.py index 0d299e1a777..f26ff822c5a 100644 --- a/specifyweb/stored_queries/query_ops.py +++ b/specifyweb/stored_queries/query_ops.py @@ -2,7 +2,16 @@ import re import sqlalchemy -from specifyweb.specify.geo_time import search_co_ids_in_time_range, query_co_in_time_range, query_co_in_time_range_with_joins, search_co_ids_in_time_period +from specifyweb.specify.geo_time import ( + modify_query_add_age_range, + query_co_ids_in_time_period, + search_co_ids_in_time_range, + query_co_in_time_range, + query_co_in_time_range_with_joins, + search_co_ids_in_time_period, + search_co_ids_in_time_range_mysql, + search_co_ids_in_time_range_mysql_with_age_range, +) from specifyweb.specify.uiformatters import CNNField, FormatMismatch @@ -118,6 +127,8 @@ def op_age_range_set(self, field, value, is_strict=False): values = [self.format(v.strip()) for v in value.split(',')[:2]] start_time, end_time = float(values[0]), float(values[1]) co_ids = search_co_ids_in_time_range(start_time, end_time, require_full_overlap=is_strict) + # co_ids = search_co_ids_in_time_range_mysql(start_time, end_time, require_full_overlap=is_strict) + # co_ids = search_co_ids_in_time_range_mysql_2(start_time, end_time, require_full_overlap=is_strict)[:][0] return field.in_(co_ids) def op_age_range_query(self, field, value, query, is_strict=False): @@ -128,14 +139,16 @@ def op_age_range_query(self, field, value, query, is_strict=False): def op_age_range_query_joins(self, field, value, query, is_strict=False): values = [self.format(v.strip()) for v in value.split(',')[:2]] start_time, end_time = float(values[0]), float(values[1]) - return query_co_in_time_range_with_joins(query.query, start_time, end_time, session=None, require_full_overlap=is_strict) + # return query_co_in_time_range_with_joins(query.query, start_time, end_time, session=None, require_full_overlap=is_strict) + return modify_query_add_age_range(query.query, start_time, end_time, require_full_overlap=is_strict) def op_age_range(self, field, value, query, is_strict=False): # Choose implementation of age range filtering - return self.op_age_range_set(field, value, is_strict) + # return self.op_age_range_set(field, value, is_strict) # return self.op_age_range_query(field, value, query, is_strict) - # return self.op_age_range_query_joins(field, value, query=query, is_strict=is_strict) + return self.op_age_range_query_joins(field, value, query=query, is_strict=is_strict) def op_age_period(self, field, value, query, is_strict=False): time_period_name = value - return field.in_(search_co_ids_in_time_period(time_period_name, require_full_overlap=is_strict)) \ No newline at end of file + # return field.in_(search_co_ids_in_time_period(time_period_name, require_full_overlap=is_strict)) + return query_co_ids_in_time_period(query.query, time_period_name, require_full_overlap=is_strict) From f58ac8d7764a5c04a69358d3b21314d7af19ff20 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Mon, 10 Feb 2025 15:32:41 -0600 Subject: [PATCH 05/14] geo_time code cleanup --- specifyweb/specify/geo_time.py | 835 ++++++++----------------- specifyweb/stored_queries/query_ops.py | 59 +- 2 files changed, 268 insertions(+), 626 deletions(-) diff --git a/specifyweb/specify/geo_time.py b/specifyweb/specify/geo_time.py index c856c960798..563a624198c 100644 --- a/specifyweb/specify/geo_time.py +++ b/specifyweb/specify/geo_time.py @@ -1,4 +1,5 @@ import logging +import os from typing import List, Set from django.db import connection from django.db.models import Case, FloatField, F, Q, Value, When @@ -12,13 +13,22 @@ Geologictimeperiod, Collectionobject, Paleocontext, - Collectingevent, +) +from specifyweb.stored_queries.models import ( + AbsoluteAge, + RelativeAge, + GeologicTimePeriod, + CollectionObject, + PaleoContext, + CollectingEvent, Locality, ) -from specifyweb.stored_queries import models as sq_models logger = logging.getLogger(__name__) +GEO_TIME_QUERY_IMPLEMENTATION = os.getenv('GEO_TIME_QUERY_IMPLEMENTATION', 'sqlalchemy') # 'django' or 'sqlalchemy' +GEO_TIME_QUERY_SQL_TYPE = os.getenv('GEO_TIME_QUERY_SQL_TYPE', 'modify') # 'modify' or 'raw', or 'filter' + # Table paths from CollectionObject to Absoluteage or GeologicTimePeriod: # - collectionobject->absoluteage # - collectionobject->relativeage->chronostrat @@ -339,19 +349,19 @@ def query_co_in_time_range_with_joins( end_time = float(end_time) # Build the absolute age filters - absolute_start_filter = sq_models.Absoluteage.absoluteAge >= ( - start_time - sq_models.Absoluteage.ageUncertainty + absolute_start_filter = Absoluteage.absoluteAge >= ( + start_time - Absoluteage.ageUncertainty ) - absolute_end_filter = sq_models.Absoluteage.absoluteAge <= ( - end_time + sq_models.Absoluteage.ageUncertainty + absolute_end_filter = Absoluteage.absoluteAge <= ( + end_time + Absoluteage.ageUncertainty ) # Build the geologic time period filters - chrono_start_filter = sq_models.GeologicTimePeriod.startPeriod >= ( - start_time - sq_models.GeologicTimePeriod.startUncertainty + chrono_start_filter = GeologicTimePeriod.startPeriod >= ( + start_time - GeologicTimePeriod.startUncertainty ) - chrono_end_filter = sq_models.GeologicTimePeriod.endPeriod <= ( - end_time + sq_models.GeologicTimePeriod.endUncertainty + chrono_end_filter = GeologicTimePeriod.endPeriod <= ( + end_time + GeologicTimePeriod.endUncertainty ) if require_full_overlap: @@ -363,61 +373,61 @@ def query_co_in_time_range_with_joins( # AbsoluteAge query absolute_query = query.join( - sq_models.Absoluteage, - sq_models.CollectionObject.collectionObjectId == sq_models.Absoluteage.collectionObjectId, + Absoluteage, + CollectionObject.collectionObjectId == Absoluteage.collectionObjectId, ).filter(absolute_overlap_filter) # RelativeAge query chrono_query = query.join( - sq_models.RelativeAge, - sq_models.CollectionObject.collectionObjectId == sq_models.RelativeAge.collectionObjectId, + RelativeAge, + CollectionObject.collectionObjectId == RelativeAge.collectionObjectId, ).join( - sq_models.GeologicTimePeriod, - sq_models.RelativeAge.ageNameId == sq_models.GeologicTimePeriod.geologicTimePeriodId, + GeologicTimePeriod, + RelativeAge.ageNameId == GeologicTimePeriod.geologicTimePeriodId, ).filter(chrono_overlap_filter) # PaleoContext via CollectionObject paleocontext_query1 = query.join( - sq_models.PaleoContext, - sq_models.CollectionObject.paleoContextId == sq_models.PaleoContext.paleoContextId, + PaleoContext, + CollectionObject.paleoContextId == PaleoContext.paleoContextId, isouter=True, ).join( - sq_models.GeologicTimePeriod, - sq_models.PaleoContext.chronosStratId == sq_models.GeologicTimePeriod.geologicTimePeriodId, + GeologicTimePeriod, + PaleoContext.chronosStratId == GeologicTimePeriod.geologicTimePeriodId, isouter=True, ).filter(chrono_overlap_filter) # PaleoContext via CollectingEvent paleocontext_query2 = query.join( - sq_models.CollectingEvent, - sq_models.CollectionObject.collectingEventId == sq_models.CollectingEvent.collectingEventId, + CollectingEvent, + CollectionObject.collectingEventId == CollectingEvent.collectingEventId, isouter=True, ).join( - sq_models.PaleoContext, - sq_models.CollectingEvent.paleoContextId == sq_models.PaleoContext.paleoContextId, + PaleoContext, + CollectingEvent.paleoContextId == PaleoContext.paleoContextId, isouter=True, ).join( - sq_models.GeologicTimePeriod, - sq_models.PaleoContext.chronosStratId == sq_models.GeologicTimePeriod.geologicTimePeriodId, + GeologicTimePeriod, + PaleoContext.chronosStratId == GeologicTimePeriod.geologicTimePeriodId, isouter=True, ).filter(chrono_overlap_filter) # PaleoContext via CollectingEvent's Locality paleocontext_query3 = query.join( - sq_models.CollectingEvent, - sq_models.CollectionObject.collectingEventId == sq_models.CollectingEvent.collectingEventId, + CollectingEvent, + CollectionObject.collectingEventId == CollectingEvent.collectingEventId, isouter=True, ).join( - sq_models.Locality, - sq_models.CollectingEvent.localityId == sq_models.Locality.localityId, + Locality, + CollectingEvent.localityId == Locality.localityId, isouter=True, ).join( - sq_models.PaleoContext, - sq_models.Locality.paleoContextId == sq_models.PaleoContext.paleoContextId, + PaleoContext, + Locality.paleoContextId == PaleoContext.paleoContextId, isouter=True, ).join( - sq_models.GeologicTimePeriod, - sq_models.PaleoContext.chronosStratId == sq_models.GeologicTimePeriod.geologicTimePeriodId, + GeologicTimePeriod, + PaleoContext.chronosStratId == GeologicTimePeriod.geologicTimePeriodId, isouter=True, ).filter(chrono_overlap_filter) @@ -443,9 +453,7 @@ def query_co_in_time_margin( end_time = time - uncertainty return query_co_in_time_range_with_joins(query, start_time, end_time, require_full_overlap) -def query_co_in_time_period( - query, time_period_name: str, require_full_overlap: bool = False -): +def query_co_in_time_period(query, time_period_name: str, require_full_overlap: bool = False): """ Modify the given SQLAlchemy query to include filters that select collection objects overlapping with the given geologic time period. @@ -456,7 +464,7 @@ def query_co_in_time_period( :return: A new query with the additional filters applied. """ time_period = ( - sq_models.GeologicTimePeriod.query.filter_by(name=time_period_name).first() + GeologicTimePeriod.query.filter_by(name=time_period_name).first() ) if not time_period: return query.filter(False) # Returns an empty query @@ -465,517 +473,132 @@ def query_co_in_time_period( end_time = time_period.endPeriod return query_co_in_time_range_with_joins(query, start_time, end_time, require_full_overlap) -def query_co_in_time_range(query, start_time, end_time, require_full_overlap=False, session=None): - """ - Filter the given SQLAlchemy query of CollectionObject to include only those that overlap with the given time range. - - :param query: An SQLAlchemy query on CollectionObject. - :param start_time: The start time of the range. - :param end_time: The end time of the range. - :param require_full_overlap: If True, only collections that fully overlap with the range are returned, otherwise partial overlap is used. - :param session: The SQLAlchemy session. - :return: A filtered SQLAlchemy query. - """ - - # Validate time range - if start_time > end_time: - raise ValueError("start_time must be less than or equal to end_time") - - # Build filters for Absoluteage - absolute_start_filter = Absoluteage.absoluteage >= (start_time + Absoluteage.ageuncertainty) - absolute_end_filter = Absoluteage.absoluteage <= (end_time - Absoluteage.ageuncertainty) - - if require_full_overlap: - absolute_overlap_filter = and_(absolute_start_filter, absolute_end_filter) - else: - absolute_overlap_filter = or_(absolute_start_filter, absolute_end_filter) - - # Query Absoluteage to get collectionobject_ids - absolute_co_ids_subquery = ( - session.query(Absoluteage.collectionobject_id) - .filter(absolute_overlap_filter) - ).subquery() - - # Build filters for Geologictimeperiod - chrono_start_filter = Geologictimeperiod.startperiod >= (start_time + Geologictimeperiod.startuncertainty) - chrono_end_filter = Geologictimeperiod.endperiod <= (end_time - Geologictimeperiod.enduncertainty) - - if require_full_overlap: - chrono_overlap_filter = and_(chrono_start_filter, chrono_end_filter) - else: - chrono_overlap_filter = or_(chrono_start_filter, chrono_end_filter) - - # Get collectionobject_ids via Agename - # Assuming Agename has a relationship to CollectionObject - relative_agename_co_ids_subquery = ( - session.query(sq_models.Agename.collectionobject_id) - .join(Geologictimeperiod, sq_models.Agename.geologictimeperiod_id == Geologictimeperiod.id) - .filter(chrono_overlap_filter) - ).subquery() - - # Get collectionobject_ids via Agenameend - relative_agenameend_co_ids_subquery = ( - session.query(sq_models.Agenameend.collectionobject_id) - .join(Geologictimeperiod, sq_models.Agenameend.geologictimeperiod_id == Geologictimeperiod.id) - .filter(chrono_overlap_filter) - ).subquery() - - # Union of the two - relative_age_co_ids_subquery = ( - session.query(relative_agename_co_ids_subquery.c.collectionobject_id) - .union( - session.query(relative_agenameend_co_ids_subquery.c.collectionobject_id) - ) - ).subquery() - - # Build filters for Paleocontext - paleocontext_start_filter = or_( - Paleocontext.startperiod >= (start_time + Paleocontext.startuncertainty), - sq_models.Paleocontextend.startperiod >= (start_time + sq_models.Paleocontextend.startuncertainty) - ) - - paleocontext_end_filter = or_( - Paleocontext.endperiod <= (end_time - Paleocontext.enduncertainty), - sq_models.Paleocontextend.endperiod <= (end_time - sq_models.Paleocontextend.enduncertainty) - ) - - if require_full_overlap: - paleocontext_overlap_filter = and_(paleocontext_start_filter, paleocontext_end_filter) - else: - paleocontext_overlap_filter = or_(paleocontext_start_filter, paleocontext_end_filter) - - # Get matching Paleocontext IDs - matching_paleocontext_ids_subquery = ( - session.query(Paleocontext.id) - .filter(paleocontext_overlap_filter) - ).subquery() - - # Get collectionobject IDs where Paleocontext matches - paleocontext_co_ids_subquery = ( - session.query(sq_models.CollectionObject.id) - .outerjoin(sq_models.CollectionObject.paleocontext) - .outerjoin(sq_models.CollectionObject.collectingevent) - .outerjoin(sq_models.Collectingevent.paleocontext) - .outerjoin(sq_models.Collectingevent.locality) - .outerjoin(sq_models.Locality.paleocontext) - .filter( - or_( - sq_models.CollectionObject.paleocontext_id.in_(matching_paleocontext_ids_subquery), - sq_models.Collectingevent.paleocontext_id.in_(matching_paleocontext_ids_subquery), - sq_models.Locality.paleocontext_id.in_(matching_paleocontext_ids_subquery) - ) - ) - ).subquery() - - # Union all collectionobject IDs - total_co_ids_subquery = ( - session.query(absolute_co_ids_subquery.c.collectionobject_id) - .union( - session.query(relative_age_co_ids_subquery.c.collectionobject_id), - session.query(paleocontext_co_ids_subquery.c.id) - ) - ).subquery() - - # Filter the original query - filtered_query = query.filter(sq_models.CollectionObject.id.in_( - session.query(total_co_ids_subquery.c.collectionobject_id) - )) - - return filtered_query - -def search_co_ids_in_time_range_mysql( - start_time: float, end_time: float, require_full_overlap: bool = False -) -> set: - """ - Returns the collection object IDs that overlap the given time range by executing - a single MySQL query which unions three subqueries (absolute, relative, and paleocontext ages). - """ - - # For the relative-age subquery, we need to compute the “adjusted time” - # conditions. The Django code uses different annotations for full vs. - # partial overlap. In MySQL we can “inline” these computations. - if require_full_overlap: - rel_time_condition = f"""( - IF(r.agenameendid IS NOT NULL, - GREATEST( - CAST(a.startperiod AS DECIMAL(10,6)) - COALESCE(a.startuncertainty, 0) - COALESCE(r.ageuncertainty, 0), - CAST(aend.startperiod AS DECIMAL(10,6)) - COALESCE(aend.startuncertainty, 0) - COALESCE(r.ageuncertainty, 0) - ), - CAST(a.startperiod AS DECIMAL(10,6)) - COALESCE(a.startuncertainty, 0) - COALESCE(r.ageuncertainty, 0) - ) <= {start_time} - AND - IF(r.agenameendid IS NOT NULL, - LEAST( - CAST(a.endperiod AS DECIMAL(10,6)) + COALESCE(a.enduncertainty, 0) + COALESCE(r.ageuncertainty, 0), - CAST(aend.endperiod AS DECIMAL(10,6)) + COALESCE(aend.enduncertainty, 0) + COALESCE(r.ageuncertainty, 0) - ), - CAST(a.endperiod AS DECIMAL(10,6)) + COALESCE(a.enduncertainty, 0) + COALESCE(r.ageuncertainty, 0) - ) >= {end_time} -)""" - - paleo_time_condition = f"""( - IF(p.chronosstratendid IS NOT NULL, - GREATEST( - CAST(cs.startperiod AS DECIMAL(10,6)) - COALESCE(cs.startuncertainty, 0), - CAST(csend.startperiod AS DECIMAL(10,6)) - COALESCE(csend.startuncertainty, 0) - ), - CAST(cs.startperiod AS DECIMAL(10,6)) - COALESCE(cs.startuncertainty, 0) - ) <= {start_time} - AND - IF(p.chronosstratendid IS NOT NULL, - LEAST( - CAST(cs.endperiod AS DECIMAL(10,6)) + COALESCE(cs.enduncertainty, 0), - CAST(csend.endperiod AS DECIMAL(10,6)) + COALESCE(csend.enduncertainty, 0) - ), - CAST(cs.endperiod AS DECIMAL(10,6)) + COALESCE(cs.enduncertainty, 0) - ) >= {end_time} -)""" - else: - rel_time_condition = f"""( - IF(r.agenameendid IS NOT NULL, - LEAST( - CAST(a.endperiod AS DECIMAL(10,6)) - COALESCE(a.enduncertainty, 0) - COALESCE(r.ageuncertainty, 0), - CAST(aend.endperiod AS DECIMAL(10,6)) - COALESCE(aend.enduncertainty, 0) - COALESCE(r.ageuncertainty, 0) - ), - CAST(a.endperiod AS DECIMAL(10,6)) - COALESCE(a.enduncertainty, 0) - COALESCE(r.ageuncertainty, 0) - ) <= {start_time} - AND - IF(r.agenameendid IS NOT NULL, - GREATEST( - CAST(a.startperiod AS DECIMAL(10,6)) + COALESCE(a.startuncertainty, 0) + COALESCE(r.ageuncertainty, 0), - CAST(aend.startperiod AS DECIMAL(10,6)) + COALESCE(aend.startuncertainty, 0) + COALESCE(r.ageuncertainty, 0) - ), - CAST(a.startperiod AS DECIMAL(10,6)) + COALESCE(a.startuncertainty, 0) + COALESCE(r.ageuncertainty, 0) - ) >= {end_time} -)""" - - paleo_start_time = f""" - IF(p.chronosstratendid IS NOT NULL, - LEAST( - CAST(cs.endperiod AS DECIMAL(10,6)) - COALESCE(cs.enduncertainty, 0), - CAST(csend.endperiod AS DECIMAL(10,6)) - COALESCE(csend.enduncertainty, 0) - ), - CAST(cs.endperiod AS DECIMAL(10,6)) - COALESCE(cs.enduncertainty, 0) - ) -""" - paleo_end_time = f""" - IF(p.chronosstratendid IS NOT NULL, - GREATEST( - CAST(cs.startperiod AS DECIMAL(10,6)) + COALESCE(cs.startuncertainty, 0), - CAST(csend.startperiod AS DECIMAL(10,6)) + COALESCE(csend.startuncertainty, 0) - ), - CAST(cs.startperiod AS DECIMAL(10,6)) + COALESCE(cs.startuncertainty, 0) - ) -""" - paleo_start_time_condition = f"{paleo_start_time} <= {start_time}" - paleo_end_time_condition = f"{paleo_end_time} <= {end_time}" - paleo_time_condition = f"""( - {paleo_start_time_condition} - AND - {paleo_end_time_condition} -)""" - - # Build the complete union query - co_id_query = f""" - SELECT DISTINCT coid FROM ( - -- Absolute ages subquery: - SELECT collectionobjectid AS coid - FROM absoluteage - WHERE (CAST(absoluteage AS DECIMAL(10,6)) - COALESCE(ageuncertainty, 0)) <= {start_time} - AND (CAST(absoluteage AS DECIMAL(10,6)) + COALESCE(ageuncertainty, 0)) >= {end_time} - - UNION - - -- Relative ages subquery: - SELECT r.collectionobjectid AS coid - FROM relativeage r - JOIN geologictimeperiod a ON r.agenameid = a.geologictimeperiodid - LEFT JOIN geologictimeperiod aend ON r.agenameendid = aend.geologictimeperiodid - WHERE a.startperiod IS NOT NULL - AND a.endperiod IS NOT NULL - AND a.startperiod >= a.endperiod - -- Validity condition for agenameend (if present) - AND (r.agenameendid IS NULL OR (aend.startperiod IS NOT NULL AND aend.endperiod IS NOT NULL AND aend.startperiod >= aend.endperiod)) - AND {rel_time_condition} - - UNION - - -- Paleocontext subquery: fetch collectionobject IDs where either - -- the collectionobject, its collecting event, or its locality links to a paleocontext - -- meeting the required conditions. - SELECT c.collectionobjectid AS coid - FROM collectionobject c - LEFT JOIN collectingevent ce ON c.collectingeventid = ce.collectingeventid - LEFT JOIN locality l ON ce.localityid = l.localityid - WHERE c.paleocontextid IN ( - SELECT p.paleocontextid - FROM paleocontext p - JOIN geologictimeperiod cs ON p.chronosstratid = cs.geologictimeperiodid - LEFT JOIN geologictimeperiod csend ON p.chronosstratendid = csend.geologictimeperiodid - WHERE cs.startperiod IS NOT NULL - AND cs.endperiod IS NOT NULL - AND cs.startperiod >= cs.endperiod - AND (p.chronosstratendid IS NULL OR (csend.startperiod IS NOT NULL AND csend.endperiod IS NOT NULL AND csend.startperiod >= csend.endperiod)) - AND {paleo_time_condition} - ) - OR ce.paleocontextid IN ( - SELECT p.paleocontextid - FROM paleocontext p - JOIN geologictimeperiod cs ON p.chronosstratid = cs.geologictimeperiodid - LEFT JOIN geologictimeperiod csend ON p.chronosstratendid = csend.geologictimeperiodid - WHERE cs.startperiod IS NOT NULL - AND cs.endperiod IS NOT NULL - AND cs.startperiod >= cs.endperiod - AND (p.chronosstratendid IS NULL OR (csend.startperiod IS NOT NULL AND csend.endperiod IS NOT NULL AND csend.startperiod >= csend.endperiod)) - AND {paleo_time_condition} - ) - OR l.paleocontextid IN ( - SELECT p.paleocontextid - FROM paleocontext p - JOIN geologictimeperiod cs ON p.chronosstratid = cs.geologictimeperiodid - LEFT JOIN geologictimeperiod csend ON p.chronosstratendid = csend.geologictimeperiodid - WHERE cs.startperiod IS NOT NULL - AND cs.endperiod IS NOT NULL - AND cs.startperiod >= cs.endperiod - AND (p.chronosstratendid IS NULL OR (csend.startperiod IS NOT NULL AND csend.endperiod IS NOT NULL AND csend.startperiod >= csend.endperiod)) - AND {paleo_time_condition} - ) - ) AS unioned; - """ - - # print(co_id_query) - with connection.cursor() as cursor: - cursor.execute(co_id_query) - rows = cursor.fetchall() - co_ids = {row[0] for row in rows if row[0] is not None} - return co_ids - -def search_co_ids_in_time_range_mysql_with_age_range( - start_time: float, end_time: float, require_full_overlap: bool = False -) -> list: - """ - Returns a list of tuples (coid, min_end_period, max_start_period) for collection objects - that overlap the given time range by executing a single MySQL query which unions three subqueries: - - Absolute ages, - - Relative ages, and - - Paleocontext ages. - """ - - # Build filtering conditions - if require_full_overlap: - # Relative ages query expressions - rel_start_expr = ( - "IF(r.agenameendid IS NOT NULL, " - " GREATEST( " - " CAST(a.startperiod AS DECIMAL(10,6)) - COALESCE(a.startuncertainty, 0) - COALESCE(r.ageuncertainty, 0), " - " CAST(aend.startperiod AS DECIMAL(10,6)) - COALESCE(aend.startuncertainty, 0) - COALESCE(r.ageuncertainty, 0) " - " ), " - " CAST(a.startperiod AS DECIMAL(10,6)) - COALESCE(a.startuncertainty, 0) - COALESCE(r.ageuncertainty, 0) " - ")" - ) - rel_end_expr = ( - "IF(r.agenameendid IS NOT NULL, " - " LEAST( " - " CAST(a.endperiod AS DECIMAL(10,6)) + COALESCE(a.enduncertainty, 0) + COALESCE(r.ageuncertainty, 0), " - " CAST(aend.endperiod AS DECIMAL(10,6)) + COALESCE(aend.enduncertainty, 0) + COALESCE(r.ageuncertainty, 0) " - " ), " - " CAST(a.endperiod AS DECIMAL(10,6)) + COALESCE(a.enduncertainty, 0) + COALESCE(r.ageuncertainty, 0) " - ")" - ) - rel_start_time_condition = f"{rel_start_expr} <= {start_time}" - rel_end_time_condition = f"{rel_end_expr} >= {end_time}" - rel_time_condition = f"({rel_start_time_condition} AND {rel_end_time_condition})" - - # Paleocontext query expressions - paleo_start_expr = ( - "IF(p.chronosstratendid IS NOT NULL, " - " GREATEST( " - " CAST(cs.startperiod AS DECIMAL(10,6)) - COALESCE(cs.startuncertainty, 0), " - " CAST(csend.startperiod AS DECIMAL(10,6)) - COALESCE(csend.startuncertainty, 0) " - " ), " - " CAST(cs.startperiod AS DECIMAL(10,6)) - COALESCE(cs.startuncertainty, 0) " - ")" - ) - paleo_end_expr = ( - "IF(p.chronosstratendid IS NOT NULL, " - " LEAST( " - " CAST(cs.endperiod AS DECIMAL(10,6)) + COALESCE(cs.enduncertainty, 0), " - " CAST(csend.endperiod AS DECIMAL(10,6)) + COALESCE(csend.enduncertainty, 0) " - " ), " - " CAST(cs.endperiod AS DECIMAL(10,6)) + COALESCE(cs.enduncertainty, 0) " - ")" - ) - paleo_start_time_condition = f"{paleo_start_expr} <= {start_time}" - paleo_end_time_condition = f"{paleo_end_expr} >= {end_time}" - paleo_time_condition = f"({paleo_start_time_condition} AND {paleo_end_time_condition})" - else: - # Relative ages: alternate expressions for partial overlap. - rel_start_expr = ( - "IF(r.agenameendid IS NOT NULL, " - " GREATEST( " - " CAST(a.startperiod AS DECIMAL(10,6)) + COALESCE(a.startuncertainty, 0) + COALESCE(r.ageuncertainty, 0), " - " CAST(aend.startperiod AS DECIMAL(10,6)) + COALESCE(aend.startuncertainty, 0) + COALESCE(r.ageuncertainty, 0) " - " ), " - " CAST(a.startperiod AS DECIMAL(10,6)) + COALESCE(a.startuncertainty, 0) + COALESCE(r.ageuncertainty, 0) " - ")" - ) - rel_end_expr = ( - "IF(r.agenameendid IS NOT NULL, " - " LEAST( " - " CAST(a.endperiod AS DECIMAL(10,6)) - COALESCE(a.enduncertainty, 0) - COALESCE(r.ageuncertainty, 0), " - " CAST(aend.endperiod AS DECIMAL(10,6)) - COALESCE(aend.enduncertainty, 0) - COALESCE(r.ageuncertainty, 0) " - " ), " - " CAST(a.endperiod AS DECIMAL(10,6)) - COALESCE(a.enduncertainty, 0) - COALESCE(r.ageuncertainty, 0) " - ")" - ) - rel_start_time_condition = f"{rel_start_expr} <= {start_time}" - rel_end_time_condition = f"{rel_end_expr} >= {end_time}" - rel_time_condition = f"({rel_start_time_condition} AND {rel_end_time_condition})" - - # Paleocontext: alternate expressions. - paleo_start_expr = ( - "IF(p.chronosstratendid IS NOT NULL, " - " LEAST( " - " CAST(cs.startperiod AS DECIMAL(10,6)) + COALESCE(cs.startuncertainty, 0), " - " CAST(csend.startperiod AS DECIMAL(10,6)) + COALESCE(csend.startuncertainty, 0) " - " ), " - " CAST(cs.startperiod AS DECIMAL(10,6)) + COALESCE(cs.startuncertainty, 0) " - ")" - ) - paleo_end_expr = ( - "IF(p.chronosstratendid IS NOT NULL, " - " GREATEST( " - " CAST(cs.endperiod AS DECIMAL(10,6)) - COALESCE(cs.enduncertainty, 0), " - " CAST(csend.endperiod AS DECIMAL(10,6)) - COALESCE(csend.enduncertainty, 0) " - " ), " - " CAST(cs.endperiod AS DECIMAL(10,6)) - COALESCE(cs.enduncertainty, 0) " - ")" - ) - paleo_start_time_condition = f"{paleo_start_expr} <= {start_time}" - paleo_end_time_condition = f"{paleo_end_expr} >= {end_time}" - paleo_time_condition = f"({paleo_start_time_condition} AND {paleo_end_time_condition})" - - # Build the complete union query. - co_id_query = ( - "SELECT coid, " - "-- MIN(endperiod) AS min_end_period, " - "-- MAX(startperiod) AS max_start_period " - "FROM (" - "SELECT collectionobjectid AS coid, " - "CAST(absoluteage AS DECIMAL(10,6)) - COALESCE(ageuncertainty, 0) AS startperiod, " - "CAST(absoluteage AS DECIMAL(10,6)) + COALESCE(ageuncertainty, 0) AS endperiod " - "FROM absoluteage " - f"WHERE (CAST(absoluteage AS DECIMAL(10,6)) - COALESCE(ageuncertainty, 0)) <= {start_time} " - f"AND (CAST(absoluteage AS DECIMAL(10,6)) + COALESCE(ageuncertainty, 0)) >= {end_time} " - "UNION " - "SELECT r.collectionobjectid AS coid, " - f"{rel_start_expr} AS startperiod, " - f"{rel_end_expr} AS endperiod " - "FROM relativeage r " - "JOIN geologictimeperiod a ON r.agenameid = a.geologictimeperiodid " - "LEFT JOIN geologictimeperiod aend ON r.agenameendid = aend.geologictimeperiodid " - "WHERE a.startperiod IS NOT NULL " - "AND a.endperiod IS NOT NULL " - "AND a.startperiod >= a.endperiod " - "AND (r.agenameendid IS NULL OR (aend.startperiod IS NOT NULL AND aend.endperiod IS NOT NULL AND aend.startperiod >= aend.endperiod)) " - f"AND {rel_time_condition} " - "UNION " - "SELECT DISTINCT c.collectionobjectid AS coid, " - f"{paleo_start_expr} AS startperiod, " - f"{paleo_end_expr} AS endperiod " - "FROM collectionobject c " - "LEFT JOIN collectingevent ce ON c.collectingeventid = ce.collectingeventid " - "LEFT JOIN locality l ON ce.localityid = l.localityid " - "LEFT JOIN paleocontext p ON (c.paleocontextid = p.paleocontextid OR ce.paleocontextid = p.paleocontextid OR l.paleocontextid = p.paleocontextid) " - "LEFT JOIN geologictimeperiod cs ON p.chronosstratid = cs.geologictimeperiodid " - "LEFT JOIN geologictimeperiod csend ON p.chronosstratendid = csend.geologictimeperiodid " - "WHERE p.paleocontextid IS NOT NULL " - "AND cs.startperiod IS NOT NULL " - "AND cs.endperiod IS NOT NULL " - "AND cs.startperiod >= cs.endperiod " - "AND (p.chronosstratendid IS NULL OR (csend.startperiod IS NOT NULL AND csend.endperiod IS NOT NULL AND csend.startperiod >= csend.endperiod)) " - f"AND {paleo_time_condition} " - ") AS unioned " - "GROUP BY coid;" - ) - - logger.debug(co_id_query) - - with connection.cursor() as cursor: - cursor.execute(co_id_query) - rows = cursor.fetchall() - return rows - - def modify_query_add_age_range(query, start_time: float, end_time: float, require_full_overlap: bool = False): """ - Given an existing SQLAlchemy query whose base entity is Collectionobject, + Given an existing SQLAlchemy query whose base entity is CollectionObject, this function adds an inner join to an aggregated subquery that computes, for each collection object (by its CollectionObjectID), the minimum end period and - maximum start period (aggregated from three sources: AbsoluteAge, RelativeAge, and Paleocontext). + maximum start period (aggregated from three sources: AbsoluteAge, RelativeAge, and PaleoContext). """ - AbsoluteAge = sq_models.AbsoluteAge - RelativeAge = sq_models.RelativeAge - GeologicTimePeriod = sq_models.GeologicTimePeriod - Paleocontext = sq_models.PaleoContext - Collectingevent = sq_models.CollectingEvent - Locality = sq_models.Locality - Collectionobject = sq_models.CollectionObject + # Helper functions to build SQL expressions for the relative and paleo subqueries + def build_relative_expr(is_start, a, aend, r): + """ + Build the start (if is_start is True) or end expression for the RelativeAge subquery. + When require_full_overlap is True, the expression uses uncertainty adjustments in one direction, + and when False, in the opposite direction. + """ + if require_full_overlap: + if is_start: + base_expr = ( + cast(a.startPeriod, DECIMAL(10, 6)) + - func.coalesce(a.startUncertainty, 0) + - func.coalesce(r.ageUncertainty, 0) + ) + alt_expr = ( + cast(aend.startPeriod, DECIMAL(10, 6)) + - func.coalesce(aend.startUncertainty, 0) + - func.coalesce(r.ageUncertainty, 0) + ) + expr_func = func.greatest + else: + base_expr = ( + cast(a.endPeriod, DECIMAL(10, 6)) + + func.coalesce(a.endUncertainty, 0) + + func.coalesce(r.ageUncertainty, 0) + ) + alt_expr = ( + cast(aend.endPeriod, DECIMAL(10, 6)) + + func.coalesce(aend.endUncertainty, 0) + + func.coalesce(r.ageUncertainty, 0) + ) + expr_func = func.least + else: + if is_start: + base_expr = ( + cast(a.startPeriod, DECIMAL(10, 6)) + + func.coalesce(a.startUncertainty, 0) + + func.coalesce(r.ageUncertainty, 0) + ) + alt_expr = ( + cast(aend.startPeriod, DECIMAL(10, 6)) + + func.coalesce(aend.startUncertainty, 0) + + func.coalesce(r.ageUncertainty, 0) + ) + expr_func = func.greatest + else: + base_expr = ( + cast(a.endPeriod, DECIMAL(10, 6)) + - func.coalesce(a.endUncertainty, 0) + - func.coalesce(r.ageUncertainty, 0) + ) + alt_expr = ( + cast(aend.endPeriod, DECIMAL(10, 6)) + - func.coalesce(aend.endUncertainty, 0) + - func.coalesce(r.ageUncertainty, 0) + ) + expr_func = func.least + return case( + [(r.AgeNameEndID != None, expr_func(base_expr, alt_expr))], + else_=base_expr + ) - # Build the three subqueries. - # --- AbsoluteAge subquery --- + def build_paleo_expr(is_start, cs, csend, p): + """ + Build the start (if is_start is True) or end expression for the PaleoContext subquery. + The uncertainty adjustment and use of greatest/least depend on require_full_overlap. + """ + if require_full_overlap: + if is_start: + base_expr = cast(cs.startPeriod, DECIMAL(10, 6)) - func.coalesce(cs.startUncertainty, 0) + alt_expr = cast(csend.startPeriod, DECIMAL(10, 6)) - func.coalesce(csend.startUncertainty, 0) + expr_func = func.greatest + else: + base_expr = cast(cs.endPeriod, DECIMAL(10, 6)) + func.coalesce(cs.endUncertainty, 0) + alt_expr = cast(csend.endPeriod, DECIMAL(10, 6)) + func.coalesce(csend.endUncertainty, 0) + expr_func = func.least + else: + if is_start: + base_expr = cast(cs.startPeriod, DECIMAL(10, 6)) + func.coalesce(cs.startUncertainty, 0) + alt_expr = cast(csend.startPeriod, DECIMAL(10, 6)) + func.coalesce(csend.startUncertainty, 0) + expr_func = func.least + else: + base_expr = cast(cs.endPeriod, DECIMAL(10, 6)) - func.coalesce(cs.endUncertainty, 0) + alt_expr = cast(csend.endPeriod, DECIMAL(10, 6)) - func.coalesce(csend.endUncertainty, 0) + expr_func = func.greatest + return case([(p.ChronosStratEndID != None, expr_func(base_expr, alt_expr))], else_=base_expr) + + # Build the AbsoluteAge subquery abs_sel = select([ AbsoluteAge.CollectionObjectID.label("coid"), - (cast(AbsoluteAge.absoluteAge, DECIMAL(10,6)) - func.coalesce(AbsoluteAge.ageUncertainty, 0)).label("startperiod"), - (cast(AbsoluteAge.absoluteAge, DECIMAL(10,6)) + func.coalesce(AbsoluteAge.ageUncertainty, 0)).label("endperiod") + ( + cast(AbsoluteAge.absoluteAge, DECIMAL(10, 6)) + - func.coalesce(AbsoluteAge.ageUncertainty, 0) + ).label("startperiod"), + ( + cast(AbsoluteAge.absoluteAge, DECIMAL(10, 6)) + + func.coalesce(AbsoluteAge.ageUncertainty, 0) + ).label("endperiod") ]).where( and_( - (cast(AbsoluteAge.absoluteAge, DECIMAL(10,6)) - func.coalesce(AbsoluteAge.ageUncertainty, 0)) <= start_time, - (cast(AbsoluteAge.absoluteAge, DECIMAL(10,6)) + func.coalesce(AbsoluteAge.ageUncertainty, 0)) >= end_time + (cast(AbsoluteAge.absoluteAge, DECIMAL(10, 6)) - func.coalesce(AbsoluteAge.ageUncertainty, 0)) <= start_time, + (cast(AbsoluteAge.absoluteAge, DECIMAL(10, 6)) + func.coalesce(AbsoluteAge.ageUncertainty, 0)) >= end_time ) ) - - # --- RelativeAge subquery --- + + # Build the RelativeAge subquery r = aliased(RelativeAge, name="r") a = aliased(GeologicTimePeriod, name="a") aend = aliased(GeologicTimePeriod, name="aend") - if require_full_overlap: - rel_start_expr = case( - [(r.AgeNameEndID != None, - func.greatest( - cast(a.startPeriod, DECIMAL(10,6)) - func.coalesce(a.startUncertainty, 0) - func.coalesce(r.ageUncertainty, 0), - cast(aend.startPeriod, DECIMAL(10,6)) - func.coalesce(aend.startUncertainty, 0) - func.coalesce(r.ageUncertainty, 0) - ))], - else_= cast(a.startPeriod, DECIMAL(10,6)) - func.coalesce(a.startUncertainty, 0) - func.coalesce(r.ageUncertainty, 0) - ) - rel_end_expr = case( - [(r.AgeNameEndID != None, - func.least( - cast(a.endPeriod, DECIMAL(10,6)) + func.coalesce(a.endUncertainty, 0) + func.coalesce(r.ageUncertainty, 0), - cast(aend.endPeriod, DECIMAL(10,6)) + func.coalesce(aend.endUncertainty, 0) + func.coalesce(r.ageUncertainty, 0) - ))], - else_= cast(a.endPeriod, DECIMAL(10,6)) + func.coalesce(a.endUncertainty, 0) + func.coalesce(r.ageUncertainty, 0) - ) - else: - rel_start_expr = case( - [(r.AgeNameEndID != None, - func.greatest( - cast(a.startPeriod, DECIMAL(10,6)) + func.coalesce(a.startUncertainty, 0) + func.coalesce(r.ageUncertainty, 0), - cast(aend.startPeriod, DECIMAL(10,6)) + func.coalesce(aend.startUncertainty, 0) + func.coalesce(r.ageUncertainty, 0) - ))], - else_= cast(a.startPeriod, DECIMAL(10,6)) + func.coalesce(a.startUncertainty, 0) + func.coalesce(r.ageUncertainty, 0) - ) - rel_end_expr = case( - [(r.AgeNameEndID != None, - func.least( - cast(a.endPeriod, DECIMAL(10,6)) - func.coalesce(a.endUncertainty, 0) - func.coalesce(r.ageUncertainty, 0), - cast(aend.endPeriod, DECIMAL(10,6)) - func.coalesce(aend.endUncertainty, 0) - func.coalesce(r.ageUncertainty, 0) - ))], - else_= cast(a.endPeriod, DECIMAL(10,6)) - func.coalesce(a.endUncertainty, 0) - func.coalesce(r.ageUncertainty, 0) - ) - - rel_join = join(r, a, r.AgeNameID == a.geologicTimePeriodId).outerjoin(aend, r.AgeNameEndID == aend.geologicTimePeriodId) + + rel_start_expr = build_relative_expr(is_start=True, a=a, aend=aend, r=r) + rel_end_expr = build_relative_expr(is_start=False, a=a, aend=aend, r=r) + + rel_join = join( + r, a, r.AgeNameID == a.geologicTimePeriodId + ).outerjoin( + aend, r.AgeNameEndID == aend.geologicTimePeriodId + ) rel_sel = select([ r.CollectionObjectID.label("coid"), rel_start_expr.label("startperiod"), @@ -987,64 +610,50 @@ def modify_query_add_age_range(query, start_time: float, end_time: float, requir a.startPeriod >= a.endPeriod, or_( r.AgeNameEndID == None, - and_(aend.startPeriod != None, aend.endPeriod != None, aend.startPeriod >= aend.endPeriod) + and_( + aend.startPeriod != None, + aend.endPeriod != None, + aend.startPeriod >= aend.endPeriod + ) ), rel_start_expr <= start_time, rel_end_expr >= end_time ) ) - - # --- Paleocontext subquery --- - c = aliased(Collectionobject, name="c") - ce = aliased(Collectingevent, name="ce") + + # Build the PaleoContext subquery + c = aliased(CollectionObject, name="c") + ce = aliased(CollectingEvent, name="ce") l = aliased(Locality, name="l") - p = aliased(Paleocontext, name="p") + p = aliased(PaleoContext, name="p") cs = aliased(GeologicTimePeriod, name="cs") csend = aliased(GeologicTimePeriod, name="csend") - if require_full_overlap: - paleo_start_expr = case( - [(p.ChronosStratEndID != None, - func.greatest( - cast(cs.startPeriod, DECIMAL(10,6)) - func.coalesce(cs.startUncertainty, 0), - cast(csend.startPeriod, DECIMAL(10,6)) - func.coalesce(csend.startUncertainty, 0) - ))], - else_= cast(cs.startPeriod, DECIMAL(10,6)) - func.coalesce(cs.startUncertainty, 0) - ) - paleo_end_expr = case( - [(p.ChronosStratEndID != None, - func.least( - cast(cs.endPeriod, DECIMAL(10,6)) + func.coalesce(cs.endUncertainty, 0), - cast(csend.endPeriod, DECIMAL(10,6)) + func.coalesce(csend.endUncertainty, 0) - ))], - else_= cast(cs.endPeriod, DECIMAL(10,6)) + func.coalesce(cs.endUncertainty, 0) - ) - else: - paleo_start_expr = case( - [(p.ChronosStratEndID != None, - func.least( - cast(cs.startPeriod, DECIMAL(10,6)) + func.coalesce(cs.startUncertainty, 0), - cast(csend.startPeriod, DECIMAL(10,6)) + func.coalesce(csend.startUncertainty, 0) - ))], - else_= cast(cs.startPeriod, DECIMAL(10,6)) + func.coalesce(cs.startUncertainty, 0) - ) - paleo_end_expr = case( - [(p.ChronosStratEndID != None, - func.greatest( - cast(cs.endPeriod, DECIMAL(10,6)) - func.coalesce(cs.endUncertainty, 0), - cast(csend.endPeriod, DECIMAL(10,6)) - func.coalesce(csend.endUncertainty, 0) - ))], - else_= cast(cs.endPeriod, DECIMAL(10,6)) - func.coalesce(cs.endUncertainty, 0) - ) - - join_structure = join(c, ce, c.CollectingEventID == ce.collectingEventId, isouter=True) - join_structure = join(join_structure, l, ce.LocalityID == l.localityId, isouter=True) - join_structure = join(join_structure, p, or_( - c.PaleoContextID == p.paleoContextId, - ce.PaleoContextID == p.paleoContextId, - l.PaleoContextID == p.paleoContextId - ), isouter=True) - join_structure = join(join_structure, cs, p.ChronosStratID == cs.geologicTimePeriodId, isouter=True) - join_structure = join(join_structure, csend, p.ChronosStratEndID == csend.geologicTimePeriodId, isouter=True) + + paleo_start_expr = build_paleo_expr(is_start=True, cs=cs, csend=csend, p=p) + paleo_end_expr = build_paleo_expr(is_start=False, cs=cs, csend=csend, p=p) + + join_structure = join( + c, ce, c.CollectingEventID == ce.collectingEventId, isouter=True + ) + join_structure = join( + join_structure, l, ce.LocalityID == l.localityId, isouter=True + ) + join_structure = join( + join_structure, + p, + or_( + c.PaleoContextID == p.paleoContextId, + ce.PaleoContextID == p.paleoContextId, + l.PaleoContextID == p.paleoContextId + ), + isouter=True + ) + join_structure = join( + join_structure, cs, p.ChronosStratID == cs.geologicTimePeriodId, isouter=True + ) + join_structure = join( + join_structure, csend, p.ChronosStratEndID == csend.geologicTimePeriodId, isouter=True + ) paleo_sel = select([ c.collectionObjectId.label("coid"), @@ -1058,35 +667,39 @@ def modify_query_add_age_range(query, start_time: float, end_time: float, requir cs.startPeriod >= cs.endPeriod, or_( p.ChronosStratEndID == None, - and_(csend.startPeriod != None, csend.endPeriod != None, csend.startPeriod >= csend.endPeriod) + and_( + csend.startPeriod != None, + csend.endPeriod != None, + csend.startPeriod >= csend.endPeriod + ) ), paleo_start_expr <= start_time, paleo_end_expr >= end_time ) ).distinct() - - # Union the three subqueries and aggregate. + + # Union the three subqueries and aggregate union_subq = union_all(abs_sel, rel_sel, paleo_sel).alias("unioned") agg_subq = select([ union_subq.c.coid, func.min(union_subq.c.endperiod).label("min_end_period"), func.max(union_subq.c.startperiod).label("max_start_period") ]).group_by(union_subq.c.coid).alias("agg_subq") - + # Build the formatted "age" column expression. age_expr = func.concat_ws( " - ", func.ifnull(func.regexp_replace(cast(agg_subq.c.max_start_period, String), "\\.(0+)$", ""), ""), func.ifnull(func.regexp_replace(cast(agg_subq.c.min_end_period, String), "\\.(0+)$", ""), "") ).label("age") - - # Modify the incoming query by joining the aggregated subquery. - base_entity = query.column_descriptions[0]["entity"] # The base entity is CollectionObject + + # Modify the incoming query by joining the aggregated subquery + base_entity = query.column_descriptions[0]["entity"] # The base entity is CollectionObject new_query = query.join(agg_subq, base_entity.collectionObjectId == agg_subq.c.coid) new_query = new_query.add_columns(age_expr) return new_query -def query_co_ids_in_time_period(query, time_period_name: str, require_full_overlap: bool = False) -> Set[int]: +def query_co_ids_in_time_period(query, time_period_name: str, require_full_overlap: bool = False): """ Query for collection object IDs that overlap with the given geologic time period. @@ -1104,3 +717,43 @@ def query_co_ids_in_time_period(query, time_period_name: str, require_full_overl if end_time is None: end_time = 0 return modify_query_add_age_range(query, start_time, end_time, require_full_overlap) + +def geo_time_query(start_time: float, end_time: float, require_full_overlap: bool = False, query = None): + """ + Search for collection object IDs that overlap with the given time range. + Based on settings, choose the appropriate implementation. + + :param start_time: The start time (older time) of the range. + :param end_time: The end time (younger time) of the range. + :param require_full_overlap: If True, only collections that fully overlap with the range are returned. + :param query: The existing SQLAlchemy query on CollectionObject. + :return: A new query with the additional filters applied. + """ + if GEO_TIME_QUERY_IMPLEMENTATION == 'django': + return search_co_ids_in_time_range(start_time, end_time, require_full_overlap) + elif GEO_TIME_QUERY_IMPLEMENTATION == 'sqlalchemy': + if GEO_TIME_QUERY_SQL_TYPE == 'modify': + return modify_query_add_age_range(query, start_time, end_time, require_full_overlap) + elif GEO_TIME_QUERY_SQL_TYPE == 'filter': + return query_co_in_time_range_with_joins(query, start_time, end_time, require_full_overlap) + +def geo_time_period_query(time_period_name: str, require_full_overlap: bool = False, query = None): + """ + Query for collection object IDs that overlap with the given geologic time period + + :param time_period_name: The name of the time period. + :param require_full_overlap: If True, only collections that fully overlap with the range are returned. + :param query: The existing SQLAlchemy query on CollectionObject. + :return: A new query with the additional filters applied. + """ + time_period = Geologictimeperiod.objects.filter(name=time_period_name).first() + if not time_period: + return set() + start_time = time_period.startperiod + end_time = time_period.endperiod + if start_time is None: + start_time = 13800 # max start time, 13800 is the age of the Universe + if end_time is None: + end_time = 0 + + return geo_time_query(start_time, end_time, require_full_overlap, query) \ No newline at end of file diff --git a/specifyweb/stored_queries/query_ops.py b/specifyweb/stored_queries/query_ops.py index f26ff822c5a..145bd10dd57 100644 --- a/specifyweb/stored_queries/query_ops.py +++ b/specifyweb/stored_queries/query_ops.py @@ -1,17 +1,19 @@ from collections import namedtuple import re import sqlalchemy - -from specifyweb.specify.geo_time import ( - modify_query_add_age_range, - query_co_ids_in_time_period, - search_co_ids_in_time_range, - query_co_in_time_range, - query_co_in_time_range_with_joins, - search_co_ids_in_time_period, - search_co_ids_in_time_range_mysql, - search_co_ids_in_time_range_mysql_with_age_range, -) +from sqlalchemy.orm.query import Query + +# from specifyweb.specify.geo_time import ( +# modify_query_add_age_range, +# query_co_ids_in_time_period, +# search_co_ids_in_time_range, +# query_co_in_time_range, +# query_co_in_time_range_with_joins, +# search_co_ids_in_time_period, +# # search_co_ids_in_time_range_mysql, +# search_co_ids_in_time_range_mysql_with_age_range, +# ) +from specifyweb.specify.geo_time import geo_time_query, geo_time_period_query from specifyweb.specify.uiformatters import CNNField, FormatMismatch @@ -123,32 +125,19 @@ def op_startswith(self, field, value): else: return field.like(value + "%") - def op_age_range_set(self, field, value, is_strict=False): - values = [self.format(v.strip()) for v in value.split(',')[:2]] - start_time, end_time = float(values[0]), float(values[1]) - co_ids = search_co_ids_in_time_range(start_time, end_time, require_full_overlap=is_strict) - # co_ids = search_co_ids_in_time_range_mysql(start_time, end_time, require_full_overlap=is_strict) - # co_ids = search_co_ids_in_time_range_mysql_2(start_time, end_time, require_full_overlap=is_strict)[:][0] - return field.in_(co_ids) - - def op_age_range_query(self, field, value, query, is_strict=False): - values = [self.format(v.strip()) for v in value.split(',')[:2]] - start_time, end_time = float(values[0]), float(values[1]) - return query_co_in_time_range(query.query, start_time, end_time, session=None, require_full_overlap=is_strict) - - def op_age_range_query_joins(self, field, value, query, is_strict=False): + def op_age_range(self, field, value, query, is_strict=False): values = [self.format(v.strip()) for v in value.split(',')[:2]] start_time, end_time = float(values[0]), float(values[1]) - # return query_co_in_time_range_with_joins(query.query, start_time, end_time, session=None, require_full_overlap=is_strict) - return modify_query_add_age_range(query.query, start_time, end_time, require_full_overlap=is_strict) - - def op_age_range(self, field, value, query, is_strict=False): - # Choose implementation of age range filtering - # return self.op_age_range_set(field, value, is_strict) - # return self.op_age_range_query(field, value, query, is_strict) - return self.op_age_range_query_joins(field, value, query=query, is_strict=is_strict) + geo_time_co_ids = geo_time_query(start_time, end_time, require_full_overlap=is_strict, query=query.query) + if isinstance(geo_time_co_ids, Query): + return geo_time_co_ids + else: + return field.in_(geo_time_co_ids) def op_age_period(self, field, value, query, is_strict=False): time_period_name = value - # return field.in_(search_co_ids_in_time_period(time_period_name, require_full_overlap=is_strict)) - return query_co_ids_in_time_period(query.query, time_period_name, require_full_overlap=is_strict) + geo_time_co_ids = geo_time_period_query(time_period_name, require_full_overlap=is_strict, query=query.query) + if isinstance(geo_time_co_ids, Query): + return geo_time_co_ids + else: + return field.in_(geo_time_co_ids) From 1019f6c470d66ff0b3c5a6e57483f8e44ffd6d77 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Mon, 24 Feb 2025 09:36:53 -0600 Subject: [PATCH 06/14] sql edits --- specifyweb/specify/geo_time.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/specifyweb/specify/geo_time.py b/specifyweb/specify/geo_time.py index 563a624198c..17c528817ae 100644 --- a/specifyweb/specify/geo_time.py +++ b/specifyweb/specify/geo_time.py @@ -622,51 +622,51 @@ def build_paleo_expr(is_start, cs, csend, p): ) # Build the PaleoContext subquery - c = aliased(CollectionObject, name="c") + co = aliased(CollectionObject, name="co") ce = aliased(CollectingEvent, name="ce") - l = aliased(Locality, name="l") - p = aliased(PaleoContext, name="p") + loc = aliased(Locality, name="loc") + pc = aliased(PaleoContext, name="pc") cs = aliased(GeologicTimePeriod, name="cs") csend = aliased(GeologicTimePeriod, name="csend") - paleo_start_expr = build_paleo_expr(is_start=True, cs=cs, csend=csend, p=p) - paleo_end_expr = build_paleo_expr(is_start=False, cs=cs, csend=csend, p=p) + paleo_start_expr = build_paleo_expr(is_start=True, cs=cs, csend=csend, p=pc) + paleo_end_expr = build_paleo_expr(is_start=False, cs=cs, csend=csend, p=pc) join_structure = join( - c, ce, c.CollectingEventID == ce.collectingEventId, isouter=True + co, ce, co.CollectingEventID == ce.collectingEventId, isouter=True ) join_structure = join( - join_structure, l, ce.LocalityID == l.localityId, isouter=True + join_structure, loc, ce.LocalityID == loc.localityId, isouter=True ) join_structure = join( join_structure, - p, + pc, or_( - c.PaleoContextID == p.paleoContextId, - ce.PaleoContextID == p.paleoContextId, - l.PaleoContextID == p.paleoContextId + co.PaleoContextID == pc.paleoContextId, + ce.PaleoContextID == pc.paleoContextId, + loc.PaleoContextID == pc.paleoContextId ), isouter=True ) join_structure = join( - join_structure, cs, p.ChronosStratID == cs.geologicTimePeriodId, isouter=True + join_structure, cs, pc.ChronosStratID == cs.geologicTimePeriodId, isouter=True ) join_structure = join( - join_structure, csend, p.ChronosStratEndID == csend.geologicTimePeriodId, isouter=True + join_structure, csend, pc.ChronosStratEndID == csend.geologicTimePeriodId, isouter=True ) paleo_sel = select([ - c.collectionObjectId.label("coid"), + co.collectionObjectId.label("coid"), paleo_start_expr.label("startperiod"), paleo_end_expr.label("endperiod") ]).select_from(join_structure).where( and_( - p.paleoContextId != None, + pc.paleoContextId != None, cs.startPeriod != None, cs.endPeriod != None, cs.startPeriod >= cs.endPeriod, or_( - p.ChronosStratEndID == None, + pc.ChronosStratEndID == None, and_( csend.startPeriod != None, csend.endPeriod != None, From af00e0fd656e7523ed57a630b300a612bd4a115d Mon Sep 17 00:00:00 2001 From: alec_dev Date: Mon, 24 Feb 2025 15:45:14 +0000 Subject: [PATCH 07/14] Lint code with ESLint and Prettier Triggered by 08809c66bd4ed018f4c03d2703ec7f3acfee498c on branch refs/heads/issue-6089 --- .../frontend/js_src/lib/components/DataModel/resourceApi.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/specifyweb/frontend/js_src/lib/components/DataModel/resourceApi.ts b/specifyweb/frontend/js_src/lib/components/DataModel/resourceApi.ts index 5b86e18681d..5d903228d79 100644 --- a/specifyweb/frontend/js_src/lib/components/DataModel/resourceApi.ts +++ b/specifyweb/frontend/js_src/lib/components/DataModel/resourceApi.ts @@ -69,7 +69,11 @@ function eventHandlerForToMany(related, field) { switch (event) { case 'saverequired': { this.handleChanged(); - if (related.models?.[0]?.specifyTable?.name !== 'CollectionRelationship') {this.trigger.apply(this, args)} + if ( + related.models?.[0]?.specifyTable?.name !== 'CollectionRelationship' + ) { + this.trigger.apply(this, args); + } break; } case 'change': From 066a053aa9eee6e25c841b009f1dd95000fe3a0a Mon Sep 17 00:00:00 2001 From: alec_dev Date: Mon, 24 Feb 2025 10:13:11 -0600 Subject: [PATCH 08/14] age period adjustment --- specifyweb/specify/geo_time.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/specifyweb/specify/geo_time.py b/specifyweb/specify/geo_time.py index 17c528817ae..d58f4943dcd 100644 --- a/specifyweb/specify/geo_time.py +++ b/specifyweb/specify/geo_time.py @@ -6,6 +6,7 @@ from django.db.models.functions import Coalesce, Greatest, Least, Cast from sqlalchemy import select, union_all, func, cast, DECIMAL, case, or_, and_, String, join from sqlalchemy.orm import aliased +from decimal import Decimal from specifyweb.specify.models import ( Absoluteage, @@ -699,25 +700,6 @@ def build_paleo_expr(is_start, cs, csend, p): new_query = new_query.add_columns(age_expr) return new_query -def query_co_ids_in_time_period(query, time_period_name: str, require_full_overlap: bool = False): - """ - Query for collection object IDs that overlap with the given geologic time period. - - :param time_period_name: The name of the time period. - :param require_full_overlap: If True, only collections that fully overlap with the range are returned. - :return: A set of collection object IDs. - """ - time_period = Geologictimeperiod.objects.filter(name=time_period_name).first() - if not time_period: - return set() - start_time = time_period.startperiod - end_time = time_period.endperiod - if start_time is None: - start_time = 13800 - if end_time is None: - end_time = 0 - return modify_query_add_age_range(query, start_time, end_time, require_full_overlap) - def geo_time_query(start_time: float, end_time: float, require_full_overlap: bool = False, query = None): """ Search for collection object IDs that overlap with the given time range. @@ -751,6 +733,9 @@ def geo_time_period_query(time_period_name: str, require_full_overlap: bool = Fa return set() start_time = time_period.startperiod end_time = time_period.endperiod + # if not require_full_overlap: + start_time += Decimal(time_period.startuncertainty) if time_period.startuncertainty else Decimal('0.1') + end_time += Decimal(time_period.enduncertainty) if time_period.enduncertainty else Decimal('0.1') if start_time is None: start_time = 13800 # max start time, 13800 is the age of the Universe if end_time is None: From ae67d78983767fe566664088e9b2b0bda9001e51 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Mon, 24 Feb 2025 12:13:53 -0600 Subject: [PATCH 09/14] sql edit --- specifyweb/specify/geo_time.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/specifyweb/specify/geo_time.py b/specifyweb/specify/geo_time.py index d58f4943dcd..fbe798578e1 100644 --- a/specifyweb/specify/geo_time.py +++ b/specifyweb/specify/geo_time.py @@ -588,33 +588,33 @@ def build_paleo_expr(is_start, cs, csend, p): ) # Build the RelativeAge subquery - r = aliased(RelativeAge, name="r") - a = aliased(GeologicTimePeriod, name="a") - aend = aliased(GeologicTimePeriod, name="aend") + ra = aliased(RelativeAge, name="ra") + csa = aliased(GeologicTimePeriod, name="csa") + csaend = aliased(GeologicTimePeriod, name="csaend") - rel_start_expr = build_relative_expr(is_start=True, a=a, aend=aend, r=r) - rel_end_expr = build_relative_expr(is_start=False, a=a, aend=aend, r=r) + rel_start_expr = build_relative_expr(is_start=True, a=csa, aend=csaend, r=ra) + rel_end_expr = build_relative_expr(is_start=False, a=csa, aend=csaend, r=ra) rel_join = join( - r, a, r.AgeNameID == a.geologicTimePeriodId + ra, csa, ra.AgeNameID == csa.geologicTimePeriodId ).outerjoin( - aend, r.AgeNameEndID == aend.geologicTimePeriodId + csaend, ra.AgeNameEndID == csaend.geologicTimePeriodId ) rel_sel = select([ - r.CollectionObjectID.label("coid"), + ra.CollectionObjectID.label("coid"), rel_start_expr.label("startperiod"), rel_end_expr.label("endperiod") ]).select_from(rel_join).where( and_( - a.startPeriod != None, - a.endPeriod != None, - a.startPeriod >= a.endPeriod, + csa.startPeriod != None, + csa.endPeriod != None, + csa.startPeriod >= csa.endPeriod, or_( - r.AgeNameEndID == None, + ra.AgeNameEndID == None, and_( - aend.startPeriod != None, - aend.endPeriod != None, - aend.startPeriod >= aend.endPeriod + csaend.startPeriod != None, + csaend.endPeriod != None, + csaend.startPeriod >= csaend.endPeriod ) ), rel_start_expr <= start_time, @@ -733,7 +733,6 @@ def geo_time_period_query(time_period_name: str, require_full_overlap: bool = Fa return set() start_time = time_period.startperiod end_time = time_period.endperiod - # if not require_full_overlap: start_time += Decimal(time_period.startuncertainty) if time_period.startuncertainty else Decimal('0.1') end_time += Decimal(time_period.enduncertainty) if time_period.enduncertainty else Decimal('0.1') if start_time is None: From 70fe2ce929f3672083b834518fb0e825249b2e7f Mon Sep 17 00:00:00 2001 From: alec_dev Date: Fri, 28 Feb 2025 14:40:19 -0600 Subject: [PATCH 10/14] add new strict age query behavior by filtering after net path values --- specifyweb/specify/geo_time.py | 229 ++++++++++++++++++++++++++++++++- 1 file changed, 227 insertions(+), 2 deletions(-) diff --git a/specifyweb/specify/geo_time.py b/specifyweb/specify/geo_time.py index fbe798578e1..0f57aa87080 100644 --- a/specifyweb/specify/geo_time.py +++ b/specifyweb/specify/geo_time.py @@ -28,7 +28,7 @@ logger = logging.getLogger(__name__) GEO_TIME_QUERY_IMPLEMENTATION = os.getenv('GEO_TIME_QUERY_IMPLEMENTATION', 'sqlalchemy') # 'django' or 'sqlalchemy' -GEO_TIME_QUERY_SQL_TYPE = os.getenv('GEO_TIME_QUERY_SQL_TYPE', 'modify') # 'modify' or 'raw', or 'filter' +GEO_TIME_QUERY_SQL_TYPE = os.getenv('GEO_TIME_QUERY_SQL_TYPE', 'meta') # 'modify' or 'raw', or 'filter', or 'meta' # Table paths from CollectionObject to Absoluteage or GeologicTimePeriod: # - collectionobject->absoluteage @@ -716,6 +716,8 @@ def geo_time_query(start_time: float, end_time: float, require_full_overlap: boo elif GEO_TIME_QUERY_IMPLEMENTATION == 'sqlalchemy': if GEO_TIME_QUERY_SQL_TYPE == 'modify': return modify_query_add_age_range(query, start_time, end_time, require_full_overlap) + elif GEO_TIME_QUERY_SQL_TYPE == 'meta': + return modify_query_add_meta_age_range(query, start_time, end_time, require_full_overlap) elif GEO_TIME_QUERY_SQL_TYPE == 'filter': return query_co_in_time_range_with_joins(query, start_time, end_time, require_full_overlap) @@ -740,4 +742,227 @@ def geo_time_period_query(time_period_name: str, require_full_overlap: bool = Fa if end_time is None: end_time = 0 - return geo_time_query(start_time, end_time, require_full_overlap, query) \ No newline at end of file + return geo_time_query(start_time, end_time, require_full_overlap, query) + +def modify_query_add_meta_age_range(query, start_time, end_time, require_full_overlap=False): + """ + Given an existing SQLAlchemy query (whose base is CollectionObject), + add an inner join to an aggregated subquery that calculates, per CollectionObject, + the maximum start period and minimum end period (with their uncertainties) + from nine different age sources, and then applies an age range filter. + """ + aa = aliased(AbsoluteAge, name="aa") + ra = aliased(RelativeAge, name="ra") + co = aliased(CollectionObject, name="co") + ce = aliased(CollectingEvent, name="ce") + loc = aliased(Locality, name="loc") + pc = aliased(PaleoContext, name="pc") + gtp_ra = aliased(GeologicTimePeriod, name="gtp_ra_agename") + gtp_ra_end = aliased(GeologicTimePeriod, name="gtp_ra_agenameend") + gtp_pc = aliased(GeologicTimePeriod, name="gtp_pc_chronostrat") + gtp_pc_end = aliased(GeologicTimePeriod, name="gtp_pc_chronostratend") + gtp_ce = aliased(GeologicTimePeriod, name="gtp_ce_pc_chronostrat") + gtp_ce_end = aliased(GeologicTimePeriod, name="gtp_ce_pc_chronostratend") + gtp_ce_loc = aliased(GeologicTimePeriod, name="gtp_ce_loc_pc_chronostrat") + gtp_ce_loc_end = aliased(GeologicTimePeriod, name="gtp_ce_loc_pc_chronostratend") + + # Build the nine source subqueries (AllAgeData) + # 1. Absolute Age + co_aa_join = join(co, aa, aa.CollectionObjectID == co.collectionObjectId) + abs_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + aa.absoluteAge.label("StartPeriod"), + aa.absoluteAge.label("EndPeriod"), + aa.ageUncertainty.label("StartUncertainty"), + aa.ageUncertainty.label("EndUncertainty") + ]).select_from(co_aa_join) + + # 2. Relative Age – AgeName + co_ra_join = join(co, ra, ra.CollectionObjectID == co.collectionObjectId) + co_ra_gtp_ra_join = join(co_ra_join, gtp_ra, ra.AgeNameID == gtp_ra.geologicTimePeriodId) + rel_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + gtp_ra.startPeriod.label("StartPeriod"), + gtp_ra.endPeriod.label("EndPeriod"), + gtp_ra.startUncertainty.label("StartUncertainty"), + gtp_ra.endUncertainty.label("EndUncertainty") + ]).select_from(co_ra_gtp_ra_join) + + # 3. Relative Age – AgeNameEnd + co_ra_join = join(co, ra, ra.CollectionObjectID == co.collectionObjectId) + co_ra_gtp_ra_end_join = join(co_ra_join, gtp_ra_end, ra.AgeNameEndID == gtp_ra_end.geologicTimePeriodId) + rel_end_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + gtp_ra_end.startPeriod.label("StartPeriod"), + gtp_ra_end.endPeriod.label("EndPeriod"), + gtp_ra_end.startUncertainty.label("StartUncertainty"), + gtp_ra_end.endUncertainty.label("EndUncertainty") + ]).select_from(co_ra_gtp_ra_end_join) + + # 4. PaleoContext – Chronostrat + co_pc_join = join(co, pc, co.PaleoContextID == pc.paleoContextId) + co_pc_gtp_pc_join = join(co_pc_join, gtp_pc, pc.ChronosStratID == gtp_pc.geologicTimePeriodId) + pc_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + gtp_pc.startPeriod.label("StartPeriod"), + gtp_pc.endPeriod.label("EndPeriod"), + gtp_pc.startUncertainty.label("StartUncertainty"), + gtp_pc.endUncertainty.label("EndUncertainty") + ]).select_from(co_pc_gtp_pc_join) + + # 5. PaleoContext – ChronostratEnd + co_pc_join = join(co, pc, co.PaleoContextID == pc.paleoContextId) + co_pc_gtp_pc_end_join = join(co_pc_join, gtp_pc_end, pc.ChronosStratEndID == gtp_pc_end.geologicTimePeriodId) + pc_end_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + gtp_pc_end.startPeriod.label("StartPeriod"), + gtp_pc_end.endPeriod.label("EndPeriod"), + gtp_pc_end.startUncertainty.label("StartUncertainty"), + gtp_pc_end.endUncertainty.label("EndUncertainty") + ]).select_from(co_pc_gtp_pc_end_join) + + # 6. CollectingEvent – PaleoContext Chronostrat + co_ce_join = join(co, ce, co.CollectingEventID == ce.collectingEventId) + co_ce_pc_join = join(co_ce_join, pc, ce.PaleoContextID == pc.paleoContextId) + co_ce_pc_gtp_ce_join = join(co_ce_pc_join, gtp_ce, pc.ChronosStratID == gtp_ce.geologicTimePeriodId) + ce_pc_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + gtp_ce.startPeriod.label("StartPeriod"), + gtp_ce.endPeriod.label("EndPeriod"), + gtp_ce.startUncertainty.label("StartUncertainty"), + gtp_ce.endUncertainty.label("EndUncertainty") + ]).select_from(co_ce_pc_gtp_ce_join) + + # 7. CollectingEvent – PaleoContext ChronostratEnd + co_ce_join = join(co, ce, co.CollectingEventID == ce.collectingEventId) + co_ce_pc_join = join(co_ce_join, pc, ce.PaleoContextID == pc.paleoContextId) + co_ce_pc_gtp_ce_end_join = join(co_ce_pc_join, gtp_ce_end, pc.ChronosStratEndID == gtp_ce_end.geologicTimePeriodId) + ce_pc_end_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + gtp_ce_end.startPeriod.label("StartPeriod"), + gtp_ce_end.endPeriod.label("EndPeriod"), + gtp_ce_end.startUncertainty.label("StartUncertainty"), + gtp_ce_end.endUncertainty.label("EndUncertainty") + ]).select_from(co_ce_pc_gtp_ce_end_join) + + # 8. CollectingEvent Locality – PaleoContext Chronostrat + co_ce_join = join(co, ce, co.CollectingEventID == ce.collectingEventId) + co_ce_loc_join = join(co_ce_join, loc, ce.LocalityID == loc.localityId) + co_ce_loc_pc_join = join(co_ce_loc_join, pc, loc.PaleoContextID == pc.paleoContextId) + co_ce_loc_gtp_ce_loc_join = join(co_ce_loc_pc_join, + gtp_ce_loc, + pc.ChronosStratID == gtp_ce_loc.geologicTimePeriodId) + ce_loc_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + gtp_ce_loc.startPeriod.label("StartPeriod"), + gtp_ce_loc.endPeriod.label("EndPeriod"), + gtp_ce_loc.startUncertainty.label("StartUncertainty"), + gtp_ce_loc.endUncertainty.label("EndUncertainty") + ]).select_from(co_ce_loc_gtp_ce_loc_join) + + # 9. CollectingEvent Locality – PaleoContext ChronostratEnd + co_ce_join = join(co, ce, co.CollectingEventID == ce.collectingEventId) + co_ce_loc_join = join(co_ce_join, loc, ce.LocalityID == loc.localityId) + co_ce_loc_pc_join = join(co_ce_loc_join, pc, loc.PaleoContextID == pc.paleoContextId) + co_ce_loc_gtp_ce_loc_end_join = join(co_ce_loc_pc_join, + gtp_ce_loc_end, + pc.ChronosStratEndID == gtp_ce_loc_end.geologicTimePeriodId) + ce_loc_end_query = select([ + co.collectionObjectId.label("CollectionObjectID"), + gtp_ce_loc_end.startPeriod.label("StartPeriod"), + gtp_ce_loc_end.endPeriod.label("EndPeriod"), + gtp_ce_loc_end.startUncertainty.label("StartUncertainty"), + gtp_ce_loc_end.endUncertainty.label("EndUncertainty") + ]).select_from(co_ce_loc_gtp_ce_loc_end_join) + + # Union all nine subqueries into the AllAgeData CTE. + all_age_data = union_all( + abs_query, rel_query, rel_end_query, pc_query, pc_end_query, + ce_pc_query, ce_pc_end_query, ce_loc_query, ce_loc_end_query + ).alias("AllAgeData") + + # Create the RankedAges CTE using window functions. + ranked_ages = select([ + all_age_data.c.CollectionObjectID, + all_age_data.c.StartPeriod, + all_age_data.c.EndPeriod, + all_age_data.c.StartUncertainty, + all_age_data.c.EndUncertainty, + func.max(all_age_data.c.StartPeriod).over( + partition_by=all_age_data.c.CollectionObjectID + ).label("MaxStartPeriod"), + func.min(all_age_data.c.EndPeriod).over( + partition_by=all_age_data.c.CollectionObjectID + ).label("MinEndPeriod") + ]).alias("RankedAges") + + # Final aggregation: pick the uncertainties for the extreme values. + start_uncertainty_case = func.max( + case([(ranked_ages.c.StartPeriod == ranked_ages.c.MaxStartPeriod, + ranked_ages.c.StartUncertainty)]) + ) + end_uncertainty_case = func.max( + case([(ranked_ages.c.EndPeriod == ranked_ages.c.MinEndPeriod, + ranked_ages.c.EndUncertainty)]) + ) + + # Build the filter condition based on require_full_overlap. + if require_full_overlap: + filter_condition = and_( + ranked_ages.c.MaxStartPeriod - start_uncertainty_case <= start_time, + ranked_ages.c.MinEndPeriod + end_uncertainty_case >= end_time + ) + else: + filter_condition = or_( + and_( + ranked_ages.c.MaxStartPeriod - start_uncertainty_case <= start_time, + ranked_ages.c.MaxStartPeriod + start_uncertainty_case >= end_time + ), + and_( + ranked_ages.c.MinEndPeriod - end_uncertainty_case <= start_time, + ranked_ages.c.MinEndPeriod + end_uncertainty_case >= end_time + ), + and_( + ranked_ages.c.MaxStartPeriod + start_uncertainty_case >= start_time, + ranked_ages.c.MinEndPeriod - end_uncertainty_case <= end_time + ) + ) + + final_age_subq = select([ + ranked_ages.c.CollectionObjectID, + ranked_ages.c.MaxStartPeriod, + ranked_ages.c.MinEndPeriod, + start_uncertainty_case.label("StartUncertaintyForMaxStartPeriod"), + end_uncertainty_case.label("EndUncertaintyForMinEndPeriod") + ]).group_by( + ranked_ages.c.CollectionObjectID, + ranked_ages.c.MaxStartPeriod, + ranked_ages.c.MinEndPeriod + ).having( + and_( + ranked_ages.c.MaxStartPeriod != None, + ranked_ages.c.MinEndPeriod != None, + filter_condition + ) + ).alias("co_age_filter") + + # Join the aggregated subquery onto the incoming CollectionObject query. + new_query = query.join(final_age_subq, CollectionObject.collectionObjectId == final_age_subq.c.CollectionObjectID) + + # Add a formatted age expression column (for example, concatenating max and min). + age_expr = func.concat_ws( + " - ", + func.regexp_replace( + func.regexp_replace(cast(final_age_subq.c.MaxStartPeriod, String), + r'(\.[0-9]*[1-9])0+$', r'\1'), + r'\.0+$', '' + ), + func.regexp_replace( + func.regexp_replace(cast(final_age_subq.c.MinEndPeriod, String), + r'(\.[0-9]*[1-9])0+$', r'\1'), + r'\.0+$', '' + ) + ).label("age") + new_query = new_query.add_columns(age_expr) + + return new_query From 3a84e7ff4bf4788376663a0be4b61a7fd4a087d6 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Fri, 28 Feb 2025 15:37:20 -0600 Subject: [PATCH 11/14] handle null uncertainties with zeros --- specifyweb/specify/geo_time.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/specifyweb/specify/geo_time.py b/specifyweb/specify/geo_time.py index 0f57aa87080..f4dbe8d4b70 100644 --- a/specifyweb/specify/geo_time.py +++ b/specifyweb/specify/geo_time.py @@ -15,6 +15,7 @@ Collectionobject, Paleocontext, ) +from specifyweb.specify.utils import log_sqlalchemy_query from specifyweb.stored_queries.models import ( AbsoluteAge, RelativeAge, @@ -897,13 +898,17 @@ def modify_query_add_meta_age_range(query, start_time, end_time, require_full_ov ]).alias("RankedAges") # Final aggregation: pick the uncertainties for the extreme values. - start_uncertainty_case = func.max( - case([(ranked_ages.c.StartPeriod == ranked_ages.c.MaxStartPeriod, + start_uncertainty_case = func.coalesce( + func.max( + case([(ranked_ages.c.StartPeriod == ranked_ages.c.MaxStartPeriod, ranked_ages.c.StartUncertainty)]) + ), 0 ) - end_uncertainty_case = func.max( - case([(ranked_ages.c.EndPeriod == ranked_ages.c.MinEndPeriod, + end_uncertainty_case = func.coalesce( + func.max( + case([(ranked_ages.c.EndPeriod == ranked_ages.c.MinEndPeriod, ranked_ages.c.EndUncertainty)]) + ), 0 ) # Build the filter condition based on require_full_overlap. @@ -965,4 +970,5 @@ def modify_query_add_meta_age_range(query, start_time, end_time, require_full_ov ).label("age") new_query = new_query.add_columns(age_expr) + log_sqlalchemy_query(new_query) return new_query From 3fbc3c95827535a286f182b46ef63034c3ae775a Mon Sep 17 00:00:00 2001 From: alec_dev Date: Fri, 28 Feb 2025 15:40:23 -0600 Subject: [PATCH 12/14] mod strict condition --- specifyweb/specify/geo_time.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/specifyweb/specify/geo_time.py b/specifyweb/specify/geo_time.py index f4dbe8d4b70..312ddf31402 100644 --- a/specifyweb/specify/geo_time.py +++ b/specifyweb/specify/geo_time.py @@ -913,9 +913,19 @@ def modify_query_add_meta_age_range(query, start_time, end_time, require_full_ov # Build the filter condition based on require_full_overlap. if require_full_overlap: - filter_condition = and_( - ranked_ages.c.MaxStartPeriod - start_uncertainty_case <= start_time, - ranked_ages.c.MinEndPeriod + end_uncertainty_case >= end_time + # filter_condition = and_( + # ranked_ages.c.MaxStartPeriod - start_uncertainty_case <= start_time, + # ranked_ages.c.MinEndPeriod + end_uncertainty_case >= end_time + # ) + filter_condition = or_( + and_( + ranked_ages.c.MaxStartPeriod - start_uncertainty_case <= start_time, + ranked_ages.c.MinEndPeriod + end_uncertainty_case >= end_time + ), + and_( + ranked_ages.c.MaxStartPeriod + start_uncertainty_case >= start_time, + ranked_ages.c.MinEndPeriod - end_uncertainty_case <= end_time + ) ) else: filter_condition = or_( From 75c0a68ea939e359da19baaa5e9fcf3ee9022d6f Mon Sep 17 00:00:00 2001 From: alec_dev Date: Fri, 28 Feb 2025 17:02:11 -0600 Subject: [PATCH 13/14] change back last strict logic edit --- specifyweb/specify/geo_time.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/specifyweb/specify/geo_time.py b/specifyweb/specify/geo_time.py index 312ddf31402..8e106eea456 100644 --- a/specifyweb/specify/geo_time.py +++ b/specifyweb/specify/geo_time.py @@ -913,20 +913,20 @@ def modify_query_add_meta_age_range(query, start_time, end_time, require_full_ov # Build the filter condition based on require_full_overlap. if require_full_overlap: - # filter_condition = and_( - # ranked_ages.c.MaxStartPeriod - start_uncertainty_case <= start_time, - # ranked_ages.c.MinEndPeriod + end_uncertainty_case >= end_time - # ) - filter_condition = or_( - and_( - ranked_ages.c.MaxStartPeriod - start_uncertainty_case <= start_time, - ranked_ages.c.MinEndPeriod + end_uncertainty_case >= end_time - ), - and_( - ranked_ages.c.MaxStartPeriod + start_uncertainty_case >= start_time, - ranked_ages.c.MinEndPeriod - end_uncertainty_case <= end_time - ) + filter_condition = and_( + ranked_ages.c.MaxStartPeriod - start_uncertainty_case <= start_time, + ranked_ages.c.MinEndPeriod + end_uncertainty_case >= end_time ) + # filter_condition = or_( + # and_( + # ranked_ages.c.MaxStartPeriod - start_uncertainty_case <= start_time, + # ranked_ages.c.MinEndPeriod + end_uncertainty_case >= end_time + # ), + # and_( + # ranked_ages.c.MaxStartPeriod + start_uncertainty_case >= start_time, + # ranked_ages.c.MinEndPeriod - end_uncertainty_case <= end_time + # ) + # ) else: filter_condition = or_( and_( From 5908ba36f2e69b3f4acf8cfefdae450b71caa3e9 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Mon, 3 Mar 2025 16:26:38 -0600 Subject: [PATCH 14/14] fix age period edit --- specifyweb/specify/geo_time.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/specifyweb/specify/geo_time.py b/specifyweb/specify/geo_time.py index 8e106eea456..406a27043d6 100644 --- a/specifyweb/specify/geo_time.py +++ b/specifyweb/specify/geo_time.py @@ -736,8 +736,8 @@ def geo_time_period_query(time_period_name: str, require_full_overlap: bool = Fa return set() start_time = time_period.startperiod end_time = time_period.endperiod - start_time += Decimal(time_period.startuncertainty) if time_period.startuncertainty else Decimal('0.1') - end_time += Decimal(time_period.enduncertainty) if time_period.enduncertainty else Decimal('0.1') + # start_time += Decimal(time_period.startuncertainty) if time_period.startuncertainty else Decimal('0.1') + # end_time += Decimal(time_period.enduncertainty) if time_period.enduncertainty else Decimal('0.1') if start_time is None: start_time = 13800 # max start time, 13800 is the age of the Universe if end_time is None: