Skip to content
5 changes: 3 additions & 2 deletions api/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,9 @@ def _get_thing_results(session: Session, q: str, limit: int) -> list[dict]:
select(Thing).where(Thing.thing_type == "spring"), q, vector=vector, limit=limit
)

wells = session.scalars(water_well_query).all()
springs = session.scalars(spring_well_query).all()
# unique needs to be called because of eager loads
wells = session.scalars(water_well_query).unique().all()
springs = session.scalars(spring_well_query).unique().all()

def _make_response(group: str, thing: Thing, properties: dict) -> dict:

Expand Down
2 changes: 0 additions & 2 deletions api/thing.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
add_well_screen,
get_db_things,
get_thing_of_a_thing_type_by_id,
get_active_location,
)
from services.lexicon_helper import get_terms_by_category

Expand Down Expand Up @@ -328,7 +327,6 @@ async def get_thing_by_id(
Retrieve a thing by ID from the database.
"""
thing = simple_get_by_id(session, Thing, thing_id)
thing.active_location = get_active_location(session, thing)
return thing


Expand Down
3 changes: 1 addition & 2 deletions core/lexicon.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,8 @@
{"name": "participant_role", "description": null},
{"name": "geochronology", "description": null},
{"name": "horizontal_datum", "description": null},
{"name": "value_reason", "description": null},
{"name": "limit_type", "description": null},
{"name": "groundwater_level_reason", "description": null},
{"name": "limit_type", "description": null},
{"name": "measurement_method", "description": null},
{"name": "monitoring_status", "description": null},
{"name": "parameter_name", "description": null},
Expand Down
4 changes: 3 additions & 1 deletion db/location.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,9 @@ class LocationThingAssociation(Base, AutoBaseMixin):
)

# --- Relationship Definitions ---
location: Mapped["Location"] = relationship(back_populates="thing_associations")
location: Mapped["Location"] = relationship(
back_populates="thing_associations", lazy="joined"
)
thing: Mapped["Thing"] = relationship(back_populates="location_associations")


Expand Down
57 changes: 56 additions & 1 deletion db/thing.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class Thing(Base, AutoBaseMixin, ReleaseMixin, StatusHistoryMixin, PermissionMix
# TODO: what is the purpose of the `description` field? Is it ever used?
# description: Mapped[str] = mapped_column(String(500), nullable=True)
thing_type: Mapped[str] = lexicon_term(
nullable=True,
nullable=False,
comment="A controlled vocabulary field defining the type of infrastructure (e.g., 'Well', 'Spring', 'Stream Gauge').",
)
first_visit_date: Mapped[Date] = mapped_column(
Expand Down Expand Up @@ -119,13 +119,52 @@ class Thing(Base, AutoBaseMixin, ReleaseMixin, StatusHistoryMixin, PermissionMix

# One-To-Many: A Thing can be at many locations over time.
# If the Thing is deleted, its location history will be deleted.
"""
Developer's notes

If there are many location associations related to a thing, eagerly loading
the location associations may overburden the API and DB and introduce
performance issues. If this becomes an issue, active/current locations can
be fetched in queries when retrieving things. Be thorough if following this
route as it will need to be included everywhere where a thing record is
needed, such as for GET /thing, GET /thing/{thing_id}, and GET /contact. See
below for an example of a way to retrieve the active/current location in a
query:

from sqlalchemy.orm import aliased
from sqlalchemy import select, func

LTA = aliased(LocationThingAssociation)
latest_assoc = (
select(
LTA.thing_id,
func.max(LTA.effective_start).label("max_start")
)
.where(LTA.effective_end == None)
.group_by(LTA.thing_id)
.subquery()
)

lta_alias = aliased(LocationThingAssociation)
query = (
select(Thing, Location)
.join(lta_alias, Thing.id == lta_alias.thing_id)
.join(Location, lta_alias.location_id == Location.id)
.join(
latest_assoc,
(latest_assoc.c.thing_id == lta_alias.thing_id) &
(latest_assoc.c.max_start == lta_alias.effective_start)
)
)
"""
location_associations = relationship(
"LocationThingAssociation",
back_populates="thing",
overlaps="location",
cascade="all, delete-orphan",
passive_deletes=True,
order_by="LocationThingAssociation.effective_start.desc()",
lazy="joined",

@ksmuczynski ksmuczynski Oct 8, 2025

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't this fetch the entire location history for a Thing every time you load it? I would think we would only want a list of historical locations if it's explicitly asked for, which is what the default lazy loading provides.

I understand lazy loading can cause N+1 query problems if you loop through many Things and access the history for each one, but is that really something that will be done very often?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That does load the entire history location for a Thing every time thing records are returned from the database. To access the active (current?) location, though, we need to have access to all location associations so that we can get the latest one (currently based off of effective_start).

If the user gets a list of things, whether from /thing, /thing/water-well, or /thing/spring, extra queries will need to be performed for every object in that list. Weaver, for example, fetches many thousands of wells at a time, so if we use lazy loading there'd be many thousands of extra queries made, which would impact the performance of the API.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I apologize if I'm misinterpreting this discussion, but is the current/active location something that we can cache on the server side so that the location history query doesn't have to happen every time a list of things is requested (with a reasonable TTL)? Along those same lines - the many thousands of things with active locations potentially most relevant to the /geospatial endpoint for requesting many geojsons on a map or in a shapefile all at once (the Weaver story you highlighted - eventually maps in Ocotillo). So perhaps a cache or precalculated results would help with that once we arrive at working on that type of user story?

@jacob-a-brown jacob-a-brown Oct 8, 2025

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By eagerly loading the location associations only one query is made; if done correctly it shouldn't impact the performance of the API. Where it can run into issues, I think, is if there is too much data being transferred. I don't think that's too much of an issue for the time being since the LocationThingAssociation and Location tables are pretty small, but it could become a problem in the future.

There are other eager loading techniques I can look into, like requesting specific columns (I think), but right now I think all of a Location's fields are in the response.

Would the caching technique be difficult to implement?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm checking with CoPilot if there are methods to keep the active/current location as an attribute, but avoid eager loading and N+1. If I can get that to work I'll push these updates

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like the two best options, if we want to take care of this via the API, are:

  1. use eager loading (new method)
  2. write explicit queries (previous method)

The issues with 1 is if too much data is loaded. That is, if a thing has hella associations. Wells only have one location, so this is a moot point for the time being. Rain gages may move, but do they move all that much? Would other types of things have many location associations?

The issue with 2 is that this needs to be implemented every place where a thing is needed, which is cumbersome and error-prone. (This was how I first identified the issue because a contact response has its associated things, and none of those had active locations loaded)

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds like you're on a good track then 👍. We don't need to make anything more complicated when we don't have a concrete use case or story showing otherwise.

)

contact_associations = relationship(
Expand Down Expand Up @@ -199,6 +238,22 @@ class Thing(Base, AutoBaseMixin, ReleaseMixin, StatusHistoryMixin, PermissionMix
)
)

@property
def current_location(self):
"""
Returns the currently active Location by sorting the effective_start
field. Thing eagerly loads location_association, which eagerly loads
location, which will hopefully prevent N+1 query problems.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤞

"""
current_location = sorted(
self.location_associations, key=lambda x: x.effective_start, reverse=True
)
return (
current_location[0].location
if current_location and current_location[0].effective_end is None
else None
)


class ThingIdLink(Base, AutoBaseMixin, ReleaseMixin):
"""
Expand Down
87 changes: 70 additions & 17 deletions schemas/thing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,46 @@
# ===============================================================================
from typing import List

from pydantic import BaseModel, model_validator, PastDate
from pydantic import BaseModel, model_validator, PastDate, Field

from schemas import BaseCreateModel, BaseUpdateModel, BaseResponseModel
from schemas.location import LocationResponse

# -------- VALIDATE ----------


class ValidateWell(BaseModel):
well_depth: float | None = None # in feet

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
well_depth: float | None = None # in feet
well_depth_in_feet: float | None = Field(None, alias="well_depth")

This change would improve clarity by using _in_feet to make units explicit, reducing ambiguity and preventing any future unit-mixing errors, while the alias preserves database compatibility. It also makes the API more 'self-documenting.'

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than add the unit to the field name I can add another field called well_depth_unit and have it automatically be set to ft. Awhile ago we made the decision to not include units in the names, so I'll proceed with this method to keep a consistent style.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fields can have descriptions, so I'll add that to document it for create/update

hole_depth: float | None = None # in feet
well_casing_depth: float | None = None # in feet

@model_validator(mode="after")
def check_depths(self):
if (
self.well_depth is not None
and self.hole_depth is not None
and self.well_depth > self.hole_depth
):
raise ValueError("well depth must be less than than or equal to hole depth")
elif (
self.well_depth is not None
and self.well_casing_depth is not None
and self.well_casing_depth > self.well_depth
):
raise ValueError(
"well casing depth must be greater than or equal to well depth"
)
elif (
self.hole_depth is not None
and self.well_casing_depth is not None
and self.well_casing_depth > self.hole_depth
):
raise ValueError(
"well casing depth must be less than or equal to hole depth"
)

return self


# -------- CREATE ----------
class CreateThingIdLink(BaseModel):
Expand All @@ -43,21 +78,32 @@ class CreateBaseThing(BaseCreateModel):
e.g. POST /thing/water-well, POST /thing/spring determines the thing_type
"""

location_id: int | None = None # Optional location ID for the thing
location_id: int | None
group_id: int | None = None # Optional group ID for the thing
name: str # Name of the thing
first_visit_date: PastDate # Date of NMBGMR's first visit


class CreateWell(CreateBaseThing):
class CreateWell(CreateBaseThing, ValidateWell):
"""
Schema for creating a well.
"""

well_purpose: str | None = None
well_depth: float | None = None # in feet
hole_depth: float | None = None # in feet
well_depth: float | None = Field(
default=None, gt=0, description="Well depth in feet"
)
hole_depth: float | None = Field(
default=None, gt=0, description="Hole depth in feet"
)
well_construction_notes: str | None = None
well_casing_diameter: float | None = Field(
default=None, gt=0, description="Well casing diameter in inches"
)
well_casing_depth: float | None = Field(
default=None, gt=0, description="Well casing depth in feet"
)
well_casing_material: str | None = None


class CreateSpring(CreateBaseThing):
Expand All @@ -74,8 +120,8 @@ class CreateWellScreen(BaseCreateModel):
"""

thing_id: int
screen_depth_bottom: float
screen_depth_top: float
screen_depth_bottom: float = Field(gt=0, description="Screen depth bottom in feet")
screen_depth_top: float = Field(gt=0, description="Screen depth top in feet")
screen_type: str | None = None
screen_description: str | None = None

Expand All @@ -93,24 +139,26 @@ def check_depths(self):
class BaseThingResponse(BaseResponseModel):
name: str
thing_type: str
active_location: LocationResponse | None = None
first_visit_date: PastDate | None = None
current_location: LocationResponse | None
first_visit_date: PastDate | None


class WellResponse(BaseThingResponse):
"""
Response schema for well details.
"""

# api_id: str | None = None
# ose_pod_id: str | None = None
# usgs_id: str | None = None

well_purpose: str | None = None # e.g., "Production", "Observation", etc.
well_depth: float | None = None # in feet
hole_depth: float | None = None # in feet
well_depth: float | None = None
well_depth_unit: str = "ft"
hole_depth: float | None = None
hole_depth_unit: str = "ft"
well_casing_diameter: float | None = None # in inches
well_casing_diameter_unit: str = "in"
well_casing_depth: float | None = None
well_casing_depth_unit: str = "ft"
well_casing_material: str | None = None
well_construction_notes: str | None = None
# Additional fields can be added as needed


class SpringResponse(BaseThingResponse):
Expand Down Expand Up @@ -149,7 +197,9 @@ class WellScreenResponse(BaseResponseModel):
thing_id: int
thing: WellResponse
screen_depth_bottom: float
screen_depth_bottom_unit: str = "ft"
screen_depth_top: float
screen_depth_top_unit: str = "ft"
screen_type: str | None = None
screen_description: str | None = None

Expand Down Expand Up @@ -197,12 +247,15 @@ class UpdateThing(BaseUpdateModel):
first_visit_date: PastDate | None = None # Date of NMBGMR's first visit


class UpdateWell(UpdateThing):
class UpdateWell(UpdateThing, ValidateWell):

well_purpose: str | None = None
well_depth: float | None = None # in feet
hole_depth: float | None = None # in feet
well_construction_notes: str | None = None
well_casing_diameter: float | None = None # in inches
well_casing_depth: float | None = None # in feet
well_casing_material: str | None = None


class UpdateSpring(UpdateThing):
Expand Down
29 changes: 26 additions & 3 deletions services/geospatial_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
from geoalchemy2.shape import to_shape
from shapely.wkt import loads as wkt_loads
from sqlalchemy import Select, select
from sqlalchemy.orm import aliased
from sqlalchemy import func


def get_thing_features(
Expand All @@ -42,10 +44,30 @@ def get_thing_features(
# elif thing_type == "spring":
# selection_args.append(SpringThing)

# Subquery: get the latest association for each thing (optionally only active)
lta_alias = aliased(LocationThingAssociation)

latest_assoc = (
select(
LocationThingAssociation.thing_id,
func.max(LocationThingAssociation.effective_start).label("max_start"),
)
.where(
LocationThingAssociation.effective_end == None
) # Only active, remove if you want most recent regardless of end
.group_by(LocationThingAssociation.thing_id)
.subquery()
)

sql = (
select(Thing, ST_AsGeoJSON(Location.point).label("geojson"), Location.elevation)
.join(LocationThingAssociation, Thing.id == LocationThingAssociation.thing_id)
.join(Location, LocationThingAssociation.location_id == Location.id)
.join(lta_alias, Thing.id == lta_alias.thing_id)
.join(Location, lta_alias.location_id == Location.id)
.join(
latest_assoc,
(latest_assoc.c.thing_id == lta_alias.thing_id)
& (latest_assoc.c.max_start == lta_alias.effective_start),
)
)

if thing_type:
Expand All @@ -65,7 +87,8 @@ def get_thing_features(
else:
sql = sql.where(Group.id == group)

return session.execute(sql).all()
# unique needs to be invoked to prevent duplicates from eager loading
return session.execute(sql).unique().all()


def create_shapefile(things: list, filename: str = "things.shp") -> None:
Expand Down
Loading
Loading