Skip to content
1 change: 1 addition & 0 deletions core/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
MonitoringStatus: type[Enum] = build_enum_from_lexicon_category("monitoring_status")
ParameterName: type[Enum] = build_enum_from_lexicon_category("parameter_name")
Organization: type[Enum] = build_enum_from_lexicon_category("organization")
OriginSource: type[Enum] = build_enum_from_lexicon_category("origin_source")
ParameterType: type[Enum] = build_enum_from_lexicon_category("parameter_type")
PhoneType: type[Enum] = build_enum_from_lexicon_category("phone_type")
PublicationType: type[Enum] = build_enum_from_lexicon_category("publication_type")
Expand Down
32 changes: 30 additions & 2 deletions core/lexicon.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@
{"name": "vertical_datum", "description": null},
{"name": "well_purpose", "description": null},
{"name": "status_type", "description": null},
{"name": "status_value", "description": null}
{"name": "status_value", "description": null},
{"name": "origin_source", "description": null}
],
"terms": [
{"categories": ["review_status"], "term": "approved", "definition": "approved"},
Expand Down Expand Up @@ -146,6 +147,7 @@
{"categories": ["unit"], "term": "second", "definition": "second"},
{"categories": ["unit"], "term": "minute", "definition": "minute"},
{"categories": ["unit"], "term": "hour", "definition": "hour"},
{"categories": ["unit"], "term": "m", "definition": "meters"},
{"categories": ["parameter_name"], "term": "groundwater level", "definition": "groundwater level measurement"},
{"categories": ["parameter_name"], "term": "temperature", "definition": "Temperature measurement"},
{"categories": ["parameter_name"], "term": "pH", "definition": "pH"},
Expand Down Expand Up @@ -568,6 +570,20 @@
{"categories": ["organization"], "term": "Winter Brothers", "definition": "Winter Brothers"},
{"categories": ["organization"], "term": "Yates Petroleum Corporation", "definition": "Yates Petroleum Corporation"},
{"categories": ["organization"], "term": "Zamora Accounting Services", "definition": "Zamora Accounting Services"},
{"categories": ["collection_method"], "term": "Altimeter", "definition": "Altimeter"},
{"categories": ["collection_method"], "term": "Differentially corrected GPS", "definition": "Differentially corrected GPS"},
{"categories": ["collection_method"], "term": "Survey-grade GPS", "definition": "Survey-grade GPS"},
{"categories": ["collection_method"], "term": "Global positioning system (GPS)", "definition": "Global positioning system (GPS)"},
{"categories": ["collection_method"], "term": "LiDAR DEM", "definition": "LiDAR DEM"},
{"categories": ["collection_method"], "term": "Level or other survey method", "definition": "Level or other survey method"},
{"categories": ["collection_method"], "term": "Interpolated from topographic map", "definition": "Interpolated from topographic map"},
{"categories": ["collection_method"], "term": "Interpolated from digital elevation model (DEM)", "definition": "Interpolated from digital elevation model (DEM)"},
{"categories": ["collection_method"], "term": "Reported", "definition": "Reported"},
{"categories": ["collection_method"], "term": "Unknown", "definition": "Unknown"},
{"categories": ["collection_method"], "term": "Survey-grade Global Navigation Satellite Sys, Lvl1", "definition": "Survey-grade Global Navigation Satellite Sys, Lvl1"},
{"categories": ["collection_method"], "term": "USGS National Elevation Dataset (NED)", "definition": "USGS National Elevation Dataset (NED)"},
{"categories": ["collection_method"], "term": "Transit, theodolite, or other survey method", "definition": "Transit, theodolite, or other survey method"},
{"categories": ["role"], "term": "Principal Investigator", "definition": "Principal Investigator"},
{"categories": ["organization"], "term": "PLSS", "definition": "Public Land Survey System"},
{"categories": ["collection_method"], "term": "manual", "definition": "manual sampling"},
{"categories": ["collection_method"], "term": "continuous", "definition": "continuous sampling"},
Expand Down Expand Up @@ -675,6 +691,18 @@
{"categories": ["monitoring_frequency"], "term": "Biannual", "definition": "Location is monitored twice a year."},
{"categories": ["monitoring_frequency"], "term": "Annual", "definition": "Location is monitored once a year."},
{"categories": ["monitoring_frequency"], "term": "Decadal", "definition": "Location is monitored once every ten years."},
{"categories": ["monitoring_frequency"], "term": "Event-based", "definition": "Location is monitored based on specific events or triggers rather than a fixed schedule."}
{"categories": ["monitoring_frequency"], "term": "Event-based", "definition": "Location is monitored based on specific events or triggers rather than a fixed schedule."},
{"categories": ["origin_source"], "term": "Reported by another agency", "definition": "Reported by another agency"},
{"categories": ["origin_source"], "term": "From driller's log or well report", "definition": "From driller's log or well report"},
{"categories": ["origin_source"], "term": "Private geologist, consultant or univ associate", "definition": "Private geologist, consultant or univ associate"},
{"categories": ["origin_source"], "term": "Interpreted fr geophys logs by source agency", "definition": "Interpreted fr geophys logs by source agency"},
{"categories": ["origin_source"], "term": "Memory of owner, operator, driller", "definition": "Memory of owner, operator, driller"},
{"categories": ["origin_source"], "term": "Measured by source agency", "definition": "Measured by source agency"},
{"categories": ["origin_source"], "term": "Reported by owner of well", "definition": "Reported by owner of well"},
{"categories": ["origin_source"], "term": "Reported by person other than driller owner agency", "definition": "Reported by person other than driller owner agency"},
{"categories": ["origin_source"], "term": "Measured by NMBGMR staff", "definition": "Measured by NMBGMR staff"},
{"categories": ["origin_source"], "term": "Other", "definition": "Other"},
{"categories": ["origin_source"], "term": "Data Portal", "definition": "Data Portal"}

]
}
2 changes: 1 addition & 1 deletion db/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
- `ReleaseMixin`: Adds a release status column referencing the `lexicon_term` table.
- `AuditMixin`: Adds standard audit columns (created_at, created_by, updated_at, updated_by).
5. A simple `User` model for tracking user information in audit columns.
6. Polymorphic helper mixins (`StatusHistoryMixin`, `NotesMixin`, `AttributionMixin`, `PermissionMixin`.)
6. Polymorphic helper mixins (`StatusHistoryMixin`, `NotesMixin`, `DataProvenanceMixin`, `PermissionMixin`.)
which provide a clean, reusable way to add relationships to the polymorphic
metadata tables. Any model that can have a status history (like Thing or Location)
can simply inherit from the `StatusHistoryMixin` mixin.
Expand Down
123 changes: 123 additions & 0 deletions db/data_provenance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""
SQLAlchemy model for the Provenance table.

This is the central polymorphic repository for all provenance (origin) metadata
for foundational or static data in the database, such as elevation details or
well construction information.

***NOTE:***
This table is **not** used to store routine, transactional analytical metadata
(such as lab qualifiers, detection limits, or analysis dates). That information
is an intrinsic part of a lab result and is stored in the `Observation` and
`LabLimit` tables. This table is for sourcing foundational data, such as a well's
construction details or a site's coordinates.

"""

from typing import TYPE_CHECKING

from sqlalchemy import Integer, Index, and_
from sqlalchemy.orm import relationship, Mapped, mapped_column, declared_attr, foreign

from db.base import Base, AutoBaseMixin, ReleaseMixin, pascal_to_snake

from db import lexicon_term

if TYPE_CHECKING:
from db.thing import Thing
from db.location import Location


class DataProvenance(AutoBaseMixin, ReleaseMixin, Base):
    """
    Represents a single piece of provenance metadata that can be attached to
    any other record or field in the database.

    A row identifies its parent through the (`target_table`, `target_id`)
    pair rather than through a foreign-key constraint, so one table can
    describe records from several parent tables. `field_name` optionally
    narrows the row to a single column of that parent.
    """

    # --- Polymorphic Columns ---
    target_id: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        comment="The primary key (`id`) of the parent record this metadata is about (e.g., the `thing_id` of a well).",
    )
    # Stored in snake_case: the relationship joins in this module compare this
    # column against 'thing' / 'location' and `pascal_to_snake(cls.__name__)`.
    target_table: Mapped[str] = mapped_column(
        nullable=False,
        comment="The snake_case name of the parent table this metadata is for (e.g., 'thing', 'location', etc).",
    )

    # --- Columns ---
    # NOTE(review): the optional columns below are declared `nullable=True`
    # while annotated with non-Optional `Mapped[...]` types — confirm whether
    # the annotations should be widened.
    field_name: Mapped[str] = mapped_column(
        nullable=True,
        # The two literals are concatenated implicitly; the trailing space on
        # the first one keeps the sentences separated in the stored comment.
        comment="The specific column in the parent table that this metadata applies to (e.g., 'well_depth_ft', 'coordinates'). "
        "If `NULL`, the record applies to the entire parent object.",
    )
    # Values from the following NMAquifer tables are included as `origin_source` terms in the lexicon:
    # 'LU_DataSource', 'LU_Depth_CompletionSource'.
    origin_source: Mapped[str] = lexicon_term(
        nullable=True,
        comment="Indicates the origin source of the data (e.g., 'From driller's log or well report', 'Measured by source agency').",
    )
    # Values from the following NMAquifer tables are included as `collection_method` terms in the lexicon:
    # 'LU_AltitudeMethod','LU_CoordinateMethod'.
    collection_method: Mapped[str] = lexicon_term(
        nullable=True,
        comment="Indicates the method used to collect the data (e.g., 'Survey-grade GPS').",
    )
    accuracy_value: Mapped[float] = mapped_column(
        nullable=True, comment="A numeric value representing the data's accuracy."
    )
    # Unit values from the following NMAquifer tables are included as 'unit' terms in the lexicon: 'LU_CoordinateAccuracy'.
    accuracy_unit: Mapped[str] = lexicon_term(
        nullable=True,
        comment="The unit for the `accuracy_value` (e.g., 'meters', 'feet').",
    )
Comment on lines +62 to +73

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these fields are used for a small subset of fields in a subset of tables. should these move to those tables, rather than be here? since they won't apply to a number of fields for which DataProvenance will be used.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe, but I saw this as one of the advantages of the Provenance model - all the sparse, optional, and evolving metadata is organized in one central place. If we move these fields to the Location table we'd have to add even more fields (coordinate_accuracy, coordinate_collection_method, coordinate_accuracy_value, coordinate_accuracy_unit, plus the same ones for elevation). I think storing this type of metadata is more efficient with the DataProvenance table, but will let @jirhiker weigh in, too.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems appropriate to store these fields here. We can reevaluate later if user requirements dictate


# --- Polymorphic Parent Relationships (Internal) ---
# These are view-only relationships used by the 'target' property below.
# They tell SQLAlchemy exactly how to join `DataProvenance` to the parent/target table.
# `target_id` is declared without a ForeignKey constraint, so each join marks it
# with `foreign()` and additionally filters on the `target_table` value.
# Attribute names follow the `_<target_table>_target` pattern that the `target`
# property builds at lookup time; a new parent table needs a matching entry here.
_thing_target: Mapped["Thing"] = relationship(
"Thing",
primaryjoin="and_(foreign(DataProvenance.target_id) == Thing.id, DataProvenance.target_table == 'thing')",
viewonly=True,
)
_location_target: Mapped["Location"] = relationship(
"Location",
primaryjoin="and_(foreign(DataProvenance.target_id) == Location.id, DataProvenance.target_table == 'location')",
viewonly=True,
)
Comment thread
ksmuczynski marked this conversation as resolved.

@property
def target(self):
    """
    Return the parent object (Thing, Location, etc.) this record describes.

    The internal `_<target_table>_target` relationship is resolved by name,
    with `target_table` lower-cased first, so application code has a single,
    consistent accessor for the parent of a polymorphic record.
    """
    table_key = self.target_table.lower()
    relationship_name = "_{}_target".format(table_key)
    return getattr(self, relationship_name)

# --- Table Arguments ---
__table_args__ = (
# Composite index for fast polymorphic lookups: it covers `target_id` and
# `target_table`, the two columns every parent-relationship join filters on.
Index("ix_provenance_targets", "target_id", "target_table"),
)


class DataProvenanceMixin:
    """
    Mixin for models that can carry data provenance records (e.g., Thing,
    Location). Inheriting from it gives the model a polymorphic One-to-Many
    `data_provenance` relationship to the DataProvenance table.
    """

    @declared_attr
    def data_provenance(cls):
        # Match rows whose target_id is this model's primary key ...
        matches_parent_id = cls.id == foreign(DataProvenance.target_id)
        # ... and whose target_table is this model's snake_case class name.
        matches_parent_table = DataProvenance.target_table == pascal_to_snake(
            cls.__name__
        )
        join_condition = and_(matches_parent_id, matches_parent_table)

        # One-to-Many polymorphic relationship
        return relationship(
            "DataProvenance",
            primaryjoin=join_condition,
            lazy="selectin",
            viewonly=True,
        )
4 changes: 2 additions & 2 deletions db/location.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@
from sqlalchemy.orm import relationship, Mapped, mapped_column

from constants import SRID_WGS84
from db.base import Base, AutoBaseMixin, ReleaseMixin
from db.base import Base, AutoBaseMixin, ReleaseMixin, DataProvenanceMixin
from db.lexicon import lexicon_term

if TYPE_CHECKING:
from db.thing import Thing


class Location(Base, AutoBaseMixin, ReleaseMixin):
class Location(Base, AutoBaseMixin, ReleaseMixin, DataProvenanceMixin):
__versioned__ = {}

nma_pk_location: Mapped[UUID] = mapped_column(String(36), nullable=True)
Expand Down
10 changes: 9 additions & 1 deletion db/thing.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
Base,
ReleaseMixin,
PermissionMixin,
DataProvenanceMixin,
)
from db.status_history import StatusHistoryMixin
from db.measuring_point_history import MeasuringPointHistory
Expand All @@ -41,7 +42,14 @@
from db.group import Group, GroupThingAssociation


class Thing(Base, AutoBaseMixin, ReleaseMixin, StatusHistoryMixin, PermissionMixin):
class Thing(
Base,
AutoBaseMixin,
ReleaseMixin,
StatusHistoryMixin,
PermissionMixin,
DataProvenanceMixin,
):
"""
Represents a physical object of interest being monitored (e.g., a well).
Stores static, core attributes of the physical installation.
Expand Down
Loading