From 57bd63182e79323dcb173b1fa6b46546f42b1649 Mon Sep 17 00:00:00 2001 From: ksmuczynski Date: Wed, 12 Nov 2025 16:04:59 -0700 Subject: [PATCH 1/6] feat: add DataProvenance model and enhance base mixins The current schema lacks a way to store and track provenance (origin) data across the database. Created db/data_provenance.py with a polymorphic DataProvenance model for tracking foundational metadata across tables. Added mixin DataProvenanceMixin to db/base.py for reusable polymorphic relationships. Improved documentation and comments in db/base.py for mixins and helper functions. --- db/base.py | 21 ++++++++- db/data_provenance.py | 103 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 db/data_provenance.py diff --git a/db/base.py b/db/base.py index ba2a45be8..ec33e64e1 100644 --- a/db/base.py +++ b/db/base.py @@ -29,7 +29,7 @@ - `ReleaseMixin`: Adds a release status column referencing the `lexicon_term` table. - `AuditMixin`: Adds standard audit columns (created_at, created_by, updated_at, updated_by). 5. A simple `User` model for tracking user information in audit columns. -6. Polymorphic helper mixins (`StatusHistoryMixin`, `NotesMixin`, `AttributionMixin`, `PermissionMixin`.) +6. Polymorphic helper mixins (`StatusHistoryMixin`, `NotesMixin`, `DataProvenanceMixin`, `PermissionMixin`.) which provide a clean, reusable way to add relationships to the polymorphic metadata tables. Any model that can have a status history (like Thing or Location) can simply inherit from the `StatusHistoryMixin` mixin. @@ -210,6 +210,25 @@ def permissions(self): ) +class DataProvenanceMixin: + """ + Mixin for models that can have data provenance records (e.g., Thing, Location). + It automatically creates a polymorphic One-to-Many relationship to the + DataProvenance table. 
+ """ + + @declared_attr + def data_provenance(self): + # One-to-Many polymorphic relationship + return relationship( + "DataProvenance", + primaryjoin=f"and_({self.__name__}.id==foreign(DataProvenance.target_id), " + f"DataProvenance.target_table=='{self.__name__}')", + lazy="selectin", + viewonly=True, + ) + + class User(Base): """Represents a user in the system.""" diff --git a/db/data_provenance.py b/db/data_provenance.py new file mode 100644 index 000000000..aa48e0364 --- /dev/null +++ b/db/data_provenance.py @@ -0,0 +1,103 @@ +""" +SQLAlchemy model for the Provenance table. + +This is the central polymorphic repository for all provenance (origin) metadata +for foundational or static data in the database, such as elevation details or +well construction information. + +***NOTE:*** +This table is **not** used to store routine, transactional analytical metadata +(such as lab qualifiers, detection limits, or analysis dates). That information +is an intrinsic part of a lab result and is stored in the `Observation` and +`LabLimit` tables. This table is for sourcing foundational data, such as a well's +construction details or a site's coordinates. + +""" + +from typing import TYPE_CHECKING + +from sqlalchemy import Integer, Index +from sqlalchemy.orm import relationship, Mapped, mapped_column + +from db.base import Base, AutoBaseMixin, ReleaseMixin + +from db import lexicon_term + +if TYPE_CHECKING: + from db.thing import Thing + from db.location import Location + + +class DataProvenance(AutoBaseMixin, ReleaseMixin, Base): + """ + Represents a single piece of provenance metadata that can be attached to + any other record or field in the database. 
+ """ + + # --- Polymorphic Columns --- + target_id: Mapped[int] = mapped_column( + Integer, + nullable=False, + comment="The primary key (`id`) of the parent record this metadata is about (e.g., the `thing_id` of a well).", + ) + target_table: Mapped[str] = mapped_column( + nullable=False, + comment="The name of the parent table this metadata is for (e.g., 'Thing', 'Location', etc).", + ) + + # --- Columns --- + field_name: Mapped[str] = mapped_column( + nullable=True, + comment="The specific column in the parent table that this metadata applies to (e.g., 'well_depth_ft', 'coordinates')." + "If `NULL`, the record applies to the entire parent object.", + ) + # TODO: Values from the following NMAquifer tables should be included as terms in the lexicon: + # 'LU_DataSource', 'LU_Depth_CompletionSource'. + origin_source: Mapped[str] = lexicon_term( + nullable=True, + comment="Indicates the origin source of the data (e.g'Driller's Log', 'Well Report'.", + ) + # TODO: Values from the following NMAquifer tables should be included as terms in the lexicon: + # 'LU_AltitudeMethod','LU_CoordinateMethod'. + collection_method: Mapped[str] = lexicon_term( + nullable=True, + comment="Indicates the method used to collect the data (e.g., 'GPS - Survey Grade').", + ) + # TODO: Values from the following NMAquifer tables should be included as terms in the lexicon: 'LU_CoordinateAccuracy'. + accuracy_value: Mapped[float] = mapped_column( + nullable=True, comment="A numeric value representing the data's accuracy." + ) + # TODO: Values from the following NMAquifer tables should be included as terms in the lexicon: 'LU_CoordinateAccuracy'. + accuracy_unit: Mapped[str] = lexicon_term( + nullable=True, + comment="The unit for the `accuracy_value` (e.g., 'meters', 'feet').", + ) + + # --- Polymorphic Parent Relationships (Internal) --- + # These are view-only relationships used by the 'target' property below. 
+ # They tell SQLAlchemy exactly how to find the specific parent record for a given child. + _thing_target: Mapped["Thing"] = relationship( + "Thing", + primaryjoin="and_(foreign(DataProvenance.target_id) == Thing.id, DataProvenance.target_table == 'Thing')", + viewonly=True, + ) + _location_target: Mapped["Location"] = relationship( + "Location", + primaryjoin="and_(foreign(DataProvenance.target_id) == Location.id, DataProvenance.target_table == 'Location')", + viewonly=True, + ) + + @property + def target(self): + """ + A generic property to get the parent object (Thing, Location, etc.). + This is useful for simplifying application code by providing a single, + consistent way to access the parent of a polymorphic record. + """ + return getattr(self, f"_{self.target_table.lower()}_target") + + # --- Table Arguments --- + __table_args__ = ( + # Composite index for fast polymorphic lookups + Index("ix_provenance_targets", "target_id", "target_table"), + ) From 0e601fd4e631ab70cdda22400ed9f21da53db707 Mon Sep 17 00:00:00 2001 From: ksmuczynski Date: Wed, 12 Nov 2025 16:19:49 -0700 Subject: [PATCH 2/6] feat: add DataProvenanceMixin for polymorphic provenance tracking Introduced DataProvenanceMixin to the `Thing` and `Location` models to enable reusable, efficient, polymorphic relationships to the DataProvenance table. 
--- db/location.py | 4 ++-- db/thing.py | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/db/location.py b/db/location.py index aecee84fe..bdc189dd7 100644 --- a/db/location.py +++ b/db/location.py @@ -30,14 +30,14 @@ from sqlalchemy.orm import relationship, Mapped, mapped_column from constants import SRID_WGS84 -from db.base import Base, AutoBaseMixin, ReleaseMixin +from db.base import Base, AutoBaseMixin, ReleaseMixin, DataProvenanceMixin from db.lexicon import lexicon_term if TYPE_CHECKING: from db.thing import Thing -class Location(Base, AutoBaseMixin, ReleaseMixin): +class Location(Base, AutoBaseMixin, ReleaseMixin, DataProvenanceMixin): __versioned__ = {} nma_pk_location: Mapped[UUID] = mapped_column(String(36), nullable=True) diff --git a/db/thing.py b/db/thing.py index 73dc9d4cf..533ebcb57 100644 --- a/db/thing.py +++ b/db/thing.py @@ -28,6 +28,7 @@ ReleaseMixin, StatusHistoryMixin, PermissionMixin, + DataProvenanceMixin, ) from db.measuring_point_history import MeasuringPointHistory @@ -40,7 +41,14 @@ from db.group import Group, GroupThingAssociation -class Thing(Base, AutoBaseMixin, ReleaseMixin, StatusHistoryMixin, PermissionMixin): +class Thing( + Base, + AutoBaseMixin, + ReleaseMixin, + StatusHistoryMixin, + PermissionMixin, + DataProvenanceMixin, +): """ Represents a physical object of interest being monitored (e.g., a well). Stores static, core attributes of the physical installation. From f2184d23c528d79ab90d32c1d55c627e87be8fbb Mon Sep 17 00:00:00 2001 From: ksmuczynski Date: Thu, 13 Nov 2025 09:27:56 -0700 Subject: [PATCH 3/6] refactor: refine polymorphic parent relationships. The database tables are snake_case, so for consistency and ease of debugging, the `target_table` values should also use snake_case. Refined the _thing_target and _location_target relationships to ensure DataProvenance.target_table uses snake_case ('thing', 'location') for the target table name. 
--- db/data_provenance.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/db/data_provenance.py b/db/data_provenance.py index aa48e0364..4764495e6 100644 --- a/db/data_provenance.py +++ b/db/data_provenance.py @@ -75,15 +75,15 @@ class DataProvenance(AutoBaseMixin, ReleaseMixin, Base): # --- Polymorphic Parent Relationships (Internal) --- # These are view-only relationships used by the 'target' property below. - # They tell SQLAlchemy exactly how to find the specific parent record for a given child. + # They tell SQLAlchemy exactly how to join `DataProvenance` to the parent/target table. _thing_target: Mapped["Thing"] = relationship( "Thing", - primaryjoin="and_(foreign(DataProvenance.target_id) == Thing.id, DataProvenance.target_table == 'Thing')", + primaryjoin="and_(foreign(DataProvenance.target_id) == Thing.id, DataProvenance.target_table == 'thing')", viewonly=True, ) _location_target: Mapped["Location"] = relationship( "Location", - primaryjoin="and_(foreign(DataProvenance.target_id) == Location.id, DataProvenance.target_table == 'Location')", + primaryjoin="and_(foreign(DataProvenance.target_id) == Location.id, DataProvenance.target_table == 'location')", viewonly=True, ) From 27b7c82a9b91d87d4a08614b949d9cb0a4ba987a Mon Sep 17 00:00:00 2001 From: ksmuczynski Date: Thu, 13 Nov 2025 09:46:30 -0700 Subject: [PATCH 4/6] refactor: move DataProvenanceMixin to data_provenance.py and refactor for class-level usage - Relocated DataProvenanceMixin from base.py to data_provenance.py for better modularity and provenance management. - Refactored mixin to use cls in @declared_attr for proper class-level relationship definition. 
--- db/base.py | 19 ------------------- db/data_provenance.py | 27 ++++++++++++++++++++++++--- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/db/base.py b/db/base.py index ec33e64e1..ccf40f80a 100644 --- a/db/base.py +++ b/db/base.py @@ -210,25 +210,6 @@ def permissions(self): ) -class DataProvenanceMixin: - """ - Mixin for models that can have data provenance records (e.g., Thing, Location). - It automatically creates a polymorphic One-to-Many relationship to the - DataProvenance table. - """ - - @declared_attr - def data_provenance(self): - # One-to-Many polymorphic relationship - return relationship( - "DataProvenance", - primaryjoin=f"and_({self.__name__}.id==foreign(DataProvenance.target_id), " - f"DataProvenance.target_table=='{self.__name__}')", - lazy="selectin", - viewonly=True, - ) - - class User(Base): """Represents a user in the system.""" diff --git a/db/data_provenance.py b/db/data_provenance.py index 4764495e6..1341496f6 100644 --- a/db/data_provenance.py +++ b/db/data_provenance.py @@ -16,10 +16,10 @@ from typing import TYPE_CHECKING -from sqlalchemy import Integer, Index -from sqlalchemy.orm import relationship, Mapped, mapped_column +from sqlalchemy import Integer, Index, and_ +from sqlalchemy.orm import relationship, Mapped, mapped_column, declared_attr, foreign -from db.base import Base, AutoBaseMixin, ReleaseMixin +from db.base import Base, AutoBaseMixin, ReleaseMixin, pascal_to_snake from db import lexicon_term @@ -101,3 +101,24 @@ def target(self): # Composite index for fast polymorphic lookups Index("ix_provenance_targets", "target_id", "target_table"), ) + + +class DataProvenanceMixin: + """ + Mixin for models that can have data provenance records (e.g., Thing, Location). + It automatically creates a polymorphic One-to-Many relationship to the + DataProvenance table. 
+ """ + + @declared_attr + def data_provenance(cls): + # One-to-Many polymorphic relationship + return relationship( + "DataProvenance", + primaryjoin=and_( + cls.id == foreign(DataProvenance.target_id), + DataProvenance.target_table == pascal_to_snake(cls.__name__), + ), + lazy="selectin", + viewonly=True, + ) From 73d3a488f50e7da83c77ef85a3bb330655d35b46 Mon Sep 17 00:00:00 2001 From: ksmuczynski Date: Thu, 13 Nov 2025 13:10:42 -0700 Subject: [PATCH 5/6] refactor: Update lexicon and `enums.py` with DataProvenance related information. - Added new `origin_source` and `collection_method` categories and terms. - Added 'meters' as a term associated with the `unit` category. - Added `OriginStatus` to `enums.py`. --- core/lexicon.json | 34 ++++++++++++++++++++++++++++++---- db/data_provenance.py | 11 +++++------ 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/core/lexicon.json b/core/lexicon.json index 0413f61b4..416ff214b 100644 --- a/core/lexicon.json +++ b/core/lexicon.json @@ -49,7 +49,8 @@ {"name": "vertical_datum", "description": null}, {"name": "well_purpose", "description": null}, {"name": "status_type", "description": null}, - {"name": "status_value", "description": null} + {"name": "status_value", "description": null}, + {"name": "origin_source", "description": null} ], "terms": [ {"categories": ["review_status"], "term": "approved", "definition": "approved"}, @@ -146,6 +147,7 @@ {"categories": ["unit"], "term": "second", "definition": "second"}, {"categories": ["unit"], "term": "minute", "definition": "minute"}, {"categories": ["unit"], "term": "hour", "definition": "hour"}, + {"categories": ["unit"], "term": "m", "definition": "meters"}, {"categories": ["parameter_name"], "term": "groundwater level", "definition": "groundwater level measurement"}, {"categories": ["parameter_name"], "term": "temperature", "definition": "Temperature measurement"}, {"categories": ["parameter_name"], "term": "pH", "definition": "pH"}, @@ -566,8 +568,20 @@ 
{"categories": ["organization"], "term": "Winter Brothers", "definition": "Winter Brothers"}, {"categories": ["organization"], "term": "Yates Petroleum Corporation", "definition": "Yates Petroleum Corporation"}, {"categories": ["organization"], "term": "Zamora Accounting Services", "definition": "Zamora Accounting Services"}, - {"categories": ["collection_method"], "term": "manual", "definition": "manual sampling"}, - {"categories": ["collection_method"], "term": "continuous", "definition": "continuous sampling"}, + {"categories": ["collection_method"], "term": "Altimeter", "definition": "Altimeter"}, + {"categories": ["collection_method"], "term": "Differentially corrected GPS", "definition": "Differentially corrected GPS"}, + {"categories": ["collection_method"], "term": "Survey-grade GPS", "definition": "Survey-grade GPS"}, + {"categories": ["collection_method"], "term": "Global positioning system (GPS)", "definition": "Global positioning system (GPS)"}, + {"categories": ["collection_method"], "term": "LiDAR DEM", "definition": "LiDAR DEM"}, + {"categories": ["collection_method"], "term": "Level or other survey method", "definition": "Level or other survey method"}, + {"categories": ["collection_method"], "term": "Interpolated from topographic map", "definition": "Interpolated from topographic map"}, + {"categories": ["collection_method"], "term": "Interpolated from digital elevation model (DEM)", "definition": "Interpolated from digital elevation model (DEM)"}, + {"categories": ["collection_method"], "term": "Reported", "definition": "Reported"}, + {"categories": ["collection_method"], "term": "Unknown", "definition": "Unknown"}, + {"categories": ["collection_method"], "term": "Survey-grade Global Navigation Satellite Sys, Lvl1", "definition": "Survey-grade Global Navigation Satellite Sys, Lvl1"}, + {"categories": ["collection_method"], "term": "USGS National Elevation Dataset (NED)", "definition": "USGS National Elevation Dataset (NED)"}, + {"categories":
["collection_method"], "term": "Transit, theodolite, or other survey method", "definition": "Transit, theodolite, or other survey method"}, + {"categories": ["role"], "term": "Principal Investigator", "definition": "Principal Investigator"}, {"categories": ["role"], "term": "Owner", "definition": "Owner"}, {"categories": ["role"], "term": "Manager", "definition": "Manager"}, {"categories": ["role"], "term": "Operator", "definition": "Operator"}, @@ -672,6 +686,18 @@ {"categories": ["monitoring_frequency"], "term": "Biannual", "definition": "Location is monitored twice a year."}, {"categories": ["monitoring_frequency"], "term": "Annual", "definition": "Location is monitored once a year."}, {"categories": ["monitoring_frequency"], "term": "Decadal", "definition": "Location is monitored once every ten years."}, - {"categories": ["monitoring_frequency"], "term": "Event-based", "definition": "Location is monitored based on specific events or triggers rather than a fixed schedule."} + {"categories": ["monitoring_frequency"], "term": "Event-based", "definition": "Location is monitored based on specific events or triggers rather than a fixed schedule."}, + {"categories": ["origin_source"], "term": "Reported by another agency", "definition": "Reported by another agency"}, + {"categories": ["origin_source"], "term": "From driller's log or well report", "definition": "From driller's log or well report"}, + {"categories": ["origin_source"], "term": "Private geologist, consultant or univ associate", "definition": "Private geologist, consultant or univ associate"}, + {"categories": ["origin_source"], "term": "Interpreted fr geophys logs by source agency", "definition": "Interpreted fr geophys logs by source agency"}, + {"categories": ["origin_source"], "term": "Memory of owner, operator, driller", "definition": "Memory of owner, operator, driller"}, + {"categories": ["origin_source"], "term": "Measured by source agency", "definition": "Measured by source agency"}, + 
{"categories": ["origin_source"], "term": "Reported by owner of well", "definition": "Reported by owner of well"}, + {"categories": ["origin_source"], "term": "Reported by person other than driller owner agency", "definition": "Reported by person other than driller owner agency"}, + {"categories": ["origin_source"], "term": "Measured by NMBGMR staff", "definition": "Measured by NMBGMR staff"}, + {"categories": ["origin_source"], "term": "Other", "definition": "Other"}, + {"categories": ["origin_source"], "term": "Data Portal", "definition": "Data Portal"} + ] } \ No newline at end of file diff --git a/db/data_provenance.py b/db/data_provenance.py index 1341496f6..4673fbd25 100644 --- a/db/data_provenance.py +++ b/db/data_provenance.py @@ -51,23 +51,22 @@ class DataProvenance(AutoBaseMixin, ReleaseMixin, Base): comment="The specific column in the parent table that this metadata applies to (e.g., 'well_depth_ft', 'coordinates')." "If `NULL`, the record applies to the entire parent object.", ) - # TODO: Values from the following NMAquifer tables should be included as terms in the lexicon: - # 'LU_DataSource', 'LU_Depth_CompletionSource'. + # Values from the following NMAquifer tables are included as `origin_source` terms in the lexicon: + # 'LU_DataSource', 'LU_Depth_CompletionSource'. origin_source: Mapped[str] = lexicon_term( nullable=True, comment="Indicates the origin source of the data (e.g'Driller's Log', 'Well Report'.", ) - # TODO: Values from the following NMAquifer tables should be included as terms in the lexicon: - # 'LU_AltitudeMethod','LU_CoordinateMethod'. + # Values from the following NMAquifer tables are included as `collection_method` terms in the lexicon: + # 'LU_AltitudeMethod','LU_CoordinateMethod'. 
collection_method: Mapped[str] = lexicon_term( nullable=True, comment="Indicates the method used to collect the data (e.g., 'GPS - Survey Grade').", ) - # TODO: Values from the following NMAquifer tables should be included as terms in the lexicon: 'LU_CoordinateAccuracy'. accuracy_value: Mapped[float] = mapped_column( nullable=True, comment="A numeric value representing the data's accuracy." ) - # TODO: Values from the following NMAquifer tables should be included as terms in the lexicon: 'LU_CoordinateAccuracy'. + # Unit values from the following NMAquifer tables are included as 'unit' terms in the lexicon: 'LU_CoordinateAccuracy'. accuracy_unit: Mapped[str] = lexicon_term( nullable=True, comment="The unit for the `accuracy_value` (e.g., 'meters', 'feet').", ) From 781d3f47ec67350e65647bb60528775aa5d2d90e Mon Sep 17 00:00:00 2001 From: ksmuczynski Date: Thu, 13 Nov 2025 13:55:26 -0700 Subject: [PATCH 6/6] refactor: Update lexicon and `enums.py` with DataProvenance related information. - Added new `origin_source` and `collection_method` categories and terms. - Added 'meters' as a term associated with the `unit` category. - Added `OriginSource` to `enums.py`. --- core/enums.py | 1 + 1 file changed, 1 insertion(+) diff --git a/core/enums.py b/core/enums.py index 52e37d805..8fc08c343 100644 --- a/core/enums.py +++ b/core/enums.py @@ -48,6 +48,7 @@ MonitoringStatus: type[Enum] = build_enum_from_lexicon_category("monitoring_status") ParameterName: type[Enum] = build_enum_from_lexicon_category("parameter_name") Organization: type[Enum] = build_enum_from_lexicon_category("organization") +OriginSource: type[Enum] = build_enum_from_lexicon_category("origin_source") ParameterType: type[Enum] = build_enum_from_lexicon_category("parameter_type") PhoneType: type[Enum] = build_enum_from_lexicon_category("phone_type") PublicationType: type[Enum] = build_enum_from_lexicon_category("publication_type")