From c03f4a21f3dca4194a009ac0de05879caa79a94a Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Thu, 18 Dec 2025 10:08:27 -0700 Subject: [PATCH 1/5] refactor: rename data_quality to groundwater_level_accuracy In NM_Aquifer these are strings describing the accuracy of the groundwater level measurement, so the field has been renamed from data_quality to groundwater_level_accuracy for clarity. --- core/enums.py | 4 +++- core/lexicon.json | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/core/enums.py b/core/enums.py index 91b206cab..1e4de575d 100644 --- a/core/enums.py +++ b/core/enums.py @@ -30,7 +30,9 @@ ContactType: type[Enum] = build_enum_from_lexicon_category("contact_type") CoordinateMethod: type[Enum] = build_enum_from_lexicon_category("coordinate_method") WellPurpose: type[Enum] = build_enum_from_lexicon_category("well_purpose") -DataQuality: type[Enum] = build_enum_from_lexicon_category("data_quality") +GroundwaterLevelAccuracy: type[Enum] = build_enum_from_lexicon_category( + "groundwater_level_accuracy" +) DataSource: type[Enum] = build_enum_from_lexicon_category("data_source") DepthCompletionSource: type[Enum] = build_enum_from_lexicon_category( "depth_completion_source" diff --git a/core/lexicon.json b/core/lexicon.json index 90ead61b9..9eb7f492e 100644 --- a/core/lexicon.json +++ b/core/lexicon.json @@ -10,7 +10,7 @@ {"name": "coordinate_method", "description": null}, {"name": "country", "description": null}, {"name": "county", "description": null}, - {"name": "data_quality", "description": null}, + {"name": "groundwater_level_accuracy", "description": null}, {"name": "data_source", "description": null}, {"name": "depth_completion_source", "description": null}, {"name": "discharge_source", "description": null}, @@ -117,15 +117,15 @@ {"categories": ["well_purpose"], "term": "Monitoring", "definition": "Monitoring"}, {"categories": ["well_purpose"], "term": "Production", "definition": "Production"}, {"categories": 
["well_purpose"], "term": "Injection", "definition": "Injection"}, - {"categories": ["data_quality"], "term": "Water level accurate to within two hundreths of a foot", "definition": "Good"}, - {"categories": ["data_quality"], "term": "Water level accurate to within one foot", "definition": "Fair"}, - {"categories": ["data_quality"], "term": "Water level accuracy not to nearest foot or water level not repeatable", "definition": "Poor"}, - {"categories": ["data_quality"], "term": "Water level accurate to nearest foot (USGS accuracy level)", "definition": "Water level accurate to nearest foot (USGS accuracy level)"}, - {"categories": ["data_quality"], "term": "Water level accurate to nearest tenth of a foot (USGS accuracy level)", "definition": "Water level accurate to nearest tenth of a foot (USGS accuracy level)"}, - {"categories": ["data_quality"], "term": "Water level accurate to nearest one-hundredth of a foot (USGS accuracy level)", "definition": "Water level accurate to nearest one-hundredth of a foot (USGS accuracy level)"}, - {"categories": ["data_quality"], "term": "Water level accuracy not to nearest foot (USGS accuracy level)", "definition": "Water level accuracy not to nearest foot (USGS accuracy level)"}, - {"categories": ["data_quality"], "term": "Water level accuracy unknown (USGS accuracy level)", "definition": "Water level accuracy unknown (USGS accuracy level)"}, - {"categories": ["data_quality"], "term": "None", "definition": "NA"}, + {"categories": ["groundwater_level_accuracy"], "term": "Water level accurate to within two hundreths of a foot", "definition": "Good"}, + {"categories": ["groundwater_level_accuracy"], "term": "Water level accurate to within one foot", "definition": "Fair"}, + {"categories": ["groundwater_level_accuracy"], "term": "Water level accuracy not to nearest foot or water level not repeatable", "definition": "Poor"}, + {"categories": ["groundwater_level_accuracy"], "term": "Water level accurate to nearest foot (USGS accuracy 
level)", "definition": "Water level accurate to nearest foot (USGS accuracy level)"}, + {"categories": ["groundwater_level_accuracy"], "term": "Water level accurate to nearest tenth of a foot (USGS accuracy level)", "definition": "Water level accurate to nearest tenth of a foot (USGS accuracy level)"}, + {"categories": ["groundwater_level_accuracy"], "term": "Water level accurate to nearest one-hundredth of a foot (USGS accuracy level)", "definition": "Water level accurate to nearest one-hundredth of a foot (USGS accuracy level)"}, + {"categories": ["groundwater_level_accuracy"], "term": "Water level accuracy not to nearest foot (USGS accuracy level)", "definition": "Water level accuracy not to nearest foot (USGS accuracy level)"}, + {"categories": ["groundwater_level_accuracy"], "term": "Water level accuracy unknown (USGS accuracy level)", "definition": "Water level accuracy unknown (USGS accuracy level)"}, + {"categories": ["groundwater_level_accuracy"], "term": "None", "definition": "NA"}, {"categories": ["data_source", "depth_completion_source", "discharge_source"], "term": "Reported by another agency", "definition": "Reported by another agency"}, {"categories": ["data_source", "depth_completion_source"], "term": "From driller's log or well report", "definition": "From driller's log or well report"}, {"categories": ["data_source", "depth_completion_source", "discharge_source"], "term": "Private geologist, consultant or univ associate", "definition": "Private geologist, consultant or univ associate"}, From 64b9b6b767f4470c17415cccfaf3b8831608e85a Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Thu, 18 Dec 2025 10:09:56 -0700 Subject: [PATCH 2/5] feat: update observation model and transfer to include groundwater_level_accuracy This is now included in both the creation and response of groundwater level observations. 
--- db/observation.py | 4 ++++ schemas/observation.py | 4 +++- tests/conftest.py | 1 + tests/test_observation.py | 8 ++++++++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/db/observation.py b/db/observation.py index 27fe70458..baba5ab20 100644 --- a/db/observation.py +++ b/db/observation.py @@ -75,6 +75,10 @@ class Observation(Base, AutoBaseMixin, ReleaseMixin): nullable=True, comment="The reason describes everything that can effect the observation the moment a sample/observation is attempted (e.g. obstruction, dry well, equipment failure); a null value must have an associated reason in the same record. Factors preventing the obtainment of the observation from the beginning of the field event to attempted sampling/observation (e.g. flat tire, locked gate, destroyed well) are not recorded here but in the notes field of the FieldEvent table; in this situation no sample/observation should be recorded.", ) + groundwater_level_accuracy: Mapped[str] = lexicon_term( + nullable=True, + comment="The accuracy describes the confidence in the observation value itself after the observation is obtained", + ) # --- Relationships --- # Many-To-One: An Observation can be generated by one piece of Equipment. 
diff --git a/schemas/observation.py b/schemas/observation.py index 2012f002f..e02867879 100644 --- a/schemas/observation.py +++ b/schemas/observation.py @@ -32,7 +32,7 @@ UTCAwareDatetime, ) from schemas.parameter import ParameterResponse -from core.enums import Unit +from core.enums import Unit, GroundwaterLevelAccuracy # class GeothermalMixin: # depth: float @@ -75,6 +75,7 @@ class CreateBaseObservation(BaseCreateModel, ValidateObservation): class CreateGroundwaterLevelObservation(CreateBaseObservation): measuring_point_height: float groundwater_level_reason: str + groundwater_level_accuracy: GroundwaterLevelAccuracy class CreateWaterChemistryObservation(CreateBaseObservation): @@ -117,6 +118,7 @@ class GroundwaterLevelObservationResponse(BaseObservationResponse): depth_to_water_bgs: float | None measuring_point_height: float | None groundwater_level_reason: str | None # NULL from legacy data + groundwater_level_accuracy: GroundwaterLevelAccuracy | None @model_validator(mode="before") def calculate_depth_to_water_bgs(self: Self) -> Self: diff --git a/tests/conftest.py b/tests/conftest.py index 161015e0f..e0d9eff9a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -760,6 +760,7 @@ def groundwater_level_observation(sensor, groundwater_level_sample): unit="ft", measuring_point_height=5.0, groundwater_level_reason="Water level not affected", + groundwater_level_accuracy="Water level accurate to within two hundreths of a foot", ) session.add(observation) session.commit() diff --git a/tests/test_observation.py b/tests/test_observation.py index 643684111..90f644337 100644 --- a/tests/test_observation.py +++ b/tests/test_observation.py @@ -97,6 +97,7 @@ def test_add_groundwater_level_observation(groundwater_level_sample, sensor): "sensor_id": sensor.id, "groundwater_level_reason": "Water level not affected", "unit": "ft", + "groundwater_level_accuracy": "Water level accurate to within two hundreths of a foot", } response = 
client.post("/observation/groundwater-level", json=payload) data = response.json() @@ -115,6 +116,7 @@ def test_add_groundwater_level_observation(groundwater_level_sample, sensor): data["depth_to_water_bgs"] == payload["value"] - payload["measuring_point_height"] ) + assert data["groundwater_level_accuracy"] == payload["groundwater_level_accuracy"] cleanup_post_test(Observation, data["id"]) @@ -324,6 +326,8 @@ def test_get_observation_by_id( assert data["release_status"] == obs.release_status if obs.parameter.id == groundwater_level_parameter_id: assert data["depth_to_water_bgs"] == obs.value - obs.measuring_point_height + assert data["groundwater_level_reason"] == obs.groundwater_level_reason + assert data["groundwater_level_accuracy"] == obs.groundwater_level_accuracy else: assert data["depth_to_water_bgs"] is None @@ -412,6 +416,10 @@ def test_get_groundwater_level_observation_by_id(groundwater_level_observation): data["measuring_point_height"] == groundwater_level_observation.measuring_point_height ) + assert ( + data["groundwater_level_accuracy"] + == groundwater_level_observation.groundwater_level_accuracy + ) def test_get_groundwater_level_observation_by_id_404_not_found( From 8a6326feca4148475566904d34354ba5770c4749 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Thu, 18 Dec 2025 10:15:19 -0700 Subject: [PATCH 3/5] feat: add groundwater_level_accuracy to wl transfer --- transfers/waterlevels_transfer.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py index c09d7d3dd..0f0c87271 100644 --- a/transfers/waterlevels_transfer.py +++ b/transfers/waterlevels_transfer.py @@ -197,7 +197,17 @@ def _make_observation( else: value = row.DepthToWater - # TODO: after sensors have been added to the database update sensor_id (or sensor) for waterlevels that come from db sensors (like e probes?) 
+ if not pd.isna(row.DataQuality): + groundwater_level_accuracy = lexicon_mapper.map_value( + f"LU_DataQuality:{row.DataQuality}" + ) + else: + groundwater_level_accuracy = None + print( + f"Setting groundwater_level_accuracy to {groundwater_level_accuracy} for WaterLevels record with GlobalID {row.GlobalID}" + ) + + # TODO: after sensors have been added to the database update sensor_id (or sensor) for waterlevels that come from db sensors (like e probes?) observation = Observation( nma_pk_waterlevels=row.GlobalID, sample=sample, @@ -209,6 +219,7 @@ def _make_observation( unit="ft", measuring_point_height=measuring_point_height, groundwater_level_reason=glv, + groundwater_level_accuracy=groundwater_level_accuracy, ) return observation From ca0109ef9402a07d6520a2aa74ec971ca82e657d Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Thu, 18 Dec 2025 10:22:05 -0700 Subject: [PATCH 4/5] fix: remove print debugging statement --- docker-compose.yml | 1 + transfers/waterlevels_transfer.py | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 1c6dec4ef..39af42d8f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -28,6 +28,7 @@ services: - POSTGRES_DB=${POSTGRES_DB} - POSTGRES_HOST=db - MODE=${MODE} + - POSTGRES_PORT=5432 - AUTHENTIK_DISABLE_AUTHENTICATION=${AUTHENTIK_DISABLE_AUTHENTICATION} ports: - 8000:8000 diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py index 0f0c87271..b14c770a9 100644 --- a/transfers/waterlevels_transfer.py +++ b/transfers/waterlevels_transfer.py @@ -203,9 +203,6 @@ def _make_observation( ) else: groundwater_level_accuracy = None - print( - f"Setting groundwater_level_accuracy to {groundwater_level_accuracy} for WaterLevels record with GlobalID {row.GlobalID}" - ) # TODO: after sensors have been added to the database update sensor_id (or sensor) for waterlevels that come from db sensors (like e probes?) 
observation = Observation( From 6aeb49add49808f35c506b5b60ab4f994a0a5cac Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Thu, 18 Dec 2025 14:05:58 -0700 Subject: [PATCH 5/5] fix: skip records with invalid DataQuality values --- ...t_water_level_with_unknown_data_quality.py | 36 ++++++++++++++++++ transfers/waterlevels_transfer.py | 37 ++++++++++++++----- 2 files changed, 64 insertions(+), 9 deletions(-) create mode 100644 tests/transfers/test_water_level_with_unknown_data_quality.py diff --git a/tests/transfers/test_water_level_with_unknown_data_quality.py b/tests/transfers/test_water_level_with_unknown_data_quality.py new file mode 100644 index 000000000..077a95fe2 --- /dev/null +++ b/tests/transfers/test_water_level_with_unknown_data_quality.py @@ -0,0 +1,36 @@ +from db import Observation, FieldEvent, FieldActivity, Sample +from db.engine import session_ctx +from transfers.well_transfer import WellTransferer +from transfers.waterlevels_transfer import WaterLevelTransferer + + +def test_water_level_with_unknown_data_quality(): + pointids = ["MG-020"] + wt = WellTransferer(pointids=pointids) + wt.transfer() + + wlt = WaterLevelTransferer() + input_df, cleaned_df = wlt._get_dfs() + wlt.input_df = input_df + wlt.cleaned_df = cleaned_df + wlt.cleaned_df.at[wlt.cleaned_df.index[0], "DataQuality"] = "faux" + + with session_ctx() as session: + wlt._transfer_hook(session) + + assert len(wlt.errors) == 1 + error = wlt.errors[0] + assert error["pointid"] == "MG-020" + assert error["table"] == "WaterLevels" + assert error["field"] == "DataQuality" + assert error["error"] == "Unknown DataQuality value: faux" + + assert session.query(FieldEvent).count() == 2 + assert session.query(FieldActivity).count() == 2 + assert session.query(Sample).count() == 2 + assert session.query(Observation).count() == 2 + + session.query(Observation).delete() + session.query(Sample).delete() + session.query(FieldActivity).delete() + session.query(FieldEvent).delete() diff --git 
a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py index b14c770a9..9a0f3ab6a 100644 --- a/transfers/waterlevels_transfer.py +++ b/transfers/waterlevels_transfer.py @@ -113,6 +113,25 @@ def _transfer_hook(self, session: Session) -> None: except KeyError as e: continue + if not pd.isna(row.DataQuality): + try: + groundwater_level_accuracy = lexicon_mapper.map_value( + f"LU_DataQuality:{row.DataQuality}" + ) + if groundwater_level_accuracy == "None": + # 0 maps to the string "None" in LU_DataQuality + groundwater_level_accuracy = None + except KeyError as e: + self._capture_error( + pointid=row.PointID, + error=f"Unknown DataQuality value: {row.DataQuality}", + table="WaterLevels", + field="DataQuality", + ) + continue + else: + groundwater_level_accuracy = None + release_status = "public" if row.PublicRelease else "private" # field event @@ -162,13 +181,20 @@ def _transfer_hook(self, session: Session) -> None: session.add(sample) # Observation - observation = self._make_observation(row, sample, dt_utc, glv) + observation = self._make_observation( + row, sample, dt_utc, glv, groundwater_level_accuracy + ) session.add(observation) session.commit() def _make_observation( - self, row: pd.Series, sample: Sample, dt_utc: datetime, glv: str + self, + row: pd.Series, + sample: Sample, + dt_utc: datetime, + glv: str, + groundwater_level_accuracy: str | None, ) -> Observation: if pd.isna(row.MPHeight): if pd.notna(row.DepthToWater) and pd.notna(row.DepthToWaterBGS): @@ -197,13 +223,6 @@ def _make_observation( else: value = row.DepthToWater - if not pd.isna(row.DataQuality): - groundwater_level_accuracy = lexicon_mapper.map_value( - f"LU_DataQuality:{row.DataQuality}" - ) - else: - groundwater_level_accuracy = None - # TODO: after sensors have been added to the database update sensor_id (or sensor) for waterlevels that come from db sensors (like e probes?) observation = Observation( nma_pk_waterlevels=row.GlobalID,