diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c25a9c145..13b826eaa 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -20,6 +20,7 @@ jobs: # image: postgis/postgis:17-3.5 env: POSTGRES_PASSWORD: postgres + POSTGRES_PORT: 54321 options: >- --health-cmd pg_isready --health-interval 10s @@ -27,7 +28,7 @@ jobs: --health-retries 5 ports: # Maps tcp port 5432 on service container to the host - - 5432:5432 + - 54321:5432 steps: - name: Check out source repository @@ -50,12 +51,12 @@ jobs: env: MODE: development POSTGRES_HOST: localhost - POSTGRES_PORT: 5432 + POSTGRES_PORT: 54321 POSTGRES_USER: postgres POSTGRES_PASSWORD: postgres DB_DRIVER: postgres - run: uv run pytest -vv --durations=20 --cov --cov-report=xml --junitxml=junit.xml + run: uv run pytest -vv --durations=20 --cov --cov-report=xml --junitxml=junit.xml --ignore=tests/transfers - name: Checkout BDD repo (features only) uses: actions/checkout@v4 @@ -71,7 +72,7 @@ jobs: - name: Run BDD tests env: POSTGRES_HOST: localhost - POSTGRES_PORT: 5432 + POSTGRES_PORT: 54321 POSTGRES_USER: postgres POSTGRES_PASSWORD: postgres DB_DRIVER: postgres diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b4dba7bf8..c63fbe0a0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,6 +27,7 @@ repos: always_run: true args: - -x + - --ignore=tests/transfers # - repo: https://github.com/pre-commit/mirrors-mypy # rev: v1.10.0 # Use the latest stable version or pin to your preference diff --git a/core/lexicon.json b/core/lexicon.json index 423be5332..0d14be5ac 100644 --- a/core/lexicon.json +++ b/core/lexicon.json @@ -376,30 +376,27 @@ {"categories": ["organization"], "term": "City of Aztec", "definition": "City of Aztec"}, {"categories": ["organization"], "term": "Daybreak Investments", "definition": "Daybreak Investments"}, {"categories": ["organization"], "term": "Vallecitos HOA", "definition": "Vallecitos HOA"}, - {"categories": ["organization"], "term": "Naiche Development", "definition": "Naiche Corporation"}, - {"categories": ["organization"], "term": "Santa Fe County; Santa Fe Animal Shelter", "definition": "Santa Fe County; Santa Fe Animal Shelter"}, + {"categories": ["organization"], "term": "SFC, Santa Fe Animal Shelter", "definition": "Santa Fe County, Santa Fe Animal Shelter"}, {"categories": ["organization"], "term": "El Guicu Ditch Association", "definition": "El Guicu Ditch Association"}, {"categories": ["organization"], "term": "Santa Fe Municipal Airport", "definition": "Santa Fe Municipal Airport"}, {"categories": ["organization"], "term": "Uluru Development", "definition": "Uluru Development"}, {"categories": ["organization"], "term": "AllSup's Convenience Stores", "definition": "AllSup's Convenience Stores"}, - {"categories": ["organization"], "term": "Santa Fe Downs", "definition": "Santa Fe Downs Resort"}, + {"categories": ["organization"], "term": "Santa Fe Downs Resort", "definition": "Santa Fe Downs Resort"}, {"categories": ["organization"], "term": "City of Truth or Consequences, WWTP", "definition": "City of Truth or Consequences, WWTP"}, {"categories": ["organization"], "term": "Riverbend Hotsprings", "definition": "Riverbend Hotsprings"}, {"categories": ["organization"], "term": "Armendaris Ranch", "definition": "Armendaris Ranch"}, {"categories": ["organization"], "term": "El Paso Water", "definition": "El Paso Water"}, {"categories": ["organization"], "term": "BLM, Socorro Field Office", "definition": "BLM, Socorro Field Office"}, {"categories": ["organization"], "term": "USFWS", "definition": "US Fish & Wildlife Service"}, - {"categories": ["organization"], "term": "NPS", "definition": "National Park Service"}, {"categories": ["organization"], "term": "Sile MDWCA", "definition": "Sile Municipal Domestic Water Assn."}, {"categories": ["organization"], "term": "Pena Blanca Water & Sanitation District", "definition": "Pena Blanca Water & Sanitation District"}, {"categories": ["organization"], "term": "Town of Questa", "definition": "Town of Questa"}, - {"categories": ["organization"], "term": "Lamy MDWCA", "definition": "Lama MDWCA"}, {"categories": ["organization"], "term": "Town of Cerro", "definition": "Town of Cerro"}, {"categories": ["organization"], "term": "Farr Cattle Company", "definition": "Farr Cattle Company (Farr Ranch)"}, {"categories": ["organization"], "term": "Carrizozo Orchard", "definition": "Carrizozo Orchard"}, {"categories": ["organization"], "term": "USFS, Kiowa Grasslands", "definition": "USFS, Kiowa Grasslands"}, {"categories": ["organization"], "term": "Cloud Country West Subdivision", "definition": "Cloud Country West Subdivision"}, - {"categories": ["organization"], "term": "Chama West Water Users Association", "definition": "Chama West Water Users Assn."}, + {"categories": ["organization"], "term": "Chama West WUA", "definition": "Chama West Water Users Assn."}, {"categories": ["organization"], "term": "El Rito Regional Water and Waste Water Association", "definition": "El Rito Regional Water + Waste Water Association"}, {"categories": ["organization"], "term": "West Rim MDWUA", "definition": "West Rim MDWUA"}, {"categories": ["organization"], "term": "Village of Willard", "definition": "Village of Willard"}, @@ -422,7 +419,6 @@ {"categories": ["organization"], "term": "Lake Roberts WUA", "definition": "Lake Roberts Water Assn."}, {"categories": ["organization"], "term": "El Creston MDWCA", "definition": "El Creston MDWCA"}, {"categories": ["organization"], "term": "Reserve Municipality Water Works", "definition": "Reserve Municipality Water Works"}, - {"categories": ["organization"], "term": "Bayard", "definition": "Bayard Municipal Water"}, {"categories": ["organization"], "term": "Town of Estancia", "definition": "Town of Estancia"}, {"categories": ["organization"], "term": "Pie Town MDWCA", "definition": "Pie Town MDWCA"}, {"categories": ["organization"], "term": "Roosevelt SWCD", "definition": "Roosevelt Soil & Water Conservation District"}, @@ -438,7 +434,7 @@ {"categories": ["organization"], "term": "Pinon Ridge WUA", "definition": "Pinon Ridge Water Users Association"}, {"categories": ["organization"], "term": "McSherry Farms", "definition": "McSherry Farms"}, {"categories": ["organization"], "term": "Agua Sana WUA", "definition": "Agua Sana Water Users Assn."}, - {"categories": ["organization"], "term": "Chamita MDWCA", "definition": "Chamita Water Users Association"}, + {"categories": ["organization"], "term": "Chamita MDWCA", "definition": "Chamita Mutual Domestic Water Consumers Assn."}, {"categories": ["organization"], "term": "W Spear-bar Ranch", "definition": "W Spear-bar Ranch"}, {"categories": ["organization"], "term": "Village of Capitan", "definition": "Village of Capitan"}, {"categories": ["organization"], "term": "Brazos MDWCA", "definition": "Brazos Mutual Domestic Water Consumers Assn."}, @@ -597,14 +593,9 @@ {"categories": ["organization"], "term": "Rio En Medio MDWCA", "definition": "Rio En Medio MDWCA"}, {"categories": ["organization"], "term": "San Acacia MDWCA", "definition": "San Acacia MDWCA"}, {"categories": ["organization"], "term": "San Juan Residences", "definition": "San Juan Residences"}, - {"categories": ["organization"], "term": "Sangre de Cristo Center", "definition": "Sangre de Cristo Center"}, {"categories": ["organization"], "term": "Sangre de Cristo Estates", "definition": "Sangre de Cristo Estates"}, {"categories": ["organization"], "term": "Santa Fe Community College", "definition": "Santa Fe Community College"}, - {"categories": ["organization"], "term": "Santa Fe County, Fire Facilities", "definition": "Santa Fe County, Fire Facilities"}, - {"categories": ["organization"], "term": "Santa Fe County, Utilities Dept.", "definition": "Santa Fe County, Utilities Dept."}, {"categories": ["organization"], "term": "Sangre de Cristo Center", "definition": "Sangre de Cristo Center"}, - {"categories": ["organization"], "term": "Valle Vista Water Utility", "definition": "Valle Vista Water Utility"}, - {"categories": ["organization"], "term": "Santa Fe County, Valle Vista Water Utility, Inc.", "definition": "Santa Fe County, Valle Vista Water Utility, Inc."}, {"categories": ["organization"], "term": "Santa Fe Horse Park", "definition": "Santa Fe Horse Park"}, {"categories": ["organization"], "term": "Santa Fe Opera", "definition": "Santa Fe Opera"}, {"categories": ["organization"], "term": "Santa Fe Waldorf School", "definition": "Santa Fe Waldorf School"}, @@ -651,7 +642,6 @@ {"categories": ["organization"], "term": "Yates Petroleum Corporation", "definition": "Yates Petroleum Corporation"}, {"categories": ["organization"], "term": "Zamora Accounting Services", "definition": "Zamora Accounting Services"}, {"categories": ["organization"], "term": "PLSS", "definition": "Public Land Survey System"}, - {"categories": ["organization"], "term": "Quemado Municipal Water & SWA", "definition": "Quemado Municipal Water & SWA"}, {"categories": ["collection_method"], "term": "Altimeter", "definition": "ALtimeter"}, {"categories": ["collection_method"], "term": "Differentially corrected GPS", "definition": "Differentially corrected GPS"}, {"categories": ["collection_method"], "term": "Survey-grade GPS", "definition": "Survey-grade GPS"}, @@ -1176,9 +1166,10 @@ {"categories": ["note_type"], "term": "Construction", "definition": "Construction details, well development, drilling notes, etc. Could create separate `types` for each of these if needed."}, {"categories": ["note_type"], "term": "Maintenance", "definition": "Maintenance observations and issues."}, {"categories": ["note_type"], "term": "Historical", "definition": "Historical information or context about the well or location."}, - {"categories": ["note_type"], "term": "Other", "definition": "Other types of notes that do not fit into the predefined categories."}, + {"categories": ["note_type"], "term": "General", "definition": "Other types of notes that do not fit into the predefined categories."}, {"categories": ["note_type"], "term": "Water", "definition": "Water bearing zone information and other info from ose reports"}, {"categories": ["note_type"], "term": "Measuring", "definition": "Notes about measuring/visiting the well, on Access form"}, + {"categories": ["note_type"], "term": "Coordinate", "definition": "Notes about a location's coordinates"}, {"categories": ["well_pump_type"], "term": "Submersible", "definition": "Submersible"}, {"categories": ["well_pump_type"], "term": "Jet", "definition": "Jet Pump"}, {"categories": ["well_pump_type"], "term": "Line Shaft", "definition": "Line Shaft"}, diff --git a/db/thing.py b/db/thing.py index 92c7bd942..286372242 100644 --- a/db/thing.py +++ b/db/thing.py @@ -13,9 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== -from typing import List, TYPE_CHECKING from datetime import date -from sqlalchemy import Integer, ForeignKey, String, Column, Float, Text, Date +from typing import List, TYPE_CHECKING + +from sqlalchemy import Integer, ForeignKey, String, Column, Float, Date from sqlalchemy.ext.associationproxy import association_proxy, AssociationProxy from sqlalchemy.orm import relationship, mapped_column, Mapped from sqlalchemy_utils import TSVectorType @@ -27,11 +28,10 @@ Base, ReleaseMixin, ) +from db.data_provenance import DataProvenanceMixin +from db.measuring_point_history import MeasuringPointHistory from db.permission_history import PermissionHistoryMixin -from services.util import retrieve_latest_polymorphic_history_table_record from db.status_history import StatusHistoryMixin -from db.measuring_point_history import MeasuringPointHistory -from db.data_provenance import DataProvenanceMixin from services.util import retrieve_latest_polymorphic_history_table_record if TYPE_CHECKING: @@ -117,8 +117,6 @@ class Thing( comment="Depth of the well casing from ground surface to the bottom of the casing (in feet).", ) - well_construction_notes: Mapped[str] = mapped_column(Text, nullable=True) - well_completion_date: Mapped[date] = mapped_column( nullable=True, comment="the date the well was completed if known" ) @@ -348,7 +346,10 @@ class Thing( ) # Full-text search vector - search_vector = Column(TSVectorType("name", "well_construction_notes")) + search_vector = Column(TSVectorType("name")) + + # for temporary backwards compatibility + well_construction_notes = mapped_column(String(1000), nullable=True) @property def current_location(self): @@ -372,12 +373,16 @@ def water_notes(self): @property def general_notes(self): - return self._get_notes("Other") + return self._get_notes("General") @property def measuring_notes(self): return self._get_notes("Measuring") + @property + def construction_notes(self): + return self._get_notes("Construction") + @property def well_status(self) -> str | None: """ diff --git a/docker-compose.yml b/docker-compose.yml index 9d3f1ebd2..50ac1e380 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,7 +9,7 @@ services: - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} - POSTGRES_DB=${POSTGRES_DB} ports: - - 5432:5432 + - 54321:5432 volumes: - postgres_data:/var/lib/postgresql/data healthcheck: diff --git a/schemas/thing.py b/schemas/thing.py index 7a7982494..5aaa17985 100644 --- a/schemas/thing.py +++ b/schemas/thing.py @@ -24,8 +24,6 @@ ScreenType, Organization, MonitoringFrequency, - Organization, - MonitoringFrequency, WellConstructionMethod, WellPumpType, FormationCode, @@ -36,6 +34,7 @@ from schemas.notes import NoteResponse, CreateNote from schemas.permission_history import PermissionHistoryResponse + # -------- VALIDATE ---------- @@ -43,7 +42,7 @@ class ValidateWell(BaseModel): well_depth: float | None = None # in feet hole_depth: float | None = None # in feet well_casing_depth: float | None = None # in feet - measuring_point_height: float | None = None # in feet + measuring_point_height: float | None = None @model_validator(mode="after") def validate_values(self): @@ -60,24 +59,24 @@ def validate_values(self): "well casing depth must be less than or equal to hole depth" ) - if self.measuring_point_height is not None: - if ( - self.hole_depth is not None - and self.measuring_point_height >= self.hole_depth - ): - raise ValueError("measuring point height must be less than hole depth") - elif ( - self.well_casing_depth is not None - and self.measuring_point_height >= self.well_casing_depth - ): - raise ValueError( - "measuring point height must be less than well casing depth" - ) - elif ( - self.well_depth is not None - and self.measuring_point_height >= self.well_depth - ): - raise ValueError("measuring point height must be less than well depth") + # if self.measuring_point_height is not None: + # if ( + # self.hole_depth is not None + # and self.measuring_point_height >= self.hole_depth + # ): + # raise ValueError("measuring point height must be less than hole depth") + # elif ( + # self.well_casing_depth is not None + # and self.measuring_point_height >= self.well_casing_depth + # ): + # raise ValueError( + # "measuring point height must be less than well casing depth" + # ) + # elif ( + # self.well_depth is not None + # and self.measuring_point_height >= self.well_depth + # ): + # raise ValueError("measuring point height must be less than well depth") return self @@ -122,7 +121,6 @@ class CreateWell(CreateBaseThing, ValidateWell): hole_depth: float | None = Field( default=None, gt=0, description="Hole depth in feet" ) - well_construction_notes: str | None = None well_casing_diameter: float | None = Field( default=None, gt=0, description="Well casing diameter in inches" ) @@ -130,9 +128,8 @@ class CreateWell(CreateBaseThing, ValidateWell): default=None, gt=0, description="Well casing depth in feet" ) well_casing_materials: list[CasingMaterial] | None = None - measuring_point_height: float = Field( - ge=0, description="Measuring point height in feet" - ) + + measuring_point_height: float = Field(description="Measuring point height in feet") measuring_point_description: str | None = None notes: list[CreateNote] | None = None well_completion_date: PastOrTodayDate | None = None @@ -195,13 +192,11 @@ class BaseThingResponse(BaseResponseModel): thing_type: str current_location: LocationGeoJSONResponse first_visit_date: PastOrTodayDate | None - # The new relationship to the polymorphic Notes table - notes: List[NoteResponse] = [] - groups: list[GroupResponse] = [] monitoring_status: str | None links: list[ThingIdLinkResponse] = Field(default=[], alias="alternate_ids") monitoring_frequencies: list[MonitoringFrequencyResponse] = [] + general_notes: list[NoteResponse] | None = None @field_validator("monitoring_frequencies", mode="before") def remove_records_with_end_date(cls, monitoring_frequencies): @@ -234,7 +229,6 @@ class WellResponse(BaseThingResponse): well_casing_depth: float | None = None well_casing_depth_unit: str = "ft" well_casing_materials: list[CasingMaterial] = [] - well_construction_notes: str | None = None well_completion_date: PastOrTodayDate | None well_completion_date_source: str | None well_driller_name: str | None @@ -251,7 +245,8 @@ class WellResponse(BaseThingResponse): aquifers: list[dict] = [] water_notes: list[NoteResponse] | None = None measuring_notes: list[NoteResponse] | None = None - general_notes: list[NoteResponse] | None = None + + construction_notes: list[NoteResponse] | None = None permissions: list[PermissionHistoryResponse] formation_completion_code: FormationCode | None diff --git a/services/util.py b/services/util.py index a3ddcf472..6a7316073 100644 --- a/services/util.py +++ b/services/util.py @@ -81,7 +81,7 @@ def get_tiger_data( "returnGeometry": "false", } try: - resp = httpx.get(url, params=params, timeout=30) + resp = httpx.get(url, params=params, timeout=5) except Exception as e: print(f"Error getting TIGER data for POINT ({lon} {lat}) {e}") return None diff --git a/tests/__init__.py b/tests/__init__.py index 91ff327db..e97031d7c 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -15,10 +15,15 @@ # =============================================================================== # Load .env file BEFORE importing anything else # Use override=True to override conflicting shell environment variables +import os + from dotenv import load_dotenv load_dotenv(override=True) +# for safety dont test on the production database port +os.environ["POSTGRES_PORT"] = "54321" + # this should not be needed since all Pydantic serializes all datetimes as UTC # furthermore, tzset is not supported on Windows, so this breaks cross-platform compatibility # # Set timezone to UTC for consistent datetime handling in tests diff --git a/tests/conftest.py b/tests/conftest.py index 022171ed0..cd27b3cea 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -57,7 +57,6 @@ def water_well_thing(location): release_status="draft", well_depth=10, hole_depth=10, - well_construction_notes="Test well construction notes", well_casing_diameter=5.0, well_casing_depth=10.0, ) diff --git a/tests/features/environment.py b/tests/features/environment.py index 13bcdead3..1d655a4da 100644 --- a/tests/features/environment.py +++ b/tests/features/environment.py @@ -74,7 +74,7 @@ def add_location(context, session): session.add(loc) session.commit() session.refresh(loc) - n = loc.add_note("Test location", "Other") + n = loc.add_note("Test location", "General") session.add(n) session.commit() session.refresh(loc) @@ -92,7 +92,6 @@ def add_well(context, session, location, name_num): release_status="draft", well_depth=10, hole_depth=10, - well_construction_notes="Test well construction notes", well_casing_diameter=5.0, well_casing_depth=10.0, well_completion_date="2013-05-15", @@ -114,9 +113,10 @@ def add_well(context, session, location, name_num): session.refresh(well) for nt, c in ( - ("Other", "well notes"), + ("General", "well notes"), ("Water", "water notes"), ("Measuring", "measuring notes"), + ("Construction", "construction notes"), ): n = well.add_note(c, nt) session.add(n) diff --git a/tests/features/geojson-response.feature b/tests/features/geojson-response.feature new file mode 100644 index 000000000..7e5757dcb --- /dev/null +++ b/tests/features/geojson-response.feature @@ -0,0 +1,23 @@ +# Created by jakeross at 10/22/25 +@backend @production +Feature: Geojson Response + # Enter feature description here + Background: + Given a functioning api + And the system has valid well and location data in the database + + @positive @happy_path + Scenario: Request all wells as geojson + When the user requests all the wells as geojson + Then the system should return a 200 status code + And the system should return a response in GEOJSON format + And the response should be a feature collection + And the feature collection should have 3 features + + @positive @happy_path + Scenario: Request all wells in a group as geojson + When the user requests all the wells for group Collabnet + Then the system should return a 200 status code + And the system should return a response in GEOJSON format + And the response should be a feature collection + And the feature collection should have 2 features diff --git a/tests/features/location-notes.feature b/tests/features/location-notes.feature new file mode 100644 index 000000000..3d08bd4d7 --- /dev/null +++ b/tests/features/location-notes.feature @@ -0,0 +1,30 @@ +# Created by jakeross at 10/21/25 +@backend @BDMS-199 @production +Feature: Retrieve location notes by well name + As a user + I want to retrieve location notes for a given well name + So that I can view important information about the well's location + Background: + Given a functioning api + And the system has valid well and location data in the database + + @positive @happy_path + Scenario: Retrieve location notes for an existing well + When the user retrieves the well by ID via path parameter + Then the system should return a 200 status code + And the system should return a response in JSON format + And the response should include a current location + And the current location should include notes + And the notes should be a list of dictionaries + And each note dictionary should have "content" and "note_type" keys + And each note in the notes list should be a non-empty string + + @positive @happy_path + Scenario: Retrieve location notes by location ID + When the user retrieves the location by ID via path parameter + Then the system should return a 200 status code + And the system should return a response in JSON format + And the location response should include notes + And the notes should be a list of dictionaries + And each note dictionary should have "content" and "note_type" keys + And each note in the notes list should be a non-empty string diff --git a/tests/features/search-response.feature b/tests/features/search-response.feature new file mode 100644 index 000000000..8b3761379 --- /dev/null +++ b/tests/features/search-response.feature @@ -0,0 +1,44 @@ +@backend @BDMS-169 +Feature: Unified search API returns grouped results + As a user + I want to search for contacts, wells, and springs + So that I can quickly find relevant information across multiple data types + + Background: + Given a functioning api + And the system has valid contact, well, and spring records in the database + + @positive @happy_path + Scenario: Retrieve mixed search results + When the user searches for "" + Then the system should return a 200 status code + And the system should return a response in JSON format + And the response should include results grouped by: + | Contacts | Wells | Springs | + And each result should include a label, group, and properties + + @positive @happy_path + Scenario: Retrieve contact results + When the user searches for "" + Then the system should return a 200 status code + And the response should include a "Contacts" group + And each contact result should include: + #| TODO: use correct field name syntax | + | id | first_name | last_name | email | phone | address | associated_things | + + @positive @happy_path + Scenario: Retrieve well results + When the user searches for "" + Then the system should return a 200 status code + And the response should include a "Wells" group + And each well result should include: + #| TODO: use correct field name syntax | + | thing_type | id | name | alternate site name | contact first name | contact last name | contact id | county | + + @positive @happy_path + Scenario: Retrieve spring results + When the user searches for "" + Then the system should return a 200 status code + And the response should include a "Springs" group + And each spring result should include: + | thing_type | id | \ No newline at end of file diff --git a/tests/features/sensor-notes.feature b/tests/features/sensor-notes.feature new file mode 100644 index 000000000..b44ed8578 --- /dev/null +++ b/tests/features/sensor-notes.feature @@ -0,0 +1,39 @@ +# Created by jakeross at 10/21/25 +@backend @BDMS-199 +Feature: Retrieve sensor notes + As a user + I want to retrieve sensor notes for a given well name + So that I can view important information about the well's deployed sensors + Background: + Given a functioning api + And the system has valid well and location data in the database + +# @positive @happy_path +# Scenario: Request sensor notes for an existing well +# When the user requests the sensor for well 1 +# Then the system should return a 200 status code +# And the system should return a response in JSON format +# And the response should include notes +# And the notes should be a non-empty string + + @positive + Scenario: Request sensor notes by sensor ID + When the user requests the sensor with ID 1 + Then the system should return a 200 status code + And the system should return a response in JSON format + And the response should include notes + And the notes should be a non-empty string + + @negative + Scenario: Request sensor notes for a non-existing sensor ID + When the user requests the sensor with ID 9999 + Then the system should return a 404 status code + And the system should return a response in JSON format + And the response should include an error message indicating the sensor was not found + +# @negative @sad_path +# Scenario: Request sensor notes for a non-existing well +# When the user requests the sensor for well 9999 +# Then the system should return a 404 status code +# And the system should return a response in JSON format +# And the response should include an error message indicating the well was not found \ No newline at end of file diff --git a/tests/features/steps/well-notes.py b/tests/features/steps/well-notes.py index ffd692234..9e20e84f3 100644 --- a/tests/features/steps/well-notes.py +++ b/tests/features/steps/well-notes.py @@ -49,19 +49,17 @@ def step_impl(context): ) def step_impl(context): data = context.response.json() - assert ( - "well_construction_notes" in data - ), "Response does not include construction notes" - assert data["well_construction_notes"] is not None, "Construction notes is null" - context.notes["construction"] = data["well_construction_notes"] + assert "construction_notes" in data, "Response does not include construction notes" + assert data["construction_notes"] is not None, "Construction notes is null" + context.notes["construction"] = data["construction_notes"] @then("the response should include general well notes (catch all notes field)") def step_impl(context): data = context.response.json() - assert "notes" in data, "Response does not include notes" - assert data["notes"] is not None, "Notes is null" - context.notes["general"] = data["notes"] + assert "general_notes" in data, "Response does not include notes" + assert data["general_notes"] is not None, "Notes is null" + context.notes["general"] = data["general_notes"] @then( diff --git a/tests/features/thing-query-parameters.feature b/tests/features/thing-query-parameters.feature new file mode 100644 index 000000000..b0d3d250a --- /dev/null +++ b/tests/features/thing-query-parameters.feature @@ -0,0 +1,24 @@ +# Created by jakeross at 11/2/25 +@backend @BDMS-218 @production +Feature: Thing query paramaters + Use query parameters to filter things + Background: + Given a functioning api + And the system has valid well and location data in the database + + @positive @happy_path + Scenario: Filter things by type + When the user requests things with type "water well" + Then the system should return a 200 status code + And the system should return a response in JSON format + And the response should include at least one thing + And the response should only include things of type "water well" + + @positive @happy_path + Scenario: + When the user requests things with type "spring" + Then the system should return a 200 status code + And the system should return a response in JSON format + And the response should include at least one thing + And the response should only include things of type "spring" + diff --git a/tests/features/thing-type-path-parameters.feature b/tests/features/thing-type-path-parameters.feature new file mode 100644 index 000000000..f744e4728 --- /dev/null +++ b/tests/features/thing-type-path-parameters.feature @@ -0,0 +1,24 @@ +# Created by jakeross at 11/2/25 +@backend @BDMS-218 @production +Feature: Thing type path paramaters + Use path parameters to filter things + Background: + Given a functioning api + And the system has valid well and location data in the database + + @positive @happy_path + Scenario: Get all Water Well Things + When the user requests things with type "water well" + Then the system should return a 200 status code + And the system should return a response in JSON format + And the response should include at least one thing + And the response should only include things of type "water well" + + @positive @happy_path + Scenario: + When the user requests things with type "spring" + Then the system should return a 200 status code + And the system should return a response in JSON format + And the response should include at least one thing + And the response should only include things of type "spring" + diff --git a/tests/features/transducer-data-response.feature b/tests/features/transducer-data-response.feature new file mode 100644 index 000000000..998503f04 --- /dev/null +++ b/tests/features/transducer-data-response.feature @@ -0,0 +1,30 @@ +# Created by jakeross at 11/4/25 +@backend @production +Feature: Transducer Data Response + This feature tests the API's ability to retrieve transducer data for wells. + Background: + Given a functioning api + And the system has valid well and transducer data in the database + + @positive @happy_path + Scenario: Retrieve transducer data for an existing well + When the user requests transducer data for a well + Then the system should return a 200 status code + And the system should return a response in JSON format + + And the response should be paginated + And each page should be an array of transducer data + And each transducer data entry should include a timestamp, value, status + And the timestamp should be in ISO 8601 format + And the value should be a numeric type + And the status should be one of "approved", "not reviewed" + + + + @negative @sad_path + Scenario: Retrieve transducer data for a non-existing well + When the user requests transducer data for a non-existing well + Then the system should return a 200 status code + And the system should return a response in JSON format + And the response should be paginated + And the items should be an empty list \ No newline at end of file diff --git a/tests/features/well-additional-information.feature b/tests/features/well-additional-information.feature new file mode 100644 index 000000000..b4d1ad08f --- /dev/null +++ b/tests/features/well-additional-information.feature @@ -0,0 +1,40 @@ +@backend @BDMS-227 +Feature: Retrieve additional well information by well ID + As a hydrogeologist or data specialist + I want to view additional well attributes with specific physical and operational characteristics + So that I have all necessary well data to confidently complete fieldwork + + Background: + Given a functioning api + And the system has valid well and location data in the database + + Scenario: Retrieve additional well information for an existing well + When the user retrieves the well by ID via path parameter + Then the system should return a 200 status code + And the system should return a response in JSON format + And null values in the response should be represented as JSON null (not placeholder strings) + + # Permissions / Operational OK flags + And the response should include whether repeat measurement permission is granted for the well + And the response should include whether sampling permission is granted for the well + And the response should include whether datalogger installation permission is granted for the well + + # Well Construction Information + And the response should include the completion date of the well + And the response should include the source of the completion information + And the response should include the driller name + And the response should include the construction method + And the response should include the source of the construction information + + # Additional Well Physical Properties + And the response should include the casing diameter in inches + And the response should include the casing depth in feet below ground surface + And the response should include the casing materials + And the response should include the well pump type (previously well_type field) + And the response should include the well pump depth in feet (new field) + And the response should include whether the well is open and suitable for a datalogger + + # Aquifer / Geology Information + And the response should include the formation as the formation zone of well completion + And the response should include the aquifer class code to classify the aquifer into aquifer system. + And the response should include the aquifer type as the type of aquifers penetrated by the well diff --git a/tests/features/well-core-information.feature b/tests/features/well-core-information.feature new file mode 100644 index 000000000..a1d9598e0 --- /dev/null +++ b/tests/features/well-core-information.feature @@ -0,0 +1,50 @@ +@backend @BDMS-221 +Feature: Retrieve core well information by well ID + As a hydrogeologist or data specialist + I want to view clearly labeled core physical attributes and identifiers for the well in a well information page section + so that I can assess key well characteristics at a glance + + Background: + Given a functioning api + And the system has valid well and location data in the database + + Scenario: Retrieve core well information for an existing well + When the user retrieves the well by ID via path parameter + Then the system should return a 200 status code + And the system should return a response in JSON format + And null values in the response should be represented as JSON null (not placeholder strings) + + # Well names and projects + And the response should include the well name (point ID) (i.e. NM-1234) + And the response should include the project(s) or group(s) associated with the well + + # Well Purpose and Status and Monitoring Status + And the response should include the purpose of the well (current use) + And the response should include the well hole status of the well as the status of the hole in the ground (from previous Status field) + And the response should include the monitoring frequency (new field) + And the response should include whether the well is currently being monitored with status text if applicable (from previous MonitoringStatus field) + + # Data Lifecycle and Public Visibility + # NEEDS USER RESEARCH - keep both under release_status for now? (previously PublicRelease) + And the response should include the release status of the well record + + # Well Physical Properties + And the response should include the hole depth in feet + And the response should include the well depth in feet + And the response should include the source of the well depth information + + # Measuring Point Information + And the response should include the description of the measuring point + And the response should include the measuring point height in feet + + # Location Information + # GeoJSON spec format RFC 7946 (Aug 2016) requires coordinates to be decimal degrees in WGS84 + And the response should include location information in GeoJSON spec format RFC 7946 + And the response should include a geometry object with type "Point" and coordinates array [longitude, latitude, elevation] + And the response should include the elevation in feet with vertical datum NAVD88 in the properties + And the response should include the elevation method (i.e. interpolated from digital elevation model) in the properties + And the response should include the UTM coordinates with datum NAD83 in the properties + + # Alternate Identifiers + And the response should include any alternate IDs for the well like the NMBGMR site_name (i.e. John Smith Well), USGS site number, or the OSE well ID and OSE well tag ID + diff --git a/tests/features/well-notes.feature b/tests/features/well-notes.feature new file mode 100644 index 000000000..5cb5a502b --- /dev/null +++ b/tests/features/well-notes.feature @@ -0,0 +1,29 @@ +# Created by jakeross at 10/21/25 +@backend @BDMS-199 @BDMS-233 @production +Feature: Retrieve well notes by well ID + As a user + I want to retrieve well notes for a given well + So that I can understand all necessary context about the well using info not captured in structured fields + Background: + Given a functioning api + And the system has valid well and location data in the database + + @positive @happy_path + Scenario: Retrieve well notes for an existing well + When the user retrieves the well by ID via path parameter + Then the system should return a 200 status code + And the system should return a response in JSON format + And null values in the response should be represented as JSON null (not placeholder strings) + And the response should include location notes (i.e. driving directions and geographic well location notes) + And the response should include construction notes (i.e. pump notes and other construction notes) + And the response should include general well notes (catch all notes field) + And the response should include measuring notes (notes about measuring/visiting the well, on Access form) + And the response should include water notes (i.e. water bearing zone information and other info from ose reports) + And the notes should be a non-empty string + + @negative @sad_path + Scenario: Retrieve well notes for a non-existing well + When the user retrieves the well 9999 + Then the system should return a 404 status code + And the system should return a response in JSON format + And the response should include an error message indicating the well was not found \ No newline at end of file diff --git a/tests/test_geo_services.py b/tests/test_geo_services.py index 83fabc14f..5ef25ba30 100644 --- a/tests/test_geo_services.py +++ b/tests/test_geo_services.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== +import pytest + from services.util import ( get_state_from_point, get_county_from_point, @@ -20,6 +22,7 @@ ) +@pytest.mark.xfail(reason="Relies on an outside service") def test_quad_name_from_point(): x = -106.904107 y = 34.068198 @@ -27,6 +30,7 @@ def test_quad_name_from_point(): assert quad == "Socorro" +@pytest.mark.xfail(reason="Relies on an outside service") def test_state_name_from_point(): x = -100.904107 y = 34.068198 @@ -34,6 +38,7 @@ def test_state_name_from_point(): assert state == "Texas" +@pytest.mark.xfail(reason="Relies on an outside service") def test_county_name_from_point(): x = -106.904107 y = 34.068198 @@ -41,6 +46,7 @@ def test_county_name_from_point(): assert county == "Socorro" +@pytest.mark.xfail(reason="Relies on an outside service") def test_quad_name_from_point_bad_point(): x = 1.904107 y = 34.068198 @@ -48,6 +54,7 @@ def test_quad_name_from_point_bad_point(): assert quad is None +@pytest.mark.xfail(reason="Relies on an outside service") def test_state_name_from_point_bad_point(): x = 1.904107 y = 34.068198 @@ -55,6 +62,7 @@ def test_state_name_from_point_bad_point(): assert state is None +@pytest.mark.xfail(reason="Relies on an outside service") def test_county_name_from_point_bad_point(): x = 1.904107 y = 34.068198 diff --git a/tests/test_location.py b/tests/test_location.py index 9dcb3d098..54e5fea30 100644 --- a/tests/test_location.py +++ b/tests/test_location.py @@ -81,9 +81,11 @@ def test_add_location(): # assert data["elevation_method"] == payload["elevation_method"] # assert data["coordinate_accuracy"] == payload["coordinate_accuracy"] # assert data["coordinate_method"] == payload["coordinate_method"] - assert data["state"] == "New Mexico" - assert data["county"] == "Bernalillo" - assert data["quad_name"] == "Albuquerque East" + + # relies on external service that is not 100% + # assert data["state"] == "New Mexico" + # assert data["county"] == "Bernalillo" + # assert data["quad_name"] == "Albuquerque East" # cleanup after test cleanup_post_test(Location, data["id"]) @@ -121,14 +123,14 @@ def test_update_location(location): # assert data["elevation_method"] == payload["elevation_method"] # assert data["coordinate_accuracy"] == payload["coordinate_accuracy"] # assert data["coordinate_method"] == payload["coordinate_method"] - assert data["state"] == "New Mexico" - assert data["county"] == "Socorro" - assert data["quad_name"] == "Socorro" + # assert data["state"] == "New Mexico" + # assert data["county"] == "Socorro" + # assert data["quad_name"] == "Socorro" # cleanup after test - payload["state"] = location.state - payload["county"] = location.county - payload["quad_name"] = location.quad_name + # payload["state"] = location.state + # payload["county"] = location.county + # payload["quad_name"] = location.quad_name # cleanup_patch_test(Location, payload, location) diff --git a/tests/test_thing.py b/tests/test_thing.py index 5bd504718..87a016c03 100644 --- a/tests/test_thing.py +++ b/tests/test_thing.py @@ -27,9 +27,9 @@ ) from db import Thing, WellScreen, ThingIdLink from main import app +from schemas import DT_FMT from schemas.location import LocationResponse from schemas.thing import ValidateWell -from schemas import DT_FMT from tests import ( client, override_authentication, @@ -78,29 +78,30 @@ def test_validate_hole_depth_casing_depth(): ValidateWell(hole_depth=100.0, well_casing_depth=110.0) -def test_validate_mp_height_hole_depth(): - with pytest.raises( - ValueError, - match="measuring point height must be less than hole depth", - ): - ValidateWell(hole_depth=100.0, measuring_point_height=110.0) - - -def test_validate_mp_height_well_depth(): - with pytest.raises( - ValueError, - match="measuring point height must be less than well depth", - ): - ValidateWell(well_depth=100.0, measuring_point_height=105.0) - - -def test_validate_mp_height_well_casing_depth(): - with pytest.raises( - ValueError, - match="measuring point height must be less than well casing depth", - ): - ValidateWell(well_casing_depth=100.0, measuring_point_height=105.0) - +# this is not a valid test because measuring_point_height is not related to hole_depth +# def test_validate_mp_height_hole_depth(): +# with pytest.raises( +# ValueError, +# match="measuring point height must be less than hole depth", +# ): +# ValidateWell(hole_depth=100.0, measuring_point_height=110.0) +# +# +# def test_validate_mp_height_well_depth(): +# with pytest.raises( +# ValueError, +# match="measuring point height must be less than well depth", +# ): +# ValidateWell(well_depth=100.0, measuring_point_height=105.0) +# +# +# def test_validate_mp_height_well_casing_depth(): +# with pytest.raises( +# ValueError, +# match="measuring point height must be less than well casing depth", +# ): +# ValidateWell(well_casing_depth=100.0, measuring_point_height=105.0) +# # POST tests =================================================================== diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index 985214fbb..f871acb3b 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -21,7 +21,7 @@ 2. Location.nma_site_date is populated from CSV SiteDate if not null (read-only post-migration) """ import datetime -from unittest.mock import Mock, patch, MagicMock +from unittest.mock import patch import pandas as pd import pytest @@ -71,7 +71,7 @@ def test_make_location_with_both_ampapi_dates(mock_lexicon_mapper): elevations = {} # Call make_location - location, elevation_method = make_location(row, elevations) + location, elevation_method, location_notes = make_location(row, elevations) # Verify nma_date_created is set from DateCreated assert location.nma_date_created is not None @@ -106,7 +106,7 @@ def test_make_location_with_only_date_created(mock_lexicon_mapper): ) elevations = {} - location, elevation_method = make_location(row, elevations) + location, elevation_method, location_notes = make_location(row, elevations) # Verify nma_date_created is set assert location.nma_date_created == datetime.date(2014, 4, 3) @@ -136,7 +136,7 @@ def test_make_location_with_site_date_later_than_date_created(mock_lexicon_mappe ) elevations = {} - location, elevation_method = make_location(row, elevations) + location, elevation_method, location_notes = make_location(row, elevations) # Both dates should be preserved as-is, regardless of order assert location.nma_date_created == datetime.date(2010, 1, 15) @@ -164,7 +164,7 @@ def test_make_location_with_very_old_site_date(mock_lexicon_mapper): ) elevations = {} - location, elevation_method = make_location(row, elevations) + location, elevation_method, location_notes = make_location(row, elevations) # Verify very old date is preserved assert location.nma_site_date == datetime.date(1954, 5, 1) @@ -196,7 +196,7 @@ def test_make_location_ampapi_dates_are_date_not_datetime(mock_lexicon_mapper): ) elevations = {} - location, elevation_method = make_location(row, elevations) + location, elevation_method, location_notes = make_location(row, elevations) # Verify they are date objects (not datetime) assert isinstance(location.nma_date_created, datetime.date) @@ -231,7 +231,7 @@ def test_make_location_ampapi_dates_independent_of_created_at(mock_lexicon_mappe ) elevations = {} - location, elevation_method = make_location(row, elevations) + location, elevation_method, location_notes = make_location(row, elevations) # created_at should be None during transfer (auto-set by AutoBaseMixin on save) assert location.created_at is None @@ -271,7 +271,7 @@ def test_make_location_with_no_ampapi_dates(mock_lexicon_mapper): ) elevations = {} - location, elevation_method = make_location(row, elevations) + location, elevation_method, location_notes = make_location(row, elevations) # Both AMPAPI date fields should be null assert location.nma_date_created is None @@ -299,7 +299,7 @@ def test_make_location_with_empty_string_dates(mock_lexicon_mapper): ) elevations = {} - location, elevation_method = make_location(row, elevations) + location, elevation_method, location_notes = make_location(row, elevations) # Both AMPAPI date fields should be null (empty strings are falsy) assert location.nma_date_created is None @@ -336,7 +336,7 @@ def create_test_row(i, has_site_date): for i in range(100): row = create_test_row(i, has_site_date=(i < 9)) - location, _ = make_location(row, elevations) + location, _, _ = make_location(row, elevations) # Count coverage if location.nma_date_created is not None: diff --git a/tests/transfers/__init__.py b/tests/transfers/__init__.py new file mode 100644 index 000000000..8e546ddc2 --- /dev/null +++ b/tests/transfers/__init__.py @@ -0,0 +1,17 @@ +# =============================================================================== +# Copyright 2025 ross +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# =============================================================================== + +# ============= EOF ============================================= diff --git a/tests/transfers/test_contact_with_multiple_wells.py b/tests/transfers/test_contact_with_multiple_wells.py new file mode 100644 index 000000000..4199142ef --- /dev/null +++ b/tests/transfers/test_contact_with_multiple_wells.py @@ -0,0 +1,35 @@ +# =============================================================================== +# Copyright 2025 ross +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# =============================================================================== + +from db import ThingContactAssociation +from db.engine import session_ctx +from transfers.contact_transfer import ContactTransfer +from transfers.well_transfer import WellTransferer + + +def test_multiple_wells(): + pointids = ["MG-022", "MG-030", "MG-043"] + wt = WellTransferer(pointids=pointids) + wt.transfer() + + ct = ContactTransfer(pointids=pointids) + ct.transfer() + + with session_ctx() as sess: + assert sess.query(ThingContactAssociation).count() == 6 + + +# ============= EOF ============================================= diff --git a/transfers/asset_transfer.py b/transfers/asset_transfer.py index b7938f15d..5783fe00b 100644 --- a/transfers/asset_transfer.py +++ b/transfers/asset_transfer.py @@ -15,47 +15,65 @@ # =============================================================================== import io +from sqlalchemy.orm import Session from starlette.datastructures import UploadFile -from db import Asset, AssetThingAssociation +from db import Asset, AssetThingAssociation, Thing from services.gcs_helper import ( gcs_upload, get_storage_bucket, get_storage_client, ) from transfers.logger import logger +from transfers.transferer import Transferer from transfers.util import read_csv, filter_to_valid_point_ids -from transfers.well_transfer import WellChunkTransferer -class AssetTransferer(WellChunkTransferer): +class AssetTransferer(Transferer): def __init__(self, *args, **kw): self.source_table = "WellPhotos" super().__init__(*args, **kw) self._client = get_storage_client() self._bucket = get_storage_bucket(self._client) logger.info(f"Using bucket {self._bucket.name}") + self.chunk_size = 20 def _get_dfs(self): input_df = read_csv(self.source_table) cleaned_df = filter_to_valid_point_ids(input_df) return input_df, cleaned_df - def _chunk_step(self, session, df, i, row, db_item): + def _transfer_hook(self, session: Session): + added_pointid = [] + for i, row in enumerate(self.cleaned_df.itertuples()): + if row.PointID in added_pointid: + continue + + added_pointid.append(row.PointID) + well = ( + session.query(Thing) + .filter(Thing.name == row.PointID, Thing.thing_type == "water well") + .one_or_none() + ) + self._asset_step(session, i, well) + session.commit() + + def _asset_step(self, session, i, db_item): + df = self.cleaned_df photos = df[df["PointID"] == db_item.name] - n = len(df) if photos.empty: photos = df[df["PointID"] == db_item.name.replace("-", "")] if photos.empty: logger.info(f"No photos found for PointID: {db_item.name}") return + n = len(photos) for j, row in enumerate(photos.itertuples()): photo_path = row.OLEPath srcblob = self._bucket.get_blob(f"nma-photos/{photo_path}") if not srcblob: - logger.critical( - f"No photo found for PointID: {db_item.name}, {photo_path}" + self._capture_error( + db_item.name, f"No photo found for {photo_path}", "OLEPath" ) continue @@ -78,7 +96,7 @@ def _chunk_step(self, session, df, i, row, db_item): assoc.asset = asset session.add(assoc) session.add(asset) - session.commit() + # session.commit() logger.info( f"Added asset {i}-{j}/{n} thing.id={db_item.id} thing={db_item.name} uri: {uri}" ) diff --git a/transfers/contact_transfer.py b/transfers/contact_transfer.py index 1c690e0ce..9168eab77 100644 --- a/transfers/contact_transfer.py +++ b/transfers/contact_transfer.py @@ -43,18 +43,32 @@ class ContactTransfer(ThingBasedTransferer): def __init__(self, *args, **kw): super().__init__(*args, **kw) + + """ + Developer's note + + - company to organization mapping is stored in transfers/data/owners_organization_mapper.json + - the key is the value in NM_Aquifer and the value is the standardized organization name used in the lexicon + """ co_to_org_mapper_path = get_transfers_data_path( "owners_organization_mapper.json" ) with open(co_to_org_mapper_path, "r") as f: self._co_to_org_mapper = json.load(f) - organization_mapper_path = get_transfers_data_path("organization_mapping.json") - with open(organization_mapper_path, "r") as f: - self._organization_mapper = json.load(f) - self._added = [] + def calculate_missing_organizations(self): + input_df, cleaned_df = self._get_dfs() + + for row in replace_nans(input_df).itertuples(): + if not row.Company: + continue + try: + _get_organization(row, self._co_to_org_mapper) + except ValueError as e: + logger.critical(f"Invalid Organization {e}") + def _get_dfs(self): input_df = read_csv(self.source_table) odf = input_df.drop(["OBJECTID", "GlobalID"], axis=1) @@ -67,7 +81,7 @@ def _get_dfs(self): odf = replace_nans(odf) - odf = filter_to_valid_point_ids(odf) + odf = filter_to_valid_point_ids(odf, self.pointids) return input_df, odf def _get_prepped_group(self, group) -> DataFrame: @@ -84,7 +98,6 @@ def _group_step(self, session: Session, row: pd.Series, db_item: Base): row, db_item, self._co_to_org_mapper, - self._organization_mapper, self._added, ): session.commit() @@ -102,7 +115,7 @@ def _group_step(self, session: Session, row: pd.Series, db_item: Base): self._capture_error(row.PointID, str(e), "UnknownError") -def _add_first_contact(session, row, thing, co_to_org_mapper, org_mapper, added): +def _add_first_contact(session, row, thing, co_to_org_mapper, added): # TODO: extract role from OwnerComment # role = extract_owner_role(row.OwnerComment) role = "Owner" @@ -111,10 +124,7 @@ def _add_first_contact(session, row, thing, co_to_org_mapper, org_mapper, added) name = _make_name(row.FirstName, row.LastName) # check if organization is in lexicon - organization = _get_organization(row, co_to_org_mapper, org_mapper) - if (name, organization) in added: - return None - added.append((name, organization)) + organization = _get_organization(row, co_to_org_mapper) contact_data = { "thing_id": thing.id, @@ -129,7 +139,12 @@ def _add_first_contact(session, row, thing, co_to_org_mapper, org_mapper, added) "phones": [], } - contact = _make_contact_and_assoc(session, contact_data, thing) + contact, new = _make_contact_and_assoc(session, contact_data, thing, added) + + if not new: + return True + else: + added.append((name, organization)) if row.Email: email = _make_email( @@ -202,22 +217,19 @@ def _add_first_contact(session, row, thing, co_to_org_mapper, org_mapper, added) return True -def _get_organization(row, co_to_org_mapper, org_mapper): +def _get_organization(row, co_to_org_mapper): organization = co_to_org_mapper.get(row.Company, row.Company) + # use Organization enum to catch validation errors try: Organization(organization) except ValueError: - norganization = next( - (k for k, v in org_mapper.items() if v == organization), None - ) - logger.warning(f"mapping {organization} to {norganization}") - organization = norganization + return None return organization -def _add_second_contact(session, row, thing, co_to_org_mapper, org_mapper, added): +def _add_second_contact(session, row, thing, co_to_org_mapper, added): if all( [ getattr(row, f"Second{f}") is None @@ -230,11 +242,7 @@ def _add_second_contact(session, row, thing, co_to_org_mapper, org_mapper, added release_status = "private" name = _make_name(row.SecondFirstName, row.SecondLastName) - organization = _get_organization(row, co_to_org_mapper, org_mapper) - if (name, organization) in added: - return - - added.append((name, organization)) + organization = _get_organization(row, co_to_org_mapper) contact_data = { "thing_id": thing.id, @@ -249,7 +257,11 @@ def _add_second_contact(session, row, thing, co_to_org_mapper, org_mapper, added "phones": [], } - contact = _make_contact_and_assoc(session, contact_data, thing) + contact, new = _make_contact_and_assoc(session, contact_data, thing, added) + if not new: + return True + else: + added.append((name, organization)) if row.SecondCtctEmail: email = _make_email( @@ -339,20 +351,31 @@ def _make_address(first_second, ownerkey, kind, **kw): ) -def _make_contact_and_assoc(session, data, thing): - from schemas.contact import CreateContact +def _make_contact_and_assoc(session, data, thing, added): + new_contact = True + if (data["name"], data["organization"]) in added: + contact = ( + session.query(Contact) + .filter_by(name=data["name"], organization=data["organization"]) + .first() + ) + new_contact = False + else: - contact = CreateContact(**data) - contact_data = contact.model_dump() - contact_data.pop("thing_id") - contact = Contact(**contact_data) + from schemas.contact import CreateContact + + contact = CreateContact(**data) + contact_data = contact.model_dump() + contact_data.pop("thing_id") + contact = Contact(**contact_data) + session.add(contact) assoc = ThingContactAssociation() assoc.thing = thing assoc.contact = contact session.add(assoc) - session.add(contact) - return contact + + return contact, new_contact # ============= EOF ============================================= diff --git a/transfers/data/measured_by_mapper.json b/transfers/data/measured_by_mapper.json index a24ac6a22..b642ef78d 100644 --- a/transfers/data/measured_by_mapper.json +++ b/transfers/data/measured_by_mapper.json @@ -200,6 +200,7 @@ "EA": ["EA", "NMBGMR", "Unknown"], "EA/HB": [["EA", "NMBGMR", "Unknown"], ["HB", "NMBGMR", "Unknown"]], "EM": ["Ethan Mamer", "NMBGMR", "Hydrogeologist"], + "EAM": ["Ethan Mamer", "NMBGMR", "Hydrogeologist"], "EM, AL": [["Ethan Mamer", "NMBGMR", "Hydrogeologist"],["Angela Lucero", "NMBGMR", "Hydrologist"]], "EM, CM": [["Ethan Mamer", "NMBGMR", "Hydrogeologist"], ["Cris Morton", "NMBGMR", "Hydrogeologist"]], "EM,CM": [["Ethan Mamer", "NMBGMR", "Hydrogeologist"], ["Cris Morton", "NMBGMR", "Hydrogeologist"]], @@ -346,6 +347,7 @@ "Steve": ["Steve", "Village of Magdalena", "Operator"], "T.Decker": ["T.Decker", "Unknown", "Unknown"], "TK": ["Trevor Kludt", "NMBGMR", "Technician"], + "tk": ["Trevor Kludt", "NMBGMR", "Technician"], "Trevor Kludt": ["Trevor Kludt", "NMBGMR", "Technician"], "TK BF": [["Trevor Kludt", "NMBGMR", "Technician"], ["Brigitte Felix", "NMBGMR", "Publications Manager"]], "TK, BF": [["Trevor Kludt", "NMBGMR", "Technician"], ["Brigitte Felix", "NMBGMR", "Publications Manager"]], diff --git a/transfers/data/organization_mapping.json b/transfers/data/organization_mapping.json deleted file mode 100644 index 0d3bda9dc..000000000 --- a/transfers/data/organization_mapping.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "City of Aztec": "City of Aztec", - "Daybreak Investments": "Daybreak Investments", - "Vallecitos HOA": "Vallecitos HOA", - "Naiche Development": "Naiche Corporation", - "Santa Fe County; Santa Fe Animal Shelter": "Santa Fe County; Santa Fe Animal Shelter", - "El Guicu Ditch Association": "El Guicu Ditch Association", - "Santa Fe Municipal Airport": "Santa Fe Municipal Airport", - "Uluru Development": "Uluru Development", - "AllSup's Convenience Stores": "AllSup's Convenience Stores", - "Santa Fe Downs": "Santa Fe Downs Resort", - "City of Truth or Consequences, WWTP": "City of Truth or Consequences, WWTP", - "Riverbend Hotsprings": "Riverbend Hotsprings", - "Armendaris Ranch": "Armendaris Ranch", - "El Paso Water": "El Paso Water", - "PVACD": "Pecos Valley Artesian Conservancy District", - "BLM, Socorro Field Office": "BLM, Socorro Field Office", - "USFWS": "US Fish & Wildlife Service", - "NPS": "National Park Service", - "Sile MDWCA": "Sile Municipal Domestic Water Assn.", - "Pena Blanca Water & Sanitation District": "Pena Blanca Water & Sanitation District", - "Town of Questa": "Town of Questa", - "Lamy MDWCA": "Lama MDWCA", - "Town of Cerro": "Town of Cerro", - "Farr Cattle Company": "Farr Cattle Company (Farr Ranch)", - "Carrizozo Orchard": "Carrizozo Orchard", - "USFS, Kiowa Grasslands": "USFS, Kiowa Grasslands", - "Cloud Country West Subdivision": "Cloud Country West Subdivision", - "Chama West Water Users Association": "Chama West Water Users Assn.", - "El Rito Regional Water and Waste Water Association": "El Rito Regional Water + Waste Water Association", - "West Rim MDWUA": "West Rim MDWUA", - "Village of Willard": "Village of Willard", - "Quemado Municipal Water & SWA": "Quemado Mutual Water and Sewage Works Association", - "Coyote Creek MDWUA": "Coyote Creek MDWUA", - "Lamy Mutual Domestic Water Assn.": "Lamy Mutual Domestic Water Assn.", - "La Joya CWDA": "La Joya CWDA", - "NM Firefighters Training Academy": "NM Firefighters Training Academy", - "Cebolleta Land Grant": "Cebolleta Land Grant", - "Madrid Water Co-op": "Madrid Water Co-op", - "Sun Valley Water and Sanitation": "Sun Valley Water and Sanitation", - "Bluewater Lake MDWCA": "Bluewater Lake MDWCA", - "Bluewater Acres Domestic WUA": "Bluewater Acres Domestic Water Users Assn.", - "Lybrook MDWCA": "Lybrook Municipal", - "New Mexico Museum of Natural History": "New Mexico Museum of Natural History", - "Hillsboro MDWCA": "Hillsboro Mutual Domestic Water Consumer Assn.", - "Tyrone MDWCA": "Tyrone Mutual Domestic Water Assn.", - "Santa Clara Water System": "Santa Clara Water System", - "Casas Adobes MDWCA": "Casas Adobes Mutual Domestic", - "Lake Roberts WUA": "Lake Roberts Water Assn.", - "El Creston MDWCA": "El Creston MDWCA", - "Reserve Municipality Water Works": "Reserve Municipality Water Works", - "Bayard": "Bayard Municipal Water", - "Town of Estancia": "Town of Estancia", - "Pie Town MDWCA": "Pie Town MDWCA", - "Roosevelt SWCD": "Roosevelt Soil & Water Conservation District", - "Otis MDWCA": "Otis Mutual Domestic", - "White Cliffs MDWUA": "White Cliffs MDWUA", - "Vista Linda Water Co-op": "Vista Linda Water Co-op", - "Anasazi Trails Water Co-op": "Anasazi Trails Water Cooperative", - "Canon MDWCA": "Canon Mutual Domestic Water Consumer Assn.", - "Placitas Trails Water Co-op": "Placitas Trails Water Coop", - "BLM, Roswell Office": "BLM, Roswell Office", - "Forked Lightning Ranch": "Forked Lightning Ranch", - "Cottonwood RWA": "Cottonwood Rural Water Assn.", - "Pinon Ridge WUA": "Pinon Ridge Water Users Association", - "McSherry Farms": "McSherry Farms", - "Agua Sana WUA": "Agua Sana Water Users Assn.", - "Chamita MDWCA": "Chamita Water Users Association", - "W Spear-bar Ranch": "W Spear-bar Ranch", - "Village of Capitan": "Village of Capitan", - "Brazos MDWCA": "Brazos Mutual Domestic Water Consumers Assn.", - "Alto Alps HOA": "Alto Alps Homeowners Association", - "Chiricahua Desert Museum": "Chiricahua Desert Museum", - "Bike Ranch": "Bike Ranch", - "Hachita MDWCA": "Hachita MDWCA", - "Carrizozo Municipal Water": "Carrizozo Municipal Water", - "Dunhill Ranch": "Dunhill Ranch", - "Santa Fe Conservation Trust": "Santa Fe Conservation Trust" -} diff --git a/transfers/data/owners_organization_mapper.json b/transfers/data/owners_organization_mapper.json index b4f29bd7b..b10f5da0d 100644 --- a/transfers/data/owners_organization_mapper.json +++ b/transfers/data/owners_organization_mapper.json @@ -2,111 +2,173 @@ "Abeyta Engineering": "Abeyta Engineering, Inc", "Adobe Ranch": "Adobe Ranch", "Agua Fria Water Assn.": "Agua Fria Community Water Association", + "Agua Sana Water Users Assn.": "Agua Sana WUA", + "AllSup's Convenience Stores": "AllSup's Convenience Stores", + "Alto Alps Homeowners Association": "Alto Alps HOA", + "Anasazi Trails Water Cooperative": "Anasazi Trails Water Co-op", "Apache Gap Ranch": "Apache Gap Ranch", "Aspendale Baptist Camp": "Aspendale Mountain Retreat", + "Armendaris Ranch": "Armendaris Ranch", "Augustin Ranch LLC": "Augustin Plains Ranch LLC", "B + B Cattle": "B & B Cattle Co", "BLM Taos Office": "BLM Taos Office", + "BLM, Socorro Field Office": "BLM, Socorro Field Office", + "BLM, Roswell Office": "BLM, Roswell Office", + "Bayard Municipal Water": "Bayard", "Berridge Distributing Company": "Berridge Distributing Company", + "Bike Ranch": "Bike Ranch", "Bishop's Lodge": "Bishop's Lodge", + "Bluewater Acres Domestic Water Users Assn.": "Bluewater Acres Domestic WUA", + "Bluewater Lake MDWCA": "Bluewater Lake MDWCA", "Bonanza Creek Ranch": "Bonanza Creek Ranch", + "Brazos Mutual Domestic Water Consumers Assn.": "Brazos MDWCA", "Bug Scuffle Water Association": "Bug Scuffle Water Association", "Bureau of Land Management": "BLM", "Camp Wehinahpay": "Wehinahpay Mountain Camp", "Campbell Ranch": "Campbell Ranch", + "Canon Mutual Domestic Water Consumer Assn.": "Canon MDWCA", "Capitol Ford Auto Body": "Capitol Ford Santa Fe", + "Carrizozo Municipal Water": "Carrizozo Municipal Water", + "Carrizozo Orchard": "Carrizozo Orchard", + "Casas Adobes Mutual Domestic": "Casas Adobes MDWCA", + "Cebolleta Land Grant": "Cebolleta Land Grant", "Cemex Plant": "Cemex, Inc", "Cerro Community Center": "Cerro Community Center", + "Chama West Water Users Assn.": "Chama West WUA", + "Chamita Water Users Association": "Chamita MDWCA", "Chabad Jewish Center": "Santa Fe Jewish Center", + "Chiricahua Desert Museum": "Chiricahua Desert Museum", "Chupadero MDWCA": "Chupadero MDWCA", "Cielo Lumbre HOA": "Cielo Lumbre HOA", "Circle Cross Ranch": "Circle Cross Ranch", + "City of Aztec": "City of Aztec", "City of Alamogordo": "City of Alamogordo", "City of Portales, Public Works Dept.": "City of Portales, Public Works Dept.", "City of Santa Fe WWTP": "City of Santa Fe WWTP", "City of Santa Fe, Municipal Recreation Complex": "City of Santa Fe, Municipal Recreation Complex", "City of Santa Fe, Sangre de Cristo Water Co.": "City of Santa Fe, Sangre de Cristo Water Co.", "City of Socorro": "City of Socorro", + "City of Truth or Consequences, WWTP": "City of Truth or Consequences, WWTP", + "Cloud Country West Subdivision": "Cloud Country West Subdivision", "Commonwealth Conservancy": "Commonwealth Conservancy", + "Cottonwood Rural Water Assn.": "Cottonwood RWA", "Country Club Garden MHP": "Country Club Garden Mobile Home Park", + "Coyote Creek MDWUA": "Coyote Creek MDWUA", "Crossroads Cattle Co., Ltd.": "Crossroads Cattle Co., Ltd", + "Daybreak Investments": "Daybreak Investments", "Double H Ranch": "Double H Ranch", + "Dunhill Ranch": "Dunhill Ranch", "E.A. Meadows East": "E.A. Meadows East", "El Camino Realty": "El Camino Realty, Inc", + "El Creston MDWCA": "El Creston MDWCA", "El Dorado Utilities": "Eldorado Area Water & Sanitation District", "El Gancho Restaurant": "Bourbon Grill at El Gancho", + "El Guicu Ditch Association": "El Guicu Ditch Association", + "El Paso Water": "El Paso Water", "El Prado HOA": "El Prado HOA", "El Rancho de las Golondrinas": "El Rancho de las Golondrinas", "El Rito MDWCA": "El Rito Canyon MDWCA", + "El Rito Regional Water + Waste Water Association": "El Rito Regional Water and Waste Water Association", "Eldorado Water and Sanitation District": "Eldorado Area Water & Sanitation District", "Encantado Enterprises": "Encantado Enterprises", "Estrella Concepts LLC": "Estrella Concepts LLC", "Farr Cattle Company (Farr Ranch": "Farr Cattle Company", + "Farr Cattle Company (Farr Ranch)": "Farr Cattle Company", "Fire Department": "Sixteen Springs Fire Department", "Fire Water Lodge": "Fire Water Lodge", "Ford County Land and Cattle": "Ford County Land & Cattle Company, Inc", + "Forked Lightning Ranch": "Forked Lightning Ranch", "Friendly Construction, Inc.": "Friendly Construction, Inc", + "Hachita MDWCA": "Hachita MDWCA", "Hacienda Del Cerezo, Ltd.": "Hacienda Del Cerezo", "Hefker Vega Ranch": "Hefker Vega Ranch", "High Nogal Ranch": "High Nogal Ranch", + "Hillsboro Mutual Domestic Water Consumer Assn.": "Hillsboro MDWCA", "Holloman Air Force Base": "Holloman Air Force Base", "Hyde Park Estates MDWCA": "Hyde Park Estates MDWCA", "Hyde Park Estates WUA": "Hyde Park Estates MDWCA", "Ideal Mobile Home & RV": "Desert Village RV & Mobile Home Park", "K. Schmitt Trust": "K. Schmitt Trust", "La Cienega Mutual Domestic Assn.": "La Cienega MDWCA", + "La Joya CWDA": "La Joya CWDA", "La Vista HOA": "La Vista HOA", + "Lake Roberts Water Assn.": "Lake Roberts WUA", + "Lama MDWCA": "Lamy MDWCA", "Lamy MDWUA": "Lamy MDWCA", + "Lamy Mutual Domestic Water Assn.": "Lamy MDWCA", "Land Ventures LLC": "Land Ventures LLC", "Las Lagunitas": "Las Lagunitas", "Las Lagunitas Homeowners Assn.": "Las Lagunitas HOA", "Living World Ministries": "Living World Ministries", "Los Atrevidos, Inc.": "Los Atrevidos, Inc", "Los Prados HOA": "Los Prados HOA", + "Lybrook Municipal": "Lybrook MDWCA", + "Madrid Water Co-op": "Madrid Water Co-op", "Malaga Water System": "Malaga MDWCA & SWA", "Mangas Outfitters": "Mangas Outfitters", + "McSherry Farms": "McSherry Farms", "Media Gravel Pit": "Medina Gravel Pit", "Mendenhall Trading Co.": "Mendenhall Trading Co", "Mesa Verde Ranch": "Mesa Verde Ranch", "NESWCD": "Northeastern SWCD", "TSWCD": "Taos SWCD", "NM Environment Dept.": "NMED", + "NM Firefighters Training Academy": "NM Firefighters Training Academy", "NM Game & Fish Dept.": "NMDGF", "NM Office of the State Engineer": "NMOSE", "NM State Highway Dept.": "NMDOT", "NMSU College of Agriculture": "NMSU College of Agriculture", + "Naiche Corporation": "Naiche Development", "Naiche Development": "Naiche Development", "National Radio Astronomy Observatory": "NRAO", + "National Park Service": "NPS", + "New Mexico Museum of Natural History": "New Mexico Museum of Natural History", "New Mexico Spaceport Authority": "NMSA", "New Mexico Tech": "NMT", "Nogal Mutual Domestic Water Consumers Association": "Nogal MDWCA", "O Bar O Ranch": "O Bar O Ranch", "OMI Wastewater Treatment Plant": "OMI Wastewater Treatment Plant", "Old Road Ranch Pardners Ltd.": "Old Road Ranch Pardners Ltd", + "Otis Mutual Domestic": "Otis MDWCA", "PNM Service Center": "PNM Service Center", "PNM, Sangre de Cristo": "PNM Service Center", "Peace Tabernacle Church": "Peace Tabernacle Church", "Pecos Trail Inn": "Pecos Trail Inn", + "Pecos Valley Artesian Conservancy District": "PVACD", + "Pena Blanca Water & Sanitation District": "Pena Blanca Water & Sanitation District", "Pelican Spa": "Pelican Spa", + "Pie Town MDWCA": "Pie Town MDWCA", + "Pinon Ridge Water Users Association": "Pinon Ridge WUA", "Pistachio Tree Ranch": "Pistachio Tree Ranch", + "Placitas Trails Water Coop": "Placitas Trails Water Co-op", "Quemado Mutual Water and Sewage Works Association": "Quemado Municipal Water & SWA", "Rancho Encantado": "Rancho Encantado", "Rancho San Lucas": "Rancho San Lucas", "Rancho San Marcos": "Rancho San Marcos", "Rancho Viejo Partnership": "Rancho Viejo Partnership", "Ranney Ranch": "Ranney Ranch", + "Reserve Municipality Water Works": "Reserve Municipality Water Works", "Rio En Medio MDWCA": "Rio En Medio MDWCA", + "Riverbend Hotsprings": "Riverbend Hotsprings", + "Roosevelt Soil & Water Conservation District": "Roosevelt SWCD", "San Acacia MDWCA": "San Acacia MDWCA", "San Juan Residences": "San Juan Residences", "Sandia National Laboratories": "SNL", "Sangre de Cristo Center": "Sangre de Cristo Center", "Sangre de Cristo Estates": "Sangre de Cristo Estates", + "Santa Clara Water System": "Santa Clara Water System", + "Santa Fe County; Santa Fe Animal Shelter": "SFC, Santa Fe Animal Shelter", "Santa Fe Community College": "Santa Fe Community College", + "Santa Fe Conservation Trust": "Santa Fe Conservation Trust", "Santa Fe County": "SFC", "Santa Fe County, Fire Facilities": "SFC, Fire Facilities", + "SFC, Fire Facilities": "SFC, Fire Facilities", "Santa Fe County, Utilities Dept.": "SFC, Utilities Dept.", - "Santa Fe County, Valle Vista Water Utility, Inc.": "Santa Fe County, Valle Vista Water Utility, Inc.", - "Santa Fe Downs": "Santa Fe Downs", + "SFC, Utilities Dept.": "SFC, Utilities Dept.", + "Santa Fe County, Valle Vista Water Utility, Inc.": "SFC, Valle Vista Water Utility, Inc.", + "SFC, Valle Vista Water Utility, Inc.": "SFC, Valle Vista Water Utility, Inc.", + "Santa Fe Downs": "Santa Fe Downs Resort", + "Santa Fe Downs Resort": "Santa Fe Downs Resort", "Santa Fe Horse Park": "Santa Fe Horse Park", "Santa Fe Municipal Airport": "Santa Fe Municipal Airport", "Santa Fe Opera": "Santa Fe Opera", @@ -114,11 +176,13 @@ "Shidoni Foundry, Inc.": "Shidoni Foundry and Gallery", "Sierra Grande Lodge": "Sierra Grande Lodge", "Sierra Vista Retirement Community": "Sierra Vista Retirement Community", + "Sile Municipal Domestic Water Assn.": "Sile MDWCA", "Slash Triangle Ranch": "Slash Triangle Ranch", "Stagecoach Motel": "Stagecoach Motel", "State of New Mexico": "State of New Mexico", "Stephenson Ranch": "Stephenson Ranch", "Sun Broadcasting Network": "Sun Broadcasting Network", + "Sun Valley Water and Sanitation": "Sun Valley Water and Sanitation", "Tano Rd LLC": "Tano Rd LLC", "Taos Municipal Schools, UNM Taos": "UNM-Taos", "Tee Pee Ranch/Tee Pee Subdivision": "Tee Pee Ranch/Tee Pee Subdivision", @@ -128,31 +192,45 @@ "The Great Cloud Zen Center": "The Great Cloud Zen Center", "Three Rivers Ranch": "Three Rivers Ranch", "Timberon Water and Sanitation District": "Timberon Water and Sanitation District", + "Town of Cerro": "Town of Cerro", + "Town of Estancia": "Town of Estancia", "Town of Magdalena": "Town of Magdalena", + "Town of Questa": "Town of Questa", "Town of Taos": "Town of Taos", "Town of Taos, National Guard Armory": "Town of Taos, National Guard Armory", "Trinity Ranch": "Trinity Ranch", "Tularosa Basin Natl. Desalination Research Facil.": "Tularosa Basin National Desalination Research Facility", "Turquoise Trail Charter School": "Turquoise Trail Charter School", + "Tyrone Mutual Domestic Water Assn.": "Tyrone MDWCA", "US Bureau of Indian Affairs, Indian School": "US Bureau of Indian Affairs, Santa Fe Indian School", "USFS, Carson NF, Taos Office": "USFS, Carson NF, Taos Office", "USFS, Cibola NF, Magdalena Ranger District": "USFS, Cibola NF, Magdalena Ranger District", + "USFS, Kiowa Grasslands": "USFS, Kiowa Grasslands", "USFS, Santa Fe NF, Espanola Ranger District": "USFS, Santa Fe NF, Espanola Ranger District", "USFS/Bluewater Ranch": "USFS", + "US Fish & Wildlife Service": "USFWS", + "Uluru Development": "Uluru Development", "Ute Mountain Farms": "Ute Mountain Farms", "VA Hospital": "VA Hospital", - "Valle Vista Water Utility": "Valle Vista Water Utility", + "Valle Vista Water Utility": "SFC, Valle Vista Water Utility, Inc.", + "Vallecitos HOA": "Vallecitos HOA", "Velte": "Velte", "Vereda Serena Property": "Vereda Serena Property", + "Village of Capitan": "Village of Capitan", "Village of Corona": "Village of Corona", "Village of Floyd": "Village of Floyd", "Village of Melrose": "Village of Melrose", "Village of Vaughn": "Village of Vaughn", + "Village of Willard": "Village of Willard", "Vista Land Company": "Vista Land Company", + "Vista Linda Water Co-op": "Vista Linda Water Co-op", "Vista Redonda MDWCA": "Vista Redonda MDWCA", "Vista Redondo MDWCA": "Vista Redonda MDWCA", "Vista de Oro Water Co-op": "Vista de Oro de Placitas Water Users Coop", + "W Spear-bar Ranch": "W Spear-bar Ranch", "Walker Ranch": "Walker Ranch", + "West Rim MDWUA": "West Rim MDWUA", + "White Cliffs MDWUA": "White Cliffs MDWUA", "Wild & Woolley Trailer Ranch": "Wild & Woolley Trailer Ranch", "Winter Brothers/U.S. Government": "Winter Brothers", "Yates Petroleum": "Yates Petroleum Corporation", diff --git a/transfers/geologic_formation_transfer.py b/transfers/geologic_formation_transfer.py index 7fcd73e4c..4483aa3aa 100644 --- a/transfers/geologic_formation_transfer.py +++ b/transfers/geologic_formation_transfer.py @@ -1,6 +1,7 @@ import time -from sqlalchemy.orm import Session + from pydantic import ValidationError +from sqlalchemy.orm import Session from db import GeologicFormation from schemas.geologic_formation import CreateGeologicFormation @@ -38,11 +39,6 @@ def transfer_geologic_formations(session: Session, limit: int = None) -> tuple: # 4. Process each row for i, row in enumerate(cleaned_df.itertuples()): - # check if limit is reached - if limit and i >= limit: - logger.info(f"Reached limit of {limit} rows. Stopping migration.") - break - # Log progress every 'step' rows if i and not i % step: logger.info( @@ -67,18 +63,18 @@ def transfer_geologic_formations(session: Session, limit: int = None) -> tuple: continue # Check if this formation already exists - existing = ( - session.query(GeologicFormation) - .filter(GeologicFormation.formation_code == formation_code) - .first() - ) - - if existing: - logger.info( - f"Skipping row {i}: Formation code {formation_code} already exists" - ) - skipped_count += 1 - continue + # existing = ( + # session.query(GeologicFormation) + # .filter(GeologicFormation.formation_code == formation_code) + # .first() + # ) + # + # if existing: + # logger.info( + # f"Skipping row {i}: Formation code {formation_code} already exists" + # ) + # skipped_count += 1 + # continue # 6. Prepare data for creation # Note: We only store the formation_code. Formation names will be mapped by the API using a @@ -96,12 +92,12 @@ def transfer_geologic_formations(session: Session, limit: int = None) -> tuple: CreateGeologicFormation.model_validate(data) except ValidationError as e: + skipped_count += 1 errors.append({"code": formation_code, "errors": e.errors()}) - logger.critical( - f"Validation error for row {i} with Code {formation_code}: {e.errors()}" - ) + logger.critical(f"Validation error for row {i} with Code {formation_code}") continue except Exception as e: + skipped_count += 1 errors.append({"code": formation_code, "errors": str(e)}) logger.critical(f"Error preparing data for {formation_code}: {e}") continue diff --git a/transfers/metrics.py b/transfers/metrics.py index 1f2b67bdd..76ebc07ec 100644 --- a/transfers/metrics.py +++ b/transfers/metrics.py @@ -34,6 +34,8 @@ TransducerObservation, Group, Asset, + PermissionHistory, + ThingGeologicFormationAssociation, ) from db.engine import session_ctx from services.gcs_helper import get_storage_bucket @@ -95,6 +97,12 @@ def asset_metrics(self, *args, **kw) -> None: def group_metrics(self, *args, **kw) -> None: self._handle_metrics(Group, *args, **kw) + def permissions_metrics(self, *args, **kw) -> None: + self._handle_metrics(PermissionHistory, *args, **kw) + + def stratigraphy_metrics(self, *args, **kw) -> None: + self._handle_metrics(ThingGeologicFormationAssociation, *args, **kw) + def contact_metrics(self, input_df, cleaned_df, errors) -> None: count = self._get_count( Contact, diff --git a/transfers/permissions_transfer.py b/transfers/permissions_transfer.py index 18daa1040..6bb0f7124 100644 --- a/transfers/permissions_transfer.py +++ b/transfers/permissions_transfer.py @@ -1,9 +1,10 @@ -from sqlalchemy.orm import Session from datetime import datetime + from pandas import isna +from sqlalchemy.orm import Session -from db import Thing, PermissionHistory -from transfers.util import read_csv, logger, replace_nans +from db import Thing, PermissionHistory, Contact, ThingContactAssociation +from transfers.util import read_csv, logger, replace_nans, chunk_by_size """ Developer's notes @@ -14,7 +15,34 @@ """ -def transfer_permissions(session: Session): +def _make_permission( + wdf, well, contact_id, nma_field, permission_type +) -> PermissionHistory | None: + + values = wdf.loc[wdf["PointID"] == well.name, nma_field].values + if len(values) == 0: + return None + elif isna(values[0]): + return None + + permission_allowed = bool(values[0]) + permission = PermissionHistory( + contact_id=contact_id, + permission_type=permission_type, + permission_allowed=permission_allowed, + start_date=datetime.today().date(), + target_id=well.id, + target_table="thing", + ) + + logger.info( + f"Transferred {permission_type} permission for well {well.name}: {permission_allowed}." + ) + + return permission + + +def transfer_permissions(session: Session) -> None: """ The transferred wells and contacts need to be transferred first - to access the auto-generated well IDs @@ -25,71 +53,36 @@ def transfer_permissions(session: Session): wdf = replace_nans(wdf) transferred_wells = ( - session.query(Thing).filter(Thing.thing_type == "water well").all() + session.query(Thing, Contact) + .select_from(Thing) + .join(ThingContactAssociation, ThingContactAssociation.thing_id == Thing.id) + .join(Contact, Contact.id == ThingContactAssociation.contact_id) + .filter(Thing.thing_type == "water well") + .order_by(Thing.name) + .all() ) + visited = [] + for chunk in chunk_by_size(transferred_wells, 100): + objs = [] + for row in chunk.itertuples(): + well = row.Thing + contact = row.Contact + if well.id in visited: + continue + + visited.append(well.id) - for well in transferred_wells: - if len(well.contacts) == 0: - logger.critical( - f"Well {well.name} has no associated contacts; skipping permission transfer." + permission = _make_permission( + wdf, well, contact.id, "SampleOK", "Water Chemistry Sample" ) - continue - else: - # Assuming the first contact is the relevant one - contact_id = well.contacts[0].id - - allow_water_level_samples = wdf.loc[ - wdf["PointID"] == well.name, "MonitorOK" - ].values - if len(allow_water_level_samples) == 0: - pass - elif isna(allow_water_level_samples[0]): - pass - else: - try: - permission_allowed = bool(allow_water_level_samples[0]) - permission = PermissionHistory( - contact_id=contact_id, - permission_type="Water Level Sample", - permission_allowed=permission_allowed, - start_date=datetime.today().date(), - target_id=well.id, - target_table="thing", - ) - session.add(permission) - logger.info( - f"Transferred Water Level Sample permission for well {well.name}: {permission_allowed}." - ) - except Exception as e: - logger.error(f"Error transferring permission for well {well.name}: {e}") - session.rollback() - pass - - allow_water_chemistry_samples = wdf.loc[ - wdf["PointID"] == well.name, "SampleOK" - ].values - if len(allow_water_chemistry_samples) == 0: - pass - elif isna(allow_water_chemistry_samples[0]): - pass - else: - try: - permission_allowed = bool(allow_water_chemistry_samples[0]) - permission = PermissionHistory( - contact_id=contact_id, - permission_type="Water Chemistry Sample", - permission_allowed=permission_allowed, - start_date=datetime.today().date(), - target_id=well.id, - target_table="thing", - ) - session.add(permission) - logger.info( - f"Transferred Water Chemistry Sample permission for well {well.name}: {permission_allowed}." - ) - except Exception as e: - logger.error(f"Error transferring permission for well {well.name}: {e}") - session.rollback() - pass - - session.commit() + if permission: + objs.append(permission) + + permission = _make_permission( + wdf, well, contact.id, "MonitorOK", "Water Level Sample" + ) + if permission: + objs.append(permission) + + session.bulk_save_objects(objs) + session.commit() diff --git a/transfers/stratigraphy_transfer.py b/transfers/stratigraphy_transfer.py index de51e354e..9f97a4904 100644 --- a/transfers/stratigraphy_transfer.py +++ b/transfers/stratigraphy_transfer.py @@ -7,6 +7,7 @@ """ import time + from sqlalchemy.orm import Session from db import Thing, GeologicFormation, ThingGeologicFormationAssociation @@ -49,7 +50,7 @@ def transfer_stratigraphy(session: Session, limit: int = None) -> tuple: cleaned_df = replace_nans(input_df) # Step 2: Filter to only wells that exist in database - cleaned_df = filter_to_valid_point_ids(session, cleaned_df) + cleaned_df = filter_to_valid_point_ids(cleaned_df) n_records = len(cleaned_df) n_wells = len(cleaned_df["PointID"].unique()) @@ -69,6 +70,12 @@ def transfer_stratigraphy(session: Session, limit: int = None) -> tuple: # Step 4: Group by well for efficient processing well_groups = cleaned_df.groupby("PointID") + formations = session.query(GeologicFormation).all() + things = session.query(Thing).all() + + formations = {f.formation_code: f for f in formations} + things = {t.name: t for t in things} + for well_index, (pointid, strat_group) in enumerate(well_groups): # Check limit (on number of wells, not records) if limit and well_index >= limit: @@ -92,7 +99,7 @@ def transfer_stratigraphy(session: Session, limit: int = None) -> tuple: continue # 5. Get the well from database - thing = session.query(Thing).filter(Thing.name == pointid).first() + thing = things.get(pointid) if not thing: logger.warning( f"Well {pointid} not found in database, skipping stratigraphy" @@ -195,13 +202,18 @@ def transfer_stratigraphy(session: Session, limit: int = None) -> tuple: continue # 7. Get or create the formation - formation = ( - session.query(GeologicFormation) - .filter(GeologicFormation.formation_code == formation_code) - .first() - ) + # formation = ( + # session.query(GeologicFormation) + # .filter(GeologicFormation.formation_code == formation_code) + # .first() + # ) + formation = formations.get(formation_code) if not formation: + logger.info(f"Geologic formation not in lexicon: {formation_code}") + skipped_count += 1 + continue + # Create new formation if it doesn't exist logger.info(f"Creating new geologic formation: {formation_code}") formation = GeologicFormation( diff --git a/transfers/thing_transfer.py b/transfers/thing_transfer.py index 3469fbc53..6cfcea4c8 100644 --- a/transfers/thing_transfer.py +++ b/transfers/thing_transfer.py @@ -14,7 +14,7 @@ # limitations under the License. # =============================================================================== import time - +from pandas import isna from pydantic import ValidationError from sqlalchemy.orm import Session @@ -57,9 +57,16 @@ def transfer_thing(session: Session, site_type: str, make_payload, limit=None) - session.commit() try: - location, elevation_method = make_location(row, cached_elevations) + location, elevation_method, location_notes = make_location( + row, cached_elevations + ) session.add(location) session.flush() + for note_type, note_content in location_notes.items(): + if not isna(note_content): + location_note = location.add_note(note_content, note_type) + session.add(location_note) + data_provenances = make_location_data_provenance( row, location, elevation_method ) diff --git a/transfers/transfer.py b/transfers/transfer.py index 45a78cc60..83b99b94a 100644 --- a/transfers/transfer.py +++ b/transfers/transfer.py @@ -21,6 +21,8 @@ from services.util import get_bool_env from transfers.aquifer_system_transfer import transfer_aquifer_systems from transfers.geologic_formation_transfer import transfer_geologic_formations +from transfers.permissions_transfer import transfer_permissions +from transfers.stratigraphy_transfer import transfer_stratigraphy load_dotenv() @@ -109,6 +111,15 @@ def transfer_all(metrics, limit=100): results = _execute_transfer(ContactTransfer, flags=flags) metrics.contact_metrics(*results) + message("TRANSFERRING PERMISSIONS") + with session_ctx() as session: + transfer_permissions(session) + + message("TRANSFERRING STRATIGRAPY") + with session_ctx() as session: + results = transfer_stratigraphy(session, limit=limit) + metrics.stratigraphy_metrics(*results) + if transfer_waterlevels: message("TRANSFERRING WATER LEVELS") results = _execute_transfer(WaterLevelTransferer, flags=flags) @@ -147,7 +158,12 @@ def transfer_all(metrics, limit=100): def _execute_transfer(klass, flags: dict = None): - transferer = klass(flags=flags) + + pointids = None + if os.getenv("TRANSFER_TEST_POINTIDS"): + pointids = os.getenv("TRANSFER_TEST_POINTIDS").split(",") + + transferer = klass(flags=flags, pointids=pointids) transferer.transfer() return transferer.input_df, transferer.cleaned_df, transferer.errors @@ -168,5 +184,4 @@ def main(): if __name__ == "__main__": main() - # ============= EOF ============================================= diff --git a/transfers/transferer.py b/transfers/transferer.py index 4312051fd..7b8076fd4 100644 --- a/transfers/transferer.py +++ b/transfers/transferer.py @@ -17,6 +17,8 @@ import pandas as pd from pandas import DataFrame +from pydantic import ValidationError +from sqlalchemy.exc import DatabaseError from sqlalchemy.orm import Session from db import Thing, Base @@ -35,22 +37,36 @@ class Transferer(object): errors: list = None flags: dict = None source_table: str = None + verbose: bool = False - def __init__(self, flags: dict = None): + def __init__(self, flags: dict = None, pointids: list = None): self.errors = [] self.flags = flags if flags else {} self.manual_fixer = ManualFixer() + self.pointids = pointids - def transfer(self): + def transfer(self) -> None: with session_ctx() as session: self.input_df, self.cleaned_df = self._get_dfs() self._transfer_hook(session) session.commit() - def _capture_error(self, pointid, error, field, table=None): + def _capture_validation_error(self, pointid: str, err: ValidationError) -> None: + self._capture_error( + pointid, f"Validation Error: {err.errors()}", "UnknownField" + ) + + def _capture_database_error(self, pointid: str, err: DatabaseError) -> None: + error_dict = err.orig.args[0] + self._capture_error(pointid, error_dict["D"], error_dict["t"]) + + def _capture_error(self, pointid: str, error: str, field: str, table=None) -> None: if table is None: table = self.source_table + logger.critical( + f"Capture Error: PointID={pointid}, Error: {error}, {table}:{field}" + ) self.errors.append( { "pointid": pointid, @@ -66,7 +82,7 @@ def _transfer_hook(self, session: Session): def _get_df_to_iterate(self) -> pd.DataFrame: return self.cleaned_df - def _limit_iterator(self, session: Session, limit: int, step: int = 25): + def _limit_iterator(self, session: Session, limit: int, step: int = 100): df = self._get_df_to_iterate() n = len(df) start_time = time.time() @@ -83,6 +99,7 @@ def _limit_iterator(self, session: Session, limit: int, step: int = 25): start_time = time.time() try: session.commit() + session.expunge_all() except Exception as e: logger.critical(f"Error committing wells. {e}") session.rollback() @@ -91,6 +108,7 @@ def _limit_iterator(self, session: Session, limit: int, step: int = 25): self._step(session, df, i, row) session.commit() + session.expunge_all() self._after_hook(session) def _step(self, session: Session, df: pd.DataFrame, i: int, row: dict): @@ -122,6 +140,9 @@ def _transfer_hook(self, session: Session): continue self._chunk_step(session, df, i, row, dbitem) + session.commit() + session.expunge_all() + # def chunk_transfer(self): # with session_ctx() as session: # self.input_df, self.cleaned_df = self._get_dfs(session) diff --git a/transfers/util.py b/transfers/util.py index d1bc5d053..35828d21c 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -25,6 +25,7 @@ import numpy as np import pandas as pd import pytz +from pandas import notna from shapely import Point from sqlalchemy import select from sqlalchemy.orm import Session @@ -59,38 +60,43 @@ def __init__(self): df = read_csv("WaterLevels") df["DateMeasured"] = pd.to_datetime(df["DateMeasured"], errors="coerce") self._df = df.dropna(subset=["DateMeasured"]) + self.verbose = False def estimate_measuring_point_height( self, row - ) -> tuple[float, str, datetime | None]: + ) -> tuple[float, str, datetime | None, datetime | None]: mph = row.MPHeight mph_desc = row.MeasuringPoint df = self._df[self._df["PointID"] == row.PointID] df = df.sort_values("DateMeasured") if mph is None: - logger.info( - f"No MPHeight found for PointID: {row.PointID}. Estimating from measurements." - ) + if self.verbose: + logger.info( + f"No MPHeight found for PointID: {row.PointID}. Estimating from measurements." + ) mphs = [] start_dates = [] mph_descs = [] if len(df) == 0: - logger.warning(f"No measurements found for PointID: {row.PointID}.") + if self.verbose: + logger.warning(f"No measurements found for PointID: {row.PointID}.") else: # try to estimate mpheight from measurements for m in df.itertuples(): mphi = m.DepthToWater - m.DepthToWaterBGS start_date = m.DateMeasured if mphi not in mphs: - mphs.append(mphi) - mph_descs.append( - "Auto calculated from measurements at depth to water and depth to water below ground surface" - ) - start_dates.append(start_date) - logger.info( - f"Estimated MPHeight: {mphs}, {start_dates} for PointID: {row.PointID}." - ) + if notna(mphi): + mphs.append(mphi) + mph_descs.append( + "Auto calculated from measurements at depth to water and depth to water below ground surface" + ) + start_dates.append(start_date) + if mphs: + logger.info( + f"Estimated MPHeight: {mphs}, {start_dates} for PointID: {row.PointID}." + ) else: mphs = [mph] mph_descs = [mph_desc] @@ -105,7 +111,7 @@ def estimate_measuring_point_height( end_dates = [start_dates[i + 1] for i in range(len(start_dates) - 1)] end_dates.append(None) - return zip(mphs, mph_descs, start_dates, end_dates) + return mphs, mph_descs, start_dates, end_dates class SensorParameterEstimator: @@ -357,8 +363,11 @@ def filter_by_valid_measuring_agency(df: pd.DataFrame) -> pd.DataFrame: return df[df["MeasuringAgency"].isin(valid_measuring_agencies)] -def filter_to_valid_point_ids(df: pd.DataFrame) -> pd.DataFrame: +def filter_to_valid_point_ids(df: pd.DataFrame, pointids: list = None) -> pd.DataFrame: valid_point_ids = get_valid_point_ids() + if pointids: + valid_point_ids = list(set(valid_point_ids) & set(pointids)) + return df[df["PointID"].isin(valid_point_ids)] @@ -382,7 +391,10 @@ def convert_mt_to_utc(dt_record: datetime) -> datetime: return dt_record -def chunk_by_size(df: pd.DataFrame, chunk_size: int) -> pd.DataFrame: +def chunk_by_size(df: pd.DataFrame | list, chunk_size: int = 100) -> pd.DataFrame: + if isinstance(df, list): + df = pd.DataFrame(df) + for i in range(0, len(df), chunk_size): yield df.iloc[i : i + chunk_size] @@ -400,7 +412,7 @@ def get_groundwater_parameter_id() -> int: def make_location(row: pd.Series, elevations: dict) -> tuple: """ - Returns a tuple of location data and the elevation method + Returns a tuple of location data, the elevation method, and notes """ point = Point(row.Easting, row.Northing) @@ -435,9 +447,17 @@ def make_location(row: pd.Series, elevations: dict) -> tuple: elif pd.isna(row.AltitudeMethod): elevation_method = None else: - elevation_method = lexicon_mapper.map_value( - f"LU_AltitudeMethod:{row.AltitudeMethod.strip()}" - ) + try: + elevation_method = lexicon_mapper.map_value( + f"LU_AltitudeMethod:{row.AltitudeMethod.strip()}" + ) + except KeyError: + elevation_method = None + + notes = { + "Coordinate": row.CoordinateNotes, + "General": row.LocationNotes, + } # Extract AMPAPI date fields (Date type, not DateTime) nma_date_created = None @@ -455,13 +475,11 @@ def make_location(row: pd.Series, elevations: dict) -> tuple: point=transformed_point.wkt, elevation=z, release_status="public" if row.PublicRelease else "private", - nma_coordinate_notes=row.CoordinateNotes, - nma_notes_location=row.LocationNotes, nma_date_created=nma_date_created, nma_site_date=nma_site_date, ) - return location, elevation_method + return location, elevation_method, notes def make_location_data_provenance( @@ -549,11 +567,14 @@ def make_location_data_provenance( # minus_point_decimal_deg = Point(minus_longitude, minus_latitude) if row.CoordinateMethod or row.CoordinateAccuracy: - coordinate_method = ( - lexicon_mapper.map_value(f"LU_CoordinateMethod:{row.CoordinateMethod}") - if not pd.isna(row.CoordinateMethod) - else None - ) + try: + coordinate_method = ( + lexicon_mapper.map_value(f"LU_CoordinateMethod:{row.CoordinateMethod}") + if pd.notna(row.CoordinateMethod) + else None + ) + except KeyError: + coordinate_method = None accuracy_value, accuracy_unit = NMA_COORDINATE_ACCURACY.get( row.CoordinateAccuracy, (None, None) @@ -591,9 +612,15 @@ class LexiconMapper: def __init__(self): self._mappers: dict[str, str] = None - def map_value(self, value) -> str: + def map_value(self, value, default=None) -> str: value = value.strip() - return self._make_lu_to_lexicon_mapper().get(value, value) + + try: + return self._make_lu_to_lexicon_mapper()[value] + except KeyError: + if default is not None: + return default + raise KeyError(f"No mapping found for {value}") def _make_lu_to_lexicon_mapper(self) -> dict[str, str]: """ diff --git a/transfers/waterlevels_transducer_transfer.py b/transfers/waterlevels_transducer_transfer.py index cd323330c..70400daa2 100644 --- a/transfers/waterlevels_transducer_transfer.py +++ b/transfers/waterlevels_transducer_transfer.py @@ -17,6 +17,7 @@ import pandas as pd from pandas import Timestamp from pydantic import ValidationError +from sqlalchemy.exc import DatabaseError from sqlalchemy.orm import Session from db import Thing, Deployment, Sensor @@ -50,6 +51,10 @@ def _get_dfs(self): # remove rows with no date measured cleaned_df = cleaned_df[cleaned_df.DateMeasured.notna()] + + # remove duplicate rows + cleaned_df = cleaned_df.drop_duplicates(subset=["PointID", "DateMeasured"]) + return input_df, cleaned_df def _transfer_hook(self, session: Session) -> None: @@ -122,12 +127,12 @@ def _transfer_hook(self, session: Session) -> None: ) try: session.commit() - except Exception as e: - self.append({"pointid": pointid, "error": e}) + except DatabaseError as e: + session.rollback() logger.critical( f"Error committing water levels {release_status} block: {e}" ) - session.rollback() + self._capture_database_error(pointid, e) continue # convert nodeployments to errors diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py index 270592a66..c09d7d3dd 100644 --- a/transfers/waterlevels_transfer.py +++ b/transfers/waterlevels_transfer.py @@ -91,7 +91,7 @@ def __init__(self, *args, **kw): self._created_contacts = {} def _get_dfs(self) -> tuple[pd.DataFrame, pd.DataFrame]: - input_df = read_csv(self.source_table) + input_df = read_csv(self.source_table, dtype={"MeasuredBy": str}) cleaned_df = filter_to_valid_point_ids(input_df) cleaned_df = filter_by_valid_measuring_agency(cleaned_df) return input_df, cleaned_df @@ -107,6 +107,12 @@ def _transfer_hook(self, session: Session) -> None: if dt_utc is None: continue + # reasons + try: + glv = self._get_groundwater_level_reason(row) + except KeyError as e: + continue + release_status = "public" if row.PublicRelease else "private" # field event @@ -132,8 +138,6 @@ def _transfer_hook(self, session: Session) -> None: session.add(field_event_participant) - # reasons - glv = self._get_groundwater_level_reason(row) if ( glv == "Well was destroyed (no subsequent water levels should be recorded)" @@ -213,9 +217,10 @@ def _make_sample(self, row, field_activity, dt_utc, sampler) -> Sample: "null placeholder" if pd.isna(row.MeasurementMethod) else lexicon_mapper.map_value( - f"LU_MeasurementMethod:{row.MeasurementMethod}" + f"LU_MeasurementMethod:{row.MeasurementMethod}", "null placeholder" ) ) + sample = Sample( nma_pk_waterlevels=row.GlobalID, field_activity=field_activity, @@ -235,9 +240,16 @@ def _get_groundwater_level_reason(self, row) -> str: if pd.isna(glv): return None + if glv == "X?": + glv = "X" glv = lexicon_mapper.map_value(f"LU_LevelStatus:{glv}") if glv == "Water level not affected by status": glv = "Water level not affected" + elif glv is None: + self._capture_error( + row.PointID, f"Unknown groundwater level reason: {glv}", "LevelStatus" + ) + raise ValueError(f"Unknown groundwater level reason: {glv}") return glv def _get_field_event_participants(self, session, row, thing) -> list[Contact]: @@ -246,42 +258,45 @@ def _get_field_event_participants(self, session, row, thing) -> list[Contact]: if measured_by not in ["Owner", "Owner report", "Well owner"]: # --- Contact/FieldEventParticipant --- - contact_info = get_contacts_info(row, measured_by, self._measured_by_mapper) - - for name, organization, role in contact_info: - if (name, organization) in self._created_contacts: - contact = self._created_contacts[(name, organization)] - else: - try: - # create new contact if not already created - contact = Contact( - name=name, - role=role, - contact_type="Field Event Participant", - organization=organization, - nma_pk_waterlevels=row.GlobalID, - ) - session.add(contact) - - logger.info( - f"{SPACE_2}Created contact: | Name {contact.name} | Role {contact.role} | Organization {contact.organization} | nma_pk_waterlevels {contact.nma_pk_waterlevels}" - ) - - self._created_contacts[(name, organization)] = contact - except Exception as e: - logger.critical( - f"Contact cannot be created: Name {name} | Role {role} | Organization {organization} because of the following: {str(e)}" - ) - continue - - field_event_participants.append(contact) + if measured_by: + contact_info = get_contacts_info( + row, measured_by, self._measured_by_mapper + ) + for name, organization, role in contact_info: + if (name, organization) in self._created_contacts: + contact = self._created_contacts[(name, organization)] + else: + try: + # create new contact if not already created + contact = Contact( + name=name, + role=role, + contact_type="Field Event Participant", + organization=organization, + nma_pk_waterlevels=row.GlobalID, + ) + session.add(contact) + + logger.info( + f"{SPACE_2}Created contact: | Name {contact.name} | Role {contact.role} | Organization {contact.organization} | nma_pk_waterlevels {contact.nma_pk_waterlevels}" + ) + + self._created_contacts[(name, organization)] = contact + except Exception as e: + logger.critical( + f"Contact cannot be created: Name {name} | Role {role} | Organization {organization} because of the following: {str(e)}" + ) + continue + + field_event_participants.append(contact) else: contact = thing.contacts[0] field_event_participants.append(contact) if len(field_event_participants) == 0: logger.critical( - f"No contacts can be associated with the WaterLevels record with GlobalID {row.GlobalID}, therefore no field event, field activity, sample, and observation can be made. Skipping." + f"No contacts can be associated with the WaterLevels record with GlobalID {row.GlobalID}, " + f"therefore no field event, field activity, sample, and observation can be made. Skipping." ) return field_event_participants diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index d92f2ece6..aaa2eb0bd 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -17,6 +17,7 @@ import re import time from datetime import datetime, UTC +from zoneinfo import ZoneInfo import pandas as pd from pandas import isna, notna @@ -80,38 +81,26 @@ def _get_first_visit_date(row) -> datetime | None: first_visit_date = None + + def _extract_date(date_str: str) -> datetime: + return datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S.%f").date() + if row.DateCreated and row.SiteDate: - date_created = datetime.strptime(row.DateCreated, "%Y-%m-%d %H:%M:%S.%f").date() - site_date = datetime.strptime(row.SiteDate, "%Y-%m-%d %H:%M:%S.%f").date() + date_created = _extract_date(row.DateCreated) + site_date = _extract_date(row.SiteDate) if date_created < site_date: first_visit_date = date_created else: first_visit_date = site_date elif row.DateCreated and not row.SiteDate: - first_visit_date = datetime.strptime( - row.DateCreated, "%Y-%m-%d %H:%M:%S.%f" - ).date() + first_visit_date = _extract_date(row.DateCreated) elif not row.DateCreated and row.SiteDate: - first_visit_date = datetime.strptime( - row.SiteDate, "%Y-%m-%d %H:%M:%S.%f" - ).date() + first_visit_date = _extract_date(row.SiteDate) return first_visit_date -def _extract_well_purposes(row) -> list[str]: - cu = row.CurrentUse - purposes = ( - [] - if isna(cu) - else [lexicon_mapper.map_value(f"LU_CurrentUse:{cui}") for cui in cu] - ) - - # logger.info(f"well {row.PointID},{cu} has purposes: {purposes}") - return purposes - - def _extract_casing_materials(row) -> list[str]: materials = [] if "pvc" in row.CasingDescription.lower(): @@ -168,51 +157,6 @@ def _extract_aquifer_type_codes(aquifer_code: str) -> list[str]: return individual_codes -# Get or create aquifer system -def get_or_create_aquifer_system( - session: Session, aquifer_name: str, primary_type: str -) -> AquiferSystem | None: - """ - Get existing aquifer or create new one if it doesn't exist. - - With the new AquiferType model, we create ONE aquifer record per named - aquifer (e.g., one "Santa Fe Group"), not multiple variants. - - Args: - session: Database session - aquifer_name: Name of the aquifer (from AqClass or type name) - primary_type: Primary aquifer type for the aquifer_type field - """ - # Try to find existing aquifer by name - aquifer = ( - session.query(AquiferSystem).filter(AquiferSystem.name == aquifer_name).first() - ) - - if aquifer: - return aquifer - - # Create new aquifer - try: - logger.info( - f"Creating new aquifer system: {aquifer_name} (primary type: {primary_type})" - ) - - aquifer = AquiferSystem( - name=aquifer_name, - primary_aquifer_type=primary_type, # Primary type - geographic_scale=None, # Default - ) - session.add(aquifer) - session.commit() - # session.flush() # Get the ID - # session.refresh(aquifer) - return aquifer - except DatabaseError as e: - session.rollback() - logger.critical(f"Error creating aquifer {aquifer_name}: {e}") - return None - - def get_or_create_geologic_formation( session: Session, formation_code: str ) -> GeologicFormation | None: @@ -277,6 +221,8 @@ def __init__(self, *args, **kw): super().__init__(*args, **kw) self._cached_elevations = get_cached_elevations() self._added_locations = {} + self._aquifers = None + self._measuring_point_estimator = MeasuringPointEstimator() def _get_dfs(self): wdf = read_csv("WellData", dtype={"OSEWelltagID": str}) @@ -300,86 +246,85 @@ def _get_dfs(self): # # for example, wells in the "Water Level Network" project # cleaned_df = wdf - cleaned_df = get_transferable_wells(wdf) + cleaned_df = get_transferable_wells(wdf, self.pointids) cleaned_df = filter_non_transferred_wells(cleaned_df) + dupes = cleaned_df["PointID"].duplicated(keep=False) + if dupes.any(): + dup_ids = set(cleaned_df.loc[dupes, "PointID"]) + logger.critical(f"{len(dup_ids)} PointIDs have duplicates; will skip.") + logger.critical(f"Duplicate PointIDs: {dup_ids}") + cleaned_df = cleaned_df[~cleaned_df["PointID"].isin(dup_ids)] + + cleaned_df = cleaned_df.sort_values(by=["PointID"]) + if self.pointids: + cleaned_df = cleaned_df[cleaned_df["PointID"].isin(self.pointids)] return input_df, cleaned_df def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): - pointid = row.PointID - if df[df["PointID"] == pointid].shape[0] > 1: - logger.critical( - f"transfer_wells. PointID {pointid} has duplicate records. Skipping." - ) - self._capture_error(pointid, "duplicate records", "PointID") - return - - location = None - try: - location, elevation_method = make_location(row, self._cached_elevations) - session.add(location) - session.commit() - self._added_locations[row.PointID] = elevation_method - except Exception as e: - self._capture_error(row.PointID, str(e), str(e), "Location") - logger.critical(f"Error making location for {row.PointID}: {e}") - - if location is not None: - session.expunge(location) - - return try: first_visit_date = _get_first_visit_date(row) - well_purposes = [] if isna(row.CurrentUse) else _extract_well_purposes(row) + well_purposes = ( + [] if isna(row.CurrentUse) else self._extract_well_purposes(row) + ) well_casing_materials = ( [] if isna(row.CasingDescription) else _extract_casing_materials(row) ) well_pump_type = _extract_well_pump_type(row) - # manually add the well rather than add_well from services/thing_helper.py - # so that effective_start can be set on the location assocation + wcm = None + if notna(row.ConstructionMethod): + wcm = self._get_lexicon_value( + row, f"LU_ConstructionMethod:{row.ConstructionMethod}", "Unknown" + ) + + is_suitable_for_datalogger = False + if notna(row.OpenWellLoggerOK): + is_suitable_for_datalogger = bool(row.OpenWellLoggerOK) + + mpheight = row.MPHeight + mpheight_description = row.MeasuringPoint + if mpheight is None: + mphs = self._measuring_point_estimator.estimate_measuring_point_height( + row + ) + if mphs: + try: + mpheight = mphs[0][0] + mpheight_description = mphs[1][0] + except IndexError: + if self.verbose: + logger.warning( + f"Measuring point height estimation failed for well {row.PointID}, {mphs}" + ) data = CreateWell( - location_id=location.id, + location_id=0, name=row.PointID, first_visit_date=first_visit_date, hole_depth=row.HoleDepth, well_depth=row.WellDepth, - well_construction_notes=row.ConstructionNotes, well_casing_diameter=( row.CasingDiameter * 12 if row.CasingDiameter else None ), well_casing_depth=row.CasingDepth, release_status="public" if row.PublicRelease else "private", - measuring_point_height=row.MPHeight, - measuring_point_description=row.MeasuringPoint, + measuring_point_height=mpheight, + measuring_point_description=mpheight_description, notes=( [{"content": row.Notes, "note_type": "Other"}] if row.Notes else [] ), well_completion_date=row.CompletionDate, well_driller_name=row.DrillerName, - well_construction_method=( - lexicon_mapper.map_value( - f"LU_ConstructionMethod:{row.ConstructionMethod}" - ) - if not isna(row.ConstructionMethod) - else None - ), + well_construction_method=wcm, well_pump_type=well_pump_type, - is_suitable_for_datalogger=( - bool(row.OpenWellLoggerOK) - if not isna(row.OpenWellLoggerOK) - else None - ), + is_suitable_for_datalogger=is_suitable_for_datalogger, ) CreateWell.model_validate(data) except ValidationError as e: - self._capture_error(row.PointID, str(e), "UnknownField") - logger.critical( - f"Validation error for row {i} with PointID {row.PointID}: {e.errors()}" - ) + self._capture_validation_error(row.PointID, e) return well = None @@ -426,25 +371,43 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): if well is not None: session.expunge(well) - if location is not None: - session.delete(location) - self._capture_error(row.PointID, str(e), "UnknownField") logger.critical(f"Error creating well for {row.PointID}: {e}") return - assoc = LocationThingAssociation(effective_start=location.created_at) + try: + location, elevation_method, notes = make_location( + row, self._cached_elevations + ) + session.add(location) + # session.flush() + self._added_locations[row.PointID] = (elevation_method, notes) + except Exception as e: + import traceback + + traceback.print_exc() + self._capture_error(row.PointID, str(e), str(e), "Location") + logger.critical(f"Error making location for {row.PointID}: {e}") + + return + + assoc = LocationThingAssociation( + effective_start=datetime.now(tz=ZoneInfo("UTC")) + ) assoc.location = location assoc.thing = well session.add(assoc) if isna(row.AquiferType): - logger.info( - f"No AquiferType for {well.name}. Skipping aquifer association." - ) + if self.verbose: + logger.info( + f"No AquiferType for {well.name}. Skipping aquifer association." + ) else: + if self.verbose: + logger.info(f"Trying to associate aquifer for {well.name}") try: self._add_aquifers(session, row, well) except Exception as e: @@ -452,44 +415,50 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): f"Error creating aquifer association for {well.name}: {e}" ) - if isna(row.FormationZone): - logger.info( - f"No FormationZone for {well.name}. Skipping formation association." - ) - else: - try: - self._add_formation_zone(session, row, well) - except Exception as e: - logger.critical( - f"Error creating formation association for {well.name}: {e}" - ) + def _extract_well_purposes(self, row) -> list[str]: + cu = row.CurrentUse - def _add_formation_zone(self, session, row, well): + if isna(cu): + return [] + else: + purposes = [] + for cui in cu: + p = self._get_lexicon_value(row, f"LU_CurrentUse:{cui}") + if p is not None: + purposes.append(p) + return purposes + + def _add_formation_zone(self, row, well, formations): # --- Set Formation Completion (NOT depth-based stratigraphy) --- # This simply records which formation the well was completed in. # For detailed depth-interval stratigraphy, see stratigraphy_transfer.py formation_code = row.FormationZone + if formation_code: + formation_code = formation_code.strip() + if formation_code in formations: + # Formation exists: Set association + well.formation_completion_code = formations[ + formation_code + ].formation_code + if self.verbose: + logger.info( + f"Set completion formation for {well.name}: {formation_code}" + ) + else: + # Formation does NOT exist: Do not create new formation. Flag and log for review + self._capture_error( + row.PointID, f"Unknown formation: {formation_code}", "FormationZone" + ) - # Validate formation exists - formation = ( - session.query(GeologicFormation) - .filter(GeologicFormation.formation_code == formation_code) - .first() - ) - - if formation: - # Formation exists: Set association - well.formation_completion_code = formation_code - logger.info(f"Set completion formation for {well.name}: {formation_code}") - else: - # Formation does NOT exist: Do not create new formation. Flag and log for review - logger.critical( - f"MISSING FORMATION: Formation '{formation_code}' not found for well {well.name}. Flagged for review." - ) + def _get_lexicon_value(self, row, value, default=None): + try: + return lexicon_mapper.map_value(value) + except KeyError: self._capture_error( - row.PointID, f"Unknown formation: {formation_code}", "FormationZone" + row.PointID, f"Unknown lexicon value: {value}", "Unknown" ) + return default def _add_aquifers(self, session, row, well): # Parse codes (handles multi-character codes like "FC") @@ -504,8 +473,8 @@ def _add_aquifers(self, session, row, well): # Map AqClass code to aquifer name using lexicon mapper if isna(row.AqClass): # No AqClass - use first code's mapped name as aquifer name - aquifer_name = lexicon_mapper.map_value( - f"LU_AquiferType:{aquifer_codes[0]}" + aquifer_name = self._get_lexicon_value( + row, f"LU_AquiferType:{aquifer_codes[0]}" ) else: try: @@ -516,8 +485,8 @@ def _add_aquifers(self, session, row, well): logger.warning( f"Unknown AqClass code '{row.AqClass}' for well {row.PointID}, using first type as name" ) - aquifer_name = lexicon_mapper.map_value( - f"LU_AquiferType:{aquifer_codes[0]}" + aquifer_name = self._get_lexicon_value( + row, f"LU_AquiferType:{aquifer_codes[0]}" ) # Determine primary type @@ -534,178 +503,323 @@ def _add_aquifers(self, session, row, well): ) primary_type = "Unknown" # Creates aquifer with placeholder + if self._aquifers is None: + self._aquifers = session.query(AquiferSystem).all() + # Get or create the aquifer - aquifer = get_or_create_aquifer_system(session, aquifer_name, primary_type) + aquifer = self._get_or_create_aquifer_system( + session, row, aquifer_name, primary_type + ) if aquifer: - # Check if association already exists - existing_assoc = ( - session.query(ThingAquiferAssociation) - .filter( - ThingAquiferAssociation.thing_id == well.id, - ThingAquiferAssociation.aquifer_system_id == aquifer.id, - ) - .first() - ) - if not existing_assoc: - # Create the association - logger.info(f"Associating well {well.name} with aquifer {aquifer.name}") - aquifer_assoc = ThingAquiferAssociation( - thing=well, aquifer_system=aquifer - ) - session.add(aquifer_assoc) - session.flush() + aquifer, created = aquifer + if not aquifer: + return - # Create AquiferType records for EACH characteristic - aquifer_type_names = [] - for aquifer_code in aquifer_codes: - try: - type_name = lexicon_mapper.map_value( - f"LU_AquiferType:{aquifer_code}" - ) - aquifer_type = AquiferType( - thing_aquifer_association=aquifer_assoc, - aquifer_type=type_name, - ) - session.add(aquifer_type) - aquifer_type_names.append(type_name) - except KeyError: - logger.critical( - f"Unknown aquifer code '{aquifer_code}' from AquiferType='{row.AquiferType}' " - f"for well {well.name}. Skipping this code." - ) - self._capture_error( - row.PointID, - f"Unknown aquifer code: {aquifer_code}", - "AquiferType", - ) + if created: + self._aquifers.append(aquifer) + # Check if association already exists + # existing_assoc = ( + # session.query(ThingAquiferAssociation) + # .filter( + # ThingAquiferAssociation.thing_id == well.id, + # ThingAquiferAssociation.aquifer_system_id == aquifer.id, + # ) + # .first() + # ) + # if not existing_assoc: + # Create the association + if self.verbose: + logger.info(f"Associating well {well.name} with aquifer {aquifer.name}") + aquifer_assoc = ThingAquiferAssociation(thing=well, aquifer_system=aquifer) + session.add(aquifer_assoc) + # session.flush() + + # Create AquiferType records for EACH characteristic + aquifer_type_names = [] + for aquifer_code in aquifer_codes: + try: + type_name = lexicon_mapper.map_value( + f"LU_AquiferType:{aquifer_code}" + ) + aquifer_type = AquiferType( + thing_aquifer_association=aquifer_assoc, + aquifer_type=type_name, + ) + session.add(aquifer_type) + aquifer_type_names.append(type_name) + except KeyError: + logger.critical( + f"Unknown aquifer code '{aquifer_code}' from AquiferType='{row.AquiferType}' " + f"for well {well.name}. Skipping this code." + ) + self._capture_error( + row.PointID, + f"Unknown aquifer code: {aquifer_code}", + "AquiferType", + ) + + if self.verbose: logger.info( f"Associated well {well.name} with aquifer {aquifer.name} " f"(types: {', '.join(aquifer_type_names)})" ) + # else: + # logger.info( + # f"Well {well.name} already associated with aquifer {aquifer.name}" + # ) + else: + logger.info(f"Failed to create aquifer for well {well.name}") + + # Get or create aquifer system + def _get_or_create_aquifer_system( + self, session: Session, row, aquifer_name: str, primary_type: str + ) -> AquiferSystem | None: + """ + Get existing aquifer or create new one if it doesn't exist. + + With the new AquiferType model, we create ONE aquifer record per named + aquifer (e.g., one "Santa Fe Group"), not multiple variants. + + Args: + session: Database session + aquifer_name: Name of the aquifer (from AqClass or type name) + primary_type: Primary aquifer type for the aquifer_type field + """ + # Try to find existing aquifer by name + # aquifer = ( + # session.query(AquiferSystem).filter(AquiferSystem.name == aquifer_name).first() + # ) + if aquifer_name is None: + return None, False + + aquifer = next((a for a in self._aquifers if a.name == aquifer_name), None) + if aquifer: + return aquifer, False + + # Create new aquifer + try: + logger.info( + f"Creating new aquifer system: {aquifer_name} (primary type: {primary_type})" + ) + + aquifer = AquiferSystem( + name=aquifer_name, + primary_aquifer_type=primary_type, # Primary type + geographic_scale=None, # Default + ) + session.add(aquifer) + session.flush() # Get the ID + return aquifer, True + except DatabaseError as e: + session.rollback() + self._capture_database_error(row.PointID, e) + return None, False def _after_hook(self, session): dump_cached_elevations(self._cached_elevations) - measuring_point_estimator = MeasuringPointEstimator() + + self._row_by_pointid = { + pid: row + for pid, row in self.cleaned_df.set_index("PointID", drop=False).iterrows() + } + + formations = session.query(GeologicFormation).all() + formations = {f.formation_code: f for f in formations} + # add things thate need well id query = session.query(Thing).filter(Thing.thing_type == "water well") + # query = ( + # session.query(Thing) + # .options( + # selectinload(Thing.location_associations).selectinload( + # LocationThingAssociation.location + # ) + # ) + # .filter(Thing.thing_type == "water well") + # ) + chunk_size = 500 count = query.count() - for i, well in enumerate(query.all()): - objs = [] + processed = 0 + chunk = [] + + def _process_chunk(chunk_index: int, wells_chunk: list[Thing]): step_start_time = time.time() - row = self.cleaned_df[self.cleaned_df["PointID"] == well.name].iloc[0] - if notna(row.Notes): - note = well.add_note(row.Notes, "Other") - objs.append(note) - - location = well.current_location - elevation_method = self._added_locations[row.PointID] - data_provenances = make_location_data_provenance( - row, location, elevation_method + + all_objects = [] + for well in wells_chunk: + objs = self._after_hook_chunk(well, formations) + if objs: + all_objects.extend(objs) + + save_time = time.time() + try: + session.bulk_save_objects(all_objects, return_defaults=False) + session.commit() + except DatabaseError as e: + session.rollback() + self._capture_database_error("MultiplePointIDs", e) + finally: + save_time = time.time() - save_time + + processed_count = chunk_index * chunk_size + len(wells_chunk) + logger.info( + f"After hook: {processed_count}/{count} took {time.time() - step_start_time:.2f}s, " + f"n_objects={len(all_objects)}, save_time={save_time}" ) - objs.extend(data_provenances) - - for row_field, kw in ( - ( - "CompletionSource", - dict( - field_name="well_completion_date", - origin_type=lexicon_mapper.map_value( - f"LU_Depth_CompletionSource:{row.CompletionSource}" - ), - ), - ), - ( - "DataSource", - dict( - field_name="well_construction_method", - origin_source=row.DataSource, - ), - ), - ( - "DepthSource", - dict( - field_name="well_depth", - origin_type=lexicon_mapper.map_value( - f"LU_Depth_CompletionSource:{row.DepthSource}" - ), - ), - ), - ): + return processed_count + + for well in query.all(): + chunk.append(well) + if len(chunk) == chunk_size: + processed = _process_chunk(processed // chunk_size, chunk) + chunk = [] + + if chunk: + _process_chunk(processed // chunk_size, chunk) + + def _after_hook_chunk(self, well, formations): + + row = self._row_by_pointid.get(well.name) + if row is None: + return [] + + objs = [] + self._add_formation_zone(row, well, formations) + + if notna(row.Notes): + note = well.add_note(row.Notes, "General") + objs.append(note) + if row.ConstructionNotes: + note = well.add_note(row.ConstructionNotes, "Construction") + objs.append(note) + if row.WaterNotes: + note = well.add_note(row.WaterNotes, "Water") + objs.append(note) + + location = well.current_location + elevation_method, location_notes = self._added_locations[row.PointID] + for note_type, note_content in location_notes.items(): + if notna(note_content): + location_note = location.add_note(note_content, note_type) + objs.append(location_note) + if self.verbose: + logger.info( + f"Added note of type {note_type} for current location of well {well.name}" + ) - if notna(row[row_field]): - dp = DataProvenance(target_id=well.id, target_table="thing", **kw) - objs.append(dp) + data_provenances = make_location_data_provenance( + row, location, elevation_method + ) + objs.extend(data_provenances) - start_time = time.time() - mphs = measuring_point_estimator.estimate_measuring_point_height(row) + cs = ( + "CompletionSource", + { + "field_name": "well_completion_date", + "origin_type": f"LU_Depth_CompletionSource:{row.CompletionSource}", + }, + ) + ds = ( + "DataSource", + { + "field_name": "well_construction_method", + "origin_source": row.DataSource, + }, + ) + des = ( + "DepthSource", + { + "field_name": "well_depth", + "origin_type": f"LU_Depth_CompletionSource:{row.DepthSource}", + }, + ) + + for row_field, kw in (cs, ds, des): + if notna(row[row_field]): + if "origin_type" in kw: + ot = self._get_lexicon_value(row, kw["origin_type"]) + if ot is None: + continue + + kw["origin_type"] = ot + + dp = DataProvenance(target_id=well.id, target_table="thing", **kw) + objs.append(dp) + + start_time = time.time() + mphs = self._measuring_point_estimator.estimate_measuring_point_height(row) + if self.verbose: logger.info( f"Estimated measuring point heights for {well.name}: {time.time() - start_time:.2f}s" ) - for mph, mph_desc, start_date, end_date in mphs: - measuring_point_history = MeasuringPointHistory( - thing_id=well.id, - measuring_point_height=mph, - measuring_point_description=mph_desc, - # start_date=datetime.now(tz=UTC), - start_date=start_date, - end_date=end_date, - ) - objs.append(measuring_point_history) - - """ - Developer's notes - - For all status_history records the start_date will be now since that - isn't recorded in NM_Aquifer - """ - # TODO: if row.MonitoringStatus == "Q" is it monitored or not? <-- AMMP review - # TODO: if row.MonitoringStatus == "X" can that change? <-- AMMP review - # TODO: have AMMP review and verify the various MonitoringStatus codes - - target_id = well.id - target_table = "thing" - if notna(row.MonitoringStatus): - if ( - "X" in row.MonitoringStatus - or "I" in row.MonitoringStatus - or "C" in row.MonitoringStatus - ): - status_value = "Not currently monitored" - else: - status_value = "Currently monitored" - - status_history = StatusHistory( - status_type="Monitoring Status", - status_value=status_value, - reason=row.MonitorStatusReason, - start_date=datetime.now(tz=UTC), - target_id=target_id, - target_table=target_table, - ) - objs.append(status_history) + for mph, mph_desc, start_date, end_date in zip(*mphs): + measuring_point_history = MeasuringPointHistory( + thing_id=well.id, + measuring_point_height=mph, + measuring_point_description=mph_desc, + start_date=start_date, + end_date=end_date, + ) + objs.append(measuring_point_history) + + """ + Developer's notes + + For all status_history records the start_date will be now since that + isn't recorded in NM_Aquifer + """ + # TODO: if row.MonitoringStatus == "Q" is it monitored or not? <-- AMMP review + # TODO: if row.MonitoringStatus == "X" can that change? <-- AMMP review + # TODO: have AMMP review and verify the various MonitoringStatus codes + + target_id = well.id + target_table = "thing" + if notna(row.MonitoringStatus): + if ( + "X" in row.MonitoringStatus + or "I" in row.MonitoringStatus + or "C" in row.MonitoringStatus + ): + status_value = "Not currently monitored" + else: + status_value = "Currently monitored" + + status_history = StatusHistory( + status_type="Monitoring Status", + status_value=status_value, + reason=row.MonitorStatusReason, + start_date=datetime.now(tz=UTC), + target_id=target_id, + target_table=target_table, + ) + objs.append(status_history) + if self.verbose: logger.info( f" Added monitoring status for well {well.name}: {status_value}" ) - for code in NMA_MONITORING_FREQUENCY.keys(): - if code in row.MonitoringStatus: - monitoring_frequency = NMA_MONITORING_FREQUENCY[code] - monitoring_frequency_history = MonitoringFrequencyHistory( - thing_id=well.id, - monitoring_frequency=monitoring_frequency, - start_date=datetime.now(tz=UTC), - end_date=None, - ) + for code in NMA_MONITORING_FREQUENCY.keys(): + if code in row.MonitoringStatus: + monitoring_frequency = NMA_MONITORING_FREQUENCY[code] + monitoring_frequency_history = MonitoringFrequencyHistory( + thing_id=well.id, + monitoring_frequency=monitoring_frequency, + start_date=datetime.now(tz=UTC), + end_date=None, + ) - objs.append(monitoring_frequency_history) + objs.append(monitoring_frequency_history) + if self.verbose: logger.info( f" Adding '{monitoring_frequency}' monitoring frequency for well {well.name}" ) - if notna(row.Status): - status_value = lexicon_mapper.map_value(f"LU_Status:{row.Status}") + if notna(row.Status): + + status_value = self._get_lexicon_value(row, f"LU_Status:{row.Status}") + if status_value is not None: status_history = StatusHistory( status_type="Well Status", status_value=status_value, @@ -715,17 +829,11 @@ def _after_hook(self, session): target_table=target_table, ) objs.append(status_history) - logger.info(f" Added well status for well {well.name}: {status_value}") - try: - session.bulk_save_objects(objs) - except DatabaseError as e: - session.rollback() - error_dict = e.orig.args[0] - self._capture_error(well.name, error_dict["D"], error_dict["t"]) - - logger.info( - f"After hook: {well.name} {i+1}/{count} took {time.time() - step_start_time:.2f}s" - ) + if self.verbose: + logger.info( + f" Added well status for well {well.name}: {status_value}" + ) + return objs class WellChunkTransferer(ChunkTransferer):