From 638c8bd254a5e52cc23f6ea8c5be865274002e18 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Sat, 22 Nov 2025 18:24:02 -0700
Subject: [PATCH 01/66] refactor: pass metrics object to transfer functions for
 improved data handling

---
 transfers/transfer.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/transfers/transfer.py b/transfers/transfer.py
index 77275ed35..f38bc4220 100644
--- a/transfers/transfer.py
+++ b/transfers/transfer.py
@@ -50,13 +50,12 @@ def message(msg, pad=10, new_line_at_top=True):
 
 
 @timeit
-def transfer_all(sess, limit=100):
+def transfer_all(sess, metrics, limit=100):
     message("STARTING TRANSFER", new_line_at_top=False)
 
     logger.info("Erase and rebuilding database")
     erase_and_rebuild_db()
 
-    metrics = Metrics()
     message("TRANSFERRING WELLS")
 
     flags = {
@@ -125,14 +124,13 @@ def transfer_all(sess, limit=100):
     timeit_direct(transfer_assets, sess)
 
 
-def transfer_debugging(sess, limit=100):
+def transfer_debugging(sess, metrics, limit=100):
     message("STARTING TRANSFER DEBUG", new_line_at_top=False)
 
     if int(os.environ.get("ERASE_AND_REBUILD", 0)):
         logger.info("Erase and rebuilding database")
         erase_and_rebuild_db()
 
-    metrics = Metrics()
     message("TRANSFERRING WELLS")
 
     flags = {"TRANSFER_ALL_WELLS": True}
@@ -205,12 +203,15 @@ def transfer_debugging(sess, limit=100):
 def main():
     message("START--------------------------------------")
     limit = int(os.environ.get("TRANSFER_LIMIT", 1000))
+    metrics = Metrics()
     with session_ctx() as sess:
         if int(os.environ.get("TRANSFER_DEBUG", 0)):
-            transfer_debugging(sess, limit=limit)
+            transfer_debugging(sess, metrics, limit=limit)
         else:
-            transfer_all(sess, limit=limit)
+            transfer_all(sess, metrics, limit=limit)
 
+    metrics.close()
+    metrics.save_to_storage_bucket()
     # todo: move the log file to a storage bucket
     save_log_to_bucket()
     message("END--------------------------------------")

From 1c0936759210310276afc4a245c941487ac0b353 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Tue, 25 Nov 2025 15:55:22 -0700
Subject: [PATCH 02/66] refactor: enhance sensor transfer process with
 recording interval estimation and chunked transfers

---
 transfers/sensor_transfer.py |  60 ++-
 transfers/transfer.py        |   6 +-
 transfers/util.py            |  64 ++-
 transfers/well_transfer.py   | 731 +++++++++++++++++++++++++++--------
 4 files changed, 669 insertions(+), 192 deletions(-)

diff --git a/transfers/sensor_transfer.py b/transfers/sensor_transfer.py
index f6ff49dcb..90e7273f1 100644
--- a/transfers/sensor_transfer.py
+++ b/transfers/sensor_transfer.py
@@ -18,7 +18,13 @@
 from sqlalchemy import select
 
 from db import Sensor, Deployment, Thing
-from transfers.util import read_csv, logger, filter_to_valid_point_ids, replace_nans
+from transfers.util import (
+    read_csv,
+    logger,
+    filter_to_valid_point_ids,
+    replace_nans,
+    RecordingIntervalEstimator,
+)
 
 EQUIPMENT_TO_SENSOR_TYPE_MAP = {
     "Pressure transducer": "Pressure Transducer",
@@ -37,6 +43,7 @@ def transfer_sensors(session):
     errors = []
     grouped_equipment = cleaned_df.groupby(["PointID"])
     added = {}
+    estimators = {}
     for index, group in grouped_equipment:
         pointid = index[0]
         thing = session.query(Thing).filter(Thing.name == pointid).first()
@@ -127,23 +134,43 @@ def transfer_sensors(session):
                         row.DateRemoved, "%Y-%m-%d %H:%M:%S.%f"
                     ).date()
 
+                recording_interval_unit = "hour"
                 try:
                     recording_interval = int(row.RecordingInterval)
                 except (ValueError, TypeError):
-                    logger.critical(
-                        f"name={sensor.name}, serial_no={sensor.serial_no} RecordingInterval is not an "
-                        f"integer. Setting to None"
-                    )
-                    recording_interval = None
-                    errors.append(
-                        {
-                            "pointid": pointid,
-                            "error": f"row.ID={row.ID}, row.SerialNo={row.SerialNo}. RecordingInterval is "
-                            f"not an integer",
-                            "table": source_table,
-                            "field": "RecordingInterval",
-                        }
+
+                    # try to calculate recording interval from measurements
+                    if sensor_type in estimators:
+                        estimator = estimators[sensor_type]
+                    else:
+                        estimator = RecordingIntervalEstimator(sensor_type)
+                        estimators[sensor_type] = estimator
+
+                    recording_interval, unit = estimator.estimate_recording_interval(
+                        row, installation_date, removal_date
                     )
+
+                    if recording_interval:
+                        recording_interval_unit = unit
+                        logger.info(
+                            f"name={sensor.name}, serial_no={sensor.serial_no}. "
+                            f"estimated recording interval: {recording_interval} "
+                        )
+                    else:
+
+                        logger.critical(
+                            f"name={sensor.name}, serial_no={sensor.serial_no} RecordingInterval is not an integer"
+                        )
+
+                        errors.append(
+                            {
+                                "pointid": pointid,
+                                "error": f"row.ID={row.ID}, row.SerialNo={row.SerialNo}. RecordingInterval is "
+                                f"not an integer",
+                                "table": source_table,
+                                "field": "RecordingInterval",
+                            }
+                        )
                 sql = (
                     select(Deployment)
                     .join(Thing)
@@ -166,7 +193,7 @@ def transfer_sensors(session):
                     installation_date=installation_date,
                     removal_date=removal_date,
                     recording_interval=recording_interval,
-                    recording_interval_units="hour",
+                    recording_interval_units=recording_interval_unit,
                     hanging_cable_length=row.HangingCableLength,
                     hanging_point_height=row.HangingPointHgt,
                     hanging_point_description=row.HangingPointDescription,
@@ -189,6 +216,9 @@ def transfer_sensors(session):
                     sensor.sensor_status = "Retired"
             session.commit()
         except Exception as e:
+            import traceback
+
+            traceback.print_exc()
             logger.critical(f"Could not add sensor and deployment: {e}")
             errors.append({"pointid": pointid, "error": e, "table": source_table})
 
diff --git a/transfers/transfer.py b/transfers/transfer.py
index f38bc4220..2098a85ea 100644
--- a/transfers/transfer.py
+++ b/transfers/transfer.py
@@ -133,13 +133,13 @@ def transfer_debugging(sess, metrics, limit=100):
 
     message("TRANSFERRING WELLS")
 
-    flags = {"TRANSFER_ALL_WELLS": True}
+    flags = {"TRANSFER_ALL_WELLS": True, "LIMIT": limit}
 
-    results = timeit_direct(transfer_wells, sess, flags=flags, limit=limit)
+    results = timeit_direct(transfer_wells, flags=flags)
     metrics.well_metrics(sess, *results)
 
     message("TRANSFERRING WELL SCREENS")
-    results = timeit_direct(transfer_wellscreens, sess)
+    results = timeit_direct(transfer_wellscreens, flags=flags)
     metrics.well_screen_metrics(sess, *results)
 
     message("TRANSFERRING SENSORS")
diff --git a/transfers/util.py b/transfers/util.py
index cbf0f2b17..31ad32e0a 100644
--- a/transfers/util.py
+++ b/transfers/util.py
@@ -53,6 +53,59 @@
 }
 
 
+class RecordingIntervalEstimator:
+    def __init__(self, sensor_type: str):
+        if sensor_type == "Pressure Transducer":
+            self._df = read_csv("WaterLevelsContinuous_Pressure")
+        else:
+            self._df = read_csv("WaterLevelsContinuous_Acoustic")
+
+        # convert "DateMeasured" to date"
+        self._df["DateMeasured"] = pd.to_datetime(self._df["DateMeasured"]).dt.date
+
+    def estimate_recording_interval(
+        self,
+        record: pd.Series,
+        installation_date: datetime = None,
+        removal_date: datetime = None,
+    ):
+        point_id = record.PointID
+
+        cdf = self._df[self._df["PointID"] == point_id]
+        if len(cdf) == 0:
+            return None, None
+
+        cdf = cdf.sort_values("DateMeasured")
+        if installation_date is not None:
+            cdf = cdf[cdf["DateMeasured"] >= installation_date]
+        if removal_date is not None:
+            cdf = cdf[cdf["DateMeasured"] <= removal_date]
+
+        # calculate the average interval in seconds
+        try:
+            date_series = pd.to_datetime(cdf["DateMeasured"])
+            intervals = date_series.diff().dropna().dt.total_seconds()
+            if len(intervals) == 0:
+                avg_interval = None
+            else:
+                avg_interval = intervals.mean()
+        except IndexError:
+            return None, None
+
+        # convert to hours
+        avg_interval /= 3600
+
+        unit = "hour"
+        if avg_interval < 1:
+            avg_interval *= 60
+            unit = "minute"
+            if avg_interval < 1:
+                avg_interval *= 60
+                unit = "second"
+
+        return int(avg_interval), unit
+
+
 def replace_nans(df: pd.DataFrame, default=None) -> pd.DataFrame:
     df = df.replace(pd.NA, default)
     return df.replace({np.nan: default})
@@ -127,11 +180,12 @@ def filter_by_welldata_datasource_and_project(df: pd.DataFrame) -> pd.DataFrame:
         reader = csv.reader(f)
         _ = next(reader)
         valid_datasources = [row[0] for row in reader if row[1] == "Yes"]
-        f.seek(0)
-        invalid_datasources = [row[0] for row in reader if row[1] == "NO"]
-        logger.info("Invalid WellData Datasources:")
-        for vd in invalid_datasources:
-            logger.info(f"  {vd}")
+
+        # f.seek(0)
+        # invalid_datasources = [row[0] for row in reader if row[1] == "NO"]
+        # logger.info("Invalid WellData Datasources:")
+        # for vd in invalid_datasources:
+        #     logger.info(f"  {vd}")
 
     counts = df.groupby("DataSource").size().reset_index(name="WellCount")
     counts = counts.sort_values("WellCount", ascending=False)
diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py
index ee54d0216..bb393c17f 100644
--- a/transfers/well_transfer.py
+++ b/transfers/well_transfer.py
@@ -37,6 +37,7 @@
     MonitoringFrequencyHistory,
     MeasuringPointHistory,
 )
+from db.engine import session_ctx
 from schemas.thing import CreateWell, CreateWellScreen
 from services.gcs_helper import get_storage_bucket
 from services.util import (
@@ -166,60 +167,98 @@ def dump_cached_elevations(lut: dict):
     blob.upload_from_string(json.dumps(lut))
 
 
-def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None:
-    input_df, cleaned_df = get_wells_to_transfer(session, flags)
+class Transferer(object):
+    input_df: pd.DataFrame = None
+    cleaned_df: pd.DataFrame = None
+    errors: list = None
+    flags: dict = None
+
+    def __init__(self, flags: dict = None):
+        self.errors = []
+        self.flags = flags if flags else {}
+
+    def transfer(self):
+        with session_ctx() as session:
+            self.input_df, self.cleaned_df = self._get_dfs(session)
+            self._limit_iterator(session, self.flags.get("LIMIT", 0))
+
+    def _get_df_to_iterate(self) -> pd.DataFrame:
+        return self.cleaned_df
+
+    def _limit_iterator(self, session: Session, limit: int, step: int = 25):
+        df = self._get_df_to_iterate()
+        n = len(df)
+        start_time = time.time()
+        for i, row in enumerate(df.itertuples()):
+            if limit and i >= limit:
+                logger.info(f"Reached limit of {limit} rows. Stopping migration.")
+                break
+
+            if i and not i % step:
+                logger.info(
+                    f"Processing row {i} of {n},  avg rows per second: {step / (time.time() - start_time):.2f}"
+                )
+                start_time = time.time()
+                try:
+                    session.commit()
+                except Exception as e:
+                    logger.critical(f"Error committing wells. {e}")
+                    session.rollback()
+                    continue
+
+            self._iterator(session, df, i, row)
+
+        session.commit()
+        self._after_hook(session)
+
+    def _iterator(self, session: Session, df: pd.DataFrame, i: int, row: dict):
+        raise NotImplementedError("Must implement _iterator method")
+
+    def _after_hook(self, session: Session):
+        pass
+
+    def _get_dfs(self, session: Session):
+        raise NotImplementedError("Must implement _get_dfs method")
+
+
+class WellTransferer(Transferer):
     source_table = "WellData"
-    wdf = cleaned_df
-    n = len(wdf)
-
-    step = 25
-    start_time = time.time()
-    errors = []
-    added_locations = {}
-    cached_elevations = get_cached_elevations()
-    for i, row in enumerate(wdf.itertuples()):
+
+    def __init__(self, *args, **kw):
+        super().__init__(*args, **kw)
+        self._cached_elevations = get_cached_elevations()
+        self._added_locations = {}
+
+    def _get_dfs(self, session: Session):
+        return get_wells_to_transfer(session, self.flags)
+
+    def _iterator(self, session, df, i, row):
         pointid = row.PointID
-        if wdf[wdf["PointID"] == pointid].shape[0] > 1:
+        if df[df["PointID"] == pointid].shape[0] > 1:
             logger.critical(
                 f"transfer_wells. PointID {pointid} has duplicate records. Skipping."
             )
-            errors.append(
+            self.errors.append(
                 {
                     "pointid": pointid,
                     "error": "duplicate records",
-                    "table": source_table,
+                    "table": self.source_table,
                     "field": "PointID",
                 }
             )
-            continue
-
-        if limit and i >= limit:
-            logger.info(f"Reached limit of {limit} rows. Stopping migration.")
-            break
-
-        if i and not i % step:
-            logger.info(
-                f"Processing row {i} of {n},  avg rows per second: {step / (time.time() - start_time):.2f}"
-            )
-            start_time = time.time()
-            try:
-                session.commit()
-            except Exception as e:
-                logger.critical(f"Error committing wells. {e}")
-                session.rollback()
-                continue
+            return
 
         location = None
         try:
-            location, elevation_method = make_location(row, cached_elevations)
+            location, elevation_method = make_location(row, self._cached_elevations)
             session.add(location)
-            added_locations[row.PointID] = elevation_method
+            self._added_locations[row.PointID] = elevation_method
         except Exception as e:
             if location is not None:
                 session.expunge(location)
             # these rollbacks are cause an issue because they are discarding good data
             # session.rollback()
-            errors.append(
+            self.errors.append(
                 {
                     "pointid": row.PointID,
                     "error": e,
@@ -228,7 +267,7 @@ def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None
                 }
             )
             logger.critical(f"Error making location for {row.PointID}: {e}")
-            continue
+            return
 
         try:
             first_visit_date = _get_first_visit_date(row)
@@ -261,11 +300,13 @@ def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None
 
             CreateWell.model_validate(data)
         except ValidationError as e:
-            errors.append({"pointid": row.PointID, "error": e, "table": "WellData"})
+            self.errors.append(
+                {"pointid": row.PointID, "error": e, "table": "WellData"}
+            )
             logger.critical(
                 f"Validation error for row {i} with PointID {row.PointID}: {e.errors()}"
             )
-            continue
+            return
 
         well = None
         try:
@@ -320,9 +361,11 @@ def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None
             if well is not None:
                 session.expunge(well)
 
-            errors.append({"pointid": row.PointID, "error": e, "table": "WellData"})
+            self.errors.append(
+                {"pointid": row.PointID, "error": e, "table": "WellData"}
+            )
             logger.critical(f"Error creating well for {row.PointID}: {e}")
-            continue
+            return
 
         assoc = LocationThingAssociation(effective_start=location.created_at)
 
@@ -330,155 +373,194 @@ def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None
         assoc.thing = well
         session.add(assoc)
 
-    session.commit()
+    def _after_hook(self, session):
+        dump_cached_elevations(self._cached_elevations)
 
-    # add things thate need well id
-    for well in session.query(Thing).filter(Thing.thing_type == "water well").all():
-        row = wdf[wdf["PointID"] == well.name].iloc[0]
-        if not isna(row.Notes):
-            note = well.add_note(row.Notes, "Other")
-            session.add(note)
+        # add things thate need well id
+        for well in session.query(Thing).filter(Thing.thing_type == "water well").all():
+            row = self.cleaned_df[self.cleaned_df["PointID"] == well.name].iloc[0]
+            if not isna(row.Notes):
+                note = well.add_note(row.Notes, "Other")
+                session.add(note)
 
-        location = well.current_location
-        elevation_method = added_locations[row.PointID]
-        data_provenances = make_location_data_provenance(
-            row, location, elevation_method
-        )
-        for dp in data_provenances:
-            session.add(dp)
-
-        """
-            Developer's note
-
-            It's not clear when the measuring point from NM_Aquifer was 
-            determined, so I'm setting start_date to the day of the transfer
-        """
-        measuring_point_history = MeasuringPointHistory(
-            thing_id=well.id,
-            measuring_point_height=row.MPHeight,
-            measuring_point_description=row.MeasuringPoint,
-            start_date=datetime.now(tz=UTC),
-            end_date=None,
-        )
-        session.add(measuring_point_history)
-
-        """
-        Developer's notes
-
-        For all status_history records the start_date will be now since that
-        isn't recorded in NM_Aquifer
-        """
-        # TODO: if row.MonitoringStatus == "Q" is it monitored or not? <-- AMMP review
-        # TODO: if row.MonitoringStatus == "X" can that change? <-- AMMP review
-        # TODO: have AMMP review and verify the various MonitoringStatus codes
-
-        target_id = well.id
-        target_table = "thing"
-        if not isna(row.MonitoringStatus):
-            if (
-                "X" in row.MonitoringStatus
-                or "I" in row.MonitoringStatus
-                or "C" in row.MonitoringStatus
-            ):
-                status_value = "Not currently monitored"
-            else:
-                status_value = "Currently monitored"
-
-            status_history = StatusHistory(
-                status_type="Monitoring Status",
-                status_value=status_value,
-                reason=row.MonitorStatusReason,
-                start_date=datetime.now(tz=UTC),
-                target_id=target_id,
-                target_table=target_table,
+            location = well.current_location
+            elevation_method = self._added_locations[row.PointID]
+            data_provenances = make_location_data_provenance(
+                row, location, elevation_method
             )
-            session.add(status_history)
-            logger.info(
-                f"  Added monitoring status for well {well.name}: {status_value}"
-            )
-
-            for code in NMA_MONITORING_FREQUENCY.keys():
-                if code in row.MonitoringStatus:
-                    monitoring_frequency = NMA_MONITORING_FREQUENCY[code]
-                    monitoring_frequency_history = MonitoringFrequencyHistory(
-                        thing_id=well.id,
-                        monitoring_frequency=monitoring_frequency,
-                        start_date=datetime.now(tz=UTC),
-                        end_date=None,
-                    )
-                    session.add(monitoring_frequency_history)
-                    logger.info(
-                        f"  Adding '{monitoring_frequency}' monitoring frequency for well {well.name}"
-                    )
-
-        if not isna(row.Status):
-            status_value = lexicon_mapper.map_value(f"LU_Status:{row.Status}")
-            status_history = StatusHistory(
-                status_type="Well Status",
-                status_value=status_value,
-                reason=row.StatusUserNotes,
+            for dp in data_provenances:
+                session.add(dp)
+
+            """
+                Developer's note
+    
+                It's not clear when the measuring point from NM_Aquifer was 
+                determined, so I'm setting start_date to the day of the transfer
+            """
+            measuring_point_history = MeasuringPointHistory(
+                thing_id=well.id,
+                measuring_point_height=row.MPHeight,
+                measuring_point_description=row.MeasuringPoint,
                 start_date=datetime.now(tz=UTC),
-                target_id=target_id,
-                target_table=target_table,
+                end_date=None,
             )
-            session.add(status_history)
-            logger.info(f"  Added well status for well {well.name}: {status_value}")
+            session.add(measuring_point_history)
+
+            """
+            Developer's notes
+    
+            For all status_history records the start_date will be now since that
+            isn't recorded in NM_Aquifer
+            """
+            # TODO: if row.MonitoringStatus == "Q" is it monitored or not? <-- AMMP review
+            # TODO: if row.MonitoringStatus == "X" can that change? <-- AMMP review
+            # TODO: have AMMP review and verify the various MonitoringStatus codes
+
+            target_id = well.id
+            target_table = "thing"
+            if not isna(row.MonitoringStatus):
+                if (
+                    "X" in row.MonitoringStatus
+                    or "I" in row.MonitoringStatus
+                    or "C" in row.MonitoringStatus
+                ):
+                    status_value = "Not currently monitored"
+                else:
+                    status_value = "Currently monitored"
+
+                status_history = StatusHistory(
+                    status_type="Monitoring Status",
+                    status_value=status_value,
+                    reason=row.MonitorStatusReason,
+                    start_date=datetime.now(tz=UTC),
+                    target_id=target_id,
+                    target_table=target_table,
+                )
+                session.add(status_history)
+                logger.info(
+                    f"  Added monitoring status for well {well.name}: {status_value}"
+                )
+
+                for code in NMA_MONITORING_FREQUENCY.keys():
+                    if code in row.MonitoringStatus:
+                        monitoring_frequency = NMA_MONITORING_FREQUENCY[code]
+                        monitoring_frequency_history = MonitoringFrequencyHistory(
+                            thing_id=well.id,
+                            monitoring_frequency=monitoring_frequency,
+                            start_date=datetime.now(tz=UTC),
+                            end_date=None,
+                        )
+                        session.add(monitoring_frequency_history)
+                        logger.info(
+                            f"  Adding '{monitoring_frequency}' monitoring frequency for well {well.name}"
+                        )
+
+            if not isna(row.Status):
+                status_value = lexicon_mapper.map_value(f"LU_Status:{row.Status}")
+                status_history = StatusHistory(
+                    status_type="Well Status",
+                    status_value=status_value,
+                    reason=row.StatusUserNotes,
+                    start_date=datetime.now(tz=UTC),
+                    target_id=target_id,
+                    target_table=target_table,
+                )
+                session.add(status_history)
+                logger.info(f"  Added well status for well {well.name}: {status_value}")
 
-    session.commit()
+        session.commit()
+
+
+class ChunkTransferer(Transferer):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.chunk_size = 1000
+
+    def chunk_transfer(self):
+        with session_ctx() as session:
+            self.input_df, self.cleaned_df = self._get_dfs(session)
+            df = self._get_df_to_iterate()
+            for ci, chunk in enumerate(chunk_by_size(df, self.chunk_size)):
+                dbchunk = self._get_df_chunk(session, chunk)
+                logger.info(
+                    f"Processing chunk {ci}, {len(chunk)} rows, {len(dbchunk)} db items"
+                )
+                for i, row in enumerate(chunk.itertuples()):
+                    dbitem = self._get_db_item(dbchunk, row)
+                    if not dbitem:
+                        self._missing_db_item_warning(row)
+                        continue
+                    self._chunk_iterator(session, df, i, row, dbitem)
+            session.commit()
 
-    dump_cached_elevations(cached_elevations)
-    return input_df, cleaned_df, errors
+    def _get_df_chunk(self, session, chunk):
+        raise NotImplementedError("Must be implemented in subclass")
 
+    def _missing_db_item_warning(self, row):
+        raise NotImplementedError("Must be implemented in subclass")
 
-def transfer_wellscreens(session, limit=None):
+    def _chunk_iterator(self, session, df, i, row, dbitem):
+        raise NotImplementedError("Must be implemented in subclass")
 
-    input_df = read_csv("WellScreens")
-    wdf = replace_nans(input_df)
+    def _get_db_item(self, chunk, row):
+        raise NotImplementedError("Must be implemented in subclass")
 
-    cleaned_df = filter_to_valid_point_ids(session, wdf)
 
-    errors = []
-    for ci, chunk in enumerate(chunk_by_size(cleaned_df, 1000)):
+class WellScreenTransferer(ChunkTransferer):
+    def _get_dfs(self, session: Session):
+        input_df = read_csv("WellScreens")
+        wdf = replace_nans(input_df)
+        cleaned_df = filter_to_valid_point_ids(session, wdf)
+        return input_df, cleaned_df
+
+    def _get_df_chunk(self, session, chunk):
         things = (
             session.query(Thing).filter(Thing.name.in_(chunk.PointID.tolist())).all()
         )
+        return things
+
+    def _get_db_item(self, dbchunk, row):
+        return next((thing for thing in dbchunk if thing.name == row.PointID), None)
+
+    def _missing_db_item_warning(self, row):
+        logger.warning(f"Thing with PointID {row.PointID} not found in database.")
+
+    def _chunk_iterator(self, session, df, i, row, db_item):
+        well_screen_data = {
+            "thing_id": db_item.id,
+            "screen_depth_top": row.ScreenTop,
+            "screen_depth_bottom": row.ScreenBottom,
+            # "screen_type": row.ScreenType,
+            "screen_description": row.ScreenDescription,
+            "release_status": "draft",
+            "nma_pk_wellscreens": row.GlobalID,
+        }
+        try:
+            # TODO: add validation logic here to ensure no overlapping screens for the same well
+            CreateWellScreen.model_validate(well_screen_data)
+        except ValidationError as e:
+            logger.critical(
+                f"Validation error for row {i} with PointID {row.PointID}: {e.errors()}"
+            )
+            self.errors.append(
+                {"pointid": row.PointID, "error": e, "table": "WellScreens"}
+            )
+            return
 
-        logger.info(f"Processing chunk {ci}, {len(chunk)} rows, {len(things)} things")
-        for i, row in enumerate(chunk.itertuples()):
-            thing = next((thing for thing in things if thing.name == row.PointID), None)
-            if not thing:
-                logger.warning(
-                    f"Thing with PointID {row.PointID} not found. Skipping well screen."
-                )
-                continue
-
-            well_screen_data = {
-                "thing_id": thing.id,
-                "screen_depth_top": row.ScreenTop,
-                "screen_depth_bottom": row.ScreenBottom,
-                # "screen_type": row.ScreenType,
-                "screen_description": row.ScreenDescription,
-                "release_status": "draft",
-                "nma_pk_wellscreens": row.GlobalID,
-            }
-            try:
-                # TODO: add validation logic here to ensure no overlapping screens for the same well
-                CreateWellScreen.model_validate(well_screen_data)
-            except ValidationError as e:
-                logger.critical(
-                    f"Validation error for row {i} with PointID {row.PointID}: {e.errors()}"
-                )
-                errors.append(
-                    {"pointid": row.PointID, "error": e, "table": "WellScreens"}
-                )
-                continue
+        well_screen = WellScreen(**well_screen_data)
+        session.add(well_screen)
 
-            well_screen = WellScreen(**well_screen_data)
-            session.add(well_screen)
 
-        session.commit()
+def transfer_wells(flags: dict = None):
+    transferer = WellTransferer(flags=flags)
+    transferer.transfer()
+    return transferer.input_df, transferer.cleaned_df, transferer.errors
 
-    return input_df, cleaned_df, errors
+
+def transfer_wellscreens(flags: dict = None):
+    transferer = WellScreenTransferer(flags=flags)
+    transferer.chunk_transfer()
+    return transferer.input_df, transferer.cleaned_df, transferer.errors
 
 
 def cleanup_locations(session):
@@ -541,3 +623,314 @@ def cleanup_locations(session):
 
 
 # ============= EOF =============================================
+# def transfer_wells_old(session: Session, flags: dict = None, limit: int = 0) -> None:
+#     # input_df, cleaned_df = get_wells_to_transfer(session, flags)
+#     # wdf = cleaned_df
+#     # n = len(wdf)
+#
+#     # step = 25
+#     # start_time = time.time()
+#     errors = []
+#     added_locations = {}
+#     # cached_elevations = get_cached_elevations()
+#     # for i, row in enumerate(wdf.itertuples()):
+#     # pointid = row.PointID
+#     # if wdf[wdf["PointID"] == pointid].shape[0] > 1:
+#     #     logger.critical(
+#     #         f"transfer_wells. PointID {pointid} has duplicate records. Skipping."
+#     #     )
+#     #     errors.append(
+#     #         {
+#     #             "pointid": pointid,
+#     #             "error": "duplicate records",
+#     #             "table": source_table,
+#     #             "field": "PointID",
+#     #         }
+#     #     )
+#     #     continue
+#
+#     # if limit and i >= limit:
+#     #     logger.info(f"Reached limit of {limit} rows. Stopping migration.")
+#     #     break
+#     #
+#     # if i and not i % step:
+#     #     logger.info(
+#     #         f"Processing row {i} of {n},  avg rows per second: {step / (time.time() - start_time):.2f}"
+#     #     )
+#     #     start_time = time.time()
+#     #     try:
+#     #         session.commit()
+#     #     except Exception as e:
+#     #         logger.critical(f"Error committing wells. {e}")
+#     #         session.rollback()
+#     #         continue
+#
+#     # location = None
+#     # try:
+#     #     location, elevation_method = make_location(row, cached_elevations)
+#     #     session.add(location)
+#     #     added_locations[row.PointID] = elevation_method
+#     # except Exception as e:
+#     #     if location is not None:
+#     #         session.expunge(location)
+#     #     # these rollbacks are cause an issue because they are discarding good data
+#     #     # session.rollback()
+#     #     errors.append(
+#     #         {
+#     #             "pointid": row.PointID,
+#     #             "error": e,
+#     #             "table": "Location",
+#     #             "field": str(e),
+#     #         }
+#     #     )
+#     #     logger.critical(f"Error making location for {row.PointID}: {e}")
+#     #     continue
+#     #
+#     # try:
+#     #     first_visit_date = _get_first_visit_date(row)
+#     #     well_purposes = [] if isna(row.CurrentUse) else _extract_well_purposes(row)
+#     #     well_casing_materials = (
+#     #         [] if isna(row.CasingDescription) else _extract_casing_materials(row)
+#     #     )
+#     #
+#     #     # manually add the well rather than add_well from services/thing_helper.py
+#     #     # so that effective_start can be set on the location assocation
+#     #
+#     #     data = CreateWell(
+#     #         location_id=location.id,
+#     #         name=row.PointID,
+#     #         first_visit_date=first_visit_date,
+#     #         hole_depth=row.HoleDepth,
+#     #         well_depth=row.WellDepth,
+#     #         well_construction_notes=row.ConstructionNotes,
+#     #         well_casing_diameter=(
+#     #             row.CasingDiameter * 12 if row.CasingDiameter else None
+#     #         ),
+#     #         well_casing_depth=row.CasingDepth,
+#     #         release_status="public" if row.PublicRelease else "private",
+#     #         measuring_point_height=row.MPHeight,
+#     #         measuring_point_description=row.MeasuringPoint,
+#     #         notes=(
+#     #             [{"content": row.Notes, "note_type": "Other"}] if row.Notes else []
+#     #         ),
+#     #     )
+#     #
+#     #     CreateWell.model_validate(data)
+#     # except ValidationError as e:
+#     #     errors.append({"pointid": row.PointID, "error": e, "table": "WellData"})
+#     #     logger.critical(
+#     #         f"Validation error for row {i} with PointID {row.PointID}: {e.errors()}"
+#     #     )
+#     #     continue
+#     #
+#     # well = None
+#     # try:
+#     #     well_data = data.model_dump(
+#     #         exclude=[
+#     #             "location_id",
+#     #             "group_id",
+#     #             "well_purposes",
+#     #             "well_casing_materials",
+#     #             "measuring_point_height",
+#     #             "measuring_point_description",
+#     #         ]
+#     #     )
+#     #     well_data["thing_type"] = "water well"
+#     #     well_data["nma_pk_welldata"] = row.WellID
+#     #
+#     #     well_data.pop("notes")
+#     #     well = Thing(**well_data)
+#     #     session.add(well)
+#     #     # logger.info(f"Created well for {row.PointID}")
+#     #
+#     #     # flush well to access its ID for status_history
+#     #     # session.flush()
+#     #
+#     #     # session.commit()
+#     #     # session.refresh(well)
+#     #     # if notes:
+#     #     #     for ni in notes:
+#     #     #         nn = well.add_note(ni['content'], ni['note_type'])
+#     #     #         session.add(nn)
+#     #
+#     #     if well_purposes:
+#     #         for wp in well_purposes:
+#     #             # TODO: add validation logic here
+#     #             if wp in WellPurposeEnum:
+#     #                 wp_obj = WellPurpose(thing=well, purpose=wp)
+#     #                 session.add(wp_obj)
+#     #             else:
+#     #                 logger.critical(f"{well.name}. Invalid well purpose: {wp}")
+#     #
+#     #     if well_casing_materials:
+#     #         for wcm in well_casing_materials:
+#     #             # TODO: add validation logic here
+#     #             if wcm in WellCasingMaterialEnum:
+#     #                 wcm_obj = WellCasingMaterial(thing=well, material=wcm)
+#     #                 session.add(wcm_obj)
+#     #             else:
+#     #                 logger.critical(
+#     #                     f"{well.name}. Invalid well casing material: {wcm}"
+#     #                 )
+#     # except Exception as e:
+#     #     if well is not None:
+#     #         session.expunge(well)
+#     #
+#     #     errors.append({"pointid": row.PointID, "error": e, "table": "WellData"})
+#     #     logger.critical(f"Error creating well for {row.PointID}: {e}")
+#     #     continue
+#     #
+#     # assoc = LocationThingAssociation(effective_start=location.created_at)
+#     #
+#     # assoc.location = location
+#     # assoc.thing = well
+#     # session.add(assoc)
+#
+#     # session.commit()
+#
+#     # # add things thate need well id
+#     # for well in session.query(Thing).filter(Thing.thing_type == "water well").all():
+#     #     row = wdf[wdf["PointID"] == well.name].iloc[0]
+#     #     if not isna(row.Notes):
+#     #         note = well.add_note(row.Notes, "Other")
+#     #         session.add(note)
+#     #
+#     #     location = well.current_location
+#     #     elevation_method = added_locations[row.PointID]
+#     #     data_provenances = make_location_data_provenance(
+#     #         row, location, elevation_method
+#     #     )
+#     #     for dp in data_provenances:
+#     #         session.add(dp)
+#     #
+#     #     """
+#     #         Developer's note
+#     #
+#     #         It's not clear when the measuring point from NM_Aquifer was
+#     #         determined, so I'm setting start_date to the day of the transfer
+#     #     """
+#     #     measuring_point_history = MeasuringPointHistory(
+#     #         thing_id=well.id,
+#     #         measuring_point_height=row.MPHeight,
+#     #         measuring_point_description=row.MeasuringPoint,
+#     #         start_date=datetime.now(tz=UTC),
+#     #         end_date=None,
+#     #     )
+#     #     session.add(measuring_point_history)
+#     #
+#     #     """
+#     #     Developer's notes
+#     #
+#     #     For all status_history records the start_date will be now since that
+#     #     isn't recorded in NM_Aquifer
+#     #     """
+#     #     # TODO: if row.MonitoringStatus == "Q" is it monitored or not? <-- AMMP review
+#     #     # TODO: if row.MonitoringStatus == "X" can that change? <-- AMMP review
+#     #     # TODO: have AMMP review and verify the various MonitoringStatus codes
+#     #
+#     #     target_id = well.id
+#     #     target_table = "thing"
+#     #     if not isna(row.MonitoringStatus):
+#     #         if (
+#     #             "X" in row.MonitoringStatus
+#     #             or "I" in row.MonitoringStatus
+#     #             or "C" in row.MonitoringStatus
+#     #         ):
+#     #             status_value = "Not currently monitored"
+#     #         else:
+#     #             status_value = "Currently monitored"
+#     #
+#     #         status_history = StatusHistory(
+#     #             status_type="Monitoring Status",
+#     #             status_value=status_value,
+#     #             reason=row.MonitorStatusReason,
+#     #             start_date=datetime.now(tz=UTC),
+#     #             target_id=target_id,
+#     #             target_table=target_table,
+#     #         )
+#     #         session.add(status_history)
+#     #         logger.info(
+#     #             f"  Added monitoring status for well {well.name}: {status_value}"
+#     #         )
+#     #
+#     #         for code in NMA_MONITORING_FREQUENCY.keys():
+#     #             if code in row.MonitoringStatus:
+#     #                 monitoring_frequency = NMA_MONITORING_FREQUENCY[code]
+#     #                 monitoring_frequency_history = MonitoringFrequencyHistory(
+#     #                     thing_id=well.id,
+#     #                     monitoring_frequency=monitoring_frequency,
+#     #                     start_date=datetime.now(tz=UTC),
+#     #                     end_date=None,
+#     #                 )
+#     #                 session.add(monitoring_frequency_history)
+#     #                 logger.info(
+#     #                     f"  Adding '{monitoring_frequency}' monitoring frequency for well {well.name}"
+#     #                 )
+#     #
+#     #     if not isna(row.Status):
+#     #         status_value = lexicon_mapper.map_value(f"LU_Status:{row.Status}")
+#     #         status_history = StatusHistory(
+#     #             status_type="Well Status",
+#     #             status_value=status_value,
+#     #             reason=row.StatusUserNotes,
+#     #             start_date=datetime.now(tz=UTC),
+#     #             target_id=target_id,
+#     #             target_table=target_table,
+#     #         )
+#     #         session.add(status_history)
+#     #         logger.info(f"  Added well status for well {well.name}: {status_value}")
+#     #
+#     # session.commit()
+#     #
+#     # dump_cached_elevations(cached_elevations)
+#     # return input_df, cleaned_df, errors
+
+# def transfer_wellscreens_old(session, limit=None):
+
+# input_df = read_csv("WellScreens")
+# wdf = replace_nans(input_df)
+#
+# cleaned_df = filter_to_valid_point_ids(session, wdf)
+
+# errors = []
+# for ci, chunk in enumerate(chunk_by_size(cleaned_df, 1000)):
+#     things = (
+#         session.query(Thing).filter(Thing.name.in_(chunk.PointID.tolist())).all()
+#     )
+#
+#     logger.info(f"Processing chunk {ci}, {len(chunk)} rows, {len(things)} things")
+#     for i, row in enumerate(chunk.itertuples()):
+#         thing = next((thing for thing in things if thing.name == row.PointID), None)
+#         if not thing:
+#             logger.warning(
+#                 f"Thing with PointID {row.PointID} not found. Skipping well screen."
+#             )
+#             continue
+#
+#         well_screen_data = {
+#             "thing_id": thing.id,
+#             "screen_depth_top": row.ScreenTop,
+#             "screen_depth_bottom": row.ScreenBottom,
+#             # "screen_type": row.ScreenType,
+#             "screen_description": row.ScreenDescription,
+#             "release_status": "draft",
+#             "nma_pk_wellscreens": row.GlobalID,
+#         }
+#         try:
+#             # TODO: add validation logic here to ensure no overlapping screens for the same well
+#             CreateWellScreen.model_validate(well_screen_data)
+#         except ValidationError as e:
+#             logger.critical(
+#                 f"Validation error for row {i} with PointID {row.PointID}: {e.errors()}"
+#             )
+#             errors.append(
+#                 {"pointid": row.PointID, "error": e, "table": "WellScreens"}
+#             )
+#             continue
+#
+#         well_screen = WellScreen(**well_screen_data)
+#         session.add(well_screen)
+#
+#     session.commit()
+#
+# return input_df, cleaned_df, errors

From cee74b4e355a2c2c8824f4c3d16b4dbc91128194 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Wed, 26 Nov 2025 08:21:11 -0700
Subject: [PATCH 03/66] refactor: improve error handling and logging in sensor
 transfer and deployment processes

---
 transfers/sensor_transfer.py                 | 17 ++--
 transfers/transfer.py                        | 32 ++-----
 transfers/util.py                            | 92 ++++++++++++++++++--
 transfers/waterlevels_transducer_transfer.py | 85 +++++++++++++-----
 transfers/well_transfer.py                   | 35 ++++----
 5 files changed, 184 insertions(+), 77 deletions(-)

diff --git a/transfers/sensor_transfer.py b/transfers/sensor_transfer.py
index 90e7273f1..0f0b98074 100644
--- a/transfers/sensor_transfer.py
+++ b/transfers/sensor_transfer.py
@@ -138,7 +138,7 @@ def transfer_sensors(session):
                 try:
                     recording_interval = int(row.RecordingInterval)
                 except (ValueError, TypeError):
-
+                    error = "RecordingInterval is not an integer"
                     # try to calculate recording interval from measurements
                     if sensor_type in estimators:
                         estimator = estimators[sensor_type]
@@ -146,27 +146,26 @@ def transfer_sensors(session):
                         estimator = RecordingIntervalEstimator(sensor_type)
                         estimators[sensor_type] = estimator
 
-                    recording_interval, unit = estimator.estimate_recording_interval(
-                        row, installation_date, removal_date
+                    recording_interval, unit, error = (
+                        estimator.estimate_recording_interval(
+                            row, installation_date, removal_date
+                        )
                     )
 
                     if recording_interval:
                         recording_interval_unit = unit
                         logger.info(
                             f"name={sensor.name}, serial_no={sensor.serial_no}. "
-                            f"estimated recording interval: {recording_interval} "
+                            f"estimated recording interval: {recording_interval} {unit}"
                         )
                     else:
-
                         logger.critical(
-                            f"name={sensor.name}, serial_no={sensor.serial_no} RecordingInterval is not an integer"
+                            f"name={sensor.name}, serial_no={sensor.serial_no} error={error}"
                         )
-
                         errors.append(
                             {
                                 "pointid": pointid,
-                                "error": f"row.ID={row.ID}, row.SerialNo={row.SerialNo}. RecordingInterval is "
-                                f"not an integer",
+                                "error": f"name={sensor.name}, row.SerialNo={row.SerialNo}. error={error}",
                                 "table": source_table,
                                 "field": "RecordingInterval",
                             }
diff --git a/transfers/transfer.py b/transfers/transfer.py
index 2098a85ea..5ad8f8c11 100644
--- a/transfers/transfer.py
+++ b/transfers/transfer.py
@@ -162,29 +162,19 @@ def transfer_debugging(sess, metrics, limit=100):
     message("TRANSFERRING CONTACTS")
     results = timeit_direct(transfer_contacts, sess)
     metrics.contact_metrics(sess, *results)
-    #
+
     message("TRANSFERRING WATER LEVELS")
     results = timeit_direct(transfer_water_levels, sess)
     metrics.water_level_metrics(sess, *results)
 
-    # message("TRANSFERRING WATER LEVELS PRESSURE")
-    # results = timeit_direct(transfer_water_levels_pressure, sess)
-    # metrics.pressure_metrics(sess, *results)
-
-    # message("TRANSFERRING WATER LEVELS ACOUSTIC")
-    # results = timeit_direct(transfer_water_levels_acoustic, sess)
-    # metrics.acoustic_metrics(sess, *results)
+    message("TRANSFERRING WATER LEVELS PRESSURE")
+    results = timeit_direct(transfer_water_levels_pressure, sess)
+    metrics.pressure_metrics(sess, *results)
 
-    """
-    Developer's notes
+    message("TRANSFERRING WATER LEVELS ACOUSTIC")
+    results = timeit_direct(transfer_water_levels_acoustic, sess)
+    metrics.acoustic_metrics(sess, *results)
 
-    When transfering water chemistry data use the qc_type field to indicate
-    normal/blanks/duplicates instead of what comes from LU_SampleType. Use
-    those values, however, to map to the standard qc_type fields if applicable
-    (i.e. not applicable when sample type is "Soil or rock sample" or
-    "Precipitation," but is applicable when sample type is "Equipment blank"
-    or "Field duplicate")
-    """
     # message("TRANSFERRING LINK IDS")
     # timeit_direct(transfer_link_ids, sess)
     # timeit_direct(transfer_link_ids_welldata, sess)
@@ -192,20 +182,16 @@ def transfer_debugging(sess, metrics, limit=100):
     # message("TRANSFERRING GROUPS")
     # timeit_direct(transfer_groups, sess)
 
-    # message("TRANSFERRING WATER LEVELS ACOUSTIC")
-    # timeit_direct(transfer_water_levels_acoustic, sess)
     # message("TRANSFERRING ASSETS")
     # timeit_direct(transfer_assets, sess)
-    metrics.close()
-    metrics.save_to_storage_bucket()
 
 
 def main():
     message("START--------------------------------------")
-    limit = int(os.environ.get("TRANSFER_LIMIT", 1000))
+    limit = int(os.getenv("TRANSFER_LIMIT", 1000))
     metrics = Metrics()
     with session_ctx() as sess:
-        if int(os.environ.get("TRANSFER_DEBUG", 0)):
+        if int(os.getenv("TRANSFER_DEBUG", 0)):
             transfer_debugging(sess, metrics, limit=limit)
         else:
             transfer_all(sess, metrics, limit=limit)
diff --git a/transfers/util.py b/transfers/util.py
index 31ad32e0a..a74a6a9d0 100644
--- a/transfers/util.py
+++ b/transfers/util.py
@@ -15,9 +15,10 @@
 # ===============================================================================
 import csv
 import io
+import math
 import os
 import re
-from datetime import datetime, timezone, timedelta
+from datetime import datetime, timezone, timedelta, UTC
 from pathlib import Path
 
 import numpy as np
@@ -53,6 +54,58 @@
 }
 
 
+class MeasuringPointEstimator:
+    def __init__(self):
+        df = read_csv("WaterLevels")
+        df["DateMeasured"] = pd.to_datetime(df["DateMeasured"], errors="coerce")
+        self._df = df.dropna(subset=["DateMeasured"])
+
+    def estimate_measuring_point_height(
+        self, row
+    ) -> tuple[float, str, datetime | None]:
+        mph = row.MPHeight
+        mph_desc = row.MeasuringPoint
+
+        df = self._df[self._df["PointID"] == row.PointID]
+        df = df.sort_values("DateMeasured")
+        if mph is None:
+            logger.info(
+                f"No MPHeight found for PointID: {row.PointID}. Estimating from measurements."
+            )
+            # try to estimate mpheight from measurements
+            mphs = []
+            start_dates = []
+            mph_descs = []
+            for m in df.itertuples():
+                mphi = m.DepthToWater - m.DepthToWaterBGS
+                start_date = m.DateMeasured
+                if mphi not in mphs:
+                    mphs.append(mphi)
+                    mph_descs.append(
+                        "Auto calculated from measurements at depth to water and depth to water below ground surface"
+                    )
+                    start_dates.append(start_date)
+
+        else:
+            mphs = [mph]
+            mph_descs = [mph_desc]
+            if len(df) > 0:
+                start_dates = [df["DateMeasured"].min()]
+            else:
+                start_dates = [datetime.now(tz=UTC)]
+
+        if len(mphs) == 1:
+            end_dates = [None]
+        else:
+            end_dates = [start_dates[i + 1] for i in range(len(start_dates) - 1)]
+            end_dates.append(None)
+
+        logger.info(
+            f"Estimated MPHeight: {mph}, {start_dates} for PointID: {row.PointID}."
+        )
+        return zip(mphs, mph_descs, start_dates, end_dates)
+
+
 class RecordingIntervalEstimator:
     def __init__(self, sensor_type: str):
         if sensor_type == "Pressure Transducer":
@@ -68,12 +121,12 @@ def estimate_recording_interval(
         record: pd.Series,
         installation_date: datetime = None,
         removal_date: datetime = None,
-    ):
+    ) -> tuple[int | None, str | None, str | None]:
         point_id = record.PointID
 
         cdf = self._df[self._df["PointID"] == point_id]
         if len(cdf) == 0:
-            return None, None
+            return None, None, f"No measurements found for PointID: {point_id}"
 
         cdf = cdf.sort_values("DateMeasured")
         if installation_date is not None:
@@ -86,24 +139,47 @@ def estimate_recording_interval(
             date_series = pd.to_datetime(cdf["DateMeasured"])
             intervals = date_series.diff().dropna().dt.total_seconds()
             if len(intervals) == 0:
-                avg_interval = None
+                logger.warning(
+                    f"No intervals found for {point_id} for time range "
+                    f"{installation_date}-{removal_date}. using entire series "
+                )
+                # take average of entire series
+                df = self._df[self._df["PointID"] == point_id]
+                df = df.sort_values("DateMeasured")
+                date_series = pd.to_datetime(df["DateMeasured"])
+                intervals = date_series.diff().dropna().dt.total_seconds()
+                if len(intervals) == 0:
+                    return (
+                        None,
+                        None,
+                        f"No measurements found for {point_id} for entire series",
+                    )
+                else:
+                    avg_interval = intervals.mean()
             else:
                 avg_interval = intervals.mean()
         except IndexError:
-            return None, None
+            return (
+                None,
+                None,
+                (
+                    f"Not enough measurements to calculate interval for PointID: {point_id},"
+                    f"{installation_date} to {removal_date}."
+                ),
+            )
 
         # convert to hours
         avg_interval /= 3600
 
         unit = "hour"
-        if avg_interval < 1:
+        if avg_interval < 0.95:  # if less then 57 minutes convert to minutes
             avg_interval *= 60
             unit = "minute"
-            if avg_interval < 1:
+            if avg_interval < 0.95:  # if less then 57 seconds convert to seconds
                 avg_interval *= 60
                 unit = "second"
 
-        return int(avg_interval), unit
+        return math.ceil(avg_interval), unit, None
 
 
 def replace_nans(df: pd.DataFrame, default=None) -> pd.DataFrame:
diff --git a/transfers/waterlevels_transducer_transfer.py b/transfers/waterlevels_transducer_transfer.py
index 64e39b439..c6e76005c 100644
--- a/transfers/waterlevels_transducer_transfer.py
+++ b/transfers/waterlevels_transducer_transfer.py
@@ -34,6 +34,17 @@ def transfer_water_levels_pressure(session):
     return _transfer_water_levels_continuous(session, wd, "QCed", "Pressure Transducer")
 
 
+def _find_deployment(ts, deployments):
+    for d in deployments:
+        start = Timestamp(d.installation_date)
+        if start > ts:
+            break  # because sorted by start
+        end = Timestamp(d.removal_date) if d.removal_date else Timestamp.max
+        if end >= ts:
+            return d
+    return None
+
+
 def _transfer_water_levels_continuous(session, input_df, partition_field, sensor_type):
     from schemas.transducer import CreateTransducerObservation
 
@@ -46,11 +57,16 @@ def _transfer_water_levels_continuous(session, input_df, partition_field, sensor
     cleaned_df = filter_to_valid_point_ids(session, input_df)
 
     # group by pointid
+    cleaned_df = cleaned_df.sort_values(by=["PointID"])
     gwd = cleaned_df.groupby(["PointID"])
+    n = len(gwd)
     errors = []
-    for index, group in gwd:
+    nodeployments = {}
+    for i, (index, group) in enumerate(gwd):
         pointid = index[0]
-        logger.info(f"Processing PointID: {pointid}")
+        logger.info(
+            f"Processing PointID: {pointid}. {i + 1}/{n} ({100*(i+1)/n:0.2f}) completed."
+        )
 
         deployments = (
             session.query(Deployment)
@@ -98,27 +114,47 @@ def _transfer_water_levels_continuous(session, input_df, partition_field, sensor
                 continue
 
             observations = []
+
+            # min_deployment_date = Timestamp(min([d.installation_date for d in deployments]))
+            # max_deployment_date = Timestamp(max([d.removal_date or d.installation_date for d in deployments]))
+            deps_sorted = sorted(
+                deployments, key=lambda d: Timestamp(d.installation_date)
+            )
+
             for row in rows.itertuples():
-                deployment = next(
-                    (
-                        d
-                        for d in deployments
-                        if Timestamp(d.installation_date) <= row.DateMeasured
-                        and (
-                            d.removal_date is None
-                            or Timestamp(d.removal_date) >= row.DateMeasured
-                        )
-                    ),
-                    None,
-                )
+                deployment = _find_deployment(row.DateMeasured, deps_sorted)
+
+                # if min_deployment_date < row.DateMeasured < max_deployment_date:
+                #     deployment = next(
+                #         (
+                #             d
+                #             for d in deployments
+                #             if Timestamp(d.installation_date) <= row.DateMeasured
+                #             and (
+                #                 d.removal_date is None
+                #                 or Timestamp(d.removal_date) >= row.DateMeasured
+                #             )
+                #         ),
+                #         None,
+                #     )
 
                 if deployment is None:
-                    errors.append(
-                        {
-                            "pointid": pointid,
-                            "error": f"no deployment at {row.DateMeasured}",
-                        }
-                    )
+                    # errors.append(
+                    #     {
+                    #         "pointid": pointid,
+                    #         "error": f"no deployment at {row.DateMeasured}",
+                    #     }
+                    # )
+                    if pointid not in nodeployments:
+                        nodeployments[pointid] = (row.DateMeasured, row.DateMeasured)
+                    else:
+                        min_date, max_date = nodeployments[pointid]
+                        if row.DateMeasured < min_date:
+                            min_date = row.DateMeasured
+                        elif row.DateMeasured > max_date:
+                            max_date = row.DateMeasured
+                        nodeployments[pointid] = min_date, max_date
+
                     logger.critical(
                         f"No deployment found for PointID={pointid} at {row.DateMeasured}"
                     )
@@ -155,6 +191,15 @@ def _transfer_water_levels_continuous(session, input_df, partition_field, sensor
                 session.rollback()
                 continue
 
+    # convert nodeployments to errors
+    for pointid, (min_date, max_date) in nodeployments.items():
+        errors.append(
+            {
+                "pointid": pointid,
+                "error": f"no deployment between {min_date} and {max_date}",
+            }
+        )
+
     return input_df, cleaned_df, errors
 
 
diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py
index bb393c17f..a8a8a22b4 100644
--- a/transfers/well_transfer.py
+++ b/transfers/well_transfer.py
@@ -56,6 +56,7 @@
     lexicon_mapper,
     filter_non_transferred_wells,
     chunk_by_size,
+    MeasuringPointEstimator,
 )
 
 ADDED = []
@@ -291,8 +292,10 @@ def _iterator(self, session, df, i, row):
                 ),
                 well_casing_depth=row.CasingDepth,
                 release_status="public" if row.PublicRelease else "private",
-                measuring_point_height=row.MPHeight,
-                measuring_point_description=row.MeasuringPoint,
+                measuring_point_height=0,
+                measuring_point_description="",
+                # measuring_point_height=row.MPHeight,
+                # measuring_point_description=row.MeasuringPoint,
                 notes=(
                     [{"content": row.Notes, "note_type": "Other"}] if row.Notes else []
                 ),
@@ -375,7 +378,7 @@ def _iterator(self, session, df, i, row):
 
     def _after_hook(self, session):
         dump_cached_elevations(self._cached_elevations)
-
+        measuring_point_estimator = MeasuringPointEstimator()
         # add things thate need well id
         for well in session.query(Thing).filter(Thing.thing_type == "water well").all():
             row = self.cleaned_df[self.cleaned_df["PointID"] == well.name].iloc[0]
@@ -391,20 +394,18 @@ def _after_hook(self, session):
             for dp in data_provenances:
                 session.add(dp)
 
-            """
-                Developer's note
-    
-                It's not clear when the measuring point from NM_Aquifer was 
-                determined, so I'm setting start_date to the day of the transfer
-            """
-            measuring_point_history = MeasuringPointHistory(
-                thing_id=well.id,
-                measuring_point_height=row.MPHeight,
-                measuring_point_description=row.MeasuringPoint,
-                start_date=datetime.now(tz=UTC),
-                end_date=None,
-            )
-            session.add(measuring_point_history)
+            mphs = measuring_point_estimator.estimate_measuring_point_height(row)
+
+            for mph, mph_desc, start_date, end_date in mphs:
+                measuring_point_history = MeasuringPointHistory(
+                    thing_id=well.id,
+                    measuring_point_height=mph,
+                    measuring_point_description=mph_desc,
+                    # start_date=datetime.now(tz=UTC),
+                    start_date=start_date,
+                    end_date=end_date,
+                )
+                session.add(measuring_point_history)
 
             """
             Developer's notes

From cb8c81f8de098f75c531ee3f8300ffd3371bdcbd Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Wed, 26 Nov 2025 08:23:04 -0700
Subject: [PATCH 04/66] refactor: remove commented-out code for deployment date
 checks in waterlevels_transducer_transfer.py

---
 transfers/waterlevels_transducer_transfer.py | 22 --------------------
 1 file changed, 22 deletions(-)

diff --git a/transfers/waterlevels_transducer_transfer.py b/transfers/waterlevels_transducer_transfer.py
index c6e76005c..f1ef30cd1 100644
--- a/transfers/waterlevels_transducer_transfer.py
+++ b/transfers/waterlevels_transducer_transfer.py
@@ -115,8 +115,6 @@ def _transfer_water_levels_continuous(session, input_df, partition_field, sensor
 
             observations = []
 
-            # min_deployment_date = Timestamp(min([d.installation_date for d in deployments]))
-            # max_deployment_date = Timestamp(max([d.removal_date or d.installation_date for d in deployments]))
             deps_sorted = sorted(
                 deployments, key=lambda d: Timestamp(d.installation_date)
             )
@@ -124,27 +122,7 @@ def _transfer_water_levels_continuous(session, input_df, partition_field, sensor
             for row in rows.itertuples():
                 deployment = _find_deployment(row.DateMeasured, deps_sorted)
 
-                # if min_deployment_date < row.DateMeasured < max_deployment_date:
-                #     deployment = next(
-                #         (
-                #             d
-                #             for d in deployments
-                #             if Timestamp(d.installation_date) <= row.DateMeasured
-                #             and (
-                #                 d.removal_date is None
-                #                 or Timestamp(d.removal_date) >= row.DateMeasured
-                #             )
-                #         ),
-                #         None,
-                #     )
-
                 if deployment is None:
-                    # errors.append(
-                    #     {
-                    #         "pointid": pointid,
-                    #         "error": f"no deployment at {row.DateMeasured}",
-                    #     }
-                    # )
                     if pointid not in nodeployments:
                         nodeployments[pointid] = (row.DateMeasured, row.DateMeasured)
                     else:

From bd9a2955f5168f5281de54eebfa36b484e700ffd Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Wed, 26 Nov 2025 11:04:59 -0700
Subject: [PATCH 05/66] refactor: streamline transfer function calls by
 consolidating flags usage

---
 transfers/transfer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/transfers/transfer.py b/transfers/transfer.py
index 5ad8f8c11..15c3cc40f 100644
--- a/transfers/transfer.py
+++ b/transfers/transfer.py
@@ -61,13 +61,14 @@ def transfer_all(sess, metrics, limit=100):
     flags = {
         "TRANSFER_ALL_WELLS": True,
         "TRANSFER_ALL_WELLSCREENS": True,
+        "LIMIT": limit,
     }
 
-    results = timeit_direct(transfer_wells, sess, flags=flags, limit=limit)
+    results = timeit_direct(transfer_wells, flags=flags)
     metrics.well_metrics(sess, *results)
 
     message("TRANSFERRING WELL SCREENS")
-    results = timeit_direct(transfer_wellscreens, sess)
+    results = timeit_direct(transfer_wellscreens, flags=flags)
     metrics.well_screen_metrics(sess, *results)
 
     message("TRANSFERRING SENSORS")

From 0283aeee6d1c7414e5f075cebcff64ab3193a844 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Wed, 26 Nov 2025 16:46:08 -0700
Subject: [PATCH 06/66] refactor: implement SensorTransferer class for improved
 sensor data handling and transfer process

---
 transfers/sensor_transfer.py | 552 +++++++++++++++++++++++------------
 transfers/transfer.py        |  28 +-
 transfers/transferer.py      | 189 ++++++++++++
 transfers/util.py            |   7 +-
 transfers/well_transfer.py   | 450 ++--------------------------
 5 files changed, 595 insertions(+), 631 deletions(-)
 create mode 100644 transfers/transferer.py

diff --git a/transfers/sensor_transfer.py b/transfers/sensor_transfer.py
index 0f0b98074..6c9a75cbc 100644
--- a/transfers/sensor_transfer.py
+++ b/transfers/sensor_transfer.py
@@ -18,6 +18,7 @@
 from sqlalchemy import select
 
 from db import Sensor, Deployment, Thing
+from transfers.transferer import ThingBasedTransferer
 from transfers.util import (
     read_csv,
     logger,
@@ -33,207 +34,378 @@
 }
 
 
-def transfer_sensors(session):
+class SensorTransferer(ThingBasedTransferer):
     source_table = "Equipment"
-    input_df = read_csv(source_table)
-    input_df.columns = input_df.columns.str.replace(" ", "_")
-    input_df = input_df[input_df.SerialNo.notna()]
-    cleaned_df = filter_to_valid_point_ids(session, input_df)
-    cleaned_df = replace_nans(cleaned_df)
-    errors = []
-    grouped_equipment = cleaned_df.groupby(["PointID"])
-    added = {}
-    estimators = {}
-    for index, group in grouped_equipment:
-        pointid = index[0]
-        thing = session.query(Thing).filter(Thing.name == pointid).first()
-        if thing is None:
-            logger.warning(
-                f"Skipping sensor transfer for Thing with PointID {pointid} since it is not in the DB"
-            )
-            continue
-        ordered_group = group.sort_values(by=["DateInstalled"])
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._estimators = {}
+        self._added = {}
+
+    def _get_dfs(self, session):
+        input_df = read_csv(self.source_table)
+        input_df.columns = input_df.columns.str.replace(" ", "_")
+        input_df = input_df[input_df.SerialNo.notna()]
+        cleaned_df = filter_to_valid_point_ids(session, input_df)
+        cleaned_df = replace_nans(cleaned_df)
+        return input_df, cleaned_df
+
+    def _no_db_item_warning(self, index):
+        return f"Skipping sensor transfer for Thing with PointID {index[0]} since it is not in the DB"
+
+    def _get_prepped_group(self, group):
+        return group.sort_values(by=["DateInstalled"])
+
+    def _step(self, session, row, db_item):
+        pointid = self._get_point_id(row, db_item)
 
         try:
-            for row in ordered_group.itertuples():
-                try:
-                    sensor_type = EQUIPMENT_TO_SENSOR_TYPE_MAP[row.EquipmentType]
-                except KeyError as e:
-                    logger.critical(
-                        f"Skipping equipment with type {row.EquipmentType} for point {pointid}"
-                    )
-                    error = (
-                        f"key error adding sensor_type:{row.EquipmentType} error: {e}"
-                    )
-                    errors.append(
-                        {
-                            "pointid": pointid,
-                            "error": error,
-                            "table": source_table,
-                            "field": "EquipmentType",
-                        }
-                    )
-                    continue
-
-                if row.SerialNo in added:
-                    logger.info(
-                        f"Sensor with serial number {row.SerialNo} already added in this transfer session. Only creating deployment for that record"
-                    )
-                    sensor = added[row.SerialNo]
-                else:
-                    sensor = (
-                        session.query(Sensor)
-                        .filter(Sensor.serial_no == row.SerialNo)
-                        .one_or_none()
-                    )
-                    if sensor:
-                        logger.info(
-                            f"Sensor with serial number {row.SerialNo} already exists. Only creating deployment for that record"
-                        )
-
-                if not sensor:
-                    # TODO: Add validation
-                    sensor = Sensor(
-                        nma_pk_equipment=row.GlobalID,
-                        name=row.ID,
-                        sensor_type=sensor_type,
-                        model=row.Model,
-                        serial_no=row.SerialNo,
-                        owner_agency="NMBGMR",
-                        notes=row.Equipment_Notes,
-                    )
-                    added[row.SerialNo] = sensor
-                    session.add(sensor)
-                    logger.info(
-                        f"Added sensor {sensor.name} with serial number {sensor.serial_no}"
-                    )
-
-                if row.DateInstalled:
-                    installation_date = datetime.strptime(
-                        row.DateInstalled, "%Y-%m-%d %H:%M:%S.%f"
-                    ).date()
-                else:
-                    logger.critical(
-                        f"Installation Date cannot be None. Skipping deployment. Sensor: {row.ID}, "
-                        f"SerialNo: {row.SerialNo} PointID: {pointid}"
-                    )
-                    errors.append(
-                        {
-                            "pointid": pointid,
-                            "error": f"row.ID={row.ID}, row.SerialNo={row.SerialNo}. Installation Date cannot "
-                            f"be None",
-                            "table": source_table,
-                            "field": "DateInstalled",
-                        }
-                    )
-                    continue
-
-                removal_date = None
-                if row.DateRemoved:
-                    removal_date = datetime.strptime(
-                        row.DateRemoved, "%Y-%m-%d %H:%M:%S.%f"
-                    ).date()
-
-                recording_interval_unit = "hour"
-                try:
-                    recording_interval = int(row.RecordingInterval)
-                except (ValueError, TypeError):
-                    error = "RecordingInterval is not an integer"
-                    # try to calculate recording interval from measurements
-                    if sensor_type in estimators:
-                        estimator = estimators[sensor_type]
-                    else:
-                        estimator = RecordingIntervalEstimator(sensor_type)
-                        estimators[sensor_type] = estimator
-
-                    recording_interval, unit, error = (
-                        estimator.estimate_recording_interval(
-                            row, installation_date, removal_date
-                        )
-                    )
-
-                    if recording_interval:
-                        recording_interval_unit = unit
-                        logger.info(
-                            f"name={sensor.name}, serial_no={sensor.serial_no}. "
-                            f"estimated recording interval: {recording_interval} {unit}"
-                        )
-                    else:
-                        logger.critical(
-                            f"name={sensor.name}, serial_no={sensor.serial_no} error={error}"
-                        )
-                        errors.append(
-                            {
-                                "pointid": pointid,
-                                "error": f"name={sensor.name}, row.SerialNo={row.SerialNo}. error={error}",
-                                "table": source_table,
-                                "field": "RecordingInterval",
-                            }
-                        )
-                sql = (
-                    select(Deployment)
-                    .join(Thing)
-                    .join(Sensor)
-                    .where(Thing.name == pointid)
-                    .where(Sensor.serial_no == sensor.serial_no)
-                    .where(Deployment.installation_date == installation_date)
-                    .where(Deployment.removal_date == removal_date)
-                )
+            sensor_type = EQUIPMENT_TO_SENSOR_TYPE_MAP[row.EquipmentType]
+        except KeyError as e:
+            logger.critical(
+                f"Skipping equipment with type {row.EquipmentType} for point {pointid}"
+            )
+            error = f"key error adding sensor_type:{row.EquipmentType} error: {e}"
+            self.errors.append(
+                {
+                    "pointid": pointid,
+                    "error": error,
+                    "table": self.source_table,
+                    "field": "EquipmentType",
+                }
+            )
+            return
 
-                existing_deployment = session.execute(sql).scalars().one_or_none()
-                if existing_deployment:
-                    logger.info("existing deployment")
-                    continue
-
-                # TODO: add validation
-                deployment = Deployment(
-                    thing=thing,
-                    sensor=sensor,
-                    installation_date=installation_date,
-                    removal_date=removal_date,
-                    recording_interval=recording_interval,
-                    recording_interval_units=recording_interval_unit,
-                    hanging_cable_length=row.HangingCableLength,
-                    hanging_point_height=row.HangingPointHgt,
-                    hanging_point_description=row.HangingPointDescription,
+        if row.SerialNo in self._added:
+            logger.info(
+                f"Sensor with serial number {row.SerialNo} already added in this transfer session. Only creating deployment for that record"
+            )
+            sensor = self._added[row.SerialNo]
+        else:
+            sensor = (
+                session.query(Sensor)
+                .filter(Sensor.serial_no == row.SerialNo)
+                .one_or_none()
+            )
+            if sensor:
+                logger.info(
+                    f"Sensor with serial number {row.SerialNo} already exists. Only creating deployment for that record"
                 )
-                session.add(deployment)
+
+        if not sensor:
+            # TODO: Add validation
+            sensor = Sensor(
+                nma_pk_equipment=row.GlobalID,
+                name=row.ID,
+                sensor_type=sensor_type,
+                model=row.Model,
+                serial_no=row.SerialNo,
+                owner_agency="NMBGMR",
+                notes=row.Equipment_Notes,
+            )
+            self._added[row.SerialNo] = sensor
+            session.add(sensor)
+            logger.info(
+                f"Added sensor {sensor.name} with serial number {sensor.serial_no}"
+            )
+
+        if row.DateInstalled:
+            installation_date = datetime.strptime(
+                row.DateInstalled, "%Y-%m-%d %H:%M:%S.%f"
+            ).date()
+        else:
+            pointid = self._get_point_id(row)
+            logger.critical(
+                f"Installation Date cannot be None. Skipping deployment. Sensor: {row.ID}, "
+                f"SerialNo: {row.SerialNo} PointID: {pointid}"
+            )
+            self.errors.append(
+                {
+                    "pointid": pointid,
+                    "error": f"row.ID={row.ID}, row.SerialNo={row.SerialNo}. Installation Date cannot "
+                    f"be None",
+                    "table": self.source_table,
+                    "field": "DateInstalled",
+                }
+            )
+            return
+
+        removal_date = None
+        if row.DateRemoved:
+            removal_date = datetime.strptime(
+                row.DateRemoved, "%Y-%m-%d %H:%M:%S.%f"
+            ).date()
+
+        recording_interval_unit = "hour"
+        try:
+            recording_interval = int(row.RecordingInterval)
+        except (ValueError, TypeError):
+            # try to calculate recording interval from measurements
+            if sensor_type in self._estimators:
+                estimator = self._estimators[sensor_type]
+            else:
+                estimator = RecordingIntervalEstimator(sensor_type)
+                self._estimators[sensor_type] = estimator
+
+            recording_interval, unit, error = estimator.estimate_recording_interval(
+                row, installation_date, removal_date
+            )
+
+            if recording_interval:
+                recording_interval_unit = unit
                 logger.info(
-                    f"Added deployment for sensor with serial number {sensor.serial_no}, deployed to {thing.name}: | Installation Date: {installation_date} | Removal Date: {removal_date}"
+                    f"name={sensor.name}, serial_no={sensor.serial_no}. "
+                    f"estimated recording interval: {recording_interval} {unit}"
+                )
+            else:
+                logger.critical(
+                    f"name={sensor.name}, serial_no={sensor.serial_no} error={error}"
                 )
 
-                """
-                Developer's notes
+                self.errors.append(
+                    {
+                        "pointid": pointid,
+                        "error": f"name={sensor.name}, row.SerialNo={row.SerialNo}. error={error}",
+                        "table": self.source_table,
+                        "field": "RecordingInterval",
+                    }
+                )
+
+        sql = (
+            select(Deployment)
+            .join(Thing)
+            .join(Sensor)
+            .where(Thing.name == pointid)
+            .where(Sensor.serial_no == sensor.serial_no)
+            .where(Deployment.installation_date == installation_date)
+            .where(Deployment.removal_date == removal_date)
+        )
+
+        existing_deployment = session.execute(sql).scalars().one_or_none()
+        if existing_deployment:
+            logger.info("existing deployment")
+            return
 
-                Since it's unclear beforehand if a sensor has been removed just update
-                the sensor_status based off of each deployments installation/removal
-                dates
-                """
-                if installation_date:
-                    sensor.sensor_status = "In Service"
-                if removal_date:
-                    sensor.sensor_status = "Retired"
-            session.commit()
-        except Exception as e:
-            import traceback
+        # TODO: add validation
+        deployment = Deployment(
+            thing=db_item,
+            sensor=sensor,
+            installation_date=installation_date,
+            removal_date=removal_date,
+            recording_interval=recording_interval,
+            recording_interval_units=recording_interval_unit,
+            hanging_cable_length=row.HangingCableLength,
+            hanging_point_height=row.HangingPointHgt,
+            hanging_point_description=row.HangingPointDescription,
+        )
+        session.add(deployment)
+        logger.info(
+            f"Added deployment for sensor with serial number {sensor.serial_no}, deployed to {db_item.name}: | "
+            f"Installation Date: {installation_date} | Removal Date: {removal_date}"
+        )
 
-            traceback.print_exc()
-            logger.critical(f"Could not add sensor and deployment: {e}")
-            errors.append({"pointid": pointid, "error": e, "table": source_table})
+        """
+        Developer's notes
 
-    return input_df, cleaned_df, errors
+        Since it's unclear beforehand if a sensor has been removed just update
+        the sensor_status based off of each deployments installation/removal
+        dates
+        """
+        if installation_date:
+            sensor.sensor_status = "In Service"
+        if removal_date:
+            sensor.sensor_status = "Retired"
+
+
+# def transfer_sensors(session):
+#     source_table = "Equipment"
+#     input_df = read_csv(source_table)
+#     input_df.columns = input_df.columns.str.replace(" ", "_")
+#     input_df = input_df[input_df.SerialNo.notna()]
+#     cleaned_df = filter_to_valid_point_ids(session, input_df)
+#     cleaned_df = replace_nans(cleaned_df)
+#     errors = []
+#     grouped_equipment = cleaned_df.groupby(["PointID"])
+#     added = {}
+#     estimators = {}
+#     for index, group in grouped_equipment:
+#         pointid = index[0]
+#         thing = session.query(Thing).filter(Thing.name == pointid).first()
+#         if thing is None:
+#             logger.warning(
+#                 f"Skipping sensor transfer for Thing with PointID {pointid} since it is not in the DB"
+#             )
+#             continue
+#         ordered_group = group.sort_values(by=["DateInstalled"])
+#
+#         try:
+#             for row in ordered_group.itertuples():
+#                 try:
+#                     sensor_type = EQUIPMENT_TO_SENSOR_TYPE_MAP[row.EquipmentType]
+#                 except KeyError as e:
+#                     logger.critical(
+#                         f"Skipping equipment with type {row.EquipmentType} for point {pointid}"
+#                     )
+#                     error = (
+#                         f"key error adding sensor_type:{row.EquipmentType} error: {e}"
+#                     )
+#                     errors.append(
+#                         {
+#                             "pointid": pointid,
+#                             "error": error,
+#                             "table": source_table,
+#                             "field": "EquipmentType",
+#                         }
+#                     )
+#                     continue
+#
+#                 if row.SerialNo in added:
+#                     logger.info(
+#                         f"Sensor with serial number {row.SerialNo} already added in this transfer session. Only creating deployment for that record"
+#                     )
+#                     sensor = added[row.SerialNo]
+#                 else:
+#                     sensor = (
+#                         session.query(Sensor)
+#                         .filter(Sensor.serial_no == row.SerialNo)
+#                         .one_or_none()
+#                     )
+#                     if sensor:
+#                         logger.info(
+#                             f"Sensor with serial number {row.SerialNo} already exists. Only creating deployment for that record"
+#                         )
+#
+#                 if not sensor:
+#                     # TODO: Add validation
+#                     sensor = Sensor(
+#                         nma_pk_equipment=row.GlobalID,
+#                         name=row.ID,
+#                         sensor_type=sensor_type,
+#                         model=row.Model,
+#                         serial_no=row.SerialNo,
+#                         owner_agency="NMBGMR",
+#                         notes=row.Equipment_Notes,
+#                     )
+#                     added[row.SerialNo] = sensor
+#                     session.add(sensor)
+#                     logger.info(
+#                         f"Added sensor {sensor.name} with serial number {sensor.serial_no}"
+#                     )
+#
+#                 if row.DateInstalled:
+#                     installation_date = datetime.strptime(
+#                         row.DateInstalled, "%Y-%m-%d %H:%M:%S.%f"
+#                     ).date()
+#                 else:
+#                     logger.critical(
+#                         f"Installation Date cannot be None. Skipping deployment. Sensor: {row.ID}, "
+#                         f"SerialNo: {row.SerialNo} PointID: {pointid}"
+#                     )
+#                     errors.append(
+#                         {
+#                             "pointid": pointid,
+#                             "error": f"row.ID={row.ID}, row.SerialNo={row.SerialNo}. Installation Date cannot "
+#                             f"be None",
+#                             "table": source_table,
+#                             "field": "DateInstalled",
+#                         }
+#                     )
+#                     continue
+#
+#                 removal_date = None
+#                 if row.DateRemoved:
+#                     removal_date = datetime.strptime(
+#                         row.DateRemoved, "%Y-%m-%d %H:%M:%S.%f"
+#                     ).date()
+#
+#                 recording_interval_unit = "hour"
+#                 try:
+#                     recording_interval = int(row.RecordingInterval)
+#                 except (ValueError, TypeError):
+#                     error = "RecordingInterval is not an integer"
+#                     # try to calculate recording interval from measurements
+#                     if sensor_type in estimators:
+#                         estimator = estimators[sensor_type]
+#                     else:
+#                         estimator = RecordingIntervalEstimator(sensor_type)
+#                         estimators[sensor_type] = estimator
+#
+#                     recording_interval, unit, error = (
+#                         estimator.estimate_recording_interval(
+#                             row, installation_date, removal_date
+#                         )
+#                     )
+#
+#                     if recording_interval:
+#                         recording_interval_unit = unit
+#                         logger.info(
+#                             f"name={sensor.name}, serial_no={sensor.serial_no}. "
+#                             f"estimated recording interval: {recording_interval} {unit}"
+#                         )
+#                     else:
+#                         logger.critical(
+#                             f"name={sensor.name}, serial_no={sensor.serial_no} error={error}"
+#                         )
+#                         errors.append(
+#                             {
+#                                 "pointid": pointid,
+#                                 "error": f"name={sensor.name}, row.SerialNo={row.SerialNo}. error={error}",
+#                                 "table": source_table,
+#                                 "field": "RecordingInterval",
+#                             }
+#                         )
+#                 sql = (
+#                     select(Deployment)
+#                     .join(Thing)
+#                     .join(Sensor)
+#                     .where(Thing.name == pointid)
+#                     .where(Sensor.serial_no == sensor.serial_no)
+#                     .where(Deployment.installation_date == installation_date)
+#                     .where(Deployment.removal_date == removal_date)
+#                 )
+#
+#                 existing_deployment = session.execute(sql).scalars().one_or_none()
+#                 if existing_deployment:
+#                     logger.info("existing deployment")
+#                     continue
+#
+#                 # TODO: add validation
+#                 deployment = Deployment(
+#                     thing=thing,
+#                     sensor=sensor,
+#                     installation_date=installation_date,
+#                     removal_date=removal_date,
+#                     recording_interval=recording_interval,
+#                     recording_interval_units=recording_interval_unit,
+#                     hanging_cable_length=row.HangingCableLength,
+#                     hanging_point_height=row.HangingPointHgt,
+#                     hanging_point_description=row.HangingPointDescription,
+#                 )
+#                 session.add(deployment)
+#                 logger.info(
+#                     f"Added deployment for sensor with serial number {sensor.serial_no}, deployed to {thing.name}: | Installation Date: {installation_date} | Removal Date: {removal_date}"
+#                 )
+#
+#                 """
+#                 Developer's notes
+#
+#                 Since it's unclear beforehand if a sensor has been removed just update
+#                 the sensor_status based off of each deployments installation/removal
+#                 dates
+#                 """
+#                 if installation_date:
+#                     sensor.sensor_status = "In Service"
+#                 if removal_date:
+#                     sensor.sensor_status = "Retired"
+#             session.commit()
+#         except Exception as e:
+#             import traceback
+#
+#             traceback.print_exc()
+#             logger.critical(f"Could not add sensor and deployment: {e}")
+#             errors.append({"pointid": pointid, "error": e, "table": source_table})
+#
+#     return input_df, cleaned_df, errors
 
 
 # ============= EOF =============================================
-def init_sensor(session):
-    sensor = Sensor()
-    sensor.name = "Groundwater level manual measurement"
-    sensor.description = "manual gwl measurement. needs to be replaced with measurementmethod(?) e.g. steel tape, eprobe, etc."
-    sensor.unit = "ft"
-    sensor.datetime_installed = datetime.now()
-    session.add(sensor)
-    session.commit()
-
-
-if __name__ == "__main__":
-    transfer_sensors("abc")
diff --git a/transfers/transfer.py b/transfers/transfer.py
index 15c3cc40f..a2d7544a9 100644
--- a/transfers/transfer.py
+++ b/transfers/transfer.py
@@ -30,12 +30,9 @@
 from transfers.group_transfer import transfer_groups
 from transfers.link_ids_transfer import transfer_link_ids, transfer_link_ids_welldata
 from transfers.contact_transfer import transfer_contacts
-from transfers.sensor_transfer import transfer_sensors
+from transfers.sensor_transfer import SensorTransferer
 from transfers.waterlevels_transfer import transfer_water_levels
-from transfers.well_transfer import (
-    transfer_wells,
-    transfer_wellscreens,
-)
+from transfers.well_transfer import WellTransferer, WellScreenTransferer
 
 from transfers.asset_transfer import transfer_assets
 from transfers.util import timeit, timeit_direct
@@ -64,15 +61,15 @@ def transfer_all(sess, metrics, limit=100):
         "LIMIT": limit,
     }
 
-    results = timeit_direct(transfer_wells, flags=flags)
+    results = _execute_transfer(WellTransferer, flags=flags)
     metrics.well_metrics(sess, *results)
 
     message("TRANSFERRING WELL SCREENS")
-    results = timeit_direct(transfer_wellscreens, flags=flags)
+    results = _execute_transfer(WellScreenTransferer, flags=flags)
     metrics.well_screen_metrics(sess, *results)
 
     message("TRANSFERRING SENSORS")
-    results = timeit_direct(transfer_sensors, sess)
+    results = _execute_transfer(SensorTransferer, flags=flags)
     metrics.sensor_metrics(sess, *results)
 
     # Developer's notes all the metadata for these Things are not defined in the models/schemas yet'
@@ -125,6 +122,12 @@ def transfer_all(sess, metrics, limit=100):
     timeit_direct(transfer_assets, sess)
 
 
+def _execute_transfer(klass, flags: dict = None):
+    transferer = klass(flags=flags)
+    transferer.transfer()
+    return transferer.input_df, transferer.cleaned_df, transferer.errors
+
+
 def transfer_debugging(sess, metrics, limit=100):
     message("STARTING TRANSFER DEBUG", new_line_at_top=False)
 
@@ -134,17 +137,18 @@ def transfer_debugging(sess, metrics, limit=100):
 
     message("TRANSFERRING WELLS")
 
-    flags = {"TRANSFER_ALL_WELLS": True, "LIMIT": limit}
+    flags = {"TRANSFER_ALL_WELLS": True, "LIMIT": limit}  # not currently used
 
-    results = timeit_direct(transfer_wells, flags=flags)
+    results = _execute_transfer(WellTransferer, flags=flags)
     metrics.well_metrics(sess, *results)
 
     message("TRANSFERRING WELL SCREENS")
-    results = timeit_direct(transfer_wellscreens, flags=flags)
+    results = _execute_transfer(WellScreenTransferer, flags=flags)
     metrics.well_screen_metrics(sess, *results)
 
     message("TRANSFERRING SENSORS")
-    results = timeit_direct(transfer_sensors, sess)
+    results = _execute_transfer(SensorTransferer, flags=flags)
+    # results = timeit_direct(transfer_sensors, sess)
     metrics.sensor_metrics(sess, *results)
 
     # Developer's notes all the metadata for these Things are not defined in the models/schemas yet'
diff --git a/transfers/transferer.py b/transfers/transferer.py
new file mode 100644
index 000000000..273462585
--- /dev/null
+++ b/transfers/transferer.py
@@ -0,0 +1,189 @@
+# ===============================================================================
+# Copyright 2025 ross
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+import time
+
+import pandas as pd
+from pandas import DataFrame
+from sqlalchemy.orm import Session
+
+from db import Thing
+from db.engine import session_ctx
+from transfers.logger import logger
+from transfers.util import chunk_by_size
+
+
+class Transferer(object):
+    input_df: pd.DataFrame = None
+    cleaned_df: pd.DataFrame = None
+    errors: list = None
+    flags: dict = None
+
+    def __init__(self, flags: dict = None):
+        self.errors = []
+        self.flags = flags if flags else {}
+
+    def transfer(self):
+        with session_ctx() as session:
+            self.input_df, self.cleaned_df = self._get_dfs(session)
+            self._transfer_hook(session)
+            session.commit()
+
+    def _transfer_hook(self, session: Session):
+        self._limit_iterator(session, self.flags.get("LIMIT", 0))
+
+    def _get_df_to_iterate(self) -> pd.DataFrame:
+        return self.cleaned_df
+
+    def _limit_iterator(self, session: Session, limit: int, step: int = 25):
+        df = self._get_df_to_iterate()
+        n = len(df)
+        start_time = time.time()
+        for i, row in enumerate(df.itertuples()):
+            if limit and i >= limit:
+                logger.info(f"Reached limit of {limit} rows. Stopping migration.")
+                break
+
+            if i and not i % step:
+                logger.info(
+                    f"Processing row {i} of {n},  avg rows per second: {step / (time.time() - start_time):.2f}"
+                )
+                start_time = time.time()
+                try:
+                    session.commit()
+                except Exception as e:
+                    logger.critical(f"Error committing wells. {e}")
+                    session.rollback()
+                    continue
+
+            self._iterator(session, df, i, row)
+
+        session.commit()
+        self._after_hook(session)
+
+    def _iterator(self, session: Session, df: pd.DataFrame, i: int, row: dict):
+        raise NotImplementedError("Must implement _iterator method")
+
+    def _after_hook(self, session: Session):
+        pass
+
+    def _get_dfs(self, session: Session):
+        raise NotImplementedError("Must implement _get_dfs method")
+
+
+class ChunkTransferer(Transferer):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.chunk_size = 1000
+
+    def _transfer_hook(self, session: Session):
+        df = self._get_df_to_iterate()
+        for ci, chunk in enumerate(chunk_by_size(df, self.chunk_size)):
+            dbchunk = self._get_df_chunk(session, chunk)
+            logger.info(
+                f"Processing chunk {ci}, {len(chunk)} rows, {len(dbchunk)} db items"
+            )
+            for i, row in enumerate(chunk.itertuples()):
+                dbitem = self._get_db_item(dbchunk, row)
+                if not dbitem:
+                    self._missing_db_item_warning(row)
+                    continue
+                self._chunk_iterator(session, df, i, row, dbitem)
+
+    # def chunk_transfer(self):
+    #     with session_ctx() as session:
+    #         self.input_df, self.cleaned_df = self._get_dfs(session)
+    #         df = self._get_df_to_iterate()
+    #         for ci, chunk in enumerate(chunk_by_size(df, self.chunk_size)):
+    #             dbchunk = self._get_df_chunk(session, chunk)
+    #             logger.info(
+    #                 f"Processing chunk {ci}, {len(chunk)} rows, {len(dbchunk)} db items"
+    #             )
+    #             for i, row in enumerate(chunk.itertuples()):
+    #                 dbitem = self._get_db_item(dbchunk, row)
+    #                 if not dbitem:
+    #                     self._missing_db_item_warning(row)
+    #                     continue
+    #                 self._chunk_iterator(session, df, i, row, dbitem)
+    #         session.commit()
+
+    def _get_df_chunk(self, session, chunk):
+        raise NotImplementedError("Must be implemented in subclass")
+
+    def _missing_db_item_warning(self, row):
+        raise NotImplementedError("Must be implemented in subclass")
+
+    def _chunk_iterator(self, session, df, i, row, dbitem):
+        raise NotImplementedError("Must be implemented in subclass")
+
+    def _get_db_item(self, chunk, row):
+        raise NotImplementedError("Must be implemented in subclass")
+
+
+class GroupTransferer(Transferer):
+    def _get_group(self):
+        return self.cleaned_df.groupby(["PointID"])
+
+    def _transfer_hook(self, session: Session):
+        self._group_iterator(session)
+
+    def _group_iterator(self, session: Session):
+        groups = self._get_group()
+        for index, group in groups:
+            db_item = self._get_db_item(session, index)
+            if db_item is None:
+                logger.warning(self._no_db_item_warning(index))
+                continue
+
+            prepped_group = self._get_prepped_group(group)
+            for row in prepped_group.itertuples():
+                try:
+                    self._step(session, row, db_item)
+                except Exception as e:
+                    import traceback
+
+                    pointid = self._get_point_id(row, db_item)
+                    traceback.print_exc()
+                    logger.critical(f"Could not add sensor and deployment: {e}")
+                    self.errors.append(
+                        {"pointid": pointid, "error": e, "table": self.source_table}
+                    )
+
+    def _get_point_id(self, row, db_item) -> str:
+        return row.PointID
+
+    def _step(self, session: Session, row, db_item):
+        raise NotImplementedError("Must be implemented in subclass")
+
+    def _get_prepped_group(self, group) -> DataFrame:
+        raise NotImplementedError("Must be implemented in subclass")
+
+    def _no_db_item_warning(self, index) -> str:
+        raise NotImplementedError("Must be implemented in subclass")
+
+    def _get_db_item(self, session, index) -> Thing:
+        raise NotImplementedError("Must be implemented in subclass")
+
+
+class ThingBasedTransferer(GroupTransferer):
+    def _get_group(self):
+        return self.cleaned_df.groupby(["PointID"])
+
+    def _get_db_item(self, session, index) -> Thing:
+        pointid = index[0]
+        return session.query(Thing).filter(Thing.name == pointid).first()
+
+
+# ============= EOF =============================================
diff --git a/transfers/util.py b/transfers/util.py
index a74a6a9d0..023d4a397 100644
--- a/transfers/util.py
+++ b/transfers/util.py
@@ -85,7 +85,9 @@ def estimate_measuring_point_height(
                         "Auto calculated from measurements at depth to water and depth to water below ground surface"
                     )
                     start_dates.append(start_date)
-
+            logger.info(
+                f"Estimated MPHeight: {mphs}, {start_dates} for PointID: {row.PointID}."
+            )
         else:
             mphs = [mph]
             mph_descs = [mph_desc]
@@ -100,9 +102,6 @@ def estimate_measuring_point_height(
             end_dates = [start_dates[i + 1] for i in range(len(start_dates) - 1)]
             end_dates.append(None)
 
-        logger.info(
-            f"Estimated MPHeight: {mph}, {start_dates} for PointID: {row.PointID}."
-        )
         return zip(mphs, mph_descs, start_dates, end_dates)
 
 
diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py
index a8a8a22b4..cc049876d 100644
--- a/transfers/well_transfer.py
+++ b/transfers/well_transfer.py
@@ -14,7 +14,6 @@
 # limitations under the License.
 # ===============================================================================
 import json
-import time
 from datetime import datetime, UTC
 
 import pandas as pd
@@ -37,7 +36,6 @@
     MonitoringFrequencyHistory,
     MeasuringPointHistory,
 )
-from db.engine import session_ctx
 from schemas.thing import CreateWell, CreateWellScreen
 from services.gcs_helper import get_storage_bucket
 from services.util import (
@@ -45,6 +43,7 @@
     get_county_from_point,
     get_quad_name_from_point,
 )
+from transfers.transferer import ChunkTransferer, Transferer
 from transfers.util import (
     make_location,
     make_location_data_provenance,
@@ -55,7 +54,6 @@
     filter_by_welldata_datasource_and_project,
     lexicon_mapper,
     filter_non_transferred_wells,
-    chunk_by_size,
     MeasuringPointEstimator,
 )
 
@@ -122,8 +120,8 @@ def _extract_casing_materials(row) -> list[str]:
 def get_wells_to_transfer(
     sess: Session, flags: dict = None
 ) -> tuple[pd.DataFrame, pd.DataFrame]:
-    if flags is None:
-        flags = {}
+    # if flags is None:
+    #     flags = {}
 
     wdf = read_csv("WellData", dtype={"OSEWelltagID": str})
     ldf = read_csv("Location")
@@ -134,17 +132,19 @@ def get_wells_to_transfer(
 
     input_df = wdf
     wdf = replace_nans(wdf)
-    if flags.get("TRANSFER_ALL_WELLS", True):
-        # todo: filter Locations by DataSource
-        cleaned_df = filter_by_welldata_datasource_and_project(wdf)
-    else:
-        # get a subset of wells that have not been transferred yet
-        # todo: this needs to be defined.
-        #       for now, we are just filtering out wells that have not been transferred yet
-        #       In the future we will be using criteria to determine which wells to transfer
-        #       for example, wells in the "Water Level Network" project
-        cleaned_df = wdf
 
+    # if flags.get("TRANSFER_ALL_WELLS", False):
+    #     # todo: filter Locations by DataSource
+    #     cleaned_df = filter_by_welldata_datasource_and_project(wdf)
+    # else:
+    #     # get a subset of wells that have not been transferred yet
+    #     # todo: this needs to be defined.
+    #     #       for now, we are just filtering out wells that have not been transferred yet
+    #     #       In the future we will be using criteria to determine which wells to transfer
+    #     #       for example, wells in the "Water Level Network" project
+    #     cleaned_df = wdf
+
+    cleaned_df = filter_by_welldata_datasource_and_project(wdf)
     cleaned_df = filter_non_transferred_wells(sess, cleaned_df)
 
     return input_df, cleaned_df
@@ -168,60 +168,6 @@ def dump_cached_elevations(lut: dict):
     blob.upload_from_string(json.dumps(lut))
 
 
-class Transferer(object):
-    input_df: pd.DataFrame = None
-    cleaned_df: pd.DataFrame = None
-    errors: list = None
-    flags: dict = None
-
-    def __init__(self, flags: dict = None):
-        self.errors = []
-        self.flags = flags if flags else {}
-
-    def transfer(self):
-        with session_ctx() as session:
-            self.input_df, self.cleaned_df = self._get_dfs(session)
-            self._limit_iterator(session, self.flags.get("LIMIT", 0))
-
-    def _get_df_to_iterate(self) -> pd.DataFrame:
-        return self.cleaned_df
-
-    def _limit_iterator(self, session: Session, limit: int, step: int = 25):
-        df = self._get_df_to_iterate()
-        n = len(df)
-        start_time = time.time()
-        for i, row in enumerate(df.itertuples()):
-            if limit and i >= limit:
-                logger.info(f"Reached limit of {limit} rows. Stopping migration.")
-                break
-
-            if i and not i % step:
-                logger.info(
-                    f"Processing row {i} of {n},  avg rows per second: {step / (time.time() - start_time):.2f}"
-                )
-                start_time = time.time()
-                try:
-                    session.commit()
-                except Exception as e:
-                    logger.critical(f"Error committing wells. {e}")
-                    session.rollback()
-                    continue
-
-            self._iterator(session, df, i, row)
-
-        session.commit()
-        self._after_hook(session)
-
-    def _iterator(self, session: Session, df: pd.DataFrame, i: int, row: dict):
-        raise NotImplementedError("Must implement _iterator method")
-
-    def _after_hook(self, session: Session):
-        pass
-
-    def _get_dfs(self, session: Session):
-        raise NotImplementedError("Must implement _get_dfs method")
-
-
 class WellTransferer(Transferer):
     source_table = "WellData"
 
@@ -472,41 +418,6 @@ def _after_hook(self, session):
         session.commit()
 
 
-class ChunkTransferer(Transferer):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.chunk_size = 1000
-
-    def chunk_transfer(self):
-        with session_ctx() as session:
-            self.input_df, self.cleaned_df = self._get_dfs(session)
-            df = self._get_df_to_iterate()
-            for ci, chunk in enumerate(chunk_by_size(df, self.chunk_size)):
-                dbchunk = self._get_df_chunk(session, chunk)
-                logger.info(
-                    f"Processing chunk {ci}, {len(chunk)} rows, {len(dbchunk)} db items"
-                )
-                for i, row in enumerate(chunk.itertuples()):
-                    dbitem = self._get_db_item(dbchunk, row)
-                    if not dbitem:
-                        self._missing_db_item_warning(row)
-                        continue
-                    self._chunk_iterator(session, df, i, row, dbitem)
-            session.commit()
-
-    def _get_df_chunk(self, session, chunk):
-        raise NotImplementedError("Must be implemented in subclass")
-
-    def _missing_db_item_warning(self, row):
-        raise NotImplementedError("Must be implemented in subclass")
-
-    def _chunk_iterator(self, session, df, i, row, dbitem):
-        raise NotImplementedError("Must be implemented in subclass")
-
-    def _get_db_item(self, chunk, row):
-        raise NotImplementedError("Must be implemented in subclass")
-
-
 class WellScreenTransferer(ChunkTransferer):
     def _get_dfs(self, session: Session):
         input_df = read_csv("WellScreens")
@@ -552,16 +463,16 @@ def _chunk_iterator(self, session, df, i, row, db_item):
         session.add(well_screen)
 
 
-def transfer_wells(flags: dict = None):
-    transferer = WellTransferer(flags=flags)
-    transferer.transfer()
-    return transferer.input_df, transferer.cleaned_df, transferer.errors
-
-
-def transfer_wellscreens(flags: dict = None):
-    transferer = WellScreenTransferer(flags=flags)
-    transferer.chunk_transfer()
-    return transferer.input_df, transferer.cleaned_df, transferer.errors
+# def transfer_wells(flags: dict = None):
+#     transferer = WellTransferer(flags=flags)
+#     transferer.transfer()
+#     return transferer.input_df, transferer.cleaned_df, transferer.errors
+#
+#
+# def transfer_wellscreens(flags: dict = None):
+#     transferer = WellScreenTransferer(flags=flags)
+#     transferer.chunk_transfer()
+#     return transferer.input_df, transferer.cleaned_df, transferer.errors
 
 
 def cleanup_locations(session):
@@ -624,314 +535,3 @@ def cleanup_locations(session):
 
 
 # ============= EOF =============================================
-# def transfer_wells_old(session: Session, flags: dict = None, limit: int = 0) -> None:
-#     # input_df, cleaned_df = get_wells_to_transfer(session, flags)
-#     # wdf = cleaned_df
-#     # n = len(wdf)
-#
-#     # step = 25
-#     # start_time = time.time()
-#     errors = []
-#     added_locations = {}
-#     # cached_elevations = get_cached_elevations()
-#     # for i, row in enumerate(wdf.itertuples()):
-#     # pointid = row.PointID
-#     # if wdf[wdf["PointID"] == pointid].shape[0] > 1:
-#     #     logger.critical(
-#     #         f"transfer_wells. PointID {pointid} has duplicate records. Skipping."
-#     #     )
-#     #     errors.append(
-#     #         {
-#     #             "pointid": pointid,
-#     #             "error": "duplicate records",
-#     #             "table": source_table,
-#     #             "field": "PointID",
-#     #         }
-#     #     )
-#     #     continue
-#
-#     # if limit and i >= limit:
-#     #     logger.info(f"Reached limit of {limit} rows. Stopping migration.")
-#     #     break
-#     #
-#     # if i and not i % step:
-#     #     logger.info(
-#     #         f"Processing row {i} of {n},  avg rows per second: {step / (time.time() - start_time):.2f}"
-#     #     )
-#     #     start_time = time.time()
-#     #     try:
-#     #         session.commit()
-#     #     except Exception as e:
-#     #         logger.critical(f"Error committing wells. {e}")
-#     #         session.rollback()
-#     #         continue
-#
-#     # location = None
-#     # try:
-#     #     location, elevation_method = make_location(row, cached_elevations)
-#     #     session.add(location)
-#     #     added_locations[row.PointID] = elevation_method
-#     # except Exception as e:
-#     #     if location is not None:
-#     #         session.expunge(location)
-#     #     # these rollbacks are cause an issue because they are discarding good data
-#     #     # session.rollback()
-#     #     errors.append(
-#     #         {
-#     #             "pointid": row.PointID,
-#     #             "error": e,
-#     #             "table": "Location",
-#     #             "field": str(e),
-#     #         }
-#     #     )
-#     #     logger.critical(f"Error making location for {row.PointID}: {e}")
-#     #     continue
-#     #
-#     # try:
-#     #     first_visit_date = _get_first_visit_date(row)
-#     #     well_purposes = [] if isna(row.CurrentUse) else _extract_well_purposes(row)
-#     #     well_casing_materials = (
-#     #         [] if isna(row.CasingDescription) else _extract_casing_materials(row)
-#     #     )
-#     #
-#     #     # manually add the well rather than add_well from services/thing_helper.py
-#     #     # so that effective_start can be set on the location assocation
-#     #
-#     #     data = CreateWell(
-#     #         location_id=location.id,
-#     #         name=row.PointID,
-#     #         first_visit_date=first_visit_date,
-#     #         hole_depth=row.HoleDepth,
-#     #         well_depth=row.WellDepth,
-#     #         well_construction_notes=row.ConstructionNotes,
-#     #         well_casing_diameter=(
-#     #             row.CasingDiameter * 12 if row.CasingDiameter else None
-#     #         ),
-#     #         well_casing_depth=row.CasingDepth,
-#     #         release_status="public" if row.PublicRelease else "private",
-#     #         measuring_point_height=row.MPHeight,
-#     #         measuring_point_description=row.MeasuringPoint,
-#     #         notes=(
-#     #             [{"content": row.Notes, "note_type": "Other"}] if row.Notes else []
-#     #         ),
-#     #     )
-#     #
-#     #     CreateWell.model_validate(data)
-#     # except ValidationError as e:
-#     #     errors.append({"pointid": row.PointID, "error": e, "table": "WellData"})
-#     #     logger.critical(
-#     #         f"Validation error for row {i} with PointID {row.PointID}: {e.errors()}"
-#     #     )
-#     #     continue
-#     #
-#     # well = None
-#     # try:
-#     #     well_data = data.model_dump(
-#     #         exclude=[
-#     #             "location_id",
-#     #             "group_id",
-#     #             "well_purposes",
-#     #             "well_casing_materials",
-#     #             "measuring_point_height",
-#     #             "measuring_point_description",
-#     #         ]
-#     #     )
-#     #     well_data["thing_type"] = "water well"
-#     #     well_data["nma_pk_welldata"] = row.WellID
-#     #
-#     #     well_data.pop("notes")
-#     #     well = Thing(**well_data)
-#     #     session.add(well)
-#     #     # logger.info(f"Created well for {row.PointID}")
-#     #
-#     #     # flush well to access its ID for status_history
-#     #     # session.flush()
-#     #
-#     #     # session.commit()
-#     #     # session.refresh(well)
-#     #     # if notes:
-#     #     #     for ni in notes:
-#     #     #         nn = well.add_note(ni['content'], ni['note_type'])
-#     #     #         session.add(nn)
-#     #
-#     #     if well_purposes:
-#     #         for wp in well_purposes:
-#     #             # TODO: add validation logic here
-#     #             if wp in WellPurposeEnum:
-#     #                 wp_obj = WellPurpose(thing=well, purpose=wp)
-#     #                 session.add(wp_obj)
-#     #             else:
-#     #                 logger.critical(f"{well.name}. Invalid well purpose: {wp}")
-#     #
-#     #     if well_casing_materials:
-#     #         for wcm in well_casing_materials:
-#     #             # TODO: add validation logic here
-#     #             if wcm in WellCasingMaterialEnum:
-#     #                 wcm_obj = WellCasingMaterial(thing=well, material=wcm)
-#     #                 session.add(wcm_obj)
-#     #             else:
-#     #                 logger.critical(
-#     #                     f"{well.name}. Invalid well casing material: {wcm}"
-#     #                 )
-#     # except Exception as e:
-#     #     if well is not None:
-#     #         session.expunge(well)
-#     #
-#     #     errors.append({"pointid": row.PointID, "error": e, "table": "WellData"})
-#     #     logger.critical(f"Error creating well for {row.PointID}: {e}")
-#     #     continue
-#     #
-#     # assoc = LocationThingAssociation(effective_start=location.created_at)
-#     #
-#     # assoc.location = location
-#     # assoc.thing = well
-#     # session.add(assoc)
-#
-#     # session.commit()
-#
-#     # # add things thate need well id
-#     # for well in session.query(Thing).filter(Thing.thing_type == "water well").all():
-#     #     row = wdf[wdf["PointID"] == well.name].iloc[0]
-#     #     if not isna(row.Notes):
-#     #         note = well.add_note(row.Notes, "Other")
-#     #         session.add(note)
-#     #
-#     #     location = well.current_location
-#     #     elevation_method = added_locations[row.PointID]
-#     #     data_provenances = make_location_data_provenance(
-#     #         row, location, elevation_method
-#     #     )
-#     #     for dp in data_provenances:
-#     #         session.add(dp)
-#     #
-#     #     """
-#     #         Developer's note
-#     #
-#     #         It's not clear when the measuring point from NM_Aquifer was
-#     #         determined, so I'm setting start_date to the day of the transfer
-#     #     """
-#     #     measuring_point_history = MeasuringPointHistory(
-#     #         thing_id=well.id,
-#     #         measuring_point_height=row.MPHeight,
-#     #         measuring_point_description=row.MeasuringPoint,
-#     #         start_date=datetime.now(tz=UTC),
-#     #         end_date=None,
-#     #     )
-#     #     session.add(measuring_point_history)
-#     #
-#     #     """
-#     #     Developer's notes
-#     #
-#     #     For all status_history records the start_date will be now since that
-#     #     isn't recorded in NM_Aquifer
-#     #     """
-#     #     # TODO: if row.MonitoringStatus == "Q" is it monitored or not? <-- AMMP review
-#     #     # TODO: if row.MonitoringStatus == "X" can that change? <-- AMMP review
-#     #     # TODO: have AMMP review and verify the various MonitoringStatus codes
-#     #
-#     #     target_id = well.id
-#     #     target_table = "thing"
-#     #     if not isna(row.MonitoringStatus):
-#     #         if (
-#     #             "X" in row.MonitoringStatus
-#     #             or "I" in row.MonitoringStatus
-#     #             or "C" in row.MonitoringStatus
-#     #         ):
-#     #             status_value = "Not currently monitored"
-#     #         else:
-#     #             status_value = "Currently monitored"
-#     #
-#     #         status_history = StatusHistory(
-#     #             status_type="Monitoring Status",
-#     #             status_value=status_value,
-#     #             reason=row.MonitorStatusReason,
-#     #             start_date=datetime.now(tz=UTC),
-#     #             target_id=target_id,
-#     #             target_table=target_table,
-#     #         )
-#     #         session.add(status_history)
-#     #         logger.info(
-#     #             f"  Added monitoring status for well {well.name}: {status_value}"
-#     #         )
-#     #
-#     #         for code in NMA_MONITORING_FREQUENCY.keys():
-#     #             if code in row.MonitoringStatus:
-#     #                 monitoring_frequency = NMA_MONITORING_FREQUENCY[code]
-#     #                 monitoring_frequency_history = MonitoringFrequencyHistory(
-#     #                     thing_id=well.id,
-#     #                     monitoring_frequency=monitoring_frequency,
-#     #                     start_date=datetime.now(tz=UTC),
-#     #                     end_date=None,
-#     #                 )
-#     #                 session.add(monitoring_frequency_history)
-#     #                 logger.info(
-#     #                     f"  Adding '{monitoring_frequency}' monitoring frequency for well {well.name}"
-#     #                 )
-#     #
-#     #     if not isna(row.Status):
-#     #         status_value = lexicon_mapper.map_value(f"LU_Status:{row.Status}")
-#     #         status_history = StatusHistory(
-#     #             status_type="Well Status",
-#     #             status_value=status_value,
-#     #             reason=row.StatusUserNotes,
-#     #             start_date=datetime.now(tz=UTC),
-#     #             target_id=target_id,
-#     #             target_table=target_table,
-#     #         )
-#     #         session.add(status_history)
-#     #         logger.info(f"  Added well status for well {well.name}: {status_value}")
-#     #
-#     # session.commit()
-#     #
-#     # dump_cached_elevations(cached_elevations)
-#     # return input_df, cleaned_df, errors
-
-# def transfer_wellscreens_old(session, limit=None):
-
-# input_df = read_csv("WellScreens")
-# wdf = replace_nans(input_df)
-#
-# cleaned_df = filter_to_valid_point_ids(session, wdf)
-
-# errors = []
-# for ci, chunk in enumerate(chunk_by_size(cleaned_df, 1000)):
-#     things = (
-#         session.query(Thing).filter(Thing.name.in_(chunk.PointID.tolist())).all()
-#     )
-#
-#     logger.info(f"Processing chunk {ci}, {len(chunk)} rows, {len(things)} things")
-#     for i, row in enumerate(chunk.itertuples()):
-#         thing = next((thing for thing in things if thing.name == row.PointID), None)
-#         if not thing:
-#             logger.warning(
-#                 f"Thing with PointID {row.PointID} not found. Skipping well screen."
-#             )
-#             continue
-#
-#         well_screen_data = {
-#             "thing_id": thing.id,
-#             "screen_depth_top": row.ScreenTop,
-#             "screen_depth_bottom": row.ScreenBottom,
-#             # "screen_type": row.ScreenType,
-#             "screen_description": row.ScreenDescription,
-#             "release_status": "draft",
-#             "nma_pk_wellscreens": row.GlobalID,
-#         }
-#         try:
-#             # TODO: add validation logic here to ensure no overlapping screens for the same well
-#             CreateWellScreen.model_validate(well_screen_data)
-#         except ValidationError as e:
-#             logger.critical(
-#                 f"Validation error for row {i} with PointID {row.PointID}: {e.errors()}"
-#             )
-#             errors.append(
-#                 {"pointid": row.PointID, "error": e, "table": "WellScreens"}
-#             )
-#             continue
-#
-#         well_screen = WellScreen(**well_screen_data)
-#         session.add(well_screen)
-#
-#     session.commit()
-#
-# return input_df, cleaned_df, errors

From 9432f8849e99b888ac1030b4a900ff328c13aa21 Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 26 Nov 2025 16:31:32 -0800
Subject: [PATCH 07/66] Unify read csv approaches

---
 .gitignore        |  1 +
 transfers/util.py | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

diff --git a/.gitignore b/.gitignore
index c1d8db1ee..f1bd9dd54 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,7 @@ launcher.sh
 gcs_credentials.json
 transfers/data/assets*
 transfers/data/nma_csv_cache/*
+transfers/data/*.csv
 transfers/transfer*.log
 transfer*.log
 transfers/data/nma_csv_cache/*
diff --git a/transfers/util.py b/transfers/util.py
index cbf0f2b17..590c9252d 100644
--- a/transfers/util.py
+++ b/transfers/util.py
@@ -59,10 +59,24 @@ def replace_nans(df: pd.DataFrame, default=None) -> pd.DataFrame:
 
 
 def read_csv(name: str, dtype: dict | None = None) -> pd.DataFrame:
+    # Try to read from local data directory first
+    local_file = Path(__file__).parent / 'data' / f"{name}.csv"
+
+    if local_file.exists():
+        logger.info(f"Reading {name} from local file: {local_file}")
+        if dtype:
+            return pd.read_csv(local_file, dtype=dtype)
+        else:
+            return pd.read_csv(local_file)
+
+    # Check cache directory
     p = get_transfers_data_path(Path("nma_csv_cache") / f"{name}.csv")
     if os.path.exists(p):
+        logger.info(f"Reading {name} from cache: {p}")
         return pd.read_csv(p, dtype=dtype)
 
+    # Fall back to GCS if local file doesn't exist
+    logger.info(f"Local file and cache not found, reading {name} from GCS")
     bucket = get_storage_bucket()
     blob = bucket.blob(f"nma_csv/{name}.csv")
     data = blob.download_as_bytes()

From 5db6964799f93c102c8f28851f84c0e3af69e3de Mon Sep 17 00:00:00 2001
From: kbighorse <kbighorse@users.noreply.github.com>
Date: Thu, 27 Nov 2025 00:31:24 +0000
Subject: [PATCH 08/66] Formatting changes

---
 transfers/util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/transfers/util.py b/transfers/util.py
index 590c9252d..d08798425 100644
--- a/transfers/util.py
+++ b/transfers/util.py
@@ -60,7 +60,7 @@ def replace_nans(df: pd.DataFrame, default=None) -> pd.DataFrame:
 
 def read_csv(name: str, dtype: dict | None = None) -> pd.DataFrame:
     # Try to read from local data directory first
-    local_file = Path(__file__).parent / 'data' / f"{name}.csv"
+    local_file = Path(__file__).parent / "data" / f"{name}.csv"
 
     if local_file.exists():
         logger.info(f"Reading {name} from local file: {local_file}")

From fe6f50ccf91825676a8d57fbd574ce85dd6819ee Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 26 Nov 2025 16:55:38 -0800
Subject: [PATCH 09/66] Un-ignore features; add features for location and well
 dates

---
 .gitignore                                   |  1 -
 tests/features/location-legacy-dates.feature | 57 +++++++++++++++++
 tests/features/well-completion-date.feature  | 64 ++++++++++++++++++++
 3 files changed, 121 insertions(+), 1 deletion(-)
 create mode 100644 tests/features/location-legacy-dates.feature
 create mode 100644 tests/features/well-completion-date.feature

diff --git a/.gitignore b/.gitignore
index f1bd9dd54..44b28e13c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,7 +30,6 @@ transfers/transfer*.log
 transfer*.log
 transfers/data/nma_csv_cache/*
 !transfers/data/nma_csv_cache/.gitkeep
-tests/features/*.feature
 transfers/metrics/*
 transfers/logs/*
 run_bdd-local.sh
diff --git a/tests/features/location-legacy-dates.feature b/tests/features/location-legacy-dates.feature
new file mode 100644
index 000000000..1486d9edc
--- /dev/null
+++ b/tests/features/location-legacy-dates.feature
@@ -0,0 +1,57 @@
+Feature: Location Legacy Date Fields
+  As a data manager
+  I want to preserve legacy date information from the AMPAPI system
+  So that historical temporal context is not lost during migration
+
+  Background:
+    Given a functioning api
+
+  Scenario: Create location with both legacy dates
+    When I create a location with legacy_date_created "2014-10-17" and inventoried_on "2003-12-10"
+    Then the response should include legacy_date_created as "2014-10-17"
+    And the response should include inventoried_on as "2003-12-10"
+    And the created_at timestamp should be the current system time
+    And the time gap between inventoried_on and legacy_date_created should be preserved
+
+  Scenario: Create location with only legacy_date_created
+    When I create a location with legacy_date_created "2014-10-17"
+    Then the response should include legacy_date_created as "2014-10-17"
+    And the response should include inventoried_on as null
+    And the created_at timestamp should be the current system time
+
+  Scenario: Create location with only inventoried_on
+    When I create a location with inventoried_on "2003-12-10"
+    Then the response should include inventoried_on as "2003-12-10"
+    And the response should include legacy_date_created as null
+    And the created_at timestamp should be the current system time
+
+  Scenario: Create location with neither legacy date
+    When I create a location without legacy dates
+    Then the response should include legacy_date_created as null
+    And the response should include inventoried_on as null
+    And the created_at timestamp should be the current system time
+
+  Scenario: Update location legacy dates
+    Given a location exists with legacy_date_created "2014-10-17"
+    When I update the location to add inventoried_on "2003-12-10"
+    Then the response should include legacy_date_created as "2014-10-17"
+    And the response should include inventoried_on as "2003-12-10"
+
+  Scenario: Retrieve location with legacy dates via GET
+    Given a location exists with legacy_date_created "2014-10-17" and inventoried_on "2003-12-10"
+    When I retrieve the location by ID
+    Then the response should include legacy_date_created as "2014-10-17"
+    And the response should include inventoried_on as "2003-12-10"
+
+  Scenario: Historical data preservation - 54 year gap (Site SM-0227)
+    When I create a location with legacy_date_created "2008-05-28" and inventoried_on "1954-05-01"
+    Then the response should include legacy_date_created as "2008-05-28"
+    And the response should include inventoried_on as "1954-05-01"
+    And the time gap should be approximately 19751 days
+
+  Scenario: List locations includes legacy dates
+    Given multiple locations exist with various legacy dates
+    When I retrieve all locations
+    Then each location should include legacy_date_created field
+    And each location should include inventoried_on field
+    And the fields should be null for locations without legacy dates
diff --git a/tests/features/well-completion-date.feature b/tests/features/well-completion-date.feature
new file mode 100644
index 000000000..54f211ef5
--- /dev/null
+++ b/tests/features/well-completion-date.feature
@@ -0,0 +1,64 @@
+Feature: Well Completion Date
+  As a hydrogeologist
+  I want to track when wells were completed/constructed
+  So that I can analyze well age and relate construction standards to time periods
+
+  Background:
+    Given a functioning api
+
+  Scenario: Create water well with completion date
+    When I create a water well with well_completed_on "2004-08-08"
+    Then the response should include well_completed_on as "2004-08-08"
+    And the response should have thing_type "water well"
+
+  Scenario: Create water well without completion date
+    When I create a water well without well_completed_on
+    Then the response should include well_completed_on as null
+    And the well should be created successfully
+
+  Scenario: Update well to add completion date
+    Given a water well exists without well_completed_on
+    When I update the well to add well_completed_on "2004-08-08"
+    Then the response should include well_completed_on as "2004-08-08"
+
+  Scenario: Update well to change completion date
+    Given a water well exists with well_completed_on "2004-08-08"
+    When I update the well to change well_completed_on to "2005-03-15"
+    Then the response should include well_completed_on as "2005-03-15"
+
+  Scenario: Historical well from 1936
+    When I create a water well with well_completed_on "1936-01-01"
+    Then the response should include well_completed_on as "1936-01-01"
+    And the well age should be over 88 years
+
+  Scenario: Retrieve well with completion date via GET
+    Given a water well exists with well_completed_on "2004-08-08"
+    When I retrieve the well by ID
+    Then the response should include well_completed_on as "2004-08-08"
+    And the response should include the well's age in years
+
+  Scenario: List wells includes completion dates
+    Given multiple wells exist with various completion dates
+    When I retrieve all water wells
+    Then each well should include well_completed_on field
+    And the field should be null for wells without completion dates
+
+  Scenario: Spring does not have completion date
+    When I create a spring
+    Then the response should include well_completed_on as null
+    And the spring should be created successfully
+
+  Scenario: Filter wells by completion date range
+    Given wells exist with completion dates ranging from 1936 to 2024
+    When I filter wells completed between "2000-01-01" and "2010-12-31"
+    Then the response should only include wells completed in that range
+    And wells from 1936 should not be included
+    And wells from 2020 should not be included
+
+  Scenario: Well completion date with location legacy dates
+    When I create a water well with well_completed_on "2004-08-08"
+    And the well's location has legacy_date_created "2014-10-17" and inventoried_on "2013-05-01"
+    Then the well should have well_completed_on as "2004-08-08"
+    And the location should have legacy_date_created as "2014-10-17"
+    And the location should have inventoried_on as "2013-05-01"
+    And all three date fields should be independently queryable

From 738c1ef123120dca01ce9cb86ac234a594b9f7af Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 26 Nov 2025 17:04:36 -0800
Subject: [PATCH 10/66] Remove features we won't keep

---
 tests/features/location-legacy-dates.feature | 57 -----------------
 tests/features/well-completion-date.feature  | 64 --------------------
 2 files changed, 121 deletions(-)
 delete mode 100644 tests/features/location-legacy-dates.feature
 delete mode 100644 tests/features/well-completion-date.feature

diff --git a/tests/features/location-legacy-dates.feature b/tests/features/location-legacy-dates.feature
deleted file mode 100644
index 1486d9edc..000000000
--- a/tests/features/location-legacy-dates.feature
+++ /dev/null
@@ -1,57 +0,0 @@
-Feature: Location Legacy Date Fields
-  As a data manager
-  I want to preserve legacy date information from the AMPAPI system
-  So that historical temporal context is not lost during migration
-
-  Background:
-    Given a functioning api
-
-  Scenario: Create location with both legacy dates
-    When I create a location with legacy_date_created "2014-10-17" and inventoried_on "2003-12-10"
-    Then the response should include legacy_date_created as "2014-10-17"
-    And the response should include inventoried_on as "2003-12-10"
-    And the created_at timestamp should be the current system time
-    And the time gap between inventoried_on and legacy_date_created should be preserved
-
-  Scenario: Create location with only legacy_date_created
-    When I create a location with legacy_date_created "2014-10-17"
-    Then the response should include legacy_date_created as "2014-10-17"
-    And the response should include inventoried_on as null
-    And the created_at timestamp should be the current system time
-
-  Scenario: Create location with only inventoried_on
-    When I create a location with inventoried_on "2003-12-10"
-    Then the response should include inventoried_on as "2003-12-10"
-    And the response should include legacy_date_created as null
-    And the created_at timestamp should be the current system time
-
-  Scenario: Create location with neither legacy date
-    When I create a location without legacy dates
-    Then the response should include legacy_date_created as null
-    And the response should include inventoried_on as null
-    And the created_at timestamp should be the current system time
-
-  Scenario: Update location legacy dates
-    Given a location exists with legacy_date_created "2014-10-17"
-    When I update the location to add inventoried_on "2003-12-10"
-    Then the response should include legacy_date_created as "2014-10-17"
-    And the response should include inventoried_on as "2003-12-10"
-
-  Scenario: Retrieve location with legacy dates via GET
-    Given a location exists with legacy_date_created "2014-10-17" and inventoried_on "2003-12-10"
-    When I retrieve the location by ID
-    Then the response should include legacy_date_created as "2014-10-17"
-    And the response should include inventoried_on as "2003-12-10"
-
-  Scenario: Historical data preservation - 54 year gap (Site SM-0227)
-    When I create a location with legacy_date_created "2008-05-28" and inventoried_on "1954-05-01"
-    Then the response should include legacy_date_created as "2008-05-28"
-    And the response should include inventoried_on as "1954-05-01"
-    And the time gap should be approximately 19751 days
-
-  Scenario: List locations includes legacy dates
-    Given multiple locations exist with various legacy dates
-    When I retrieve all locations
-    Then each location should include legacy_date_created field
-    And each location should include inventoried_on field
-    And the fields should be null for locations without legacy dates
diff --git a/tests/features/well-completion-date.feature b/tests/features/well-completion-date.feature
deleted file mode 100644
index 54f211ef5..000000000
--- a/tests/features/well-completion-date.feature
+++ /dev/null
@@ -1,64 +0,0 @@
-Feature: Well Completion Date
-  As a hydrogeologist
-  I want to track when wells were completed/constructed
-  So that I can analyze well age and relate construction standards to time periods
-
-  Background:
-    Given a functioning api
-
-  Scenario: Create water well with completion date
-    When I create a water well with well_completed_on "2004-08-08"
-    Then the response should include well_completed_on as "2004-08-08"
-    And the response should have thing_type "water well"
-
-  Scenario: Create water well without completion date
-    When I create a water well without well_completed_on
-    Then the response should include well_completed_on as null
-    And the well should be created successfully
-
-  Scenario: Update well to add completion date
-    Given a water well exists without well_completed_on
-    When I update the well to add well_completed_on "2004-08-08"
-    Then the response should include well_completed_on as "2004-08-08"
-
-  Scenario: Update well to change completion date
-    Given a water well exists with well_completed_on "2004-08-08"
-    When I update the well to change well_completed_on to "2005-03-15"
-    Then the response should include well_completed_on as "2005-03-15"
-
-  Scenario: Historical well from 1936
-    When I create a water well with well_completed_on "1936-01-01"
-    Then the response should include well_completed_on as "1936-01-01"
-    And the well age should be over 88 years
-
-  Scenario: Retrieve well with completion date via GET
-    Given a water well exists with well_completed_on "2004-08-08"
-    When I retrieve the well by ID
-    Then the response should include well_completed_on as "2004-08-08"
-    And the response should include the well's age in years
-
-  Scenario: List wells includes completion dates
-    Given multiple wells exist with various completion dates
-    When I retrieve all water wells
-    Then each well should include well_completed_on field
-    And the field should be null for wells without completion dates
-
-  Scenario: Spring does not have completion date
-    When I create a spring
-    Then the response should include well_completed_on as null
-    And the spring should be created successfully
-
-  Scenario: Filter wells by completion date range
-    Given wells exist with completion dates ranging from 1936 to 2024
-    When I filter wells completed between "2000-01-01" and "2010-12-31"
-    Then the response should only include wells completed in that range
-    And wells from 1936 should not be included
-    And wells from 2020 should not be included
-
-  Scenario: Well completion date with location legacy dates
-    When I create a water well with well_completed_on "2004-08-08"
-    And the well's location has legacy_date_created "2014-10-17" and inventoried_on "2013-05-01"
-    Then the well should have well_completed_on as "2004-08-08"
-    And the location should have legacy_date_created as "2014-10-17"
-    And the location should have inventoried_on as "2013-05-01"
-    And all three date fields should be independently queryable

From 953263252428153889933ffe74ecbd97ca133109 Mon Sep 17 00:00:00 2001
From: kbighorse <kbighorse@users.noreply.github.com>
Date: Thu, 27 Nov 2025 01:12:56 +0000
Subject: [PATCH 11/66] Formatting changes

---
 .../steps/post_migration_legacy_data.py       | 200 +++++++++++-------
 1 file changed, 129 insertions(+), 71 deletions(-)

diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py
index dca15d638..e78afbde7 100644
--- a/tests/features/steps/post_migration_legacy_data.py
+++ b/tests/features/steps/post_migration_legacy_data.py
@@ -27,6 +27,7 @@
 def parse_number(text):
     return int(text)
 
+
 register_type(Number=parse_number)
 
 
@@ -91,14 +92,21 @@ def step_given_data_migrated(context: Context):
 @given("a location exists with")
 def step_given_location_with_table(context: Context):
     """Create location with fields from table."""
-    data = {row['field']: row['value'] for row in context.table}
+    data = {row["field"]: row["value"] for row in context.table}
 
-    legacy_date_created = date.fromisoformat(data['legacy_date_created']) if data.get('legacy_date_created') and data['legacy_date_created'] != 'null' else None
-    inventoried_on = date.fromisoformat(data['inventoried_on']) if data.get('inventoried_on') and data['inventoried_on'] != 'null' else None
+    legacy_date_created = (
+        date.fromisoformat(data["legacy_date_created"])
+        if data.get("legacy_date_created") and data["legacy_date_created"] != "null"
+        else None
+    )
+    inventoried_on = (
+        date.fromisoformat(data["inventoried_on"])
+        if data.get("inventoried_on") and data["inventoried_on"] != "null"
+        else None
+    )
 
     location = create_test_location(
-        legacy_date_created=legacy_date_created,
-        inventoried_on=inventoried_on
+        legacy_date_created=legacy_date_created, inventoried_on=inventoried_on
     )
 
     context.test_location = location
@@ -122,12 +130,16 @@ def step_given_multiple_locations(context: Context, count: int):
         legacy_date, inventory_date = test_data[i]
         location = create_test_location(
             legacy_date_created=date.fromisoformat(legacy_date),
-            inventoried_on=date.fromisoformat(inventory_date) if inventory_date else None
+            inventoried_on=(
+                date.fromisoformat(inventory_date) if inventory_date else None
+            ),
         )
         context.test_locations.append(location)
 
 
-@given("locations exist with inventoried_on ranging from {start_year:Number} to {end_year:Number}")
+@given(
+    "locations exist with inventoried_on ranging from {start_year:Number} to {end_year:Number}"
+)
 def step_given_locations_date_range(context: Context, start_year: int, end_year: int):
     """Create locations with inventoried_on across a date range."""
     context.test_locations = []
@@ -136,15 +148,17 @@ def step_given_locations_date_range(context: Context, start_year: int, end_year:
     for year in years:
         location = create_test_location(
             legacy_date_created=date(year + 5, 1, 1),  # Always 5 years after inventory
-            inventoried_on=date(year, 6, 15)
+            inventoried_on=date(year, 6, 15),
         )
         context.test_locations.append(location)
 
 
 @given('{count:Number} locations exist with legacy_date_created "{target_date}"')
-def step_given_locations_with_specific_date(context: Context, count: int, target_date: str):
+def step_given_locations_with_specific_date(
+    context: Context, count: int, target_date: str
+):
     """Create locations with specific legacy_date_created."""
-    if not hasattr(context, 'test_locations'):
+    if not hasattr(context, "test_locations"):
         context.test_locations = []
 
     target = date.fromisoformat(target_date)
@@ -152,7 +166,7 @@ def step_given_locations_with_specific_date(context: Context, count: int, target
     for i in range(count):
         location = create_test_location(
             legacy_date_created=target,
-            inventoried_on=date(2000 + i, 1, 1)  # Vary the inventory dates
+            inventoried_on=date(2000 + i, 1, 1),  # Vary the inventory dates
         )
         context.test_locations.append(location)
 
@@ -160,7 +174,9 @@ def step_given_locations_with_specific_date(context: Context, count: int, target
 @given('a well exists with well_completed_on "{completion_date}"')
 def step_given_well_with_completion(context: Context, completion_date: str):
     """Create well with completion date."""
-    completed_on = date.fromisoformat(completion_date) if completion_date != 'null' else None
+    completed_on = (
+        date.fromisoformat(completion_date) if completion_date != "null" else None
+    )
 
     thing, location = create_test_well(well_completed_on=completed_on)
 
@@ -185,7 +201,9 @@ def step_given_multiple_wells(context: Context, count: int):
     ]
 
     for i in range(min(count, len(completion_dates))):
-        completed_on = date.fromisoformat(completion_dates[i]) if completion_dates[i] else None
+        completed_on = (
+            date.fromisoformat(completion_dates[i]) if completion_dates[i] else None
+        )
         thing, location = create_test_well(well_completed_on=completed_on)
         context.test_wells.append(thing)
 
@@ -197,7 +215,9 @@ def step_given_wells_with_null_completion(context: Context, null_count: int):
     pass
 
 
-@given("wells exist with completion dates from {start_year:Number} to {end_year:Number}")
+@given(
+    "wells exist with completion dates from {start_year:Number} to {end_year:Number}"
+)
 def step_given_wells_date_range(context: Context, start_year: int, end_year: int):
     """Create wells with completion dates across range."""
     context.test_wells = []
@@ -213,7 +233,7 @@ def step_given_wells_specific_years(context: Context, years: str):
     """Create wells with specific completion years."""
     context.test_wells = []
 
-    year_list = [int(y.strip()) for y in years.split(',')]
+    year_list = [int(y.strip()) for y in years.split(",")]
 
     for year in year_list:
         thing, location = create_test_well(well_completed_on=date(year, 6, 15))
@@ -223,7 +243,7 @@ def step_given_wells_specific_years(context: Context, years: str):
 @given("some wells have null well_completed_on")
 def step_given_some_wells_null(context: Context):
     """Add wells without completion dates."""
-    if not hasattr(context, 'test_wells'):
+    if not hasattr(context, "test_wells"):
         context.test_wells = []
 
     for i in range(2):
@@ -234,10 +254,18 @@ def step_given_some_wells_null(context: Context):
 @given("that well's location has")
 def step_given_well_location_has_table(context: Context):
     """Set legacy dates on the well's location."""
-    data = {row['field']: row['value'] for row in context.table}
+    data = {row["field"]: row["value"] for row in context.table}
 
-    legacy_date_created = date.fromisoformat(data.get('legacy_date_created')) if data.get('legacy_date_created') else None
-    inventoried_on = date.fromisoformat(data.get('inventoried_on')) if data.get('inventoried_on') else None
+    legacy_date_created = (
+        date.fromisoformat(data.get("legacy_date_created"))
+        if data.get("legacy_date_created")
+        else None
+    )
+    inventoried_on = (
+        date.fromisoformat(data.get("inventoried_on"))
+        if data.get("inventoried_on")
+        else None
+    )
 
     with session_ctx() as session:
         location = session.get(Location, context.test_well_location.id)
@@ -255,11 +283,11 @@ def step_given_count_locations_migrated(context: Context, count: int):
 
     for i in range(count):
         # 9% have inventoried_on
-        has_inventory = (i < count * 0.09)
+        has_inventory = i < count * 0.09
 
         location = create_test_location(
             legacy_date_created=date(2014, 1, i % 28 + 1),
-            inventoried_on=date(2003, 1, i % 28 + 1) if has_inventory else None
+            inventoried_on=date(2003, 1, i % 28 + 1) if has_inventory else None,
         )
         context.test_locations.append(location)
 
@@ -277,7 +305,7 @@ def step_given_count_wells_migrated(context: Context, count: int):
 
     for i in range(count):
         # 30% have completion dates
-        has_completion = (i < count * 0.30)
+        has_completion = i < count * 0.30
 
         thing, location = create_test_well(
             well_completed_on=date(2000 + (i % 24), 1, 1) if has_completion else None
@@ -295,8 +323,7 @@ def step_given_completion_count(context: Context, count: int):
 def step_given_location_migrated_with_dates(context: Context):
     """Create location with both legacy dates."""
     location = create_test_location(
-        legacy_date_created=date(2014, 4, 3),
-        inventoried_on=date(2002, 12, 10)
+        legacy_date_created=date(2014, 4, 3), inventoried_on=date(2002, 12, 10)
     )
     context.test_location = location
 
@@ -319,6 +346,7 @@ def step_given_well_null_completion(context: Context):
 
 # WHEN steps
 
+
 @when("I retrieve that location via the API")
 def step_when_retrieve_location_api(context: Context):
     """Retrieve location via GET API."""
@@ -335,7 +363,9 @@ def step_when_get_all_locations(context: Context):
     context.locations_response = response.json()
 
 
-@when('I filter locations where inventoried_on is between "{start_date}" and "{end_date}"')
+@when(
+    'I filter locations where inventoried_on is between "{start_date}" and "{end_date}"'
+)
 def step_when_filter_locations(context: Context, start_date: str, end_date: str):
     """Filter locations by date range."""
     # Since API may not support this yet, query database directly
@@ -343,10 +373,11 @@ def step_when_filter_locations(context: Context, start_date: str, end_date: str)
         start = date.fromisoformat(start_date)
         end = date.fromisoformat(end_date)
 
-        locations = session.query(Location).filter(
-            Location.inventoried_on >= start,
-            Location.inventoried_on <= end
-        ).all()
+        locations = (
+            session.query(Location)
+            .filter(Location.inventoried_on >= start, Location.inventoried_on <= end)
+            .all()
+        )
 
         context.filtered_locations = locations
 
@@ -356,9 +387,9 @@ def step_when_query_by_legacy_date(context: Context, target_date: str):
     """Query locations by legacy_date_created."""
     with session_ctx() as session:
         target = date.fromisoformat(target_date)
-        locations = session.query(Location).filter(
-            Location.legacy_date_created == target
-        ).all()
+        locations = (
+            session.query(Location).filter(Location.legacy_date_created == target).all()
+        )
         context.queried_locations = locations
 
 
@@ -378,18 +409,24 @@ def step_when_get_all_wells(context: Context):
     context.wells_response = response.json()
 
 
-@when('I filter wells where well_completed_on is between "{start_date}" and "{end_date}"')
+@when(
+    'I filter wells where well_completed_on is between "{start_date}" and "{end_date}"'
+)
 def step_when_filter_wells(context: Context, start_date: str, end_date: str):
     """Filter wells by completion date range."""
     with session_ctx() as session:
         start = date.fromisoformat(start_date)
         end = date.fromisoformat(end_date)
 
-        wells = session.query(Thing).filter(
-            Thing.thing_type == "water well",
-            Thing.well_completed_on >= start,
-            Thing.well_completed_on <= end
-        ).all()
+        wells = (
+            session.query(Thing)
+            .filter(
+                Thing.thing_type == "water well",
+                Thing.well_completed_on >= start,
+                Thing.well_completed_on <= end,
+            )
+            .all()
+        )
 
         context.filtered_wells = wells
 
@@ -398,9 +435,12 @@ def step_when_filter_wells(context: Context, start_date: str, end_date: str):
 def step_when_get_wells_sorted(context: Context):
     """Get wells sorted by completion date."""
     with session_ctx() as session:
-        wells = session.query(Thing).filter(
-            Thing.thing_type == "water well"
-        ).order_by(Thing.well_completed_on.asc().nullslast()).all()
+        wells = (
+            session.query(Thing)
+            .filter(Thing.thing_type == "water well")
+            .order_by(Thing.well_completed_on.asc().nullslast())
+            .all()
+        )
 
         context.sorted_wells = wells
 
@@ -461,6 +501,7 @@ def step_when_retrieve_well(context: Context):
 
 # THEN steps
 
+
 @then('the response should include legacy_date_created as "{expected_date}"')
 def step_then_legacy_date_created(context: Context, expected_date: str):
     """Assert legacy_date_created matches."""
@@ -492,8 +533,9 @@ def step_then_time_gap_years(context: Context, years: str):
 
     expected_years = float(years)
     tolerance = 0.5
-    assert abs(gap_years - expected_years) < tolerance, \
-        f"Expected ~{expected_years} year gap, got {gap_years:.1f} years"
+    assert (
+        abs(gap_years - expected_years) < tolerance
+    ), f"Expected ~{expected_years} year gap, got {gap_years:.1f} years"
 
 
 @then("each location should have a legacy_date_created field")
@@ -524,24 +566,27 @@ def step_then_some_null_inventory(context: Context):
 def step_then_locations_in_decade(context: Context):
     """Assert filtered locations are in range."""
     for loc in context.filtered_locations:
-        assert 2000 <= loc.inventoried_on.year <= 2010, \
-            f"Location not in 2000-2010: {loc.inventoried_on}"
+        assert (
+            2000 <= loc.inventoried_on.year <= 2010
+        ), f"Location not in 2000-2010: {loc.inventoried_on}"
 
 
 @then("locations inventoried before {year:Number} should not be included")
 def step_then_locations_before_excluded(context: Context, year: int):
     """Assert no locations before year."""
     for loc in context.filtered_locations:
-        assert loc.inventoried_on.year >= year, \
-            f"Location from {loc.inventoried_on.year} should not be included"
+        assert (
+            loc.inventoried_on.year >= year
+        ), f"Location from {loc.inventoried_on.year} should not be included"
 
 
 @then("locations inventoried after {year:Number} should not be included")
 def step_then_locations_after_excluded(context: Context, year: int):
     """Assert no locations after year."""
     for loc in context.filtered_locations:
-        assert loc.inventoried_on.year <= year, \
-            f"Location from {loc.inventoried_on.year} should not be included"
+        assert (
+            loc.inventoried_on.year <= year
+        ), f"Location from {loc.inventoried_on.year} should not be included"
 
 
 @then("the response should include exactly {count:Number} locations")
@@ -556,8 +601,9 @@ def step_then_all_have_date(context: Context, expected_date: str):
     """Assert all have same date."""
     expected = date.fromisoformat(expected_date)
     for loc in context.queried_locations:
-        assert loc.legacy_date_created == expected, \
-            f"Location has {loc.legacy_date_created}, expected {expected}"
+        assert (
+            loc.legacy_date_created == expected
+        ), f"Location has {loc.legacy_date_created}, expected {expected}"
 
 
 @then('the response should include well_completed_on as "{expected_date}"')
@@ -610,8 +656,9 @@ def step_then_percentage_populated(context: Context, percentage: int):
     actual_pct = (populated / total) * 100
 
     tolerance = 10
-    assert abs(actual_pct - percentage) < tolerance, \
-        f"Expected ~{percentage}%, got {actual_pct:.1f}%"
+    assert (
+        abs(actual_pct - percentage) < tolerance
+    ), f"Expected ~{percentage}%, got {actual_pct:.1f}%"
 
 
 @then("the response should only include wells completed in that decade")
@@ -650,11 +697,13 @@ def step_then_nulls_last(context: Context):
     """Assert nulls at end."""
     first_null_idx = next(
         (i for i, w in enumerate(context.sorted_wells) if w.well_completed_on is None),
-        len(context.sorted_wells)
+        len(context.sorted_wells),
     )
 
     for well in context.sorted_wells[first_null_idx:]:
-        assert well.well_completed_on is None, "Found non-null after null in sorted list"
+        assert (
+            well.well_completed_on is None
+        ), "Found non-null after null in sorted list"
 
 
 @then('the well should have well_completed_on as "{expected_date}"')
@@ -680,15 +729,21 @@ def step_then_location_has_inventory(context: Context, expected_date: str):
     assert actual == expected_date, f"Expected {expected_date}, got {actual}"
 
 
-@then("the temporal sequence should be: well_completed_on → inventoried_on → legacy_date_created")
+@then(
+    "the temporal sequence should be: well_completed_on → inventoried_on → legacy_date_created"
+)
 def step_then_temporal_sequence(context: Context):
     """Assert temporal order."""
     well_completed = context.retrieved_well.well_completed_on
     inventoried = context.retrieved_location.inventoried_on
     legacy_created = context.retrieved_location.legacy_date_created
 
-    assert well_completed < inventoried, "Well should be completed before site inventoried"
-    assert inventoried < legacy_created, "Site should be inventoried before DB record created"
+    assert (
+        well_completed < inventoried
+    ), "Well should be completed before site inventoried"
+    assert (
+        inventoried < legacy_created
+    ), "Site should be inventoried before DB record created"
 
 
 @then("the timeline should show: {year1:Number} → {year2:Number} → {year3:Number}")
@@ -707,8 +762,9 @@ def step_then_percentage_inventory(context: Context, percentage: int):
     actual_pct = (populated / total) * 100
 
     tolerance = 2
-    assert abs(actual_pct - percentage) < tolerance, \
-        f"Expected ~{percentage}%, got {actual_pct:.1f}%"
+    assert (
+        abs(actual_pct - percentage) < tolerance
+    ), f"Expected ~{percentage}%, got {actual_pct:.1f}%"
 
 
 @then("{percentage:Number}% should have non-null legacy_date_created")
@@ -719,8 +775,9 @@ def step_then_percentage_legacy(context: Context, percentage: int):
     actual_pct = (populated / total) * 100
 
     tolerance = 2
-    assert abs(actual_pct - percentage) < tolerance, \
-        f"Expected ~{percentage}%, got {actual_pct:.1f}%"
+    assert (
+        abs(actual_pct - percentage) < tolerance
+    ), f"Expected ~{percentage}%, got {actual_pct:.1f}%"
 
 
 @then("{percentage:Number}% should have non-null well_completed_on")
@@ -731,8 +788,9 @@ def step_then_percentage_completion(context: Context, percentage: int):
     actual_pct = (populated / total) * 100
 
     tolerance = 2
-    assert abs(actual_pct - percentage) < tolerance, \
-        f"Expected ~{percentage}%, got {actual_pct:.1f}%"
+    assert (
+        abs(actual_pct - percentage) < tolerance
+    ), f"Expected ~{percentage}%, got {actual_pct:.1f}%"
 
 
 @then("it should have created_at (new system timestamp from migration)")
@@ -756,9 +814,9 @@ def step_then_has_inventory_date(context: Context):
 @then("all three timestamps should be independently queryable")
 def step_then_all_queryable(context: Context):
     """Assert all fields are queryable."""
-    assert hasattr(context.retrieved_location, 'created_at')
-    assert hasattr(context.retrieved_location, 'legacy_date_created')
-    assert hasattr(context.retrieved_location, 'inventoried_on')
+    assert hasattr(context.retrieved_location, "created_at")
+    assert hasattr(context.retrieved_location, "legacy_date_created")
+    assert hasattr(context.retrieved_location, "inventoried_on")
 
 
 @then("created_at should be a recent timestamp")
@@ -803,17 +861,17 @@ def step_then_no_error(context: Context):
 @then("well_completed_on should be null")
 def step_then_completion_null(context: Context):
     """Assert well_completed_on is null."""
-    if hasattr(context, 'retrieved_thing'):
+    if hasattr(context, "retrieved_thing"):
         assert context.retrieved_thing.well_completed_on is None
-    elif hasattr(context, 'retrieved_well'):
+    elif hasattr(context, "retrieved_well"):
         assert context.retrieved_well.well_completed_on is None
 
 
 @then("the field should exist in the response schema")
 def step_then_field_exists_in_schema(context: Context):
     """Assert field exists in schema."""
-    if hasattr(context, 'retrieved_thing'):
-        assert hasattr(context.retrieved_thing, 'well_completed_on')
+    if hasattr(context, "retrieved_thing"):
+        assert hasattr(context.retrieved_thing, "well_completed_on")
 
 
 @then("it should not cause validation errors")

From ac04b26af2638ed7a59d06a70942437efaca7537 Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 26 Nov 2025 17:13:06 -0800
Subject: [PATCH 12/66] Add features that describe post-migration behaviors

---
 ...st-migration-legacy-data-retrieval.feature | 172 ++++
 .../steps/post_migration_legacy_data.py       | 837 ++++++++++++++++++
 2 files changed, 1009 insertions(+)
 create mode 100644 tests/features/post-migration-legacy-data-retrieval.feature
 create mode 100644 tests/features/steps/post_migration_legacy_data.py

diff --git a/tests/features/post-migration-legacy-data-retrieval.feature b/tests/features/post-migration-legacy-data-retrieval.feature
new file mode 100644
index 000000000..69d2c5506
--- /dev/null
+++ b/tests/features/post-migration-legacy-data-retrieval.feature
@@ -0,0 +1,172 @@
+Feature: Post-Migration Legacy Data Retrieval
+  As a data manager
+  After migrating data from AMPAPI to NMSampleLocations
+  I want to verify that all legacy temporal information is preserved and queryable
+  So that no historical context is lost
+
+  Background:
+    Given a functioning api
+    And the AMPAPI data has been migrated to the database
+
+  # Location Legacy Date Lookups
+
+  Scenario: Retrieve location with both legacy dates via API
+    Given a location exists with:
+      | field                | value      |
+      | legacy_date_created  | 2014-04-03 |
+      | inventoried_on       | 2002-12-10 |
+    When I retrieve that location via the API
+    Then the response should include legacy_date_created as "2014-04-03"
+    And the response should include inventoried_on as "2002-12-10"
+    And the time gap should be approximately 11.3 years
+
+  Scenario: Retrieve location with large time gap (54 years)
+    Given a location exists with:
+      | field                | value      |
+      | legacy_date_created  | 2008-05-28 |
+      | inventoried_on       | 1954-05-01 |
+    When I retrieve that location via the API
+    Then the response should include legacy_date_created as "2008-05-28"
+    And the response should include inventoried_on as "2002-12-10"
+    And the time gap should be approximately 54 years
+
+  Scenario: List all locations includes legacy date fields
+    Given 5 locations exist with various legacy dates
+    When I GET /location to list all locations
+    Then each location should have a legacy_date_created field
+    And each location should have an inventoried_on field
+    And some locations should have null inventoried_on
+
+  Scenario: Filter locations by inventory date range
+    Given locations exist with inventoried_on ranging from 1950 to 2024
+    When I filter locations where inventoried_on is between "2000-01-01" and "2010-12-31"
+    Then the response should only include locations inventoried in that decade
+    And locations inventoried before 2000 should not be included
+    And locations inventoried after 2010 should not be included
+
+  Scenario: Query location by legacy_date_created
+    Given 3 locations exist with legacy_date_created "2014-04-03"
+    And 2 locations exist with legacy_date_created "2017-12-06"
+    When I query for locations with legacy_date_created "2014-04-03"
+    Then the response should include exactly 3 locations
+    And all should have legacy_date_created "2014-04-03"
+
+  # Well Completion Date Lookups
+
+  Scenario: Retrieve well with completion date via API
+    Given a well exists with well_completed_on "2004-08-08"
+    When I retrieve that well via the API
+    Then the response should include well_completed_on as "2004-08-08"
+    And the well age should be calculable
+
+  Scenario: Retrieve old well from early 1900s
+    Given a well exists with well_completed_on "1936-01-01"
+    When I retrieve that well via the API
+    Then the response should include well_completed_on as "1936-01-01"
+    And the well should be over 88 years old
+
+  Scenario: List all wells includes completion date field
+    Given 10 wells exist with various completion dates
+    And 3 of those wells have null well_completed_on
+    When I GET /thing/water-well to list all wells
+    Then each well should have a well_completed_on field
+    And 70% of wells should have well_completed_on populated
+
+  Scenario: Filter wells by completion date range
+    Given wells exist with completion dates from 1936 to 2024
+    When I filter wells where well_completed_on is between "2000-01-01" and "2010-12-31"
+    Then the response should only include wells completed in that decade
+    And wells from 1936 should not be included
+    And wells from 2020 should not be included
+
+  Scenario: Sort wells by completion date (oldest first)
+    Given wells exist with completion dates: 1936, 1965, 2004, 2020
+    And some wells have null well_completed_on
+    When I GET /thing/water-well sorted by well_completed_on ascending
+    Then the first well should be from 1936
+    And the last well with a date should be from 2020
+    And wells without completion dates should appear last
+
+  # Combined Queries - Location + Well Legacy Dates
+
+  Scenario: Retrieve well with location showing all legacy dates
+    Given a well exists with well_completed_on "2004-08-08"
+    And that well's location has:
+      | field                | value      |
+      | legacy_date_created  | 2014-04-03 |
+      | inventoried_on       | 2002-12-10 |
+    When I retrieve the well via the API
+    Then the well should have well_completed_on as "2004-08-08"
+    And the current_location should include legacy_date_created as "2014-04-03"
+    And the current_location should include inventoried_on as "2002-12-10"
+
+  Scenario: Timeline reconstruction - well completed before site inventoried
+    Given a well exists with well_completed_on "1995-06-15"
+    And that well's location has:
+      | field                | value      |
+      | inventoried_on       | 2003-12-10 |
+      | legacy_date_created  | 2014-04-03 |
+    When I retrieve the well and its location
+    Then the temporal sequence should be: well_completed_on → inventoried_on → legacy_date_created
+    And the timeline should show: 1995 → 2003 → 2014
+
+  # Data Quality Validation
+
+  Scenario: Verify migration preserved expected percentage of legacy dates
+    Given 100 locations were migrated
+    And 9 of them had non-null SiteDate in AMPAPI
+    When I query the migrated locations
+    Then 9% should have non-null inventoried_on
+    And 100% should have non-null legacy_date_created
+
+  Scenario: Verify well completion date coverage matches expectation
+    Given 100 wells were migrated
+    And 30 of them had non-null CompletionDate in AMPAPI
+    When I query the migrated wells
+    Then 30% should have non-null well_completed_on
+
+  # Audit Trail Verification
+
+  Scenario: Legacy dates preserved alongside audit timestamps
+    Given a location was migrated with legacy dates
+    When I retrieve that location
+    Then it should have created_at (new system timestamp from migration)
+    And it should have legacy_date_created (original AMPAPI DateCreated)
+    And it should have inventoried_on (original AMPAPI SiteDate)
+    And all three timestamps should be independently queryable
+    And created_at should be a recent timestamp
+    And legacy_date_created should be an older date
+
+  # Edge Cases
+
+  Scenario: Location where SiteDate is later than DateCreated (data anomaly)
+    Given a location exists with:
+      | field                | value      |
+      | legacy_date_created  | 2010-01-15 |
+      | inventoried_on       | 2015-06-20 |
+    When I retrieve that location
+    Then legacy_date_created should be "2010-01-15"
+    And inventoried_on should be "2015-06-20"
+    And the system should accept this without error
+
+  Scenario: Spring does not use well_completed_on field
+    Given a thing of type "spring" exists
+    When I retrieve that spring
+    Then well_completed_on should be null
+    And the field should exist in the response schema
+    And it should not cause validation errors
+
+  Scenario: Location with only legacy_date_created (no inventoried_on)
+    Given a location exists with:
+      | field                | value      |
+      | legacy_date_created  | 2014-10-17 |
+      | inventoried_on       | null       |
+    When I retrieve that location
+    Then legacy_date_created should be "2014-10-17"
+    And inventoried_on should be null
+
+  Scenario: Well without completion date
+    Given a well exists with well_completed_on null
+    When I retrieve that well
+    Then well_completed_on should be null
+    And the well should still be valid
diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py
new file mode 100644
index 000000000..dca15d638
--- /dev/null
+++ b/tests/features/steps/post_migration_legacy_data.py
@@ -0,0 +1,837 @@
+# ===============================================================================
+# Copyright 2025 ross
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+from datetime import date, datetime
+from behave import given, when, then, register_type
+from behave.runner import Context
+import parse
+
+from db import Location, Thing, LocationThingAssociation
+from db.engine import session_ctx
+
+
+# Custom type parsers
+@parse.with_pattern(r"\d+")
+def parse_number(text):
+    return int(text)
+
+register_type(Number=parse_number)
+
+
+def create_test_location(legacy_date_created=None, inventoried_on=None):
+    """Helper to create a test location with legacy dates."""
+    with session_ctx() as session:
+        location = Location(
+            point="POINT(-106.607784 35.118924)",
+            elevation=1558.8,
+            release_status="public",
+            legacy_date_created=legacy_date_created,
+            inventoried_on=inventoried_on,
+        )
+        session.add(location)
+        session.commit()
+        session.refresh(location)
+        return location
+
+
+def create_test_well(well_completed_on=None, thing_type="water well"):
+    """Helper to create a test well with completion date."""
+    with session_ctx() as session:
+        # Create location
+        location = Location(
+            point="POINT(-106.607784 35.118924)",
+            elevation=1558.8,
+            release_status="public",
+        )
+        session.add(location)
+        session.commit()
+
+        # Create thing
+        thing = Thing(
+            name=f"Test-{thing_type}-{datetime.now().timestamp()}",
+            first_visit_date="2023-03-03",
+            thing_type=thing_type,
+            release_status="public",
+            well_depth=100.0 if thing_type == "water well" else None,
+            hole_depth=110.0 if thing_type == "water well" else None,
+            well_completed_on=well_completed_on,
+        )
+        session.add(thing)
+        session.commit()
+
+        # Associate
+        assoc = LocationThingAssociation(location=location, thing=thing)
+        assoc.effective_start = "2000-01-01T00:00:00Z"
+        session.add(assoc)
+        session.commit()
+
+        session.refresh(thing)
+        session.refresh(location)
+        return thing, location
+
+
+@given("the AMPAPI data has been migrated to the database")
+def step_given_data_migrated(context: Context):
+    """Assumption that migration has occurred."""
+    context.migrated = True
+
+
+@given("a location exists with")
+def step_given_location_with_table(context: Context):
+    """Create location with fields from table."""
+    data = {row['field']: row['value'] for row in context.table}
+
+    legacy_date_created = date.fromisoformat(data['legacy_date_created']) if data.get('legacy_date_created') and data['legacy_date_created'] != 'null' else None
+    inventoried_on = date.fromisoformat(data['inventoried_on']) if data.get('inventoried_on') and data['inventoried_on'] != 'null' else None
+
+    location = create_test_location(
+        legacy_date_created=legacy_date_created,
+        inventoried_on=inventoried_on
+    )
+
+    context.test_location = location
+    context.test_location_id = location.id
+
+
+@given("{count:Number} locations exist with various legacy dates")
+def step_given_multiple_locations(context: Context, count: int):
+    """Create multiple locations with various legacy dates."""
+    context.test_locations = []
+
+    test_data = [
+        ("2014-04-03", "2002-12-10"),
+        ("2014-04-03", "2003-01-07"),
+        ("2017-12-06", "2003-12-11"),
+        ("2008-05-28", "1954-05-01"),
+        ("2020-01-15", None),
+    ]
+
+    for i in range(min(count, len(test_data))):
+        legacy_date, inventory_date = test_data[i]
+        location = create_test_location(
+            legacy_date_created=date.fromisoformat(legacy_date),
+            inventoried_on=date.fromisoformat(inventory_date) if inventory_date else None
+        )
+        context.test_locations.append(location)
+
+
+@given("locations exist with inventoried_on ranging from {start_year:Number} to {end_year:Number}")
+def step_given_locations_date_range(context: Context, start_year: int, end_year: int):
+    """Create locations with inventoried_on across a date range."""
+    context.test_locations = []
+
+    years = [1954, 2002, 2003, 2010, 2015, 2020, 2024]
+    for year in years:
+        location = create_test_location(
+            legacy_date_created=date(year + 5, 1, 1),  # Always 5 years after inventory
+            inventoried_on=date(year, 6, 15)
+        )
+        context.test_locations.append(location)
+
+
+@given('{count:Number} locations exist with legacy_date_created "{target_date}"')
+def step_given_locations_with_specific_date(context: Context, count: int, target_date: str):
+    """Create locations with specific legacy_date_created."""
+    if not hasattr(context, 'test_locations'):
+        context.test_locations = []
+
+    target = date.fromisoformat(target_date)
+
+    for i in range(count):
+        location = create_test_location(
+            legacy_date_created=target,
+            inventoried_on=date(2000 + i, 1, 1)  # Vary the inventory dates
+        )
+        context.test_locations.append(location)
+
+
+@given('a well exists with well_completed_on "{completion_date}"')
+def step_given_well_with_completion(context: Context, completion_date: str):
+    """Create well with completion date."""
+    completed_on = date.fromisoformat(completion_date) if completion_date != 'null' else None
+
+    thing, location = create_test_well(well_completed_on=completed_on)
+
+    context.test_well = thing
+    context.test_well_id = thing.id
+    context.test_well_location = location
+
+
+@given("{count:Number} wells exist with various completion dates")
+def step_given_multiple_wells(context: Context, count: int):
+    """Create multiple wells with various completion dates."""
+    context.test_wells = []
+
+    completion_dates = [
+        "1936-01-01",
+        "1965-06-15",
+        "2004-08-08",
+        "2020-05-15",
+        None,  # No completion date
+        None,
+        None,
+    ]
+
+    for i in range(min(count, len(completion_dates))):
+        completed_on = date.fromisoformat(completion_dates[i]) if completion_dates[i] else None
+        thing, location = create_test_well(well_completed_on=completed_on)
+        context.test_wells.append(thing)
+
+
+@given("{null_count:Number} of those wells have null well_completed_on")
+def step_given_wells_with_null_completion(context: Context, null_count: int):
+    """Verify expected number of nulls (declarative - already created)."""
+    # Wells were created in previous step with nulls
+    pass
+
+
+@given("wells exist with completion dates from {start_year:Number} to {end_year:Number}")
+def step_given_wells_date_range(context: Context, start_year: int, end_year: int):
+    """Create wells with completion dates across range."""
+    context.test_wells = []
+
+    years = [1936, 1965, 2004, 2010, 2020, 2024]
+    for year in years:
+        thing, location = create_test_well(well_completed_on=date(year, 6, 15))
+        context.test_wells.append(thing)
+
+
+@given("wells exist with completion dates: {years}")
+def step_given_wells_specific_years(context: Context, years: str):
+    """Create wells with specific completion years."""
+    context.test_wells = []
+
+    year_list = [int(y.strip()) for y in years.split(',')]
+
+    for year in year_list:
+        thing, location = create_test_well(well_completed_on=date(year, 6, 15))
+        context.test_wells.append(thing)
+
+
+@given("some wells have null well_completed_on")
+def step_given_some_wells_null(context: Context):
+    """Add wells without completion dates."""
+    if not hasattr(context, 'test_wells'):
+        context.test_wells = []
+
+    for i in range(2):
+        thing, location = create_test_well(well_completed_on=None)
+        context.test_wells.append(thing)
+
+
+@given("that well's location has")
+def step_given_well_location_has_table(context: Context):
+    """Set legacy dates on the well's location."""
+    data = {row['field']: row['value'] for row in context.table}
+
+    legacy_date_created = date.fromisoformat(data.get('legacy_date_created')) if data.get('legacy_date_created') else None
+    inventoried_on = date.fromisoformat(data.get('inventoried_on')) if data.get('inventoried_on') else None
+
+    with session_ctx() as session:
+        location = session.get(Location, context.test_well_location.id)
+        location.legacy_date_created = legacy_date_created
+        location.inventoried_on = inventoried_on
+        session.commit()
+        session.refresh(location)
+        context.test_well_location = location
+
+
+@given("{count:Number} locations were migrated")
+def step_given_count_locations_migrated(context: Context, count: int):
+    """Create specified number of test locations."""
+    context.test_locations = []
+
+    for i in range(count):
+        # 9% have inventoried_on
+        has_inventory = (i < count * 0.09)
+
+        location = create_test_location(
+            legacy_date_created=date(2014, 1, i % 28 + 1),
+            inventoried_on=date(2003, 1, i % 28 + 1) if has_inventory else None
+        )
+        context.test_locations.append(location)
+
+
+@given("{count:Number} of them had non-null SiteDate in AMPAPI")
+def step_given_sitedate_count(context: Context, count: int):
+    """Declarative - data created in previous step."""
+    pass
+
+
+@given("{count:Number} wells were migrated")
+def step_given_count_wells_migrated(context: Context, count: int):
+    """Create specified number of test wells."""
+    context.test_wells = []
+
+    for i in range(count):
+        # 30% have completion dates
+        has_completion = (i < count * 0.30)
+
+        thing, location = create_test_well(
+            well_completed_on=date(2000 + (i % 24), 1, 1) if has_completion else None
+        )
+        context.test_wells.append(thing)
+
+
+@given("{count:Number} of them had non-null CompletionDate in AMPAPI")
+def step_given_completion_count(context: Context, count: int):
+    """Declarative - data created in previous step."""
+    pass
+
+
+@given("a location was migrated with legacy dates")
+def step_given_location_migrated_with_dates(context: Context):
+    """Create location with both legacy dates."""
+    location = create_test_location(
+        legacy_date_created=date(2014, 4, 3),
+        inventoried_on=date(2002, 12, 10)
+    )
+    context.test_location = location
+
+
+@given('a thing of type "{thing_type}" exists')
+def step_given_thing_of_type(context: Context, thing_type: str):
+    """Create a thing of specified type."""
+    thing, location = create_test_well(well_completed_on=None, thing_type=thing_type)
+    context.test_thing = thing
+    context.test_thing_id = thing.id
+
+
+@given("a well exists with well_completed_on null")
+def step_given_well_null_completion(context: Context):
+    """Create well without completion date."""
+    thing, location = create_test_well(well_completed_on=None)
+    context.test_well = thing
+    context.test_well_id = thing.id
+
+
+# WHEN steps
+
+@when("I retrieve that location via the API")
+def step_when_retrieve_location_api(context: Context):
+    """Retrieve location via GET API."""
+    response = context.client.get(f"/location/{context.test_location_id}")
+    assert response.status_code == 200
+    context.location_response = response.json()
+
+
+@when("I GET /location to list all locations")
+def step_when_get_all_locations(context: Context):
+    """Get all locations."""
+    response = context.client.get("/location")
+    assert response.status_code == 200
+    context.locations_response = response.json()
+
+
+@when('I filter locations where inventoried_on is between "{start_date}" and "{end_date}"')
+def step_when_filter_locations(context: Context, start_date: str, end_date: str):
+    """Filter locations by date range."""
+    # Since API may not support this yet, query database directly
+    with session_ctx() as session:
+        start = date.fromisoformat(start_date)
+        end = date.fromisoformat(end_date)
+
+        locations = session.query(Location).filter(
+            Location.inventoried_on >= start,
+            Location.inventoried_on <= end
+        ).all()
+
+        context.filtered_locations = locations
+
+
+@when('I query for locations with legacy_date_created "{target_date}"')
+def step_when_query_by_legacy_date(context: Context, target_date: str):
+    """Query locations by legacy_date_created."""
+    with session_ctx() as session:
+        target = date.fromisoformat(target_date)
+        locations = session.query(Location).filter(
+            Location.legacy_date_created == target
+        ).all()
+        context.queried_locations = locations
+
+
+@when("I retrieve that well via the API")
+def step_when_retrieve_well_api(context: Context):
+    """Retrieve well via GET API."""
+    response = context.client.get(f"/thing/water-well/{context.test_well_id}")
+    assert response.status_code == 200
+    context.well_response = response.json()
+
+
+@when("I GET /thing/water-well to list all wells")
+def step_when_get_all_wells(context: Context):
+    """Get all wells."""
+    response = context.client.get("/thing/water-well")
+    assert response.status_code == 200
+    context.wells_response = response.json()
+
+
+@when('I filter wells where well_completed_on is between "{start_date}" and "{end_date}"')
+def step_when_filter_wells(context: Context, start_date: str, end_date: str):
+    """Filter wells by completion date range."""
+    with session_ctx() as session:
+        start = date.fromisoformat(start_date)
+        end = date.fromisoformat(end_date)
+
+        wells = session.query(Thing).filter(
+            Thing.thing_type == "water well",
+            Thing.well_completed_on >= start,
+            Thing.well_completed_on <= end
+        ).all()
+
+        context.filtered_wells = wells
+
+
+@when("I GET /thing/water-well sorted by well_completed_on ascending")
+def step_when_get_wells_sorted(context: Context):
+    """Get wells sorted by completion date."""
+    with session_ctx() as session:
+        wells = session.query(Thing).filter(
+            Thing.thing_type == "water well"
+        ).order_by(Thing.well_completed_on.asc().nullslast()).all()
+
+        context.sorted_wells = wells
+
+
+@when("I retrieve the well and its location")
+def step_when_retrieve_well_and_location(context: Context):
+    """Retrieve well with location."""
+    with session_ctx() as session:
+        well = session.get(Thing, context.test_well.id)
+        location = session.get(Location, context.test_well_location.id)
+
+        context.retrieved_well = well
+        context.retrieved_location = location
+
+
+@when("I query the migrated locations")
+def step_when_query_migrated_locations(context: Context):
+    """Query all test locations."""
+    with session_ctx() as session:
+        # Query only our test locations
+        location_ids = [loc.id for loc in context.test_locations]
+        locations = session.query(Location).filter(Location.id.in_(location_ids)).all()
+        context.queried_locations = locations
+
+
+@when("I query the migrated wells")
+def step_when_query_migrated_wells(context: Context):
+    """Query all test wells."""
+    with session_ctx() as session:
+        well_ids = [well.id for well in context.test_wells]
+        wells = session.query(Thing).filter(Thing.id.in_(well_ids)).all()
+        context.queried_wells = wells
+
+
+@when("I retrieve that location")
+def step_when_retrieve_location(context: Context):
+    """Retrieve location by ID."""
+    with session_ctx() as session:
+        location = session.get(Location, context.test_location.id)
+        context.retrieved_location = location
+
+
+@when("I retrieve that spring")
+def step_when_retrieve_spring(context: Context):
+    """Retrieve spring/thing by ID."""
+    with session_ctx() as session:
+        thing = session.get(Thing, context.test_thing.id)
+        context.retrieved_thing = thing
+
+
+@when("I retrieve that well")
+def step_when_retrieve_well(context: Context):
+    """Retrieve well by ID."""
+    with session_ctx() as session:
+        well = session.get(Thing, context.test_well.id)
+        context.retrieved_well = well
+
+
+# THEN steps
+
+@then('the response should include legacy_date_created as "{expected_date}"')
+def step_then_legacy_date_created(context: Context, expected_date: str):
+    """Assert legacy_date_created matches."""
+    actual = context.location_response.get("legacy_date_created")
+    assert actual == expected_date, f"Expected {expected_date}, got {actual}"
+
+
+@then('the response should include inventoried_on as "{expected_date}"')
+def step_then_inventoried_on(context: Context, expected_date: str):
+    """Assert inventoried_on matches."""
+    actual = context.location_response.get("inventoried_on")
+    assert actual == expected_date, f"Expected {expected_date}, got {actual}"
+
+
+@then("the time gap should be approximately {years} years")
+def step_then_time_gap_years(context: Context, years: str):
+    """Assert approximate year gap."""
+    legacy_str = context.location_response.get("legacy_date_created")
+    inventory_str = context.location_response.get("inventoried_on")
+
+    if not legacy_str or not inventory_str:
+        raise AssertionError("Missing date fields for gap calculation")
+
+    legacy_date = date.fromisoformat(legacy_str)
+    inventory_date = date.fromisoformat(inventory_str)
+
+    gap_days = (legacy_date - inventory_date).days
+    gap_years = gap_days / 365.25
+
+    expected_years = float(years)
+    tolerance = 0.5
+    assert abs(gap_years - expected_years) < tolerance, \
+        f"Expected ~{expected_years} year gap, got {gap_years:.1f} years"
+
+
+@then("each location should have a legacy_date_created field")
+def step_then_all_have_legacy_field(context: Context):
+    """Assert all locations have the field."""
+    items = context.locations_response.get("items", [])
+    for item in items:
+        assert "legacy_date_created" in item, f"Location missing legacy_date_created"
+
+
+@then("each location should have an inventoried_on field")
+def step_then_all_have_inventory_field(context: Context):
+    """Assert all locations have the field."""
+    items = context.locations_response.get("items", [])
+    for item in items:
+        assert "inventoried_on" in item, f"Location missing inventoried_on"
+
+
+@then("some locations should have null inventoried_on")
+def step_then_some_null_inventory(context: Context):
+    """Assert some locations have null."""
+    items = context.locations_response.get("items", [])
+    null_count = sum(1 for item in items if item.get("inventoried_on") is None)
+    assert null_count > 0, "Expected at least one location with null inventoried_on"
+
+
+@then("the response should only include locations inventoried in that decade")
+def step_then_locations_in_decade(context: Context):
+    """Assert filtered locations are in range."""
+    for loc in context.filtered_locations:
+        assert 2000 <= loc.inventoried_on.year <= 2010, \
+            f"Location not in 2000-2010: {loc.inventoried_on}"
+
+
+@then("locations inventoried before {year:Number} should not be included")
+def step_then_locations_before_excluded(context: Context, year: int):
+    """Assert no locations before year."""
+    for loc in context.filtered_locations:
+        assert loc.inventoried_on.year >= year, \
+            f"Location from {loc.inventoried_on.year} should not be included"
+
+
+@then("locations inventoried after {year:Number} should not be included")
+def step_then_locations_after_excluded(context: Context, year: int):
+    """Assert no locations after year."""
+    for loc in context.filtered_locations:
+        assert loc.inventoried_on.year <= year, \
+            f"Location from {loc.inventoried_on.year} should not be included"
+
+
+@then("the response should include exactly {count:Number} locations")
+def step_then_exact_count_locations(context: Context, count: int):
+    """Assert exact count."""
+    actual = len(context.queried_locations)
+    assert actual == count, f"Expected {count} locations, got {actual}"
+
+
+@then('all should have legacy_date_created "{expected_date}"')
+def step_then_all_have_date(context: Context, expected_date: str):
+    """Assert all have same date."""
+    expected = date.fromisoformat(expected_date)
+    for loc in context.queried_locations:
+        assert loc.legacy_date_created == expected, \
+            f"Location has {loc.legacy_date_created}, expected {expected}"
+
+
+@then('the response should include well_completed_on as "{expected_date}"')
+def step_then_well_completed_on(context: Context, expected_date: str):
+    """Assert well_completed_on matches."""
+    actual = context.well_response.get("well_completed_on")
+    assert actual == expected_date, f"Expected {expected_date}, got {actual}"
+
+
+@then("the well age should be calculable")
+def step_then_age_calculable(context: Context):
+    """Assert age can be calculated."""
+    completion_str = context.well_response.get("well_completed_on")
+    assert completion_str is not None, "Cannot calculate age without completion date"
+
+    completed = date.fromisoformat(completion_str)
+    today = date.today()
+    age_years = (today - completed).days / 365.25
+    assert age_years >= 0, "Age cannot be negative"
+
+
+@then("the well should be over {min_age:Number} years old")
+def step_then_well_over_age(context: Context, min_age: int):
+    """Assert well age exceeds minimum."""
+    completion_str = context.well_response.get("well_completed_on")
+    completed = date.fromisoformat(completion_str)
+    today = date.today()
+    age_years = (today - completed).days / 365.25
+
+    assert age_years >= min_age, f"Expected over {min_age} years, got {age_years:.1f}"
+
+
+@then("each well should have a well_completed_on field")
+def step_then_all_wells_have_field(context: Context):
+    """Assert all wells have the field."""
+    items = context.wells_response.get("items", [])
+    for item in items:
+        assert "well_completed_on" in item, f"Well missing well_completed_on"
+
+
+@then("{percentage:Number}% of wells should have well_completed_on populated")
+def step_then_percentage_populated(context: Context, percentage: int):
+    """Assert approximate percentage."""
+    items = context.wells_response.get("items", [])
+    total = len(items)
+    if total == 0:
+        return
+
+    populated = sum(1 for item in items if item.get("well_completed_on") is not None)
+    actual_pct = (populated / total) * 100
+
+    tolerance = 10
+    assert abs(actual_pct - percentage) < tolerance, \
+        f"Expected ~{percentage}%, got {actual_pct:.1f}%"
+
+
+@then("the response should only include wells completed in that decade")
+def step_then_wells_in_decade(context: Context):
+    """Assert filtered wells in range."""
+    for well in context.filtered_wells:
+        assert 2000 <= well.well_completed_on.year <= 2010
+
+
+@then("wells from {year:Number} should not be included")
+def step_then_wells_year_excluded(context: Context, year: int):
+    """Assert wells from year excluded."""
+    for well in context.filtered_wells:
+        assert well.well_completed_on.year != year
+
+
+@then("the first well should be from {year:Number}")
+def step_then_first_well_year(context: Context, year: int):
+    """Assert first well year."""
+    if context.sorted_wells and context.sorted_wells[0].well_completed_on:
+        actual_year = context.sorted_wells[0].well_completed_on.year
+        assert actual_year == year, f"Expected {year}, got {actual_year}"
+
+
+@then("the last well with a date should be from {year:Number}")
+def step_then_last_well_year(context: Context, year: int):
+    """Assert last non-null well year."""
+    non_null = [w for w in context.sorted_wells if w.well_completed_on]
+    if non_null:
+        actual_year = non_null[-1].well_completed_on.year
+        assert actual_year == year, f"Expected {year}, got {actual_year}"
+
+
+@then("wells without completion dates should appear last")
+def step_then_nulls_last(context: Context):
+    """Assert nulls at end."""
+    first_null_idx = next(
+        (i for i, w in enumerate(context.sorted_wells) if w.well_completed_on is None),
+        len(context.sorted_wells)
+    )
+
+    for well in context.sorted_wells[first_null_idx:]:
+        assert well.well_completed_on is None, "Found non-null after null in sorted list"
+
+
+@then('the well should have well_completed_on as "{expected_date}"')
+def step_then_well_has_completion(context: Context, expected_date: str):
+    """Assert well has completion date."""
+    actual = context.well_response.get("well_completed_on")
+    assert actual == expected_date, f"Expected {expected_date}, got {actual}"
+
+
+@then('the current_location should include legacy_date_created as "{expected_date}"')
+def step_then_location_has_legacy(context: Context, expected_date: str):
+    """Assert location has legacy_date_created."""
+    current_location = context.well_response.get("current_location", {})
+    actual = current_location.get("legacy_date_created")
+    assert actual == expected_date, f"Expected {expected_date}, got {actual}"
+
+
+@then('the current_location should include inventoried_on as "{expected_date}"')
+def step_then_location_has_inventory(context: Context, expected_date: str):
+    """Assert location has inventoried_on."""
+    current_location = context.well_response.get("current_location", {})
+    actual = current_location.get("inventoried_on")
+    assert actual == expected_date, f"Expected {expected_date}, got {actual}"
+
+
+@then("the temporal sequence should be: well_completed_on → inventoried_on → legacy_date_created")
+def step_then_temporal_sequence(context: Context):
+    """Assert temporal order."""
+    well_completed = context.retrieved_well.well_completed_on
+    inventoried = context.retrieved_location.inventoried_on
+    legacy_created = context.retrieved_location.legacy_date_created
+
+    assert well_completed < inventoried, "Well should be completed before site inventoried"
+    assert inventoried < legacy_created, "Site should be inventoried before DB record created"
+
+
+@then("the timeline should show: {year1:Number} → {year2:Number} → {year3:Number}")
+def step_then_timeline_years(context: Context, year1: int, year2: int, year3: int):
+    """Assert specific years in sequence."""
+    assert context.retrieved_well.well_completed_on.year == year1
+    assert context.retrieved_location.inventoried_on.year == year2
+    assert context.retrieved_location.legacy_date_created.year == year3
+
+
+@then("{percentage:Number}% should have non-null inventoried_on")
+def step_then_percentage_inventory(context: Context, percentage: int):
+    """Assert percentage with inventoried_on."""
+    total = len(context.queried_locations)
+    populated = sum(1 for loc in context.queried_locations if loc.inventoried_on)
+    actual_pct = (populated / total) * 100
+
+    tolerance = 2
+    assert abs(actual_pct - percentage) < tolerance, \
+        f"Expected ~{percentage}%, got {actual_pct:.1f}%"
+
+
+@then("{percentage:Number}% should have non-null legacy_date_created")
+def step_then_percentage_legacy(context: Context, percentage: int):
+    """Assert percentage with legacy_date_created."""
+    total = len(context.queried_locations)
+    populated = sum(1 for loc in context.queried_locations if loc.legacy_date_created)
+    actual_pct = (populated / total) * 100
+
+    tolerance = 2
+    assert abs(actual_pct - percentage) < tolerance, \
+        f"Expected ~{percentage}%, got {actual_pct:.1f}%"
+
+
+@then("{percentage:Number}% should have non-null well_completed_on")
+def step_then_percentage_completion(context: Context, percentage: int):
+    """Assert percentage with well_completed_on."""
+    total = len(context.queried_wells)
+    populated = sum(1 for well in context.queried_wells if well.well_completed_on)
+    actual_pct = (populated / total) * 100
+
+    tolerance = 2
+    assert abs(actual_pct - percentage) < tolerance, \
+        f"Expected ~{percentage}%, got {actual_pct:.1f}%"
+
+
+@then("it should have created_at (new system timestamp from migration)")
+def step_then_has_created_at(context: Context):
+    """Assert created_at exists."""
+    assert context.retrieved_location.created_at is not None
+
+
+@then("it should have legacy_date_created (original AMPAPI DateCreated)")
+def step_then_has_legacy_date(context: Context):
+    """Assert legacy_date_created exists."""
+    assert context.retrieved_location.legacy_date_created is not None
+
+
+@then("it should have inventoried_on (original AMPAPI SiteDate)")
+def step_then_has_inventory_date(context: Context):
+    """Assert inventoried_on exists."""
+    assert context.retrieved_location.inventoried_on is not None
+
+
+@then("all three timestamps should be independently queryable")
+def step_then_all_queryable(context: Context):
+    """Assert all fields are queryable."""
+    assert hasattr(context.retrieved_location, 'created_at')
+    assert hasattr(context.retrieved_location, 'legacy_date_created')
+    assert hasattr(context.retrieved_location, 'inventoried_on')
+
+
+@then("created_at should be a recent timestamp")
+def step_then_created_at_recent(context: Context):
+    """Assert created_at is recent."""
+    created_at = context.retrieved_location.created_at.replace(tzinfo=None)
+    now = datetime.utcnow()
+    diff_seconds = abs((now - created_at).total_seconds())
+    assert diff_seconds < 3600, "created_at should be within last hour"
+
+
+@then("legacy_date_created should be an older date")
+def step_then_legacy_date_older(context: Context):
+    """Assert legacy_date_created is old."""
+    legacy_date = context.retrieved_location.legacy_date_created
+    assert legacy_date.year < 2024, "legacy_date_created should be from the past"
+
+
+@then('legacy_date_created should be "{expected_date}"')
+def step_then_legacy_is(context: Context, expected_date: str):
+    """Assert legacy_date_created value."""
+    actual = context.retrieved_location.legacy_date_created
+    expected = date.fromisoformat(expected_date)
+    assert actual == expected, f"Expected {expected}, got {actual}"
+
+
+@then('inventoried_on should be "{expected_date}"')
+def step_then_inventory_is(context: Context, expected_date: str):
+    """Assert inventoried_on value."""
+    actual = context.retrieved_location.inventoried_on
+    expected = date.fromisoformat(expected_date)
+    assert actual == expected, f"Expected {expected}, got {actual}"
+
+
+@then("the system should accept this without error")
+def step_then_no_error(context: Context):
+    """Assert no errors."""
+    # If we got here, no errors
+    pass
+
+
+@then("well_completed_on should be null")
+def step_then_completion_null(context: Context):
+    """Assert well_completed_on is null."""
+    if hasattr(context, 'retrieved_thing'):
+        assert context.retrieved_thing.well_completed_on is None
+    elif hasattr(context, 'retrieved_well'):
+        assert context.retrieved_well.well_completed_on is None
+
+
+@then("the field should exist in the response schema")
+def step_then_field_exists_in_schema(context: Context):
+    """Assert field exists in schema."""
+    if hasattr(context, 'retrieved_thing'):
+        assert hasattr(context.retrieved_thing, 'well_completed_on')
+
+
+@then("it should not cause validation errors")
+def step_then_no_validation_errors(context: Context):
+    """Assert no validation errors."""
+    pass
+
+
+@then("inventoried_on should be null")
+def step_then_inventory_null(context: Context):
+    """Assert inventoried_on is null."""
+    assert context.retrieved_location.inventoried_on is None
+
+
+@then("the well should still be valid")
+def step_then_well_valid(context: Context):
+    """Assert well is valid."""
+    assert context.retrieved_well.id is not None
+
+
+# ============= EOF =============================================

From 6169c3eeab3487d650894a73c447af070ae32b94 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Wed, 26 Nov 2025 18:28:04 -0700
Subject: [PATCH 13/66] refactor: enhance water level transfer functions by
 introducing source table variable

---
 transfers/waterlevels_transducer_transfer.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/transfers/waterlevels_transducer_transfer.py b/transfers/waterlevels_transducer_transfer.py
index f1ef30cd1..e4ce178c0 100644
--- a/transfers/waterlevels_transducer_transfer.py
+++ b/transfers/waterlevels_transducer_transfer.py
@@ -23,15 +23,19 @@
 
 
 def transfer_water_levels_acoustic(session):
-    wd = read_csv("WaterLevelsContinuous_Acoustic")
+    source_table = "WaterLevelsContinuous_Acoustic"
+    wd = read_csv(source_table)
     return _transfer_water_levels_continuous(
-        session, wd, "PublicRelease", "Acoustic Sounder"
+        session, source_table, wd, "PublicRelease", "Acoustic Sounder"
     )
 
 
 def transfer_water_levels_pressure(session):
-    wd = read_csv("WaterLevelsContinuous_Pressure")
-    return _transfer_water_levels_continuous(session, wd, "QCed", "Pressure Transducer")
+    source_table = "WaterLevelsContinuous_Pressure"
+    wd = read_csv(source_table)
+    return _transfer_water_levels_continuous(
+        session, source_table, wd, "QCed", "Pressure Transducer"
+    )
 
 
 def _find_deployment(ts, deployments):
@@ -45,7 +49,9 @@ def _find_deployment(ts, deployments):
     return None
 
 
-def _transfer_water_levels_continuous(session, input_df, partition_field, sensor_type):
+def _transfer_water_levels_continuous(
+    session, source_table, input_df, partition_field, sensor_type
+):
     from schemas.transducer import CreateTransducerObservation
 
     groundwater_parameter_id = (
@@ -173,6 +179,7 @@ def _transfer_water_levels_continuous(session, input_df, partition_field, sensor
     for pointid, (min_date, max_date) in nodeployments.items():
         errors.append(
             {
+                "table": source_table,
                 "pointid": pointid,
                 "error": f"no deployment between {min_date} and {max_date}",
             }

From 952c5db040e783d2386c62a1b46410d225d0b8df Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 26 Nov 2025 17:31:38 -0800
Subject: [PATCH 14/66] Rename `inventoried_on` to `legacy_start_date` since it
 won't continue on

---
 ...st-migration-legacy-data-retrieval.feature | 46 +++++++++----------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/tests/features/post-migration-legacy-data-retrieval.feature b/tests/features/post-migration-legacy-data-retrieval.feature
index 69d2c5506..fa4663e1b 100644
--- a/tests/features/post-migration-legacy-data-retrieval.feature
+++ b/tests/features/post-migration-legacy-data-retrieval.feature
@@ -14,35 +14,35 @@ Feature: Post-Migration Legacy Data Retrieval
     Given a location exists with:
       | field                | value      |
       | legacy_date_created  | 2014-04-03 |
-      | inventoried_on       | 2002-12-10 |
+      | legacy_site_date     | 2002-12-10 |
     When I retrieve that location via the API
     Then the response should include legacy_date_created as "2014-04-03"
-    And the response should include inventoried_on as "2002-12-10"
+    And the response should include legacy_site_date as "2002-12-10"
     And the time gap should be approximately 11.3 years
 
   Scenario: Retrieve location with large time gap (54 years)
     Given a location exists with:
       | field                | value      |
       | legacy_date_created  | 2008-05-28 |
-      | inventoried_on       | 1954-05-01 |
+      | legacy_site_date     | 1954-05-01 |
     When I retrieve that location via the API
     Then the response should include legacy_date_created as "2008-05-28"
-    And the response should include inventoried_on as "2002-12-10"
+    And the response should include legacy_site_date as "1954-05-01"
     And the time gap should be approximately 54 years
 
   Scenario: List all locations includes legacy date fields
     Given 5 locations exist with various legacy dates
     When I GET /location to list all locations
     Then each location should have a legacy_date_created field
-    And each location should have an inventoried_on field
-    And some locations should have null inventoried_on
+    And each location should have a legacy_site_date field
+    And some locations should have null legacy_site_date
 
-  Scenario: Filter locations by inventory date range
-    Given locations exist with inventoried_on ranging from 1950 to 2024
-    When I filter locations where inventoried_on is between "2000-01-01" and "2010-12-31"
-    Then the response should only include locations inventoried in that decade
-    And locations inventoried before 2000 should not be included
-    And locations inventoried after 2010 should not be included
+  Scenario: Filter locations by legacy site date range
+    Given locations exist with legacy_site_date ranging from 1950 to 2024
+    When I filter locations where legacy_site_date is between "2000-01-01" and "2010-12-31"
+    Then the response should only include locations with legacy_site_date in that decade
+    And locations with legacy_site_date before 2000 should not be included
+    And locations with legacy_site_date after 2010 should not be included
 
   Scenario: Query location by legacy_date_created
     Given 3 locations exist with legacy_date_created "2014-04-03"
@@ -94,20 +94,20 @@ Feature: Post-Migration Legacy Data Retrieval
     And that well's location has:
       | field                | value      |
       | legacy_date_created  | 2014-04-03 |
-      | inventoried_on       | 2002-12-10 |
+      | legacy_site_date     | 2002-12-10 |
     When I retrieve the well via the API
     Then the well should have well_completed_on as "2004-08-08"
     And the current_location should include legacy_date_created as "2014-04-03"
-    And the current_location should include inventoried_on as "2002-12-10"
+    And the current_location should include legacy_site_date as "2002-12-10"
 
   Scenario: Timeline reconstruction - well completed before site inventoried
     Given a well exists with well_completed_on "1995-06-15"
     And that well's location has:
       | field                | value      |
-      | inventoried_on       | 2003-12-10 |
+      | legacy_site_date     | 2003-12-10 |
       | legacy_date_created  | 2014-04-03 |
     When I retrieve the well and its location
-    Then the temporal sequence should be: well_completed_on → inventoried_on → legacy_date_created
+    Then the temporal sequence should be: well_completed_on → legacy_site_date → legacy_date_created
     And the timeline should show: 1995 → 2003 → 2014
 
   # Data Quality Validation
@@ -116,7 +116,7 @@ Feature: Post-Migration Legacy Data Retrieval
     Given 100 locations were migrated
     And 9 of them had non-null SiteDate in AMPAPI
     When I query the migrated locations
-    Then 9% should have non-null inventoried_on
+    Then 9% should have non-null legacy_site_date
     And 100% should have non-null legacy_date_created
 
   Scenario: Verify well completion date coverage matches expectation
@@ -132,7 +132,7 @@ Feature: Post-Migration Legacy Data Retrieval
     When I retrieve that location
     Then it should have created_at (new system timestamp from migration)
     And it should have legacy_date_created (original AMPAPI DateCreated)
-    And it should have inventoried_on (original AMPAPI SiteDate)
+    And it should have legacy_site_date (original AMPAPI SiteDate)
     And all three timestamps should be independently queryable
     And created_at should be a recent timestamp
     And legacy_date_created should be an older date
@@ -143,10 +143,10 @@ Feature: Post-Migration Legacy Data Retrieval
     Given a location exists with:
       | field                | value      |
       | legacy_date_created  | 2010-01-15 |
-      | inventoried_on       | 2015-06-20 |
+      | legacy_site_date     | 2015-06-20 |
     When I retrieve that location
     Then legacy_date_created should be "2010-01-15"
-    And inventoried_on should be "2015-06-20"
+    And legacy_site_date should be "2015-06-20"
     And the system should accept this without error
 
   Scenario: Spring does not use well_completed_on field
@@ -156,14 +156,14 @@ Feature: Post-Migration Legacy Data Retrieval
     And the field should exist in the response schema
     And it should not cause validation errors
 
-  Scenario: Location with only legacy_date_created (no inventoried_on)
+  Scenario: Location with only legacy_date_created (no legacy_site_date)
     Given a location exists with:
       | field                | value      |
       | legacy_date_created  | 2014-10-17 |
-      | inventoried_on       | null       |
+      | legacy_site_date     | null       |
     When I retrieve that location
     Then legacy_date_created should be "2014-10-17"
-    And inventoried_on should be null
+    And legacy_site_date should be null
 
   Scenario: Well without completion date
     Given a well exists with well_completed_on null

From dbfc8ef6dfadc46ead68cdb7aad121e01f975dbe Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 26 Nov 2025 17:37:47 -0800
Subject: [PATCH 15/66] Add new fields to unit tests

---
 tests/test_location.py |  79 ++++++++++++++++++++++
 tests/test_thing.py    | 147 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 226 insertions(+)

diff --git a/tests/test_location.py b/tests/test_location.py
index 4b6ec6faa..b86211a58 100644
--- a/tests/test_location.py
+++ b/tests/test_location.py
@@ -235,4 +235,83 @@ def test_delete_location_404_not_found(second_location):
     assert data["detail"] == f"Location with ID {bad_location_id} not found."
 
 
+#  ============= Legacy date field tests =======================================
+
+
+def test_new_location_has_null_legacy_fields():
+    """Test that newly created locations have null legacy date fields (legacy fields are migration-only)"""
+    payload = {
+        "point": "POINT (-106.607784 35.118924)",
+        "elevation": 1558.8,
+        "release_status": "draft",
+    }
+    response = client.post("/location", json=payload)
+
+    assert response.status_code == 201
+    data = response.json()
+    assert "id" in data
+    # Legacy fields should be present in response but null (not set during creation)
+    assert "legacy_date_created" in data
+    assert "legacy_site_date" in data
+    assert data["legacy_date_created"] is None
+    assert data["legacy_site_date"] is None
+
+    # cleanup after test
+    cleanup_post_test(Location, data["id"])
+
+
+def test_legacy_fields_present_in_location_response():
+    """Test that legacy fields are included in location GET response"""
+    # Create a new location (without legacy fields)
+    payload = {
+        "point": "POINT (-106.607784 35.118924)",
+        "elevation": 1558.8,
+        "release_status": "draft",
+    }
+    create_response = client.post("/location", json=payload)
+    assert create_response.status_code == 201
+    location_id = create_response.json()["id"]
+
+    # Retrieve the location and verify legacy fields are in the schema
+    get_response = client.get(f"/location/{location_id}")
+    assert get_response.status_code == 200
+    data = get_response.json()
+
+    # Verify fields exist in response (even if null)
+    assert "legacy_date_created" in data
+    assert "legacy_site_date" in data
+    assert data["legacy_date_created"] is None
+    assert data["legacy_site_date"] is None
+
+    # cleanup after test
+    cleanup_post_test(Location, location_id)
+
+
+def test_legacy_fields_independent_of_created_at():
+    """Test that created_at (system timestamp) is separate from legacy fields"""
+    payload = {
+        "point": "POINT (-106.607784 35.118924)",
+        "elevation": 1558.8,
+        "release_status": "draft",
+    }
+    response = client.post("/location", json=payload)
+
+    assert response.status_code == 201
+    data = response.json()
+
+    # created_at is automatically set by AutoBaseMixin
+    assert "created_at" in data
+    assert data["created_at"] is not None
+
+    # legacy_date_created is separate and null for new records
+    assert "legacy_date_created" in data
+    assert data["legacy_date_created"] is None
+
+    # These are independent fields with different purposes
+    assert "created_at" != "legacy_date_created"
+
+    # cleanup after test
+    cleanup_post_test(Location, data["id"])
+
+
 # ============= EOF =============================================
diff --git a/tests/test_thing.py b/tests/test_thing.py
index 378f72d02..12aafef1a 100644
--- a/tests/test_thing.py
+++ b/tests/test_thing.py
@@ -1101,3 +1101,150 @@ def test_delete_thing_id_link_404_not_found(second_thing_id_link):
     assert response.status_code == 404
     data = response.json()
     assert data["detail"] == f"ThingIdLink with ID {bad_id} not found."
+
+
+#  ============= Well completion date tests ====================================
+
+
+def test_create_well_with_completion_date(location):
+    """Test creating a well with well_completed_on (active field - users can set this)"""
+    payload = {
+        "name": "Test Well",
+        "location_id": location.id,
+        "measuring_point_height": 2.5,
+        "measuring_point_description": "top of casing",
+        "release_status": "draft",
+        "well_completed_on": "2004-08-08",
+    }
+    response = client.post("/thing/water-well", json=payload)
+
+    assert response.status_code == 201
+    data = response.json()
+    assert "id" in data
+    assert data["well_completed_on"] == "2004-08-08"
+
+    # cleanup after test
+    from db import Thing
+    from tests import cleanup_post_test
+
+    cleanup_post_test(Thing, data["id"])
+
+
+def test_create_well_with_old_completion_date(location):
+    """Test creating a well with very old completion date (e.g., for documenting historical wells)"""
+    payload = {
+        "name": "Historical Well",
+        "location_id": location.id,
+        "measuring_point_height": 2.5,
+        "measuring_point_description": "top of casing",
+        "release_status": "draft",
+        "well_completed_on": "1936-01-01",
+    }
+    response = client.post("/thing/water-well", json=payload)
+
+    assert response.status_code == 201
+    data = response.json()
+    assert data["well_completed_on"] == "1936-01-01"
+
+    # cleanup after test
+    from db import Thing
+    from tests import cleanup_post_test
+
+    cleanup_post_test(Thing, data["id"])
+
+
+def test_create_well_without_completion_date(location):
+    """Test that well_completed_on is optional (nullable) when creating a well"""
+    payload = {
+        "name": "Test Well Without Date",
+        "location_id": location.id,
+        "measuring_point_height": 2.5,
+        "measuring_point_description": "top of casing",
+        "release_status": "draft",
+    }
+    response = client.post("/thing/water-well", json=payload)
+
+    assert response.status_code == 201
+    data = response.json()
+    # Field should be present but null
+    assert "well_completed_on" in data
+    assert data["well_completed_on"] is None
+
+    # cleanup after test
+    from db import Thing
+    from tests import cleanup_post_test
+
+    cleanup_post_test(Thing, data["id"])
+
+
+def test_spring_well_completed_on_is_null(location):
+    """Test that springs have null well_completed_on field"""
+    payload = {
+        "name": "Test Spring",
+        "location_id": location.id,
+        "spring_type": "Artesian",
+        "release_status": "draft",
+    }
+    response = client.post("/thing/spring", json=payload)
+
+    assert response.status_code == 201
+    data = response.json()
+    # Springs should have null well_completed_on
+    assert "well_completed_on" in data
+    assert data["well_completed_on"] is None
+    assert data["thing_type"] == "spring"
+
+    # cleanup after test
+    from db import Thing
+    from tests import cleanup_post_test
+
+    cleanup_post_test(Thing, data["id"])
+
+
+def test_well_with_completion_date_and_location_legacy_fields(location):
+    """Test combined scenario: new well with completion date + location legacy fields (null for new locations)"""
+    # Create a new location (without legacy fields - they're migration-only)
+    from tests import cleanup_post_test
+
+    location_payload = {
+        "point": "POINT (-106.607784 35.118924)",
+        "elevation": 1558.8,
+        "release_status": "draft",
+    }
+    location_response = client.post("/location", json=location_payload)
+    assert location_response.status_code == 201
+    location_id = location_response.json()["id"]
+
+    # Create well with completion date at that location
+    well_payload = {
+        "name": "Test Well",
+        "location_id": location_id,
+        "measuring_point_height": 2.5,
+        "measuring_point_description": "top of casing",
+        "release_status": "draft",
+        "well_completed_on": "2020-06-15",  # User can set this for new wells
+    }
+    well_response = client.post("/thing/water-well", json=well_payload)
+    assert well_response.status_code == 201
+    well_id = well_response.json()["id"]
+
+    # Retrieve the well
+    get_response = client.get(f"/thing/water-well/{well_id}")
+    assert get_response.status_code == 200
+    data = get_response.json()
+
+    # well_completed_on is set (active field)
+    assert data["well_completed_on"] == "2020-06-15"
+
+    # Location legacy fields are null (migration-only fields)
+    assert data["current_location"]["legacy_date_created"] is None
+    assert data["current_location"]["legacy_site_date"] is None
+
+    # cleanup after test
+    from db import Thing, Location
+
+    cleanup_post_test(Thing, well_id)
+    cleanup_post_test(Location, location_id)
+
+
+# ============= EOF =============================================

From 5d519545a41fde65f176308ded37cc01b1981452 Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 26 Nov 2025 17:41:41 -0800
Subject: [PATCH 16/66] Create test_transfer_legacy_dates.py

---
 tests/test_transfer_legacy_dates.py | 410 ++++++++++++++++++++++++++++
 1 file changed, 410 insertions(+)
 create mode 100644 tests/test_transfer_legacy_dates.py

diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py
new file mode 100644
index 000000000..a0cec1014
--- /dev/null
+++ b/tests/test_transfer_legacy_dates.py
@@ -0,0 +1,410 @@
+# ===============================================================================
+# Copyright 2025 ross
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+"""
+Unit tests for legacy date field population during AMPAPI → NMSampleLocations migration.
+
+These tests verify that:
+1. Location.legacy_date_created is populated from CSV DateCreated
+2. Location.legacy_site_date is populated from CSV SiteDate (if not null)
+3. Thing.well_completed_on is populated from CSV CompletionDate (if not null)
+"""
+import datetime
+from unittest.mock import Mock, patch
+import pandas as pd
+import pytest
+
+from transfers.util import make_location
+from schemas.thing import CreateWell
+
+
+# ============================================================================
+# LOCATION LEGACY DATE TESTS
+# ============================================================================
+
+
+def test_make_location_with_both_legacy_dates():
+    """Test that make_location populates both legacy_date_created and legacy_site_date"""
+    # Create a mock CSV row with both DateCreated and SiteDate
+    row = pd.Series({
+        'PointID': 'TEST-001',
+        'Easting': 350000,
+        'Northing': 3880000,
+        'DateCreated': '2014-04-03 00:00:00.000',
+        'SiteDate': '2002-12-10 00:00:00.000',
+        'Altitude': 1558.8,
+        'AltDatum': 'NAVD88',
+        'AltitudeMethod': 'GPS',
+        'LocationId': 1,
+        'PublicRelease': True,
+        'CoordinateNotes': None,
+        'LocationNotes': None,
+        'AltitudeAccuracy': None,
+    })
+
+    elevations = {}
+
+    # Call make_location
+    location, elevation_method = make_location(row, elevations)
+
+    # Verify legacy_date_created is set from DateCreated
+    assert location.legacy_date_created is not None
+    assert location.legacy_date_created == datetime.date(2014, 4, 3)
+
+    # Verify legacy_site_date is set from SiteDate
+    assert location.legacy_site_date is not None
+    assert location.legacy_site_date == datetime.date(2002, 12, 10)
+
+    # Verify created_at is still set (should be the later date)
+    assert location.created_at is not None
+
+
+def test_make_location_with_only_date_created():
+    """Test that make_location handles locations with only DateCreated (no SiteDate)"""
+    row = pd.Series({
+        'PointID': 'TEST-002',
+        'Easting': 350000,
+        'Northing': 3880000,
+        'DateCreated': '2014-04-03 00:00:00.000',
+        'SiteDate': None,  # No SiteDate
+        'Altitude': 1558.8,
+        'AltDatum': 'NAVD88',
+        'AltitudeMethod': 'GPS',
+        'LocationId': 2,
+        'PublicRelease': True,
+        'CoordinateNotes': None,
+        'LocationNotes': None,
+        'AltitudeAccuracy': None,
+    })
+
+    elevations = {}
+    location, elevation_method = make_location(row, elevations)
+
+    # Verify legacy_date_created is set
+    assert location.legacy_date_created == datetime.date(2014, 4, 3)
+
+    # Verify legacy_site_date is null (91% of locations don't have SiteDate)
+    assert location.legacy_site_date is None
+
+
+def test_make_location_with_site_date_later_than_date_created():
+    """Test data anomaly: SiteDate is later than DateCreated (should still be accepted)"""
+    row = pd.Series({
+        'PointID': 'TEST-003',
+        'Easting': 350000,
+        'Northing': 3880000,
+        'DateCreated': '2010-01-15 00:00:00.000',
+        'SiteDate': '2015-06-20 00:00:00.000',  # Later than DateCreated (anomaly)
+        'Altitude': 1558.8,
+        'AltDatum': 'NAVD88',
+        'AltitudeMethod': 'GPS',
+        'LocationId': 3,
+        'PublicRelease': True,
+        'CoordinateNotes': None,
+        'LocationNotes': None,
+        'AltitudeAccuracy': None,
+    })
+
+    elevations = {}
+    location, elevation_method = make_location(row, elevations)
+
+    # Both dates should be preserved as-is, regardless of order
+    assert location.legacy_date_created == datetime.date(2010, 1, 15)
+    assert location.legacy_site_date == datetime.date(2015, 6, 20)
+
+
+def test_make_location_with_very_old_site_date():
+    """Test that very old SiteDates (1950s) are preserved correctly"""
+    row = pd.Series({
+        'PointID': 'SM-0227',  # Real example from dataset
+        'Easting': 350000,
+        'Northing': 3880000,
+        'DateCreated': '2008-05-28 00:00:00.000',
+        'SiteDate': '1954-05-01 00:00:00.000',  # 54 years earlier!
+        'Altitude': 1558.8,
+        'AltDatum': 'NAVD88',
+        'AltitudeMethod': 'GPS',
+        'LocationId': 4,
+        'PublicRelease': True,
+        'CoordinateNotes': None,
+        'LocationNotes': None,
+        'AltitudeAccuracy': None,
+    })
+
+    elevations = {}
+    location, elevation_method = make_location(row, elevations)
+
+    # Verify very old date is preserved
+    assert location.legacy_site_date == datetime.date(1954, 5, 1)
+    assert location.legacy_date_created == datetime.date(2008, 5, 28)
+
+    # Verify 54-year time gap
+    time_gap = (location.legacy_date_created - location.legacy_site_date).days
+    assert time_gap == 19751  # Approximately 54 years
+
+
+def test_make_location_legacy_dates_are_date_not_datetime():
+    """Test that legacy date fields are Date type (not DateTime)"""
+    row = pd.Series({
+        'PointID': 'TEST-004',
+        'Easting': 350000,
+        'Northing': 3880000,
+        'DateCreated': '2014-04-03 10:30:45.123',  # Has time component
+        'SiteDate': '2002-12-10 14:22:33.456',  # Has time component
+        'Altitude': 1558.8,
+        'AltDatum': 'NAVD88',
+        'AltitudeMethod': 'GPS',
+        'LocationId': 5,
+        'PublicRelease': True,
+        'CoordinateNotes': None,
+        'LocationNotes': None,
+        'AltitudeAccuracy': None,
+    })
+
+    elevations = {}
+    location, elevation_method = make_location(row, elevations)
+
+    # Verify they are date objects (not datetime)
+    assert isinstance(location.legacy_date_created, datetime.date)
+    assert not isinstance(location.legacy_date_created, datetime.datetime)
+
+    assert isinstance(location.legacy_site_date, datetime.date)
+    assert not isinstance(location.legacy_site_date, datetime.datetime)
+
+    # Verify time component is stripped
+    assert location.legacy_date_created == datetime.date(2014, 4, 3)
+    assert location.legacy_site_date == datetime.date(2002, 12, 10)
+
+
+def test_make_location_legacy_dates_independent_of_created_at():
+    """Test that legacy dates don't affect created_at timestamp"""
+    row = pd.Series({
+        'PointID': 'TEST-005',
+        'Easting': 350000,
+        'Northing': 3880000,
+        'DateCreated': '2014-04-03 00:00:00.000',
+        'SiteDate': '2002-12-10 00:00:00.000',
+        'Altitude': 1558.8,
+        'AltDatum': 'NAVD88',
+        'AltitudeMethod': 'GPS',
+        'LocationId': 6,
+        'PublicRelease': True,
+        'CoordinateNotes': None,
+        'LocationNotes': None,
+        'AltitudeAccuracy': None,
+    })
+
+    elevations = {}
+    location, elevation_method = make_location(row, elevations)
+
+    # created_at should be a DateTime (with timezone)
+    assert isinstance(location.created_at, datetime.datetime)
+
+    # legacy fields should be Date (no timezone)
+    assert isinstance(location.legacy_date_created, datetime.date)
+    assert isinstance(location.legacy_site_date, datetime.date)
+
+    # They should be independent
+    assert location.created_at is not None
+    assert location.legacy_date_created is not None
+    assert location.legacy_site_date is not None
+
+
+# ============================================================================
+# WELL COMPLETION DATE TESTS
+# ============================================================================
+
+
+def test_create_well_schema_accepts_well_completed_on():
+    """Test that CreateWell schema accepts well_completed_on from CSV CompletionDate"""
+    # Simulate data from CSV transfer
+    well_data = {
+        'location_id': 1,
+        'name': 'TEST-WELL-001',
+        'well_completed_on': datetime.date(2004, 8, 8),  # From CSV CompletionDate
+        'hole_depth': 100.0,
+        'well_depth': 95.0,
+        'measuring_point_height': 2.5,
+        'measuring_point_description': 'top of casing',
+        'release_status': 'public',
+    }
+
+    # Validate using CreateWell schema
+    schema = CreateWell(**well_data)
+
+    assert schema.well_completed_on == datetime.date(2004, 8, 8)
+
+
+def test_create_well_schema_well_completed_on_optional():
+    """Test that well_completed_on is optional (70% of wells don't have CompletionDate)"""
+    well_data = {
+        'location_id': 1,
+        'name': 'TEST-WELL-002',
+        'hole_depth': 100.0,
+        'well_depth': 95.0,
+        'measuring_point_height': 2.5,
+        'measuring_point_description': 'top of casing',
+        'release_status': 'public',
+        # No well_completed_on provided
+    }
+
+    # Should not raise validation error
+    schema = CreateWell(**well_data)
+
+    # Field should be optional
+    assert hasattr(schema, 'well_completed_on')
+    # Value should be None when not provided
+    assert schema.well_completed_on is None
+
+
+def test_create_well_with_very_old_completion_date():
+    """Test that very old completion dates (1936) are accepted"""
+    well_data = {
+        'location_id': 1,
+        'name': 'HISTORICAL-WELL',
+        'well_completed_on': datetime.date(1936, 1, 1),  # Oldest well in dataset
+        'hole_depth': 100.0,
+        'well_depth': 95.0,
+        'measuring_point_height': 2.5,
+        'measuring_point_description': 'top of casing',
+        'release_status': 'public',
+    }
+
+    schema = CreateWell(**well_data)
+
+    assert schema.well_completed_on == datetime.date(1936, 1, 1)
+
+
+def test_create_well_completed_on_is_date_not_datetime():
+    """Test that well_completed_on is Date type (not DateTime)"""
+    well_data = {
+        'location_id': 1,
+        'name': 'TEST-WELL-003',
+        'well_completed_on': datetime.date(2004, 8, 8),  # Date, not DateTime
+        'hole_depth': 100.0,
+        'well_depth': 95.0,
+        'measuring_point_height': 2.5,
+        'measuring_point_description': 'top of casing',
+        'release_status': 'public',
+    }
+
+    schema = CreateWell(**well_data)
+
+    # Should accept date type
+    assert isinstance(schema.well_completed_on, datetime.date)
+    assert not isinstance(schema.well_completed_on, datetime.datetime)
+
+
+# ============================================================================
+# DATA COVERAGE TESTS (Simulating Migration Statistics)
+# ============================================================================
+
+
+def test_location_legacy_date_coverage_statistics():
+    """Test that migration preserves expected percentages of legacy dates"""
+    # Simulate 100 location records from CSV
+    locations_created = 0
+    locations_with_site_date = 0
+
+    for i in range(100):
+        if i < 9:  # 9% have SiteDate
+            row = pd.Series({
+                'PointID': f'TEST-{i:03d}',
+                'Easting': 350000 + i,
+                'Northing': 3880000 + i,
+                'DateCreated': '2014-04-03 00:00:00.000',
+                'SiteDate': '2002-12-10 00:00:00.000',
+                'Altitude': 1558.8,
+                'AltDatum': 'NAVD88',
+                'AltitudeMethod': 'GPS',
+                'LocationId': i,
+                'PublicRelease': True,
+                'CoordinateNotes': None,
+                'LocationNotes': None,
+                'AltitudeAccuracy': None,
+            })
+        else:  # 91% don't have SiteDate
+            row = pd.Series({
+                'PointID': f'TEST-{i:03d}',
+                'Easting': 350000 + i,
+                'Northing': 3880000 + i,
+                'DateCreated': '2014-04-03 00:00:00.000',
+                'SiteDate': None,
+                'Altitude': 1558.8,
+                'AltDatum': 'NAVD88',
+                'AltitudeMethod': 'GPS',
+                'LocationId': i,
+                'PublicRelease': True,
+                'CoordinateNotes': None,
+                'LocationNotes': None,
+                'AltitudeAccuracy': None,
+            })
+
+        elevations = {}
+        location, _ = make_location(row, elevations)
+
+        # Count coverage
+        if location.legacy_date_created is not None:
+            locations_created += 1
+
+        if location.legacy_site_date is not None:
+            locations_with_site_date += 1
+
+    # Verify expected coverage
+    assert locations_created == 100  # 100% should have legacy_date_created
+    assert locations_with_site_date == 9  # 9% should have legacy_site_date
+
+
+def test_well_completion_date_coverage_statistics():
+    """Test that expected percentage of wells have completion dates"""
+    # Simulate 100 wells from CSV
+    wells_with_completion_date = 0
+
+    for i in range(100):
+        if i < 30:  # 30% have CompletionDate
+            well_data = {
+                'location_id': 1,
+                'name': f'WELL-{i:03d}',
+                'well_completed_on': datetime.date(2004, 8, 8),
+                'hole_depth': 100.0,
+                'well_depth': 95.0,
+                'measuring_point_height': 2.5,
+                'measuring_point_description': 'top of casing',
+                'release_status': 'public',
+            }
+        else:  # 70% don't have CompletionDate
+            well_data = {
+                'location_id': 1,
+                'name': f'WELL-{i:03d}',
+                'hole_depth': 100.0,
+                'well_depth': 95.0,
+                'measuring_point_height': 2.5,
+                'measuring_point_description': 'top of casing',
+                'release_status': 'public',
+                # No well_completed_on
+            }
+
+        schema = CreateWell(**well_data)
+
+        if schema.well_completed_on is not None:
+            wells_with_completion_date += 1
+
+    # Verify expected coverage
+    assert wells_with_completion_date == 30  # 30% should have completion dates
+
+
+# ============================================================================
+# EOF
+# ============================================================================

From 687fb4aa1b5c4060f14d0fe140b78572f5909c9f Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 26 Nov 2025 18:05:52 -0800
Subject: [PATCH 17/66] Support changes in unit tests for thing and transfer
 script

---
 tests/test_thing.py                 |  5 +++--
 tests/test_transfer_legacy_dates.py | 11 +++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/test_thing.py b/tests/test_thing.py
index 12aafef1a..3d76e3b99 100644
--- a/tests/test_thing.py
+++ b/tests/test_thing.py
@@ -1237,8 +1237,9 @@ def test_well_with_completion_date_and_location_legacy_fields(location):
     assert data["well_completed_on"] == "2020-06-15"
 
     # Location legacy fields are null (migration-only fields)
-    assert data["current_location"]["legacy_date_created"] is None
-    assert data["current_location"]["legacy_site_date"] is None
+    # current_location is a GeoJSON Feature, so fields are under properties
+    assert data["current_location"]["properties"]["legacy_date_created"] is None
+    assert data["current_location"]["properties"]["legacy_site_date"] is None
 
     # cleanup after test
     from db import Thing, Location
diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py
index a0cec1014..53f304c4a 100644
--- a/tests/test_transfer_legacy_dates.py
+++ b/tests/test_transfer_legacy_dates.py
@@ -67,8 +67,8 @@ def test_make_location_with_both_legacy_dates():
     assert location.legacy_site_date is not None
     assert location.legacy_site_date == datetime.date(2002, 12, 10)
 
-    # Verify created_at is still set (should be the later date)
-    assert location.created_at is not None
+    # Verify created_at is NOT set during migration (it's auto-set by AutoBaseMixin on save)
+    assert location.created_at is None
 
 
 def test_make_location_with_only_date_created():
@@ -209,15 +209,14 @@ def test_make_location_legacy_dates_independent_of_created_at():
     elevations = {}
     location, elevation_method = make_location(row, elevations)
 
-    # created_at should be a DateTime (with timezone)
-    assert isinstance(location.created_at, datetime.datetime)
+    # created_at should be None during transfer (auto-set by AutoBaseMixin on save)
+    assert location.created_at is None
 
     # legacy fields should be Date (no timezone)
     assert isinstance(location.legacy_date_created, datetime.date)
     assert isinstance(location.legacy_site_date, datetime.date)
 
-    # They should be independent
-    assert location.created_at is not None
+    # Legacy fields should be populated
     assert location.legacy_date_created is not None
     assert location.legacy_site_date is not None
 

From 6552bc00fc3560fcd8abfae02486d6a1363d61e5 Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 26 Nov 2025 18:06:34 -0800
Subject: [PATCH 18/66] Implement changes in db and schemas

---
 db/location.py      |  9 +++++++++
 db/thing.py         |  5 +++++
 schemas/location.py | 15 +++++++++++++++
 schemas/thing.py    |  7 +++++++
 4 files changed, 36 insertions(+)

diff --git a/db/location.py b/db/location.py
index 50b1aa0db..3b4271592 100644
--- a/db/location.py
+++ b/db/location.py
@@ -23,6 +23,7 @@
     String,
     ForeignKey,
     DateTime,
+    Date,
     func,
     Text,
 )
@@ -61,6 +62,14 @@ class Location(Base, AutoBaseMixin, ReleaseMixin, NotesMixin, DataProvenanceMixi
     nma_notes_location: Mapped[str] = mapped_column(Text, nullable=True)
     nma_coordinate_notes: Mapped[str] = mapped_column(Text, nullable=True)
 
+    # --- Legacy AMPAPI Date Fields (Migration-Only, Read-Only Post-Migration) ---
+    legacy_date_created: Mapped[datetime.date] = mapped_column(
+        Date, nullable=True, comment="Original AMPAPI DateCreated (migration-only field)"
+    )
+    legacy_site_date: Mapped[datetime.date] = mapped_column(
+        Date, nullable=True, comment="Original AMPAPI SiteDate (migration-only field)"
+    )
+
     # --- Relationship Definitions ---
     thing_associations: Mapped[list["LocationThingAssociation"]] = relationship(
         back_populates="location", cascade="all, delete-orphan"
diff --git a/db/thing.py b/db/thing.py
index 9f30d08e2..b42b70d56 100644
--- a/db/thing.py
+++ b/db/thing.py
@@ -115,6 +115,11 @@ class Thing(
     )
 
     well_construction_notes: Mapped[str] = mapped_column(Text, nullable=True)
+    well_completed_on: Mapped[date] = mapped_column(
+        Date,
+        nullable=True,
+        comment="Date when well construction/drilling was completed (from AMPAPI CompletionDate, active field for new wells)",
+    )
 
     # Spring-related columns
     spring_type: Mapped[str] = lexicon_term(
diff --git a/schemas/location.py b/schemas/location.py
index e911e3359..e18b76996 100644
--- a/schemas/location.py
+++ b/schemas/location.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ===============================================================================
+from datetime import date
 from typing import List
 
 from geoalchemy2 import WKBElement
@@ -106,6 +107,9 @@ class GeoJSONProperties(BaseModel):
         default_factory=GeoJSONUTMCoordinates
     )
     notes: list[NoteResponse] = []
+    # Legacy AMPAPI date fields (migration-only, read-only)
+    legacy_date_created: date | None = None
+    legacy_site_date: date | None = None
 
     model_config = ConfigDict(
         from_attributes=True,
@@ -150,6 +154,9 @@ def populate_fields(cls, data: Any) -> Any:
         data_dict["properties"]["notes"] = data_dict.get("notes")
         data_dict["properties"]["elevation"] = convert_m_to_ft(elevation_m)
         data_dict["properties"]["elevation_method"] = data_dict.get("elevation_method")
+        # populate legacy date fields
+        data_dict["properties"]["legacy_date_created"] = data_dict.get("legacy_date_created")
+        data_dict["properties"]["legacy_site_date"] = data_dict.get("legacy_site_date")
 
         # populate UTM coordinates
         point_utm_zone_13n_wkt = transform_srid(
@@ -181,6 +188,10 @@ class LocationResponse(BaseResponseModel):
     county: str | None
     quad_name: str | None
 
+    # Legacy AMPAPI date fields (migration-only, read-only post-migration)
+    legacy_date_created: date | None = None
+    legacy_site_date: date | None = None
+
     @field_validator("point", mode="before")
     def point_to_wkt(cls, value):
         if isinstance(value, WKBElement):
@@ -219,5 +230,9 @@ class UpdateLocation(BaseUpdateModel, ValidateLocation):
     coordinate_accuracy: float | None = None
     coordinate_method: CoordinateMethod | None = None
 
+    # Legacy AMPAPI date fields (migration-only, can be updated but not created)
+    legacy_date_created: date | None = None
+    legacy_site_date: date | None = None
+
 
 # ============= EOF =============================================
diff --git a/schemas/thing.py b/schemas/thing.py
index cf8c3ef2b..6de5908cc 100644
--- a/schemas/thing.py
+++ b/schemas/thing.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ===============================================================================
+from datetime import date
 from typing import List
 
 from pydantic import BaseModel, model_validator, Field, field_validator
@@ -130,6 +131,8 @@ class CreateWell(CreateBaseThing, ValidateWell):
     )
     measuring_point_description: str | None
     notes: list[CreateNote] | None = None
+    # Active field: users can set this for new wells
+    well_completed_on: date | None = None
 
 
 class CreateSpring(CreateBaseThing):
@@ -224,6 +227,8 @@ class WellResponse(BaseThingResponse):
     measuring_point_height: float
     measuring_point_height_unit: str = "ft"
     measuring_point_description: str | None
+    # Active field: completion date for wells
+    well_completed_on: date | None = None
 
     water_notes: list[NoteResponse] | None = None
     measuring_notes: list[NoteResponse] | None = None
@@ -329,6 +334,8 @@ class UpdateWell(UpdateThing, ValidateWell):
     well_casing_diameter: float | None = None  # in inches
     well_casing_depth: float | None = None  # in feet
     well_casing_materials: list[str] | None = None
+    # Active field: users can update completion date
+    well_completed_on: date | None = None
 
 
 class UpdateSpring(UpdateThing):

From 08fb22105834b3fa70dc030cffb0af246bf3471b Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 26 Nov 2025 18:06:49 -0800
Subject: [PATCH 19/66] Implement changes in transfer scripts

---
 transfers/util.py          | 43 +++++++++++++-------------------------
 transfers/well_transfer.py | 15 +++++++++++++
 2 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/transfers/util.py b/transfers/util.py
index d08798425..d39845f44 100644
--- a/transfers/util.py
+++ b/transfers/util.py
@@ -214,33 +214,6 @@ def make_location(row: pd.Series, elevations: dict) -> tuple:
         point, source_srid=SRID_UTM_ZONE_13N, target_srid=SRID_WGS84
     )
 
-    """
-    Developer's notes
-
-    AMP folks said that the earlier date between DateCreated and SiteDate is when
-    the site was inventoried, whereas the later is when the record was made in
-    the database. This was because they were used interchangeably. 
-    """
-    if row.DateCreated and row.SiteDate:
-
-        date_created = datetime.strptime(row.DateCreated, "%Y-%m-%d %H:%M:%S.%f")
-        site_date = datetime.strptime(row.SiteDate, "%Y-%m-%d %H:%M:%S.%f")
-
-        if date_created > site_date:
-            created_at = date_created
-        else:
-            created_at = site_date
-    elif row.DateCreated and not row.SiteDate:
-        created_at = datetime.strptime(row.DateCreated, "%Y-%m-%d %H:%M:%S.%f")
-    elif not row.DateCreated and row.SiteDate:
-        created_at = datetime.strptime(row.SiteDate, "%Y-%m-%d %H:%M:%S.%f")
-    else:
-        created_at = None
-
-    # convert created_at from MST/MDT to UTC
-    if created_at is not None:
-        created_at = convert_mt_to_utc(created_at)
-
     z = row.Altitude
     if z:
         elevation_from_epqs = False
@@ -271,14 +244,28 @@ def make_location(row: pd.Series, elevations: dict) -> tuple:
             f"LU_AltitudeMethod:{row.AltitudeMethod.strip()}"
         )
 
+    # Extract legacy date fields (Date type, not DateTime)
+    legacy_date_created = None
+    if row.DateCreated:
+        legacy_date_created = datetime.strptime(
+            row.DateCreated, "%Y-%m-%d %H:%M:%S.%f"
+        ).date()
+
+    legacy_site_date = None
+    if row.SiteDate:
+        legacy_site_date = datetime.strptime(
+            row.SiteDate, "%Y-%m-%d %H:%M:%S.%f"
+        ).date()
+
     location = Location(
         nma_pk_location=row.LocationId,
         point=transformed_point.wkt,
         elevation=z,
         release_status="public" if row.PublicRelease else "private",
-        created_at=created_at,
         nma_coordinate_notes=row.CoordinateNotes,
         nma_notes_location=row.LocationNotes,
+        legacy_date_created=legacy_date_created,
+        legacy_site_date=legacy_site_date,
     )
 
     return location, elevation_method
diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py
index ee54d0216..5daa1d8ee 100644
--- a/transfers/well_transfer.py
+++ b/transfers/well_transfer.py
@@ -237,6 +237,19 @@ def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None
                 [] if isna(row.CasingDescription) else _extract_casing_materials(row)
             )
 
+            # Extract well_completed_on from CompletionDate (Date type, not DateTime)
+            well_completed_on = None
+            if not isna(row.CompletionDate):
+                try:
+                    well_completed_on = datetime.strptime(
+                        row.CompletionDate, "%Y-%m-%d %H:%M:%S.%f"
+                    ).date()
+                except (ValueError, AttributeError):
+                    # If parsing fails, leave as None
+                    logger.warning(
+                        f"Could not parse CompletionDate for {row.PointID}: {row.CompletionDate}"
+                    )
+
             # manually add the well rather than add_well from services/thing_helper.py
             # so that effective_start can be set on the location assocation
 
@@ -254,6 +267,7 @@ def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None
                 release_status="public" if row.PublicRelease else "private",
                 measuring_point_height=row.MPHeight,
                 measuring_point_description=row.MeasuringPoint,
+                well_completed_on=well_completed_on,
                 notes=(
                     [{"content": row.Notes, "note_type": "Other"}] if row.Notes else []
                 ),
@@ -283,6 +297,7 @@ def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None
             well_data["nma_pk_welldata"] = row.WellID
 
             well_data.pop("notes")
+            # well_completed_on is kept in well_data (not excluded above)
             well = Thing(**well_data)
             session.add(well)
             # logger.info(f"Created well for {row.PointID}")

From 47aad3f14d0bbe059299cc919f332c8d1d7febcf Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 26 Nov 2025 18:07:08 -0800
Subject: [PATCH 20/66] Address measuring point bug

---
 services/thing_helper.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/services/thing_helper.py b/services/thing_helper.py
index 53ce54577..084a8b02b 100644
--- a/services/thing_helper.py
+++ b/services/thing_helper.py
@@ -13,6 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ===============================================================================
+from datetime import datetime
+from zoneinfo import ZoneInfo
+
 from fastapi import Request
 from fastapi_pagination.ext.sqlalchemy import paginate
 from pydantic import BaseModel
@@ -32,6 +35,7 @@
     WellCasingMaterial,
 )
 from db.group import GroupThingAssociation
+from db.measuring_point_history import MeasuringPointHistory
 from services.audit_helper import audit_add
 from services.crud_helper import model_patcher
 from services.exceptions_helper import PydanticStyleException
@@ -159,6 +163,10 @@ def add_thing(
     location_id = data.pop("location_id", None)
     group_id = data.pop("group_id", None)
 
+    # Extract measuring point data (stored in separate history table)
+    measuring_point_height = data.pop("measuring_point_height", None)
+    measuring_point_description = data.pop("measuring_point_description", None)
+
     try:
         thing = Thing(**data)
         thing.thing_type = thing_type
@@ -169,6 +177,18 @@ def add_thing(
         session.flush()
         session.refresh(thing)
 
+        # Create MeasuringPointHistory record if measuring_point_height provided
+        if measuring_point_height is not None:
+            measuring_point_history = MeasuringPointHistory(
+                thing_id=thing.id,
+                measuring_point_height=measuring_point_height,
+                measuring_point_description=measuring_point_description,
+                start_date=datetime.now(tz=ZoneInfo("UTC")),
+                end_date=None,
+            )
+            audit_add(user, measuring_point_history)
+            session.add(measuring_point_history)
+
         # endpoint catches ProgrammingError if location_id or group_id do not exist
         if group_id:
             assoc = GroupThingAssociation()

From 546b7013286c37529b5e2a8e0524ae09daac1f5f Mon Sep 17 00:00:00 2001
From: kbighorse <kbighorse@users.noreply.github.com>
Date: Thu, 27 Nov 2025 02:07:05 +0000
Subject: [PATCH 21/66] Formatting changes

---
 db/location.py                      |   4 +-
 schemas/location.py                 |   4 +-
 tests/test_transfer_legacy_dates.py | 350 +++++++++++++++-------------
 3 files changed, 189 insertions(+), 169 deletions(-)

diff --git a/db/location.py b/db/location.py
index 3b4271592..a07958346 100644
--- a/db/location.py
+++ b/db/location.py
@@ -64,7 +64,9 @@ class Location(Base, AutoBaseMixin, ReleaseMixin, NotesMixin, DataProvenanceMixi
 
     # --- Legacy AMPAPI Date Fields (Migration-Only, Read-Only Post-Migration) ---
     legacy_date_created: Mapped[datetime.date] = mapped_column(
-        Date, nullable=True, comment="Original AMPAPI DateCreated (migration-only field)"
+        Date,
+        nullable=True,
+        comment="Original AMPAPI DateCreated (migration-only field)",
     )
     legacy_site_date: Mapped[datetime.date] = mapped_column(
         Date, nullable=True, comment="Original AMPAPI SiteDate (migration-only field)"
diff --git a/schemas/location.py b/schemas/location.py
index e18b76996..1f4bad472 100644
--- a/schemas/location.py
+++ b/schemas/location.py
@@ -155,7 +155,9 @@ def populate_fields(cls, data: Any) -> Any:
         data_dict["properties"]["elevation"] = convert_m_to_ft(elevation_m)
         data_dict["properties"]["elevation_method"] = data_dict.get("elevation_method")
         # populate legacy date fields
-        data_dict["properties"]["legacy_date_created"] = data_dict.get("legacy_date_created")
+        data_dict["properties"]["legacy_date_created"] = data_dict.get(
+            "legacy_date_created"
+        )
         data_dict["properties"]["legacy_site_date"] = data_dict.get("legacy_site_date")
 
         # populate UTM coordinates
diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py
index 53f304c4a..30fbcd5ae 100644
--- a/tests/test_transfer_legacy_dates.py
+++ b/tests/test_transfer_legacy_dates.py
@@ -38,21 +38,23 @@
 def test_make_location_with_both_legacy_dates():
     """Test that make_location populates both legacy_date_created and legacy_site_date"""
     # Create a mock CSV row with both DateCreated and SiteDate
-    row = pd.Series({
-        'PointID': 'TEST-001',
-        'Easting': 350000,
-        'Northing': 3880000,
-        'DateCreated': '2014-04-03 00:00:00.000',
-        'SiteDate': '2002-12-10 00:00:00.000',
-        'Altitude': 1558.8,
-        'AltDatum': 'NAVD88',
-        'AltitudeMethod': 'GPS',
-        'LocationId': 1,
-        'PublicRelease': True,
-        'CoordinateNotes': None,
-        'LocationNotes': None,
-        'AltitudeAccuracy': None,
-    })
+    row = pd.Series(
+        {
+            "PointID": "TEST-001",
+            "Easting": 350000,
+            "Northing": 3880000,
+            "DateCreated": "2014-04-03 00:00:00.000",
+            "SiteDate": "2002-12-10 00:00:00.000",
+            "Altitude": 1558.8,
+            "AltDatum": "NAVD88",
+            "AltitudeMethod": "GPS",
+            "LocationId": 1,
+            "PublicRelease": True,
+            "CoordinateNotes": None,
+            "LocationNotes": None,
+            "AltitudeAccuracy": None,
+        }
+    )
 
     elevations = {}
 
@@ -73,21 +75,23 @@ def test_make_location_with_both_legacy_dates():
 
 def test_make_location_with_only_date_created():
     """Test that make_location handles locations with only DateCreated (no SiteDate)"""
-    row = pd.Series({
-        'PointID': 'TEST-002',
-        'Easting': 350000,
-        'Northing': 3880000,
-        'DateCreated': '2014-04-03 00:00:00.000',
-        'SiteDate': None,  # No SiteDate
-        'Altitude': 1558.8,
-        'AltDatum': 'NAVD88',
-        'AltitudeMethod': 'GPS',
-        'LocationId': 2,
-        'PublicRelease': True,
-        'CoordinateNotes': None,
-        'LocationNotes': None,
-        'AltitudeAccuracy': None,
-    })
+    row = pd.Series(
+        {
+            "PointID": "TEST-002",
+            "Easting": 350000,
+            "Northing": 3880000,
+            "DateCreated": "2014-04-03 00:00:00.000",
+            "SiteDate": None,  # No SiteDate
+            "Altitude": 1558.8,
+            "AltDatum": "NAVD88",
+            "AltitudeMethod": "GPS",
+            "LocationId": 2,
+            "PublicRelease": True,
+            "CoordinateNotes": None,
+            "LocationNotes": None,
+            "AltitudeAccuracy": None,
+        }
+    )
 
     elevations = {}
     location, elevation_method = make_location(row, elevations)
@@ -101,21 +105,23 @@ def test_make_location_with_only_date_created():
 
 def test_make_location_with_site_date_later_than_date_created():
     """Test data anomaly: SiteDate is later than DateCreated (should still be accepted)"""
-    row = pd.Series({
-        'PointID': 'TEST-003',
-        'Easting': 350000,
-        'Northing': 3880000,
-        'DateCreated': '2010-01-15 00:00:00.000',
-        'SiteDate': '2015-06-20 00:00:00.000',  # Later than DateCreated (anomaly)
-        'Altitude': 1558.8,
-        'AltDatum': 'NAVD88',
-        'AltitudeMethod': 'GPS',
-        'LocationId': 3,
-        'PublicRelease': True,
-        'CoordinateNotes': None,
-        'LocationNotes': None,
-        'AltitudeAccuracy': None,
-    })
+    row = pd.Series(
+        {
+            "PointID": "TEST-003",
+            "Easting": 350000,
+            "Northing": 3880000,
+            "DateCreated": "2010-01-15 00:00:00.000",
+            "SiteDate": "2015-06-20 00:00:00.000",  # Later than DateCreated (anomaly)
+            "Altitude": 1558.8,
+            "AltDatum": "NAVD88",
+            "AltitudeMethod": "GPS",
+            "LocationId": 3,
+            "PublicRelease": True,
+            "CoordinateNotes": None,
+            "LocationNotes": None,
+            "AltitudeAccuracy": None,
+        }
+    )
 
     elevations = {}
     location, elevation_method = make_location(row, elevations)
@@ -127,21 +133,23 @@ def test_make_location_with_site_date_later_than_date_created():
 
 def test_make_location_with_very_old_site_date():
     """Test that very old SiteDates (1950s) are preserved correctly"""
-    row = pd.Series({
-        'PointID': 'SM-0227',  # Real example from dataset
-        'Easting': 350000,
-        'Northing': 3880000,
-        'DateCreated': '2008-05-28 00:00:00.000',
-        'SiteDate': '1954-05-01 00:00:00.000',  # 54 years earlier!
-        'Altitude': 1558.8,
-        'AltDatum': 'NAVD88',
-        'AltitudeMethod': 'GPS',
-        'LocationId': 4,
-        'PublicRelease': True,
-        'CoordinateNotes': None,
-        'LocationNotes': None,
-        'AltitudeAccuracy': None,
-    })
+    row = pd.Series(
+        {
+            "PointID": "SM-0227",  # Real example from dataset
+            "Easting": 350000,
+            "Northing": 3880000,
+            "DateCreated": "2008-05-28 00:00:00.000",
+            "SiteDate": "1954-05-01 00:00:00.000",  # 54 years earlier!
+            "Altitude": 1558.8,
+            "AltDatum": "NAVD88",
+            "AltitudeMethod": "GPS",
+            "LocationId": 4,
+            "PublicRelease": True,
+            "CoordinateNotes": None,
+            "LocationNotes": None,
+            "AltitudeAccuracy": None,
+        }
+    )
 
     elevations = {}
     location, elevation_method = make_location(row, elevations)
@@ -157,21 +165,23 @@ def test_make_location_with_very_old_site_date():
 
 def test_make_location_legacy_dates_are_date_not_datetime():
     """Test that legacy date fields are Date type (not DateTime)"""
-    row = pd.Series({
-        'PointID': 'TEST-004',
-        'Easting': 350000,
-        'Northing': 3880000,
-        'DateCreated': '2014-04-03 10:30:45.123',  # Has time component
-        'SiteDate': '2002-12-10 14:22:33.456',  # Has time component
-        'Altitude': 1558.8,
-        'AltDatum': 'NAVD88',
-        'AltitudeMethod': 'GPS',
-        'LocationId': 5,
-        'PublicRelease': True,
-        'CoordinateNotes': None,
-        'LocationNotes': None,
-        'AltitudeAccuracy': None,
-    })
+    row = pd.Series(
+        {
+            "PointID": "TEST-004",
+            "Easting": 350000,
+            "Northing": 3880000,
+            "DateCreated": "2014-04-03 10:30:45.123",  # Has time component
+            "SiteDate": "2002-12-10 14:22:33.456",  # Has time component
+            "Altitude": 1558.8,
+            "AltDatum": "NAVD88",
+            "AltitudeMethod": "GPS",
+            "LocationId": 5,
+            "PublicRelease": True,
+            "CoordinateNotes": None,
+            "LocationNotes": None,
+            "AltitudeAccuracy": None,
+        }
+    )
 
     elevations = {}
     location, elevation_method = make_location(row, elevations)
@@ -190,21 +200,23 @@ def test_make_location_legacy_dates_are_date_not_datetime():
 
 def test_make_location_legacy_dates_independent_of_created_at():
     """Test that legacy dates don't affect created_at timestamp"""
-    row = pd.Series({
-        'PointID': 'TEST-005',
-        'Easting': 350000,
-        'Northing': 3880000,
-        'DateCreated': '2014-04-03 00:00:00.000',
-        'SiteDate': '2002-12-10 00:00:00.000',
-        'Altitude': 1558.8,
-        'AltDatum': 'NAVD88',
-        'AltitudeMethod': 'GPS',
-        'LocationId': 6,
-        'PublicRelease': True,
-        'CoordinateNotes': None,
-        'LocationNotes': None,
-        'AltitudeAccuracy': None,
-    })
+    row = pd.Series(
+        {
+            "PointID": "TEST-005",
+            "Easting": 350000,
+            "Northing": 3880000,
+            "DateCreated": "2014-04-03 00:00:00.000",
+            "SiteDate": "2002-12-10 00:00:00.000",
+            "Altitude": 1558.8,
+            "AltDatum": "NAVD88",
+            "AltitudeMethod": "GPS",
+            "LocationId": 6,
+            "PublicRelease": True,
+            "CoordinateNotes": None,
+            "LocationNotes": None,
+            "AltitudeAccuracy": None,
+        }
+    )
 
     elevations = {}
     location, elevation_method = make_location(row, elevations)
@@ -230,14 +242,14 @@ def test_create_well_schema_accepts_well_completed_on():
     """Test that CreateWell schema accepts well_completed_on from CSV CompletionDate"""
     # Simulate data from CSV transfer
     well_data = {
-        'location_id': 1,
-        'name': 'TEST-WELL-001',
-        'well_completed_on': datetime.date(2004, 8, 8),  # From CSV CompletionDate
-        'hole_depth': 100.0,
-        'well_depth': 95.0,
-        'measuring_point_height': 2.5,
-        'measuring_point_description': 'top of casing',
-        'release_status': 'public',
+        "location_id": 1,
+        "name": "TEST-WELL-001",
+        "well_completed_on": datetime.date(2004, 8, 8),  # From CSV CompletionDate
+        "hole_depth": 100.0,
+        "well_depth": 95.0,
+        "measuring_point_height": 2.5,
+        "measuring_point_description": "top of casing",
+        "release_status": "public",
     }
 
     # Validate using CreateWell schema
@@ -249,13 +261,13 @@ def test_create_well_schema_accepts_well_completed_on():
 def test_create_well_schema_well_completed_on_optional():
     """Test that well_completed_on is optional (70% of wells don't have CompletionDate)"""
     well_data = {
-        'location_id': 1,
-        'name': 'TEST-WELL-002',
-        'hole_depth': 100.0,
-        'well_depth': 95.0,
-        'measuring_point_height': 2.5,
-        'measuring_point_description': 'top of casing',
-        'release_status': 'public',
+        "location_id": 1,
+        "name": "TEST-WELL-002",
+        "hole_depth": 100.0,
+        "well_depth": 95.0,
+        "measuring_point_height": 2.5,
+        "measuring_point_description": "top of casing",
+        "release_status": "public",
         # No well_completed_on provided
     }
 
@@ -263,7 +275,7 @@ def test_create_well_schema_well_completed_on_optional():
     schema = CreateWell(**well_data)
 
     # Field should be optional
-    assert hasattr(schema, 'well_completed_on')
+    assert hasattr(schema, "well_completed_on")
     # Value should be None when not provided
     assert schema.well_completed_on is None
 
@@ -271,14 +283,14 @@ def test_create_well_schema_well_completed_on_optional():
 def test_create_well_with_very_old_completion_date():
     """Test that very old completion dates (1936) are accepted"""
     well_data = {
-        'location_id': 1,
-        'name': 'HISTORICAL-WELL',
-        'well_completed_on': datetime.date(1936, 1, 1),  # Oldest well in dataset
-        'hole_depth': 100.0,
-        'well_depth': 95.0,
-        'measuring_point_height': 2.5,
-        'measuring_point_description': 'top of casing',
-        'release_status': 'public',
+        "location_id": 1,
+        "name": "HISTORICAL-WELL",
+        "well_completed_on": datetime.date(1936, 1, 1),  # Oldest well in dataset
+        "hole_depth": 100.0,
+        "well_depth": 95.0,
+        "measuring_point_height": 2.5,
+        "measuring_point_description": "top of casing",
+        "release_status": "public",
     }
 
     schema = CreateWell(**well_data)
@@ -289,14 +301,14 @@ def test_create_well_with_very_old_completion_date():
 def test_create_well_completed_on_is_date_not_datetime():
     """Test that well_completed_on is Date type (not DateTime)"""
     well_data = {
-        'location_id': 1,
-        'name': 'TEST-WELL-003',
-        'well_completed_on': datetime.date(2004, 8, 8),  # Date, not DateTime
-        'hole_depth': 100.0,
-        'well_depth': 95.0,
-        'measuring_point_height': 2.5,
-        'measuring_point_description': 'top of casing',
-        'release_status': 'public',
+        "location_id": 1,
+        "name": "TEST-WELL-003",
+        "well_completed_on": datetime.date(2004, 8, 8),  # Date, not DateTime
+        "hole_depth": 100.0,
+        "well_depth": 95.0,
+        "measuring_point_height": 2.5,
+        "measuring_point_description": "top of casing",
+        "release_status": "public",
     }
 
     schema = CreateWell(**well_data)
@@ -319,37 +331,41 @@ def test_location_legacy_date_coverage_statistics():
 
     for i in range(100):
         if i < 9:  # 9% have SiteDate
-            row = pd.Series({
-                'PointID': f'TEST-{i:03d}',
-                'Easting': 350000 + i,
-                'Northing': 3880000 + i,
-                'DateCreated': '2014-04-03 00:00:00.000',
-                'SiteDate': '2002-12-10 00:00:00.000',
-                'Altitude': 1558.8,
-                'AltDatum': 'NAVD88',
-                'AltitudeMethod': 'GPS',
-                'LocationId': i,
-                'PublicRelease': True,
-                'CoordinateNotes': None,
-                'LocationNotes': None,
-                'AltitudeAccuracy': None,
-            })
+            row = pd.Series(
+                {
+                    "PointID": f"TEST-{i:03d}",
+                    "Easting": 350000 + i,
+                    "Northing": 3880000 + i,
+                    "DateCreated": "2014-04-03 00:00:00.000",
+                    "SiteDate": "2002-12-10 00:00:00.000",
+                    "Altitude": 1558.8,
+                    "AltDatum": "NAVD88",
+                    "AltitudeMethod": "GPS",
+                    "LocationId": i,
+                    "PublicRelease": True,
+                    "CoordinateNotes": None,
+                    "LocationNotes": None,
+                    "AltitudeAccuracy": None,
+                }
+            )
         else:  # 91% don't have SiteDate
-            row = pd.Series({
-                'PointID': f'TEST-{i:03d}',
-                'Easting': 350000 + i,
-                'Northing': 3880000 + i,
-                'DateCreated': '2014-04-03 00:00:00.000',
-                'SiteDate': None,
-                'Altitude': 1558.8,
-                'AltDatum': 'NAVD88',
-                'AltitudeMethod': 'GPS',
-                'LocationId': i,
-                'PublicRelease': True,
-                'CoordinateNotes': None,
-                'LocationNotes': None,
-                'AltitudeAccuracy': None,
-            })
+            row = pd.Series(
+                {
+                    "PointID": f"TEST-{i:03d}",
+                    "Easting": 350000 + i,
+                    "Northing": 3880000 + i,
+                    "DateCreated": "2014-04-03 00:00:00.000",
+                    "SiteDate": None,
+                    "Altitude": 1558.8,
+                    "AltDatum": "NAVD88",
+                    "AltitudeMethod": "GPS",
+                    "LocationId": i,
+                    "PublicRelease": True,
+                    "CoordinateNotes": None,
+                    "LocationNotes": None,
+                    "AltitudeAccuracy": None,
+                }
+            )
 
         elevations = {}
         location, _ = make_location(row, elevations)
@@ -374,24 +390,24 @@ def test_well_completion_date_coverage_statistics():
     for i in range(100):
         if i < 30:  # 30% have CompletionDate
             well_data = {
-                'location_id': 1,
-                'name': f'WELL-{i:03d}',
-                'well_completed_on': datetime.date(2004, 8, 8),
-                'hole_depth': 100.0,
-                'well_depth': 95.0,
-                'measuring_point_height': 2.5,
-                'measuring_point_description': 'top of casing',
-                'release_status': 'public',
+                "location_id": 1,
+                "name": f"WELL-{i:03d}",
+                "well_completed_on": datetime.date(2004, 8, 8),
+                "hole_depth": 100.0,
+                "well_depth": 95.0,
+                "measuring_point_height": 2.5,
+                "measuring_point_description": "top of casing",
+                "release_status": "public",
             }
         else:  # 70% don't have CompletionDate
             well_data = {
-                'location_id': 1,
-                'name': f'WELL-{i:03d}',
-                'hole_depth': 100.0,
-                'well_depth': 95.0,
-                'measuring_point_height': 2.5,
-                'measuring_point_description': 'top of casing',
-                'release_status': 'public',
+                "location_id": 1,
+                "name": f"WELL-{i:03d}",
+                "hole_depth": 100.0,
+                "well_depth": 95.0,
+                "measuring_point_height": 2.5,
+                "measuring_point_description": "top of casing",
+                "release_status": "public",
                 # No well_completed_on
             }
 

From 0ceee93f69f5c38087558b6e71ab1b0f997a2173 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Fri, 28 Nov 2025 13:46:24 -0700
Subject: [PATCH 22/66] refactor: enhance asset transfer process by
 implementing AssetTransferer class and improving data handling

---
 transfers/asset_transfer.py    | 125 +++++--------
 transfers/group_transfer.py    |  85 +++++++--
 transfers/link_ids_transfer.py | 313 +++++++++++++++------------------
 transfers/metrics.py           |  80 +++++----
 transfers/sensor_transfer.py   | 284 ++++++------------------------
 transfers/transfer.py          |  92 +++++-----
 transfers/transferer.py        |  43 +++--
 transfers/util.py              |  75 +++++---
 transfers/well_transfer.py     |  71 ++++----
 9 files changed, 524 insertions(+), 644 deletions(-)

diff --git a/transfers/asset_transfer.py b/transfers/asset_transfer.py
index 71d3ad23b..b7938f15d 100644
--- a/transfers/asset_transfer.py
+++ b/transfers/asset_transfer.py
@@ -13,53 +13,49 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ===============================================================================
-# for testing only. remove later
-from dotenv import load_dotenv
-from db.engine import session_ctx
-
-load_dotenv()
-# -----------------------------------------------
-
 import io
 
 from starlette.datastructures import UploadFile
-from sqlalchemy.orm import Session
-from db import Asset, AssetThingAssociation, Thing
-from services.audit_helper import audit_add
+
+from db import Asset, AssetThingAssociation
 from services.gcs_helper import (
     gcs_upload,
-    check_asset_exists,
     get_storage_bucket,
     get_storage_client,
 )
-from transfers.util import get_valid_things, read_csv
 from transfers.logger import logger
-
-
-def transfer_assets(session: Session) -> None:
-    client = get_storage_client()
-
-    bucket = get_storage_bucket(client)
-    logger.info(f"Using bucket {bucket.name}")
-
-    well_photos = read_csv("WellPhotos")
-    # for name in ['AR0001']: # for testing
-    valid_things = get_valid_things(session)
-    n = len(valid_things)
-    for j, thing in enumerate(valid_things):
-        photos = well_photos[well_photos["PointID"] == thing.name]
+from transfers.util import read_csv, filter_to_valid_point_ids
+from transfers.well_transfer import WellChunkTransferer
+
+
+class AssetTransferer(WellChunkTransferer):
+    def __init__(self, *args, **kw):
+        self.source_table = "WellPhotos"
+        super().__init__(*args, **kw)
+        self._client = get_storage_client()
+        self._bucket = get_storage_bucket(self._client)
+        logger.info(f"Using bucket {self._bucket.name}")
+
+    def _get_dfs(self):
+        input_df = read_csv(self.source_table)
+        cleaned_df = filter_to_valid_point_ids(input_df)
+        return input_df, cleaned_df
+
+    def _chunk_step(self, session, df, i, row, db_item):
+        photos = df[df["PointID"] == db_item.name]
+        n = len(df)
         if photos.empty:
-            photos = well_photos[well_photos["PointID"] == thing.name.replace("-", "")]
+            photos = df[df["PointID"] == db_item.name.replace("-", "")]
             if photos.empty:
-                logger.info(f"No photos found for PointID: {thing.name}")
-                continue
+                logger.info(f"No photos found for PointID: {db_item.name}")
+                return
 
-        for i, row in enumerate(photos.itertuples()):
+        for j, row in enumerate(photos.itertuples()):
             photo_path = row.OLEPath
-            srcblob = bucket.get_blob(f"nma-photos/{photo_path}")
+            srcblob = self._bucket.get_blob(f"nma-photos/{photo_path}")
             if not srcblob:
                 logger.critical(
-                    f"No photo found for PointID: {thing.name}, {photo_path}"
+                    f"No photo found for PointID: {db_item.name}, {photo_path}"
                 )
                 continue
 
@@ -67,56 +63,25 @@ def transfer_assets(session: Session) -> None:
             f = srcblob.download_as_bytes()
             ff = UploadFile(file=io.BytesIO(f), filename=filename, size=len(f))
 
-            uri, blob_name = gcs_upload(ff, bucket)
-            add_asset(session, ff, filename, thing.id, uri, blob_name)
+            uri, blob_name = gcs_upload(ff, self._bucket)
+            asset = Asset(
+                name=filename,
+                label=filename,
+                storage_path=blob_name,
+                storage_service="gcs",
+                mime_type="image/png",
+                size=ff.size,
+                uri=uri,
+            )
+            assoc = AssetThingAssociation()
+            assoc.thing = db_item
+            assoc.asset = asset
+            session.add(assoc)
+            session.add(asset)
+            session.commit()
             logger.info(
-                f"Added asset {j}-{i}/{n} thing.id={thing.id} thing={thing.name} uri: {uri}"
+                f"Added asset {i}-{j}/{n} thing.id={db_item.id} thing={db_item.name} uri: {uri}"
             )
 
 
-def transfer_assets_testing(session: Session) -> None:
-    for p in ("asset1.png", "asset2.png", "asset3.png"):
-        with open(f"./transfers/data/assets/{p}", "rb") as f:
-            uf = UploadFile(file=f, filename=p, size=10)
-            uri, blob_name = gcs_upload(uf)
-            thing_id = 151
-
-            if check_asset_exists(session, blob_name, thing_id):
-                logger.warning(f"Asset {blob_name} already exists. Skipping.")
-                continue
-            add_asset(session, uf, p, thing_id, uri, blob_name)
-
-
-def add_asset(
-    session: Session,
-    uf: UploadFile,
-    label: str,
-    thing_id: int,
-    uri: str,
-    blob_name: str,
-) -> None:
-    asset = Asset(
-        name=label,
-        label=label,
-        storage_path=blob_name,
-        storage_service="gcs",
-        mime_type="image/png",
-        size=uf.size,
-        uri=uri,
-    )
-    assoc = AssetThingAssociation()
-    audit_add({"sub": "foobar", "name": "Mr. Foobar"}, assoc)
-    thing = session.get(Thing, thing_id)
-    assoc.thing = thing
-    assoc.asset = asset
-    session.add(assoc)
-    session.add(asset)
-    session.commit()
-
-
-if __name__ == "__main__":
-
-    with session_ctx() as session:
-        transfer_assets(session)
-
 # ============= EOF =============================================
diff --git a/transfers/group_transfer.py b/transfers/group_transfer.py
index 0bad85cb7..5549a81d1 100644
--- a/transfers/group_transfer.py
+++ b/transfers/group_transfer.py
@@ -13,21 +13,28 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ===============================================================================
+import pandas as pd
 from sqlalchemy import select
 from sqlalchemy.orm import Session
 
 from db import Thing, Group, GroupThingAssociation
-from db.engine import session_ctx
-from transfers.util import read_csv
-from transfers.logger import logger
 from services.util import retrieve_latest_polymorphic_history_table_record
+from transfers.logger import logger
+from transfers.transferer import Transferer
+from transfers.util import read_csv
+
+
+class ProjectGroupTransferer(Transferer):
+    def __init__(self, *args, **kw):
+        super().__init__(*args, **kw)
+        self.source_table = "Projects"
+        self.source_dtypes = {"Project": str, "PointIDPrefix": str}
 
+    def _get_dfs(self):
+        df = read_csv(self.source_table, self.source_dtypes)
+        return df, df
 
-def transfer_groups(
-    session: Session,
-) -> None:
-    wdf = read_csv("Projects")
-    for i, row in enumerate(wdf.itertuples()):
+    def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series):
 
         sql = select(Group).where(Group.name == row.Project)
         group = session.scalars(sql).one_or_none()
@@ -79,7 +86,63 @@ def transfer_groups(
         session.commit()
 
 
-if __name__ == "__main__":
-    with session_ctx() as session:
-        transfer_groups(session)
+# def transfer_groups(
+#     session: Session,
+# ) -> None:
+#     wdf = read_csv("Projects")
+#     for i, row in enumerate(wdf.itertuples()):
+#
+#         sql = select(Group).where(Group.name == row.Project)
+#         group = session.scalars(sql).one_or_none()
+#         if not group:
+#             # add a group for each project
+#             group = Group(name=row.Project)
+#
+#         for prefix in row.PointIDPrefix.split(","):
+#             prefix = prefix.strip()
+#             if prefix:
+#                 # get all PointIDs that start with prefix
+#                 sql = select(Thing).where(Thing.name.like(f"{prefix}%"))
+#                 records = session.scalars(sql).unique().all()
+#                 if records:
+#                     logger.info(
+#                         f"Adding {len(records)} things to group {group.name}, prefix {prefix}"
+#                     )
+#                     group_is_monitoring_plan = False
+#                     for record in records:
+#                         # set the group_type to Monitoring Plan if at least one well is currently monitored
+#                         if not group_is_monitoring_plan:
+#                             if record.status_history:
+#                                 monitoring_status = [
+#                                     sh
+#                                     for sh in record.status_history
+#                                     if sh.status_type == "Monitoring Status"
+#                                 ]
+#                                 if monitoring_status:
+#                                     monitoring_status = retrieve_latest_polymorphic_history_table_record(
+#                                         record,
+#                                         "status_history",
+#                                         "Monitoring Status",
+#                                     )
+#                                     if (
+#                                         monitoring_status.status_value
+#                                         == "Currently monitored"
+#                                     ):
+#                                         group_is_monitoring_plan = True
+#                                         group.group_type = "Monitoring Plan"
+#                                         logger.info(
+#                                             f"  Setting group {group.name} type to Monitoring Plan based on thing {record.name}"
+#                                         )
+#
+#                         gta = GroupThingAssociation(group=group, thing=record)
+#                         session.add(gta)
+#                         group.thing_associations.append(gta)
+#
+#         session.add(group)
+#         session.commit()
+#
+#
+# if __name__ == "__main__":
+#     with session_ctx() as session:
+#         transfer_groups(session)
 # ============= EOF =============================================
diff --git a/transfers/link_ids_transfer.py b/transfers/link_ids_transfer.py
index f11f8bb97..dbb33f76f 100644
--- a/transfers/link_ids_transfer.py
+++ b/transfers/link_ids_transfer.py
@@ -24,184 +24,161 @@
     extract_organization,
     read_csv,
     replace_nans,
-    chunk_by_size,
 )
+from transfers.well_transfer import WellChunkTransferer
+
+
+class LinkIdsWellDataTransferer(WellChunkTransferer):
+    source_table = "WellData"
+    source_dtypes = {"OSEWellID": str, "OSEWelltagID": str}
+
+    def _chunk_step(self, session, dr, i, row, db_item):
+        if pd.isna(row.OSEWellID) and pd.isna(row.OSEWelltagID):
+            return
+
+        for aid, klass, regex in (
+            (row.OSEWellID, "OSEPOD", r"^[A-Z]{1,3}-\d{3,6}"),
+            (
+                row.OSEWelltagID,
+                "OSEWellTagID",
+                r"",
+            ),  # TODO: need to figure out regex for this field
+        ):
+            if pd.isna(aid):
+                # logger.warning(f"{klass} is null for {row.PointID}")
+                continue
+            print("aid", aid, type(aid))
+            # RULE: exclude any id that == 'X', '?'
+            if aid.strip().lower() in ("x", "?", "exempt"):
+                logger.critical(
+                    f'{klass} is "X", "?", or "exempt", id={aid} for {row.PointID}'
+                )
+                continue
 
+            if regex and not re.match(regex, aid):
+                logger.critical(
+                    f"{klass} id does not match regex {regex}, id={aid} for {row.PointID}"
+                )
+                continue
 
-def transfer_link_ids_welldata(session):
-    ldf = read_csv("WellData", dtype={"OSEWelltagID": str})
+            # TODO: add guards for null values
+            link_id = ThingIdLink()
+            link_id.thing = db_item
+            link_id.relation = klass
+            link_id.alternate_id = aid
+            link_id.alternate_organization = "NMOSE"
 
-    ldf = filter_to_valid_point_ids(session, ldf)
+            # does link_id need a class  e.g.
+            # link_id.alternate_id_class = klass
 
-    for chunk in chunk_by_size(ldf, 100):
-        things = (
-            session.query(Thing).filter(Thing.name.in_(chunk.PointID.tolist())).all()
-        )
-        for row in chunk.itertuples():
-            # RULE: exclude rows where both ids are null
-            if pd.isna(row.OSEWellID) and pd.isna(row.OSEWelltagID):
-                # logger.warning(
-                #     f"Both OSEWellID and OSEWelltagID are null for {row.PointID}"
-                # )
-                continue
+            session.add(link_id)
 
-            thing = next((l for l in things if l.name == row.PointID), None)
-            if thing is None:
-                logger.warning(
-                    f"Thing not found forPointID {row.PointID}. Skipping link ids."
-                )
-                continue
 
-            for aid, klass, regex in (
-                (row.OSEWellID, "OSEPOD", r"^[A-Z]{1,3}-\d{3,6}"),
-                (
-                    row.OSEWelltagID,
-                    "OSEWellTagID",
-                    r"",
-                ),  # TODO: need to figure out regex for this field
-            ):
-                if pd.isna(aid):
-                    # logger.warning(f"{klass} is null for {row.PointID}")
-                    continue
-
-                # RULE: exclude any id that == 'X', '?'
-                if aid.strip().lower() in ("x", "?", "exempt"):
-                    logger.critical(
-                        f'{klass} is "X", "?", or "exempt", id={aid} for {row.PointID}'
-                    )
-                    continue
-
-                if regex and not re.match(regex, aid):
-                    logger.critical(
-                        f"{klass} id does not match regex {regex}, id={aid} for {row.PointID}"
-                    )
-                    continue
-
-                # TODO: add guards for null values
-                link_id = ThingIdLink()
-                link_id.thing = thing
-                link_id.relation = klass
-                link_id.alternate_id = aid
-                link_id.alternate_organization = "NMOSE"
-
-                # does link_id need a class  e.g.
-                # link_id.alternate_id_class = klass
-
-                session.add(link_id)
-        session.commit()
-
-
-def add_link_alternate_site_id(session, row, thing):
-    if not row.AlternateSiteID:
-        return
-
-    link_id = ThingIdLink()
-    link_id.thing = thing
-    link_id.relation = "same_as"
-    link_id.alternate_id = row.AlternateSiteID
-
-    link_id.alternate_organization = extract_organization(str(row.AlternateSiteID))
-
-    # logger.info(f"adding link id: {row.PointID}")
-    session.add(link_id)
-
-
-def add_link_site_id(session, row, thing):
-    if not row.SiteID:
-        return
-
-    link_id = ThingIdLink()
-    link_id.thing = thing
-    link_id.relation = "same_as"
-
-    site_id = row.SiteID.strip()
-    if not re.match(r"^\d{15}$", site_id):
-        # TODO: lets make a sweet function for flagging issues
-        # flag for interrogation
-        logger.critical(
-            f"{row.PointID} alternate id {site_id} is not a valid USGS site id"
+class LinkIdsLocationDataTransferer(WellChunkTransferer):
+    source_table = "Location"
+    site_type = "GW"
+
+    def __init__(self, *args, **kw):
+        super().__init__(*args, **kw)
+
+        self._plss_regex = re.compile(
+            r"^T\d{1,3}[NS]\.R\d{1,3}[EW]\.S(?:[1-9]|[12]\d|3[0-6])(?:\.\d{1,5})?$"
         )
-        return
-
-    link_id.alternate_id = row.SiteID
-    link_id.alternate_organization = "USGS"
-    session.add(link_id)
-
-
-def add_link_plss(session, row, thing):
-
-    township = row.Township
-    township_direction = row.TownshipDirection
-    _range = row.Range
-    range_direction = row.RangeDirection
-    section = row.Section
-    section_direction = row.SectionDirection
-
-    if not township or not _range or not section:
-        return
-
-    link_id = ThingIdLink()
-    link_id.thing = thing
-    link_id.relation = "same_as"
-    link_id.alternate_organization = "PLSS"
-
-    alternate_id = f"T{township}{township_direction}.R{_range}{range_direction}.S{section}{section_direction}"
-    if not re.match(r"T\d{1,3}.R\d{1,3}.S\d{1,3}", alternate_id):
-        # flag for interrogation
-        logger.warning(f"alternate id {alternate_id} is not a valid PLSS")
-        return
-    link_id.alternate_id = alternate_id
-    link_id.alternate_organization = "PLSS"
-    session.add(link_id)
-
-
-def transfer_link_ids(session, site_type="GW"):
-    ldf = read_csv("Location")
-    ldf = ldf[ldf["SiteType"] == site_type]
-    ldf = ldf[ldf["Easting"].notna() & ldf["Northing"].notna()]
-    ldf = replace_nans(ldf)
-
-    ldf = filter_to_valid_point_ids(session, ldf)
-    for chunk in chunk_by_size(ldf, 100):
-        locations = (
-            session.query(Thing).filter(Thing.name.in_(chunk.PointID.tolist())).all()
+        self._usgs_regex = re.compile(r"^\d{15}$")
+
+    def _get_dfs(self):
+        input_df = read_csv(
+            self.source_table,
+            {
+                "SiteID": str,
+                "Township": str,
+                "TownshipDirection": str,
+                "Range": str,
+                "RangeDirection": str,
+                "SectionQuarters": str,
+            },
         )
-        for row in chunk.itertuples():
-            thing = next((l for l in locations if l.name == row.PointID), None)
-            if thing is None:
-                logger.warning(
-                    f"Thing with PointID {row.PointID} not found. Skipping link id."
-                )
-                continue
-            logger.info(
-                f"Processing PointID: {row.PointID}, Thing ID: {thing.id}, AlternateSiteID={row.AlternateSiteID}, "
-                f"AlternateSiteID2={row.AlternateSiteID2}"
+
+        ldf = input_df[input_df["SiteType"] == self.site_type]
+        ldf = ldf[ldf["Easting"].notna() & ldf["Northing"].notna()]
+        ldf = replace_nans(ldf)
+        cleaned_df = filter_to_valid_point_ids(ldf)
+        return input_df, cleaned_df
+
+    def _chunk_step(self, session, df, i, row, db_item):
+        logger.info(
+            f"Processing PointID: {row.PointID}, "
+            f"Thing ID: {db_item.id}, "
+            f"AlternateSiteID={row.AlternateSiteID}, "
+            f"AlternateSiteID2={row.AlternateSiteID2}"
+        )
+        for func in (
+            self._add_link_alternate_site_id,
+            self._add_link_site_id,
+            self._add_link_plss,
+        ):
+            link = func(row, db_item)
+            if link:
+                session.add(link)
+
+    def _add_link_alternate_site_id(self, row: pd.Series, thing: Thing):
+        if not row.AlternateSiteID:
+            return
+
+        return _make_thing_id_link(
+            thing, row.AlternateSiteID, extract_organization(str(row.AlternateSiteID))
+        )
+
+    def _add_link_site_id(self, row, thing):
+        if not row.SiteID:
+            return
+
+        site_id = row.SiteID.strip()
+        if not self._usgs_regex.match(site_id):
+            self._capture_error(
+                row.PointID, f"{site_id} is not a valid USGS site id", "SiteID"
+            )
+            logger.critical(
+                f"{row.PointID} alternate id {site_id} is not a valid USGS site id"
+            )
+            return
+
+        return _make_thing_id_link(thing, row.SiteID, "USGS")
+
+    def _add_link_plss(self, row, thing):
+        township = row.Township
+        township_direction = row.TownshipDirection
+        _range = row.Range
+        range_direction = row.RangeDirection
+        section = row.SectionQuarters
+        if not township or not _range or not section:
+            return
+
+        alternate_id = (
+            f"T{township}{township_direction}.R{_range}{range_direction}.S{section}"
+        )
+        if not self._plss_regex.match(alternate_id):
+            self._capture_error(
+                row.PointID,
+                f"{alternate_id} is not a valid PLSS",
+                "Township, TownshipDirection, Range, RangeDirection, Section, SectionDirection",
             )
-            add_link_alternate_site_id(session, row, thing)
-        session.commit()
-
-    # for i, row in enumerate(ldf.itertuples()):
-    #     thing = session.query(Thing).where(Thing.name == row.PointID).first()
-    #     if thing is None:
-    #         logger.warning(
-    #             f"Thing with PointID {row.PointID} not found. Skipping link id."
-    #         )
-    #         continue
-    #     logger.info(
-    #         f"Processing PointID: {row.PointID}, Thing ID: {thing.id}, AlternateSiteID={row.AlternateSiteID}, "
-    #         f"AlternateSiteID2={row.AlternateSiteID2}"
-    #     )
-    #     add_link_alternate_site_id(session, row, thing)
-    #     # add_link_site_id(session, row, thing)
-    #     # add_link_plss(session, row, thing)
-    #
-    #     # not clear what alternate_id2 is for, or what it maps to
-    #     # add_link_alternate_site_id2(session, row, thing)
-    #     if i and not i % 25:
-    #         session.commit()
-    #         session.flush()
-    #
-    # session.commit()
+
+            logger.critical(f"alternate id {alternate_id} is not a valid PLSS")
+            return
+
+        return _make_thing_id_link(thing, alternate_id, "PLSS")
+
+
+def _make_thing_id_link(
+    thing, alternate_id, alternate_organization, relation="same_as"
+):
+    return ThingIdLink(
+        thing=thing,
+        relation=relation,
+        alternate_id=alternate_id,
+        alternate_organization=alternate_organization,
+    )
 
 
 # ============= EOF =============================================
diff --git a/transfers/metrics.py b/transfers/metrics.py
index 25b6b626b..1f2b67bdd 100644
--- a/transfers/metrics.py
+++ b/transfers/metrics.py
@@ -22,7 +22,6 @@
 from pydantic import ValidationError
 from sqlalchemy import select, func
 from sqlalchemy.exc import ProgrammingError
-from sqlalchemy.orm import Session
 
 from db import (
     Thing,
@@ -33,7 +32,10 @@
     Parameter,
     Deployment,
     TransducerObservation,
+    Group,
+    Asset,
 )
+from db.engine import session_ctx
 from services.gcs_helper import get_storage_bucket
 
 
@@ -77,9 +79,24 @@ def sensor_metrics(self, *args, **kw) -> None:
     def well_screen_metrics(self, *args, **kw) -> None:
         self._handle_metrics(WellScreen, *args, **kw)
 
-    def contact_metrics(self, sess, input_df, cleaned_df, errors) -> None:
+    def welldata_link_ids_metrics(self, input_df, cleaned_df, errors) -> None:
+        self._write_metrics("WellData Link IDs", len(input_df), input_df, cleaned_df)
+        self._write_errors(errors)
+
+    def location_link_ids_metrics(self, input_df, cleaned_df, errors) -> None:
+        self._write_metrics(
+            "LocationData Link IDs", len(input_df), input_df, cleaned_df
+        )
+        self._write_errors(errors)
+
+    def asset_metrics(self, *args, **kw) -> None:
+        self._handle_metrics(Asset, *args, **kw)
+
+    def group_metrics(self, *args, **kw) -> None:
+        self._handle_metrics(Group, *args, **kw)
+
+    def contact_metrics(self, input_df, cleaned_df, errors) -> None:
         count = self._get_count(
-            sess,
             Contact,
         )
 
@@ -90,14 +107,15 @@ def contact_metrics(self, sess, input_df, cleaned_df, errors) -> None:
         self._writer.writerow(metrics)
         self._write_errors(errors)
 
-    def water_level_metrics(self, sess, input_df, cleaned_df, errors) -> None:
-        sql = (
-            select(func.count())
-            .select_from(Observation)
-            .join(Parameter)
-            .where(Parameter.parameter_name == "groundwater level")
-        )
-        count = sess.execute(sql).scalar_one()
+    def water_level_metrics(self, input_df, cleaned_df, errors) -> None:
+        with session_ctx() as sess:
+            sql = (
+                select(func.count())
+                .select_from(Observation)
+                .join(Parameter)
+                .where(Parameter.parameter_name == "groundwater level")
+            )
+            count = sess.execute(sql).scalar_one()
 
         metrics = self._make_metrics(
             "Manual Water Levels", len(input_df), len(cleaned_df), count
@@ -111,19 +129,18 @@ def acoustic_metrics(self, *args, **kw) -> None:
     def pressure_metrics(self, *args, **kw) -> None:
         self._transducer_metrics("Pressure Transducer", *args, **kw)
 
-    def _transducer_metrics(
-        self, sensor_type, sess, input_df, cleaned_df, errors
-    ) -> None:
-        sql = (
-            select(func.count())
-            .select_from(TransducerObservation)
-            .join(Deployment)
-            .join(Sensor)
-            .join(Parameter)
-            .where(Sensor.sensor_type == sensor_type)
-            .where(Parameter.parameter_name == "groundwater level")
-        )
-        count = sess.execute(sql).scalar_one()
+    def _transducer_metrics(self, sensor_type, input_df, cleaned_df, errors) -> None:
+        with session_ctx() as sess:
+            sql = (
+                select(func.count())
+                .select_from(TransducerObservation)
+                .join(Deployment)
+                .join(Sensor)
+                .join(Parameter)
+                .where(Sensor.sensor_type == sensor_type)
+                .where(Parameter.parameter_name == "groundwater level")
+            )
+            count = sess.execute(sql).scalar_one()
         metrics = self._make_metrics(sensor_type, len(input_df), len(cleaned_df), count)
         self._writer.writerow(metrics)
         self._write_errors(errors)
@@ -133,9 +150,9 @@ def _make_metrics(self, name, input_n, cleaned_n, count):
         return [name, input_n, cleaned_n, count, percent_issue]
 
     def _handle_metrics(
-        self, model, sess, input_df, cleaned_df, errors, where=None, name=None
+        self, model, input_df, cleaned_df, errors, where=None, name=None
     ) -> None:
-        count = self._get_count(sess, model, where=where)
+        count = self._get_count(model, where=where)
 
         if name is None:
             name = model.__name__
@@ -183,11 +200,12 @@ def _write_metrics(
         metrics = self._make_metrics(name, len(input_df), len(cleaned_df), count)
         self._writer.writerow(metrics)
 
-    def _get_count(self, sess: Session, model, where=None) -> int:
-        sql = select(func.count()).select_from(model)
-        if where:
-            sql = sql.where(where)
-        count = sess.execute(sql).scalar_one()
+    def _get_count(self, model, where=None) -> int:
+        with session_ctx() as sess:
+            sql = select(func.count()).select_from(model)
+            if where:
+                sql = sql.where(where)
+            count = sess.execute(sql).scalar_one()
         return count
 
 
diff --git a/transfers/sensor_transfer.py b/transfers/sensor_transfer.py
index 6c9a75cbc..76f9f4fe9 100644
--- a/transfers/sensor_transfer.py
+++ b/transfers/sensor_transfer.py
@@ -15,16 +15,18 @@
 # ===============================================================================
 from datetime import datetime
 
+import pandas as pd
 from sqlalchemy import select
+from sqlalchemy.orm import Session
 
-from db import Sensor, Deployment, Thing
+from db import Sensor, Deployment, Thing, Base
 from transfers.transferer import ThingBasedTransferer
 from transfers.util import (
     read_csv,
     logger,
     filter_to_valid_point_ids,
     replace_nans,
-    RecordingIntervalEstimator,
+    SensorParameterEstimator,
 )
 
 EQUIPMENT_TO_SENSOR_TYPE_MAP = {
@@ -42,11 +44,11 @@ def __init__(self, *args, **kwargs):
         self._estimators = {}
         self._added = {}
 
-    def _get_dfs(self, session):
+    def _get_dfs(self):
         input_df = read_csv(self.source_table)
         input_df.columns = input_df.columns.str.replace(" ", "_")
         input_df = input_df[input_df.SerialNo.notna()]
-        cleaned_df = filter_to_valid_point_ids(session, input_df)
+        cleaned_df = filter_to_valid_point_ids(input_df)
         cleaned_df = replace_nans(cleaned_df)
         return input_df, cleaned_df
 
@@ -56,7 +58,15 @@ def _no_db_item_warning(self, index):
     def _get_prepped_group(self, group):
         return group.sort_values(by=["DateInstalled"])
 
-    def _step(self, session, row, db_item):
+    def _get_estimator(self, sensor_type):
+        if sensor_type in self._estimators:
+            estimator = self._estimators[sensor_type]
+        else:
+            estimator = SensorParameterEstimator(sensor_type)
+            self._estimators[sensor_type] = estimator
+        return estimator
+
+    def _group_step(self, session: Session, row: pd.Series, db_item: Base):
         pointid = self._get_point_id(row, db_item)
 
         try:
@@ -66,14 +76,8 @@ def _step(self, session, row, db_item):
                 f"Skipping equipment with type {row.EquipmentType} for point {pointid}"
             )
             error = f"key error adding sensor_type:{row.EquipmentType} error: {e}"
-            self.errors.append(
-                {
-                    "pointid": pointid,
-                    "error": error,
-                    "table": self.source_table,
-                    "field": "EquipmentType",
-                }
-            )
+            self._capture_error(pointid, error, "EquipmentType")
+
             return
 
         if row.SerialNo in self._added:
@@ -114,21 +118,29 @@ def _step(self, session, row, db_item):
                 row.DateInstalled, "%Y-%m-%d %H:%M:%S.%f"
             ).date()
         else:
-            pointid = self._get_point_id(row)
-            logger.critical(
-                f"Installation Date cannot be None. Skipping deployment. Sensor: {row.ID}, "
-                f"SerialNo: {row.SerialNo} PointID: {pointid}"
-            )
-            self.errors.append(
-                {
-                    "pointid": pointid,
-                    "error": f"row.ID={row.ID}, row.SerialNo={row.SerialNo}. Installation Date cannot "
-                    f"be None",
-                    "table": self.source_table,
-                    "field": "DateInstalled",
-                }
-            )
-            return
+            pointid = self._get_point_id(row, None)
+            estimator = self._get_estimator(sensor_type)
+            installation_date = estimator.estimate_installation_date(row)
+            if not installation_date:
+                logger.critical(
+                    f"Installation Date cannot be None. Skipping deployment. Sensor: {row.ID}, "
+                    f"SerialNo: {row.SerialNo} PointID: {pointid}"
+                )
+                self._capture_error(
+                    pointid,
+                    f"row.SerialNo={row.SerialNo}. Installation Date cannot be None",
+                    "DateInstalled",
+                )
+                return
+            else:
+                logger.warning(
+                    f"Estimated installation date={installation_date} for {pointid}"
+                )
+                self._capture_error(
+                    pointid,
+                    f"Estimated installation date={installation_date}. Is this correct?",
+                    "DateInstalled",
+                )
 
         removal_date = None
         if row.DateRemoved:
@@ -141,12 +153,7 @@ def _step(self, session, row, db_item):
             recording_interval = int(row.RecordingInterval)
         except (ValueError, TypeError):
             # try to calculate recording interval from measurements
-            if sensor_type in self._estimators:
-                estimator = self._estimators[sensor_type]
-            else:
-                estimator = RecordingIntervalEstimator(sensor_type)
-                self._estimators[sensor_type] = estimator
-
+            estimator = self._get_estimator(sensor_type)
             recording_interval, unit, error = estimator.estimate_recording_interval(
                 row, installation_date, removal_date
             )
@@ -157,18 +164,20 @@ def _step(self, session, row, db_item):
                     f"name={sensor.name}, serial_no={sensor.serial_no}. "
                     f"estimated recording interval: {recording_interval} {unit}"
                 )
+                self._capture_error(
+                    pointid,
+                    f"Estimated recording interval={recording_interval} {unit}. Is this correct?",
+                    "RecordingInterval",
+                )
+
             else:
                 logger.critical(
                     f"name={sensor.name}, serial_no={sensor.serial_no} error={error}"
                 )
-
-                self.errors.append(
-                    {
-                        "pointid": pointid,
-                        "error": f"name={sensor.name}, row.SerialNo={row.SerialNo}. error={error}",
-                        "table": self.source_table,
-                        "field": "RecordingInterval",
-                    }
+                self._capture_error(
+                    pointid,
+                    f"name={sensor.name}, row.SerialNo={row.SerialNo}. error={error}",
+                    "RecordingInterval",
                 )
 
         sql = (
@@ -217,195 +226,4 @@ def _step(self, session, row, db_item):
             sensor.sensor_status = "Retired"
 
 
-# def transfer_sensors(session):
-#     source_table = "Equipment"
-#     input_df = read_csv(source_table)
-#     input_df.columns = input_df.columns.str.replace(" ", "_")
-#     input_df = input_df[input_df.SerialNo.notna()]
-#     cleaned_df = filter_to_valid_point_ids(session, input_df)
-#     cleaned_df = replace_nans(cleaned_df)
-#     errors = []
-#     grouped_equipment = cleaned_df.groupby(["PointID"])
-#     added = {}
-#     estimators = {}
-#     for index, group in grouped_equipment:
-#         pointid = index[0]
-#         thing = session.query(Thing).filter(Thing.name == pointid).first()
-#         if thing is None:
-#             logger.warning(
-#                 f"Skipping sensor transfer for Thing with PointID {pointid} since it is not in the DB"
-#             )
-#             continue
-#         ordered_group = group.sort_values(by=["DateInstalled"])
-#
-#         try:
-#             for row in ordered_group.itertuples():
-#                 try:
-#                     sensor_type = EQUIPMENT_TO_SENSOR_TYPE_MAP[row.EquipmentType]
-#                 except KeyError as e:
-#                     logger.critical(
-#                         f"Skipping equipment with type {row.EquipmentType} for point {pointid}"
-#                     )
-#                     error = (
-#                         f"key error adding sensor_type:{row.EquipmentType} error: {e}"
-#                     )
-#                     errors.append(
-#                         {
-#                             "pointid": pointid,
-#                             "error": error,
-#                             "table": source_table,
-#                             "field": "EquipmentType",
-#                         }
-#                     )
-#                     continue
-#
-#                 if row.SerialNo in added:
-#                     logger.info(
-#                         f"Sensor with serial number {row.SerialNo} already added in this transfer session. Only creating deployment for that record"
-#                     )
-#                     sensor = added[row.SerialNo]
-#                 else:
-#                     sensor = (
-#                         session.query(Sensor)
-#                         .filter(Sensor.serial_no == row.SerialNo)
-#                         .one_or_none()
-#                     )
-#                     if sensor:
-#                         logger.info(
-#                             f"Sensor with serial number {row.SerialNo} already exists. Only creating deployment for that record"
-#                         )
-#
-#                 if not sensor:
-#                     # TODO: Add validation
-#                     sensor = Sensor(
-#                         nma_pk_equipment=row.GlobalID,
-#                         name=row.ID,
-#                         sensor_type=sensor_type,
-#                         model=row.Model,
-#                         serial_no=row.SerialNo,
-#                         owner_agency="NMBGMR",
-#                         notes=row.Equipment_Notes,
-#                     )
-#                     added[row.SerialNo] = sensor
-#                     session.add(sensor)
-#                     logger.info(
-#                         f"Added sensor {sensor.name} with serial number {sensor.serial_no}"
-#                     )
-#
-#                 if row.DateInstalled:
-#                     installation_date = datetime.strptime(
-#                         row.DateInstalled, "%Y-%m-%d %H:%M:%S.%f"
-#                     ).date()
-#                 else:
-#                     logger.critical(
-#                         f"Installation Date cannot be None. Skipping deployment. Sensor: {row.ID}, "
-#                         f"SerialNo: {row.SerialNo} PointID: {pointid}"
-#                     )
-#                     errors.append(
-#                         {
-#                             "pointid": pointid,
-#                             "error": f"row.ID={row.ID}, row.SerialNo={row.SerialNo}. Installation Date cannot "
-#                             f"be None",
-#                             "table": source_table,
-#                             "field": "DateInstalled",
-#                         }
-#                     )
-#                     continue
-#
-#                 removal_date = None
-#                 if row.DateRemoved:
-#                     removal_date = datetime.strptime(
-#                         row.DateRemoved, "%Y-%m-%d %H:%M:%S.%f"
-#                     ).date()
-#
-#                 recording_interval_unit = "hour"
-#                 try:
-#                     recording_interval = int(row.RecordingInterval)
-#                 except (ValueError, TypeError):
-#                     error = "RecordingInterval is not an integer"
-#                     # try to calculate recording interval from measurements
-#                     if sensor_type in estimators:
-#                         estimator = estimators[sensor_type]
-#                     else:
-#                         estimator = RecordingIntervalEstimator(sensor_type)
-#                         estimators[sensor_type] = estimator
-#
-#                     recording_interval, unit, error = (
-#                         estimator.estimate_recording_interval(
-#                             row, installation_date, removal_date
-#                         )
-#                     )
-#
-#                     if recording_interval:
-#                         recording_interval_unit = unit
-#                         logger.info(
-#                             f"name={sensor.name}, serial_no={sensor.serial_no}. "
-#                             f"estimated recording interval: {recording_interval} {unit}"
-#                         )
-#                     else:
-#                         logger.critical(
-#                             f"name={sensor.name}, serial_no={sensor.serial_no} error={error}"
-#                         )
-#                         errors.append(
-#                             {
-#                                 "pointid": pointid,
-#                                 "error": f"name={sensor.name}, row.SerialNo={row.SerialNo}. error={error}",
-#                                 "table": source_table,
-#                                 "field": "RecordingInterval",
-#                             }
-#                         )
-#                 sql = (
-#                     select(Deployment)
-#                     .join(Thing)
-#                     .join(Sensor)
-#                     .where(Thing.name == pointid)
-#                     .where(Sensor.serial_no == sensor.serial_no)
-#                     .where(Deployment.installation_date == installation_date)
-#                     .where(Deployment.removal_date == removal_date)
-#                 )
-#
-#                 existing_deployment = session.execute(sql).scalars().one_or_none()
-#                 if existing_deployment:
-#                     logger.info("existing deployment")
-#                     continue
-#
-#                 # TODO: add validation
-#                 deployment = Deployment(
-#                     thing=thing,
-#                     sensor=sensor,
-#                     installation_date=installation_date,
-#                     removal_date=removal_date,
-#                     recording_interval=recording_interval,
-#                     recording_interval_units=recording_interval_unit,
-#                     hanging_cable_length=row.HangingCableLength,
-#                     hanging_point_height=row.HangingPointHgt,
-#                     hanging_point_description=row.HangingPointDescription,
-#                 )
-#                 session.add(deployment)
-#                 logger.info(
-#                     f"Added deployment for sensor with serial number {sensor.serial_no}, deployed to {thing.name}: | Installation Date: {installation_date} | Removal Date: {removal_date}"
-#                 )
-#
-#                 """
-#                 Developer's notes
-#
-#                 Since it's unclear beforehand if a sensor has been removed just update
-#                 the sensor_status based off of each deployments installation/removal
-#                 dates
-#                 """
-#                 if installation_date:
-#                     sensor.sensor_status = "In Service"
-#                 if removal_date:
-#                     sensor.sensor_status = "Retired"
-#             session.commit()
-#         except Exception as e:
-#             import traceback
-#
-#             traceback.print_exc()
-#             logger.critical(f"Could not add sensor and deployment: {e}")
-#             errors.append({"pointid": pointid, "error": e, "table": source_table})
-#
-#     return input_df, cleaned_df, errors
-
-
 # ============= EOF =============================================
diff --git a/transfers/transfer.py b/transfers/transfer.py
index a2d7544a9..97086d10b 100644
--- a/transfers/transfer.py
+++ b/transfers/transfer.py
@@ -25,16 +25,18 @@
     transfer_water_levels_acoustic,
 )
 from core.initializers import erase_and_rebuild_db
-from db.engine import session_ctx
 
-from transfers.group_transfer import transfer_groups
-from transfers.link_ids_transfer import transfer_link_ids, transfer_link_ids_welldata
+from transfers.group_transfer import ProjectGroupTransferer
+from transfers.link_ids_transfer import (
+    LinkIdsWellDataTransferer,
+    LinkIdsLocationDataTransferer,
+)
 from transfers.contact_transfer import transfer_contacts
 from transfers.sensor_transfer import SensorTransferer
 from transfers.waterlevels_transfer import transfer_water_levels
 from transfers.well_transfer import WellTransferer, WellScreenTransferer
 
-from transfers.asset_transfer import transfer_assets
+from transfers.asset_transfer import AssetTransferer
 from transfers.util import timeit, timeit_direct
 from transfers.logger import logger, save_log_to_bucket
 
@@ -112,14 +114,18 @@ def transfer_all(sess, metrics, limit=100):
     or "Field duplicate")
     """
     message("TRANSFERRING LINK IDS")
-    timeit_direct(transfer_link_ids, sess)
-    timeit_direct(transfer_link_ids_welldata, sess)
+    results = _execute_transfer(LinkIdsWellDataTransferer, flags=flags)
+    metrics.welldata_link_ids_metrics(*results)
+    results = _execute_transfer(LinkIdsLocationDataTransferer, flags=flags)
+    metrics.location_link_ids_metrics(*results)
 
     message("TRANSFERRING GROUPS")
-    timeit_direct(transfer_groups, sess)
+    results = _execute_transfer(ProjectGroupTransferer, flags=flags)
+    metrics.group_metrics(*results)
 
     message("TRANSFERRING ASSETS")
-    timeit_direct(transfer_assets, sess)
+    results = _execute_transfer(AssetTransferer, flags=flags)
+    metrics.asset_metrics(*results)
 
 
 def _execute_transfer(klass, flags: dict = None):
@@ -128,28 +134,26 @@ def _execute_transfer(klass, flags: dict = None):
     return transferer.input_df, transferer.cleaned_df, transferer.errors
 
 
-def transfer_debugging(sess, metrics, limit=100):
+def transfer_debugging(metrics, limit=100):
     message("STARTING TRANSFER DEBUG", new_line_at_top=False)
 
     if int(os.environ.get("ERASE_AND_REBUILD", 0)):
         logger.info("Erase and rebuilding database")
         erase_and_rebuild_db()
 
-    message("TRANSFERRING WELLS")
-
     flags = {"TRANSFER_ALL_WELLS": True, "LIMIT": limit}  # not currently used
 
+    message("TRANSFERRING WELLS")
     results = _execute_transfer(WellTransferer, flags=flags)
-    metrics.well_metrics(sess, *results)
+    metrics.well_metrics(*results)
 
     message("TRANSFERRING WELL SCREENS")
     results = _execute_transfer(WellScreenTransferer, flags=flags)
-    metrics.well_screen_metrics(sess, *results)
+    metrics.well_screen_metrics(*results)
 
     message("TRANSFERRING SENSORS")
     results = _execute_transfer(SensorTransferer, flags=flags)
-    # results = timeit_direct(transfer_sensors, sess)
-    metrics.sensor_metrics(sess, *results)
+    metrics.sensor_metrics(*results)
 
     # Developer's notes all the metadata for these Things are not defined in the models/schemas yet'
     # message("TRANSFERRING SPRINGS")
@@ -164,42 +168,46 @@ def transfer_debugging(sess, metrics, limit=100):
     # message("TRANSFERRING METEOROLOGICAL")
     # timeit_direct(transfer_met, sess, limit)
 
-    message("TRANSFERRING CONTACTS")
-    results = timeit_direct(transfer_contacts, sess)
-    metrics.contact_metrics(sess, *results)
-
-    message("TRANSFERRING WATER LEVELS")
-    results = timeit_direct(transfer_water_levels, sess)
-    metrics.water_level_metrics(sess, *results)
-
-    message("TRANSFERRING WATER LEVELS PRESSURE")
-    results = timeit_direct(transfer_water_levels_pressure, sess)
-    metrics.pressure_metrics(sess, *results)
-
-    message("TRANSFERRING WATER LEVELS ACOUSTIC")
-    results = timeit_direct(transfer_water_levels_acoustic, sess)
-    metrics.acoustic_metrics(sess, *results)
+    # message("TRANSFERRING CONTACTS")
+    # results = timeit_direct(transfer_contacts, sess)
+    # metrics.contact_metrics(sess, *results)
+    #
+    # message("TRANSFERRING WATER LEVELS")
+    # results = timeit_direct(transfer_water_levels, sess)
+    # metrics.water_level_metrics(sess, *results)
+    #
+    # message("TRANSFERRING WATER LEVELS PRESSURE")
+    # results = timeit_direct(transfer_water_levels_pressure, sess)
+    # metrics.pressure_metrics(sess, *results)
+    #
+    # message("TRANSFERRING WATER LEVELS ACOUSTIC")
+    # results = timeit_direct(transfer_water_levels_acoustic, sess)
+    # metrics.acoustic_metrics(sess, *results)
 
-    # message("TRANSFERRING LINK IDS")
-    # timeit_direct(transfer_link_ids, sess)
-    # timeit_direct(transfer_link_ids_welldata, sess)
+    message("TRANSFERRING LINK IDS")
+    results = _execute_transfer(LinkIdsWellDataTransferer, flags=flags)
+    metrics.welldata_link_ids_metrics(*results)
+    results = _execute_transfer(LinkIdsLocationDataTransferer, flags=flags)
+    metrics.location_link_ids_metrics(*results)
 
-    # message("TRANSFERRING GROUPS")
-    # timeit_direct(transfer_groups, sess)
+    message("TRANSFERRING GROUPS")
+    results = _execute_transfer(ProjectGroupTransferer, flags=flags)
+    metrics.group_metrics(*results)
 
-    # message("TRANSFERRING ASSETS")
-    # timeit_direct(transfer_assets, sess)
+    message("TRANSFERRING ASSETS")
+    results = _execute_transfer(AssetTransferer, flags=flags)
+    metrics.asset_metrics(*results)
 
 
 def main():
     message("START--------------------------------------")
     limit = int(os.getenv("TRANSFER_LIMIT", 1000))
     metrics = Metrics()
-    with session_ctx() as sess:
-        if int(os.getenv("TRANSFER_DEBUG", 0)):
-            transfer_debugging(sess, metrics, limit=limit)
-        else:
-            transfer_all(sess, metrics, limit=limit)
+
+    if int(os.getenv("TRANSFER_DEBUG", 0)):
+        transfer_debugging(metrics, limit=limit)
+    else:
+        transfer_all(metrics, limit=limit)
 
     metrics.close()
     metrics.save_to_storage_bucket()
diff --git a/transfers/transferer.py b/transfers/transferer.py
index 273462585..8d84e1170 100644
--- a/transfers/transferer.py
+++ b/transfers/transferer.py
@@ -19,28 +19,47 @@
 from pandas import DataFrame
 from sqlalchemy.orm import Session
 
-from db import Thing
+from db import Thing, Base
 from db.engine import session_ctx
 from transfers.logger import logger
 from transfers.util import chunk_by_size
 
 
+class ManualFixer(object):
+    pass
+
+
 class Transferer(object):
     input_df: pd.DataFrame = None
     cleaned_df: pd.DataFrame = None
     errors: list = None
     flags: dict = None
+    source_table: str = None
 
     def __init__(self, flags: dict = None):
         self.errors = []
         self.flags = flags if flags else {}
+        self.manual_fixer = ManualFixer()
 
     def transfer(self):
         with session_ctx() as session:
-            self.input_df, self.cleaned_df = self._get_dfs(session)
+            self.input_df, self.cleaned_df = self._get_dfs()
             self._transfer_hook(session)
             session.commit()
 
+    def _capture_error(self, pointid, error, field, table=None):
+        if table is None:
+            table = self.source_table
+
+        self.errors.append(
+            {
+                "pointid": pointid,
+                "error": error,
+                "table": table,
+                "field": field,
+            }
+        )
+
     def _transfer_hook(self, session: Session):
         self._limit_iterator(session, self.flags.get("LIMIT", 0))
 
@@ -68,18 +87,18 @@ def _limit_iterator(self, session: Session, limit: int, step: int = 25):
                     session.rollback()
                     continue
 
-            self._iterator(session, df, i, row)
+            self._step(session, df, i, row)
 
         session.commit()
         self._after_hook(session)
 
-    def _iterator(self, session: Session, df: pd.DataFrame, i: int, row: dict):
+    def _step(self, session: Session, df: pd.DataFrame, i: int, row: dict):
         raise NotImplementedError("Must implement _iterator method")
 
     def _after_hook(self, session: Session):
         pass
 
-    def _get_dfs(self, session: Session):
+    def _get_dfs(self):
         raise NotImplementedError("Must implement _get_dfs method")
 
 
@@ -100,7 +119,7 @@ def _transfer_hook(self, session: Session):
                 if not dbitem:
                     self._missing_db_item_warning(row)
                     continue
-                self._chunk_iterator(session, df, i, row, dbitem)
+                self._chunk_step(session, df, i, row, dbitem)
 
     # def chunk_transfer(self):
     #     with session_ctx() as session:
@@ -125,7 +144,7 @@ def _get_df_chunk(self, session, chunk):
     def _missing_db_item_warning(self, row):
         raise NotImplementedError("Must be implemented in subclass")
 
-    def _chunk_iterator(self, session, df, i, row, dbitem):
+    def _chunk_step(self, session, df, i, row, dbitem):
         raise NotImplementedError("Must be implemented in subclass")
 
     def _get_db_item(self, chunk, row):
@@ -150,21 +169,19 @@ def _group_iterator(self, session: Session):
             prepped_group = self._get_prepped_group(group)
             for row in prepped_group.itertuples():
                 try:
-                    self._step(session, row, db_item)
+                    self._group_step(session, row, db_item)
                 except Exception as e:
                     import traceback
 
                     pointid = self._get_point_id(row, db_item)
                     traceback.print_exc()
                     logger.critical(f"Could not add sensor and deployment: {e}")
-                    self.errors.append(
-                        {"pointid": pointid, "error": e, "table": self.source_table}
-                    )
+                    self._capture_error(pointid, e, "UnknownField")
 
-    def _get_point_id(self, row, db_item) -> str:
+    def _get_point_id(self, row: pd.Series, db_item: Base) -> str:
         return row.PointID
 
-    def _step(self, session: Session, row, db_item):
+    def _group_step(self, session: Session, row: pd.Series, db_item: Base):
         raise NotImplementedError("Must be implemented in subclass")
 
     def _get_prepped_group(self, group) -> DataFrame:
diff --git a/transfers/util.py b/transfers/util.py
index 023d4a397..70e6952a5 100644
--- a/transfers/util.py
+++ b/transfers/util.py
@@ -26,10 +26,10 @@
 import pytz
 from shapely import Point
 from sqlalchemy import select
-from sqlalchemy.orm import Session
 
 from constants import SRID_WGS84, SRID_UTM_ZONE_13N
 from db import Thing, Location, DataProvenance
+from db.engine import session_ctx
 from services.gcs_helper import get_storage_bucket
 
 # from services.lexicon_mapper import lexicon_mapper
@@ -72,22 +72,26 @@ def estimate_measuring_point_height(
             logger.info(
                 f"No MPHeight found for PointID: {row.PointID}. Estimating from measurements."
             )
-            # try to estimate mpheight from measurements
             mphs = []
             start_dates = []
             mph_descs = []
-            for m in df.itertuples():
-                mphi = m.DepthToWater - m.DepthToWaterBGS
-                start_date = m.DateMeasured
-                if mphi not in mphs:
-                    mphs.append(mphi)
-                    mph_descs.append(
-                        "Auto calculated from measurements at depth to water and depth to water below ground surface"
-                    )
-                    start_dates.append(start_date)
-            logger.info(
-                f"Estimated MPHeight: {mphs}, {start_dates} for PointID: {row.PointID}."
-            )
+
+            if len(df) == 0:
+                logger.warning(f"No measurements found for PointID: {row.PointID}.")
+            else:
+                # try to estimate mpheight from measurements
+                for m in df.itertuples():
+                    mphi = m.DepthToWater - m.DepthToWaterBGS
+                    start_date = m.DateMeasured
+                    if mphi not in mphs:
+                        mphs.append(mphi)
+                        mph_descs.append(
+                            "Auto calculated from measurements at depth to water and depth to water below ground surface"
+                        )
+                        start_dates.append(start_date)
+                logger.info(
+                    f"Estimated MPHeight: {mphs}, {start_dates} for PointID: {row.PointID}."
+                )
         else:
             mphs = [mph]
             mph_descs = [mph_desc]
@@ -105,7 +109,7 @@ def estimate_measuring_point_height(
         return zip(mphs, mph_descs, start_dates, end_dates)
 
 
-class RecordingIntervalEstimator:
+class SensorParameterEstimator:
     def __init__(self, sensor_type: str):
         if sensor_type == "Pressure Transducer":
             self._df = read_csv("WaterLevelsContinuous_Pressure")
@@ -115,6 +119,23 @@ def __init__(self, sensor_type: str):
         # convert "DateMeasured" to date"
         self._df["DateMeasured"] = pd.to_datetime(self._df["DateMeasured"]).dt.date
 
+    def estimate_installation_date(
+        self, record: pd.Series
+    ) -> tuple[datetime | None, str | None]:
+        # get the first measurement for this pointid
+        point_id = record.PointID
+        cdf = self._get_values(point_id)
+        if len(cdf) == 0:
+            logger.warning(
+                f"Unable to estimate installation date, no measurements found for PointID: {point_id}."
+            )
+            return None
+        return cdf["DateMeasured"].min()
+
+    def _get_values(self, point_id: str):
+        cdf = self._df[self._df["PointID"] == point_id]
+        return cdf.sort_values("DateMeasured")
+
     def estimate_recording_interval(
         self,
         record: pd.Series,
@@ -122,12 +143,10 @@ def estimate_recording_interval(
         removal_date: datetime = None,
     ) -> tuple[int | None, str | None, str | None]:
         point_id = record.PointID
-
-        cdf = self._df[self._df["PointID"] == point_id]
+        cdf = self._get_values(point_id)
         if len(cdf) == 0:
             return None, None, f"No measurements found for PointID: {point_id}"
 
-        cdf = cdf.sort_values("DateMeasured")
         if installation_date is not None:
             cdf = cdf[cdf["DateMeasured"] >= installation_date]
         if removal_date is not None:
@@ -203,9 +222,10 @@ def read_csv(name: str, dtype: dict | None = None) -> pd.DataFrame:
         return pd.read_csv(io.BytesIO(data))
 
 
-def get_valid_point_ids(session, thing_type="water well"):
-    things = get_valid_things(session, thing_type)
-    valid_pointids = [thing.name for thing in things]
+def get_valid_point_ids(thing_type="water well"):
+    with session_ctx() as session:
+        things = get_valid_things(session, thing_type)
+        valid_pointids = [thing.name for thing in things]
     return valid_pointids
 
 
@@ -243,9 +263,10 @@ def data_path(r):
     return root / name
 
 
-def filter_non_transferred_wells(sess: Session, df: pd.DataFrame) -> pd.DataFrame:
-    sql = select(Thing.name).where(Thing.thing_type == "water well")
-    existing_ids = sess.execute(sql).scalars().all()
+def filter_non_transferred_wells(df: pd.DataFrame) -> pd.DataFrame:
+    with session_ctx() as sess:
+        sql = select(Thing.name).where(Thing.thing_type == "water well")
+        existing_ids = sess.execute(sql).scalars().all()
     return df[~(df["PointID"].isin(existing_ids))]
 
 
@@ -265,7 +286,7 @@ def filter_by_welldata_datasource_and_project(df: pd.DataFrame) -> pd.DataFrame:
     counts = df.groupby("DataSource").size().reset_index(name="WellCount")
     counts = counts.sort_values("WellCount", ascending=False)
     for count in counts.itertuples():
-        logger.info(f"{count.DataSource}: {count.WellCount}")
+        logger.info(f"{count.WellCount}: {count.DataSource[:50]} ")
 
     pldf = read_csv("ProjectLocations")
     collabnet = pldf[pldf["ProjectName"] == "Water Level Network"]
@@ -288,8 +309,8 @@ def filter_by_valid_measuring_agency(df: pd.DataFrame) -> pd.DataFrame:
     return df[df["MeasuringAgency"].isin(valid_measuring_agencies)]
 
 
-def filter_to_valid_point_ids(session: Session, df: pd.DataFrame) -> pd.DataFrame:
-    valid_point_ids = get_valid_point_ids(session)
+def filter_to_valid_point_ids(df: pd.DataFrame) -> pd.DataFrame:
+    valid_point_ids = get_valid_point_ids()
     return df[df["PointID"].isin(valid_point_ids)]
 
 
diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py
index cc049876d..45a867a72 100644
--- a/transfers/well_transfer.py
+++ b/transfers/well_transfer.py
@@ -117,9 +117,7 @@ def _extract_casing_materials(row) -> list[str]:
     return materials
 
 
-def get_wells_to_transfer(
-    sess: Session, flags: dict = None
-) -> tuple[pd.DataFrame, pd.DataFrame]:
+def get_wells_to_transfer(flags: dict = None) -> tuple[pd.DataFrame, pd.DataFrame]:
     # if flags is None:
     #     flags = {}
 
@@ -145,7 +143,7 @@ def get_wells_to_transfer(
     #     cleaned_df = wdf
 
     cleaned_df = filter_by_welldata_datasource_and_project(wdf)
-    cleaned_df = filter_non_transferred_wells(sess, cleaned_df)
+    cleaned_df = filter_non_transferred_wells(cleaned_df)
 
     return input_df, cleaned_df
 
@@ -176,23 +174,16 @@ def __init__(self, *args, **kw):
         self._cached_elevations = get_cached_elevations()
         self._added_locations = {}
 
-    def _get_dfs(self, session: Session):
-        return get_wells_to_transfer(session, self.flags)
+    def _get_dfs(self):
+        return get_wells_to_transfer(self.flags)
 
-    def _iterator(self, session, df, i, row):
+    def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series):
         pointid = row.PointID
         if df[df["PointID"] == pointid].shape[0] > 1:
             logger.critical(
                 f"transfer_wells. PointID {pointid} has duplicate records. Skipping."
             )
-            self.errors.append(
-                {
-                    "pointid": pointid,
-                    "error": "duplicate records",
-                    "table": self.source_table,
-                    "field": "PointID",
-                }
-            )
+            self._capture_error(pointid, "duplicate records", "PointID")
             return
 
         location = None
@@ -203,16 +194,8 @@ def _iterator(self, session, df, i, row):
         except Exception as e:
             if location is not None:
                 session.expunge(location)
-            # these rollbacks are cause an issue because they are discarding good data
-            # session.rollback()
-            self.errors.append(
-                {
-                    "pointid": row.PointID,
-                    "error": e,
-                    "table": "Location",
-                    "field": str(e),
-                }
-            )
+
+            self._capture_error(row.PointID, str(e), str(e), "Location")
             logger.critical(f"Error making location for {row.PointID}: {e}")
             return
 
@@ -249,9 +232,7 @@ def _iterator(self, session, df, i, row):
 
             CreateWell.model_validate(data)
         except ValidationError as e:
-            self.errors.append(
-                {"pointid": row.PointID, "error": e, "table": "WellData"}
-            )
+            self._capture_error(row.PointID, str(e), "UnknownField")
             logger.critical(
                 f"Validation error for row {i} with PointID {row.PointID}: {e.errors()}"
             )
@@ -310,9 +291,8 @@ def _iterator(self, session, df, i, row):
             if well is not None:
                 session.expunge(well)
 
-            self.errors.append(
-                {"pointid": row.PointID, "error": e, "table": "WellData"}
-            )
+            self._capture_error(row.PointID, str(e), "UnknownField")
+
             logger.critical(f"Error creating well for {row.PointID}: {e}")
             return
 
@@ -418,11 +398,22 @@ def _after_hook(self, session):
         session.commit()
 
 
-class WellScreenTransferer(ChunkTransferer):
-    def _get_dfs(self, session: Session):
-        input_df = read_csv("WellScreens")
+class WellChunkTransferer(ChunkTransferer):
+    source_table: str = None
+    source_dtypes: dict = None
+
+    def __init__(self, *args, **kw):
+        super().__init__(*args, **kw)
+        if self.source_table is None:
+            raise ValueError("source_table must be set")
+
+    def _get_dfs(self):
+        if self.source_table is None:
+            raise ValueError("source_table must be set")
+
+        input_df = read_csv(self.source_table, self.source_dtypes)
         wdf = replace_nans(input_df)
-        cleaned_df = filter_to_valid_point_ids(session, wdf)
+        cleaned_df = filter_to_valid_point_ids(wdf)
         return input_df, cleaned_df
 
     def _get_df_chunk(self, session, chunk):
@@ -437,7 +428,11 @@ def _get_db_item(self, dbchunk, row):
     def _missing_db_item_warning(self, row):
         logger.warning(f"Thing with PointID {row.PointID} not found in database.")
 
-    def _chunk_iterator(self, session, df, i, row, db_item):
+
+class WellScreenTransferer(WellChunkTransferer):
+    source_table = "WellScreens"
+
+    def _chunk_step(self, session, df, i, row, db_item):
         well_screen_data = {
             "thing_id": db_item.id,
             "screen_depth_top": row.ScreenTop,
@@ -454,9 +449,7 @@ def _chunk_iterator(self, session, df, i, row, db_item):
             logger.critical(
                 f"Validation error for row {i} with PointID {row.PointID}: {e.errors()}"
             )
-            self.errors.append(
-                {"pointid": row.PointID, "error": e, "table": "WellScreens"}
-            )
+            self._capture_error(row.PointID, str(e), "UnknownField")
             return
 
         well_screen = WellScreen(**well_screen_data)

From 078493c3763aa8361fc61cf2fb6176432040018c Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Fri, 28 Nov 2025 20:39:59 -0700
Subject: [PATCH 23/66] refactor: replace transfer_water_levels function with
 WaterLevelTransferer class for improved data handling and transfer process

---
 transfers/link_ids_transfer.py    |   2 +-
 transfers/transfer.py             |  12 +-
 transfers/waterlevels_transfer.py | 541 +++++++++++++-----------------
 3 files changed, 242 insertions(+), 313 deletions(-)

diff --git a/transfers/link_ids_transfer.py b/transfers/link_ids_transfer.py
index dbb33f76f..c32fd0b8d 100644
--- a/transfers/link_ids_transfer.py
+++ b/transfers/link_ids_transfer.py
@@ -47,7 +47,7 @@ def _chunk_step(self, session, dr, i, row, db_item):
             if pd.isna(aid):
                 # logger.warning(f"{klass} is null for {row.PointID}")
                 continue
-            print("aid", aid, type(aid))
+
             # RULE: exclude any id that == 'X', '?'
             if aid.strip().lower() in ("x", "?", "exempt"):
                 logger.critical(
diff --git a/transfers/transfer.py b/transfers/transfer.py
index 97086d10b..5cfc6e63d 100644
--- a/transfers/transfer.py
+++ b/transfers/transfer.py
@@ -33,7 +33,7 @@
 )
 from transfers.contact_transfer import transfer_contacts
 from transfers.sensor_transfer import SensorTransferer
-from transfers.waterlevels_transfer import transfer_water_levels
+from transfers.waterlevels_transfer import WaterLevelTransferer
 from transfers.well_transfer import WellTransferer, WellScreenTransferer
 
 from transfers.asset_transfer import AssetTransferer
@@ -92,8 +92,8 @@ def transfer_all(sess, metrics, limit=100):
     metrics.contact_metrics(sess, *results)
 
     message("TRANSFERRING WATER LEVELS")
-    results = timeit_direct(transfer_water_levels, sess)
-    metrics.water_level_metrics(sess, *results)
+    results = _execute_transfer(WaterLevelTransferer, flags=flags)
+    metrics.water_level_metrics(*results)
 
     message("TRANSFERRING WATER LEVELS PRESSURE")
     results = timeit_direct(transfer_water_levels_pressure, sess)
@@ -172,9 +172,9 @@ def transfer_debugging(metrics, limit=100):
     # results = timeit_direct(transfer_contacts, sess)
     # metrics.contact_metrics(sess, *results)
     #
-    # message("TRANSFERRING WATER LEVELS")
-    # results = timeit_direct(transfer_water_levels, sess)
-    # metrics.water_level_metrics(sess, *results)
+    message("TRANSFERRING WATER LEVELS")
+    results = _execute_transfer(WaterLevelTransferer, flags=flags)
+    metrics.water_level_metrics(*results)
     #
     # message("TRANSFERRING WATER LEVELS PRESSURE")
     # results = timeit_direct(transfer_water_levels_pressure, sess)
diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py
index a1bb32717..80b8a4bd8 100644
--- a/transfers/waterlevels_transfer.py
+++ b/transfers/waterlevels_transfer.py
@@ -14,11 +14,11 @@
 # limitations under the License.
 # ===============================================================================
 import json
-import time
 import uuid
 from datetime import datetime
 
 import pandas as pd
+from sqlalchemy.orm import Session
 
 from db import (
     Thing,
@@ -30,6 +30,8 @@
     FieldEventParticipant,
     Parameter,
 )
+from db.engine import session_ctx
+from transfers.transferer import Transferer
 from transfers.util import (
     filter_to_valid_point_ids,
     logger,
@@ -46,348 +48,275 @@
 SPACE_6 = " " * 6
 
 
-def get_dt_utc(row, errors):
-    if pd.isna(row.DateMeasured):
-        logger.critical(
-            f"transfer_water_levels. Skipping row PointID={row.PointID}, objectid={row.OBJECTID} because there is no DateMeasured"
-        )
-        errors.append(
-            {
-                "pointid": row.PointID,
-                "error": "no DateMeasured",
-                "table": "WaterLevels",
-                "field": "DateMeasured",
-            }
-        )
-        return
-
-    if pd.isna(row.TimeMeasured):
-        fmt = "%Y-%m-%d"
-        dt_measured = row.DateMeasured
-    else:
-        fmt = "%Y-%m-%d %H:%M:%S.%f"
-        t = row.TimeMeasured
-        # Truncate microseconds to 6 digits if present
-        if "." in t:
-            t = t[:-6]
-
-        dt_measured = f"{row.DateMeasured} {t}"
-
-    try:
-        dt = datetime.strptime(dt_measured, fmt)
-        return convert_mt_to_utc(dt)
-    except ValueError as e:
-        errors.append(
-            {
-                "pointid": row.PointID,
-                "error": str(e),
-                "table": "WaterLevels",
-                "field": "DateMeasured",
-            }
-        )
-        logger.critical(
-            f"transfer_water_levels. Skipping row PointID={row.PointID}, objectid={row.OBJECTID} due to "
-            f"invalid date/time: {e}"
-        )
-        return None
-
-
-def get_contacts_info(row, measured_by, measured_by_mapper):
-    # measuring_agency = (
-    #     "Unknown" if pd.isna(row.MeasuringAgency) else row.MeasuringAgency
-    # )
-
-    # ns --> names
-    # os --> organizations
-    # rs --> roles
+def get_contacts_info(
+    row, measured_by, measured_by_mapper
+) -> list[tuple[str, str, str]]:
 
     # TODO: get help figuring out (AMP)
     if measured_by in measured_by_mapper:
         args = measured_by_mapper[measured_by]
         if isinstance(args[0], list):
-            ns, os, rs = zip(*args)
+            names, orgs, roles = zip(*args)
         else:
-            ns = [args[0]]
-            os = [args[1]]
-            rs = [args[2]]
+            names, orgs, roles = [args[0]], [args[1]], [args[2]]
+
     else:
-        ns = [measured_by]
-        os = ["Unknown"]
-        rs = ["Unknown"]
+        names = [measured_by]
+        orgs = ["Unknown"]
+        roles = ["Unknown"]
         logger.warning(
             f"{SPACE_6}The following record has not been mapped to a Contact: MeasuredBy {row.MeasuredBy} | MeasuringAgency {row.MeasuringAgency} for WaterLevels record with GLobalID {row.GlobalID}"
         )
 
-    return ns, os, rs
-
-
-def transfer_water_levels(session):
-    groundwater_parameter_id = (
-        session.query(Parameter)
-        .filter(Parameter.parameter_name == "groundwater level")
-        .one()
-        .id
-    )
-
-    # keep a dictionary of created Contacts to avoid repeated SQL queries
-    # keys are a tuple of (name, organization) since None is a common "name"
-    created_contacts = {}
-    path = get_transfers_data_path("measured_by_mapper.json")
-
-    with open(path, "r") as f:
-        measured_by_mapper = json.load(f)
-    source_table = "WaterLevels"
-    input_df = read_csv(source_table)
-    cleaned_df = filter_to_valid_point_ids(session, input_df)
-    cleaned_df = filter_by_valid_measuring_agency(cleaned_df)
-
-    gwd = cleaned_df.groupby(["PointID"])
-
-    start_time = time.time()
-    errors = []
-
-    # TODO: this needs to be cleaned up
-    # the for loop is too long and hard to read
-    # adding contacts should be done in a separate function
-    for index, group in gwd:
-        pointid = index[0]
-        logger.info(f"Processing PointID: {pointid}")
-        thing = session.query(Thing).where(Thing.name == pointid).first()
-        if thing is None:
-            logger.critical(
-                f"Thing with PointID={pointid} not found. Skipping water levels"
-            )
-            errors.append(
-                {
-                    "pointid": pointid,
-                    "error": "Thing with PointID not found",
-                    "table": source_table,
-                    "field": "PointID",
-                }
-            )
-            continue
-
-        n = len(group)
-        for i, row in enumerate(group.itertuples()):
-            if i and not i % 25:
-                logger.info(
-                    f"Processing row {i} of {n}. {row.PointID},  avg rows per second: {i / (time.time() - start_time):.2f}"
-                )
-                session.commit()
-
-            dt_utc = get_dt_utc(row, errors)
-            if dt_utc is None:
-                continue
-
-            release_status = "public" if row.PublicRelease else "private"
-
-            measured_by = None if pd.isna(row.MeasuredBy) else row.MeasuredBy
-
-            """
-            Developer's notes
+    return zip(names, orgs, roles)
 
-            Use existing contact for the thing if measured by is the owner.
 
-            If no contacts can be made or retrieved for the field event skip
-            it altogether and note in the log file. There must be at least one
-            contact associated with an event
-            """
-            field_event_participants = []
-            if measured_by not in ["Owner", "Owner report", "Well owner"]:
-                # --- Contact/FieldEventParticipant ---
-                contact_info = get_contacts_info(row, measured_by, measured_by_mapper)
-
-                for name, organization, role in zip(*contact_info):
-                    if (name, organization) in created_contacts:
-                        contact = created_contacts[(name, organization)]
-                    else:
-                        try:
-                            # create new contact if not already created
-                            contact = Contact(
-                                name=name,
-                                role=role,
-                                contact_type="Field Event Participant",
-                                organization=organization,
-                                nma_pk_waterlevels=row.GlobalID,
-                            )
-                            session.add(contact)
-                            # session.flush()  # to get the contact.id
-
-                            logger.info(
-                                f"{SPACE_2}Created contact: | Name {contact.name} | Role {contact.role} | Organization {contact.organization} | nma_pk_waterlevels {contact.nma_pk_waterlevels}"
-                            )
-
-                            created_contacts[(name, organization)] = contact
-                        except Exception as e:
-                            logger.critical(
-                                f"Contact cannot be created: Name {name} | Role {role} | Organization {organization} because of the following: {str(e)}"
-                            )
-                            continue
-
-                    field_event_participants.append(contact)
-            else:
-                contact = thing.contacts[0]
-                field_event_participants.append(contact)
-
-            if len(field_event_participants) == 0:
-                logger.critical(
-                    f"No contacts can be associated with the WaterLevels record with GlobalID {row.GlobalID}, therefore no field event, field activity, sample, and observation can be made. Skipping."
+class WaterLevelTransferer(Transferer):
+    def __init__(self, *args, **kw):
+        super().__init__(*args, **kw)
+        self.source_table = "WaterLevels"
+        with session_ctx() as session:
+            groundwater_parameter_id = (
+                session.query(Parameter)
+                .filter(Parameter.parameter_name == "groundwater level")
+                .one()
+                .id
+            )
+            self.groundwater_parameter_id = groundwater_parameter_id
+
+        path = get_transfers_data_path("measured_by_mapper.json")
+        with open(path, "r") as f:
+            self._measured_by_mapper = json.load(f)
+
+        self._created_contacts = {}
+
+    def _get_dfs(self) -> tuple[pd.DataFrame, pd.DataFrame]:
+        input_df = read_csv(self.source_table)
+        cleaned_df = filter_to_valid_point_ids(input_df)
+        cleaned_df = filter_by_valid_measuring_agency(cleaned_df)
+        return input_df, cleaned_df
+
+    def _transfer_hook(self, session: Session) -> None:
+        gwd = self.cleaned_df.groupby(["PointID"])
+        for index, group in gwd:
+            pointid = index[0]
+            thing = session.query(Thing).where(Thing.name == pointid).first()
+
+            for i, row in enumerate(group.itertuples()):
+                dt_utc = self._get_dt_utc(row)
+                if dt_utc is None:
+                    continue
+
+                release_status = "public" if row.PublicRelease else "private"
+
+                # field event
+                field_event = FieldEvent(
+                    thing=thing,
+                    event_date=dt_utc,
+                    release_status=release_status,
                 )
-                continue
-
-            """
-            Developer's notes
+                session.add(field_event)
+                field_event_participants = self._get_field_event_participants(
+                    session, row, thing
+                )
+                sampler = None
+                for i, participant in enumerate(field_event_participants):
+                    field_event_participant = FieldEventParticipant(
+                        field_event=field_event, participant=participant
+                    )
+                    if i == 0:
+                        field_event_participant.participant_role = "Lead"
+                        sampler = field_event_participant
+                    else:
+                        field_event_participant.participant_role = "Participant"
 
-            Assumes for manual water levels that the date/time of the water level
-            measurement is the same as the date/time of the field event.
-            """
+                    session.add(field_event_participant)
 
-            # --- FieldEvent ---
-            # TODO: use create schema to validate data
-            field_event = FieldEvent(
-                thing=thing,
-                event_date=dt_utc,
-                release_status=release_status,
-            )
+                # reasons
+                glv = self._get_groundwater_level_reason(row)
+                if (
+                    glv
+                    == "Well was destroyed (no subsequent water levels should be recorded)"
+                ):
+                    logger.warning(
+                        "Well is destroyed - no field activity/sample/observation will be made"
+                    )
+                    field_event.notes = glv
+                    continue
+
+                # Field Activity
+                # TODO: use create schema to validate data
+                field_activity = FieldActivity(
+                    field_event=field_event,
+                    activity_type="groundwater level",
+                    release_status=release_status,
+                )
+                session.add(field_activity)
 
-            session.add(field_event)
+                # Sample
+                sample = self._make_sample(row, field_activity, dt_utc, sampler)
+                session.add(sample)
 
-            logger.info(
-                f"{SPACE_2}Created field event: ID {field_event.id} | Date {field_event.event_date} | Thing ID {field_event.thing.id} | Thing Name {field_event.thing.name}"
-            )
+                # Observation
+                observation = self._make_observation(row, sample, dt_utc, glv)
+                session.add(observation)
 
-            """
-            Developer's notes
+            session.commit()
 
-            Assumes that the first listed contact is the lead and the
-            person who took the sample. The subsequent contact will be
-            participants in the field event
-            """
-            for i, participant in enumerate(field_event_participants):
-                field_event_participant = FieldEventParticipant(
-                    field_event=field_event, participant=participant
+    def _make_observation(
+        self, row: pd.Series, sample: Sample, dt_utc: datetime, glv: str
+    ) -> Observation:
+        if pd.isna(row.MPHeight):
+            if pd.notna(row.DepthToWater) and pd.notna(row.DepthToWaterBGS):
+                logger.warning(
+                    f"{SPACE_6}Calculating measuring_point_height as DepthToWater - DepthToWaterBGS because MPHeight is NULL"
                 )
-                if i == 0:
-                    field_event_participant.participant_role = "Lead"
-                    sampler = field_event_participant
-                else:
-                    field_event_participant.participant_role = "Participant"
-
-                session.add(field_event_participant)
-                logger.info(
-                    f"{SPACE_4}Created field event contact: ID {field_event_participant.id} | Role {field_event_participant.participant_role} | Contact ID {field_event_participant.participant.id} | Contact Name {field_event_participant.participant.name} | Contact Org {field_event_participant.participant.organization}"
+                measuring_point_height = row.DepthToWater - row.DepthToWaterBGS
+            else:
+                logger.warning(
+                    f"{SPACE_6}Setting measuring_point_height to None because MPHeight is NULL and DepthToWater or DepthToWaterBGS is NULL"
                 )
+                measuring_point_height = None
+        else:
+            # some mp heights are recorded as negative numbers, but they should be positive
+            measuring_point_height = abs(row.MPHeight)
 
-            groundwater_level_reason = (
-                lexicon_mapper.map_value(f"LU_LevelStatus:{row.LevelStatus}")
-                if not pd.isna(row.LevelStatus)
-                else None
-            )
-            groundwater_level_reason = (
-                "Water level not affected"
-                if groundwater_level_reason == "Water level not affected by status"
-                else groundwater_level_reason
-            )
-
-            if (
-                groundwater_level_reason
-                == "Well was destroyed (no subsequent water levels should be recorded)"
-            ):
+        if pd.isna(row.DepthToWater):
+            if pd.notna(row.DepthToWaterBGS):
                 logger.warning(
-                    "Well is destroyed - no field activity/sample/observation will be made"
+                    f"{SPACE_6}Calculating observation value as DepthToWaterBGS + MPHeight (0 if MPHeight is NULL) because DepthToWater is NULL"
                 )
-                field_event.notes = groundwater_level_reason
-                continue
-
-            # --- FieldActivity ---
-            # TODO: use create schema to validate data
-            field_activity = FieldActivity(
-                field_event=field_event,
-                activity_type="groundwater level",
-                release_status=release_status,
-            )
-            session.add(field_activity)
+                value = row.DepthToWaterBGS + measuring_point_height
+            else:
+                # use None not NaN
+                value = None
+        else:
+            value = row.DepthToWater
 
-            logger.info(
-                f"{SPACE_4}Created field activity: ID {field_activity.id} | Type {field_activity.activity_type}"
+            # TODO: after sensors have been added to the database update sensor_id (or sensor) for waterlevels that come from db sensors (like e probes?)
+        observation = Observation(
+            nma_pk_waterlevels=row.GlobalID,
+            sample=sample,
+            sensor_id=None,
+            analysis_method_id=None,
+            observation_datetime=dt_utc,
+            parameter_id=self.groundwater_parameter_id,
+            value=value,
+            unit="ft",
+            measuring_point_height=measuring_point_height,
+            groundwater_level_reason=glv,
+        )
+        return observation
+
+    def _make_sample(self, row, field_activity, dt_utc, sampler) -> Sample:
+        sample_method = (
+            "null placeholder"
+            if pd.isna(row.MeasurementMethod)
+            else lexicon_mapper.map_value(
+                f"LU_MeasurementMethod:{row.MeasurementMethod}"
             )
+        )
+        sample = Sample(
+            nma_pk_waterlevels=row.GlobalID,
+            field_activity=field_activity,
+            field_event_participant=sampler,
+            sample_date=dt_utc,
+            sample_matrix="water",
+            sample_name=str(uuid.uuid4()),
+            sample_method=sample_method,
+            qc_type="Normal",
+            depth_top=None,
+            depth_bottom=None,
+        )
+        return sample
 
-            # --- Sample ---
-            sample_method = (
-                "null placeholder"
-                if pd.isna(row.MeasurementMethod)
-                else lexicon_mapper.map_value(
-                    f"LU_MeasurementMethod:{row.MeasurementMethod}"
-                )
-            )
+    def _get_groundwater_level_reason(self, row) -> str:
+        glv = row.LevelStatus
+        if pd.isna(glv):
+            return None
 
-            # todo: use create schema to validate data
-            sample = Sample(
-                nma_pk_waterlevels=row.GlobalID,
-                field_activity=field_activity,
-                field_event_participant=sampler,
-                sample_date=dt_utc,
-                sample_matrix="water",
-                sample_name=str(uuid.uuid4()),
-                sample_method=sample_method,
-                qc_type="Normal",
-                depth_top=None,
-                depth_bottom=None,
-            )
-            session.add(sample)
-            logger.info(
-                f"{SPACE_4}Created sample: ID {sample.id} | Date {sample.sample_date} | Matrix {sample.sample_matrix} | Method {sample.sample_method}"
-            )
+        glv = lexicon_mapper.map_value(f"LU_LevelStatus:{glv}")
+        if glv == "Water level not affected by status":
+            glv = "Water level not affected"
+        return glv
 
-            # TODO: use create schema to validate data
+    def _get_field_event_participants(self, session, row, thing) -> list[Contact]:
+        field_event_participants = []
+        measured_by = None if pd.isna(row.MeasuredBy) else row.MeasuredBy
 
-            if pd.isna(row.MPHeight):
-                if not pd.isna(row.DepthToWater) and not pd.isna(row.DepthToWaterBGS):
-                    logger.warning(
-                        f"{SPACE_6}Calculating measuring_point_height as DepthToWater - DepthToWaterBGS because MPHeight is NULL"
-                    )
-                    measuring_point_height = row.DepthToWater - row.DepthToWaterBGS
-                else:
-                    logger.warning(
-                        f"{SPACE_6}Setting measuring_point_height to None because MPHeight is NULL and DepthToWater or DepthToWaterBGS is NULL"
-                    )
-                    measuring_point_height = None
-            else:
-                # some mp heights are recorded as negative numbers, but they should be positive
-                measuring_point_height = abs(row.MPHeight)
+        if measured_by not in ["Owner", "Owner report", "Well owner"]:
+            # --- Contact/FieldEventParticipant ---
+            contact_info = get_contacts_info(row, measured_by, self._measured_by_mapper)
 
-            if pd.isna(row.DepthToWater):
-                if not pd.isna(row.DepthToWaterBGS):
-                    logger.warning(
-                        f"{SPACE_6}Calculating observation value as DepthToWaterBGS + MPHeight (0 if MPHeight is NULL) because DepthToWater is NULL"
-                    )
-                    value = row.DepthToWaterBGS + measuring_point_height
+            for name, organization, role in contact_info:
+                if (name, organization) in self._created_contacts:
+                    contact = self._created_contacts[(name, organization)]
                 else:
-                    # use None not NaN
-                    value = None
-            else:
-                value = row.DepthToWater
+                    try:
+                        # create new contact if not already created
+                        contact = Contact(
+                            name=name,
+                            role=role,
+                            contact_type="Field Event Participant",
+                            organization=organization,
+                            nma_pk_waterlevels=row.GlobalID,
+                        )
+                        session.add(contact)
+
+                        logger.info(
+                            f"{SPACE_2}Created contact: | Name {contact.name} | Role {contact.role} | Organization {contact.organization} | nma_pk_waterlevels {contact.nma_pk_waterlevels}"
+                        )
+
+                        self._created_contacts[(name, organization)] = contact
+                    except Exception as e:
+                        logger.critical(
+                            f"Contact cannot be created: Name {name} | Role {role} | Organization {organization} because of the following: {str(e)}"
+                        )
+                        continue
 
-            # TODO: after sensors have been added to the database update sensor_id (or sensor) for waterlevels that come from db sensors (like e probes?)
-            observation = Observation(
-                nma_pk_waterlevels=row.GlobalID,
-                sample=sample,
-                sensor_id=None,
-                analysis_method_id=None,
-                observation_datetime=dt_utc,
-                parameter_id=groundwater_parameter_id,
-                value=value,
-                unit="ft",
-                measuring_point_height=measuring_point_height,
-                groundwater_level_reason=groundwater_level_reason,
+                field_event_participants.append(contact)
+        else:
+            contact = thing.contacts[0]
+            field_event_participants.append(contact)
+
+        if len(field_event_participants) == 0:
+            logger.critical(
+                f"No contacts can be associated with the WaterLevels record with GlobalID {row.GlobalID}, therefore no field event, field activity, sample, and observation can be made. Skipping."
             )
-            session.add(observation)
-            logger.info(
-                f"{SPACE_4}Created observation: ID {observation.id} | DT {observation.observation_datetime} | Value {observation.value} | MPHeight {observation.measuring_point_height} | nma_pk_waterlevels {observation.nma_pk_waterlevels}"
+            return None
+
+        return field_event_participants
+
+    def _get_dt_utc(self, row) -> datetime | None:
+        if pd.isna(row.DateMeasured):
+            logger.critical(
+                f"transfer_water_levels. Skipping row PointID={row.PointID}, objectid={row.OBJECTID} because there is no DateMeasured"
             )
-        session.commit()
+            self._capture_error(row.PointID, "no DateMeasured", "DateMeasured")
+            return None
 
-    return input_df, cleaned_df, errors
+        if pd.isna(row.TimeMeasured):
+            fmt = "%Y-%m-%d"
+            dt_measured = row.DateMeasured
+        else:
+            fmt = "%Y-%m-%d %H:%M:%S.%f"
+            t = row.TimeMeasured
+            # Truncate microseconds to 6 digits if present
+            if "." in t:
+                t = t[:-6]
+
+            dt_measured = f"{row.DateMeasured} {t}"
+
+        try:
+            dt = datetime.strptime(dt_measured, fmt)
+            return convert_mt_to_utc(dt)
+        except ValueError as e:
+            self._capture_error(row.PointID, str(e), "DateMeasured")
+            logger.critical(
+                f"transfer_water_levels. Skipping row PointID={row.PointID}, objectid={row.OBJECTID} due to "
+                f"invalid date/time: {e}"
+            )
+            return None
 
 
 # ============= EOF =============================================

From 09c71271f9f5dd4d29716bb76e7f709f0db2392b Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Sun, 30 Nov 2025 09:55:07 -0700
Subject: [PATCH 24/66] refactor: enhance water levels transfer process by
 introducing WaterLevelsContinuousPressureTransferer and
 WaterLevelsContinuousAcousticTransferer classes

---
 transfers/util.py | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/transfers/util.py b/transfers/util.py
index 70e6952a5..cf290c591 100644
--- a/transfers/util.py
+++ b/transfers/util.py
@@ -18,6 +18,7 @@
 import math
 import os
 import re
+import time
 from datetime import datetime, timezone, timedelta, UTC
 from pathlib import Path
 
@@ -28,7 +29,7 @@
 from sqlalchemy import select
 
 from constants import SRID_WGS84, SRID_UTM_ZONE_13N
-from db import Thing, Location, DataProvenance
+from db import Thing, Location, DataProvenance, Parameter
 from db.engine import session_ctx
 from services.gcs_helper import get_storage_bucket
 
@@ -205,10 +206,16 @@ def replace_nans(df: pd.DataFrame, default=None) -> pd.DataFrame:
     return df.replace({np.nan: default})
 
 
-def read_csv(name: str, dtype: dict | None = None) -> pd.DataFrame:
+def read_csv(name: str, dtype: dict | None = None, *args, **kw) -> pd.DataFrame:
     p = get_transfers_data_path(Path("nma_csv_cache") / f"{name}.csv")
     if os.path.exists(p):
-        return pd.read_csv(p, dtype=dtype)
+        logger.info(f"Using cached csv: {p}")
+        starttime = time.time()
+        df = pd.read_csv(p, dtype=dtype, *args, **kw)
+        logger.info(f"Read csv in {time.time()-starttime:0.2f}")
+        return df
+    else:
+        logger.info(f"Downloading csv: {name}")
 
     bucket = get_storage_bucket()
     blob = bucket.blob(f"nma_csv/{name}.csv")
@@ -216,10 +223,7 @@ def read_csv(name: str, dtype: dict | None = None) -> pd.DataFrame:
     with open(p, "wb") as f:
         f.write(data)
 
-    if dtype:
-        return pd.read_csv(io.BytesIO(data), dtype=dtype)
-    else:
-        return pd.read_csv(io.BytesIO(data))
+    return pd.read_csv(io.BytesIO(data), dtype=dtype)
 
 
 def get_valid_point_ids(thing_type="water well"):
@@ -339,6 +343,17 @@ def chunk_by_size(df, chunk_size):
         yield df.iloc[i : i + chunk_size]
 
 
+def get_groundwater_parameter_id():
+    with session_ctx() as session:
+        groundwater_parameter_id = (
+            session.query(Parameter)
+            .filter(Parameter.parameter_name == "groundwater level")
+            .one()
+            .id
+        )
+    return groundwater_parameter_id
+
+
 def make_location(row: pd.Series, elevations: dict) -> tuple:
     """
     Returns a tuple of location data and the elevation method

From 76b1d3b965672ec31e9784d83cca35811f26624f Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Sun, 30 Nov 2025 09:57:22 -0700
Subject: [PATCH 25/66] refactor: enhance water levels transfer process by
 introducing WaterLevelsContinuousPressureTransferer and
 WaterLevelsContinuousAcousticTransferer classes

---
 transfers/transfer.py                        | 102 +++---
 transfers/transferer.py                      |   4 +
 transfers/waterlevels_transducer_transfer.py | 319 ++++++++++---------
 3 files changed, 233 insertions(+), 192 deletions(-)

diff --git a/transfers/transfer.py b/transfers/transfer.py
index 5cfc6e63d..04d5c44c1 100644
--- a/transfers/transfer.py
+++ b/transfers/transfer.py
@@ -19,11 +19,12 @@
 
 load_dotenv()
 
-from transfers.metrics import Metrics
 from transfers.waterlevels_transducer_transfer import (
-    transfer_water_levels_pressure,
-    transfer_water_levels_acoustic,
+    WaterLevelsContinuousPressureTransferer,
+    WaterLevelsContinuousAcousticTransferer,
 )
+
+from transfers.metrics import Metrics
 from core.initializers import erase_and_rebuild_db
 
 from transfers.group_transfer import ProjectGroupTransferer
@@ -95,13 +96,13 @@ def transfer_all(sess, metrics, limit=100):
     results = _execute_transfer(WaterLevelTransferer, flags=flags)
     metrics.water_level_metrics(*results)
 
-    message("TRANSFERRING WATER LEVELS PRESSURE")
-    results = timeit_direct(transfer_water_levels_pressure, sess)
-    metrics.pressure_metrics(sess, *results)
-
-    message("TRANSFERRING WATER LEVELS ACOUSTIC")
-    results = timeit_direct(transfer_water_levels_acoustic, sess)
-    metrics.acoustic_metrics(sess, *results)
+    # message("TRANSFERRING WATER LEVELS PRESSURE")
+    # results = timeit_direct(transfer_water_levels_pressure, sess)
+    # metrics.pressure_metrics(sess, *results)
+    #
+    # message("TRANSFERRING WATER LEVELS ACOUSTIC")
+    # results = timeit_direct(transfer_water_levels_acoustic, sess)
+    # metrics.acoustic_metrics(sess, *results)
 
     """
     Developer's notes
@@ -147,13 +148,23 @@ def transfer_debugging(metrics, limit=100):
     results = _execute_transfer(WellTransferer, flags=flags)
     metrics.well_metrics(*results)
 
-    message("TRANSFERRING WELL SCREENS")
-    results = _execute_transfer(WellScreenTransferer, flags=flags)
-    metrics.well_screen_metrics(*results)
+    transfer_screens = False
+    transfer_sensors = True
+    transfer_pressure = True
+    transfer_acoustic = True
+    transfer_link_ids = False
+    transfer_groups = False
+    transfer_assets = False
 
-    message("TRANSFERRING SENSORS")
-    results = _execute_transfer(SensorTransferer, flags=flags)
-    metrics.sensor_metrics(*results)
+    if transfer_screens:
+        message("TRANSFERRING WELL SCREENS")
+        results = _execute_transfer(WellScreenTransferer, flags=flags)
+        metrics.well_screen_metrics(*results)
+
+    if transfer_sensors:
+        message("TRANSFERRING SENSORS")
+        results = _execute_transfer(SensorTransferer, flags=flags)
+        metrics.sensor_metrics(*results)
 
     # Developer's notes all the metadata for these Things are not defined in the models/schemas yet'
     # message("TRANSFERRING SPRINGS")
@@ -172,31 +183,40 @@ def transfer_debugging(metrics, limit=100):
     # results = timeit_direct(transfer_contacts, sess)
     # metrics.contact_metrics(sess, *results)
     #
-    message("TRANSFERRING WATER LEVELS")
-    results = _execute_transfer(WaterLevelTransferer, flags=flags)
-    metrics.water_level_metrics(*results)
-    #
-    # message("TRANSFERRING WATER LEVELS PRESSURE")
-    # results = timeit_direct(transfer_water_levels_pressure, sess)
-    # metrics.pressure_metrics(sess, *results)
-    #
-    # message("TRANSFERRING WATER LEVELS ACOUSTIC")
-    # results = timeit_direct(transfer_water_levels_acoustic, sess)
-    # metrics.acoustic_metrics(sess, *results)
-
-    message("TRANSFERRING LINK IDS")
-    results = _execute_transfer(LinkIdsWellDataTransferer, flags=flags)
-    metrics.welldata_link_ids_metrics(*results)
-    results = _execute_transfer(LinkIdsLocationDataTransferer, flags=flags)
-    metrics.location_link_ids_metrics(*results)
-
-    message("TRANSFERRING GROUPS")
-    results = _execute_transfer(ProjectGroupTransferer, flags=flags)
-    metrics.group_metrics(*results)
-
-    message("TRANSFERRING ASSETS")
-    results = _execute_transfer(AssetTransferer, flags=flags)
-    metrics.asset_metrics(*results)
+    # message("TRANSFERRING WATER LEVELS")
+    # results = _execute_transfer(WaterLevelTransferer, flags=flags)
+    # metrics.water_level_metrics(*results)
+
+    if transfer_pressure:
+        message("TRANSFERRING WATER LEVELS PRESSURE")
+        results = _execute_transfer(
+            WaterLevelsContinuousPressureTransferer, flags=flags
+        )
+        metrics.pressure_metrics(*results)
+
+    if transfer_acoustic:
+        message("TRANSFERRING WATER LEVELS ACOUSTIC")
+        results = _execute_transfer(
+            WaterLevelsContinuousAcousticTransferer, flags=flags
+        )
+        metrics.acoustic_metrics(*results)
+
+    if transfer_link_ids:
+        message("TRANSFERRING LINK IDS")
+        results = _execute_transfer(LinkIdsWellDataTransferer, flags=flags)
+        metrics.welldata_link_ids_metrics(*results)
+        results = _execute_transfer(LinkIdsLocationDataTransferer, flags=flags)
+        metrics.location_link_ids_metrics(*results)
+
+    if transfer_groups:
+        message("TRANSFERRING GROUPS")
+        results = _execute_transfer(ProjectGroupTransferer, flags=flags)
+        metrics.group_metrics(*results)
+
+    if transfer_assets:
+        message("TRANSFERRING ASSETS")
+        results = _execute_transfer(AssetTransferer, flags=flags)
+        metrics.asset_metrics(*results)
 
 
 def main():
diff --git a/transfers/transferer.py b/transfers/transferer.py
index 8d84e1170..a8045dccb 100644
--- a/transfers/transferer.py
+++ b/transfers/transferer.py
@@ -167,6 +167,7 @@ def _group_iterator(self, session: Session):
                 continue
 
             prepped_group = self._get_prepped_group(group)
+            self._pre_group_step(session, prepped_group, db_item)
             for row in prepped_group.itertuples():
                 try:
                     self._group_step(session, row, db_item)
@@ -181,6 +182,9 @@ def _group_iterator(self, session: Session):
     def _get_point_id(self, row: pd.Series, db_item: Base) -> str:
         return row.PointID
 
+    def _pre_group_step(self, session: Session, group: DataFrame, db_item: Base):
+        pass
+
     def _group_step(self, session: Session, row: pd.Series, db_item: Base):
         raise NotImplementedError("Must be implemented in subclass")
 
diff --git a/transfers/waterlevels_transducer_transfer.py b/transfers/waterlevels_transducer_transfer.py
index e4ce178c0..927d8d6b8 100644
--- a/transfers/waterlevels_transducer_transfer.py
+++ b/transfers/waterlevels_transducer_transfer.py
@@ -13,179 +13,196 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ===============================================================================
-from pandas import to_datetime, Timestamp
+
+import pandas as pd
+from pandas import Timestamp
 from pydantic import ValidationError
+from sqlalchemy.orm import Session
 
-from db import Parameter, Thing, Deployment, Sensor
+from db import Thing, Deployment, Sensor
 from db.transducer import TransducerObservation, TransducerObservationBlock
+from schemas.transducer import CreateTransducerObservation
 from transfers.logger import logger
-from transfers.util import read_csv, filter_to_valid_point_ids
+from transfers.transferer import Transferer
+from transfers.util import (
+    read_csv,
+    filter_to_valid_point_ids,
+    get_groundwater_parameter_id,
+)
+
+
+class WaterLevelsContinuousTransferer(Transferer):
+    _partition_field: str
+    _sensor_type: str
+
+    def __init__(self, *args, **kw):
+        super().__init__(*args, **kw)
+        self.groundwater_parameter_id = get_groundwater_parameter_id()
+        if self._sensor_type is None:
+            raise ValueError("_sensor_type must be set")
+        if self._partition_field is None:
+            raise ValueError("_partition_field must be set")
+
+    def _get_dfs(self):
+        input_df = read_csv(self.source_table, parse_dates=["DateMeasured"])
+        cleaned_df = filter_to_valid_point_ids(input_df)
+        cleaned_df = cleaned_df.sort_values(by=["PointID"])
 
+        # remove rows with no date measured
+        cleaned_df = cleaned_df[cleaned_df.DateMeasured.notna()]
+        return input_df, cleaned_df
+
+    def _transfer_hook(self, session: Session) -> None:
+        gwd = self.cleaned_df.groupby(["PointID"])
+        n = len(gwd)
+        nodeployments = {}
+        for i, (index, group) in enumerate(gwd):
+            pointid = index[0]
+            logger.info(
+                f"Processing PointID: {pointid}. {i + 1}/{n} ({100*(i+1)/n:0.2f}) completed."
+            )
 
-def transfer_water_levels_acoustic(session):
-    source_table = "WaterLevelsContinuous_Acoustic"
-    wd = read_csv(source_table)
-    return _transfer_water_levels_continuous(
-        session, source_table, wd, "PublicRelease", "Acoustic Sounder"
-    )
+            deployments = (
+                session.query(Deployment)
+                .join(Thing)
+                .join(Sensor)
+                .where(Sensor.sensor_type == self._sensor_type)
+                .where(Thing.name == pointid)
+                .all()
+            )
 
+            # sort rows by date measured
+            group = group.sort_values(by="DateMeasured")
+            field = getattr(group, self._partition_field)
 
-def transfer_water_levels_pressure(session):
-    source_table = "WaterLevelsContinuous_Pressure"
-    wd = read_csv(source_table)
-    return _transfer_water_levels_continuous(
-        session, source_table, wd, "QCed", "Pressure Transducer"
-    )
+            qced = group[field == 1]
+            notqced = group[~(field == 1)]
 
+            qced_block = TransducerObservationBlock(
+                parameter_id=self.groundwater_parameter_id, review_status="approved"
+            )
+            notqced_block = TransducerObservationBlock(
+                parameter_id=self.groundwater_parameter_id, review_status="not reviewed"
+            )
 
-def _find_deployment(ts, deployments):
-    for d in deployments:
-        start = Timestamp(d.installation_date)
-        if start > ts:
-            break  # because sorted by start
-        end = Timestamp(d.removal_date) if d.removal_date else Timestamp.max
-        if end >= ts:
-            return d
-    return None
+            for block, rows, release_status in (
+                (qced_block, qced, "public"),
+                (notqced_block, notqced, "private"),
+            ):
+                block.start_datetime = rows.DateMeasured.min()
+                block.end_datetime = rows.DateMeasured.max()
 
+                if not deployments:
+                    logger.critical(
+                        f"Thing with PointID={pointid} has no deployments. Skipping water levels {release_status} block"
+                    )
+                    self._capture_error(pointid, "no deployments", "DateMeasured")
+                    continue
 
-def _transfer_water_levels_continuous(
-    session, source_table, input_df, partition_field, sensor_type
-):
-    from schemas.transducer import CreateTransducerObservation
-
-    groundwater_parameter_id = (
-        session.query(Parameter)
-        .filter(Parameter.parameter_name == "groundwater level")
-        .one()
-        .id
-    )
-    cleaned_df = filter_to_valid_point_ids(session, input_df)
-
-    # group by pointid
-    cleaned_df = cleaned_df.sort_values(by=["PointID"])
-    gwd = cleaned_df.groupby(["PointID"])
-    n = len(gwd)
-    errors = []
-    nodeployments = {}
-    for i, (index, group) in enumerate(gwd):
-        pointid = index[0]
-        logger.info(
-            f"Processing PointID: {pointid}. {i + 1}/{n} ({100*(i+1)/n:0.2f}) completed."
-        )
-
-        deployments = (
-            session.query(Deployment)
-            .join(Thing)
-            .join(Sensor)
-            .where(Sensor.sensor_type == sensor_type)
-            .where(Thing.name == pointid)
-            .all()
-        )
+                if rows.empty:
+                    logger.info(f"no {release_status} records for pointid {pointid}")
+                    continue
 
-        # remove rows with no date measured
-        group = group[group.DateMeasured.notna()]
-        group["DateMeasured"] = to_datetime(group["DateMeasured"], errors="coerce")
-
-        # sort rows by date measured
-        group = group.sort_values(by="DateMeasured")
-        field = getattr(group, partition_field)
-
-        qced = group[field == 1]
-        notqced = group[~(field == 1)]
-
-        qced_block = TransducerObservationBlock(
-            parameter_id=groundwater_parameter_id, review_status="approved"
-        )
-        notqced_block = TransducerObservationBlock(
-            parameter_id=groundwater_parameter_id, review_status="not reviewed"
-        )
-
-        for block, rows, release_status in (
-            (qced_block, qced, "public"),
-            (notqced_block, notqced, "private"),
-        ):
-            block.start_datetime = rows.DateMeasured.min()
-            block.end_datetime = rows.DateMeasured.max()
-
-            if not deployments:
-                logger.critical(
-                    f"Thing with PointID={pointid} has no deployments. Skipping water levels {release_status} block"
+                deps_sorted = sorted(
+                    deployments, key=lambda d: Timestamp(d.installation_date)
                 )
-                errors.append({"pointid": pointid, "error": "no deployments"})
-                continue
 
-            if rows.empty:
-                logger.info(f"no {release_status} records for pointid {pointid}")
-                continue
+                observations = [
+                    self._make_observation(
+                        pointid, row, release_status, deps_sorted, nodeployments
+                    )
+                    for row in rows.itertuples()
+                ]
+
+                observations = [obs for obs in observations if obs is not None]
+                session.bulk_save_objects(observations)
+                session.add(block)
+                logger.info(
+                    f"Added {len(observations)} water levels {release_status} block"
+                )
+                try:
+                    session.commit()
+                except Exception as e:
+                    self.append({"pointid": pointid, "error": e})
+                    logger.critical(
+                        f"Error committing water levels {release_status} block: {e}"
+                    )
+                    session.rollback()
+                    continue
 
-            observations = []
+        # convert nodeployments to errors
+        for pointid, (min_date, max_date) in nodeployments.items():
+            self._capture_error(
+                pointid,
+                "DateMeasured",
+                f"no deployment between {min_date} and {max_date}",
+            )
 
-            deps_sorted = sorted(
-                deployments, key=lambda d: Timestamp(d.installation_date)
+    def _make_observation(
+        self,
+        pointid: str,
+        row: pd.Series,
+        release_status: str,
+        deps_sorted: list,
+        nodeployments: dict,
+    ) -> TransducerObservation | None:
+        deployment = _find_deployment(row.DateMeasured, deps_sorted)
+
+        if deployment is None:
+            if pointid not in nodeployments:
+                nodeployments[pointid] = (row.DateMeasured, row.DateMeasured)
+            else:
+                min_date, max_date = nodeployments[pointid]
+                if row.DateMeasured < min_date:
+                    min_date = row.DateMeasured
+                elif row.DateMeasured > max_date:
+                    max_date = row.DateMeasured
+                nodeployments[pointid] = min_date, max_date
+
+            logger.critical(
+                f"No deployment found for PointID={pointid} at {row.DateMeasured}"
+            )
+            return None
+
+        try:
+            payload = dict(
+                parameter_id=self.groundwater_parameter_id,
+                deployment_id=deployment.id,
+                observation_datetime=row.DateMeasured,
+                value=row.DepthToWaterBGS,
+                release_status=release_status,
             )
+            obspayload = CreateTransducerObservation.model_validate(
+                payload
+            ).model_dump()
+            return TransducerObservation(**obspayload)
 
-            for row in rows.itertuples():
-                deployment = _find_deployment(row.DateMeasured, deps_sorted)
+        except ValidationError as e:
+            logger.critical(f"Observation validation error: {e.errors()}")
+            self._capture_error(pointid, str(e), "DepthToWaterBGS")
 
-                if deployment is None:
-                    if pointid not in nodeployments:
-                        nodeployments[pointid] = (row.DateMeasured, row.DateMeasured)
-                    else:
-                        min_date, max_date = nodeployments[pointid]
-                        if row.DateMeasured < min_date:
-                            min_date = row.DateMeasured
-                        elif row.DateMeasured > max_date:
-                            max_date = row.DateMeasured
-                        nodeployments[pointid] = min_date, max_date
 
-                    logger.critical(
-                        f"No deployment found for PointID={pointid} at {row.DateMeasured}"
-                    )
-                    continue
+class WaterLevelsContinuousPressureTransferer(WaterLevelsContinuousTransferer):
+    source_table = "WaterLevelsContinuous_Pressure"
+    _partition_field = "QCed"
+    _sensor_type = "Pressure Transducer"
 
-                try:
-                    payload = dict(
-                        parameter_id=groundwater_parameter_id,
-                        deployment_id=deployment.id,
-                        observation_datetime=row.DateMeasured,
-                        value=row.DepthToWaterBGS,
-                        release_status=release_status,
-                    )
-                    obspayload = CreateTransducerObservation.model_validate(
-                        payload
-                    ).model_dump()
-                    observations.append(TransducerObservation(**obspayload))
-                except ValidationError as e:
-                    logger.critical(f"Observation validation error: {e.errors()}")
-                    errors.append({"pointid": pointid, "error": e.errors()})
-
-            session.bulk_save_objects(observations)
-            session.add(block)
-            logger.info(
-                f"Added {len(observations)} water levels {release_status} block"
-            )
-            try:
-                session.commit()
-            except Exception as e:
-                errors.append({"pointid": pointid, "error": e})
-                logger.critical(
-                    f"Error committing water levels {release_status} block: {e}"
-                )
-                session.rollback()
-                continue
-
-    # convert nodeployments to errors
-    for pointid, (min_date, max_date) in nodeployments.items():
-        errors.append(
-            {
-                "table": source_table,
-                "pointid": pointid,
-                "error": f"no deployment between {min_date} and {max_date}",
-            }
-        )
-
-    return input_df, cleaned_df, errors
+
+class WaterLevelsContinuousAcousticTransferer(WaterLevelsContinuousTransferer):
+    source_table = "WaterLevelsContinuous_Acoustic"
+    _partition_field = "PublicRelease"
+    _sensor_type = "Acoustic Sounder"
+
+
+def _find_deployment(ts, deployments):
+    for d in deployments:
+        start = Timestamp(d.installation_date)
+        if start > ts:
+            break  # because sorted by start
+        end = Timestamp(d.removal_date) if d.removal_date else Timestamp.max
+        if end >= ts:
+            return d
+    return None
 
 
 # ============= EOF =============================================

From b6e5039ae69942b69fd612b1dc34385b54035127 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Sun, 30 Nov 2025 10:30:07 -0700
Subject: [PATCH 26/66] refactor: simplify transfer_all function by removing
 unnecessary parameters and restructuring transfer logic

---
 transfers/transfer.py | 120 ++++++++----------------------------------
 1 file changed, 22 insertions(+), 98 deletions(-)

diff --git a/transfers/transfer.py b/transfers/transfer.py
index 04d5c44c1..5d167d7f5 100644
--- a/transfers/transfer.py
+++ b/transfers/transfer.py
@@ -17,6 +17,8 @@
 
 from dotenv import load_dotenv
 
+from db.engine import session_ctx
+
 load_dotenv()
 
 from transfers.waterlevels_transducer_transfer import (
@@ -50,94 +52,8 @@ def message(msg, pad=10, new_line_at_top=True):
 
 
 @timeit
-def transfer_all(sess, metrics, limit=100):
+def transfer_all(metrics, limit=100):
     message("STARTING TRANSFER", new_line_at_top=False)
-
-    logger.info("Erase and rebuilding database")
-    erase_and_rebuild_db()
-
-    message("TRANSFERRING WELLS")
-
-    flags = {
-        "TRANSFER_ALL_WELLS": True,
-        "TRANSFER_ALL_WELLSCREENS": True,
-        "LIMIT": limit,
-    }
-
-    results = _execute_transfer(WellTransferer, flags=flags)
-    metrics.well_metrics(sess, *results)
-
-    message("TRANSFERRING WELL SCREENS")
-    results = _execute_transfer(WellScreenTransferer, flags=flags)
-    metrics.well_screen_metrics(sess, *results)
-
-    message("TRANSFERRING SENSORS")
-    results = _execute_transfer(SensorTransferer, flags=flags)
-    metrics.sensor_metrics(sess, *results)
-
-    # Developer's notes all the metadata for these Things are not defined in the models/schemas yet'
-    # message("TRANSFERRING SPRINGS")
-    # timeit_direct(transfer_springs, sess, limit=limit)
-    #
-    # message("TRANSFERRING PERENNIAL STREAMS")
-    # timeit_direct(transfer_perennial_stream, sess, limit=limit)
-    #
-    # message("TRANSFERRING EPHEMERAL STREAMS")
-    # timeit_direct(transfer_ephemeral_stream, sess, limit=limit)
-    #
-    # message("TRANSFERRING METEOROLOGICAL")
-    # timeit_direct(transfer_met, sess, limit)
-
-    message("TRANSFERRING CONTACTS")
-    results = timeit_direct(transfer_contacts, sess)
-    metrics.contact_metrics(sess, *results)
-
-    message("TRANSFERRING WATER LEVELS")
-    results = _execute_transfer(WaterLevelTransferer, flags=flags)
-    metrics.water_level_metrics(*results)
-
-    # message("TRANSFERRING WATER LEVELS PRESSURE")
-    # results = timeit_direct(transfer_water_levels_pressure, sess)
-    # metrics.pressure_metrics(sess, *results)
-    #
-    # message("TRANSFERRING WATER LEVELS ACOUSTIC")
-    # results = timeit_direct(transfer_water_levels_acoustic, sess)
-    # metrics.acoustic_metrics(sess, *results)
-
-    """
-    Developer's notes
-
-    When transfering water chemistry data use the qc_type field to indicate
-    normal/blanks/duplicates instead of what comes from LU_SampleType. Use
-    those values, however, to map to the standard qc_type fields if applicable
-    (i.e. not applicable when sample type is "Soil or rock sample" or
-    "Precipitation," but is applicable when sample type is "Equipment blank"
-    or "Field duplicate")
-    """
-    message("TRANSFERRING LINK IDS")
-    results = _execute_transfer(LinkIdsWellDataTransferer, flags=flags)
-    metrics.welldata_link_ids_metrics(*results)
-    results = _execute_transfer(LinkIdsLocationDataTransferer, flags=flags)
-    metrics.location_link_ids_metrics(*results)
-
-    message("TRANSFERRING GROUPS")
-    results = _execute_transfer(ProjectGroupTransferer, flags=flags)
-    metrics.group_metrics(*results)
-
-    message("TRANSFERRING ASSETS")
-    results = _execute_transfer(AssetTransferer, flags=flags)
-    metrics.asset_metrics(*results)
-
-
-def _execute_transfer(klass, flags: dict = None):
-    transferer = klass(flags=flags)
-    transferer.transfer()
-    return transferer.input_df, transferer.cleaned_df, transferer.errors
-
-
-def transfer_debugging(metrics, limit=100):
-    message("STARTING TRANSFER DEBUG", new_line_at_top=False)
-
     if int(os.environ.get("ERASE_AND_REBUILD", 0)):
         logger.info("Erase and rebuilding database")
         erase_and_rebuild_db()
@@ -150,11 +66,13 @@ def transfer_debugging(metrics, limit=100):
 
     transfer_screens = False
     transfer_sensors = True
+    transfer_waterlevels = False
     transfer_pressure = True
     transfer_acoustic = True
     transfer_link_ids = False
     transfer_groups = False
     transfer_assets = False
+    do_transfer_contacts = False
 
     if transfer_screens:
         message("TRANSFERRING WELL SCREENS")
@@ -179,13 +97,16 @@ def transfer_debugging(metrics, limit=100):
     # message("TRANSFERRING METEOROLOGICAL")
     # timeit_direct(transfer_met, sess, limit)
 
-    # message("TRANSFERRING CONTACTS")
-    # results = timeit_direct(transfer_contacts, sess)
-    # metrics.contact_metrics(sess, *results)
-    #
-    # message("TRANSFERRING WATER LEVELS")
-    # results = _execute_transfer(WaterLevelTransferer, flags=flags)
-    # metrics.water_level_metrics(*results)
+    if do_transfer_contacts:
+        message("TRANSFERRING CONTACTS")
+        with session_ctx() as sess:
+            results = timeit_direct(transfer_contacts, sess)
+            metrics.contact_metrics(sess, *results)
+
+    if transfer_waterlevels:
+        message("TRANSFERRING WATER LEVELS")
+        results = _execute_transfer(WaterLevelTransferer, flags=flags)
+        metrics.water_level_metrics(*results)
 
     if transfer_pressure:
         message("TRANSFERRING WATER LEVELS PRESSURE")
@@ -219,15 +140,18 @@ def transfer_debugging(metrics, limit=100):
         metrics.asset_metrics(*results)
 
 
+def _execute_transfer(klass, flags: dict = None):
+    transferer = klass(flags=flags)
+    transferer.transfer()
+    return transferer.input_df, transferer.cleaned_df, transferer.errors
+
+
 def main():
     message("START--------------------------------------")
     limit = int(os.getenv("TRANSFER_LIMIT", 1000))
     metrics = Metrics()
 
-    if int(os.getenv("TRANSFER_DEBUG", 0)):
-        transfer_debugging(metrics, limit=limit)
-    else:
-        transfer_all(metrics, limit=limit)
+    transfer_all(metrics, limit=limit)
 
     metrics.close()
     metrics.save_to_storage_bucket()

From c77411d504760d87bc5e4901ea6d24c539249b85 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Sun, 30 Nov 2025 16:42:19 -0700
Subject: [PATCH 27/66] refactor: implement ContactTransfer class for improved
 contact data handling and transfer process

---
 services/query_helper.py      |  13 +--
 services/util.py              |  22 ++++-
 transfers/contact_transfer.py | 148 +++++++++++-----------------------
 transfers/transfer.py         |  31 ++++---
 4 files changed, 82 insertions(+), 132 deletions(-)

diff --git a/services/query_helper.py b/services/query_helper.py
index 3f0e3dd24..970ad1720 100644
--- a/services/query_helper.py
+++ b/services/query_helper.py
@@ -25,18 +25,7 @@
 
 from db import search as search_func
 from services.regex import QUERY_REGEX
-
-
-def to_bool(value: str) -> bool | str:
-    """Convert a string to a boolean."""
-    if isinstance(value, bool):
-        return value
-    if value.lower() in ("true", "1", "yes"):
-        return True
-    elif value.lower() in ("false", "0", "no"):
-        return False
-
-    return value
+from services.util import to_bool
 
 
 def make_where(col: Column, op: str, v: str) -> OperatorExpression:
diff --git a/services/util.py b/services/util.py
index 77cd5d5cd..313a922ec 100644
--- a/services/util.py
+++ b/services/util.py
@@ -1,17 +1,33 @@
 import json
+import os
 
-from shapely.ops import transform
-import pyproj
 import httpx
+import pyproj
+from shapely.ops import transform
 from sqlalchemy.orm import DeclarativeBase
 
 from constants import SRID_WGS84
 
-
 TRANSFORMERS = {}
 METERS_TO_FEET = 3.28084
 
 
+def to_bool(value: str) -> bool | str:
+    """Convert a string to a boolean."""
+    if isinstance(value, bool):
+        return value
+    if value.lower() in ("true", "1", "yes"):
+        return True
+    elif value.lower() in ("false", "0", "no"):
+        return False
+
+    return value
+
+
+def get_bool_env(key, default=False):
+    return to_bool(os.getenv(key, default))
+
+
 def transform_srid(geometry, source_srid, target_srid):
     """
     geometry must be a shapely geometry object, like Point, Polygon, or MultiPolygon
diff --git a/transfers/contact_transfer.py b/transfers/contact_transfer.py
index c9b1c9fb0..a1d545a03 100644
--- a/transfers/contact_transfer.py
+++ b/transfers/contact_transfer.py
@@ -15,140 +15,79 @@
 # ===============================================================================
 import json
 
+import pandas as pd
+from pandas import DataFrame
 from pydantic import ValidationError
+from sqlalchemy.orm import Session
 
 from db import (
-    Thing,
     Contact,
     ThingContactAssociation,
     Email,
     Phone,
     Address,
     IncompleteNMAPhone,
+    Base,
 )
 from transfers.logger import logger
+from transfers.transferer import ThingBasedTransferer
 from transfers.util import (
     get_transfers_data_path,
-    chunk_by_size,
 )
 from transfers.util import read_csv, filter_to_valid_point_ids, replace_nans
 
 
-def extract_owner_role(comment):
-    # if comment is None:
-    #     return "Owner"
-    # if "Owner" in comment:
-    #     return "Owner"
-    # if "Manager" in comment:
-    #     return "Manager"
-    # if "Director" in comment:
-    #     return "Director"
+class ContactTransfer(ThingBasedTransferer):
+    source_table = "OwnersData"
 
-    return "Owner"
+    def __init__(self, *args, **kw):
+        super().__init__(*args, **kw)
+        co_to_org_mapper_path = get_transfers_data_path(
+            "owners_organization_mapper.json"
+        )
+        with open(co_to_org_mapper_path, "r") as f:
+            self._co_to_org_mapper = json.load(f)
 
+        self._added = []
 
-"""
-Developer's notes
+    def _get_dfs(self):
+        input_df = read_csv(self.source_table)
+        odf = input_df.drop(["OBJECTID", "GlobalID"], axis=1)
+        ldf = read_csv("OwnerLink")
+        ldf = ldf.drop(["OBJECTID", "GlobalID"], axis=1)
+        locdf = read_csv("Location")
+        ldf = ldf.join(locdf.set_index("LocationId"), on="LocationId")
 
-Use Pydantic to perform model validations since all restrictions will
-be built into the models
-"""
+        odf = odf.join(ldf.set_index("OwnerKey"), on="OwnerKey")
 
+        odf = replace_nans(odf)
 
-def transfer_contacts(session):
+        odf = filter_to_valid_point_ids(odf)
+        return input_df, odf
 
-    co_to_org_mapper_path = get_transfers_data_path("owners_organization_mapper.json")
-    with open(co_to_org_mapper_path, "r") as f:
-        co_to_org_mapper = json.load(f)
-
-    source_table = "OwnersData"
-    input_df = read_csv(source_table)
-    odf = input_df.drop(["OBJECTID", "GlobalID"], axis=1)
-    ldf = read_csv("OwnerLink")
-    ldf = ldf.drop(["OBJECTID", "GlobalID"], axis=1)
-    locdf = read_csv("Location")
-    ldf = ldf.join(locdf.set_index("LocationId"), on="LocationId")
-
-    odf = odf.join(ldf.set_index("OwnerKey"), on="OwnerKey")
-
-    odf = replace_nans(odf)
-
-    odf = filter_to_valid_point_ids(session, odf)
-    cleaned_df = odf
-    errors = []
-    added = []
-    odf = odf.sort_values(by=["PointID"])
-
-    for chunk in chunk_by_size(odf, 100):
-        pointids = chunk.PointID.tolist()
-        logger.info(f"Processing chunk {pointids[0]} to {pointids[-1]}")
-        things = session.query(Thing).filter(Thing.name.in_(pointids)).all()
-        for i, row in chunk.iterrows():
-            thing = next((thing for thing in things if thing.name == row.PointID), None)
-            logger.info(f"Processing PointID: {i} {row.PointID}")
-            if thing is None:
-                logger.critical(
-                    f"Thing with PointID {row.PointID} not found. Skipping owner."
-                )
-                continue
-
-            # TODO: use contact_helper.add_contact
-            try:
-                if _add_first_contact(session, row, thing, co_to_org_mapper, added):
-                    session.commit()
-                    # session.flush()
-                    logger.info(f"added first contact for PointID {row.PointID}")
-            except ValidationError as e:
-                logger.critical(
-                    f"Skipping first contact for PointID {row.PointID} due to validation error: {e.errors()}"
-                )
-                # session.rollback()
-                errors.append(
-                    {"pointid": row.PointID, "error": e, "table": source_table}
-                )
-            except Exception as e:
-                logger.critical(
-                    f"Skipping first contact for PointID {row.PointID} due to error: {e}"
-                )
-                session.rollback()
-                errors.append(
-                    {"pointid": row.PointID, "error": e, "table": source_table}
-                )
+    def _get_prepped_group(self, group) -> DataFrame:
+        return group.sort_values(by=["PointID"])
 
+    def _group_step(self, session: Session, row: pd.Series, db_item: Base):
+        for adder, tag in (_add_first_contact, "first"), (
+            _add_second_contact,
+            "second",
+        ):
             try:
-                if (
-                    row.SecondFirstName is None
-                    and row.SecondLastName is None
-                    and row.SecondCtctEmail is None
-                    and row.SecondCtctPhone is None
-                ):
-                    logger.warning(
-                        f"No second contact info for PointID {row.PointID}, skipping."
-                    )
-                    continue
-                if _add_second_contact(session, row, thing, co_to_org_mapper, added):
+                if adder(session, row, db_item, self._co_to_org_mapper, self._added):
                     session.commit()
-                    # session.flush()
-                    logger.info(f"added second contact for PointID {row.PointID}")
-
+                    logger.info(f"added {tag} contact for PointID {row.PointID}")
             except ValidationError as e:
                 logger.critical(
-                    f"Skipping second contact for PointID {row.PointID} due to validation error: {e.errors()}"
-                )
-                # session.rollback()
-                errors.append(
-                    {"pointid": row.PointID, "error": e, "table": source_table}
+                    f"Skipping {tag} contact for PointID {row.PointID} due to validation error: {e.errors()}"
                 )
+                self._capture_error(row.PointID, str(e), "ValidationError")
             except Exception as e:
                 logger.critical(
-                    f"Skipping second contact for PointID {row.PointID} due to error: {e}"
+                    f"Skipping {tag} contact for PointID {row.PointID} due to error: {e}"
                 )
                 session.rollback()
-                errors.append(
-                    {"pointid": row.PointID, "error": e, "table": source_table}
-                )
-
-    return input_df, cleaned_df, errors
+                self._capture_error(row.PointID, str(e), "UnknownError")
 
 
 def _add_first_contact(session, row, thing, co_to_org_mapper, added):
@@ -252,6 +191,14 @@ def _add_first_contact(session, row, thing, co_to_org_mapper, added):
 
 
 def _add_second_contact(session, row, thing, co_to_org_mapper, added):
+    if all(
+        [
+            getattr(row, f"Second{f}") is None
+            for f in ["FirstName", "LastName", "CtctEmail", "CtctPhone"]
+        ]
+    ):
+        logger.warning(f"No second contact info for PointID {row.PointID}, skipping.")
+        return
 
     release_status = "private"
     name = _make_name(row.SecondFirstName, row.SecondLastName)
@@ -364,7 +311,6 @@ def _make_address(first_second, ownerkey, kind, **kw):
         )
 
 
-#
 def _make_contact_and_assoc(session, data, thing):
     from schemas.contact import CreateContact
 
diff --git a/transfers/transfer.py b/transfers/transfer.py
index 5d167d7f5..8a9c3bed3 100644
--- a/transfers/transfer.py
+++ b/transfers/transfer.py
@@ -17,7 +17,7 @@
 
 from dotenv import load_dotenv
 
-from db.engine import session_ctx
+from services.util import get_bool_env
 
 load_dotenv()
 
@@ -34,13 +34,13 @@
     LinkIdsWellDataTransferer,
     LinkIdsLocationDataTransferer,
 )
-from transfers.contact_transfer import transfer_contacts
+from transfers.contact_transfer import ContactTransfer
 from transfers.sensor_transfer import SensorTransferer
 from transfers.waterlevels_transfer import WaterLevelTransferer
 from transfers.well_transfer import WellTransferer, WellScreenTransferer
 
 from transfers.asset_transfer import AssetTransferer
-from transfers.util import timeit, timeit_direct
+from transfers.util import timeit
 from transfers.logger import logger, save_log_to_bucket
 
 
@@ -64,15 +64,15 @@ def transfer_all(metrics, limit=100):
     results = _execute_transfer(WellTransferer, flags=flags)
     metrics.well_metrics(*results)
 
-    transfer_screens = False
-    transfer_sensors = True
-    transfer_waterlevels = False
-    transfer_pressure = True
-    transfer_acoustic = True
-    transfer_link_ids = False
-    transfer_groups = False
-    transfer_assets = False
-    do_transfer_contacts = False
+    transfer_screens = get_bool_env("TRANSFER_WELL_SCREENS", True)
+    transfer_sensors = get_bool_env("TRANSFER_SENSORS", True)
+    transfer_contacts = get_bool_env("TRANSFER_CONTACTS", True)
+    transfer_waterlevels = get_bool_env("TRANSFER_WATERLEVELS", True)
+    transfer_pressure = get_bool_env("TRANSFER_WATERLEVELS_PRESSURE", True)
+    transfer_acoustic = get_bool_env("TRANSFER_WATERLEVELS_ACOUSTIC", True)
+    transfer_link_ids = get_bool_env("TRANSFER_LINK_IDS", True)
+    transfer_groups = get_bool_env("TRANSFER_GROUPS", True)
+    transfer_assets = get_bool_env("TRANSFER_ASSETS", True)
 
     if transfer_screens:
         message("TRANSFERRING WELL SCREENS")
@@ -97,11 +97,10 @@ def transfer_all(metrics, limit=100):
     # message("TRANSFERRING METEOROLOGICAL")
     # timeit_direct(transfer_met, sess, limit)
 
-    if do_transfer_contacts:
+    if transfer_contacts:
         message("TRANSFERRING CONTACTS")
-        with session_ctx() as sess:
-            results = timeit_direct(transfer_contacts, sess)
-            metrics.contact_metrics(sess, *results)
+        results = _execute_transfer(ContactTransfer, flags=flags)
+        metrics.contact_metrics(*results)
 
     if transfer_waterlevels:
         message("TRANSFERRING WATER LEVELS")

From ec7965528ba897c22e6cd6a1641bd8eb381bae0a Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Mon, 1 Dec 2025 22:56:12 -0700
Subject: [PATCH 28/66] refactor: rename
 filter_by_welldata_datasource_and_project to get_transferable_wells for
 clarity and update logic to include additional point IDs

---
 transfers/util.py          | 67 +++++++++++++++++++++--------
 transfers/well_transfer.py | 86 ++++++++++++++++++++++++--------------
 2 files changed, 104 insertions(+), 49 deletions(-)

diff --git a/transfers/util.py b/transfers/util.py
index cf290c591..83c968f4b 100644
--- a/transfers/util.py
+++ b/transfers/util.py
@@ -206,16 +206,22 @@ def replace_nans(df: pd.DataFrame, default=None) -> pd.DataFrame:
     return df.replace({np.nan: default})
 
 
-def read_csv(name: str, dtype: dict | None = None, *args, **kw) -> pd.DataFrame:
+def read_csv(
+    name: str, dtype: dict | None = None, verbose=False, *args, **kw
+) -> pd.DataFrame:
     p = get_transfers_data_path(Path("nma_csv_cache") / f"{name}.csv")
     if os.path.exists(p):
-        logger.info(f"Using cached csv: {p}")
+        if verbose:
+            logger.info(f"Using cached csv: {p}")
         starttime = time.time()
         df = pd.read_csv(p, dtype=dtype, *args, **kw)
-        logger.info(f"Read csv in {time.time()-starttime:0.2f}")
+
+        if verbose:
+            logger.info(f"Read csv in {time.time()-starttime:0.2f}")
         return df
     else:
-        logger.info(f"Downloading csv: {name}")
+        if verbose:
+            logger.info(f"Downloading csv: {name}")
 
     bucket = get_storage_bucket()
     blob = bucket.blob(f"nma_csv/{name}.csv")
@@ -274,30 +280,55 @@ def filter_non_transferred_wells(df: pd.DataFrame) -> pd.DataFrame:
     return df[~(df["PointID"].isin(existing_ids))]
 
 
-def filter_by_welldata_datasource_and_project(df: pd.DataFrame) -> pd.DataFrame:
+def get_transferable_wells(
+    df: pd.DataFrame, log_datasource_counts=False, log_invalid_datasources=False
+) -> pd.DataFrame:
     path = get_transfers_data_path("valid_welldata_datasources.csv")
     with open(path, "r") as f:
         reader = csv.reader(f)
         _ = next(reader)
         valid_datasources = [row[0] for row in reader if row[1] == "Yes"]
 
-        # f.seek(0)
-        # invalid_datasources = [row[0] for row in reader if row[1] == "NO"]
-        # logger.info("Invalid WellData Datasources:")
-        # for vd in invalid_datasources:
-        #     logger.info(f"  {vd}")
+        if log_invalid_datasources:
+            f.seek(0)
+            invalid_datasources = [row[0] for row in reader if row[1] == "NO"]
+            logger.info("Invalid WellData Datasources:")
+            for vd in invalid_datasources:
+                logger.info(f"  {vd}")
 
-    counts = df.groupby("DataSource").size().reset_index(name="WellCount")
-    counts = counts.sort_values("WellCount", ascending=False)
-    for count in counts.itertuples():
-        logger.info(f"{count.WellCount}: {count.DataSource[:50]} ")
+    if log_datasource_counts:
+        counts = df.groupby("DataSource").size().reset_index(name="WellCount")
+        counts = counts.sort_values("WellCount", ascending=False)
+        for count in counts.itertuples():
+            logger.info(f"{count.WellCount}: {count.DataSource[:50]} ")
 
     pldf = read_csv("ProjectLocations")
     collabnet = pldf[pldf["ProjectName"] == "Water Level Network"]
-    return df[
-        df["DataSource"].isin(valid_datasources)
-        | df["PointID"].isin(collabnet["PointID"])
-    ]
+
+    collabnet_pointids = collabnet["PointID"].unique().tolist()
+    logger.info(
+        f"collabnet pointids: {len(collabnet_pointids)} {collabnet_pointids[:10]}"
+    )
+
+    # get all pointids that have USGS as the DataSource but also have WaterLevel measurements where datasource is
+    # NMBGMR
+    usgs_df = df[df["DataSource"] == "USGS"]
+
+    waterlevel_df = read_csv("WaterLevels")
+    waterlevel_df = waterlevel_df[waterlevel_df["MeasuringAgency"] == "NMBGMR"]
+
+    usgs_pointids = (
+        usgs_df[usgs_df["PointID"].isin(waterlevel_df["PointID"])]["PointID"]
+        .unique()
+        .tolist()
+    )
+    logger.info(f"usgs pointids: {len(usgs_pointids)} {usgs_pointids[:10]}")
+
+    # get all the pointids from the well photos and include them
+    wellphotos_df = read_csv("WellPhotos")
+    wellphotos_pointids = wellphotos_df["PointID"].unique().tolist()
+    pointids = list(set(usgs_pointids + collabnet_pointids + wellphotos_pointids))
+    return df[df["DataSource"].isin(valid_datasources) | df["PointID"].isin(pointids)]
 
 
 def filter_by_valid_measuring_agency(df: pd.DataFrame) -> pd.DataFrame:
diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py
index 45a867a72..fa912ed18 100644
--- a/transfers/well_transfer.py
+++ b/transfers/well_transfer.py
@@ -51,7 +51,7 @@
     read_csv,
     logger,
     replace_nans,
-    filter_by_welldata_datasource_and_project,
+    get_transferable_wells,
     lexicon_mapper,
     filter_non_transferred_wells,
     MeasuringPointEstimator,
@@ -117,35 +117,35 @@ def _extract_casing_materials(row) -> list[str]:
     return materials
 
 
-def get_wells_to_transfer(flags: dict = None) -> tuple[pd.DataFrame, pd.DataFrame]:
-    # if flags is None:
-    #     flags = {}
-
-    wdf = read_csv("WellData", dtype={"OSEWelltagID": str})
-    ldf = read_csv("Location")
-    ldf = ldf.drop(["PointID", "SSMA_TimeStamp"], axis=1)
-    wdf = wdf.join(ldf.set_index("LocationId"), on="LocationId")
-    wdf = wdf[wdf["SiteType"] == "GW"]
-    wdf = wdf[wdf["Easting"].notna() & wdf["Northing"].notna()]
-
-    input_df = wdf
-    wdf = replace_nans(wdf)
-
-    # if flags.get("TRANSFER_ALL_WELLS", False):
-    #     # todo: filter Locations by DataSource
-    #     cleaned_df = filter_by_welldata_datasource_and_project(wdf)
-    # else:
-    #     # get a subset of wells that have not been transferred yet
-    #     # todo: this needs to be defined.
-    #     #       for now, we are just filtering out wells that have not been transferred yet
-    #     #       In the future we will be using criteria to determine which wells to transfer
-    #     #       for example, wells in the "Water Level Network" project
-    #     cleaned_df = wdf
-
-    cleaned_df = filter_by_welldata_datasource_and_project(wdf)
-    cleaned_df = filter_non_transferred_wells(cleaned_df)
-
-    return input_df, cleaned_df
+# def get_wells_to_transfer(flags: dict = None) -> tuple[pd.DataFrame, pd.DataFrame]:
+#     # if flags is None:
+#     #     flags = {}
+#
+#     wdf = read_csv("WellData", dtype={"OSEWelltagID": str})
+#     ldf = read_csv("Location")
+#     ldf = ldf.drop(["PointID", "SSMA_TimeStamp"], axis=1)
+#     wdf = wdf.join(ldf.set_index("LocationId"), on="LocationId")
+#     wdf = wdf[wdf["SiteType"] == "GW"]
+#     wdf = wdf[wdf["Easting"].notna() & wdf["Northing"].notna()]
+#
+#     input_df = wdf
+#     wdf = replace_nans(wdf)
+#
+#     # if flags.get("TRANSFER_ALL_WELLS", False):
+#     #     # todo: filter Locations by DataSource
+#     #     cleaned_df = filter_by_welldata_datasource_and_project(wdf)
+#     # else:
+#     #     # get a subset of wells that have not been transferred yet
+#     #     # todo: this needs to be defined.
+#     #     #       for now, we are just filtering out wells that have not been transferred yet
+#     #     #       In the future we will be using criteria to determine which wells to transfer
+#     #     #       for example, wells in the "Water Level Network" project
+#     #     cleaned_df = wdf
+#
+#     cleaned_df = get_transferable_wells(wdf)
+#     cleaned_df = filter_non_transferred_wells(cleaned_df)
+#
+#     return input_df, cleaned_df
 
 
 def get_cached_elevations() -> dict:
@@ -175,7 +175,31 @@ def __init__(self, *args, **kw):
         self._added_locations = {}
 
     def _get_dfs(self):
-        return get_wells_to_transfer(self.flags)
+        wdf = read_csv("WellData", dtype={"OSEWelltagID": str})
+        ldf = read_csv("Location")
+        ldf = ldf.drop(["PointID", "SSMA_TimeStamp"], axis=1)
+        wdf = wdf.join(ldf.set_index("LocationId"), on="LocationId")
+        wdf = wdf[wdf["SiteType"] == "GW"]
+        wdf = wdf[wdf["Easting"].notna() & wdf["Northing"].notna()]
+
+        input_df = wdf
+        wdf = replace_nans(wdf)
+
+        # if flags.get("TRANSFER_ALL_WELLS", False):
+        #     # todo: filter Locations by DataSource
+        #     cleaned_df = filter_by_welldata_datasource_and_project(wdf)
+        # else:
+        #     # get a subset of wells that have not been transferred yet
+        #     # todo: this needs to be defined.
+        #     #       for now, we are just filtering out wells that have not been transferred yet
+        #     #       In the future we will be using criteria to determine which wells to transfer
+        #     #       for example, wells in the "Water Level Network" project
+        #     cleaned_df = wdf
+
+        cleaned_df = get_transferable_wells(wdf)
+        cleaned_df = filter_non_transferred_wells(cleaned_df)
+
+        return input_df, cleaned_df
 
     def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series):
         pointid = row.PointID

From 6c08a2595d669a212ebf2da938f611bca0c4c37b Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Tue, 2 Dec 2025 09:33:47 -0700
Subject: [PATCH 29/66] refactor: add type hints to functions in util.py for
 improved code clarity and maintainability

---
 transfers/util.py | 44 +++++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/transfers/util.py b/transfers/util.py
index 83c968f4b..d459ee4ff 100644
--- a/transfers/util.py
+++ b/transfers/util.py
@@ -27,13 +27,12 @@
 import pytz
 from shapely import Point
 from sqlalchemy import select
+from sqlalchemy.orm import Session
 
 from constants import SRID_WGS84, SRID_UTM_ZONE_13N
 from db import Thing, Location, DataProvenance, Parameter
 from db.engine import session_ctx
 from services.gcs_helper import get_storage_bucket
-
-# from services.lexicon_mapper import lexicon_mapper
 from services.util import (
     transform_srid,
     get_epqs_elevation_from_point,
@@ -232,14 +231,14 @@ def read_csv(
     return pd.read_csv(io.BytesIO(data), dtype=dtype)
 
 
-def get_valid_point_ids(thing_type="water well"):
+def get_valid_point_ids(thing_type: str = "water well") -> list[str]:
     with session_ctx() as session:
         things = get_valid_things(session, thing_type)
         valid_pointids = [thing.name for thing in things]
     return valid_pointids
 
 
-def get_valid_things(session, thing_type="water well"):
+def get_valid_things(session: Session, thing_type: str = "water well") -> list[Thing]:
     return session.query(Thing).where(Thing.thing_type == thing_type).all()
 
 
@@ -260,7 +259,7 @@ def extract_organization(alternate_id: str) -> str:
     return "Unknown"
 
 
-def get_transfers_data_path(name):
+def get_transfers_data_path(name: str) -> Path:
     def data_path(r):
         return Path(r) / "transfers" / "data"
 
@@ -349,7 +348,7 @@ def filter_to_valid_point_ids(df: pd.DataFrame) -> pd.DataFrame:
     return df[df["PointID"].isin(valid_point_ids)]
 
 
-def convert_mt_to_utc(dt_record: datetime):
+def convert_mt_to_utc(dt_record: datetime) -> datetime:
     t = dt_record.time()
     if t.hour == 0 and t.minute == 0:
         # no time was measured, so just set the timezone to UTC and keep
@@ -369,12 +368,12 @@ def convert_mt_to_utc(dt_record: datetime):
     return dt_record
 
 
-def chunk_by_size(df, chunk_size):
+def chunk_by_size(df: pd.DataFrame, chunk_size: int) -> pd.DataFrame:
     for i in range(0, len(df), chunk_size):
         yield df.iloc[i : i + chunk_size]
 
 
-def get_groundwater_parameter_id():
+def get_groundwater_parameter_id() -> int:
     with session_ctx() as session:
         groundwater_parameter_id = (
             session.query(Parameter)
@@ -592,13 +591,26 @@ def wrapper(*args, **kwargs):
 
 class LexiconMapper:
     def __init__(self):
-        self._mappers = None
+        self._mappers: dict[str, str] = None
 
-    def map_value(self, value):
+    def map_value(self, value) -> str:
         value = value.strip()
         return self._make_lu_to_lexicon_mapper().get(value, value)
 
-    def _make_lu_to_lexicon_mapper(self):
+    def _make_lu_to_lexicon_mapper(self) -> dict[str, str]:
+        """
+        Lookup tables intentionally skipped (kept for documentation only)
+        Each entry explains why the table is excluded
+
+        "LU_AltitudeDatum": "code is the value, so no need for mapping",
+        "LU_CoordinateDatum": "code is the value, so no need for mapping",
+        "LU_FieldNoteTypes": "not being used in the transfers since there are no records",
+        "LU_Formations": "needs to be cleaned before it can be used",
+        "LU_Lithology": "needs to be cleaned before it can be used",
+        "LU_MeasuringAgency": "the abbreviation is what is used in the new schema",
+
+        :return: dict
+        """
         if self._mappers:
             return self._mappers
 
@@ -624,16 +636,6 @@ def _make_lu_to_lexicon_mapper(self):
             "LU_Status",
         ]
 
-        # Lookup tables intentionally skipped (kept for documentation only)
-        # Each entry explains why the table is excluded
-        _lu_tables_skipped = {
-            "LU_AltitudeDatum": "code is the value, so no need for mapping",
-            "LU_CoordinateDatum": "code is the value, so no need for mapping",
-            "LU_FieldNoteTypes": "not being used in the transfers since there are no records",
-            "LU_Formations": "needs to be cleaned before it can be used",
-            "LU_Lithology": "needs to be cleaned before it can be used",
-            "LU_MeasuringAgency": "the abbreviation is what is used in the new schema",
-        }
         mappers = {}
 
         for lu_table in lu_tables:

From 89e8994ce528d36ffaa53eac03815419bfc28db7 Mon Sep 17 00:00:00 2001
From: jross <jake.ross@nmt.edu>
Date: Tue, 2 Dec 2025 17:24:54 -0700
Subject: [PATCH 30/66] feat: implement aquifer and geologic formation models
 with transfer functionality

---
 .pre-commit-config.yaml                       |   2 +
 core/enums.py                                 |  10 +-
 core/formations.json                          |   0
 core/lexicon.json                             |  53 ++-
 db/__init__.py                                |   7 +-
 db/aquifer_system.py                          |  84 ++++
 db/aquifer_type.py                            |  58 +++
 db/contact.py                                 |   8 +-
 db/data_provenance.py                         |  12 +-
 db/geologic_formation.py                      |  82 ++++
 db/notes.py                                   |   4 +-
 db/permission_history.py                      |  96 ++++
 db/status_history.py                          |   4 +-
 db/thing.py                                   | 128 +++++-
 db/thing_aquifer_association.py               |  51 +++
 db/thing_geologic_formation_association.py    |  60 +++
 schemas/aquifer_system.py                     |  51 +++
 schemas/geologic_formation.py                 |  88 ++++
 schemas/permission_history.py                 |  18 +
 schemas/thing.py                              |  94 +++-
 schemas/validators.py                         |  43 ++
 services/thing_helper.py                      |  20 +
 services/util.py                              |  17 +-
 tests/features/environment.py                 | 413 ++++++++++++------
 .../steps/well-additional-information.py      | 270 ++++++++++++
 tests/features/steps/well-core-information.py |   2 +-
 tests/test_thing.py                           |  32 ++
 transfers/aquifer_system_transfer.py          | 141 ++++++
 .../data/owners_organization_mapper.json      |   1 +
 transfers/geologic_formation_transfer.py      | 141 ++++++
 transfers/permissions_transfer.py             |  95 ++++
 transfers/stratigraphy_transfer.py            | 285 ++++++++++++
 transfers/util.py                             |  13 +-
 transfers/well_transfer.py                    |   8 +
 34 files changed, 2219 insertions(+), 172 deletions(-)
 create mode 100644 core/formations.json
 create mode 100644 db/aquifer_system.py
 create mode 100644 db/aquifer_type.py
 create mode 100644 db/geologic_formation.py
 create mode 100644 db/permission_history.py
 create mode 100644 db/thing_aquifer_association.py
 create mode 100644 db/thing_geologic_formation_association.py
 create mode 100644 schemas/aquifer_system.py
 create mode 100644 schemas/geologic_formation.py
 create mode 100644 schemas/permission_history.py
 create mode 100644 schemas/validators.py
 create mode 100644 tests/features/steps/well-additional-information.py
 create mode 100644 transfers/aquifer_system_transfer.py
 create mode 100644 transfers/geologic_formation_transfer.py
 create mode 100644 transfers/permissions_transfer.py
 create mode 100644 transfers/stratigraphy_transfer.py

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5d74e6a6c..b4dba7bf8 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -25,6 +25,8 @@ repos:
         types: [python] # Specify relevant file types for your tests
         pass_filenames: false
         always_run: true
+        args:
+          - -x
 
   # - repo: https://github.com/pre-commit/mirrors-mypy
   #   rev: v1.10.0  # Use the latest stable version or pin to your preference
diff --git a/core/enums.py b/core/enums.py
index 568f3f96a..91b206cab 100644
--- a/core/enums.py
+++ b/core/enums.py
@@ -24,7 +24,9 @@
 )
 CasingMaterial: type[Enum] = build_enum_from_lexicon_category("casing_material")
 CollectionMethod: type[Enum] = build_enum_from_lexicon_category("collection_method")
-ConstructionMethod: type[Enum] = build_enum_from_lexicon_category("construction_method")
+WellConstructionMethod: type[Enum] = build_enum_from_lexicon_category(
+    "well_construction_method"
+)
 ContactType: type[Enum] = build_enum_from_lexicon_category("contact_type")
 CoordinateMethod: type[Enum] = build_enum_from_lexicon_category("coordinate_method")
 WellPurpose: type[Enum] = build_enum_from_lexicon_category("well_purpose")
@@ -68,8 +70,14 @@
 Vertical_datum: type[Enum] = build_enum_from_lexicon_category("vertical_datum")
 ScreenType: type[Enum] = build_enum_from_lexicon_category("screen_type")
 SensorType: type[Enum] = build_enum_from_lexicon_category("sensor_type")
+WellPumpType: type[Enum] = build_enum_from_lexicon_category("well_pump_type")
+PermissionType: type[Enum] = build_enum_from_lexicon_category("permission_type")
 GroupType: type[Enum] = build_enum_from_lexicon_category("group_type")
 MonitoringFrequency: type[Enum] = build_enum_from_lexicon_category(
     "monitoring_frequency"
 )
+AquiferType: type[Enum] = build_enum_from_lexicon_category("aquifer_type")
+GeographicScale: type[Enum] = build_enum_from_lexicon_category("geographic_scale")
+Lithology: type[Enum] = build_enum_from_lexicon_category("lithology")
+FormationCode: type[Enum] = build_enum_from_lexicon_category("formation_code")
 # ============= EOF =============================================
diff --git a/core/formations.json b/core/formations.json
new file mode 100644
index 000000000..e69de29bb
diff --git a/core/lexicon.json b/core/lexicon.json
index 9aa9b88ae..815a40d2f 100644
--- a/core/lexicon.json
+++ b/core/lexicon.json
@@ -2,9 +2,10 @@
     {"name": "activity_type", "description": null},
     {"name": "address_type", "description": null},
     {"name": "analysis_method_type", "description": null},
+    {"name": "aquifer_type", "description": null},
     {"name": "casing_material", "description": null},
     {"name": "collection_method", "description": null},
-    {"name": "construction_method", "description": null},
+    {"name": "well_construction_method", "description": null},
     {"name": "contact_type", "description": null},
     {"name": "coordinate_method", "description": null},
     {"name": "country", "description": null},
@@ -18,6 +19,7 @@
     {"name": "email_type", "description": null},
     {"name": "participant_role", "description": null},
     {"name": "geochronology", "description": null},
+    {"name": "geographic_scale", "description": null},
     {"name": "groundwater_level_reason", "description": null},
     {"name": "group_type", "description": null},
     {"name": "horizontal_datum", "description": null},
@@ -51,7 +53,11 @@
     {"name": "well_purpose", "description": null},
     {"name": "status_type", "description": null},
     {"name": "status_value", "description": null},
-    {"name": "origin_source", "description": null}
+    {"name": "origin_source", "description": null},
+    {"name": "well_pump_type", "description": null},
+    {"name": "permission_type", "description": null},
+    {"name": "formation_code", "description": null},
+    {"name": "lithology", "description": null}
   ],
   "terms": [
     {"categories": ["review_status"], "term": "approved", "definition": "approved"},
@@ -78,15 +84,15 @@
     {"categories": ["elevation_method"], "term": "Reported", "definition": "Reported"},
     {"categories": ["elevation_method"], "term": "Survey-grade Global Navigation Satellite Sys, Lvl1", "definition": "Survey-grade Global Navigation Satellite Sys, Lvl1"},
     {"categories": ["elevation_method"], "term": "USGS National Elevation Dataset (NED)", "definition": "USGS National Elevation Dataset (NED)"},
-    {"categories": ["elevation_method", "sample_method", "coordinate_method", "well_purpose", "status", "organization", "role"], "term": "Unknown", "definition": "Unknown"},
-    {"categories": ["construction_method"], "term": "Air-rotary", "definition": "Air-rotary"},
-    {"categories": ["construction_method"], "term": "Bored or augered", "definition": "Bored or augered"},
-    {"categories": ["construction_method"], "term": "Cable-tool", "definition": "Cable-tool"},
-    {"categories": ["construction_method"], "term": "Hydraulic rotary (mud or water)", "definition": "Hydraulic rotary (mud or water)"},
-    {"categories": ["construction_method"], "term": "Air percussion", "definition": "Air percussion"},
-    {"categories": ["construction_method"], "term": "Reverse rotary", "definition": "Reverse rotary"},
-    {"categories": ["construction_method"], "term": "Driven", "definition": "Driven"},
-    {"categories": ["construction_method", "measurement_method"], "term": "Other (explain in notes)", "definition": "Other (explain in notes)"},
+    {"categories": ["elevation_method", "sample_method", "coordinate_method", "well_purpose", "status", "organization", "role", "aquifer_type"], "term": "Unknown", "definition": "Unknown"},
+    {"categories": ["well_construction_method"], "term": "Air-Rotary", "definition": "Air-Rotary"},
+    {"categories": ["well_construction_method"], "term": "Bored or augered", "definition": "Bored or augered"},
+    {"categories": ["well_construction_method"], "term": "Cable-tool", "definition": "Cable-tool"},
+    {"categories": ["well_construction_method"], "term": "Hydraulic rotary (mud or water)", "definition": "Hydraulic rotary (mud or water)"},
+    {"categories": ["well_construction_method"], "term": "Air percussion", "definition": "Air percussion"},
+    {"categories": ["well_construction_method"], "term": "Reverse rotary", "definition": "Reverse rotary"},
+    {"categories": ["well_construction_method"], "term": "Driven", "definition": "Driven"},
+    {"categories": ["well_construction_method", "measurement_method"], "term": "Other (explain in notes)", "definition": "Other (explain in notes)"},
     {"categories": ["coordinate_method"], "term": "Differentially corrected GPS", "definition": "Differentially corrected GPS"},
     {"categories": ["coordinate_method"], "term": "Survey-grade global positioning system (SGPS)", "definition": "Survey-grade global positioning system (SGPS)"},
     {"categories": ["coordinate_method"], "term": "GPS, uncorrected", "definition": "GPS, uncorrected"},
@@ -572,6 +578,7 @@
     {"categories": ["organization"], "term": "Yates Petroleum Corporation", "definition": "Yates Petroleum Corporation"},
     {"categories": ["organization"], "term": "Zamora Accounting Services", "definition": "Zamora Accounting Services"},
     {"categories": ["organization"], "term": "PLSS", "definition": "Public Land Survey System"},
+    {"categories": ["organization"], "term": "Quemado Municipal Water & SWA", "definition": "Quemado Municipal Water & SWA"},
     {"categories": ["collection_method"], "term": "Altimeter", "definition": "ALtimeter"},
     {"categories": ["collection_method"], "term": "Differentially corrected GPS", "definition": "Differentially corrected GPS"},
     {"categories": ["collection_method"], "term": "Survey-grade GPS", "definition": "Survey-grade GPS"},
@@ -692,6 +699,21 @@
     {"categories": ["monitoring_frequency"], "term": "Annual", "definition": "Location is monitored once a year."},
     {"categories": ["monitoring_frequency"], "term": "Decadal", "definition": "Location is monitored once every ten years."},
     {"categories": ["monitoring_frequency"], "term": "Event-based", "definition": "Location is monitored based on specific events or triggers rather than a fixed schedule."},
+    {"categories": ["aquifer_type"], "term": "Artesian", "definition": "Artesian"},
+    {"categories": ["aquifer_type"], "term": "Confined single aquifer", "definition": "Confined single aquifer"},
+    {"categories": ["aquifer_type"], "term": "Unsaturated (dry)", "definition": "Unsaturated (dry)"},
+    {"categories": ["aquifer_type"], "term": "Fractured", "definition": "Fractured"},
+    {"categories": ["aquifer_type"], "term": "Confined multiple aquifers", "definition": "Confined multiple aquifers"},
+    {"categories": ["aquifer_type"], "term": "Unconfined multiple aquifers", "definition": "Unconfined multiple aquifers"},
+    {"categories": ["aquifer_type"], "term": "Perched aquifer", "definition": "Perched aquifer"},
+    {"categories": ["aquifer_type"], "term": "Confining layer or aquitard", "definition": "Confining layer or aquitard"},
+    {"categories": ["aquifer_type"], "term": "Semi-confined", "definition": "Semi-confined"},
+    {"categories": ["aquifer_type"], "term": "Unconfined single aquifer", "definition": "Unconfined single aquifer"},
+    {"categories": ["aquifer_type"], "term": "Mixed (confined and unconfined multiple aquifers)", "definition": "Mixed (confined and unconfined multiple aquifers)"},
+    {"categories": ["geographic_scale"], "term": "Major", "definition": "Major aquifers of national significance"},
+    {"categories": ["geographic_scale"], "term": "Regional", "definition": "Important aquifers serving regions"},
+    {"categories": ["geographic_scale"], "term": "Local", "definition": "Smaller, locally important aquifers"},
+    {"categories": ["geographic_scale"], "term": "Minor", "definition": "Limited extent or yield"},
     {"categories": ["origin_source"], "term": "Reported by another agency", "definition": "Reported by another agency"},
     {"categories": ["origin_source"], "term": "From driller's log or well report", "definition": "From driller's log or well report"},
     {"categories": ["origin_source"], "term": "Private geologist, consultant or univ associate", "definition": "Private geologist, consultant or univ associate"},
@@ -709,6 +731,13 @@
     {"categories": ["note_type"], "term": "Historical", "definition": "Historical information or context about the well or location."},
     {"categories": ["note_type"], "term": "Other", "definition": "Other types of notes that do not fit into the predefined categories."},
     {"categories": ["note_type"], "term": "Water", "definition": "Water bearing zone information and other info from ose reports"},
-    {"categories": ["note_type"], "term": "Measuring", "definition": "Notes about measuring/visiting the well, on Access form"}
+    {"categories": ["note_type"], "term": "Measuring", "definition": "Notes about measuring/visiting the well, on Access form"},
+    {"categories": ["well_pump_type"], "term": "Submersible", "definition": "Submersible"},
+    {"categories": ["well_pump_type"], "term": "Jet", "definition": "Jet Pump"},
+    {"categories": ["well_pump_type"], "term": "Line Shaft", "definition": "Line Shaft"},
+    {"categories": ["well_pump_type"], "term": "Hand", "definition": "Hand Pump"},
+    {"categories": ["permission_type"], "term": "Water Level Sample", "definition": "Permissions for taking water level samples"},
+    {"categories": ["permission_type"], "term": "Water Chemistry Sample", "definition": "Permissions for water taking chemistry samples"},
+    {"categories": ["permission_type"], "term": "Datalogger Installation", "definition": "Permissions for installing dataloggers"}
   ]
 }
\ No newline at end of file
diff --git a/db/__init__.py b/db/__init__.py
index 5a58441f8..4a0fc8e70 100644
--- a/db/__init__.py
+++ b/db/__init__.py
@@ -33,7 +33,7 @@
 from db.notes import *
 from db.observation import *
 from db.parameter import *
-from db.permission import *
+from db.permission_history import *
 from db.publication import *
 from db.regulatory_limit import *
 from db.sample import *
@@ -43,6 +43,11 @@
 from db.transducer import *
 from db.measuring_point_history import *
 from db.data_provenance import *
+from db.aquifer_system import *
+from db.geologic_formation import *
+from db.thing_aquifer_association import *
+from db.thing_geologic_formation_association import *
+from db.aquifer_type import *
 
 from sqlalchemy import (
     func,
diff --git a/db/aquifer_system.py b/db/aquifer_system.py
new file mode 100644
index 000000000..c202d77c9
--- /dev/null
+++ b/db/aquifer_system.py
@@ -0,0 +1,84 @@
+"""
+SQLAlchemy model for the AquiferSystem table.
+
+This is a master reference table for aquifer systems and hydrogeologic units.
+"""
+
+from typing import List, TYPE_CHECKING
+
+from sqlalchemy import Text, Index
+from sqlalchemy.orm import relationship, Mapped, mapped_column
+from sqlalchemy.ext.associationproxy import association_proxy, AssociationProxy
+from geoalchemy2 import Geometry
+
+from db.base import Base, AutoBaseMixin, ReleaseMixin
+from db.lexicon import lexicon_term
+
+from constants import SRID_WGS84
+
+if TYPE_CHECKING:
+    from db.thing import WellScreen, ThingAquiferAssociation, Thing
+    from db.aquifer_type import AquiferType
+
+
+class AquiferSystem(Base, AutoBaseMixin, ReleaseMixin):
+    __versioned__ = {}
+
+    name: Mapped[str] = mapped_column(
+        nullable=False,
+        unique=True,
+        comment="The full, human-readable name of the aquifer system (e.g., 'Ogallala Aquifer').",
+    )
+    description: Mapped[str] = mapped_column(
+        Text,
+        nullable=True,
+        comment="A detailed description of the aquifer system, its characteristics, and its significance.",
+    )
+    # Lexicon terms were retrieved from NMAquifer's 'LU_AquiferType' table.
+    primary_aquifer_type: Mapped[str] = lexicon_term(
+        nullable=False,
+        comment="A controlled vocabulary field to classify the aquifer system as a whole (e.g., 'Unconfined', 'Confined', 'Perched').",
+    )
+    geographic_scale: Mapped[str] = lexicon_term(
+        nullable=True,
+        comment="A controlled vocabulary field to classify the aquifer's geographic scale (e.g., 'Major', 'Regional', 'Local').",
+    )
+    boundary: Mapped[Geometry] = mapped_column(
+        Geometry(geometry_type="MULTIPOLYGON", srid=SRID_WGS84, spatial_index=True),
+        nullable=True,
+        comment="A spatial representation of the aquifer system's boundary.",
+    )
+    # Hierarchical relationship fields (may be implemented in future iterations)
+    # Example: High Plains Aquifer (parent) contains Ogallala Aquifer (child)
+    # parent_id = Column(Integer, ForeignKey('aquifer_system.id'))
+    # parent = relationship('AquiferSystem', remote_side=[id], backref='subsystems')
+
+    # --- Relationships ---
+    # One-To-Many: An AquiferSystem can be associated with many wells (Things) via the ThingAquiferAssociation join table.
+    thing_associations: Mapped[List["ThingAquiferAssociation"]] = relationship(
+        "ThingAquiferAssociation",
+        back_populates="aquifer_system",
+        cascade="all, delete-orphan",
+        passive_deletes=True,
+    )
+
+    # One-To-Many: An AquiferSystem can be the target for many individual WellScreens.
+    well_screens: Mapped[List["WellScreen"]] = relationship(
+        "WellScreen",
+        back_populates="aquifer_system",
+        cascade="all, delete-orphan",
+        passive_deletes=True,
+    )
+
+    # --- Association Proxies ---
+    # Proxy to directly access Things (wells) associated with this AquiferSystem.
+    things: AssociationProxy[List["Thing"]] = association_proxy(
+        "thing_associations", "thing"
+    )
+    # Proxy to directly access all AquiferTypes associated with this AquiferSystem.
+    aquifer_types: AssociationProxy[List["AquiferType"]] = association_proxy(
+        "thing_associations", "aquifer_types"
+    )
+
+    # --- Table Arguments ---
+    __table_args__ = (Index("ix_aquifersystem_name", "name"),)
diff --git a/db/aquifer_type.py b/db/aquifer_type.py
new file mode 100644
index 000000000..32900d801
--- /dev/null
+++ b/db/aquifer_type.py
@@ -0,0 +1,58 @@
+"""
+SQLAlchemy model for the AquiferType table.
+
+This table stores the specific aquifer characteristics/types associated with
+a Thing-AquiferSystem relationship. It allows capturing that a single aquifer
+can have multiple characteristics simultaneously.
+
+Example:
+    A well in the "Ogallala" aquifer might tap portions that are both
+    "Fractured" AND "Confined". This would create:
+    - One AquiferSystem: "Ogallala"
+    - One ThingAquiferAssociation: linking well to Ogallala
+    - Two AquiferType records: "Fractured" and "Confined"
+"""
+
+from typing import TYPE_CHECKING
+
+from sqlalchemy import ForeignKey
+from sqlalchemy.orm import relationship, Mapped, mapped_column
+
+from db.base import Base, AutoBaseMixin, ReleaseMixin, lexicon_term
+
+if TYPE_CHECKING:
+    from db.thing_aquifer_association import ThingAquiferAssociation
+
+
+class AquiferType(Base, AutoBaseMixin, ReleaseMixin):
+    """
+    Represents the specific aquifer types/characteristics for a
+    Thing-AquiferSystem association.
+
+    This allows modeling the fact that:
+    - A single aquifer can have multiple characteristics
+    - Different wells may tap different characteristics of the same aquifer
+    - Characteristics are attributes of the relationship, not the aquifer itself
+
+    Fields from WellData CSV:
+        - AquiferType: May contain multiple codes (e.g., "FC" = Fractured + Confined)
+        - Each code becomes a separate AquiferType record
+    """
+
+    # --- Columns ---
+    thing_aquifer_association_id: Mapped[int] = mapped_column(
+        ForeignKey("thing_aquifer_association.id", ondelete="CASCADE"),
+        nullable=False,
+        comment="Links to the Thing-Aquifer association this type describes.",
+    )
+    aquifer_type: Mapped[str] = lexicon_term(
+        nullable=False,
+        comment="Controlled vocabulary for aquifer hydrologic properties. "
+        "Examples: 'Unconfined', 'Confined', 'Perched', 'Fractured', 'Unconsolidated'.",
+    )
+
+    # --- Relationships ---
+    # Many-to-One: Multiple aquifer types can belong to one association
+    thing_aquifer_association: Mapped["ThingAquiferAssociation"] = relationship(
+        "ThingAquiferAssociation", back_populates="aquifer_types"
+    )
diff --git a/db/contact.py b/db/contact.py
index 7855814fb..558724df9 100644
--- a/db/contact.py
+++ b/db/contact.py
@@ -26,7 +26,7 @@
     from db.field import FieldEventParticipant, FieldEvent
     from db.thing import Thing
     from db.publication import Author, AuthorContactAssociation
-    from db.permission import Permission
+    from db.permission_history import PermissionHistory
 
 
 class ThingContactAssociation(Base, AutoBaseMixin):
@@ -74,8 +74,10 @@ class Contact(Base, AutoBaseMixin, ReleaseMixin):
     )
 
     # One-To-Many: A Contact can grant many Permissions.
-    permissions: Mapped[List["Permission"]] = relationship(
-        "Permission", back_populates="contact", cascade="all, delete, delete-orphan"
+    permissions: Mapped[List["PermissionHistory"]] = relationship(
+        "PermissionHistory",
+        back_populates="contact",
+        cascade="all, delete, delete-orphan",
     )
     # One-To-Many: A Contact can be associated with many Authors (in Publications).
     author_associations: Mapped[List["AuthorContactAssociation"]] = relationship(
diff --git a/db/data_provenance.py b/db/data_provenance.py
index 06c468c8d..20505d94c 100644
--- a/db/data_provenance.py
+++ b/db/data_provenance.py
@@ -19,7 +19,7 @@
 from sqlalchemy import Integer, Index, and_
 from sqlalchemy.orm import relationship, Mapped, mapped_column, declared_attr, foreign
 
-from db.base import Base, AutoBaseMixin, ReleaseMixin, pascal_to_snake
+from db.base import Base, AutoBaseMixin, ReleaseMixin
 
 from db import lexicon_term
 
@@ -53,9 +53,13 @@ class DataProvenance(AutoBaseMixin, ReleaseMixin, Base):
     )
     # Values from the following NMAquifer tables are included as `origin_source` terms in the lexicon:
     # 'LU_DataSource', 'LU_Depth_CompletionSource'.
-    origin_source: Mapped[str] = lexicon_term(
+    origin_type: Mapped[str] = lexicon_term(
         nullable=True,
-        comment="Indicates the origin source of the data (e.g'Driller's Log', 'Well Report'.",
+        comment="Indicates the type of origin the data (e.g'Driller's Log', 'Well Report'.",
+    )
+    origin_source: Mapped[str] = mapped_column(
+        nullable=True,
+        comment="The specific source of the data (e.g., 'J. Brown Thesis, \"I like APIs\", Pomona College, 1994').",
     )
     # Values from the following NMAquifer tables are included as `collection_method` terms in the lexicon:
     # 'LU_AltitudeMethod','LU_CoordinateMethod'.
@@ -116,7 +120,7 @@ def data_provenance(cls):
             "DataProvenance",
             primaryjoin=and_(
                 cls.id == foreign(DataProvenance.target_id),
-                DataProvenance.target_table == pascal_to_snake(cls.__name__),
+                DataProvenance.target_table == cls.__tablename__,
             ),
             lazy="selectin",
             viewonly=True,
diff --git a/db/geologic_formation.py b/db/geologic_formation.py
new file mode 100644
index 000000000..2379f50f4
--- /dev/null
+++ b/db/geologic_formation.py
@@ -0,0 +1,82 @@
+"""
+SQLAlchemy model for the GeologicFormation table.
+
+This table is a master reference table for geologic formations. Its purpose is to store definitions and descriptions
+of various geologic formations that can be referenced by other tables in the database.
+"""
+
+from typing import List, TYPE_CHECKING
+
+from sqlalchemy import Text, Index
+from sqlalchemy.orm import relationship, Mapped, mapped_column
+from sqlalchemy.ext.associationproxy import association_proxy, AssociationProxy
+from geoalchemy2 import Geometry
+
+from db.base import Base, AutoBaseMixin, ReleaseMixin
+from db.lexicon import lexicon_term
+
+from constants import SRID_WGS84
+
+if TYPE_CHECKING:
+    from db.thing import Thing, WellScreen
+    from db.thing_geologic_formation_association import (
+        ThingGeologicFormationAssociation,
+    )
+
+
+class GeologicFormation(Base, AutoBaseMixin, ReleaseMixin):
+    __versioned__ = {}
+
+    # TODO: Let the API map formation codes to names using a formations.json file that can be periodically updated
+    #  from the authoritative source (.e.g USGS). A placeholder `formations.json` file had been added to the `core`
+    #  directory.
+    # name: Mapped[str] = mapped_column(
+    #     nullable=False,
+    #     unique=True,
+    #     comment="The full, human-readable name of the geologic formation (e.g., 'Navajo Sandstone').",
+    # )
+    formation_code: Mapped[str] = lexicon_term(
+        nullable=True,
+        unique=True,
+        comment="A short code or abbreviation for the geologic formation (e.g., '120ELRT').",
+    )
+    description: Mapped[str] = mapped_column(
+        Text,
+        nullable=True,
+        comment="A detailed description of the geologic formation, its characteristics, and its significance.",
+    )
+    # TODO: Implement controlled vocabularies for `lithology` using NMAquifer's 'LU_Lithology' table.
+    #  This should be implemented after AMMP reviews and cleans up their formation terms and codes.
+    lithology: Mapped[str] = lexicon_term(
+        nullable=True,
+        comment="A controlled vocabulary for the primary, dominant rock type"
+        "(e.g., 'Tuff', 'Sandstone', 'Alluvium', 'Shale').",
+    )
+    boundary: Mapped[Geometry] = mapped_column(
+        Geometry(geometry_type="MULTIPOLYGON", srid=SRID_WGS84, spatial_index=True),
+        nullable=True,
+        comment="A spatial representation of the geologic formation's extent.",
+    )
+
+    # --- Relationships ---
+    # One-To-Many (Association Object): A GeologicFormation can be associated with many Things (e.g., wells) via the
+    # ThingGeologicFormationAssociation join table.
+    thing_associations: Mapped[List["ThingGeologicFormationAssociation"]] = (
+        relationship(
+            "ThingGeologicFormationAssociation",
+            back_populates="geologic_formation",
+            cascade="all, delete-orphan",
+            passive_deletes=True,
+        )
+    )
+    # One-To-Many: A GeologicFormation can have many physical WellScreens installed in it.
+    well_screens: Mapped[List["WellScreen"]] = relationship(
+        "WellScreen", back_populates="geologic_formation", passive_deletes=True
+    )
+
+    # --- Association Proxies ---
+    # Provides direct access to Things (wells) that penetrate this formation.
+    things: AssociationProxy["Thing"] = association_proxy("thing_associations", "thing")
+
+    # --- Table Arguments ---
+    __table_args__ = (Index("ix_geologicformation_formation_code", "formation_code"),)
diff --git a/db/notes.py b/db/notes.py
index ab8384064..0e2e8ab8b 100644
--- a/db/notes.py
+++ b/db/notes.py
@@ -97,7 +97,7 @@ def notes(cls):
             "Notes",
             primaryjoin=and_(
                 cls.id == foreign(Notes.target_id),
-                Notes.target_table == cls.__name__,
+                Notes.target_table == cls.__tablename__,
             ),
             cascade="all, delete-orphan",
             lazy="selectin",
@@ -120,7 +120,7 @@ def add_note(
             content=content,
             note_type=note_type,
             target_id=self.id,
-            target_table=self.__class__.__name__,
+            target_table=self.__class__.__tablename__,
             release_status=release_status,
         )
 
diff --git a/db/permission_history.py b/db/permission_history.py
new file mode 100644
index 000000000..591046bba
--- /dev/null
+++ b/db/permission_history.py
@@ -0,0 +1,96 @@
+"""
+models/permission.py
+
+This model defines the `Permission` table, a polymorphic table that tracks
+all legal and administrative agreements related to site access and activity.
+Its purpose is to track who granted permission, what activities they authorized,
+which entity the permission applies to, and for what period of time.
+"""
+
+from typing import TYPE_CHECKING
+from datetime import date
+from sqlalchemy import Integer, ForeignKey, String, and_
+from sqlalchemy.orm import relationship, Mapped, mapped_column, declared_attr, foreign
+
+from db.base import Base, AutoBaseMixin, ReleaseMixin, lexicon_term
+
+
+if TYPE_CHECKING:
+    from db.contact import Contact
+    from db.thing import Thing
+    from db.location import Location
+
+
+class PermissionHistory(Base, AutoBaseMixin, ReleaseMixin):
+    """
+    Represents a specific grant of permission from a Contact for a
+    specific entity (e.g., a Thing or Location).
+    """
+
+    # --- Foreign Keys ---
+    contact_id: Mapped[int] = mapped_column(
+        Integer, ForeignKey("contact.id", ondelete="CASCADE"), nullable=False
+    )
+
+    # --- Columns ---
+    permission_type: Mapped[str] = lexicon_term(nullable=False)
+    permission_allowed: Mapped[bool] = mapped_column(nullable=False, default=False)
+    start_date: Mapped[date] = mapped_column(nullable=False)
+    end_date: Mapped[date] = mapped_column(nullable=True)
+    notes: Mapped[str] = mapped_column(nullable=True)
+
+    # --- Polymorphic Columns ---
+    target_id: Mapped[int] = mapped_column(nullable=False)
+    target_table: Mapped[str] = mapped_column(String(50), nullable=False)
+
+    # --- Relationships ---
+    # Many-To-One: A Permission is granted by one Contact.
+    contact: Mapped["Contact"] = relationship("Contact", back_populates="permissions")
+
+    # --- Polymorphic Parent Relationships (Internal) ---
+    # These are view-only relationships used by the 'target' property below.
+    # They tell SQLAlchemy exactly how to find the specific parent record for a given child.
+    _thing_target: Mapped["Thing"] = relationship(
+        "Thing",
+        primaryjoin="and_(foreign(PermissionHistory.target_id) == Thing.id, "
+        "PermissionHistory.target_table == 'thing')",
+        viewonly=True,
+    )
+    _location_target: Mapped["Location"] = relationship(
+        "Location",
+        primaryjoin="and_(foreign(PermissionHistory.target_id) == Location.id, "
+        "PermissionHistory.target_table == 'location')",
+        viewonly=True,
+    )
+
+    @property
+    def target(self):
+        """
+        A generic property to get the parent object (Thing, Location, etc.).
+        This is useful for simplifying application code by providing a single,
+        consistent way to access the parent of a polymorphic record.
+        """
+        return getattr(self, f"_{self.target_table}_target")
+
+
+class PermissionHistoryMixin:
+    """
+    Mixin for models that can have permissions (e.g., Thing, Location).
+    It automatically creates a polymorphic One-to-Many relationship to the
+    Permission table.
+    """
+
+    @declared_attr
+    def permission_history(cls):
+        # One-to-Many polymorphic relationship
+        return relationship(
+            "PermissionHistory",
+            primaryjoin=(
+                and_(
+                    cls.id == foreign(PermissionHistory.target_id),
+                    PermissionHistory.target_table == cls.__tablename__,
+                )
+            ),
+            lazy="selectin",
+            viewonly=True,
+        )
diff --git a/db/status_history.py b/db/status_history.py
index 8b3ee2321..15b5aec2f 100644
--- a/db/status_history.py
+++ b/db/status_history.py
@@ -19,7 +19,7 @@
 )
 from sqlalchemy.orm import Mapped, mapped_column, declared_attr, relationship, foreign
 
-from db.base import Base, AutoBaseMixin, ReleaseMixin, lexicon_term, pascal_to_snake
+from db.base import Base, AutoBaseMixin, ReleaseMixin, lexicon_term
 
 
 class StatusHistory(Base, AutoBaseMixin, ReleaseMixin):
@@ -47,7 +47,7 @@ def status_history(cls):
             "StatusHistory",
             primaryjoin=and_(
                 cls.id == foreign(StatusHistory.target_id),
-                StatusHistory.target_table == pascal_to_snake(cls.__name__),
+                StatusHistory.target_table == cls.__tablename__,
             ),
             cascade="all, delete-orphan",
             lazy="selectin",
diff --git a/db/thing.py b/db/thing.py
index 9f30d08e2..92c7bd942 100644
--- a/db/thing.py
+++ b/db/thing.py
@@ -26,8 +26,9 @@
     AutoBaseMixin,
     Base,
     ReleaseMixin,
-    PermissionMixin,
 )
+from db.permission_history import PermissionHistoryMixin
+from services.util import retrieve_latest_polymorphic_history_table_record
 from db.status_history import StatusHistoryMixin
 from db.measuring_point_history import MeasuringPointHistory
 from db.data_provenance import DataProvenanceMixin
@@ -40,6 +41,12 @@
     from db.sensor import Sensor
     from db.contact import Contact
     from db.group import Group, GroupThingAssociation
+    from db.aquifer_system import AquiferSystem
+    from db.thing_aquifer_association import ThingAquiferAssociation
+    from db.geologic_formation import GeologicFormation
+    from db.thing_geologic_formation_association import (
+        ThingGeologicFormationAssociation,
+    )
 
 
 class Thing(
@@ -47,7 +54,7 @@ class Thing(
     AutoBaseMixin,
     ReleaseMixin,
     StatusHistoryMixin,
-    PermissionMixin,
+    PermissionHistoryMixin,
     DataProvenanceMixin,
     NotesMixin,
 ):
@@ -64,10 +71,6 @@ class Thing(
         comment="To audit where the data came from in NM_Aquifer if it was transferred over",
     )
 
-    # notes = mapped_column(Text, nullable=True)
-    # measuring_notes = mapped_column(Text, nullable=True)
-    # water_notes = mapped_column(Text, nullable=True)
-
     # TODO: should `name` be unique?
     name: Mapped[str] = mapped_column(
         nullable=False,
@@ -116,6 +119,32 @@ class Thing(
 
     well_construction_notes: Mapped[str] = mapped_column(Text, nullable=True)
 
+    well_completion_date: Mapped[date] = mapped_column(
+        nullable=True, comment="the date the well was completed if known"
+    )
+    well_driller_name: Mapped[str] = mapped_column(
+        String(200), nullable=True, comment="Name of the well driller."
+    )
+    well_construction_method: Mapped[str] = lexicon_term(nullable=True)
+    well_pump_type: Mapped[str] = lexicon_term(nullable=True)
+    well_pump_depth: Mapped[float] = mapped_column(
+        Float,
+        nullable=True,
+        info={"unit": "feet below ground surface"},
+        comment="Depth of the well pump from ground surface to the pump intake (in feet).",
+    )
+    formation_completion_code: Mapped[str] = lexicon_term(
+        nullable=True,
+        comment="The geologic formation in which the well was completed (from WellData.FormationZone). "
+        "This indicates the target formation for the well, not the full stratigraphic column. "
+        "For detailed depth-interval stratigraphy, see formation_associations.",
+    )
+    # TODO: should this be required for every well in the database? AMMP review
+    is_suitable_for_datalogger: Mapped[bool] = mapped_column(
+        nullable=True,
+        comment="Indicates if the well is suitable for datalogger installation.",
+    )
+
     # Spring-related columns
     spring_type: Mapped[str] = lexicon_term(
         nullable=True,
@@ -263,6 +292,26 @@ class Thing(
         lazy="joined",
     )
 
+    # One-To-Many: A Thing can be associated with many AquiferSystems via the ThingAquiferAssociation join table.
+    aquifer_associations: Mapped[List["ThingAquiferAssociation"]] = relationship(
+        "ThingAquiferAssociation",
+        back_populates="thing",
+        cascade="all, delete-orphan",
+        passive_deletes=True,
+        lazy="joined",
+    )
+
+    # Many-To-Many: A Thing can penetrate many GeologicFormations.
+    formation_associations: Mapped[List["ThingGeologicFormationAssociation"]] = (
+        relationship(
+            "ThingGeologicFormationAssociation",
+            back_populates="thing",
+            cascade="all, delete-orphan",
+            passive_deletes=True,
+            lazy="joined",
+        )
+    )
+
     # --- Association Proxies ---
     assets: AssociationProxy[list["Asset"]] = association_proxy(
         "asset_associations", "asset"
@@ -288,6 +337,16 @@ class Thing(
         "group_associations", "group"
     )
 
+    # Proxy to directly access AquiferSystems associated with this Thing
+    aquifer_systems: AssociationProxy[List["AquiferSystem"]] = association_proxy(
+        "aquifer_associations", "aquifer_system"
+    )
+
+    # Proxy to directly access the GeologicFormations penetrated by this Thing.
+    geologic_formations: AssociationProxy[List["GeologicFormation"]] = (
+        association_proxy("formation_associations", "geologic_formation")
+    )
+
     # Full-text search vector
     search_vector = Column(TSVectorType("name", "well_construction_notes"))
 
@@ -379,7 +438,48 @@ def measuring_point_description(self) -> str | None:
 
     @property
     def well_depth_source(self) -> str | None:
-        return self._get_data_provenance_attribute("well_depth", "origin_source")
+        return self._get_data_provenance_attribute("well_depth", "origin_type")
+
+    @property
+    def well_completion_date_source(self) -> str | None:
+        return self._get_data_provenance_attribute(
+            "well_completion_date", "origin_type"
+        )
+
+    @property
+    def well_construction_method_source(self) -> str | None:
+        return self._get_data_provenance_attribute(
+            "well_construction_method", "origin_source"
+        )
+
+    @property
+    def aquifers(self) -> List[dict]:
+        """
+        Returns a list of aquifer systems and their associated types for this Thing.
+        Each aquifer system is represented as a dictionary with its name and a list of types.
+        """
+        aquifer_list = []
+        for association in self.aquifer_associations:
+            aquifer_info = {
+                "aquifer_system": association.aquifer_system.name,
+                "aquifer_types": [
+                    atype.aquifer_type for atype in association.aquifer_types
+                ],
+            }
+            aquifer_list.append(aquifer_info)
+        return aquifer_list
+
+    @property
+    def permissions(self) -> list:
+        """
+        Returns the associated permissions or an empty list. If there are no
+        associated permissions, an empty list is returned instead of None to
+        allow the API to serialize correctly (see schemas/thing.py).
+        """
+        if self.permission_history:
+            return self.permission_history
+        else:
+            return []
 
 
 class ThingIdLink(Base, AutoBaseMixin, ReleaseMixin):
@@ -406,6 +506,12 @@ class WellScreen(Base, AutoBaseMixin, ReleaseMixin):
     thing_id: Mapped[int] = mapped_column(
         ForeignKey("thing.id", ondelete="CASCADE"), nullable=False
     )
+    aquifer_system_id: Mapped[int] = mapped_column(
+        ForeignKey("aquifer_system.id", ondelete="SET NULL"), nullable=True
+    )
+    geologic_formation_id: Mapped[int] = mapped_column(
+        ForeignKey("geologic_formation.id", ondelete="SET NULL"), nullable=True
+    )
     screen_depth_top: Mapped[float] = mapped_column(
         info={"unit": "feet below ground surface"}, nullable=True
     )
@@ -423,6 +529,14 @@ class WellScreen(Base, AutoBaseMixin, ReleaseMixin):
     # Many-To-One: A WellScreen belongs to one Thing.
     thing: Mapped["Thing"] = relationship("Thing", back_populates="screens")
 
+    aquifer_system: Mapped["AquiferSystem"] = relationship(
+        "AquiferSystem", back_populates="well_screens", passive_deletes=True
+    )
+
+    geologic_formation: Mapped["GeologicFormation"] = relationship(
+        "GeologicFormation", back_populates="well_screens", passive_deletes=True
+    )
+
 
 class WellPurpose(Base, AutoBaseMixin, ReleaseMixin):
     """
diff --git a/db/thing_aquifer_association.py b/db/thing_aquifer_association.py
new file mode 100644
index 000000000..cca5758a9
--- /dev/null
+++ b/db/thing_aquifer_association.py
@@ -0,0 +1,51 @@
+"""
+SQLAlchemy model for the ThingAquiferAssociation table.
+
+This table is a join table (or "association object") whose purpose is to manage
+the many-to-many relationship between a Thing and an AquiferSystem.
+"""
+
+from typing import TYPE_CHECKING
+
+from sqlalchemy import ForeignKey
+
+from sqlalchemy.orm import relationship, Mapped, mapped_column
+
+from db.base import Base, AutoBaseMixin, ReleaseMixin
+
+if TYPE_CHECKING:
+    from db.thing import Thing
+    from db.aquifer_system import AquiferSystem
+    from db.aquifer_type import AquiferType
+
+
+class ThingAquiferAssociation(Base, AutoBaseMixin, ReleaseMixin):
+    """
+    Represents the association of a Thing to an AquiferSystem. This is an Association Object.
+    """
+
+    thing_id: Mapped[int] = mapped_column(
+        ForeignKey("thing.id", ondelete="CASCADE"), nullable=False
+    )
+    aquifer_system_id: Mapped[int] = mapped_column(
+        ForeignKey("aquifer_system.id", ondelete="CASCADE"), nullable=False
+    )
+
+    # --- Relationship Definitions ---
+    # Many-To-One: This association links to one Thing.
+    thing: Mapped["Thing"] = relationship(
+        "Thing", back_populates="aquifer_associations", lazy="joined"
+    )
+
+    # Many-To-One: This association links to one AquiferSystem.
+    aquifer_system: Mapped["AquiferSystem"] = relationship(
+        "AquiferSystem", back_populates="thing_associations", lazy="joined"
+    )
+    # One-To-Many: An association can have multiple aquifer types.
+    aquifer_types: Mapped[list["AquiferType"]] = relationship(
+        "AquiferType",
+        back_populates="thing_aquifer_association",
+        cascade="all, delete-orphan",
+        passive_deletes=True,
+        lazy="joined",
+    )
diff --git a/db/thing_geologic_formation_association.py b/db/thing_geologic_formation_association.py
new file mode 100644
index 000000000..0707df269
--- /dev/null
+++ b/db/thing_geologic_formation_association.py
@@ -0,0 +1,60 @@
+"""
+SQLAlchemy model for the ThingGeologicFormationAssociation table.
+
+This table is an association object that creates a many-to-many relationship between a Thing (well) and a
+GeologicFormation. It stores the lithology for a well, detailing the depth intervals for each formation it penetrates.
+"""
+
+from typing import TYPE_CHECKING
+
+from sqlalchemy import ForeignKey
+from sqlalchemy.orm import relationship, Mapped, mapped_column
+
+from db.base import Base, AutoBaseMixin, ReleaseMixin
+
+if TYPE_CHECKING:
+    from db.thing import Thing
+    from db.geologic_formation import GeologicFormation
+
+
+class ThingGeologicFormationAssociation(Base, AutoBaseMixin, ReleaseMixin):
+    """
+    This is a= join table (Association Object). It represents the association of a Thing to a
+    GeologicFormation at a specific depth interval.
+    """
+
+    # --- Foreign Keys ---
+    thing_id: Mapped[int] = mapped_column(
+        ForeignKey("thing.id", ondelete="CASCADE"),
+        nullable=False,
+        comment="The foreign key linking this record to the `Thing` table."
+        "Deleting a `Thing` will cascade and delete its formation log.",
+    )
+    geologic_formation_id: Mapped[int] = mapped_column(
+        ForeignKey("geologic_formation.id", ondelete="SET NULL"),
+        nullable=True,
+        comment="The foreign key linking this record to the `GeologicFormation` table."
+        "This is set to `SET NULL` on delete, as deleting a formation definition (a rare admin action)"
+        "should not delete the historical fact that a well had a pick at this depth.",
+    )
+
+    # Depth interval fields
+    top_depth: Mapped[float] = mapped_column(
+        nullable=False,
+        comment="The depth (in feet) to the top of the geologic formation, as measured from ground surface.",
+    )
+    bottom_depth: Mapped[float] = mapped_column(
+        nullable=False,
+        comment="The depth (in feet) to the bottom of the geologic formation, as measured from ground surface.",
+    )
+
+    # --- Relationship Definitions ---
+    # Many-To-One: This association links to one Thing.
+    thing: Mapped["Thing"] = relationship(
+        "Thing", back_populates="formation_associations", lazy="joined"
+    )
+
+    # Many-To-One: This association links to one GeologicFormation.
+    geologic_formation: Mapped["GeologicFormation"] = relationship(
+        "GeologicFormation", back_populates="thing_associations", lazy="joined"
+    )
diff --git a/schemas/aquifer_system.py b/schemas/aquifer_system.py
new file mode 100644
index 000000000..1e1961873
--- /dev/null
+++ b/schemas/aquifer_system.py
@@ -0,0 +1,51 @@
+from typing import List
+
+from pydantic import BaseModel
+from schemas import BaseResponseModel
+from schemas.validators import GeometryMixin
+from core.enums import AquiferType, GeographicScale  # Import specific Enums
+
+
+# ------ CREATE ----------
+class CreateAquiferSystem(GeometryMixin):
+    """
+    Schema for creating an aquifer system.
+    Used during data transfer and API creation.
+    """
+
+    name: str
+    description: str | None = None
+    primary_aquifer_type: AquiferType
+    geographic_scale: GeographicScale | None = None
+    # boundary field inherited from GeometryMixin
+
+
+# ------ RESPONSE ----------
+class GeoJSONGeometry(BaseModel):
+    """
+    Geometry schema for GeoJSON response.
+    """
+
+    type: str = "MULTIPOLYGON"
+    coordinates: List[List[List[float]]]
+
+
+class GeoJSONProperties(BaseResponseModel):
+    """
+    Response schema for aquifer system details.
+    """
+
+    name: str
+    description: str | None = None
+    primary_aquifer_type: AquiferType
+    geographic_scale: GeographicScale | None
+
+
+class AquiferSystemGeoJSONResponse(BaseModel):
+    """
+    Response schema for aquifer system details.
+    """
+
+    type: str = "Feature"
+    geometry: GeoJSONGeometry
+    properties: GeoJSONProperties
diff --git a/schemas/geologic_formation.py b/schemas/geologic_formation.py
new file mode 100644
index 000000000..67a3cb24a
--- /dev/null
+++ b/schemas/geologic_formation.py
@@ -0,0 +1,88 @@
+from typing import List
+
+from pydantic import BaseModel, field_validator, Field
+
+from schemas import BaseResponseModel
+from schemas.validators import DepthIntervalMixin, GeometryMixin
+from core.enums import FormationCode, Lithology
+
+
+# ------ CREATE ----------
+class CreateGeologicFormation(GeometryMixin):
+    """
+    Schema for creating a geologic formation.
+    Used during data transfer and API creation.
+    """
+
+    # formation_code has its own custom uppercase validator
+    formation_code: FormationCode | None = None
+    description: str | None = None
+    lithology: Lithology | None = None
+    # boundary: inherited from GeometryMixin
+
+    @field_validator("formation_code", mode="before")
+    @classmethod
+    def upper_case_code(cls, v: str | None) -> str | None:
+        """
+        Automatically uppercase the formation code.
+        """
+        if isinstance(v, str):
+            return v.upper()
+        return v
+
+
+class CreateThingGeologicFormationAssociation(DepthIntervalMixin):
+    """
+    Schema for linking a Thing (Well) to a GeologicFormation.
+    Uses DepthIntervalMixin to enforce bottom_depth > top_depth.
+    """
+
+    thing_id: int
+    geologic_formation_id: int
+    top_depth: float = Field(ge=0)
+    bottom_depth: float = Field(ge=0)
+
+
+# ------ RESPONSE ----------
+class GeoJSONGeometry(BaseModel):
+    """
+    Geometry schema for GeoJSON response.
+    """
+
+    type: str = "MULTIPOLYGON"
+    coordinates: List[List[List[float]]]
+
+
+class GeoJSONProperties(BaseResponseModel):
+    """
+    Response schema for geologic formation details.
+    """
+
+    formation_code: str | None = None
+    description: str | None = None
+    lithology: str | None = None
+
+
+class GeologicFormationGeoJSONResponse(BaseModel):
+    """
+    Response schema for geologic formation details.
+    """
+
+    type: str = "Feature"
+    geometry: GeoJSONGeometry
+    properties: GeoJSONProperties
+
+
+class ThingGeologicFormationAssociationResponse(BaseResponseModel):
+    """
+    Response schema for the association between a Thing and a GeologicFormation.
+    Includes depth interval information.
+    """
+
+    thing_id: int
+    geologic_formation_id: int | None = None
+    geologic_formation: GeologicFormationGeoJSONResponse | None = None
+    top_depth: float
+    top_depth_unit: str = "ft"
+    bottom_depth: float
+    bottom_depth_unit: str = "ft"
diff --git a/schemas/permission_history.py b/schemas/permission_history.py
new file mode 100644
index 000000000..e0619d90e
--- /dev/null
+++ b/schemas/permission_history.py
@@ -0,0 +1,18 @@
+from pydantic import BaseModel
+from schemas import PastOrTodayDate
+
+from core.enums import PermissionType
+
+
+# ------ RESPONSE ----------
+class PermissionHistoryResponse(BaseModel):
+    """
+    Even though permission_allowed and start_date are not-nullable in the
+    database, they are nullable here to accommodate cases where no permission
+    record exists for a given permission type.
+    """
+
+    permission_type: PermissionType
+    permission_allowed: bool | None
+    start_date: PastOrTodayDate | None
+    end_date: PastOrTodayDate | None
diff --git a/schemas/thing.py b/schemas/thing.py
index cf8c3ef2b..7a7982494 100644
--- a/schemas/thing.py
+++ b/schemas/thing.py
@@ -24,12 +24,17 @@
     ScreenType,
     Organization,
     MonitoringFrequency,
+    Organization,
+    MonitoringFrequency,
+    WellConstructionMethod,
+    WellPumpType,
+    FormationCode,
 )
 from schemas import BaseCreateModel, BaseUpdateModel, BaseResponseModel, PastOrTodayDate
 from schemas.group import GroupResponse
 from schemas.location import LocationGeoJSONResponse
 from schemas.notes import NoteResponse, CreateNote
-
+from schemas.permission_history import PermissionHistoryResponse
 
 # -------- VALIDATE ----------
 
@@ -128,8 +133,16 @@ class CreateWell(CreateBaseThing, ValidateWell):
     measuring_point_height: float = Field(
         ge=0, description="Measuring point height in feet"
     )
-    measuring_point_description: str | None
+    measuring_point_description: str | None = None
     notes: list[CreateNote] | None = None
+    well_completion_date: PastOrTodayDate | None = None
+    well_completion_date_source: str | None = None
+    well_driller_name: str | None = None
+    well_construction_method: WellConstructionMethod | None = None
+    well_construction_method_source: str | None = None
+    well_pump_type: WellPumpType | None = None
+    is_suitable_for_datalogger: bool | None
+    formation_completion_code: FormationCode | None = None
 
 
 class CreateSpring(CreateBaseThing):
@@ -146,6 +159,8 @@ class CreateWellScreen(BaseCreateModel):
     """
 
     thing_id: int
+    aquifer_system_id: int | None = None
+    geologic_formation_id: int | None = None
     screen_depth_bottom: float = Field(gt=0, description="Screen depth bottom in feet")
     screen_depth_top: float = Field(gt=0, description="Screen depth top in feet")
     screen_type: ScreenType | None = None
@@ -220,14 +235,25 @@ class WellResponse(BaseThingResponse):
     well_casing_depth_unit: str = "ft"
     well_casing_materials: list[CasingMaterial] = []
     well_construction_notes: str | None = None
+    well_completion_date: PastOrTodayDate | None
+    well_completion_date_source: str | None
+    well_driller_name: str | None
+    well_construction_method: WellConstructionMethod | None
+    well_construction_method_source: str | None
+    well_pump_type: WellPumpType | None
+    well_pump_depth: float | None
+    well_pump_depth_unit: str = "ft"
+    is_suitable_for_datalogger: bool | None
     well_status: str | None
     measuring_point_height: float
     measuring_point_height_unit: str = "ft"
     measuring_point_description: str | None
-
+    aquifers: list[dict] = []
     water_notes: list[NoteResponse] | None = None
     measuring_notes: list[NoteResponse] | None = None
     general_notes: list[NoteResponse] | None = None
+    permissions: list[PermissionHistoryResponse]
+    formation_completion_code: FormationCode | None
 
     @field_validator("well_purposes", mode="before")
     def populate_well_purposes_with_strings(cls, well_purposes):
@@ -248,6 +274,43 @@ def populate_well_casing_materials_with_strings(cls, well_casing_materials):
             materials = []
         return materials
 
+    @field_validator("permissions", mode="before")
+    def populate_permission_history_with_latest_records(cls, permissions):
+        """
+        Populate the permission history with the latest records for each
+        type of permission. If multiple records exist for the same permission type
+        only the most recent one is included. If there are no records
+        the permission_allowed will be None
+        """
+        permissions_to_return = []
+        for permission_type in [
+            "Water Level Sample",
+            "Water Chemistry Sample",
+            "Datalogger Installation",
+        ]:
+            # Filter records for the current permission type
+            filtered_records = [
+                record
+                for record in permissions
+                if record.permission_type == permission_type and record.end_date is None
+            ]
+            if filtered_records:
+                # Get the most recent record based on start_date
+                latest_record = max(
+                    filtered_records, key=lambda record: record.start_date
+                )
+                permissions_to_return.append(latest_record)
+            else:
+                permissions_to_return.append(
+                    PermissionHistoryResponse(
+                        permission_type=permission_type,
+                        permission_allowed=None,
+                        start_date=None,
+                        end_date=None,
+                    )
+                )
+        return permissions_to_return
+
 
 class SpringResponse(BaseThingResponse):
     """
@@ -269,6 +332,11 @@ class WellScreenResponse(BaseResponseModel):
 
     thing_id: int
     thing: WellResponse
+    aquifer_system_id: int | None = None
+    aquifer_system: str | None = None
+    aquifer_type: str | None = None
+    geologic_formation_id: int | None = None
+    geologic_formation: str | None = None
     screen_depth_bottom: float
     screen_depth_bottom_unit: str = "ft"
     screen_depth_top: float
@@ -276,6 +344,24 @@ class WellScreenResponse(BaseResponseModel):
     screen_type: str | None = None
     screen_description: str | None = None
 
+    @field_validator("aquifer_system", mode="before")
+    def populate_aquifer_system_with_name(cls, aquifer_system):
+        if aquifer_system is not None:
+            return aquifer_system.name
+        return None
+
+    @field_validator("aquifer_type", mode="before")
+    def populate_aquifer_type_with_name(cls, aquifer_type):
+        if aquifer_type is not None:
+            return aquifer_type.name
+        return None
+
+    @field_validator("geologic_formation", mode="before")
+    def populate_geologic_formation_with_code(cls, geologic_formation):
+        if geologic_formation is not None:
+            return geologic_formation.formation_code
+        return None
+
 
 class GeoJSONGeometry(BaseModel):
     """
@@ -342,6 +428,8 @@ class UpdateThingIdLink(BaseUpdateModel):
 
 
 class UpdateWellScreen(BaseUpdateModel):
+    aquifer_system_id: int | None = None
+    geologic_formation_id: int | None = None
     screen_depth_bottom: float | None = None
     screen_depth_top: float | None = None
     screen_description: str | None = None
diff --git a/schemas/validators.py b/schemas/validators.py
new file mode 100644
index 000000000..963047bc2
--- /dev/null
+++ b/schemas/validators.py
@@ -0,0 +1,43 @@
+"""
+schemas/validators.py
+Reusable Pydantic validators and mixins for aquifer and geology related schemas.
+May consider expansion for other domain models in the future.
+"""
+
+from pydantic import model_validator, field_validator, BaseModel, Field
+from services.validation.geospatial import validate_wkt_geometry
+
+
+class DepthIntervalMixin(BaseModel):
+    """
+    Mixin to enforce:
+    1. Depths are non-negative (via Field constraints).
+    2. Bottom depth > top depth (via model_validator).
+    Assumes the model has 'top_depth' and 'bottom_depth' fields.
+    """
+
+    top_depth: float = Field(ge=0)
+    bottom_depth: float = Field(ge=0)
+
+    @model_validator(mode="after")
+    def check_depth_logical_order(self) -> "DepthIntervalMixin":
+        if self.bottom_depth <= self.top_depth:
+            raise ValueError(
+                f"Bottom depth ({self.bottom_depth}) must be greater "
+                f"than top depth ({self.top_depth})"
+            )
+        return self
+
+
+class GeometryMixin(BaseModel):
+    """
+    Mixin to validate WKT strings for boundary fields.
+    Delegates logic to the validate_wkt_geometry service function.
+    """
+
+    boundary: str | None = None
+
+    @field_validator("boundary")
+    @classmethod
+    def validate_wkt(cls, v: str | None) -> str | None:
+        return validate_wkt_geometry(v)
diff --git a/services/thing_helper.py b/services/thing_helper.py
index 53ce54577..fdd0424db 100644
--- a/services/thing_helper.py
+++ b/services/thing_helper.py
@@ -13,6 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ===============================================================================
+from datetime import datetime
+from zoneinfo import ZoneInfo
+
 from fastapi import Request
 from fastapi_pagination.ext.sqlalchemy import paginate
 from pydantic import BaseModel
@@ -32,6 +35,7 @@
     WellCasingMaterial,
 )
 from db.group import GroupThingAssociation
+from db.measuring_point_history import MeasuringPointHistory
 from services.audit_helper import audit_add
 from services.crud_helper import model_patcher
 from services.exceptions_helper import PydanticStyleException
@@ -159,6 +163,10 @@ def add_thing(
     location_id = data.pop("location_id", None)
     group_id = data.pop("group_id", None)
 
+    # Extract measuring point data (stored in separate history table, not as Thing columns)
+    measuring_point_height = data.pop("measuring_point_height", None)
+    measuring_point_description = data.pop("measuring_point_description", None)
+
     try:
         thing = Thing(**data)
         thing.thing_type = thing_type
@@ -169,6 +177,18 @@ def add_thing(
         session.flush()
         session.refresh(thing)
 
+        # Create MeasuringPointHistory record if measuring_point_height provided
+        if measuring_point_height is not None:
+            measuring_point_history = MeasuringPointHistory(
+                thing_id=thing.id,
+                measuring_point_height=measuring_point_height,
+                measuring_point_description=measuring_point_description,
+                start_date=datetime.now(tz=ZoneInfo("UTC")),
+                end_date=None,
+            )
+            audit_add(user, measuring_point_history)
+            session.add(measuring_point_history)
+
         # endpoint catches ProgrammingError if location_id or group_id do not exist
         if group_id:
             assoc = GroupThingAssociation()
diff --git a/services/util.py b/services/util.py
index 313a922ec..e9ec08a94 100644
--- a/services/util.py
+++ b/services/util.py
@@ -1,13 +1,14 @@
 import json
 import os
-
-import httpx
-import pyproj
 from shapely.ops import transform
+import pyproj
+import httpx
+from sqlalchemy.orm import DeclarativeBase
 from sqlalchemy.orm import DeclarativeBase
 
 from constants import SRID_WGS84
 
+
 TRANSFORMERS = {}
 METERS_TO_FEET = 3.28084
 
@@ -59,6 +60,13 @@ def convert_ft_to_m(feet: float | None) -> float | None:
     return round(feet / METERS_TO_FEET, 6)
 
 
+def convert_m_to_ft(meters: float | None) -> float | None:
+    """Convert a length from meters to feet."""
+    if meters is None:
+        return None
+    return round(meters * METERS_TO_FEET, 6)
+
+
 def get_tiger_data(
     lon: float, lat: float, layer: int, outfields: str = "*"
 ) -> dict | None:
@@ -197,11 +205,10 @@ def retrieve_latest_polymorphic_history_table_record(
     DeclarativeBase | None
         The latest record from the specified polymorphic table with the defined type if it exists.
     """
-    if polymorphic_relationship == "permissions":
+    if polymorphic_relationship == "permission_history":
         type_field = "permission_type"
     elif polymorphic_relationship == "status_history":
         type_field = "status_type"
-
     polymorphic_records = getattr(target_record, polymorphic_relationship)
     type_polymorphic_records = [
         r
diff --git a/tests/features/environment.py b/tests/features/environment.py
index 9b801e9d7..afbc2d13c 100644
--- a/tests/features/environment.py
+++ b/tests/features/environment.py
@@ -28,12 +28,20 @@
     Parameter,
     Deployment,
     TransducerObservationBlock,
+    WellCasingMaterial,
+    PermissionHistory,
+    Contact,
     StatusHistory,
     ThingIdLink,
     WellPurpose,
     MeasuringPointHistory,
     MonitoringFrequencyHistory,
     DataProvenance,
+    AquiferSystem,
+    AquiferType,
+    ThingAquiferAssociation,
+    GeologicFormation,
+    ThingGeologicFormationAssociation,
 )
 from db.engine import session_ctx
 
@@ -87,9 +95,13 @@ def add_well(context, session, location, name_num):
         well_construction_notes="Test well construction notes",
         well_casing_diameter=5.0,
         well_casing_depth=10.0,
-        # notes="These are some test well notes",
-        # measuring_notes="These are some measuring notes",
-        # water_notes="This are some water notes",
+        well_completion_date="2013-05-15",
+        well_driller_name="Jonsi",
+        well_construction_method="Driven",
+        well_pump_type="Submersible",
+        well_pump_depth=8,
+        is_suitable_for_datalogger=True,
+        formation_completion_code="000EXRV",
     )
 
     session.add(well)
@@ -116,6 +128,20 @@ def add_well(context, session, location, name_num):
     return well
 
 
+@add_context_object_container("well_casing_materials")
+def add_well_casing_material(context, session, well):
+    wcm = WellCasingMaterial(
+        thing_id=well.id,
+        material="PVC",
+    )
+    session.add(wcm)
+    session.commit()
+    session.refresh(wcm)
+
+    context.objects["well_casing_materials"].append(wcm)
+    return wcm
+
+
 @add_context_object_container("well_purposes")
 def add_well_purpose(context, session, well, purpose_term):
     purpose = WellPurpose(thing=well, purpose=purpose_term)
@@ -189,6 +215,54 @@ def add_spring(context, session, location, name_num):
     return spring
 
 
+@add_context_object_container("contacts")
+def add_contact(context, session):
+    contact = Contact(
+        name="Test Contact",
+        role="Software Developer",
+        organization="NMBGMR",
+        release_status="draft",
+        contact_type="Primary",
+    )
+    session.add(contact)
+    session.commit()
+    session.refresh(contact)
+
+    context.objects["contacts"].append(contact)
+    return contact
+
+
+@add_context_object_container("permission_histories")
+def add_permission_history(
+    context,
+    session,
+    contact_id,
+    permission_type,
+    permission_allowed,
+    start_date,
+    end_date,
+    notes,
+    target_id,
+    target_table,
+):
+    permission_history = PermissionHistory(
+        contact_id=contact_id,
+        permission_type=permission_type,
+        permission_allowed=permission_allowed,
+        start_date=start_date,
+        end_date=end_date,
+        notes=notes,
+        target_id=target_id,
+        target_table=target_table,
+    )
+    session.add(permission_history)
+    session.commit()
+    session.refresh(permission_history)
+
+    context.objects["permission_histories"].append(permission_history)
+    return permission_history
+
+
 @add_context_object_container("sensors")
 def add_sensor(context, session):
     sensor = Sensor(
@@ -317,7 +391,8 @@ def add_data_provenance(
     target_id,
     target_table,
     field_name,
-    origin_source,
+    origin_type=None,
+    origin_source=None,
     collection_method=None,
     accuracy_value=None,
     accuracy_unit=None,
@@ -327,6 +402,7 @@ def add_data_provenance(
         collection_method=collection_method,
         target_id=target_id,
         target_table=target_table,
+        origin_type=origin_type,
         origin_source=origin_source,
         accuracy_value=accuracy_value,
         accuracy_unit=accuracy_unit,
@@ -353,6 +429,71 @@ def add_transducer_observation(context, session, block, deployment_id, value):
     return obs
 
 
+@add_context_object_container("aquifer_systems")
+def add_aquifer_system(context, session, name, well):
+    aquifer_system = AquiferSystem(
+        name=name,
+        description="this is a test aquifer",
+        primary_aquifer_type="Artesian",
+        geographic_scale="Major",
+        boundary="MULTIPOLYGON(((0 0, 1 1, 2 2, 3 3, 1 2, 0 0)))",
+    )
+    session.add(aquifer_system)
+    session.commit()
+    session.refresh(aquifer_system)
+
+    context.objects["aquifer_systems"].append(aquifer_system)
+    return aquifer_system
+
+
+@add_context_object_container("thing_aquifer_associations")
+def add_thing_aquifer_association(context, session, well, aquifer_system):
+    association = ThingAquiferAssociation(thing=well, aquifer_system=aquifer_system)
+    session.add(association)
+    session.commit()
+    session.refresh(association)
+
+    context.objects["thing_aquifer_associations"].append(association)
+    return association
+
+
+@add_context_object_container("aquifer_types")
+def add_aquifer_type(context, session, aquifer_type_str, thing_aquifer_association):
+    aquifer_type = AquiferType(
+        aquifer_type=aquifer_type_str,
+        thing_aquifer_association=thing_aquifer_association,
+    )
+    session.add(aquifer_type)
+    session.commit()
+    session.refresh(aquifer_type)
+
+    context.objects["aquifer_types"].append(aquifer_type)
+    return aquifer_type
+
+
+@add_context_object_container("geologic_formations")
+def add_geologic_formation(context, session, formation_code, well):
+    formation = GeologicFormation(
+        formation_code=formation_code,
+        description="This is a test geologic formation.",
+        lithology="Peat",
+        boundary="MULTIPOLYGON(((0 0, 1 1, 2 2, 3 3, 1 2, 0 0)))",
+    )
+    session.add(formation)
+    session.commit()
+    session.refresh(formation)
+
+    association = ThingGeologicFormationAssociation(
+        top_depth=1, bottom_depth=10, thing=well, geologic_formation=formation
+    )
+    session.add(association)
+    session.commit()
+    session.refresh(association)
+
+    context.objects["geologic_formations"].append(formation)
+    return formation
+
+
 def before_all(context):
     context.objects = {}
     rebuild = False
@@ -374,133 +515,145 @@ def before_all(context):
         sensor_1 = add_sensor(context, session)
         deployment = add_deployment(context, session, well_1.id, sensor_1.id)
 
-        measuring_point_history_1 = add_measuring_point_history(
-            context, session, well=well_1
-        )
-        measuring_point_history_2 = add_measuring_point_history(
-            context, session, well=well_2
-        )
-        measuring_point_history_3 = add_measuring_point_history(
-            context, session, well=well_3
-        )
-
-        well_status_1 = add_status_history(
-            context,
-            session,
-            status_type="Well Status",
-            status_value="Active, pumping well",
-            start_date=datetime(2020, 1, 1),
-            end_date=datetime(2021, 1, 1),
-            reason="Initial status",
-            target_id=context.objects["wells"][0].id,
-            target_table="thing",
-        )
-
-        well_status_2 = add_status_history(
-            context,
-            session,
-            status_type="Well Status",
-            status_value="Destroyed, exists but not usable",
-            start_date=datetime(2021, 1, 1),
-            end_date=None,
-            reason="Roving bovine",
-            target_id=context.objects["wells"][0].id,
-            target_table="thing",
-        )
-
-        monitoring_status_1 = add_status_history(
-            context,
-            session,
-            status_type="Monitoring Status",
-            status_value="Currently monitored",
-            start_date=datetime(2020, 1, 1),
-            end_date=datetime(2021, 1, 1),
-            reason="Initial monitoring status",
-            target_id=context.objects["wells"][0].id,
-            target_table="thing",
-        )
-
-        monitoring_status_2 = add_status_history(
-            context,
-            session,
-            status_type="Monitoring Status",
-            status_value="Not currently monitored",
-            start_date=datetime(2021, 1, 1),
-            end_date=None,
-            reason="Roving bovine destroyed well",
-            target_id=context.objects["wells"][0].id,
-            target_table="thing",
-        )
-
-        monitoring_frequency_history_1 = add_monitoring_frequency_history(
-            context,
-            session,
-            well=well_1,
-            monitoring_frequency="Monthly",
-            start_date="2020-01-01",
-            end_date="2021-01-01",
-        )
-
-        monitoring_frequency_history_2 = add_monitoring_frequency_history(
-            context,
-            session,
-            well=well_1,
-            monitoring_frequency="Annual",
-            start_date="2020-01-01",
-            end_date=None,
-        )
-
-        id_link_1 = add_id_link(
-            context,
-            session,
-            thing=well_1,
-            relation="same_as",
-            alternate_id="12345678",
-            alternate_organization="USGS",
-        )
-
-        id_link_2 = add_id_link(
-            context,
-            session,
-            thing=well_1,
-            relation="same_as",
-            alternate_id="OSE-0001",
-            alternate_organization="NMOSE",
-        )
-
-        id_link_3 = add_id_link(
-            context,
-            session,
-            thing=well_1,
-            relation="same_as",
-            alternate_id="Roving Bovine Ranch Well #1",
-            alternate_organization="NMBGMR",
-        )
-
-        group = add_group(context, session, [well_1, well_2])
-
-        elevation_method = add_data_provenance(
-            context,
-            session,
-            target_id=loc_1.id,
-            target_table="location",
-            field_name="elevation",
-            origin_source="Private geologist, consultant or univ associate",
-            collection_method="LiDAR DEM",
-        )
-
-        well_depth_source = add_data_provenance(
-            context,
-            session,
-            target_id=well_1.id,
-            target_table="thing",
-            field_name="well_depth",
-            origin_source="Other",
-        )
+        add_well_casing_material(context, session, well_1)
+
+        contact = add_contact(context, session)
+
+        for permission in [
+            "Datalogger Installation",
+            "Water Level Sample",
+            "Water Chemistry Sample",
+        ]:
+            add_permission_history(
+                context,
+                session,
+                contact_id=context.objects["contacts"][0].id,
+                permission_type=permission,
+                permission_allowed=True,
+                start_date=datetime(2025, 1, 1).date(),
+                end_date=None,
+                notes=f"Permission granted for {permission.lower()}.",
+                target_id=well_1.id,
+                target_table="thing",
+            )
+
+        for well in (well_1, well_2, well_3):
+            add_measuring_point_history(context, session, well=well)
+        for value, start, end in (
+            (
+                "Active, pumping well",
+                datetime(2020, 1, 1),
+                datetime(2021, 1, 1),
+                "initial status",
+            ),
+            (
+                "Destroyed, exists but not usable",
+                datetime(2021, 1, 1),
+                None,
+                "roving bovine",
+            ),
+        ):
+            add_status_history(
+                context,
+                session,
+                status_type="Well Status",
+                status_value=value,
+                start_date=start,
+                end_date=end,
+                reason="Initial status",
+                target_id=context.objects["wells"][0].id,
+                target_table="thing",
+            )
+
+        for value, start, end in (
+            ("Currently monitored", datetime(2020, 1, 1), datetime(2021, 1, 1)),
+            ("Not currently monitored", datetime(2021, 1, 1), None),
+        ):
+            add_status_history(
+                context,
+                session,
+                status_type="Monitoring Status",
+                status_value=value,
+                start_date=start,
+                end_date=end,
+                reason="Initial monitoring status",
+                target_id=context.objects["wells"][0].id,
+                target_table="thing",
+            )
+
+        for f, start, end in (
+            ("Monthly", "2020-01-01", "2021-01-01"),
+            ("Annual", "2020-01-01", None),
+        ):
+            add_monitoring_frequency_history(
+                context,
+                session,
+                well=well_1,
+                monitoring_frequency=f,
+                start_date=start,
+                end_date=end,
+            )
+
+        for aid, aorg in (
+            ("12345678", "USGS"),
+            ("OSE-0001", "NMOSE"),
+            ("Roving Bovine Ranch Well #1", "NMBGMR"),
+        ):
+            add_id_link(
+                context,
+                session,
+                thing=well_1,
+                relation="same_as",
+                alternate_id=aid,
+                alternate_organization=aorg,
+            )
+
+        add_well_casing_material(context, session, well_1)
+
+        add_group(context, session, [well_1, well_2])
+
+        for kwargs in (
+            {
+                "target_id": loc_1.id,
+                "target_table": "location",
+                "field_name": "elevation",
+                "origin_source": "Private geologist, consultant or univ associate",
+                "collection_method": "LiDAR DEM",
+            },
+            {
+                "target_id": well_1.id,
+                "target_table": "thing",
+                "field_name": "well_depth",
+                "origin_type": "Other",
+            },
+            {
+                "target_id": well_1.id,
+                "target_table": "thing",
+                "field_name": "well_completion_date",
+                "origin_type": "Data Portal",
+            },
+            {
+                "target_id": well_1.id,
+                "target_table": "thing",
+                "field_name": "well_construction_method",
+                "origin_source": "Jacob's 2013 Thesis",
+            },
+        ):
+            add_data_provenance(context, session, **kwargs)
 
         for purpose in ["Domestic", "Irrigation"]:
             add_well_purpose(context, session, well_1, purpose)
 
+        for name in ["Aquifer A", "Aquifer B"]:
+            system = add_aquifer_system(context, session, name, well_1)
+            add_thing_aquifer_association(context, session, well_1, system)
+
+        for t in ["Artesian", "Fractured"]:
+            taa = context.objects["thing_aquifer_associations"][0]
+            add_aquifer_type(context, session, t, taa)
+
+        add_geologic_formation(context, session, "000EXRV", well_1)
+
         # parameter ID can be hardcoded because init_parameter always creates the same one
         parameter = session.get(Parameter, 1)
         block = add_block(context, session, parameter)
@@ -519,8 +672,10 @@ def before_all(context):
 def after_all(context):
     with session_ctx() as session:
         for table in context.objects.values():
-            for obj in table:
-                session.delete(obj)
+            for record in table:
+                obj = session.get(record.__class__, record.id)
+                if obj:
+                    session.delete(obj)
         session.commit()
 
 
diff --git a/tests/features/steps/well-additional-information.py b/tests/features/steps/well-additional-information.py
new file mode 100644
index 000000000..8b00f7eb7
--- /dev/null
+++ b/tests/features/steps/well-additional-information.py
@@ -0,0 +1,270 @@
+from behave import then
+
+from services.util import retrieve_latest_polymorphic_history_table_record
+
+
+# ------------------------------------------------------------------------------
+# Permissions / Operational OK flags
+# ------------------------------------------------------------------------------
+@then(
+    "the response should include whether repeat measurement permission is granted for the well"
+)
+def step_impl(context):
+    permission_type = "Water Level Sample"
+    assert "permissions" in context.water_well_data
+
+    permission_record = retrieve_latest_polymorphic_history_table_record(
+        context.objects["wells"][0], "permission_history", permission_type
+    )
+
+    water_well_data_permissions = [
+        p
+        for p in context.water_well_data["permissions"]
+        if p["permission_type"] == permission_type
+    ][0]
+    assert (
+        water_well_data_permissions["permission_type"]
+        == permission_record.permission_type
+    )
+    assert (
+        water_well_data_permissions["permission_allowed"]
+        == permission_record.permission_allowed
+    )
+    assert water_well_data_permissions[
+        "start_date"
+    ] == permission_record.start_date.strftime("%Y-%m-%d")
+    if permission_record.end_date:
+        assert water_well_data_permissions[
+            "end_date"
+        ] == permission_record.end_date.strftime("%Y-%m-%d")
+    else:
+        assert water_well_data_permissions["end_date"] is None
+
+
+@then("the response should include whether sampling permission is granted for the well")
+def step_impl(context):
+    permission_type = "Water Chemistry Sample"
+    assert "permissions" in context.water_well_data
+
+    permission_record = retrieve_latest_polymorphic_history_table_record(
+        context.objects["wells"][0], "permission_history", permission_type
+    )
+
+    water_well_data_permissions = [
+        p
+        for p in context.water_well_data["permissions"]
+        if p["permission_type"] == permission_type
+    ][0]
+    assert (
+        water_well_data_permissions["permission_type"]
+        == permission_record.permission_type
+    )
+    assert (
+        water_well_data_permissions["permission_allowed"]
+        == permission_record.permission_allowed
+    )
+    assert water_well_data_permissions[
+        "start_date"
+    ] == permission_record.start_date.strftime("%Y-%m-%d")
+    if permission_record.end_date:
+        assert water_well_data_permissions[
+            "end_date"
+        ] == permission_record.end_date.strftime("%Y-%m-%d")
+    else:
+        assert water_well_data_permissions["end_date"] is None
+
+
+@then(
+    "the response should include whether datalogger installation permission is granted for the well"
+)
+def step_impl(context):
+    permission_type = "Datalogger Installation"
+    assert "permissions" in context.water_well_data
+
+    permission_record = retrieve_latest_polymorphic_history_table_record(
+        context.objects["wells"][0], "permission_history", permission_type
+    )
+
+    water_well_data_permissions = [
+        p
+        for p in context.water_well_data["permissions"]
+        if p["permission_type"] == permission_type
+    ][0]
+    assert (
+        water_well_data_permissions["permission_type"]
+        == permission_record.permission_type
+    )
+    assert (
+        water_well_data_permissions["permission_allowed"]
+        == permission_record.permission_allowed
+    )
+    assert water_well_data_permissions[
+        "start_date"
+    ] == permission_record.start_date.strftime("%Y-%m-%d")
+    if permission_record.end_date:
+        assert water_well_data_permissions[
+            "end_date"
+        ] == permission_record.end_date.strftime("%Y-%m-%d")
+    else:
+        assert water_well_data_permissions["end_date"] is None
+
+
+# ------------------------------------------------------------------------------
+# Well Construction Information
+# ------------------------------------------------------------------------------
+
+
+@then("the response should include the completion date of the well")
+def step_impl(context):
+    assert "well_completion_date" in context.water_well_data
+    assert context.water_well_data["well_completion_date"] == context.objects["wells"][
+        0
+    ].well_completion_date.strftime("%Y-%m-%d")
+
+
+@then("the response should include the source of the completion information")
+def step_impl(context):
+    assert "well_completion_date_source" in context.water_well_data
+
+    assert (
+        context.water_well_data["well_completion_date_source"]
+        == context.objects["wells"][0].well_completion_date_source
+    )
+
+
+@then("the response should include the driller name")
+def step_impl(context):
+    assert "well_driller_name" in context.water_well_data
+    assert (
+        context.water_well_data["well_driller_name"]
+        == context.objects["wells"][0].well_driller_name
+    )
+
+
+@then("the response should include the construction method")
+def step_impl(context):
+    assert "well_construction_method" in context.water_well_data
+    assert (
+        context.water_well_data["well_construction_method"]
+        == context.objects["wells"][0].well_construction_method
+    )
+
+
+@then("the response should include the source of the construction information")
+def step_impl(context):
+    assert "well_construction_method_source" in context.water_well_data
+    assert (
+        context.water_well_data["well_construction_method_source"]
+        == context.objects["wells"][0].well_construction_method_source
+    )
+
+
+# ------------------------------------------------------------------------------
+# Additional Well Physical Properties
+# ------------------------------------------------------------------------------
+
+
+@then("the response should include the casing diameter in inches")
+def step_impl(context):
+    assert "well_casing_diameter" in context.water_well_data
+    assert "well_casing_diameter_unit" in context.water_well_data
+
+    assert (
+        context.water_well_data["well_casing_diameter"]
+        == context.objects["wells"][0].well_casing_diameter
+    )
+    assert context.water_well_data["well_casing_diameter_unit"] == "in"
+
+
+@then("the response should include the casing depth in feet below ground surface")
+def step_impl(context):
+    assert "well_casing_depth" in context.water_well_data
+    assert "well_casing_depth_unit" in context.water_well_data
+
+    assert (
+        context.water_well_data["well_casing_depth"]
+        == context.objects["wells"][0].well_casing_depth
+    )
+    assert context.water_well_data["well_casing_depth_unit"] == "ft"
+
+
+@then("the response should include the casing materials")
+def step_impl(context):
+    assert "well_casing_materials" in context.water_well_data
+    assert set(context.water_well_data["well_casing_materials"]) == {
+        m.material for m in context.objects["wells"][0].well_casing_materials
+    }
+
+
+@then("the response should include the well pump type (previously well_type field)")
+def step_impl(context):
+    assert "well_pump_type" in context.water_well_data
+    assert (
+        context.water_well_data["well_pump_type"]
+        == context.objects["wells"][0].well_pump_type
+    )
+
+
+@then("the response should include the well pump depth in feet (new field)")
+def step_impl(context):
+    assert "well_pump_depth" in context.water_well_data
+    assert "well_pump_depth_unit" in context.water_well_data
+
+    assert (
+        context.water_well_data["well_pump_depth"]
+        == context.objects["wells"][0].well_pump_depth
+    )
+    assert context.water_well_data["well_pump_depth_unit"] == "ft"
+
+
+@then(
+    "the response should include whether the well is open and suitable for a datalogger"
+)
+def step_impl(context):
+    assert "is_suitable_for_datalogger" in context.water_well_data
+    assert (
+        context.water_well_data["is_suitable_for_datalogger"]
+        == context.objects["wells"][0].is_suitable_for_datalogger
+    )
+
+
+# ------------------------------------------------------------------------------
+# Aquifer/ Geology Information
+# ------------------------------------------------------------------------------
+
+
+@then(
+    "the response should include the formation as the formation zone of well completion"
+)
+def step_impl(context):
+    assert "formation_completion_code" in context.water_well_data
+    assert (
+        context.water_well_data["formation_completion_code"]
+        == context.objects["wells"][0].formation_completion_code
+    )
+
+
+@then(
+    "the response should include the aquifer class code to classify the aquifer into aquifer system."
+)
+def step_impl(context):
+    for aquifer in context.water_well_data["aquifers"]:
+        assert "aquifer_system" in aquifer
+    assert {a.get("aquifer_system") for a in context.water_well_data["aquifers"]} == {
+        system.name for system in context.objects["aquifer_systems"]
+    }
+
+
+@then(
+    "the response should include the aquifer type as the type of aquifers penetrated by the well"
+)
+def step_impl(context):
+    for aquifer in context.water_well_data["aquifers"]:
+        assert "aquifer_types" in aquifer
+
+        if aquifer["aquifer_system"] == "Aquifer A":
+            assert set(aquifer["aquifer_types"]) == {
+                a.aquifer_type for a in context.objects["aquifer_types"]
+            }
+        else:
+            assert aquifer["aquifer_types"] == []
diff --git a/tests/features/steps/well-core-information.py b/tests/features/steps/well-core-information.py
index b0adc8346..1f56161f6 100644
--- a/tests/features/steps/well-core-information.py
+++ b/tests/features/steps/well-core-information.py
@@ -163,7 +163,7 @@ def step_impl(context):
         and r.target_table == "thing"
         and r.target_id == context.objects["wells"][0].id
     ]
-    well_depth_source = well_depth_source_records[0].origin_source
+    well_depth_source = well_depth_source_records[0].origin_type
 
     assert context.water_well_data["well_depth_source"] == well_depth_source
 
diff --git a/tests/test_thing.py b/tests/test_thing.py
index 378f72d02..5bd504718 100644
--- a/tests/test_thing.py
+++ b/tests/test_thing.py
@@ -152,6 +152,38 @@ def test_add_water_well(location, group):
     cleanup_post_test(Thing, data["id"])
 
 
+@pytest.mark.skip(
+    "This duplicates the test above. That one will need to eventually be updated"
+)
+def test_add_water_well_with_measuring_point(location, group):
+    """
+    Test creating a well with measuring_point_height and measuring_point_description.
+
+    This reproduces the bug where measuring_point fields are properties (from MeasuringPointHistory table)
+    and cannot be set directly on Thing objects.
+
+    Expected error (before fix): AttributeError: property 'measuring_point_height' of 'Thing' object has no setter
+    """
+    payload = {
+        "location_id": location.id,
+        "group_id": group.id,
+        "release_status": "draft",
+        "name": "Test Well with Measuring Point",
+        "measuring_point_height": 2.5,
+        "measuring_point_description": "top of casing",
+    }
+
+    response = client.post("/thing/water-well", json=payload)
+    assert response.status_code == 201
+    data = response.json()
+
+    assert data["name"] == payload["name"]
+    assert data["measuring_point_height"] == 2.5
+    assert data["measuring_point_description"] == "top of casing"
+
+    cleanup_post_test(Thing, data["id"])
+
+
 @pytest.mark.skip("Needs to be updated per changes made from feature files")
 def test_add_water_well_409_bad_group_id(location):
     bad_group_id = 9999
diff --git a/transfers/aquifer_system_transfer.py b/transfers/aquifer_system_transfer.py
new file mode 100644
index 000000000..a0ba1f02e
--- /dev/null
+++ b/transfers/aquifer_system_transfer.py
@@ -0,0 +1,141 @@
+import time
+from sqlalchemy.orm import Session
+from pydantic import ValidationError
+
+from db import AquiferSystem
+from schemas.aquifer_system import CreateAquiferSystem
+from transfers.util import read_csv, replace_nans, logger
+
+
+def transfer_aquifer_systems(session: Session, limit: int = None) -> tuple:
+    """
+    Transfer aquifer system data from LU_AquiferClass CSV to the database.
+
+    This creates the master list of named aquifer systems (e.g., Ogallala Aquifer). the primary_type field is set
+    to "Unknown" as a placeholder and will be updated during well transfer when we know what type each well encounters.
+
+    This should be run BEFORE well_transfer.py so that aquifer records exist for wells to reference.
+
+    Args:
+        session (Session): SQLAlchemy database session
+        limit (int, optional): Limit the number of records to transfer (for testing).
+
+    Returns:
+        tuple: (input_df, cleaned_df, errors)
+    """
+    # 1. Read the CSV file
+    input_df = read_csv("LU_AquiferClass")
+
+    # 2. Replace NaNs with NOne
+    cleaned_df = replace_nans(input_df)
+
+    # 3. Initialize tracking variables for logging
+    n = len(input_df)
+    step = 25
+    start_time = time.time()
+    errors = []
+    created_count = 0
+    skipped_count = 0
+
+    logger.info(f"Starting transfer of {n} aquifer systems from LU_AquiferClass.")
+
+    # 4. Process each row
+    for i, row in enumerate(cleaned_df.itertuples()):
+        # check if limit is reached
+        if limit and i >= limit:
+            logger.info(f"Reached limit of {limit} rows. Stopping migration.")
+            break
+
+        # Log progress every 'step' rows
+        if i and not i % step:
+            logger.info(
+                f"Processing row {i} of {n}. Avg rows per second: {step / (time.time() - start_time):.2f}"
+            )
+            start_time = time.time()
+
+            # Commit progress periodically
+            try:
+                session.commit()
+            except Exception as e:
+                logger.critical(f"Error committing aquifer system {i}: {e}")
+                session.rollback()
+                continue
+
+        # 5. Extract aquifer code and name
+        aquifer_code = row.CODE
+        aquifer_name = row.MEANING
+
+        if not aquifer_name:
+            error_msg = f"Row {i} (code: {aquifer_code}) has no aquifer name (MEANING)."
+            logger.critical(error_msg)
+            errors.append({"row": i, "code": aquifer_code, "error": error_msg})
+            skipped_count += 1
+            continue
+
+        # 6. Check if aquifer system already exists
+        existing = (
+            session.query(AquiferSystem)
+            .filter(AquiferSystem.name == aquifer_name)
+            .first()
+        )
+
+        if existing:
+            logger.info(
+                f"Aquifer '{aquifer_name}' (code: {aquifer_code}) already exists. Skipping."
+            )
+            skipped_count += 1
+            continue
+
+        # 7. Prepare data dictionary
+        try:
+            data = CreateAquiferSystem(
+                name=aquifer_name,
+                description=None,  # can be updated later
+                primary_aquifer_type="Unknown",  # placeholder - will be updated during well transfer
+            )
+
+            # Validate data using Pydantic schema
+            CreateAquiferSystem.model_validate(data)
+
+        except ValidationError as e:
+            errors.append(
+                {"code": aquifer_code, "name": aquifer_name, "error": e.errors()}
+            )
+            logger.critical(
+                f"Error creating aquifer system '{aquifer_name}' (code: {aquifer_code}) (row {i}): {e.errors()}"
+            )
+            continue
+
+        # 8. Create database record
+        aquifer_system = None
+        try:
+            aquifer_data = data.model_dump()
+            aquifer_system = AquiferSystem(**aquifer_data)
+            session.add(aquifer_system)
+            created_count += 1
+
+            logger.info(
+                f"Created aquifer system: {aquifer_system.name} (code: {aquifer_code})"
+            )
+
+        except Exception as e:
+            if aquifer_system is not None:
+                session.expunge(aquifer_system)
+            errors.append({"code": aquifer_code, "name": aquifer_name, "error": str(e)})
+            logger.critical(
+                f"Error creating aquifer system record '{aquifer_name}': {e}"
+            )
+            continue
+
+        # 9. Final commit
+    try:
+        session.commit()
+        logger.info(
+            f"Successfully transferred {created_count} aquifer systems, skipped {skipped_count}. "
+            f"Note: primary_type set to 'Unknown' and will be updated during well transfer."
+        )
+    except Exception as e:
+        logger.critical(f"Error in final commit: {e}")
+        session.rollback()
+
+    return input_df, cleaned_df, errors
diff --git a/transfers/data/owners_organization_mapper.json b/transfers/data/owners_organization_mapper.json
index 5ce45a8bf..b4f29bd7b 100644
--- a/transfers/data/owners_organization_mapper.json
+++ b/transfers/data/owners_organization_mapper.json
@@ -89,6 +89,7 @@
 	"Pecos Trail Inn": "Pecos Trail Inn",
 	"Pelican Spa": "Pelican Spa",
 	"Pistachio Tree Ranch": "Pistachio Tree Ranch",
+	"Quemado Mutual Water and Sewage Works Association": "Quemado Municipal Water & SWA",
 	"Rancho Encantado": "Rancho Encantado",
 	"Rancho San Lucas": "Rancho San Lucas",
 	"Rancho San Marcos": "Rancho San Marcos",
diff --git a/transfers/geologic_formation_transfer.py b/transfers/geologic_formation_transfer.py
new file mode 100644
index 000000000..7fcd73e4c
--- /dev/null
+++ b/transfers/geologic_formation_transfer.py
@@ -0,0 +1,141 @@
+import time
+from sqlalchemy.orm import Session
+from pydantic import ValidationError
+
+from db import GeologicFormation
+from schemas.geologic_formation import CreateGeologicFormation
+from transfers.util import read_csv, replace_nans, logger
+
+
+def transfer_geologic_formations(session: Session, limit: int = None) -> tuple:
+    """
+    Transfer geologic formation data from LU_GeologicFormation CSV to the database.
+
+    This should be run BEFORE well_transfer.py so that geologic formation records exist for wells to reference.
+
+    Args:
+        session (Session): SQLAlchemy database session
+        limit (int, optional): Optional limit on number of records to transfer (for testing).
+
+    Returns:
+        tuple: (input_df, cleaned_df, errors)
+    """
+    # 1. Read the CSV file
+    input_df = read_csv("LU_Formations")
+
+    # 2. Replace NaNs with None
+    cleaned_df = replace_nans(input_df)
+
+    # 3. Initialize tracking variables for logging
+    n = len(cleaned_df)
+    step = 25
+    start_time = time.time()
+    errors = []
+    created_count = 0
+    skipped_count = 0
+
+    logger.info(f"Starting transfer of {n} geologic formations")
+
+    # 4. Process each row
+    for i, row in enumerate(cleaned_df.itertuples()):
+        # check if limit is reached
+        if limit and i >= limit:
+            logger.info(f"Reached limit of {limit} rows. Stopping migration.")
+            break
+
+        # Log progress every 'step' rows
+        if i and not i % step:
+            logger.info(
+                f"Processing row {i} of {n}. Avg rows per second: {step / (time.time() - start_time):.2f}"
+            )
+            start_time = time.time()
+
+            # Commit progress periodically
+            try:
+                session.commit()
+            except Exception as e:
+                logger.critical(f"Error committing geologic formations: {e}")
+                session.rollback()
+                continue
+
+        # 5. Extract formation code and description
+        formation_code = row.Code
+
+        if not formation_code:
+            logger.warning(f"Skipping row {i}: Missing formation code")
+            skipped_count += 1
+            continue
+
+        # Check if this formation already exists
+        existing = (
+            session.query(GeologicFormation)
+            .filter(GeologicFormation.formation_code == formation_code)
+            .first()
+        )
+
+        if existing:
+            logger.info(
+                f"Skipping row {i}: Formation code {formation_code} already exists"
+            )
+            skipped_count += 1
+            continue
+
+        # 6. Prepare data for creation
+        # Note: We only store the formation_code. Formation names will be mapped by the API using a
+        # formations.json file from authoritative sources (e.g., USGS).
+        # The description field is left as None and can be populated later if needed.
+        # Note: lithology is set to None here and will be updated during stratigraphy transfer
+        try:
+            data = CreateGeologicFormation(
+                formation_code=formation_code,
+                description=None,  # Not storing from legacy data
+                lithology=None,  # Will be populated from Stratigraphy.csv
+            )
+
+            # Validate the data using Pydantic schema
+            CreateGeologicFormation.model_validate(data)
+
+        except ValidationError as e:
+            errors.append({"code": formation_code, "errors": e.errors()})
+            logger.critical(
+                f"Validation error for row {i} with Code {formation_code}: {e.errors()}"
+            )
+            continue
+        except Exception as e:
+            errors.append({"code": formation_code, "errors": str(e)})
+            logger.critical(f"Error preparing data for {formation_code}: {e}")
+            continue
+
+        # 7. Create database object
+        geologic_formation = None
+        try:
+            formation_data = data.model_dump()
+            geologic_formation = GeologicFormation(**formation_data)
+            session.add(geologic_formation)
+            created_count += 1
+
+            logger.info(
+                f"Created geologic formation: {geologic_formation.formation_code}"
+            )
+
+        except Exception as e:
+            if geologic_formation is not None:
+                session.expunge(geologic_formation)
+            errors.append({"code": formation_code, "error": str(e)})
+            logger.critical(
+                f"Error creating geologic formation for {formation_code}: {e}"
+            )
+            continue
+
+    # 8. Final commit
+    try:
+        session.commit()
+        logger.info(
+            f"Successfully transferred {created_count} geologic formations, skipped {skipped_count}. "
+            f"Note: lithology is None and will be updated during stratigraphy transfer."
+        )
+    except Exception as e:
+        logger.critical(f"Error during final commit of geologic formations: {e}")
+        session.rollback()
+
+    return input_df, cleaned_df, errors
diff --git a/transfers/permissions_transfer.py b/transfers/permissions_transfer.py
new file mode 100644
index 000000000..18daa1040
--- /dev/null
+++ b/transfers/permissions_transfer.py
@@ -0,0 +1,95 @@
+from sqlalchemy.orm import Session
+from datetime import datetime
+from pandas import isna
+
+from db import Thing, PermissionHistory
+from transfers.util import read_csv, logger, replace_nans
+
+"""
+Developer's notes
+
+According to Laila the column WellData.OpenWellLoggerOK only pertains to the
+physical properties of a well (that is, if a datalogger can be installed). It
+does not pertain to permissions.
+"""
+
+
+def transfer_permissions(session: Session):
+    """
+    The transferred wells and contacts need to be transferred first
+    - to access the auto-generated well IDs
+    - to know who gave permission to which well since contact_id is required for
+        PermissionHistory
+    """
+    wdf = read_csv("WellData", dtype={"OSEWelltagID": str})
+    wdf = replace_nans(wdf)
+
+    transferred_wells = (
+        session.query(Thing).filter(Thing.thing_type == "water well").all()
+    )
+
+    for well in transferred_wells:
+        if len(well.contacts) == 0:
+            logger.critical(
+                f"Well {well.name} has no associated contacts; skipping permission transfer."
+            )
+            continue
+        else:
+            # Assuming the first contact is the relevant one
+            contact_id = well.contacts[0].id
+
+        allow_water_level_samples = wdf.loc[
+            wdf["PointID"] == well.name, "MonitorOK"
+        ].values
+        if len(allow_water_level_samples) == 0:
+            pass
+        elif isna(allow_water_level_samples[0]):
+            pass
+        else:
+            try:
+                permission_allowed = bool(allow_water_level_samples[0])
+                permission = PermissionHistory(
+                    contact_id=contact_id,
+                    permission_type="Water Level Sample",
+                    permission_allowed=permission_allowed,
+                    start_date=datetime.today().date(),
+                    target_id=well.id,
+                    target_table="thing",
+                )
+                session.add(permission)
+                logger.info(
+                    f"Transferred Water Level Sample permission for well {well.name}: {permission_allowed}."
+                )
+            except Exception as e:
+                logger.error(f"Error transferring permission for well {well.name}: {e}")
+                session.rollback()
+                pass
+
+        allow_water_chemistry_samples = wdf.loc[
+            wdf["PointID"] == well.name, "SampleOK"
+        ].values
+        if len(allow_water_chemistry_samples) == 0:
+            pass
+        elif isna(allow_water_chemistry_samples[0]):
+            pass
+        else:
+            try:
+                permission_allowed = bool(allow_water_chemistry_samples[0])
+                permission = PermissionHistory(
+                    contact_id=contact_id,
+                    permission_type="Water Chemistry Sample",
+                    permission_allowed=permission_allowed,
+                    start_date=datetime.today().date(),
+                    target_id=well.id,
+                    target_table="thing",
+                )
+                session.add(permission)
+                logger.info(
+                    f"Transferred Water Chemistry Sample permission for well {well.name}: {permission_allowed}."
+                )
+            except Exception as e:
+                logger.error(f"Error transferring permission for well {well.name}: {e}")
+                session.rollback()
+                pass
+
+    session.commit()
diff --git a/transfers/stratigraphy_transfer.py b/transfers/stratigraphy_transfer.py
new file mode 100644
index 000000000..de51e354e
--- /dev/null
+++ b/transfers/stratigraphy_transfer.py
@@ -0,0 +1,285 @@
+"""
+Transfer script for stratigraphy (lithology log) data.
+
+This creates ThingGeologicFormationAssociation records from the Stratigraphy CSV, which contains depth-specific
+formation information for wells. It also updates the GeologicFormation.lithology field based on the
+Stratigraphy.Lithology data.
+"""
+
+import time
+from sqlalchemy.orm import Session
+
+from db import Thing, GeologicFormation, ThingGeologicFormationAssociation
+from transfers.util import (
+    read_csv,
+    replace_nans,
+    filter_to_valid_point_ids,
+    lexicon_mapper,
+    logger,
+)
+
+
+def transfer_stratigraphy(session: Session, limit: int = None) -> tuple:
+    """
+    Transfer detailed stratigraphy (lithology log) data from Stratigraphy CSV.
+
+    The Stratigraphy CSV contains multiple rows per well, each representing a
+    depth interval, the formation encountered, and its lithology.
+
+    Fields used:
+        - PointID: Links to the well
+        - UnitIdentifier: Formation code (maps to LU_Formations)
+        - StratTop: Top depth of the layer (feet below ground surface)
+        - StratBottom: Bottom depth of the layer (feet below ground surface)
+        - Lithology: Lithology code (maps to LU_Lithology via ABBREVIATION field)
+
+    This should be run AFTER:
+        1. transfer_geologic_formations.py (so formations exist)
+        2. transfer_wells.py (so wells exist)
+
+    Args:
+        session: Database session
+        limit: Optional limit on number of WELLS to process (for testing)
+
+    Returns:
+        tuple: (input_df, cleaned_df, errors)
+    """
+    # 1. Read and clean data
+    input_df = read_csv("Stratigraphy")
+    cleaned_df = replace_nans(input_df)
+
+    # Step 2: Filter to only wells that exist in database
+    cleaned_df = filter_to_valid_point_ids(session, cleaned_df)
+
+    n_records = len(cleaned_df)
+    n_wells = len(cleaned_df["PointID"].unique())
+
+    logger.info(
+        f"Starting transfer of {n_records} stratigraphy records for {n_wells} wells"
+    )
+
+    # 3. Initialize tracking variables for logging
+    step = 25
+    start_time = time.time()
+    errors = []
+    created_count = 0
+    skipped_count = 0
+    lithology_updates = 0
+
+    # Step 4: Group by well for efficient processing
+    well_groups = cleaned_df.groupby("PointID")
+
+    for well_index, (pointid, strat_group) in enumerate(well_groups):
+        # Check limit (on number of wells, not records)
+        if limit and well_index >= limit:
+            logger.info(f"Reached limit of {limit} wells. Stopping.")
+            break
+
+        # Progress logging every 25 wells
+        if well_index and not well_index % step:
+            logger.info(
+                f"Processing well {well_index} of {n_wells}, "
+                f"avg wells per second: {step / (time.time() - start_time):.2f}"
+            )
+            start_time = time.time()
+
+            # Periodic commit
+            try:
+                session.commit()
+            except Exception as e:
+                logger.critical(f"Error committing stratigraphy records: {e}")
+                session.rollback()
+                continue
+
+        # 5. Get the well from database
+        thing = session.query(Thing).filter(Thing.name == pointid).first()
+        if not thing:
+            logger.warning(
+                f"Well {pointid} not found in database, skipping stratigraphy"
+            )
+            skipped_count += len(strat_group)
+            continue
+
+        logger.info(
+            f"Processing {len(strat_group)} stratigraphy layers for well {pointid}"
+        )
+
+        # 6. Process each stratigraphy record for this well
+        for layer_index, row in enumerate(strat_group.itertuples()):
+            # Validate required fields
+            # UnitIdentifier
+            if not hasattr(row, "UnitIdentifier") or not row.UnitIdentifier:
+                logger.critical(
+                    f"Stratigraphy record {layer_index} for {pointid} has no UnitIdentifier, skipping"
+                )
+                skipped_count += 1
+                errors.append(
+                    {
+                        "pointid": pointid,
+                        "layer": layer_index,
+                        "error": "Missing UnitIdentifier",
+                    }
+                )
+                continue
+            # StratTop
+            if not hasattr(row, "StratTop") or row.StratTop is None:
+                logger.critical(
+                    f"Stratigraphy record {layer_index} for {pointid} has no StratTop, skipping"
+                )
+                skipped_count += 1
+                errors.append(
+                    {
+                        "pointid": pointid,
+                        "layer": layer_index,
+                        "error": "Missing StratTop",
+                    }
+                )
+                continue
+            # StratBottom
+            if not hasattr(row, "StratBottom") or row.StratBottom is None:
+                logger.critical(
+                    f"Stratigraphy record {layer_index} for {pointid} has no StratBottom, skipping"
+                )
+                skipped_count += 1
+                errors.append(
+                    {
+                        "pointid": pointid,
+                        "layer": layer_index,
+                        "error": "Missing StratBottom",
+                    }
+                )
+                continue
+
+            # Extract formation code
+            formation_code = row.UnitIdentifier.strip()
+
+            # Validate depth values
+            try:
+                top_depth = float(row.StratTop)
+                bottom_depth = float(row.StratBottom)
+            except (ValueError, TypeError) as e:
+                error_msg = f"Invalid depth values: StratTop={row.StratTop}, StratBottom={row.StratBottom}"
+                logger.critical(
+                    f"{pointid} layer {layer_index}: {error_msg}, error: {e}"
+                )
+                errors.append(
+                    {
+                        "pointid": pointid,
+                        "layer": layer_index,
+                        "error": error_msg,
+                        "details": str(e),  # for conversion errors
+                    }
+                )
+                skipped_count += 1
+                continue
+
+            # Validate depth logic
+            if top_depth >= bottom_depth:
+                error_msg = (
+                    f"Invalid depth logic: top={top_depth} >= bottom={bottom_depth}"
+                )
+                logger.critical(f"{pointid} layer {layer_index}: {error_msg}")
+                errors.append(
+                    {"pointid": pointid, "layer": layer_index, "error": error_msg}
+                )
+                skipped_count += 1
+                continue
+
+            if top_depth < 0:
+                error_msg = f"Negative top depth: {top_depth}"
+                logger.critical(f"{pointid} layer {layer_index}: {error_msg}")
+                errors.append(
+                    {"pointid": pointid, "layer": layer_index, "error": error_msg}
+                )
+                skipped_count += 1
+                continue
+
+            # 7. Get or create the formation
+            formation = (
+                session.query(GeologicFormation)
+                .filter(GeologicFormation.formation_code == formation_code)
+                .first()
+            )
+
+            if not formation:
+                # Create new formation if it doesn't exist
+                logger.info(f"Creating new geologic formation: {formation_code}")
+                formation = GeologicFormation(
+                    formation_code=formation_code,
+                    description=None,
+                    lithology=None,  # Will be set below
+                )
+                session.add(formation)
+                session.flush()
+
+            # 8. Update formation lithology if available and not already set
+            if hasattr(row, "Lithology") and row.Lithology:
+                try:
+                    # Map lithology code to geologic_formation.lithology using ABBREVIATION field
+                    lithology = lexicon_mapper.map_value(
+                        f"LU_Lithology:{row.Lithology}"
+                    )
+
+                    # Update if formation does not have lithology yet
+                    if not formation.lithology:
+                        formation.lithology = lithology
+                        lithology_updates += 1
+                        logger.info(f"Set lithology for {formation_code}: {lithology}")
+                    elif formation.lithology != lithology:
+                        # Log if there's a mismatch (different lithology for same formation)
+                        logger.warning(
+                            f"Formation {formation_code} has conflicting lithology: "
+                            f"existing='{formation.lithology}', new='{lithology}'."
+                        )
+                except KeyError:
+                    logger.warning(
+                        f"Unknown lithology code '{row.Lithology}' for {pointid}, skipping lithology update"
+                    )
+                except Exception as e:
+                    logger.warning(f"Error mapping lithology '{row.Lithology}': {e}")
+
+            # 9. Create ThingGeologicFormationAssociation record
+            try:
+                formation_assoc = ThingGeologicFormationAssociation(
+                    thing=thing,
+                    geologic_formation=formation,
+                    top_depth=top_depth,
+                    bottom_depth=bottom_depth,
+                )
+                session.add(formation_assoc)
+                created_count += 1
+
+                logger.info(
+                    f"  Layer {layer_index + 1}: {formation.formation_code} "
+                    f"from {top_depth:.1f} to {bottom_depth:.1f} ft"
+                )
+
+            except Exception as e:
+                logger.critical(
+                    f"Error creating stratigraphy association for {pointid}, "
+                    f"formation {formation_code}: {e}"
+                )
+                errors.append(
+                    {
+                        "pointid": pointid,
+                        "formation": formation_code,
+                        "layer": layer_index,
+                        "error": str(e),
+                    }
+                )
+                skipped_count += 1
+                continue
+
+    # 10. Final commit
+    try:
+        session.commit()
+        logger.info(
+            f"Successfully transferred stratigraphy: "
+            f"{created_count} associations created, {skipped_count} skipped, "
+            f"{lithology_updates} lithology fields updated, {len(errors)} errors"
+        )
+    except Exception as e:
+        logger.critical(f"Error in final commit: {e}")
+        session.rollback()
+
+    return input_df, cleaned_df, errors
diff --git a/transfers/util.py b/transfers/util.py
index d459ee4ff..24389bc6d 100644
--- a/transfers/util.py
+++ b/transfers/util.py
@@ -326,6 +326,9 @@ def get_transferable_wells(
     # get all the pointids from the well photos and include them
     wellphotos_df = read_csv("WellPhotos")
     wellphotos_pointids = wellphotos_df["PointID"].unique().tolist()
+
+    # get all pointids that have owner info
+
     pointids = list(set(usgs_pointids + collabnet_pointids + wellphotos_pointids))
     return df[df["DataSource"].isin(valid_datasources) | df["PointID"].isin(pointids)]
 
@@ -470,7 +473,7 @@ def make_location_data_provenance(
 ) -> list[DataProvenance]:
     provenance_records = []
 
-    if row.AltitudeAccuracy or row.CoordinateAccuracy:
+    if row.AltitudeAccuracy:
         provenance = DataProvenance(
             target_id=location.id,
             target_table="location",
@@ -564,7 +567,6 @@ def make_location_data_provenance(
             target_id=location.id,
             target_table="location",
             field_name="point",
-            origin_source=None,
             collection_method=coordinate_method,
             accuracy_value=accuracy_value,
             accuracy_unit=accuracy_unit,
@@ -617,6 +619,8 @@ def _make_lu_to_lexicon_mapper(self) -> dict[str, str]:
         # Lookup tables where CODE maps to MEANING
         lu_tables = [
             "LU_AltitudeMethod",
+            "LU_AquiferClass",
+            "LU_AquiferType",
             "LU_CollectionMethod",
             "LU_ConstructionMethod",
             "LU_CoordinateAccuracy",
@@ -626,7 +630,9 @@ def _make_lu_to_lexicon_mapper(self) -> dict[str, str]:
             "LU_DataSource",
             "LU_Depth_CompletionSource",
             "LU_Discharge_ChemistrySource",
+            "LU_Formations",
             "LU_LevelStatus",
+            "LU_Lithology",
             "LU_MajorAnalyte",
             "LU_MeasurementMethod",
             "LU_MinorTraceAnalyte",
@@ -645,6 +651,9 @@ def _make_lu_to_lexicon_mapper(self) -> dict[str, str]:
                 if lu_table == "LU_Formations":
                     code = row.Code
                     meaning = row.Meaning
+                elif lu_table == "LU_Lithology":
+                    code = row.ABBREVIATION
+                    meaning = row.TERM
                 else:
                     code = row.CODE
                     meaning = row.MEANING
diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py
index fa912ed18..eed5c3eb8 100644
--- a/transfers/well_transfer.py
+++ b/transfers/well_transfer.py
@@ -14,8 +14,11 @@
 # limitations under the License.
 # ===============================================================================
 import json
+
+# import time
 from datetime import datetime, UTC
 
+# import re
 import pandas as pd
 from pandas import isna
 from pydantic import ValidationError
@@ -35,6 +38,11 @@
     StatusHistory,
     MonitoringFrequencyHistory,
     MeasuringPointHistory,
+    # DataProvenance,
+    # AquiferSystem,
+    # AquiferType,
+    # GeologicFormation,
+    # ThingAquiferAssociation,
 )
 from schemas.thing import CreateWell, CreateWellScreen
 from services.gcs_helper import get_storage_bucket

From b68900ed3965d9e57cff47f63a16f2a85890cf09 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Tue, 2 Dec 2025 20:57:44 -0700
Subject: [PATCH 31/66] refactor: enhance transfer process by adding aquifer
 system and geologic formation transfers, improving logging and error handling

---
 core/lexicon.json          | 371 +++++++++++++++++++++++++++++++++
 services/util.py           |   8 +-
 transfers/transfer.py      |   7 +
 transfers/transferer.py    |   1 +
 transfers/util.py          |  15 +-
 transfers/well_transfer.py | 416 +++++++++++++++++++++++++++++++------
 6 files changed, 751 insertions(+), 67 deletions(-)

diff --git a/core/lexicon.json b/core/lexicon.json
index 815a40d2f..142f1745c 100644
--- a/core/lexicon.json
+++ b/core/lexicon.json
@@ -714,6 +714,377 @@
     {"categories": ["geographic_scale"], "term": "Regional", "definition": "Important aquifers serving regions"},
     {"categories": ["geographic_scale"], "term": "Local", "definition": "Smaller, locally important aquifers"},
     {"categories": ["geographic_scale"], "term": "Minor", "definition": "Limited extent or yield"},
+    {"categories": ["formation_code"],"term": "000EXRV","definition": "Extrusive Rocks"},
+    {"categories": ["formation_code"],"term": "000IRSV","definition": "Intrusive Rocks"},
+    {"categories": ["formation_code"],"term": "050QUAL","definition": "Quaternary Alluvium in Valleys"},
+    {"categories": ["formation_code"],"term": "100QBAS","definition": "Quaternary basalt"},
+    {"categories": ["formation_code"],"term": "110ALVM","definition": "Quaternary Alluvium"},
+    {"categories": ["formation_code"],"term": "110AVMB","definition": "Alluvium, Bolson Deposits and Other Surface Deposits"},
+    {"categories": ["formation_code"],"term": "110BLSN","definition": "Bolson Fill"},
+    {"categories": ["formation_code"],"term": "110NTGU","definition": "Naha and Tsegi Alluvium Deposits, undifferentiated"},
+    {"categories": ["formation_code"],"term": "110PTODC","definition": "Pediment, Terrace and Other Deposits of Gravel, Sand and Caliche"},
+    {"categories": ["formation_code"],"term": "111MCCR","definition": "McCathys Basalt Flow"},
+    {"categories": ["formation_code"],"term": "112ANCH","definition": "Upper Santa Fe Group, Ancha Formation (QTa)"},
+    {"categories": ["formation_code"],"term": "112CURB","definition": "Cuerbio Basalt"},
+    {"categories": ["formation_code"],"term": "112LAMA","definition": "Lama Formation (QTl, QTbh) and other mountain front alluvial fans"},
+    {"categories": ["formation_code"],"term": "112LAMAb","definition": "Lama Fm (QTl, QTbh) between Servilleta Basalts"},
+    {"categories": ["formation_code"],"term": "112LGUN","definition": "Laguna Basalt Flow"},
+    {"categories": ["formation_code"],"term": "112QTBF","definition": "Quaternary-Tertiary basin fill (not in valleys)"},
+    {"categories": ["formation_code"],"term": "112QTBFlac","definition": "Quaternary-Tertiary basin fill, lacustrian-playa lithofacies"},
+    {"categories": ["formation_code"],"term": "112QTBFpd","definition": "Quaternary-Tertiary basin fill, distal piedmont lithofacies"},
+    {"categories": ["formation_code"],"term": "112QTBFppm","definition": "Quaternary-Tertiary basin fill, proximal and medial piedmont lithofacies"},
+    {"categories": ["formation_code"],"term": "112SNTF","definition": "Santa Fe Group, undivided"},
+    {"categories": ["formation_code"],"term": "112SNTFA","definition": "Upper Santa Fe Group, axial facies"},
+    {"categories": ["formation_code"],"term": "112SNTFOB","definition": "Upper SantaFe Group, Loma Barbon member of Arroyo Ojito Formatin"},
+    {"categories": ["formation_code"],"term": "112SNTFP","definition": "Upper Santa Fe Group, piedmont facies"},
+    {"categories": ["formation_code"],"term": "112TRTO","definition": "Tuerto Gravels (QTt)"},
+    {"categories": ["formation_code"],"term": "120DTIL","definition": "Datil Formation"},
+    {"categories": ["formation_code"],"term": "120ELRT","definition": "El Rito Formation"},
+    {"categories": ["formation_code"],"term": "120IRSV","definition": "Tertiary Intrusives"},
+    {"categories": ["formation_code"],"term": "120SBLC","definition": "Sierra Blanca Volcanics, undivided"},
+    {"categories": ["formation_code"],"term": "120SRVB","definition": "Tertiary Servilletta Basalts (Tsb)"},
+    {"categories": ["formation_code"],"term": "120SRVBf","definition": "Tertiary Servilletta Basalts, fractured (Tsbf)"},
+    {"categories": ["formation_code"],"term": "120TSBV_Lower","definition": "Tertiary Sierra Blanca area lower volcanic unit (Hog Pen Fm)"},
+    {"categories": ["formation_code"],"term": "120TSBV_Upper","definition": "Tertiary Sierra Blanca area upper volcanic unit (above Hog Pen Fm)"},
+    {"categories": ["formation_code"],"term": "121CHMT","definition": "Chamita Formation (Tc)"},
+    {"categories": ["formation_code"],"term": "121CHMTv","definition": "Chamita Fm, Vallito member (Tcv)"},
+    {"categories": ["formation_code"],"term": "121CHMTvs","definition": "Chamita Fm, sandy Vallito member (Tcvs)"},
+    {"categories": ["formation_code"],"term": "121OGLL","definition": "Ogallala Formation"},
+    {"categories": ["formation_code"],"term": "121PUYEF","definition": "Puye Conglomerate, Fanglomerate Member"},
+    {"categories": ["formation_code"],"term": "121TSUQ","definition": "Tesuque Formation, undifferentiated unit"},
+    {"categories": ["formation_code"],"term": "121TSUQa","definition": "Tesuque Fm lithosome A (Tta)"},
+    {"categories": ["formation_code"],"term": "121TSUQacu","definition": "Tesuque Fm (upper), Cuarteles member lithosome A (Ttacu)"},
+    {"categories": ["formation_code"],"term": "121TSUQacuf","definition": "Tesuque Fm (upper), fine-grained Cuarteles member lithosome A (Ttacuf)"},
+    {"categories": ["formation_code"],"term": "121TSUQaml","definition": "Tesuque Fm lower-middle lithosome A (Ttaml)"},
+    {"categories": ["formation_code"],"term": "121TSUQb","definition": "Tesuque Fm lithosome B (Ttb)"},
+    {"categories": ["formation_code"],"term": "121TSUQbfl","definition": "Tesuque Fm lower lithosome B, basin-floor deposits (Ttbfl)"},
+    {"categories": ["formation_code"],"term": "121TSUQbfm","definition": "Tesuque Fm middle lithosome B, basin-floor deposits (Ttbfm)"},
+    {"categories": ["formation_code"],"term": "121TSUQbp","definition": "Tesuque Fm lithosome B, Pojoaque member (Ttbp)"},
+    {"categories": ["formation_code"],"term": "121TSUQce","definition": "Tesuque Fm, Cejita member (Ttce)"},
+    {"categories": ["formation_code"],"term": "121TSUQe","definition": "Tesuque Fm lithosome E (Tte)"},
+    {"categories": ["formation_code"],"term": "121TSUQs","definition": "Tesuque Fm lithosome S (Tts)"},
+    {"categories": ["formation_code"],"term": "121TSUQsa","definition": "Tesuque Fm lateral gradation lithosomes S and A (Ttsag)"},
+    {"categories": ["formation_code"],"term": "121TSUQsc","definition": "Tesuque Fm coarse-grained lithosome S (Ttsc)"},
+    {"categories": ["formation_code"],"term": "121TSUQsf","definition": "Tesuque Fm, fine-grained lithosome S (Ttsf)"},
+    {"categories": ["formation_code"],"term": "122CHOC","definition": "Chamita and Ojo Caliente interlayered (Ttoc)"},
+    {"categories": ["formation_code"],"term": "122CRTO","definition": "Chama El Rito Formation (Tesuque member, Ttc)"},
+    {"categories": ["formation_code"],"term": "122OJOC","definition": "Ojo Caliente Formation (Tesuque member, Tto)"},
+    {"categories": ["formation_code"],"term": "122PICR","definition": "Picuris Tuff"},
+    {"categories": ["formation_code"],"term": "122PPTS","definition": "Popotosa Formation"},
+    {"categories": ["formation_code"],"term": "122SNTFP","definition": "Lower Santa Fe Group, piedmont facies"},
+    {"categories": ["formation_code"],"term": "123DTILSPRS","definition": "Datil Group ignimbrites and lavas and Spears Group, interbedded"},
+    {"categories": ["formation_code"],"term": "123DTMGandbas","definition": "Datil and Mogollon Group andesite, basaltic andesite, and basalt flows"},
+    {"categories": ["formation_code"],"term": "123DTMGign","definition": "Datil and Mogollon Group ignimbrites"},
+    {"categories": ["formation_code"],"term": "123DTMGrhydac","definition": "Datil and Mogollon Group rhyolite and dacite flows"},
+    {"categories": ["formation_code"],"term": "123ESPN","definition": "T Espinaso Formation (Te)"},
+    {"categories": ["formation_code"],"term": "123GLST","definition": "T Galisteo Formation"},
+    {"categories": ["formation_code"],"term": "123PICS","definition": "T Picuris Formation (Tp)"},
+    {"categories": ["formation_code"],"term": "123PICSc","definition": "T Picuris Formation, basal conglomerate (Tpc)"},
+    {"categories": ["formation_code"],"term": "123PICSl","definition": "T lower Picuris Formation (Tpl)"},
+    {"categories": ["formation_code"],"term": "123SPRSDTMGlava","definition": "Spears Group and Datil-Mogollon intermediate-mafic lavas, interbedded"},
+    {"categories": ["formation_code"],"term": "123SPRSlower","definition": "Spears Group, lower part; tuffaceous, gravelly debris and mud flows"},
+    {"categories": ["formation_code"],"term": "123SPRSmid_uppe","definition": "Spears Group, middle-upper part; excludes Dog Spring Formation"},
+    {"categories": ["formation_code"],"term": "124BACA","definition": "Baca Formation"},
+    {"categories": ["formation_code"],"term": "124CBMN","definition": "Cub Mountain Formation"},
+    {"categories": ["formation_code"],"term": "124LLVS","definition": "Llaves Member of San Jose Formation"},
+    {"categories": ["formation_code"],"term": "124PSCN","definition": "Poison Canyon Formation"},
+    {"categories": ["formation_code"],"term": "124RGIN","definition": "Regina Member of San Jose Formation"},
+    {"categories": ["formation_code"],"term": "124SNJS","definition": "San Jose Formation"},
+    {"categories": ["formation_code"],"term": "124TPCS","definition": "TapicitosMember of San Jose Formation"},
+    {"categories": ["formation_code"],"term": "125NCMN","definition": "Nacimiento Formation"},
+    {"categories": ["formation_code"],"term": "125NCMNS","definition": "Nacimiento Formation, Sandy Shale Facies"},
+    {"categories": ["formation_code"],"term": "125RTON","definition": "Raton Formation"},
+    {"categories": ["formation_code"],"term": "130CALDFLOOR","definition": "Caldera Floor bedrock S. of San Agustin Plains. Mostly DTILSPRS & Paleo."},
+    {"categories": ["formation_code"],"term": "180TKSCC_Upper","definition": "Tertiary-Cretaceous, Sanders Canyon, Cub Mtn. and upper Crevasse Canyon Fm"},
+    {"categories": ["formation_code"],"term": "180TKTR","definition": "Tertiary-Cretaceous-Triassic, Baca, Crevasse Cyn, Gallup, Mancos, Dakota, T"},
+    {"categories": ["formation_code"],"term": "210CRCS","definition": "Cretaceous System, undivided"},
+    {"categories": ["formation_code"],"term": "210GLUPC_Lower","definition": "K Gallup Sandstone and lower Crevasse Canyon Fm"},
+    {"categories": ["formation_code"],"term": "210HOSTD","definition": "K Hosta Dalton"},
+    {"categories": ["formation_code"],"term": "210MCDK","definition": "K Mancos/Dakota undivided"},
+    {"categories": ["formation_code"],"term": "210MNCS","definition": "Mancos Shale, undivided"},
+    {"categories": ["formation_code"],"term": "210MNCSL","definition": "K Lower Mancos"},
+    {"categories": ["formation_code"],"term": "210MNCSU","definition": "K Upper Mancos"},
+    {"categories": ["formation_code"],"term": "211CLFHV","definition": "Cliff House Sandstone, includes La Ventana Tongues in NW Sandoval Co."},
+    {"categories": ["formation_code"],"term": "211CRLL","definition": "Carlile Shale"},
+    {"categories": ["formation_code"],"term": "211CRVC","definition": "Crevasse Canyon Formation of Mesaverde Group"},
+    {"categories": ["formation_code"],"term": "211DKOT","definition": "Dakota Sandstone or Formation"},
+    {"categories": ["formation_code"],"term": "211DLCO","definition": "Dilco Coal Member of Crevasse Canyon Formation of Mesaverde Group"},
+    {"categories": ["formation_code"],"term": "211DLTN","definition": "Dalton Sandstone Member of Crevasse Canyon Formation of Mesaverde Group"},
+    {"categories": ["formation_code"],"term": "211FRHS","definition": "Fort Hays Limestone Member of Niobrara Formation"},
+    {"categories": ["formation_code"],"term": "211FRLD","definition": "Fruitland Formation"},
+    {"categories": ["formation_code"],"term": "211FRMG","definition": "Farmington Sandstone Member of Kirtland Shale"},
+    {"categories": ["formation_code"],"term": "211GBSNC","definition": "Gibson Coal Member of Crevasse Canyon Formation of Mesaverde Group"},
+    {"categories": ["formation_code"],"term": "211GLLG","definition": "Gallego Sandstone Member of Gallup Sandstone"},
+    {"categories": ["formation_code"],"term": "211GLLP","definition": "Gallup Sandstone"},
+    {"categories": ["formation_code"],"term": "211GRRG","definition": "Greenhorn and Graneros Formations"},
+    {"categories": ["formation_code"],"term": "211GRRS","definition": "Graneros Shale"},
+    {"categories": ["formation_code"],"term": "211HOST","definition": "Hosta Tongue of Point Lookout Sandstone of Mesaverde Group"},
+    {"categories": ["formation_code"],"term": "211KRLD","definition": "Kirtland Shale"},
+    {"categories": ["formation_code"],"term": "211LWIS","definition": "Lewis Shale"},
+    {"categories": ["formation_code"],"term": "211MENF","definition": "Menefee Formation"},
+    {"categories": ["formation_code"],"term": "211MENFU","definition": "K Upper Menefee (above Harmon Sandstone)"},
+    {"categories": ["formation_code"],"term": "211MVRD","definition": "Mesaverde Group"},
+    {"categories": ["formation_code"],"term": "211OJAM","definition": "Ojo Alamo Sandstone"},
+    {"categories": ["formation_code"],"term": "211PCCF","definition": "Pictured Cliffs Sandstone"},
+    {"categories": ["formation_code"],"term": "211PIRR","definition": "Pierre Shale"},
+    {"categories": ["formation_code"],"term": "211PNLK","definition": "Point Lookout Sandstone"},
+    {"categories": ["formation_code"],"term": "211SMKH","definition": "Smoky Hill Marl Member"},
+    {"categories": ["formation_code"],"term": "211TLLS","definition": "Twowells Sandstone Lentil of Pike of Dakota Sandstone"},
+    {"categories": ["formation_code"],"term": "212KTRP","definition": "K Dakota Sandstone, Moenkopi Fm, Artesia Group"},
+    {"categories": ["formation_code"],"term": "217PRGR","definition": "Purgatoire Formation"},
+    {"categories": ["formation_code"],"term": "220ENRD","definition": "Entrada Sandstone"},
+    {"categories": ["formation_code"],"term": "220JURC","definition": "Jurassic undivided"},
+    {"categories": ["formation_code"],"term": "220NAVJ","definition": "Navajo Sandstone"},
+    {"categories": ["formation_code"],"term": "221BLFF","definition": "Bluff Sandstone of Morrison Formation"},
+    {"categories": ["formation_code"],"term": "221CSPG","definition": "Cow Springs Sandstone of Morrison Formation"},
+    {"categories": ["formation_code"],"term": "221ERADU","definition": "Entrada Sandstone of San Rafael Group, Upper"},
+    {"categories": ["formation_code"],"term": "221MRSN","definition": "Morrison Formation"},
+    {"categories": ["formation_code"],"term": "221MRSN/BBSN","definition": "Brushy Basin Member of Morrison"},
+    {"categories": ["formation_code"],"term": "221MRSN/JCKP","definition": "Jackpile Sandstone Member of Morrison"},
+    {"categories": ["formation_code"],"term": "221MRSN/RCAP","definition": "Recapture Shale Member of Morrison"},
+    {"categories": ["formation_code"],"term": "221MRSN/WWCN","definition": "Westwater Canyon Member of Morrison"},
+    {"categories": ["formation_code"],"term": "221SLWS","definition": "Salt Wash Sandstone Member of Morrison Formation"},
+    {"categories": ["formation_code"],"term": "221SMVL","definition": "Summerville Formation of San Rafael Group"},
+    {"categories": ["formation_code"],"term": "221TDLT","definition": "J Todilto"},
+    {"categories": ["formation_code"],"term": "221WSRC","definition": "Westwater Canyon Sandstone Member of Morrison Formation"},
+    {"categories": ["formation_code"],"term": "221ZUNIS","definition": "Zuni Sandstone"},
+    {"categories": ["formation_code"],"term": "231AGZC","definition": "Tr Agua Zarca"},
+    {"categories": ["formation_code"],"term": "231AGZCU","definition": "Tr Upper Agua Zarca"},
+    {"categories": ["formation_code"],"term": "231CHNL","definition": "Chinle Formation"},
+    {"categories": ["formation_code"],"term": "231CORR","definition": "Correo Sandstone Member of Chinle Formation"},
+    {"categories": ["formation_code"],"term": "231DCKM","definition": "Dockum Group"},
+    {"categories": ["formation_code"],"term": "231PFDF","definition": "Tr Petrified Forest"},
+    {"categories": ["formation_code"],"term": "231PFDFL","definition": "Tr Lower Petrified Forest (below middle sandstone)"},
+    {"categories": ["formation_code"],"term": "231PFDFM","definition": "Tr Middle Petrified Forest sandstone"},
+    {"categories": ["formation_code"],"term": "231PFDFU","definition": "Tr Upper Petrified Forest (above middle sandstone)"},
+    {"categories": ["formation_code"],"term": "231RCKP","definition": "Rock Point Member of Wingate Sandstone"},
+    {"categories": ["formation_code"],"term": "231SNRS","definition": "Santa Rosa Sandstone"},
+    {"categories": ["formation_code"],"term": "231SNSL","definition": "Sonsela Sandstone Bed of Petrified Forest Member of Chinle Formation"},
+    {"categories": ["formation_code"],"term": "231SRMP","definition": "Shinarump Member of Chinle Formation"},
+    {"categories": ["formation_code"],"term": "231WNGT","definition": "Wingate Sandstone"},
+    {"categories": ["formation_code"],"term": "260SNAN","definition": "P San Andres"},
+    {"categories": ["formation_code"],"term": "260SNAN_lower","definition": "Lower San Andres Formation"},
+    {"categories": ["formation_code"],"term": "261SNGL","definition": "P San Andres - Glorieta Sandstone in Rio Bonito member"},
+    {"categories": ["formation_code"],"term": "300YESO","definition": "P Yeso"},
+    {"categories": ["formation_code"],"term": "300YESO_lower","definition": "Lower Yeso Formation"},
+    {"categories": ["formation_code"],"term": "300YESO_upper","definition": "Upper Yeso Formation"},
+    {"categories": ["formation_code"],"term": "310ABO","definition": "P Abo"},
+    {"categories": ["formation_code"],"term": "310DCLL","definition": "De Chelly Sandstone Member of Cutler Formation"},
+    {"categories": ["formation_code"],"term": "310GLOR","definition": "Glorieta Sandstone Member of San Andres Formation (of Manzano Group)"},
+    {"categories": ["formation_code"],"term": "310MBLC","definition": "Meseta Blanca Sandstone Member of Yeso Formation"},
+    {"categories": ["formation_code"],"term": "310TRRS","definition": "Torres Member of Yeso Formation"},
+    {"categories": ["formation_code"],"term": "310YESO","definition": "Yeso Formation"},
+    {"categories": ["formation_code"],"term": "310YESOG","definition": "Yeso Formation, Manzono Group"},
+    {"categories": ["formation_code"],"term": "312CSTL","definition": "Castile Formation"},
+    {"categories": ["formation_code"],"term": "312RSLR","definition": "Rustler Formation"},
+    {"categories": ["formation_code"],"term": "313ARTS","definition": "Artesia Group"},
+    {"categories": ["formation_code"],"term": "313BLCN","definition": "Bell Canyon Formation"},
+    {"categories": ["formation_code"],"term": "313BRUC","definition": "Brushy Canyon Formation of Delaware Mountain Group"},
+    {"categories": ["formation_code"],"term": "313CKBF","definition": "Chalk Bluff Formation"},
+    {"categories": ["formation_code"],"term": "313CLBD","definition": "Carlsbad Limestone"},
+    {"categories": ["formation_code"],"term": "313CPTN","definition": "Capitan Limestone"},
+    {"categories": ["formation_code"],"term": "313GDLP","definition": "Guadalupian Series"},
+    {"categories": ["formation_code"],"term": "313GOSP","definition": "Goat Seep Dolomite"},
+    {"categories": ["formation_code"],"term": "313SADG","definition": "San Andres Limestone and Glorieta Sandstone"},
+    {"categories": ["formation_code"],"term": "313SADR","definition": "San Andres Limestone, undivided"},
+    {"categories": ["formation_code"],"term": "313TNSL","definition": "Tansill Formation"},
+    {"categories": ["formation_code"],"term": "313YATS","definition": "Yates Formation, Guadalupe Group"},
+    {"categories": ["formation_code"],"term": "315LABR","definition": "P Laborcita (Bursum)"},
+    {"categories": ["formation_code"],"term": "315YESOABO","definition": "Alamosa Creek and San Agustin Plains area - Yeso and Abo Formations"},
+    {"categories": ["formation_code"],"term": "318ABO","definition": "P Abo"},
+    {"categories": ["formation_code"],"term": "318BSPG","definition": "Bone Spring Limestone"},
+    {"categories": ["formation_code"],"term": "318JOYT","definition": "Joyita Sandstone Member of Yeso Formation"},
+    {"categories": ["formation_code"],"term": "318YESO","definition": "Yeso Formation"},
+    {"categories": ["formation_code"],"term": "319BRSM","definition": "Bursum Formation and Equivalent Rocks"},
+    {"categories": ["formation_code"],"term": "320HLDR","definition": "Penn Holder"},
+    {"categories": ["formation_code"],"term": "320PENN","definition": "Pennsylvanian undivided"},
+    {"categories": ["formation_code"],"term": "320SNDI","definition": "Sandia Formation"},
+    {"categories": ["formation_code"],"term": "321SGDC","definition": "Sangre de Cristo Formation"},
+    {"categories": ["formation_code"],"term": "322BEMN","definition": "Penn Beeman"},
+    {"categories": ["formation_code"],"term": "325GBLR","definition": "Penn Gobbler"},
+    {"categories": ["formation_code"],"term": "325MDER","definition": "Madera Limestone, undivided"},
+    {"categories": ["formation_code"],"term": "325MDERL","definition": "Penn Lower Madera"},
+    {"categories": ["formation_code"],"term": "325MDERU","definition": "Penn Upper Madera"},
+    {"categories": ["formation_code"],"term": "325SAND","definition": "Penn Sandia"},
+    {"categories": ["formation_code"],"term": "326MGDL","definition": "Magdalena Group"},
+    {"categories": ["formation_code"],"term": "340EPRS","definition": "Espiritu Santo Formation"},
+    {"categories": ["formation_code"],"term": "350PZBA","definition": "Alamosa Creek and San Agustin Plains area - Paleozoic strata beneath Abo Fm"},
+    {"categories": ["formation_code"],"term": "350PZBB","definition": "Tul Basin area - Paleozoic strata below Bursum Fm"},
+    {"categories": ["formation_code"],"term": "400EMBD","definition": "Embudo Granite (undifferentiated PreCambrian near Santa Fe)"},
+    {"categories": ["formation_code"],"term": "400PCMB","definition": "Precambrian Erathem"},
+    {"categories": ["formation_code"],"term": "400PREC","definition": "undifferentiated PreCambrian crystalline rocks (X)"},
+    {"categories": ["formation_code"],"term": "400PRECintr","definition": "PreCambrian crystalline rocks and local Tertiary intrusives"},
+    {"categories": ["formation_code"],"term": "400PRST","definition": "Priest Granite"},
+    {"categories": ["formation_code"],"term": "400TUSS","definition": "Tusas Granite"},
+    {"categories": ["formation_code"],"term": "410PRCG","definition": "PreCambrian granite (Xg)"},
+    {"categories": ["formation_code"],"term": "410PRCGf","definition": "PreCambrian granite, fractured (Xgf)"},
+    {"categories": ["formation_code"],"term": "410PRCQ","definition": "PreCambrian quartzite (Xq)"},
+    {"categories": ["formation_code"],"term": "410PRCQf","definition": "PreCambrian quartzite, fractured (Xqf)"},
+    {"categories": ["formation_code"],"term": "121GILA","definition": "Gila Conglomerate (group)"},
+    {"categories": ["formation_code"],"term": "312DYLK","definition": "Dewey Lake Redbeds"},
+    {"categories": ["formation_code"],"term": "120WMVL","definition": "Wimsattville Formation"},
+    {"categories": ["formation_code"],"term": "313GRBG","definition": "Grayburg Formation of Artesia Group"},
+    {"categories": ["formation_code"],"term": "318ABOL","definition": "Abo Sandstone (Lower Tongue)"},
+    {"categories": ["formation_code"],"term": "318ABOU","definition": "Abo Sandstone (Upper Tongue)"},
+    {"categories": ["formation_code"],"term": "112SNTFU","definition": "Santa Fe Group, Upper Part"},
+    {"categories": ["formation_code"],"term": "310FRNR","definition": "Forty-Niner Member of Rustler Formation"},
+    {"categories": ["formation_code"],"term": "312OCHO","definition": "Ochoan Series"},
+    {"categories": ["formation_code"],"term": "313AZOT","definition": "Azotea Tongue of Seven Rivers Formation"},
+    {"categories": ["formation_code"],"term": "313QUEN","definition": "Queen Formation"},
+    {"categories": ["formation_code"],"term": "319HUCO","definition": "Hueco Limestone"},
+    {"categories": ["formation_code"],"term": "313SVRV","definition": "Seven Rivers Formation"},
+    {"categories": ["formation_code"],"term": "313CABD","definition": "Carlsbad Group"},
+    {"categories": ["formation_code"],"term": "320GRMS","definition": "Gray Mesa Member of Madera Formation"},
+    {"categories": ["formation_code"],"term": "211CLRDH","definition": "Colorado Shale"},
+    {"categories": ["formation_code"],"term": "120BRLM","definition": "Bearwallow Mountain Andesite"},
+    {"categories": ["formation_code"],"term": "122RUBO","definition": "Rubio Peak Formation"},
+    {"categories": ["formation_code"],"term": "313SADRL","definition": "San Andres Limestone, Lower Cherty Member"},
+    {"categories": ["formation_code"],"term": "313SADRU","definition": "San Andres Limestone, Upper Clastic Member"},
+    {"categories": ["formation_code"],"term": "313BRNL","definition": "Bernal Formation of Artesia Group"},
+    {"categories": ["formation_code"],"term": "318CPDR","definition": "Chupadera Formation"},
+    {"categories": ["formation_code"],"term": "121BDHC","definition": "Bidahochi Formation"},
+    {"categories": ["formation_code"],"term": "313SADY","definition": "San Andres Limestone and Yeso Formation, undivided"},
+    {"categories": ["formation_code"],"term": "221SRFLL","definition": "San Rafael Group, Lower Part"},
+    {"categories": ["formation_code"],"term": "221BLUF","definition": "Bluff Sandstone of Morrison Formation"},
+    {"categories": ["formation_code"],"term": "221COSP","definition": "Cow Springs Sandstone of Morrison Formation"},
+    {"categories": ["formation_code"],"term": "317ABYS","definition": "Abo and Yeso, undifferentiated"},
+    {"categories": ["formation_code"],"term": "221BRSB","definition": "Brushy Basin Shale Member of Morrison Formation"},
+    {"categories": ["formation_code"],"term": "310SYDR","definition": "San Ysidro Member of Yeso Formation"},
+    {"categories": ["formation_code"],"term": "400SDVL","definition": "Sandoval Granite"},
+    {"categories": ["formation_code"],"term": "221SRFL","definition": "San Rafael Group"},
+    {"categories": ["formation_code"],"term": "310SGRC","definition": "Sangre de Cristo Formation"},
+    {"categories": ["formation_code"],"term": "231TCVS","definition": "Tecovas Formation of Dockum Group"},
+    {"categories": ["formation_code"],"term": "211DCRS","definition": "D-Cross Tongue of Mancos Shale of Mesaverde Group"},
+    {"categories": ["formation_code"],"term": "211ALSN","definition": "Allison Member of Menefee Formation of Mesaverde Group"},
+    {"categories": ["formation_code"],"term": "211LVNN","definition": "La Ventana Tongue of Cliff House Sandstone"},
+    {"categories": ["formation_code"],"term": "211MORD","definition": "Madrid Formation"},
+    {"categories": ["formation_code"],"term": "210PRMD","definition": "Pyramid Shale"},
+    {"categories": ["formation_code"],"term": "124ANMS","definition": "Animas Formation"},
+    {"categories": ["formation_code"],"term": "211NBRR","definition": "Niobrara Formation"},
+    {"categories": ["formation_code"],"term": "111ALVM","definition": "Holocene Alluvium"},
+    {"categories": ["formation_code"],"term": "122SNTFL","definition": "Santa Fe Group, Lower Part"},
+    {"categories": ["formation_code"],"term": "111CPLN","definition": "Capulin Basalts"},
+    {"categories": ["formation_code"],"term": "120CRSN","definition": "Carson Conflomerate"},
+    {"categories": ["formation_code"],"term": "111CRMS","definition": "Covered/Reclaimed Mine Spoil"},
+    {"categories": ["formation_code"],"term": "111CRMSA","definition": "Covered/Reclaimed Mine Spoil and Ash"},
+    {"categories": ["formation_code"],"term": "111SPOL","definition": "Spoil"},
+    {"categories": ["formation_code"],"term": "110TURT","definition": "Tuerto Gravel of Santa Fe Group"},
+    {"categories": ["formation_code"],"term": "221RCPR","definition": "Recapture Shale Member of Morrison Formation"},
+    {"categories": ["formation_code"],"term": "320BLNG","definition": "Bullington Member of Magdalena Formation"},
+    {"categories": ["formation_code"],"term": "112ANCHsr","definition": "Upper Santa Fe Group, Ancha Formation & ancestral Santa Fe river deposits"},
+    {"categories": ["formation_code"],"term": "121TSUQae","definition": "Tesuque Fm Lithosomes A and E"},
+    {"categories": ["formation_code"],"term": "230TRSC","definition": "Triassic undifferentiated"},
+    {"categories": ["formation_code"],"term": "122TSUQdx","definition": "Tesuque Fm, Dixon member (Ttd)"},
+    {"categories": ["formation_code"],"term": "123PICSu","definition": "T upper Picuris Formation (Tpu)"},
+    {"categories": ["formation_code"],"term": "123PICSm","definition": "T middle Picuris Formation (Tpm)"},
+    {"categories": ["formation_code"],"term": "123PICSmc","definition": "T middle conglomerate Picuris Formation (Tpmc)"},
+    {"categories": ["formation_code"],"term": "120VBVC","definition": "Tertiary volcanic breccia/volcaniclastic conglomerate"},
+    {"categories": ["formation_code"],"term": "120VCSS","definition": "Tertiary volcaniclastic sandstone"},
+    {"categories": ["formation_code"],"term": "124DMDT","definition": "Diamond Tail Formation"},
+    {"categories": ["formation_code"],"term": "325ALMT","definition": "Penn Alamitos Formation"},
+    {"categories": ["formation_code"],"term": "400SAND","definition": "Sandia Granite"},
+    {"categories": ["formation_code"],"term": "318VCPK","definition": "Victorio Peak Limestone"},
+    {"categories": ["formation_code"],"term": "318BSVP","definition": "Bone Spring and Victorio Peak Limestones"},
+    {"categories": ["formation_code"],"term": "100ALVM","definition": "Alluvium"},
+    {"categories": ["formation_code"],"term": "310PRMN","definition": "Permian System"},
+    {"categories": ["formation_code"],"term": "110AVPS","definition": "Alluvium and Permian System"},
+    {"categories": ["formation_code"],"term": "313CRCX","definition": "Capitan Reef Complex and Associated Limestones"},
+    {"categories": ["formation_code"],"term": "112SLBL","definition": "Salt Bolson"},
+    {"categories": ["formation_code"],"term": "112SBCRC","definition": "Salt Bolson and Capitan Reef Complex"},
+    {"categories": ["formation_code"],"term": "313CRDM","definition": "Capitan Reef Complex - Delaware Mountain Group"},
+    {"categories": ["formation_code"],"term": "112SBDM","definition": "Salt Bolson and Delaware Mountain Group"},
+    {"categories": ["formation_code"],"term": "120BLSN","definition": "Bolson Deposits"},
+    {"categories": ["formation_code"],"term": "112SBCR","definition": "Salt Bolson and Cretaceous Rocks"},
+    {"categories": ["formation_code"],"term": "112HCBL","definition": "Hueco Bolson"},
+    {"categories": ["formation_code"],"term": "120IVIG","definition": "Intrusive Rocks"},
+    {"categories": ["formation_code"],"term": "112RLBL","definition": "Red Light Draw Bolson"},
+    {"categories": ["formation_code"],"term": "112EFBL","definition": "Eagle Flat Bolson"},
+    {"categories": ["formation_code"],"term": "112GRBL","definition": "Green River Bolson"},
+    {"categories": ["formation_code"],"term": "123SAND","definition": "Sanders Canyon Formation"},
+    {"categories": ["formation_code"],"term": "210MRNH","definition": "Moreno Hill Formation"},
+    {"categories": ["formation_code"],"term": "320ALMT","definition": "Alamito Shale"},
+    {"categories": ["formation_code"],"term": "313DLRM","definition": "Delaware Mountain Group"},
+    {"categories": ["formation_code"],"term": "300PLZC","definition": "Paleozoic Erathem"},
+    {"categories": ["formation_code"],"term": "122SPRS","definition": "Spears Member of Datil Formation"},
+    {"categories": ["formation_code"],"term": "110AVTV","definition": "Alluvium and Tertiary Volcanics"},
+    {"categories": ["formation_code"],"term": "313DMBS","definition": "Delaware Mountain Group - Bone Spring Limestone"},
+    {"categories": ["formation_code"],"term": "120ERSV","definition": "Tertiary extrusives"},
+    {"categories": ["lithology"],"term": "Alluvium","definition": "Alluvium"},
+    {"categories": ["lithology"],"term": "Anhydrite","definition": "Anhydrite"},
+    {"categories": ["lithology"],"term": "Arkose","definition": "Arkose"},
+    {"categories": ["lithology"],"term": "Boulders","definition": "Boulders"},
+    {"categories": ["lithology"],"term": "Boulders, silt and clay","definition": "Boulders, silt and clay"},
+    {"categories": ["lithology"],"term": "Boulders and sand","definition": "Boulders and sand"},
+    {"categories": ["lithology"],"term": "Bentonite","definition": "Bentonite"},
+    {"categories": ["lithology"],"term": "Breccia","definition": "Breccia"},
+    {"categories": ["lithology"],"term": "Basalt","definition": "Basalt"},
+    {"categories": ["lithology"],"term": "Conglomerate","definition": "Conglomerate"},
+    {"categories": ["lithology"],"term": "Chalk","definition": "Chalk"},
+    {"categories": ["lithology"],"term": "Chert","definition": "Chert"},
+    {"categories": ["lithology"],"term": "Clay","definition": "Clay"},
+    {"categories": ["lithology"],"term": "Caliche","definition": "Caliche"},
+    {"categories": ["lithology"],"term": "Calcite","definition": "Calcite"},
+    {"categories": ["lithology"],"term": "Clay, some sand","definition": "Clay, some sand"},
+    {"categories": ["lithology"],"term": "Claystone","definition": "Claystone"},
+    {"categories": ["lithology"],"term": "Coal","definition": "Coal"},
+    {"categories": ["lithology"],"term": "Cobbles","definition": "Cobbles"},
+    {"categories": ["lithology"],"term": "Cobbles, silt and clay","definition": "Cobbles, silt and clay"},
+    {"categories": ["lithology"],"term": "Cobbles and sand","definition": "Cobbles and sand"},
+    {"categories": ["lithology"],"term": "Dolomite","definition": "Dolomite"},
+    {"categories": ["lithology"],"term": "Dolomite and shale","definition": "Dolomite and shale"},
+    {"categories": ["lithology"],"term": "Evaporite","definition": "Evaporite"},
+    {"categories": ["lithology"],"term": "Gneiss","definition": "Gneiss"},
+    {"categories": ["lithology"],"term": "Gypsum","definition": "Gypsum"},
+    {"categories": ["lithology"],"term": "Graywacke","definition": "Graywacke"},
+    {"categories": ["lithology"],"term": "Gravel and clay","definition": "Gravel and clay"},
+    {"categories": ["lithology"],"term": "Gravel, cemented","definition": "Gravel, cemented"},
+    {"categories": ["lithology"],"term": "Gravel, sand and silt","definition": "Gravel, sand and silt"},
+    {"categories": ["lithology"],"term": "Granite, gneiss","definition": "Granite, gneiss"},
+    {"categories": ["lithology"],"term": "Granite","definition": "Granite"},
+    {"categories": ["lithology"],"term": "Gravel, silt and clay","definition": "Gravel, silt and clay"},
+    {"categories": ["lithology"],"term": "Gravel","definition": "Gravel"},
+    {"categories": ["lithology"],"term": "Igneous undifferentiated","definition": "Igneous undifferentiated"},
+    {"categories": ["lithology"],"term": "Lignite","definition": "Lignite"},
+    {"categories": ["lithology"],"term": "Limestone and dolomite","definition": "Limestone and dolomite"},
+    {"categories": ["lithology"],"term": "Limestone and shale","definition": "Limestone and shale"},
+    {"categories": ["lithology"],"term": "Limestone","definition": "Limestone"},
+    {"categories": ["lithology"],"term": "Marl","definition": "Marl"},
+    {"categories": ["lithology"],"term": "Mudstone","definition": "Mudstone"},
+    {"categories": ["lithology"],"term": "Metamorphic undifferentiated","definition": "Metamorphic undifferentiated"},
+    {"categories": ["lithology"],"term": "Marlstone","definition": "Marlstone"},
+    {"categories": ["lithology"],"term": "No Recovery","definition": "No Recovery"},
+    {"categories": ["lithology"],"term": "Peat","definition": "Peat"},
+    {"categories": ["lithology"],"term": "Quartzite","definition": "Quartzite"},
+    {"categories": ["lithology"],"term": "Rhyolite","definition": "Rhyolite"},
+    {"categories": ["lithology"],"term": "Sand","definition": "Sand"},
+    {"categories": ["lithology"],"term": "Schist","definition": "Schist"},
+    {"categories": ["lithology"],"term": "Sand and clay","definition": "Sand and clay"},
+    {"categories": ["lithology"],"term": "Sand and gravel","definition": "Sand and gravel"},
+    {"categories": ["lithology"],"term": "Sandstone and shale","definition": "Sandstone and shale"},
+    {"categories": ["lithology"],"term": "Sand and silt","definition": "Sand and silt"},
+    {"categories": ["lithology"],"term": "Sand, gravel and clay","definition": "Sand, gravel and clay"},
+    {"categories": ["lithology"],"term": "Shale","definition": "Shale"},
+    {"categories": ["lithology"],"term": "Silt","definition": "Silt"},
+    {"categories": ["lithology"],"term": "Siltstone and shale","definition": "Siltstone and shale"},
+    {"categories": ["lithology"],"term": "Siltstone","definition": "Siltstone"},
+    {"categories": ["lithology"],"term": "Slate","definition": "Slate"},
+    {"categories": ["lithology"],"term": "Sand, some clay","definition": "Sand, some clay"},
+    {"categories": ["lithology"],"term": "Sandstone","definition": "Sandstone"},
+    {"categories": ["lithology"],"term": "Silt and clay","definition": "Silt and clay"},
+    {"categories": ["lithology"],"term": "Travertine","definition": "Travertine"},
+    {"categories": ["lithology"],"term": "Tuff","definition": "Tuff"},
+    {"categories": ["lithology"],"term": "Volcanic undifferentiated","definition": "Volcanic undifferentiated"},
+    {"categories": ["lithology"],"term": "Clay, yellow","definition": "Clay, yellow"},
+    {"categories": ["lithology"],"term": "Clay, red","definition": "Clay, red"},
+    {"categories": ["lithology"],"term": "Surficial sediment","definition": "Surficial sediment"},
+    {"categories": ["lithology"],"term": "Limestone and sandstone, interbedded","definition": "Limestone and sandstone, interbedded"},
+    {"categories": ["lithology"],"term": "Gravel and boulders","definition": "Gravel and boulders"},
+    {"categories": ["lithology"],"term": "Sand, silt and gravel","definition": "Sand, silt and gravel"},
+    {"categories": ["lithology"],"term": "Sand, gravel, silt and clay","definition": "Sand, gravel, silt and clay"},
+    {"categories": ["lithology"],"term": "Andesite","definition": "Andesite"},
+    {"categories": ["lithology"],"term": "Ignesous, intrusive, undifferentiated","definition": "Ignesous, intrusive, undifferentiated"},
+    {"categories": ["lithology"],"term": "Limestone, sandstone and shale","definition": "Limestone, sandstone and shale"},
+    {"categories": ["lithology"],"term": "Sand, silt and clay","definition": "Sand, silt and clay"},
     {"categories": ["origin_source"], "term": "Reported by another agency", "definition": "Reported by another agency"},
     {"categories": ["origin_source"], "term": "From driller's log or well report", "definition": "From driller's log or well report"},
     {"categories": ["origin_source"], "term": "Private geologist, consultant or univ associate", "definition": "Private geologist, consultant or univ associate"},
diff --git a/services/util.py b/services/util.py
index e9ec08a94..a3ddcf472 100644
--- a/services/util.py
+++ b/services/util.py
@@ -1,14 +1,13 @@
 import json
 import os
-from shapely.ops import transform
-import pyproj
+
 import httpx
-from sqlalchemy.orm import DeclarativeBase
+import pyproj
+from shapely.ops import transform
 from sqlalchemy.orm import DeclarativeBase
 
 from constants import SRID_WGS84
 
-
 TRANSFORMERS = {}
 METERS_TO_FEET = 3.28084
 
@@ -151,6 +150,7 @@ def get_epqs_elevation_from_point(lon: float, lat: float) -> float | None:
     try:
         data = resp.json()
     except json.decoder.JSONDecodeError:
+        print(f"Error decoding JSON from EPQS: {resp.text}")
         return None
 
     return data["value"]
diff --git a/transfers/transfer.py b/transfers/transfer.py
index 8a9c3bed3..bf0c69b85 100644
--- a/transfers/transfer.py
+++ b/transfers/transfer.py
@@ -17,7 +17,10 @@
 
 from dotenv import load_dotenv
 
+from db.engine import session_ctx
 from services.util import get_bool_env
+from transfers.aquifer_system_transfer import transfer_aquifer_systems
+from transfers.geologic_formation_transfer import transfer_geologic_formations
 
 load_dotenv()
 
@@ -60,6 +63,10 @@ def transfer_all(metrics, limit=100):
 
     flags = {"TRANSFER_ALL_WELLS": True, "LIMIT": limit}  # not currently used
 
+    with session_ctx() as session:
+        transfer_aquifer_systems(session, limit=limit)
+        transfer_geologic_formations(session, limit=limit)
+
     message("TRANSFERRING WELLS")
     results = _execute_transfer(WellTransferer, flags=flags)
     metrics.well_metrics(*results)
diff --git a/transfers/transferer.py b/transfers/transferer.py
index a8045dccb..4312051fd 100644
--- a/transfers/transferer.py
+++ b/transfers/transferer.py
@@ -70,6 +70,7 @@ def _limit_iterator(self, session: Session, limit: int, step: int = 25):
         df = self._get_df_to_iterate()
         n = len(df)
         start_time = time.time()
+        logger.info(f"Starting transfer of {n} [limit={limit}] rows")
         for i, row in enumerate(df.itertuples()):
             if limit and i >= limit:
                 logger.info(f"Reached limit of {limit} rows. Stopping migration.")
diff --git a/transfers/util.py b/transfers/util.py
index 24389bc6d..68fe890ec 100644
--- a/transfers/util.py
+++ b/transfers/util.py
@@ -65,7 +65,6 @@ def estimate_measuring_point_height(
     ) -> tuple[float, str, datetime | None]:
         mph = row.MPHeight
         mph_desc = row.MeasuringPoint
-
         df = self._df[self._df["PointID"] == row.PointID]
         df = df.sort_values("DateMeasured")
         if mph is None:
@@ -327,9 +326,19 @@ def get_transferable_wells(
     wellphotos_df = read_csv("WellPhotos")
     wellphotos_pointids = wellphotos_df["PointID"].unique().tolist()
 
+    pointids = list(set(usgs_pointids + collabnet_pointids + wellphotos_pointids))
+    logger.info(f"total pointids: {len(pointids)} {pointids[:10]}")
+
     # get all pointids that have owner info
+    ownerlinks_df = read_csv("OwnerLink")
+    locdf = read_csv("Location")
+
+    ownerlinks_df = ownerlinks_df.join(locdf.set_index("LocationId"), on="LocationId")
+    ownerlinks_pointids = ownerlinks_df["PointID"].unique().tolist()
+    ownerpointids = list(set(ownerlinks_pointids) - set(pointids))
+    logger.info(f"ownerpointids: {len(ownerpointids)} {ownerpointids[:10]}")
+    pointids = pointids + ownerpointids
 
-    pointids = list(set(usgs_pointids + collabnet_pointids + wellphotos_pointids))
     return df[df["DataSource"].isin(valid_datasources) | df["PointID"].isin(pointids)]
 
 
@@ -442,7 +451,7 @@ def make_location(row: pd.Series, elevations: dict) -> tuple:
     else:
         elevation_from_epqs = True
         logger.info(
-            f"Location {row.PointID} has no Altitude. Setting from National Map EPQS for "
+            f"Location {row.PointID} has no Altitude. Setting from National Map EPQS. "
         )
         z = get_epqs_elevation_from_point(transformed_point.x, transformed_point.y)
 
diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py
index eed5c3eb8..9c3c88ec9 100644
--- a/transfers/well_transfer.py
+++ b/transfers/well_transfer.py
@@ -14,14 +14,14 @@
 # limitations under the License.
 # ===============================================================================
 import json
-
-# import time
+import re
+import time
 from datetime import datetime, UTC
 
-# import re
 import pandas as pd
-from pandas import isna
+from pandas import isna, notna
 from pydantic import ValidationError
+from sqlalchemy.exc import DatabaseError
 from sqlalchemy.orm import Session
 
 from core.enums import (
@@ -38,11 +38,11 @@
     StatusHistory,
     MonitoringFrequencyHistory,
     MeasuringPointHistory,
-    # DataProvenance,
-    # AquiferSystem,
-    # AquiferType,
-    # GeologicFormation,
-    # ThingAquiferAssociation,
+    DataProvenance,
+    AquiferSystem,
+    AquiferType,
+    GeologicFormation,
+    ThingAquiferAssociation,
 )
 from schemas.thing import CreateWell, CreateWellScreen
 from services.gcs_helper import get_storage_bucket
@@ -125,35 +125,130 @@ def _extract_casing_materials(row) -> list[str]:
     return materials
 
 
-# def get_wells_to_transfer(flags: dict = None) -> tuple[pd.DataFrame, pd.DataFrame]:
-#     # if flags is None:
-#     #     flags = {}
-#
-#     wdf = read_csv("WellData", dtype={"OSEWelltagID": str})
-#     ldf = read_csv("Location")
-#     ldf = ldf.drop(["PointID", "SSMA_TimeStamp"], axis=1)
-#     wdf = wdf.join(ldf.set_index("LocationId"), on="LocationId")
-#     wdf = wdf[wdf["SiteType"] == "GW"]
-#     wdf = wdf[wdf["Easting"].notna() & wdf["Northing"].notna()]
-#
-#     input_df = wdf
-#     wdf = replace_nans(wdf)
-#
-#     # if flags.get("TRANSFER_ALL_WELLS", False):
-#     #     # todo: filter Locations by DataSource
-#     #     cleaned_df = filter_by_welldata_datasource_and_project(wdf)
-#     # else:
-#     #     # get a subset of wells that have not been transferred yet
-#     #     # todo: this needs to be defined.
-#     #     #       for now, we are just filtering out wells that have not been transferred yet
-#     #     #       In the future we will be using criteria to determine which wells to transfer
-#     #     #       for example, wells in the "Water Level Network" project
-#     #     cleaned_df = wdf
-#
-#     cleaned_df = get_transferable_wells(wdf)
-#     cleaned_df = filter_non_transferred_wells(cleaned_df)
-#
-#     return input_df, cleaned_df
+pattern = re.compile(
+    r"\b(?P<term>jet|hand|submersible)\b|\b(?P<phrase>line[-\s]+shaft)\b", re.IGNORECASE
+)
+
+
+def first_matched_term(text: str):
+    m = pattern.search(text)
+    if not m:
+        return None
+    return m.group("term") or m.group("phrase")
+
+
+PUMP_MAPPING = {"jet": "Jet", "hand": "Hand", "submersible": "Submersible"}
+
+
+def _extract_well_pump_type(row) -> str | None:
+    if isna(row.ConstructionNotes):
+        return None
+    construction_notes = row.ConstructionNotes.lower()
+    return PUMP_MAPPING.get(first_matched_term(construction_notes), None)
+
+
+# Parse aquifer codes
+def _extract_aquifer_type_codes(aquifer_code: str) -> list[str]:
+    """
+    Parse aquifer type codes that may contain multiple values.
+
+    Args:
+        aquifer_code: Raw code from AquiferType field
+
+    Returns:
+        List of individual codes
+    """
+    if not aquifer_code:
+        return []
+    # clean the code
+    code = aquifer_code.strip().upper()
+    # split into individual characters. This handles cases like "FC" -> ["F", "C"]
+    individual_codes = list(code)
+    return individual_codes
+
+
+# Get or create aquifer system
+def get_or_create_aquifer_system(
+    session: Session, aquifer_name: str, primary_type: str
+) -> AquiferSystem | None:
+    """
+    Get existing aquifer or create new one if it doesn't exist.
+
+    With the new AquiferType model, we create ONE aquifer record per named
+    aquifer (e.g., one "Santa Fe Group"), not multiple variants.
+
+    Args:
+        session: Database session
+        aquifer_name: Name of the aquifer (from AqClass or type name)
+        primary_type: Primary aquifer type for the aquifer_type field
+    """
+    # Try to find existing aquifer by name
+    aquifer = (
+        session.query(AquiferSystem).filter(AquiferSystem.name == aquifer_name).first()
+    )
+
+    if aquifer:
+        return aquifer
+
+    # Create new aquifer
+    try:
+        logger.info(
+            f"Creating new aquifer system: {aquifer_name} (primary type: {primary_type})"
+        )
+
+        aquifer = AquiferSystem(
+            name=aquifer_name,
+            primary_aquifer_type=primary_type,  # Primary type
+            geographic_scale=None,  # Default
+        )
+        session.add(aquifer)
+        session.commit()
+        # session.flush()  # Get the ID
+        # session.refresh(aquifer)
+        return aquifer
+    except DatabaseError as e:
+        session.rollback()
+        logger.critical(f"Error creating aquifer {aquifer_name}: {e}")
+        return None
+
+
+def get_or_create_geologic_formation(
+    session: Session, formation_code: str
+) -> GeologicFormation | None:
+    """
+    Get existing geologic formation or create new one if it doesn't exist.
+
+    Args:
+        session: Database session
+        formation_code: The formation code from FormationZone field
+
+    Returns:
+        GeologicFormation object or None if creation fails
+    """
+    # Try to find existing formation
+    formation = (
+        session.query(GeologicFormation)
+        .filter(GeologicFormation.formation_code == formation_code)
+        .first()
+    )
+
+    if formation:
+        return formation
+
+    # If not found, create new formation
+    try:
+        logger.info(f"Creating new geologic formation: {formation_code}")
+        formation = GeologicFormation(
+            formation_code=formation_code,
+            description=None,
+            lithology=None,
+        )
+        session.add(formation)
+        session.flush()
+        return formation
+    except Exception as e:
+        logger.critical(f"Error creating formation {formation_code}: {e}")
+        return None
 
 
 def get_cached_elevations() -> dict:
@@ -222,13 +317,15 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series):
         try:
             location, elevation_method = make_location(row, self._cached_elevations)
             session.add(location)
+            session.commit()
             self._added_locations[row.PointID] = elevation_method
         except Exception as e:
+            self._capture_error(row.PointID, str(e), str(e), "Location")
+            logger.critical(f"Error making location for {row.PointID}: {e}")
+
             if location is not None:
                 session.expunge(location)
 
-            self._capture_error(row.PointID, str(e), str(e), "Location")
-            logger.critical(f"Error making location for {row.PointID}: {e}")
             return
 
         try:
@@ -237,6 +334,7 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series):
             well_casing_materials = (
                 [] if isna(row.CasingDescription) else _extract_casing_materials(row)
             )
+            well_pump_type = _extract_well_pump_type(row)
 
             # manually add the well rather than add_well from services/thing_helper.py
             # so that effective_start can be set on the location assocation
@@ -253,13 +351,26 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series):
                 ),
                 well_casing_depth=row.CasingDepth,
                 release_status="public" if row.PublicRelease else "private",
-                measuring_point_height=0,
-                measuring_point_description="",
-                # measuring_point_height=row.MPHeight,
-                # measuring_point_description=row.MeasuringPoint,
+                measuring_point_height=row.MPHeight,
+                measuring_point_description=row.MeasuringPoint,
                 notes=(
                     [{"content": row.Notes, "note_type": "Other"}] if row.Notes else []
                 ),
+                well_completion_date=row.CompletionDate,
+                well_driller_name=row.DrillerName,
+                well_construction_method=(
+                    lexicon_mapper.map_value(
+                        f"LU_ConstructionMethod:{row.ConstructionMethod}"
+                    )
+                    if not isna(row.ConstructionMethod)
+                    else None
+                ),
+                well_pump_type=well_pump_type,
+                is_suitable_for_datalogger=(
+                    bool(row.OpenWellLoggerOK)
+                    if not isna(row.OpenWellLoggerOK)
+                    else None
+                ),
             )
 
             CreateWell.model_validate(data)
@@ -280,6 +391,8 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series):
                     "well_casing_materials",
                     "measuring_point_height",
                     "measuring_point_description",
+                    "well_completion_date_source",
+                    "well_construction_method_source",
                 ]
             )
             well_data["thing_type"] = "water well"
@@ -288,17 +401,6 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series):
             well_data.pop("notes")
             well = Thing(**well_data)
             session.add(well)
-            # logger.info(f"Created well for {row.PointID}")
-
-            # flush well to access its ID for status_history
-            # session.flush()
-
-            # session.commit()
-            # session.refresh(well)
-            # if notes:
-            #     for ni in notes:
-            #         nn = well.add_note(ni['content'], ni['note_type'])
-            #         session.add(nn)
 
             if well_purposes:
                 for wp in well_purposes:
@@ -334,13 +436,162 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series):
         assoc.thing = well
         session.add(assoc)
 
+        if isna(row.AquiferType):
+            logger.info(
+                f"No AquiferType for {well.name}. Skipping aquifer association."
+            )
+        else:
+            try:
+                self._add_aquifers(session, row, well)
+            except Exception as e:
+                logger.critical(
+                    f"Error creating aquifer association for {well.name}: {e}"
+                )
+
+        if isna(row.FormationZone):
+            logger.info(
+                f"No FormationZone for {well.name}. Skipping formation association."
+            )
+        else:
+            try:
+                self._add_formation_zone(session, row, well)
+            except Exception as e:
+                logger.critical(
+                    f"Error creating formation association for {well.name}: {e}"
+                )
+
+    def _add_formation_zone(self, session, row, well):
+        # --- Set Formation Completion (NOT depth-based stratigraphy) ---
+        # This simply records which formation the well was completed in.
+        # For detailed depth-interval stratigraphy, see stratigraphy_transfer.py
+
+        formation_code = row.FormationZone
+
+        # Validate formation exists
+        formation = (
+            session.query(GeologicFormation)
+            .filter(GeologicFormation.formation_code == formation_code)
+            .first()
+        )
+
+        if formation:
+            # Formation exists: Set association
+            well.formation_completion_code = formation_code
+            logger.info(f"Set completion formation for {well.name}: {formation_code}")
+        else:
+            # Formation does NOT exist: Do not create new formation. Flag and log for review
+            logger.critical(
+                f"MISSING FORMATION: Formation '{formation_code}' not found for well {well.name}. Flagged for review."
+            )
+            self._capture_error(
+                row.PointID, f"Unknown formation: {formation_code}", "FormationZone"
+            )
+
+    def _add_aquifers(self, session, row, well):
+        # Parse codes (handles multi-character codes like "FC")
+        aquifer_codes = _extract_aquifer_type_codes(row.AquiferType)
+
+        if not aquifer_codes:
+            logger.warning(
+                f"Well {row.PointID}: Empty aquifer codes after parsing '{row.AquiferType}'"
+            )
+            return
+
+        # Map AqClass code to aquifer name using lexicon mapper
+        if isna(row.AqClass):
+            # No AqClass - use first code's mapped name as aquifer name
+            aquifer_name = lexicon_mapper.map_value(
+                f"LU_AquiferType:{aquifer_codes[0]}"
+            )
+        else:
+            try:
+                aquifer_name = lexicon_mapper.map_value(
+                    f"LU_AquiferClass:{row.AqClass}"
+                )
+            except KeyError:
+                logger.warning(
+                    f"Unknown AqClass code '{row.AqClass}' for well {row.PointID}, using first type as name"
+                )
+                aquifer_name = lexicon_mapper.map_value(
+                    f"LU_AquiferType:{aquifer_codes[0]}"
+                )
+
+        # Determine primary type
+        # This assumes the first recorded type of a compound type is the primary type of the aquifer.
+        # TODO: verify with AMMP
+        try:
+            primary_type = lexicon_mapper.map_value(
+                f"LU_AquiferType:{aquifer_codes[0]}"
+            )
+        except KeyError:
+            logger.warning(
+                f"Unknown aquifer type code '{aquifer_codes[0]}' for well {row.PointID}."
+                f"Setting primary_type to 'Unknown'"
+            )
+            primary_type = "Unknown"  # Creates aquifer with placeholder
+
+        # Get or create the aquifer
+        aquifer = get_or_create_aquifer_system(session, aquifer_name, primary_type)
+        logger.info(f"working with {aquifer}, {aquifer.id}")
+        if aquifer:
+            # Check if association already exists
+            existing_assoc = (
+                session.query(ThingAquiferAssociation)
+                .filter(
+                    ThingAquiferAssociation.thing_id == well.id,
+                    ThingAquiferAssociation.aquifer_system_id == aquifer.id,
+                )
+                .first()
+            )
+
+            if not existing_assoc:
+                # Create the association
+                logger.info(f"Associating well {well.name} with aquifer {aquifer.name}")
+                aquifer_assoc = ThingAquiferAssociation(
+                    thing=well, aquifer_system=aquifer
+                )
+                session.add(aquifer_assoc)
+                session.flush()
+
+                # Create AquiferType records for EACH characteristic
+                aquifer_type_names = []
+                for aquifer_code in aquifer_codes:
+                    try:
+                        type_name = lexicon_mapper.map_value(
+                            f"LU_AquiferType:{aquifer_code}"
+                        )
+                        aquifer_type = AquiferType(
+                            thing_aquifer_association=aquifer_assoc,
+                            aquifer_type=type_name,
+                        )
+                        session.add(aquifer_type)
+                        aquifer_type_names.append(type_name)
+                    except KeyError:
+                        logger.critical(
+                            f"Unknown aquifer code '{aquifer_code}' from AquiferType='{row.AquiferType}' "
+                            f"for well {well.name}. Skipping this code."
+                        )
+                        self._capture_error(
+                            row.PointID,
+                            f"Unknown aquifer code: {aquifer_code}",
+                            "AquiferType",
+                        )
+
+                logger.info(
+                    f"Associated well {well.name} with aquifer {aquifer.name} "
+                    f"(types: {', '.join(aquifer_type_names)})"
+                )
+
     def _after_hook(self, session):
         dump_cached_elevations(self._cached_elevations)
         measuring_point_estimator = MeasuringPointEstimator()
         # add things thate need well id
-        for well in session.query(Thing).filter(Thing.thing_type == "water well").all():
+        query = session.query(Thing).filter(Thing.thing_type == "water well")
+        count = query.count()
+        for i, well in enumerate(query.all()):
+            step_start_time = time.time()
             row = self.cleaned_df[self.cleaned_df["PointID"] == well.name].iloc[0]
-            if not isna(row.Notes):
+            if notna(row.Notes):
                 note = well.add_note(row.Notes, "Other")
                 session.add(note)
 
@@ -352,8 +603,49 @@ def _after_hook(self, session):
             for dp in data_provenances:
                 session.add(dp)
 
-            mphs = measuring_point_estimator.estimate_measuring_point_height(row)
+            for row_field, kw in (
+                (
+                    "CompletionSource",
+                    dict(
+                        field_name="well_completion_date",
+                        origin_type=lexicon_mapper.map_value(
+                            f"LU_Depth_CompletionSource:{row.CompletionSource}"
+                        ),
+                    ),
+                ),
+                (
+                    "DataSource",
+                    dict(
+                        field_name="well_construction_method",
+                        origin_source=row.DataSource,
+                    ),
+                ),
+                (
+                    "DepthSource",
+                    dict(
+                        field_name="well_depth",
+                        origin_type=lexicon_mapper.map_value(
+                            f"LU_Depth_CompletionSource:{row.DepthSource}"
+                        ),
+                    ),
+                ),
+            ):
 
+                if notna(row[row_field]):
+                    try:
+                        dp = DataProvenance(
+                            target_id=well.id, target_table="thing", **kw
+                        )
+                        session.add(dp)
+                        session.commit()
+                    except DatabaseError as e:
+                        self._capture_error(row.PointID, str(e), "DataProvenance")
+                        session.rollback()
+            start_time = time.time()
+            mphs = measuring_point_estimator.estimate_measuring_point_height(row)
+            logger.info(
+                f"Estimated measuring point heights for {well.name}: {time.time() - start_time:.2f}s"
+            )
             for mph, mph_desc, start_date, end_date in mphs:
                 measuring_point_history = MeasuringPointHistory(
                     thing_id=well.id,
@@ -377,7 +669,7 @@ def _after_hook(self, session):
 
             target_id = well.id
             target_table = "thing"
-            if not isna(row.MonitoringStatus):
+            if notna(row.MonitoringStatus):
                 if (
                     "X" in row.MonitoringStatus
                     or "I" in row.MonitoringStatus
@@ -414,7 +706,7 @@ def _after_hook(self, session):
                             f"  Adding '{monitoring_frequency}' monitoring frequency for well {well.name}"
                         )
 
-            if not isna(row.Status):
+            if notna(row.Status):
                 status_value = lexicon_mapper.map_value(f"LU_Status:{row.Status}")
                 status_history = StatusHistory(
                     status_type="Well Status",
@@ -427,6 +719,10 @@ def _after_hook(self, session):
                 session.add(status_history)
                 logger.info(f"  Added well status for well {well.name}: {status_value}")
 
+            logger.info(
+                f"After hook: {well.name} {i+1}/{count} took {time.time() - step_start_time:.2f}s"
+            )
+
         session.commit()
 
 
From 308a7ca773d8ea63504ea9bfe79290fc23ca69b7 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Tue, 2 Dec 2025 21:11:22 -0700
Subject: [PATCH 32/66] fix: enable database rebuild and update measuring point
 history to include reason

---
 tests/features/environment.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/features/environment.py b/tests/features/environment.py
index afbc2d13c..13bcdead3 100644
--- a/tests/features/environment.py
+++ b/tests/features/environment.py
@@ -497,7 +497,7 @@ def add_geologic_formation(context, session, formation_code, well):
 def before_all(context):
     context.objects = {}
     rebuild = False
-    # rebuild = True
+    rebuild = True
     if rebuild:
         erase_and_rebuild_db()
 
@@ -539,7 +539,7 @@ def before_all(context):
 
         for well in (well_1, well_2, well_3):
             add_measuring_point_history(context, session, well=well)
-        for value, start, end in (
+        for value, start, end, reason in (
             (
                 "Active, pumping well",
                 datetime(2020, 1, 1),
@@ -560,7 +560,7 @@ def before_all(context):
                 status_value=value,
                 start_date=start,
                 end_date=end,
-                reason="Initial status",
+                reason=reason,
                 target_id=context.objects["wells"][0].id,
                 target_table="thing",
             )

From ab5a600fe5639d1e555612edea9b772f2a924200 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Tue, 2 Dec 2025 21:29:19 -0700
Subject: [PATCH 33/66] refactor: remove unnecessary return statements and
 logging for clarity in transfer processes

---
 transfers/waterlevels_transfer.py | 1 -
 transfers/well_transfer.py        | 1 -
 2 files changed, 2 deletions(-)

diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py
index 80b8a4bd8..270592a66 100644
--- a/transfers/waterlevels_transfer.py
+++ b/transfers/waterlevels_transfer.py
@@ -283,7 +283,6 @@ def _get_field_event_participants(self, session, row, thing) -> list[Contact]:
             logger.critical(
                 f"No contacts can be associated with the WaterLevels record with GlobalID {row.GlobalID}, therefore no field event, field activity, sample, and observation can be made. Skipping."
             )
-            return None
 
         return field_event_participants
 
diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py
index 9c3c88ec9..314593250 100644
--- a/transfers/well_transfer.py
+++ b/transfers/well_transfer.py
@@ -532,7 +532,6 @@ def _add_aquifers(self, session, row, well):
 
         # Get or create the aquifer
         aquifer = get_or_create_aquifer_system(session, aquifer_name, primary_type)
-        logger.info(f"working with {aquifer}, {aquifer.id}")
         if aquifer:
             # Check if association already exists
             existing_assoc = (

From 5ade1b2d3b7b71e565384c3ff252afc056332778 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Tue, 2 Dec 2025 21:50:50 -0700
Subject: [PATCH 34/66] refactor: optimize date handling in deployment search
 logic for improved clarity

---
 transfers/waterlevels_transducer_transfer.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/transfers/waterlevels_transducer_transfer.py b/transfers/waterlevels_transducer_transfer.py
index 927d8d6b8..74eaafd06 100644
--- a/transfers/waterlevels_transducer_transfer.py
+++ b/transfers/waterlevels_transducer_transfer.py
@@ -195,12 +195,12 @@ class WaterLevelsContinuousAcousticTransferer(WaterLevelsContinuousTransferer):
 
 
 def _find_deployment(ts, deployments):
+    date = ts.date()
     for d in deployments:
-        start = Timestamp(d.installation_date)
-        if start > ts:
+        if d.installation_date > date:
             break  # because sorted by start
-        end = Timestamp(d.removal_date) if d.removal_date else Timestamp.max
-        if end >= ts:
+        end = d.removal_date if d.removal_date else Timestamp.max.date()
+        if end >= date:
             return d
     return None
 

From 306dabcd655621e1882b0e1a75406bc243783e75 Mon Sep 17 00:00:00 2001
From: kbighorse <kbighorse@users.noreply.github.com>
Date: Wed, 3 Dec 2025 06:41:26 +0000
Subject: [PATCH 35/66] Formatting changes

---
 tests/test_transfer_legacy_dates.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py
index aa054740c..05dbe8dfe 100644
--- a/tests/test_transfer_legacy_dates.py
+++ b/tests/test_transfer_legacy_dates.py
@@ -35,7 +35,7 @@
 # ============================================================================
 
 
-@patch('transfers.util.lexicon_mapper')
+@patch("transfers.util.lexicon_mapper")
 def test_make_location_with_both_legacy_dates(mock_lexicon_mapper):
     """Test that make_location populates both legacy_date_created and legacy_site_date"""
     # Mock lexicon mapper to avoid GCS calls
@@ -77,7 +77,7 @@ def test_make_location_with_both_legacy_dates(mock_lexicon_mapper):
     assert location.created_at is None
 
 
-@patch('transfers.util.lexicon_mapper')
+@patch("transfers.util.lexicon_mapper")
 def test_make_location_with_only_date_created(mock_lexicon_mapper):
     """Test that make_location handles locations with only DateCreated (no SiteDate)"""
     # Mock lexicon mapper to avoid GCS calls
@@ -111,7 +111,7 @@ def test_make_location_with_only_date_created(mock_lexicon_mapper):
     assert location.legacy_site_date is None
 
 
-@patch('transfers.util.lexicon_mapper')
+@patch("transfers.util.lexicon_mapper")
 def test_make_location_with_site_date_later_than_date_created(mock_lexicon_mapper):
     """Test data anomaly: SiteDate is later than DateCreated (should still be accepted)"""
     # Mock lexicon mapper to avoid GCS calls
@@ -143,7 +143,7 @@ def test_make_location_with_site_date_later_than_date_created(mock_lexicon_mappe
     assert location.legacy_site_date == datetime.date(2015, 6, 20)
 
 
-@patch('transfers.util.lexicon_mapper')
+@patch("transfers.util.lexicon_mapper")
 def test_make_location_with_very_old_site_date(mock_lexicon_mapper):
     """Test that very old SiteDates (1950s) are preserved correctly"""
     # Mock lexicon mapper to avoid GCS calls
@@ -179,7 +179,7 @@ def test_make_location_with_very_old_site_date(mock_lexicon_mapper):
     assert time_gap == 19751  # Approximately 54 years
 
 
-@patch('transfers.util.lexicon_mapper')
+@patch("transfers.util.lexicon_mapper")
 def test_make_location_legacy_dates_are_date_not_datetime(mock_lexicon_mapper):
     """Test that legacy date fields are Date type (not DateTime)"""
     # Mock lexicon mapper to avoid GCS calls
@@ -218,7 +218,7 @@ def test_make_location_legacy_dates_are_date_not_datetime(mock_lexicon_mapper):
     assert location.legacy_site_date == datetime.date(2002, 12, 10)
 
 
-@patch('transfers.util.lexicon_mapper')
+@patch("transfers.util.lexicon_mapper")
 def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mapper):
     """Test that legacy dates don't affect created_at timestamp"""
     # Mock lexicon mapper to avoid GCS calls
@@ -347,7 +347,7 @@ def test_create_well_completed_on_is_date_not_datetime():
 # ============================================================================
 
 
-@patch('transfers.util.lexicon_mapper')
+@patch("transfers.util.lexicon_mapper")
 def test_location_legacy_date_coverage_statistics(mock_lexicon_mapper):
     """Test that migration preserves expected percentages of legacy dates"""
     # Mock lexicon mapper to avoid GCS calls

From d8167a7e94c8687f01e3092912077a3dde618f1c Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Tue, 2 Dec 2025 22:41:37 -0800
Subject: [PATCH 36/66] Resolve test failures

---
 tests/test_thing.py                 |  7 ++---
 tests/test_transfer_legacy_dates.py | 44 +++++++++++++++++++++++------
 2 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/tests/test_thing.py b/tests/test_thing.py
index eaa541668..94d00aa85 100644
--- a/tests/test_thing.py
+++ b/tests/test_thing.py
@@ -1207,7 +1207,7 @@ def test_create_well_without_completion_date(location):
 
 
 def test_spring_well_completed_on_is_null(location):
-    """Test that springs have null well_completed_on field"""
+    """Test that springs do NOT have well_completed_on field (it's well-specific)"""
     payload = {
         "name": "Test Spring",
         "location_id": location.id,
@@ -1218,9 +1218,8 @@ def test_spring_well_completed_on_is_null(location):
 
     assert response.status_code == 201
     data = response.json()
-    # Springs should have null well_completed_on
-    assert "well_completed_on" in data
-    assert data["well_completed_on"] is None
+    # Springs should NOT have well_completed_on field (only wells have completion dates)
+    assert "well_completed_on" not in data
     assert data["thing_type"] == "spring"
 
     # cleanup after test
diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py
index 30fbcd5ae..aa054740c 100644
--- a/tests/test_transfer_legacy_dates.py
+++ b/tests/test_transfer_legacy_dates.py
@@ -22,7 +22,7 @@
 3. Thing.well_completed_on is populated from CSV CompletionDate (if not null)
 """
 import datetime
-from unittest.mock import Mock, patch
+from unittest.mock import Mock, patch, MagicMock
 import pandas as pd
 import pytest
 
@@ -35,8 +35,12 @@
 # ============================================================================
 
 
-def test_make_location_with_both_legacy_dates():
+@patch('transfers.util.lexicon_mapper')
+def test_make_location_with_both_legacy_dates(mock_lexicon_mapper):
     """Test that make_location populates both legacy_date_created and legacy_site_date"""
+    # Mock lexicon mapper to avoid GCS calls
+    mock_lexicon_mapper.map_value.return_value = "GPS"
+
     # Create a mock CSV row with both DateCreated and SiteDate
     row = pd.Series(
         {
@@ -73,8 +77,12 @@ def test_make_location_with_both_legacy_dates():
     assert location.created_at is None
 
 
-def test_make_location_with_only_date_created():
+@patch('transfers.util.lexicon_mapper')
+def test_make_location_with_only_date_created(mock_lexicon_mapper):
     """Test that make_location handles locations with only DateCreated (no SiteDate)"""
+    # Mock lexicon mapper to avoid GCS calls
+    mock_lexicon_mapper.map_value.return_value = "GPS"
+
     row = pd.Series(
         {
             "PointID": "TEST-002",
@@ -103,8 +111,12 @@ def test_make_location_with_only_date_created():
     assert location.legacy_site_date is None
 
 
-def test_make_location_with_site_date_later_than_date_created():
+@patch('transfers.util.lexicon_mapper')
+def test_make_location_with_site_date_later_than_date_created(mock_lexicon_mapper):
     """Test data anomaly: SiteDate is later than DateCreated (should still be accepted)"""
+    # Mock lexicon mapper to avoid GCS calls
+    mock_lexicon_mapper.map_value.return_value = "GPS"
+
     row = pd.Series(
         {
             "PointID": "TEST-003",
@@ -131,8 +143,12 @@ def test_make_location_with_site_date_later_than_date_created():
     assert location.legacy_site_date == datetime.date(2015, 6, 20)
 
 
-def test_make_location_with_very_old_site_date():
+@patch('transfers.util.lexicon_mapper')
+def test_make_location_with_very_old_site_date(mock_lexicon_mapper):
     """Test that very old SiteDates (1950s) are preserved correctly"""
+    # Mock lexicon mapper to avoid GCS calls
+    mock_lexicon_mapper.map_value.return_value = "GPS"
+
     row = pd.Series(
         {
             "PointID": "SM-0227",  # Real example from dataset
@@ -163,8 +179,12 @@ def test_make_location_with_very_old_site_date():
     assert time_gap == 19751  # Approximately 54 years
 
 
-def test_make_location_legacy_dates_are_date_not_datetime():
+@patch('transfers.util.lexicon_mapper')
+def test_make_location_legacy_dates_are_date_not_datetime(mock_lexicon_mapper):
     """Test that legacy date fields are Date type (not DateTime)"""
+    # Mock lexicon mapper to avoid GCS calls
+    mock_lexicon_mapper.map_value.return_value = "GPS"
+
     row = pd.Series(
         {
             "PointID": "TEST-004",
@@ -198,8 +218,12 @@ def test_make_location_legacy_dates_are_date_not_datetime():
     assert location.legacy_site_date == datetime.date(2002, 12, 10)
 
 
-def test_make_location_legacy_dates_independent_of_created_at():
+@patch('transfers.util.lexicon_mapper')
+def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mapper):
     """Test that legacy dates don't affect created_at timestamp"""
+    # Mock lexicon mapper to avoid GCS calls
+    mock_lexicon_mapper.map_value.return_value = "GPS"
+
     row = pd.Series(
         {
             "PointID": "TEST-005",
@@ -323,8 +347,12 @@ def test_create_well_completed_on_is_date_not_datetime():
 # ============================================================================
 
 
-def test_location_legacy_date_coverage_statistics():
+@patch('transfers.util.lexicon_mapper')
+def test_location_legacy_date_coverage_statistics(mock_lexicon_mapper):
     """Test that migration preserves expected percentages of legacy dates"""
+    # Mock lexicon mapper to avoid GCS calls
+    mock_lexicon_mapper.map_value.return_value = "GPS"
+
     # Simulate 100 location records from CSV
     locations_created = 0
     locations_with_site_date = 0

From de1e5cb916a2fe9e577b8a85e509cc1144ad95f7 Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Tue, 2 Dec 2025 22:55:25 -0800
Subject: [PATCH 37/66] Update column name in BDD tests

---
 .../steps/post_migration_legacy_data.py       | 152 +++++++++---------
 1 file changed, 76 insertions(+), 76 deletions(-)

diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py
index e78afbde7..162358308 100644
--- a/tests/features/steps/post_migration_legacy_data.py
+++ b/tests/features/steps/post_migration_legacy_data.py
@@ -31,7 +31,7 @@ def parse_number(text):
 register_type(Number=parse_number)
 
 
-def create_test_location(legacy_date_created=None, inventoried_on=None):
+def create_test_location(legacy_date_created=None, legacy_site_date=None):
     """Helper to create a test location with legacy dates."""
     with session_ctx() as session:
         location = Location(
@@ -39,7 +39,7 @@ def create_test_location(legacy_date_created=None, inventoried_on=None):
             elevation=1558.8,
             release_status="public",
             legacy_date_created=legacy_date_created,
-            inventoried_on=inventoried_on,
+            legacy_site_date=legacy_site_date,
         )
         session.add(location)
         session.commit()
@@ -99,14 +99,14 @@ def step_given_location_with_table(context: Context):
         if data.get("legacy_date_created") and data["legacy_date_created"] != "null"
         else None
     )
-    inventoried_on = (
-        date.fromisoformat(data["inventoried_on"])
-        if data.get("inventoried_on") and data["inventoried_on"] != "null"
+    legacy_site_date = (
+        date.fromisoformat(data["legacy_site_date"])
+        if data.get("legacy_site_date") and data["legacy_site_date"] != "null"
         else None
     )
 
     location = create_test_location(
-        legacy_date_created=legacy_date_created, inventoried_on=inventoried_on
+        legacy_date_created=legacy_date_created, legacy_site_date=legacy_site_date
     )
 
     context.test_location = location
@@ -127,28 +127,28 @@ def step_given_multiple_locations(context: Context, count: int):
     ]
 
     for i in range(min(count, len(test_data))):
-        legacy_date, inventory_date = test_data[i]
+        legacy_date, site_date = test_data[i]
         location = create_test_location(
             legacy_date_created=date.fromisoformat(legacy_date),
-            inventoried_on=(
-                date.fromisoformat(inventory_date) if inventory_date else None
+            legacy_site_date=(
+                date.fromisoformat(site_date) if site_date else None
             ),
         )
         context.test_locations.append(location)
 
 
 @given(
-    "locations exist with inventoried_on ranging from {start_year:Number} to {end_year:Number}"
+    "locations exist with legacy_site_date ranging from {start_year:Number} to {end_year:Number}"
 )
 def step_given_locations_date_range(context: Context, start_year: int, end_year: int):
-    """Create locations with inventoried_on across a date range."""
+    """Create locations with legacy_site_date across a date range."""
     context.test_locations = []
 
     years = [1954, 2002, 2003, 2010, 2015, 2020, 2024]
     for year in years:
         location = create_test_location(
-            legacy_date_created=date(year + 5, 1, 1),  # Always 5 years after inventory
-            inventoried_on=date(year, 6, 15),
+            legacy_date_created=date(year + 5, 1, 1),  # Always 5 years after site date
+            legacy_site_date=date(year, 6, 15),
         )
         context.test_locations.append(location)
 
@@ -166,7 +166,7 @@ def step_given_locations_with_specific_date(
     for i in range(count):
         location = create_test_location(
             legacy_date_created=target,
-            inventoried_on=date(2000 + i, 1, 1),  # Vary the inventory dates
+            legacy_site_date=date(2000 + i, 1, 1),  # Vary the site dates
         )
         context.test_locations.append(location)
 
@@ -261,16 +261,16 @@ def step_given_well_location_has_table(context: Context):
         if data.get("legacy_date_created")
         else None
     )
-    inventoried_on = (
-        date.fromisoformat(data.get("inventoried_on"))
-        if data.get("inventoried_on")
+    legacy_site_date = (
+        date.fromisoformat(data.get("legacy_site_date"))
+        if data.get("legacy_site_date")
         else None
     )
 
     with session_ctx() as session:
         location = session.get(Location, context.test_well_location.id)
         location.legacy_date_created = legacy_date_created
-        location.inventoried_on = inventoried_on
+        location.legacy_site_date = legacy_site_date
         session.commit()
         session.refresh(location)
         context.test_well_location = location
@@ -282,12 +282,12 @@ def step_given_count_locations_migrated(context: Context, count: int):
     context.test_locations = []
 
     for i in range(count):
-        # 9% have inventoried_on
-        has_inventory = i < count * 0.09
+        # 9% have legacy_site_date
+        has_site_date = i < count * 0.09
 
         location = create_test_location(
             legacy_date_created=date(2014, 1, i % 28 + 1),
-            inventoried_on=date(2003, 1, i % 28 + 1) if has_inventory else None,
+            legacy_site_date=date(2003, 1, i % 28 + 1) if has_site_date else None,
         )
         context.test_locations.append(location)
 
@@ -323,7 +323,7 @@ def step_given_completion_count(context: Context, count: int):
 def step_given_location_migrated_with_dates(context: Context):
     """Create location with both legacy dates."""
     location = create_test_location(
-        legacy_date_created=date(2014, 4, 3), inventoried_on=date(2002, 12, 10)
+        legacy_date_created=date(2014, 4, 3), legacy_site_date=date(2002, 12, 10)
     )
     context.test_location = location
 
@@ -364,7 +364,7 @@ def step_when_get_all_locations(context: Context):
 
 
 @when(
-    'I filter locations where inventoried_on is between "{start_date}" and "{end_date}"'
+    'I filter locations where legacy_site_date is between "{start_date}" and "{end_date}"'
 )
 def step_when_filter_locations(context: Context, start_date: str, end_date: str):
     """Filter locations by date range."""
@@ -375,7 +375,7 @@ def step_when_filter_locations(context: Context, start_date: str, end_date: str)
 
         locations = (
             session.query(Location)
-            .filter(Location.inventoried_on >= start, Location.inventoried_on <= end)
+            .filter(Location.legacy_site_date >= start, Location.legacy_site_date <= end)
             .all()
         )
 
@@ -509,10 +509,10 @@ def step_then_legacy_date_created(context: Context, expected_date: str):
     assert actual == expected_date, f"Expected {expected_date}, got {actual}"
 
 
-@then('the response should include inventoried_on as "{expected_date}"')
-def step_then_inventoried_on(context: Context, expected_date: str):
-    """Assert inventoried_on matches."""
-    actual = context.location_response.get("inventoried_on")
+@then('the response should include legacy_site_date as "{expected_date}"')
+def step_then_legacy_site_date(context: Context, expected_date: str):
+    """Assert legacy_site_date matches."""
+    actual = context.location_response.get("legacy_site_date")
     assert actual == expected_date, f"Expected {expected_date}, got {actual}"
 
 
@@ -520,15 +520,15 @@ def step_then_inventoried_on(context: Context, expected_date: str):
 def step_then_time_gap_years(context: Context, years: str):
     """Assert approximate year gap."""
     legacy_str = context.location_response.get("legacy_date_created")
-    inventory_str = context.location_response.get("inventoried_on")
+    site_date_str = context.location_response.get("legacy_site_date")
 
-    if not legacy_str or not inventory_str:
+    if not legacy_str or not site_date_str:
         raise AssertionError("Missing date fields for gap calculation")
 
     legacy_date = date.fromisoformat(legacy_str)
-    inventory_date = date.fromisoformat(inventory_str)
+    site_date = date.fromisoformat(site_date_str)
 
-    gap_days = (legacy_date - inventory_date).days
+    gap_days = (legacy_date - site_date).days
     gap_years = gap_days / 365.25
 
     expected_years = float(years)
@@ -546,47 +546,47 @@ def step_then_all_have_legacy_field(context: Context):
         assert "legacy_date_created" in item, f"Location missing legacy_date_created"
 
 
-@then("each location should have an inventoried_on field")
-def step_then_all_have_inventory_field(context: Context):
+@then("each location should have a legacy_site_date field")
+def step_then_all_have_site_date_field(context: Context):
     """Assert all locations have the field."""
     items = context.locations_response.get("items", [])
     for item in items:
-        assert "inventoried_on" in item, f"Location missing inventoried_on"
+        assert "legacy_site_date" in item, f"Location missing legacy_site_date"
 
 
-@then("some locations should have null inventoried_on")
-def step_then_some_null_inventory(context: Context):
+@then("some locations should have null legacy_site_date")
+def step_then_some_null_site_date(context: Context):
     """Assert some locations have null."""
     items = context.locations_response.get("items", [])
-    null_count = sum(1 for item in items if item.get("inventoried_on") is None)
-    assert null_count > 0, "Expected at least one location with null inventoried_on"
+    null_count = sum(1 for item in items if item.get("legacy_site_date") is None)
+    assert null_count > 0, "Expected at least one location with null legacy_site_date"
 
 
-@then("the response should only include locations inventoried in that decade")
+@then("the response should only include locations with site date in that decade")
 def step_then_locations_in_decade(context: Context):
     """Assert filtered locations are in range."""
     for loc in context.filtered_locations:
         assert (
-            2000 <= loc.inventoried_on.year <= 2010
-        ), f"Location not in 2000-2010: {loc.inventoried_on}"
+            2000 <= loc.legacy_site_date.year <= 2010
+        ), f"Location not in 2000-2010: {loc.legacy_site_date}"
 
 
-@then("locations inventoried before {year:Number} should not be included")
+@then("locations with site date before {year:Number} should not be included")
 def step_then_locations_before_excluded(context: Context, year: int):
     """Assert no locations before year."""
     for loc in context.filtered_locations:
         assert (
-            loc.inventoried_on.year >= year
-        ), f"Location from {loc.inventoried_on.year} should not be included"
+            loc.legacy_site_date.year >= year
+        ), f"Location from {loc.legacy_site_date.year} should not be included"
 
 
-@then("locations inventoried after {year:Number} should not be included")
+@then("locations with site date after {year:Number} should not be included")
 def step_then_locations_after_excluded(context: Context, year: int):
     """Assert no locations after year."""
     for loc in context.filtered_locations:
         assert (
-            loc.inventoried_on.year <= year
-        ), f"Location from {loc.inventoried_on.year} should not be included"
+            loc.legacy_site_date.year <= year
+        ), f"Location from {loc.legacy_site_date.year} should not be included"
 
 
 @then("the response should include exactly {count:Number} locations")
@@ -721,44 +721,44 @@ def step_then_location_has_legacy(context: Context, expected_date: str):
     assert actual == expected_date, f"Expected {expected_date}, got {actual}"
 
 
-@then('the current_location should include inventoried_on as "{expected_date}"')
-def step_then_location_has_inventory(context: Context, expected_date: str):
-    """Assert location has inventoried_on."""
+@then('the current_location should include legacy_site_date as "{expected_date}"')
+def step_then_location_has_site_date(context: Context, expected_date: str):
+    """Assert location has legacy_site_date."""
     current_location = context.well_response.get("current_location", {})
-    actual = current_location.get("inventoried_on")
+    actual = current_location.get("legacy_site_date")
     assert actual == expected_date, f"Expected {expected_date}, got {actual}"
 
 
 @then(
-    "the temporal sequence should be: well_completed_on → inventoried_on → legacy_date_created"
+    "the temporal sequence should be: well_completed_on → legacy_site_date → legacy_date_created"
 )
 def step_then_temporal_sequence(context: Context):
     """Assert temporal order."""
     well_completed = context.retrieved_well.well_completed_on
-    inventoried = context.retrieved_location.inventoried_on
+    site_date = context.retrieved_location.legacy_site_date
     legacy_created = context.retrieved_location.legacy_date_created
 
     assert (
-        well_completed < inventoried
-    ), "Well should be completed before site inventoried"
+        well_completed < site_date
+    ), "Well should be completed before site date"
     assert (
-        inventoried < legacy_created
-    ), "Site should be inventoried before DB record created"
+        site_date < legacy_created
+    ), "Site date should be before DB record created"
 
 
 @then("the timeline should show: {year1:Number} → {year2:Number} → {year3:Number}")
 def step_then_timeline_years(context: Context, year1: int, year2: int, year3: int):
     """Assert specific years in sequence."""
     assert context.retrieved_well.well_completed_on.year == year1
-    assert context.retrieved_location.inventoried_on.year == year2
+    assert context.retrieved_location.legacy_site_date.year == year2
     assert context.retrieved_location.legacy_date_created.year == year3
 
 
-@then("{percentage:Number}% should have non-null inventoried_on")
-def step_then_percentage_inventory(context: Context, percentage: int):
-    """Assert percentage with inventoried_on."""
+@then("{percentage:Number}% should have non-null legacy_site_date")
+def step_then_percentage_site_date(context: Context, percentage: int):
+    """Assert percentage with legacy_site_date."""
     total = len(context.queried_locations)
-    populated = sum(1 for loc in context.queried_locations if loc.inventoried_on)
+    populated = sum(1 for loc in context.queried_locations if loc.legacy_site_date)
     actual_pct = (populated / total) * 100
 
     tolerance = 2
@@ -805,10 +805,10 @@ def step_then_has_legacy_date(context: Context):
     assert context.retrieved_location.legacy_date_created is not None
 
 
-@then("it should have inventoried_on (original AMPAPI SiteDate)")
-def step_then_has_inventory_date(context: Context):
-    """Assert inventoried_on exists."""
-    assert context.retrieved_location.inventoried_on is not None
+@then("it should have legacy_site_date (original AMPAPI SiteDate)")
+def step_then_has_site_date(context: Context):
+    """Assert legacy_site_date exists."""
+    assert context.retrieved_location.legacy_site_date is not None
 
 
 @then("all three timestamps should be independently queryable")
@@ -816,7 +816,7 @@ def step_then_all_queryable(context: Context):
     """Assert all fields are queryable."""
     assert hasattr(context.retrieved_location, "created_at")
     assert hasattr(context.retrieved_location, "legacy_date_created")
-    assert hasattr(context.retrieved_location, "inventoried_on")
+    assert hasattr(context.retrieved_location, "legacy_site_date")
 
 
 @then("created_at should be a recent timestamp")
@@ -843,10 +843,10 @@ def step_then_legacy_is(context: Context, expected_date: str):
     assert actual == expected, f"Expected {expected}, got {actual}"
 
 
-@then('inventoried_on should be "{expected_date}"')
-def step_then_inventory_is(context: Context, expected_date: str):
-    """Assert inventoried_on value."""
-    actual = context.retrieved_location.inventoried_on
+@then('legacy_site_date should be "{expected_date}"')
+def step_then_site_date_is(context: Context, expected_date: str):
+    """Assert legacy_site_date value."""
+    actual = context.retrieved_location.legacy_site_date
     expected = date.fromisoformat(expected_date)
     assert actual == expected, f"Expected {expected}, got {actual}"
 
@@ -880,10 +880,10 @@ def step_then_no_validation_errors(context: Context):
     pass
 
 
-@then("inventoried_on should be null")
-def step_then_inventory_null(context: Context):
-    """Assert inventoried_on is null."""
-    assert context.retrieved_location.inventoried_on is None
+@then("legacy_site_date should be null")
+def step_then_site_date_null(context: Context):
+    """Assert legacy_site_date is null."""
+    assert context.retrieved_location.legacy_site_date is None
 
 
 @then("the well should still be valid")

From a9293bb71260a303a609212f973e9e9bb3451995 Mon Sep 17 00:00:00 2001
From: kbighorse <kbighorse@users.noreply.github.com>
Date: Wed, 3 Dec 2025 06:55:26 +0000
Subject: [PATCH 38/66] Formatting changes

---
 .../features/steps/post_migration_legacy_data.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py
index 162358308..b36dfa461 100644
--- a/tests/features/steps/post_migration_legacy_data.py
+++ b/tests/features/steps/post_migration_legacy_data.py
@@ -130,9 +130,7 @@ def step_given_multiple_locations(context: Context, count: int):
         legacy_date, site_date = test_data[i]
         location = create_test_location(
             legacy_date_created=date.fromisoformat(legacy_date),
-            legacy_site_date=(
-                date.fromisoformat(site_date) if site_date else None
-            ),
+            legacy_site_date=(date.fromisoformat(site_date) if site_date else None),
         )
         context.test_locations.append(location)
 
@@ -375,7 +373,9 @@ def step_when_filter_locations(context: Context, start_date: str, end_date: str)
 
         locations = (
             session.query(Location)
-            .filter(Location.legacy_site_date >= start, Location.legacy_site_date <= end)
+            .filter(
+                Location.legacy_site_date >= start, Location.legacy_site_date <= end
+            )
             .all()
         )
 
@@ -738,12 +738,8 @@ def step_then_temporal_sequence(context: Context):
     site_date = context.retrieved_location.legacy_site_date
     legacy_created = context.retrieved_location.legacy_date_created
 
-    assert (
-        well_completed < site_date
-    ), "Well should be completed before site date"
-    assert (
-        site_date < legacy_created
-    ), "Site date should be before DB record created"
+    assert well_completed < site_date, "Well should be completed before site date"
+    assert site_date < legacy_created, "Site date should be before DB record created"
 
 
 @then("the timeline should show: {year1:Number} → {year2:Number} → {year3:Number}")

From dc7a31b93ddf564af0d0905a788108087cc32e93 Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 3 Dec 2025 00:30:21 -0800
Subject: [PATCH 39/66] Remove `well_completed_on`

---
 db/thing.py                                   |   5 -
 schemas/thing.py                              |   6 -
 ...st-migration-legacy-data-retrieval.feature |  78 ---
 .../steps/post_migration_legacy_data.py       | 445 ------------------
 tests/test_thing.py                           | 144 ------
 tests/test_transfer_legacy_dates.py           | 125 -----
 transfers/well_transfer.py                    |  15 -
 7 files changed, 818 deletions(-)

diff --git a/db/thing.py b/db/thing.py
index b42b70d56..9f30d08e2 100644
--- a/db/thing.py
+++ b/db/thing.py
@@ -115,11 +115,6 @@ class Thing(
     )
 
     well_construction_notes: Mapped[str] = mapped_column(Text, nullable=True)
-    well_completed_on: Mapped[date] = mapped_column(
-        Date,
-        nullable=True,
-        comment="Date when well construction/drilling was completed (from AMPAPI CompletionDate, active field for new wells)",
-    )
 
     # Spring-related columns
     spring_type: Mapped[str] = lexicon_term(
diff --git a/schemas/thing.py b/schemas/thing.py
index 6de5908cc..692b78459 100644
--- a/schemas/thing.py
+++ b/schemas/thing.py
@@ -131,8 +131,6 @@ class CreateWell(CreateBaseThing, ValidateWell):
     )
     measuring_point_description: str | None
     notes: list[CreateNote] | None = None
-    # Active field: users can set this for new wells
-    well_completed_on: date | None = None
 
 
 class CreateSpring(CreateBaseThing):
@@ -227,8 +225,6 @@ class WellResponse(BaseThingResponse):
     measuring_point_height: float
     measuring_point_height_unit: str = "ft"
     measuring_point_description: str | None
-    # Active field: completion date for wells
-    well_completed_on: date | None = None
 
     water_notes: list[NoteResponse] | None = None
     measuring_notes: list[NoteResponse] | None = None
@@ -334,8 +330,6 @@ class UpdateWell(UpdateThing, ValidateWell):
     well_casing_diameter: float | None = None  # in inches
     well_casing_depth: float | None = None  # in feet
     well_casing_materials: list[str] | None = None
-    # Active field: users can update completion date
-    well_completed_on: date | None = None
 
 
 class UpdateSpring(UpdateThing):
diff --git a/tests/features/post-migration-legacy-data-retrieval.feature b/tests/features/post-migration-legacy-data-retrieval.feature
index fa4663e1b..b5329ad9c 100644
--- a/tests/features/post-migration-legacy-data-retrieval.feature
+++ b/tests/features/post-migration-legacy-data-retrieval.feature
@@ -51,65 +51,6 @@ Feature: Post-Migration Legacy Data Retrieval
     Then the response should include exactly 3 locations
     And all should have legacy_date_created "2014-04-03"
 
-  # Well Completion Date Lookups
-
-  Scenario: Retrieve well with completion date via API
-    Given a well exists with well_completed_on "2004-08-08"
-    When I retrieve that well via the API
-    Then the response should include well_completed_on as "2004-08-08"
-    And the well age should be calculable
-
-  Scenario: Retrieve old well from early 1900s
-    Given a well exists with well_completed_on "1936-01-01"
-    When I retrieve that well via the API
-    Then the response should include well_completed_on as "1936-01-01"
-    And the well should be over 88 years old
-
-  Scenario: List all wells includes completion date field
-    Given 10 wells exist with various completion dates
-    And 3 of those wells have null well_completed_on
-    When I GET /thing/water-well to list all wells
-    Then each well should have a well_completed_on field
-    And 70% of wells should have well_completed_on populated
-
-  Scenario: Filter wells by completion date range
-    Given wells exist with completion dates from 1936 to 2024
-    When I filter wells where well_completed_on is between "2000-01-01" and "2010-12-31"
-    Then the response should only include wells completed in that decade
-    And wells from 1936 should not be included
-    And wells from 2020 should not be included
-
-  Scenario: Sort wells by completion date (oldest first)
-    Given wells exist with completion dates: 1936, 1965, 2004, 2020
-    And some wells have null well_completed_on
-    When I GET /thing/water-well sorted by well_completed_on ascending
-    Then the first well should be from 1936
-    And the last well with a date should be from 2020
-    And wells without completion dates should appear last
-
-  # Combined Queries - Location + Well Legacy Dates
-
-  Scenario: Retrieve well with location showing all legacy dates
-    Given a well exists with well_completed_on "2004-08-08"
-    And that well's location has:
-      | field                | value      |
-      | legacy_date_created  | 2014-04-03 |
-      | legacy_site_date     | 2002-12-10 |
-    When I retrieve the well via the API
-    Then the well should have well_completed_on as "2004-08-08"
-    And the current_location should include legacy_date_created as "2014-04-03"
-    And the current_location should include legacy_site_date as "2002-12-10"
-
-  Scenario: Timeline reconstruction - well completed before site inventoried
-    Given a well exists with well_completed_on "1995-06-15"
-    And that well's location has:
-      | field                | value      |
-      | legacy_site_date     | 2003-12-10 |
-      | legacy_date_created  | 2014-04-03 |
-    When I retrieve the well and its location
-    Then the temporal sequence should be: well_completed_on → legacy_site_date → legacy_date_created
-    And the timeline should show: 1995 → 2003 → 2014
-
   # Data Quality Validation
 
   Scenario: Verify migration preserved expected percentage of legacy dates
@@ -119,12 +60,6 @@ Feature: Post-Migration Legacy Data Retrieval
     Then 9% should have non-null legacy_site_date
     And 100% should have non-null legacy_date_created
 
-  Scenario: Verify well completion date coverage matches expectation
-    Given 100 wells were migrated
-    And 30 of them had non-null CompletionDate in AMPAPI
-    When I query the migrated wells
-    Then 30% should have non-null well_completed_on
-
   # Audit Trail Verification
 
   Scenario: Legacy dates preserved alongside audit timestamps
@@ -149,13 +84,6 @@ Feature: Post-Migration Legacy Data Retrieval
     And legacy_site_date should be "2015-06-20"
     And the system should accept this without error
 
-  Scenario: Spring does not use well_completed_on field
-    Given a thing of type "spring" exists
-    When I retrieve that spring
-    Then well_completed_on should be null
-    And the field should exist in the response schema
-    And it should not cause validation errors
-
   Scenario: Location with only legacy_date_created (no legacy_site_date)
     Given a location exists with:
       | field                | value      |
@@ -164,9 +92,3 @@ Feature: Post-Migration Legacy Data Retrieval
     When I retrieve that location
     Then legacy_date_created should be "2014-10-17"
     And legacy_site_date should be null
-
-  Scenario: Well without completion date
-    Given a well exists with well_completed_on null
-    When I retrieve that well
-    Then well_completed_on should be null
-    And the well should still be valid
diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py
index b36dfa461..25e932159 100644
--- a/tests/features/steps/post_migration_legacy_data.py
+++ b/tests/features/steps/post_migration_legacy_data.py
@@ -47,42 +47,6 @@ def create_test_location(legacy_date_created=None, legacy_site_date=None):
         return location
 
 
-def create_test_well(well_completed_on=None, thing_type="water well"):
-    """Helper to create a test well with completion date."""
-    with session_ctx() as session:
-        # Create location
-        location = Location(
-            point="POINT(-106.607784 35.118924)",
-            elevation=1558.8,
-            release_status="public",
-        )
-        session.add(location)
-        session.commit()
-
-        # Create thing
-        thing = Thing(
-            name=f"Test-{thing_type}-{datetime.now().timestamp()}",
-            first_visit_date="2023-03-03",
-            thing_type=thing_type,
-            release_status="public",
-            well_depth=100.0 if thing_type == "water well" else None,
-            hole_depth=110.0 if thing_type == "water well" else None,
-            well_completed_on=well_completed_on,
-        )
-        session.add(thing)
-        session.commit()
-
-        # Associate
-        assoc = LocationThingAssociation(location=location, thing=thing)
-        assoc.effective_start = "2000-01-01T00:00:00Z"
-        session.add(assoc)
-        session.commit()
-
-        session.refresh(thing)
-        session.refresh(location)
-        return thing, location
-
-
 @given("the AMPAPI data has been migrated to the database")
 def step_given_data_migrated(context: Context):
     """Assumption that migration has occurred."""
@@ -169,111 +133,6 @@ def step_given_locations_with_specific_date(
         context.test_locations.append(location)
 
 
-@given('a well exists with well_completed_on "{completion_date}"')
-def step_given_well_with_completion(context: Context, completion_date: str):
-    """Create well with completion date."""
-    completed_on = (
-        date.fromisoformat(completion_date) if completion_date != "null" else None
-    )
-
-    thing, location = create_test_well(well_completed_on=completed_on)
-
-    context.test_well = thing
-    context.test_well_id = thing.id
-    context.test_well_location = location
-
-
-@given("{count:Number} wells exist with various completion dates")
-def step_given_multiple_wells(context: Context, count: int):
-    """Create multiple wells with various completion dates."""
-    context.test_wells = []
-
-    completion_dates = [
-        "1936-01-01",
-        "1965-06-15",
-        "2004-08-08",
-        "2020-05-15",
-        None,  # No completion date
-        None,
-        None,
-    ]
-
-    for i in range(min(count, len(completion_dates))):
-        completed_on = (
-            date.fromisoformat(completion_dates[i]) if completion_dates[i] else None
-        )
-        thing, location = create_test_well(well_completed_on=completed_on)
-        context.test_wells.append(thing)
-
-
-@given("{null_count:Number} of those wells have null well_completed_on")
-def step_given_wells_with_null_completion(context: Context, null_count: int):
-    """Verify expected number of nulls (declarative - already created)."""
-    # Wells were created in previous step with nulls
-    pass
-
-
-@given(
-    "wells exist with completion dates from {start_year:Number} to {end_year:Number}"
-)
-def step_given_wells_date_range(context: Context, start_year: int, end_year: int):
-    """Create wells with completion dates across range."""
-    context.test_wells = []
-
-    years = [1936, 1965, 2004, 2010, 2020, 2024]
-    for year in years:
-        thing, location = create_test_well(well_completed_on=date(year, 6, 15))
-        context.test_wells.append(thing)
-
-
-@given("wells exist with completion dates: {years}")
-def step_given_wells_specific_years(context: Context, years: str):
-    """Create wells with specific completion years."""
-    context.test_wells = []
-
-    year_list = [int(y.strip()) for y in years.split(",")]
-
-    for year in year_list:
-        thing, location = create_test_well(well_completed_on=date(year, 6, 15))
-        context.test_wells.append(thing)
-
-
-@given("some wells have null well_completed_on")
-def step_given_some_wells_null(context: Context):
-    """Add wells without completion dates."""
-    if not hasattr(context, "test_wells"):
-        context.test_wells = []
-
-    for i in range(2):
-        thing, location = create_test_well(well_completed_on=None)
-        context.test_wells.append(thing)
-
-
-@given("that well's location has")
-def step_given_well_location_has_table(context: Context):
-    """Set legacy dates on the well's location."""
-    data = {row["field"]: row["value"] for row in context.table}
-
-    legacy_date_created = (
-        date.fromisoformat(data.get("legacy_date_created"))
-        if data.get("legacy_date_created")
-        else None
-    )
-    legacy_site_date = (
-        date.fromisoformat(data.get("legacy_site_date"))
-        if data.get("legacy_site_date")
-        else None
-    )
-
-    with session_ctx() as session:
-        location = session.get(Location, context.test_well_location.id)
-        location.legacy_date_created = legacy_date_created
-        location.legacy_site_date = legacy_site_date
-        session.commit()
-        session.refresh(location)
-        context.test_well_location = location
-
-
 @given("{count:Number} locations were migrated")
 def step_given_count_locations_migrated(context: Context, count: int):
     """Create specified number of test locations."""
@@ -296,27 +155,6 @@ def step_given_sitedate_count(context: Context, count: int):
     pass
 
 
-@given("{count:Number} wells were migrated")
-def step_given_count_wells_migrated(context: Context, count: int):
-    """Create specified number of test wells."""
-    context.test_wells = []
-
-    for i in range(count):
-        # 30% have completion dates
-        has_completion = i < count * 0.30
-
-        thing, location = create_test_well(
-            well_completed_on=date(2000 + (i % 24), 1, 1) if has_completion else None
-        )
-        context.test_wells.append(thing)
-
-
-@given("{count:Number} of them had non-null CompletionDate in AMPAPI")
-def step_given_completion_count(context: Context, count: int):
-    """Declarative - data created in previous step."""
-    pass
-
-
 @given("a location was migrated with legacy dates")
 def step_given_location_migrated_with_dates(context: Context):
     """Create location with both legacy dates."""
@@ -326,22 +164,6 @@ def step_given_location_migrated_with_dates(context: Context):
     context.test_location = location
 
 
-@given('a thing of type "{thing_type}" exists')
-def step_given_thing_of_type(context: Context, thing_type: str):
-    """Create a thing of specified type."""
-    thing, location = create_test_well(well_completed_on=None, thing_type=thing_type)
-    context.test_thing = thing
-    context.test_thing_id = thing.id
-
-
-@given("a well exists with well_completed_on null")
-def step_given_well_null_completion(context: Context):
-    """Create well without completion date."""
-    thing, location = create_test_well(well_completed_on=None)
-    context.test_well = thing
-    context.test_well_id = thing.id
-
-
 # WHEN steps
 
 
@@ -393,69 +215,6 @@ def step_when_query_by_legacy_date(context: Context, target_date: str):
         context.queried_locations = locations
 
 
-@when("I retrieve that well via the API")
-def step_when_retrieve_well_api(context: Context):
-    """Retrieve well via GET API."""
-    response = context.client.get(f"/thing/water-well/{context.test_well_id}")
-    assert response.status_code == 200
-    context.well_response = response.json()
-
-
-@when("I GET /thing/water-well to list all wells")
-def step_when_get_all_wells(context: Context):
-    """Get all wells."""
-    response = context.client.get("/thing/water-well")
-    assert response.status_code == 200
-    context.wells_response = response.json()
-
-
-@when(
-    'I filter wells where well_completed_on is between "{start_date}" and "{end_date}"'
-)
-def step_when_filter_wells(context: Context, start_date: str, end_date: str):
-    """Filter wells by completion date range."""
-    with session_ctx() as session:
-        start = date.fromisoformat(start_date)
-        end = date.fromisoformat(end_date)
-
-        wells = (
-            session.query(Thing)
-            .filter(
-                Thing.thing_type == "water well",
-                Thing.well_completed_on >= start,
-                Thing.well_completed_on <= end,
-            )
-            .all()
-        )
-
-        context.filtered_wells = wells
-
-
-@when("I GET /thing/water-well sorted by well_completed_on ascending")
-def step_when_get_wells_sorted(context: Context):
-    """Get wells sorted by completion date."""
-    with session_ctx() as session:
-        wells = (
-            session.query(Thing)
-            .filter(Thing.thing_type == "water well")
-            .order_by(Thing.well_completed_on.asc().nullslast())
-            .all()
-        )
-
-        context.sorted_wells = wells
-
-
-@when("I retrieve the well and its location")
-def step_when_retrieve_well_and_location(context: Context):
-    """Retrieve well with location."""
-    with session_ctx() as session:
-        well = session.get(Thing, context.test_well.id)
-        location = session.get(Location, context.test_well_location.id)
-
-        context.retrieved_well = well
-        context.retrieved_location = location
-
-
 @when("I query the migrated locations")
 def step_when_query_migrated_locations(context: Context):
     """Query all test locations."""
@@ -466,15 +225,6 @@ def step_when_query_migrated_locations(context: Context):
         context.queried_locations = locations
 
 
-@when("I query the migrated wells")
-def step_when_query_migrated_wells(context: Context):
-    """Query all test wells."""
-    with session_ctx() as session:
-        well_ids = [well.id for well in context.test_wells]
-        wells = session.query(Thing).filter(Thing.id.in_(well_ids)).all()
-        context.queried_wells = wells
-
-
 @when("I retrieve that location")
 def step_when_retrieve_location(context: Context):
     """Retrieve location by ID."""
@@ -483,22 +233,6 @@ def step_when_retrieve_location(context: Context):
         context.retrieved_location = location
 
 
-@when("I retrieve that spring")
-def step_when_retrieve_spring(context: Context):
-    """Retrieve spring/thing by ID."""
-    with session_ctx() as session:
-        thing = session.get(Thing, context.test_thing.id)
-        context.retrieved_thing = thing
-
-
-@when("I retrieve that well")
-def step_when_retrieve_well(context: Context):
-    """Retrieve well by ID."""
-    with session_ctx() as session:
-        well = session.get(Thing, context.test_well.id)
-        context.retrieved_well = well
-
-
 # THEN steps
 
 
@@ -606,150 +340,6 @@ def step_then_all_have_date(context: Context, expected_date: str):
         ), f"Location has {loc.legacy_date_created}, expected {expected}"
 
 
-@then('the response should include well_completed_on as "{expected_date}"')
-def step_then_well_completed_on(context: Context, expected_date: str):
-    """Assert well_completed_on matches."""
-    actual = context.well_response.get("well_completed_on")
-    assert actual == expected_date, f"Expected {expected_date}, got {actual}"
-
-
-@then("the well age should be calculable")
-def step_then_age_calculable(context: Context):
-    """Assert age can be calculated."""
-    completion_str = context.well_response.get("well_completed_on")
-    assert completion_str is not None, "Cannot calculate age without completion date"
-
-    completed = date.fromisoformat(completion_str)
-    today = date.today()
-    age_years = (today - completed).days / 365.25
-    assert age_years >= 0, "Age cannot be negative"
-
-
-@then("the well should be over {min_age:Number} years old")
-def step_then_well_over_age(context: Context, min_age: int):
-    """Assert well age exceeds minimum."""
-    completion_str = context.well_response.get("well_completed_on")
-    completed = date.fromisoformat(completion_str)
-    today = date.today()
-    age_years = (today - completed).days / 365.25
-
-    assert age_years >= min_age, f"Expected over {min_age} years, got {age_years:.1f}"
-
-
-@then("each well should have a well_completed_on field")
-def step_then_all_wells_have_field(context: Context):
-    """Assert all wells have the field."""
-    items = context.wells_response.get("items", [])
-    for item in items:
-        assert "well_completed_on" in item, f"Well missing well_completed_on"
-
-
-@then("{percentage:Number}% of wells should have well_completed_on populated")
-def step_then_percentage_populated(context: Context, percentage: int):
-    """Assert approximate percentage."""
-    items = context.wells_response.get("items", [])
-    total = len(items)
-    if total == 0:
-        return
-
-    populated = sum(1 for item in items if item.get("well_completed_on") is not None)
-    actual_pct = (populated / total) * 100
-
-    tolerance = 10
-    assert (
-        abs(actual_pct - percentage) < tolerance
-    ), f"Expected ~{percentage}%, got {actual_pct:.1f}%"
-
-
-@then("the response should only include wells completed in that decade")
-def step_then_wells_in_decade(context: Context):
-    """Assert filtered wells in range."""
-    for well in context.filtered_wells:
-        assert 2000 <= well.well_completed_on.year <= 2010
-
-
-@then("wells from {year:Number} should not be included")
-def step_then_wells_year_excluded(context: Context, year: int):
-    """Assert wells from year excluded."""
-    for well in context.filtered_wells:
-        assert well.well_completed_on.year != year
-
-
-@then("the first well should be from {year:Number}")
-def step_then_first_well_year(context: Context, year: int):
-    """Assert first well year."""
-    if context.sorted_wells and context.sorted_wells[0].well_completed_on:
-        actual_year = context.sorted_wells[0].well_completed_on.year
-        assert actual_year == year, f"Expected {year}, got {actual_year}"
-
-
-@then("the last well with a date should be from {year:Number}")
-def step_then_last_well_year(context: Context, year: int):
-    """Assert last non-null well year."""
-    non_null = [w for w in context.sorted_wells if w.well_completed_on]
-    if non_null:
-        actual_year = non_null[-1].well_completed_on.year
-        assert actual_year == year, f"Expected {year}, got {actual_year}"
-
-
-@then("wells without completion dates should appear last")
-def step_then_nulls_last(context: Context):
-    """Assert nulls at end."""
-    first_null_idx = next(
-        (i for i, w in enumerate(context.sorted_wells) if w.well_completed_on is None),
-        len(context.sorted_wells),
-    )
-
-    for well in context.sorted_wells[first_null_idx:]:
-        assert (
-            well.well_completed_on is None
-        ), "Found non-null after null in sorted list"
-
-
-@then('the well should have well_completed_on as "{expected_date}"')
-def step_then_well_has_completion(context: Context, expected_date: str):
-    """Assert well has completion date."""
-    actual = context.well_response.get("well_completed_on")
-    assert actual == expected_date, f"Expected {expected_date}, got {actual}"
-
-
-@then('the current_location should include legacy_date_created as "{expected_date}"')
-def step_then_location_has_legacy(context: Context, expected_date: str):
-    """Assert location has legacy_date_created."""
-    current_location = context.well_response.get("current_location", {})
-    actual = current_location.get("legacy_date_created")
-    assert actual == expected_date, f"Expected {expected_date}, got {actual}"
-
-
-@then('the current_location should include legacy_site_date as "{expected_date}"')
-def step_then_location_has_site_date(context: Context, expected_date: str):
-    """Assert location has legacy_site_date."""
-    current_location = context.well_response.get("current_location", {})
-    actual = current_location.get("legacy_site_date")
-    assert actual == expected_date, f"Expected {expected_date}, got {actual}"
-
-
-@then(
-    "the temporal sequence should be: well_completed_on → legacy_site_date → legacy_date_created"
-)
-def step_then_temporal_sequence(context: Context):
-    """Assert temporal order."""
-    well_completed = context.retrieved_well.well_completed_on
-    site_date = context.retrieved_location.legacy_site_date
-    legacy_created = context.retrieved_location.legacy_date_created
-
-    assert well_completed < site_date, "Well should be completed before site date"
-    assert site_date < legacy_created, "Site date should be before DB record created"
-
-
-@then("the timeline should show: {year1:Number} → {year2:Number} → {year3:Number}")
-def step_then_timeline_years(context: Context, year1: int, year2: int, year3: int):
-    """Assert specific years in sequence."""
-    assert context.retrieved_well.well_completed_on.year == year1
-    assert context.retrieved_location.legacy_site_date.year == year2
-    assert context.retrieved_location.legacy_date_created.year == year3
-
-
 @then("{percentage:Number}% should have non-null legacy_site_date")
 def step_then_percentage_site_date(context: Context, percentage: int):
     """Assert percentage with legacy_site_date."""
@@ -776,19 +366,6 @@ def step_then_percentage_legacy(context: Context, percentage: int):
     ), f"Expected ~{percentage}%, got {actual_pct:.1f}%"
 
 
-@then("{percentage:Number}% should have non-null well_completed_on")
-def step_then_percentage_completion(context: Context, percentage: int):
-    """Assert percentage with well_completed_on."""
-    total = len(context.queried_wells)
-    populated = sum(1 for well in context.queried_wells if well.well_completed_on)
-    actual_pct = (populated / total) * 100
-
-    tolerance = 2
-    assert (
-        abs(actual_pct - percentage) < tolerance
-    ), f"Expected ~{percentage}%, got {actual_pct:.1f}%"
-
-
 @then("it should have created_at (new system timestamp from migration)")
 def step_then_has_created_at(context: Context):
     """Assert created_at exists."""
@@ -854,28 +431,6 @@ def step_then_no_error(context: Context):
     pass
 
 
-@then("well_completed_on should be null")
-def step_then_completion_null(context: Context):
-    """Assert well_completed_on is null."""
-    if hasattr(context, "retrieved_thing"):
-        assert context.retrieved_thing.well_completed_on is None
-    elif hasattr(context, "retrieved_well"):
-        assert context.retrieved_well.well_completed_on is None
-
-
-@then("the field should exist in the response schema")
-def step_then_field_exists_in_schema(context: Context):
-    """Assert field exists in schema."""
-    if hasattr(context, "retrieved_thing"):
-        assert hasattr(context.retrieved_thing, "well_completed_on")
-
-
-@then("it should not cause validation errors")
-def step_then_no_validation_errors(context: Context):
-    """Assert no validation errors."""
-    pass
-
-
 @then("legacy_site_date should be null")
 def step_then_site_date_null(context: Context):
     """Assert legacy_site_date is null."""
diff --git a/tests/test_thing.py b/tests/test_thing.py
index 94d00aa85..3792b4302 100644
--- a/tests/test_thing.py
+++ b/tests/test_thing.py
@@ -1132,148 +1132,4 @@ def test_delete_thing_id_link_404_not_found(second_thing_id_link):
     assert data["detail"] == f"ThingIdLink with ID {bad_id} not found."
 
 
-#  ============= Well completion date tests ====================================
-
-
-def test_create_well_with_completion_date(location):
-    """Test creating a well with well_completed_on (active field - users can set this)"""
-    payload = {
-        "name": "Test Well",
-        "location_id": location.id,
-        "measuring_point_height": 2.5,
-        "measuring_point_description": "top of casing",
-        "release_status": "draft",
-        "well_completed_on": "2004-08-08",
-    }
-    response = client.post("/thing/water-well", json=payload)
-
-    assert response.status_code == 201
-    data = response.json()
-    assert "id" in data
-    assert data["well_completed_on"] == "2004-08-08"
-
-    # cleanup after test
-    from db import Thing
-    from tests import cleanup_post_test
-
-    cleanup_post_test(Thing, data["id"])
-
-
-def test_create_well_with_old_completion_date(location):
-    """Test creating a well with very old completion date (e.g., for documenting historical wells)"""
-    payload = {
-        "name": "Historical Well",
-        "location_id": location.id,
-        "measuring_point_height": 2.5,
-        "measuring_point_description": "top of casing",
-        "release_status": "draft",
-        "well_completed_on": "1936-01-01",
-    }
-    response = client.post("/thing/water-well", json=payload)
-
-    assert response.status_code == 201
-    data = response.json()
-    assert data["well_completed_on"] == "1936-01-01"
-
-    # cleanup after test
-    from db import Thing
-    from tests import cleanup_post_test
-
-    cleanup_post_test(Thing, data["id"])
-
-
-def test_create_well_without_completion_date(location):
-    """Test that well_completed_on is optional (nullable) when creating a well"""
-    payload = {
-        "name": "Test Well Without Date",
-        "location_id": location.id,
-        "measuring_point_height": 2.5,
-        "measuring_point_description": "top of casing",
-        "release_status": "draft",
-    }
-    response = client.post("/thing/water-well", json=payload)
-
-    assert response.status_code == 201
-    data = response.json()
-    # Field should be present but null
-    assert "well_completed_on" in data
-    assert data["well_completed_on"] is None
-
-    # cleanup after test
-    from db import Thing
-    from tests import cleanup_post_test
-
-    cleanup_post_test(Thing, data["id"])
-
-
-def test_spring_well_completed_on_is_null(location):
-    """Test that springs do NOT have well_completed_on field (it's well-specific)"""
-    payload = {
-        "name": "Test Spring",
-        "location_id": location.id,
-        "spring_type": "Artesian",
-        "release_status": "draft",
-    }
-    response = client.post("/thing/spring", json=payload)
-
-    assert response.status_code == 201
-    data = response.json()
-    # Springs should NOT have well_completed_on field (only wells have completion dates)
-    assert "well_completed_on" not in data
-    assert data["thing_type"] == "spring"
-
-    # cleanup after test
-    from db import Thing
-    from tests import cleanup_post_test
-
-    cleanup_post_test(Thing, data["id"])
-
-
-def test_well_with_completion_date_and_location_legacy_fields(location):
-    """Test combined scenario: new well with completion date + location legacy fields (null for new locations)"""
-    # Create a new location (without legacy fields - they're migration-only)
-    from tests import cleanup_post_test
-
-    location_payload = {
-        "point": "POINT (-106.607784 35.118924)",
-        "elevation": 1558.8,
-        "release_status": "draft",
-    }
-    location_response = client.post("/location", json=location_payload)
-    assert location_response.status_code == 201
-    location_id = location_response.json()["id"]
-
-    # Create well with completion date at that location
-    well_payload = {
-        "name": "Test Well",
-        "location_id": location_id,
-        "measuring_point_height": 2.5,
-        "measuring_point_description": "top of casing",
-        "release_status": "draft",
-        "well_completed_on": "2020-06-15",  # User can set this for new wells
-    }
-    well_response = client.post("/thing/water-well", json=well_payload)
-    assert well_response.status_code == 201
-    well_id = well_response.json()["id"]
-
-    # Retrieve the well
-    get_response = client.get(f"/thing/water-well/{well_id}")
-    assert get_response.status_code == 200
-    data = get_response.json()
-
-    # well_completed_on is set (active field)
-    assert data["well_completed_on"] == "2020-06-15"
-
-    # Location legacy fields are null (migration-only fields)
-    # current_location is a GeoJSON Feature, so fields are under properties
-    assert data["current_location"]["properties"]["legacy_date_created"] is None
-    assert data["current_location"]["properties"]["legacy_site_date"] is None
-
-    # cleanup after test
-    from db import Thing, Location
-
-    cleanup_post_test(Thing, well_id)
-    cleanup_post_test(Location, location_id)
-
-
 # ============= EOF =============================================
diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py
index 05dbe8dfe..795820ec8 100644
--- a/tests/test_transfer_legacy_dates.py
+++ b/tests/test_transfer_legacy_dates.py
@@ -19,7 +19,6 @@
 These tests verify that:
 1. Location.legacy_date_created is populated from CSV DateCreated
 2. Location.legacy_site_date is populated from CSV SiteDate (if not null)
-3. Thing.well_completed_on is populated from CSV CompletionDate (if not null)
 """
 import datetime
 from unittest.mock import Mock, patch, MagicMock
@@ -27,7 +26,6 @@
 import pytest
 
 from transfers.util import make_location
-from schemas.thing import CreateWell
 
 
 # ============================================================================
@@ -257,91 +255,6 @@ def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mappe
     assert location.legacy_site_date is not None
 
 
-# ============================================================================
-# WELL COMPLETION DATE TESTS
-# ============================================================================
-
-
-def test_create_well_schema_accepts_well_completed_on():
-    """Test that CreateWell schema accepts well_completed_on from CSV CompletionDate"""
-    # Simulate data from CSV transfer
-    well_data = {
-        "location_id": 1,
-        "name": "TEST-WELL-001",
-        "well_completed_on": datetime.date(2004, 8, 8),  # From CSV CompletionDate
-        "hole_depth": 100.0,
-        "well_depth": 95.0,
-        "measuring_point_height": 2.5,
-        "measuring_point_description": "top of casing",
-        "release_status": "public",
-    }
-
-    # Validate using CreateWell schema
-    schema = CreateWell(**well_data)
-
-    assert schema.well_completed_on == datetime.date(2004, 8, 8)
-
-
-def test_create_well_schema_well_completed_on_optional():
-    """Test that well_completed_on is optional (70% of wells don't have CompletionDate)"""
-    well_data = {
-        "location_id": 1,
-        "name": "TEST-WELL-002",
-        "hole_depth": 100.0,
-        "well_depth": 95.0,
-        "measuring_point_height": 2.5,
-        "measuring_point_description": "top of casing",
-        "release_status": "public",
-        # No well_completed_on provided
-    }
-
-    # Should not raise validation error
-    schema = CreateWell(**well_data)
-
-    # Field should be optional
-    assert hasattr(schema, "well_completed_on")
-    # Value should be None when not provided
-    assert schema.well_completed_on is None
-
-
-def test_create_well_with_very_old_completion_date():
-    """Test that very old completion dates (1936) are accepted"""
-    well_data = {
-        "location_id": 1,
-        "name": "HISTORICAL-WELL",
-        "well_completed_on": datetime.date(1936, 1, 1),  # Oldest well in dataset
-        "hole_depth": 100.0,
-        "well_depth": 95.0,
-        "measuring_point_height": 2.5,
-        "measuring_point_description": "top of casing",
-        "release_status": "public",
-    }
-
-    schema = CreateWell(**well_data)
-
-    assert schema.well_completed_on == datetime.date(1936, 1, 1)
-
-
-def test_create_well_completed_on_is_date_not_datetime():
-    """Test that well_completed_on is Date type (not DateTime)"""
-    well_data = {
-        "location_id": 1,
-        "name": "TEST-WELL-003",
-        "well_completed_on": datetime.date(2004, 8, 8),  # Date, not DateTime
-        "hole_depth": 100.0,
-        "well_depth": 95.0,
-        "measuring_point_height": 2.5,
-        "measuring_point_description": "top of casing",
-        "release_status": "public",
-    }
-
-    schema = CreateWell(**well_data)
-
-    # Should accept date type
-    assert isinstance(schema.well_completed_on, datetime.date)
-    assert not isinstance(schema.well_completed_on, datetime.datetime)
-
-
 # ============================================================================
 # DATA COVERAGE TESTS (Simulating Migration Statistics)
 # ============================================================================
@@ -410,44 +323,6 @@ def test_location_legacy_date_coverage_statistics(mock_lexicon_mapper):
     assert locations_with_site_date == 9  # 9% should have legacy_site_date
 
 
-def test_well_completion_date_coverage_statistics():
-    """Test that expected percentage of wells have completion dates"""
-    # Simulate 100 wells from CSV
-    wells_with_completion_date = 0
-
-    for i in range(100):
-        if i < 30:  # 30% have CompletionDate
-            well_data = {
-                "location_id": 1,
-                "name": f"WELL-{i:03d}",
-                "well_completed_on": datetime.date(2004, 8, 8),
-                "hole_depth": 100.0,
-                "well_depth": 95.0,
-                "measuring_point_height": 2.5,
-                "measuring_point_description": "top of casing",
-                "release_status": "public",
-            }
-        else:  # 70% don't have CompletionDate
-            well_data = {
-                "location_id": 1,
-                "name": f"WELL-{i:03d}",
-                "hole_depth": 100.0,
-                "well_depth": 95.0,
-                "measuring_point_height": 2.5,
-                "measuring_point_description": "top of casing",
-                "release_status": "public",
-                # No well_completed_on
-            }
-
-        schema = CreateWell(**well_data)
-
-        if schema.well_completed_on is not None:
-            wells_with_completion_date += 1
-
-    # Verify expected coverage
-    assert wells_with_completion_date == 30  # 30% should have completion dates
-
-
 # ============================================================================
 # EOF
 # ============================================================================
diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py
index 5daa1d8ee..ee54d0216 100644
--- a/transfers/well_transfer.py
+++ b/transfers/well_transfer.py
@@ -237,19 +237,6 @@ def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None
                 [] if isna(row.CasingDescription) else _extract_casing_materials(row)
             )
 
-            # Extract well_completed_on from CompletionDate (Date type, not DateTime)
-            well_completed_on = None
-            if not isna(row.CompletionDate):
-                try:
-                    well_completed_on = datetime.strptime(
-                        row.CompletionDate, "%Y-%m-%d %H:%M:%S.%f"
-                    ).date()
-                except (ValueError, AttributeError):
-                    # If parsing fails, leave as None
-                    logger.warning(
-                        f"Could not parse CompletionDate for {row.PointID}: {row.CompletionDate}"
-                    )
-
             # manually add the well rather than add_well from services/thing_helper.py
             # so that effective_start can be set on the location assocation
 
@@ -267,7 +254,6 @@ def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None
                 release_status="public" if row.PublicRelease else "private",
                 measuring_point_height=row.MPHeight,
                 measuring_point_description=row.MeasuringPoint,
-                well_completed_on=well_completed_on,
                 notes=(
                     [{"content": row.Notes, "note_type": "Other"}] if row.Notes else []
                 ),
@@ -297,7 +283,6 @@ def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None
             well_data["nma_pk_welldata"] = row.WellID
 
             well_data.pop("notes")
-            # well_completed_on is kept in well_data (not excluded above)
             well = Thing(**well_data)
             session.add(well)
             # logger.info(f"Created well for {row.PointID}")

From f0112264cb71eafeb36fca6363b3d9fc21e69ee5 Mon Sep 17 00:00:00 2001
From: kbighorse <kbighorse@users.noreply.github.com>
Date: Wed, 3 Dec 2025 08:55:09 +0000
Subject: [PATCH 40/66] Formatting changes

---
 schemas/location.py                                | 4 +---
 tests/features/steps/post_migration_legacy_data.py | 4 +---
 transfers/util.py                                  | 4 +---
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/schemas/location.py b/schemas/location.py
index f34c54115..ca182ebd5 100644
--- a/schemas/location.py
+++ b/schemas/location.py
@@ -155,9 +155,7 @@ def populate_fields(cls, data: Any) -> Any:
         data_dict["properties"]["elevation"] = convert_m_to_ft(elevation_m)
         data_dict["properties"]["elevation_method"] = data_dict.get("elevation_method")
         # populate AMPAPI date fields
-        data_dict["properties"]["nma_date_created"] = data_dict.get(
-            "nma_date_created"
-        )
+        data_dict["properties"]["nma_date_created"] = data_dict.get("nma_date_created")
         data_dict["properties"]["nma_site_date"] = data_dict.get("nma_site_date")
 
         # populate UTM coordinates
diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py
index 99ddd028e..7c2c36ffe 100644
--- a/tests/features/steps/post_migration_legacy_data.py
+++ b/tests/features/steps/post_migration_legacy_data.py
@@ -195,9 +195,7 @@ def step_when_filter_locations(context: Context, start_date: str, end_date: str)
 
         locations = (
             session.query(Location)
-            .filter(
-                Location.nma_site_date >= start, Location.nma_site_date <= end
-            )
+            .filter(Location.nma_site_date >= start, Location.nma_site_date <= end)
             .all()
         )
 
diff --git a/transfers/util.py b/transfers/util.py
index c8d054a0a..5216c204f 100644
--- a/transfers/util.py
+++ b/transfers/util.py
@@ -253,9 +253,7 @@ def make_location(row: pd.Series, elevations: dict) -> tuple:
 
     nma_site_date = None
     if row.SiteDate:
-        nma_site_date = datetime.strptime(
-            row.SiteDate, "%Y-%m-%d %H:%M:%S.%f"
-        ).date()
+        nma_site_date = datetime.strptime(row.SiteDate, "%Y-%m-%d %H:%M:%S.%f").date()
 
     location = Location(
         nma_pk_location=row.LocationId,

From f021c4be309fa69dd94fd2762e249cbc97b2e64d Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 3 Dec 2025 00:55:20 -0800
Subject: [PATCH 41/66] Replace `legacy_` prefix with `nma_`

---
 db/location.py                                |   6 +-
 schemas/location.py                           |  26 +--
 ...st-migration-legacy-data-retrieval.feature |  70 ++++----
 .../steps/post_migration_legacy_data.py       | 164 +++++++++---------
 tests/test_location.py                        |  24 +--
 tests/test_transfer_legacy_dates.py           |  64 +++----
 transfers/util.py                             |  14 +-
 7 files changed, 184 insertions(+), 184 deletions(-)

diff --git a/db/location.py b/db/location.py
index a07958346..c00c11a70 100644
--- a/db/location.py
+++ b/db/location.py
@@ -62,13 +62,13 @@ class Location(Base, AutoBaseMixin, ReleaseMixin, NotesMixin, DataProvenanceMixi
     nma_notes_location: Mapped[str] = mapped_column(Text, nullable=True)
     nma_coordinate_notes: Mapped[str] = mapped_column(Text, nullable=True)
 
-    # --- Legacy AMPAPI Date Fields (Migration-Only, Read-Only Post-Migration) ---
-    legacy_date_created: Mapped[datetime.date] = mapped_column(
+    # --- AMPAPI Date Fields (Migration-Only, Read-Only Post-Migration) ---
+    nma_date_created: Mapped[datetime.date] = mapped_column(
         Date,
         nullable=True,
         comment="Original AMPAPI DateCreated (migration-only field)",
     )
-    legacy_site_date: Mapped[datetime.date] = mapped_column(
+    nma_site_date: Mapped[datetime.date] = mapped_column(
         Date, nullable=True, comment="Original AMPAPI SiteDate (migration-only field)"
     )
 
diff --git a/schemas/location.py b/schemas/location.py
index 1f4bad472..f34c54115 100644
--- a/schemas/location.py
+++ b/schemas/location.py
@@ -107,9 +107,9 @@ class GeoJSONProperties(BaseModel):
         default_factory=GeoJSONUTMCoordinates
     )
     notes: list[NoteResponse] = []
-    # Legacy AMPAPI date fields (migration-only, read-only)
-    legacy_date_created: date | None = None
-    legacy_site_date: date | None = None
+    # AMPAPI date fields (migration-only, read-only)
+    nma_date_created: date | None = None
+    nma_site_date: date | None = None
 
     model_config = ConfigDict(
         from_attributes=True,
@@ -154,11 +154,11 @@ def populate_fields(cls, data: Any) -> Any:
         data_dict["properties"]["notes"] = data_dict.get("notes")
         data_dict["properties"]["elevation"] = convert_m_to_ft(elevation_m)
         data_dict["properties"]["elevation_method"] = data_dict.get("elevation_method")
-        # populate legacy date fields
-        data_dict["properties"]["legacy_date_created"] = data_dict.get(
-            "legacy_date_created"
+        # populate AMPAPI date fields
+        data_dict["properties"]["nma_date_created"] = data_dict.get(
+            "nma_date_created"
         )
-        data_dict["properties"]["legacy_site_date"] = data_dict.get("legacy_site_date")
+        data_dict["properties"]["nma_site_date"] = data_dict.get("nma_site_date")
 
         # populate UTM coordinates
         point_utm_zone_13n_wkt = transform_srid(
@@ -190,9 +190,9 @@ class LocationResponse(BaseResponseModel):
     county: str | None
     quad_name: str | None
 
-    # Legacy AMPAPI date fields (migration-only, read-only post-migration)
-    legacy_date_created: date | None = None
-    legacy_site_date: date | None = None
+    # AMPAPI date fields (migration-only, read-only post-migration)
+    nma_date_created: date | None = None
+    nma_site_date: date | None = None
 
     @field_validator("point", mode="before")
     def point_to_wkt(cls, value):
@@ -232,9 +232,9 @@ class UpdateLocation(BaseUpdateModel, ValidateLocation):
     coordinate_accuracy: float | None = None
     coordinate_method: CoordinateMethod | None = None
 
-    # Legacy AMPAPI date fields (migration-only, can be updated but not created)
-    legacy_date_created: date | None = None
-    legacy_site_date: date | None = None
+    # AMPAPI date fields (migration-only, can be updated but not created)
+    nma_date_created: date | None = None
+    nma_site_date: date | None = None
 
 
 # ============= EOF =============================================
diff --git a/tests/features/post-migration-legacy-data-retrieval.feature b/tests/features/post-migration-legacy-data-retrieval.feature
index b5329ad9c..61f3e8c40 100644
--- a/tests/features/post-migration-legacy-data-retrieval.feature
+++ b/tests/features/post-migration-legacy-data-retrieval.feature
@@ -13,43 +13,43 @@ Feature: Post-Migration Legacy Data Retrieval
   Scenario: Retrieve location with both legacy dates via API
     Given a location exists with:
       | field                | value      |
-      | legacy_date_created  | 2014-04-03 |
-      | legacy_site_date     | 2002-12-10 |
+      | nma_date_created  | 2014-04-03 |
+      | nma_site_date     | 2002-12-10 |
     When I retrieve that location via the API
-    Then the response should include legacy_date_created as "2014-04-03"
-    And the response should include legacy_site_date as "2002-12-10"
+    Then the response should include nma_date_created as "2014-04-03"
+    And the response should include nma_site_date as "2002-12-10"
     And the time gap should be approximately 11.3 years
 
   Scenario: Retrieve location with large time gap (54 years)
     Given a location exists with:
       | field                | value      |
-      | legacy_date_created  | 2008-05-28 |
-      | legacy_site_date     | 1954-05-01 |
+      | nma_date_created  | 2008-05-28 |
+      | nma_site_date     | 1954-05-01 |
     When I retrieve that location via the API
-    Then the response should include legacy_date_created as "2008-05-28"
-    And the response should include legacy_site_date as "1954-05-01"
+    Then the response should include nma_date_created as "2008-05-28"
+    And the response should include nma_site_date as "1954-05-01"
     And the time gap should be approximately 54 years
 
   Scenario: List all locations includes legacy date fields
     Given 5 locations exist with various legacy dates
     When I GET /location to list all locations
-    Then each location should have a legacy_date_created field
-    And each location should have a legacy_site_date field
-    And some locations should have null legacy_site_date
+    Then each location should have a nma_date_created field
+    And each location should have a nma_site_date field
+    And some locations should have null nma_site_date
 
   Scenario: Filter locations by legacy site date range
-    Given locations exist with legacy_site_date ranging from 1950 to 2024
-    When I filter locations where legacy_site_date is between "2000-01-01" and "2010-12-31"
-    Then the response should only include locations with legacy_site_date in that decade
-    And locations with legacy_site_date before 2000 should not be included
-    And locations with legacy_site_date after 2010 should not be included
+    Given locations exist with nma_site_date ranging from 1950 to 2024
+    When I filter locations where nma_site_date is between "2000-01-01" and "2010-12-31"
+    Then the response should only include locations with nma_site_date in that decade
+    And locations with nma_site_date before 2000 should not be included
+    And locations with nma_site_date after 2010 should not be included
 
-  Scenario: Query location by legacy_date_created
-    Given 3 locations exist with legacy_date_created "2014-04-03"
-    And 2 locations exist with legacy_date_created "2017-12-06"
-    When I query for locations with legacy_date_created "2014-04-03"
+  Scenario: Query location by nma_date_created
+    Given 3 locations exist with nma_date_created "2014-04-03"
+    And 2 locations exist with nma_date_created "2017-12-06"
+    When I query for locations with nma_date_created "2014-04-03"
     Then the response should include exactly 3 locations
-    And all should have legacy_date_created "2014-04-03"
+    And all should have nma_date_created "2014-04-03"
 
   # Data Quality Validation
 
@@ -57,8 +57,8 @@ Feature: Post-Migration Legacy Data Retrieval
     Given 100 locations were migrated
     And 9 of them had non-null SiteDate in AMPAPI
     When I query the migrated locations
-    Then 9% should have non-null legacy_site_date
-    And 100% should have non-null legacy_date_created
+    Then 9% should have non-null nma_site_date
+    And 100% should have non-null nma_date_created
 
   # Audit Trail Verification
 
@@ -66,29 +66,29 @@ Feature: Post-Migration Legacy Data Retrieval
     Given a location was migrated with legacy dates
     When I retrieve that location
     Then it should have created_at (new system timestamp from migration)
-    And it should have legacy_date_created (original AMPAPI DateCreated)
-    And it should have legacy_site_date (original AMPAPI SiteDate)
+    And it should have nma_date_created (original AMPAPI DateCreated)
+    And it should have nma_site_date (original AMPAPI SiteDate)
     And all three timestamps should be independently queryable
     And created_at should be a recent timestamp
-    And legacy_date_created should be an older date
+    And nma_date_created should be an older date
 
   # Edge Cases
 
   Scenario: Location where SiteDate is later than DateCreated (data anomaly)
     Given a location exists with:
       | field                | value      |
-      | legacy_date_created  | 2010-01-15 |
-      | legacy_site_date     | 2015-06-20 |
+      | nma_date_created  | 2010-01-15 |
+      | nma_site_date     | 2015-06-20 |
     When I retrieve that location
-    Then legacy_date_created should be "2010-01-15"
-    And legacy_site_date should be "2015-06-20"
+    Then nma_date_created should be "2010-01-15"
+    And nma_site_date should be "2015-06-20"
     And the system should accept this without error
 
-  Scenario: Location with only legacy_date_created (no legacy_site_date)
+  Scenario: Location with only nma_date_created (no nma_site_date)
     Given a location exists with:
       | field                | value      |
-      | legacy_date_created  | 2014-10-17 |
-      | legacy_site_date     | null       |
+      | nma_date_created  | 2014-10-17 |
+      | nma_site_date     | null       |
     When I retrieve that location
-    Then legacy_date_created should be "2014-10-17"
-    And legacy_site_date should be null
+    Then nma_date_created should be "2014-10-17"
+    And nma_site_date should be null
diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py
index 25e932159..99ddd028e 100644
--- a/tests/features/steps/post_migration_legacy_data.py
+++ b/tests/features/steps/post_migration_legacy_data.py
@@ -31,15 +31,15 @@ def parse_number(text):
 register_type(Number=parse_number)
 
 
-def create_test_location(legacy_date_created=None, legacy_site_date=None):
+def create_test_location(nma_date_created=None, nma_site_date=None):
     """Helper to create a test location with legacy dates."""
     with session_ctx() as session:
         location = Location(
             point="POINT(-106.607784 35.118924)",
             elevation=1558.8,
             release_status="public",
-            legacy_date_created=legacy_date_created,
-            legacy_site_date=legacy_site_date,
+            nma_date_created=nma_date_created,
+            nma_site_date=nma_site_date,
         )
         session.add(location)
         session.commit()
@@ -58,19 +58,19 @@ def step_given_location_with_table(context: Context):
     """Create location with fields from table."""
     data = {row["field"]: row["value"] for row in context.table}
 
-    legacy_date_created = (
-        date.fromisoformat(data["legacy_date_created"])
-        if data.get("legacy_date_created") and data["legacy_date_created"] != "null"
+    nma_date_created = (
+        date.fromisoformat(data["nma_date_created"])
+        if data.get("nma_date_created") and data["nma_date_created"] != "null"
         else None
     )
-    legacy_site_date = (
-        date.fromisoformat(data["legacy_site_date"])
-        if data.get("legacy_site_date") and data["legacy_site_date"] != "null"
+    nma_site_date = (
+        date.fromisoformat(data["nma_site_date"])
+        if data.get("nma_site_date") and data["nma_site_date"] != "null"
         else None
     )
 
     location = create_test_location(
-        legacy_date_created=legacy_date_created, legacy_site_date=legacy_site_date
+        nma_date_created=nma_date_created, nma_site_date=nma_site_date
     )
 
     context.test_location = location
@@ -93,33 +93,33 @@ def step_given_multiple_locations(context: Context, count: int):
     for i in range(min(count, len(test_data))):
         legacy_date, site_date = test_data[i]
         location = create_test_location(
-            legacy_date_created=date.fromisoformat(legacy_date),
-            legacy_site_date=(date.fromisoformat(site_date) if site_date else None),
+            nma_date_created=date.fromisoformat(legacy_date),
+            nma_site_date=(date.fromisoformat(site_date) if site_date else None),
         )
         context.test_locations.append(location)
 
 
 @given(
-    "locations exist with legacy_site_date ranging from {start_year:Number} to {end_year:Number}"
+    "locations exist with nma_site_date ranging from {start_year:Number} to {end_year:Number}"
 )
 def step_given_locations_date_range(context: Context, start_year: int, end_year: int):
-    """Create locations with legacy_site_date across a date range."""
+    """Create locations with nma_site_date across a date range."""
     context.test_locations = []
 
     years = [1954, 2002, 2003, 2010, 2015, 2020, 2024]
     for year in years:
         location = create_test_location(
-            legacy_date_created=date(year + 5, 1, 1),  # Always 5 years after site date
-            legacy_site_date=date(year, 6, 15),
+            nma_date_created=date(year + 5, 1, 1),  # Always 5 years after site date
+            nma_site_date=date(year, 6, 15),
         )
         context.test_locations.append(location)
 
 
-@given('{count:Number} locations exist with legacy_date_created "{target_date}"')
+@given('{count:Number} locations exist with nma_date_created "{target_date}"')
 def step_given_locations_with_specific_date(
     context: Context, count: int, target_date: str
 ):
-    """Create locations with specific legacy_date_created."""
+    """Create locations with specific nma_date_created."""
     if not hasattr(context, "test_locations"):
         context.test_locations = []
 
@@ -127,8 +127,8 @@ def step_given_locations_with_specific_date(
 
     for i in range(count):
         location = create_test_location(
-            legacy_date_created=target,
-            legacy_site_date=date(2000 + i, 1, 1),  # Vary the site dates
+            nma_date_created=target,
+            nma_site_date=date(2000 + i, 1, 1),  # Vary the site dates
         )
         context.test_locations.append(location)
 
@@ -139,12 +139,12 @@ def step_given_count_locations_migrated(context: Context, count: int):
     context.test_locations = []
 
     for i in range(count):
-        # 9% have legacy_site_date
+        # 9% have nma_site_date
         has_site_date = i < count * 0.09
 
         location = create_test_location(
-            legacy_date_created=date(2014, 1, i % 28 + 1),
-            legacy_site_date=date(2003, 1, i % 28 + 1) if has_site_date else None,
+            nma_date_created=date(2014, 1, i % 28 + 1),
+            nma_site_date=date(2003, 1, i % 28 + 1) if has_site_date else None,
         )
         context.test_locations.append(location)
 
@@ -159,7 +159,7 @@ def step_given_sitedate_count(context: Context, count: int):
 def step_given_location_migrated_with_dates(context: Context):
     """Create location with both legacy dates."""
     location = create_test_location(
-        legacy_date_created=date(2014, 4, 3), legacy_site_date=date(2002, 12, 10)
+        nma_date_created=date(2014, 4, 3), nma_site_date=date(2002, 12, 10)
     )
     context.test_location = location
 
@@ -184,7 +184,7 @@ def step_when_get_all_locations(context: Context):
 
 
 @when(
-    'I filter locations where legacy_site_date is between "{start_date}" and "{end_date}"'
+    'I filter locations where nma_site_date is between "{start_date}" and "{end_date}"'
 )
 def step_when_filter_locations(context: Context, start_date: str, end_date: str):
     """Filter locations by date range."""
@@ -196,7 +196,7 @@ def step_when_filter_locations(context: Context, start_date: str, end_date: str)
         locations = (
             session.query(Location)
             .filter(
-                Location.legacy_site_date >= start, Location.legacy_site_date <= end
+                Location.nma_site_date >= start, Location.nma_site_date <= end
             )
             .all()
         )
@@ -204,13 +204,13 @@ def step_when_filter_locations(context: Context, start_date: str, end_date: str)
         context.filtered_locations = locations
 
 
-@when('I query for locations with legacy_date_created "{target_date}"')
+@when('I query for locations with nma_date_created "{target_date}"')
 def step_when_query_by_legacy_date(context: Context, target_date: str):
-    """Query locations by legacy_date_created."""
+    """Query locations by nma_date_created."""
     with session_ctx() as session:
         target = date.fromisoformat(target_date)
         locations = (
-            session.query(Location).filter(Location.legacy_date_created == target).all()
+            session.query(Location).filter(Location.nma_date_created == target).all()
         )
         context.queried_locations = locations
 
@@ -236,25 +236,25 @@ def step_when_retrieve_location(context: Context):
 # THEN steps
 
 
-@then('the response should include legacy_date_created as "{expected_date}"')
-def step_then_legacy_date_created(context: Context, expected_date: str):
-    """Assert legacy_date_created matches."""
-    actual = context.location_response.get("legacy_date_created")
+@then('the response should include nma_date_created as "{expected_date}"')
+def step_then_nma_date_created(context: Context, expected_date: str):
+    """Assert nma_date_created matches."""
+    actual = context.location_response.get("nma_date_created")
     assert actual == expected_date, f"Expected {expected_date}, got {actual}"
 
 
-@then('the response should include legacy_site_date as "{expected_date}"')
-def step_then_legacy_site_date(context: Context, expected_date: str):
-    """Assert legacy_site_date matches."""
-    actual = context.location_response.get("legacy_site_date")
+@then('the response should include nma_site_date as "{expected_date}"')
+def step_then_nma_site_date(context: Context, expected_date: str):
+    """Assert nma_site_date matches."""
+    actual = context.location_response.get("nma_site_date")
     assert actual == expected_date, f"Expected {expected_date}, got {actual}"
 
 
 @then("the time gap should be approximately {years} years")
 def step_then_time_gap_years(context: Context, years: str):
     """Assert approximate year gap."""
-    legacy_str = context.location_response.get("legacy_date_created")
-    site_date_str = context.location_response.get("legacy_site_date")
+    legacy_str = context.location_response.get("nma_date_created")
+    site_date_str = context.location_response.get("nma_site_date")
 
     if not legacy_str or not site_date_str:
         raise AssertionError("Missing date fields for gap calculation")
@@ -272,28 +272,28 @@ def step_then_time_gap_years(context: Context, years: str):
     ), f"Expected ~{expected_years} year gap, got {gap_years:.1f} years"
 
 
-@then("each location should have a legacy_date_created field")
+@then("each location should have a nma_date_created field")
 def step_then_all_have_legacy_field(context: Context):
     """Assert all locations have the field."""
     items = context.locations_response.get("items", [])
     for item in items:
-        assert "legacy_date_created" in item, f"Location missing legacy_date_created"
+        assert "nma_date_created" in item, f"Location missing nma_date_created"
 
 
-@then("each location should have a legacy_site_date field")
+@then("each location should have a nma_site_date field")
 def step_then_all_have_site_date_field(context: Context):
     """Assert all locations have the field."""
     items = context.locations_response.get("items", [])
     for item in items:
-        assert "legacy_site_date" in item, f"Location missing legacy_site_date"
+        assert "nma_site_date" in item, f"Location missing nma_site_date"
 
 
-@then("some locations should have null legacy_site_date")
+@then("some locations should have null nma_site_date")
 def step_then_some_null_site_date(context: Context):
     """Assert some locations have null."""
     items = context.locations_response.get("items", [])
-    null_count = sum(1 for item in items if item.get("legacy_site_date") is None)
-    assert null_count > 0, "Expected at least one location with null legacy_site_date"
+    null_count = sum(1 for item in items if item.get("nma_site_date") is None)
+    assert null_count > 0, "Expected at least one location with null nma_site_date"
 
 
 @then("the response should only include locations with site date in that decade")
@@ -301,8 +301,8 @@ def step_then_locations_in_decade(context: Context):
     """Assert filtered locations are in range."""
     for loc in context.filtered_locations:
         assert (
-            2000 <= loc.legacy_site_date.year <= 2010
-        ), f"Location not in 2000-2010: {loc.legacy_site_date}"
+            2000 <= loc.nma_site_date.year <= 2010
+        ), f"Location not in 2000-2010: {loc.nma_site_date}"
 
 
 @then("locations with site date before {year:Number} should not be included")
@@ -310,8 +310,8 @@ def step_then_locations_before_excluded(context: Context, year: int):
     """Assert no locations before year."""
     for loc in context.filtered_locations:
         assert (
-            loc.legacy_site_date.year >= year
-        ), f"Location from {loc.legacy_site_date.year} should not be included"
+            loc.nma_site_date.year >= year
+        ), f"Location from {loc.nma_site_date.year} should not be included"
 
 
 @then("locations with site date after {year:Number} should not be included")
@@ -319,8 +319,8 @@ def step_then_locations_after_excluded(context: Context, year: int):
     """Assert no locations after year."""
     for loc in context.filtered_locations:
         assert (
-            loc.legacy_site_date.year <= year
-        ), f"Location from {loc.legacy_site_date.year} should not be included"
+            loc.nma_site_date.year <= year
+        ), f"Location from {loc.nma_site_date.year} should not be included"
 
 
 @then("the response should include exactly {count:Number} locations")
@@ -330,21 +330,21 @@ def step_then_exact_count_locations(context: Context, count: int):
     assert actual == count, f"Expected {count} locations, got {actual}"
 
 
-@then('all should have legacy_date_created "{expected_date}"')
+@then('all should have nma_date_created "{expected_date}"')
 def step_then_all_have_date(context: Context, expected_date: str):
     """Assert all have same date."""
     expected = date.fromisoformat(expected_date)
     for loc in context.queried_locations:
         assert (
-            loc.legacy_date_created == expected
-        ), f"Location has {loc.legacy_date_created}, expected {expected}"
+            loc.nma_date_created == expected
+        ), f"Location has {loc.nma_date_created}, expected {expected}"
 
 
-@then("{percentage:Number}% should have non-null legacy_site_date")
+@then("{percentage:Number}% should have non-null nma_site_date")
 def step_then_percentage_site_date(context: Context, percentage: int):
-    """Assert percentage with legacy_site_date."""
+    """Assert percentage with nma_site_date."""
     total = len(context.queried_locations)
-    populated = sum(1 for loc in context.queried_locations if loc.legacy_site_date)
+    populated = sum(1 for loc in context.queried_locations if loc.nma_site_date)
     actual_pct = (populated / total) * 100
 
     tolerance = 2
@@ -353,11 +353,11 @@ def step_then_percentage_site_date(context: Context, percentage: int):
     ), f"Expected ~{percentage}%, got {actual_pct:.1f}%"
 
 
-@then("{percentage:Number}% should have non-null legacy_date_created")
+@then("{percentage:Number}% should have non-null nma_date_created")
 def step_then_percentage_legacy(context: Context, percentage: int):
-    """Assert percentage with legacy_date_created."""
+    """Assert percentage with nma_date_created."""
     total = len(context.queried_locations)
-    populated = sum(1 for loc in context.queried_locations if loc.legacy_date_created)
+    populated = sum(1 for loc in context.queried_locations if loc.nma_date_created)
     actual_pct = (populated / total) * 100
 
     tolerance = 2
@@ -372,24 +372,24 @@ def step_then_has_created_at(context: Context):
     assert context.retrieved_location.created_at is not None
 
 
-@then("it should have legacy_date_created (original AMPAPI DateCreated)")
+@then("it should have nma_date_created (original AMPAPI DateCreated)")
 def step_then_has_legacy_date(context: Context):
-    """Assert legacy_date_created exists."""
-    assert context.retrieved_location.legacy_date_created is not None
+    """Assert nma_date_created exists."""
+    assert context.retrieved_location.nma_date_created is not None
 
 
-@then("it should have legacy_site_date (original AMPAPI SiteDate)")
+@then("it should have nma_site_date (original AMPAPI SiteDate)")
 def step_then_has_site_date(context: Context):
-    """Assert legacy_site_date exists."""
-    assert context.retrieved_location.legacy_site_date is not None
+    """Assert nma_site_date exists."""
+    assert context.retrieved_location.nma_site_date is not None
 
 
 @then("all three timestamps should be independently queryable")
 def step_then_all_queryable(context: Context):
     """Assert all fields are queryable."""
     assert hasattr(context.retrieved_location, "created_at")
-    assert hasattr(context.retrieved_location, "legacy_date_created")
-    assert hasattr(context.retrieved_location, "legacy_site_date")
+    assert hasattr(context.retrieved_location, "nma_date_created")
+    assert hasattr(context.retrieved_location, "nma_site_date")
 
 
 @then("created_at should be a recent timestamp")
@@ -401,25 +401,25 @@ def step_then_created_at_recent(context: Context):
     assert diff_seconds < 3600, "created_at should be within last hour"
 
 
-@then("legacy_date_created should be an older date")
+@then("nma_date_created should be an older date")
 def step_then_legacy_date_older(context: Context):
-    """Assert legacy_date_created is old."""
-    legacy_date = context.retrieved_location.legacy_date_created
-    assert legacy_date.year < 2024, "legacy_date_created should be from the past"
+    """Assert nma_date_created is old."""
+    legacy_date = context.retrieved_location.nma_date_created
+    assert legacy_date.year < 2024, "nma_date_created should be from the past"
 
 
-@then('legacy_date_created should be "{expected_date}"')
+@then('nma_date_created should be "{expected_date}"')
 def step_then_legacy_is(context: Context, expected_date: str):
-    """Assert legacy_date_created value."""
-    actual = context.retrieved_location.legacy_date_created
+    """Assert nma_date_created value."""
+    actual = context.retrieved_location.nma_date_created
     expected = date.fromisoformat(expected_date)
     assert actual == expected, f"Expected {expected}, got {actual}"
 
 
-@then('legacy_site_date should be "{expected_date}"')
+@then('nma_site_date should be "{expected_date}"')
 def step_then_site_date_is(context: Context, expected_date: str):
-    """Assert legacy_site_date value."""
-    actual = context.retrieved_location.legacy_site_date
+    """Assert nma_site_date value."""
+    actual = context.retrieved_location.nma_site_date
     expected = date.fromisoformat(expected_date)
     assert actual == expected, f"Expected {expected}, got {actual}"
 
@@ -431,10 +431,10 @@ def step_then_no_error(context: Context):
     pass
 
 
-@then("legacy_site_date should be null")
+@then("nma_site_date should be null")
 def step_then_site_date_null(context: Context):
-    """Assert legacy_site_date is null."""
-    assert context.retrieved_location.legacy_site_date is None
+    """Assert nma_site_date is null."""
+    assert context.retrieved_location.nma_site_date is None
 
 
 @then("the well should still be valid")
diff --git a/tests/test_location.py b/tests/test_location.py
index b86211a58..67a4615c8 100644
--- a/tests/test_location.py
+++ b/tests/test_location.py
@@ -251,10 +251,10 @@ def test_new_location_has_null_legacy_fields():
     data = response.json()
     assert "id" in data
     # Legacy fields should be present in response but null (not set during creation)
-    assert "legacy_date_created" in data
-    assert "legacy_site_date" in data
-    assert data["legacy_date_created"] is None
-    assert data["legacy_site_date"] is None
+    assert "nma_date_created" in data
+    assert "nma_site_date" in data
+    assert data["nma_date_created"] is None
+    assert data["nma_site_date"] is None
 
     # cleanup after test
     cleanup_post_test(Location, data["id"])
@@ -278,10 +278,10 @@ def test_legacy_fields_present_in_location_response():
     data = get_response.json()
 
     # Verify fields exist in response (even if null)
-    assert "legacy_date_created" in data
-    assert "legacy_site_date" in data
-    assert data["legacy_date_created"] is None
-    assert data["legacy_site_date"] is None
+    assert "nma_date_created" in data
+    assert "nma_site_date" in data
+    assert data["nma_date_created"] is None
+    assert data["nma_site_date"] is None
 
     # cleanup after test
     cleanup_post_test(Location, location_id)
@@ -303,12 +303,12 @@ def test_legacy_fields_independent_of_created_at():
     assert "created_at" in data
     assert data["created_at"] is not None
 
-    # legacy_date_created is separate and null for new records
-    assert "legacy_date_created" in data
-    assert data["legacy_date_created"] is None
+    # nma_date_created is separate and null for new records
+    assert "nma_date_created" in data
+    assert data["nma_date_created"] is None
 
     # These are independent fields with different purposes
-    assert "created_at" != "legacy_date_created"
+    assert "created_at" != "nma_date_created"
 
     # cleanup after test
     cleanup_post_test(Location, data["id"])
diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py
index 795820ec8..5068d8882 100644
--- a/tests/test_transfer_legacy_dates.py
+++ b/tests/test_transfer_legacy_dates.py
@@ -17,8 +17,8 @@
 Unit tests for legacy date field population during AMPAPI → NMSampleLocations migration.
 
 These tests verify that:
-1. Location.legacy_date_created is populated from CSV DateCreated
-2. Location.legacy_site_date is populated from CSV SiteDate (if not null)
+1. Location.nma_date_created is populated from CSV DateCreated
+2. Location.nma_site_date is populated from CSV SiteDate (if not null)
 """
 import datetime
 from unittest.mock import Mock, patch, MagicMock
@@ -35,7 +35,7 @@
 
 @patch("transfers.util.lexicon_mapper")
 def test_make_location_with_both_legacy_dates(mock_lexicon_mapper):
-    """Test that make_location populates both legacy_date_created and legacy_site_date"""
+    """Test that make_location populates both nma_date_created and nma_site_date"""
     # Mock lexicon mapper to avoid GCS calls
     mock_lexicon_mapper.map_value.return_value = "GPS"
 
@@ -63,13 +63,13 @@ def test_make_location_with_both_legacy_dates(mock_lexicon_mapper):
     # Call make_location
     location, elevation_method = make_location(row, elevations)
 
-    # Verify legacy_date_created is set from DateCreated
-    assert location.legacy_date_created is not None
-    assert location.legacy_date_created == datetime.date(2014, 4, 3)
+    # Verify nma_date_created is set from DateCreated
+    assert location.nma_date_created is not None
+    assert location.nma_date_created == datetime.date(2014, 4, 3)
 
-    # Verify legacy_site_date is set from SiteDate
-    assert location.legacy_site_date is not None
-    assert location.legacy_site_date == datetime.date(2002, 12, 10)
+    # Verify nma_site_date is set from SiteDate
+    assert location.nma_site_date is not None
+    assert location.nma_site_date == datetime.date(2002, 12, 10)
 
     # Verify created_at is NOT set during migration (it's auto-set by AutoBaseMixin on save)
     assert location.created_at is None
@@ -102,11 +102,11 @@ def test_make_location_with_only_date_created(mock_lexicon_mapper):
     elevations = {}
     location, elevation_method = make_location(row, elevations)
 
-    # Verify legacy_date_created is set
-    assert location.legacy_date_created == datetime.date(2014, 4, 3)
+    # Verify nma_date_created is set
+    assert location.nma_date_created == datetime.date(2014, 4, 3)
 
-    # Verify legacy_site_date is null (91% of locations don't have SiteDate)
-    assert location.legacy_site_date is None
+    # Verify nma_site_date is null (91% of locations don't have SiteDate)
+    assert location.nma_site_date is None
 
 
 @patch("transfers.util.lexicon_mapper")
@@ -137,8 +137,8 @@ def test_make_location_with_site_date_later_than_date_created(mock_lexicon_mappe
     location, elevation_method = make_location(row, elevations)
 
     # Both dates should be preserved as-is, regardless of order
-    assert location.legacy_date_created == datetime.date(2010, 1, 15)
-    assert location.legacy_site_date == datetime.date(2015, 6, 20)
+    assert location.nma_date_created == datetime.date(2010, 1, 15)
+    assert location.nma_site_date == datetime.date(2015, 6, 20)
 
 
 @patch("transfers.util.lexicon_mapper")
@@ -169,11 +169,11 @@ def test_make_location_with_very_old_site_date(mock_lexicon_mapper):
     location, elevation_method = make_location(row, elevations)
 
     # Verify very old date is preserved
-    assert location.legacy_site_date == datetime.date(1954, 5, 1)
-    assert location.legacy_date_created == datetime.date(2008, 5, 28)
+    assert location.nma_site_date == datetime.date(1954, 5, 1)
+    assert location.nma_date_created == datetime.date(2008, 5, 28)
 
     # Verify 54-year time gap
-    time_gap = (location.legacy_date_created - location.legacy_site_date).days
+    time_gap = (location.nma_date_created - location.nma_site_date).days
     assert time_gap == 19751  # Approximately 54 years
 
 
@@ -205,15 +205,15 @@ def test_make_location_legacy_dates_are_date_not_datetime(mock_lexicon_mapper):
     location, elevation_method = make_location(row, elevations)
 
     # Verify they are date objects (not datetime)
-    assert isinstance(location.legacy_date_created, datetime.date)
-    assert not isinstance(location.legacy_date_created, datetime.datetime)
+    assert isinstance(location.nma_date_created, datetime.date)
+    assert not isinstance(location.nma_date_created, datetime.datetime)
 
-    assert isinstance(location.legacy_site_date, datetime.date)
-    assert not isinstance(location.legacy_site_date, datetime.datetime)
+    assert isinstance(location.nma_site_date, datetime.date)
+    assert not isinstance(location.nma_site_date, datetime.datetime)
 
     # Verify time component is stripped
-    assert location.legacy_date_created == datetime.date(2014, 4, 3)
-    assert location.legacy_site_date == datetime.date(2002, 12, 10)
+    assert location.nma_date_created == datetime.date(2014, 4, 3)
+    assert location.nma_site_date == datetime.date(2002, 12, 10)
 
 
 @patch("transfers.util.lexicon_mapper")
@@ -247,12 +247,12 @@ def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mappe
     assert location.created_at is None
 
     # legacy fields should be Date (no timezone)
-    assert isinstance(location.legacy_date_created, datetime.date)
-    assert isinstance(location.legacy_site_date, datetime.date)
+    assert isinstance(location.nma_date_created, datetime.date)
+    assert isinstance(location.nma_site_date, datetime.date)
 
     # Legacy fields should be populated
-    assert location.legacy_date_created is not None
-    assert location.legacy_site_date is not None
+    assert location.nma_date_created is not None
+    assert location.nma_site_date is not None
 
 
 # ============================================================================
@@ -312,15 +312,15 @@ def test_location_legacy_date_coverage_statistics(mock_lexicon_mapper):
         location, _ = make_location(row, elevations)
 
         # Count coverage
-        if location.legacy_date_created is not None:
+        if location.nma_date_created is not None:
             locations_created += 1
 
-        if location.legacy_site_date is not None:
+        if location.nma_site_date is not None:
             locations_with_site_date += 1
 
     # Verify expected coverage
-    assert locations_created == 100  # 100% should have legacy_date_created
-    assert locations_with_site_date == 9  # 9% should have legacy_site_date
+    assert locations_created == 100  # 100% should have nma_date_created
+    assert locations_with_site_date == 9  # 9% should have nma_site_date
 
 
 # ============================================================================
diff --git a/transfers/util.py b/transfers/util.py
index d39845f44..c8d054a0a 100644
--- a/transfers/util.py
+++ b/transfers/util.py
@@ -244,16 +244,16 @@ def make_location(row: pd.Series, elevations: dict) -> tuple:
             f"LU_AltitudeMethod:{row.AltitudeMethod.strip()}"
         )
 
-    # Extract legacy date fields (Date type, not DateTime)
-    legacy_date_created = None
+    # Extract AMPAPI date fields (Date type, not DateTime)
+    nma_date_created = None
     if row.DateCreated:
-        legacy_date_created = datetime.strptime(
+        nma_date_created = datetime.strptime(
             row.DateCreated, "%Y-%m-%d %H:%M:%S.%f"
         ).date()
 
-    legacy_site_date = None
+    nma_site_date = None
     if row.SiteDate:
-        legacy_site_date = datetime.strptime(
+        nma_site_date = datetime.strptime(
             row.SiteDate, "%Y-%m-%d %H:%M:%S.%f"
         ).date()
 
@@ -264,8 +264,8 @@ def make_location(row: pd.Series, elevations: dict) -> tuple:
         release_status="public" if row.PublicRelease else "private",
         nma_coordinate_notes=row.CoordinateNotes,
         nma_notes_location=row.LocationNotes,
-        legacy_date_created=legacy_date_created,
-        legacy_site_date=legacy_site_date,
+        nma_date_created=nma_date_created,
+        nma_site_date=nma_site_date,
     )
 
     return location, elevation_method

From 2e33f83842886b903f4a9c6481f656e9b5424af5 Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 3 Dec 2025 01:02:08 -0800
Subject: [PATCH 42/66] Remove legacy fields from `UpdateLocation` schema

---
 db/location.py                                |  4 ++--
 schemas/location.py                           |  8 ++------
 ...st-migration-legacy-data-retrieval.feature | 20 +++++++++----------
 .../steps/post_migration_legacy_data.py       |  2 +-
 tests/test_location.py                        | 14 ++++++-------
 tests/test_transfer_legacy_dates.py           |  8 ++++----
 6 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/db/location.py b/db/location.py
index c00c11a70..cef3d0857 100644
--- a/db/location.py
+++ b/db/location.py
@@ -66,10 +66,10 @@ class Location(Base, AutoBaseMixin, ReleaseMixin, NotesMixin, DataProvenanceMixi
     nma_date_created: Mapped[datetime.date] = mapped_column(
         Date,
         nullable=True,
-        comment="Original AMPAPI DateCreated (migration-only field)",
+        comment="Original AMPAPI DateCreated (read-only, populated only during migration)",
     )
     nma_site_date: Mapped[datetime.date] = mapped_column(
-        Date, nullable=True, comment="Original AMPAPI SiteDate (migration-only field)"
+        Date, nullable=True, comment="Original AMPAPI SiteDate (read-only, populated only during migration)"
     )
 
     # --- Relationship Definitions ---
diff --git a/schemas/location.py b/schemas/location.py
index f34c54115..fce13ef99 100644
--- a/schemas/location.py
+++ b/schemas/location.py
@@ -107,7 +107,7 @@ class GeoJSONProperties(BaseModel):
         default_factory=GeoJSONUTMCoordinates
     )
     notes: list[NoteResponse] = []
-    # AMPAPI date fields (migration-only, read-only)
+    # AMPAPI date fields (read-only, populated only during migration)
     nma_date_created: date | None = None
     nma_site_date: date | None = None
 
@@ -190,7 +190,7 @@ class LocationResponse(BaseResponseModel):
     county: str | None
     quad_name: str | None
 
-    # AMPAPI date fields (migration-only, read-only post-migration)
+    # AMPAPI date fields (read-only, populated only during migration, not in Create/Update schemas)
     nma_date_created: date | None = None
     nma_site_date: date | None = None
 
@@ -232,9 +232,5 @@ class UpdateLocation(BaseUpdateModel, ValidateLocation):
     coordinate_accuracy: float | None = None
     coordinate_method: CoordinateMethod | None = None
 
-    # AMPAPI date fields (migration-only, can be updated but not created)
-    nma_date_created: date | None = None
-    nma_site_date: date | None = None
-
 
 # ============= EOF =============================================
diff --git a/tests/features/post-migration-legacy-data-retrieval.feature b/tests/features/post-migration-legacy-data-retrieval.feature
index 61f3e8c40..99fd08190 100644
--- a/tests/features/post-migration-legacy-data-retrieval.feature
+++ b/tests/features/post-migration-legacy-data-retrieval.feature
@@ -1,16 +1,16 @@
-Feature: Post-Migration Legacy Data Retrieval
+Feature: Post-Migration AMPAPI Date Field Retrieval
   As a data manager
   After migrating data from AMPAPI to NMSampleLocations
-  I want to verify that all legacy temporal information is preserved and queryable
+  I want to verify that all AMPAPI temporal information is preserved and queryable
   So that no historical context is lost
 
   Background:
     Given a functioning api
     And the AMPAPI data has been migrated to the database
 
-  # Location Legacy Date Lookups
+  # Location AMPAPI Date Lookups (Read-Only Fields)
 
-  Scenario: Retrieve location with both legacy dates via API
+  Scenario: Retrieve location with both AMPAPI date fields via API
     Given a location exists with:
       | field                | value      |
       | nma_date_created  | 2014-04-03 |
@@ -30,14 +30,14 @@ Feature: Post-Migration Legacy Data Retrieval
     And the response should include nma_site_date as "1954-05-01"
     And the time gap should be approximately 54 years
 
-  Scenario: List all locations includes legacy date fields
-    Given 5 locations exist with various legacy dates
+  Scenario: List all locations includes AMPAPI date fields
+    Given 5 locations exist with various AMPAPI dates
     When I GET /location to list all locations
     Then each location should have a nma_date_created field
     And each location should have a nma_site_date field
     And some locations should have null nma_site_date
 
-  Scenario: Filter locations by legacy site date range
+  Scenario: Filter locations by AMPAPI site date range
     Given locations exist with nma_site_date ranging from 1950 to 2024
     When I filter locations where nma_site_date is between "2000-01-01" and "2010-12-31"
     Then the response should only include locations with nma_site_date in that decade
@@ -53,7 +53,7 @@ Feature: Post-Migration Legacy Data Retrieval
 
   # Data Quality Validation
 
-  Scenario: Verify migration preserved expected percentage of legacy dates
+  Scenario: Verify migration preserved expected percentage of AMPAPI dates
     Given 100 locations were migrated
     And 9 of them had non-null SiteDate in AMPAPI
     When I query the migrated locations
@@ -62,8 +62,8 @@ Feature: Post-Migration Legacy Data Retrieval
 
   # Audit Trail Verification
 
-  Scenario: Legacy dates preserved alongside audit timestamps
-    Given a location was migrated with legacy dates
+  Scenario: AMPAPI dates preserved alongside audit timestamps
+    Given a location was migrated with AMPAPI dates
     When I retrieve that location
     Then it should have created_at (new system timestamp from migration)
     And it should have nma_date_created (original AMPAPI DateCreated)
diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py
index 99ddd028e..d568c0296 100644
--- a/tests/features/steps/post_migration_legacy_data.py
+++ b/tests/features/steps/post_migration_legacy_data.py
@@ -32,7 +32,7 @@ def parse_number(text):
 
 
 def create_test_location(nma_date_created=None, nma_site_date=None):
-    """Helper to create a test location with legacy dates."""
+    """Helper to create a test location with AMPAPI date fields (read-only post-migration)."""
     with session_ctx() as session:
         location = Location(
             point="POINT(-106.607784 35.118924)",
diff --git a/tests/test_location.py b/tests/test_location.py
index 67a4615c8..6e143f1eb 100644
--- a/tests/test_location.py
+++ b/tests/test_location.py
@@ -250,7 +250,7 @@ def test_new_location_has_null_legacy_fields():
     assert response.status_code == 201
     data = response.json()
     assert "id" in data
-    # Legacy fields should be present in response but null (not set during creation)
+    # AMPAPI date fields should be present in response but null (not set during creation, read-only)
     assert "nma_date_created" in data
     assert "nma_site_date" in data
     assert data["nma_date_created"] is None
@@ -261,8 +261,8 @@ def test_new_location_has_null_legacy_fields():
 
 
 def test_legacy_fields_present_in_location_response():
-    """Test that legacy fields are included in location GET response"""
-    # Create a new location (without legacy fields)
+    """Test that AMPAPI date fields (read-only) are included in location GET response"""
+    # Create a new location (without AMPAPI date fields set - they're read-only)
     payload = {
         "point": "POINT (-106.607784 35.118924)",
         "elevation": 1558.8,
@@ -272,12 +272,12 @@ def test_legacy_fields_present_in_location_response():
     assert create_response.status_code == 201
     location_id = create_response.json()["id"]
 
-    # Retrieve the location and verify legacy fields are in the schema
+    # Retrieve the location and verify AMPAPI date fields are in the schema
     get_response = client.get(f"/location/{location_id}")
     assert get_response.status_code == 200
     data = get_response.json()
 
-    # Verify fields exist in response (even if null)
+    # Verify read-only fields exist in response (even if null)
     assert "nma_date_created" in data
     assert "nma_site_date" in data
     assert data["nma_date_created"] is None
@@ -288,7 +288,7 @@ def test_legacy_fields_present_in_location_response():
 
 
 def test_legacy_fields_independent_of_created_at():
-    """Test that created_at (system timestamp) is separate from legacy fields"""
+    """Test that created_at (system timestamp) is separate from AMPAPI date fields (read-only)"""
     payload = {
         "point": "POINT (-106.607784 35.118924)",
         "elevation": 1558.8,
@@ -303,7 +303,7 @@ def test_legacy_fields_independent_of_created_at():
     assert "created_at" in data
     assert data["created_at"] is not None
 
-    # nma_date_created is separate and null for new records
+    # nma_date_created is separate and null for new records (read-only, populated only during migration)
     assert "nma_date_created" in data
     assert data["nma_date_created"] is None
 
diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py
index 5068d8882..c4e06755f 100644
--- a/tests/test_transfer_legacy_dates.py
+++ b/tests/test_transfer_legacy_dates.py
@@ -14,11 +14,11 @@
 # limitations under the License.
 # ===============================================================================
 """
-Unit tests for legacy date field population during AMPAPI → NMSampleLocations migration.
+Unit tests for AMPAPI date field population during AMPAPI → NMSampleLocations migration.
 
 These tests verify that:
-1. Location.nma_date_created is populated from CSV DateCreated
-2. Location.nma_site_date is populated from CSV SiteDate (if not null)
+1. Location.nma_date_created is populated from CSV DateCreated (read-only post-migration)
+2. Location.nma_site_date is populated from CSV SiteDate if not null (read-only post-migration)
 """
 import datetime
 from unittest.mock import Mock, patch, MagicMock
@@ -29,7 +29,7 @@
 
 
 # ============================================================================
-# LOCATION LEGACY DATE TESTS
+# LOCATION AMPAPI DATE TESTS (Read-Only Post-Migration)
 # ============================================================================
 
 
From aef077b0f8e45ccbdcb8e7247128c053f329ff8f Mon Sep 17 00:00:00 2001
From: kbighorse <kbighorse@users.noreply.github.com>
Date: Wed, 3 Dec 2025 09:02:10 +0000
Subject: [PATCH 43/66] Formatting changes

---
 db/location.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/db/location.py b/db/location.py
index cef3d0857..fda4611f9 100644
--- a/db/location.py
+++ b/db/location.py
@@ -69,7 +69,9 @@ class Location(Base, AutoBaseMixin, ReleaseMixin, NotesMixin, DataProvenanceMixi
         comment="Original AMPAPI DateCreated (read-only, populated only during migration)",
     )
     nma_site_date: Mapped[datetime.date] = mapped_column(
-        Date, nullable=True, comment="Original AMPAPI SiteDate (read-only, populated only during migration)"
+        Date,
+        nullable=True,
+        comment="Original AMPAPI SiteDate (read-only, populated only during migration)",
     )
 
     # --- Relationship Definitions ---

From 6258e7de97d367e4b1c2814113457f902219b85a Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 3 Dec 2025 01:06:08 -0800
Subject: [PATCH 44/66] DRY up the mock lexicon mapper into a fixture

---
 tests/test_transfer_legacy_dates.py | 46 ++++++++++-------------------
 1 file changed, 16 insertions(+), 30 deletions(-)

diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py
index c4e06755f..badaec8b2 100644
--- a/tests/test_transfer_legacy_dates.py
+++ b/tests/test_transfer_legacy_dates.py
@@ -28,16 +28,26 @@
 from transfers.util import make_location
 
 
+# ============================================================================
+# FIXTURES
+# ============================================================================
+
+
+@pytest.fixture
+def mock_lexicon_mapper():
+    """Fixture to mock lexicon_mapper for all transfer tests"""
+    with patch("transfers.util.lexicon_mapper") as mock:
+        mock.map_value.return_value = "GPS"
+        yield mock
+
+
 # ============================================================================
 # LOCATION AMPAPI DATE TESTS (Read-Only Post-Migration)
 # ============================================================================
 
 
-@patch("transfers.util.lexicon_mapper")
 def test_make_location_with_both_legacy_dates(mock_lexicon_mapper):
     """Test that make_location populates both nma_date_created and nma_site_date"""
-    # Mock lexicon mapper to avoid GCS calls
-    mock_lexicon_mapper.map_value.return_value = "GPS"
 
     # Create a mock CSV row with both DateCreated and SiteDate
     row = pd.Series(
@@ -75,12 +85,8 @@ def test_make_location_with_both_legacy_dates(mock_lexicon_mapper):
     assert location.created_at is None
 
 
-@patch("transfers.util.lexicon_mapper")
 def test_make_location_with_only_date_created(mock_lexicon_mapper):
     """Test that make_location handles locations with only DateCreated (no SiteDate)"""
-    # Mock lexicon mapper to avoid GCS calls
-    mock_lexicon_mapper.map_value.return_value = "GPS"
-
     row = pd.Series(
         {
             "PointID": "TEST-002",
@@ -109,12 +115,8 @@ def test_make_location_with_only_date_created(mock_lexicon_mapper):
     assert location.nma_site_date is None
 
 
-@patch("transfers.util.lexicon_mapper")
 def test_make_location_with_site_date_later_than_date_created(mock_lexicon_mapper):
     """Test data anomaly: SiteDate is later than DateCreated (should still be accepted)"""
-    # Mock lexicon mapper to avoid GCS calls
-    mock_lexicon_mapper.map_value.return_value = "GPS"
-
     row = pd.Series(
         {
             "PointID": "TEST-003",
@@ -141,12 +143,8 @@ def test_make_location_with_site_date_later_than_date_created(mock_lexicon_mappe
     assert location.nma_site_date == datetime.date(2015, 6, 20)
 
 
-@patch("transfers.util.lexicon_mapper")
 def test_make_location_with_very_old_site_date(mock_lexicon_mapper):
     """Test that very old SiteDates (1950s) are preserved correctly"""
-    # Mock lexicon mapper to avoid GCS calls
-    mock_lexicon_mapper.map_value.return_value = "GPS"
-
     row = pd.Series(
         {
             "PointID": "SM-0227",  # Real example from dataset
@@ -177,12 +175,8 @@ def test_make_location_with_very_old_site_date(mock_lexicon_mapper):
     assert time_gap == 19751  # Approximately 54 years
 
 
-@patch("transfers.util.lexicon_mapper")
 def test_make_location_legacy_dates_are_date_not_datetime(mock_lexicon_mapper):
-    """Test that legacy date fields are Date type (not DateTime)"""
-    # Mock lexicon mapper to avoid GCS calls
-    mock_lexicon_mapper.map_value.return_value = "GPS"
-
+    """Test that AMPAPI date fields are Date type (not DateTime)"""
     row = pd.Series(
         {
             "PointID": "TEST-004",
@@ -216,12 +210,8 @@ def test_make_location_legacy_dates_are_date_not_datetime(mock_lexicon_mapper):
     assert location.nma_site_date == datetime.date(2002, 12, 10)
 
 
-@patch("transfers.util.lexicon_mapper")
 def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mapper):
-    """Test that legacy dates don't affect created_at timestamp"""
-    # Mock lexicon mapper to avoid GCS calls
-    mock_lexicon_mapper.map_value.return_value = "GPS"
-
+    """Test that AMPAPI dates don't affect created_at timestamp"""
     row = pd.Series(
         {
             "PointID": "TEST-005",
@@ -260,12 +250,8 @@ def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mappe
 # ============================================================================
 
 
-@patch("transfers.util.lexicon_mapper")
 def test_location_legacy_date_coverage_statistics(mock_lexicon_mapper):
-    """Test that migration preserves expected percentages of legacy dates"""
-    # Mock lexicon mapper to avoid GCS calls
-    mock_lexicon_mapper.map_value.return_value = "GPS"
-
+    """Test that migration preserves expected percentages of AMPAPI dates"""
     # Simulate 100 location records from CSV
     locations_created = 0
     locations_with_site_date = 0

From fd4562a785a2f5bfaba7ddf535bc7d53acce3161 Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 3 Dec 2025 01:12:35 -0800
Subject: [PATCH 45/66] Replace legacy python timestamp call with current
 implementation

---
 tests/features/steps/post_migration_legacy_data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py
index bd7425269..0327d6f0f 100644
--- a/tests/features/steps/post_migration_legacy_data.py
+++ b/tests/features/steps/post_migration_legacy_data.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ===============================================================================
-from datetime import date, datetime
+from datetime import date, datetime, timezone
 from behave import given, when, then, register_type
 from behave.runner import Context
 import parse
@@ -394,7 +394,7 @@ def step_then_all_queryable(context: Context):
 def step_then_created_at_recent(context: Context):
     """Assert created_at is recent."""
     created_at = context.retrieved_location.created_at.replace(tzinfo=None)
-    now = datetime.utcnow()
+    now = datetime.now(timezone.utc).replace(tzinfo=None)
     diff_seconds = abs((now - created_at).total_seconds())
     assert diff_seconds < 3600, "created_at should be within last hour"
 

From 5b1a07dd10eb672441a166e7241af82ea29b77d6 Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 3 Dec 2025 01:20:16 -0800
Subject: [PATCH 46/66] Preserve timezone in comparison

---
 tests/features/steps/post_migration_legacy_data.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py
index 0327d6f0f..6e504734e 100644
--- a/tests/features/steps/post_migration_legacy_data.py
+++ b/tests/features/steps/post_migration_legacy_data.py
@@ -393,8 +393,13 @@ def step_then_all_queryable(context: Context):
 @then("created_at should be a recent timestamp")
 def step_then_created_at_recent(context: Context):
     """Assert created_at is recent."""
-    created_at = context.retrieved_location.created_at.replace(tzinfo=None)
-    now = datetime.now(timezone.utc).replace(tzinfo=None)
+    created_at = context.retrieved_location.created_at
+    now = datetime.now(timezone.utc)
+
+    # Ensure both datetimes are timezone-aware for accurate comparison
+    if created_at.tzinfo is None:
+        created_at = created_at.replace(tzinfo=timezone.utc)
+
     diff_seconds = abs((now - created_at).total_seconds())
     assert diff_seconds < 3600, "created_at should be within last hour"
 

From b92a9864a38bff51a03a4d0c8500ef81b9161f2e Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 3 Dec 2025 01:23:16 -0800
Subject: [PATCH 47/66] Make features more human-readable

---
 .../post-migration-legacy-data-retrieval.feature   | 12 ++++++------
 tests/features/steps/post_migration_legacy_data.py | 14 +++++++-------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/features/post-migration-legacy-data-retrieval.feature b/tests/features/post-migration-legacy-data-retrieval.feature
index 99fd08190..13b2b347d 100644
--- a/tests/features/post-migration-legacy-data-retrieval.feature
+++ b/tests/features/post-migration-legacy-data-retrieval.feature
@@ -33,16 +33,16 @@ Feature: Post-Migration AMPAPI Date Field Retrieval
   Scenario: List all locations includes AMPAPI date fields
     Given 5 locations exist with various AMPAPI dates
     When I GET /location to list all locations
-    Then each location should have a nma_date_created field
-    And each location should have a nma_site_date field
-    And some locations should have null nma_site_date
+    Then each location should have a date created field
+    And each location should have a site date field
+    And some locations should have null site date
 
   Scenario: Filter locations by AMPAPI site date range
     Given locations exist with nma_site_date ranging from 1950 to 2024
     When I filter locations where nma_site_date is between "2000-01-01" and "2010-12-31"
-    Then the response should only include locations with nma_site_date in that decade
-    And locations with nma_site_date before 2000 should not be included
-    And locations with nma_site_date after 2010 should not be included
+    Then the response should only include locations with site date in that decade
+    And locations with site date before 2000 should not be included
+    And locations with site date after 2010 should not be included
 
   Scenario: Query location by nma_date_created
     Given 3 locations exist with nma_date_created "2014-04-03"
diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py
index 6e504734e..bf6e8b443 100644
--- a/tests/features/steps/post_migration_legacy_data.py
+++ b/tests/features/steps/post_migration_legacy_data.py
@@ -270,28 +270,28 @@ def step_then_time_gap_years(context: Context, years: str):
     ), f"Expected ~{expected_years} year gap, got {gap_years:.1f} years"
 
 
-@then("each location should have a nma_date_created field")
+@then("each location should have a date created field")
 def step_then_all_have_legacy_field(context: Context):
-    """Assert all locations have the field."""
+    """Assert all locations have the date created field."""
     items = context.locations_response.get("items", [])
     for item in items:
         assert "nma_date_created" in item, f"Location missing nma_date_created"
 
 
-@then("each location should have a nma_site_date field")
+@then("each location should have a site date field")
 def step_then_all_have_site_date_field(context: Context):
-    """Assert all locations have the field."""
+    """Assert all locations have the site date field."""
     items = context.locations_response.get("items", [])
     for item in items:
         assert "nma_site_date" in item, f"Location missing nma_site_date"
 
 
-@then("some locations should have null nma_site_date")
+@then("some locations should have null site date")
 def step_then_some_null_site_date(context: Context):
-    """Assert some locations have null."""
+    """Assert some locations have null site date."""
     items = context.locations_response.get("items", [])
     null_count = sum(1 for item in items if item.get("nma_site_date") is None)
-    assert null_count > 0, "Expected at least one location with null nma_site_date"
+    assert null_count > 0, "Expected at least one location with null site date"
 
 
 @then("the response should only include locations with site date in that decade")

From 94addc7315ee69b35703a1068c8d37f96d328121 Mon Sep 17 00:00:00 2001
From: kbighorse <kbighorse@users.noreply.github.com>
Date: Wed, 3 Dec 2025 09:26:00 +0000
Subject: [PATCH 48/66] Formatting changes

---
 tests/test_transfer_legacy_dates.py | 32 +++++++++++++++--------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py
index c298b129e..e2b4ca0f2 100644
--- a/tests/test_transfer_legacy_dates.py
+++ b/tests/test_transfer_legacy_dates.py
@@ -255,21 +255,23 @@ def test_location_legacy_date_coverage_statistics(mock_lexicon_mapper):
 
     def create_test_row(i, has_site_date):
         """Helper to create test row with common fields"""
-        return pd.Series({
-            "PointID": f"TEST-{i:03d}",
-            "Easting": 350000 + i,
-            "Northing": 3880000 + i,
-            "DateCreated": "2014-04-03 00:00:00.000",
-            "SiteDate": "2002-12-10 00:00:00.000" if has_site_date else None,
-            "Altitude": 1558.8,
-            "AltDatum": "NAVD88",
-            "AltitudeMethod": "GPS",
-            "LocationId": i,
-            "PublicRelease": True,
-            "CoordinateNotes": None,
-            "LocationNotes": None,
-            "AltitudeAccuracy": None,
-        })
+        return pd.Series(
+            {
+                "PointID": f"TEST-{i:03d}",
+                "Easting": 350000 + i,
+                "Northing": 3880000 + i,
+                "DateCreated": "2014-04-03 00:00:00.000",
+                "SiteDate": "2002-12-10 00:00:00.000" if has_site_date else None,
+                "Altitude": 1558.8,
+                "AltDatum": "NAVD88",
+                "AltitudeMethod": "GPS",
+                "LocationId": i,
+                "PublicRelease": True,
+                "CoordinateNotes": None,
+                "LocationNotes": None,
+                "AltitudeAccuracy": None,
+            }
+        )
 
     # Simulate 100 location records from CSV (9% with SiteDate, 91% without)
     locations_created = 0

From 0b4d77d181052170de06ea251dc6881f2a797f0f Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 3 Dec 2025 01:26:07 -0800
Subject: [PATCH 49/66] Simulate CSV rows more effiiently

---
 tests/test_transfer_legacy_dates.py | 62 ++++++++++-------------------
 1 file changed, 22 insertions(+), 40 deletions(-)

diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py
index badaec8b2..c298b129e 100644
--- a/tests/test_transfer_legacy_dates.py
+++ b/tests/test_transfer_legacy_dates.py
@@ -252,55 +252,37 @@ def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mappe
 
 def test_location_legacy_date_coverage_statistics(mock_lexicon_mapper):
     """Test that migration preserves expected percentages of AMPAPI dates"""
-    # Simulate 100 location records from CSV
+
+    def create_test_row(i, has_site_date):
+        """Helper to create test row with common fields"""
+        return pd.Series({
+            "PointID": f"TEST-{i:03d}",
+            "Easting": 350000 + i,
+            "Northing": 3880000 + i,
+            "DateCreated": "2014-04-03 00:00:00.000",
+            "SiteDate": "2002-12-10 00:00:00.000" if has_site_date else None,
+            "Altitude": 1558.8,
+            "AltDatum": "NAVD88",
+            "AltitudeMethod": "GPS",
+            "LocationId": i,
+            "PublicRelease": True,
+            "CoordinateNotes": None,
+            "LocationNotes": None,
+            "AltitudeAccuracy": None,
+        })
+
+    # Simulate 100 location records from CSV (9% with SiteDate, 91% without)
     locations_created = 0
     locations_with_site_date = 0
+    elevations = {}
 
     for i in range(100):
-        if i < 9:  # 9% have SiteDate
-            row = pd.Series(
-                {
-                    "PointID": f"TEST-{i:03d}",
-                    "Easting": 350000 + i,
-                    "Northing": 3880000 + i,
-                    "DateCreated": "2014-04-03 00:00:00.000",
-                    "SiteDate": "2002-12-10 00:00:00.000",
-                    "Altitude": 1558.8,
-                    "AltDatum": "NAVD88",
-                    "AltitudeMethod": "GPS",
-                    "LocationId": i,
-                    "PublicRelease": True,
-                    "CoordinateNotes": None,
-                    "LocationNotes": None,
-                    "AltitudeAccuracy": None,
-                }
-            )
-        else:  # 91% don't have SiteDate
-            row = pd.Series(
-                {
-                    "PointID": f"TEST-{i:03d}",
-                    "Easting": 350000 + i,
-                    "Northing": 3880000 + i,
-                    "DateCreated": "2014-04-03 00:00:00.000",
-                    "SiteDate": None,
-                    "Altitude": 1558.8,
-                    "AltDatum": "NAVD88",
-                    "AltitudeMethod": "GPS",
-                    "LocationId": i,
-                    "PublicRelease": True,
-                    "CoordinateNotes": None,
-                    "LocationNotes": None,
-                    "AltitudeAccuracy": None,
-                }
-            )
-
-        elevations = {}
+        row = create_test_row(i, has_site_date=(i < 9))
         location, _ = make_location(row, elevations)
 
         # Count coverage
         if location.nma_date_created is not None:
             locations_created += 1
-
         if location.nma_site_date is not None:
             locations_with_site_date += 1
 

From 2d12844f305091758277b45a49976170397c06e3 Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 3 Dec 2025 01:39:05 -0800
Subject: [PATCH 50/66] Replace `legacy_` in method names

---
 .../steps/post_migration_legacy_data.py       | 26 +++++++++----------
 tests/test_location.py                        | 10 +++----
 tests/test_transfer_legacy_dates.py           |  8 +++---
 3 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py
index bf6e8b443..5850bf04e 100644
--- a/tests/features/steps/post_migration_legacy_data.py
+++ b/tests/features/steps/post_migration_legacy_data.py
@@ -91,9 +91,9 @@ def step_given_multiple_locations(context: Context, count: int):
     ]
 
     for i in range(min(count, len(test_data))):
-        legacy_date, site_date = test_data[i]
+        created_date, site_date = test_data[i]
         location = create_test_location(
-            nma_date_created=date.fromisoformat(legacy_date),
+            nma_date_created=date.fromisoformat(created_date),
             nma_site_date=(date.fromisoformat(site_date) if site_date else None),
         )
         context.test_locations.append(location)
@@ -203,7 +203,7 @@ def step_when_filter_locations(context: Context, start_date: str, end_date: str)
 
 
 @when('I query for locations with nma_date_created "{target_date}"')
-def step_when_query_by_legacy_date(context: Context, target_date: str):
+def step_when_query_by_ampapi_date(context: Context, target_date: str):
     """Query locations by nma_date_created."""
     with session_ctx() as session:
         target = date.fromisoformat(target_date)
@@ -251,16 +251,16 @@ def step_then_nma_site_date(context: Context, expected_date: str):
 @then("the time gap should be approximately {years} years")
 def step_then_time_gap_years(context: Context, years: str):
     """Assert approximate year gap."""
-    legacy_str = context.location_response.get("nma_date_created")
+    created_str = context.location_response.get("nma_date_created")
     site_date_str = context.location_response.get("nma_site_date")
 
-    if not legacy_str or not site_date_str:
+    if not created_str or not site_date_str:
         raise AssertionError("Missing date fields for gap calculation")
 
-    legacy_date = date.fromisoformat(legacy_str)
+    created_date = date.fromisoformat(created_str)
     site_date = date.fromisoformat(site_date_str)
 
-    gap_days = (legacy_date - site_date).days
+    gap_days = (created_date - site_date).days
     gap_years = gap_days / 365.25
 
     expected_years = float(years)
@@ -271,7 +271,7 @@ def step_then_time_gap_years(context: Context, years: str):
 
 
 @then("each location should have a date created field")
-def step_then_all_have_legacy_field(context: Context):
+def step_then_all_have_date_created_field(context: Context):
     """Assert all locations have the date created field."""
     items = context.locations_response.get("items", [])
     for item in items:
@@ -371,7 +371,7 @@ def step_then_has_created_at(context: Context):
 
 
 @then("it should have nma_date_created (original AMPAPI DateCreated)")
-def step_then_has_legacy_date(context: Context):
+def step_then_has_ampapi_date_created(context: Context):
     """Assert nma_date_created exists."""
     assert context.retrieved_location.nma_date_created is not None
 
@@ -405,14 +405,14 @@ def step_then_created_at_recent(context: Context):
 
 
 @then("nma_date_created should be an older date")
-def step_then_legacy_date_older(context: Context):
+def step_then_ampapi_date_older(context: Context):
     """Assert nma_date_created is old."""
-    legacy_date = context.retrieved_location.nma_date_created
-    assert legacy_date.year < 2024, "nma_date_created should be from the past"
+    ampapi_created_date = context.retrieved_location.nma_date_created
+    assert ampapi_created_date.year < 2024, "nma_date_created should be from the past"
 
 
 @then('nma_date_created should be "{expected_date}"')
-def step_then_legacy_is(context: Context, expected_date: str):
+def step_then_ampapi_created_is(context: Context, expected_date: str):
     """Assert nma_date_created value."""
     actual = context.retrieved_location.nma_date_created
     expected = date.fromisoformat(expected_date)
diff --git a/tests/test_location.py b/tests/test_location.py
index 6e143f1eb..9dcb3d098 100644
--- a/tests/test_location.py
+++ b/tests/test_location.py
@@ -235,11 +235,11 @@ def test_delete_location_404_not_found(second_location):
     assert data["detail"] == f"Location with ID {bad_location_id} not found."
 
 
-#  ============= Legacy date field tests =======================================
+#  ============= AMPAPI date field tests =======================================
 
 
-def test_new_location_has_null_legacy_fields():
-    """Test that newly created locations have null legacy date fields (legacy fields are migration-only)"""
+def test_new_location_has_null_ampapi_fields():
+    """Test that newly created locations have null AMPAPI date fields (AMPAPI fields are migration-only)"""
     payload = {
         "point": "POINT (-106.607784 35.118924)",
         "elevation": 1558.8,
@@ -260,7 +260,7 @@ def test_new_location_has_null_legacy_fields():
     cleanup_post_test(Location, data["id"])
 
 
-def test_legacy_fields_present_in_location_response():
+def test_ampapi_fields_present_in_location_response():
     """Test that AMPAPI date fields (read-only) are included in location GET response"""
     # Create a new location (without AMPAPI date fields set - they're read-only)
     payload = {
@@ -287,7 +287,7 @@ def test_legacy_fields_present_in_location_response():
     cleanup_post_test(Location, location_id)
 
 
-def test_legacy_fields_independent_of_created_at():
+def test_ampapi_fields_independent_of_created_at():
     """Test that created_at (system timestamp) is separate from AMPAPI date fields (read-only)"""
     payload = {
         "point": "POINT (-106.607784 35.118924)",
diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py
index c298b129e..d700ab470 100644
--- a/tests/test_transfer_legacy_dates.py
+++ b/tests/test_transfer_legacy_dates.py
@@ -46,7 +46,7 @@ def mock_lexicon_mapper():
 # ============================================================================
 
 
-def test_make_location_with_both_legacy_dates(mock_lexicon_mapper):
+def test_make_location_with_both_ampapi_dates(mock_lexicon_mapper):
     """Test that make_location populates both nma_date_created and nma_site_date"""
 
     # Create a mock CSV row with both DateCreated and SiteDate
@@ -175,7 +175,7 @@ def test_make_location_with_very_old_site_date(mock_lexicon_mapper):
     assert time_gap == 19751  # Approximately 54 years
 
 
-def test_make_location_legacy_dates_are_date_not_datetime(mock_lexicon_mapper):
+def test_make_location_ampapi_dates_are_date_not_datetime(mock_lexicon_mapper):
     """Test that AMPAPI date fields are Date type (not DateTime)"""
     row = pd.Series(
         {
@@ -210,7 +210,7 @@ def test_make_location_legacy_dates_are_date_not_datetime(mock_lexicon_mapper):
     assert location.nma_site_date == datetime.date(2002, 12, 10)
 
 
-def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mapper):
+def test_make_location_ampapi_dates_independent_of_created_at(mock_lexicon_mapper):
     """Test that AMPAPI dates don't affect created_at timestamp"""
     row = pd.Series(
         {
@@ -250,7 +250,7 @@ def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mappe
 # ============================================================================
 
 
-def test_location_legacy_date_coverage_statistics(mock_lexicon_mapper):
+def test_location_ampapi_date_coverage_statistics(mock_lexicon_mapper):
     """Test that migration preserves expected percentages of AMPAPI dates"""
 
     def create_test_row(i, has_site_date):

From 8c96e72d21fedcc7b939eb63b1e18effb1ac7eda Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 3 Dec 2025 01:46:39 -0800
Subject: [PATCH 51/66] Increase code test coverage

---
 .coverage                           | Bin 0 -> 53248 bytes
 tests/test_transfer_legacy_dates.py |  56 ++++
 transfers/util.py,cover             | 461 ++++++++++++++++++++++++++++
 3 files changed, 517 insertions(+)
 create mode 100644 .coverage
 create mode 100644 transfers/util.py,cover

diff --git a/.coverage b/.coverage
new file mode 100644
index 0000000000000000000000000000000000000000..5417251745baf9f63193ac63e03d4d4b0edab5c4
GIT binary patch
literal 53248
zcmeI)&u$Y(90%}S+t^vh)&|kYiXxQhftrXU#_geMdw@`)3Y8WpX)l#9V|$!z@$Q=4
zH6(|sHB>@E>Kjyj1o{X(L!W>iIrY{{p;CXpon8M4HsVkeNWUxF**`P0GoSfq$JSiG
zcG-!P==gyvBk_T;V3?+HUI@c5EP9sdnQjHz$)yMM*BslQwQCt058o-<-x#I*&xU=Y
ze8pZb{Zf8!{`Zn=J)HlwAgvZ%zy<*bKmY>&Z-K$JQlYZ6WZwHelFhD)0@+f5el36c
z)z;?rme}6BaCuAUbK-PS(6+uVHbvmy6+IP*j?-1b@!C#HMvk{5qFt4S?uV+)6CItS
zsg46~#PNBr;j}4Mq;@EYUf{Sg*cZ3d{#qO$xm%?6B7K4qQO=IXA;jq<`l<+2M+M4j
zsZi(Qw9{TKuHOHlSg4#hVLniCnPkwS$NWTT=)|ZfmpYVC%MaR|`(_}$)~*WIgdCQz
z<x@WQB3;_RcWaR^x{enY(+i!*@japTRI4AU_S6EdF<QD2-gs#!t(#n+n>uE4uQ8ob
z<S8+e;7Q~TWzO?c)=@;hHARl|?DgFyk$=~Bdt5mbL*F>^XT_WqH}aFhzH;46smoD$
zC#o|g=Ni?q`kr&UuZESsCPp#W;$nFIn&3iE3ph2OuY7dEG^kaoz3_II-fuNzKl0<_
z233BePA{dOj?Wb;D=X%M+i^o-o<`G&!r_iJ(P)OJXEYtYKD*s;FPUs4+>5$h;dkOL
z!?A^~9BR$ueWsRT$bdVV@RNX)(?e}-oLS0)Xm<VPWLt|{OsJ@X&{&WL>-4=Aca@OK
z(DLu7K<=oG<f<MP4!a>y)2Une%GS!PrZa2;jrwr6I9SRTD$C2}Ju7ZTNo^&63KLBz
zt;s3FXnM(2GVBNGK=QIdGL11BCDT5VT%qEu7xI<O<ypl^!Zqs2&fK7oD^wO2&4KO_
zJTCF`*hC@t%%}?-m<V1y`#l-$iWl?FrzzvrWBJM_i?cG$S!mSxHGluxOq}HhOR>I7
zgFZjA6Itdnqbz@}_Sf&oE{!HV=>*heX$;mgfZ%{_Nr7e4??-X_<-@pn^4v4ZuTEfe
zoZ3^U)i^kH@2h?oIy9~Fv=x4wo)y(y8S*F;Rz=hIyGnX`ZlD;KLb|6DiZ~B1&PwM-
zX@vWn#?GB;oTqeh;bDH8I$heoXvcM>*G}duo`!X*H1>1n<ooDBgd&P6b;^*s=z%oT
zM3bvLVx{SC^vHx$b!A(3cdnGN6z}e;7n0Mn(M1`l$Z?h6AaQ2mhILnlq7^8*NOS3G
zHT$w=z_rz=^DBQ!p5wQ2gB7z-IdjGw^pno0$C*Y*le#O@Y$u1AiAHkZ;;6k`p#hiP
z?Toh*wj1N%LwyGx+;j~#wL%&yd9PDk<q!C#{mh^THV8lf0uX=z1Rwwb2tWV=5P$##
zj-Ei)%$NoK{GYM^Fzi3+1sen)009U<00Izz00bZa0SG_<0&k^2DU&^Er@!zxyI^LQ
zmy+KAe7I3RyYYUJN|mvn8unBBueY**$TS2X009U<00Izz00bZa0SG_<0z-jP_N1Bq
z8z586E?dce0rc<x9~t(eVNmQq00Izz00bZa0SG_<0uX=z1R!vz0$0tPF*sHGI;52m
zYPTG>DZ5=-kfp6^w0?oS#0wY&wC<7beHQrrUhT^-u1mMqRhNBU4T%;zsnJqKUP#Ln
zg|&X<bZNZ={{G*ze>3QT4FV8=00bZa0SG_<0uX=z1Rwx`H$&iRZg9%_FF*fd{l7P(
zVv!37KmY;|fB*y_009U<00Izzz-to7npvZ)zyE)3*w0_nU63pUAOHafKmY;|fB*y_
z009U<00M_9kj<7Y{r&%A!+w0IS;5UA009U<00Izz00bZa0SG_<0uY!jaLzntSp&NM
z_=)xR%Dc3)aMAeDSjZX1oI!^d+Bb~zOHcl(7RJ*MqIX;$=k)jg&kXz7cmV7}00Izz
z00bZa0SG_<0uX=z1R!uk1adjciogFi7LG`XPz(q_00Izz00bZa0SG_<0uX=z1f~k`
z_y73(Ka~b2ApijgKmY;|fB*y_009U<00Kuw0PFuBnW{uVAOHafKmY;|fB*y_009U<
M00Pql;`RUk1r+UXZU6uP

literal 0
HcmV?d00001

diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py
index 5129b7d9c..985214fbb 100644
--- a/tests/test_transfer_legacy_dates.py
+++ b/tests/test_transfer_legacy_dates.py
@@ -250,6 +250,62 @@ def test_make_location_ampapi_dates_independent_of_created_at(mock_lexicon_mappe
 # ============================================================================
 
 
+def test_make_location_with_no_ampapi_dates(mock_lexicon_mapper):
+    """Test that make_location handles locations with no AMPAPI dates (both null)"""
+    row = pd.Series(
+        {
+            "PointID": "TEST-NODATES",
+            "Easting": 350000,
+            "Northing": 3880000,
+            "DateCreated": None,  # No DateCreated
+            "SiteDate": None,  # No SiteDate
+            "Altitude": 1558.8,
+            "AltDatum": "NAVD88",
+            "AltitudeMethod": "GPS",
+            "LocationId": 999,
+            "PublicRelease": True,
+            "CoordinateNotes": None,
+            "LocationNotes": None,
+            "AltitudeAccuracy": None,
+        }
+    )
+
+    elevations = {}
+    location, elevation_method = make_location(row, elevations)
+
+    # Both AMPAPI date fields should be null
+    assert location.nma_date_created is None
+    assert location.nma_site_date is None
+
+
+def test_make_location_with_empty_string_dates(mock_lexicon_mapper):
+    """Test that make_location handles empty string dates (CSV edge case)"""
+    row = pd.Series(
+        {
+            "PointID": "TEST-EMPTY",
+            "Easting": 350000,
+            "Northing": 3880000,
+            "DateCreated": "",  # Empty string
+            "SiteDate": "",  # Empty string
+            "Altitude": 1558.8,
+            "AltDatum": "NAVD88",
+            "AltitudeMethod": "GPS",
+            "LocationId": 998,
+            "PublicRelease": True,
+            "CoordinateNotes": None,
+            "LocationNotes": None,
+            "AltitudeAccuracy": None,
+        }
+    )
+
+    elevations = {}
+    location, elevation_method = make_location(row, elevations)
+
+    # Both AMPAPI date fields should be null (empty strings are falsy)
+    assert location.nma_date_created is None
+    assert location.nma_site_date is None
+
+
 def test_location_ampapi_date_coverage_statistics(mock_lexicon_mapper):
     """Test that migration preserves expected percentages of AMPAPI dates"""
 
diff --git a/transfers/util.py,cover b/transfers/util.py,cover
new file mode 100644
index 000000000..5c2803392
--- /dev/null
+++ b/transfers/util.py,cover
@@ -0,0 +1,461 @@
+  # ===============================================================================
+  # Copyright 2025 ross
+  #
+  # Licensed under the Apache License, Version 2.0 (the "License");
+  # you may not use this file except in compliance with the License.
+  # You may obtain a copy of the License at
+  #
+  # http://www.apache.org/licenses/LICENSE-2.0
+  #
+  # Unless required by applicable law or agreed to in writing, software
+  # distributed under the License is distributed on an "AS IS" BASIS,
+  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  # See the License for the specific language governing permissions and
+  # limitations under the License.
+  # ===============================================================================
+> import csv
+> import io
+> import os
+> import re
+> from datetime import datetime, timezone, timedelta
+> from pathlib import Path
+  
+> import numpy as np
+> import pandas as pd
+> import pytz
+> from shapely import Point
+> from sqlalchemy import select
+> from sqlalchemy.orm import Session
+  
+> from constants import SRID_WGS84, SRID_UTM_ZONE_13N
+> from db import Thing, Location, DataProvenance
+> from services.gcs_helper import get_storage_bucket
+  
+  # from services.lexicon_mapper import lexicon_mapper
+> from services.util import (
+>     transform_srid,
+>     get_epqs_elevation_from_point,
+>     convert_ft_to_m,
+>     convert_ngvd29_to_navd88,
+> )
+> from transfers.logger import logger
+  
+> NMA_COORDINATE_ACCURACY = {
+>     "5m": (5, "m"),
+>     "1": (0.1, "second"),
+>     "5": (0.5, "second"),
+>     "F": (5, "second"),
+>     "H": (0.01, "second"),
+>     "M": (1, "minute"),
+>     "R": (3, "second"),
+>     "S": (1, "second"),
+>     "T": (10, "second"),
+> }
+  
+  
+> def replace_nans(df: pd.DataFrame, default=None) -> pd.DataFrame:
+!     df = df.replace(pd.NA, default)
+!     return df.replace({np.nan: default})
+  
+  
+> def read_csv(name: str, dtype: dict | None = None) -> pd.DataFrame:
+      # Try to read from local data directory first
+!     local_file = Path(__file__).parent / "data" / f"{name}.csv"
+  
+!     if local_file.exists():
+!         logger.info(f"Reading {name} from local file: {local_file}")
+!         if dtype:
+!             return pd.read_csv(local_file, dtype=dtype)
+!         else:
+!             return pd.read_csv(local_file)
+  
+      # Check cache directory
+!     p = get_transfers_data_path(Path("nma_csv_cache") / f"{name}.csv")
+!     if os.path.exists(p):
+!         logger.info(f"Reading {name} from cache: {p}")
+!         return pd.read_csv(p, dtype=dtype)
+  
+      # Fall back to GCS if local file doesn't exist
+!     logger.info(f"Local file and cache not found, reading {name} from GCS")
+!     bucket = get_storage_bucket()
+!     blob = bucket.blob(f"nma_csv/{name}.csv")
+!     data = blob.download_as_bytes()
+!     with open(p, "wb") as f:
+!         f.write(data)
+  
+!     if dtype:
+!         return pd.read_csv(io.BytesIO(data), dtype=dtype)
+!     else:
+!         return pd.read_csv(io.BytesIO(data))
+  
+  
+> def get_valid_point_ids(session, thing_type="water well"):
+!     things = get_valid_things(session, thing_type)
+!     valid_pointids = [thing.name for thing in things]
+!     return valid_pointids
+  
+  
+> def get_valid_things(session, thing_type="water well"):
+!     return session.query(Thing).where(Thing.thing_type == thing_type).all()
+  
+  
+> def extract_organization(alternate_id: str) -> str:
+!     if alternate_id.startswith("TWDB"):
+!         return "TWDB"
+!     elif alternate_id.startswith("NMED"):
+!         return "NMED"
+  
+      # TODO: There are a bunch of other formats used for AlternateSiteID.
+      # we should try to handle as many as possible but its not the end of the world
+      # if we have to update the organization for a particular alternate id at a later time
+!     for regex, org in ((r"^A-Z{1,2}-\d{5,6}$", "NMOSE"), (r"\d+(\.\d+){3,}", "PLSS")):
+  
+!         if re.match(regex, alternate_id):
+!             return org
+  
+!     return "Unknown"
+  
+  
+> def get_transfers_data_path(name):
+!     def data_path(r):
+!         return Path(r) / "transfers" / "data"
+  
+!     root = data_path("/workspace")
+!     if not os.path.exists(root):
+!         root = data_path("..")
+!         if not os.path.exists(root):
+!             root = data_path(".")
+  
+!     return root / name
+  
+  
+> def filter_non_transferred_wells(sess: Session, df: pd.DataFrame) -> pd.DataFrame:
+!     sql = select(Thing.name).where(Thing.thing_type == "water well")
+!     existing_ids = sess.execute(sql).scalars().all()
+!     return df[~(df["PointID"].isin(existing_ids))]
+  
+  
+> def filter_by_welldata_datasource_and_project(df: pd.DataFrame) -> pd.DataFrame:
+!     path = get_transfers_data_path("valid_welldata_datasources.csv")
+!     with open(path, "r") as f:
+!         reader = csv.reader(f)
+!         _ = next(reader)
+!         valid_datasources = [row[0] for row in reader if row[1] == "Yes"]
+!         f.seek(0)
+!         invalid_datasources = [row[0] for row in reader if row[1] == "NO"]
+!         logger.info("Invalid WellData Datasources:")
+!         for vd in invalid_datasources:
+!             logger.info(f"  {vd}")
+  
+!     counts = df.groupby("DataSource").size().reset_index(name="WellCount")
+!     counts = counts.sort_values("WellCount", ascending=False)
+!     for count in counts.itertuples():
+!         logger.info(f"{count.DataSource}: {count.WellCount}")
+  
+!     pldf = read_csv("ProjectLocations")
+!     collabnet = pldf[pldf["ProjectName"] == "Water Level Network"]
+!     return df[
+!         df["DataSource"].isin(valid_datasources)
+!         | df["PointID"].isin(collabnet["PointID"])
+!     ]
+  
+  
+> def filter_by_valid_measuring_agency(df: pd.DataFrame) -> pd.DataFrame:
+!     path = get_transfers_data_path("valid_measuring_agency.csv")
+  
+!     with open(path, "r") as f:
+!         reader = csv.reader(f)
+!         _ = next(reader)
+!         valid_measuring_agencies = [row[0] for row in reader if row[1] == "Yes"]
+!         logger.info("Valid Measuring Agencies:")
+!         for vma in valid_measuring_agencies:
+!             logger.info(f"  {vma}")
+!     return df[df["MeasuringAgency"].isin(valid_measuring_agencies)]
+  
+  
+> def filter_to_valid_point_ids(session: Session, df: pd.DataFrame) -> pd.DataFrame:
+!     valid_point_ids = get_valid_point_ids(session)
+!     return df[df["PointID"].isin(valid_point_ids)]
+  
+  
+> def convert_mt_to_utc(dt_record: datetime):
+!     t = dt_record.time()
+!     if t.hour == 0 and t.minute == 0:
+          # no time was measured, so just set the timezone to UTC and keep
+          # time at 00:00
+!         dt_record = dt_record.replace(tzinfo=timezone.utc)
+!     else:
+!         tz = pytz.timezone("America/Denver")
+!         dt_record = tz.localize(dt_record)
+!         if dt_record.dst() == timedelta(0):
+              # MST
+!             utc_offset = 7
+!         else:
+              # MDT
+!             utc_offset = 6
+!         dt_record = dt_record - timedelta(hours=utc_offset)
+!         dt_record = dt_record.replace(tzinfo=timezone.utc)
+!     return dt_record
+  
+  
+> def chunk_by_size(df, chunk_size):
+!     for i in range(0, len(df), chunk_size):
+!         yield df.iloc[i : i + chunk_size]
+  
+  
+> def make_location(row: pd.Series, elevations: dict) -> tuple:
+>     """
+>     Returns a tuple of location data and the elevation method
+>     """
+>     point = Point(row.Easting, row.Northing)
+  
+      # Convert the point to a WGS84 coordinate system
+>     transformed_point = transform_srid(
+>         point, source_srid=SRID_UTM_ZONE_13N, target_srid=SRID_WGS84
+>     )
+  
+>     z = row.Altitude
+>     if z:
+>         elevation_from_epqs = False
+>         z = convert_ft_to_m(z)
+  
+>         if row.AltDatum == "NGVD29":
+!             key = f"{row.PointID}, {transformed_point.x, transformed_point.y}"
+!             if key in elevations:
+!                 z = elevations[key]
+!             else:
+!                 z = convert_ngvd29_to_navd88(
+!                     z, transformed_point.x, transformed_point.y
+!                 )
+!             elevations[key] = z
+!     else:
+!         elevation_from_epqs = True
+!         logger.info(
+!             f"Location {row.PointID} has no Altitude. Setting from National Map EPQS for "
+!         )
+!         z = get_epqs_elevation_from_point(transformed_point.x, transformed_point.y)
+  
+>     if elevation_from_epqs:
+!         elevation_method = "USGS National Elevation Dataset (NED)"
+>     elif pd.isna(row.AltitudeMethod):
+!         elevation_method = None
+>     else:
+>         elevation_method = lexicon_mapper.map_value(
+>             f"LU_AltitudeMethod:{row.AltitudeMethod.strip()}"
+>         )
+  
+      # Extract AMPAPI date fields (Date type, not DateTime)
+>     nma_date_created = None
+>     if row.DateCreated:
+>         nma_date_created = datetime.strptime(
+>             row.DateCreated, "%Y-%m-%d %H:%M:%S.%f"
+>         ).date()
+  
+>     nma_site_date = None
+>     if row.SiteDate:
+>         nma_site_date = datetime.strptime(row.SiteDate, "%Y-%m-%d %H:%M:%S.%f").date()
+  
+>     location = Location(
+>         nma_pk_location=row.LocationId,
+>         point=transformed_point.wkt,
+>         elevation=z,
+>         release_status="public" if row.PublicRelease else "private",
+>         nma_coordinate_notes=row.CoordinateNotes,
+>         nma_notes_location=row.LocationNotes,
+>         nma_date_created=nma_date_created,
+>         nma_site_date=nma_site_date,
+>     )
+  
+>     return location, elevation_method
+  
+  
+> def make_location_data_provenance(
+>     row: pd.Series, location: Location, elevation_method: str | None
+> ) -> list[DataProvenance]:
+!     provenance_records = []
+  
+!     if row.AltitudeAccuracy or row.CoordinateAccuracy:
+!         provenance = DataProvenance(
+!             target_id=location.id,
+!             target_table="location",
+!             field_name="elevation",
+!             origin_source=None,
+!             collection_method=elevation_method,
+!             accuracy_value=(
+!                 None
+!                 if pd.isna(row.AltitudeAccuracy)
+!                 else convert_ft_to_m(row.AltitudeAccuracy)
+!             ),
+!             accuracy_unit="m",
+!         )
+!         provenance_records.append(provenance)
+  
+      # TODO: AMP feedback is required for transfering coordinate accuracy values
+      #       from NM_Aquifer to Ocotillo
+      # if row.CoordinateAccuracy == "U" or pd.isna(row.CoordinateAccuracy):
+      #     # map "Unknown" to None
+      #     row.CoordinateAccuracy = None
+      # elif row.CoordinateAccuracy == "5m":
+      #     row.CoordinateAccuracy = 5.0
+      # else:
+      #     seconds = 0
+      #     minutes = 0
+      #     if row.CoordinateAccuracy == "1":
+      #         seconds = 0.1
+      #     elif row.CoordinateAccuracy == "5":
+      #         seconds = 0.5
+      #     elif row.CoordinateAccuracy == "F":
+      #         seconds = 5
+      #     elif row.CoordinateAccuracy == "H":
+      #         seconds = 0.01
+      #     elif row.CoordinateAccuracy == "M":
+      #         minutes = 1
+      #     elif row.CoordinateAccuracy == "R":
+      #         seconds = 3
+      #     elif row.CoordinateAccuracy == "S":
+      #         seconds = 1
+      #     else:
+      #         seconds = 10
+      #     coordinate_accuracy_decimal_deg = minutes/60 + seconds / 3600
+  
+      #     """
+      #     Developer's notes
+  
+      #     To convert accuracy from decimal degrees to meters we do the following:
+  
+      #     1. Add the coordinate accuracy to both the latitude and longitude to
+      #         find the "+" distance from the location
+      #     2. Convert "+" accuracy coordinates from decimal degrees to UTM Zone 13
+      #         N
+      #     3. Find the distance in meters from the original Easting/Northing and
+      #         define this as the "+" accuracy in meters
+      #     4. Subtract the coordinate accuracy to both the latitude and longitude
+      #         to find the "-" distance from the location
+      #     5. Convert the "-" accuracy coordinates from decimal degrees to UTM Zone
+      #         13 N
+      #     6. Find the distance in meters from the original Easting/Northing and
+      #         define this as the "-" accuracy in meters
+      #     7. Set the coordinate accuracy in meters as the mean of the "+" and "-"
+      #         distances from the location
+      #     """
+      #     original_longitude = transformed_point.x
+      #     original_latitude = transformed_point.y
+  
+      #     plus_longitude = original_longitude + coordinate_accuracy_decimal_deg
+      #     plus_latitude = original_latitude + coordinate_accuracy_decimal_deg
+      #     plus_point_decimal_deg = Point(plus_longitude, plus_latitude)
+      #     plus_point_utm_zone_13_n = transform_srid(
+      #         plus_point_decimal_deg,
+      #         SRID_WGS84,
+      #         SRID_UTM_ZONE_13N)
+  
+      #     minus_longitude = original_longitude - coordinate_accuracy_decimal_deg
+      #     minus_latitude = original_latitude - coordinate_accuracy_decimal_deg
+      #     minus_point_decimal_deg = Point(minus_longitude, minus_latitude)
+  
+!     if row.CoordinateMethod or row.CoordinateAccuracy:
+!         coordinate_method = (
+!             lexicon_mapper.map_value(f"LU_CoordinateMethod:{row.CoordinateMethod}")
+!             if not pd.isna(row.CoordinateMethod)
+!             else None
+!         )
+  
+!         accuracy_value, accuracy_unit = NMA_COORDINATE_ACCURACY.get(
+!             row.CoordinateAccuracy, (None, None)
+!         )
+  
+!         provenance = DataProvenance(
+!             target_id=location.id,
+!             target_table="location",
+!             field_name="point",
+!             origin_source=None,
+!             collection_method=coordinate_method,
+!             accuracy_value=accuracy_value,
+!             accuracy_unit=accuracy_unit,
+!         )
+!         provenance_records.append(provenance)
+  
+!     return provenance_records
+  
+  
+> def timeit_direct(func, *args, **kwargs):
+!     start = datetime.now()
+!     result = func(*args, **kwargs)
+!     end = datetime.now()
+!     logger.info(f"TIMING: {func.__name__} took {(end - start).total_seconds()} seconds")
+!     return result
+  
+  
+> def timeit(func):
+!     def wrapper(*args, **kwargs):
+!         return timeit_direct(func, *args, **kwargs)
+  
+!     return wrapper
+  
+  
+> class LexiconMapper:
+>     def __init__(self):
+>         self._mappers = None
+  
+>     def map_value(self, value):
+!         value = value.strip()
+!         return self._make_lu_to_lexicon_mapper().get(value, value)
+  
+>     def _make_lu_to_lexicon_mapper(self):
+!         if self._mappers:
+!             return self._mappers
+  
+          # Lookup tables where CODE maps to MEANING
+!         lu_tables = [
+!             "LU_AltitudeMethod",
+!             "LU_CollectionMethod",
+!             "LU_ConstructionMethod",
+!             "LU_CoordinateAccuracy",
+!             "LU_CoordinateMethod",
+!             "LU_CurrentUse",
+!             "LU_DataQuality",
+!             "LU_DataSource",
+!             "LU_Depth_CompletionSource",
+!             "LU_Discharge_ChemistrySource",
+!             "LU_LevelStatus",
+!             "LU_MajorAnalyte",
+!             "LU_MeasurementMethod",
+!             "LU_MinorTraceAnalyte",
+!             "LU_MonitoringStatus",
+!             "LU_SampleType",
+!             "LU_SiteType",
+!             "LU_Status",
+!         ]
+  
+          # Lookup tables intentionally skipped (kept for documentation only)
+          # Each entry explains why the table is excluded
+!         _lu_tables_skipped = {
+!             "LU_AltitudeDatum": "code is the value, so no need for mapping",
+!             "LU_CoordinateDatum": "code is the value, so no need for mapping",
+!             "LU_FieldNoteTypes": "not being used in the transfers since there are no records",
+!             "LU_Formations": "needs to be cleaned before it can be used",
+!             "LU_Lithology": "needs to be cleaned before it can be used",
+!             "LU_MeasuringAgency": "the abbreviation is what is used in the new schema",
+!         }
+!         mappers = {}
+  
+!         for lu_table in lu_tables:
+!             table = read_csv(lu_table)
+  
+!             for i, row in table.iterrows():
+!                 if lu_table == "LU_Formations":
+!                     code = row.Code
+!                     meaning = row.Meaning
+!                 else:
+!                     code = row.CODE
+!                     meaning = row.MEANING
+  
+!                 mappers.update({f"{lu_table}:{code}": meaning})
+!         self._mappers = mappers
+!         return mappers
+  
+  
+> lexicon_mapper = LexiconMapper()
+  
+  
+  # ============= EOF =============================================

From 48f503d1afbab2579e447522001e66ab9fcc5543 Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 3 Dec 2025 01:49:34 -0800
Subject: [PATCH 52/66] Enforce timezone info on `created_at`

---
 tests/features/steps/post_migration_legacy_data.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py
index 5850bf04e..3baa7f5f3 100644
--- a/tests/features/steps/post_migration_legacy_data.py
+++ b/tests/features/steps/post_migration_legacy_data.py
@@ -396,9 +396,14 @@ def step_then_created_at_recent(context: Context):
     created_at = context.retrieved_location.created_at
     now = datetime.now(timezone.utc)
 
-    # Ensure both datetimes are timezone-aware for accurate comparison
+    # created_at should always be timezone-aware (configured in AutoBaseMixin with DateTime(timezone=True))
+    # If it's naive, this indicates a database/ORM configuration issue
     if created_at.tzinfo is None:
-        created_at = created_at.replace(tzinfo=timezone.utc)
+        raise AssertionError(
+            "created_at is a naive datetime (no timezone info). "
+            "Ensure the database and ORM are configured to return timezone-aware datetimes in UTC. "
+            "AutoBaseMixin.created_at uses DateTime(timezone=True) with server_default=func.timezone('UTC', func.now())"
+        )
 
     diff_seconds = abs((now - created_at).total_seconds())
     assert diff_seconds < 3600, "created_at should be within last hour"

From 43a8c5f5649ccd7b42ba826c4da8e150ffbd51c2 Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 3 Dec 2025 01:51:21 -0800
Subject: [PATCH 53/66] Ignore test coverage artifacts

---
 .gitignore              |   7 +
 transfers/util.py,cover | 461 ----------------------------------------
 2 files changed, 7 insertions(+), 461 deletions(-)
 delete mode 100644 transfers/util.py,cover

diff --git a/.gitignore b/.gitignore
index 44b28e13c..4bf6245e0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,13 @@ dist/
 wheels/
 *.egg-info
 
+# Test coverage reports
+*.cover
+.coverage
+.coverage.*
+htmlcov/
+coverage.xml
+
 # Virtual environments
 .venv
 requirements.txt
diff --git a/transfers/util.py,cover b/transfers/util.py,cover
deleted file mode 100644
index 5c2803392..000000000
--- a/transfers/util.py,cover
+++ /dev/null
@@ -1,461 +0,0 @@
-  # ===============================================================================
-  # Copyright 2025 ross
-  #
-  # Licensed under the Apache License, Version 2.0 (the "License");
-  # you may not use this file except in compliance with the License.
-  # You may obtain a copy of the License at
-  #
-  # http://www.apache.org/licenses/LICENSE-2.0
-  #
-  # Unless required by applicable law or agreed to in writing, software
-  # distributed under the License is distributed on an "AS IS" BASIS,
-  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  # See the License for the specific language governing permissions and
-  # limitations under the License.
-  # ===============================================================================
-> import csv
-> import io
-> import os
-> import re
-> from datetime import datetime, timezone, timedelta
-> from pathlib import Path
-  
-> import numpy as np
-> import pandas as pd
-> import pytz
-> from shapely import Point
-> from sqlalchemy import select
-> from sqlalchemy.orm import Session
-  
-> from constants import SRID_WGS84, SRID_UTM_ZONE_13N
-> from db import Thing, Location, DataProvenance
-> from services.gcs_helper import get_storage_bucket
-  
-  # from services.lexicon_mapper import lexicon_mapper
-> from services.util import (
->     transform_srid,
->     get_epqs_elevation_from_point,
->     convert_ft_to_m,
->     convert_ngvd29_to_navd88,
-> )
-> from transfers.logger import logger
-  
-> NMA_COORDINATE_ACCURACY = {
->     "5m": (5, "m"),
->     "1": (0.1, "second"),
->     "5": (0.5, "second"),
->     "F": (5, "second"),
->     "H": (0.01, "second"),
->     "M": (1, "minute"),
->     "R": (3, "second"),
->     "S": (1, "second"),
->     "T": (10, "second"),
-> }
-  
-  
-> def replace_nans(df: pd.DataFrame, default=None) -> pd.DataFrame:
-!     df = df.replace(pd.NA, default)
-!     return df.replace({np.nan: default})
-  
-  
-> def read_csv(name: str, dtype: dict | None = None) -> pd.DataFrame:
-      # Try to read from local data directory first
-!     local_file = Path(__file__).parent / "data" / f"{name}.csv"
-  
-!     if local_file.exists():
-!         logger.info(f"Reading {name} from local file: {local_file}")
-!         if dtype:
-!             return pd.read_csv(local_file, dtype=dtype)
-!         else:
-!             return pd.read_csv(local_file)
-  
-      # Check cache directory
-!     p = get_transfers_data_path(Path("nma_csv_cache") / f"{name}.csv")
-!     if os.path.exists(p):
-!         logger.info(f"Reading {name} from cache: {p}")
-!         return pd.read_csv(p, dtype=dtype)
-  
-      # Fall back to GCS if local file doesn't exist
-!     logger.info(f"Local file and cache not found, reading {name} from GCS")
-!     bucket = get_storage_bucket()
-!     blob = bucket.blob(f"nma_csv/{name}.csv")
-!     data = blob.download_as_bytes()
-!     with open(p, "wb") as f:
-!         f.write(data)
-  
-!     if dtype:
-!         return pd.read_csv(io.BytesIO(data), dtype=dtype)
-!     else:
-!         return pd.read_csv(io.BytesIO(data))
-  
-  
-> def get_valid_point_ids(session, thing_type="water well"):
-!     things = get_valid_things(session, thing_type)
-!     valid_pointids = [thing.name for thing in things]
-!     return valid_pointids
-  
-  
-> def get_valid_things(session, thing_type="water well"):
-!     return session.query(Thing).where(Thing.thing_type == thing_type).all()
-  
-  
-> def extract_organization(alternate_id: str) -> str:
-!     if alternate_id.startswith("TWDB"):
-!         return "TWDB"
-!     elif alternate_id.startswith("NMED"):
-!         return "NMED"
-  
-      # TODO: There are a bunch of other formats used for AlternateSiteID.
-      # we should try to handle as many as possible but its not the end of the world
-      # if we have to update the organization for a particular alternate id at a later time
-!     for regex, org in ((r"^A-Z{1,2}-\d{5,6}$", "NMOSE"), (r"\d+(\.\d+){3,}", "PLSS")):
-  
-!         if re.match(regex, alternate_id):
-!             return org
-  
-!     return "Unknown"
-  
-  
-> def get_transfers_data_path(name):
-!     def data_path(r):
-!         return Path(r) / "transfers" / "data"
-  
-!     root = data_path("/workspace")
-!     if not os.path.exists(root):
-!         root = data_path("..")
-!         if not os.path.exists(root):
-!             root = data_path(".")
-  
-!     return root / name
-  
-  
-> def filter_non_transferred_wells(sess: Session, df: pd.DataFrame) -> pd.DataFrame:
-!     sql = select(Thing.name).where(Thing.thing_type == "water well")
-!     existing_ids = sess.execute(sql).scalars().all()
-!     return df[~(df["PointID"].isin(existing_ids))]
-  
-  
-> def filter_by_welldata_datasource_and_project(df: pd.DataFrame) -> pd.DataFrame:
-!     path = get_transfers_data_path("valid_welldata_datasources.csv")
-!     with open(path, "r") as f:
-!         reader = csv.reader(f)
-!         _ = next(reader)
-!         valid_datasources = [row[0] for row in reader if row[1] == "Yes"]
-!         f.seek(0)
-!         invalid_datasources = [row[0] for row in reader if row[1] == "NO"]
-!         logger.info("Invalid WellData Datasources:")
-!         for vd in invalid_datasources:
-!             logger.info(f"  {vd}")
-  
-!     counts = df.groupby("DataSource").size().reset_index(name="WellCount")
-!     counts = counts.sort_values("WellCount", ascending=False)
-!     for count in counts.itertuples():
-!         logger.info(f"{count.DataSource}: {count.WellCount}")
-  
-!     pldf = read_csv("ProjectLocations")
-!     collabnet = pldf[pldf["ProjectName"] == "Water Level Network"]
-!     return df[
-!         df["DataSource"].isin(valid_datasources)
-!         | df["PointID"].isin(collabnet["PointID"])
-!     ]
-  
-  
-> def filter_by_valid_measuring_agency(df: pd.DataFrame) -> pd.DataFrame:
-!     path = get_transfers_data_path("valid_measuring_agency.csv")
-  
-!     with open(path, "r") as f:
-!         reader = csv.reader(f)
-!         _ = next(reader)
-!         valid_measuring_agencies = [row[0] for row in reader if row[1] == "Yes"]
-!         logger.info("Valid Measuring Agencies:")
-!         for vma in valid_measuring_agencies:
-!             logger.info(f"  {vma}")
-!     return df[df["MeasuringAgency"].isin(valid_measuring_agencies)]
-  
-  
-> def filter_to_valid_point_ids(session: Session, df: pd.DataFrame) -> pd.DataFrame:
-!     valid_point_ids = get_valid_point_ids(session)
-!     return df[df["PointID"].isin(valid_point_ids)]
-  
-  
-> def convert_mt_to_utc(dt_record: datetime):
-!     t = dt_record.time()
-!     if t.hour == 0 and t.minute == 0:
-          # no time was measured, so just set the timezone to UTC and keep
-          # time at 00:00
-!         dt_record = dt_record.replace(tzinfo=timezone.utc)
-!     else:
-!         tz = pytz.timezone("America/Denver")
-!         dt_record = tz.localize(dt_record)
-!         if dt_record.dst() == timedelta(0):
-              # MST
-!             utc_offset = 7
-!         else:
-              # MDT
-!             utc_offset = 6
-!         dt_record = dt_record - timedelta(hours=utc_offset)
-!         dt_record = dt_record.replace(tzinfo=timezone.utc)
-!     return dt_record
-  
-  
-> def chunk_by_size(df, chunk_size):
-!     for i in range(0, len(df), chunk_size):
-!         yield df.iloc[i : i + chunk_size]
-  
-  
-> def make_location(row: pd.Series, elevations: dict) -> tuple:
->     """
->     Returns a tuple of location data and the elevation method
->     """
->     point = Point(row.Easting, row.Northing)
-  
-      # Convert the point to a WGS84 coordinate system
->     transformed_point = transform_srid(
->         point, source_srid=SRID_UTM_ZONE_13N, target_srid=SRID_WGS84
->     )
-  
->     z = row.Altitude
->     if z:
->         elevation_from_epqs = False
->         z = convert_ft_to_m(z)
-  
->         if row.AltDatum == "NGVD29":
-!             key = f"{row.PointID}, {transformed_point.x, transformed_point.y}"
-!             if key in elevations:
-!                 z = elevations[key]
-!             else:
-!                 z = convert_ngvd29_to_navd88(
-!                     z, transformed_point.x, transformed_point.y
-!                 )
-!             elevations[key] = z
-!     else:
-!         elevation_from_epqs = True
-!         logger.info(
-!             f"Location {row.PointID} has no Altitude. Setting from National Map EPQS for "
-!         )
-!         z = get_epqs_elevation_from_point(transformed_point.x, transformed_point.y)
-  
->     if elevation_from_epqs:
-!         elevation_method = "USGS National Elevation Dataset (NED)"
->     elif pd.isna(row.AltitudeMethod):
-!         elevation_method = None
->     else:
->         elevation_method = lexicon_mapper.map_value(
->             f"LU_AltitudeMethod:{row.AltitudeMethod.strip()}"
->         )
-  
-      # Extract AMPAPI date fields (Date type, not DateTime)
->     nma_date_created = None
->     if row.DateCreated:
->         nma_date_created = datetime.strptime(
->             row.DateCreated, "%Y-%m-%d %H:%M:%S.%f"
->         ).date()
-  
->     nma_site_date = None
->     if row.SiteDate:
->         nma_site_date = datetime.strptime(row.SiteDate, "%Y-%m-%d %H:%M:%S.%f").date()
-  
->     location = Location(
->         nma_pk_location=row.LocationId,
->         point=transformed_point.wkt,
->         elevation=z,
->         release_status="public" if row.PublicRelease else "private",
->         nma_coordinate_notes=row.CoordinateNotes,
->         nma_notes_location=row.LocationNotes,
->         nma_date_created=nma_date_created,
->         nma_site_date=nma_site_date,
->     )
-  
->     return location, elevation_method
-  
-  
-> def make_location_data_provenance(
->     row: pd.Series, location: Location, elevation_method: str | None
-> ) -> list[DataProvenance]:
-!     provenance_records = []
-  
-!     if row.AltitudeAccuracy or row.CoordinateAccuracy:
-!         provenance = DataProvenance(
-!             target_id=location.id,
-!             target_table="location",
-!             field_name="elevation",
-!             origin_source=None,
-!             collection_method=elevation_method,
-!             accuracy_value=(
-!                 None
-!                 if pd.isna(row.AltitudeAccuracy)
-!                 else convert_ft_to_m(row.AltitudeAccuracy)
-!             ),
-!             accuracy_unit="m",
-!         )
-!         provenance_records.append(provenance)
-  
-      # TODO: AMP feedback is required for transfering coordinate accuracy values
-      #       from NM_Aquifer to Ocotillo
-      # if row.CoordinateAccuracy == "U" or pd.isna(row.CoordinateAccuracy):
-      #     # map "Unknown" to None
-      #     row.CoordinateAccuracy = None
-      # elif row.CoordinateAccuracy == "5m":
-      #     row.CoordinateAccuracy = 5.0
-      # else:
-      #     seconds = 0
-      #     minutes = 0
-      #     if row.CoordinateAccuracy == "1":
-      #         seconds = 0.1
-      #     elif row.CoordinateAccuracy == "5":
-      #         seconds = 0.5
-      #     elif row.CoordinateAccuracy == "F":
-      #         seconds = 5
-      #     elif row.CoordinateAccuracy == "H":
-      #         seconds = 0.01
-      #     elif row.CoordinateAccuracy == "M":
-      #         minutes = 1
-      #     elif row.CoordinateAccuracy == "R":
-      #         seconds = 3
-      #     elif row.CoordinateAccuracy == "S":
-      #         seconds = 1
-      #     else:
-      #         seconds = 10
-      #     coordinate_accuracy_decimal_deg = minutes/60 + seconds / 3600
-  
-      #     """
-      #     Developer's notes
-  
-      #     To convert accuracy from decimal degrees to meters we do the following:
-  
-      #     1. Add the coordinate accuracy to both the latitude and longitude to
-      #         find the "+" distance from the location
-      #     2. Convert "+" accuracy coordinates from decimal degrees to UTM Zone 13
-      #         N
-      #     3. Find the distance in meters from the original Easting/Northing and
-      #         define this as the "+" accuracy in meters
-      #     4. Subtract the coordinate accuracy to both the latitude and longitude
-      #         to find the "-" distance from the location
-      #     5. Convert the "-" accuracy coordinates from decimal degrees to UTM Zone
-      #         13 N
-      #     6. Find the distance in meters from the original Easting/Northing and
-      #         define this as the "-" accuracy in meters
-      #     7. Set the coordinate accuracy in meters as the mean of the "+" and "-"
-      #         distances from the location
-      #     """
-      #     original_longitude = transformed_point.x
-      #     original_latitude = transformed_point.y
-  
-      #     plus_longitude = original_longitude + coordinate_accuracy_decimal_deg
-      #     plus_latitude = original_latitude + coordinate_accuracy_decimal_deg
-      #     plus_point_decimal_deg = Point(plus_longitude, plus_latitude)
-      #     plus_point_utm_zone_13_n = transform_srid(
-      #         plus_point_decimal_deg,
-      #         SRID_WGS84,
-      #         SRID_UTM_ZONE_13N)
-  
-      #     minus_longitude = original_longitude - coordinate_accuracy_decimal_deg
-      #     minus_latitude = original_latitude - coordinate_accuracy_decimal_deg
-      #     minus_point_decimal_deg = Point(minus_longitude, minus_latitude)
-  
-!     if row.CoordinateMethod or row.CoordinateAccuracy:
-!         coordinate_method = (
-!             lexicon_mapper.map_value(f"LU_CoordinateMethod:{row.CoordinateMethod}")
-!             if not pd.isna(row.CoordinateMethod)
-!             else None
-!         )
-  
-!         accuracy_value, accuracy_unit = NMA_COORDINATE_ACCURACY.get(
-!             row.CoordinateAccuracy, (None, None)
-!         )
-  
-!         provenance = DataProvenance(
-!             target_id=location.id,
-!             target_table="location",
-!             field_name="point",
-!             origin_source=None,
-!             collection_method=coordinate_method,
-!             accuracy_value=accuracy_value,
-!             accuracy_unit=accuracy_unit,
-!         )
-!         provenance_records.append(provenance)
-  
-!     return provenance_records
-  
-  
-> def timeit_direct(func, *args, **kwargs):
-!     start = datetime.now()
-!     result = func(*args, **kwargs)
-!     end = datetime.now()
-!     logger.info(f"TIMING: {func.__name__} took {(end - start).total_seconds()} seconds")
-!     return result
-  
-  
-> def timeit(func):
-!     def wrapper(*args, **kwargs):
-!         return timeit_direct(func, *args, **kwargs)
-  
-!     return wrapper
-  
-  
-> class LexiconMapper:
->     def __init__(self):
->         self._mappers = None
-  
->     def map_value(self, value):
-!         value = value.strip()
-!         return self._make_lu_to_lexicon_mapper().get(value, value)
-  
->     def _make_lu_to_lexicon_mapper(self):
-!         if self._mappers:
-!             return self._mappers
-  
-          # Lookup tables where CODE maps to MEANING
-!         lu_tables = [
-!             "LU_AltitudeMethod",
-!             "LU_CollectionMethod",
-!             "LU_ConstructionMethod",
-!             "LU_CoordinateAccuracy",
-!             "LU_CoordinateMethod",
-!             "LU_CurrentUse",
-!             "LU_DataQuality",
-!             "LU_DataSource",
-!             "LU_Depth_CompletionSource",
-!             "LU_Discharge_ChemistrySource",
-!             "LU_LevelStatus",
-!             "LU_MajorAnalyte",
-!             "LU_MeasurementMethod",
-!             "LU_MinorTraceAnalyte",
-!             "LU_MonitoringStatus",
-!             "LU_SampleType",
-!             "LU_SiteType",
-!             "LU_Status",
-!         ]
-  
-          # Lookup tables intentionally skipped (kept for documentation only)
-          # Each entry explains why the table is excluded
-!         _lu_tables_skipped = {
-!             "LU_AltitudeDatum": "code is the value, so no need for mapping",
-!             "LU_CoordinateDatum": "code is the value, so no need for mapping",
-!             "LU_FieldNoteTypes": "not being used in the transfers since there are no records",
-!             "LU_Formations": "needs to be cleaned before it can be used",
-!             "LU_Lithology": "needs to be cleaned before it can be used",
-!             "LU_MeasuringAgency": "the abbreviation is what is used in the new schema",
-!         }
-!         mappers = {}
-  
-!         for lu_table in lu_tables:
-!             table = read_csv(lu_table)
-  
-!             for i, row in table.iterrows():
-!                 if lu_table == "LU_Formations":
-!                     code = row.Code
-!                     meaning = row.Meaning
-!                 else:
-!                     code = row.CODE
-!                     meaning = row.MEANING
-  
-!                 mappers.update({f"{lu_table}:{code}": meaning})
-!         self._mappers = mappers
-!         return mappers
-  
-  
-> lexicon_mapper = LexiconMapper()
-  
-  
-  # ============= EOF =============================================

From 027299060f94d38819f20412f931060ad6ec372d Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 3 Dec 2025 01:51:29 -0800
Subject: [PATCH 54/66] Delete .coverage

---
 .coverage | Bin 53248 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 .coverage

diff --git a/.coverage b/.coverage
deleted file mode 100644
index 5417251745baf9f63193ac63e03d4d4b0edab5c4..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 53248
zcmeI)&u$Y(90%}S+t^vh)&|kYiXxQhftrXU#_geMdw@`)3Y8WpX)l#9V|$!z@$Q=4
zH6(|sHB>@E>Kjyj1o{X(L!W>iIrY{{p;CXpon8M4HsVkeNWUxF**`P0GoSfq$JSiG
zcG-!P==gyvBk_T;V3?+HUI@c5EP9sdnQjHz$)yMM*BslQwQCt058o-<-x#I*&xU=Y
ze8pZb{Zf8!{`Zn=J)HlwAgvZ%zy<*bKmY>&Z-K$JQlYZ6WZwHelFhD)0@+f5el36c
z)z;?rme}6BaCuAUbK-PS(6+uVHbvmy6+IP*j?-1b@!C#HMvk{5qFt4S?uV+)6CItS
zsg46~#PNBr;j}4Mq;@EYUf{Sg*cZ3d{#qO$xm%?6B7K4qQO=IXA;jq<`l<+2M+M4j
zsZi(Qw9{TKuHOHlSg4#hVLniCnPkwS$NWTT=)|ZfmpYVC%MaR|`(_}$)~*WIgdCQz
z<x@WQB3;_RcWaR^x{enY(+i!*@japTRI4AU_S6EdF<QD2-gs#!t(#n+n>uE4uQ8ob
z<S8+e;7Q~TWzO?c)=@;hHARl|?DgFyk$=~Bdt5mbL*F>^XT_WqH}aFhzH;46smoD$
zC#o|g=Ni?q`kr&UuZESsCPp#W;$nFIn&3iE3ph2OuY7dEG^kaoz3_II-fuNzKl0<_
z233BePA{dOj?Wb;D=X%M+i^o-o<`G&!r_iJ(P)OJXEYtYKD*s;FPUs4+>5$h;dkOL
z!?A^~9BR$ueWsRT$bdVV@RNX)(?e}-oLS0)Xm<VPWLt|{OsJ@X&{&WL>-4=Aca@OK
z(DLu7K<=oG<f<MP4!a>y)2Une%GS!PrZa2;jrwr6I9SRTD$C2}Ju7ZTNo^&63KLBz
zt;s3FXnM(2GVBNGK=QIdGL11BCDT5VT%qEu7xI<O<ypl^!Zqs2&fK7oD^wO2&4KO_
zJTCF`*hC@t%%}?-m<V1y`#l-$iWl?FrzzvrWBJM_i?cG$S!mSxHGluxOq}HhOR>I7
zgFZjA6Itdnqbz@}_Sf&oE{!HV=>*heX$;mgfZ%{_Nr7e4??-X_<-@pn^4v4ZuTEfe
zoZ3^U)i^kH@2h?oIy9~Fv=x4wo)y(y8S*F;Rz=hIyGnX`ZlD;KLb|6DiZ~B1&PwM-
zX@vWn#?GB;oTqeh;bDH8I$heoXvcM>*G}duo`!X*H1>1n<ooDBgd&P6b;^*s=z%oT
zM3bvLVx{SC^vHx$b!A(3cdnGN6z}e;7n0Mn(M1`l$Z?h6AaQ2mhILnlq7^8*NOS3G
zHT$w=z_rz=^DBQ!p5wQ2gB7z-IdjGw^pno0$C*Y*le#O@Y$u1AiAHkZ;;6k`p#hiP
z?Toh*wj1N%LwyGx+;j~#wL%&yd9PDk<q!C#{mh^THV8lf0uX=z1Rwwb2tWV=5P$##
zj-Ei)%$NoK{GYM^Fzi3+1sen)009U<00Izz00bZa0SG_<0&k^2DU&^Er@!zxyI^LQ
zmy+KAe7I3RyYYUJN|mvn8unBBueY**$TS2X009U<00Izz00bZa0SG_<0z-jP_N1Bq
z8z586E?dce0rc<x9~t(eVNmQq00Izz00bZa0SG_<0uX=z1R!vz0$0tPF*sHGI;52m
zYPTG>DZ5=-kfp6^w0?oS#0wY&wC<7beHQrrUhT^-u1mMqRhNBU4T%;zsnJqKUP#Ln
zg|&X<bZNZ={{G*ze>3QT4FV8=00bZa0SG_<0uX=z1Rwx`H$&iRZg9%_FF*fd{l7P(
zVv!37KmY;|fB*y_009U<00Izzz-to7npvZ)zyE)3*w0_nU63pUAOHafKmY;|fB*y_
z009U<00M_9kj<7Y{r&%A!+w0IS;5UA009U<00Izz00bZa0SG_<0uY!jaLzntSp&NM
z_=)xR%Dc3)aMAeDSjZX1oI!^d+Bb~zOHcl(7RJ*MqIX;$=k)jg&kXz7cmV7}00Izz
z00bZa0SG_<0uX=z1R!uk1adjciogFi7LG`XPz(q_00Izz00bZa0SG_<0uX=z1f~k`
z_y73(Ka~b2ApijgKmY;|fB*y_009U<00Kuw0PFuBnW{uVAOHafKmY;|fB*y_009U<
M00Pql;`RUk1r+UXZU6uP


From f0e730c2f06f1b2abce75f43840f482ebbc8e4c8 Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 3 Dec 2025 01:53:56 -0800
Subject: [PATCH 55/66] Remove noisy EOF

---
 tests/test_thing.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/test_thing.py b/tests/test_thing.py
index 3792b4302..28290dada 100644
--- a/tests/test_thing.py
+++ b/tests/test_thing.py
@@ -1130,6 +1130,3 @@ def test_delete_thing_id_link_404_not_found(second_thing_id_link):
     assert response.status_code == 404
     data = response.json()
     assert data["detail"] == f"ThingIdLink with ID {bad_id} not found."
-
-
-# ============= EOF =============================================

From 070fcbae2dd849ba386f52a5201452634abdba03 Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 3 Dec 2025 01:58:00 -0800
Subject: [PATCH 56/66] Simplify error message

---
 tests/features/steps/post_migration_legacy_data.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py
index 3baa7f5f3..185b1a758 100644
--- a/tests/features/steps/post_migration_legacy_data.py
+++ b/tests/features/steps/post_migration_legacy_data.py
@@ -401,8 +401,7 @@ def step_then_created_at_recent(context: Context):
     if created_at.tzinfo is None:
         raise AssertionError(
             "created_at is a naive datetime (no timezone info). "
-            "Ensure the database and ORM are configured to return timezone-aware datetimes in UTC. "
-            "AutoBaseMixin.created_at uses DateTime(timezone=True) with server_default=func.timezone('UTC', func.now())"
+            "Check ORM/database config for timezone-aware UTC datetimes (see AutoBaseMixin.created_at)."
         )
 
     diff_seconds = abs((now - created_at).total_seconds())

From f3e9587ad96bf88b01340f532a26a48a92347ec1 Mon Sep 17 00:00:00 2001
From: Kimball Bighorse <kbighorse@yahoo.com>
Date: Wed, 3 Dec 2025 01:59:45 -0800
Subject: [PATCH 57/66] Remove unnecessary conditionals

---
 transfers/util.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/transfers/util.py b/transfers/util.py
index 5216c204f..876e142fc 100644
--- a/transfers/util.py
+++ b/transfers/util.py
@@ -64,10 +64,7 @@ def read_csv(name: str, dtype: dict | None = None) -> pd.DataFrame:
 
     if local_file.exists():
         logger.info(f"Reading {name} from local file: {local_file}")
-        if dtype:
-            return pd.read_csv(local_file, dtype=dtype)
-        else:
-            return pd.read_csv(local_file)
+        return pd.read_csv(local_file, dtype=dtype)
 
     # Check cache directory
     p = get_transfers_data_path(Path("nma_csv_cache") / f"{name}.csv")
@@ -83,10 +80,7 @@ def read_csv(name: str, dtype: dict | None = None) -> pd.DataFrame:
     with open(p, "wb") as f:
         f.write(data)
 
-    if dtype:
-        return pd.read_csv(io.BytesIO(data), dtype=dtype)
-    else:
-        return pd.read_csv(io.BytesIO(data))
+    return pd.read_csv(io.BytesIO(data), dtype=dtype)
 
 
 def get_valid_point_ids(session, thing_type="water well"):

From 56694a3cc6c2ad88b4c3c52012b411e54e5ed4d3 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Wed, 3 Dec 2025 10:47:03 -0700
Subject: [PATCH 58/66] feat: update sensor type handling to support multiple
 sensor types in water levels transfer

---
 core/lexicon.json                            |  3 +++
 transfers/waterlevels_transducer_transfer.py | 12 ++++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/core/lexicon.json b/core/lexicon.json
index 142f1745c..bec62b46e 100644
--- a/core/lexicon.json
+++ b/core/lexicon.json
@@ -676,6 +676,9 @@
     {"categories": ["parameter_type"], "term": "Major Element", "definition": "Major Element"},
     {"categories": ["parameter_type"], "term": "Minor Element", "definition": "Minor Element"},
     {"categories": ["parameter_type"], "term": "Physical property", "definition": "Physical property"},
+
+    {"categories": ["sensor_type"], "term": "DiverLink", "definition": "DiverLink"},
+    {"categories": ["sensor_type"], "term": "Diver Cable", "definition": "Diver Cable"},
     {"categories": ["sensor_type"], "term": "Pressure Transducer", "definition": "Pressure Transducer"},
     {"categories": ["sensor_type"], "term": "Data Logger", "definition": "Data Logger"},
     {"categories": ["sensor_type"], "term": "Barometer", "definition": "Barometer"},
diff --git a/transfers/waterlevels_transducer_transfer.py b/transfers/waterlevels_transducer_transfer.py
index 74eaafd06..cd323330c 100644
--- a/transfers/waterlevels_transducer_transfer.py
+++ b/transfers/waterlevels_transducer_transfer.py
@@ -33,13 +33,13 @@
 
 class WaterLevelsContinuousTransferer(Transferer):
     _partition_field: str
-    _sensor_type: str
+    _sensor_types: tuple[str]
 
     def __init__(self, *args, **kw):
         super().__init__(*args, **kw)
         self.groundwater_parameter_id = get_groundwater_parameter_id()
-        if self._sensor_type is None:
-            raise ValueError("_sensor_type must be set")
+        if self._sensor_types is None:
+            raise ValueError("_sensor_types must be set")
         if self._partition_field is None:
             raise ValueError("_partition_field must be set")
 
@@ -66,7 +66,7 @@ def _transfer_hook(self, session: Session) -> None:
                 session.query(Deployment)
                 .join(Thing)
                 .join(Sensor)
-                .where(Sensor.sensor_type == self._sensor_type)
+                .where(Sensor.sensor_type.in_(self._sensor_types))
                 .where(Thing.name == pointid)
                 .all()
             )
@@ -185,13 +185,13 @@ def _make_observation(
 class WaterLevelsContinuousPressureTransferer(WaterLevelsContinuousTransferer):
     source_table = "WaterLevelsContinuous_Pressure"
     _partition_field = "QCed"
-    _sensor_type = "Pressure Transducer"
+    _sensor_types = ("Pressure Transducer", "Barometer", "DiverLink", "Diver Cable")
 
 
 class WaterLevelsContinuousAcousticTransferer(WaterLevelsContinuousTransferer):
     source_table = "WaterLevelsContinuous_Acoustic"
     _partition_field = "PublicRelease"
-    _sensor_type = "Acoustic Sounder"
+    _sensor_types = ("Acoustic Sounder",)
 
 
 def _find_deployment(ts, deployments):

From 1101e2e1b388ded28a8839f8a4f5c78031186598 Mon Sep 17 00:00:00 2001
From: jross <jake.ross@nmt.edu>
Date: Wed, 3 Dec 2025 12:22:02 -0700
Subject: [PATCH 59/66] feat: refactor well transfer logic to use bulk save for
 improved performance and error handling

---
 transfers/transfer.py      |  2 +-
 transfers/well_transfer.py | 38 ++++++++++++++++++++------------------
 2 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/transfers/transfer.py b/transfers/transfer.py
index bf0c69b85..45a78cc60 100644
--- a/transfers/transfer.py
+++ b/transfers/transfer.py
@@ -57,7 +57,7 @@ def message(msg, pad=10, new_line_at_top=True):
 @timeit
 def transfer_all(metrics, limit=100):
     message("STARTING TRANSFER", new_line_at_top=False)
-    if int(os.environ.get("ERASE_AND_REBUILD", 0)):
+    if get_bool_env("ERASE_AND_REBUILD", False):
         logger.info("Erase and rebuilding database")
         erase_and_rebuild_db()
 
diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py
index 314593250..754536e41 100644
--- a/transfers/well_transfer.py
+++ b/transfers/well_transfer.py
@@ -425,6 +425,9 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series):
             if well is not None:
                 session.expunge(well)
 
+            if location is not None:
+                session.delete(location)
+
             self._capture_error(row.PointID, str(e), "UnknownField")
 
             logger.critical(f"Error creating well for {row.PointID}: {e}")
@@ -588,19 +591,19 @@ def _after_hook(self, session):
         query = session.query(Thing).filter(Thing.thing_type == "water well")
         count = query.count()
         for i, well in enumerate(query.all()):
+            objs = []
             step_start_time = time.time()
             row = self.cleaned_df[self.cleaned_df["PointID"] == well.name].iloc[0]
             if notna(row.Notes):
                 note = well.add_note(row.Notes, "Other")
-                session.add(note)
+                objs.append(note)
 
             location = well.current_location
             elevation_method = self._added_locations[row.PointID]
             data_provenances = make_location_data_provenance(
                 row, location, elevation_method
             )
-            for dp in data_provenances:
-                session.add(dp)
+            objs.extend(data_provenances)
 
             for row_field, kw in (
                 (
@@ -631,15 +634,9 @@ def _after_hook(self, session):
             ):
 
                 if notna(row[row_field]):
-                    try:
-                        dp = DataProvenance(
-                            target_id=well.id, target_table="thing", **kw
-                        )
-                        session.add(dp)
-                        session.commit()
-                    except DatabaseError as e:
-                        self._capture_error(row.PointID, str(e), "DataProvenance")
-                        session.rollback()
+                    dp = DataProvenance(target_id=well.id, target_table="thing", **kw)
+                    objs.append(dp)
+
             start_time = time.time()
             mphs = measuring_point_estimator.estimate_measuring_point_height(row)
             logger.info(
@@ -654,7 +651,7 @@ def _after_hook(self, session):
                     start_date=start_date,
                     end_date=end_date,
                 )
-                session.add(measuring_point_history)
+                objs.append(measuring_point_history)
 
             """
             Developer's notes
@@ -686,7 +683,7 @@ def _after_hook(self, session):
                     target_id=target_id,
                     target_table=target_table,
                 )
-                session.add(status_history)
+                objs.append(status_history)
                 logger.info(
                     f"  Added monitoring status for well {well.name}: {status_value}"
                 )
@@ -700,7 +697,8 @@ def _after_hook(self, session):
                             start_date=datetime.now(tz=UTC),
                             end_date=None,
                         )
-                        session.add(monitoring_frequency_history)
+
+                        objs.append(monitoring_frequency_history)
                         logger.info(
                             f"  Adding '{monitoring_frequency}' monitoring frequency for well {well.name}"
                         )
@@ -715,15 +713,19 @@ def _after_hook(self, session):
                     target_id=target_id,
                     target_table=target_table,
                 )
-                session.add(status_history)
+                objs.append(status_history)
                 logger.info(f"  Added well status for well {well.name}: {status_value}")
+            try:
+                session.bulk_save_objects(objs)
+            except DatabaseError as e:
+                session.rollback()
+                error_dict = e.orig.args[0]
+                self._capture_error(well.name, error_dict["D"], error_dict["t"])
 
             logger.info(
                 f"After hook: {well.name} {i+1}/{count} took {time.time() - step_start_time:.2f}s"
             )
 
-        session.commit()
-
 
 class WellChunkTransferer(ChunkTransferer):
     source_table: str = None

From be3a11d604fff7720da62711d3a82b58c0c80cd0 Mon Sep 17 00:00:00 2001
From: jross <jake.ross@nmt.edu>
Date: Wed, 3 Dec 2025 12:38:56 -0700
Subject: [PATCH 60/66] refactor: rename regex pattern for pump types and
 simplify extraction logic

---
 transfers/well_transfer.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py
index 754536e41..d92f2ece6 100644
--- a/transfers/well_transfer.py
+++ b/transfers/well_transfer.py
@@ -125,26 +125,27 @@ def _extract_casing_materials(row) -> list[str]:
     return materials
 
 
-pattern = re.compile(
+PUMP_PATTERN = re.compile(
     r"\b(?P<term>jet|hand|submersible)\b|\b(?P<phrase>line[-\s]+shaft)\b", re.IGNORECASE
 )
 
 
 def first_matched_term(text: str):
-    m = pattern.search(text)
+    m = PUMP_PATTERN.search(text)
     if not m:
         return None
     return m.group("term") or m.group("phrase")
 
 
-PUMP_MAPPING = {"jet": "Jet", "hand": "Hand", "submersible": "Submersible"}
-
-
 def _extract_well_pump_type(row) -> str | None:
     if isna(row.ConstructionNotes):
         return None
     construction_notes = row.ConstructionNotes.lower()
-    return PUMP_MAPPING.get(first_matched_term(construction_notes), None)
+    pump = first_matched_term(construction_notes)
+    if pump:
+        return pump.capitalize()
+    else:
+        return None
 
 
 # Parse aquifer codes

From 4b6d8f280f97e24152783dc96552e8e0cace752b Mon Sep 17 00:00:00 2001
From: jross <jake.ross@nmt.edu>
Date: Wed, 3 Dec 2025 14:25:46 -0700
Subject: [PATCH 61/66] feat: add organization mapping functionality and update
 contact transfer logic

---
 core/lexicon.json                        | 76 +++++++++++++++++++++++
 transfers/contact_transfer.py            | 42 ++++++++++---
 transfers/data/organization_mapping.json | 79 ++++++++++++++++++++++++
 3 files changed, 190 insertions(+), 7 deletions(-)
 create mode 100644 transfers/data/organization_mapping.json

diff --git a/core/lexicon.json b/core/lexicon.json
index bec62b46e..35f949802 100644
--- a/core/lexicon.json
+++ b/core/lexicon.json
@@ -373,6 +373,82 @@
     {"categories": ["analysis_method_type"], "term": "Laboratory", "definition": "A procedure performed on a physical sample in a controlled, off-site laboratory environment. These methods typically involve complex instrumentation, standardized reagents, and formal quality control protocols."},
     {"categories": ["analysis_method_type"], "term": "Field Procedure", "definition": "A standardized procedure performed on-site at the time of sample collection. This can involve direct measurement of the environmental medium using a calibrated field instrument or a specific, documented technique for collecting a sample."},
     {"categories": ["analysis_method_type"], "term": "Calculation", "definition": "A mathematical procedure used to derive a new data point from one or more directly measured values. This type is used to document the provenance of calculated data, providing an auditable trail."},
+      {"categories":["organization"],"term":"City of Aztec","definition":"City of Aztec"},
+      {"categories":["organization"],"term":"Daybreak Investments","definition":"Daybreak Investments"},
+      {"categories":["organization"],"term":"Vallecitos HOA","definition":"Vallecitos HOA"},
+      {"categories":["organization"],"term":"Naiche Development","definition":"Naiche Corporation"},
+      {"categories":["organization"],"term":"Santa Fe County; Santa Fe Animal Shelter","definition":"Santa Fe County; Santa Fe Animal Shelter"},
+      {"categories":["organization"],"term":"El Guicu Ditch Association","definition":"El Guicu Ditch Association"},
+      {"categories":["organization"],"term":"Santa Fe Municipal Airport","definition":"Santa Fe Municipal Airport"},
+      {"categories":["organization"],"term":"Uluru Development","definition":"Uluru Development"},
+      {"categories":["organization"],"term":"AllSup's Convenience Stores","definition":"AllSup's Convenience Stores"},
+      {"categories":["organization"],"term":"Santa Fe Downs","definition":"Santa Fe Downs Resort"},
+      {"categories":["organization"],"term":"City of Truth or Consequences, WWTP","definition":"City of Truth or Consequences, WWTP"},
+      {"categories":["organization"],"term":"Riverbend Hotsprings","definition":"Riverbend Hotsprings"},
+      {"categories":["organization"],"term":"Armendaris Ranch","definition":"Armendaris Ranch"},
+      {"categories":["organization"],"term":"El Paso Water","definition":"El Paso Water"},
+      {"categories":["organization"],"term":"BLM, Socorro Field Office","definition":"BLM, Socorro Field Office"},
+      {"categories":["organization"],"term":"USFWS","definition":"US Fish & Wildlife Service"},
+      {"categories":["organization"],"term":"NPS","definition":"National Park Service"},
+      {"categories":["organization"],"term":"Sile MDWCA","definition":"Sile Municipal Domestic Water Assn."},
+      {"categories":["organization"],"term":"Pena Blanca Water & Sanitation District","definition":"Pena Blanca Water & Sanitation District"},
+      {"categories":["organization"],"term":"Town of Questa","definition":"Town of Questa"},
+      {"categories":["organization"],"term":"Lamy MDWCA","definition":"Lama MDWCA"},
+      {"categories":["organization"],"term":"Town of Cerro","definition":"Town of Cerro"},
+      {"categories":["organization"],"term":"Farr Cattle Company","definition":"Farr Cattle Company (Farr Ranch)"},
+      {"categories":["organization"],"term":"Carrizozo Orchard","definition":"Carrizozo Orchard"},
+      {"categories":["organization"],"term":"USFS, Kiowa Grasslands","definition":"USFS, Kiowa Grasslands"},
+      {"categories":["organization"],"term":"Cloud Country West Subdivision","definition":"Cloud Country West Subdivision"},
+      {"categories":["organization"],"term":"Chama West Water Users Association","definition":"Chama West Water Users Assn."},
+      {"categories":["organization"],"term":"El Rito Regional Water and Waste Water Association","definition":"El Rito Regional Water + Waste Water Association"},
+      {"categories":["organization"],"term":"West Rim MDWUA","definition":"West Rim MDWUA"},
+      {"categories":["organization"],"term":"Village of Willard","definition":"Village of Willard"},
+      {"categories":["organization"],"term":"Quemado Municipal Water & SWA","definition":"Quemado Mutual Water and Sewage Works Association"},
+      {"categories":["organization"],"term":"Coyote Creek MDWUA","definition":"Coyote Creek MDWUA"},
+      {"categories":["organization"],"term":"Lamy MDWCA","definition":"Lamy Mutual Domestic Water Assn."},
+      {"categories":["organization"],"term":"La Joya CWDA","definition":"La Joya CWDA"},
+      {"categories":["organization"],"term":"NM Firefighters Training Academy","definition":"NM Firefighters Training Academy"},
+      {"categories":["organization"],"term":"Cebolleta Land Grant","definition":"Cebolleta Land Grant"},
+      {"categories":["organization"],"term":"Madrid Water Co-op","definition":"Madrid Water Co-op"},
+      {"categories":["organization"],"term":"Sun Valley Water and Sanitation","definition":"Sun Valley Water and Sanitation"},
+      {"categories":["organization"],"term":"Bluewater Lake MDWCA","definition":"Bluewater Lake MDWCA"},
+      {"categories":["organization"],"term":"Bluewater Acres Domestic WUA","definition":"Bluewater Acres Domestic Water Users Assn."},
+      {"categories":["organization"],"term":"Lybrook MDWCA","definition":"Lybrook Municipal"},
+      {"categories":["organization"],"term":"New Mexico Museum of Natural History","definition":"New Mexico Museum of Natural History"},
+      {"categories":["organization"],"term":"Hillsboro MDWCA","definition":"Hillsboro Mutual Domestic Water Consumer Assn."},
+      {"categories":["organization"],"term":"Tyrone MDWCA","definition":"Tyrone Mutual Domestic Water Assn."},
+      {"categories":["organization"],"term":"Santa Clara Water System","definition":"Santa Clara Water System"},
+      {"categories":["organization"],"term":"Casas Adobes MDWCA","definition":"Casas Adobes Mutual Domestic"},
+      {"categories":["organization"],"term":"Lake Roberts WUA","definition":"Lake Roberts Water Assn."},
+      {"categories":["organization"],"term":"El Creston MDWCA","definition":"El Creston MDWCA"},
+      {"categories":["organization"],"term":"Reserve Municipality Water Works","definition":"Reserve Municipality Water Works"},
+      {"categories":["organization"],"term":"Bayard","definition":"Bayard Municipal Water"},
+      {"categories":["organization"],"term":"Town of Estancia","definition":"Town of Estancia"},
+      {"categories":["organization"],"term":"Pie Town MDWCA","definition":"Pie Town MDWCA"},
+      {"categories":["organization"],"term":"Roosevelt SWCD","definition":"Roosevelt Soil & Water Conservation District"},
+      {"categories":["organization"],"term":"Otis MDWCA","definition":"Otis Mutual Domestic"},
+      {"categories":["organization"],"term":"White Cliffs MDWUA","definition":"White Cliffs MDWUA"},
+      {"categories":["organization"],"term":"Vista Linda Water Co-op","definition":"Vista Linda Water Co-op"},
+      {"categories":["organization"],"term":"Anasazi Trails Water Co-op","definition":"Anasazi Trails Water Cooperative"},
+      {"categories":["organization"],"term":"Canon MDWCA","definition":"Canon Mutual Domestic Water Consumer Assn."},
+      {"categories":["organization"],"term":"Placitas Trails Water Co-op","definition":"Placitas Trails Water Coop"},
+      {"categories":["organization"],"term":"BLM, Roswell Office","definition":"BLM, Roswell Office"},
+      {"categories":["organization"],"term":"Forked Lightning Ranch","definition":"Forked Lightning Ranch"},
+      {"categories":["organization"],"term":"Cottonwood RWA","definition":"Cottonwood Rural Water Assn."},
+      {"categories":["organization"],"term":"Pinon Ridge WUA","definition":"Pinon Ridge Water Users Association"},
+      {"categories":["organization"],"term":"McSherry Farms","definition":"McSherry Farms"},
+      {"categories":["organization"],"term":"Agua Sana WUA","definition":"Agua Sana Water Users Assn."},
+      {"categories":["organization"],"term":"Chamita MDWCA","definition":"Chamita Water Users Association"},
+      {"categories":["organization"],"term":"W Spear-bar Ranch","definition":"W Spear-bar Ranch"},
+      {"categories":["organization"],"term":"Village of Capitan","definition":"Village of Capitan"},
+      {"categories":["organization"],"term":"Brazos MDWCA","definition":"Brazos Mutual Domestic Water Consumers Assn."},
+      {"categories":["organization"],"term":"Alto Alps HOA","definition":"Alto Alps Homeowners Association"},
+      {"categories":["organization"],"term":"Chiricahua Desert Museum","definition":"Chiricahua Desert Museum"},
+      {"categories":["organization"],"term":"Bike Ranch","definition":"Bike Ranch"},
+      {"categories":["organization"],"term":"Hachita MDWCA","definition":"Hachita MDWCA"},
+      {"categories":["organization"],"term":"Carrizozo Municipal Water","definition":"Carrizozo Municipal Water"},
+      {"categories":["organization"],"term":"Dunhill Ranch","definition":"Dunhill Ranch"},
+      {"categories":["organization"],"term":"Santa Fe Conservation Trust","definition":"Santa Fe Conservation Trust"},
     {"categories": ["organization"], "term": "NMSU", "definition": "New Mexico State University"},
     {"categories": ["organization"], "term": "USGS", "definition": "US Geological Survey"},
     {"categories": ["organization"], "term": "TWDB", "definition": "Texas Water Development Board"},
diff --git a/transfers/contact_transfer.py b/transfers/contact_transfer.py
index a1d545a03..1c690e0ce 100644
--- a/transfers/contact_transfer.py
+++ b/transfers/contact_transfer.py
@@ -20,6 +20,7 @@
 from pydantic import ValidationError
 from sqlalchemy.orm import Session
 
+from core.enums import Organization
 from db import (
     Contact,
     ThingContactAssociation,
@@ -48,6 +49,10 @@ def __init__(self, *args, **kw):
         with open(co_to_org_mapper_path, "r") as f:
             self._co_to_org_mapper = json.load(f)
 
+        organization_mapper_path = get_transfers_data_path("organization_mapping.json")
+        with open(organization_mapper_path, "r") as f:
+            self._organization_mapper = json.load(f)
+
         self._added = []
 
     def _get_dfs(self):
@@ -74,7 +79,14 @@ def _group_step(self, session: Session, row: pd.Series, db_item: Base):
             "second",
         ):
             try:
-                if adder(session, row, db_item, self._co_to_org_mapper, self._added):
+                if adder(
+                    session,
+                    row,
+                    db_item,
+                    self._co_to_org_mapper,
+                    self._organization_mapper,
+                    self._added,
+                ):
                     session.commit()
                     logger.info(f"added {tag} contact for PointID {row.PointID}")
             except ValidationError as e:
@@ -90,7 +102,7 @@ def _group_step(self, session: Session, row: pd.Series, db_item: Base):
                 self._capture_error(row.PointID, str(e), "UnknownError")
 
 
-def _add_first_contact(session, row, thing, co_to_org_mapper, added):
+def _add_first_contact(session, row, thing, co_to_org_mapper, org_mapper, added):
     # TODO: extract role from OwnerComment
     # role = extract_owner_role(row.OwnerComment)
     role = "Owner"
@@ -98,10 +110,10 @@ def _add_first_contact(session, row, thing, co_to_org_mapper, added):
 
     name = _make_name(row.FirstName, row.LastName)
 
-    organization = co_to_org_mapper.get(row.Company, row.Company)
-
+    # check if organization is in lexicon
+    organization = _get_organization(row, co_to_org_mapper, org_mapper)
     if (name, organization) in added:
-        return
+        return None
     added.append((name, organization))
 
     contact_data = {
@@ -190,7 +202,22 @@ def _add_first_contact(session, row, thing, co_to_org_mapper, added):
     return True
 
 
-def _add_second_contact(session, row, thing, co_to_org_mapper, added):
+def _get_organization(row, co_to_org_mapper, org_mapper):
+    organization = co_to_org_mapper.get(row.Company, row.Company)
+
+    try:
+        Organization(organization)
+    except ValueError:
+        norganization = next(
+            (k for k, v in org_mapper.items() if v == organization), None
+        )
+        logger.warning(f"mapping {organization} to {norganization}")
+        organization = norganization
+
+    return organization
+
+
+def _add_second_contact(session, row, thing, co_to_org_mapper, org_mapper, added):
     if all(
         [
             getattr(row, f"Second{f}") is None
@@ -203,9 +230,10 @@ def _add_second_contact(session, row, thing, co_to_org_mapper, added):
     release_status = "private"
     name = _make_name(row.SecondFirstName, row.SecondLastName)
 
-    organization = co_to_org_mapper.get(row.Company, row.Company)
+    organization = _get_organization(row, co_to_org_mapper, org_mapper)
     if (name, organization) in added:
         return
+
     added.append((name, organization))
 
     contact_data = {
diff --git a/transfers/data/organization_mapping.json b/transfers/data/organization_mapping.json
new file mode 100644
index 000000000..0d3bda9dc
--- /dev/null
+++ b/transfers/data/organization_mapping.json
@@ -0,0 +1,79 @@
+{
+  "City of Aztec": "City of Aztec",
+  "Daybreak Investments": "Daybreak Investments",
+  "Vallecitos HOA": "Vallecitos HOA",
+  "Naiche Development": "Naiche Corporation",
+  "Santa Fe County; Santa Fe Animal Shelter": "Santa Fe County; Santa Fe Animal Shelter",
+  "El Guicu Ditch Association": "El Guicu Ditch Association",
+  "Santa Fe Municipal Airport": "Santa Fe Municipal Airport",
+  "Uluru Development": "Uluru Development",
+  "AllSup's Convenience Stores": "AllSup's Convenience Stores",
+  "Santa Fe Downs": "Santa Fe Downs Resort",
+  "City of Truth or Consequences, WWTP": "City of Truth or Consequences, WWTP",
+  "Riverbend Hotsprings": "Riverbend Hotsprings",
+  "Armendaris Ranch": "Armendaris Ranch",
+  "El Paso Water": "El Paso Water",
+  "PVACD": "Pecos Valley Artesian Conservancy District",
+  "BLM, Socorro Field Office": "BLM, Socorro Field Office",
+  "USFWS": "US Fish & Wildlife Service",
+  "NPS": "National Park Service",
+  "Sile MDWCA": "Sile Municipal Domestic Water Assn.",
+  "Pena Blanca Water & Sanitation District": "Pena Blanca Water & Sanitation District",
+  "Town of Questa": "Town of Questa",
+  "Lamy MDWCA": "Lama MDWCA",
+  "Town of Cerro": "Town of Cerro",
+  "Farr Cattle Company": "Farr Cattle Company (Farr Ranch)",
+  "Carrizozo Orchard": "Carrizozo Orchard",
+  "USFS, Kiowa Grasslands": "USFS, Kiowa Grasslands",
+  "Cloud Country West Subdivision": "Cloud Country West Subdivision",
+  "Chama West Water Users Association": "Chama West Water Users Assn.",
+  "El Rito Regional Water and Waste Water Association": "El Rito Regional Water + Waste Water Association",
+  "West Rim MDWUA": "West Rim MDWUA",
+  "Village of Willard": "Village of Willard",
+  "Quemado Municipal Water & SWA": "Quemado Mutual Water and Sewage Works Association",
+  "Coyote Creek MDWUA": "Coyote Creek MDWUA",
+  "Lamy Mutual Domestic Water Assn.": "Lamy Mutual Domestic Water Assn.",
+  "La Joya CWDA": "La Joya CWDA",
+  "NM Firefighters Training Academy": "NM Firefighters Training Academy",
+  "Cebolleta Land Grant": "Cebolleta Land Grant",
+  "Madrid Water Co-op": "Madrid Water Co-op",
+  "Sun Valley Water and Sanitation": "Sun Valley Water and Sanitation",
+  "Bluewater Lake MDWCA": "Bluewater Lake MDWCA",
+  "Bluewater Acres Domestic WUA": "Bluewater Acres Domestic Water Users Assn.",
+  "Lybrook MDWCA": "Lybrook Municipal",
+  "New Mexico Museum of Natural History": "New Mexico Museum of Natural History",
+  "Hillsboro MDWCA": "Hillsboro Mutual Domestic Water Consumer Assn.",
+  "Tyrone MDWCA": "Tyrone Mutual Domestic Water Assn.",
+  "Santa Clara Water System": "Santa Clara Water System",
+  "Casas Adobes MDWCA": "Casas Adobes Mutual Domestic",
+  "Lake Roberts WUA": "Lake Roberts Water Assn.",
+  "El Creston MDWCA": "El Creston MDWCA",
+  "Reserve Municipality Water Works": "Reserve Municipality Water Works",
+  "Bayard": "Bayard Municipal Water",
+  "Town of Estancia": "Town of Estancia",
+  "Pie Town MDWCA": "Pie Town MDWCA",
+  "Roosevelt SWCD": "Roosevelt Soil & Water Conservation District",
+  "Otis MDWCA": "Otis Mutual Domestic",
+  "White Cliffs MDWUA": "White Cliffs MDWUA",
+  "Vista Linda Water Co-op": "Vista Linda Water Co-op",
+  "Anasazi Trails Water Co-op": "Anasazi Trails Water Cooperative",
+  "Canon MDWCA": "Canon Mutual Domestic Water Consumer Assn.",
+  "Placitas Trails Water Co-op": "Placitas Trails Water Coop",
+  "BLM, Roswell Office": "BLM, Roswell Office",
+  "Forked Lightning Ranch": "Forked Lightning Ranch",
+  "Cottonwood RWA": "Cottonwood Rural Water Assn.",
+  "Pinon Ridge WUA": "Pinon Ridge Water Users Association",
+  "McSherry Farms": "McSherry Farms",
+  "Agua Sana WUA": "Agua Sana Water Users Assn.",
+  "Chamita MDWCA": "Chamita Water Users Association",
+  "W Spear-bar Ranch": "W Spear-bar Ranch",
+  "Village of Capitan": "Village of Capitan",
+  "Brazos MDWCA": "Brazos Mutual Domestic Water Consumers Assn.",
+  "Alto Alps HOA": "Alto Alps Homeowners Association",
+  "Chiricahua Desert Museum": "Chiricahua Desert Museum",
+  "Bike Ranch": "Bike Ranch",
+  "Hachita MDWCA": "Hachita MDWCA",
+  "Carrizozo Municipal Water": "Carrizozo Municipal Water",
+  "Dunhill Ranch": "Dunhill Ranch",
+  "Santa Fe Conservation Trust": "Santa Fe Conservation Trust"
+}

From 1bb06ba0e0ad7301a2a8e4ef27ede37e21253e70 Mon Sep 17 00:00:00 2001
From: jross <jake.ross@nmt.edu>
Date: Wed, 3 Dec 2025 14:35:55 -0700
Subject: [PATCH 62/66] feat: add organization mapping functionality and update
 contact transfer logic

---
 core/lexicon.json | 152 +++++++++++++++++++++++-----------------------
 1 file changed, 76 insertions(+), 76 deletions(-)

diff --git a/core/lexicon.json b/core/lexicon.json
index 35f949802..8fca294be 100644
--- a/core/lexicon.json
+++ b/core/lexicon.json
@@ -373,82 +373,82 @@
     {"categories": ["analysis_method_type"], "term": "Laboratory", "definition": "A procedure performed on a physical sample in a controlled, off-site laboratory environment. These methods typically involve complex instrumentation, standardized reagents, and formal quality control protocols."},
     {"categories": ["analysis_method_type"], "term": "Field Procedure", "definition": "A standardized procedure performed on-site at the time of sample collection. This can involve direct measurement of the environmental medium using a calibrated field instrument or a specific, documented technique for collecting a sample."},
     {"categories": ["analysis_method_type"], "term": "Calculation", "definition": "A mathematical procedure used to derive a new data point from one or more directly measured values. This type is used to document the provenance of calculated data, providing an auditable trail."},
-      {"categories":["organization"],"term":"City of Aztec","definition":"City of Aztec"},
-      {"categories":["organization"],"term":"Daybreak Investments","definition":"Daybreak Investments"},
-      {"categories":["organization"],"term":"Vallecitos HOA","definition":"Vallecitos HOA"},
-      {"categories":["organization"],"term":"Naiche Development","definition":"Naiche Corporation"},
-      {"categories":["organization"],"term":"Santa Fe County; Santa Fe Animal Shelter","definition":"Santa Fe County; Santa Fe Animal Shelter"},
-      {"categories":["organization"],"term":"El Guicu Ditch Association","definition":"El Guicu Ditch Association"},
-      {"categories":["organization"],"term":"Santa Fe Municipal Airport","definition":"Santa Fe Municipal Airport"},
-      {"categories":["organization"],"term":"Uluru Development","definition":"Uluru Development"},
-      {"categories":["organization"],"term":"AllSup's Convenience Stores","definition":"AllSup's Convenience Stores"},
-      {"categories":["organization"],"term":"Santa Fe Downs","definition":"Santa Fe Downs Resort"},
-      {"categories":["organization"],"term":"City of Truth or Consequences, WWTP","definition":"City of Truth or Consequences, WWTP"},
-      {"categories":["organization"],"term":"Riverbend Hotsprings","definition":"Riverbend Hotsprings"},
-      {"categories":["organization"],"term":"Armendaris Ranch","definition":"Armendaris Ranch"},
-      {"categories":["organization"],"term":"El Paso Water","definition":"El Paso Water"},
-      {"categories":["organization"],"term":"BLM, Socorro Field Office","definition":"BLM, Socorro Field Office"},
-      {"categories":["organization"],"term":"USFWS","definition":"US Fish & Wildlife Service"},
-      {"categories":["organization"],"term":"NPS","definition":"National Park Service"},
-      {"categories":["organization"],"term":"Sile MDWCA","definition":"Sile Municipal Domestic Water Assn."},
-      {"categories":["organization"],"term":"Pena Blanca Water & Sanitation District","definition":"Pena Blanca Water & Sanitation District"},
-      {"categories":["organization"],"term":"Town of Questa","definition":"Town of Questa"},
-      {"categories":["organization"],"term":"Lamy MDWCA","definition":"Lama MDWCA"},
-      {"categories":["organization"],"term":"Town of Cerro","definition":"Town of Cerro"},
-      {"categories":["organization"],"term":"Farr Cattle Company","definition":"Farr Cattle Company (Farr Ranch)"},
-      {"categories":["organization"],"term":"Carrizozo Orchard","definition":"Carrizozo Orchard"},
-      {"categories":["organization"],"term":"USFS, Kiowa Grasslands","definition":"USFS, Kiowa Grasslands"},
-      {"categories":["organization"],"term":"Cloud Country West Subdivision","definition":"Cloud Country West Subdivision"},
-      {"categories":["organization"],"term":"Chama West Water Users Association","definition":"Chama West Water Users Assn."},
-      {"categories":["organization"],"term":"El Rito Regional Water and Waste Water Association","definition":"El Rito Regional Water + Waste Water Association"},
-      {"categories":["organization"],"term":"West Rim MDWUA","definition":"West Rim MDWUA"},
-      {"categories":["organization"],"term":"Village of Willard","definition":"Village of Willard"},
-      {"categories":["organization"],"term":"Quemado Municipal Water & SWA","definition":"Quemado Mutual Water and Sewage Works Association"},
-      {"categories":["organization"],"term":"Coyote Creek MDWUA","definition":"Coyote Creek MDWUA"},
-      {"categories":["organization"],"term":"Lamy MDWCA","definition":"Lamy Mutual Domestic Water Assn."},
-      {"categories":["organization"],"term":"La Joya CWDA","definition":"La Joya CWDA"},
-      {"categories":["organization"],"term":"NM Firefighters Training Academy","definition":"NM Firefighters Training Academy"},
-      {"categories":["organization"],"term":"Cebolleta Land Grant","definition":"Cebolleta Land Grant"},
-      {"categories":["organization"],"term":"Madrid Water Co-op","definition":"Madrid Water Co-op"},
-      {"categories":["organization"],"term":"Sun Valley Water and Sanitation","definition":"Sun Valley Water and Sanitation"},
-      {"categories":["organization"],"term":"Bluewater Lake MDWCA","definition":"Bluewater Lake MDWCA"},
-      {"categories":["organization"],"term":"Bluewater Acres Domestic WUA","definition":"Bluewater Acres Domestic Water Users Assn."},
-      {"categories":["organization"],"term":"Lybrook MDWCA","definition":"Lybrook Municipal"},
-      {"categories":["organization"],"term":"New Mexico Museum of Natural History","definition":"New Mexico Museum of Natural History"},
-      {"categories":["organization"],"term":"Hillsboro MDWCA","definition":"Hillsboro Mutual Domestic Water Consumer Assn."},
-      {"categories":["organization"],"term":"Tyrone MDWCA","definition":"Tyrone Mutual Domestic Water Assn."},
-      {"categories":["organization"],"term":"Santa Clara Water System","definition":"Santa Clara Water System"},
-      {"categories":["organization"],"term":"Casas Adobes MDWCA","definition":"Casas Adobes Mutual Domestic"},
-      {"categories":["organization"],"term":"Lake Roberts WUA","definition":"Lake Roberts Water Assn."},
-      {"categories":["organization"],"term":"El Creston MDWCA","definition":"El Creston MDWCA"},
-      {"categories":["organization"],"term":"Reserve Municipality Water Works","definition":"Reserve Municipality Water Works"},
-      {"categories":["organization"],"term":"Bayard","definition":"Bayard Municipal Water"},
-      {"categories":["organization"],"term":"Town of Estancia","definition":"Town of Estancia"},
-      {"categories":["organization"],"term":"Pie Town MDWCA","definition":"Pie Town MDWCA"},
-      {"categories":["organization"],"term":"Roosevelt SWCD","definition":"Roosevelt Soil & Water Conservation District"},
-      {"categories":["organization"],"term":"Otis MDWCA","definition":"Otis Mutual Domestic"},
-      {"categories":["organization"],"term":"White Cliffs MDWUA","definition":"White Cliffs MDWUA"},
-      {"categories":["organization"],"term":"Vista Linda Water Co-op","definition":"Vista Linda Water Co-op"},
-      {"categories":["organization"],"term":"Anasazi Trails Water Co-op","definition":"Anasazi Trails Water Cooperative"},
-      {"categories":["organization"],"term":"Canon MDWCA","definition":"Canon Mutual Domestic Water Consumer Assn."},
-      {"categories":["organization"],"term":"Placitas Trails Water Co-op","definition":"Placitas Trails Water Coop"},
-      {"categories":["organization"],"term":"BLM, Roswell Office","definition":"BLM, Roswell Office"},
-      {"categories":["organization"],"term":"Forked Lightning Ranch","definition":"Forked Lightning Ranch"},
-      {"categories":["organization"],"term":"Cottonwood RWA","definition":"Cottonwood Rural Water Assn."},
-      {"categories":["organization"],"term":"Pinon Ridge WUA","definition":"Pinon Ridge Water Users Association"},
-      {"categories":["organization"],"term":"McSherry Farms","definition":"McSherry Farms"},
-      {"categories":["organization"],"term":"Agua Sana WUA","definition":"Agua Sana Water Users Assn."},
-      {"categories":["organization"],"term":"Chamita MDWCA","definition":"Chamita Water Users Association"},
-      {"categories":["organization"],"term":"W Spear-bar Ranch","definition":"W Spear-bar Ranch"},
-      {"categories":["organization"],"term":"Village of Capitan","definition":"Village of Capitan"},
-      {"categories":["organization"],"term":"Brazos MDWCA","definition":"Brazos Mutual Domestic Water Consumers Assn."},
-      {"categories":["organization"],"term":"Alto Alps HOA","definition":"Alto Alps Homeowners Association"},
-      {"categories":["organization"],"term":"Chiricahua Desert Museum","definition":"Chiricahua Desert Museum"},
-      {"categories":["organization"],"term":"Bike Ranch","definition":"Bike Ranch"},
-      {"categories":["organization"],"term":"Hachita MDWCA","definition":"Hachita MDWCA"},
-      {"categories":["organization"],"term":"Carrizozo Municipal Water","definition":"Carrizozo Municipal Water"},
-      {"categories":["organization"],"term":"Dunhill Ranch","definition":"Dunhill Ranch"},
-      {"categories":["organization"],"term":"Santa Fe Conservation Trust","definition":"Santa Fe Conservation Trust"},
+    {"categories": ["organization"], "term": "City of Aztec", "definition": "City of Aztec"},
+    {"categories": ["organization"], "term": "Daybreak Investments", "definition": "Daybreak Investments"},
+    {"categories": ["organization"], "term": "Vallecitos HOA", "definition": "Vallecitos HOA"},
+    {"categories": ["organization"], "term": "Naiche Development", "definition": "Naiche Corporation"},
+    {"categories": ["organization"], "term": "Santa Fe County; Santa Fe Animal Shelter", "definition": "Santa Fe County; Santa Fe Animal Shelter"},
+    {"categories": ["organization"], "term": "El Guicu Ditch Association", "definition": "El Guicu Ditch Association"},
+    {"categories": ["organization"], "term": "Santa Fe Municipal Airport", "definition": "Santa Fe Municipal Airport"},
+    {"categories": ["organization"], "term": "Uluru Development", "definition": "Uluru Development"},
+    {"categories": ["organization"], "term": "AllSup's Convenience Stores", "definition": "AllSup's Convenience Stores"},
+    {"categories": ["organization"], "term": "Santa Fe Downs", "definition": "Santa Fe Downs Resort"},
+    {"categories": ["organization"], "term": "City of Truth or Consequences, WWTP", "definition": "City of Truth or Consequences, WWTP"},
+    {"categories": ["organization"], "term": "Riverbend Hotsprings", "definition": "Riverbend Hotsprings"},
+    {"categories": ["organization"], "term": "Armendaris Ranch", "definition": "Armendaris Ranch"},
+    {"categories": ["organization"], "term": "El Paso Water", "definition": "El Paso Water"},
+    {"categories": ["organization"], "term": "BLM, Socorro Field Office", "definition": "BLM, Socorro Field Office"},
+    {"categories": ["organization"], "term": "USFWS", "definition": "US Fish & Wildlife Service"},
+    {"categories": ["organization"], "term": "NPS", "definition": "National Park Service"},
+    {"categories": ["organization"], "term": "Sile MDWCA", "definition": "Sile Municipal Domestic Water Assn."},
+    {"categories": ["organization"], "term": "Pena Blanca Water & Sanitation District", "definition": "Pena Blanca Water & Sanitation District"},
+    {"categories": ["organization"], "term": "Town of Questa", "definition": "Town of Questa"},
+    {"categories": ["organization"], "term": "Lamy MDWCA", "definition": "Lama MDWCA"},
+    {"categories": ["organization"], "term": "Town of Cerro", "definition": "Town of Cerro"},
+    {"categories": ["organization"], "term": "Farr Cattle Company", "definition": "Farr Cattle Company (Farr Ranch)"},
+    {"categories": ["organization"], "term": "Carrizozo Orchard", "definition": "Carrizozo Orchard"},
+    {"categories": ["organization"], "term": "USFS, Kiowa Grasslands", "definition": "USFS, Kiowa Grasslands"},
+    {"categories": ["organization"], "term": "Cloud Country West Subdivision", "definition": "Cloud Country West Subdivision"},
+    {"categories": ["organization"], "term": "Chama West Water Users Association", "definition": "Chama West Water Users Assn."},
+    {"categories": ["organization"], "term": "El Rito Regional Water and Waste Water Association", "definition": "El Rito Regional Water + Waste Water Association"},
+    {"categories": ["organization"], "term": "West Rim MDWUA", "definition": "West Rim MDWUA"},
+    {"categories": ["organization"], "term": "Village of Willard", "definition": "Village of Willard"},
+    {"categories": ["organization"], "term": "Quemado Municipal Water & SWA", "definition": "Quemado Mutual Water and Sewage Works Association"},
+    {"categories": ["organization"], "term": "Coyote Creek MDWUA", "definition": "Coyote Creek MDWUA"},
+    {"categories": ["organization"], "term": "Lamy MDWCA", "definition": "Lamy Mutual Domestic Water Assn."},
+    {"categories": ["organization"], "term": "La Joya CWDA", "definition": "La Joya CWDA"},
+    {"categories": ["organization"], "term": "NM Firefighters Training Academy", "definition": "NM Firefighters Training Academy"},
+    {"categories": ["organization"], "term": "Cebolleta Land Grant", "definition": "Cebolleta Land Grant"},
+    {"categories": ["organization"], "term": "Madrid Water Co-op", "definition": "Madrid Water Co-op"},
+    {"categories": ["organization"], "term": "Sun Valley Water and Sanitation", "definition": "Sun Valley Water and Sanitation"},
+    {"categories": ["organization"], "term": "Bluewater Lake MDWCA", "definition": "Bluewater Lake MDWCA"},
+    {"categories": ["organization"], "term": "Bluewater Acres Domestic WUA", "definition": "Bluewater Acres Domestic Water Users Assn."},
+    {"categories": ["organization"], "term": "Lybrook MDWCA", "definition": "Lybrook Municipal"},
+    {"categories": ["organization"], "term": "New Mexico Museum of Natural History", "definition": "New Mexico Museum of Natural History"},
+    {"categories": ["organization"], "term": "Hillsboro MDWCA", "definition": "Hillsboro Mutual Domestic Water Consumer Assn."},
+    {"categories": ["organization"], "term": "Tyrone MDWCA", "definition": "Tyrone Mutual Domestic Water Assn."},
+    {"categories": ["organization"], "term": "Santa Clara Water System", "definition": "Santa Clara Water System"},
+    {"categories": ["organization"], "term": "Casas Adobes MDWCA", "definition": "Casas Adobes Mutual Domestic"},
+    {"categories": ["organization"], "term": "Lake Roberts WUA", "definition": "Lake Roberts Water Assn."},
+    {"categories": ["organization"], "term": "El Creston MDWCA", "definition": "El Creston MDWCA"},
+    {"categories": ["organization"], "term": "Reserve Municipality Water Works", "definition": "Reserve Municipality Water Works"},
+    {"categories": ["organization"], "term": "Bayard", "definition": "Bayard Municipal Water"},
+    {"categories": ["organization"], "term": "Town of Estancia", "definition": "Town of Estancia"},
+    {"categories": ["organization"], "term": "Pie Town MDWCA", "definition": "Pie Town MDWCA"},
+    {"categories": ["organization"], "term": "Roosevelt SWCD", "definition": "Roosevelt Soil & Water Conservation District"},
+    {"categories": ["organization"], "term": "Otis MDWCA", "definition": "Otis Mutual Domestic"},
+    {"categories": ["organization"], "term": "White Cliffs MDWUA", "definition": "White Cliffs MDWUA"},
+    {"categories": ["organization"], "term": "Vista Linda Water Co-op", "definition": "Vista Linda Water Co-op"},
+    {"categories": ["organization"], "term": "Anasazi Trails Water Co-op", "definition": "Anasazi Trails Water Cooperative"},
+    {"categories": ["organization"], "term": "Canon MDWCA", "definition": "Canon Mutual Domestic Water Consumer Assn."},
+    {"categories": ["organization"], "term": "Placitas Trails Water Co-op", "definition": "Placitas Trails Water Coop"},
+    {"categories": ["organization"], "term": "BLM, Roswell Office", "definition": "BLM, Roswell Office"},
+    {"categories": ["organization"], "term": "Forked Lightning Ranch", "definition": "Forked Lightning Ranch"},
+    {"categories": ["organization"], "term": "Cottonwood RWA", "definition": "Cottonwood Rural Water Assn."},
+    {"categories": ["organization"], "term": "Pinon Ridge WUA", "definition": "Pinon Ridge Water Users Association"},
+    {"categories": ["organization"], "term": "McSherry Farms", "definition": "McSherry Farms"},
+    {"categories": ["organization"], "term": "Agua Sana WUA", "definition": "Agua Sana Water Users Assn."},
+    {"categories": ["organization"], "term": "Chamita MDWCA", "definition": "Chamita Water Users Association"},
+    {"categories": ["organization"], "term": "W Spear-bar Ranch", "definition": "W Spear-bar Ranch"},
+    {"categories": ["organization"], "term": "Village of Capitan", "definition": "Village of Capitan"},
+    {"categories": ["organization"], "term": "Brazos MDWCA", "definition": "Brazos Mutual Domestic Water Consumers Assn."},
+    {"categories": ["organization"], "term": "Alto Alps HOA", "definition": "Alto Alps Homeowners Association"},
+    {"categories": ["organization"], "term": "Chiricahua Desert Museum", "definition": "Chiricahua Desert Museum"},
+    {"categories": ["organization"], "term": "Bike Ranch", "definition": "Bike Ranch"},
+    {"categories": ["organization"], "term": "Hachita MDWCA", "definition": "Hachita MDWCA"},
+    {"categories": ["organization"], "term": "Carrizozo Municipal Water", "definition": "Carrizozo Municipal Water"},
+    {"categories": ["organization"], "term": "Dunhill Ranch", "definition": "Dunhill Ranch"},
+    {"categories": ["organization"], "term": "Santa Fe Conservation Trust", "definition": "Santa Fe Conservation Trust"},
     {"categories": ["organization"], "term": "NMSU", "definition": "New Mexico State University"},
     {"categories": ["organization"], "term": "USGS", "definition": "US Geological Survey"},
     {"categories": ["organization"], "term": "TWDB", "definition": "Texas Water Development Board"},

From 650203243ebe20a0933c7793844879942c338a98 Mon Sep 17 00:00:00 2001
From: jross <jake.ross@nmt.edu>
Date: Wed, 3 Dec 2025 15:11:42 -0700
Subject: [PATCH 63/66] feat: add DiverLink and Diver Cable to sensor mapping

---
 transfers/sensor_transfer.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/transfers/sensor_transfer.py b/transfers/sensor_transfer.py
index 76f9f4fe9..2f4ce7cf3 100644
--- a/transfers/sensor_transfer.py
+++ b/transfers/sensor_transfer.py
@@ -33,6 +33,8 @@
     "Pressure transducer": "Pressure Transducer",
     "Acoustic sounder": "Acoustic Sounder",
     "Barometer": "Barometer",
+    "DiverLink": "DiverLink",
+    "Diver Cable": "Diver Cable",
 }
 
 
From d8a16784299121229e42329993602530dae17e9a Mon Sep 17 00:00:00 2001
From: jross <jake.ross@nmt.edu>
Date: Wed, 3 Dec 2025 15:14:56 -0700
Subject: [PATCH 64/66] feat: remove Farr Cattle Company from organization
 lexicon

---
 core/lexicon.json | 1 -
 1 file changed, 1 deletion(-)

diff --git a/core/lexicon.json b/core/lexicon.json
index 8fca294be..f7a4f381f 100644
--- a/core/lexicon.json
+++ b/core/lexicon.json
@@ -551,7 +551,6 @@
     {"categories": ["organization"], "term": "El Rito Canyon MDWCA", "definition": "El Rito Canyon MDWCA"},
     {"categories": ["organization"], "term": "Encantado Enterprises", "definition": "Encantado Enterprises"},
     {"categories": ["organization"], "term": "Estrella Concepts LLC", "definition": "Estrella Concepts LLC"},
-    {"categories": ["organization"], "term": "Farr Cattle Company", "definition": "Farr Cattle Company"},
     {"categories": ["organization"], "term": "Sixteen Springs Fire Department", "definition": "Sixteen Springs Fire Department"},
     {"categories": ["organization"], "term": "Fire Water Lodge", "definition": "Fire Water Lodge"},
     {"categories": ["organization"], "term": "Ford County Land & Cattle Company, Inc", "definition": "Ford County Land & Cattle Company, Inc"},

From b2674058240ff82e90467bca4a5a04717549c6a0 Mon Sep 17 00:00:00 2001
From: jross <jake.ross@nmt.edu>
Date: Wed, 3 Dec 2025 15:17:21 -0700
Subject: [PATCH 65/66] feat: remove Lamy MDWCA from organization lexicon

---
 core/lexicon.json | 1 -
 1 file changed, 1 deletion(-)

diff --git a/core/lexicon.json b/core/lexicon.json
index f7a4f381f..e5048eb49 100644
--- a/core/lexicon.json
+++ b/core/lexicon.json
@@ -564,7 +564,6 @@
     {"categories": ["organization"], "term": "K. Schmitt Trust", "definition": "K. Schmitt Trust"},
     {"categories": ["organization"], "term": "La Cienega MDWCA", "definition": "La Cienega MDWCA"},
     {"categories": ["organization"], "term": "La Vista HOA", "definition": "La Vista HOA"},
-    {"categories": ["organization"], "term": "Lamy MDWCA", "definition": "Lamy MDWCA"},
     {"categories": ["organization"], "term": "Land Ventures LLC", "definition": "Land Ventures LLC"},
     {"categories": ["organization"], "term": "Las Lagunitas", "definition": "Las Lagunitas"},
     {"categories": ["organization"], "term": "Las Lagunitas HOA", "definition": "Las Lagunitas HOA"},

From 147db27258975c5c6cd158f750b291800bd49cc2 Mon Sep 17 00:00:00 2001
From: jross <jake.ross@nmt.edu>
Date: Wed, 3 Dec 2025 15:21:35 -0700
Subject: [PATCH 66/66] feat: remove Santa Fe Downs from organization lexicon

---
 core/lexicon.json | 1 -
 1 file changed, 1 deletion(-)

diff --git a/core/lexicon.json b/core/lexicon.json
index e5048eb49..423be5332 100644
--- a/core/lexicon.json
+++ b/core/lexicon.json
@@ -605,7 +605,6 @@
     {"categories": ["organization"], "term": "Sangre de Cristo Center", "definition": "Sangre de Cristo Center"},
     {"categories": ["organization"], "term": "Valle Vista Water Utility", "definition": "Valle Vista Water Utility"},
     {"categories": ["organization"], "term": "Santa Fe County, Valle Vista Water Utility, Inc.", "definition": "Santa Fe County, Valle Vista Water Utility, Inc."},
-    {"categories": ["organization"], "term": "Santa Fe Downs", "definition": "Santa Fe Downs"},
     {"categories": ["organization"], "term": "Santa Fe Horse Park", "definition": "Santa Fe Horse Park"},
     {"categories": ["organization"], "term": "Santa Fe Opera", "definition": "Santa Fe Opera"},
     {"categories": ["organization"], "term": "Santa Fe Waldorf School", "definition": "Santa Fe Waldorf School"},