From 9432f8849e99b888ac1030b4a900ff328c13aa21 Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 26 Nov 2025 16:31:32 -0800 Subject: [PATCH 01/37] Unify read csv approaches --- .gitignore | 1 + transfers/util.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/.gitignore b/.gitignore index c1d8db1ee..f1bd9dd54 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ launcher.sh gcs_credentials.json transfers/data/assets* transfers/data/nma_csv_cache/* +transfers/data/*.csv transfers/transfer*.log transfer*.log transfers/data/nma_csv_cache/* diff --git a/transfers/util.py b/transfers/util.py index cbf0f2b17..590c9252d 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -59,10 +59,24 @@ def replace_nans(df: pd.DataFrame, default=None) -> pd.DataFrame: def read_csv(name: str, dtype: dict | None = None) -> pd.DataFrame: + # Try to read from local data directory first + local_file = Path(__file__).parent / 'data' / f"{name}.csv" + + if local_file.exists(): + logger.info(f"Reading {name} from local file: {local_file}") + if dtype: + return pd.read_csv(local_file, dtype=dtype) + else: + return pd.read_csv(local_file) + + # Check cache directory p = get_transfers_data_path(Path("nma_csv_cache") / f"{name}.csv") if os.path.exists(p): + logger.info(f"Reading {name} from cache: {p}") return pd.read_csv(p, dtype=dtype) + # Fall back to GCS if local file doesn't exist + logger.info(f"Local file and cache not found, reading {name} from GCS") bucket = get_storage_bucket() blob = bucket.blob(f"nma_csv/{name}.csv") data = blob.download_as_bytes() From 5db6964799f93c102c8f28851f84c0e3af69e3de Mon Sep 17 00:00:00 2001 From: kbighorse Date: Thu, 27 Nov 2025 00:31:24 +0000 Subject: [PATCH 02/37] Formatting changes --- transfers/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transfers/util.py b/transfers/util.py index 590c9252d..d08798425 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -60,7 +60,7 @@ def 
replace_nans(df: pd.DataFrame, default=None) -> pd.DataFrame: def read_csv(name: str, dtype: dict | None = None) -> pd.DataFrame: # Try to read from local data directory first - local_file = Path(__file__).parent / 'data' / f"{name}.csv" + local_file = Path(__file__).parent / "data" / f"{name}.csv" if local_file.exists(): logger.info(f"Reading {name} from local file: {local_file}") From fe6f50ccf91825676a8d57fbd574ce85dd6819ee Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 26 Nov 2025 16:55:38 -0800 Subject: [PATCH 03/37] Un-ignore features; add features for location and well dates --- .gitignore | 1 - tests/features/location-legacy-dates.feature | 57 +++++++++++++++++ tests/features/well-completion-date.feature | 64 ++++++++++++++++++++ 3 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 tests/features/location-legacy-dates.feature create mode 100644 tests/features/well-completion-date.feature diff --git a/.gitignore b/.gitignore index f1bd9dd54..44b28e13c 100644 --- a/.gitignore +++ b/.gitignore @@ -30,7 +30,6 @@ transfers/transfer*.log transfer*.log transfers/data/nma_csv_cache/* !transfers/data/nma_csv_cache/.gitkeep -tests/features/*.feature transfers/metrics/* transfers/logs/* run_bdd-local.sh diff --git a/tests/features/location-legacy-dates.feature b/tests/features/location-legacy-dates.feature new file mode 100644 index 000000000..1486d9edc --- /dev/null +++ b/tests/features/location-legacy-dates.feature @@ -0,0 +1,57 @@ +Feature: Location Legacy Date Fields + As a data manager + I want to preserve legacy date information from the AMPAPI system + So that historical temporal context is not lost during migration + + Background: + Given a functioning api + + Scenario: Create location with both legacy dates + When I create a location with legacy_date_created "2014-10-17" and inventoried_on "2003-12-10" + Then the response should include legacy_date_created as "2014-10-17" + And the response should include inventoried_on as 
"2003-12-10" + And the created_at timestamp should be the current system time + And the time gap between inventoried_on and legacy_date_created should be preserved + + Scenario: Create location with only legacy_date_created + When I create a location with legacy_date_created "2014-10-17" + Then the response should include legacy_date_created as "2014-10-17" + And the response should include inventoried_on as null + And the created_at timestamp should be the current system time + + Scenario: Create location with only inventoried_on + When I create a location with inventoried_on "2003-12-10" + Then the response should include inventoried_on as "2003-12-10" + And the response should include legacy_date_created as null + And the created_at timestamp should be the current system time + + Scenario: Create location with neither legacy date + When I create a location without legacy dates + Then the response should include legacy_date_created as null + And the response should include inventoried_on as null + And the created_at timestamp should be the current system time + + Scenario: Update location legacy dates + Given a location exists with legacy_date_created "2014-10-17" + When I update the location to add inventoried_on "2003-12-10" + Then the response should include legacy_date_created as "2014-10-17" + And the response should include inventoried_on as "2003-12-10" + + Scenario: Retrieve location with legacy dates via GET + Given a location exists with legacy_date_created "2014-10-17" and inventoried_on "2003-12-10" + When I retrieve the location by ID + Then the response should include legacy_date_created as "2014-10-17" + And the response should include inventoried_on as "2003-12-10" + + Scenario: Historical data preservation - 54 year gap (Site SM-0227) + When I create a location with legacy_date_created "2008-05-28" and inventoried_on "1954-05-01" + Then the response should include legacy_date_created as "2008-05-28" + And the response should include 
inventoried_on as "1954-05-01" + And the time gap should be approximately 19751 days + + Scenario: List locations includes legacy dates + Given multiple locations exist with various legacy dates + When I retrieve all locations + Then each location should include legacy_date_created field + And each location should include inventoried_on field + And the fields should be null for locations without legacy dates diff --git a/tests/features/well-completion-date.feature b/tests/features/well-completion-date.feature new file mode 100644 index 000000000..54f211ef5 --- /dev/null +++ b/tests/features/well-completion-date.feature @@ -0,0 +1,64 @@ +Feature: Well Completion Date + As a hydrogeologist + I want to track when wells were completed/constructed + So that I can analyze well age and relate construction standards to time periods + + Background: + Given a functioning api + + Scenario: Create water well with completion date + When I create a water well with well_completed_on "2004-08-08" + Then the response should include well_completed_on as "2004-08-08" + And the response should have thing_type "water well" + + Scenario: Create water well without completion date + When I create a water well without well_completed_on + Then the response should include well_completed_on as null + And the well should be created successfully + + Scenario: Update well to add completion date + Given a water well exists without well_completed_on + When I update the well to add well_completed_on "2004-08-08" + Then the response should include well_completed_on as "2004-08-08" + + Scenario: Update well to change completion date + Given a water well exists with well_completed_on "2004-08-08" + When I update the well to change well_completed_on to "2005-03-15" + Then the response should include well_completed_on as "2005-03-15" + + Scenario: Historical well from 1936 + When I create a water well with well_completed_on "1936-01-01" + Then the response should include well_completed_on as 
"1936-01-01" + And the well age should be over 88 years + + Scenario: Retrieve well with completion date via GET + Given a water well exists with well_completed_on "2004-08-08" + When I retrieve the well by ID + Then the response should include well_completed_on as "2004-08-08" + And the response should include the well's age in years + + Scenario: List wells includes completion dates + Given multiple wells exist with various completion dates + When I retrieve all water wells + Then each well should include well_completed_on field + And the field should be null for wells without completion dates + + Scenario: Spring does not have completion date + When I create a spring + Then the response should include well_completed_on as null + And the spring should be created successfully + + Scenario: Filter wells by completion date range + Given wells exist with completion dates ranging from 1936 to 2024 + When I filter wells completed between "2000-01-01" and "2010-12-31" + Then the response should only include wells completed in that range + And wells from 1936 should not be included + And wells from 2020 should not be included + + Scenario: Well completion date with location legacy dates + When I create a water well with well_completed_on "2004-08-08" + And the well's location has legacy_date_created "2014-10-17" and inventoried_on "2013-05-01" + Then the well should have well_completed_on as "2004-08-08" + And the location should have legacy_date_created as "2014-10-17" + And the location should have inventoried_on as "2013-05-01" + And all three date fields should be independently queryable From 738c1ef123120dca01ce9cb86ac234a594b9f7af Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 26 Nov 2025 17:04:36 -0800 Subject: [PATCH 04/37] Remove features we won't keep --- tests/features/location-legacy-dates.feature | 57 ----------------- tests/features/well-completion-date.feature | 64 -------------------- 2 files changed, 121 deletions(-) delete mode 100644 
tests/features/location-legacy-dates.feature delete mode 100644 tests/features/well-completion-date.feature diff --git a/tests/features/location-legacy-dates.feature b/tests/features/location-legacy-dates.feature deleted file mode 100644 index 1486d9edc..000000000 --- a/tests/features/location-legacy-dates.feature +++ /dev/null @@ -1,57 +0,0 @@ -Feature: Location Legacy Date Fields - As a data manager - I want to preserve legacy date information from the AMPAPI system - So that historical temporal context is not lost during migration - - Background: - Given a functioning api - - Scenario: Create location with both legacy dates - When I create a location with legacy_date_created "2014-10-17" and inventoried_on "2003-12-10" - Then the response should include legacy_date_created as "2014-10-17" - And the response should include inventoried_on as "2003-12-10" - And the created_at timestamp should be the current system time - And the time gap between inventoried_on and legacy_date_created should be preserved - - Scenario: Create location with only legacy_date_created - When I create a location with legacy_date_created "2014-10-17" - Then the response should include legacy_date_created as "2014-10-17" - And the response should include inventoried_on as null - And the created_at timestamp should be the current system time - - Scenario: Create location with only inventoried_on - When I create a location with inventoried_on "2003-12-10" - Then the response should include inventoried_on as "2003-12-10" - And the response should include legacy_date_created as null - And the created_at timestamp should be the current system time - - Scenario: Create location with neither legacy date - When I create a location without legacy dates - Then the response should include legacy_date_created as null - And the response should include inventoried_on as null - And the created_at timestamp should be the current system time - - Scenario: Update location legacy dates - Given a location 
exists with legacy_date_created "2014-10-17" - When I update the location to add inventoried_on "2003-12-10" - Then the response should include legacy_date_created as "2014-10-17" - And the response should include inventoried_on as "2003-12-10" - - Scenario: Retrieve location with legacy dates via GET - Given a location exists with legacy_date_created "2014-10-17" and inventoried_on "2003-12-10" - When I retrieve the location by ID - Then the response should include legacy_date_created as "2014-10-17" - And the response should include inventoried_on as "2003-12-10" - - Scenario: Historical data preservation - 54 year gap (Site SM-0227) - When I create a location with legacy_date_created "2008-05-28" and inventoried_on "1954-05-01" - Then the response should include legacy_date_created as "2008-05-28" - And the response should include inventoried_on as "1954-05-01" - And the time gap should be approximately 19751 days - - Scenario: List locations includes legacy dates - Given multiple locations exist with various legacy dates - When I retrieve all locations - Then each location should include legacy_date_created field - And each location should include inventoried_on field - And the fields should be null for locations without legacy dates diff --git a/tests/features/well-completion-date.feature b/tests/features/well-completion-date.feature deleted file mode 100644 index 54f211ef5..000000000 --- a/tests/features/well-completion-date.feature +++ /dev/null @@ -1,64 +0,0 @@ -Feature: Well Completion Date - As a hydrogeologist - I want to track when wells were completed/constructed - So that I can analyze well age and relate construction standards to time periods - - Background: - Given a functioning api - - Scenario: Create water well with completion date - When I create a water well with well_completed_on "2004-08-08" - Then the response should include well_completed_on as "2004-08-08" - And the response should have thing_type "water well" - - Scenario: Create water 
well without completion date - When I create a water well without well_completed_on - Then the response should include well_completed_on as null - And the well should be created successfully - - Scenario: Update well to add completion date - Given a water well exists without well_completed_on - When I update the well to add well_completed_on "2004-08-08" - Then the response should include well_completed_on as "2004-08-08" - - Scenario: Update well to change completion date - Given a water well exists with well_completed_on "2004-08-08" - When I update the well to change well_completed_on to "2005-03-15" - Then the response should include well_completed_on as "2005-03-15" - - Scenario: Historical well from 1936 - When I create a water well with well_completed_on "1936-01-01" - Then the response should include well_completed_on as "1936-01-01" - And the well age should be over 88 years - - Scenario: Retrieve well with completion date via GET - Given a water well exists with well_completed_on "2004-08-08" - When I retrieve the well by ID - Then the response should include well_completed_on as "2004-08-08" - And the response should include the well's age in years - - Scenario: List wells includes completion dates - Given multiple wells exist with various completion dates - When I retrieve all water wells - Then each well should include well_completed_on field - And the field should be null for wells without completion dates - - Scenario: Spring does not have completion date - When I create a spring - Then the response should include well_completed_on as null - And the spring should be created successfully - - Scenario: Filter wells by completion date range - Given wells exist with completion dates ranging from 1936 to 2024 - When I filter wells completed between "2000-01-01" and "2010-12-31" - Then the response should only include wells completed in that range - And wells from 1936 should not be included - And wells from 2020 should not be included - - Scenario: Well 
completion date with location legacy dates - When I create a water well with well_completed_on "2004-08-08" - And the well's location has legacy_date_created "2014-10-17" and inventoried_on "2013-05-01" - Then the well should have well_completed_on as "2004-08-08" - And the location should have legacy_date_created as "2014-10-17" - And the location should have inventoried_on as "2013-05-01" - And all three date fields should be independently queryable From 953263252428153889933ffe74ecbd97ca133109 Mon Sep 17 00:00:00 2001 From: kbighorse Date: Thu, 27 Nov 2025 01:12:56 +0000 Subject: [PATCH 05/37] Formatting changes --- .../steps/post_migration_legacy_data.py | 200 +++++++++++------- 1 file changed, 129 insertions(+), 71 deletions(-) diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py index dca15d638..e78afbde7 100644 --- a/tests/features/steps/post_migration_legacy_data.py +++ b/tests/features/steps/post_migration_legacy_data.py @@ -27,6 +27,7 @@ def parse_number(text): return int(text) + register_type(Number=parse_number) @@ -91,14 +92,21 @@ def step_given_data_migrated(context: Context): @given("a location exists with") def step_given_location_with_table(context: Context): """Create location with fields from table.""" - data = {row['field']: row['value'] for row in context.table} + data = {row["field"]: row["value"] for row in context.table} - legacy_date_created = date.fromisoformat(data['legacy_date_created']) if data.get('legacy_date_created') and data['legacy_date_created'] != 'null' else None - inventoried_on = date.fromisoformat(data['inventoried_on']) if data.get('inventoried_on') and data['inventoried_on'] != 'null' else None + legacy_date_created = ( + date.fromisoformat(data["legacy_date_created"]) + if data.get("legacy_date_created") and data["legacy_date_created"] != "null" + else None + ) + inventoried_on = ( + date.fromisoformat(data["inventoried_on"]) + if data.get("inventoried_on") 
and data["inventoried_on"] != "null" + else None + ) location = create_test_location( - legacy_date_created=legacy_date_created, - inventoried_on=inventoried_on + legacy_date_created=legacy_date_created, inventoried_on=inventoried_on ) context.test_location = location @@ -122,12 +130,16 @@ def step_given_multiple_locations(context: Context, count: int): legacy_date, inventory_date = test_data[i] location = create_test_location( legacy_date_created=date.fromisoformat(legacy_date), - inventoried_on=date.fromisoformat(inventory_date) if inventory_date else None + inventoried_on=( + date.fromisoformat(inventory_date) if inventory_date else None + ), ) context.test_locations.append(location) -@given("locations exist with inventoried_on ranging from {start_year:Number} to {end_year:Number}") +@given( + "locations exist with inventoried_on ranging from {start_year:Number} to {end_year:Number}" +) def step_given_locations_date_range(context: Context, start_year: int, end_year: int): """Create locations with inventoried_on across a date range.""" context.test_locations = [] @@ -136,15 +148,17 @@ def step_given_locations_date_range(context: Context, start_year: int, end_year: for year in years: location = create_test_location( legacy_date_created=date(year + 5, 1, 1), # Always 5 years after inventory - inventoried_on=date(year, 6, 15) + inventoried_on=date(year, 6, 15), ) context.test_locations.append(location) @given('{count:Number} locations exist with legacy_date_created "{target_date}"') -def step_given_locations_with_specific_date(context: Context, count: int, target_date: str): +def step_given_locations_with_specific_date( + context: Context, count: int, target_date: str +): """Create locations with specific legacy_date_created.""" - if not hasattr(context, 'test_locations'): + if not hasattr(context, "test_locations"): context.test_locations = [] target = date.fromisoformat(target_date) @@ -152,7 +166,7 @@ def step_given_locations_with_specific_date(context: Context, 
count: int, target for i in range(count): location = create_test_location( legacy_date_created=target, - inventoried_on=date(2000 + i, 1, 1) # Vary the inventory dates + inventoried_on=date(2000 + i, 1, 1), # Vary the inventory dates ) context.test_locations.append(location) @@ -160,7 +174,9 @@ def step_given_locations_with_specific_date(context: Context, count: int, target @given('a well exists with well_completed_on "{completion_date}"') def step_given_well_with_completion(context: Context, completion_date: str): """Create well with completion date.""" - completed_on = date.fromisoformat(completion_date) if completion_date != 'null' else None + completed_on = ( + date.fromisoformat(completion_date) if completion_date != "null" else None + ) thing, location = create_test_well(well_completed_on=completed_on) @@ -185,7 +201,9 @@ def step_given_multiple_wells(context: Context, count: int): ] for i in range(min(count, len(completion_dates))): - completed_on = date.fromisoformat(completion_dates[i]) if completion_dates[i] else None + completed_on = ( + date.fromisoformat(completion_dates[i]) if completion_dates[i] else None + ) thing, location = create_test_well(well_completed_on=completed_on) context.test_wells.append(thing) @@ -197,7 +215,9 @@ def step_given_wells_with_null_completion(context: Context, null_count: int): pass -@given("wells exist with completion dates from {start_year:Number} to {end_year:Number}") +@given( + "wells exist with completion dates from {start_year:Number} to {end_year:Number}" +) def step_given_wells_date_range(context: Context, start_year: int, end_year: int): """Create wells with completion dates across range.""" context.test_wells = [] @@ -213,7 +233,7 @@ def step_given_wells_specific_years(context: Context, years: str): """Create wells with specific completion years.""" context.test_wells = [] - year_list = [int(y.strip()) for y in years.split(',')] + year_list = [int(y.strip()) for y in years.split(",")] for year in year_list: thing, 
location = create_test_well(well_completed_on=date(year, 6, 15)) @@ -223,7 +243,7 @@ def step_given_wells_specific_years(context: Context, years: str): @given("some wells have null well_completed_on") def step_given_some_wells_null(context: Context): """Add wells without completion dates.""" - if not hasattr(context, 'test_wells'): + if not hasattr(context, "test_wells"): context.test_wells = [] for i in range(2): @@ -234,10 +254,18 @@ def step_given_some_wells_null(context: Context): @given("that well's location has") def step_given_well_location_has_table(context: Context): """Set legacy dates on the well's location.""" - data = {row['field']: row['value'] for row in context.table} + data = {row["field"]: row["value"] for row in context.table} - legacy_date_created = date.fromisoformat(data.get('legacy_date_created')) if data.get('legacy_date_created') else None - inventoried_on = date.fromisoformat(data.get('inventoried_on')) if data.get('inventoried_on') else None + legacy_date_created = ( + date.fromisoformat(data.get("legacy_date_created")) + if data.get("legacy_date_created") + else None + ) + inventoried_on = ( + date.fromisoformat(data.get("inventoried_on")) + if data.get("inventoried_on") + else None + ) with session_ctx() as session: location = session.get(Location, context.test_well_location.id) @@ -255,11 +283,11 @@ def step_given_count_locations_migrated(context: Context, count: int): for i in range(count): # 9% have inventoried_on - has_inventory = (i < count * 0.09) + has_inventory = i < count * 0.09 location = create_test_location( legacy_date_created=date(2014, 1, i % 28 + 1), - inventoried_on=date(2003, 1, i % 28 + 1) if has_inventory else None + inventoried_on=date(2003, 1, i % 28 + 1) if has_inventory else None, ) context.test_locations.append(location) @@ -277,7 +305,7 @@ def step_given_count_wells_migrated(context: Context, count: int): for i in range(count): # 30% have completion dates - has_completion = (i < count * 0.30) + has_completion = 
i < count * 0.30 thing, location = create_test_well( well_completed_on=date(2000 + (i % 24), 1, 1) if has_completion else None @@ -295,8 +323,7 @@ def step_given_completion_count(context: Context, count: int): def step_given_location_migrated_with_dates(context: Context): """Create location with both legacy dates.""" location = create_test_location( - legacy_date_created=date(2014, 4, 3), - inventoried_on=date(2002, 12, 10) + legacy_date_created=date(2014, 4, 3), inventoried_on=date(2002, 12, 10) ) context.test_location = location @@ -319,6 +346,7 @@ def step_given_well_null_completion(context: Context): # WHEN steps + @when("I retrieve that location via the API") def step_when_retrieve_location_api(context: Context): """Retrieve location via GET API.""" @@ -335,7 +363,9 @@ def step_when_get_all_locations(context: Context): context.locations_response = response.json() -@when('I filter locations where inventoried_on is between "{start_date}" and "{end_date}"') +@when( + 'I filter locations where inventoried_on is between "{start_date}" and "{end_date}"' +) def step_when_filter_locations(context: Context, start_date: str, end_date: str): """Filter locations by date range.""" # Since API may not support this yet, query database directly @@ -343,10 +373,11 @@ def step_when_filter_locations(context: Context, start_date: str, end_date: str) start = date.fromisoformat(start_date) end = date.fromisoformat(end_date) - locations = session.query(Location).filter( - Location.inventoried_on >= start, - Location.inventoried_on <= end - ).all() + locations = ( + session.query(Location) + .filter(Location.inventoried_on >= start, Location.inventoried_on <= end) + .all() + ) context.filtered_locations = locations @@ -356,9 +387,9 @@ def step_when_query_by_legacy_date(context: Context, target_date: str): """Query locations by legacy_date_created.""" with session_ctx() as session: target = date.fromisoformat(target_date) - locations = session.query(Location).filter( - 
Location.legacy_date_created == target - ).all() + locations = ( + session.query(Location).filter(Location.legacy_date_created == target).all() + ) context.queried_locations = locations @@ -378,18 +409,24 @@ def step_when_get_all_wells(context: Context): context.wells_response = response.json() -@when('I filter wells where well_completed_on is between "{start_date}" and "{end_date}"') +@when( + 'I filter wells where well_completed_on is between "{start_date}" and "{end_date}"' +) def step_when_filter_wells(context: Context, start_date: str, end_date: str): """Filter wells by completion date range.""" with session_ctx() as session: start = date.fromisoformat(start_date) end = date.fromisoformat(end_date) - wells = session.query(Thing).filter( - Thing.thing_type == "water well", - Thing.well_completed_on >= start, - Thing.well_completed_on <= end - ).all() + wells = ( + session.query(Thing) + .filter( + Thing.thing_type == "water well", + Thing.well_completed_on >= start, + Thing.well_completed_on <= end, + ) + .all() + ) context.filtered_wells = wells @@ -398,9 +435,12 @@ def step_when_filter_wells(context: Context, start_date: str, end_date: str): def step_when_get_wells_sorted(context: Context): """Get wells sorted by completion date.""" with session_ctx() as session: - wells = session.query(Thing).filter( - Thing.thing_type == "water well" - ).order_by(Thing.well_completed_on.asc().nullslast()).all() + wells = ( + session.query(Thing) + .filter(Thing.thing_type == "water well") + .order_by(Thing.well_completed_on.asc().nullslast()) + .all() + ) context.sorted_wells = wells @@ -461,6 +501,7 @@ def step_when_retrieve_well(context: Context): # THEN steps + @then('the response should include legacy_date_created as "{expected_date}"') def step_then_legacy_date_created(context: Context, expected_date: str): """Assert legacy_date_created matches.""" @@ -492,8 +533,9 @@ def step_then_time_gap_years(context: Context, years: str): expected_years = float(years) tolerance = 
0.5 - assert abs(gap_years - expected_years) < tolerance, \ - f"Expected ~{expected_years} year gap, got {gap_years:.1f} years" + assert ( + abs(gap_years - expected_years) < tolerance + ), f"Expected ~{expected_years} year gap, got {gap_years:.1f} years" @then("each location should have a legacy_date_created field") @@ -524,24 +566,27 @@ def step_then_some_null_inventory(context: Context): def step_then_locations_in_decade(context: Context): """Assert filtered locations are in range.""" for loc in context.filtered_locations: - assert 2000 <= loc.inventoried_on.year <= 2010, \ - f"Location not in 2000-2010: {loc.inventoried_on}" + assert ( + 2000 <= loc.inventoried_on.year <= 2010 + ), f"Location not in 2000-2010: {loc.inventoried_on}" @then("locations inventoried before {year:Number} should not be included") def step_then_locations_before_excluded(context: Context, year: int): """Assert no locations before year.""" for loc in context.filtered_locations: - assert loc.inventoried_on.year >= year, \ - f"Location from {loc.inventoried_on.year} should not be included" + assert ( + loc.inventoried_on.year >= year + ), f"Location from {loc.inventoried_on.year} should not be included" @then("locations inventoried after {year:Number} should not be included") def step_then_locations_after_excluded(context: Context, year: int): """Assert no locations after year.""" for loc in context.filtered_locations: - assert loc.inventoried_on.year <= year, \ - f"Location from {loc.inventoried_on.year} should not be included" + assert ( + loc.inventoried_on.year <= year + ), f"Location from {loc.inventoried_on.year} should not be included" @then("the response should include exactly {count:Number} locations") @@ -556,8 +601,9 @@ def step_then_all_have_date(context: Context, expected_date: str): """Assert all have same date.""" expected = date.fromisoformat(expected_date) for loc in context.queried_locations: - assert loc.legacy_date_created == expected, \ - f"Location has 
{loc.legacy_date_created}, expected {expected}" + assert ( + loc.legacy_date_created == expected + ), f"Location has {loc.legacy_date_created}, expected {expected}" @then('the response should include well_completed_on as "{expected_date}"') @@ -610,8 +656,9 @@ def step_then_percentage_populated(context: Context, percentage: int): actual_pct = (populated / total) * 100 tolerance = 10 - assert abs(actual_pct - percentage) < tolerance, \ - f"Expected ~{percentage}%, got {actual_pct:.1f}%" + assert ( + abs(actual_pct - percentage) < tolerance + ), f"Expected ~{percentage}%, got {actual_pct:.1f}%" @then("the response should only include wells completed in that decade") @@ -650,11 +697,13 @@ def step_then_nulls_last(context: Context): """Assert nulls at end.""" first_null_idx = next( (i for i, w in enumerate(context.sorted_wells) if w.well_completed_on is None), - len(context.sorted_wells) + len(context.sorted_wells), ) for well in context.sorted_wells[first_null_idx:]: - assert well.well_completed_on is None, "Found non-null after null in sorted list" + assert ( + well.well_completed_on is None + ), "Found non-null after null in sorted list" @then('the well should have well_completed_on as "{expected_date}"') @@ -680,15 +729,21 @@ def step_then_location_has_inventory(context: Context, expected_date: str): assert actual == expected_date, f"Expected {expected_date}, got {actual}" -@then("the temporal sequence should be: well_completed_on → inventoried_on → legacy_date_created") +@then( + "the temporal sequence should be: well_completed_on → inventoried_on → legacy_date_created" +) def step_then_temporal_sequence(context: Context): """Assert temporal order.""" well_completed = context.retrieved_well.well_completed_on inventoried = context.retrieved_location.inventoried_on legacy_created = context.retrieved_location.legacy_date_created - assert well_completed < inventoried, "Well should be completed before site inventoried" - assert inventoried < legacy_created, "Site 
should be inventoried before DB record created" + assert ( + well_completed < inventoried + ), "Well should be completed before site inventoried" + assert ( + inventoried < legacy_created + ), "Site should be inventoried before DB record created" @then("the timeline should show: {year1:Number} → {year2:Number} → {year3:Number}") @@ -707,8 +762,9 @@ def step_then_percentage_inventory(context: Context, percentage: int): actual_pct = (populated / total) * 100 tolerance = 2 - assert abs(actual_pct - percentage) < tolerance, \ - f"Expected ~{percentage}%, got {actual_pct:.1f}%" + assert ( + abs(actual_pct - percentage) < tolerance + ), f"Expected ~{percentage}%, got {actual_pct:.1f}%" @then("{percentage:Number}% should have non-null legacy_date_created") @@ -719,8 +775,9 @@ def step_then_percentage_legacy(context: Context, percentage: int): actual_pct = (populated / total) * 100 tolerance = 2 - assert abs(actual_pct - percentage) < tolerance, \ - f"Expected ~{percentage}%, got {actual_pct:.1f}%" + assert ( + abs(actual_pct - percentage) < tolerance + ), f"Expected ~{percentage}%, got {actual_pct:.1f}%" @then("{percentage:Number}% should have non-null well_completed_on") @@ -731,8 +788,9 @@ def step_then_percentage_completion(context: Context, percentage: int): actual_pct = (populated / total) * 100 tolerance = 2 - assert abs(actual_pct - percentage) < tolerance, \ - f"Expected ~{percentage}%, got {actual_pct:.1f}%" + assert ( + abs(actual_pct - percentage) < tolerance + ), f"Expected ~{percentage}%, got {actual_pct:.1f}%" @then("it should have created_at (new system timestamp from migration)") @@ -756,9 +814,9 @@ def step_then_has_inventory_date(context: Context): @then("all three timestamps should be independently queryable") def step_then_all_queryable(context: Context): """Assert all fields are queryable.""" - assert hasattr(context.retrieved_location, 'created_at') - assert hasattr(context.retrieved_location, 'legacy_date_created') - assert 
hasattr(context.retrieved_location, 'inventoried_on') + assert hasattr(context.retrieved_location, "created_at") + assert hasattr(context.retrieved_location, "legacy_date_created") + assert hasattr(context.retrieved_location, "inventoried_on") @then("created_at should be a recent timestamp") @@ -803,17 +861,17 @@ def step_then_no_error(context: Context): @then("well_completed_on should be null") def step_then_completion_null(context: Context): """Assert well_completed_on is null.""" - if hasattr(context, 'retrieved_thing'): + if hasattr(context, "retrieved_thing"): assert context.retrieved_thing.well_completed_on is None - elif hasattr(context, 'retrieved_well'): + elif hasattr(context, "retrieved_well"): assert context.retrieved_well.well_completed_on is None @then("the field should exist in the response schema") def step_then_field_exists_in_schema(context: Context): """Assert field exists in schema.""" - if hasattr(context, 'retrieved_thing'): - assert hasattr(context.retrieved_thing, 'well_completed_on') + if hasattr(context, "retrieved_thing"): + assert hasattr(context.retrieved_thing, "well_completed_on") @then("it should not cause validation errors") From ac04b26af2638ed7a59d06a70942437efaca7537 Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 26 Nov 2025 17:13:06 -0800 Subject: [PATCH 06/37] Add features that describe post-migration behaviors --- ...st-migration-legacy-data-retrieval.feature | 172 ++++ .../steps/post_migration_legacy_data.py | 837 ++++++++++++++++++ 2 files changed, 1009 insertions(+) create mode 100644 tests/features/post-migration-legacy-data-retrieval.feature create mode 100644 tests/features/steps/post_migration_legacy_data.py diff --git a/tests/features/post-migration-legacy-data-retrieval.feature b/tests/features/post-migration-legacy-data-retrieval.feature new file mode 100644 index 000000000..69d2c5506 --- /dev/null +++ b/tests/features/post-migration-legacy-data-retrieval.feature @@ -0,0 +1,172 @@ +Feature: Post-Migration 
Legacy Data Retrieval + As a data manager + After migrating data from AMPAPI to NMSampleLocations + I want to verify that all legacy temporal information is preserved and queryable + So that no historical context is lost + + Background: + Given a functioning api + And the AMPAPI data has been migrated to the database + + # Location Legacy Date Lookups + + Scenario: Retrieve location with both legacy dates via API + Given a location exists with: + | field | value | + | legacy_date_created | 2014-04-03 | + | inventoried_on | 2002-12-10 | + When I retrieve that location via the API + Then the response should include legacy_date_created as "2014-04-03" + And the response should include inventoried_on as "2002-12-10" + And the time gap should be approximately 11.3 years + + Scenario: Retrieve location with large time gap (54 years) + Given a location exists with: + | field | value | + | legacy_date_created | 2008-05-28 | + | inventoried_on | 1954-05-01 | + When I retrieve that location via the API + Then the response should include legacy_date_created as "2008-05-28" + And the response should include inventoried_on as "1954-05-01" + And the time gap should be approximately 54 years + + Scenario: List all locations includes legacy date fields + Given 5 locations exist with various legacy dates + When I GET /location to list all locations + Then each location should have a legacy_date_created field + And each location should have an inventoried_on field + And some locations should have null inventoried_on + + Scenario: Filter locations by inventory date range + Given locations exist with inventoried_on ranging from 1950 to 2024 + When I filter locations where inventoried_on is between "2000-01-01" and "2010-12-31" + Then the response should only include locations inventoried in that decade + And locations inventoried before 2000 should not be included + And locations inventoried after 2010 should not be included + + Scenario: Query location by legacy_date_created + Given
3 locations exist with legacy_date_created "2014-04-03" + And 2 locations exist with legacy_date_created "2017-12-06" + When I query for locations with legacy_date_created "2014-04-03" + Then the response should include exactly 3 locations + And all should have legacy_date_created "2014-04-03" + + # Well Completion Date Lookups + + Scenario: Retrieve well with completion date via API + Given a well exists with well_completed_on "2004-08-08" + When I retrieve that well via the API + Then the response should include well_completed_on as "2004-08-08" + And the well age should be calculable + + Scenario: Retrieve old well from early 1900s + Given a well exists with well_completed_on "1936-01-01" + When I retrieve that well via the API + Then the response should include well_completed_on as "1936-01-01" + And the well should be over 88 years old + + Scenario: List all wells includes completion date field + Given 10 wells exist with various completion dates + And 3 of those wells have null well_completed_on + When I GET /thing/water-well to list all wells + Then each well should have a well_completed_on field + And 70% of wells should have well_completed_on populated + + Scenario: Filter wells by completion date range + Given wells exist with completion dates from 1936 to 2024 + When I filter wells where well_completed_on is between "2000-01-01" and "2010-12-31" + Then the response should only include wells completed in that decade + And wells from 1936 should not be included + And wells from 2020 should not be included + + Scenario: Sort wells by completion date (oldest first) + Given wells exist with completion dates: 1936, 1965, 2004, 2020 + And some wells have null well_completed_on + When I GET /thing/water-well sorted by well_completed_on ascending + Then the first well should be from 1936 + And the last well with a date should be from 2020 + And wells without completion dates should appear last + + # Combined Queries - Location + Well Legacy Dates + + Scenario: 
Retrieve well with location showing all legacy dates + Given a well exists with well_completed_on "2004-08-08" + And that well's location has: + | field | value | + | legacy_date_created | 2014-04-03 | + | inventoried_on | 2002-12-10 | + When I retrieve the well via the API + Then the well should have well_completed_on as "2004-08-08" + And the current_location should include legacy_date_created as "2014-04-03" + And the current_location should include inventoried_on as "2002-12-10" + + Scenario: Timeline reconstruction - well completed before site inventoried + Given a well exists with well_completed_on "1995-06-15" + And that well's location has: + | field | value | + | inventoried_on | 2003-12-10 | + | legacy_date_created | 2014-04-03 | + When I retrieve the well and its location + Then the temporal sequence should be: well_completed_on → inventoried_on → legacy_date_created + And the timeline should show: 1995 → 2003 → 2014 + + # Data Quality Validation + + Scenario: Verify migration preserved expected percentage of legacy dates + Given 100 locations were migrated + And 9 of them had non-null SiteDate in AMPAPI + When I query the migrated locations + Then 9% should have non-null inventoried_on + And 100% should have non-null legacy_date_created + + Scenario: Verify well completion date coverage matches expectation + Given 100 wells were migrated + And 30 of them had non-null CompletionDate in AMPAPI + When I query the migrated wells + Then 30% should have non-null well_completed_on + + # Audit Trail Verification + + Scenario: Legacy dates preserved alongside audit timestamps + Given a location was migrated with legacy dates + When I retrieve that location + Then it should have created_at (new system timestamp from migration) + And it should have legacy_date_created (original AMPAPI DateCreated) + And it should have inventoried_on (original AMPAPI SiteDate) + And all three timestamps should be independently queryable + And created_at should be a recent timestamp 
+ And legacy_date_created should be an older date + + # Edge Cases + + Scenario: Location where SiteDate is later than DateCreated (data anomaly) + Given a location exists with: + | field | value | + | legacy_date_created | 2010-01-15 | + | inventoried_on | 2015-06-20 | + When I retrieve that location + Then legacy_date_created should be "2010-01-15" + And inventoried_on should be "2015-06-20" + And the system should accept this without error + + Scenario: Spring does not use well_completed_on field + Given a thing of type "spring" exists + When I retrieve that spring + Then well_completed_on should be null + And the field should exist in the response schema + And it should not cause validation errors + + Scenario: Location with only legacy_date_created (no inventoried_on) + Given a location exists with: + | field | value | + | legacy_date_created | 2014-10-17 | + | inventoried_on | null | + When I retrieve that location + Then legacy_date_created should be "2014-10-17" + And inventoried_on should be null + + Scenario: Well without completion date + Given a well exists with well_completed_on null + When I retrieve that well + Then well_completed_on should be null + And the well should still be valid diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py new file mode 100644 index 000000000..dca15d638 --- /dev/null +++ b/tests/features/steps/post_migration_legacy_data.py @@ -0,0 +1,837 @@ +# =============================================================================== +# Copyright 2025 ross +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# =============================================================================== +from datetime import date, datetime +from behave import given, when, then, register_type +from behave.runner import Context +import parse + +from db import Location, Thing, LocationThingAssociation +from db.engine import session_ctx + + +# Custom type parsers +@parse.with_pattern(r"\d+") +def parse_number(text): + return int(text) + +register_type(Number=parse_number) + + +def create_test_location(legacy_date_created=None, inventoried_on=None): + """Helper to create a test location with legacy dates.""" + with session_ctx() as session: + location = Location( + point="POINT(-106.607784 35.118924)", + elevation=1558.8, + release_status="public", + legacy_date_created=legacy_date_created, + inventoried_on=inventoried_on, + ) + session.add(location) + session.commit() + session.refresh(location) + return location + + +def create_test_well(well_completed_on=None, thing_type="water well"): + """Helper to create a test well with completion date.""" + with session_ctx() as session: + # Create location + location = Location( + point="POINT(-106.607784 35.118924)", + elevation=1558.8, + release_status="public", + ) + session.add(location) + session.commit() + + # Create thing + thing = Thing( + name=f"Test-{thing_type}-{datetime.now().timestamp()}", + first_visit_date="2023-03-03", + thing_type=thing_type, + release_status="public", + well_depth=100.0 if thing_type == "water well" else None, + hole_depth=110.0 if thing_type == "water well" else None, + 
well_completed_on=well_completed_on, + ) + session.add(thing) + session.commit() + + # Associate + assoc = LocationThingAssociation(location=location, thing=thing) + assoc.effective_start = "2000-01-01T00:00:00Z" + session.add(assoc) + session.commit() + + session.refresh(thing) + session.refresh(location) + return thing, location + + +@given("the AMPAPI data has been migrated to the database") +def step_given_data_migrated(context: Context): + """Assumption that migration has occurred.""" + context.migrated = True + + +@given("a location exists with") +def step_given_location_with_table(context: Context): + """Create location with fields from table.""" + data = {row['field']: row['value'] for row in context.table} + + legacy_date_created = date.fromisoformat(data['legacy_date_created']) if data.get('legacy_date_created') and data['legacy_date_created'] != 'null' else None + inventoried_on = date.fromisoformat(data['inventoried_on']) if data.get('inventoried_on') and data['inventoried_on'] != 'null' else None + + location = create_test_location( + legacy_date_created=legacy_date_created, + inventoried_on=inventoried_on + ) + + context.test_location = location + context.test_location_id = location.id + + +@given("{count:Number} locations exist with various legacy dates") +def step_given_multiple_locations(context: Context, count: int): + """Create multiple locations with various legacy dates.""" + context.test_locations = [] + + test_data = [ + ("2014-04-03", "2002-12-10"), + ("2014-04-03", "2003-01-07"), + ("2017-12-06", "2003-12-11"), + ("2008-05-28", "1954-05-01"), + ("2020-01-15", None), + ] + + for i in range(min(count, len(test_data))): + legacy_date, inventory_date = test_data[i] + location = create_test_location( + legacy_date_created=date.fromisoformat(legacy_date), + inventoried_on=date.fromisoformat(inventory_date) if inventory_date else None + ) + context.test_locations.append(location) + + +@given("locations exist with inventoried_on ranging from 
{start_year:Number} to {end_year:Number}") +def step_given_locations_date_range(context: Context, start_year: int, end_year: int): + """Create locations with inventoried_on across a date range.""" + context.test_locations = [] + + years = [1954, 2002, 2003, 2010, 2015, 2020, 2024] + for year in years: + location = create_test_location( + legacy_date_created=date(year + 5, 1, 1), # Always 5 years after inventory + inventoried_on=date(year, 6, 15) + ) + context.test_locations.append(location) + + +@given('{count:Number} locations exist with legacy_date_created "{target_date}"') +def step_given_locations_with_specific_date(context: Context, count: int, target_date: str): + """Create locations with specific legacy_date_created.""" + if not hasattr(context, 'test_locations'): + context.test_locations = [] + + target = date.fromisoformat(target_date) + + for i in range(count): + location = create_test_location( + legacy_date_created=target, + inventoried_on=date(2000 + i, 1, 1) # Vary the inventory dates + ) + context.test_locations.append(location) + + +@given('a well exists with well_completed_on "{completion_date}"') +def step_given_well_with_completion(context: Context, completion_date: str): + """Create well with completion date.""" + completed_on = date.fromisoformat(completion_date) if completion_date != 'null' else None + + thing, location = create_test_well(well_completed_on=completed_on) + + context.test_well = thing + context.test_well_id = thing.id + context.test_well_location = location + + +@given("{count:Number} wells exist with various completion dates") +def step_given_multiple_wells(context: Context, count: int): + """Create multiple wells with various completion dates.""" + context.test_wells = [] + + completion_dates = [ + "1936-01-01", + "1965-06-15", + "2004-08-08", + "2020-05-15", + None, # No completion date + None, + None, + ] + + for i in range(min(count, len(completion_dates))): + completed_on = date.fromisoformat(completion_dates[i]) if 
completion_dates[i] else None + thing, location = create_test_well(well_completed_on=completed_on) + context.test_wells.append(thing) + + +@given("{null_count:Number} of those wells have null well_completed_on") +def step_given_wells_with_null_completion(context: Context, null_count: int): + """Verify expected number of nulls (declarative - already created).""" + # Wells were created in previous step with nulls + pass + + +@given("wells exist with completion dates from {start_year:Number} to {end_year:Number}") +def step_given_wells_date_range(context: Context, start_year: int, end_year: int): + """Create wells with completion dates across range.""" + context.test_wells = [] + + years = [1936, 1965, 2004, 2010, 2020, 2024] + for year in years: + thing, location = create_test_well(well_completed_on=date(year, 6, 15)) + context.test_wells.append(thing) + + +@given("wells exist with completion dates: {years}") +def step_given_wells_specific_years(context: Context, years: str): + """Create wells with specific completion years.""" + context.test_wells = [] + + year_list = [int(y.strip()) for y in years.split(',')] + + for year in year_list: + thing, location = create_test_well(well_completed_on=date(year, 6, 15)) + context.test_wells.append(thing) + + +@given("some wells have null well_completed_on") +def step_given_some_wells_null(context: Context): + """Add wells without completion dates.""" + if not hasattr(context, 'test_wells'): + context.test_wells = [] + + for i in range(2): + thing, location = create_test_well(well_completed_on=None) + context.test_wells.append(thing) + + +@given("that well's location has") +def step_given_well_location_has_table(context: Context): + """Set legacy dates on the well's location.""" + data = {row['field']: row['value'] for row in context.table} + + legacy_date_created = date.fromisoformat(data.get('legacy_date_created')) if data.get('legacy_date_created') else None + inventoried_on = date.fromisoformat(data.get('inventoried_on')) 
if data.get('inventoried_on') else None + + with session_ctx() as session: + location = session.get(Location, context.test_well_location.id) + location.legacy_date_created = legacy_date_created + location.inventoried_on = inventoried_on + session.commit() + session.refresh(location) + context.test_well_location = location + + +@given("{count:Number} locations were migrated") +def step_given_count_locations_migrated(context: Context, count: int): + """Create specified number of test locations.""" + context.test_locations = [] + + for i in range(count): + # 9% have inventoried_on + has_inventory = (i < count * 0.09) + + location = create_test_location( + legacy_date_created=date(2014, 1, i % 28 + 1), + inventoried_on=date(2003, 1, i % 28 + 1) if has_inventory else None + ) + context.test_locations.append(location) + + +@given("{count:Number} of them had non-null SiteDate in AMPAPI") +def step_given_sitedate_count(context: Context, count: int): + """Declarative - data created in previous step.""" + pass + + +@given("{count:Number} wells were migrated") +def step_given_count_wells_migrated(context: Context, count: int): + """Create specified number of test wells.""" + context.test_wells = [] + + for i in range(count): + # 30% have completion dates + has_completion = (i < count * 0.30) + + thing, location = create_test_well( + well_completed_on=date(2000 + (i % 24), 1, 1) if has_completion else None + ) + context.test_wells.append(thing) + + +@given("{count:Number} of them had non-null CompletionDate in AMPAPI") +def step_given_completion_count(context: Context, count: int): + """Declarative - data created in previous step.""" + pass + + +@given("a location was migrated with legacy dates") +def step_given_location_migrated_with_dates(context: Context): + """Create location with both legacy dates.""" + location = create_test_location( + legacy_date_created=date(2014, 4, 3), + inventoried_on=date(2002, 12, 10) + ) + context.test_location = location + + +@given('a thing of 
type "{thing_type}" exists') +def step_given_thing_of_type(context: Context, thing_type: str): + """Create a thing of specified type.""" + thing, location = create_test_well(well_completed_on=None, thing_type=thing_type) + context.test_thing = thing + context.test_thing_id = thing.id + + +@given("a well exists with well_completed_on null") +def step_given_well_null_completion(context: Context): + """Create well without completion date.""" + thing, location = create_test_well(well_completed_on=None) + context.test_well = thing + context.test_well_id = thing.id + + +# WHEN steps + +@when("I retrieve that location via the API") +def step_when_retrieve_location_api(context: Context): + """Retrieve location via GET API.""" + response = context.client.get(f"/location/{context.test_location_id}") + assert response.status_code == 200 + context.location_response = response.json() + + +@when("I GET /location to list all locations") +def step_when_get_all_locations(context: Context): + """Get all locations.""" + response = context.client.get("/location") + assert response.status_code == 200 + context.locations_response = response.json() + + +@when('I filter locations where inventoried_on is between "{start_date}" and "{end_date}"') +def step_when_filter_locations(context: Context, start_date: str, end_date: str): + """Filter locations by date range.""" + # Since API may not support this yet, query database directly + with session_ctx() as session: + start = date.fromisoformat(start_date) + end = date.fromisoformat(end_date) + + locations = session.query(Location).filter( + Location.inventoried_on >= start, + Location.inventoried_on <= end + ).all() + + context.filtered_locations = locations + + +@when('I query for locations with legacy_date_created "{target_date}"') +def step_when_query_by_legacy_date(context: Context, target_date: str): + """Query locations by legacy_date_created.""" + with session_ctx() as session: + target = date.fromisoformat(target_date) + locations = 
session.query(Location).filter( + Location.legacy_date_created == target + ).all() + context.queried_locations = locations + + +@when("I retrieve that well via the API") +def step_when_retrieve_well_api(context: Context): + """Retrieve well via GET API.""" + response = context.client.get(f"/thing/water-well/{context.test_well_id}") + assert response.status_code == 200 + context.well_response = response.json() + + +@when("I GET /thing/water-well to list all wells") +def step_when_get_all_wells(context: Context): + """Get all wells.""" + response = context.client.get("/thing/water-well") + assert response.status_code == 200 + context.wells_response = response.json() + + +@when('I filter wells where well_completed_on is between "{start_date}" and "{end_date}"') +def step_when_filter_wells(context: Context, start_date: str, end_date: str): + """Filter wells by completion date range.""" + with session_ctx() as session: + start = date.fromisoformat(start_date) + end = date.fromisoformat(end_date) + + wells = session.query(Thing).filter( + Thing.thing_type == "water well", + Thing.well_completed_on >= start, + Thing.well_completed_on <= end + ).all() + + context.filtered_wells = wells + + +@when("I GET /thing/water-well sorted by well_completed_on ascending") +def step_when_get_wells_sorted(context: Context): + """Get wells sorted by completion date.""" + with session_ctx() as session: + wells = session.query(Thing).filter( + Thing.thing_type == "water well" + ).order_by(Thing.well_completed_on.asc().nullslast()).all() + + context.sorted_wells = wells + + +@when("I retrieve the well and its location") +def step_when_retrieve_well_and_location(context: Context): + """Retrieve well with location.""" + with session_ctx() as session: + well = session.get(Thing, context.test_well.id) + location = session.get(Location, context.test_well_location.id) + + context.retrieved_well = well + context.retrieved_location = location + + +@when("I query the migrated locations") +def 
step_when_query_migrated_locations(context: Context): + """Query all test locations.""" + with session_ctx() as session: + # Query only our test locations + location_ids = [loc.id for loc in context.test_locations] + locations = session.query(Location).filter(Location.id.in_(location_ids)).all() + context.queried_locations = locations + + +@when("I query the migrated wells") +def step_when_query_migrated_wells(context: Context): + """Query all test wells.""" + with session_ctx() as session: + well_ids = [well.id for well in context.test_wells] + wells = session.query(Thing).filter(Thing.id.in_(well_ids)).all() + context.queried_wells = wells + + +@when("I retrieve that location") +def step_when_retrieve_location(context: Context): + """Retrieve location by ID.""" + with session_ctx() as session: + location = session.get(Location, context.test_location.id) + context.retrieved_location = location + + +@when("I retrieve that spring") +def step_when_retrieve_spring(context: Context): + """Retrieve spring/thing by ID.""" + with session_ctx() as session: + thing = session.get(Thing, context.test_thing.id) + context.retrieved_thing = thing + + +@when("I retrieve that well") +def step_when_retrieve_well(context: Context): + """Retrieve well by ID.""" + with session_ctx() as session: + well = session.get(Thing, context.test_well.id) + context.retrieved_well = well + + +# THEN steps + +@then('the response should include legacy_date_created as "{expected_date}"') +def step_then_legacy_date_created(context: Context, expected_date: str): + """Assert legacy_date_created matches.""" + actual = context.location_response.get("legacy_date_created") + assert actual == expected_date, f"Expected {expected_date}, got {actual}" + + +@then('the response should include inventoried_on as "{expected_date}"') +def step_then_inventoried_on(context: Context, expected_date: str): + """Assert inventoried_on matches.""" + actual = context.location_response.get("inventoried_on") + assert actual == 
expected_date, f"Expected {expected_date}, got {actual}" + + +@then("the time gap should be approximately {years} years") +def step_then_time_gap_years(context: Context, years: str): + """Assert approximate year gap.""" + legacy_str = context.location_response.get("legacy_date_created") + inventory_str = context.location_response.get("inventoried_on") + + if not legacy_str or not inventory_str: + raise AssertionError("Missing date fields for gap calculation") + + legacy_date = date.fromisoformat(legacy_str) + inventory_date = date.fromisoformat(inventory_str) + + gap_days = (legacy_date - inventory_date).days + gap_years = gap_days / 365.25 + + expected_years = float(years) + tolerance = 0.5 + assert abs(gap_years - expected_years) < tolerance, \ + f"Expected ~{expected_years} year gap, got {gap_years:.1f} years" + + +@then("each location should have a legacy_date_created field") +def step_then_all_have_legacy_field(context: Context): + """Assert all locations have the field.""" + items = context.locations_response.get("items", []) + for item in items: + assert "legacy_date_created" in item, f"Location missing legacy_date_created" + + +@then("each location should have an inventoried_on field") +def step_then_all_have_inventory_field(context: Context): + """Assert all locations have the field.""" + items = context.locations_response.get("items", []) + for item in items: + assert "inventoried_on" in item, f"Location missing inventoried_on" + + +@then("some locations should have null inventoried_on") +def step_then_some_null_inventory(context: Context): + """Assert some locations have null.""" + items = context.locations_response.get("items", []) + null_count = sum(1 for item in items if item.get("inventoried_on") is None) + assert null_count > 0, "Expected at least one location with null inventoried_on" + + +@then("the response should only include locations inventoried in that decade") +def step_then_locations_in_decade(context: Context): + """Assert filtered 
locations are in range.""" + for loc in context.filtered_locations: + assert 2000 <= loc.inventoried_on.year <= 2010, \ + f"Location not in 2000-2010: {loc.inventoried_on}" + + +@then("locations inventoried before {year:Number} should not be included") +def step_then_locations_before_excluded(context: Context, year: int): + """Assert no locations before year.""" + for loc in context.filtered_locations: + assert loc.inventoried_on.year >= year, \ + f"Location from {loc.inventoried_on.year} should not be included" + + +@then("locations inventoried after {year:Number} should not be included") +def step_then_locations_after_excluded(context: Context, year: int): + """Assert no locations after year.""" + for loc in context.filtered_locations: + assert loc.inventoried_on.year <= year, \ + f"Location from {loc.inventoried_on.year} should not be included" + + +@then("the response should include exactly {count:Number} locations") +def step_then_exact_count_locations(context: Context, count: int): + """Assert exact count.""" + actual = len(context.queried_locations) + assert actual == count, f"Expected {count} locations, got {actual}" + + +@then('all should have legacy_date_created "{expected_date}"') +def step_then_all_have_date(context: Context, expected_date: str): + """Assert all have same date.""" + expected = date.fromisoformat(expected_date) + for loc in context.queried_locations: + assert loc.legacy_date_created == expected, \ + f"Location has {loc.legacy_date_created}, expected {expected}" + + +@then('the response should include well_completed_on as "{expected_date}"') +def step_then_well_completed_on(context: Context, expected_date: str): + """Assert well_completed_on matches.""" + actual = context.well_response.get("well_completed_on") + assert actual == expected_date, f"Expected {expected_date}, got {actual}" + + +@then("the well age should be calculable") +def step_then_age_calculable(context: Context): + """Assert age can be calculated.""" + completion_str = 
context.well_response.get("well_completed_on") + assert completion_str is not None, "Cannot calculate age without completion date" + + completed = date.fromisoformat(completion_str) + today = date.today() + age_years = (today - completed).days / 365.25 + assert age_years >= 0, "Age cannot be negative" + + +@then("the well should be over {min_age:Number} years old") +def step_then_well_over_age(context: Context, min_age: int): + """Assert well age exceeds minimum.""" + completion_str = context.well_response.get("well_completed_on") + completed = date.fromisoformat(completion_str) + today = date.today() + age_years = (today - completed).days / 365.25 + + assert age_years >= min_age, f"Expected over {min_age} years, got {age_years:.1f}" + + +@then("each well should have a well_completed_on field") +def step_then_all_wells_have_field(context: Context): + """Assert all wells have the field.""" + items = context.wells_response.get("items", []) + for item in items: + assert "well_completed_on" in item, f"Well missing well_completed_on" + + +@then("{percentage:Number}% of wells should have well_completed_on populated") +def step_then_percentage_populated(context: Context, percentage: int): + """Assert approximate percentage.""" + items = context.wells_response.get("items", []) + total = len(items) + if total == 0: + return + + populated = sum(1 for item in items if item.get("well_completed_on") is not None) + actual_pct = (populated / total) * 100 + + tolerance = 10 + assert abs(actual_pct - percentage) < tolerance, \ + f"Expected ~{percentage}%, got {actual_pct:.1f}%" + + +@then("the response should only include wells completed in that decade") +def step_then_wells_in_decade(context: Context): + """Assert filtered wells in range.""" + for well in context.filtered_wells: + assert 2000 <= well.well_completed_on.year <= 2010 + + +@then("wells from {year:Number} should not be included") +def step_then_wells_year_excluded(context: Context, year: int): + """Assert wells from 
year excluded.""" + for well in context.filtered_wells: + assert well.well_completed_on.year != year + + +@then("the first well should be from {year:Number}") +def step_then_first_well_year(context: Context, year: int): + """Assert first well year.""" + if context.sorted_wells and context.sorted_wells[0].well_completed_on: + actual_year = context.sorted_wells[0].well_completed_on.year + assert actual_year == year, f"Expected {year}, got {actual_year}" + + +@then("the last well with a date should be from {year:Number}") +def step_then_last_well_year(context: Context, year: int): + """Assert last non-null well year.""" + non_null = [w for w in context.sorted_wells if w.well_completed_on] + if non_null: + actual_year = non_null[-1].well_completed_on.year + assert actual_year == year, f"Expected {year}, got {actual_year}" + + +@then("wells without completion dates should appear last") +def step_then_nulls_last(context: Context): + """Assert nulls at end.""" + first_null_idx = next( + (i for i, w in enumerate(context.sorted_wells) if w.well_completed_on is None), + len(context.sorted_wells) + ) + + for well in context.sorted_wells[first_null_idx:]: + assert well.well_completed_on is None, "Found non-null after null in sorted list" + + +@then('the well should have well_completed_on as "{expected_date}"') +def step_then_well_has_completion(context: Context, expected_date: str): + """Assert well has completion date.""" + actual = context.well_response.get("well_completed_on") + assert actual == expected_date, f"Expected {expected_date}, got {actual}" + + +@then('the current_location should include legacy_date_created as "{expected_date}"') +def step_then_location_has_legacy(context: Context, expected_date: str): + """Assert location has legacy_date_created.""" + current_location = context.well_response.get("current_location", {}) + actual = current_location.get("legacy_date_created") + assert actual == expected_date, f"Expected {expected_date}, got {actual}" + + 
+@then('the current_location should include inventoried_on as "{expected_date}"') +def step_then_location_has_inventory(context: Context, expected_date: str): + """Assert location has inventoried_on.""" + current_location = context.well_response.get("current_location", {}) + actual = current_location.get("inventoried_on") + assert actual == expected_date, f"Expected {expected_date}, got {actual}" + + +@then("the temporal sequence should be: well_completed_on → inventoried_on → legacy_date_created") +def step_then_temporal_sequence(context: Context): + """Assert temporal order.""" + well_completed = context.retrieved_well.well_completed_on + inventoried = context.retrieved_location.inventoried_on + legacy_created = context.retrieved_location.legacy_date_created + + assert well_completed < inventoried, "Well should be completed before site inventoried" + assert inventoried < legacy_created, "Site should be inventoried before DB record created" + + +@then("the timeline should show: {year1:Number} → {year2:Number} → {year3:Number}") +def step_then_timeline_years(context: Context, year1: int, year2: int, year3: int): + """Assert specific years in sequence.""" + assert context.retrieved_well.well_completed_on.year == year1 + assert context.retrieved_location.inventoried_on.year == year2 + assert context.retrieved_location.legacy_date_created.year == year3 + + +@then("{percentage:Number}% should have non-null inventoried_on") +def step_then_percentage_inventory(context: Context, percentage: int): + """Assert percentage with inventoried_on.""" + total = len(context.queried_locations) + populated = sum(1 for loc in context.queried_locations if loc.inventoried_on) + actual_pct = (populated / total) * 100 + + tolerance = 2 + assert abs(actual_pct - percentage) < tolerance, \ + f"Expected ~{percentage}%, got {actual_pct:.1f}%" + + +@then("{percentage:Number}% should have non-null legacy_date_created") +def step_then_percentage_legacy(context: Context, percentage: int): + 
"""Assert percentage with legacy_date_created.""" + total = len(context.queried_locations) + populated = sum(1 for loc in context.queried_locations if loc.legacy_date_created) + actual_pct = (populated / total) * 100 + + tolerance = 2 + assert abs(actual_pct - percentage) < tolerance, \ + f"Expected ~{percentage}%, got {actual_pct:.1f}%" + + +@then("{percentage:Number}% should have non-null well_completed_on") +def step_then_percentage_completion(context: Context, percentage: int): + """Assert percentage with well_completed_on.""" + total = len(context.queried_wells) + populated = sum(1 for well in context.queried_wells if well.well_completed_on) + actual_pct = (populated / total) * 100 + + tolerance = 2 + assert abs(actual_pct - percentage) < tolerance, \ + f"Expected ~{percentage}%, got {actual_pct:.1f}%" + + +@then("it should have created_at (new system timestamp from migration)") +def step_then_has_created_at(context: Context): + """Assert created_at exists.""" + assert context.retrieved_location.created_at is not None + + +@then("it should have legacy_date_created (original AMPAPI DateCreated)") +def step_then_has_legacy_date(context: Context): + """Assert legacy_date_created exists.""" + assert context.retrieved_location.legacy_date_created is not None + + +@then("it should have inventoried_on (original AMPAPI SiteDate)") +def step_then_has_inventory_date(context: Context): + """Assert inventoried_on exists.""" + assert context.retrieved_location.inventoried_on is not None + + +@then("all three timestamps should be independently queryable") +def step_then_all_queryable(context: Context): + """Assert all fields are queryable.""" + assert hasattr(context.retrieved_location, 'created_at') + assert hasattr(context.retrieved_location, 'legacy_date_created') + assert hasattr(context.retrieved_location, 'inventoried_on') + + +@then("created_at should be a recent timestamp") +def step_then_created_at_recent(context: Context): + """Assert created_at is recent.""" + 
created_at = context.retrieved_location.created_at.replace(tzinfo=None) + now = datetime.utcnow() + diff_seconds = abs((now - created_at).total_seconds()) + assert diff_seconds < 3600, "created_at should be within last hour" + + +@then("legacy_date_created should be an older date") +def step_then_legacy_date_older(context: Context): + """Assert legacy_date_created is old.""" + legacy_date = context.retrieved_location.legacy_date_created + assert legacy_date.year < 2024, "legacy_date_created should be from the past" + + +@then('legacy_date_created should be "{expected_date}"') +def step_then_legacy_is(context: Context, expected_date: str): + """Assert legacy_date_created value.""" + actual = context.retrieved_location.legacy_date_created + expected = date.fromisoformat(expected_date) + assert actual == expected, f"Expected {expected}, got {actual}" + + +@then('inventoried_on should be "{expected_date}"') +def step_then_inventory_is(context: Context, expected_date: str): + """Assert inventoried_on value.""" + actual = context.retrieved_location.inventoried_on + expected = date.fromisoformat(expected_date) + assert actual == expected, f"Expected {expected}, got {actual}" + + +@then("the system should accept this without error") +def step_then_no_error(context: Context): + """Assert no errors.""" + # If we got here, no errors + pass + + +@then("well_completed_on should be null") +def step_then_completion_null(context: Context): + """Assert well_completed_on is null.""" + if hasattr(context, 'retrieved_thing'): + assert context.retrieved_thing.well_completed_on is None + elif hasattr(context, 'retrieved_well'): + assert context.retrieved_well.well_completed_on is None + + +@then("the field should exist in the response schema") +def step_then_field_exists_in_schema(context: Context): + """Assert field exists in schema.""" + if hasattr(context, 'retrieved_thing'): + assert hasattr(context.retrieved_thing, 'well_completed_on') + + +@then("it should not cause validation 
errors") +def step_then_no_validation_errors(context: Context): + """Assert no validation errors.""" + pass + + +@then("inventoried_on should be null") +def step_then_inventory_null(context: Context): + """Assert inventoried_on is null.""" + assert context.retrieved_location.inventoried_on is None + + +@then("the well should still be valid") +def step_then_well_valid(context: Context): + """Assert well is valid.""" + assert context.retrieved_well.id is not None + + +# ============= EOF ============================================= From 952c5db040e783d2386c62a1b46410d225d0b8df Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 26 Nov 2025 17:31:38 -0800 Subject: [PATCH 07/37] Rename `inventoried_on` to `legacy_start_date` since it won't continue on --- ...st-migration-legacy-data-retrieval.feature | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/tests/features/post-migration-legacy-data-retrieval.feature b/tests/features/post-migration-legacy-data-retrieval.feature index 69d2c5506..fa4663e1b 100644 --- a/tests/features/post-migration-legacy-data-retrieval.feature +++ b/tests/features/post-migration-legacy-data-retrieval.feature @@ -14,35 +14,35 @@ Feature: Post-Migration Legacy Data Retrieval Given a location exists with: | field | value | | legacy_date_created | 2014-04-03 | - | inventoried_on | 2002-12-10 | + | legacy_site_date | 2002-12-10 | When I retrieve that location via the API Then the response should include legacy_date_created as "2014-04-03" - And the response should include inventoried_on as "2002-12-10" + And the response should include legacy_site_date as "2002-12-10" And the time gap should be approximately 11.3 years Scenario: Retrieve location with large time gap (54 years) Given a location exists with: | field | value | | legacy_date_created | 2008-05-28 | - | inventoried_on | 1954-05-01 | + | legacy_site_date | 1954-05-01 | When I retrieve that location via the API Then the response should include 
legacy_date_created as "2008-05-28" - And the response should include inventoried_on as "2002-12-10" + And the response should include legacy_site_date as "1954-05-01" And the time gap should be approximately 54 years Scenario: List all locations includes legacy date fields Given 5 locations exist with various legacy dates When I GET /location to list all locations Then each location should have a legacy_date_created field - And each location should have an inventoried_on field - And some locations should have null inventoried_on + And each location should have a legacy_site_date field + And some locations should have null legacy_site_date - Scenario: Filter locations by inventory date range - Given locations exist with inventoried_on ranging from 1950 to 2024 - When I filter locations where inventoried_on is between "2000-01-01" and "2010-12-31" - Then the response should only include locations inventoried in that decade - And locations inventoried before 2000 should not be included - And locations inventoried after 2010 should not be included + Scenario: Filter locations by legacy site date range + Given locations exist with legacy_site_date ranging from 1950 to 2024 + When I filter locations where legacy_site_date is between "2000-01-01" and "2010-12-31" + Then the response should only include locations with legacy_site_date in that decade + And locations with legacy_site_date before 2000 should not be included + And locations with legacy_site_date after 2010 should not be included Scenario: Query location by legacy_date_created Given 3 locations exist with legacy_date_created "2014-04-03" @@ -94,20 +94,20 @@ Feature: Post-Migration Legacy Data Retrieval And that well's location has: | field | value | | legacy_date_created | 2014-04-03 | - | inventoried_on | 2002-12-10 | + | legacy_site_date | 2002-12-10 | When I retrieve the well via the API Then the well should have well_completed_on as "2004-08-08" And the current_location should include legacy_date_created 
as "2014-04-03" - And the current_location should include inventoried_on as "2002-12-10" + And the current_location should include legacy_site_date as "2002-12-10" Scenario: Timeline reconstruction - well completed before site inventoried Given a well exists with well_completed_on "1995-06-15" And that well's location has: | field | value | - | inventoried_on | 2003-12-10 | + | legacy_site_date | 2003-12-10 | | legacy_date_created | 2014-04-03 | When I retrieve the well and its location - Then the temporal sequence should be: well_completed_on → inventoried_on → legacy_date_created + Then the temporal sequence should be: well_completed_on → legacy_site_date → legacy_date_created And the timeline should show: 1995 → 2003 → 2014 # Data Quality Validation @@ -116,7 +116,7 @@ Feature: Post-Migration Legacy Data Retrieval Given 100 locations were migrated And 9 of them had non-null SiteDate in AMPAPI When I query the migrated locations - Then 9% should have non-null inventoried_on + Then 9% should have non-null legacy_site_date And 100% should have non-null legacy_date_created Scenario: Verify well completion date coverage matches expectation @@ -132,7 +132,7 @@ Feature: Post-Migration Legacy Data Retrieval When I retrieve that location Then it should have created_at (new system timestamp from migration) And it should have legacy_date_created (original AMPAPI DateCreated) - And it should have inventoried_on (original AMPAPI SiteDate) + And it should have legacy_site_date (original AMPAPI SiteDate) And all three timestamps should be independently queryable And created_at should be a recent timestamp And legacy_date_created should be an older date @@ -143,10 +143,10 @@ Feature: Post-Migration Legacy Data Retrieval Given a location exists with: | field | value | | legacy_date_created | 2010-01-15 | - | inventoried_on | 2015-06-20 | + | legacy_site_date | 2015-06-20 | When I retrieve that location Then legacy_date_created should be "2010-01-15" - And inventoried_on should 
be "2015-06-20" + And legacy_site_date should be "2015-06-20" And the system should accept this without error Scenario: Spring does not use well_completed_on field @@ -156,14 +156,14 @@ Feature: Post-Migration Legacy Data Retrieval And the field should exist in the response schema And it should not cause validation errors - Scenario: Location with only legacy_date_created (no inventoried_on) + Scenario: Location with only legacy_date_created (no legacy_site_date) Given a location exists with: | field | value | | legacy_date_created | 2014-10-17 | - | inventoried_on | null | + | legacy_site_date | null | When I retrieve that location Then legacy_date_created should be "2014-10-17" - And inventoried_on should be null + And legacy_site_date should be null Scenario: Well without completion date Given a well exists with well_completed_on null From dbfc8ef6dfadc46ead68cdb7aad121e01f975dbe Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 26 Nov 2025 17:37:47 -0800 Subject: [PATCH 08/37] Add new fields to unit tests --- tests/test_location.py | 79 ++++++++++++++++++++++ tests/test_thing.py | 147 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 226 insertions(+) diff --git a/tests/test_location.py b/tests/test_location.py index 4b6ec6faa..b86211a58 100644 --- a/tests/test_location.py +++ b/tests/test_location.py @@ -235,4 +235,83 @@ def test_delete_location_404_not_found(second_location): assert data["detail"] == f"Location with ID {bad_location_id} not found." 
+# ============= Legacy date field tests ======================================= + + +def test_new_location_has_null_legacy_fields(): + """Test that newly created locations have null legacy date fields (legacy fields are migration-only)""" + payload = { + "point": "POINT (-106.607784 35.118924)", + "elevation": 1558.8, + "release_status": "draft", + } + response = client.post("/location", json=payload) + + assert response.status_code == 201 + data = response.json() + assert "id" in data + # Legacy fields should be present in response but null (not set during creation) + assert "legacy_date_created" in data + assert "legacy_site_date" in data + assert data["legacy_date_created"] is None + assert data["legacy_site_date"] is None + + # cleanup after test + cleanup_post_test(Location, data["id"]) + + +def test_legacy_fields_present_in_location_response(): + """Test that legacy fields are included in location GET response""" + # Create a new location (without legacy fields) + payload = { + "point": "POINT (-106.607784 35.118924)", + "elevation": 1558.8, + "release_status": "draft", + } + create_response = client.post("/location", json=payload) + assert create_response.status_code == 201 + location_id = create_response.json()["id"] + + # Retrieve the location and verify legacy fields are in the schema + get_response = client.get(f"/location/{location_id}") + assert get_response.status_code == 200 + data = get_response.json() + + # Verify fields exist in response (even if null) + assert "legacy_date_created" in data + assert "legacy_site_date" in data + assert data["legacy_date_created"] is None + assert data["legacy_site_date"] is None + + # cleanup after test + cleanup_post_test(Location, location_id) + + +def test_legacy_fields_independent_of_created_at(): + """Test that created_at (system timestamp) is separate from legacy fields""" + payload = { + "point": "POINT (-106.607784 35.118924)", + "elevation": 1558.8, + "release_status": "draft", + } + response = 
client.post("/location", json=payload) + + assert response.status_code == 201 + data = response.json() + + # created_at is automatically set by AutoBaseMixin + assert "created_at" in data + assert data["created_at"] is not None + + # legacy_date_created is separate and null for new records + assert "legacy_date_created" in data + assert data["legacy_date_created"] is None + + # These are independent fields with different purposes + assert "created_at" != "legacy_date_created" + + # cleanup after test + cleanup_post_test(Location, data["id"]) + + # ============= EOF ============================================= diff --git a/tests/test_thing.py b/tests/test_thing.py index 378f72d02..12aafef1a 100644 --- a/tests/test_thing.py +++ b/tests/test_thing.py @@ -1101,3 +1101,150 @@ def test_delete_thing_id_link_404_not_found(second_thing_id_link): assert response.status_code == 404 data = response.json() assert data["detail"] == f"ThingIdLink with ID {bad_id} not found." + + +# ============= Well completion date tests ==================================== + + +def test_create_well_with_completion_date(location): + """Test creating a well with well_completed_on (active field - users can set this)""" + payload = { + "name": "Test Well", + "location_id": location.id, + "measuring_point_height": 2.5, + "measuring_point_description": "top of casing", + "release_status": "draft", + "well_completed_on": "2004-08-08", + } + response = client.post("/thing/water-well", json=payload) + + assert response.status_code == 201 + data = response.json() + assert "id" in data + assert data["well_completed_on"] == "2004-08-08" + + # cleanup after test + from db import Thing + from tests import cleanup_post_test + + cleanup_post_test(Thing, data["id"]) + + +def test_create_well_with_old_completion_date(location): + """Test creating a well with very old completion date (e.g., for documenting historical wells)""" + payload = { + "name": "Historical Well", + "location_id": location.id, + 
"measuring_point_height": 2.5, + "measuring_point_description": "top of casing", + "release_status": "draft", + "well_completed_on": "1936-01-01", + } + response = client.post("/thing/water-well", json=payload) + + assert response.status_code == 201 + data = response.json() + assert data["well_completed_on"] == "1936-01-01" + + # cleanup after test + from db import Thing + from tests import cleanup_post_test + + cleanup_post_test(Thing, data["id"]) + + +def test_create_well_without_completion_date(location): + """Test that well_completed_on is optional (nullable) when creating a well""" + payload = { + "name": "Test Well Without Date", + "location_id": location.id, + "measuring_point_height": 2.5, + "measuring_point_description": "top of casing", + "release_status": "draft", + } + response = client.post("/thing/water-well", json=payload) + + assert response.status_code == 201 + data = response.json() + # Field should be present but null + assert "well_completed_on" in data + assert data["well_completed_on"] is None + + # cleanup after test + from db import Thing + from tests import cleanup_post_test + + cleanup_post_test(Thing, data["id"]) + + +def test_spring_well_completed_on_is_null(location): + """Test that springs have null well_completed_on field""" + payload = { + "name": "Test Spring", + "location_id": location.id, + "spring_type": "Artesian", + "release_status": "draft", + } + response = client.post("/thing/spring", json=payload) + + assert response.status_code == 201 + data = response.json() + # Springs should have null well_completed_on + assert "well_completed_on" in data + assert data["well_completed_on"] is None + assert data["thing_type"] == "spring" + + # cleanup after test + from db import Thing + from tests import cleanup_post_test + + cleanup_post_test(Thing, data["id"]) + + +def test_well_with_completion_date_and_location_legacy_fields(location): + """Test combined scenario: new well with completion date + location legacy fields (null for new 
locations)""" + # Create a new location (without legacy fields - they're migration-only) + from tests import cleanup_post_test + + location_payload = { + "point": "POINT (-106.607784 35.118924)", + "elevation": 1558.8, + "release_status": "draft", + } + location_response = client.post("/location", json=location_payload) + assert location_response.status_code == 201 + location_id = location_response.json()["id"] + + # Create well with completion date at that location + well_payload = { + "name": "Test Well", + "location_id": location_id, + "measuring_point_height": 2.5, + "measuring_point_description": "top of casing", + "release_status": "draft", + "well_completed_on": "2020-06-15", # User can set this for new wells + } + well_response = client.post("/thing/water-well", json=well_payload) + assert well_response.status_code == 201 + well_id = well_response.json()["id"] + + # Retrieve the well + get_response = client.get(f"/thing/water-well/{well_id}") + assert get_response.status_code == 200 + data = get_response.json() + + # well_completed_on is set (active field) + assert data["well_completed_on"] == "2020-06-15" + + # Location legacy fields are null (migration-only fields) + assert data["current_location"]["legacy_date_created"] is None + assert data["current_location"]["legacy_site_date"] is None + + # cleanup after test + from db import Thing, Location + + cleanup_post_test(Thing, well_id) + cleanup_post_test(Location, location_id) + + +# ============= EOF ============================================= From 5d519545a41fde65f176308ded37cc01b1981452 Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 26 Nov 2025 17:41:41 -0800 Subject: [PATCH 09/37] Create test_transfer_legacy_dates.py --- tests/test_transfer_legacy_dates.py | 410 ++++++++++++++++++++++++++++ 1 file changed, 410 insertions(+) create mode 100644 tests/test_transfer_legacy_dates.py diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py new file mode 100644 
index 000000000..a0cec1014 --- /dev/null +++ b/tests/test_transfer_legacy_dates.py @@ -0,0 +1,410 @@ +# =============================================================================== +# Copyright 2025 ross +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# =============================================================================== +""" +Unit tests for legacy date field population during AMPAPI → NMSampleLocations migration. + +These tests verify that: +1. Location.legacy_date_created is populated from CSV DateCreated +2. Location.legacy_site_date is populated from CSV SiteDate (if not null) +3. 
Thing.well_completed_on is populated from CSV CompletionDate (if not null) +""" +import datetime +from unittest.mock import Mock, patch +import pandas as pd +import pytest + +from transfers.util import make_location +from schemas.thing import CreateWell + + +# ============================================================================ +# LOCATION LEGACY DATE TESTS +# ============================================================================ + + +def test_make_location_with_both_legacy_dates(): + """Test that make_location populates both legacy_date_created and legacy_site_date""" + # Create a mock CSV row with both DateCreated and SiteDate + row = pd.Series({ + 'PointID': 'TEST-001', + 'Easting': 350000, + 'Northing': 3880000, + 'DateCreated': '2014-04-03 00:00:00.000', + 'SiteDate': '2002-12-10 00:00:00.000', + 'Altitude': 1558.8, + 'AltDatum': 'NAVD88', + 'AltitudeMethod': 'GPS', + 'LocationId': 1, + 'PublicRelease': True, + 'CoordinateNotes': None, + 'LocationNotes': None, + 'AltitudeAccuracy': None, + }) + + elevations = {} + + # Call make_location + location, elevation_method = make_location(row, elevations) + + # Verify legacy_date_created is set from DateCreated + assert location.legacy_date_created is not None + assert location.legacy_date_created == datetime.date(2014, 4, 3) + + # Verify legacy_site_date is set from SiteDate + assert location.legacy_site_date is not None + assert location.legacy_site_date == datetime.date(2002, 12, 10) + + # Verify created_at is still set (should be the later date) + assert location.created_at is not None + + +def test_make_location_with_only_date_created(): + """Test that make_location handles locations with only DateCreated (no SiteDate)""" + row = pd.Series({ + 'PointID': 'TEST-002', + 'Easting': 350000, + 'Northing': 3880000, + 'DateCreated': '2014-04-03 00:00:00.000', + 'SiteDate': None, # No SiteDate + 'Altitude': 1558.8, + 'AltDatum': 'NAVD88', + 'AltitudeMethod': 'GPS', + 'LocationId': 2, + 'PublicRelease': 
True, + 'CoordinateNotes': None, + 'LocationNotes': None, + 'AltitudeAccuracy': None, + }) + + elevations = {} + location, elevation_method = make_location(row, elevations) + + # Verify legacy_date_created is set + assert location.legacy_date_created == datetime.date(2014, 4, 3) + + # Verify legacy_site_date is null (91% of locations don't have SiteDate) + assert location.legacy_site_date is None + + +def test_make_location_with_site_date_later_than_date_created(): + """Test data anomaly: SiteDate is later than DateCreated (should still be accepted)""" + row = pd.Series({ + 'PointID': 'TEST-003', + 'Easting': 350000, + 'Northing': 3880000, + 'DateCreated': '2010-01-15 00:00:00.000', + 'SiteDate': '2015-06-20 00:00:00.000', # Later than DateCreated (anomaly) + 'Altitude': 1558.8, + 'AltDatum': 'NAVD88', + 'AltitudeMethod': 'GPS', + 'LocationId': 3, + 'PublicRelease': True, + 'CoordinateNotes': None, + 'LocationNotes': None, + 'AltitudeAccuracy': None, + }) + + elevations = {} + location, elevation_method = make_location(row, elevations) + + # Both dates should be preserved as-is, regardless of order + assert location.legacy_date_created == datetime.date(2010, 1, 15) + assert location.legacy_site_date == datetime.date(2015, 6, 20) + + +def test_make_location_with_very_old_site_date(): + """Test that very old SiteDates (1950s) are preserved correctly""" + row = pd.Series({ + 'PointID': 'SM-0227', # Real example from dataset + 'Easting': 350000, + 'Northing': 3880000, + 'DateCreated': '2008-05-28 00:00:00.000', + 'SiteDate': '1954-05-01 00:00:00.000', # 54 years earlier! 
+ 'Altitude': 1558.8, + 'AltDatum': 'NAVD88', + 'AltitudeMethod': 'GPS', + 'LocationId': 4, + 'PublicRelease': True, + 'CoordinateNotes': None, + 'LocationNotes': None, + 'AltitudeAccuracy': None, + }) + + elevations = {} + location, elevation_method = make_location(row, elevations) + + # Verify very old date is preserved + assert location.legacy_site_date == datetime.date(1954, 5, 1) + assert location.legacy_date_created == datetime.date(2008, 5, 28) + + # Verify 54-year time gap + time_gap = (location.legacy_date_created - location.legacy_site_date).days + assert time_gap == 19751 # Approximately 54 years + + +def test_make_location_legacy_dates_are_date_not_datetime(): + """Test that legacy date fields are Date type (not DateTime)""" + row = pd.Series({ + 'PointID': 'TEST-004', + 'Easting': 350000, + 'Northing': 3880000, + 'DateCreated': '2014-04-03 10:30:45.123', # Has time component + 'SiteDate': '2002-12-10 14:22:33.456', # Has time component + 'Altitude': 1558.8, + 'AltDatum': 'NAVD88', + 'AltitudeMethod': 'GPS', + 'LocationId': 5, + 'PublicRelease': True, + 'CoordinateNotes': None, + 'LocationNotes': None, + 'AltitudeAccuracy': None, + }) + + elevations = {} + location, elevation_method = make_location(row, elevations) + + # Verify they are date objects (not datetime) + assert isinstance(location.legacy_date_created, datetime.date) + assert not isinstance(location.legacy_date_created, datetime.datetime) + + assert isinstance(location.legacy_site_date, datetime.date) + assert not isinstance(location.legacy_site_date, datetime.datetime) + + # Verify time component is stripped + assert location.legacy_date_created == datetime.date(2014, 4, 3) + assert location.legacy_site_date == datetime.date(2002, 12, 10) + + +def test_make_location_legacy_dates_independent_of_created_at(): + """Test that legacy dates don't affect created_at timestamp""" + row = pd.Series({ + 'PointID': 'TEST-005', + 'Easting': 350000, + 'Northing': 3880000, + 'DateCreated': '2014-04-03 
00:00:00.000', + 'SiteDate': '2002-12-10 00:00:00.000', + 'Altitude': 1558.8, + 'AltDatum': 'NAVD88', + 'AltitudeMethod': 'GPS', + 'LocationId': 6, + 'PublicRelease': True, + 'CoordinateNotes': None, + 'LocationNotes': None, + 'AltitudeAccuracy': None, + }) + + elevations = {} + location, elevation_method = make_location(row, elevations) + + # created_at should be a DateTime (with timezone) + assert isinstance(location.created_at, datetime.datetime) + + # legacy fields should be Date (no timezone) + assert isinstance(location.legacy_date_created, datetime.date) + assert isinstance(location.legacy_site_date, datetime.date) + + # They should be independent + assert location.created_at is not None + assert location.legacy_date_created is not None + assert location.legacy_site_date is not None + + +# ============================================================================ +# WELL COMPLETION DATE TESTS +# ============================================================================ + + +def test_create_well_schema_accepts_well_completed_on(): + """Test that CreateWell schema accepts well_completed_on from CSV CompletionDate""" + # Simulate data from CSV transfer + well_data = { + 'location_id': 1, + 'name': 'TEST-WELL-001', + 'well_completed_on': datetime.date(2004, 8, 8), # From CSV CompletionDate + 'hole_depth': 100.0, + 'well_depth': 95.0, + 'measuring_point_height': 2.5, + 'measuring_point_description': 'top of casing', + 'release_status': 'public', + } + + # Validate using CreateWell schema + schema = CreateWell(**well_data) + + assert schema.well_completed_on == datetime.date(2004, 8, 8) + + +def test_create_well_schema_well_completed_on_optional(): + """Test that well_completed_on is optional (70% of wells don't have CompletionDate)""" + well_data = { + 'location_id': 1, + 'name': 'TEST-WELL-002', + 'hole_depth': 100.0, + 'well_depth': 95.0, + 'measuring_point_height': 2.5, + 'measuring_point_description': 'top of casing', + 'release_status': 'public', + # No 
well_completed_on provided + } + + # Should not raise validation error + schema = CreateWell(**well_data) + + # Field should be optional + assert hasattr(schema, 'well_completed_on') + # Value should be None when not provided + assert schema.well_completed_on is None + + +def test_create_well_with_very_old_completion_date(): + """Test that very old completion dates (1936) are accepted""" + well_data = { + 'location_id': 1, + 'name': 'HISTORICAL-WELL', + 'well_completed_on': datetime.date(1936, 1, 1), # Oldest well in dataset + 'hole_depth': 100.0, + 'well_depth': 95.0, + 'measuring_point_height': 2.5, + 'measuring_point_description': 'top of casing', + 'release_status': 'public', + } + + schema = CreateWell(**well_data) + + assert schema.well_completed_on == datetime.date(1936, 1, 1) + + +def test_create_well_completed_on_is_date_not_datetime(): + """Test that well_completed_on is Date type (not DateTime)""" + well_data = { + 'location_id': 1, + 'name': 'TEST-WELL-003', + 'well_completed_on': datetime.date(2004, 8, 8), # Date, not DateTime + 'hole_depth': 100.0, + 'well_depth': 95.0, + 'measuring_point_height': 2.5, + 'measuring_point_description': 'top of casing', + 'release_status': 'public', + } + + schema = CreateWell(**well_data) + + # Should accept date type + assert isinstance(schema.well_completed_on, datetime.date) + assert not isinstance(schema.well_completed_on, datetime.datetime) + + +# ============================================================================ +# DATA COVERAGE TESTS (Simulating Migration Statistics) +# ============================================================================ + + +def test_location_legacy_date_coverage_statistics(): + """Test that migration preserves expected percentages of legacy dates""" + # Simulate 100 location records from CSV + locations_created = 0 + locations_with_site_date = 0 + + for i in range(100): + if i < 9: # 9% have SiteDate + row = pd.Series({ + 'PointID': f'TEST-{i:03d}', + 'Easting': 350000 + i, + 
'Northing': 3880000 + i, + 'DateCreated': '2014-04-03 00:00:00.000', + 'SiteDate': '2002-12-10 00:00:00.000', + 'Altitude': 1558.8, + 'AltDatum': 'NAVD88', + 'AltitudeMethod': 'GPS', + 'LocationId': i, + 'PublicRelease': True, + 'CoordinateNotes': None, + 'LocationNotes': None, + 'AltitudeAccuracy': None, + }) + else: # 91% don't have SiteDate + row = pd.Series({ + 'PointID': f'TEST-{i:03d}', + 'Easting': 350000 + i, + 'Northing': 3880000 + i, + 'DateCreated': '2014-04-03 00:00:00.000', + 'SiteDate': None, + 'Altitude': 1558.8, + 'AltDatum': 'NAVD88', + 'AltitudeMethod': 'GPS', + 'LocationId': i, + 'PublicRelease': True, + 'CoordinateNotes': None, + 'LocationNotes': None, + 'AltitudeAccuracy': None, + }) + + elevations = {} + location, _ = make_location(row, elevations) + + # Count coverage + if location.legacy_date_created is not None: + locations_created += 1 + + if location.legacy_site_date is not None: + locations_with_site_date += 1 + + # Verify expected coverage + assert locations_created == 100 # 100% should have legacy_date_created + assert locations_with_site_date == 9 # 9% should have legacy_site_date + + +def test_well_completion_date_coverage_statistics(): + """Test that expected percentage of wells have completion dates""" + # Simulate 100 wells from CSV + wells_with_completion_date = 0 + + for i in range(100): + if i < 30: # 30% have CompletionDate + well_data = { + 'location_id': 1, + 'name': f'WELL-{i:03d}', + 'well_completed_on': datetime.date(2004, 8, 8), + 'hole_depth': 100.0, + 'well_depth': 95.0, + 'measuring_point_height': 2.5, + 'measuring_point_description': 'top of casing', + 'release_status': 'public', + } + else: # 70% don't have CompletionDate + well_data = { + 'location_id': 1, + 'name': f'WELL-{i:03d}', + 'hole_depth': 100.0, + 'well_depth': 95.0, + 'measuring_point_height': 2.5, + 'measuring_point_description': 'top of casing', + 'release_status': 'public', + # No well_completed_on + } + + schema = CreateWell(**well_data) + + if 
schema.well_completed_on is not None: + wells_with_completion_date += 1 + + # Verify expected coverage + assert wells_with_completion_date == 30 # 30% should have completion dates + + +# ============================================================================ +# EOF +# ============================================================================ From 687fb4aa1b5c4060f14d0fe140b78572f5909c9f Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 26 Nov 2025 18:05:52 -0800 Subject: [PATCH 10/37] Support changes in unit tests for thing and transfer script --- tests/test_thing.py | 5 +++-- tests/test_transfer_legacy_dates.py | 11 +++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_thing.py b/tests/test_thing.py index 12aafef1a..3d76e3b99 100644 --- a/tests/test_thing.py +++ b/tests/test_thing.py @@ -1237,8 +1237,9 @@ def test_well_with_completion_date_and_location_legacy_fields(location): assert data["well_completed_on"] == "2020-06-15" # Location legacy fields are null (migration-only fields) - assert data["current_location"]["legacy_date_created"] is None - assert data["current_location"]["legacy_site_date"] is None + # current_location is a GeoJSON Feature, so fields are under properties + assert data["current_location"]["properties"]["legacy_date_created"] is None + assert data["current_location"]["properties"]["legacy_site_date"] is None # cleanup after test from db import Thing, Location diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index a0cec1014..53f304c4a 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -67,8 +67,8 @@ def test_make_location_with_both_legacy_dates(): assert location.legacy_site_date is not None assert location.legacy_site_date == datetime.date(2002, 12, 10) - # Verify created_at is still set (should be the later date) - assert location.created_at is not None + # Verify created_at is NOT set during migration (it's 
auto-set by AutoBaseMixin on save) + assert location.created_at is None def test_make_location_with_only_date_created(): @@ -209,15 +209,14 @@ def test_make_location_legacy_dates_independent_of_created_at(): elevations = {} location, elevation_method = make_location(row, elevations) - # created_at should be a DateTime (with timezone) - assert isinstance(location.created_at, datetime.datetime) + # created_at should be None during transfer (auto-set by AutoBaseMixin on save) + assert location.created_at is None # legacy fields should be Date (no timezone) assert isinstance(location.legacy_date_created, datetime.date) assert isinstance(location.legacy_site_date, datetime.date) - # They should be independent - assert location.created_at is not None + # Legacy fields should be populated assert location.legacy_date_created is not None assert location.legacy_site_date is not None From 6552bc00fc3560fcd8abfae02486d6a1363d61e5 Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 26 Nov 2025 18:06:34 -0800 Subject: [PATCH 11/37] Implement changes in db and schemas --- db/location.py | 9 +++++++++ db/thing.py | 5 +++++ schemas/location.py | 15 +++++++++++++++ schemas/thing.py | 7 +++++++ 4 files changed, 36 insertions(+) diff --git a/db/location.py b/db/location.py index 50b1aa0db..3b4271592 100644 --- a/db/location.py +++ b/db/location.py @@ -23,6 +23,7 @@ String, ForeignKey, DateTime, + Date, func, Text, ) @@ -61,6 +62,14 @@ class Location(Base, AutoBaseMixin, ReleaseMixin, NotesMixin, DataProvenanceMixi nma_notes_location: Mapped[str] = mapped_column(Text, nullable=True) nma_coordinate_notes: Mapped[str] = mapped_column(Text, nullable=True) + # --- Legacy AMPAPI Date Fields (Migration-Only, Read-Only Post-Migration) --- + legacy_date_created: Mapped[datetime.date] = mapped_column( + Date, nullable=True, comment="Original AMPAPI DateCreated (migration-only field)" + ) + legacy_site_date: Mapped[datetime.date] = mapped_column( + Date, nullable=True, comment="Original 
AMPAPI SiteDate (migration-only field)" + ) + # --- Relationship Definitions --- thing_associations: Mapped[list["LocationThingAssociation"]] = relationship( back_populates="location", cascade="all, delete-orphan" diff --git a/db/thing.py b/db/thing.py index 9f30d08e2..b42b70d56 100644 --- a/db/thing.py +++ b/db/thing.py @@ -115,6 +115,11 @@ class Thing( ) well_construction_notes: Mapped[str] = mapped_column(Text, nullable=True) + well_completed_on: Mapped[date] = mapped_column( + Date, + nullable=True, + comment="Date when well construction/drilling was completed (from AMPAPI CompletionDate, active field for new wells)", + ) # Spring-related columns spring_type: Mapped[str] = lexicon_term( diff --git a/schemas/location.py b/schemas/location.py index e911e3359..e18b76996 100644 --- a/schemas/location.py +++ b/schemas/location.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== +from datetime import date from typing import List from geoalchemy2 import WKBElement @@ -106,6 +107,9 @@ class GeoJSONProperties(BaseModel): default_factory=GeoJSONUTMCoordinates ) notes: list[NoteResponse] = [] + # Legacy AMPAPI date fields (migration-only, read-only) + legacy_date_created: date | None = None + legacy_site_date: date | None = None model_config = ConfigDict( from_attributes=True, @@ -150,6 +154,9 @@ def populate_fields(cls, data: Any) -> Any: data_dict["properties"]["notes"] = data_dict.get("notes") data_dict["properties"]["elevation"] = convert_m_to_ft(elevation_m) data_dict["properties"]["elevation_method"] = data_dict.get("elevation_method") + # populate legacy date fields + data_dict["properties"]["legacy_date_created"] = data_dict.get("legacy_date_created") + data_dict["properties"]["legacy_site_date"] = data_dict.get("legacy_site_date") # populate UTM coordinates point_utm_zone_13n_wkt = transform_srid( @@ -181,6 
+188,10 @@ class LocationResponse(BaseResponseModel): county: str | None quad_name: str | None + # Legacy AMPAPI date fields (migration-only, read-only post-migration) + legacy_date_created: date | None = None + legacy_site_date: date | None = None + @field_validator("point", mode="before") def point_to_wkt(cls, value): if isinstance(value, WKBElement): @@ -219,5 +230,9 @@ class UpdateLocation(BaseUpdateModel, ValidateLocation): coordinate_accuracy: float | None = None coordinate_method: CoordinateMethod | None = None + # Legacy AMPAPI date fields (migration-only, can be updated but not created) + legacy_date_created: date | None = None + legacy_site_date: date | None = None + # ============= EOF ============================================= diff --git a/schemas/thing.py b/schemas/thing.py index cf8c3ef2b..6de5908cc 100644 --- a/schemas/thing.py +++ b/schemas/thing.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# =============================================================================== +from datetime import date from typing import List from pydantic import BaseModel, model_validator, Field, field_validator @@ -130,6 +131,8 @@ class CreateWell(CreateBaseThing, ValidateWell): ) measuring_point_description: str | None notes: list[CreateNote] | None = None + # Active field: users can set this for new wells + well_completed_on: date | None = None class CreateSpring(CreateBaseThing): @@ -224,6 +227,8 @@ class WellResponse(BaseThingResponse): measuring_point_height: float measuring_point_height_unit: str = "ft" measuring_point_description: str | None + # Active field: completion date for wells + well_completed_on: date | None = None water_notes: list[NoteResponse] | None = None measuring_notes: list[NoteResponse] | None = None @@ -329,6 +334,8 @@ class UpdateWell(UpdateThing, ValidateWell): well_casing_diameter: float | None = None # in inches well_casing_depth: float | None = None # in feet well_casing_materials: list[str] | None = None + # Active field: users can update completion date + well_completed_on: date | None = None class UpdateSpring(UpdateThing): From 08fb22105834b3fa70dc030cffb0af246bf3471b Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 26 Nov 2025 18:06:49 -0800 Subject: [PATCH 12/37] Implement changes in transfer scripts --- transfers/util.py | 43 +++++++++++++------------------------- transfers/well_transfer.py | 15 +++++++++++++ 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/transfers/util.py b/transfers/util.py index d08798425..d39845f44 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -214,33 +214,6 @@ def make_location(row: pd.Series, elevations: dict) -> tuple: point, source_srid=SRID_UTM_ZONE_13N, target_srid=SRID_WGS84 ) - """ - Developer's notes - - AMP folks said that the earlier date between DateCreated and SiteDate is when - the site was inventoried, whereas the later is when the record was made in - the 
database. This was because they were used interchangeably. - """ - if row.DateCreated and row.SiteDate: - - date_created = datetime.strptime(row.DateCreated, "%Y-%m-%d %H:%M:%S.%f") - site_date = datetime.strptime(row.SiteDate, "%Y-%m-%d %H:%M:%S.%f") - - if date_created > site_date: - created_at = date_created - else: - created_at = site_date - elif row.DateCreated and not row.SiteDate: - created_at = datetime.strptime(row.DateCreated, "%Y-%m-%d %H:%M:%S.%f") - elif not row.DateCreated and row.SiteDate: - created_at = datetime.strptime(row.SiteDate, "%Y-%m-%d %H:%M:%S.%f") - else: - created_at = None - - # convert created_at from MST/MDT to UTC - if created_at is not None: - created_at = convert_mt_to_utc(created_at) - z = row.Altitude if z: elevation_from_epqs = False @@ -271,14 +244,28 @@ def make_location(row: pd.Series, elevations: dict) -> tuple: f"LU_AltitudeMethod:{row.AltitudeMethod.strip()}" ) + # Extract legacy date fields (Date type, not DateTime) + legacy_date_created = None + if row.DateCreated: + legacy_date_created = datetime.strptime( + row.DateCreated, "%Y-%m-%d %H:%M:%S.%f" + ).date() + + legacy_site_date = None + if row.SiteDate: + legacy_site_date = datetime.strptime( + row.SiteDate, "%Y-%m-%d %H:%M:%S.%f" + ).date() + location = Location( nma_pk_location=row.LocationId, point=transformed_point.wkt, elevation=z, release_status="public" if row.PublicRelease else "private", - created_at=created_at, nma_coordinate_notes=row.CoordinateNotes, nma_notes_location=row.LocationNotes, + legacy_date_created=legacy_date_created, + legacy_site_date=legacy_site_date, ) return location, elevation_method diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index ee54d0216..5daa1d8ee 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -237,6 +237,19 @@ def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None [] if isna(row.CasingDescription) else _extract_casing_materials(row) ) + # Extract 
well_completed_on from CompletionDate (Date type, not DateTime) + well_completed_on = None + if not isna(row.CompletionDate): + try: + well_completed_on = datetime.strptime( + row.CompletionDate, "%Y-%m-%d %H:%M:%S.%f" + ).date() + except (ValueError, AttributeError): + # If parsing fails, leave as None + logger.warning( + f"Could not parse CompletionDate for {row.PointID}: {row.CompletionDate}" + ) + # manually add the well rather than add_well from services/thing_helper.py # so that effective_start can be set on the location assocation @@ -254,6 +267,7 @@ def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None release_status="public" if row.PublicRelease else "private", measuring_point_height=row.MPHeight, measuring_point_description=row.MeasuringPoint, + well_completed_on=well_completed_on, notes=( [{"content": row.Notes, "note_type": "Other"}] if row.Notes else [] ), @@ -283,6 +297,7 @@ def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None well_data["nma_pk_welldata"] = row.WellID well_data.pop("notes") + # well_completed_on is kept in well_data (not excluded above) well = Thing(**well_data) session.add(well) # logger.info(f"Created well for {row.PointID}") From 47aad3f14d0bbe059299cc919f332c8d1d7febcf Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 26 Nov 2025 18:07:08 -0800 Subject: [PATCH 13/37] Address measuring point bug --- services/thing_helper.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/services/thing_helper.py b/services/thing_helper.py index 53ce54577..084a8b02b 100644 --- a/services/thing_helper.py +++ b/services/thing_helper.py @@ -13,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# =============================================================================== +from datetime import datetime +from zoneinfo import ZoneInfo + from fastapi import Request from fastapi_pagination.ext.sqlalchemy import paginate from pydantic import BaseModel @@ -32,6 +35,7 @@ WellCasingMaterial, ) from db.group import GroupThingAssociation +from db.measuring_point_history import MeasuringPointHistory from services.audit_helper import audit_add from services.crud_helper import model_patcher from services.exceptions_helper import PydanticStyleException @@ -159,6 +163,10 @@ def add_thing( location_id = data.pop("location_id", None) group_id = data.pop("group_id", None) + # Extract measuring point data (stored in separate history table) + measuring_point_height = data.pop("measuring_point_height", None) + measuring_point_description = data.pop("measuring_point_description", None) + try: thing = Thing(**data) thing.thing_type = thing_type @@ -169,6 +177,18 @@ def add_thing( session.flush() session.refresh(thing) + # Create MeasuringPointHistory record if measuring_point_height provided + if measuring_point_height is not None: + measuring_point_history = MeasuringPointHistory( + thing_id=thing.id, + measuring_point_height=measuring_point_height, + measuring_point_description=measuring_point_description, + start_date=datetime.now(tz=ZoneInfo("UTC")), + end_date=None, + ) + audit_add(user, measuring_point_history) + session.add(measuring_point_history) + # endpoint catches ProgrammingError if location_id or group_id do not exist if group_id: assoc = GroupThingAssociation() From 546b7013286c37529b5e2a8e0524ae09daac1f5f Mon Sep 17 00:00:00 2001 From: kbighorse Date: Thu, 27 Nov 2025 02:07:05 +0000 Subject: [PATCH 14/37] Formatting changes --- db/location.py | 4 +- schemas/location.py | 4 +- tests/test_transfer_legacy_dates.py | 350 +++++++++++++++------------- 3 files changed, 189 insertions(+), 169 deletions(-) diff --git a/db/location.py b/db/location.py index 
3b4271592..a07958346 100644 --- a/db/location.py +++ b/db/location.py @@ -64,7 +64,9 @@ class Location(Base, AutoBaseMixin, ReleaseMixin, NotesMixin, DataProvenanceMixi # --- Legacy AMPAPI Date Fields (Migration-Only, Read-Only Post-Migration) --- legacy_date_created: Mapped[datetime.date] = mapped_column( - Date, nullable=True, comment="Original AMPAPI DateCreated (migration-only field)" + Date, + nullable=True, + comment="Original AMPAPI DateCreated (migration-only field)", ) legacy_site_date: Mapped[datetime.date] = mapped_column( Date, nullable=True, comment="Original AMPAPI SiteDate (migration-only field)" diff --git a/schemas/location.py b/schemas/location.py index e18b76996..1f4bad472 100644 --- a/schemas/location.py +++ b/schemas/location.py @@ -155,7 +155,9 @@ def populate_fields(cls, data: Any) -> Any: data_dict["properties"]["elevation"] = convert_m_to_ft(elevation_m) data_dict["properties"]["elevation_method"] = data_dict.get("elevation_method") # populate legacy date fields - data_dict["properties"]["legacy_date_created"] = data_dict.get("legacy_date_created") + data_dict["properties"]["legacy_date_created"] = data_dict.get( + "legacy_date_created" + ) data_dict["properties"]["legacy_site_date"] = data_dict.get("legacy_site_date") # populate UTM coordinates diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index 53f304c4a..30fbcd5ae 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -38,21 +38,23 @@ def test_make_location_with_both_legacy_dates(): """Test that make_location populates both legacy_date_created and legacy_site_date""" # Create a mock CSV row with both DateCreated and SiteDate - row = pd.Series({ - 'PointID': 'TEST-001', - 'Easting': 350000, - 'Northing': 3880000, - 'DateCreated': '2014-04-03 00:00:00.000', - 'SiteDate': '2002-12-10 00:00:00.000', - 'Altitude': 1558.8, - 'AltDatum': 'NAVD88', - 'AltitudeMethod': 'GPS', - 'LocationId': 1, - 'PublicRelease': 
True, - 'CoordinateNotes': None, - 'LocationNotes': None, - 'AltitudeAccuracy': None, - }) + row = pd.Series( + { + "PointID": "TEST-001", + "Easting": 350000, + "Northing": 3880000, + "DateCreated": "2014-04-03 00:00:00.000", + "SiteDate": "2002-12-10 00:00:00.000", + "Altitude": 1558.8, + "AltDatum": "NAVD88", + "AltitudeMethod": "GPS", + "LocationId": 1, + "PublicRelease": True, + "CoordinateNotes": None, + "LocationNotes": None, + "AltitudeAccuracy": None, + } + ) elevations = {} @@ -73,21 +75,23 @@ def test_make_location_with_both_legacy_dates(): def test_make_location_with_only_date_created(): """Test that make_location handles locations with only DateCreated (no SiteDate)""" - row = pd.Series({ - 'PointID': 'TEST-002', - 'Easting': 350000, - 'Northing': 3880000, - 'DateCreated': '2014-04-03 00:00:00.000', - 'SiteDate': None, # No SiteDate - 'Altitude': 1558.8, - 'AltDatum': 'NAVD88', - 'AltitudeMethod': 'GPS', - 'LocationId': 2, - 'PublicRelease': True, - 'CoordinateNotes': None, - 'LocationNotes': None, - 'AltitudeAccuracy': None, - }) + row = pd.Series( + { + "PointID": "TEST-002", + "Easting": 350000, + "Northing": 3880000, + "DateCreated": "2014-04-03 00:00:00.000", + "SiteDate": None, # No SiteDate + "Altitude": 1558.8, + "AltDatum": "NAVD88", + "AltitudeMethod": "GPS", + "LocationId": 2, + "PublicRelease": True, + "CoordinateNotes": None, + "LocationNotes": None, + "AltitudeAccuracy": None, + } + ) elevations = {} location, elevation_method = make_location(row, elevations) @@ -101,21 +105,23 @@ def test_make_location_with_only_date_created(): def test_make_location_with_site_date_later_than_date_created(): """Test data anomaly: SiteDate is later than DateCreated (should still be accepted)""" - row = pd.Series({ - 'PointID': 'TEST-003', - 'Easting': 350000, - 'Northing': 3880000, - 'DateCreated': '2010-01-15 00:00:00.000', - 'SiteDate': '2015-06-20 00:00:00.000', # Later than DateCreated (anomaly) - 'Altitude': 1558.8, - 'AltDatum': 'NAVD88', - 
'AltitudeMethod': 'GPS', - 'LocationId': 3, - 'PublicRelease': True, - 'CoordinateNotes': None, - 'LocationNotes': None, - 'AltitudeAccuracy': None, - }) + row = pd.Series( + { + "PointID": "TEST-003", + "Easting": 350000, + "Northing": 3880000, + "DateCreated": "2010-01-15 00:00:00.000", + "SiteDate": "2015-06-20 00:00:00.000", # Later than DateCreated (anomaly) + "Altitude": 1558.8, + "AltDatum": "NAVD88", + "AltitudeMethod": "GPS", + "LocationId": 3, + "PublicRelease": True, + "CoordinateNotes": None, + "LocationNotes": None, + "AltitudeAccuracy": None, + } + ) elevations = {} location, elevation_method = make_location(row, elevations) @@ -127,21 +133,23 @@ def test_make_location_with_site_date_later_than_date_created(): def test_make_location_with_very_old_site_date(): """Test that very old SiteDates (1950s) are preserved correctly""" - row = pd.Series({ - 'PointID': 'SM-0227', # Real example from dataset - 'Easting': 350000, - 'Northing': 3880000, - 'DateCreated': '2008-05-28 00:00:00.000', - 'SiteDate': '1954-05-01 00:00:00.000', # 54 years earlier! - 'Altitude': 1558.8, - 'AltDatum': 'NAVD88', - 'AltitudeMethod': 'GPS', - 'LocationId': 4, - 'PublicRelease': True, - 'CoordinateNotes': None, - 'LocationNotes': None, - 'AltitudeAccuracy': None, - }) + row = pd.Series( + { + "PointID": "SM-0227", # Real example from dataset + "Easting": 350000, + "Northing": 3880000, + "DateCreated": "2008-05-28 00:00:00.000", + "SiteDate": "1954-05-01 00:00:00.000", # 54 years earlier! 
+ "Altitude": 1558.8, + "AltDatum": "NAVD88", + "AltitudeMethod": "GPS", + "LocationId": 4, + "PublicRelease": True, + "CoordinateNotes": None, + "LocationNotes": None, + "AltitudeAccuracy": None, + } + ) elevations = {} location, elevation_method = make_location(row, elevations) @@ -157,21 +165,23 @@ def test_make_location_with_very_old_site_date(): def test_make_location_legacy_dates_are_date_not_datetime(): """Test that legacy date fields are Date type (not DateTime)""" - row = pd.Series({ - 'PointID': 'TEST-004', - 'Easting': 350000, - 'Northing': 3880000, - 'DateCreated': '2014-04-03 10:30:45.123', # Has time component - 'SiteDate': '2002-12-10 14:22:33.456', # Has time component - 'Altitude': 1558.8, - 'AltDatum': 'NAVD88', - 'AltitudeMethod': 'GPS', - 'LocationId': 5, - 'PublicRelease': True, - 'CoordinateNotes': None, - 'LocationNotes': None, - 'AltitudeAccuracy': None, - }) + row = pd.Series( + { + "PointID": "TEST-004", + "Easting": 350000, + "Northing": 3880000, + "DateCreated": "2014-04-03 10:30:45.123", # Has time component + "SiteDate": "2002-12-10 14:22:33.456", # Has time component + "Altitude": 1558.8, + "AltDatum": "NAVD88", + "AltitudeMethod": "GPS", + "LocationId": 5, + "PublicRelease": True, + "CoordinateNotes": None, + "LocationNotes": None, + "AltitudeAccuracy": None, + } + ) elevations = {} location, elevation_method = make_location(row, elevations) @@ -190,21 +200,23 @@ def test_make_location_legacy_dates_are_date_not_datetime(): def test_make_location_legacy_dates_independent_of_created_at(): """Test that legacy dates don't affect created_at timestamp""" - row = pd.Series({ - 'PointID': 'TEST-005', - 'Easting': 350000, - 'Northing': 3880000, - 'DateCreated': '2014-04-03 00:00:00.000', - 'SiteDate': '2002-12-10 00:00:00.000', - 'Altitude': 1558.8, - 'AltDatum': 'NAVD88', - 'AltitudeMethod': 'GPS', - 'LocationId': 6, - 'PublicRelease': True, - 'CoordinateNotes': None, - 'LocationNotes': None, - 'AltitudeAccuracy': None, - }) + row = 
pd.Series( + { + "PointID": "TEST-005", + "Easting": 350000, + "Northing": 3880000, + "DateCreated": "2014-04-03 00:00:00.000", + "SiteDate": "2002-12-10 00:00:00.000", + "Altitude": 1558.8, + "AltDatum": "NAVD88", + "AltitudeMethod": "GPS", + "LocationId": 6, + "PublicRelease": True, + "CoordinateNotes": None, + "LocationNotes": None, + "AltitudeAccuracy": None, + } + ) elevations = {} location, elevation_method = make_location(row, elevations) @@ -230,14 +242,14 @@ def test_create_well_schema_accepts_well_completed_on(): """Test that CreateWell schema accepts well_completed_on from CSV CompletionDate""" # Simulate data from CSV transfer well_data = { - 'location_id': 1, - 'name': 'TEST-WELL-001', - 'well_completed_on': datetime.date(2004, 8, 8), # From CSV CompletionDate - 'hole_depth': 100.0, - 'well_depth': 95.0, - 'measuring_point_height': 2.5, - 'measuring_point_description': 'top of casing', - 'release_status': 'public', + "location_id": 1, + "name": "TEST-WELL-001", + "well_completed_on": datetime.date(2004, 8, 8), # From CSV CompletionDate + "hole_depth": 100.0, + "well_depth": 95.0, + "measuring_point_height": 2.5, + "measuring_point_description": "top of casing", + "release_status": "public", } # Validate using CreateWell schema @@ -249,13 +261,13 @@ def test_create_well_schema_accepts_well_completed_on(): def test_create_well_schema_well_completed_on_optional(): """Test that well_completed_on is optional (70% of wells don't have CompletionDate)""" well_data = { - 'location_id': 1, - 'name': 'TEST-WELL-002', - 'hole_depth': 100.0, - 'well_depth': 95.0, - 'measuring_point_height': 2.5, - 'measuring_point_description': 'top of casing', - 'release_status': 'public', + "location_id": 1, + "name": "TEST-WELL-002", + "hole_depth": 100.0, + "well_depth": 95.0, + "measuring_point_height": 2.5, + "measuring_point_description": "top of casing", + "release_status": "public", # No well_completed_on provided } @@ -263,7 +275,7 @@ def 
test_create_well_schema_well_completed_on_optional(): schema = CreateWell(**well_data) # Field should be optional - assert hasattr(schema, 'well_completed_on') + assert hasattr(schema, "well_completed_on") # Value should be None when not provided assert schema.well_completed_on is None @@ -271,14 +283,14 @@ def test_create_well_schema_well_completed_on_optional(): def test_create_well_with_very_old_completion_date(): """Test that very old completion dates (1936) are accepted""" well_data = { - 'location_id': 1, - 'name': 'HISTORICAL-WELL', - 'well_completed_on': datetime.date(1936, 1, 1), # Oldest well in dataset - 'hole_depth': 100.0, - 'well_depth': 95.0, - 'measuring_point_height': 2.5, - 'measuring_point_description': 'top of casing', - 'release_status': 'public', + "location_id": 1, + "name": "HISTORICAL-WELL", + "well_completed_on": datetime.date(1936, 1, 1), # Oldest well in dataset + "hole_depth": 100.0, + "well_depth": 95.0, + "measuring_point_height": 2.5, + "measuring_point_description": "top of casing", + "release_status": "public", } schema = CreateWell(**well_data) @@ -289,14 +301,14 @@ def test_create_well_with_very_old_completion_date(): def test_create_well_completed_on_is_date_not_datetime(): """Test that well_completed_on is Date type (not DateTime)""" well_data = { - 'location_id': 1, - 'name': 'TEST-WELL-003', - 'well_completed_on': datetime.date(2004, 8, 8), # Date, not DateTime - 'hole_depth': 100.0, - 'well_depth': 95.0, - 'measuring_point_height': 2.5, - 'measuring_point_description': 'top of casing', - 'release_status': 'public', + "location_id": 1, + "name": "TEST-WELL-003", + "well_completed_on": datetime.date(2004, 8, 8), # Date, not DateTime + "hole_depth": 100.0, + "well_depth": 95.0, + "measuring_point_height": 2.5, + "measuring_point_description": "top of casing", + "release_status": "public", } schema = CreateWell(**well_data) @@ -319,37 +331,41 @@ def test_location_legacy_date_coverage_statistics(): for i in range(100): if i < 9: 
# 9% have SiteDate - row = pd.Series({ - 'PointID': f'TEST-{i:03d}', - 'Easting': 350000 + i, - 'Northing': 3880000 + i, - 'DateCreated': '2014-04-03 00:00:00.000', - 'SiteDate': '2002-12-10 00:00:00.000', - 'Altitude': 1558.8, - 'AltDatum': 'NAVD88', - 'AltitudeMethod': 'GPS', - 'LocationId': i, - 'PublicRelease': True, - 'CoordinateNotes': None, - 'LocationNotes': None, - 'AltitudeAccuracy': None, - }) + row = pd.Series( + { + "PointID": f"TEST-{i:03d}", + "Easting": 350000 + i, + "Northing": 3880000 + i, + "DateCreated": "2014-04-03 00:00:00.000", + "SiteDate": "2002-12-10 00:00:00.000", + "Altitude": 1558.8, + "AltDatum": "NAVD88", + "AltitudeMethod": "GPS", + "LocationId": i, + "PublicRelease": True, + "CoordinateNotes": None, + "LocationNotes": None, + "AltitudeAccuracy": None, + } + ) else: # 91% don't have SiteDate - row = pd.Series({ - 'PointID': f'TEST-{i:03d}', - 'Easting': 350000 + i, - 'Northing': 3880000 + i, - 'DateCreated': '2014-04-03 00:00:00.000', - 'SiteDate': None, - 'Altitude': 1558.8, - 'AltDatum': 'NAVD88', - 'AltitudeMethod': 'GPS', - 'LocationId': i, - 'PublicRelease': True, - 'CoordinateNotes': None, - 'LocationNotes': None, - 'AltitudeAccuracy': None, - }) + row = pd.Series( + { + "PointID": f"TEST-{i:03d}", + "Easting": 350000 + i, + "Northing": 3880000 + i, + "DateCreated": "2014-04-03 00:00:00.000", + "SiteDate": None, + "Altitude": 1558.8, + "AltDatum": "NAVD88", + "AltitudeMethod": "GPS", + "LocationId": i, + "PublicRelease": True, + "CoordinateNotes": None, + "LocationNotes": None, + "AltitudeAccuracy": None, + } + ) elevations = {} location, _ = make_location(row, elevations) @@ -374,24 +390,24 @@ def test_well_completion_date_coverage_statistics(): for i in range(100): if i < 30: # 30% have CompletionDate well_data = { - 'location_id': 1, - 'name': f'WELL-{i:03d}', - 'well_completed_on': datetime.date(2004, 8, 8), - 'hole_depth': 100.0, - 'well_depth': 95.0, - 'measuring_point_height': 2.5, - 'measuring_point_description': 'top 
of casing', - 'release_status': 'public', + "location_id": 1, + "name": f"WELL-{i:03d}", + "well_completed_on": datetime.date(2004, 8, 8), + "hole_depth": 100.0, + "well_depth": 95.0, + "measuring_point_height": 2.5, + "measuring_point_description": "top of casing", + "release_status": "public", } else: # 70% don't have CompletionDate well_data = { - 'location_id': 1, - 'name': f'WELL-{i:03d}', - 'hole_depth': 100.0, - 'well_depth': 95.0, - 'measuring_point_height': 2.5, - 'measuring_point_description': 'top of casing', - 'release_status': 'public', + "location_id": 1, + "name": f"WELL-{i:03d}", + "hole_depth": 100.0, + "well_depth": 95.0, + "measuring_point_height": 2.5, + "measuring_point_description": "top of casing", + "release_status": "public", # No well_completed_on } From 306dabcd655621e1882b0e1a75406bc243783e75 Mon Sep 17 00:00:00 2001 From: kbighorse Date: Wed, 3 Dec 2025 06:41:26 +0000 Subject: [PATCH 15/37] Formatting changes --- tests/test_transfer_legacy_dates.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index aa054740c..05dbe8dfe 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -35,7 +35,7 @@ # ============================================================================ -@patch('transfers.util.lexicon_mapper') +@patch("transfers.util.lexicon_mapper") def test_make_location_with_both_legacy_dates(mock_lexicon_mapper): """Test that make_location populates both legacy_date_created and legacy_site_date""" # Mock lexicon mapper to avoid GCS calls @@ -77,7 +77,7 @@ def test_make_location_with_both_legacy_dates(mock_lexicon_mapper): assert location.created_at is None -@patch('transfers.util.lexicon_mapper') +@patch("transfers.util.lexicon_mapper") def test_make_location_with_only_date_created(mock_lexicon_mapper): """Test that make_location handles locations with only DateCreated (no SiteDate)""" # 
Mock lexicon mapper to avoid GCS calls @@ -111,7 +111,7 @@ def test_make_location_with_only_date_created(mock_lexicon_mapper): assert location.legacy_site_date is None -@patch('transfers.util.lexicon_mapper') +@patch("transfers.util.lexicon_mapper") def test_make_location_with_site_date_later_than_date_created(mock_lexicon_mapper): """Test data anomaly: SiteDate is later than DateCreated (should still be accepted)""" # Mock lexicon mapper to avoid GCS calls @@ -143,7 +143,7 @@ def test_make_location_with_site_date_later_than_date_created(mock_lexicon_mappe assert location.legacy_site_date == datetime.date(2015, 6, 20) -@patch('transfers.util.lexicon_mapper') +@patch("transfers.util.lexicon_mapper") def test_make_location_with_very_old_site_date(mock_lexicon_mapper): """Test that very old SiteDates (1950s) are preserved correctly""" # Mock lexicon mapper to avoid GCS calls @@ -179,7 +179,7 @@ def test_make_location_with_very_old_site_date(mock_lexicon_mapper): assert time_gap == 19751 # Approximately 54 years -@patch('transfers.util.lexicon_mapper') +@patch("transfers.util.lexicon_mapper") def test_make_location_legacy_dates_are_date_not_datetime(mock_lexicon_mapper): """Test that legacy date fields are Date type (not DateTime)""" # Mock lexicon mapper to avoid GCS calls @@ -218,7 +218,7 @@ def test_make_location_legacy_dates_are_date_not_datetime(mock_lexicon_mapper): assert location.legacy_site_date == datetime.date(2002, 12, 10) -@patch('transfers.util.lexicon_mapper') +@patch("transfers.util.lexicon_mapper") def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mapper): """Test that legacy dates don't affect created_at timestamp""" # Mock lexicon mapper to avoid GCS calls @@ -347,7 +347,7 @@ def test_create_well_completed_on_is_date_not_datetime(): # ============================================================================ -@patch('transfers.util.lexicon_mapper') +@patch("transfers.util.lexicon_mapper") def 
test_location_legacy_date_coverage_statistics(mock_lexicon_mapper): """Test that migration preserves expected percentages of legacy dates""" # Mock lexicon mapper to avoid GCS calls From d8167a7e94c8687f01e3092912077a3dde618f1c Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Tue, 2 Dec 2025 22:41:37 -0800 Subject: [PATCH 16/37] Resolve test failures --- tests/test_thing.py | 7 ++--- tests/test_transfer_legacy_dates.py | 44 +++++++++++++++++++++++------ 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/tests/test_thing.py b/tests/test_thing.py index eaa541668..94d00aa85 100644 --- a/tests/test_thing.py +++ b/tests/test_thing.py @@ -1207,7 +1207,7 @@ def test_create_well_without_completion_date(location): def test_spring_well_completed_on_is_null(location): - """Test that springs have null well_completed_on field""" + """Test that springs do NOT have well_completed_on field (it's well-specific)""" payload = { "name": "Test Spring", "location_id": location.id, @@ -1218,9 +1218,8 @@ def test_spring_well_completed_on_is_null(location): assert response.status_code == 201 data = response.json() - # Springs should have null well_completed_on - assert "well_completed_on" in data - assert data["well_completed_on"] is None + # Springs should NOT have well_completed_on field (only wells have completion dates) + assert "well_completed_on" not in data assert data["thing_type"] == "spring" # cleanup after test diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index 30fbcd5ae..aa054740c 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -22,7 +22,7 @@ 3. 
Thing.well_completed_on is populated from CSV CompletionDate (if not null) """ import datetime -from unittest.mock import Mock, patch +from unittest.mock import Mock, patch, MagicMock import pandas as pd import pytest @@ -35,8 +35,12 @@ # ============================================================================ -def test_make_location_with_both_legacy_dates(): +@patch('transfers.util.lexicon_mapper') +def test_make_location_with_both_legacy_dates(mock_lexicon_mapper): """Test that make_location populates both legacy_date_created and legacy_site_date""" + # Mock lexicon mapper to avoid GCS calls + mock_lexicon_mapper.map_value.return_value = "GPS" + # Create a mock CSV row with both DateCreated and SiteDate row = pd.Series( { @@ -73,8 +77,12 @@ def test_make_location_with_both_legacy_dates(): assert location.created_at is None -def test_make_location_with_only_date_created(): +@patch('transfers.util.lexicon_mapper') +def test_make_location_with_only_date_created(mock_lexicon_mapper): """Test that make_location handles locations with only DateCreated (no SiteDate)""" + # Mock lexicon mapper to avoid GCS calls + mock_lexicon_mapper.map_value.return_value = "GPS" + row = pd.Series( { "PointID": "TEST-002", @@ -103,8 +111,12 @@ def test_make_location_with_only_date_created(): assert location.legacy_site_date is None -def test_make_location_with_site_date_later_than_date_created(): +@patch('transfers.util.lexicon_mapper') +def test_make_location_with_site_date_later_than_date_created(mock_lexicon_mapper): """Test data anomaly: SiteDate is later than DateCreated (should still be accepted)""" + # Mock lexicon mapper to avoid GCS calls + mock_lexicon_mapper.map_value.return_value = "GPS" + row = pd.Series( { "PointID": "TEST-003", @@ -131,8 +143,12 @@ def test_make_location_with_site_date_later_than_date_created(): assert location.legacy_site_date == datetime.date(2015, 6, 20) -def test_make_location_with_very_old_site_date(): +@patch('transfers.util.lexicon_mapper') 
+def test_make_location_with_very_old_site_date(mock_lexicon_mapper): """Test that very old SiteDates (1950s) are preserved correctly""" + # Mock lexicon mapper to avoid GCS calls + mock_lexicon_mapper.map_value.return_value = "GPS" + row = pd.Series( { "PointID": "SM-0227", # Real example from dataset @@ -163,8 +179,12 @@ def test_make_location_with_very_old_site_date(): assert time_gap == 19751 # Approximately 54 years -def test_make_location_legacy_dates_are_date_not_datetime(): +@patch('transfers.util.lexicon_mapper') +def test_make_location_legacy_dates_are_date_not_datetime(mock_lexicon_mapper): """Test that legacy date fields are Date type (not DateTime)""" + # Mock lexicon mapper to avoid GCS calls + mock_lexicon_mapper.map_value.return_value = "GPS" + row = pd.Series( { "PointID": "TEST-004", @@ -198,8 +218,12 @@ def test_make_location_legacy_dates_are_date_not_datetime(): assert location.legacy_site_date == datetime.date(2002, 12, 10) -def test_make_location_legacy_dates_independent_of_created_at(): +@patch('transfers.util.lexicon_mapper') +def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mapper): """Test that legacy dates don't affect created_at timestamp""" + # Mock lexicon mapper to avoid GCS calls + mock_lexicon_mapper.map_value.return_value = "GPS" + row = pd.Series( { "PointID": "TEST-005", @@ -323,8 +347,12 @@ def test_create_well_completed_on_is_date_not_datetime(): # ============================================================================ -def test_location_legacy_date_coverage_statistics(): +@patch('transfers.util.lexicon_mapper') +def test_location_legacy_date_coverage_statistics(mock_lexicon_mapper): """Test that migration preserves expected percentages of legacy dates""" + # Mock lexicon mapper to avoid GCS calls + mock_lexicon_mapper.map_value.return_value = "GPS" + # Simulate 100 location records from CSV locations_created = 0 locations_with_site_date = 0 From de1e5cb916a2fe9e577b8a85e509cc1144ad95f7 Mon Sep 17 
00:00:00 2001 From: Kimball Bighorse Date: Tue, 2 Dec 2025 22:55:25 -0800 Subject: [PATCH 17/37] Update column name in BDD tests --- .../steps/post_migration_legacy_data.py | 152 +++++++++--------- 1 file changed, 76 insertions(+), 76 deletions(-) diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py index e78afbde7..162358308 100644 --- a/tests/features/steps/post_migration_legacy_data.py +++ b/tests/features/steps/post_migration_legacy_data.py @@ -31,7 +31,7 @@ def parse_number(text): register_type(Number=parse_number) -def create_test_location(legacy_date_created=None, inventoried_on=None): +def create_test_location(legacy_date_created=None, legacy_site_date=None): """Helper to create a test location with legacy dates.""" with session_ctx() as session: location = Location( @@ -39,7 +39,7 @@ def create_test_location(legacy_date_created=None, inventoried_on=None): elevation=1558.8, release_status="public", legacy_date_created=legacy_date_created, - inventoried_on=inventoried_on, + legacy_site_date=legacy_site_date, ) session.add(location) session.commit() @@ -99,14 +99,14 @@ def step_given_location_with_table(context: Context): if data.get("legacy_date_created") and data["legacy_date_created"] != "null" else None ) - inventoried_on = ( - date.fromisoformat(data["inventoried_on"]) - if data.get("inventoried_on") and data["inventoried_on"] != "null" + legacy_site_date = ( + date.fromisoformat(data["legacy_site_date"]) + if data.get("legacy_site_date") and data["legacy_site_date"] != "null" else None ) location = create_test_location( - legacy_date_created=legacy_date_created, inventoried_on=inventoried_on + legacy_date_created=legacy_date_created, legacy_site_date=legacy_site_date ) context.test_location = location @@ -127,28 +127,28 @@ def step_given_multiple_locations(context: Context, count: int): ] for i in range(min(count, len(test_data))): - legacy_date, inventory_date = test_data[i] + legacy_date, 
site_date = test_data[i] location = create_test_location( legacy_date_created=date.fromisoformat(legacy_date), - inventoried_on=( - date.fromisoformat(inventory_date) if inventory_date else None + legacy_site_date=( + date.fromisoformat(site_date) if site_date else None ), ) context.test_locations.append(location) @given( - "locations exist with inventoried_on ranging from {start_year:Number} to {end_year:Number}" + "locations exist with legacy_site_date ranging from {start_year:Number} to {end_year:Number}" ) def step_given_locations_date_range(context: Context, start_year: int, end_year: int): - """Create locations with inventoried_on across a date range.""" + """Create locations with legacy_site_date across a date range.""" context.test_locations = [] years = [1954, 2002, 2003, 2010, 2015, 2020, 2024] for year in years: location = create_test_location( - legacy_date_created=date(year + 5, 1, 1), # Always 5 years after inventory - inventoried_on=date(year, 6, 15), + legacy_date_created=date(year + 5, 1, 1), # Always 5 years after site date + legacy_site_date=date(year, 6, 15), ) context.test_locations.append(location) @@ -166,7 +166,7 @@ def step_given_locations_with_specific_date( for i in range(count): location = create_test_location( legacy_date_created=target, - inventoried_on=date(2000 + i, 1, 1), # Vary the inventory dates + legacy_site_date=date(2000 + i, 1, 1), # Vary the site dates ) context.test_locations.append(location) @@ -261,16 +261,16 @@ def step_given_well_location_has_table(context: Context): if data.get("legacy_date_created") else None ) - inventoried_on = ( - date.fromisoformat(data.get("inventoried_on")) - if data.get("inventoried_on") + legacy_site_date = ( + date.fromisoformat(data.get("legacy_site_date")) + if data.get("legacy_site_date") else None ) with session_ctx() as session: location = session.get(Location, context.test_well_location.id) location.legacy_date_created = legacy_date_created - location.inventoried_on = inventoried_on + 
location.legacy_site_date = legacy_site_date session.commit() session.refresh(location) context.test_well_location = location @@ -282,12 +282,12 @@ def step_given_count_locations_migrated(context: Context, count: int): context.test_locations = [] for i in range(count): - # 9% have inventoried_on - has_inventory = i < count * 0.09 + # 9% have legacy_site_date + has_site_date = i < count * 0.09 location = create_test_location( legacy_date_created=date(2014, 1, i % 28 + 1), - inventoried_on=date(2003, 1, i % 28 + 1) if has_inventory else None, + legacy_site_date=date(2003, 1, i % 28 + 1) if has_site_date else None, ) context.test_locations.append(location) @@ -323,7 +323,7 @@ def step_given_completion_count(context: Context, count: int): def step_given_location_migrated_with_dates(context: Context): """Create location with both legacy dates.""" location = create_test_location( - legacy_date_created=date(2014, 4, 3), inventoried_on=date(2002, 12, 10) + legacy_date_created=date(2014, 4, 3), legacy_site_date=date(2002, 12, 10) ) context.test_location = location @@ -364,7 +364,7 @@ def step_when_get_all_locations(context: Context): @when( - 'I filter locations where inventoried_on is between "{start_date}" and "{end_date}"' + 'I filter locations where legacy_site_date is between "{start_date}" and "{end_date}"' ) def step_when_filter_locations(context: Context, start_date: str, end_date: str): """Filter locations by date range.""" @@ -375,7 +375,7 @@ def step_when_filter_locations(context: Context, start_date: str, end_date: str) locations = ( session.query(Location) - .filter(Location.inventoried_on >= start, Location.inventoried_on <= end) + .filter(Location.legacy_site_date >= start, Location.legacy_site_date <= end) .all() ) @@ -509,10 +509,10 @@ def step_then_legacy_date_created(context: Context, expected_date: str): assert actual == expected_date, f"Expected {expected_date}, got {actual}" -@then('the response should include inventoried_on as "{expected_date}"') -def 
step_then_inventoried_on(context: Context, expected_date: str): - """Assert inventoried_on matches.""" - actual = context.location_response.get("inventoried_on") +@then('the response should include legacy_site_date as "{expected_date}"') +def step_then_legacy_site_date(context: Context, expected_date: str): + """Assert legacy_site_date matches.""" + actual = context.location_response.get("legacy_site_date") assert actual == expected_date, f"Expected {expected_date}, got {actual}" @@ -520,15 +520,15 @@ def step_then_inventoried_on(context: Context, expected_date: str): def step_then_time_gap_years(context: Context, years: str): """Assert approximate year gap.""" legacy_str = context.location_response.get("legacy_date_created") - inventory_str = context.location_response.get("inventoried_on") + site_date_str = context.location_response.get("legacy_site_date") - if not legacy_str or not inventory_str: + if not legacy_str or not site_date_str: raise AssertionError("Missing date fields for gap calculation") legacy_date = date.fromisoformat(legacy_str) - inventory_date = date.fromisoformat(inventory_str) + site_date = date.fromisoformat(site_date_str) - gap_days = (legacy_date - inventory_date).days + gap_days = (legacy_date - site_date).days gap_years = gap_days / 365.25 expected_years = float(years) @@ -546,47 +546,47 @@ def step_then_all_have_legacy_field(context: Context): assert "legacy_date_created" in item, f"Location missing legacy_date_created" -@then("each location should have an inventoried_on field") -def step_then_all_have_inventory_field(context: Context): +@then("each location should have a legacy_site_date field") +def step_then_all_have_site_date_field(context: Context): """Assert all locations have the field.""" items = context.locations_response.get("items", []) for item in items: - assert "inventoried_on" in item, f"Location missing inventoried_on" + assert "legacy_site_date" in item, f"Location missing legacy_site_date" -@then("some locations should 
have null inventoried_on") -def step_then_some_null_inventory(context: Context): +@then("some locations should have null legacy_site_date") +def step_then_some_null_site_date(context: Context): """Assert some locations have null.""" items = context.locations_response.get("items", []) - null_count = sum(1 for item in items if item.get("inventoried_on") is None) - assert null_count > 0, "Expected at least one location with null inventoried_on" + null_count = sum(1 for item in items if item.get("legacy_site_date") is None) + assert null_count > 0, "Expected at least one location with null legacy_site_date" -@then("the response should only include locations inventoried in that decade") +@then("the response should only include locations with site date in that decade") def step_then_locations_in_decade(context: Context): """Assert filtered locations are in range.""" for loc in context.filtered_locations: assert ( - 2000 <= loc.inventoried_on.year <= 2010 - ), f"Location not in 2000-2010: {loc.inventoried_on}" + 2000 <= loc.legacy_site_date.year <= 2010 + ), f"Location not in 2000-2010: {loc.legacy_site_date}" -@then("locations inventoried before {year:Number} should not be included") +@then("locations with site date before {year:Number} should not be included") def step_then_locations_before_excluded(context: Context, year: int): """Assert no locations before year.""" for loc in context.filtered_locations: assert ( - loc.inventoried_on.year >= year - ), f"Location from {loc.inventoried_on.year} should not be included" + loc.legacy_site_date.year >= year + ), f"Location from {loc.legacy_site_date.year} should not be included" -@then("locations inventoried after {year:Number} should not be included") +@then("locations with site date after {year:Number} should not be included") def step_then_locations_after_excluded(context: Context, year: int): """Assert no locations after year.""" for loc in context.filtered_locations: assert ( - loc.inventoried_on.year <= year - ), 
f"Location from {loc.inventoried_on.year} should not be included" + loc.legacy_site_date.year <= year + ), f"Location from {loc.legacy_site_date.year} should not be included" @then("the response should include exactly {count:Number} locations") @@ -721,44 +721,44 @@ def step_then_location_has_legacy(context: Context, expected_date: str): assert actual == expected_date, f"Expected {expected_date}, got {actual}" -@then('the current_location should include inventoried_on as "{expected_date}"') -def step_then_location_has_inventory(context: Context, expected_date: str): - """Assert location has inventoried_on.""" +@then('the current_location should include legacy_site_date as "{expected_date}"') +def step_then_location_has_site_date(context: Context, expected_date: str): + """Assert location has legacy_site_date.""" current_location = context.well_response.get("current_location", {}) - actual = current_location.get("inventoried_on") + actual = current_location.get("legacy_site_date") assert actual == expected_date, f"Expected {expected_date}, got {actual}" @then( - "the temporal sequence should be: well_completed_on → inventoried_on → legacy_date_created" + "the temporal sequence should be: well_completed_on → legacy_site_date → legacy_date_created" ) def step_then_temporal_sequence(context: Context): """Assert temporal order.""" well_completed = context.retrieved_well.well_completed_on - inventoried = context.retrieved_location.inventoried_on + site_date = context.retrieved_location.legacy_site_date legacy_created = context.retrieved_location.legacy_date_created assert ( - well_completed < inventoried - ), "Well should be completed before site inventoried" + well_completed < site_date + ), "Well should be completed before site date" assert ( - inventoried < legacy_created - ), "Site should be inventoried before DB record created" + site_date < legacy_created + ), "Site date should be before DB record created" @then("the timeline should show: {year1:Number} → 
{year2:Number} → {year3:Number}") def step_then_timeline_years(context: Context, year1: int, year2: int, year3: int): """Assert specific years in sequence.""" assert context.retrieved_well.well_completed_on.year == year1 - assert context.retrieved_location.inventoried_on.year == year2 + assert context.retrieved_location.legacy_site_date.year == year2 assert context.retrieved_location.legacy_date_created.year == year3 -@then("{percentage:Number}% should have non-null inventoried_on") -def step_then_percentage_inventory(context: Context, percentage: int): - """Assert percentage with inventoried_on.""" +@then("{percentage:Number}% should have non-null legacy_site_date") +def step_then_percentage_site_date(context: Context, percentage: int): + """Assert percentage with legacy_site_date.""" total = len(context.queried_locations) - populated = sum(1 for loc in context.queried_locations if loc.inventoried_on) + populated = sum(1 for loc in context.queried_locations if loc.legacy_site_date) actual_pct = (populated / total) * 100 tolerance = 2 @@ -805,10 +805,10 @@ def step_then_has_legacy_date(context: Context): assert context.retrieved_location.legacy_date_created is not None -@then("it should have inventoried_on (original AMPAPI SiteDate)") -def step_then_has_inventory_date(context: Context): - """Assert inventoried_on exists.""" - assert context.retrieved_location.inventoried_on is not None +@then("it should have legacy_site_date (original AMPAPI SiteDate)") +def step_then_has_site_date(context: Context): + """Assert legacy_site_date exists.""" + assert context.retrieved_location.legacy_site_date is not None @then("all three timestamps should be independently queryable") @@ -816,7 +816,7 @@ def step_then_all_queryable(context: Context): """Assert all fields are queryable.""" assert hasattr(context.retrieved_location, "created_at") assert hasattr(context.retrieved_location, "legacy_date_created") - assert hasattr(context.retrieved_location, "inventoried_on") + assert 
hasattr(context.retrieved_location, "legacy_site_date") @then("created_at should be a recent timestamp") @@ -843,10 +843,10 @@ def step_then_legacy_is(context: Context, expected_date: str): assert actual == expected, f"Expected {expected}, got {actual}" -@then('inventoried_on should be "{expected_date}"') -def step_then_inventory_is(context: Context, expected_date: str): - """Assert inventoried_on value.""" - actual = context.retrieved_location.inventoried_on +@then('legacy_site_date should be "{expected_date}"') +def step_then_site_date_is(context: Context, expected_date: str): + """Assert legacy_site_date value.""" + actual = context.retrieved_location.legacy_site_date expected = date.fromisoformat(expected_date) assert actual == expected, f"Expected {expected}, got {actual}" @@ -880,10 +880,10 @@ def step_then_no_validation_errors(context: Context): pass -@then("inventoried_on should be null") -def step_then_inventory_null(context: Context): - """Assert inventoried_on is null.""" - assert context.retrieved_location.inventoried_on is None +@then("legacy_site_date should be null") +def step_then_site_date_null(context: Context): + """Assert legacy_site_date is null.""" + assert context.retrieved_location.legacy_site_date is None @then("the well should still be valid") From a9293bb71260a303a609212f973e9e9bb3451995 Mon Sep 17 00:00:00 2001 From: kbighorse Date: Wed, 3 Dec 2025 06:55:26 +0000 Subject: [PATCH 18/37] Formatting changes --- .../features/steps/post_migration_legacy_data.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py index 162358308..b36dfa461 100644 --- a/tests/features/steps/post_migration_legacy_data.py +++ b/tests/features/steps/post_migration_legacy_data.py @@ -130,9 +130,7 @@ def step_given_multiple_locations(context: Context, count: int): legacy_date, site_date = test_data[i] location = 
create_test_location( legacy_date_created=date.fromisoformat(legacy_date), - legacy_site_date=( - date.fromisoformat(site_date) if site_date else None - ), + legacy_site_date=(date.fromisoformat(site_date) if site_date else None), ) context.test_locations.append(location) @@ -375,7 +373,9 @@ def step_when_filter_locations(context: Context, start_date: str, end_date: str) locations = ( session.query(Location) - .filter(Location.legacy_site_date >= start, Location.legacy_site_date <= end) + .filter( + Location.legacy_site_date >= start, Location.legacy_site_date <= end + ) .all() ) @@ -738,12 +738,8 @@ def step_then_temporal_sequence(context: Context): site_date = context.retrieved_location.legacy_site_date legacy_created = context.retrieved_location.legacy_date_created - assert ( - well_completed < site_date - ), "Well should be completed before site date" - assert ( - site_date < legacy_created - ), "Site date should be before DB record created" + assert well_completed < site_date, "Well should be completed before site date" + assert site_date < legacy_created, "Site date should be before DB record created" @then("the timeline should show: {year1:Number} → {year2:Number} → {year3:Number}") From dc7a31b93ddf564af0d0905a788108087cc32e93 Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 3 Dec 2025 00:30:21 -0800 Subject: [PATCH 19/37] Remove `well_completed_on` --- db/thing.py | 5 - schemas/thing.py | 6 - ...st-migration-legacy-data-retrieval.feature | 78 --- .../steps/post_migration_legacy_data.py | 445 ------------------ tests/test_thing.py | 144 ------ tests/test_transfer_legacy_dates.py | 125 ----- transfers/well_transfer.py | 15 - 7 files changed, 818 deletions(-) diff --git a/db/thing.py b/db/thing.py index b42b70d56..9f30d08e2 100644 --- a/db/thing.py +++ b/db/thing.py @@ -115,11 +115,6 @@ class Thing( ) well_construction_notes: Mapped[str] = mapped_column(Text, nullable=True) - well_completed_on: Mapped[date] = mapped_column( - Date, - nullable=True, 
- comment="Date when well construction/drilling was completed (from AMPAPI CompletionDate, active field for new wells)", - ) # Spring-related columns spring_type: Mapped[str] = lexicon_term( diff --git a/schemas/thing.py b/schemas/thing.py index 6de5908cc..692b78459 100644 --- a/schemas/thing.py +++ b/schemas/thing.py @@ -131,8 +131,6 @@ class CreateWell(CreateBaseThing, ValidateWell): ) measuring_point_description: str | None notes: list[CreateNote] | None = None - # Active field: users can set this for new wells - well_completed_on: date | None = None class CreateSpring(CreateBaseThing): @@ -227,8 +225,6 @@ class WellResponse(BaseThingResponse): measuring_point_height: float measuring_point_height_unit: str = "ft" measuring_point_description: str | None - # Active field: completion date for wells - well_completed_on: date | None = None water_notes: list[NoteResponse] | None = None measuring_notes: list[NoteResponse] | None = None @@ -334,8 +330,6 @@ class UpdateWell(UpdateThing, ValidateWell): well_casing_diameter: float | None = None # in inches well_casing_depth: float | None = None # in feet well_casing_materials: list[str] | None = None - # Active field: users can update completion date - well_completed_on: date | None = None class UpdateSpring(UpdateThing): diff --git a/tests/features/post-migration-legacy-data-retrieval.feature b/tests/features/post-migration-legacy-data-retrieval.feature index fa4663e1b..b5329ad9c 100644 --- a/tests/features/post-migration-legacy-data-retrieval.feature +++ b/tests/features/post-migration-legacy-data-retrieval.feature @@ -51,65 +51,6 @@ Feature: Post-Migration Legacy Data Retrieval Then the response should include exactly 3 locations And all should have legacy_date_created "2014-04-03" - # Well Completion Date Lookups - - Scenario: Retrieve well with completion date via API - Given a well exists with well_completed_on "2004-08-08" - When I retrieve that well via the API - Then the response should include well_completed_on 
as "2004-08-08" - And the well age should be calculable - - Scenario: Retrieve old well from early 1900s - Given a well exists with well_completed_on "1936-01-01" - When I retrieve that well via the API - Then the response should include well_completed_on as "1936-01-01" - And the well should be over 88 years old - - Scenario: List all wells includes completion date field - Given 10 wells exist with various completion dates - And 3 of those wells have null well_completed_on - When I GET /thing/water-well to list all wells - Then each well should have a well_completed_on field - And 70% of wells should have well_completed_on populated - - Scenario: Filter wells by completion date range - Given wells exist with completion dates from 1936 to 2024 - When I filter wells where well_completed_on is between "2000-01-01" and "2010-12-31" - Then the response should only include wells completed in that decade - And wells from 1936 should not be included - And wells from 2020 should not be included - - Scenario: Sort wells by completion date (oldest first) - Given wells exist with completion dates: 1936, 1965, 2004, 2020 - And some wells have null well_completed_on - When I GET /thing/water-well sorted by well_completed_on ascending - Then the first well should be from 1936 - And the last well with a date should be from 2020 - And wells without completion dates should appear last - - # Combined Queries - Location + Well Legacy Dates - - Scenario: Retrieve well with location showing all legacy dates - Given a well exists with well_completed_on "2004-08-08" - And that well's location has: - | field | value | - | legacy_date_created | 2014-04-03 | - | legacy_site_date | 2002-12-10 | - When I retrieve the well via the API - Then the well should have well_completed_on as "2004-08-08" - And the current_location should include legacy_date_created as "2014-04-03" - And the current_location should include legacy_site_date as "2002-12-10" - - Scenario: Timeline reconstruction - well 
completed before site inventoried - Given a well exists with well_completed_on "1995-06-15" - And that well's location has: - | field | value | - | legacy_site_date | 2003-12-10 | - | legacy_date_created | 2014-04-03 | - When I retrieve the well and its location - Then the temporal sequence should be: well_completed_on → legacy_site_date → legacy_date_created - And the timeline should show: 1995 → 2003 → 2014 - # Data Quality Validation Scenario: Verify migration preserved expected percentage of legacy dates @@ -119,12 +60,6 @@ Feature: Post-Migration Legacy Data Retrieval Then 9% should have non-null legacy_site_date And 100% should have non-null legacy_date_created - Scenario: Verify well completion date coverage matches expectation - Given 100 wells were migrated - And 30 of them had non-null CompletionDate in AMPAPI - When I query the migrated wells - Then 30% should have non-null well_completed_on - # Audit Trail Verification Scenario: Legacy dates preserved alongside audit timestamps @@ -149,13 +84,6 @@ Feature: Post-Migration Legacy Data Retrieval And legacy_site_date should be "2015-06-20" And the system should accept this without error - Scenario: Spring does not use well_completed_on field - Given a thing of type "spring" exists - When I retrieve that spring - Then well_completed_on should be null - And the field should exist in the response schema - And it should not cause validation errors - Scenario: Location with only legacy_date_created (no legacy_site_date) Given a location exists with: | field | value | @@ -164,9 +92,3 @@ Feature: Post-Migration Legacy Data Retrieval When I retrieve that location Then legacy_date_created should be "2014-10-17" And legacy_site_date should be null - - Scenario: Well without completion date - Given a well exists with well_completed_on null - When I retrieve that well - Then well_completed_on should be null - And the well should still be valid diff --git a/tests/features/steps/post_migration_legacy_data.py 
b/tests/features/steps/post_migration_legacy_data.py index b36dfa461..25e932159 100644 --- a/tests/features/steps/post_migration_legacy_data.py +++ b/tests/features/steps/post_migration_legacy_data.py @@ -47,42 +47,6 @@ def create_test_location(legacy_date_created=None, legacy_site_date=None): return location -def create_test_well(well_completed_on=None, thing_type="water well"): - """Helper to create a test well with completion date.""" - with session_ctx() as session: - # Create location - location = Location( - point="POINT(-106.607784 35.118924)", - elevation=1558.8, - release_status="public", - ) - session.add(location) - session.commit() - - # Create thing - thing = Thing( - name=f"Test-{thing_type}-{datetime.now().timestamp()}", - first_visit_date="2023-03-03", - thing_type=thing_type, - release_status="public", - well_depth=100.0 if thing_type == "water well" else None, - hole_depth=110.0 if thing_type == "water well" else None, - well_completed_on=well_completed_on, - ) - session.add(thing) - session.commit() - - # Associate - assoc = LocationThingAssociation(location=location, thing=thing) - assoc.effective_start = "2000-01-01T00:00:00Z" - session.add(assoc) - session.commit() - - session.refresh(thing) - session.refresh(location) - return thing, location - - @given("the AMPAPI data has been migrated to the database") def step_given_data_migrated(context: Context): """Assumption that migration has occurred.""" @@ -169,111 +133,6 @@ def step_given_locations_with_specific_date( context.test_locations.append(location) -@given('a well exists with well_completed_on "{completion_date}"') -def step_given_well_with_completion(context: Context, completion_date: str): - """Create well with completion date.""" - completed_on = ( - date.fromisoformat(completion_date) if completion_date != "null" else None - ) - - thing, location = create_test_well(well_completed_on=completed_on) - - context.test_well = thing - context.test_well_id = thing.id - 
context.test_well_location = location - - -@given("{count:Number} wells exist with various completion dates") -def step_given_multiple_wells(context: Context, count: int): - """Create multiple wells with various completion dates.""" - context.test_wells = [] - - completion_dates = [ - "1936-01-01", - "1965-06-15", - "2004-08-08", - "2020-05-15", - None, # No completion date - None, - None, - ] - - for i in range(min(count, len(completion_dates))): - completed_on = ( - date.fromisoformat(completion_dates[i]) if completion_dates[i] else None - ) - thing, location = create_test_well(well_completed_on=completed_on) - context.test_wells.append(thing) - - -@given("{null_count:Number} of those wells have null well_completed_on") -def step_given_wells_with_null_completion(context: Context, null_count: int): - """Verify expected number of nulls (declarative - already created).""" - # Wells were created in previous step with nulls - pass - - -@given( - "wells exist with completion dates from {start_year:Number} to {end_year:Number}" -) -def step_given_wells_date_range(context: Context, start_year: int, end_year: int): - """Create wells with completion dates across range.""" - context.test_wells = [] - - years = [1936, 1965, 2004, 2010, 2020, 2024] - for year in years: - thing, location = create_test_well(well_completed_on=date(year, 6, 15)) - context.test_wells.append(thing) - - -@given("wells exist with completion dates: {years}") -def step_given_wells_specific_years(context: Context, years: str): - """Create wells with specific completion years.""" - context.test_wells = [] - - year_list = [int(y.strip()) for y in years.split(",")] - - for year in year_list: - thing, location = create_test_well(well_completed_on=date(year, 6, 15)) - context.test_wells.append(thing) - - -@given("some wells have null well_completed_on") -def step_given_some_wells_null(context: Context): - """Add wells without completion dates.""" - if not hasattr(context, "test_wells"): - context.test_wells 
= [] - - for i in range(2): - thing, location = create_test_well(well_completed_on=None) - context.test_wells.append(thing) - - -@given("that well's location has") -def step_given_well_location_has_table(context: Context): - """Set legacy dates on the well's location.""" - data = {row["field"]: row["value"] for row in context.table} - - legacy_date_created = ( - date.fromisoformat(data.get("legacy_date_created")) - if data.get("legacy_date_created") - else None - ) - legacy_site_date = ( - date.fromisoformat(data.get("legacy_site_date")) - if data.get("legacy_site_date") - else None - ) - - with session_ctx() as session: - location = session.get(Location, context.test_well_location.id) - location.legacy_date_created = legacy_date_created - location.legacy_site_date = legacy_site_date - session.commit() - session.refresh(location) - context.test_well_location = location - - @given("{count:Number} locations were migrated") def step_given_count_locations_migrated(context: Context, count: int): """Create specified number of test locations.""" @@ -296,27 +155,6 @@ def step_given_sitedate_count(context: Context, count: int): pass -@given("{count:Number} wells were migrated") -def step_given_count_wells_migrated(context: Context, count: int): - """Create specified number of test wells.""" - context.test_wells = [] - - for i in range(count): - # 30% have completion dates - has_completion = i < count * 0.30 - - thing, location = create_test_well( - well_completed_on=date(2000 + (i % 24), 1, 1) if has_completion else None - ) - context.test_wells.append(thing) - - -@given("{count:Number} of them had non-null CompletionDate in AMPAPI") -def step_given_completion_count(context: Context, count: int): - """Declarative - data created in previous step.""" - pass - - @given("a location was migrated with legacy dates") def step_given_location_migrated_with_dates(context: Context): """Create location with both legacy dates.""" @@ -326,22 +164,6 @@ def 
step_given_location_migrated_with_dates(context: Context): context.test_location = location -@given('a thing of type "{thing_type}" exists') -def step_given_thing_of_type(context: Context, thing_type: str): - """Create a thing of specified type.""" - thing, location = create_test_well(well_completed_on=None, thing_type=thing_type) - context.test_thing = thing - context.test_thing_id = thing.id - - -@given("a well exists with well_completed_on null") -def step_given_well_null_completion(context: Context): - """Create well without completion date.""" - thing, location = create_test_well(well_completed_on=None) - context.test_well = thing - context.test_well_id = thing.id - - # WHEN steps @@ -393,69 +215,6 @@ def step_when_query_by_legacy_date(context: Context, target_date: str): context.queried_locations = locations -@when("I retrieve that well via the API") -def step_when_retrieve_well_api(context: Context): - """Retrieve well via GET API.""" - response = context.client.get(f"/thing/water-well/{context.test_well_id}") - assert response.status_code == 200 - context.well_response = response.json() - - -@when("I GET /thing/water-well to list all wells") -def step_when_get_all_wells(context: Context): - """Get all wells.""" - response = context.client.get("/thing/water-well") - assert response.status_code == 200 - context.wells_response = response.json() - - -@when( - 'I filter wells where well_completed_on is between "{start_date}" and "{end_date}"' -) -def step_when_filter_wells(context: Context, start_date: str, end_date: str): - """Filter wells by completion date range.""" - with session_ctx() as session: - start = date.fromisoformat(start_date) - end = date.fromisoformat(end_date) - - wells = ( - session.query(Thing) - .filter( - Thing.thing_type == "water well", - Thing.well_completed_on >= start, - Thing.well_completed_on <= end, - ) - .all() - ) - - context.filtered_wells = wells - - -@when("I GET /thing/water-well sorted by well_completed_on ascending") -def 
step_when_get_wells_sorted(context: Context): - """Get wells sorted by completion date.""" - with session_ctx() as session: - wells = ( - session.query(Thing) - .filter(Thing.thing_type == "water well") - .order_by(Thing.well_completed_on.asc().nullslast()) - .all() - ) - - context.sorted_wells = wells - - -@when("I retrieve the well and its location") -def step_when_retrieve_well_and_location(context: Context): - """Retrieve well with location.""" - with session_ctx() as session: - well = session.get(Thing, context.test_well.id) - location = session.get(Location, context.test_well_location.id) - - context.retrieved_well = well - context.retrieved_location = location - - @when("I query the migrated locations") def step_when_query_migrated_locations(context: Context): """Query all test locations.""" @@ -466,15 +225,6 @@ def step_when_query_migrated_locations(context: Context): context.queried_locations = locations -@when("I query the migrated wells") -def step_when_query_migrated_wells(context: Context): - """Query all test wells.""" - with session_ctx() as session: - well_ids = [well.id for well in context.test_wells] - wells = session.query(Thing).filter(Thing.id.in_(well_ids)).all() - context.queried_wells = wells - - @when("I retrieve that location") def step_when_retrieve_location(context: Context): """Retrieve location by ID.""" @@ -483,22 +233,6 @@ def step_when_retrieve_location(context: Context): context.retrieved_location = location -@when("I retrieve that spring") -def step_when_retrieve_spring(context: Context): - """Retrieve spring/thing by ID.""" - with session_ctx() as session: - thing = session.get(Thing, context.test_thing.id) - context.retrieved_thing = thing - - -@when("I retrieve that well") -def step_when_retrieve_well(context: Context): - """Retrieve well by ID.""" - with session_ctx() as session: - well = session.get(Thing, context.test_well.id) - context.retrieved_well = well - - # THEN steps @@ -606,150 +340,6 @@ def 
step_then_all_have_date(context: Context, expected_date: str): ), f"Location has {loc.legacy_date_created}, expected {expected}" -@then('the response should include well_completed_on as "{expected_date}"') -def step_then_well_completed_on(context: Context, expected_date: str): - """Assert well_completed_on matches.""" - actual = context.well_response.get("well_completed_on") - assert actual == expected_date, f"Expected {expected_date}, got {actual}" - - -@then("the well age should be calculable") -def step_then_age_calculable(context: Context): - """Assert age can be calculated.""" - completion_str = context.well_response.get("well_completed_on") - assert completion_str is not None, "Cannot calculate age without completion date" - - completed = date.fromisoformat(completion_str) - today = date.today() - age_years = (today - completed).days / 365.25 - assert age_years >= 0, "Age cannot be negative" - - -@then("the well should be over {min_age:Number} years old") -def step_then_well_over_age(context: Context, min_age: int): - """Assert well age exceeds minimum.""" - completion_str = context.well_response.get("well_completed_on") - completed = date.fromisoformat(completion_str) - today = date.today() - age_years = (today - completed).days / 365.25 - - assert age_years >= min_age, f"Expected over {min_age} years, got {age_years:.1f}" - - -@then("each well should have a well_completed_on field") -def step_then_all_wells_have_field(context: Context): - """Assert all wells have the field.""" - items = context.wells_response.get("items", []) - for item in items: - assert "well_completed_on" in item, f"Well missing well_completed_on" - - -@then("{percentage:Number}% of wells should have well_completed_on populated") -def step_then_percentage_populated(context: Context, percentage: int): - """Assert approximate percentage.""" - items = context.wells_response.get("items", []) - total = len(items) - if total == 0: - return - - populated = sum(1 for item in items if 
item.get("well_completed_on") is not None) - actual_pct = (populated / total) * 100 - - tolerance = 10 - assert ( - abs(actual_pct - percentage) < tolerance - ), f"Expected ~{percentage}%, got {actual_pct:.1f}%" - - -@then("the response should only include wells completed in that decade") -def step_then_wells_in_decade(context: Context): - """Assert filtered wells in range.""" - for well in context.filtered_wells: - assert 2000 <= well.well_completed_on.year <= 2010 - - -@then("wells from {year:Number} should not be included") -def step_then_wells_year_excluded(context: Context, year: int): - """Assert wells from year excluded.""" - for well in context.filtered_wells: - assert well.well_completed_on.year != year - - -@then("the first well should be from {year:Number}") -def step_then_first_well_year(context: Context, year: int): - """Assert first well year.""" - if context.sorted_wells and context.sorted_wells[0].well_completed_on: - actual_year = context.sorted_wells[0].well_completed_on.year - assert actual_year == year, f"Expected {year}, got {actual_year}" - - -@then("the last well with a date should be from {year:Number}") -def step_then_last_well_year(context: Context, year: int): - """Assert last non-null well year.""" - non_null = [w for w in context.sorted_wells if w.well_completed_on] - if non_null: - actual_year = non_null[-1].well_completed_on.year - assert actual_year == year, f"Expected {year}, got {actual_year}" - - -@then("wells without completion dates should appear last") -def step_then_nulls_last(context: Context): - """Assert nulls at end.""" - first_null_idx = next( - (i for i, w in enumerate(context.sorted_wells) if w.well_completed_on is None), - len(context.sorted_wells), - ) - - for well in context.sorted_wells[first_null_idx:]: - assert ( - well.well_completed_on is None - ), "Found non-null after null in sorted list" - - -@then('the well should have well_completed_on as "{expected_date}"') -def step_then_well_has_completion(context: 
Context, expected_date: str): - """Assert well has completion date.""" - actual = context.well_response.get("well_completed_on") - assert actual == expected_date, f"Expected {expected_date}, got {actual}" - - -@then('the current_location should include legacy_date_created as "{expected_date}"') -def step_then_location_has_legacy(context: Context, expected_date: str): - """Assert location has legacy_date_created.""" - current_location = context.well_response.get("current_location", {}) - actual = current_location.get("legacy_date_created") - assert actual == expected_date, f"Expected {expected_date}, got {actual}" - - -@then('the current_location should include legacy_site_date as "{expected_date}"') -def step_then_location_has_site_date(context: Context, expected_date: str): - """Assert location has legacy_site_date.""" - current_location = context.well_response.get("current_location", {}) - actual = current_location.get("legacy_site_date") - assert actual == expected_date, f"Expected {expected_date}, got {actual}" - - -@then( - "the temporal sequence should be: well_completed_on → legacy_site_date → legacy_date_created" -) -def step_then_temporal_sequence(context: Context): - """Assert temporal order.""" - well_completed = context.retrieved_well.well_completed_on - site_date = context.retrieved_location.legacy_site_date - legacy_created = context.retrieved_location.legacy_date_created - - assert well_completed < site_date, "Well should be completed before site date" - assert site_date < legacy_created, "Site date should be before DB record created" - - -@then("the timeline should show: {year1:Number} → {year2:Number} → {year3:Number}") -def step_then_timeline_years(context: Context, year1: int, year2: int, year3: int): - """Assert specific years in sequence.""" - assert context.retrieved_well.well_completed_on.year == year1 - assert context.retrieved_location.legacy_site_date.year == year2 - assert context.retrieved_location.legacy_date_created.year == year3 - - 
@then("{percentage:Number}% should have non-null legacy_site_date") def step_then_percentage_site_date(context: Context, percentage: int): """Assert percentage with legacy_site_date.""" @@ -776,19 +366,6 @@ def step_then_percentage_legacy(context: Context, percentage: int): ), f"Expected ~{percentage}%, got {actual_pct:.1f}%" -@then("{percentage:Number}% should have non-null well_completed_on") -def step_then_percentage_completion(context: Context, percentage: int): - """Assert percentage with well_completed_on.""" - total = len(context.queried_wells) - populated = sum(1 for well in context.queried_wells if well.well_completed_on) - actual_pct = (populated / total) * 100 - - tolerance = 2 - assert ( - abs(actual_pct - percentage) < tolerance - ), f"Expected ~{percentage}%, got {actual_pct:.1f}%" - - @then("it should have created_at (new system timestamp from migration)") def step_then_has_created_at(context: Context): """Assert created_at exists.""" @@ -854,28 +431,6 @@ def step_then_no_error(context: Context): pass -@then("well_completed_on should be null") -def step_then_completion_null(context: Context): - """Assert well_completed_on is null.""" - if hasattr(context, "retrieved_thing"): - assert context.retrieved_thing.well_completed_on is None - elif hasattr(context, "retrieved_well"): - assert context.retrieved_well.well_completed_on is None - - -@then("the field should exist in the response schema") -def step_then_field_exists_in_schema(context: Context): - """Assert field exists in schema.""" - if hasattr(context, "retrieved_thing"): - assert hasattr(context.retrieved_thing, "well_completed_on") - - -@then("it should not cause validation errors") -def step_then_no_validation_errors(context: Context): - """Assert no validation errors.""" - pass - - @then("legacy_site_date should be null") def step_then_site_date_null(context: Context): """Assert legacy_site_date is null.""" diff --git a/tests/test_thing.py b/tests/test_thing.py index 94d00aa85..3792b4302 
100644 --- a/tests/test_thing.py +++ b/tests/test_thing.py @@ -1132,148 +1132,4 @@ def test_delete_thing_id_link_404_not_found(second_thing_id_link): assert data["detail"] == f"ThingIdLink with ID {bad_id} not found." -# ============= Well completion date tests ==================================== - - -def test_create_well_with_completion_date(location): - """Test creating a well with well_completed_on (active field - users can set this)""" - payload = { - "name": "Test Well", - "location_id": location.id, - "measuring_point_height": 2.5, - "measuring_point_description": "top of casing", - "release_status": "draft", - "well_completed_on": "2004-08-08", - } - response = client.post("/thing/water-well", json=payload) - - assert response.status_code == 201 - data = response.json() - assert "id" in data - assert data["well_completed_on"] == "2004-08-08" - - # cleanup after test - from db import Thing - from tests import cleanup_post_test - - cleanup_post_test(Thing, data["id"]) - - -def test_create_well_with_old_completion_date(location): - """Test creating a well with very old completion date (e.g., for documenting historical wells)""" - payload = { - "name": "Historical Well", - "location_id": location.id, - "measuring_point_height": 2.5, - "measuring_point_description": "top of casing", - "release_status": "draft", - "well_completed_on": "1936-01-01", - } - response = client.post("/thing/water-well", json=payload) - - assert response.status_code == 201 - data = response.json() - assert data["well_completed_on"] == "1936-01-01" - - # cleanup after test - from db import Thing - from tests import cleanup_post_test - - cleanup_post_test(Thing, data["id"]) - - -def test_create_well_without_completion_date(location): - """Test that well_completed_on is optional (nullable) when creating a well""" - payload = { - "name": "Test Well Without Date", - "location_id": location.id, - "measuring_point_height": 2.5, - "measuring_point_description": "top of casing", - 
"release_status": "draft", - } - response = client.post("/thing/water-well", json=payload) - - assert response.status_code == 201 - data = response.json() - # Field should be present but null - assert "well_completed_on" in data - assert data["well_completed_on"] is None - - # cleanup after test - from db import Thing - from tests import cleanup_post_test - - cleanup_post_test(Thing, data["id"]) - - -def test_spring_well_completed_on_is_null(location): - """Test that springs do NOT have well_completed_on field (it's well-specific)""" - payload = { - "name": "Test Spring", - "location_id": location.id, - "spring_type": "Artesian", - "release_status": "draft", - } - response = client.post("/thing/spring", json=payload) - - assert response.status_code == 201 - data = response.json() - # Springs should NOT have well_completed_on field (only wells have completion dates) - assert "well_completed_on" not in data - assert data["thing_type"] == "spring" - - # cleanup after test - from db import Thing - from tests import cleanup_post_test - - cleanup_post_test(Thing, data["id"]) - - -def test_well_with_completion_date_and_location_legacy_fields(location): - """Test combined scenario: new well with completion date + location legacy fields (null for new locations)""" - # Create a new location (without legacy fields - they're migration-only) - from tests import cleanup_post_test - - location_payload = { - "point": "POINT (-106.607784 35.118924)", - "elevation": 1558.8, - "release_status": "draft", - } - location_response = client.post("/location", json=location_payload) - assert location_response.status_code == 201 - location_id = location_response.json()["id"] - - # Create well with completion date at that location - well_payload = { - "name": "Test Well", - "location_id": location_id, - "measuring_point_height": 2.5, - "measuring_point_description": "top of casing", - "release_status": "draft", - "well_completed_on": "2020-06-15", # User can set this for new wells - } - 
well_response = client.post("/thing/water-well", json=well_payload) - assert well_response.status_code == 201 - well_id = well_response.json()["id"] - - # Retrieve the well - get_response = client.get(f"/thing/water-well/{well_id}") - assert get_response.status_code == 200 - data = get_response.json() - - # well_completed_on is set (active field) - assert data["well_completed_on"] == "2020-06-15" - - # Location legacy fields are null (migration-only fields) - # current_location is a GeoJSON Feature, so fields are under properties - assert data["current_location"]["properties"]["legacy_date_created"] is None - assert data["current_location"]["properties"]["legacy_site_date"] is None - - # cleanup after test - from db import Thing, Location - - cleanup_post_test(Thing, well_id) - cleanup_post_test(Location, location_id) - - # ============= EOF ============================================= diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index 05dbe8dfe..795820ec8 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -19,7 +19,6 @@ These tests verify that: 1. Location.legacy_date_created is populated from CSV DateCreated 2. Location.legacy_site_date is populated from CSV SiteDate (if not null) -3. 
Thing.well_completed_on is populated from CSV CompletionDate (if not null) """ import datetime from unittest.mock import Mock, patch, MagicMock @@ -27,7 +26,6 @@ import pytest from transfers.util import make_location -from schemas.thing import CreateWell # ============================================================================ @@ -257,91 +255,6 @@ def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mappe assert location.legacy_site_date is not None -# ============================================================================ -# WELL COMPLETION DATE TESTS -# ============================================================================ - - -def test_create_well_schema_accepts_well_completed_on(): - """Test that CreateWell schema accepts well_completed_on from CSV CompletionDate""" - # Simulate data from CSV transfer - well_data = { - "location_id": 1, - "name": "TEST-WELL-001", - "well_completed_on": datetime.date(2004, 8, 8), # From CSV CompletionDate - "hole_depth": 100.0, - "well_depth": 95.0, - "measuring_point_height": 2.5, - "measuring_point_description": "top of casing", - "release_status": "public", - } - - # Validate using CreateWell schema - schema = CreateWell(**well_data) - - assert schema.well_completed_on == datetime.date(2004, 8, 8) - - -def test_create_well_schema_well_completed_on_optional(): - """Test that well_completed_on is optional (70% of wells don't have CompletionDate)""" - well_data = { - "location_id": 1, - "name": "TEST-WELL-002", - "hole_depth": 100.0, - "well_depth": 95.0, - "measuring_point_height": 2.5, - "measuring_point_description": "top of casing", - "release_status": "public", - # No well_completed_on provided - } - - # Should not raise validation error - schema = CreateWell(**well_data) - - # Field should be optional - assert hasattr(schema, "well_completed_on") - # Value should be None when not provided - assert schema.well_completed_on is None - - -def 
test_create_well_with_very_old_completion_date(): - """Test that very old completion dates (1936) are accepted""" - well_data = { - "location_id": 1, - "name": "HISTORICAL-WELL", - "well_completed_on": datetime.date(1936, 1, 1), # Oldest well in dataset - "hole_depth": 100.0, - "well_depth": 95.0, - "measuring_point_height": 2.5, - "measuring_point_description": "top of casing", - "release_status": "public", - } - - schema = CreateWell(**well_data) - - assert schema.well_completed_on == datetime.date(1936, 1, 1) - - -def test_create_well_completed_on_is_date_not_datetime(): - """Test that well_completed_on is Date type (not DateTime)""" - well_data = { - "location_id": 1, - "name": "TEST-WELL-003", - "well_completed_on": datetime.date(2004, 8, 8), # Date, not DateTime - "hole_depth": 100.0, - "well_depth": 95.0, - "measuring_point_height": 2.5, - "measuring_point_description": "top of casing", - "release_status": "public", - } - - schema = CreateWell(**well_data) - - # Should accept date type - assert isinstance(schema.well_completed_on, datetime.date) - assert not isinstance(schema.well_completed_on, datetime.datetime) - - # ============================================================================ # DATA COVERAGE TESTS (Simulating Migration Statistics) # ============================================================================ @@ -410,44 +323,6 @@ def test_location_legacy_date_coverage_statistics(mock_lexicon_mapper): assert locations_with_site_date == 9 # 9% should have legacy_site_date -def test_well_completion_date_coverage_statistics(): - """Test that expected percentage of wells have completion dates""" - # Simulate 100 wells from CSV - wells_with_completion_date = 0 - - for i in range(100): - if i < 30: # 30% have CompletionDate - well_data = { - "location_id": 1, - "name": f"WELL-{i:03d}", - "well_completed_on": datetime.date(2004, 8, 8), - "hole_depth": 100.0, - "well_depth": 95.0, - "measuring_point_height": 2.5, - "measuring_point_description": 
"top of casing", - "release_status": "public", - } - else: # 70% don't have CompletionDate - well_data = { - "location_id": 1, - "name": f"WELL-{i:03d}", - "hole_depth": 100.0, - "well_depth": 95.0, - "measuring_point_height": 2.5, - "measuring_point_description": "top of casing", - "release_status": "public", - # No well_completed_on - } - - schema = CreateWell(**well_data) - - if schema.well_completed_on is not None: - wells_with_completion_date += 1 - - # Verify expected coverage - assert wells_with_completion_date == 30 # 30% should have completion dates - - # ============================================================================ # EOF # ============================================================================ diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index 5daa1d8ee..ee54d0216 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -237,19 +237,6 @@ def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None [] if isna(row.CasingDescription) else _extract_casing_materials(row) ) - # Extract well_completed_on from CompletionDate (Date type, not DateTime) - well_completed_on = None - if not isna(row.CompletionDate): - try: - well_completed_on = datetime.strptime( - row.CompletionDate, "%Y-%m-%d %H:%M:%S.%f" - ).date() - except (ValueError, AttributeError): - # If parsing fails, leave as None - logger.warning( - f"Could not parse CompletionDate for {row.PointID}: {row.CompletionDate}" - ) - # manually add the well rather than add_well from services/thing_helper.py # so that effective_start can be set on the location assocation @@ -267,7 +254,6 @@ def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None release_status="public" if row.PublicRelease else "private", measuring_point_height=row.MPHeight, measuring_point_description=row.MeasuringPoint, - well_completed_on=well_completed_on, notes=( [{"content": row.Notes, "note_type": "Other"}] if row.Notes else [] ), @@ 
-297,7 +283,6 @@ def transfer_wells(session: Session, flags: dict = None, limit: int = 0) -> None well_data["nma_pk_welldata"] = row.WellID well_data.pop("notes") - # well_completed_on is kept in well_data (not excluded above) well = Thing(**well_data) session.add(well) # logger.info(f"Created well for {row.PointID}") From f0112264cb71eafeb36fca6363b3d9fc21e69ee5 Mon Sep 17 00:00:00 2001 From: kbighorse Date: Wed, 3 Dec 2025 08:55:09 +0000 Subject: [PATCH 20/37] Formatting changes --- schemas/location.py | 4 +--- tests/features/steps/post_migration_legacy_data.py | 4 +--- transfers/util.py | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/schemas/location.py b/schemas/location.py index f34c54115..ca182ebd5 100644 --- a/schemas/location.py +++ b/schemas/location.py @@ -155,9 +155,7 @@ def populate_fields(cls, data: Any) -> Any: data_dict["properties"]["elevation"] = convert_m_to_ft(elevation_m) data_dict["properties"]["elevation_method"] = data_dict.get("elevation_method") # populate AMPAPI date fields - data_dict["properties"]["nma_date_created"] = data_dict.get( - "nma_date_created" - ) + data_dict["properties"]["nma_date_created"] = data_dict.get("nma_date_created") data_dict["properties"]["nma_site_date"] = data_dict.get("nma_site_date") # populate UTM coordinates diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py index 99ddd028e..7c2c36ffe 100644 --- a/tests/features/steps/post_migration_legacy_data.py +++ b/tests/features/steps/post_migration_legacy_data.py @@ -195,9 +195,7 @@ def step_when_filter_locations(context: Context, start_date: str, end_date: str) locations = ( session.query(Location) - .filter( - Location.nma_site_date >= start, Location.nma_site_date <= end - ) + .filter(Location.nma_site_date >= start, Location.nma_site_date <= end) .all() ) diff --git a/transfers/util.py b/transfers/util.py index c8d054a0a..5216c204f 100644 --- a/transfers/util.py +++ 
b/transfers/util.py @@ -253,9 +253,7 @@ def make_location(row: pd.Series, elevations: dict) -> tuple: nma_site_date = None if row.SiteDate: - nma_site_date = datetime.strptime( - row.SiteDate, "%Y-%m-%d %H:%M:%S.%f" - ).date() + nma_site_date = datetime.strptime(row.SiteDate, "%Y-%m-%d %H:%M:%S.%f").date() location = Location( nma_pk_location=row.LocationId, From f021c4be309fa69dd94fd2762e249cbc97b2e64d Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 3 Dec 2025 00:55:20 -0800 Subject: [PATCH 21/37] Replace `legacy_` prefix with `nma_` --- db/location.py | 6 +- schemas/location.py | 26 +-- ...st-migration-legacy-data-retrieval.feature | 70 ++++---- .../steps/post_migration_legacy_data.py | 164 +++++++++--------- tests/test_location.py | 24 +-- tests/test_transfer_legacy_dates.py | 64 +++---- transfers/util.py | 14 +- 7 files changed, 184 insertions(+), 184 deletions(-) diff --git a/db/location.py b/db/location.py index a07958346..c00c11a70 100644 --- a/db/location.py +++ b/db/location.py @@ -62,13 +62,13 @@ class Location(Base, AutoBaseMixin, ReleaseMixin, NotesMixin, DataProvenanceMixi nma_notes_location: Mapped[str] = mapped_column(Text, nullable=True) nma_coordinate_notes: Mapped[str] = mapped_column(Text, nullable=True) - # --- Legacy AMPAPI Date Fields (Migration-Only, Read-Only Post-Migration) --- - legacy_date_created: Mapped[datetime.date] = mapped_column( + # --- AMPAPI Date Fields (Migration-Only, Read-Only Post-Migration) --- + nma_date_created: Mapped[datetime.date] = mapped_column( Date, nullable=True, comment="Original AMPAPI DateCreated (migration-only field)", ) - legacy_site_date: Mapped[datetime.date] = mapped_column( + nma_site_date: Mapped[datetime.date] = mapped_column( Date, nullable=True, comment="Original AMPAPI SiteDate (migration-only field)" ) diff --git a/schemas/location.py b/schemas/location.py index 1f4bad472..f34c54115 100644 --- a/schemas/location.py +++ b/schemas/location.py @@ -107,9 +107,9 @@ class 
GeoJSONProperties(BaseModel): default_factory=GeoJSONUTMCoordinates ) notes: list[NoteResponse] = [] - # Legacy AMPAPI date fields (migration-only, read-only) - legacy_date_created: date | None = None - legacy_site_date: date | None = None + # AMPAPI date fields (migration-only, read-only) + nma_date_created: date | None = None + nma_site_date: date | None = None model_config = ConfigDict( from_attributes=True, @@ -154,11 +154,11 @@ def populate_fields(cls, data: Any) -> Any: data_dict["properties"]["notes"] = data_dict.get("notes") data_dict["properties"]["elevation"] = convert_m_to_ft(elevation_m) data_dict["properties"]["elevation_method"] = data_dict.get("elevation_method") - # populate legacy date fields - data_dict["properties"]["legacy_date_created"] = data_dict.get( - "legacy_date_created" + # populate AMPAPI date fields + data_dict["properties"]["nma_date_created"] = data_dict.get( + "nma_date_created" ) - data_dict["properties"]["legacy_site_date"] = data_dict.get("legacy_site_date") + data_dict["properties"]["nma_site_date"] = data_dict.get("nma_site_date") # populate UTM coordinates point_utm_zone_13n_wkt = transform_srid( @@ -190,9 +190,9 @@ class LocationResponse(BaseResponseModel): county: str | None quad_name: str | None - # Legacy AMPAPI date fields (migration-only, read-only post-migration) - legacy_date_created: date | None = None - legacy_site_date: date | None = None + # AMPAPI date fields (migration-only, read-only post-migration) + nma_date_created: date | None = None + nma_site_date: date | None = None @field_validator("point", mode="before") def point_to_wkt(cls, value): @@ -232,9 +232,9 @@ class UpdateLocation(BaseUpdateModel, ValidateLocation): coordinate_accuracy: float | None = None coordinate_method: CoordinateMethod | None = None - # Legacy AMPAPI date fields (migration-only, can be updated but not created) - legacy_date_created: date | None = None - legacy_site_date: date | None = None + # AMPAPI date fields (migration-only, can be 
updated but not created) + nma_date_created: date | None = None + nma_site_date: date | None = None # ============= EOF ============================================= diff --git a/tests/features/post-migration-legacy-data-retrieval.feature b/tests/features/post-migration-legacy-data-retrieval.feature index b5329ad9c..61f3e8c40 100644 --- a/tests/features/post-migration-legacy-data-retrieval.feature +++ b/tests/features/post-migration-legacy-data-retrieval.feature @@ -13,43 +13,43 @@ Feature: Post-Migration Legacy Data Retrieval Scenario: Retrieve location with both legacy dates via API Given a location exists with: | field | value | - | legacy_date_created | 2014-04-03 | - | legacy_site_date | 2002-12-10 | + | nma_date_created | 2014-04-03 | + | nma_site_date | 2002-12-10 | When I retrieve that location via the API - Then the response should include legacy_date_created as "2014-04-03" - And the response should include legacy_site_date as "2002-12-10" + Then the response should include nma_date_created as "2014-04-03" + And the response should include nma_site_date as "2002-12-10" And the time gap should be approximately 11.3 years Scenario: Retrieve location with large time gap (54 years) Given a location exists with: | field | value | - | legacy_date_created | 2008-05-28 | - | legacy_site_date | 1954-05-01 | + | nma_date_created | 2008-05-28 | + | nma_site_date | 1954-05-01 | When I retrieve that location via the API - Then the response should include legacy_date_created as "2008-05-28" - And the response should include legacy_site_date as "1954-05-01" + Then the response should include nma_date_created as "2008-05-28" + And the response should include nma_site_date as "1954-05-01" And the time gap should be approximately 54 years Scenario: List all locations includes legacy date fields Given 5 locations exist with various legacy dates When I GET /location to list all locations - Then each location should have a legacy_date_created field - And each location should 
have a legacy_site_date field - And some locations should have null legacy_site_date + Then each location should have a nma_date_created field + And each location should have a nma_site_date field + And some locations should have null nma_site_date Scenario: Filter locations by legacy site date range - Given locations exist with legacy_site_date ranging from 1950 to 2024 - When I filter locations where legacy_site_date is between "2000-01-01" and "2010-12-31" - Then the response should only include locations with legacy_site_date in that decade - And locations with legacy_site_date before 2000 should not be included - And locations with legacy_site_date after 2010 should not be included + Given locations exist with nma_site_date ranging from 1950 to 2024 + When I filter locations where nma_site_date is between "2000-01-01" and "2010-12-31" + Then the response should only include locations with nma_site_date in that decade + And locations with nma_site_date before 2000 should not be included + And locations with nma_site_date after 2010 should not be included - Scenario: Query location by legacy_date_created - Given 3 locations exist with legacy_date_created "2014-04-03" - And 2 locations exist with legacy_date_created "2017-12-06" - When I query for locations with legacy_date_created "2014-04-03" + Scenario: Query location by nma_date_created + Given 3 locations exist with nma_date_created "2014-04-03" + And 2 locations exist with nma_date_created "2017-12-06" + When I query for locations with nma_date_created "2014-04-03" Then the response should include exactly 3 locations - And all should have legacy_date_created "2014-04-03" + And all should have nma_date_created "2014-04-03" # Data Quality Validation @@ -57,8 +57,8 @@ Feature: Post-Migration Legacy Data Retrieval Given 100 locations were migrated And 9 of them had non-null SiteDate in AMPAPI When I query the migrated locations - Then 9% should have non-null legacy_site_date - And 100% should have non-null 
legacy_date_created + Then 9% should have non-null nma_site_date + And 100% should have non-null nma_date_created # Audit Trail Verification @@ -66,29 +66,29 @@ Feature: Post-Migration Legacy Data Retrieval Given a location was migrated with legacy dates When I retrieve that location Then it should have created_at (new system timestamp from migration) - And it should have legacy_date_created (original AMPAPI DateCreated) - And it should have legacy_site_date (original AMPAPI SiteDate) + And it should have nma_date_created (original AMPAPI DateCreated) + And it should have nma_site_date (original AMPAPI SiteDate) And all three timestamps should be independently queryable And created_at should be a recent timestamp - And legacy_date_created should be an older date + And nma_date_created should be an older date # Edge Cases Scenario: Location where SiteDate is later than DateCreated (data anomaly) Given a location exists with: | field | value | - | legacy_date_created | 2010-01-15 | - | legacy_site_date | 2015-06-20 | + | nma_date_created | 2010-01-15 | + | nma_site_date | 2015-06-20 | When I retrieve that location - Then legacy_date_created should be "2010-01-15" - And legacy_site_date should be "2015-06-20" + Then nma_date_created should be "2010-01-15" + And nma_site_date should be "2015-06-20" And the system should accept this without error - Scenario: Location with only legacy_date_created (no legacy_site_date) + Scenario: Location with only nma_date_created (no nma_site_date) Given a location exists with: | field | value | - | legacy_date_created | 2014-10-17 | - | legacy_site_date | null | + | nma_date_created | 2014-10-17 | + | nma_site_date | null | When I retrieve that location - Then legacy_date_created should be "2014-10-17" - And legacy_site_date should be null + Then nma_date_created should be "2014-10-17" + And nma_site_date should be null diff --git a/tests/features/steps/post_migration_legacy_data.py 
b/tests/features/steps/post_migration_legacy_data.py index 25e932159..99ddd028e 100644 --- a/tests/features/steps/post_migration_legacy_data.py +++ b/tests/features/steps/post_migration_legacy_data.py @@ -31,15 +31,15 @@ def parse_number(text): register_type(Number=parse_number) -def create_test_location(legacy_date_created=None, legacy_site_date=None): +def create_test_location(nma_date_created=None, nma_site_date=None): """Helper to create a test location with legacy dates.""" with session_ctx() as session: location = Location( point="POINT(-106.607784 35.118924)", elevation=1558.8, release_status="public", - legacy_date_created=legacy_date_created, - legacy_site_date=legacy_site_date, + nma_date_created=nma_date_created, + nma_site_date=nma_site_date, ) session.add(location) session.commit() @@ -58,19 +58,19 @@ def step_given_location_with_table(context: Context): """Create location with fields from table.""" data = {row["field"]: row["value"] for row in context.table} - legacy_date_created = ( - date.fromisoformat(data["legacy_date_created"]) - if data.get("legacy_date_created") and data["legacy_date_created"] != "null" + nma_date_created = ( + date.fromisoformat(data["nma_date_created"]) + if data.get("nma_date_created") and data["nma_date_created"] != "null" else None ) - legacy_site_date = ( - date.fromisoformat(data["legacy_site_date"]) - if data.get("legacy_site_date") and data["legacy_site_date"] != "null" + nma_site_date = ( + date.fromisoformat(data["nma_site_date"]) + if data.get("nma_site_date") and data["nma_site_date"] != "null" else None ) location = create_test_location( - legacy_date_created=legacy_date_created, legacy_site_date=legacy_site_date + nma_date_created=nma_date_created, nma_site_date=nma_site_date ) context.test_location = location @@ -93,33 +93,33 @@ def step_given_multiple_locations(context: Context, count: int): for i in range(min(count, len(test_data))): legacy_date, site_date = test_data[i] location = create_test_location( - 
legacy_date_created=date.fromisoformat(legacy_date), - legacy_site_date=(date.fromisoformat(site_date) if site_date else None), + nma_date_created=date.fromisoformat(legacy_date), + nma_site_date=(date.fromisoformat(site_date) if site_date else None), ) context.test_locations.append(location) @given( - "locations exist with legacy_site_date ranging from {start_year:Number} to {end_year:Number}" + "locations exist with nma_site_date ranging from {start_year:Number} to {end_year:Number}" ) def step_given_locations_date_range(context: Context, start_year: int, end_year: int): - """Create locations with legacy_site_date across a date range.""" + """Create locations with nma_site_date across a date range.""" context.test_locations = [] years = [1954, 2002, 2003, 2010, 2015, 2020, 2024] for year in years: location = create_test_location( - legacy_date_created=date(year + 5, 1, 1), # Always 5 years after site date - legacy_site_date=date(year, 6, 15), + nma_date_created=date(year + 5, 1, 1), # Always 5 years after site date + nma_site_date=date(year, 6, 15), ) context.test_locations.append(location) -@given('{count:Number} locations exist with legacy_date_created "{target_date}"') +@given('{count:Number} locations exist with nma_date_created "{target_date}"') def step_given_locations_with_specific_date( context: Context, count: int, target_date: str ): - """Create locations with specific legacy_date_created.""" + """Create locations with specific nma_date_created.""" if not hasattr(context, "test_locations"): context.test_locations = [] @@ -127,8 +127,8 @@ def step_given_locations_with_specific_date( for i in range(count): location = create_test_location( - legacy_date_created=target, - legacy_site_date=date(2000 + i, 1, 1), # Vary the site dates + nma_date_created=target, + nma_site_date=date(2000 + i, 1, 1), # Vary the site dates ) context.test_locations.append(location) @@ -139,12 +139,12 @@ def step_given_count_locations_migrated(context: Context, count: int): 
context.test_locations = [] for i in range(count): - # 9% have legacy_site_date + # 9% have nma_site_date has_site_date = i < count * 0.09 location = create_test_location( - legacy_date_created=date(2014, 1, i % 28 + 1), - legacy_site_date=date(2003, 1, i % 28 + 1) if has_site_date else None, + nma_date_created=date(2014, 1, i % 28 + 1), + nma_site_date=date(2003, 1, i % 28 + 1) if has_site_date else None, ) context.test_locations.append(location) @@ -159,7 +159,7 @@ def step_given_sitedate_count(context: Context, count: int): def step_given_location_migrated_with_dates(context: Context): """Create location with both legacy dates.""" location = create_test_location( - legacy_date_created=date(2014, 4, 3), legacy_site_date=date(2002, 12, 10) + nma_date_created=date(2014, 4, 3), nma_site_date=date(2002, 12, 10) ) context.test_location = location @@ -184,7 +184,7 @@ def step_when_get_all_locations(context: Context): @when( - 'I filter locations where legacy_site_date is between "{start_date}" and "{end_date}"' + 'I filter locations where nma_site_date is between "{start_date}" and "{end_date}"' ) def step_when_filter_locations(context: Context, start_date: str, end_date: str): """Filter locations by date range.""" @@ -196,7 +196,7 @@ def step_when_filter_locations(context: Context, start_date: str, end_date: str) locations = ( session.query(Location) .filter( - Location.legacy_site_date >= start, Location.legacy_site_date <= end + Location.nma_site_date >= start, Location.nma_site_date <= end ) .all() ) @@ -204,13 +204,13 @@ def step_when_filter_locations(context: Context, start_date: str, end_date: str) context.filtered_locations = locations -@when('I query for locations with legacy_date_created "{target_date}"') +@when('I query for locations with nma_date_created "{target_date}"') def step_when_query_by_legacy_date(context: Context, target_date: str): - """Query locations by legacy_date_created.""" + """Query locations by nma_date_created.""" with session_ctx() as 
session: target = date.fromisoformat(target_date) locations = ( - session.query(Location).filter(Location.legacy_date_created == target).all() + session.query(Location).filter(Location.nma_date_created == target).all() ) context.queried_locations = locations @@ -236,25 +236,25 @@ def step_when_retrieve_location(context: Context): # THEN steps -@then('the response should include legacy_date_created as "{expected_date}"') -def step_then_legacy_date_created(context: Context, expected_date: str): - """Assert legacy_date_created matches.""" - actual = context.location_response.get("legacy_date_created") +@then('the response should include nma_date_created as "{expected_date}"') +def step_then_nma_date_created(context: Context, expected_date: str): + """Assert nma_date_created matches.""" + actual = context.location_response.get("nma_date_created") assert actual == expected_date, f"Expected {expected_date}, got {actual}" -@then('the response should include legacy_site_date as "{expected_date}"') -def step_then_legacy_site_date(context: Context, expected_date: str): - """Assert legacy_site_date matches.""" - actual = context.location_response.get("legacy_site_date") +@then('the response should include nma_site_date as "{expected_date}"') +def step_then_nma_site_date(context: Context, expected_date: str): + """Assert nma_site_date matches.""" + actual = context.location_response.get("nma_site_date") assert actual == expected_date, f"Expected {expected_date}, got {actual}" @then("the time gap should be approximately {years} years") def step_then_time_gap_years(context: Context, years: str): """Assert approximate year gap.""" - legacy_str = context.location_response.get("legacy_date_created") - site_date_str = context.location_response.get("legacy_site_date") + legacy_str = context.location_response.get("nma_date_created") + site_date_str = context.location_response.get("nma_site_date") if not legacy_str or not site_date_str: raise AssertionError("Missing date fields for gap 
calculation") @@ -272,28 +272,28 @@ def step_then_time_gap_years(context: Context, years: str): ), f"Expected ~{expected_years} year gap, got {gap_years:.1f} years" -@then("each location should have a legacy_date_created field") +@then("each location should have a nma_date_created field") def step_then_all_have_legacy_field(context: Context): """Assert all locations have the field.""" items = context.locations_response.get("items", []) for item in items: - assert "legacy_date_created" in item, f"Location missing legacy_date_created" + assert "nma_date_created" in item, f"Location missing nma_date_created" -@then("each location should have a legacy_site_date field") +@then("each location should have a nma_site_date field") def step_then_all_have_site_date_field(context: Context): """Assert all locations have the field.""" items = context.locations_response.get("items", []) for item in items: - assert "legacy_site_date" in item, f"Location missing legacy_site_date" + assert "nma_site_date" in item, f"Location missing nma_site_date" -@then("some locations should have null legacy_site_date") +@then("some locations should have null nma_site_date") def step_then_some_null_site_date(context: Context): """Assert some locations have null.""" items = context.locations_response.get("items", []) - null_count = sum(1 for item in items if item.get("legacy_site_date") is None) - assert null_count > 0, "Expected at least one location with null legacy_site_date" + null_count = sum(1 for item in items if item.get("nma_site_date") is None) + assert null_count > 0, "Expected at least one location with null nma_site_date" @then("the response should only include locations with site date in that decade") @@ -301,8 +301,8 @@ def step_then_locations_in_decade(context: Context): """Assert filtered locations are in range.""" for loc in context.filtered_locations: assert ( - 2000 <= loc.legacy_site_date.year <= 2010 - ), f"Location not in 2000-2010: {loc.legacy_site_date}" + 2000 <= 
loc.nma_site_date.year <= 2010 + ), f"Location not in 2000-2010: {loc.nma_site_date}" @then("locations with site date before {year:Number} should not be included") @@ -310,8 +310,8 @@ def step_then_locations_before_excluded(context: Context, year: int): """Assert no locations before year.""" for loc in context.filtered_locations: assert ( - loc.legacy_site_date.year >= year - ), f"Location from {loc.legacy_site_date.year} should not be included" + loc.nma_site_date.year >= year + ), f"Location from {loc.nma_site_date.year} should not be included" @then("locations with site date after {year:Number} should not be included") @@ -319,8 +319,8 @@ def step_then_locations_after_excluded(context: Context, year: int): """Assert no locations after year.""" for loc in context.filtered_locations: assert ( - loc.legacy_site_date.year <= year - ), f"Location from {loc.legacy_site_date.year} should not be included" + loc.nma_site_date.year <= year + ), f"Location from {loc.nma_site_date.year} should not be included" @then("the response should include exactly {count:Number} locations") @@ -330,21 +330,21 @@ def step_then_exact_count_locations(context: Context, count: int): assert actual == count, f"Expected {count} locations, got {actual}" -@then('all should have legacy_date_created "{expected_date}"') +@then('all should have nma_date_created "{expected_date}"') def step_then_all_have_date(context: Context, expected_date: str): """Assert all have same date.""" expected = date.fromisoformat(expected_date) for loc in context.queried_locations: assert ( - loc.legacy_date_created == expected - ), f"Location has {loc.legacy_date_created}, expected {expected}" + loc.nma_date_created == expected + ), f"Location has {loc.nma_date_created}, expected {expected}" -@then("{percentage:Number}% should have non-null legacy_site_date") +@then("{percentage:Number}% should have non-null nma_site_date") def step_then_percentage_site_date(context: Context, percentage: int): - """Assert percentage 
with legacy_site_date.""" + """Assert percentage with nma_site_date.""" total = len(context.queried_locations) - populated = sum(1 for loc in context.queried_locations if loc.legacy_site_date) + populated = sum(1 for loc in context.queried_locations if loc.nma_site_date) actual_pct = (populated / total) * 100 tolerance = 2 @@ -353,11 +353,11 @@ def step_then_percentage_site_date(context: Context, percentage: int): ), f"Expected ~{percentage}%, got {actual_pct:.1f}%" -@then("{percentage:Number}% should have non-null legacy_date_created") +@then("{percentage:Number}% should have non-null nma_date_created") def step_then_percentage_legacy(context: Context, percentage: int): - """Assert percentage with legacy_date_created.""" + """Assert percentage with nma_date_created.""" total = len(context.queried_locations) - populated = sum(1 for loc in context.queried_locations if loc.legacy_date_created) + populated = sum(1 for loc in context.queried_locations if loc.nma_date_created) actual_pct = (populated / total) * 100 tolerance = 2 @@ -372,24 +372,24 @@ def step_then_has_created_at(context: Context): assert context.retrieved_location.created_at is not None -@then("it should have legacy_date_created (original AMPAPI DateCreated)") +@then("it should have nma_date_created (original AMPAPI DateCreated)") def step_then_has_legacy_date(context: Context): - """Assert legacy_date_created exists.""" - assert context.retrieved_location.legacy_date_created is not None + """Assert nma_date_created exists.""" + assert context.retrieved_location.nma_date_created is not None -@then("it should have legacy_site_date (original AMPAPI SiteDate)") +@then("it should have nma_site_date (original AMPAPI SiteDate)") def step_then_has_site_date(context: Context): - """Assert legacy_site_date exists.""" - assert context.retrieved_location.legacy_site_date is not None + """Assert nma_site_date exists.""" + assert context.retrieved_location.nma_site_date is not None @then("all three timestamps should 
be independently queryable") def step_then_all_queryable(context: Context): """Assert all fields are queryable.""" assert hasattr(context.retrieved_location, "created_at") - assert hasattr(context.retrieved_location, "legacy_date_created") - assert hasattr(context.retrieved_location, "legacy_site_date") + assert hasattr(context.retrieved_location, "nma_date_created") + assert hasattr(context.retrieved_location, "nma_site_date") @then("created_at should be a recent timestamp") @@ -401,25 +401,25 @@ def step_then_created_at_recent(context: Context): assert diff_seconds < 3600, "created_at should be within last hour" -@then("legacy_date_created should be an older date") +@then("nma_date_created should be an older date") def step_then_legacy_date_older(context: Context): - """Assert legacy_date_created is old.""" - legacy_date = context.retrieved_location.legacy_date_created - assert legacy_date.year < 2024, "legacy_date_created should be from the past" + """Assert nma_date_created is old.""" + legacy_date = context.retrieved_location.nma_date_created + assert legacy_date.year < 2024, "nma_date_created should be from the past" -@then('legacy_date_created should be "{expected_date}"') +@then('nma_date_created should be "{expected_date}"') def step_then_legacy_is(context: Context, expected_date: str): - """Assert legacy_date_created value.""" - actual = context.retrieved_location.legacy_date_created + """Assert nma_date_created value.""" + actual = context.retrieved_location.nma_date_created expected = date.fromisoformat(expected_date) assert actual == expected, f"Expected {expected}, got {actual}" -@then('legacy_site_date should be "{expected_date}"') +@then('nma_site_date should be "{expected_date}"') def step_then_site_date_is(context: Context, expected_date: str): - """Assert legacy_site_date value.""" - actual = context.retrieved_location.legacy_site_date + """Assert nma_site_date value.""" + actual = context.retrieved_location.nma_site_date expected = 
date.fromisoformat(expected_date) assert actual == expected, f"Expected {expected}, got {actual}" @@ -431,10 +431,10 @@ def step_then_no_error(context: Context): pass -@then("legacy_site_date should be null") +@then("nma_site_date should be null") def step_then_site_date_null(context: Context): - """Assert legacy_site_date is null.""" - assert context.retrieved_location.legacy_site_date is None + """Assert nma_site_date is null.""" + assert context.retrieved_location.nma_site_date is None @then("the well should still be valid") diff --git a/tests/test_location.py b/tests/test_location.py index b86211a58..67a4615c8 100644 --- a/tests/test_location.py +++ b/tests/test_location.py @@ -251,10 +251,10 @@ def test_new_location_has_null_legacy_fields(): data = response.json() assert "id" in data # Legacy fields should be present in response but null (not set during creation) - assert "legacy_date_created" in data - assert "legacy_site_date" in data - assert data["legacy_date_created"] is None - assert data["legacy_site_date"] is None + assert "nma_date_created" in data + assert "nma_site_date" in data + assert data["nma_date_created"] is None + assert data["nma_site_date"] is None # cleanup after test cleanup_post_test(Location, data["id"]) @@ -278,10 +278,10 @@ def test_legacy_fields_present_in_location_response(): data = get_response.json() # Verify fields exist in response (even if null) - assert "legacy_date_created" in data - assert "legacy_site_date" in data - assert data["legacy_date_created"] is None - assert data["legacy_site_date"] is None + assert "nma_date_created" in data + assert "nma_site_date" in data + assert data["nma_date_created"] is None + assert data["nma_site_date"] is None # cleanup after test cleanup_post_test(Location, location_id) @@ -303,12 +303,12 @@ def test_legacy_fields_independent_of_created_at(): assert "created_at" in data assert data["created_at"] is not None - # legacy_date_created is separate and null for new records - assert 
"legacy_date_created" in data - assert data["legacy_date_created"] is None + # nma_date_created is separate and null for new records + assert "nma_date_created" in data + assert data["nma_date_created"] is None # These are independent fields with different purposes - assert "created_at" != "legacy_date_created" + assert "created_at" != "nma_date_created" # cleanup after test cleanup_post_test(Location, data["id"]) diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index 795820ec8..5068d8882 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -17,8 +17,8 @@ Unit tests for legacy date field population during AMPAPI → NMSampleLocations migration. These tests verify that: -1. Location.legacy_date_created is populated from CSV DateCreated -2. Location.legacy_site_date is populated from CSV SiteDate (if not null) +1. Location.nma_date_created is populated from CSV DateCreated +2. Location.nma_site_date is populated from CSV SiteDate (if not null) """ import datetime from unittest.mock import Mock, patch, MagicMock @@ -35,7 +35,7 @@ @patch("transfers.util.lexicon_mapper") def test_make_location_with_both_legacy_dates(mock_lexicon_mapper): - """Test that make_location populates both legacy_date_created and legacy_site_date""" + """Test that make_location populates both nma_date_created and nma_site_date""" # Mock lexicon mapper to avoid GCS calls mock_lexicon_mapper.map_value.return_value = "GPS" @@ -63,13 +63,13 @@ def test_make_location_with_both_legacy_dates(mock_lexicon_mapper): # Call make_location location, elevation_method = make_location(row, elevations) - # Verify legacy_date_created is set from DateCreated - assert location.legacy_date_created is not None - assert location.legacy_date_created == datetime.date(2014, 4, 3) + # Verify nma_date_created is set from DateCreated + assert location.nma_date_created is not None + assert location.nma_date_created == datetime.date(2014, 4, 3) - # 
Verify legacy_site_date is set from SiteDate - assert location.legacy_site_date is not None - assert location.legacy_site_date == datetime.date(2002, 12, 10) + # Verify nma_site_date is set from SiteDate + assert location.nma_site_date is not None + assert location.nma_site_date == datetime.date(2002, 12, 10) # Verify created_at is NOT set during migration (it's auto-set by AutoBaseMixin on save) assert location.created_at is None @@ -102,11 +102,11 @@ def test_make_location_with_only_date_created(mock_lexicon_mapper): elevations = {} location, elevation_method = make_location(row, elevations) - # Verify legacy_date_created is set - assert location.legacy_date_created == datetime.date(2014, 4, 3) + # Verify nma_date_created is set + assert location.nma_date_created == datetime.date(2014, 4, 3) - # Verify legacy_site_date is null (91% of locations don't have SiteDate) - assert location.legacy_site_date is None + # Verify nma_site_date is null (91% of locations don't have SiteDate) + assert location.nma_site_date is None @patch("transfers.util.lexicon_mapper") @@ -137,8 +137,8 @@ def test_make_location_with_site_date_later_than_date_created(mock_lexicon_mappe location, elevation_method = make_location(row, elevations) # Both dates should be preserved as-is, regardless of order - assert location.legacy_date_created == datetime.date(2010, 1, 15) - assert location.legacy_site_date == datetime.date(2015, 6, 20) + assert location.nma_date_created == datetime.date(2010, 1, 15) + assert location.nma_site_date == datetime.date(2015, 6, 20) @patch("transfers.util.lexicon_mapper") @@ -169,11 +169,11 @@ def test_make_location_with_very_old_site_date(mock_lexicon_mapper): location, elevation_method = make_location(row, elevations) # Verify very old date is preserved - assert location.legacy_site_date == datetime.date(1954, 5, 1) - assert location.legacy_date_created == datetime.date(2008, 5, 28) + assert location.nma_site_date == datetime.date(1954, 5, 1) + assert 
location.nma_date_created == datetime.date(2008, 5, 28) # Verify 54-year time gap - time_gap = (location.legacy_date_created - location.legacy_site_date).days + time_gap = (location.nma_date_created - location.nma_site_date).days assert time_gap == 19751 # Approximately 54 years @@ -205,15 +205,15 @@ def test_make_location_legacy_dates_are_date_not_datetime(mock_lexicon_mapper): location, elevation_method = make_location(row, elevations) # Verify they are date objects (not datetime) - assert isinstance(location.legacy_date_created, datetime.date) - assert not isinstance(location.legacy_date_created, datetime.datetime) + assert isinstance(location.nma_date_created, datetime.date) + assert not isinstance(location.nma_date_created, datetime.datetime) - assert isinstance(location.legacy_site_date, datetime.date) - assert not isinstance(location.legacy_site_date, datetime.datetime) + assert isinstance(location.nma_site_date, datetime.date) + assert not isinstance(location.nma_site_date, datetime.datetime) # Verify time component is stripped - assert location.legacy_date_created == datetime.date(2014, 4, 3) - assert location.legacy_site_date == datetime.date(2002, 12, 10) + assert location.nma_date_created == datetime.date(2014, 4, 3) + assert location.nma_site_date == datetime.date(2002, 12, 10) @patch("transfers.util.lexicon_mapper") @@ -247,12 +247,12 @@ def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mappe assert location.created_at is None # legacy fields should be Date (no timezone) - assert isinstance(location.legacy_date_created, datetime.date) - assert isinstance(location.legacy_site_date, datetime.date) + assert isinstance(location.nma_date_created, datetime.date) + assert isinstance(location.nma_site_date, datetime.date) # Legacy fields should be populated - assert location.legacy_date_created is not None - assert location.legacy_site_date is not None + assert location.nma_date_created is not None + assert location.nma_site_date is 
not None # ============================================================================ @@ -312,15 +312,15 @@ def test_location_legacy_date_coverage_statistics(mock_lexicon_mapper): location, _ = make_location(row, elevations) # Count coverage - if location.legacy_date_created is not None: + if location.nma_date_created is not None: locations_created += 1 - if location.legacy_site_date is not None: + if location.nma_site_date is not None: locations_with_site_date += 1 # Verify expected coverage - assert locations_created == 100 # 100% should have legacy_date_created - assert locations_with_site_date == 9 # 9% should have legacy_site_date + assert locations_created == 100 # 100% should have nma_date_created + assert locations_with_site_date == 9 # 9% should have nma_site_date # ============================================================================ diff --git a/transfers/util.py b/transfers/util.py index d39845f44..c8d054a0a 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -244,16 +244,16 @@ def make_location(row: pd.Series, elevations: dict) -> tuple: f"LU_AltitudeMethod:{row.AltitudeMethod.strip()}" ) - # Extract legacy date fields (Date type, not DateTime) - legacy_date_created = None + # Extract AMPAPI date fields (Date type, not DateTime) + nma_date_created = None if row.DateCreated: - legacy_date_created = datetime.strptime( + nma_date_created = datetime.strptime( row.DateCreated, "%Y-%m-%d %H:%M:%S.%f" ).date() - legacy_site_date = None + nma_site_date = None if row.SiteDate: - legacy_site_date = datetime.strptime( + nma_site_date = datetime.strptime( row.SiteDate, "%Y-%m-%d %H:%M:%S.%f" ).date() @@ -264,8 +264,8 @@ def make_location(row: pd.Series, elevations: dict) -> tuple: release_status="public" if row.PublicRelease else "private", nma_coordinate_notes=row.CoordinateNotes, nma_notes_location=row.LocationNotes, - legacy_date_created=legacy_date_created, - legacy_site_date=legacy_site_date, + nma_date_created=nma_date_created, + 
nma_site_date=nma_site_date, ) return location, elevation_method From 2e33f83842886b903f4a9c6481f656e9b5424af5 Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 3 Dec 2025 01:02:08 -0800 Subject: [PATCH 22/37] Remove legacy fields from `UpdateLocation` schema --- db/location.py | 4 ++-- schemas/location.py | 8 ++------ ...st-migration-legacy-data-retrieval.feature | 20 +++++++++---------- .../steps/post_migration_legacy_data.py | 2 +- tests/test_location.py | 14 ++++++------- tests/test_transfer_legacy_dates.py | 8 ++++---- 6 files changed, 26 insertions(+), 30 deletions(-) diff --git a/db/location.py b/db/location.py index c00c11a70..cef3d0857 100644 --- a/db/location.py +++ b/db/location.py @@ -66,10 +66,10 @@ class Location(Base, AutoBaseMixin, ReleaseMixin, NotesMixin, DataProvenanceMixi nma_date_created: Mapped[datetime.date] = mapped_column( Date, nullable=True, - comment="Original AMPAPI DateCreated (migration-only field)", + comment="Original AMPAPI DateCreated (read-only, populated only during migration)", ) nma_site_date: Mapped[datetime.date] = mapped_column( - Date, nullable=True, comment="Original AMPAPI SiteDate (migration-only field)" + Date, nullable=True, comment="Original AMPAPI SiteDate (read-only, populated only during migration)" ) # --- Relationship Definitions --- diff --git a/schemas/location.py b/schemas/location.py index f34c54115..fce13ef99 100644 --- a/schemas/location.py +++ b/schemas/location.py @@ -107,7 +107,7 @@ class GeoJSONProperties(BaseModel): default_factory=GeoJSONUTMCoordinates ) notes: list[NoteResponse] = [] - # AMPAPI date fields (migration-only, read-only) + # AMPAPI date fields (read-only, populated only during migration) nma_date_created: date | None = None nma_site_date: date | None = None @@ -190,7 +190,7 @@ class LocationResponse(BaseResponseModel): county: str | None quad_name: str | None - # AMPAPI date fields (migration-only, read-only post-migration) + # AMPAPI date fields (read-only, populated only 
during migration, not in Create/Update schemas) nma_date_created: date | None = None nma_site_date: date | None = None @@ -232,9 +232,5 @@ class UpdateLocation(BaseUpdateModel, ValidateLocation): coordinate_accuracy: float | None = None coordinate_method: CoordinateMethod | None = None - # AMPAPI date fields (migration-only, can be updated but not created) - nma_date_created: date | None = None - nma_site_date: date | None = None - # ============= EOF ============================================= diff --git a/tests/features/post-migration-legacy-data-retrieval.feature b/tests/features/post-migration-legacy-data-retrieval.feature index 61f3e8c40..99fd08190 100644 --- a/tests/features/post-migration-legacy-data-retrieval.feature +++ b/tests/features/post-migration-legacy-data-retrieval.feature @@ -1,16 +1,16 @@ -Feature: Post-Migration Legacy Data Retrieval +Feature: Post-Migration AMPAPI Date Field Retrieval As a data manager After migrating data from AMPAPI to NMSampleLocations - I want to verify that all legacy temporal information is preserved and queryable + I want to verify that all AMPAPI temporal information is preserved and queryable So that no historical context is lost Background: Given a functioning api And the AMPAPI data has been migrated to the database - # Location Legacy Date Lookups + # Location AMPAPI Date Lookups (Read-Only Fields) - Scenario: Retrieve location with both legacy dates via API + Scenario: Retrieve location with both AMPAPI date fields via API Given a location exists with: | field | value | | nma_date_created | 2014-04-03 | @@ -30,14 +30,14 @@ Feature: Post-Migration Legacy Data Retrieval And the response should include nma_site_date as "1954-05-01" And the time gap should be approximately 54 years - Scenario: List all locations includes legacy date fields - Given 5 locations exist with various legacy dates + Scenario: List all locations includes AMPAPI date fields + Given 5 locations exist with various AMPAPI dates When I GET 
/location to list all locations Then each location should have a nma_date_created field And each location should have a nma_site_date field And some locations should have null nma_site_date - Scenario: Filter locations by legacy site date range + Scenario: Filter locations by AMPAPI site date range Given locations exist with nma_site_date ranging from 1950 to 2024 When I filter locations where nma_site_date is between "2000-01-01" and "2010-12-31" Then the response should only include locations with nma_site_date in that decade @@ -53,7 +53,7 @@ Feature: Post-Migration Legacy Data Retrieval # Data Quality Validation - Scenario: Verify migration preserved expected percentage of legacy dates + Scenario: Verify migration preserved expected percentage of AMPAPI dates Given 100 locations were migrated And 9 of them had non-null SiteDate in AMPAPI When I query the migrated locations @@ -62,8 +62,8 @@ Feature: Post-Migration Legacy Data Retrieval # Audit Trail Verification - Scenario: Legacy dates preserved alongside audit timestamps - Given a location was migrated with legacy dates + Scenario: AMPAPI dates preserved alongside audit timestamps + Given a location was migrated with AMPAPI dates When I retrieve that location Then it should have created_at (new system timestamp from migration) And it should have nma_date_created (original AMPAPI DateCreated) diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py index 99ddd028e..d568c0296 100644 --- a/tests/features/steps/post_migration_legacy_data.py +++ b/tests/features/steps/post_migration_legacy_data.py @@ -32,7 +32,7 @@ def parse_number(text): def create_test_location(nma_date_created=None, nma_site_date=None): - """Helper to create a test location with legacy dates.""" + """Helper to create a test location with AMPAPI date fields (read-only post-migration).""" with session_ctx() as session: location = Location( point="POINT(-106.607784 35.118924)", diff --git 
a/tests/test_location.py b/tests/test_location.py index 67a4615c8..6e143f1eb 100644 --- a/tests/test_location.py +++ b/tests/test_location.py @@ -250,7 +250,7 @@ def test_new_location_has_null_legacy_fields(): assert response.status_code == 201 data = response.json() assert "id" in data - # Legacy fields should be present in response but null (not set during creation) + # AMPAPI date fields should be present in response but null (not set during creation, read-only) assert "nma_date_created" in data assert "nma_site_date" in data assert data["nma_date_created"] is None @@ -261,8 +261,8 @@ def test_new_location_has_null_legacy_fields(): def test_legacy_fields_present_in_location_response(): - """Test that legacy fields are included in location GET response""" - # Create a new location (without legacy fields) + """Test that AMPAPI date fields (read-only) are included in location GET response""" + # Create a new location (without AMPAPI date fields set - they're read-only) payload = { "point": "POINT (-106.607784 35.118924)", "elevation": 1558.8, @@ -272,12 +272,12 @@ def test_legacy_fields_present_in_location_response(): assert create_response.status_code == 201 location_id = create_response.json()["id"] - # Retrieve the location and verify legacy fields are in the schema + # Retrieve the location and verify AMPAPI date fields are in the schema get_response = client.get(f"/location/{location_id}") assert get_response.status_code == 200 data = get_response.json() - # Verify fields exist in response (even if null) + # Verify read-only fields exist in response (even if null) assert "nma_date_created" in data assert "nma_site_date" in data assert data["nma_date_created"] is None @@ -288,7 +288,7 @@ def test_legacy_fields_present_in_location_response(): def test_legacy_fields_independent_of_created_at(): - """Test that created_at (system timestamp) is separate from legacy fields""" + """Test that created_at (system timestamp) is separate from AMPAPI date fields 
(read-only)""" payload = { "point": "POINT (-106.607784 35.118924)", "elevation": 1558.8, @@ -303,7 +303,7 @@ def test_legacy_fields_independent_of_created_at(): assert "created_at" in data assert data["created_at"] is not None - # nma_date_created is separate and null for new records + # nma_date_created is separate and null for new records (read-only, populated only during migration) assert "nma_date_created" in data assert data["nma_date_created"] is None diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index 5068d8882..c4e06755f 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -14,11 +14,11 @@ # limitations under the License. # =============================================================================== """ -Unit tests for legacy date field population during AMPAPI → NMSampleLocations migration. +Unit tests for AMPAPI date field population during AMPAPI → NMSampleLocations migration. These tests verify that: -1. Location.nma_date_created is populated from CSV DateCreated -2. Location.nma_site_date is populated from CSV SiteDate (if not null) +1. Location.nma_date_created is populated from CSV DateCreated (read-only post-migration) +2. 
Location.nma_site_date is populated from CSV SiteDate if not null (read-only post-migration) """ import datetime from unittest.mock import Mock, patch, MagicMock @@ -29,7 +29,7 @@ # ============================================================================ -# LOCATION LEGACY DATE TESTS +# LOCATION AMPAPI DATE TESTS (Read-Only Post-Migration) # ============================================================================ From aef077b0f8e45ccbdcb8e7247128c053f329ff8f Mon Sep 17 00:00:00 2001 From: kbighorse Date: Wed, 3 Dec 2025 09:02:10 +0000 Subject: [PATCH 23/37] Formatting changes --- db/location.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/db/location.py b/db/location.py index cef3d0857..fda4611f9 100644 --- a/db/location.py +++ b/db/location.py @@ -69,7 +69,9 @@ class Location(Base, AutoBaseMixin, ReleaseMixin, NotesMixin, DataProvenanceMixi comment="Original AMPAPI DateCreated (read-only, populated only during migration)", ) nma_site_date: Mapped[datetime.date] = mapped_column( - Date, nullable=True, comment="Original AMPAPI SiteDate (read-only, populated only during migration)" + Date, + nullable=True, + comment="Original AMPAPI SiteDate (read-only, populated only during migration)", ) # --- Relationship Definitions --- From 6258e7de97d367e4b1c2814113457f902219b85a Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 3 Dec 2025 01:06:08 -0800 Subject: [PATCH 24/37] DRY up the mock lexicon mapper into a fixture --- tests/test_transfer_legacy_dates.py | 46 ++++++++++------------------- 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index c4e06755f..badaec8b2 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -28,16 +28,26 @@ from transfers.util import make_location +# ============================================================================ +# FIXTURES +# 
============================================================================ + + +@pytest.fixture +def mock_lexicon_mapper(): + """Fixture to mock lexicon_mapper for all transfer tests""" + with patch("transfers.util.lexicon_mapper") as mock: + mock.map_value.return_value = "GPS" + yield mock + + # ============================================================================ # LOCATION AMPAPI DATE TESTS (Read-Only Post-Migration) # ============================================================================ -@patch("transfers.util.lexicon_mapper") def test_make_location_with_both_legacy_dates(mock_lexicon_mapper): """Test that make_location populates both nma_date_created and nma_site_date""" - # Mock lexicon mapper to avoid GCS calls - mock_lexicon_mapper.map_value.return_value = "GPS" # Create a mock CSV row with both DateCreated and SiteDate row = pd.Series( @@ -75,12 +85,8 @@ def test_make_location_with_both_legacy_dates(mock_lexicon_mapper): assert location.created_at is None -@patch("transfers.util.lexicon_mapper") def test_make_location_with_only_date_created(mock_lexicon_mapper): """Test that make_location handles locations with only DateCreated (no SiteDate)""" - # Mock lexicon mapper to avoid GCS calls - mock_lexicon_mapper.map_value.return_value = "GPS" - row = pd.Series( { "PointID": "TEST-002", @@ -109,12 +115,8 @@ def test_make_location_with_only_date_created(mock_lexicon_mapper): assert location.nma_site_date is None -@patch("transfers.util.lexicon_mapper") def test_make_location_with_site_date_later_than_date_created(mock_lexicon_mapper): """Test data anomaly: SiteDate is later than DateCreated (should still be accepted)""" - # Mock lexicon mapper to avoid GCS calls - mock_lexicon_mapper.map_value.return_value = "GPS" - row = pd.Series( { "PointID": "TEST-003", @@ -141,12 +143,8 @@ def test_make_location_with_site_date_later_than_date_created(mock_lexicon_mappe assert location.nma_site_date == datetime.date(2015, 6, 20) 
-@patch("transfers.util.lexicon_mapper") def test_make_location_with_very_old_site_date(mock_lexicon_mapper): """Test that very old SiteDates (1950s) are preserved correctly""" - # Mock lexicon mapper to avoid GCS calls - mock_lexicon_mapper.map_value.return_value = "GPS" - row = pd.Series( { "PointID": "SM-0227", # Real example from dataset @@ -177,12 +175,8 @@ def test_make_location_with_very_old_site_date(mock_lexicon_mapper): assert time_gap == 19751 # Approximately 54 years -@patch("transfers.util.lexicon_mapper") def test_make_location_legacy_dates_are_date_not_datetime(mock_lexicon_mapper): - """Test that legacy date fields are Date type (not DateTime)""" - # Mock lexicon mapper to avoid GCS calls - mock_lexicon_mapper.map_value.return_value = "GPS" - + """Test that AMPAPI date fields are Date type (not DateTime)""" row = pd.Series( { "PointID": "TEST-004", @@ -216,12 +210,8 @@ def test_make_location_legacy_dates_are_date_not_datetime(mock_lexicon_mapper): assert location.nma_site_date == datetime.date(2002, 12, 10) -@patch("transfers.util.lexicon_mapper") def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mapper): - """Test that legacy dates don't affect created_at timestamp""" - # Mock lexicon mapper to avoid GCS calls - mock_lexicon_mapper.map_value.return_value = "GPS" - + """Test that AMPAPI dates don't affect created_at timestamp""" row = pd.Series( { "PointID": "TEST-005", @@ -260,12 +250,8 @@ def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mappe # ============================================================================ -@patch("transfers.util.lexicon_mapper") def test_location_legacy_date_coverage_statistics(mock_lexicon_mapper): - """Test that migration preserves expected percentages of legacy dates""" - # Mock lexicon mapper to avoid GCS calls - mock_lexicon_mapper.map_value.return_value = "GPS" - + """Test that migration preserves expected percentages of AMPAPI dates""" # Simulate 100 location 
records from CSV locations_created = 0 locations_with_site_date = 0 From fd4562a785a2f5bfaba7ddf535bc7d53acce3161 Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 3 Dec 2025 01:12:35 -0800 Subject: [PATCH 25/37] Replace legacy python timestamp call with current implementation --- tests/features/steps/post_migration_legacy_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py index bd7425269..0327d6f0f 100644 --- a/tests/features/steps/post_migration_legacy_data.py +++ b/tests/features/steps/post_migration_legacy_data.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== -from datetime import date, datetime +from datetime import date, datetime, timezone from behave import given, when, then, register_type from behave.runner import Context import parse @@ -394,7 +394,7 @@ def step_then_all_queryable(context: Context): def step_then_created_at_recent(context: Context): """Assert created_at is recent.""" created_at = context.retrieved_location.created_at.replace(tzinfo=None) - now = datetime.utcnow() + now = datetime.now(timezone.utc).replace(tzinfo=None) diff_seconds = abs((now - created_at).total_seconds()) assert diff_seconds < 3600, "created_at should be within last hour" From 5b1a07dd10eb672441a166e7241af82ea29b77d6 Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 3 Dec 2025 01:20:16 -0800 Subject: [PATCH 26/37] Preserve timezone in comparison --- tests/features/steps/post_migration_legacy_data.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py index 0327d6f0f..6e504734e 100644 --- a/tests/features/steps/post_migration_legacy_data.py +++ 
b/tests/features/steps/post_migration_legacy_data.py @@ -393,8 +393,13 @@ def step_then_all_queryable(context: Context): @then("created_at should be a recent timestamp") def step_then_created_at_recent(context: Context): """Assert created_at is recent.""" - created_at = context.retrieved_location.created_at.replace(tzinfo=None) - now = datetime.now(timezone.utc).replace(tzinfo=None) + created_at = context.retrieved_location.created_at + now = datetime.now(timezone.utc) + + # Ensure both datetimes are timezone-aware for accurate comparison + if created_at.tzinfo is None: + created_at = created_at.replace(tzinfo=timezone.utc) + diff_seconds = abs((now - created_at).total_seconds()) assert diff_seconds < 3600, "created_at should be within last hour" From b92a9864a38bff51a03a4d0c8500ef81b9161f2e Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 3 Dec 2025 01:23:16 -0800 Subject: [PATCH 27/37] Make features more human-readable --- .../post-migration-legacy-data-retrieval.feature | 12 ++++++------ tests/features/steps/post_migration_legacy_data.py | 14 +++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/features/post-migration-legacy-data-retrieval.feature b/tests/features/post-migration-legacy-data-retrieval.feature index 99fd08190..13b2b347d 100644 --- a/tests/features/post-migration-legacy-data-retrieval.feature +++ b/tests/features/post-migration-legacy-data-retrieval.feature @@ -33,16 +33,16 @@ Feature: Post-Migration AMPAPI Date Field Retrieval Scenario: List all locations includes AMPAPI date fields Given 5 locations exist with various AMPAPI dates When I GET /location to list all locations - Then each location should have a nma_date_created field - And each location should have a nma_site_date field - And some locations should have null nma_site_date + Then each location should have a date created field + And each location should have a site date field + And some locations should have null site date Scenario: Filter 
locations by AMPAPI site date range Given locations exist with nma_site_date ranging from 1950 to 2024 When I filter locations where nma_site_date is between "2000-01-01" and "2010-12-31" - Then the response should only include locations with nma_site_date in that decade - And locations with nma_site_date before 2000 should not be included - And locations with nma_site_date after 2010 should not be included + Then the response should only include locations with site date in that decade + And locations with site date before 2000 should not be included + And locations with site date after 2010 should not be included Scenario: Query location by nma_date_created Given 3 locations exist with nma_date_created "2014-04-03" diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py index 6e504734e..bf6e8b443 100644 --- a/tests/features/steps/post_migration_legacy_data.py +++ b/tests/features/steps/post_migration_legacy_data.py @@ -270,28 +270,28 @@ def step_then_time_gap_years(context: Context, years: str): ), f"Expected ~{expected_years} year gap, got {gap_years:.1f} years" -@then("each location should have a nma_date_created field") +@then("each location should have a date created field") def step_then_all_have_legacy_field(context: Context): - """Assert all locations have the field.""" + """Assert all locations have the date created field.""" items = context.locations_response.get("items", []) for item in items: assert "nma_date_created" in item, f"Location missing nma_date_created" -@then("each location should have a nma_site_date field") +@then("each location should have a site date field") def step_then_all_have_site_date_field(context: Context): - """Assert all locations have the field.""" + """Assert all locations have the site date field.""" items = context.locations_response.get("items", []) for item in items: assert "nma_site_date" in item, f"Location missing nma_site_date" -@then("some locations should 
have null nma_site_date") +@then("some locations should have null site date") def step_then_some_null_site_date(context: Context): - """Assert some locations have null.""" + """Assert some locations have null site date.""" items = context.locations_response.get("items", []) null_count = sum(1 for item in items if item.get("nma_site_date") is None) - assert null_count > 0, "Expected at least one location with null nma_site_date" + assert null_count > 0, "Expected at least one location with null site date" @then("the response should only include locations with site date in that decade") From 94addc7315ee69b35703a1068c8d37f96d328121 Mon Sep 17 00:00:00 2001 From: kbighorse Date: Wed, 3 Dec 2025 09:26:00 +0000 Subject: [PATCH 28/37] Formatting changes --- tests/test_transfer_legacy_dates.py | 32 +++++++++++++++-------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index c298b129e..e2b4ca0f2 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -255,21 +255,23 @@ def test_location_legacy_date_coverage_statistics(mock_lexicon_mapper): def create_test_row(i, has_site_date): """Helper to create test row with common fields""" - return pd.Series({ - "PointID": f"TEST-{i:03d}", - "Easting": 350000 + i, - "Northing": 3880000 + i, - "DateCreated": "2014-04-03 00:00:00.000", - "SiteDate": "2002-12-10 00:00:00.000" if has_site_date else None, - "Altitude": 1558.8, - "AltDatum": "NAVD88", - "AltitudeMethod": "GPS", - "LocationId": i, - "PublicRelease": True, - "CoordinateNotes": None, - "LocationNotes": None, - "AltitudeAccuracy": None, - }) + return pd.Series( + { + "PointID": f"TEST-{i:03d}", + "Easting": 350000 + i, + "Northing": 3880000 + i, + "DateCreated": "2014-04-03 00:00:00.000", + "SiteDate": "2002-12-10 00:00:00.000" if has_site_date else None, + "Altitude": 1558.8, + "AltDatum": "NAVD88", + "AltitudeMethod": "GPS", + "LocationId": i, 
+ "PublicRelease": True, + "CoordinateNotes": None, + "LocationNotes": None, + "AltitudeAccuracy": None, + } + ) # Simulate 100 location records from CSV (9% with SiteDate, 91% without) locations_created = 0 From 0b4d77d181052170de06ea251dc6881f2a797f0f Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 3 Dec 2025 01:26:07 -0800 Subject: [PATCH 29/37] Simulate CSV rows more effiiently --- tests/test_transfer_legacy_dates.py | 62 ++++++++++------------------- 1 file changed, 22 insertions(+), 40 deletions(-) diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index badaec8b2..c298b129e 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -252,55 +252,37 @@ def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mappe def test_location_legacy_date_coverage_statistics(mock_lexicon_mapper): """Test that migration preserves expected percentages of AMPAPI dates""" - # Simulate 100 location records from CSV + + def create_test_row(i, has_site_date): + """Helper to create test row with common fields""" + return pd.Series({ + "PointID": f"TEST-{i:03d}", + "Easting": 350000 + i, + "Northing": 3880000 + i, + "DateCreated": "2014-04-03 00:00:00.000", + "SiteDate": "2002-12-10 00:00:00.000" if has_site_date else None, + "Altitude": 1558.8, + "AltDatum": "NAVD88", + "AltitudeMethod": "GPS", + "LocationId": i, + "PublicRelease": True, + "CoordinateNotes": None, + "LocationNotes": None, + "AltitudeAccuracy": None, + }) + + # Simulate 100 location records from CSV (9% with SiteDate, 91% without) locations_created = 0 locations_with_site_date = 0 + elevations = {} for i in range(100): - if i < 9: # 9% have SiteDate - row = pd.Series( - { - "PointID": f"TEST-{i:03d}", - "Easting": 350000 + i, - "Northing": 3880000 + i, - "DateCreated": "2014-04-03 00:00:00.000", - "SiteDate": "2002-12-10 00:00:00.000", - "Altitude": 1558.8, - "AltDatum": "NAVD88", - "AltitudeMethod": "GPS", - 
"LocationId": i, - "PublicRelease": True, - "CoordinateNotes": None, - "LocationNotes": None, - "AltitudeAccuracy": None, - } - ) - else: # 91% don't have SiteDate - row = pd.Series( - { - "PointID": f"TEST-{i:03d}", - "Easting": 350000 + i, - "Northing": 3880000 + i, - "DateCreated": "2014-04-03 00:00:00.000", - "SiteDate": None, - "Altitude": 1558.8, - "AltDatum": "NAVD88", - "AltitudeMethod": "GPS", - "LocationId": i, - "PublicRelease": True, - "CoordinateNotes": None, - "LocationNotes": None, - "AltitudeAccuracy": None, - } - ) - - elevations = {} + row = create_test_row(i, has_site_date=(i < 9)) location, _ = make_location(row, elevations) # Count coverage if location.nma_date_created is not None: locations_created += 1 - if location.nma_site_date is not None: locations_with_site_date += 1 From 2d12844f305091758277b45a49976170397c06e3 Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 3 Dec 2025 01:39:05 -0800 Subject: [PATCH 30/37] Replace `legacy_` in method names --- .../steps/post_migration_legacy_data.py | 26 +++++++++---------- tests/test_location.py | 10 +++---- tests/test_transfer_legacy_dates.py | 8 +++--- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py index bf6e8b443..5850bf04e 100644 --- a/tests/features/steps/post_migration_legacy_data.py +++ b/tests/features/steps/post_migration_legacy_data.py @@ -91,9 +91,9 @@ def step_given_multiple_locations(context: Context, count: int): ] for i in range(min(count, len(test_data))): - legacy_date, site_date = test_data[i] + created_date, site_date = test_data[i] location = create_test_location( - nma_date_created=date.fromisoformat(legacy_date), + nma_date_created=date.fromisoformat(created_date), nma_site_date=(date.fromisoformat(site_date) if site_date else None), ) context.test_locations.append(location) @@ -203,7 +203,7 @@ def step_when_filter_locations(context: Context, 
start_date: str, end_date: str) @when('I query for locations with nma_date_created "{target_date}"') -def step_when_query_by_legacy_date(context: Context, target_date: str): +def step_when_query_by_ampapi_date(context: Context, target_date: str): """Query locations by nma_date_created.""" with session_ctx() as session: target = date.fromisoformat(target_date) @@ -251,16 +251,16 @@ def step_then_nma_site_date(context: Context, expected_date: str): @then("the time gap should be approximately {years} years") def step_then_time_gap_years(context: Context, years: str): """Assert approximate year gap.""" - legacy_str = context.location_response.get("nma_date_created") + created_str = context.location_response.get("nma_date_created") site_date_str = context.location_response.get("nma_site_date") - if not legacy_str or not site_date_str: + if not created_str or not site_date_str: raise AssertionError("Missing date fields for gap calculation") - legacy_date = date.fromisoformat(legacy_str) + created_date = date.fromisoformat(created_str) site_date = date.fromisoformat(site_date_str) - gap_days = (legacy_date - site_date).days + gap_days = (created_date - site_date).days gap_years = gap_days / 365.25 expected_years = float(years) @@ -271,7 +271,7 @@ def step_then_time_gap_years(context: Context, years: str): @then("each location should have a date created field") -def step_then_all_have_legacy_field(context: Context): +def step_then_all_have_date_created_field(context: Context): """Assert all locations have the date created field.""" items = context.locations_response.get("items", []) for item in items: @@ -371,7 +371,7 @@ def step_then_has_created_at(context: Context): @then("it should have nma_date_created (original AMPAPI DateCreated)") -def step_then_has_legacy_date(context: Context): +def step_then_has_ampapi_date_created(context: Context): """Assert nma_date_created exists.""" assert context.retrieved_location.nma_date_created is not None @@ -405,14 +405,14 @@ def 
step_then_created_at_recent(context: Context): @then("nma_date_created should be an older date") -def step_then_legacy_date_older(context: Context): +def step_then_ampapi_date_older(context: Context): """Assert nma_date_created is old.""" - legacy_date = context.retrieved_location.nma_date_created - assert legacy_date.year < 2024, "nma_date_created should be from the past" + ampapi_created_date = context.retrieved_location.nma_date_created + assert ampapi_created_date.year < 2024, "nma_date_created should be from the past" @then('nma_date_created should be "{expected_date}"') -def step_then_legacy_is(context: Context, expected_date: str): +def step_then_ampapi_created_is(context: Context, expected_date: str): """Assert nma_date_created value.""" actual = context.retrieved_location.nma_date_created expected = date.fromisoformat(expected_date) diff --git a/tests/test_location.py b/tests/test_location.py index 6e143f1eb..9dcb3d098 100644 --- a/tests/test_location.py +++ b/tests/test_location.py @@ -235,11 +235,11 @@ def test_delete_location_404_not_found(second_location): assert data["detail"] == f"Location with ID {bad_location_id} not found." 
-# ============= Legacy date field tests ======================================= +# ============= AMPAPI date field tests ======================================= -def test_new_location_has_null_legacy_fields(): - """Test that newly created locations have null legacy date fields (legacy fields are migration-only)""" +def test_new_location_has_null_ampapi_fields(): + """Test that newly created locations have null AMPAPI date fields (AMPAPI fields are migration-only)""" payload = { "point": "POINT (-106.607784 35.118924)", "elevation": 1558.8, @@ -260,7 +260,7 @@ def test_new_location_has_null_legacy_fields(): cleanup_post_test(Location, data["id"]) -def test_legacy_fields_present_in_location_response(): +def test_ampapi_fields_present_in_location_response(): """Test that AMPAPI date fields (read-only) are included in location GET response""" # Create a new location (without AMPAPI date fields set - they're read-only) payload = { @@ -287,7 +287,7 @@ def test_legacy_fields_present_in_location_response(): cleanup_post_test(Location, location_id) -def test_legacy_fields_independent_of_created_at(): +def test_ampapi_fields_independent_of_created_at(): """Test that created_at (system timestamp) is separate from AMPAPI date fields (read-only)""" payload = { "point": "POINT (-106.607784 35.118924)", diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index c298b129e..d700ab470 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -46,7 +46,7 @@ def mock_lexicon_mapper(): # ============================================================================ -def test_make_location_with_both_legacy_dates(mock_lexicon_mapper): +def test_make_location_with_both_ampapi_dates(mock_lexicon_mapper): """Test that make_location populates both nma_date_created and nma_site_date""" # Create a mock CSV row with both DateCreated and SiteDate @@ -175,7 +175,7 @@ def 
test_make_location_with_very_old_site_date(mock_lexicon_mapper): assert time_gap == 19751 # Approximately 54 years -def test_make_location_legacy_dates_are_date_not_datetime(mock_lexicon_mapper): +def test_make_location_ampapi_dates_are_date_not_datetime(mock_lexicon_mapper): """Test that AMPAPI date fields are Date type (not DateTime)""" row = pd.Series( { @@ -210,7 +210,7 @@ def test_make_location_legacy_dates_are_date_not_datetime(mock_lexicon_mapper): assert location.nma_site_date == datetime.date(2002, 12, 10) -def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mapper): +def test_make_location_ampapi_dates_independent_of_created_at(mock_lexicon_mapper): """Test that AMPAPI dates don't affect created_at timestamp""" row = pd.Series( { @@ -250,7 +250,7 @@ def test_make_location_legacy_dates_independent_of_created_at(mock_lexicon_mappe # ============================================================================ -def test_location_legacy_date_coverage_statistics(mock_lexicon_mapper): +def test_location_ampapi_date_coverage_statistics(mock_lexicon_mapper): """Test that migration preserves expected percentages of AMPAPI dates""" def create_test_row(i, has_site_date): From 8c96e72d21fedcc7b939eb63b1e18effb1ac7eda Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 3 Dec 2025 01:46:39 -0800 Subject: [PATCH 31/37] Increase code test coverage --- .coverage | Bin 0 -> 53248 bytes tests/test_transfer_legacy_dates.py | 56 ++++ transfers/util.py,cover | 461 ++++++++++++++++++++++++++++ 3 files changed, 517 insertions(+) create mode 100644 .coverage create mode 100644 transfers/util.py,cover diff --git a/.coverage b/.coverage new file mode 100644 index 0000000000000000000000000000000000000000..5417251745baf9f63193ac63e03d4d4b0edab5c4 GIT binary patch literal 53248 zcmeI)&u$Y(90%}S+t^vh)&|kYiXxQhftrXU#_geMdw@`)3Y8WpX)l#9V|$!z@$Q=4 zH6(|sHB>@E>Kjyj1o{X(L!W>iIrY{{p;CXpon8M4HsVkeNWUxF**`P0GoSfq$JSiG 
zcG-!P==gyvBk_T;V3?+HUI@c5EP9sdnQjHz$)yMM*BslQwQCt058o-<-x#I*&xU=Y ze8pZb{Zf8!{`Zn=J)HlwAgvZ%zy<*bKmY>&Z-K$JQlYZ6WZwHelFhD)0@+f5el36c z)z;?rme}6BaCuAUbK-PS(6+uVHbvmy6+IP*j?-1b@!C#HMvk{5qFt4S?uV+)6CItS zsg46~#PNBr;j}4Mq;@EYUf{Sg*cZ3d{#qO$xm%?6B7K4qQO=IXA;jq<`l<+2M+M4j zsZi(Qw9{TKuHOHlSg4#hVLniCnPkwS$NWTT=)|ZfmpYVC%MaR|`(_}$)~*WIgdCQz zuE4uQ8ob z^XT_WqH}aFhzH;46smoD$ zC#o|g=Ni?q`kr&UuZESsCPp#W;$nFIn&3iE3ph2OuY7dEG^kaoz3_II-fuNzKl0<_ z233BePA{dOj?Wb;D=X%M+i^o-o<`G&!r_iJ(P)OJXEYtYKD*s;FPUs4+>5$h;dkOL z!?A^~9BR$ueWsRT$bdVV@RNX)(?e}-oLS0)Xm-4=Aca@OK z(DLu7K<=oGy)2Une%GS!PrZa2;jrwr6I9SRTD$C2}Ju7ZTNo^&63KLBz zt;s3FXnM(2GVBNGK=QIdGL11BCDT5VT%qEu7xII7 zgFZjA6Itdnqbz@}_Sf&oE{!HV=>*heX$;mgfZ%{_Nr7e4??-X_<-@pn^4v4ZuTEfe zoZ3^U)i^kH@2h?oIy9~Fv=x4wo)y(y8S*F;Rz=hIyGnX`ZlD;KLb|6DiZ~B1&PwM- zX@vWn#?GB;oTqeh;bDH8I$heoXvcM>*G}duo`!X*H1>1nDZ5=-kfp6^w0?oS#0wY&wC<7beHQrrUhT^-u1mMqRhNBU4T%;zsnJqKUP#Ln zg|&X3QT4FV8=00bZa0SG_<0uX=z1Rwx`H$&iRZg9%_FF*fd{l7P( zVv!37KmY;|fB*y_009U<00Izzz-to7npvZ)zyE)3*w0_nU63pUAOHafKmY;|fB*y_ z009U<00M_9kj<7Y{r&%A!+w0IS;5UA009U<00Izz00bZa0SG_<0uY!jaLzntSp&NM z_=)xR%Dc3)aMAeDSjZX1oI!^d+Bb~zOHcl(7RJ*MqIX;$=k)jg&kXz7cmV7}00Izz z00bZa0SG_<0uX=z1R!uk1adjciogFi7LG`XPz(q_00Izz00bZa0SG_<0uX=z1f~k` z_y73(Ka~b2ApijgKmY;|fB*y_009U<00Kuw0PFuBnW{uVAOHafKmY;|fB*y_009U< M00Pql;`RUk1r+UXZU6uP literal 0 HcmV?d00001 diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index 5129b7d9c..985214fbb 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -250,6 +250,62 @@ def test_make_location_ampapi_dates_independent_of_created_at(mock_lexicon_mappe # ============================================================================ +def test_make_location_with_no_ampapi_dates(mock_lexicon_mapper): + """Test that make_location handles locations with no AMPAPI dates (both null)""" + row = pd.Series( + { + "PointID": "TEST-NODATES", + "Easting": 350000, + "Northing": 3880000, + "DateCreated": None, # No DateCreated + 
"SiteDate": None, # No SiteDate + "Altitude": 1558.8, + "AltDatum": "NAVD88", + "AltitudeMethod": "GPS", + "LocationId": 999, + "PublicRelease": True, + "CoordinateNotes": None, + "LocationNotes": None, + "AltitudeAccuracy": None, + } + ) + + elevations = {} + location, elevation_method = make_location(row, elevations) + + # Both AMPAPI date fields should be null + assert location.nma_date_created is None + assert location.nma_site_date is None + + +def test_make_location_with_empty_string_dates(mock_lexicon_mapper): + """Test that make_location handles empty string dates (CSV edge case)""" + row = pd.Series( + { + "PointID": "TEST-EMPTY", + "Easting": 350000, + "Northing": 3880000, + "DateCreated": "", # Empty string + "SiteDate": "", # Empty string + "Altitude": 1558.8, + "AltDatum": "NAVD88", + "AltitudeMethod": "GPS", + "LocationId": 998, + "PublicRelease": True, + "CoordinateNotes": None, + "LocationNotes": None, + "AltitudeAccuracy": None, + } + ) + + elevations = {} + location, elevation_method = make_location(row, elevations) + + # Both AMPAPI date fields should be null (empty strings are falsy) + assert location.nma_date_created is None + assert location.nma_site_date is None + + def test_location_ampapi_date_coverage_statistics(mock_lexicon_mapper): """Test that migration preserves expected percentages of AMPAPI dates""" diff --git a/transfers/util.py,cover b/transfers/util.py,cover new file mode 100644 index 000000000..5c2803392 --- /dev/null +++ b/transfers/util.py,cover @@ -0,0 +1,461 @@ + # =============================================================================== + # Copyright 2025 ross + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. 
+ # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + # =============================================================================== +> import csv +> import io +> import os +> import re +> from datetime import datetime, timezone, timedelta +> from pathlib import Path + +> import numpy as np +> import pandas as pd +> import pytz +> from shapely import Point +> from sqlalchemy import select +> from sqlalchemy.orm import Session + +> from constants import SRID_WGS84, SRID_UTM_ZONE_13N +> from db import Thing, Location, DataProvenance +> from services.gcs_helper import get_storage_bucket + + # from services.lexicon_mapper import lexicon_mapper +> from services.util import ( +> transform_srid, +> get_epqs_elevation_from_point, +> convert_ft_to_m, +> convert_ngvd29_to_navd88, +> ) +> from transfers.logger import logger + +> NMA_COORDINATE_ACCURACY = { +> "5m": (5, "m"), +> "1": (0.1, "second"), +> "5": (0.5, "second"), +> "F": (5, "second"), +> "H": (0.01, "second"), +> "M": (1, "minute"), +> "R": (3, "second"), +> "S": (1, "second"), +> "T": (10, "second"), +> } + + +> def replace_nans(df: pd.DataFrame, default=None) -> pd.DataFrame: +! df = df.replace(pd.NA, default) +! return df.replace({np.nan: default}) + + +> def read_csv(name: str, dtype: dict | None = None) -> pd.DataFrame: + # Try to read from local data directory first +! local_file = Path(__file__).parent / "data" / f"{name}.csv" + +! if local_file.exists(): +! logger.info(f"Reading {name} from local file: {local_file}") +! if dtype: +! return pd.read_csv(local_file, dtype=dtype) +! else: +! 
return pd.read_csv(local_file) + + # Check cache directory +! p = get_transfers_data_path(Path("nma_csv_cache") / f"{name}.csv") +! if os.path.exists(p): +! logger.info(f"Reading {name} from cache: {p}") +! return pd.read_csv(p, dtype=dtype) + + # Fall back to GCS if local file doesn't exist +! logger.info(f"Local file and cache not found, reading {name} from GCS") +! bucket = get_storage_bucket() +! blob = bucket.blob(f"nma_csv/{name}.csv") +! data = blob.download_as_bytes() +! with open(p, "wb") as f: +! f.write(data) + +! if dtype: +! return pd.read_csv(io.BytesIO(data), dtype=dtype) +! else: +! return pd.read_csv(io.BytesIO(data)) + + +> def get_valid_point_ids(session, thing_type="water well"): +! things = get_valid_things(session, thing_type) +! valid_pointids = [thing.name for thing in things] +! return valid_pointids + + +> def get_valid_things(session, thing_type="water well"): +! return session.query(Thing).where(Thing.thing_type == thing_type).all() + + +> def extract_organization(alternate_id: str) -> str: +! if alternate_id.startswith("TWDB"): +! return "TWDB" +! elif alternate_id.startswith("NMED"): +! return "NMED" + + # TODO: There are a bunch of other formats used for AlternateSiteID. + # we should try to handle as many as possible but its not the end of the world + # if we have to update the organization for a particular alternate id at a later time +! for regex, org in ((r"^A-Z{1,2}-\d{5,6}$", "NMOSE"), (r"\d+(\.\d+){3,}", "PLSS")): + +! if re.match(regex, alternate_id): +! return org + +! return "Unknown" + + +> def get_transfers_data_path(name): +! def data_path(r): +! return Path(r) / "transfers" / "data" + +! root = data_path("/workspace") +! if not os.path.exists(root): +! root = data_path("..") +! if not os.path.exists(root): +! root = data_path(".") + +! return root / name + + +> def filter_non_transferred_wells(sess: Session, df: pd.DataFrame) -> pd.DataFrame: +! sql = select(Thing.name).where(Thing.thing_type == "water well") +! 
existing_ids = sess.execute(sql).scalars().all() +! return df[~(df["PointID"].isin(existing_ids))] + + +> def filter_by_welldata_datasource_and_project(df: pd.DataFrame) -> pd.DataFrame: +! path = get_transfers_data_path("valid_welldata_datasources.csv") +! with open(path, "r") as f: +! reader = csv.reader(f) +! _ = next(reader) +! valid_datasources = [row[0] for row in reader if row[1] == "Yes"] +! f.seek(0) +! invalid_datasources = [row[0] for row in reader if row[1] == "NO"] +! logger.info("Invalid WellData Datasources:") +! for vd in invalid_datasources: +! logger.info(f" {vd}") + +! counts = df.groupby("DataSource").size().reset_index(name="WellCount") +! counts = counts.sort_values("WellCount", ascending=False) +! for count in counts.itertuples(): +! logger.info(f"{count.DataSource}: {count.WellCount}") + +! pldf = read_csv("ProjectLocations") +! collabnet = pldf[pldf["ProjectName"] == "Water Level Network"] +! return df[ +! df["DataSource"].isin(valid_datasources) +! | df["PointID"].isin(collabnet["PointID"]) +! ] + + +> def filter_by_valid_measuring_agency(df: pd.DataFrame) -> pd.DataFrame: +! path = get_transfers_data_path("valid_measuring_agency.csv") + +! with open(path, "r") as f: +! reader = csv.reader(f) +! _ = next(reader) +! valid_measuring_agencies = [row[0] for row in reader if row[1] == "Yes"] +! logger.info("Valid Measuring Agencies:") +! for vma in valid_measuring_agencies: +! logger.info(f" {vma}") +! return df[df["MeasuringAgency"].isin(valid_measuring_agencies)] + + +> def filter_to_valid_point_ids(session: Session, df: pd.DataFrame) -> pd.DataFrame: +! valid_point_ids = get_valid_point_ids(session) +! return df[df["PointID"].isin(valid_point_ids)] + + +> def convert_mt_to_utc(dt_record: datetime): +! t = dt_record.time() +! if t.hour == 0 and t.minute == 0: + # no time was measured, so just set the timezone to UTC and keep + # time at 00:00 +! dt_record = dt_record.replace(tzinfo=timezone.utc) +! else: +! 
tz = pytz.timezone("America/Denver") +! dt_record = tz.localize(dt_record) +! if dt_record.dst() == timedelta(0): + # MST +! utc_offset = 7 +! else: + # MDT +! utc_offset = 6 +! dt_record = dt_record - timedelta(hours=utc_offset) +! dt_record = dt_record.replace(tzinfo=timezone.utc) +! return dt_record + + +> def chunk_by_size(df, chunk_size): +! for i in range(0, len(df), chunk_size): +! yield df.iloc[i : i + chunk_size] + + +> def make_location(row: pd.Series, elevations: dict) -> tuple: +> """ +> Returns a tuple of location data and the elevation method +> """ +> point = Point(row.Easting, row.Northing) + + # Convert the point to a WGS84 coordinate system +> transformed_point = transform_srid( +> point, source_srid=SRID_UTM_ZONE_13N, target_srid=SRID_WGS84 +> ) + +> z = row.Altitude +> if z: +> elevation_from_epqs = False +> z = convert_ft_to_m(z) + +> if row.AltDatum == "NGVD29": +! key = f"{row.PointID}, {transformed_point.x, transformed_point.y}" +! if key in elevations: +! z = elevations[key] +! else: +! z = convert_ngvd29_to_navd88( +! z, transformed_point.x, transformed_point.y +! ) +! elevations[key] = z +! else: +! elevation_from_epqs = True +! logger.info( +! f"Location {row.PointID} has no Altitude. Setting from National Map EPQS for " +! ) +! z = get_epqs_elevation_from_point(transformed_point.x, transformed_point.y) + +> if elevation_from_epqs: +! elevation_method = "USGS National Elevation Dataset (NED)" +> elif pd.isna(row.AltitudeMethod): +! 
elevation_method = None +> else: +> elevation_method = lexicon_mapper.map_value( +> f"LU_AltitudeMethod:{row.AltitudeMethod.strip()}" +> ) + + # Extract AMPAPI date fields (Date type, not DateTime) +> nma_date_created = None +> if row.DateCreated: +> nma_date_created = datetime.strptime( +> row.DateCreated, "%Y-%m-%d %H:%M:%S.%f" +> ).date() + +> nma_site_date = None +> if row.SiteDate: +> nma_site_date = datetime.strptime(row.SiteDate, "%Y-%m-%d %H:%M:%S.%f").date() + +> location = Location( +> nma_pk_location=row.LocationId, +> point=transformed_point.wkt, +> elevation=z, +> release_status="public" if row.PublicRelease else "private", +> nma_coordinate_notes=row.CoordinateNotes, +> nma_notes_location=row.LocationNotes, +> nma_date_created=nma_date_created, +> nma_site_date=nma_site_date, +> ) + +> return location, elevation_method + + +> def make_location_data_provenance( +> row: pd.Series, location: Location, elevation_method: str | None +> ) -> list[DataProvenance]: +! provenance_records = [] + +! if row.AltitudeAccuracy or row.CoordinateAccuracy: +! provenance = DataProvenance( +! target_id=location.id, +! target_table="location", +! field_name="elevation", +! origin_source=None, +! collection_method=elevation_method, +! accuracy_value=( +! None +! if pd.isna(row.AltitudeAccuracy) +! else convert_ft_to_m(row.AltitudeAccuracy) +! ), +! accuracy_unit="m", +! ) +! 
provenance_records.append(provenance) + + # TODO: AMP feedback is required for transfering coordinate accuracy values + # from NM_Aquifer to Ocotillo + # if row.CoordinateAccuracy == "U" or pd.isna(row.CoordinateAccuracy): + # # map "Unknown" to None + # row.CoordinateAccuracy = None + # elif row.CoordinateAccuracy == "5m": + # row.CoordinateAccuracy = 5.0 + # else: + # seconds = 0 + # minutes = 0 + # if row.CoordinateAccuracy == "1": + # seconds = 0.1 + # elif row.CoordinateAccuracy == "5": + # seconds = 0.5 + # elif row.CoordinateAccuracy == "F": + # seconds = 5 + # elif row.CoordinateAccuracy == "H": + # seconds = 0.01 + # elif row.CoordinateAccuracy == "M": + # minutes = 1 + # elif row.CoordinateAccuracy == "R": + # seconds = 3 + # elif row.CoordinateAccuracy == "S": + # seconds = 1 + # else: + # seconds = 10 + # coordinate_accuracy_decimal_deg = minutes/60 + seconds / 3600 + + # """ + # Developer's notes + + # To convert accuracy from decimal degrees to meters we do the following: + + # 1. Add the coordinate accuracy to both the latitude and longitude to + # find the "+" distance from the location + # 2. Convert "+" accuracy coordinates from decimal degrees to UTM Zone 13 + # N + # 3. Find the distance in meters from the original Easting/Northing and + # define this as the "+" accuracy in meters + # 4. Subtract the coordinate accuracy to both the latitude and longitude + # to find the "-" distance from the location + # 5. Convert the "-" accuracy coordinates from decimal degrees to UTM Zone + # 13 N + # 6. Find the distance in meters from the original Easting/Northing and + # define this as the "-" accuracy in meters + # 7. 
Set the coordinate accuracy in meters as the mean of the "+" and "-" + # distances from the location + # """ + # original_longitude = transformed_point.x + # original_latitude = transformed_point.y + + # plus_longitude = original_longitude + coordinate_accuracy_decimal_deg + # plus_latitude = original_latitude + coordinate_accuracy_decimal_deg + # plus_point_decimal_deg = Point(plus_longitude, plus_latitude) + # plus_point_utm_zone_13_n = transform_srid( + # plus_point_decimal_deg, + # SRID_WGS84, + # SRID_UTM_ZONE_13N) + + # minus_longitude = original_longitude - coordinate_accuracy_decimal_deg + # minus_latitude = original_latitude - coordinate_accuracy_decimal_deg + # minus_point_decimal_deg = Point(minus_longitude, minus_latitude) + +! if row.CoordinateMethod or row.CoordinateAccuracy: +! coordinate_method = ( +! lexicon_mapper.map_value(f"LU_CoordinateMethod:{row.CoordinateMethod}") +! if not pd.isna(row.CoordinateMethod) +! else None +! ) + +! accuracy_value, accuracy_unit = NMA_COORDINATE_ACCURACY.get( +! row.CoordinateAccuracy, (None, None) +! ) + +! provenance = DataProvenance( +! target_id=location.id, +! target_table="location", +! field_name="point", +! origin_source=None, +! collection_method=coordinate_method, +! accuracy_value=accuracy_value, +! accuracy_unit=accuracy_unit, +! ) +! provenance_records.append(provenance) + +! return provenance_records + + +> def timeit_direct(func, *args, **kwargs): +! start = datetime.now() +! result = func(*args, **kwargs) +! end = datetime.now() +! logger.info(f"TIMING: {func.__name__} took {(end - start).total_seconds()} seconds") +! return result + + +> def timeit(func): +! def wrapper(*args, **kwargs): +! return timeit_direct(func, *args, **kwargs) + +! return wrapper + + +> class LexiconMapper: +> def __init__(self): +> self._mappers = None + +> def map_value(self, value): +! value = value.strip() +! return self._make_lu_to_lexicon_mapper().get(value, value) + +> def _make_lu_to_lexicon_mapper(self): +! 
if self._mappers: +! return self._mappers + + # Lookup tables where CODE maps to MEANING +! lu_tables = [ +! "LU_AltitudeMethod", +! "LU_CollectionMethod", +! "LU_ConstructionMethod", +! "LU_CoordinateAccuracy", +! "LU_CoordinateMethod", +! "LU_CurrentUse", +! "LU_DataQuality", +! "LU_DataSource", +! "LU_Depth_CompletionSource", +! "LU_Discharge_ChemistrySource", +! "LU_LevelStatus", +! "LU_MajorAnalyte", +! "LU_MeasurementMethod", +! "LU_MinorTraceAnalyte", +! "LU_MonitoringStatus", +! "LU_SampleType", +! "LU_SiteType", +! "LU_Status", +! ] + + # Lookup tables intentionally skipped (kept for documentation only) + # Each entry explains why the table is excluded +! _lu_tables_skipped = { +! "LU_AltitudeDatum": "code is the value, so no need for mapping", +! "LU_CoordinateDatum": "code is the value, so no need for mapping", +! "LU_FieldNoteTypes": "not being used in the transfers since there are no records", +! "LU_Formations": "needs to be cleaned before it can be used", +! "LU_Lithology": "needs to be cleaned before it can be used", +! "LU_MeasuringAgency": "the abbreviation is what is used in the new schema", +! } +! mappers = {} + +! for lu_table in lu_tables: +! table = read_csv(lu_table) + +! for i, row in table.iterrows(): +! if lu_table == "LU_Formations": +! code = row.Code +! meaning = row.Meaning +! else: +! code = row.CODE +! meaning = row.MEANING + +! mappers.update({f"{lu_table}:{code}": meaning}) +! self._mappers = mappers +! 
return mappers + + +> lexicon_mapper = LexiconMapper() + + + # ============= EOF ============================================= From 48f503d1afbab2579e447522001e66ab9fcc5543 Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 3 Dec 2025 01:49:34 -0800 Subject: [PATCH 32/37] Enforce timezone info on `created_at` --- tests/features/steps/post_migration_legacy_data.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py index 5850bf04e..3baa7f5f3 100644 --- a/tests/features/steps/post_migration_legacy_data.py +++ b/tests/features/steps/post_migration_legacy_data.py @@ -396,9 +396,14 @@ def step_then_created_at_recent(context: Context): created_at = context.retrieved_location.created_at now = datetime.now(timezone.utc) - # Ensure both datetimes are timezone-aware for accurate comparison + # created_at should always be timezone-aware (configured in AutoBaseMixin with DateTime(timezone=True)) + # If it's naive, this indicates a database/ORM configuration issue if created_at.tzinfo is None: - created_at = created_at.replace(tzinfo=timezone.utc) + raise AssertionError( + "created_at is a naive datetime (no timezone info). " + "Ensure the database and ORM are configured to return timezone-aware datetimes in UTC. 
" + "AutoBaseMixin.created_at uses DateTime(timezone=True) with server_default=func.timezone('UTC', func.now())" + ) diff_seconds = abs((now - created_at).total_seconds()) assert diff_seconds < 3600, "created_at should be within last hour" From 43a8c5f5649ccd7b42ba826c4da8e150ffbd51c2 Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 3 Dec 2025 01:51:21 -0800 Subject: [PATCH 33/37] Ignore test coverage artifacts --- .gitignore | 7 + transfers/util.py,cover | 461 ---------------------------------------- 2 files changed, 7 insertions(+), 461 deletions(-) delete mode 100644 transfers/util.py,cover diff --git a/.gitignore b/.gitignore index 44b28e13c..4bf6245e0 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,13 @@ dist/ wheels/ *.egg-info +# Test coverage reports +*.cover +.coverage +.coverage.* +htmlcov/ +coverage.xml + # Virtual environments .venv requirements.txt diff --git a/transfers/util.py,cover b/transfers/util.py,cover deleted file mode 100644 index 5c2803392..000000000 --- a/transfers/util.py,cover +++ /dev/null @@ -1,461 +0,0 @@ - # =============================================================================== - # Copyright 2025 ross - # - # Licensed under the Apache License, Version 2.0 (the "License"); - # you may not use this file except in compliance with the License. - # You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. 
- # =============================================================================== -> import csv -> import io -> import os -> import re -> from datetime import datetime, timezone, timedelta -> from pathlib import Path - -> import numpy as np -> import pandas as pd -> import pytz -> from shapely import Point -> from sqlalchemy import select -> from sqlalchemy.orm import Session - -> from constants import SRID_WGS84, SRID_UTM_ZONE_13N -> from db import Thing, Location, DataProvenance -> from services.gcs_helper import get_storage_bucket - - # from services.lexicon_mapper import lexicon_mapper -> from services.util import ( -> transform_srid, -> get_epqs_elevation_from_point, -> convert_ft_to_m, -> convert_ngvd29_to_navd88, -> ) -> from transfers.logger import logger - -> NMA_COORDINATE_ACCURACY = { -> "5m": (5, "m"), -> "1": (0.1, "second"), -> "5": (0.5, "second"), -> "F": (5, "second"), -> "H": (0.01, "second"), -> "M": (1, "minute"), -> "R": (3, "second"), -> "S": (1, "second"), -> "T": (10, "second"), -> } - - -> def replace_nans(df: pd.DataFrame, default=None) -> pd.DataFrame: -! df = df.replace(pd.NA, default) -! return df.replace({np.nan: default}) - - -> def read_csv(name: str, dtype: dict | None = None) -> pd.DataFrame: - # Try to read from local data directory first -! local_file = Path(__file__).parent / "data" / f"{name}.csv" - -! if local_file.exists(): -! logger.info(f"Reading {name} from local file: {local_file}") -! if dtype: -! return pd.read_csv(local_file, dtype=dtype) -! else: -! return pd.read_csv(local_file) - - # Check cache directory -! p = get_transfers_data_path(Path("nma_csv_cache") / f"{name}.csv") -! if os.path.exists(p): -! logger.info(f"Reading {name} from cache: {p}") -! return pd.read_csv(p, dtype=dtype) - - # Fall back to GCS if local file doesn't exist -! logger.info(f"Local file and cache not found, reading {name} from GCS") -! bucket = get_storage_bucket() -! blob = bucket.blob(f"nma_csv/{name}.csv") -! 
data = blob.download_as_bytes() -! with open(p, "wb") as f: -! f.write(data) - -! if dtype: -! return pd.read_csv(io.BytesIO(data), dtype=dtype) -! else: -! return pd.read_csv(io.BytesIO(data)) - - -> def get_valid_point_ids(session, thing_type="water well"): -! things = get_valid_things(session, thing_type) -! valid_pointids = [thing.name for thing in things] -! return valid_pointids - - -> def get_valid_things(session, thing_type="water well"): -! return session.query(Thing).where(Thing.thing_type == thing_type).all() - - -> def extract_organization(alternate_id: str) -> str: -! if alternate_id.startswith("TWDB"): -! return "TWDB" -! elif alternate_id.startswith("NMED"): -! return "NMED" - - # TODO: There are a bunch of other formats used for AlternateSiteID. - # we should try to handle as many as possible but its not the end of the world - # if we have to update the organization for a particular alternate id at a later time -! for regex, org in ((r"^A-Z{1,2}-\d{5,6}$", "NMOSE"), (r"\d+(\.\d+){3,}", "PLSS")): - -! if re.match(regex, alternate_id): -! return org - -! return "Unknown" - - -> def get_transfers_data_path(name): -! def data_path(r): -! return Path(r) / "transfers" / "data" - -! root = data_path("/workspace") -! if not os.path.exists(root): -! root = data_path("..") -! if not os.path.exists(root): -! root = data_path(".") - -! return root / name - - -> def filter_non_transferred_wells(sess: Session, df: pd.DataFrame) -> pd.DataFrame: -! sql = select(Thing.name).where(Thing.thing_type == "water well") -! existing_ids = sess.execute(sql).scalars().all() -! return df[~(df["PointID"].isin(existing_ids))] - - -> def filter_by_welldata_datasource_and_project(df: pd.DataFrame) -> pd.DataFrame: -! path = get_transfers_data_path("valid_welldata_datasources.csv") -! with open(path, "r") as f: -! reader = csv.reader(f) -! _ = next(reader) -! valid_datasources = [row[0] for row in reader if row[1] == "Yes"] -! f.seek(0) -! 
invalid_datasources = [row[0] for row in reader if row[1] == "NO"] -! logger.info("Invalid WellData Datasources:") -! for vd in invalid_datasources: -! logger.info(f" {vd}") - -! counts = df.groupby("DataSource").size().reset_index(name="WellCount") -! counts = counts.sort_values("WellCount", ascending=False) -! for count in counts.itertuples(): -! logger.info(f"{count.DataSource}: {count.WellCount}") - -! pldf = read_csv("ProjectLocations") -! collabnet = pldf[pldf["ProjectName"] == "Water Level Network"] -! return df[ -! df["DataSource"].isin(valid_datasources) -! | df["PointID"].isin(collabnet["PointID"]) -! ] - - -> def filter_by_valid_measuring_agency(df: pd.DataFrame) -> pd.DataFrame: -! path = get_transfers_data_path("valid_measuring_agency.csv") - -! with open(path, "r") as f: -! reader = csv.reader(f) -! _ = next(reader) -! valid_measuring_agencies = [row[0] for row in reader if row[1] == "Yes"] -! logger.info("Valid Measuring Agencies:") -! for vma in valid_measuring_agencies: -! logger.info(f" {vma}") -! return df[df["MeasuringAgency"].isin(valid_measuring_agencies)] - - -> def filter_to_valid_point_ids(session: Session, df: pd.DataFrame) -> pd.DataFrame: -! valid_point_ids = get_valid_point_ids(session) -! return df[df["PointID"].isin(valid_point_ids)] - - -> def convert_mt_to_utc(dt_record: datetime): -! t = dt_record.time() -! if t.hour == 0 and t.minute == 0: - # no time was measured, so just set the timezone to UTC and keep - # time at 00:00 -! dt_record = dt_record.replace(tzinfo=timezone.utc) -! else: -! tz = pytz.timezone("America/Denver") -! dt_record = tz.localize(dt_record) -! if dt_record.dst() == timedelta(0): - # MST -! utc_offset = 7 -! else: - # MDT -! utc_offset = 6 -! dt_record = dt_record - timedelta(hours=utc_offset) -! dt_record = dt_record.replace(tzinfo=timezone.utc) -! return dt_record - - -> def chunk_by_size(df, chunk_size): -! for i in range(0, len(df), chunk_size): -! 
yield df.iloc[i : i + chunk_size] - - -> def make_location(row: pd.Series, elevations: dict) -> tuple: -> """ -> Returns a tuple of location data and the elevation method -> """ -> point = Point(row.Easting, row.Northing) - - # Convert the point to a WGS84 coordinate system -> transformed_point = transform_srid( -> point, source_srid=SRID_UTM_ZONE_13N, target_srid=SRID_WGS84 -> ) - -> z = row.Altitude -> if z: -> elevation_from_epqs = False -> z = convert_ft_to_m(z) - -> if row.AltDatum == "NGVD29": -! key = f"{row.PointID}, {transformed_point.x, transformed_point.y}" -! if key in elevations: -! z = elevations[key] -! else: -! z = convert_ngvd29_to_navd88( -! z, transformed_point.x, transformed_point.y -! ) -! elevations[key] = z -! else: -! elevation_from_epqs = True -! logger.info( -! f"Location {row.PointID} has no Altitude. Setting from National Map EPQS for " -! ) -! z = get_epqs_elevation_from_point(transformed_point.x, transformed_point.y) - -> if elevation_from_epqs: -! elevation_method = "USGS National Elevation Dataset (NED)" -> elif pd.isna(row.AltitudeMethod): -! 
elevation_method = None -> else: -> elevation_method = lexicon_mapper.map_value( -> f"LU_AltitudeMethod:{row.AltitudeMethod.strip()}" -> ) - - # Extract AMPAPI date fields (Date type, not DateTime) -> nma_date_created = None -> if row.DateCreated: -> nma_date_created = datetime.strptime( -> row.DateCreated, "%Y-%m-%d %H:%M:%S.%f" -> ).date() - -> nma_site_date = None -> if row.SiteDate: -> nma_site_date = datetime.strptime(row.SiteDate, "%Y-%m-%d %H:%M:%S.%f").date() - -> location = Location( -> nma_pk_location=row.LocationId, -> point=transformed_point.wkt, -> elevation=z, -> release_status="public" if row.PublicRelease else "private", -> nma_coordinate_notes=row.CoordinateNotes, -> nma_notes_location=row.LocationNotes, -> nma_date_created=nma_date_created, -> nma_site_date=nma_site_date, -> ) - -> return location, elevation_method - - -> def make_location_data_provenance( -> row: pd.Series, location: Location, elevation_method: str | None -> ) -> list[DataProvenance]: -! provenance_records = [] - -! if row.AltitudeAccuracy or row.CoordinateAccuracy: -! provenance = DataProvenance( -! target_id=location.id, -! target_table="location", -! field_name="elevation", -! origin_source=None, -! collection_method=elevation_method, -! accuracy_value=( -! None -! if pd.isna(row.AltitudeAccuracy) -! else convert_ft_to_m(row.AltitudeAccuracy) -! ), -! accuracy_unit="m", -! ) -! 
provenance_records.append(provenance) - - # TODO: AMP feedback is required for transfering coordinate accuracy values - # from NM_Aquifer to Ocotillo - # if row.CoordinateAccuracy == "U" or pd.isna(row.CoordinateAccuracy): - # # map "Unknown" to None - # row.CoordinateAccuracy = None - # elif row.CoordinateAccuracy == "5m": - # row.CoordinateAccuracy = 5.0 - # else: - # seconds = 0 - # minutes = 0 - # if row.CoordinateAccuracy == "1": - # seconds = 0.1 - # elif row.CoordinateAccuracy == "5": - # seconds = 0.5 - # elif row.CoordinateAccuracy == "F": - # seconds = 5 - # elif row.CoordinateAccuracy == "H": - # seconds = 0.01 - # elif row.CoordinateAccuracy == "M": - # minutes = 1 - # elif row.CoordinateAccuracy == "R": - # seconds = 3 - # elif row.CoordinateAccuracy == "S": - # seconds = 1 - # else: - # seconds = 10 - # coordinate_accuracy_decimal_deg = minutes/60 + seconds / 3600 - - # """ - # Developer's notes - - # To convert accuracy from decimal degrees to meters we do the following: - - # 1. Add the coordinate accuracy to both the latitude and longitude to - # find the "+" distance from the location - # 2. Convert "+" accuracy coordinates from decimal degrees to UTM Zone 13 - # N - # 3. Find the distance in meters from the original Easting/Northing and - # define this as the "+" accuracy in meters - # 4. Subtract the coordinate accuracy to both the latitude and longitude - # to find the "-" distance from the location - # 5. Convert the "-" accuracy coordinates from decimal degrees to UTM Zone - # 13 N - # 6. Find the distance in meters from the original Easting/Northing and - # define this as the "-" accuracy in meters - # 7. 
Set the coordinate accuracy in meters as the mean of the "+" and "-" - # distances from the location - # """ - # original_longitude = transformed_point.x - # original_latitude = transformed_point.y - - # plus_longitude = original_longitude + coordinate_accuracy_decimal_deg - # plus_latitude = original_latitude + coordinate_accuracy_decimal_deg - # plus_point_decimal_deg = Point(plus_longitude, plus_latitude) - # plus_point_utm_zone_13_n = transform_srid( - # plus_point_decimal_deg, - # SRID_WGS84, - # SRID_UTM_ZONE_13N) - - # minus_longitude = original_longitude - coordinate_accuracy_decimal_deg - # minus_latitude = original_latitude - coordinate_accuracy_decimal_deg - # minus_point_decimal_deg = Point(minus_longitude, minus_latitude) - -! if row.CoordinateMethod or row.CoordinateAccuracy: -! coordinate_method = ( -! lexicon_mapper.map_value(f"LU_CoordinateMethod:{row.CoordinateMethod}") -! if not pd.isna(row.CoordinateMethod) -! else None -! ) - -! accuracy_value, accuracy_unit = NMA_COORDINATE_ACCURACY.get( -! row.CoordinateAccuracy, (None, None) -! ) - -! provenance = DataProvenance( -! target_id=location.id, -! target_table="location", -! field_name="point", -! origin_source=None, -! collection_method=coordinate_method, -! accuracy_value=accuracy_value, -! accuracy_unit=accuracy_unit, -! ) -! provenance_records.append(provenance) - -! return provenance_records - - -> def timeit_direct(func, *args, **kwargs): -! start = datetime.now() -! result = func(*args, **kwargs) -! end = datetime.now() -! logger.info(f"TIMING: {func.__name__} took {(end - start).total_seconds()} seconds") -! return result - - -> def timeit(func): -! def wrapper(*args, **kwargs): -! return timeit_direct(func, *args, **kwargs) - -! return wrapper - - -> class LexiconMapper: -> def __init__(self): -> self._mappers = None - -> def map_value(self, value): -! value = value.strip() -! return self._make_lu_to_lexicon_mapper().get(value, value) - -> def _make_lu_to_lexicon_mapper(self): -! 
if self._mappers: -! return self._mappers - - # Lookup tables where CODE maps to MEANING -! lu_tables = [ -! "LU_AltitudeMethod", -! "LU_CollectionMethod", -! "LU_ConstructionMethod", -! "LU_CoordinateAccuracy", -! "LU_CoordinateMethod", -! "LU_CurrentUse", -! "LU_DataQuality", -! "LU_DataSource", -! "LU_Depth_CompletionSource", -! "LU_Discharge_ChemistrySource", -! "LU_LevelStatus", -! "LU_MajorAnalyte", -! "LU_MeasurementMethod", -! "LU_MinorTraceAnalyte", -! "LU_MonitoringStatus", -! "LU_SampleType", -! "LU_SiteType", -! "LU_Status", -! ] - - # Lookup tables intentionally skipped (kept for documentation only) - # Each entry explains why the table is excluded -! _lu_tables_skipped = { -! "LU_AltitudeDatum": "code is the value, so no need for mapping", -! "LU_CoordinateDatum": "code is the value, so no need for mapping", -! "LU_FieldNoteTypes": "not being used in the transfers since there are no records", -! "LU_Formations": "needs to be cleaned before it can be used", -! "LU_Lithology": "needs to be cleaned before it can be used", -! "LU_MeasuringAgency": "the abbreviation is what is used in the new schema", -! } -! mappers = {} - -! for lu_table in lu_tables: -! table = read_csv(lu_table) - -! for i, row in table.iterrows(): -! if lu_table == "LU_Formations": -! code = row.Code -! meaning = row.Meaning -! else: -! code = row.CODE -! meaning = row.MEANING - -! mappers.update({f"{lu_table}:{code}": meaning}) -! self._mappers = mappers -! 
return mappers - - -> lexicon_mapper = LexiconMapper() - - - # ============= EOF ============================================= From 027299060f94d38819f20412f931060ad6ec372d Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 3 Dec 2025 01:51:29 -0800 Subject: [PATCH 34/37] Delete .coverage --- .coverage | Bin 53248 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .coverage diff --git a/.coverage b/.coverage deleted file mode 100644 index 5417251745baf9f63193ac63e03d4d4b0edab5c4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 53248 zcmeI)&u$Y(90%}S+t^vh)&|kYiXxQhftrXU#_geMdw@`)3Y8WpX)l#9V|$!z@$Q=4 zH6(|sHB>@E>Kjyj1o{X(L!W>iIrY{{p;CXpon8M4HsVkeNWUxF**`P0GoSfq$JSiG zcG-!P==gyvBk_T;V3?+HUI@c5EP9sdnQjHz$)yMM*BslQwQCt058o-<-x#I*&xU=Y ze8pZb{Zf8!{`Zn=J)HlwAgvZ%zy<*bKmY>&Z-K$JQlYZ6WZwHelFhD)0@+f5el36c z)z;?rme}6BaCuAUbK-PS(6+uVHbvmy6+IP*j?-1b@!C#HMvk{5qFt4S?uV+)6CItS zsg46~#PNBr;j}4Mq;@EYUf{Sg*cZ3d{#qO$xm%?6B7K4qQO=IXA;jq<`l<+2M+M4j zsZi(Qw9{TKuHOHlSg4#hVLniCnPkwS$NWTT=)|ZfmpYVC%MaR|`(_}$)~*WIgdCQz zuE4uQ8ob z^XT_WqH}aFhzH;46smoD$ zC#o|g=Ni?q`kr&UuZESsCPp#W;$nFIn&3iE3ph2OuY7dEG^kaoz3_II-fuNzKl0<_ z233BePA{dOj?Wb;D=X%M+i^o-o<`G&!r_iJ(P)OJXEYtYKD*s;FPUs4+>5$h;dkOL z!?A^~9BR$ueWsRT$bdVV@RNX)(?e}-oLS0)Xm-4=Aca@OK z(DLu7K<=oGy)2Une%GS!PrZa2;jrwr6I9SRTD$C2}Ju7ZTNo^&63KLBz zt;s3FXnM(2GVBNGK=QIdGL11BCDT5VT%qEu7xII7 zgFZjA6Itdnqbz@}_Sf&oE{!HV=>*heX$;mgfZ%{_Nr7e4??-X_<-@pn^4v4ZuTEfe zoZ3^U)i^kH@2h?oIy9~Fv=x4wo)y(y8S*F;Rz=hIyGnX`ZlD;KLb|6DiZ~B1&PwM- zX@vWn#?GB;oTqeh;bDH8I$heoXvcM>*G}duo`!X*H1>1nDZ5=-kfp6^w0?oS#0wY&wC<7beHQrrUhT^-u1mMqRhNBU4T%;zsnJqKUP#Ln zg|&X3QT4FV8=00bZa0SG_<0uX=z1Rwx`H$&iRZg9%_FF*fd{l7P( zVv!37KmY;|fB*y_009U<00Izzz-to7npvZ)zyE)3*w0_nU63pUAOHafKmY;|fB*y_ z009U<00M_9kj<7Y{r&%A!+w0IS;5UA009U<00Izz00bZa0SG_<0uY!jaLzntSp&NM z_=)xR%Dc3)aMAeDSjZX1oI!^d+Bb~zOHcl(7RJ*MqIX;$=k)jg&kXz7cmV7}00Izz z00bZa0SG_<0uX=z1R!uk1adjciogFi7LG`XPz(q_00Izz00bZa0SG_<0uX=z1f~k` 
z_y73(Ka~b2ApijgKmY;|fB*y_009U<00Kuw0PFuBnW{uVAOHafKmY;|fB*y_009U< M00Pql;`RUk1r+UXZU6uP From f0e730c2f06f1b2abce75f43840f482ebbc8e4c8 Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 3 Dec 2025 01:53:56 -0800 Subject: [PATCH 35/37] Remove noisy EOF --- tests/test_thing.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_thing.py b/tests/test_thing.py index 3792b4302..28290dada 100644 --- a/tests/test_thing.py +++ b/tests/test_thing.py @@ -1130,6 +1130,3 @@ def test_delete_thing_id_link_404_not_found(second_thing_id_link): assert response.status_code == 404 data = response.json() assert data["detail"] == f"ThingIdLink with ID {bad_id} not found." - - -# ============= EOF ============================================= From 070fcbae2dd849ba386f52a5201452634abdba03 Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 3 Dec 2025 01:58:00 -0800 Subject: [PATCH 36/37] Simplify error message --- tests/features/steps/post_migration_legacy_data.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/features/steps/post_migration_legacy_data.py b/tests/features/steps/post_migration_legacy_data.py index 3baa7f5f3..185b1a758 100644 --- a/tests/features/steps/post_migration_legacy_data.py +++ b/tests/features/steps/post_migration_legacy_data.py @@ -401,8 +401,7 @@ def step_then_created_at_recent(context: Context): if created_at.tzinfo is None: raise AssertionError( "created_at is a naive datetime (no timezone info). " - "Ensure the database and ORM are configured to return timezone-aware datetimes in UTC. " - "AutoBaseMixin.created_at uses DateTime(timezone=True) with server_default=func.timezone('UTC', func.now())" + "Check ORM/database config for timezone-aware UTC datetimes (see AutoBaseMixin.created_at)." 
) diff_seconds = abs((now - created_at).total_seconds()) From f3e9587ad96bf88b01340f532a26a48a92347ec1 Mon Sep 17 00:00:00 2001 From: Kimball Bighorse Date: Wed, 3 Dec 2025 01:59:45 -0800 Subject: [PATCH 37/37] Remove unnecessary conditionals --- transfers/util.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/transfers/util.py b/transfers/util.py index 5216c204f..876e142fc 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -64,10 +64,7 @@ def read_csv(name: str, dtype: dict | None = None) -> pd.DataFrame: if local_file.exists(): logger.info(f"Reading {name} from local file: {local_file}") - if dtype: - return pd.read_csv(local_file, dtype=dtype) - else: - return pd.read_csv(local_file) + return pd.read_csv(local_file, dtype=dtype) # Check cache directory p = get_transfers_data_path(Path("nma_csv_cache") / f"{name}.csv") @@ -83,10 +80,7 @@ def read_csv(name: str, dtype: dict | None = None) -> pd.DataFrame: with open(p, "wb") as f: f.write(data) - if dtype: - return pd.read_csv(io.BytesIO(data), dtype=dtype) - else: - return pd.read_csv(io.BytesIO(data)) + return pd.read_csv(io.BytesIO(data), dtype=dtype) def get_valid_point_ids(session, thing_type="water well"):