From f8b54fcfdc809d36acafb9671fa1769690af273a Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 13 Jan 2025 11:28:56 -0700 Subject: [PATCH 01/53] Update README sources to indicate all names and include links Some sources were listed under ST2 and CKAN. This change explicitly names the individual sources. This will help with development because some organizations plan to add data to CKAN (Santa Fe expressed interest). So by explicitly naming the sources, we can better communicate to users where the data is coming from. Also, each source now has a link to where the data is hosted. --- README.md | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 5a9dd9a..64565e7 100644 --- a/README.md +++ b/README.md @@ -18,20 +18,18 @@ pip install nmuwd ## Sources Data comes from the following sources. We are continuously adding new sources as we learn of them and they become available. If you have data that you would like to be part of the Data Integration Engine please get in touch at newmexicowaterdata@nmt.edu. 
- - [Bureau of Reclamation](https://data.usbr.gov/) - - [USGS (NWIS)](https://waterdata.usgs.gov/nwis) - - [ST2 (NMWDI)](https://st2.newmexicowaterdata.org/FROST-Server/v1.1/) - - Pecos Valley Artesian Conservancy District - - Bernalillo County - - New Mexico Environment Department Drinking Water Bureau - - [NM Water Data CKAN catalog](https://catalog.newmexicowaterdata.org/) - - OSE Roswell District Office - - ISC Seven Rivers - - [New Mexico Bureau of Geology and Mineral Resources (AMP)](https://waterdata.nmt.edu/) - - [Water Quality Portal](https://www.waterqualitydata.us/) - - USGS - - EPA - - and over 400 state, federal, tribal, and local agencies +- [Bernalillo County (BernCo)](https://st2.newmexicowaterdata.org/FROST-Server/v1.1/Locations?$filter=properties/agency%20eq%20%27BernCo%27) +- [Bureau of Reclamation (BoR)](https://data.usbr.gov/) +- [New Mexico Bureau of Geology and Mineral Resources (AMP)](https://waterdata.nmt.edu/) +- [New Mexico Environment Department Drinking Water Bureau (DWB)](https://nmenv.newmexicowaterdata.org/FROST-Server/v1.1/) +- [New Mexico Office of the State Engineer ISC Seven Rivers (ISC Seven Rivers)](https://nmisc-wf.gladata.com/api/getMonitoringPoints.ashx) +- [New Mexico Office of the State Engineer Roswell District Office (OSE Roswell)](https://catalog.newmexicowaterdata.org/dataset/pecos_region_manual_groundwater_levels) +- [Pecos Valley Artesian Conservancy District (PVACD)](https://st2.newmexicowaterdata.org/FROST-Server/v1.1/Locations?$filter=properties/agency%20eq%20%27PVACD%27) +- [USGS (NWIS)](https://waterdata.usgs.gov/nwis) +- [Water Quality Portal (WQP)](https://www.waterqualitydata.us/) + - USGS + - EPA + - and over 400 state, federal, tribal, and local agencies ### Source Inclusion & Exclusion From debb02e9c3cba2ee11c6d18c4e77cc9b9aedaaf8 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 13 Jan 2025 11:30:42 -0700 Subject: [PATCH 02/53] Separate analyte and water level sources to differentiate where data comes 
from in config report There are some sources that only report water levels or analyte measurements. Prior to this update, for example, if the user requested water level data, the sources that only reported analyte measurements would say that data was being pulled from them. Now, the sources that only report water level or analyte measurements are separated so that the user can see where the data is coming from. --- backend/config.py | 24 +++++------ frontend/cli.py | 103 ++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 96 insertions(+), 31 deletions(-) diff --git a/backend/config.py b/backend/config.py index 69af728..33f1d0b 100644 --- a/backend/config.py +++ b/backend/config.py @@ -106,15 +106,15 @@ class Config(Loggable): wkt: str = "" # sources - use_source_nmbgmr: bool = True - use_source_wqp: bool = True - use_source_iscsevenrivers: bool = True - use_source_nwis: bool = True - use_source_oseroswell: bool = True - use_source_pvacd: bool = True - use_source_bor: bool = True - use_source_dwb: bool = True - use_source_bernco: bool = True + use_source_nmbgmr: bool = False + use_source_wqp: bool = False + use_source_iscsevenrivers: bool = False + use_source_nwis: bool = False + use_source_oseroswell: bool = False + use_source_pvacd: bool = False + use_source_bor: bool = False + use_source_dwb: bool = False + use_source_bernco: bool = False analyte: str = "" @@ -171,8 +171,6 @@ def __init__(self, model=None, payload=None): def analyte_sources(self): sources = [] - # if self.use_source_wqp: - # sources.append((WQPSiteSource, WQPAnalyteSource)) if self.use_source_bor: sources.append((BORSiteSource(), BORAnalyteSource())) if self.use_source_wqp: @@ -227,11 +225,9 @@ def water_level_sources(self): ) if self.use_source_pvacd: sources.append((PVACDSiteSource(), PVACDWaterLevelSource())) - # sources.append((EBIDSiteSource, EBIDWaterLevelSource)) if self.use_source_bernco: sources.append((BernCoSiteSource(), BernCoWaterLevelSource())) - # if 
self.use_source_bor: - # sources.append((BORSiteSource(), BORWaterLevelSource())) + for s, ss in sources: s.set_config(self) diff --git a/frontend/cli.py b/frontend/cli.py index 208ca68..1c94608 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -31,7 +31,7 @@ def cli(): pass -SOURCE_OPTIONS = [ +ALL_SOURCE_OPTIONS = [ click.option( "--no-amp", is_flag=True, @@ -97,6 +97,89 @@ def cli(): ), ] +ANALYTE_SOURCE_OPTIONS = [ + click.option( + "--no-amp", + is_flag=True, + default=True, + show_default=True, + help="Include/Exclude AMP data. Default is to include", + ), + click.option( + "--no-isc-seven-rivers", + is_flag=True, + default=True, + show_default=True, + help="Exclude ISC Seven Rivers data. Default is to include", + ), + click.option( + "--no-bor", + is_flag=True, + default=True, + show_default=True, + help="Exclude BOR data. Default is to include", + ), + click.option( + "--no-wqp", + is_flag=True, + default=True, + show_default=True, + help="Exclude WQP data. Default is to include", + ), + click.option( + "--no-dwb", + is_flag=True, + default=True, + show_default=True, + help="Exclude DWB data. Default is to include", + ), +] + +WATERLEVEL_SOURCE_OPTIONS = [ + click.option( + "--no-amp", + is_flag=True, + default=True, + show_default=True, + help="Include/Exclude AMP data. Default is to include", + ), + click.option( + "--no-nwis", + is_flag=True, + default=True, + show_default=True, + help="Exclude NWIS data. Default is to include", + ), + click.option( + "--no-pvacd", + is_flag=True, + default=True, + show_default=True, + help="Exclude PVACD data. Default is to include", + ), + click.option( + "--no-isc-seven-rivers", + is_flag=True, + default=True, + show_default=True, + help="Exclude ISC Seven Rivers data. Default is to include", + ), + click.option( + "--no-ckan", + is_flag=True, + default=True, + show_default=True, + help="Exclude CKAN data. 
Default is to include", + ), + click.option( + "--no-bernco", + is_flag=True, + default=True, + show_default=True, + help="Exclude Bernalillo County Water Authority data. Default is to include", + ), +] + SPATIAL_OPTIONS = [ click.option( "--bbox", @@ -179,7 +262,7 @@ def wells(bbox, county): @add_options(TIMESERIES_OPTIONS) @add_options(DT_OPTIONS) @add_options(SPATIAL_OPTIONS) -@add_options(SOURCE_OPTIONS) +@add_options(WATERLEVEL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def waterlevels( separated_timeseries, @@ -192,10 +275,7 @@ def waterlevels( no_nwis, no_pvacd, no_isc_seven_rivers, - no_bor, - no_wqp, no_ckan, - no_dwb, no_bernco, site_limit, dry, @@ -211,10 +291,7 @@ def waterlevels( config.use_source_nwis = no_nwis config.use_source_pvacd = no_pvacd config.use_source_iscsevenrivers = no_isc_seven_rivers - config.use_source_bor = no_bor - config.use_source_wqp = no_wqp config.use_source_oseroswell = no_ckan - config.use_source_dwb = no_dwb config.use_source_bernco = no_bernco config.start_date = start_date @@ -234,7 +311,7 @@ def waterlevels( @add_options(TIMESERIES_OPTIONS) @add_options(DT_OPTIONS) @add_options(SPATIAL_OPTIONS) -@add_options(SOURCE_OPTIONS) +@add_options(ANALYTE_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def analytes( analyte, @@ -245,14 +322,10 @@ def analytes( bbox, county, no_amp, - no_nwis, - no_pvacd, no_isc_seven_rivers, no_bor, no_wqp, - no_ckan, no_dwb, - no_bernco, site_limit, dry, ): @@ -267,14 +340,10 @@ def analytes( config.output_single_timeseries = unified_timeseries config.use_source_nmbgmr = no_amp - config.use_source_nwis = no_nwis - config.use_source_pvacd = no_pvacd config.use_source_iscsevenrivers = no_isc_seven_rivers config.use_source_bor = no_bor config.use_source_wqp = no_wqp - config.use_source_oseroswell = no_ckan config.use_source_dwb = no_dwb - config.use_source_bernco = no_bernco config.start_date = start_date config.end_date = end_date From 92e804d211fe76079c5f383fef346808434a6d76 Mon Sep 17 00:00:00 2001 
From: jacob-a-brown Date: Mon, 13 Jan 2025 18:33:28 +0000 Subject: [PATCH 03/53] Formatting changes --- backend/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/config.py b/backend/config.py index 33f1d0b..dc6f291 100644 --- a/backend/config.py +++ b/backend/config.py @@ -228,7 +228,6 @@ def water_level_sources(self): if self.use_source_bernco: sources.append((BernCoSiteSource(), BernCoWaterLevelSource())) - for s, ss in sources: s.set_config(self) ss.set_config(self) From acb9fd0a0a8b337dd8a866e86a03c7ebd2b0f350 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 13 Jan 2025 12:29:29 -0700 Subject: [PATCH 04/53] Update CKAN to NM OSE Roswell in sources documentation More sources will be added to CKAN, so it will no longer only be NM OSE Roswell data. So, NM OSE Roswell should be excluded/included based on the source, not that the data is hosted on CKAN. --- backend/config.py | 36 ++--------------------- frontend/cli.py | 75 +++-------------------------------------------- 2 files changed, 7 insertions(+), 104 deletions(-) diff --git a/backend/config.py b/backend/config.py index 33f1d0b..821894f 100644 --- a/backend/config.py +++ b/backend/config.py @@ -61,7 +61,7 @@ "wqp", "iscsevenrivers", "nwis", - "oseroswell", + "nmoseroswell", "pvacd", "bor", "dwb", @@ -110,7 +110,7 @@ class Config(Loggable): use_source_wqp: bool = False use_source_iscsevenrivers: bool = False use_source_nwis: bool = False - use_source_oseroswell: bool = False + use_source_nmoseroswell: bool = False use_source_pvacd: bool = False use_source_bor: bool = False use_source_dwb: bool = False @@ -204,7 +204,7 @@ def water_level_sources(self): if self.use_source_nwis: sources.append((NWISSiteSource(), NWISWaterLevelSource())) - if self.use_source_oseroswell: + if self.use_source_nmoseroswell: sources.append( ( OSERoswellSiteSource(HONDO_RESOURCE_ID), @@ -235,36 +235,6 @@ def water_level_sources(self): return sources - # def site_sources(self): - # sources = [ - # 
NMBGMRSiteSource(), - # WQPSiteSource(), - # ISCSevenRiversSiteSource(), - # NWISSiteSource(), - # DWBSiteSource(), - # BORSiteSource(), - # PVACDSiteSource(), - # EBIDSiteSource(), - # OSERoswellSiteSource(HONDO_RESOURCE_ID), - # OSERoswellSiteSource(FORT_SUMNER_RESOURCE_ID), - # OSERoswellSiteSource(ROSWELL_RESOURCE_ID), - # ] - # - # # if self.use_source_nmbgmr: - # # sources.append(NMBGMRSiteSource) - # # if self.use_source_isc_seven_rivers: - # # sources.append(ISCSevenRiversSiteSource) - # # if self.use_source_ose_roswell: - # # sources.append(OSERoswellSiteSource) - # # if self.use_source_nwis: - # # sources.append(USGSSiteSource) - # # if self.use_source_st2: - # # sources.append(PVACDSiteSource) - # # sources.append(EBIDSiteSource) - # # if self.use_source_bor: - # # sources.append(BORSiteSource) - # return sources - def bbox_bounding_points(self, bbox=None): if bbox is None: bbox = self.bbox diff --git a/frontend/cli.py b/frontend/cli.py index 1c94608..e2eb1e0 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -30,73 +30,6 @@ def cli(): pass - -ALL_SOURCE_OPTIONS = [ - click.option( - "--no-amp", - is_flag=True, - default=True, - show_default=True, - help="Include/Exclude AMP data. Default is to include", - ), - click.option( - "--no-nwis", - is_flag=True, - default=True, - show_default=True, - help="Exclude NWIS data. Default is to include", - ), - click.option( - "--no-pvacd", - is_flag=True, - default=True, - show_default=True, - help="Exclude PVACD data. Default is to include", - ), - click.option( - "--no-isc-seven-rivers", - is_flag=True, - default=True, - show_default=True, - help="Exclude ISC Seven Rivers data. Default is to include", - ), - click.option( - "--no-bor", - is_flag=True, - default=True, - show_default=True, - help="Exclude BOR data. Default is to include", - ), - click.option( - "--no-wqp", - is_flag=True, - default=True, - show_default=True, - help="Exclude WQP data. 
Default is to include", - ), - click.option( - "--no-ckan", - is_flag=True, - default=True, - show_default=True, - help="Exclude CKAN data. Default is to include", - ), - click.option( - "--no-dwb", - is_flag=True, - default=True, - show_default=True, - help="Exclude DWB data. Default is to include", - ), - click.option( - "--no-bernco", - is_flag=True, - default=True, - show_default=True, - help="Exclude Bernalillo County Water Authority data. Default is to include", - ), -] - ANALYTE_SOURCE_OPTIONS = [ click.option( "--no-amp", @@ -165,11 +98,11 @@ def cli(): help="Exclude ISC Seven Rivers data. Default is to include", ), click.option( - "--no-ckan", + "--no-nm-ose-roswell", is_flag=True, default=True, show_default=True, - help="Exclude CKAN data. Default is to include", + help="Exclude NM OSE Roswell data. Default is to include", ), click.option( "--no-bernco", @@ -275,7 +208,7 @@ def waterlevels( no_nwis, no_pvacd, no_isc_seven_rivers, - no_ckan, + no_nm_ose_roswell, no_bernco, site_limit, dry, @@ -291,7 +224,7 @@ def waterlevels( config.use_source_nwis = no_nwis config.use_source_pvacd = no_pvacd config.use_source_iscsevenrivers = no_isc_seven_rivers - config.use_source_oseroswell = no_ckan + config.use_source_nmoseroswell = no_nm_ose_roswell config.use_source_bernco = no_bernco config.start_date = start_date From 899a17e663069b0cecd77ceff5cb1b1471e0c224 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 14 Jan 2025 09:12:28 -0700 Subject: [PATCH 05/53] Updated source names | Set source to False if not in use Source names have been updated to indicate the organization and not where the data is hosted (such as CKAN or ST2). State agencies have also been prepended to OSE Roswell, ISC Seven Rivers, AMP, and DWB for clarity. Set source to False if the source is not in use. With this change, however, the user can still set `--no-{source}` even if it doesn't report that parameter. 
This will prevent unnecessary crashes --- README.md | 53 ++++++++------------- backend/config.py | 65 +++++++++++++------------- frontend/cli.py | 117 ++++++++++++++++++++++++++-------------------- 3 files changed, 118 insertions(+), 117 deletions(-) diff --git a/README.md b/README.md index 64565e7..b0af681 100644 --- a/README.md +++ b/README.md @@ -19,31 +19,36 @@ pip install nmuwd Data comes from the following sources. We are continuously adding new sources as we learn of them and they become available. If you have data that you would like to be part of the Data Integration Engine please get in touch at newmexicowaterdata@nmt.edu. - [Bernalillo County (BernCo)](https://st2.newmexicowaterdata.org/FROST-Server/v1.1/Locations?$filter=properties/agency%20eq%20%27BernCo%27) + - Available data: water levels - [Bureau of Reclamation (BoR)](https://data.usbr.gov/) -- [New Mexico Bureau of Geology and Mineral Resources (AMP)](https://waterdata.nmt.edu/) -- [New Mexico Environment Department Drinking Water Bureau (DWB)](https://nmenv.newmexicowaterdata.org/FROST-Server/v1.1/) -- [New Mexico Office of the State Engineer ISC Seven Rivers (ISC Seven Rivers)](https://nmisc-wf.gladata.com/api/getMonitoringPoints.ashx) -- [New Mexico Office of the State Engineer Roswell District Office (OSE Roswell)](https://catalog.newmexicowaterdata.org/dataset/pecos_region_manual_groundwater_levels) + - Available data: water quality +- [New Mexico Bureau of Geology and Mineral Resources (NMBGMR) Aquifer Mapping Program (AMP)](https://waterdata.nmt.edu/) + - Available data: water levels, water quality +- [New Mexico Environment Department Drinking Water Bureau (NMED DWB)](https://nmenv.newmexicowaterdata.org/FROST-Server/v1.1/) + - Available data: water quality +- [New Mexico Office of the State Engineer ISC Seven Rivers (NMOSE ISC Seven Rivers)](https://nmisc-wf.gladata.com/api/getMonitoringPoints.ashx) + - Available data: water levels, water quality +- [New Mexico Office of the State Engineer 
Roswell District Office (NMOSE Roswell)](https://catalog.newmexicowaterdata.org/dataset/pecos_region_manual_groundwater_levels) + - Available data: water levels - [Pecos Valley Artesian Conservancy District (PVACD)](https://st2.newmexicowaterdata.org/FROST-Server/v1.1/Locations?$filter=properties/agency%20eq%20%27PVACD%27) + - Available data: water levels - [USGS (NWIS)](https://waterdata.usgs.gov/nwis) + - Available data: water levels - [Water Quality Portal (WQP)](https://www.waterqualitydata.us/) - - USGS - - EPA - - and over 400 state, federal, tribal, and local agencies - + - Available data: water quality ### Source Inclusion & Exclusion -The Data Integration Engine enables the user to obtain groundwater level and groundwater quality data from a variety of sources. Data from sources are included in the output unless specifically excluded. The following flags are available to exclude a specific data source: +The Data Integration Engine enables the user to obtain groundwater level and groundwater quality data from a variety of sources. Data from sources are automatically included in the output unless specifically excluded. 
The following flags are available to exclude a specific data source: -- `--no-amp` to exclude New Mexico Bureau of Geology and Mineral Resources Aquifer Mapping Program (AMP) data +- `--no-bernco` to exclude Bernalillo County (BernCo) data - `--no-bor` to exclude Bureaof of Reclamation data +- `--no-nmbgmr-amp` to exclude New Mexico Bureau of Geology and Mineral Resources (NMBGMR) Aquifer Mapping Program (AMP) data +- `--no-nmed-dwb` to exclude New Mexico Environment Department (NMED) Drinking Water Bureau (DWB) data +- `--no-nmose-isc-seven-rivers` to exclude New Mexico Office of State Engineer (NMOSE) Interstate Stream Commission (ISC) Seven Rivers data +- `--no-nmose-roswell` to exclude New Mexico Office of State Engineer (NMOSE) Roswell data - `--no-nwis` to exclude USGS NWIS data - `--no-pvacd` to exclude Pecos Valley Artesian Convservancy District (PVACD) data -- `--no-isc-seven-rivers` to exclude Interstate Stream Commission (ISC) Seven Rivers data - `--no-wqp` to exclude Water Quality Portal (WQP) data -- `--no-ckan` to exclude NM OSE Roswell data that is hosted on CKAN -- `--no-dwb` to exclude New Mexico Environment Department Drinking Water Bureau (DWB) data -- `--no-bernco` to exclude Bernalillo County (BernCo) data ### Water Levels @@ -55,18 +60,6 @@ weave waterlevels followed by the desired output type, source filters, date filters, geographic filters, and excluded data sources. 
-#### Available Data Sources -The following data sources are available for groundwater levels: - -- amp -- bor -- ckan -- dwb -- isc-seven-rivers -- nwis -- pvacd -- bernco - ### Water Quality To obtain groundwater quality, use @@ -93,14 +86,6 @@ The following analytes are currently available for retrieval: - TDS - Uranium -#### Available Data Sources -The follow data sources are available for analytes, though not every source has measurements for every analyte: -- bor -- wqp -- isc-seven-rivers -- amp -- dwb - ### Geographic Filters The following flags can be used to geographically filter data: diff --git a/backend/config.py b/backend/config.py index 821894f..c2efec9 100644 --- a/backend/config.py +++ b/backend/config.py @@ -57,37 +57,37 @@ from .connectors.wqp.source import WQPSiteSource, WQPAnalyteSource SOURCE_KEYS = ( - "nmbgmr", - "wqp", - "iscsevenrivers", + "bernco", + "bor", + "nmbgmr_amp", + "nmed_dwb", + "nmose_isc_seven_rivers", + "nmose_roswell", "nwis", - "nmoseroswell", "pvacd", - "bor", - "dwb", - "bernco", + "wqp", ) def get_source(source): - if source == "nmbgmr": + if source == "bernco": + return BernCoSiteSource() + elif source == "bor": + return BORSiteSource() + elif source == "nmbgmr_amp": return NMBGMRSiteSource() - elif source == "wqp": - return WQPSiteSource() - elif source == "iscsevenrivers": + elif source == "nmed_dwb": + return DWBSiteSource() + elif source == "nmose_isc_seven_rivers": return ISCSevenRiversSiteSource() + elif source == "nmose_roswell": + return OSERoswellSiteSource(HONDO_RESOURCE_ID) elif source == "nwis": return NWISSiteSource() - elif source == "oseroswell": - return OSERoswellSiteSource(HONDO_RESOURCE_ID) elif source == "pvacd": return PVACDSiteSource() - elif source == "bor": - return BORSiteSource() - elif source == "dwb": - return DWBSiteSource() - elif source == "bernco": - return BernCoSiteSource() + elif source == "wqp": + return WQPSiteSource() return None @@ -106,15 +106,16 @@ class Config(Loggable): wkt: 
str = "" # sources - use_source_nmbgmr: bool = False - use_source_wqp: bool = False - use_source_iscsevenrivers: bool = False + use_source_bernco: bool = False + use_source_bor: bool = False + use_source_nmbgmr_amp: bool = False + use_source_nmed_dwb: bool = False + use_source_nmose_isc_seven_rivers: bool = False + use_source_nmose_roswell: bool = False use_source_nwis: bool = False - use_source_nmoseroswell: bool = False use_source_pvacd: bool = False - use_source_bor: bool = False - use_source_dwb: bool = False - use_source_bernco: bool = False + use_source_wqp: bool = False + analyte: str = "" @@ -175,11 +176,11 @@ def analyte_sources(self): sources.append((BORSiteSource(), BORAnalyteSource())) if self.use_source_wqp: sources.append((WQPSiteSource(), WQPAnalyteSource())) - if self.use_source_iscsevenrivers: + if self.use_source_nmose_isc_seven_rivers: sources.append((ISCSevenRiversSiteSource(), ISCSevenRiversAnalyteSource())) - if self.use_source_nmbgmr: + if self.use_source_nmbgmr_amp: sources.append((NMBGMRSiteSource(), NMBGMRAnalyteSource())) - if self.use_source_dwb: + if self.use_source_nmed_dwb: sources.append((DWBSiteSource(), DWBAnalyteSource())) for s, ss in sources: @@ -193,10 +194,10 @@ def analyte_sources(self): def water_level_sources(self): sources = [] - if self.use_source_nmbgmr: + if self.use_source_nmbgmr_amp: sources.append((NMBGMRSiteSource(), NMBGMRWaterLevelSource())) - if self.use_source_iscsevenrivers: + if self.use_source_nmose_isc_seven_rivers: sources.append( (ISCSevenRiversSiteSource(), ISCSevenRiversWaterLevelSource()) ) @@ -204,7 +205,7 @@ def water_level_sources(self): if self.use_source_nwis: sources.append((NWISSiteSource(), NWISWaterLevelSource())) - if self.use_source_nmoseroswell: + if self.use_source_nmose_roswell: sources.append( ( OSERoswellSiteSource(HONDO_RESOURCE_ID), diff --git a/frontend/cli.py b/frontend/cli.py index e2eb1e0..4d6e4bb 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -30,51 +30,48 @@ def cli(): 
pass -ANALYTE_SOURCE_OPTIONS = [ +ALL_SOURCE_OPTIONS = [ click.option( - "--no-amp", + "--no-bernco", is_flag=True, default=True, show_default=True, - help="Include/Exclude AMP data. Default is to include", + help="Exclude Bernalillo County Water Authority data. Default is to include", ), click.option( - "--no-isc-seven-rivers", + "--no-bor", is_flag=True, default=True, show_default=True, - help="Exclude ISC Seven Rivers data. Default is to include", + help="Exclude BoR data. Default is to include", ), click.option( - "--no-bor", + "--no-nmed-dwb", is_flag=True, default=True, show_default=True, - help="Exclude BOR data. Default is to include", + help="Exclude NMED DWB data. Default is to include", ), click.option( - "--no-wqp", + "--no-nmose-isc-seven-rivers", is_flag=True, default=True, show_default=True, - help="Exclude WQP data. Default is to include", + help="Exclude NMOSE ISC Seven Rivers data. Default is to include", ), click.option( - "--no-dwb", + "--no-nmose-roswell", is_flag=True, default=True, show_default=True, - help="Exclude DWB data. Default is to include", + help="Exclude NMOSE Roswell data. Default is to include", ), -] - -WATERLEVEL_SOURCE_OPTIONS = [ click.option( - "--no-amp", + "--no-nmbgmr-amp", is_flag=True, default=True, show_default=True, - help="Include/Exclude AMP data. Default is to include", + help="Exclude NMBGMR AMP data. Default is to include", ), click.option( "--no-nwis", @@ -91,26 +88,12 @@ def cli(): help="Exclude PVACD data. Default is to include", ), click.option( - "--no-isc-seven-rivers", - is_flag=True, - default=True, - show_default=True, - help="Exclude ISC Seven Rivers data. Default is to include", - ), - click.option( - "--no-nm-ose-roswell", - is_flag=True, - default=True, - show_default=True, - help="Exclude NM OSE Roswell data. Default is to include", - ), - click.option( - "--no-bernco", + "--no-wqp", is_flag=True, default=True, show_default=True, - help="Exclude Bernalillo County Water Authority data. 
Default is to include", - ), + help="Exclude WQP data. Default is to include", + ) ] SPATIAL_OPTIONS = [ @@ -195,7 +178,7 @@ def wells(bbox, county): @add_options(TIMESERIES_OPTIONS) @add_options(DT_OPTIONS) @add_options(SPATIAL_OPTIONS) -@add_options(WATERLEVEL_SOURCE_OPTIONS) +@add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def waterlevels( separated_timeseries, @@ -204,29 +187,42 @@ def waterlevels( end_date, bbox, county, - no_amp, + no_bernco, + no_bor, # has no water levels + no_nmbgmr_amp, + no_nmed_dwb, # has no water levels + no_nmose_isc_seven_rivers, + no_nmose_roswell, no_nwis, no_pvacd, - no_isc_seven_rivers, - no_nm_ose_roswell, - no_bernco, + no_wqp, # has no water levels site_limit, dry, ): + # output type if separated_timeseries or unified_timeseries: timeseries = True else: timeseries = False + + # instantiate config config = setup_config("waterlevels", timeseries, bbox, county, site_limit, dry) config.output_single_timeseries = unified_timeseries - config.use_source_nmbgmr = no_amp + + # sources + config.use_source_bernco = no_bernco + config.use_source_nmbgmr_amp = no_nmbgmr_amp + config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers + config.use_source_nmose_roswell = no_nmose_roswell config.use_source_nwis = no_nwis config.use_source_pvacd = no_pvacd - config.use_source_iscsevenrivers = no_isc_seven_rivers - config.use_source_nmoseroswell = no_nm_ose_roswell - config.use_source_bernco = no_bernco + config.use_source_bor = False + config.use_source_nmed_dwb = False + config.use_source_wqp = False + + # dates config.start_date = start_date config.end_date = end_date @@ -244,7 +240,7 @@ def waterlevels( @add_options(TIMESERIES_OPTIONS) @add_options(DT_OPTIONS) @add_options(SPATIAL_OPTIONS) -@add_options(ANALYTE_SOURCE_OPTIONS) +@add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def analytes( analyte, @@ -254,33 +250,52 @@ def analytes( end_date, bbox, county, - no_amp, - no_isc_seven_rivers, + no_bernco, # 
has no analyte measurements no_bor, + no_nmbgmr_amp, + no_nmed_dwb, + no_nmose_isc_seven_rivers, + no_nmose_roswell, # has no analyte measurements + no_nwis, # has no analyte measurements + no_pvacd, # has no analyte measurements no_wqp, - no_dwb, site_limit, dry, ): + # output type if separated_timeseries or unified_timeseries: timeseries = True else: timeseries = False + + # instantiate config config = setup_config( f"analytes ({analyte})", timeseries, bbox, county, site_limit, dry ) - config.analyte = analyte - config.output_single_timeseries = unified_timeseries - config.use_source_nmbgmr = no_amp - config.use_source_iscsevenrivers = no_isc_seven_rivers + + # sources config.use_source_bor = no_bor + config.use_source_nmbgmr_amp = no_nmbgmr_amp + config.use_source_nmed_dwb = no_nmed_dwb + config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers config.use_source_wqp = no_wqp - config.use_source_dwb = no_dwb + config.use_source_bernco = False + config.use_source_nmose_roswell = False + config.use_source_nwis = False + config.use_source_pvacd = False + + + # analyte + config.analyte = analyte + + # dates config.start_date = start_date config.end_date = end_date + + if not dry: config.report() # prompt user to continue From bc9dc6741a658f1753e79b49a7959f16e2a36f58 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 14 Jan 2025 09:24:31 -0700 Subject: [PATCH 06/53] Updated README for clarity --- README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index b0af681..c68746e 100644 --- a/README.md +++ b/README.md @@ -19,26 +19,26 @@ pip install nmuwd Data comes from the following sources. We are continuously adding new sources as we learn of them and they become available. If you have data that you would like to be part of the Data Integration Engine please get in touch at newmexicowaterdata@nmt.edu. 
- [Bernalillo County (BernCo)](https://st2.newmexicowaterdata.org/FROST-Server/v1.1/Locations?$filter=properties/agency%20eq%20%27BernCo%27) - - Available data: water levels + - Available data: `water levels` - [Bureau of Reclamation (BoR)](https://data.usbr.gov/) - - Available data: water quality + - Available data: `water quality` - [New Mexico Bureau of Geology and Mineral Resources (NMBGMR) Aquifer Mapping Program (AMP)](https://waterdata.nmt.edu/) - - Available data: water levels, water quality + - Available data: `water levels`, `water quality` - [New Mexico Environment Department Drinking Water Bureau (NMED DWB)](https://nmenv.newmexicowaterdata.org/FROST-Server/v1.1/) - - Available data: water quality + - Available data: `water quality` - [New Mexico Office of the State Engineer ISC Seven Rivers (NMOSE ISC Seven Rivers)](https://nmisc-wf.gladata.com/api/getMonitoringPoints.ashx) - - Available data: water levels, water quality + - Available data: `water levels`, `water quality` - [New Mexico Office of the State Engineer Roswell District Office (NMOSE Roswell)](https://catalog.newmexicowaterdata.org/dataset/pecos_region_manual_groundwater_levels) - - Available data: water levels + - Available data: `water levels` - [Pecos Valley Artesian Conservancy District (PVACD)](https://st2.newmexicowaterdata.org/FROST-Server/v1.1/Locations?$filter=properties/agency%20eq%20%27PVACD%27) - - Available data: water levels + - Available data: `water levels` - [USGS (NWIS)](https://waterdata.usgs.gov/nwis) - - Available data: water levels + - Available data: `water levels` - [Water Quality Portal (WQP)](https://www.waterqualitydata.us/) - - Available data: water quality + - Available data: `water quality` ### Source Inclusion & Exclusion -The Data Integration Engine enables the user to obtain groundwater level and groundwater quality data from a variety of sources. Data from sources are automatically included in the output unless specifically excluded. 
The following flags are available to exclude a specific data source: +The Data Integration Engine enables the user to obtain groundwater level and groundwater quality data from a variety of sources. Data from sources are automatically included in the output if available unless specifically excluded. The following flags are available to exclude specific data sources: - `--no-bernco` to exclude Bernalillo County (BernCo) data - `--no-bor` to exclude Bureaof of Reclamation data From f840436b4987064d3bce440bad6e25a066a7d1df Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Tue, 14 Jan 2025 16:25:35 +0000 Subject: [PATCH 07/53] Formatting changes --- backend/config.py | 3 +-- frontend/cli.py | 22 ++++++++++------------ 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/backend/config.py b/backend/config.py index f0f4feb..884c7c7 100644 --- a/backend/config.py +++ b/backend/config.py @@ -65,7 +65,7 @@ "nmose_roswell", "nwis", "pvacd", - "wqp", + "wqp", ) @@ -115,7 +115,6 @@ class Config(Loggable): use_source_nwis: bool = False use_source_pvacd: bool = False use_source_wqp: bool = False - analyte: str = "" diff --git a/frontend/cli.py b/frontend/cli.py index 4d6e4bb..c2db9f1 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -30,6 +30,7 @@ def cli(): pass + ALL_SOURCE_OPTIONS = [ click.option( "--no-bernco", @@ -93,7 +94,7 @@ def cli(): default=True, show_default=True, help="Exclude WQP data. 
Default is to include", - ) + ), ] SPATIAL_OPTIONS = [ @@ -188,14 +189,14 @@ def waterlevels( bbox, county, no_bernco, - no_bor, # has no water levels + no_bor, # has no water levels no_nmbgmr_amp, - no_nmed_dwb, # has no water levels + no_nmed_dwb, # has no water levels no_nmose_isc_seven_rivers, no_nmose_roswell, no_nwis, no_pvacd, - no_wqp, # has no water levels + no_wqp, # has no water levels site_limit, dry, ): @@ -221,7 +222,7 @@ def waterlevels( config.use_source_bor = False config.use_source_nmed_dwb = False config.use_source_wqp = False - + # dates config.start_date = start_date config.end_date = end_date @@ -255,9 +256,9 @@ def analytes( no_nmbgmr_amp, no_nmed_dwb, no_nmose_isc_seven_rivers, - no_nmose_roswell, # has no analyte measurements - no_nwis, # has no analyte measurements - no_pvacd, # has no analyte measurements + no_nmose_roswell, # has no analyte measurements + no_nwis, # has no analyte measurements + no_pvacd, # has no analyte measurements no_wqp, site_limit, dry, @@ -273,7 +274,7 @@ def analytes( f"analytes ({analyte})", timeseries, bbox, county, site_limit, dry ) config.output_single_timeseries = unified_timeseries - + # sources config.use_source_bor = no_bor config.use_source_nmbgmr_amp = no_nmbgmr_amp @@ -285,7 +286,6 @@ def analytes( config.use_source_nmose_roswell = False config.use_source_nwis = False config.use_source_pvacd = False - # analyte config.analyte = analyte @@ -294,8 +294,6 @@ def analytes( config.start_date = start_date config.end_date = end_date - - if not dry: config.report() # prompt user to continue From f8eb16371a1ea36c0d59b6dcbd869778ea454eee Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 14 Jan 2025 10:00:47 -0700 Subject: [PATCH 08/53] Write output.sites.csv for separated timeseries This enables location data to be easily determined for all sites --- backend/unifier.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/unifier.py b/backend/unifier.py index 6fe3815..e67449b 100644 --- 
a/backend/unifier.py +++ b/backend/unifier.py @@ -202,6 +202,7 @@ def _unify_parameter( else: persister.dump_combined(f"{config.output_path}.combined") persister.dump_timeseries(f"{config.output_path}_timeseries") + persister.dump_sites(f"{config.output_path}.sites") persister.finalize(config.output_name) From 4871b7e758c52eaf91fb69833568ac99f240840d Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 14 Jan 2025 11:05:53 -0700 Subject: [PATCH 09/53] Deprecate combined Even if a site has only one measurement, its data will be in a standalone csv to have exports be standardized and findable --- README.md | 54 +++++--------------------------------------- backend/persister.py | 9 ++++---- backend/unifier.py | 18 ++++----------- 3 files changed, 15 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index c68746e..6f1ffd1 100644 --- a/README.md +++ b/README.md @@ -114,13 +114,15 @@ The following flags can be used to filter by dates: The data is saved to the current working directory. A log of the inputs and processes, called `die.log`, is also saved to the current working directory. If a subsquent process is run and the log from the previous process has not been moved or stored elsewhere, the log for the subsequent process will be appended to the existing log. ### Timeseries Data -The flag `--separated_timeseries` exports timeseries for every location in their own file in the directory output_series (e.g. `AB-0002.csv`, `AB-0003.csv`). Locations with only one observation are gathered and exported to the file `output.combined.csv`. +The flag `--separated_timeseries` exports timeseries for every location in their own file in the directory output_series (e.g. `AB-0002.csv`, `AB-0003.csv`). -The flag `--unified_timeseries` exports all timeseries for all locations in one file titled `output.timeseries.csv`. It also exports a file titled `output.sites.csv` that contains site information, such as latitude, longitude, and elevation. 
+The flag `--unified_timeseries` exports all timeseries for all locations in one file titled `output.timeseries.csv`. -#### Table Headers: Unified +Both time series export a file titled `output.sites.csv` that contains site information, such as latitude, longitude, and elevation. -The table headers for unified timeseries data are as follows: +#### Table Headers + +The table headers for timeseries data are as follows: **output.sites.csv** - `source`: the organization/source for the site @@ -154,50 +156,6 @@ The table headers for unified timeseries data are as follows: - `date_measured`: date of measurement in YYYY-MM-DD format - `time_measured`: time of measurement if it exists -#### Table Headers: Separated - -The files for the individual sites contain the same headers as **output.timeseries.csv** from the unified time series tables. - -**output.combined.csv - waterlevels** -- `source`: the organization/source for the site -- `id`: the id of the site. The id is used as the key to join the output.timeseries.csv table -- `name`: the colloquial name for the site if it exists -- `latitude`: latitude in decimal degrees -- `longitude`: the longitude in decimal degrees -- `elevation` ground surface elevation of the site in feet -- `elevation_units`: the units of the ground surface elevation. Defaults to ft -- `horizontal_datum`: horizontal datum of the latitude and longitude. 
Defaults to WGS84 -- `vertical_datum`: the vertical datum of the elevation -- `usgs_site_id`: USGS site id if it exists -- `alternate_site_id`: alternate site id if it exists -- `formation`: geologic formation in which the well terminates if it exists -- `aquifer`: aquifer from which the well draws water if it exists -- `well_depth`: depth of well if it exists -- `depth_to_water_ft_below_ground_surface`: depth to water below ground surface in ft -- `date_measured`: date of measurement in YYYY-MM-DD format -- `time_measured`: time of measurement if it exists - -**output.combined.csv - analytes** -- `source`: the organization/source for the site -- `id`: the id of the site. The id is used as the key to join the output.timeseries.csv table -- `name`: the colloquial name for the site if it exists -- `latitude`: latitude in decimal degrees -- `longitude`: the longitude in decimal degrees -- `elevation` ground surface elevation of the site in feet -- `elevation_units`: the units of the ground surface elevation. Defaults to ft -- `horizontal_datum`: horizontal datum of the latitude and longitude. Defaults to WGS84 -- `vertical_datum`: the vertical datum of the elevation -- `usgs_site_id`: USGS site id if it exists -- `alternate_site_id`: alternate site id if it exists -- `formation`: geologic formation in which the well terminates if it exists -- `aquifer`: aquifer from which the well draws water if it exists -- `well_depth`: depth of well if it exists -- `parameter`: the name of the analyte whose measurements are reported in the table. This corresponds the requested analyte -- `parameter_value`: value of the measurement -- `parameter_units`: units of the measurement -- `date_measured`: date of measurement in YYYY-MM-DD format -- `time_measured`: time of measurement if it exists - ### Summary Data If neither of the above flags are specified, a summary table called `output.csv` is exported. 
The summary table consists of location information as well as summary statistics for the parameter of interest for every location that has observations. diff --git a/backend/persister.py b/backend/persister.py index 296ee2a..4640fec 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -35,7 +35,6 @@ class BasePersister(Loggable): def __init__(self): self.records = [] - self.combined = [] self.timeseries = [] self.sites = [] @@ -62,10 +61,10 @@ def dump_timeseries(self, root: str): self.log(f"dumping {site.id} to {os.path.abspath(path)}") self._write(path, records) - self._write( - os.path.join(root, self.add_extension("sites")), - [s[0] for s in self.timeseries], - ) + # self._write( + # os.path.join(root, self.add_extension("sites")), + # [s[0] for s in self.timeseries], + # ) else: self.log("no timeseries records to dump", fg="red") diff --git a/backend/unifier.py b/backend/unifier.py index e67449b..a81e3f2 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -164,18 +164,10 @@ def _site_wrapper(site_source, parameter_source, persister, config): if results is None or len(results) == 0: continue - if config.output_single_timeseries: - for site, records in results: - persister.timeseries.append((site, records)) - persister.sites.append(site) - else: - # combine sites that only have one record - for site, records in results: - if len(records) == 1: - persister.combined.append((site, records[0])) - else: - persister.timeseries.append((site, records)) - persister.sites.append(site) + for site, records in results: + persister.timeseries.append((site, records)) + persister.sites.append(site) + sites_with_records_count += 1 except BaseException: @@ -200,7 +192,7 @@ def _unify_parameter( persister.dump_sites(f"{config.output_path}.sites") persister.dump_single_timeseries(f"{config.output_path}.timeseries") else: - persister.dump_combined(f"{config.output_path}.combined") + # persister.dump_combined(f"{config.output_path}.combined") 
persister.dump_timeseries(f"{config.output_path}_timeseries") persister.dump_sites(f"{config.output_path}.sites") From cb08ca8568e3bd9a78d2fc773809a2684003ba83 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 14 Jan 2025 11:08:30 -0700 Subject: [PATCH 10/53] Updated README.md to indicate Bureau of Reclamation (BoR) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6f1ffd1..bb8c8fb 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ Data comes from the following sources. We are continuously adding new sources as The Data Integration Engine enables the user to obtain groundwater level and groundwater quality data from a variety of sources. Data from sources are automatically included in the output if available unless specifically excluded. The following flags are available to exclude specific data sources: - `--no-bernco` to exclude Bernalillo County (BernCo) data -- `--no-bor` to exclude Bureaof of Reclamation data +- `--no-bor` to exclude Bureaof of Reclamation (Bor) data - `--no-nmbgmr-amp` to exclude New Mexico Bureau of Geology and Mineral Resources (NMBGMR) Aquifer Mapping Program (AMP) data - `--no-nmed-dwb` to exclude New Mexico Environment Department (NMED) Drinking Water Bureau (DWB) data - `--no-nmose-isc-seven-rivers` to exclude New Mexico Office of State Engineer (NMOSE) Interstate Stream Commission (ISC) Seven Rivers data From 6e1d864cc0f0da8263a66589ff01e97848480cee Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 14 Jan 2025 11:25:58 -0700 Subject: [PATCH 11/53] Comment out all instances of combined Saving the code for now in case we want to add it back --- backend/persister.py | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/backend/persister.py b/backend/persister.py index 4640fec..3aec8ad 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -37,6 +37,7 @@ def __init__(self): self.records = [] self.timeseries = 
[] self.sites = [] + # self.combined = [] super().__init__() # self.keys = record_klass.keys @@ -68,14 +69,14 @@ def dump_timeseries(self, root: str): else: self.log("no timeseries records to dump", fg="red") - def dump_combined(self, path: str): - if self.combined: - path = self.add_extension(path) + # def dump_combined(self, path: str): + # if self.combined: + # path = self.add_extension(path) - self.log(f"dumping combined to {os.path.abspath(path)}") - self._dump_combined(path, self.combined) - else: - self.log("no combined records to dump", fg="red") + # self.log(f"dumping combined to {os.path.abspath(path)}") + # self._dump_combined(path, self.combined) + # else: + # self.log("no combined records to dump", fg="red") def dump_single_timeseries(self, path: str): if self.timeseries: @@ -112,8 +113,8 @@ def add_extension(self, path: str): def _write(self, path: str, records): raise NotImplementedError - def _dump_combined(self, path: str, combined: list): - raise NotImplementedError + # def _dump_combined(self, path: str, combined: list): + # raise NotImplementedError def _dump_single_timeseries(self, path: str, timeseries: list): raise NotImplementedError @@ -150,11 +151,11 @@ def dump_sites(writer, records): writer.writerow(site.to_row()) -def dump_combined(writer, combined): - for i, (site, record) in enumerate(combined): - if i == 0: - writer.writerow(site.keys + record.keys) - writer.writerow(site.to_row() + record.to_row()) +# def dump_combined(writer, combined): +# for i, (site, record) in enumerate(combined): +# if i == 0: +# writer.writerow(site.keys + record.keys) +# writer.writerow(site.to_row() + record.to_row()) class CloudStoragePersister(BasePersister): @@ -205,9 +206,9 @@ def _dump_single_timeseries(self, path: str, timeseries: list): content = write_memory(path, dump_single_timeseries, timeseries) self._add_content(path, content) - def _dump_combined(self, path: str, combined: list): - content = write_memory(path, dump_combined, combined) - 
self._add_content(path, content) + # def _dump_combined(self, path: str, combined: list): + # content = write_memory(path, dump_combined, combined) + # self._add_content(path, content) class CSVPersister(BasePersister): @@ -219,8 +220,8 @@ def _write(self, path: str, records: list): def _dump_single_timeseries(self, path: str, timeseries: list): write_file(path, dump_single_timeseries, timeseries) - def _dump_combined(self, path: str, combined: list): - write_file(path, dump_combined, combined) + # def _dump_combined(self, path: str, combined: list): + # write_file(path, dump_combined, combined) class GeoJSONPersister(BasePersister): From 91cbb5aa9093c8ba5f418fbc3280b320e84b3dc0 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 14 Jan 2025 13:37:07 -0700 Subject: [PATCH 12/53] Add --output flag for summary and timeseries The user must now specify --output summary, --output timeseries_unified, or --output timeseries_separated to explicitly request the output format --- backend/config.py | 11 +++---- backend/unifier.py | 7 ++--- frontend/cli.py | 74 +++++++++++++++++++++++++++++++--------------- 3 files changed, 59 insertions(+), 33 deletions(-) diff --git a/backend/config.py b/backend/config.py index 884c7c7..39ddf07 100644 --- a/backend/config.py +++ b/backend/config.py @@ -126,7 +126,8 @@ class Config(Loggable): output_elevation_units: str = FEET output_well_depth_units: str = FEET output_summary: bool = False - output_single_timeseries: bool = False + output_timeseries_unified: bool = False + output_timeseries_separated: bool = False latest_water_level_only: bool = False @@ -157,13 +158,12 @@ def __init__(self, model=None, payload=None): self.wkt = payload.get("wkt", "") self.county = payload.get("county", "") self.output_summary = payload.get("output_summary", False) + self.output_timeseries_unified = payload.get("output_timeseries_unified", False) + self.output_timeseries_separated = payload.get("output_timeseries_separated", False) self.output_name = 
payload.get("output_name", "output") self.start_date = payload.get("start_date", "") self.end_date = payload.get("end_date", "") self.analyte = payload.get("analyte", "") - self.output_single_timeseries = payload.get( - "output_single_timeseries", False - ) for s in SOURCE_KEYS: setattr(self, f"use_source_{s}", s in payload.get("sources", [])) @@ -321,7 +321,8 @@ def _report_attributes(title, attrs): "output_dir", "output_name", "output_summary", - "output_single_timeseries", + "output_timeseries_unified", + "output_timeseries_separated", "output_horizontal_datum", "output_elevation_units", ), diff --git a/backend/unifier.py b/backend/unifier.py index a81e3f2..4b4f2fe 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -182,16 +182,15 @@ def _unify_parameter( config, sources, ): - use_summarize = config.output_summary persister = _perister_factory(config) for site_source, parameter_source in sources: _site_wrapper(site_source, parameter_source, persister, config) - if use_summarize: + if config.output_summary: persister.save(config.output_path) - elif config.output_single_timeseries: + elif config.output_unified_timeseries: persister.dump_sites(f"{config.output_path}.sites") persister.dump_single_timeseries(f"{config.output_path}.timeseries") - else: + else: # config.output_separated_timeseries # persister.dump_combined(f"{config.output_path}.combined") persister.dump_timeseries(f"{config.output_path}_timeseries") persister.dump_sites(f"{config.output_path}.sites") diff --git a/frontend/cli.py b/frontend/cli.py index c2db9f1..ac2c6f5 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -154,6 +154,14 @@ def cli(): ), ] +OUTPUT_OPTIONS = [ + click.option( + "--output", + type = click.Choice(["summary", "timeseries_unified", "timeseries_separated"]), + required=True, + help="Output summary file, single unified timeseries file, or separated timeseries files", + ) +] def add_options(options): def _add_options(func): @@ -176,14 +184,13 @@ def wells(bbox, 
county): @cli.command() -@add_options(TIMESERIES_OPTIONS) +@add_options(OUTPUT_OPTIONS) @add_options(DT_OPTIONS) @add_options(SPATIAL_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def waterlevels( - separated_timeseries, - unified_timeseries, + output, start_date, end_date, bbox, @@ -200,16 +207,26 @@ def waterlevels( site_limit, dry, ): - # output type - if separated_timeseries or unified_timeseries: - timeseries = True - else: - timeseries = False - # instantiate config - config = setup_config("waterlevels", timeseries, bbox, county, site_limit, dry) + config = setup_config("waterlevels", bbox, county, site_limit, dry) - config.output_single_timeseries = unified_timeseries + # output type + if output == "summary": + summary = True + timeseries_unified = False + timeseries_separated = False + elif output == "timeseries_unified": + summary = False + timeseries_unified = True + timeseries_separated = False + elif output == "timeseries_separated": + summary = False + timeseries_unified = False + timeseries_separated = True + + config.output_summary = summary + config.output_timeseries_unified = timeseries_unified + config.output_timeseries_separated = timeseries_separated # sources config.use_source_bernco = no_bernco @@ -238,15 +255,14 @@ def waterlevels( @cli.command() @click.argument("analyte", type=click.Choice(ANALYTE_CHOICES)) -@add_options(TIMESERIES_OPTIONS) +@add_options(OUTPUT_OPTIONS) @add_options(DT_OPTIONS) @add_options(SPATIAL_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def analytes( analyte, - separated_timeseries, - unified_timeseries, + output, start_date, end_date, bbox, @@ -263,17 +279,28 @@ def analytes( site_limit, dry, ): - # output type - if separated_timeseries or unified_timeseries: - timeseries = True - else: - timeseries = False - # instantiate config config = setup_config( - f"analytes ({analyte})", timeseries, bbox, county, site_limit, dry + f"analytes ({analyte})", bbox, county, 
site_limit, dry ) - config.output_single_timeseries = unified_timeseries + + # output type + if output == "summary": + summary = True + timeseries_unified = False + timeseries_separated = False + elif output == "timeseries_unified": + summary = False + timeseries_unified = True + timeseries_separated = False + elif output == "timeseries_separated": + summary = False + timeseries_unified = False + timeseries_separated = True + + config.output_summary = summary + config.output_timeseries_unified = timeseries_unified + config.output_timeseries_separated = timeseries_separated # sources config.use_source_bor = no_bor @@ -322,7 +349,7 @@ def sources(bbox, county): click.echo(s) -def setup_config(tag, timeseries, bbox, county, site_limit, dry): +def setup_config(tag, bbox, county, site_limit, dry): config = Config() if county: click.echo(f"Getting {tag} for county {county}") @@ -332,7 +359,6 @@ def setup_config(tag, timeseries, bbox, county, site_limit, dry): # bbox = -105.396826 36.219290, -106.024162 35.384307 config.bbox = bbox - config.output_summary = not timeseries config.site_limit = site_limit config.dry = dry From 694e986ae74e07b8a527fa14a5d7db32d78b8594 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Tue, 14 Jan 2025 20:38:39 +0000 Subject: [PATCH 13/53] Formatting changes --- backend/config.py | 8 ++++++-- backend/unifier.py | 2 +- frontend/cli.py | 11 +++++------ 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/backend/config.py b/backend/config.py index 39ddf07..57906ce 100644 --- a/backend/config.py +++ b/backend/config.py @@ -158,8 +158,12 @@ def __init__(self, model=None, payload=None): self.wkt = payload.get("wkt", "") self.county = payload.get("county", "") self.output_summary = payload.get("output_summary", False) - self.output_timeseries_unified = payload.get("output_timeseries_unified", False) - self.output_timeseries_separated = payload.get("output_timeseries_separated", False) + self.output_timeseries_unified = payload.get( + 
"output_timeseries_unified", False + ) + self.output_timeseries_separated = payload.get( + "output_timeseries_separated", False + ) self.output_name = payload.get("output_name", "output") self.start_date = payload.get("start_date", "") self.end_date = payload.get("end_date", "") diff --git a/backend/unifier.py b/backend/unifier.py index 4b4f2fe..3953492 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -190,7 +190,7 @@ def _unify_parameter( elif config.output_unified_timeseries: persister.dump_sites(f"{config.output_path}.sites") persister.dump_single_timeseries(f"{config.output_path}.timeseries") - else: # config.output_separated_timeseries + else: # config.output_separated_timeseries # persister.dump_combined(f"{config.output_path}.combined") persister.dump_timeseries(f"{config.output_path}_timeseries") persister.dump_sites(f"{config.output_path}.sites") diff --git a/frontend/cli.py b/frontend/cli.py index ac2c6f5..7d79d99 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -157,12 +157,13 @@ def cli(): OUTPUT_OPTIONS = [ click.option( "--output", - type = click.Choice(["summary", "timeseries_unified", "timeseries_separated"]), + type=click.Choice(["summary", "timeseries_unified", "timeseries_separated"]), required=True, help="Output summary file, single unified timeseries file, or separated timeseries files", ) ] + def add_options(options): def _add_options(func): for option in reversed(options): @@ -223,7 +224,7 @@ def waterlevels( summary = False timeseries_unified = False timeseries_separated = True - + config.output_summary = summary config.output_timeseries_unified = timeseries_unified config.output_timeseries_separated = timeseries_separated @@ -280,9 +281,7 @@ def analytes( dry, ): # instantiate config - config = setup_config( - f"analytes ({analyte})", bbox, county, site_limit, dry - ) + config = setup_config(f"analytes ({analyte})", bbox, county, site_limit, dry) # output type if output == "summary": @@ -297,7 +296,7 @@ def analytes( summary 
= False timeseries_unified = False timeseries_separated = True - + config.output_summary = summary config.output_timeseries_unified = timeseries_unified config.output_timeseries_separated = timeseries_separated From bd6c7f29660d4673490ed5ca4dc79a5132a92d44 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 14 Jan 2025 17:02:10 -0700 Subject: [PATCH 14/53] Fix names for functions for writing different output files This change was done to have the output type and the functions that write the outputs have corresponding names --- backend/persister.py | 20 ++++++++++---------- backend/unifier.py | 9 ++++----- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/backend/persister.py b/backend/persister.py index 3aec8ad..b83d126 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -48,7 +48,7 @@ def load(self, records: list): def finalize(self, output_name: str): pass - def dump_timeseries(self, root: str): + def dump_timeseries_separated(self, root: str): if self.timeseries: if os.path.isdir(root): self.log(f"root {root} already exists", fg="red") @@ -78,11 +78,11 @@ def dump_timeseries(self, root: str): # else: # self.log("no combined records to dump", fg="red") - def dump_single_timeseries(self, path: str): + def dump_timeseries_unified(self, path: str): if self.timeseries: path = self.add_extension(path) - self.log(f"dumping single timeseries to {os.path.abspath(path)}") - self._dump_single_timeseries(path, self.timeseries) + self.log(f"dumping unified timeseries to {os.path.abspath(path)}") + self._dump_timeseries_unified(path, self.timeseries) else: self.log("no timeseries records to dump", fg="red") @@ -116,7 +116,7 @@ def _write(self, path: str, records): # def _dump_combined(self, path: str, combined: list): # raise NotImplementedError - def _dump_single_timeseries(self, path: str, timeseries: list): + def _dump_timeseries_unified(self, path: str, timeseries: list): raise NotImplementedError def _make_root_directory(self, root: str): @@ 
-134,7 +134,7 @@ def write_memory(path, func, records): return f.getvalue() -def dump_single_timeseries(writer, timeseries): +def dump_timeseries_unified(writer, timeseries): headers_have_not_been_written = True for i, (site, records) in enumerate(timeseries): for j, record in enumerate(records): @@ -202,8 +202,8 @@ def _write(self, path: str, records: list): def _add_content(self, path: str, content: str): self._content.append((path, content)) - def _dump_single_timeseries(self, path: str, timeseries: list): - content = write_memory(path, dump_single_timeseries, timeseries) + def _dump_timeseries_unified(self, path: str, timeseries: list): + content = write_memory(path, dump_timeseries_unified, timeseries) self._add_content(path, content) # def _dump_combined(self, path: str, combined: list): @@ -217,8 +217,8 @@ class CSVPersister(BasePersister): def _write(self, path: str, records: list): write_file(path, dump_sites, records) - def _dump_single_timeseries(self, path: str, timeseries: list): - write_file(path, dump_single_timeseries, timeseries) + def _dump_timeseries_unified(self, path: str, timeseries: list): + write_file(path, dump_timeseries_unified, timeseries) # def _dump_combined(self, path: str, combined: list): # write_file(path, dump_combined, combined) diff --git a/backend/unifier.py b/backend/unifier.py index 3953492..c3a5904 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -187,12 +187,11 @@ def _unify_parameter( _site_wrapper(site_source, parameter_source, persister, config) if config.output_summary: persister.save(config.output_path) - elif config.output_unified_timeseries: + elif config.output_timeseries_unified: persister.dump_sites(f"{config.output_path}.sites") - persister.dump_single_timeseries(f"{config.output_path}.timeseries") - else: # config.output_separated_timeseries - # persister.dump_combined(f"{config.output_path}.combined") - persister.dump_timeseries(f"{config.output_path}_timeseries") + 
persister.dump_timeseries_unified(f"{config.output_path}.timeseries") + else: # config.output_timeseries_separated + persister.dump_timeseries_separated(f"{config.output_path}_timeseries") persister.dump_sites(f"{config.output_path}.sites") persister.finalize(config.output_name) From 8f6ae702ee60c334590fe36ae5973ab4ec929c93 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 14 Jan 2025 17:02:51 -0700 Subject: [PATCH 15/53] Update BOR for API changes catalogItems are no longer returned in the location response. They are, however, still necessary to retrieve data and records. They are now explicitly found by querying the associated catalogRecords endpoint --- backend/connectors/bor/source.py | 7 ++++++- backend/connectors/bor/transformer.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/backend/connectors/bor/source.py b/backend/connectors/bor/source.py index 1007bed..cf7d115 100644 --- a/backend/connectors/bor/source.py +++ b/backend/connectors/bor/source.py @@ -104,7 +104,12 @@ def _reorder_catalog_items(self, items): def get_records(self, site_record): code = get_analyte_search_param(self.config.analyte, BOR_ANALYTE_MAPPING) - for i, item in enumerate(self._reorder_catalog_items(site_record.catalogItems)): + catalog_record_data = self._execute_json_request( + f"https://data.usbr.gov{site_record.catalogRecords[0]['id']}" + ) + catalog_items = catalog_record_data["relationships"]["catalogItems"]["data"] + + for i, item in enumerate(self._reorder_catalog_items(catalog_items)): data = self._execute_json_request(f'https://data.usbr.gov{item["id"]}') if not data: diff --git a/backend/connectors/bor/transformer.py b/backend/connectors/bor/transformer.py index 2c4f263..4dd81a3 100644 --- a/backend/connectors/bor/transformer.py +++ b/backend/connectors/bor/transformer.py @@ -14,6 +14,7 @@ # limitations under the License. 
# =============================================================================== import pprint +import json from backend.record import SiteRecord, WaterLevelRecord, AnalyteSummaryRecord from backend.transformer import ( @@ -57,7 +58,6 @@ def _transform(self, record): "well_depth": WELL_DEPTHS.get(props["_id"]), "well_depth_units": "ft", "catalogRecords": record["relationships"]["catalogRecords"]["data"], - "catalogItems": record["relationships"]["catalogItems"]["data"], } return rec From 215a39bbcf863b8e92482acabf252a9cfdc8bc76 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 27 Jan 2025 16:55:31 -0700 Subject: [PATCH 16/53] Use parameter flag to specify both waterlevels and analytes This is done to standardize the way queries are called. This will allow for easier use of the command line interface and will make the code more readable. --- README.md | 4 +- backend/config.py | 10 +- backend/connectors/bor/source.py | 2 +- backend/connectors/isc_seven_rivers/source.py | 2 +- backend/connectors/nmbgmr/source.py | 8 +- backend/connectors/nmenv/source.py | 4 +- backend/connectors/wqp/source.py | 6 +- backend/source.py | 4 +- backend/transformer.py | 6 +- backend/unifier.py | 6 +- backend/worker.py | 3 +- frontend/cli.py | 133 ++++++------------ 12 files changed, 70 insertions(+), 118 deletions(-) diff --git a/README.md b/README.md index bb8c8fb..67beaa3 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ The Data Integration Engine enables the user to obtain groundwater level and gro To obtain groundwater levels, use ``` -weave waterlevels +weave parameter Waterlevels ``` followed by the desired output type, source filters, date filters, geographic filters, and excluded data sources. @@ -64,7 +64,7 @@ followed by the desired output type, source filters, date filters, geographic fi To obtain groundwater quality, use ``` -weave analytes {analyte} +weave paramater {analyte} ``` where `{analyte}` is the name of the analyte whose data is to be retrieved. 
diff --git a/backend/config.py b/backend/config.py index 57906ce..270a473 100644 --- a/backend/config.py +++ b/backend/config.py @@ -116,7 +116,8 @@ class Config(Loggable): use_source_pvacd: bool = False use_source_wqp: bool = False - analyte: str = "" + # parameter + parameter: str = "" # output use_cloud_storage: bool = False @@ -167,7 +168,7 @@ def __init__(self, model=None, payload=None): self.output_name = payload.get("output_name", "output") self.start_date = payload.get("start_date", "") self.end_date = payload.get("end_date", "") - self.analyte = payload.get("analyte", "") + self.parameter = payload.get("parameter", "") for s in SOURCE_KEYS: setattr(self, f"use_source_{s}", s in payload.get("sources", [])) @@ -190,9 +191,6 @@ def analyte_sources(self): s.set_config(self) ss.set_config(self) - # s.config = self - # ss.config = self - return sources def water_level_sources(self): @@ -309,7 +307,7 @@ def _report_attributes(title, attrs): "county", "bbox", "wkt", - "analyte", + "parameter", "site_limit", ] + sources # inputs diff --git a/backend/connectors/bor/source.py b/backend/connectors/bor/source.py index cf7d115..0502f99 100644 --- a/backend/connectors/bor/source.py +++ b/backend/connectors/bor/source.py @@ -102,7 +102,7 @@ def _reorder_catalog_items(self, items): return items def get_records(self, site_record): - code = get_analyte_search_param(self.config.analyte, BOR_ANALYTE_MAPPING) + code = get_analyte_search_param(self.config.parameter, BOR_ANALYTE_MAPPING) catalog_record_data = self._execute_json_request( f"https://data.usbr.gov{site_record.catalogRecords[0]['id']}" diff --git a/backend/connectors/isc_seven_rivers/source.py b/backend/connectors/isc_seven_rivers/source.py index 46dbc7c..92a1130 100644 --- a/backend/connectors/isc_seven_rivers/source.py +++ b/backend/connectors/isc_seven_rivers/source.py @@ -131,7 +131,7 @@ def _extract_parameter_dates(self, records: list) -> list: def get_records(self, site_record): config = self.config - analyte_id 
= self._get_analyte_id(config.analyte) + analyte_id = self._get_analyte_id(config.parameter) if analyte_id: params = { "monitoringPointId": site_record.id, diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index ba58c0a..0524d60 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -75,9 +75,9 @@ def get_records(self): if config.site_limit: params["limit"] = config.site_limit - if config.analyte: + if config.parameter != "Waterlevels": params["parameter"] = get_analyte_search_param( - config.analyte, NMBGMR_ANALYTE_MAPPING + config.parameter, NMBGMR_ANALYTE_MAPPING ) else: params["parameter"] = "Manual groundwater levels" @@ -108,7 +108,7 @@ class NMBGMRAnalyteSource(BaseAnalyteSource): transformer_klass = NMBGMRAnalyteTransformer def get_records(self, site_record): - analyte = get_analyte_search_param(self.config.analyte, NMBGMR_ANALYTE_MAPPING) + analyte = get_analyte_search_param(self.config.parameter, NMBGMR_ANALYTE_MAPPING) records = self._execute_json_request( _make_url("waterchemistry"), params={ @@ -144,7 +144,7 @@ def _extract_parameter_dates(self, records: list) -> list: return [r["info"]["CollectionDate"] for r in records] def _extract_parameter_record(self, record): - record[PARAMETER] = self.config.analyte + record[PARAMETER] = self.config.parameter record[PARAMETER_VALUE] = record["SampleValue"] record[PARAMETER_UNITS] = record["Units"] record[DT_MEASURED] = record["info"]["CollectionDate"] diff --git a/backend/connectors/nmenv/source.py b/backend/connectors/nmenv/source.py index 6366c8f..3fd094a 100644 --- a/backend/connectors/nmenv/source.py +++ b/backend/connectors/nmenv/source.py @@ -39,7 +39,7 @@ def get_records(self, *args, **kw): if "analyte" in kw: analyte = kw["analyte"] elif self.config: - analyte = self.config.analyte + analyte = self.config.parameter analyte = get_analyte_search_param(analyte, DWB_ANALYTE_MAPPING) if analyte is None: @@ -82,7 +82,7 @@ def 
_parse_result( def get_records(self, site, *args, **kw): service = self.get_service() - analyte = get_analyte_search_param(self.config.analyte, DWB_ANALYTE_MAPPING) + analyte = get_analyte_search_param(self.config.parameter, DWB_ANALYTE_MAPPING) ds = service.datastreams() q = ds.query() q = q.expand("Thing/Locations, ObservedProperty, Observations") diff --git a/backend/connectors/wqp/source.py b/backend/connectors/wqp/source.py index 5b4007e..bd14809 100644 --- a/backend/connectors/wqp/source.py +++ b/backend/connectors/wqp/source.py @@ -83,9 +83,9 @@ def get_records(self): if config.has_bounds(): params["bBox"] = ",".join([str(b) for b in config.bbox_bounding_points()]) - if config.analyte: + if config.parameter != "Waterlevels": params["characteristicName"] = get_analyte_search_param( - config.analyte, WQP_ANALYTE_MAPPING + config.parameter, WQP_ANALYTE_MAPPING ) params.update(get_date_range(config)) @@ -138,7 +138,7 @@ def get_records(self, site_record): "siteid": sites, "mimeType": "tsv", "characteristicName": get_analyte_search_param( - self.config.analyte, WQP_ANALYTE_MAPPING + self.config.parameter, WQP_ANALYTE_MAPPING ), } params.update(get_date_range(self.config)) diff --git a/backend/source.py b/backend/source.py index 24f6346..3cbbabd 100644 --- a/backend/source.py +++ b/backend/source.py @@ -682,7 +682,7 @@ def read( float(r), u, self._get_output_units(), - self.config.analyte, + self.config.parameter, d, ) if warning_msg == "": @@ -988,7 +988,7 @@ def _get_output_units(self): return self.config.analyte_output_units def _validate_record(self, record): - record[PARAMETER] = self.config.analyte + record[PARAMETER] = self.config.parameter for k in (PARAMETER_VALUE, PARAMETER_UNITS, DT_MEASURED): if k not in record: raise ValueError(f"Invalid record. 
Missing {k}") diff --git a/backend/transformer.py b/backend/transformer.py index e7acdc7..ae7cd45 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -485,7 +485,7 @@ def do_transform( float(r), u, self.config.analyte_output_units, - self.config.analyte, + self.config.parameter, dt, ) if warning_msg != "": @@ -728,7 +728,7 @@ def _transform_most_recents(self, record): record["most_recent_value"], record["most_recent_units"], u, - self.config.analyte, + self.config.parameter, ) record["most_recent_units"] = u @@ -788,7 +788,7 @@ def _get_parameter(self) -> tuple: tuple The parameter and units for the analyte records """ - return self.config.analyte, self.config.analyte_output_units + return self.config.parameter, self.config.analyte_output_units # ============= EOF ============================================= diff --git a/backend/unifier.py b/backend/unifier.py index c3a5904..c89df85 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -243,10 +243,10 @@ def get_sources(config=None): config = Config() sources = [] - if config.analyte: - allsources = config.analyte_sources() - else: + if config.parameter == "Waterlevels": allsources = config.water_level_sources() + else: + allsources = config.analyte_sources() for source, _ in allsources: if source.intersects(config.bounding_wkt()): diff --git a/backend/worker.py b/backend/worker.py index 5e3384b..049c0ee 100644 --- a/backend/worker.py +++ b/backend/worker.py @@ -62,8 +62,7 @@ def sources_handler(): if polygon: config.wkt = polygon - if parameter: - config.analyte = parameter + config.parameter = parameter sources = get_sources(config) return make_cors_response({"sources": [s.tag for s in sources]}) diff --git a/frontend/cli.py b/frontend/cli.py index 7d79d99..76e244d 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -163,6 +163,8 @@ def cli(): ) ] +PARAMETER_OPTIONS = ["Waterlevels"] + ANALYTE_CHOICES + def add_options(options): def _add_options(func): @@ -184,13 +186,21 @@ def 
wells(bbox, county): unify_sites(config) + + @cli.command() +@click.argument( + "parameter", + type=click.Choice(PARAMETER_OPTIONS), + required=True, +) @add_options(OUTPUT_OPTIONS) @add_options(DT_OPTIONS) @add_options(SPATIAL_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) -def waterlevels( +def parameter( + parameter, output, start_date, end_date, @@ -208,80 +218,12 @@ def waterlevels( site_limit, dry, ): - # instantiate config - config = setup_config("waterlevels", bbox, county, site_limit, dry) - - # output type - if output == "summary": - summary = True - timeseries_unified = False - timeseries_separated = False - elif output == "timeseries_unified": - summary = False - timeseries_unified = True - timeseries_separated = False - elif output == "timeseries_separated": - summary = False - timeseries_unified = False - timeseries_separated = True - - config.output_summary = summary - config.output_timeseries_unified = timeseries_unified - config.output_timeseries_separated = timeseries_separated - - # sources - config.use_source_bernco = no_bernco - config.use_source_nmbgmr_amp = no_nmbgmr_amp - config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers - config.use_source_nmose_roswell = no_nmose_roswell - config.use_source_nwis = no_nwis - config.use_source_pvacd = no_pvacd - - config.use_source_bor = False - config.use_source_nmed_dwb = False - config.use_source_wqp = False - - # dates - config.start_date = start_date - config.end_date = end_date - - if not dry: - config.report() - # prompt user to continue - if not click.confirm("Do you want to continue?", default=True): - return - - unify_waterlevels(config) - - -@cli.command() -@click.argument("analyte", type=click.Choice(ANALYTE_CHOICES)) -@add_options(OUTPUT_OPTIONS) -@add_options(DT_OPTIONS) -@add_options(SPATIAL_OPTIONS) -@add_options(ALL_SOURCE_OPTIONS) -@add_options(DEBUG_OPTIONS) -def analytes( - analyte, - output, - start_date, - end_date, - bbox, - county, - 
no_bernco, # has no analyte measurements - no_bor, - no_nmbgmr_amp, - no_nmed_dwb, - no_nmose_isc_seven_rivers, - no_nmose_roswell, # has no analyte measurements - no_nwis, # has no analyte measurements - no_pvacd, # has no analyte measurements - no_wqp, - site_limit, - dry, -): - # instantiate config - config = setup_config(f"analytes ({analyte})", bbox, county, site_limit, dry) + """ + Get parameter summary or timeseries data + """ + # instantiate config and set up parameter + config = setup_config(f"parameter {parameter}", bbox, county, site_limit, dry) + config.parameter = parameter # output type if output == "summary": @@ -302,19 +244,28 @@ def analytes( config.output_timeseries_separated = timeseries_separated # sources - config.use_source_bor = no_bor - config.use_source_nmbgmr_amp = no_nmbgmr_amp - config.use_source_nmed_dwb = no_nmed_dwb - config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers - config.use_source_wqp = no_wqp - - config.use_source_bernco = False - config.use_source_nmose_roswell = False - config.use_source_nwis = False - config.use_source_pvacd = False - - # analyte - config.analyte = analyte + if parameter == "Waterlevels": + config.use_source_bernco = no_bernco + config.use_source_nmbgmr_amp = no_nmbgmr_amp + config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers + config.use_source_nmose_roswell = no_nmose_roswell + config.use_source_nwis = no_nwis + config.use_source_pvacd = no_pvacd + + config.use_source_bor = False + config.use_source_nmed_dwb = False + config.use_source_wqp = False + else: + config.use_source_bor = no_bor + config.use_source_nmbgmr_amp = no_nmbgmr_amp + config.use_source_nmed_dwb = no_nmed_dwb + config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers + config.use_source_wqp = no_wqp + + config.use_source_bernco = False + config.use_source_nmose_roswell = False + config.use_source_nwis = False + config.use_source_pvacd = False # dates config.start_date = start_date @@ -326,8 
+277,12 @@ def analytes( if not click.confirm("Do you want to continue?", default=True): return - unify_analytes(config) + if parameter == "Waterlevels": + unify_waterlevels(config) + else: + unify_analytes(config) + @cli.command() @add_options(SPATIAL_OPTIONS) From c3a8d31f5f0a8af0b2c6fcb61382982e165c0725 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Mon, 27 Jan 2025 23:57:49 +0000 Subject: [PATCH 17/53] Formatting changes --- backend/config.py | 2 +- backend/connectors/nmbgmr/source.py | 4 +++- frontend/cli.py | 5 +---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/backend/config.py b/backend/config.py index 270a473..766baba 100644 --- a/backend/config.py +++ b/backend/config.py @@ -116,7 +116,7 @@ class Config(Loggable): use_source_pvacd: bool = False use_source_wqp: bool = False - # parameter + # parameter parameter: str = "" # output diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index 0524d60..a26fef3 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -108,7 +108,9 @@ class NMBGMRAnalyteSource(BaseAnalyteSource): transformer_klass = NMBGMRAnalyteTransformer def get_records(self, site_record): - analyte = get_analyte_search_param(self.config.parameter, NMBGMR_ANALYTE_MAPPING) + analyte = get_analyte_search_param( + self.config.parameter, NMBGMR_ANALYTE_MAPPING + ) records = self._execute_json_request( _make_url("waterchemistry"), params={ diff --git a/frontend/cli.py b/frontend/cli.py index 76e244d..33e138b 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -186,8 +186,6 @@ def wells(bbox, county): unify_sites(config) - - @cli.command() @click.argument( "parameter", @@ -277,12 +275,11 @@ def parameter( if not click.confirm("Do you want to continue?", default=True): return - if parameter == "Waterlevels": unify_waterlevels(config) else: unify_analytes(config) - + @cli.command() @add_options(SPATIAL_OPTIONS) From a0cd1d616fd27c8b409ea1b02dbb62addbaaa207 Mon 
Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 28 Jan 2025 09:30:02 -0700 Subject: [PATCH 18/53] Rearrange sources for alphabetical order --- frontend/cli.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 33e138b..b6cd3c8 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -46,6 +46,13 @@ def cli(): show_default=True, help="Exclude BoR data. Default is to include", ), + click.option( + "--no-nmbgmr-amp", + is_flag=True, + default=True, + show_default=True, + help="Exclude NMBGMR AMP data. Default is to include", + ), click.option( "--no-nmed-dwb", is_flag=True, @@ -67,13 +74,6 @@ def cli(): show_default=True, help="Exclude NMOSE Roswell data. Default is to include", ), - click.option( - "--no-nmbgmr-amp", - is_flag=True, - default=True, - show_default=True, - help="Exclude NMBGMR AMP data. Default is to include", - ), click.option( "--no-nwis", is_flag=True, From 5ef2d0b23bc253d569773631d2b33e6b1b10265f Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 28 Jan 2025 10:54:11 -0700 Subject: [PATCH 19/53] Call weave by the parameter name without any flags This allows the user to call `weave {parameter name}` for ease of use, rather than `weave parameter {parameter name}` --- README.md | 4 +- frontend/cli.py | 188 ++++++++++++++++++++++++------------------------ 2 files changed, 94 insertions(+), 98 deletions(-) diff --git a/README.md b/README.md index 67beaa3..a77ebaa 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ The Data Integration Engine enables the user to obtain groundwater level and gro To obtain groundwater levels, use ``` -weave parameter Waterlevels +weave Waterlevels ``` followed by the desired output type, source filters, date filters, geographic filters, and excluded data sources. 
@@ -64,7 +64,7 @@ followed by the desired output type, source filters, date filters, geographic fi To obtain groundwater quality, use ``` -weave paramater {analyte} +weave {analyte} ``` where `{analyte}` is the name of the analyte whose data is to be retrieved. diff --git a/frontend/cli.py b/frontend/cli.py index b6cd3c8..9f2b7f7 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -174,6 +174,98 @@ def _add_options(func): return _add_options +def create_command(parameter): + @cli.command(name=parameter) + @add_options(OUTPUT_OPTIONS) + @add_options(DT_OPTIONS) + @add_options(SPATIAL_OPTIONS) + @add_options(ALL_SOURCE_OPTIONS) + @add_options(DEBUG_OPTIONS) + def command( + output, + start_date, + end_date, + bbox, + county, + no_bernco, + no_bor, # has no water levels + no_nmbgmr_amp, + no_nmed_dwb, # has no water levels + no_nmose_isc_seven_rivers, + no_nmose_roswell, + no_nwis, + no_pvacd, + no_wqp, # has no water levels + site_limit, + dry, + ): + ''' + Get {parameter} summary or timeseries data + ''' + # instantiate config and set up parameter + config = setup_config(f"{parameter}", bbox, county, site_limit, dry) + config.parameter = parameter + + # output type + if output == "summary": + summary = True + timeseries_unified = False + timeseries_separated = False + elif output == "timeseries_unified": + summary = False + timeseries_unified = True + timeseries_separated = False + elif output == "timeseries_separated": + summary = False + timeseries_unified = False + timeseries_separated = True + + config.output_summary = summary + config.output_timeseries_unified = timeseries_unified + config.output_timeseries_separated = timeseries_separated + + # sources + if parameter == "Waterlevels": + config.use_source_bernco = no_bernco + config.use_source_nmbgmr_amp = no_nmbgmr_amp + config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers + config.use_source_nmose_roswell = no_nmose_roswell + config.use_source_nwis = no_nwis + config.use_source_pvacd = 
no_pvacd + + config.use_source_bor = False + config.use_source_nmed_dwb = False + config.use_source_wqp = False + else: + config.use_source_bor = no_bor + config.use_source_nmbgmr_amp = no_nmbgmr_amp + config.use_source_nmed_dwb = no_nmed_dwb + config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers + config.use_source_wqp = no_wqp + + config.use_source_bernco = False + config.use_source_nmose_roswell = False + config.use_source_nwis = False + config.use_source_pvacd = False + + # dates + config.start_date = start_date + config.end_date = end_date + + if not dry: + config.report() + # prompt user to continue + if not click.confirm("Do you want to continue?", default=True): + return + + if parameter == "Waterlevels": + unify_waterlevels(config) + else: + unify_analytes(config) + + +for parameter in PARAMETER_OPTIONS: + create_command(parameter) @cli.command() @add_options(SPATIAL_OPTIONS) @@ -185,102 +277,6 @@ def wells(bbox, county): config = setup_config("sites", bbox, county) unify_sites(config) - -@cli.command() -@click.argument( - "parameter", - type=click.Choice(PARAMETER_OPTIONS), - required=True, -) -@add_options(OUTPUT_OPTIONS) -@add_options(DT_OPTIONS) -@add_options(SPATIAL_OPTIONS) -@add_options(ALL_SOURCE_OPTIONS) -@add_options(DEBUG_OPTIONS) -def parameter( - parameter, - output, - start_date, - end_date, - bbox, - county, - no_bernco, - no_bor, # has no water levels - no_nmbgmr_amp, - no_nmed_dwb, # has no water levels - no_nmose_isc_seven_rivers, - no_nmose_roswell, - no_nwis, - no_pvacd, - no_wqp, # has no water levels - site_limit, - dry, -): - """ - Get parameter summary or timeseries data - """ - # instantiate config and set up parameter - config = setup_config(f"parameter {parameter}", bbox, county, site_limit, dry) - config.parameter = parameter - - # output type - if output == "summary": - summary = True - timeseries_unified = False - timeseries_separated = False - elif output == "timeseries_unified": - summary = False - 
timeseries_unified = True - timeseries_separated = False - elif output == "timeseries_separated": - summary = False - timeseries_unified = False - timeseries_separated = True - - config.output_summary = summary - config.output_timeseries_unified = timeseries_unified - config.output_timeseries_separated = timeseries_separated - - # sources - if parameter == "Waterlevels": - config.use_source_bernco = no_bernco - config.use_source_nmbgmr_amp = no_nmbgmr_amp - config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers - config.use_source_nmose_roswell = no_nmose_roswell - config.use_source_nwis = no_nwis - config.use_source_pvacd = no_pvacd - - config.use_source_bor = False - config.use_source_nmed_dwb = False - config.use_source_wqp = False - else: - config.use_source_bor = no_bor - config.use_source_nmbgmr_amp = no_nmbgmr_amp - config.use_source_nmed_dwb = no_nmed_dwb - config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers - config.use_source_wqp = no_wqp - - config.use_source_bernco = False - config.use_source_nmose_roswell = False - config.use_source_nwis = False - config.use_source_pvacd = False - - # dates - config.start_date = start_date - config.end_date = end_date - - if not dry: - config.report() - # prompt user to continue - if not click.confirm("Do you want to continue?", default=True): - return - - if parameter == "Waterlevels": - unify_waterlevels(config) - else: - unify_analytes(config) - - @cli.command() @add_options(SPATIAL_OPTIONS) def sources(bbox, county): From 59db0621b70f15679c33260b7589ca0776c1f4e5 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 28 Jan 2025 11:56:04 -0700 Subject: [PATCH 20/53] Updated setup url --- frontend/cli.py | 7 +++---- setup.py | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 9f2b7f7..60fde31 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -199,9 +199,9 @@ def command( site_limit, dry, ): - ''' - Get {parameter} summary or 
timeseries data - ''' + """ + Get timeseries or summary data + """ # instantiate config and set up parameter config = setup_config(f"{parameter}", bbox, county, site_limit, dry) config.parameter = parameter @@ -262,7 +262,6 @@ def command( unify_waterlevels(config) else: unify_analytes(config) - for parameter in PARAMETER_OPTIONS: create_command(parameter) diff --git a/setup.py b/setup.py index 8313973..0402965 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ description="New Mexico Water Data Integration Engine", long_description=long_description, long_description_content_type="text/markdown", - url="https://github.com/DataIntegrationGroup/PyWeaver", + url="https://github.com/DataIntegrationGroup/DataIntegrationEngine", classifiers=[ "Programming Language :: Python :: 3", "Operating System :: OS Independent", From 50cb939bc4ed7f790efd9768846bdc8673fec00e Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 28 Jan 2025 11:57:33 -0700 Subject: [PATCH 21/53] Update version to 0.2.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0402965..e3a9052 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup( name="nmuwd", - version="0.2.0", + version="0.2.1", author="Jake Ross", description="New Mexico Water Data Integration Engine", long_description=long_description, From 761f7e3994f27e2eedccde5e1e9ddf597bb5cf29 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Tue, 28 Jan 2025 18:59:33 +0000 Subject: [PATCH 22/53] Formatting changes --- frontend/cli.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/frontend/cli.py b/frontend/cli.py index 60fde31..d3e6965 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -174,6 +174,7 @@ def _add_options(func): return _add_options + def create_command(parameter): @cli.command(name=parameter) @add_options(OUTPUT_OPTIONS) @@ -263,9 +264,11 @@ def command( else: unify_analytes(config) + for parameter in PARAMETER_OPTIONS: create_command(parameter) + 
@cli.command() @add_options(SPATIAL_OPTIONS) def wells(bbox, county): @@ -276,6 +279,7 @@ def wells(bbox, county): config = setup_config("sites", bbox, county) unify_sites(config) + @cli.command() @add_options(SPATIAL_OPTIONS) def sources(bbox, county): From ffd07296bf8f8f9b65ac8d64dec0d5f3fb5140db Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 28 Jan 2025 12:06:56 -0700 Subject: [PATCH 23/53] Removed attestations for publishing to pypi --- .github/workflows/publish-to-pypi.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index a07045c..85730c3 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -36,3 +36,5 @@ jobs: - name: Publish distribution 📦 to PyPI if: startsWith(github.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@release/v1 + with: + attestations: false From 8305f317683586979cdf621193274b260c6981c2 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 28 Jan 2025 12:07:26 -0700 Subject: [PATCH 24/53] Update version to 0.2.2 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e3a9052..62361a8 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup( name="nmuwd", - version="0.2.1", + version="0.2.2", author="Jake Ross", description="New Mexico Water Data Integration Engine", long_description=long_description, From 46309bc9ecdc8e0815cda75301b5815a204b97a4 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 28 Jan 2025 13:59:32 -0700 Subject: [PATCH 25/53] Call weave as die command Now weave is called with `die weave {parameter}` instead of `weave {parameter}` so that `sources` and `wells` are die commands. 
--- frontend/cli.py | 201 ++++++++++++++++++++++++++---------------------- setup.py | 2 +- 2 files changed, 109 insertions(+), 94 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index d3e6965..c7eaa76 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -175,98 +175,103 @@ def _add_options(func): return _add_options -def create_command(parameter): - @cli.command(name=parameter) - @add_options(OUTPUT_OPTIONS) - @add_options(DT_OPTIONS) - @add_options(SPATIAL_OPTIONS) - @add_options(ALL_SOURCE_OPTIONS) - @add_options(DEBUG_OPTIONS) - def command( - output, - start_date, - end_date, - bbox, - county, - no_bernco, - no_bor, # has no water levels - no_nmbgmr_amp, - no_nmed_dwb, # has no water levels - no_nmose_isc_seven_rivers, - no_nmose_roswell, - no_nwis, - no_pvacd, - no_wqp, # has no water levels - site_limit, - dry, - ): - """ - Get timeseries or summary data - """ - # instantiate config and set up parameter - config = setup_config(f"{parameter}", bbox, county, site_limit, dry) - config.parameter = parameter - - # output type - if output == "summary": - summary = True - timeseries_unified = False - timeseries_separated = False - elif output == "timeseries_unified": - summary = False - timeseries_unified = True - timeseries_separated = False - elif output == "timeseries_separated": - summary = False - timeseries_unified = False - timeseries_separated = True - - config.output_summary = summary - config.output_timeseries_unified = timeseries_unified - config.output_timeseries_separated = timeseries_separated - - # sources - if parameter == "Waterlevels": - config.use_source_bernco = no_bernco - config.use_source_nmbgmr_amp = no_nmbgmr_amp - config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers - config.use_source_nmose_roswell = no_nmose_roswell - config.use_source_nwis = no_nwis - config.use_source_pvacd = no_pvacd - - config.use_source_bor = False - config.use_source_nmed_dwb = False - config.use_source_wqp = False - else: - 
config.use_source_bor = no_bor - config.use_source_nmbgmr_amp = no_nmbgmr_amp - config.use_source_nmed_dwb = no_nmed_dwb - config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers - config.use_source_wqp = no_wqp - - config.use_source_bernco = False - config.use_source_nmose_roswell = False - config.use_source_nwis = False - config.use_source_pvacd = False - - # dates - config.start_date = start_date - config.end_date = end_date - - if not dry: - config.report() - # prompt user to continue - if not click.confirm("Do you want to continue?", default=True): - return - - if parameter == "Waterlevels": - unify_waterlevels(config) - else: - unify_analytes(config) - - -for parameter in PARAMETER_OPTIONS: - create_command(parameter) +@cli.command() +@click.argument( + "weave", + type=click.Choice( + PARAMETER_OPTIONS, + case_sensitive=False + ), + required=True, +) +@add_options(OUTPUT_OPTIONS) +@add_options(DT_OPTIONS) +@add_options(SPATIAL_OPTIONS) +@add_options(ALL_SOURCE_OPTIONS) +@add_options(DEBUG_OPTIONS) +def weave( + weave, + output, + start_date, + end_date, + bbox, + county, + no_bernco, + no_bor, # has no water levels + no_nmbgmr_amp, + no_nmed_dwb, # has no water levels + no_nmose_isc_seven_rivers, + no_nmose_roswell, + no_nwis, + no_pvacd, + no_wqp, # has no water levels + site_limit, + dry, +): + """ + Get parameter timeseries or summary data + """ + parameter = weave + # instantiate config and set up parameter + config = setup_config(f"{parameter}", bbox, county, site_limit, dry) + config.parameter = parameter + + # output type + if output == "summary": + summary = True + timeseries_unified = False + timeseries_separated = False + elif output == "timeseries_unified": + summary = False + timeseries_unified = True + timeseries_separated = False + elif output == "timeseries_separated": + summary = False + timeseries_unified = False + timeseries_separated = True + + config.output_summary = summary + config.output_timeseries_unified = 
timeseries_unified + config.output_timeseries_separated = timeseries_separated + + # sources + if parameter == "Waterlevels": + config.use_source_bernco = no_bernco + config.use_source_nmbgmr_amp = no_nmbgmr_amp + config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers + config.use_source_nmose_roswell = no_nmose_roswell + config.use_source_nwis = no_nwis + config.use_source_pvacd = no_pvacd + + config.use_source_bor = False + config.use_source_nmed_dwb = False + config.use_source_wqp = False + else: + config.use_source_bor = no_bor + config.use_source_nmbgmr_amp = no_nmbgmr_amp + config.use_source_nmed_dwb = no_nmed_dwb + config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers + config.use_source_wqp = no_wqp + + config.use_source_bernco = False + config.use_source_nmose_roswell = False + config.use_source_nwis = False + config.use_source_pvacd = False + + # dates + config.start_date = start_date + config.end_date = end_date + + if not dry: + config.report() + # prompt user to continue + if not click.confirm("Do you want to continue?", default=True): + return + + if parameter == "Waterlevels": + unify_waterlevels(config) + else: + unify_analytes(config) @cli.command() @@ -281,8 +286,16 @@ def wells(bbox, county): @cli.command() +@click.argument( + "sources", + type=click.Choice( + PARAMETER_OPTIONS, + case_sensitive=False + ), + required=True, +) @add_options(SPATIAL_OPTIONS) -def sources(bbox, county): +def sources(sources, bbox, county): """ List available sources """ @@ -294,6 +307,8 @@ def sources(bbox, county): elif bbox: config.bbox = bbox + parameter = sources + config.parameter = parameter sources = get_sources(config) for s in sources: click.echo(s) diff --git a/setup.py b/setup.py index 62361a8..93c0130 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ install_requires=["click", "httpx", "geopandas", "frost_sta_client"], entry_points={ "console_scripts": [ - "weave = frontend.cli:cli", + "die = frontend.cli:cli", ], }, 
packages=["frontend", "backend"] From 0972992abdfdbf4961431a687c84ee04519d4c04 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 28 Jan 2025 14:00:30 -0700 Subject: [PATCH 26/53] Print sources when user calls die sources --- backend/config.py | 18 +++++++++--------- backend/unifier.py | 5 ++++- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/backend/config.py b/backend/config.py index 766baba..4b393db 100644 --- a/backend/config.py +++ b/backend/config.py @@ -106,15 +106,15 @@ class Config(Loggable): wkt: str = "" # sources - use_source_bernco: bool = False - use_source_bor: bool = False - use_source_nmbgmr_amp: bool = False - use_source_nmed_dwb: bool = False - use_source_nmose_isc_seven_rivers: bool = False - use_source_nmose_roswell: bool = False - use_source_nwis: bool = False - use_source_pvacd: bool = False - use_source_wqp: bool = False + use_source_bernco: bool = True + use_source_bor: bool = True + use_source_nmbgmr_amp: bool = True + use_source_nmed_dwb: bool = True + use_source_nmose_isc_seven_rivers: bool = True + use_source_nmose_roswell: bool = True + use_source_nwis: bool = True + use_source_pvacd: bool = True + use_source_wqp: bool = True # parameter parameter: str = "" diff --git a/backend/unifier.py b/backend/unifier.py index c89df85..82b687d 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -249,7 +249,10 @@ def get_sources(config=None): allsources = config.analyte_sources() for source, _ in allsources: - if source.intersects(config.bounding_wkt()): + if config.wkt or config.bbox or config.county: + if source.intersects(config.bounding_wkt()): + sources.append(source) + else: sources.append(source) return sources From 42f2c3254cbdd4db360e51cb90af8548c6356ba2 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 28 Jan 2025 15:03:41 -0700 Subject: [PATCH 27/53] Standardize waterlevel records to be the same as analyte records This is done so that the tables are the same for both analytes and waterlevels. 
The information isn't changing, but the two tables now have the same column headers --- backend/connectors/ckan/source.py | 7 ++++--- backend/connectors/isc_seven_rivers/source.py | 7 +++++-- backend/connectors/nmbgmr/source.py | 5 +++-- backend/connectors/st2/source.py | 7 ++++--- backend/connectors/usgs/source.py | 7 ++++--- backend/constants.py | 2 +- backend/record.py | 4 +++- backend/source.py | 2 +- 8 files changed, 25 insertions(+), 16 deletions(-) diff --git a/backend/connectors/ckan/source.py b/backend/connectors/ckan/source.py index da3f373..16a58d2 100644 --- a/backend/connectors/ckan/source.py +++ b/backend/connectors/ckan/source.py @@ -31,7 +31,7 @@ OSERoswellSiteTransformer, OSERoswellWaterLevelTransformer, ) -from backend.constants import FEET, DTW, DTW_UNITS, DT_MEASURED +from backend.constants import FEET, DTW, DTW_UNITS, DT_MEASURED, PARAMETER, PARAMETER_UNITS, PARAMETER_VALUE from backend.source import ( BaseSource, BaseSiteSource, @@ -130,9 +130,10 @@ def _extract_parameter_dates(self, records: list) -> list: return [r["Date"] for r in records] def _extract_parameter_record(self, record): - record[DTW] = float(record["DTWGS"]) + record[PARAMETER] = DTW + record[PARAMETER_VALUE] = float(record["DTWGS"]) + record[PARAMETER_UNITS] = FEET record[DT_MEASURED] = record["Date"] - record[DTW_UNITS] = FEET return record def _clean_records(self, records: list) -> list: diff --git a/backend/connectors/isc_seven_rivers/source.py b/backend/connectors/isc_seven_rivers/source.py index 92a1130..5480632 100644 --- a/backend/connectors/isc_seven_rivers/source.py +++ b/backend/connectors/isc_seven_rivers/source.py @@ -29,6 +29,7 @@ DT_MEASURED, DTW_UNITS, DTW, + PARAMETER, PARAMETER_VALUE, PARAMETER_UNITS, ) @@ -103,6 +104,7 @@ def _get_analyte_id(self, analyte): return self._analyte_ids.get(analyte) def _extract_parameter_record(self, record): + record[PARAMETER] = self.config.parameter record[PARAMETER_VALUE] = record["result"] record[PARAMETER_UNITS] = 
record["units"] record[DT_MEASURED] = get_datetime(record) @@ -166,8 +168,9 @@ def _clean_records(self, records): return [r for r in records if r["depthToWaterFeet"] is not None] def _extract_parameter_record(self, record): - record[DTW] = record["depthToWaterFeet"] - record[DTW_UNITS] = FEET + record[PARAMETER] = DTW + record[PARAMETER_VALUE] = record["depthToWaterFeet"] + record[PARAMETER_UNITS] = FEET record[DT_MEASURED] = get_datetime(record) return record diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index a26fef3..40f2d7a 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -161,9 +161,10 @@ def _clean_records(self, records): return [r for r in records if r["DepthToWaterBGS"] is not None] def _extract_parameter_record(self, record, *args, **kw): - record[DTW] = record["DepthToWaterBGS"] + record[PARAMETER] = DTW + record[PARAMETER_VALUE] = record["DepthToWaterBGS"] + record[PARAMETER_UNITS] = FEET record[DT_MEASURED] = (record["DateMeasured"], record["TimeMeasured"]) - record[DTW_UNITS] = FEET return record def _extract_most_recent(self, records): diff --git a/backend/connectors/st2/source.py b/backend/connectors/st2/source.py index 23f7db9..f06e4a3 100644 --- a/backend/connectors/st2/source.py +++ b/backend/connectors/st2/source.py @@ -31,7 +31,7 @@ STWaterLevelSource, make_dt_filter, ) -from backend.constants import DTW, DTW_UNITS, DT_MEASURED +from backend.constants import DTW, DTW_UNITS, DT_MEASURED, PARAMETER, PARAMETER_VALUE, PARAMETER_UNITS from backend.source import BaseSiteSource, BaseWaterLevelSource, get_most_recent URL = "https://st2.newmexicowaterdata.org/FROST-Server/v1.1" @@ -80,8 +80,9 @@ def _extract_most_recent(self, records): } def _extract_parameter_record(self, record): - record[DTW] = record["observation"].result - record[DTW_UNITS] = record["datastream"].unit_of_measurement.symbol + record[PARAMETER] = DTW + record[PARAMETER_VALUE] = 
record["observation"].result + record[PARAMETER_UNITS] = record["datastream"].unit_of_measurement.symbol record[DT_MEASURED] = record["observation"].phenomenon_time return record diff --git a/backend/connectors/usgs/source.py b/backend/connectors/usgs/source.py index b1c330a..b1e704a 100644 --- a/backend/connectors/usgs/source.py +++ b/backend/connectors/usgs/source.py @@ -17,7 +17,7 @@ import httpx from backend.connectors import NM_STATE_BOUNDING_POLYGON -from backend.constants import FEET, DTW, DTW_UNITS, DT_MEASURED +from backend.constants import FEET, DTW, DTW_UNITS, DT_MEASURED, PARAMETER, PARAMETER_VALUE, PARAMETER_UNITS from backend.connectors.usgs.transformer import ( NWISSiteTransformer, NWISWaterLevelTransformer, @@ -177,8 +177,9 @@ def _extract_most_recent(self, records): } def _extract_parameter_record(self, record): - record[DTW] = float(record["value"]) - record[DTW_UNITS] = FEET + record[PARAMETER] = DTW + record[PARAMETER_VALUE] = float(record["value"]) + record[PARAMETER_UNITS] = FEET # record[DT_MEASURED] = (record["date_measured"], record["time_measured"]) record[DT_MEASURED] = record["datetime_measured"] return record diff --git a/backend/constants.py b/backend/constants.py index 2064a87..fe6fd8b 100644 --- a/backend/constants.py +++ b/backend/constants.py @@ -43,7 +43,7 @@ DT_MEASURED = "datetime_measured" -DTW = "depth_to_water_ft_below_ground_surface" +DTW = "depth_to_water_below_ground_surface" DTW_UNITS = FEET PARAMETER = "parameter" diff --git a/backend/record.py b/backend/record.py index 487d644..7c257bf 100644 --- a/backend/record.py +++ b/backend/record.py @@ -72,7 +72,9 @@ class WaterLevelRecord(BaseRecord): # "longitude", # "surface_elevation_ft", # "well_depth_ft_below_ground_surface", - DTW, + PARAMETER, + PARAMETER_VALUE, + PARAMETER_UNITS, "date_measured", "time_measured", ) diff --git a/backend/source.py b/backend/source.py index 3cbbabd..8ab4ac3 100644 --- a/backend/source.py +++ b/backend/source.py @@ -1010,7 +1010,7 @@ def 
_extract_parameter_units(self, records): return [FEET for _ in records] def _validate_record(self, record): - for k in (DTW, DTW_UNITS, DT_MEASURED): + for k in (PARAMETER_VALUE, PARAMETER_UNITS, DT_MEASURED): if k not in record: raise ValueError(f"Invalid record. Missing {k}") From 057ff2f84a7028f470433517281a04ab0afa1200 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 28 Jan 2025 16:29:55 -0700 Subject: [PATCH 28/53] Work on rounding water level values to 2 places --- backend/record.py | 16 ++++++++++++---- backend/source.py | 8 -------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/backend/record.py b/backend/record.py index 7c257bf..323e0e1 100644 --- a/backend/record.py +++ b/backend/record.py @@ -30,7 +30,8 @@ def get(attr): # if v is None and self.defaults: # v = self.defaults.get(attr) v = self.__getattr__(attr) - for key, sigfigs in ( + + field_sigfigs = [ ("elevation", 2), ("depth_to_water_ft_below_ground_surface", 2), ("surface_elevation_ft", 2), @@ -41,12 +42,19 @@ def get(attr): ("min", 2), ("max", 2), ("mean", 2), - ): - if v is not None and key == attr: + ] + + # both analyte and water level tables have the same fields, but the + # rounding should only occur for water level tables + if isinstance(self, WaterLevelRecord): + field_sigfigs.append((PARAMETER, 2)) + + for field, sigfigs in field_sigfigs: + if v is not None and field == attr: try: v = round(v, sigfigs) except TypeError as e: - print(key, attr) + print(field, attr) raise e break return v diff --git a/backend/source.py b/backend/source.py index 8ab4ac3..1de8f09 100644 --- a/backend/source.py +++ b/backend/source.py @@ -586,18 +586,10 @@ class BaseParameterSource(BaseSource): _extract_parameter_record Returns a parameter record with standardized fields added. 
- For an analyte, the fields are - - backend.constants.PARAMETER - backend.constants.PARAMETER_VALUE - backend.constants.PARAMETER_UNITS - For a water level, the fields are - - - backend.constants.DTW - - backend.constants.DTW_UNITS - - backend.constants.DT_MEASURED - _extract_parameter_results Returns the parameter results as a list from the records, in the same order as the records themselves """ From d1ce3171291085bda07235302c34c44c33701386 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 28 Jan 2025 16:47:12 -0700 Subject: [PATCH 29/53] Round water level parameter values to 2 decimal places This is to get rid of floating point errors and keep results consistent with old data --- backend/record.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/record.py b/backend/record.py index 323e0e1..2f91ca0 100644 --- a/backend/record.py +++ b/backend/record.py @@ -47,7 +47,7 @@ def get(attr): # both analyte and water level tables have the same fields, but the # rounding should only occur for water level tables if isinstance(self, WaterLevelRecord): - field_sigfigs.append((PARAMETER, 2)) + field_sigfigs.append((PARAMETER_VALUE, 2)) for field, sigfigs in field_sigfigs: if v is not None and field == attr: From 66a1efc6c2902cd97d603f484a87de16d7746ac6 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 28 Jan 2025 16:48:30 -0700 Subject: [PATCH 30/53] Remove deprecated fields from record --- backend/record.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/backend/record.py b/backend/record.py index 2f91ca0..a230812 100644 --- a/backend/record.py +++ b/backend/record.py @@ -33,9 +33,6 @@ def get(attr): field_sigfigs = [ ("elevation", 2), - ("depth_to_water_ft_below_ground_surface", 2), - ("surface_elevation_ft", 2), - ("well_depth_ft_below_ground_surface", 2), ("well_depth", 2), ("latitude", 6), ("longitude", 6), From 4959fc5d013a4013a021cdae7623b299f40393ea Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 28 Jan 2025 17:11:51 -0700 
Subject: [PATCH 31/53] Update water level summary parameter name to be consistent with timeseries tables --- backend/transformer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/transformer.py b/backend/transformer.py index ae7cd45..263e5b7 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -29,6 +29,7 @@ MICROGRAMS_PER_LITER, DT_MEASURED, PARAMETER_UNITS, + DTW ) from backend.geo_utils import datum_transform, ALLOWED_DATUMS from backend.logging import Loggable @@ -759,7 +760,7 @@ def _get_parameter(self) -> tuple: tuple The parameter and units for the water level records """ - return "DTW BGS", self.config.waterlevel_output_units + return DTW, self.config.waterlevel_output_units class AnalyteTransformer(ParameterTransformer): From d86cd77cdf943a4b2fb5a2164a59689443ea6872 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Wed, 29 Jan 2025 16:45:37 +0000 Subject: [PATCH 32/53] Formatting changes --- backend/connectors/ckan/source.py | 10 +++++++++- backend/connectors/st2/source.py | 9 ++++++++- backend/connectors/usgs/source.py | 10 +++++++++- backend/transformer.py | 2 +- frontend/cli.py | 10 ++-------- 5 files changed, 29 insertions(+), 12 deletions(-) diff --git a/backend/connectors/ckan/source.py b/backend/connectors/ckan/source.py index 16a58d2..7624c05 100644 --- a/backend/connectors/ckan/source.py +++ b/backend/connectors/ckan/source.py @@ -31,7 +31,15 @@ OSERoswellSiteTransformer, OSERoswellWaterLevelTransformer, ) -from backend.constants import FEET, DTW, DTW_UNITS, DT_MEASURED, PARAMETER, PARAMETER_UNITS, PARAMETER_VALUE +from backend.constants import ( + FEET, + DTW, + DTW_UNITS, + DT_MEASURED, + PARAMETER, + PARAMETER_UNITS, + PARAMETER_VALUE, +) from backend.source import ( BaseSource, BaseSiteSource, diff --git a/backend/connectors/st2/source.py b/backend/connectors/st2/source.py index f06e4a3..f32d52b 100644 --- a/backend/connectors/st2/source.py +++ b/backend/connectors/st2/source.py @@ -31,7 +31,14 @@ 
STWaterLevelSource, make_dt_filter, ) -from backend.constants import DTW, DTW_UNITS, DT_MEASURED, PARAMETER, PARAMETER_VALUE, PARAMETER_UNITS +from backend.constants import ( + DTW, + DTW_UNITS, + DT_MEASURED, + PARAMETER, + PARAMETER_VALUE, + PARAMETER_UNITS, +) from backend.source import BaseSiteSource, BaseWaterLevelSource, get_most_recent URL = "https://st2.newmexicowaterdata.org/FROST-Server/v1.1" diff --git a/backend/connectors/usgs/source.py b/backend/connectors/usgs/source.py index b1e704a..4cdafcd 100644 --- a/backend/connectors/usgs/source.py +++ b/backend/connectors/usgs/source.py @@ -17,7 +17,15 @@ import httpx from backend.connectors import NM_STATE_BOUNDING_POLYGON -from backend.constants import FEET, DTW, DTW_UNITS, DT_MEASURED, PARAMETER, PARAMETER_VALUE, PARAMETER_UNITS +from backend.constants import ( + FEET, + DTW, + DTW_UNITS, + DT_MEASURED, + PARAMETER, + PARAMETER_VALUE, + PARAMETER_UNITS, +) from backend.connectors.usgs.transformer import ( NWISSiteTransformer, NWISWaterLevelTransformer, diff --git a/backend/transformer.py b/backend/transformer.py index 263e5b7..d5069c4 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -29,7 +29,7 @@ MICROGRAMS_PER_LITER, DT_MEASURED, PARAMETER_UNITS, - DTW + DTW, ) from backend.geo_utils import datum_transform, ALLOWED_DATUMS from backend.logging import Loggable diff --git a/frontend/cli.py b/frontend/cli.py index c7eaa76..a992c05 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -178,10 +178,7 @@ def _add_options(func): @cli.command() @click.argument( "weave", - type=click.Choice( - PARAMETER_OPTIONS, - case_sensitive=False - ), + type=click.Choice(PARAMETER_OPTIONS, case_sensitive=False), required=True, ) @add_options(OUTPUT_OPTIONS) @@ -288,10 +285,7 @@ def wells(bbox, county): @cli.command() @click.argument( "sources", - type=click.Choice( - PARAMETER_OPTIONS, - case_sensitive=False - ), + type=click.Choice(PARAMETER_OPTIONS, case_sensitive=False), required=True, ) 
@add_options(SPATIAL_OPTIONS) From 4069dca4b3e565fd50f8655b294492951fdaa7dc Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 29 Jan 2025 09:48:06 -0700 Subject: [PATCH 33/53] Remove combined write functions since it is deprecated --- backend/persister.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/backend/persister.py b/backend/persister.py index b83d126..b4ecf64 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -37,7 +37,6 @@ def __init__(self): self.records = [] self.timeseries = [] self.sites = [] - # self.combined = [] super().__init__() # self.keys = record_klass.keys @@ -69,15 +68,6 @@ def dump_timeseries_separated(self, root: str): else: self.log("no timeseries records to dump", fg="red") - # def dump_combined(self, path: str): - # if self.combined: - # path = self.add_extension(path) - - # self.log(f"dumping combined to {os.path.abspath(path)}") - # self._dump_combined(path, self.combined) - # else: - # self.log("no combined records to dump", fg="red") - def dump_timeseries_unified(self, path: str): if self.timeseries: path = self.add_extension(path) @@ -113,9 +103,6 @@ def add_extension(self, path: str): def _write(self, path: str, records): raise NotImplementedError - # def _dump_combined(self, path: str, combined: list): - # raise NotImplementedError - def _dump_timeseries_unified(self, path: str, timeseries: list): raise NotImplementedError @@ -151,13 +138,6 @@ def dump_sites(writer, records): writer.writerow(site.to_row()) -# def dump_combined(writer, combined): -# for i, (site, record) in enumerate(combined): -# if i == 0: -# writer.writerow(site.keys + record.keys) -# writer.writerow(site.to_row() + record.to_row()) - - class CloudStoragePersister(BasePersister): extension = "csv" _content: list @@ -206,10 +186,6 @@ def _dump_timeseries_unified(self, path: str, timeseries: list): content = write_memory(path, dump_timeseries_unified, timeseries) self._add_content(path, content) - # def 
_dump_combined(self, path: str, combined: list): - # content = write_memory(path, dump_combined, combined) - # self._add_content(path, content) - class CSVPersister(BasePersister): extension = "csv" @@ -220,9 +196,6 @@ def _write(self, path: str, records: list): def _dump_timeseries_unified(self, path: str, timeseries: list): write_file(path, dump_timeseries_unified, timeseries) - # def _dump_combined(self, path: str, combined: list): - # write_file(path, dump_combined, combined) - class GeoJSONPersister(BasePersister): extension = "geojson" From ac78b7b24c207f63261e4c4875c1cb390771301f Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 29 Jan 2025 11:51:08 -0700 Subject: [PATCH 34/53] Ignore all outputs --- .gitignore | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 5c03936..cc04cc7 100644 --- a/.gitignore +++ b/.gitignore @@ -171,10 +171,4 @@ cython_debug/ #.idea/ # outputs -output_timeseries -output.combined.csv -output.csv -output.sites.csv -output.timeseries.csv -output.logs.txt -output.warnings.txt \ No newline at end of file +output* \ No newline at end of file From 0e0397ca79c78ce151038c7dc950bab61262191f Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 29 Jan 2025 12:39:05 -0700 Subject: [PATCH 35/53] Put each output for each call into unique output directory This is done so that outputs don't override each other. All outputs are now found in directories titled `output`. If the output directory already exists, the output will be placed in a directory with the same name but with a number appended to the end. For example, if the output directory `output` already exists, the output will be placed in `output_1`. If `output_1` already exists, the output will be placed in `output_2`, and so on. die.log is also placed in the output directory so that each tool call is recorded in a single log file. 
--- backend/config.py | 36 ++++++++++++++++++++++--- backend/logging.py | 3 +++ backend/persister.py | 62 +++++++++++++++++++++++++------------------- backend/unifier.py | 13 +++++----- frontend/cli.py | 12 +++++++-- 5 files changed, 88 insertions(+), 38 deletions(-) diff --git a/backend/config.py b/backend/config.py index 4b393db..32f3d8c 100644 --- a/backend/config.py +++ b/backend/config.py @@ -121,7 +121,7 @@ class Config(Loggable): # output use_cloud_storage: bool = False - output_dir: str = "" + output_dir: str = "." output_name: str = "output" output_horizontal_datum: str = WGS84 output_elevation_units: str = FEET @@ -320,8 +320,7 @@ def _report_attributes(title, attrs): _report_attributes( "Outputs", ( - "output_dir", - "output_name", + "output_path", "output_summary", "output_timeseries_unified", "output_timeseries_separated", @@ -383,6 +382,37 @@ def _validate_county(self): return True + def _update_output_name(self): + """ + Generate a unique output name based on existing directories in the output directory. + + If there are no directories with the string "output" in their name, the output name will be "output". + + If there is a directory called "output", then output_name will be "output_1". + + If there are directories called "output_{n}" where n is an integer, then output_name will be "output_{m+1}" + where m is the highest integer in the existing directories. 
+ """ + output_name = self.output_name + + # find if there are already directories with the string "output" their names + output_names = [name for name in os.listdir(self.output_dir) if os.path.isdir(name) and output_name in name] + + if len(output_names) > 0: + max_count = 0 + # find the highest number appended to directories with "output" in their name + counts = [name.split("_")[-1] for name in output_names if name.split("_")[-1].isdigit()] + counts = [int(count) for count in counts] + if len(counts) > 0: + max_count = max(counts) + output_name = f"{output_name}_{max_count + 1}" + + self.output_name = output_name + + def _make_output_path(self): + if not os.path.exists(self.output_path): + os.mkdir(self.output_path) + @property def start_dt(self): return self._extract_date(self.start_date) diff --git a/backend/logging.py b/backend/logging.py index 8a91519..f2d6cc7 100644 --- a/backend/logging.py +++ b/backend/logging.py @@ -15,6 +15,7 @@ # =============================================================================== import logging from logging.handlers import RotatingFileHandler +import os import click @@ -51,6 +52,8 @@ def setup_logging(level=None, log_format=None, path=None): if path is None: path = "die.log" + else: + path = os.path.join(path, "die.log") # shandler = logging.StreamHandler() rhandler = RotatingFileHandler(path, maxBytes=1e8, backupCount=50) diff --git a/backend/persister.py b/backend/persister.py index b4ecf64..25dd3af 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -30,6 +30,10 @@ class BasePersister(Loggable): + """ + Class to persist the data to a file or cloud storage. 
+ If persisting to a file, the output directory is created by config._make_output_path() + """ extension: str # output_id: str @@ -47,42 +51,46 @@ def load(self, records: list): def finalize(self, output_name: str): pass - def dump_timeseries_separated(self, root: str): - if self.timeseries: - if os.path.isdir(root): - self.log(f"root {root} already exists", fg="red") - shutil.rmtree(root) - - self._make_root_directory(root) - - for site, records in self.timeseries: - path = os.path.join(root, str(site.id).replace(" ", "_")) - path = self.add_extension(path) - self.log(f"dumping {site.id} to {os.path.abspath(path)}") - self._write(path, records) + def dump_sites(self, path: str): + if self.sites: + path = os.path.join(path, "sites") + path = self.add_extension(path) + self.log(f"dumping sites to {os.path.abspath(path)}") + self._write(path, self.sites) + else: + self.log("no sites to dump", fg="red") - # self._write( - # os.path.join(root, self.add_extension("sites")), - # [s[0] for s in self.timeseries], - # ) + def dump_summary(self, path: str): + if self.records: + path = os.path.join(path, "summary") + path = self.add_extension(path) + self.log(f"dumping summary to {os.path.abspath(path)}") + self._write(path, self.records) else: - self.log("no timeseries records to dump", fg="red") + self.log("no records to dump", fg="red") def dump_timeseries_unified(self, path: str): if self.timeseries: + path = os.path.join(path, "timeseries_unified") path = self.add_extension(path) self.log(f"dumping unified timeseries to {os.path.abspath(path)}") self._dump_timeseries_unified(path, self.timeseries) else: self.log("no timeseries records to dump", fg="red") - def dump_sites(self, path: str): - if self.sites: - path = self.add_extension(path) - self.log(f"dumping sites to {os.path.abspath(path)}") - self._write(path, self.sites) + def dump_timeseries_separated(self, path: str): + if self.timeseries: + # make timeseries path inside of config.output_path to which + # the 
individual site timeseries will be dumped + timeseries_path = os.path.join(path, "timeseries") + self._make_output_directory(timeseries_path) + for site, records in self.timeseries: + path = os.path.join(timeseries_path, str(site.id).replace(" ", "_")) + path = self.add_extension(path) + self.log(f"dumping {site.id} to {os.path.abspath(path)}") + self._write(path, records) else: - self.log("no sites to dump", fg="red") + self.log("no timeseries records to dump", fg="red") def save(self, path: str): if self.records: @@ -106,8 +114,8 @@ def _write(self, path: str, records): def _dump_timeseries_unified(self, path: str, timeseries: list): raise NotImplementedError - def _make_root_directory(self, root: str): - os.mkdir(root) + def _make_output_directory(self, output_directory: str): + os.mkdir(output_directory) def write_file(path, func, records): @@ -171,7 +179,7 @@ def finalize(self, output_name: str): blob = bucket.blob(path) blob.upload_from_string(cnt) - def _make_root_directory(self, root: str): + def _make_output_directory(self, output_directory: str): # prevent making root directory, because we are not saving to disk pass diff --git a/backend/unifier.py b/backend/unifier.py index 82b687d..88aa752 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -185,15 +185,16 @@ def _unify_parameter( persister = _perister_factory(config) for site_source, parameter_source in sources: _site_wrapper(site_source, parameter_source, persister, config) + if config.output_summary: - persister.save(config.output_path) + persister.dump_summary(config.output_path) elif config.output_timeseries_unified: - persister.dump_sites(f"{config.output_path}.sites") - persister.dump_timeseries_unified(f"{config.output_path}.timeseries") + persister.dump_timeseries_unified(config.output_path) + persister.dump_sites(config.output_path) else: # config.output_timeseries_separated - persister.dump_timeseries_separated(f"{config.output_path}_timeseries") - 
persister.dump_sites(f"{config.output_path}.sites") - + persister.dump_timeseries_separated(config.output_path) + persister.dump_sites(config.output_path) + persister.finalize(config.output_name) diff --git a/frontend/cli.py b/frontend/cli.py index a992c05..c7ed6f7 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -23,7 +23,7 @@ from backend.logging import setup_logging -setup_logging() +# setup_logging() @click.group() @@ -213,6 +213,15 @@ def weave( config = setup_config(f"{parameter}", bbox, county, site_limit, dry) config.parameter = parameter + # make sure config.output_name is properly set + config._update_output_name() + + # make output_path + config._make_output_path() + + # setup logging here so that the path can be set to config.output_path + setup_logging(path=config.output_path) + # output type if output == "summary": summary = True @@ -277,7 +286,6 @@ def wells(bbox, county): """ Get locations """ - config = setup_config("sites", bbox, county) unify_sites(config) From 2e3d2eb1f5dbd75e950dec1c63167b78558fbdd1 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Wed, 29 Jan 2025 19:42:04 +0000 Subject: [PATCH 36/53] Formatting changes --- backend/config.py | 14 +++++++++++--- backend/persister.py | 3 ++- backend/unifier.py | 2 +- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/backend/config.py b/backend/config.py index 32f3d8c..59d474e 100644 --- a/backend/config.py +++ b/backend/config.py @@ -396,12 +396,20 @@ def _update_output_name(self): output_name = self.output_name # find if there are already directories with the string "output" their names - output_names = [name for name in os.listdir(self.output_dir) if os.path.isdir(name) and output_name in name] - + output_names = [ + name + for name in os.listdir(self.output_dir) + if os.path.isdir(name) and output_name in name + ] + if len(output_names) > 0: max_count = 0 # find the highest number appended to directories with "output" in their name - counts = [name.split("_")[-1] for name 
in output_names if name.split("_")[-1].isdigit()] + counts = [ + name.split("_")[-1] + for name in output_names + if name.split("_")[-1].isdigit() + ] counts = [int(count) for count in counts] if len(counts) > 0: max_count = max(counts) diff --git a/backend/persister.py b/backend/persister.py index 25dd3af..38e8493 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -34,6 +34,7 @@ class BasePersister(Loggable): Class to persist the data to a file or cloud storage. If persisting to a file, the output directory is created by config._make_output_path() """ + extension: str # output_id: str @@ -90,7 +91,7 @@ def dump_timeseries_separated(self, path: str): self.log(f"dumping {site.id} to {os.path.abspath(path)}") self._write(path, records) else: - self.log("no timeseries records to dump", fg="red") + self.log("no timeseries records to dump", fg="red") def save(self, path: str): if self.records: diff --git a/backend/unifier.py b/backend/unifier.py index 88aa752..80a2f99 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -194,7 +194,7 @@ def _unify_parameter( else: # config.output_timeseries_separated persister.dump_timeseries_separated(config.output_path) persister.dump_sites(config.output_path) - + persister.finalize(config.output_name) From c617789c2af36f81816f1a6d34f9dc1dfef866d3 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 29 Jan 2025 14:27:43 -0700 Subject: [PATCH 37/53] Account for results for "< MDL" for NMED DWB Before this, the DIE only accounted for results with "< MRL".
If "< MDL" was in the result it would crash --- backend/connectors/nmenv/source.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/connectors/nmenv/source.py b/backend/connectors/nmenv/source.py index 3fd094a..49493cf 100644 --- a/backend/connectors/nmenv/source.py +++ b/backend/connectors/nmenv/source.py @@ -25,6 +25,7 @@ URL = "https://nmenv.newmexicowaterdata.org/FROST-Server/v1.1/" +import sys class DWBSiteSource(STSiteSource): url = URL @@ -67,7 +68,7 @@ class DWBAnalyteSource(STAnalyteSource): def _parse_result( self, result, result_dt=None, result_id=None, result_location=None ): - if "< mrl" in result.lower(): + if "< mrl" in result.lower() or "< mdl" in result.lower(): if self.config.output_summary: self.warn( f"Non-detect found: {result} for {result_location} on {result_dt} (observation {result_id}). Setting to 0 for summary." @@ -79,6 +80,7 @@ def _parse_result( else: return float(result.split(" ")[0]) + def get_records(self, site, *args, **kw): service = self.get_service() From 51b4ec6df421bc7be0d847e85b16f395d5d5cc51 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 29 Jan 2025 14:35:55 -0700 Subject: [PATCH 38/53] Add __repr__ to all source.py files This will make it so when the user invokes `die sources` the name will be printed instead of the object's memory location. 
--- backend/connectors/bor/source.py | 6 ++++++ backend/connectors/ckan/source.py | 6 ++++++ backend/connectors/isc_seven_rivers/source.py | 6 ++++++ backend/connectors/nmbgmr/source.py | 9 +++++++++ backend/connectors/nmenv/source.py | 6 ++++++ backend/connectors/st2/source.py | 18 ++++++++++++++++++ backend/connectors/usgs/source.py | 6 ++++++ backend/connectors/wqp/source.py | 6 ++++++ 8 files changed, 63 insertions(+) diff --git a/backend/connectors/bor/source.py b/backend/connectors/bor/source.py index 0502f99..190e268 100644 --- a/backend/connectors/bor/source.py +++ b/backend/connectors/bor/source.py @@ -44,6 +44,9 @@ class BORSiteSource(BaseSiteSource): transformer_klass = BORSiteTransformer + def __repr__(self): + return "BORSiteSource" + def health(self): try: self.get_records() @@ -66,6 +69,9 @@ class BORAnalyteSource(BaseAnalyteSource): transformer_klass = BORAnalyteTransformer _catalog_item_idx = None + def __repr__(self): + return "BORAnalyteSource" + def _extract_parameter_record(self, record): record[PARAMETER_VALUE] = record["attributes"]["result"] record[PARAMETER_UNITS] = record["attributes"]["resultAttributes"]["units"] diff --git a/backend/connectors/ckan/source.py b/backend/connectors/ckan/source.py index 7624c05..90d599f 100644 --- a/backend/connectors/ckan/source.py +++ b/backend/connectors/ckan/source.py @@ -100,6 +100,9 @@ def __init__(self, resource_id, **kw): elif resource_id == ROSWELL_RESOURCE_ID: self.bounding_polygon = OSE_ROSWELL_ROSWELL_BOUNDING_POLYGON + def __repr__(self): + return "NMOSERoswellSiteSource" + def health(self): params = self._get_params() params["limit"] = 1 @@ -120,6 +123,9 @@ def _parse_response(self, resp): class OSERoswellWaterLevelSource(OSERoswellSource, BaseWaterLevelSource): transformer_klass = OSERoswellWaterLevelTransformer + def __repr__(self): + return "NMOSERoswellWaterLevelSource" + def get_records(self, site_record): return self._parse_response(site_record, self.get_response()) diff --git 
a/backend/connectors/isc_seven_rivers/source.py b/backend/connectors/isc_seven_rivers/source.py index 5480632..550f961 100644 --- a/backend/connectors/isc_seven_rivers/source.py +++ b/backend/connectors/isc_seven_rivers/source.py @@ -73,6 +73,9 @@ class ISCSevenRiversSiteSource(BaseSiteSource): transformer_klass = ISCSevenRiversSiteTransformer bounding_polygon = ISC_SEVEN_RIVERS_BOUNDING_POLYGON + def __repr__(self): + return "ISCSevenRiversSiteSource" + def health(self): try: self.get_records() @@ -91,6 +94,9 @@ class ISCSevenRiversAnalyteSource(BaseAnalyteSource): transformer_klass = ISCSevenRiversAnalyteTransformer _analyte_ids = None + def __repr__(self): + return "ISCSevenRiversAnalyteSource" + def _get_analyte_id(self, analyte): """ """ if self._analyte_ids is None: diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index 40f2d7a..e2a0aee 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -60,6 +60,9 @@ class NMBGMRSiteSource(BaseSiteSource): chunk_size = 100 bounding_polygon = NM_STATE_BOUNDING_POLYGON + def __repr__(self): + return "NMBGMRSiteSource" + def health(self): resp = self._execute_json_request( _make_url("locations"), tag="features", params={"limit": 1} @@ -107,6 +110,9 @@ def get_records(self): class NMBGMRAnalyteSource(BaseAnalyteSource): transformer_klass = NMBGMRAnalyteTransformer + def __repr__(self): + return "NMBGMRAnalyteSource" + def get_records(self, site_record): analyte = get_analyte_search_param( self.config.parameter, NMBGMR_ANALYTE_MAPPING @@ -156,6 +162,9 @@ def _extract_parameter_record(self, record): class NMBGMRWaterLevelSource(BaseWaterLevelSource): transformer_klass = NMBGMRWaterLevelTransformer + def __repr__(self): + return "NMBGMRWaterLevelSource" + def _clean_records(self, records): # remove records with no depth to water value return [r for r in records if r["DepthToWaterBGS"] is not None] diff --git a/backend/connectors/nmenv/source.py 
b/backend/connectors/nmenv/source.py index 49493cf..1ed8c9a 100644 --- a/backend/connectors/nmenv/source.py +++ b/backend/connectors/nmenv/source.py @@ -32,6 +32,9 @@ class DWBSiteSource(STSiteSource): transformer_klass = DWBSiteTransformer bounding_polygon = NM_STATE_BOUNDING_POLYGON + def __repr__(self): + return "DWBSiteSource" + def health(self): return self.get_records(top=10, analyte="TDS") @@ -65,6 +68,9 @@ class DWBAnalyteSource(STAnalyteSource): url = URL transformer_klass = DWBAnalyteTransformer + def __repr__(self): + return "DWBAnalyteSource" + def _parse_result( self, result, result_dt=None, result_id=None, result_location=None ): diff --git a/backend/connectors/st2/source.py b/backend/connectors/st2/source.py index f32d52b..c7004bb 100644 --- a/backend/connectors/st2/source.py +++ b/backend/connectors/st2/source.py @@ -60,17 +60,26 @@ class PVACDSiteSource(ST2SiteSource): agency = "PVACD" bounding_polygon = PVACD_BOUNDING_POLYGON + def __repr__(self): + return "PVACDSiteSource" + class EBIDSiteSource(ST2SiteSource): transformer_klass = EBIDSiteTransformer agency = "EBID" + def __repr__(self): + return "EBIDSiteSource" + class BernCoSiteSource(ST2SiteSource): agency = "BernCo" transformer_klass = BernCoSiteTransformer bounding_polygon = BERNCO_BOUNDING_POLYGON + def __repr__(self): + return "BernCoSiteSource" + class ST2WaterLevelSource(STWaterLevelSource): url = URL @@ -142,15 +151,24 @@ class PVACDWaterLevelSource(ST2WaterLevelSource): transformer_klass = PVACDWaterLevelTransformer agency = "PVACD" + def __repr__(self): + return "PVACDWaterLevelSource" + class EBIDWaterLevelSource(ST2WaterLevelSource): transformer_klass = EBIDWaterLevelTransformer agency = "EBID" + def __repr__(self): + return "EBIDWaterLevelSource" + class BernCoWaterLevelSource(ST2WaterLevelSource): agency = "BernCo" transformer_klass = BernCoWaterLevelTransformer + def __repr__(self): + return "BernCoWaterLevelSource" + # ============= EOF 
============================================= diff --git a/backend/connectors/usgs/source.py b/backend/connectors/usgs/source.py index 4cdafcd..d8583f5 100644 --- a/backend/connectors/usgs/source.py +++ b/backend/connectors/usgs/source.py @@ -89,6 +89,9 @@ class NWISSiteSource(BaseSiteSource): chunk_size = 500 bounding_polygon = NM_STATE_BOUNDING_POLYGON + def __repr__(self): + return "NWISSiteSource" + @property def tag(self): return "nwis" @@ -135,6 +138,9 @@ def get_records(self): class NWISWaterLevelSource(BaseWaterLevelSource): transformer_klass = NWISWaterLevelTransformer + def __repr__(self): + return "NWISWaterLevelSource" + def get_records(self, site_record): params = { "format": "json", diff --git a/backend/connectors/wqp/source.py b/backend/connectors/wqp/source.py index bd14809..3ac7c02 100644 --- a/backend/connectors/wqp/source.py +++ b/backend/connectors/wqp/source.py @@ -62,6 +62,9 @@ class WQPSiteSource(BaseSiteSource): bounding_polygon = NM_STATE_BOUNDING_POLYGON + def __repr__(self): + return "WQPSiteSource" + def health(self): try: r = httpx.get( @@ -100,6 +103,9 @@ def get_records(self): class WQPAnalyteSource(BaseAnalyteSource): transformer_klass = WQPAnalyteTransformer + def __repr__(self): + return "WQPAnalyteSource" + def _extract_parameter_record(self, record): record[PARAMETER_VALUE] = record["ResultMeasureValue"] record[PARAMETER_UNITS] = record["ResultMeasure/MeasureUnitCode"] From 0548f35a1ab559bc29a0f14db4e8625261c53c77 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 29 Jan 2025 14:56:28 -0700 Subject: [PATCH 39/53] Set parameter options to lowercase for consistency This commit changes the parameter options to lowercase for consistency and to make the code easier to maintain since we won't have to worry about case sensitivity. 
The inputs are still case insensitive --- backend/connectors/nmbgmr/source.py | 2 +- backend/connectors/wqp/source.py | 2 +- backend/constants.py | 38 +++++++++++++++-------------- backend/unifier.py | 2 +- frontend/cli.py | 9 +++---- 5 files changed, 26 insertions(+), 27 deletions(-) diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index e2a0aee..90e5a4c 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -78,7 +78,7 @@ def get_records(self): if config.site_limit: params["limit"] = config.site_limit - if config.parameter != "Waterlevels": + if config.parameter.lower() != "waterlevels": params["parameter"] = get_analyte_search_param( config.parameter, NMBGMR_ANALYTE_MAPPING ) diff --git a/backend/connectors/wqp/source.py b/backend/connectors/wqp/source.py index 3ac7c02..8fd9e89 100644 --- a/backend/connectors/wqp/source.py +++ b/backend/connectors/wqp/source.py @@ -86,7 +86,7 @@ def get_records(self): if config.has_bounds(): params["bBox"] = ",".join([str(b) for b in config.bbox_bounding_points()]) - if config.parameter != "Waterlevels": + if config.parameter.lower() != "waterlevels": params["characteristicName"] = get_analyte_search_param( config.parameter, WQP_ANALYTE_MAPPING ) diff --git a/backend/constants.py b/backend/constants.py index fe6fd8b..5618ffd 100644 --- a/backend/constants.py +++ b/backend/constants.py @@ -14,23 +14,24 @@ # limitations under the License. 
# =============================================================================== -TDS = "TDS" -ARSENIC = "Arsenic" -BICARBONATE = "Bicarbonate" -CALCIUM = "Calcium" -CARBONATE = "Carbonate" -CHLORIDE = "Chloride" -FLUORIDE = "Fluoride" -MAGNESIUM = "Magnesium" -NITRATE = "Nitrate" -POTASSIUM = "Potassium" -SILICA = "Silica" -SODIUM = "Sodium" -SULFATE = "Sulfate" -URANIUM = "Uranium" +TDS = "tds" +ARSENIC = "arsenic" +BICARBONATE = "bicarbonate" +CALCIUM = "calcium" +CARBONATE = "carbonate" +CHLORIDE = "chloride" +FLUORIDE = "fluoride" +MAGNESIUM = "magnesium" +NITRATE = "nitrate" +POTASSIUM = "potassium" +SILICA = "silica" +SODIUM = "sodium" +SULFATE = "sulfate" +URANIUM = "uranium" +WATERLEVELS = "waterlevels" -PH = "pH" +PH = "ph" MILLIGRAMS_PER_LITER = "mg/L" @@ -50,8 +51,7 @@ PARAMETER_UNITS = "parameter_units" PARAMETER_VALUE = "parameter_value" - -ANALYTE_CHOICES = [ +ANALYTE_OPTIONS = sorted([ ARSENIC, BICARBONATE, CALCIUM, @@ -67,5 +67,7 @@ TDS, URANIUM, PH, -] +]) + +PARAMETER_OPTIONS = [WATERLEVELS] + ANALYTE_OPTIONS # ============= EOF ============================================= diff --git a/backend/unifier.py b/backend/unifier.py index 88aa752..d4c0ed6 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -244,7 +244,7 @@ def get_sources(config=None): config = Config() sources = [] - if config.parameter == "Waterlevels": + if config.parameter.lower() == "waterlevels": allsources = config.water_level_sources() else: allsources = config.analyte_sources() diff --git a/frontend/cli.py b/frontend/cli.py index c7ed6f7..8144aec 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -18,7 +18,7 @@ import click from backend.config import Config -from backend.constants import ANALYTE_CHOICES +from backend.constants import PARAMETER_OPTIONS from backend.unifier import unify_sites, unify_waterlevels, unify_analytes from backend.logging import setup_logging @@ -163,9 +163,6 @@ def cli(): ) ] -PARAMETER_OPTIONS = ["Waterlevels"] + ANALYTE_CHOICES - - def 
add_options(options): def _add_options(func): for option in reversed(options): @@ -241,7 +238,7 @@ def weave( config.output_timeseries_separated = timeseries_separated # sources - if parameter == "Waterlevels": + if parameter.lower() == "waterlevels": config.use_source_bernco = no_bernco config.use_source_nmbgmr_amp = no_nmbgmr_amp config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers @@ -274,7 +271,7 @@ def weave( if not click.confirm("Do you want to continue?", default=True): return - if parameter == "Waterlevels": + if parameter.lower() == "waterlevels": unify_waterlevels(config) else: unify_analytes(config) From bca689fedaf200307e62447b079d4fc3cff556b1 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Wed, 29 Jan 2025 21:58:10 +0000 Subject: [PATCH 40/53] Formatting changes --- backend/connectors/nmenv/source.py | 2 +- backend/constants.py | 36 ++++++++++++++++-------------- frontend/cli.py | 1 + 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/backend/connectors/nmenv/source.py b/backend/connectors/nmenv/source.py index 1ed8c9a..991e1a0 100644 --- a/backend/connectors/nmenv/source.py +++ b/backend/connectors/nmenv/source.py @@ -27,6 +27,7 @@ import sys + class DWBSiteSource(STSiteSource): url = URL transformer_klass = DWBSiteTransformer @@ -86,7 +87,6 @@ def _parse_result( else: return float(result.split(" ")[0]) - def get_records(self, site, *args, **kw): service = self.get_service() diff --git a/backend/constants.py b/backend/constants.py index 5618ffd..97e8ff6 100644 --- a/backend/constants.py +++ b/backend/constants.py @@ -51,23 +51,25 @@ PARAMETER_UNITS = "parameter_units" PARAMETER_VALUE = "parameter_value" -ANALYTE_OPTIONS = sorted([ - ARSENIC, - BICARBONATE, - CALCIUM, - CARBONATE, - CHLORIDE, - # FLUORIDE, - MAGNESIUM, - NITRATE, - POTASSIUM, - SILICA, - SODIUM, - SULFATE, - TDS, - URANIUM, - PH, -]) +ANALYTE_OPTIONS = sorted( + [ + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + # FLUORIDE, + MAGNESIUM, + 
NITRATE, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, + PH, + ] +) PARAMETER_OPTIONS = [WATERLEVELS] + ANALYTE_OPTIONS # ============= EOF ============================================= diff --git a/frontend/cli.py b/frontend/cli.py index 8144aec..747b4ef 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -163,6 +163,7 @@ def cli(): ) ] + def add_options(options): def _add_options(func): for option in reversed(options): From ac96eb176cc4fe596a2efe75bc5f301a480c2c14 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 29 Jan 2025 15:41:51 -0700 Subject: [PATCH 41/53] Work on README --- README.md | 116 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 61 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index a77ebaa..ab1a752 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,37 @@ Data comes from the following sources. We are continuously adding new sources as - [Water Quality Portal (WQP)](https://www.waterqualitydata.us/) - Available data: `water quality` -### Source Inclusion & Exclusion +## Usage + +### Parameter Data + +To obtain parameter summary or time series data, use +``` +die weave {parameter} +``` + +where `{parameter}` is the name of the parameter whose data is to be retrieved, followed by the desired output type, excluded data sources, date filters, and geographic filters. `{parameter}` is case-insensitive. + + +#### Available Parameters +The following parameters are currently available for retrieval: +- waterlevels +- arsenic +- bicarbonate +- calcium +- carbonate +- chloride +- magnesium +- nitrate +- ph +- potassium +- silica +- sodium +- sulfate +- tds +- uranium + +#### Source Inclusion & Exclusion The Data Integration Engine enables the user to obtain groundwater level and groundwater quality data from a variety of sources. Data from sources are automatically included in the output if available unless specifically excluded. 
The following flags are available to exclude specific data sources: - `--no-bernco` to exclude Bernalillo County (BernCo) data @@ -50,81 +80,63 @@ The Data Integration Engine enables the user to obtain groundwater level and gro - `--no-pvacd` to exclude Pecos Valley Artesian Convservancy District (PVACD) data - `--no-wqp` to exclude Water Quality Portal (WQP) data -### Water Levels +#### Geographic Filters -To obtain groundwater levels, use +The following flags can be used to geographically filter data: ``` -weave Waterlevels +-- county {county name} ``` -followed by the desired output type, source filters, date filters, geographic filters, and excluded data sources. - -### Water Quality -To obtain groundwater quality, use - ``` -weave {analyte} +-- bbox 'x1 y1, x2 y2' ``` -where `{analyte}` is the name of the analyte whose data is to be retrieved. - -#### Available Analytes -The following analytes are currently available for retrieval: -- Arsenic -- Bicarbonate -- Calcium -- Carbonate -- Chloride -- Magnesium -- Nitrate -- pH -- Potassium -- Silica -- Sodium -- Sulfate -- TDS -- Uranium +#### Date Filters -### Geographic Filters - -The following flags can be used to geographically filter data: +The following flags can be used to filter by dates: ``` --- county {county name} +--start-date YYYY-MM-DD ``` ``` --- bbox 'x1 y1, x2 y2' +--end-date YYYY-MM-DD ``` -### Date Filters - -The following flags can be used to filter by dates: +#### Output +The following flags are used to set the output type: ``` ---start-date YYYY-MM-DD +--output summary ``` +- A summary table consisting of location information as well as summary statistics for the parameter of interest for every location that has observations. ``` ---end-date YYYY-MM-DD +--output timeseries_unified ``` +- A single table consisting of time series data for all locations for the parameter of interest. 
+- A single table of site data that contains information such as latitude, longitude, and elevation -## Output -The data is saved to the current working directory. A log of the inputs and processes, called `die.log`, is also saved to the current working directory. If a subsquent process is run and the log from the previous process has not been moved or stored elsewhere, the log for the subsequent process will be appended to the existing log. +``` +--output timeseries_separated +``` +- Separate time series tables for all locations for the parameter of interest. +- A single table of site data that contains information such as latitude, longitude, and elevation -### Timeseries Data -The flag `--separated_timeseries` exports timeseries for every location in their own file in the directory output_series (e.g. `AB-0002.csv`, `AB-0003.csv`). +The data is saved to a directory titled `output` in the current working directory. If the directory `output` already exists, then the output directory will be called `output_1`. If enumerated output directories already exist, then the output directory will be called `output_{n}` where `n` is equal to the greatest integer suffix +1. -The flag `--unified_timeseries` exports all timeseries for all locations in one file titled `output.timeseries.csv`. +A log of the inputs and processes, called `die.log`, is also saved to the output directory. -Both time series export a file titled `output.sites.csv` that contains site information, such as latitude, longitude, and elevation. 
+##### Timeseries Data -#### Table Headers +**sites** -The table headers for timeseries data are as follows: +| a | source | id | name | latitude | longitude | elevation | elevation_units | horizontal_datum | vertical_datum | usgs_site_id | alternate_site_id | formation | aquifer | well_depth | +| :---------- | :----- | :--- | :--- | :------- | :-------- | :-------- | :-------------- | :--------------- | :------------- | :----------- | :---------------- | : ------- | :------ | :--------- | +| description | the organization/source for the site | the id of the site. The id is used as the key to join the site and timeseries tables | the colloquial name for the site if it exists | latitude in decimal degrees | longitude in decimal degrees | ground surface elevation of the site in feet | the units of the ground surface elevation. Defaults to ft | horizontal datum of the latitude and longitude. Defaults to WGS84 | vertical datum of the elevation | USGS site id if it exists | alternate side id if it exists | geologic formation in whch the well terminals if it exists | aquifer from which the well draws water if it exists | depth of well if it exists | +| data type | string | string | string | float | float | float | string | string | string | string | string | string | string | string | -**output.sites.csv** - `source`: the organization/source for the site - `id`: the id of the site. The id is used as the key to join the output.timeseries.csv table - `name`: the colloquial name for the site if it exists @@ -140,14 +152,8 @@ The table headers for timeseries data are as follows: - `aquifer`: aquifer from which the well draws water if it exists - `well_depth`: depth of well if it exists -**output.timeseries.csv - waterlevels** -- `source`: the organization/sources for the site -- `id`: the id of the site. 
The id is used as the key to join the output.sites.csv table -- `depth_to_water_ft_below_ground_surface`: depth to water below ground surface in ft -- `date_measured`: date of measurement in YYYY-MM-DD format -- `time_measured`: time of measurement if it exists -**output.timeseries.csv - analytes** +**time series** - `source`: the organization/sources for the site - `id`: the id of the site. The id is used as the key to join the output.sites.csv table - `parameter`: the name of the analyte whose measurements are reported in the table. This corresponds the requested analyte @@ -158,7 +164,7 @@ The table headers for timeseries data are as follows: ### Summary Data -If neither of the above flags are specified, a summary table called `output.csv` is exported. The summary table consists of location information as well as summary statistics for the parameter of interest for every location that has observations. +If neither of the above flags are specified, a summary table called `output.csv` is exported. #### Table Headers: Summary From ce70daa6b19df9e6e41ec9d0425c9a240ec1bdad Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 29 Jan 2025 15:44:56 -0700 Subject: [PATCH 42/53] Work on README --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ab1a752..0dded29 100644 --- a/README.md +++ b/README.md @@ -132,10 +132,10 @@ A log of the inputs and processes, called `die.log`, is also saved to the output **sites** -| a | source | id | name | latitude | longitude | elevation | elevation_units | horizontal_datum | vertical_datum | usgs_site_id | alternate_site_id | formation | aquifer | well_depth | -| :---------- | :----- | :--- | :--- | :------- | :-------- | :-------- | :-------------- | :--------------- | :------------- | :----------- | :---------------- | : ------- | :------ | :--------- | -| description | the organization/source for the site | the id of the site. 
The id is used as the key to join the site and timeseries tables | the colloquial name for the site if it exists | latitude in decimal degrees | longitude in decimal degrees | ground surface elevation of the site in feet | the units of the ground surface elevation. Defaults to ft | horizontal datum of the latitude and longitude. Defaults to WGS84 | vertical datum of the elevation | USGS site id if it exists | alternate side id if it exists | geologic formation in whch the well terminals if it exists | aquifer from which the well draws water if it exists | depth of well if it exists | -| data type | string | string | string | float | float | float | string | string | string | string | string | string | string | string | +| | source | id | name | latitude | longitude | elevation | elevation_units | horizontal_datum | vertical_datum | usgs_site_id | alternate_site_id | formation | aquifer | well_depth | +| :---------- | :----- | :---- | :--- | :------- | :-------- | :-------- | :-------------- | :--------------- | :------------- | :----------- | :---------------- | :-------- | :------ | :--------- | +| **description** | the organization/source for the site | the id of the site. The id is used as the key to join the site and timeseries tables | the colloquial name for the site if it exists | latitude in decimal degrees | longitude in decimal degrees | ground surface elevation of the site in feet | the units of the ground surface elevation. Defaults to ft | horizontal datum of the latitude and longitude. 
Defaults to WGS84 | vertical datum of the elevation | USGS site id if it exists | alternate side id if it exists | geologic formation in which the well terminals if it exists | aquifer from which the well draws water if it exists | depth of well if it exists | +| **data type** | string | string | string | float | float | float | string | string | string | string | string | string | string | string | - `source`: the organization/source for the site - `id`: the id of the site. The id is used as the key to join the output.timeseries.csv table From 562441e5a8c99167e2c9a8688a956c6bbf0b570a Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 29 Jan 2025 16:19:30 -0700 Subject: [PATCH 43/53] Updated README for die invocation Waterlevels and analytes are now included in parameters and their export tables are now the same --- README.md | 179 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 93 insertions(+), 86 deletions(-) diff --git a/README.md b/README.md index 0dded29..cb6fece 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,93 @@ The following parameters are currently available for retrieval: - tds - uranium -#### Source Inclusion & Exclusion +### Output +The following flags are used to set the output type: + +``` +--output summary +``` +- A summary table consisting of location information as well as summary statistics for the parameter of interest for every location that has observations. + +``` +--output timeseries_unified +``` +- A single table consisting of time series data for all locations for the parameter of interest. +- A single table of site data that contains information such as latitude, longitude, and elevation + +``` +--output timeseries_separated +``` +- Separate time series tables for all locations for the parameter of interest. +- A single table of site data that contains information such as latitude, longitude, and elevation + +The data is saved to a directory titled `output` in the current working directory. 
If the directory `output` already exists, then the output directory will be called `output_1`. If enumerated output directories already exist, then the output directory will be called `output_{n}` where `n` is equal to the greatest integer suffix +1. + +A log of the inputs and processes, called `die.log`, is also saved to the output directory. + +#### Tables + +**summary table** + +| field/header | description | data type | always present | +| :----------- | :---------- | :-------- | :------------- | +| source | the organization/source for the site | string | Y | +| id | the id of the site. The id is used as the key to join the site and timeseries tables | string | Y | +| location | the colloquial name for the site | string | Y | +| usgs_site_id | USGS site id | string | N | +| alternate_site_id | alternate site id | string | N | +| latitude | latitude in decimal degrees | float | Y | +| longitude | longitude in decimal degrees | float | Y | +| horizontal_datum | horizontal datum of the latitude and longitude. Defaults to WGS84 | string | Y | +| elevation | ground surface elevation of the site | float | Y | +| elevation_units | the units of the ground surface elevation. Defaults to ft | string | Y | +| well_depth | depth of well | float | N | +| well_depth_units | units of well depth. 
Defaults to ft | string | N | +| parameter | the name of the parameter whose measurements are reported in the table | string | Y | +| parameter_units | units of the observation | string | Y | +| nrecords | number of records at the site for the parameter | integer | Y | +| min | the minimum observation | float | Y | +| max | the maximum observation | float | Y | +| mean | the mean value of the observations | float | Y | +| most_recent_date| date of most recent record in YYYY-MM-DD | string | Y | +| most_recent_time | time of most recent record in HH:MM:SS or HH:MM:SS.mmm | string | N | +| most_recent_value | value of the most recent record | float | Y | +| most_recent_units | units of the most recent record | string | Y | + + +**sites table** + +| field/header | description | data type | always present | +| :----------- | :---------- | :-------- | :------------- | +| source | the organization/source for the site | string | Y | +| id | the id of the site. The id is used as the key to join the site and timeseries tables | string | Y | +| name | the colloquial name for the site | string | Y | +| latitude | latitude in decimal degrees | float | Y | +| longitude | longitude in decimal degrees | float | Y | +| elevation | ground surface elevation of the site | float | Y | +| elevation_units | the units of the ground surface elevation. Defaults to ft | string | Y | +| horizontal_datum | horizontal datum of the latitude and longitude.
Defaults to WGS84 | string | Y | +| vertical_datum | vertical datum of the elevation | string | N | +| usgs_site_id | USGS site id | string | N | +| alternate_site_id | alternate site id | string | N | +| formation | geologic formation in which the well terminates | string | N | +| aquifer | aquifer from which the well draws water | string | N | +| well_depth | depth of well | float | N | + + +**time series table(s)** + +| field/header | description | data type | always present | +| :----------- | :---------- | :-------- | :------------- | +| source | the organization/source for the site | string | Y | +| id | the id of the site. The id is used as the key to join the site and timeseries tables | string | Y | +| parameter | the name of the parameter whose measurements are reported in the table | string | Y | +| parameter_value | value of the observation | float | Y | +| parameter_units | units of the observation | string | Y | +| date_measured | date of measurement in YYYY-MM-DD | string | Y | +| time_measured | time of measurement in HH:MM:SS or HH:MM:SS.mmm | string | N | + +### Source Inclusion & Exclusion The Data Integration Engine enables the user to obtain groundwater level and groundwater quality data from a variety of sources. Data from sources are automatically included in the output if available unless specifically excluded.
The following flags are available to exclude specific data sources: - `--no-bernco` to exclude Bernalillo County (BernCo) data @@ -80,7 +166,7 @@ The Data Integration Engine enables the user to obtain groundwater level and gro - `--no-pvacd` to exclude Pecos Valley Artesian Convservancy District (PVACD) data - `--no-wqp` to exclude Water Quality Portal (WQP) data -#### Geographic Filters +### Geographic Filters The following flags can be used to geographically filter data: @@ -92,7 +178,7 @@ The following flags can be used to geographically filter data: -- bbox 'x1 y1, x2 y2' ``` -#### Date Filters +### Date Filters The following flags can be used to filter by dates: @@ -104,91 +190,12 @@ The following flags can be used to filter by dates: --end-date YYYY-MM-DD ``` -#### Output -The following flags are used to set the output type: - -``` ---output summary -``` -- A summary table consisting of location information as well as summary statistics for the parameter of interest for every location that has observations. +### Source Enumeration -``` ---output timeseries_unified -``` -- A single table consisting of time series data for all locations for the parameter of interest. -- A single table of site data that contains information such as latitude, longitude, and elevation +Use ``` ---output timeseries_separated +die sources {parameter} ``` -- Separate time series tables for all locations for the parameter of interest. -- A single table of site data that contains information such as latitude, longitude, and elevation - -The data is saved to a directory titled `output` in the current working directory. If the directory `output` already exists, then the output directory will be called `output_1`. If enumerated output directories already exist, then the output directory will be called `output_{n}` where `n` is equal to the greatest integer suffix +1. - -A log of the inputs and processes, called `die.log`, is also saved to the output directory. 
-##### Timeseries Data - -**sites** - -| | source | id | name | latitude | longitude | elevation | elevation_units | horizontal_datum | vertical_datum | usgs_site_id | alternate_site_id | formation | aquifer | well_depth | -| :---------- | :----- | :---- | :--- | :------- | :-------- | :-------- | :-------------- | :--------------- | :------------- | :----------- | :---------------- | :-------- | :------ | :--------- | -| **description** | the organization/source for the site | the id of the site. The id is used as the key to join the site and timeseries tables | the colloquial name for the site if it exists | latitude in decimal degrees | longitude in decimal degrees | ground surface elevation of the site in feet | the units of the ground surface elevation. Defaults to ft | horizontal datum of the latitude and longitude. Defaults to WGS84 | vertical datum of the elevation | USGS site id if it exists | alternate side id if it exists | geologic formation in which the well terminals if it exists | aquifer from which the well draws water if it exists | depth of well if it exists | -| **data type** | string | string | string | float | float | float | string | string | string | string | string | string | string | string | - -- `source`: the organization/source for the site -- `id`: the id of the site. The id is used as the key to join the output.timeseries.csv table -- `name`: the colloquial name for the site if it exists -- `latitude`: latitude in decimal degrees -- `longitude`: the longitude in decimal degrees -- `elevation` ground surface elevation of the site in feet -- `elevation_units`: the units of the ground surface elevation. Defaults to ft -- `horizontal_datum`: horizontal datum of the latitude and longitude. 
Defaults to WGS84 -- `vertical_datum`: the vertical datum of the elevation -- `usgs_site_id`: USGS site id if it exists -- `alternate_site_id`: alternate site id if it exists -- `formation`: geologic formation in which the well terminates if it exists -- `aquifer`: aquifer from which the well draws water if it exists -- `well_depth`: depth of well if it exists - - -**time series** -- `source`: the organization/sources for the site -- `id`: the id of the site. The id is used as the key to join the output.sites.csv table -- `parameter`: the name of the analyte whose measurements are reported in the table. This corresponds the requested analyte -- `parameter_value`: value of the measurement -- `parameter_units`: units of the measurement -- `date_measured`: date of measurement in YYYY-MM-DD format -- `time_measured`: time of measurement if it exists - -### Summary Data - -If neither of the above flags are specified, a summary table called `output.csv` is exported. - -#### Table Headers: Summary - -**output.csv - waterlevels and analytes** -- `source`: the organization/source for the site -- `id`: the id of the site. The id is used as the key to join the output.timeseries.csv table -- `location`: the colloquial name for the site if it exists -- `usgs_site_id`: USGS site id if it exists -- `alternate_site_id`: alternate site id if it exists -- `latitude`: latitude in decimal degrees -- `longitude`: the longitude in decimal degrees -- `horizontal_datum`: horizontal datum of the latitude and longitude. Defaults to WGS84 -- `elevation` ground surface elevation of the site in feet -- `elevation_units`: the units of the ground surface elevation. Defaults to ft -- `well_depth`: depth of well if it exists -- `well_depth_units`: units of well depth. Defaults to ft -- `parameter`: the name of the analyte whose measurements are reported in the table. 
This corresponds the requested analyte -- `parameter_value`: value of the measurement -- `parameter_units`: units of the measurement -- `nrecords`: the number of records for the site -- `min`: the minimum record for the site -- `max`: the maximum record for the site -- `mean`: the mean value for the records at the site -- `most_recent_date`: date of most recent record -- `most_recent_time`: time of most recent record if it exists -- `most_recent_value` the value of the most recent record -- `most_recent_units`: the units of the most recent record \ No newline at end of file +to print the sources that report that parameter to the terminal. \ No newline at end of file From 65a58f403a2fb035f3ec9353eb6d720a56e88311 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 29 Jan 2025 16:21:48 -0700 Subject: [PATCH 44/53] README clarification --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cb6fece..066bbf9 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ The following parameters are currently available for retrieval: - uranium ### Output -The following flags are used to set the output type: +The `--output` option is required and used to set the output type: ``` --output summary From 63ab7ad28dc27c18b11bfb0e8878802929322733 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 29 Jan 2025 16:42:12 -0700 Subject: [PATCH 45/53] Fix most_recent_result in summary table convert_units returns the converted results and a warning message, so use the first element of the returned tuple --- backend/transformer.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/backend/transformer.py b/backend/transformer.py index d5069c4..eb43ea3 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -126,7 +126,7 @@ def convert_units( output_units: str, analyte: str, dt: str = None, -) -> float: +) -> tuple[float, str]: """ Converts the following units for any parameter value: @@ -160,8 
+160,8 @@ def convert_units( Returns -------- - float - The converted value + tuple[float, str] + The converted value and warning message if conversion failed """ warning = "" conversion_factor = None @@ -725,12 +725,16 @@ def _transform_most_recents(self, record): record["most_recent_date"] = dt record["most_recent_time"] = tt p, u = self._get_parameter() - record["most_recent_value"] = convert_units( + + most_recent_value, warning_msg = convert_units( record["most_recent_value"], record["most_recent_units"], u, self.config.parameter, ) + + # all failed conversions are skipped and handled in source.read(), so no need to duplicate here + record["most_recent_value"] = most_recent_value record["most_recent_units"] = u From 64ae17716a203d6c96ccaef9c501c18334d11d7f Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 29 Jan 2025 16:45:45 -0700 Subject: [PATCH 46/53] Update README to indicate die sources and wells are in development --- README.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 066bbf9..b608762 100644 --- a/README.md +++ b/README.md @@ -190,7 +190,7 @@ The following flags can be used to filter by dates: --end-date YYYY-MM-DD ``` -### Source Enumeration +### Source Enumeration [In Development] Use @@ -198,4 +198,14 @@ Use die sources {parameter} ``` -to print the sources that report that parameter to the terminal. \ No newline at end of file +to print the sources that report that parameter to the terminal. + +### Wells [In Development] + +Use + +``` +die wells +``` + +to print wells to the terminal.
\ No newline at end of file From f0c29eb83f77a1decb79c23e88787cbea5d0fd94 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 29 Jan 2025 16:47:25 -0700 Subject: [PATCH 47/53] Bump version to 0.3.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 93c0130..9eb77e5 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup( name="nmuwd", - version="0.2.2", + version="0.3.0", author="Jake Ross", description="New Mexico Water Data Integration Engine", long_description=long_description, From f098587cfd025027c0f4f9f38e9ee3dfd041e94b Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 29 Jan 2025 16:49:21 -0700 Subject: [PATCH 48/53] Updated README for clarity --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b608762..a893d37 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ The `--output` option is required and used to set the output type: - Separate time series tables for all locations for the parameter of interest. - A single table of site data that contains information such as latitude, longitude, and elevation -The data is saved to a directory titled `output` in the current working directory. If the directory `output` already exists, then the output directory will be called `output_1`. If enumerated output directories already exist, then the output directory will be called `output_{n}` where `n` is equal to the greatest integer suffix +1. +The data is saved to a directory titled `output` in the current working directory. If the directory `output` already exists, then the output directory will be called `output_1`. If enumerated output directories already exist, then the output directory will be called `output_{n}` where `n` is equal to the greatest existing integer suffix +1. A log of the inputs and processes, called `die.log`, is also saved to the output directory. 
From 7c2b4abca57043a07d7e3321cf38556068d028f3 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 29 Jan 2025 16:51:08 -0700 Subject: [PATCH 49/53] Make README better formatted --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index a893d37..0a05895 100644 --- a/README.md +++ b/README.md @@ -91,9 +91,7 @@ The data is saved to a directory titled `output` in the current working director A log of the inputs and processes, called `die.log`, is also saved to the output directory. -#### Tables - -**summary table** +#### Summary Table | field/header | description | data type | always present | | :----------- | :---------- | :-------- | :------------- | @@ -121,7 +119,7 @@ A log of the inputs and processes, called `die.log`, is also saved to the output | most_recent_units | units of the most recent record | string | Y | -**sites table** +#### Sites Table | field/header | description | data type | always present | | :----------- | :---------- | :-------- | :------------- | @@ -141,7 +139,7 @@ A log of the inputs and processes, called `die.log`, is also saved to the output | well_depth | depth of well | float | N | -**time series table(s)** +#### Time Series Table(s) | field/header | description | data type | always present | | :----------- | :---------- | :-------- | :------------- | From 30f76d4d6aa56e36d2e084fd36db4fddfa297189 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 29 Jan 2025 16:51:25 -0700 Subject: [PATCH 50/53] Bump version to 0.3.1 for README changes --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9eb77e5..42279b6 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup( name="nmuwd", - version="0.3.0", + version="0.3.1", author="Jake Ross", description="New Mexico Water Data Integration Engine", long_description=long_description, From 806f971bf2d115a3e4b4e38d1aaa4056c94069f6 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 30 
Jan 2025 15:24:00 -0700 Subject: [PATCH 51/53] Added comments for internal documentation --- frontend/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/cli.py b/frontend/cli.py index 747b4ef..a38e429 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -214,7 +214,7 @@ def weave( # make sure config.output_name is properly set config._update_output_name() - # make output_path + # make output_path now so that die.log can be written to it live config._make_output_path() # setup logging here so that the path can be set to config.output_path From 30fe99008557cc9e19ea61943e3ded73d551467d Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 30 Jan 2025 16:45:40 -0700 Subject: [PATCH 52/53] Publish to PyPI on successful pull requests and merges to main --- .github/workflows/publish-to-pypi.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 85730c3..ce4c0a5 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -1,12 +1,15 @@ name: Publish Python 🐍 distributions 📦 to PyPI and TestPyPI on: - push: - tags: - - '*' + pull_request: + branches: + - main + types: + - closed jobs: - build-n-publish: + build-and-publish-if-merged: + if: github.event.pull_request.merged == true name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI runs-on: ubuntu-latest permissions: From ef8a0224cb1e843ef6be5e40d59d8b396ab54017 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 4 Feb 2025 10:30:58 -0700 Subject: [PATCH 53/53] Update source padding in logging for long source names --- backend/logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/logging.py b/backend/logging.py index f2d6cc7..2175854 100644 --- a/backend/logging.py +++ b/backend/logging.py @@ -28,7 +28,7 @@ def log(self, msg, level=None, fg="yellow"): if level is None: level = logging.INFO - 
click.secho(f"{self.__class__.__name__:30s}{msg}", fg=fg) + click.secho(f"{self.__class__.__name__:40s}{msg}", fg=fg) self.logger.log(level, msg) def warn(self, msg, fg="red"):