From 29a07fa16387ef1999261f5fd600bd6ddc7c9b92 Mon Sep 17 00:00:00 2001 From: jross Date: Thu, 27 Mar 2025 10:02:16 -0600 Subject: [PATCH 01/16] started sites only --- backend/bounding_polygons.py | 1 + backend/config.py | 15 ++- backend/connectors/nmbgmr/source.py | 41 ++++---- backend/connectors/nmbgmr/transformer.py | 6 +- backend/connectors/nmenv/source.py | 39 +++++--- backend/connectors/wqp/source.py | 18 ++-- backend/unifier.py | 116 ++++++++++++++++------- frontend/cli.py | 4 +- 8 files changed, 157 insertions(+), 83 deletions(-) diff --git a/backend/bounding_polygons.py b/backend/bounding_polygons.py index d9cd100..32cbbe3 100644 --- a/backend/bounding_polygons.py +++ b/backend/bounding_polygons.py @@ -174,6 +174,7 @@ def get_state_polygon(state): # private helpers ============================ def _make_shape(obj, as_wkt): poly = shape(obj["geometry"]) + poly = poly.simplify(0.1) if as_wkt: return poly.wkt return poly diff --git a/backend/config.py b/backend/config.py index ec9be97..89c4f59 100644 --- a/backend/config.py +++ b/backend/config.py @@ -116,6 +116,8 @@ class Config(Loggable): county: str = "" wkt: str = "" + sites_only = False + # sources use_source_bernco: bool = True use_source_bor: bool = True @@ -186,6 +188,15 @@ def __init__(self, model=None, payload=None): for s in SOURCE_KEYS: setattr(self, f"use_source_{s}", s in payload.get("sources", [])) + def finalize(self): + self.update_output_name() + self.make_output_path() + + def all_site_sources(self): + sources = self.water_level_sources() + sources.extend(self.analyte_sources()) + return sources + def analyte_sources(self): sources = [] @@ -384,7 +395,7 @@ def _validate_county(self): return True - def _update_output_name(self): + def update_output_name(self): """ Generate a unique output name based on existing directories in the output directory. @@ -419,7 +430,7 @@ def _update_output_name(self): self.output_name = output_name - def _make_output_path(self): + def make_output_path(self): if not os.path.exists(self.output_path): os.mkdir(self.output_path) diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index d75adae..a91e29e 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -73,30 +73,33 @@ def get_records(self): if config.site_limit: params["limit"] = config.site_limit - if config.parameter.lower() != "waterlevels": - params["parameter"] = get_analyte_search_param( - config.parameter, NMBGMR_ANALYTE_MAPPING - ) - else: - params["parameter"] = "Manual groundwater levels" + if not config.sites_only: + + if config.parameter.lower() != "waterlevels": + params["parameter"] = get_analyte_search_param( + config.parameter, NMBGMR_ANALYTE_MAPPING + ) + else: + params["parameter"] = "Manual groundwater levels" # tags="features" because the response object is a GeoJSON sites = self._execute_json_request( _make_url("locations"), params, tag="features", timeout=30 ) - for site in sites: - print(f"Obtaining well data for {site['properties']['point_id']}") - well_data = self._execute_json_request( - _make_url("wells"), - params={"pointid": site["properties"]["point_id"]}, - tag="", - ) - site["properties"]["formation"] = well_data["formation"] - site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] - site["properties"]["well_depth_units"] = FEET - # site["properties"]["formation"] = None - # site["properties"]["well_depth"] = None - # site["properties"]["well_depth_units"] = FEET + if not config.sites_only: + for site in sites: + print(f"Obtaining well data for {site['properties']['point_id']}") + well_data = self._execute_json_request( + _make_url("wells"), + params={"pointid": site["properties"]["point_id"]}, + tag="", + ) + site["properties"]["formation"] = well_data["formation"] + site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] + site["properties"]["well_depth_units"] = FEET + # site["properties"]["formation"] = None + # site["properties"]["well_depth"] = None + # site["properties"]["well_depth_units"] = FEET return sites diff --git a/backend/connectors/nmbgmr/transformer.py b/backend/connectors/nmbgmr/transformer.py index dd1163e..420c7f6 100644 --- a/backend/connectors/nmbgmr/transformer.py +++ b/backend/connectors/nmbgmr/transformer.py @@ -38,9 +38,9 @@ def _transform(self, record): "vertical_datum": props["altitude_datum"], "usgs_site_id": props["site_id"], "alternate_site_id": props["alternate_site_id"], - "formation": props["formation"], - "well_depth": props["well_depth"], - "well_depth_units": props["well_depth_units"], + "formation": props.get("formation", ""), + "well_depth": props.get("well_depth", ""), + "well_depth_units": props.get("well_depth_units", ""), } return rec diff --git a/backend/connectors/nmenv/source.py b/backend/connectors/nmenv/source.py index 335fd73..b28413f 100644 --- a/backend/connectors/nmenv/source.py +++ b/backend/connectors/nmenv/source.py @@ -47,29 +47,42 @@ def health(self): return self.get_records(top=10, analyte="TDS") def get_records(self, *args, **kw): + analyte = None if "analyte" in kw: analyte = kw["analyte"] elif self.config: analyte = self.config.parameter - analyte = get_analyte_search_param(analyte, DWB_ANALYTE_MAPPING) - if analyte is None: - return [] - service = self.get_service() - ds = service.datastreams() - q = ds.query() - fs = [f"ObservedProperty/id eq {analyte}"] - if self.config: + if self.config.sites_only: + ds = service.things() + q = ds.query() + fs = [] if self.config.has_bounds(): fs.append( - f"st_within(Thing/Location/location, geography'{self.config.bounding_wkt()}')" + f"st_within(Locations/location, geography'{self.config.bounding_wkt()}')" ) - - q = q.filter(" and ".join(fs)) - q = q.expand("Thing/Locations") - return [ds.thing.locations.entities[0] for ds in q.list()] + q = q.expand("Locations") + q = q.filter(" and ".join(fs)) + return [thing.locations.entities[0] for thing in q.list()] + else: + analyte = get_analyte_search_param(analyte, DWB_ANALYTE_MAPPING) + if analyte is None: + return [] + + ds = service.datastreams() + q = ds.query() + fs = [f"ObservedProperty/id eq {analyte}"] + if self.config: + if self.config.has_bounds(): + fs.append( + f"st_within(Thing/Location/location, geography'{self.config.bounding_wkt()}')" + ) + + q = q.filter(" and ".join(fs)) + q = q.expand("Thing/Locations") + return [di.thing.locations.entities[0] for di in q.list()] class DWBAnalyteSource(STAnalyteSource): diff --git a/backend/connectors/wqp/source.py b/backend/connectors/wqp/source.py index 4987fee..996b3aa 100644 --- a/backend/connectors/wqp/source.py +++ b/backend/connectors/wqp/source.py @@ -87,15 +87,15 @@ def get_records(self): } if config.has_bounds(): params["bBox"] = ",".join([str(b) for b in config.bbox_bounding_points()]) - - if config.parameter.lower() != "waterlevels": - params["characteristicName"] = get_analyte_search_param( - config.parameter, WQP_ANALYTE_MAPPING - ) - else: - # every record with pCode 30210 (depth in m) has a corresponding - # record with pCode 72019 (depth in ft) but not vice versa - params["pCode"] = "30210" + if not config.sites_only: + if config.parameter.lower() != "waterlevels": + params["characteristicName"] = get_analyte_search_param( + config.parameter, WQP_ANALYTE_MAPPING + ) + else: + # every record with pCode 30210 (depth in m) has a corresponding + # record with pCode 72019 (depth in ft) but not vice versa + params["pCode"] = "30210" params.update(get_date_range(config)) diff --git a/backend/unifier.py b/backend/unifier.py index 9523da9..36dd3b5 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -73,6 +73,16 @@ def unify_waterlevels(config): return True +def unify_sites_only(config): + print("Unifying sites only\n") + + # config.report() -- report is done in cli.py, no need to do it twice + config.validate() + + if not config.dry: + _unify_parameter(config, config.all_site_sources()) + + return True def _perister_factory(config): """ @@ -136,39 +146,44 @@ def _site_wrapper(site_source, parameter_source, persister, config): start_ind = 1 end_ind = 0 first_flag = True - for sites in site_source.chunks(sites): - if site_limit and sites_with_records_count == site_limit: - break - - if type(sites) == list: - if first_flag: - end_ind += len(sites) - first_flag = False + + if config.sites_only: + persister.sites.extend(sites) + else: + for sites in site_source.chunks(sites): + if site_limit and sites_with_records_count == site_limit: + break + + if type(sites) == list: + n = len(sites) + if first_flag: + first_flag = False + else: + start_ind = end_ind + 1 + + end_ind += n + + if use_summarize: + summary_records = parameter_source.read( + sites, use_summarize, start_ind, end_ind + ) + if summary_records: + persister.records.extend(summary_records) else: - start_ind = end_ind + 1 - end_ind += len(sites) - - if use_summarize: - summary_records = parameter_source.read( - sites, use_summarize, start_ind, end_ind - ) - if summary_records: - persister.records.extend(summary_records) - else: - results = parameter_source.read( - sites, use_summarize, start_ind, end_ind - ) - # no records are returned if there is no site record for parameter - # or if the record isn't clean (doesn't have the correct fields) - # don't count these sites to apply to site_limit - if results is None or len(results) == 0: - continue - - for site, records in results: - persister.timeseries.append((site, records)) - persister.sites.append(site) - - sites_with_records_count += 1 + results = parameter_source.read( + sites, use_summarize, start_ind, end_ind + ) + # no records are returned if there is no site record for parameter + # or if the record isn't clean (doesn't have the correct fields) + # don't count these sites to apply to site_limit + if results is None or len(results) == 0: + continue + + for site, records in results: + persister.timeseries.append((site, records)) + persister.sites.append(site) + + sites_with_records_count += 1 except BaseException: import traceback @@ -191,6 +206,8 @@ def _unify_parameter( elif config.output_timeseries_unified: persister.dump_timeseries_unified(config.output_path) persister.dump_sites(config.output_path) + elif config.sites_only: + persister.dump_sites(config.output_path) else: # config.output_timeseries_separated persister.dump_timeseries_separated(config.output_path) persister.dump_sites(config.output_path) @@ -297,13 +314,41 @@ def waterlevel_unification_test(): cfg.use_source_nwis = False cfg.use_source_nmbgmr = False cfg.use_source_iscsevenrivers = False - # cfg.use_source_pvacd = False - cfg.use_source_oseroswell = False + cfg.use_source_pvacd = False + # cfg.use_source_oseroswell = False cfg.use_source_bernco = False + cfg.use_source_iscsevenrivers = False + cfg.use_source_nmose_isc_seven_rivers = False + cfg.use_source_ebid = False # cfg.site_limit = 10 unify_waterlevels(cfg) +def site_unification_test(): + cfg = Config() + cfg.county = "chaves" + + + cfg.output_summary = False + cfg.output_name = "sitesonly" + cfg.sites_only = True + # cfg.output_summary = True + # cfg.output_single_timeseries = True + + cfg.use_source_nwis = False + cfg.use_source_nmbgmr = False + cfg.use_source_iscsevenrivers = False + cfg.use_source_pvacd = False + # cfg.use_source_oseroswell = False + cfg.use_source_bernco = False + cfg.use_source_iscsevenrivers = False + cfg.use_source_nmose_isc_seven_rivers = False + cfg.use_source_ebid = False + + cfg.finalize() + + unify_sites_only(cfg) + def get_datastream(siteid): import httpx @@ -329,7 +374,8 @@ def get_datastreams(): # shandler = logging.StreamHandler() # get_sources(Config()) setup_logging() - waterlevel_unification_test() + site_unification_test() + # waterlevel_unification_test() # analyte_unification_test() # print(health_check("nwis")) # generate_site_bounds() diff --git a/frontend/cli.py b/frontend/cli.py index e03ac0b..8dd330c 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -228,10 +228,10 @@ def weave( config.parameter = parameter # make sure config.output_name is properly set - config._update_output_name() + config.update_output_name() # make output_path now so that die.log can be written to it live - config._make_output_path() + config.make_output_path() # setup logging here so that the path can be set to config.output_path setup_logging(path=config.output_path) From 3510008c8f67b9e2e083b708edaa75fcde69f1b5 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 11:15:59 -0600 Subject: [PATCH 02/16] sites only --- backend/connectors/nmenv/source.py | 3 +- backend/unifier.py | 40 +++++++------- frontend/cli.py | 83 +++++++++++------------------- frontend/cronjob_worker.sh | 3 ++ 4 files changed, 53 insertions(+), 76 deletions(-) create mode 100644 frontend/cronjob_worker.sh diff --git a/backend/connectors/nmenv/source.py b/backend/connectors/nmenv/source.py index b28413f..3646073 100644 --- a/backend/connectors/nmenv/source.py +++ b/backend/connectors/nmenv/source.py @@ -64,7 +64,8 @@ def get_records(self, *args, **kw): f"st_within(Locations/location, geography'{self.config.bounding_wkt()}')" ) q = q.expand("Locations") - q = q.filter(" and ".join(fs)) + if fs: + q = q.filter(" and ".join(fs)) return [thing.locations.entities[0] for thing in q.list()] else: analyte = get_analyte_search_param(analyte, DWB_ANALYTE_MAPPING) diff --git a/backend/unifier.py b/backend/unifier.py index 36dd3b5..0646ace 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -40,22 +40,11 @@ def health_check(source: BaseSiteSource) -> bool: return bool(source.health()) -def unify_sites(config): - print("Unifying sites\n") - - # def func(config, persister): - # for source in config.site_sources(): - # s = source() - # persister.load(s.read(config)) - - # _unify_wrapper(config, func) - - def unify_analytes(config): print("Unifying analytes\n") # config.report() -- report is done in cli.py, no need to do it twice config.validate() - + config.finalize() if not config.dry: _unify_parameter(config, config.analyte_sources()) @@ -67,17 +56,18 @@ def unify_waterlevels(config): # config.report() -- report is done in cli.py, no need to do it twice config.validate() - + config.finalize() if not config.dry: _unify_parameter(config, config.water_level_sources()) return True -def unify_sites_only(config): +def unify_sites(config): print("Unifying sites only\n") # config.report() -- report is done in cli.py, no need to do it twice config.validate() + config.finalize() if not config.dry: _unify_parameter(config, config.all_site_sources()) @@ -335,19 +325,25 @@ def site_unification_test(): # cfg.output_summary = True # cfg.output_single_timeseries = True - cfg.use_source_nwis = False - cfg.use_source_nmbgmr = False - cfg.use_source_iscsevenrivers = False - cfg.use_source_pvacd = False - # cfg.use_source_oseroswell = False cfg.use_source_bernco = False - cfg.use_source_iscsevenrivers = False - cfg.use_source_nmose_isc_seven_rivers = False + cfg.use_source_bor = False + cfg.use_source_cabq = False cfg.use_source_ebid = False + cfg.use_source_nmbgmr_amp = False + cfg.use_source_nmed_dwb = False + cfg.use_source_nmose_isc_seven_rivers = False + cfg.use_source_nmose_roswell = False + cfg.use_source_nwis = False + cfg.use_source_pvacd = False + cfg.use_source_wqp = False + + cfg.use_source_nmed_dwb = True + + cfg.finalize() - unify_sites_only(cfg) + unify_sites(cfg) def get_datastream(siteid): diff --git a/frontend/cli.py b/frontend/cli.py index 8dd330c..760f303 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -227,11 +227,11 @@ def weave( config = setup_config(f"{parameter}", bbox, county, site_limit, dry) config.parameter = parameter - # make sure config.output_name is properly set - config.update_output_name() - - # make output_path now so that die.log can be written to it live - config.make_output_path() + # # make sure config.output_name is properly set + # config.update_output_name() + # + # # make output_path now so that die.log can be written to it live + # config.make_output_path() # setup logging here so that the path can be set to config.output_path setup_logging(path=config.output_path) @@ -249,53 +249,33 @@ def weave( summary = False timeseries_unified = False timeseries_separated = True + else: + click.echo(f"Invalid output type: {output}") + return config.output_summary = summary config.output_timeseries_unified = timeseries_unified config.output_timeseries_separated = timeseries_separated + false_agencies = [] + config_agencies = [] # sources if parameter == "waterlevels": - config.use_source_bernco = no_bernco - config.use_source_cabq = no_cabq - config.use_source_ebid = no_ebid - config.use_source_nmbgmr_amp = no_nmbgmr_amp - config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers - config.use_source_nmose_roswell = no_nmose_roswell - config.use_source_nwis = no_nwis - config.use_source_pvacd = no_pvacd - config.use_source_wqp = no_wqp - - config.use_source_bor = False - config.use_source_nmed_dwb = False + config_agencies =["bernco", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", + "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd"] + + false_agencies = ['bor', 'nmed_dwb'] elif parameter == "carbonate": - config.use_source_nmbgmr_amp = no_nmbgmr_amp - config.use_source_wqp = no_wqp - - config.use_source_bor = False - config.use_source_bernco = False - config.use_source_cabq = False - config.use_source_ebid = False - config.use_source_nmed_dwb = False - config.use_source_nmose_isc_seven_rivers = False - config.use_source_nmose_roswell = False - config.use_source_nwis = False - config.use_source_pvacd = False + config_agencies = ['nmbgmr_amp', 'wqp'] + false_agencies = ['bor', 'bernco', 'cabq', 'ebid', 'nmed_dwb', + 'nmose_isc_seven_rivers', 'nmose_roswell', 'nwis', 'pvacd'] elif parameter in ["arsenic", "uranium"]: - config.use_source_bor = no_bor - config.use_source_nmbgmr_amp = no_nmbgmr_amp - config.use_source_nmed_dwb = no_nmed_dwb - config.use_source_wqp = no_wqp - - config.use_source_bernco = False - config.use_source_cabq = False - config.use_source_ebid = False - config.use_source_nmose_isc_seven_rivers = False - config.use_source_nmose_roswell = False - config.use_source_nwis = False - config.use_source_pvacd = False + config_agencies = ['bor', 'nmbgmr_amp', 'nmed_dwb', 'wqp'] + false_agencies = ['bernco', 'cabq', 'ebid', 'nmose_isc_seven_rivers', + 'nmose_roswell', 'nwis', 'pvacd'] + elif parameter in [ "bicarbonate", @@ -311,19 +291,16 @@ def weave( "sulfate", "tds", ]: - config.use_source_bor = no_bor - config.use_source_nmbgmr_amp = no_nmbgmr_amp - config.use_source_nmed_dwb = no_nmed_dwb - config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers - config.use_source_wqp = no_wqp - - config.use_source_bernco = False - config.use_source_cabq = False - config.use_source_ebid = False - config.use_source_nmose_roswell = False - config.use_source_nwis = False - config.use_source_pvacd = False + config_agencies = ['bor', 'nmbgmr_amp', 'nmed_dwb','nmose_isc_seven_rivers', 'wqp'] + false_agencies = ['bernco', 'cabq', 'ebid', 'nmose_roswell', 'nwis', 'pvacd'] + + if false_agencies: + for agency in false_agencies: + setattr(config, f"use_source_{agency}", False) + if config_agencies: + for agency in config_agencies: + setattr(config, f"use_source_{agency}", getattr(locals(),f'no_{agency}')) # dates config.start_date = start_date config.end_date = end_date diff --git a/frontend/cronjob_worker.sh b/frontend/cronjob_worker.sh new file mode 100644 index 0000000..4a3925f --- /dev/null +++ b/frontend/cronjob_worker.sh @@ -0,0 +1,3 @@ + + +die weave \ No newline at end of file From e5ec261485d17f13d64bfdd52b9b46ce6f2f09a5 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 11:49:26 -0600 Subject: [PATCH 03/16] sites only cli --- frontend/cli.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 760f303..534b225 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -321,11 +321,34 @@ def weave( @cli.command() @add_options(SPATIAL_OPTIONS) -def wells(bbox, county): +@add_options(ALL_SOURCE_OPTIONS) +@add_options(DEBUG_OPTIONS) +def wells(bbox, county, + no_bernco, + no_bor, + no_cabq, + no_ebid, + no_nmbgmr_amp, + no_nmed_dwb, + no_nmose_isc_seven_rivers, + no_nmose_roswell, + no_nwis, + no_pvacd, + no_wqp, + site_limit, + dry,): """ Get locations """ - config = setup_config("sites", bbox, county) + + + config = setup_config("sites", bbox, county, site_limit, dry) + config_agencies = ["bernco", "bor", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", + "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd", + "wqp"] + for agency in config_agencies: + setattr(config, f"use_source_{agency}", getattr(locals(),f'no_{agency}')) + unify_sites(config) From 5feace88541f9ecbb77621dd3479bb152bfb8eab Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 11:54:29 -0600 Subject: [PATCH 04/16] sites only cli --- frontend/cli.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 534b225..eb09581 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -298,9 +298,10 @@ def weave( for agency in false_agencies: setattr(config, f"use_source_{agency}", False) + lcs = locals() if config_agencies: for agency in config_agencies: - setattr(config, f"use_source_{agency}", getattr(locals(),f'no_{agency}')) + setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) # dates config.start_date = start_date config.end_date = end_date @@ -341,13 +342,13 @@ def wells(bbox, county, Get locations """ - config = setup_config("sites", bbox, county, site_limit, dry) config_agencies = ["bernco", "bor", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd", "wqp"] + lcs = locals() for agency in config_agencies: - setattr(config, f"use_source_{agency}", getattr(locals(),f'no_{agency}')) + setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) unify_sites(config) From b77984678dd869905512ccbe33ba1d3cd156306f Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 11:56:59 -0600 Subject: [PATCH 05/16] sites only cli --- frontend/cli.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/frontend/cli.py b/frontend/cli.py index eb09581..37b51ae 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -350,6 +350,11 @@ def wells(bbox, county, for agency in config_agencies: setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) + config.report() + # prompt user to continue + if not click.confirm("Do you want to continue?", default=True): + return + unify_sites(config) From f9b6a87dededa1beb0cf2377cd579ddbeac1e41d Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 12:08:08 -0600 Subject: [PATCH 06/16] sites only cli --- frontend/cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/cli.py b/frontend/cli.py index 37b51ae..89b16f1 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -350,6 +350,7 @@ def wells(bbox, county, for agency in config_agencies: setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) + config.sites_only = True config.report() # prompt user to continue if not click.confirm("Do you want to continue?", default=True): From c7b1e3d27daf8e3dfb33359838d5732b2f3b0a18 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 14:55:03 -0600 Subject: [PATCH 07/16] sites only cli --- frontend/cli.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 89b16f1..7201bd4 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -136,6 +136,12 @@ def cli(): default=False, help="Dry run. Do not execute unifier. Used by unit tests", ), + click.option( + "--yes", + is_flag=True, + default=False, + help="Do not ask for confirmation before running", + ), ] DT_OPTIONS = [ @@ -337,7 +343,8 @@ def wells(bbox, county, no_pvacd, no_wqp, site_limit, - dry,): + dry, + yes): """ Get locations """ @@ -352,9 +359,10 @@ def wells(bbox, county, config.sites_only = True config.report() - # prompt user to continue - if not click.confirm("Do you want to continue?", default=True): - return + if not yes: + # prompt user to continue + if not click.confirm("Do you want to continue?", default=True): + return unify_sites(config) From a9d671f77ebb839dff876b71f18229f2e44ac2fa Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 15:01:02 -0600 Subject: [PATCH 08/16] sites only cli --- backend/config.py | 1 + backend/unifier.py | 5 ++--- frontend/cli.py | 16 +++++++++++----- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/backend/config.py b/backend/config.py index 89c4f59..0396a73 100644 --- a/backend/config.py +++ b/backend/config.py @@ -189,6 +189,7 @@ def __init__(self, model=None, payload=None): setattr(self, f"use_source_{s}", s in payload.get("sources", [])) def finalize(self): + self._update_output_units() self.update_output_name() self.make_output_path() diff --git a/backend/unifier.py b/backend/unifier.py index 0646ace..3e46008 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -44,7 +44,7 @@ def unify_analytes(config): print("Unifying analytes\n") # config.report() -- report is done in cli.py, no need to do it twice config.validate() - config.finalize() + if not config.dry: _unify_parameter(config, config.analyte_sources()) @@ -56,7 +56,7 @@ def unify_waterlevels(config): # config.report() -- report is done in cli.py, no need to do it twice config.validate() - config.finalize() + if not config.dry: _unify_parameter(config, config.water_level_sources()) @@ -67,7 +67,6 @@ def unify_sites(config): # config.report() -- report is done in cli.py, no need to do it twice config.validate() - config.finalize() if not config.dry: _unify_parameter(config, config.all_site_sources()) diff --git a/frontend/cli.py b/frontend/cli.py index 7201bd4..8e1ebe1 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -239,9 +239,6 @@ def weave( # # make output_path now so that die.log can be written to it live # config.make_output_path() - # setup logging here so that the path can be set to config.output_path - setup_logging(path=config.output_path) - # output type if output == "summary": summary = True @@ -312,14 +309,16 @@ def weave( config.start_date = start_date config.end_date = end_date + config.finalize() + # setup logging here so that the path can be set to config.output_path + setup_logging(path=config.output_path) + if not dry: config.report() # prompt user to continue if not click.confirm("Do you want to continue?", default=True): return - config._update_output_units() - if parameter.lower() == "waterlevels": unify_waterlevels(config) else: @@ -328,9 +327,11 @@ def weave( @cli.command() @add_options(SPATIAL_OPTIONS) +@add_options(OUTPUT_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def wells(bbox, county, + output, no_bernco, no_bor, no_cabq, @@ -358,6 +359,11 @@ def wells(bbox, county, setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) config.sites_only = True + + config.finalize() + # setup logging here so that the path can be set to config.output_path + setup_logging(path=config.output_path) + config.report() if not yes: # prompt user to continue From 49e4f6845691e22a5e80265cbfd5819bc9ab44e0 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 15:04:46 -0600 Subject: [PATCH 09/16] sites only cli --- frontend/cli.py | 61 +++++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 8e1ebe1..580d153 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -23,6 +23,7 @@ from backend.logging import setup_logging + # setup_logging() @@ -180,7 +181,12 @@ def cli(): type=click.Choice(["summary", "timeseries_unified", "timeseries_separated"]), required=True, help="Output summary file, single unified timeseries file, or separated timeseries files", - ) + ), + click.option( + "--output-dir", + default=".", + help="Output root directory. Default is current directory", + ), ] @@ -205,25 +211,26 @@ def _add_options(func): @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def weave( - weave, - output, - start_date, - end_date, - bbox, - county, - no_bernco, - no_bor, - no_cabq, - no_ebid, - no_nmbgmr_amp, - no_nmed_dwb, - no_nmose_isc_seven_rivers, - no_nmose_roswell, - no_nwis, - no_pvacd, - no_wqp, - site_limit, - dry, + weave, + output, + output_dir, + start_date, + end_date, + bbox, + county, + no_bernco, + no_bor, + no_cabq, + no_ebid, + no_nmbgmr_amp, + no_nmed_dwb, + no_nmose_isc_seven_rivers, + no_nmose_roswell, + no_nwis, + no_pvacd, + no_wqp, + site_limit, + dry, ): """ Get parameter timeseries or summary data @@ -264,20 +271,20 @@ def weave( config_agencies = [] # sources if parameter == "waterlevels": - config_agencies =["bernco", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", - "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd"] + config_agencies = ["bernco", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", + "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd"] false_agencies = ['bor', 'nmed_dwb'] elif parameter == "carbonate": config_agencies = ['nmbgmr_amp', 'wqp'] false_agencies = ['bor', 'bernco', 'cabq', 'ebid', 'nmed_dwb', - 'nmose_isc_seven_rivers', 'nmose_roswell', 'nwis', 'pvacd'] + 'nmose_isc_seven_rivers', 'nmose_roswell', 'nwis', 'pvacd'] elif parameter in ["arsenic", "uranium"]: config_agencies = ['bor', 'nmbgmr_amp', 'nmed_dwb', 'wqp'] false_agencies = ['bernco', 'cabq', 'ebid', 'nmose_isc_seven_rivers', - 'nmose_roswell', 'nwis', 'pvacd'] + 'nmose_roswell', 'nwis', 'pvacd'] elif parameter in [ @@ -294,7 +301,7 @@ def weave( "sulfate", "tds", ]: - config_agencies = ['bor', 'nmbgmr_amp', 'nmed_dwb','nmose_isc_seven_rivers', 'wqp'] + config_agencies = ['bor', 'nmbgmr_amp', 'nmed_dwb', 'nmose_isc_seven_rivers', 'wqp'] false_agencies = ['bernco', 'cabq', 'ebid', 'nmose_roswell', 'nwis', 'pvacd'] if false_agencies: @@ -332,6 +339,7 @@ def weave( @add_options(DEBUG_OPTIONS) def wells(bbox, county, output, + output_dir, no_bernco, no_bor, no_cabq, @@ -359,7 +367,7 @@ def wells(bbox, county, setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) config.sites_only = True - + config.output_dir = output_dir config.finalize() # setup logging here so that the path can be set to config.output_path setup_logging(path=config.output_path) @@ -414,5 +422,4 @@ def setup_config(tag, bbox, county, site_limit, dry): return config - # ============= EOF ============================================= From 60eadafee1b8b39c54060b60c6e6dda9896500b4 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 15:06:52 -0600 Subject: [PATCH 10/16] sites only cli --- frontend/cli.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 580d153..3f04aab 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -182,11 +182,14 @@ def cli(): required=True, help="Output summary file, single unified timeseries file, or separated timeseries files", ), - click.option( + +] +PERSISTER_OPTIONS = [ + click.option(click.option( "--output-dir", default=".", help="Output root directory. Default is current directory", - ), + )) ] @@ -206,6 +209,7 @@ def _add_options(func): required=True, ) @add_options(OUTPUT_OPTIONS) +@add_options(PERSISTER_OPTIONS) @add_options(DT_OPTIONS) @add_options(SPATIAL_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @@ -335,6 +339,7 @@ def weave( @cli.command() @add_options(SPATIAL_OPTIONS) @add_options(OUTPUT_OPTIONS) +@add_options(PERSISTER_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def wells(bbox, county, From 5ddf81441d0b56187e3d9d90db65a3743342fd89 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 15:08:01 -0600 Subject: [PATCH 11/16] sites only cli --- frontend/cli.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 3f04aab..73c1a61 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -338,12 +338,10 @@ def weave( @cli.command() @add_options(SPATIAL_OPTIONS) -@add_options(OUTPUT_OPTIONS) @add_options(PERSISTER_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def wells(bbox, county, - output, output_dir, no_bernco, no_bor, From 9ad78f85add651621e3cc169b59a3d80c7974797 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 15:08:40 -0600 Subject: [PATCH 12/16] sites only cli --- frontend/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 73c1a61..8bb9811 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -185,11 +185,11 @@ def cli(): ] PERSISTER_OPTIONS = [ - click.option(click.option( + click.option( "--output-dir", default=".", help="Output root directory. Default is current directory", - )) + ) ] From e1754477d711770b113f55f56a3b1cfd40fd20d7 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 15:10:24 -0600 Subject: [PATCH 13/16] sites only cli --- backend/config.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/backend/config.py b/backend/config.py index 0396a73..03595fb 100644 --- a/backend/config.py +++ b/backend/config.py @@ -192,6 +192,7 @@ def finalize(self): self._update_output_units() self.update_output_name() self.make_output_path() + self.make_output_directory() def all_site_sources(self): sources = self.water_level_sources() @@ -395,6 +396,12 @@ def _validate_county(self): return bool(get_county_polygon(self.county)) return True + def make_output_directory(self): + """ + Create the output directory if it doesn't exist. + """ + if not os.path.exists(self.output_dir): + os.mkdir(self.output_dir) def update_output_name(self): """ From deae8d327003bd1f5d23467e7fff673ebad69ac7 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 15:11:01 -0600 Subject: [PATCH 14/16] sites only cli --- backend/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/config.py b/backend/config.py index 03595fb..b9dea0c 100644 --- a/backend/config.py +++ b/backend/config.py @@ -190,9 +190,9 @@ def __init__(self, model=None, payload=None): def finalize(self): self._update_output_units() + self.make_output_directory() self.update_output_name() self.make_output_path() - self.make_output_directory() def all_site_sources(self): sources = self.water_level_sources() From 12e6d076655bfc685eff8b466300d7fb377f9a5f Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 15:22:40 -0600 Subject: [PATCH 15/16] sites only cli --- backend/config.py | 59 ++++++++++++++++++----------------------------- 1 file changed, 22 insertions(+), 37 deletions(-) diff --git a/backend/config.py b/backend/config.py index b9dea0c..4639987 100644 --- a/backend/config.py +++ b/backend/config.py @@ -61,46 +61,31 @@ from .connectors.usgs.source import NWISSiteSource, NWISWaterLevelSource from .connectors.wqp.source import WQPSiteSource, WQPAnalyteSource, WQPWaterLevelSource -SOURCE_KEYS = ( - "bernco", - "bor", - "cabq", - "ebid", - "nmbgmr_amp", - "nmed_dwb", - "nmose_isc_seven_rivers", - "nmose_roswell", - "nwis", - "pvacd", - "wqp", -) +SOURCE_DICT = { + "bernco": BernCoSiteSource, + "bor": BORSiteSource, + "cabq": CABQSiteSource, + "ebid": EBIDSiteSource, + "nmbgmr_amp": NMBGMRSiteSource, + "nmed_dwb": DWBSiteSource, + "nmose_isc_seven_rivers": ISCSevenRiversSiteSource, + "nmose_roswell": NMOSERoswellSiteSource, + "nwis": NWISSiteSource, + "pvacd": PVACDSiteSource, + "wqp": WQPSiteSource, +} + +SOURCE_KEYS = list(SOURCE_DICT.keys()) def get_source(source): - if source == "bernco": - return BernCoSiteSource() - elif source == "bor": - return BORSiteSource() - elif source == "cabq": - return CABQSiteSource() - elif source == "ebid": - return EBIDSiteSource() - elif source == "nmbgmr_amp": - return NMBGMRSiteSource() - elif source == "nmed_dwb": - return DWBSiteSource() - elif source == "nmose_isc_seven_rivers": - return ISCSevenRiversSiteSource() - elif source == "nmose_roswell": - return NMOSERoswellSiteSource() - elif source == "nwis": - return NWISSiteSource() - elif source == "pvacd": - return PVACDSiteSource() - elif source == "wqp": - return WQPSiteSource() - - return None + try: + klass = SOURCE_DICT[source] + except KeyError: + raise ValueError(f"Unknown source {source}") + + if klass: + return klass() class Config(Loggable): From 358aebbc81f4d039c9e3937c5a6617756b7a4ca9 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 17:09:06 -0600 Subject: [PATCH 16/16] added wqp to config_agency when parameter = 'waterlevels' --- frontend/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/cli.py b/frontend/cli.py index 8bb9811..cf038ac 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -276,7 +276,7 @@ def weave( # sources if parameter == "waterlevels": config_agencies = ["bernco", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", - "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd"] + "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd", "wqp"] false_agencies = ['bor', 'nmed_dwb']