From fde585bc0fb5eba651094afcd585261ee46ed763 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Tue, 26 May 2026 11:32:51 -0600 Subject: [PATCH 01/11] fix(persister): fix encoding issue when writing to file Use UTF-8 encoding when writing to file in persister.py to fix encoding issues that occur when the default encoding is not UTF-8. This ensures that all characters are properly encoded and prevents errors when writing to the file. --- backend/persister.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/persister.py b/backend/persister.py index 05d860b..282ba5d 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -42,7 +42,7 @@ def dump_timeseries(path, timeseries: list[list]): of timeseries separated, the inner list will contain the records for a single site and this function will be called multiple times, once for each site. """ - with open(path, "w", newline="") as f: + with open(path, "w", newline="", encoding="utf-8-sig") as f: writer = csv.writer(f) headers_have_not_been_written = True for i, records in enumerate(timeseries): @@ -55,7 +55,7 @@ def dump_timeseries(path, timeseries: list[list]): def dump_sites_summary(path, records, output_format: OutputFormat): if output_format == OutputFormat.CSV: - with open(path, "w", newline="") as f: + with open(path, "w", newline="", encoding="utf-8-sig") as f: writer = csv.writer(f) for i, site in enumerate(records): if i == 0: @@ -83,7 +83,7 @@ def dump_sites_summary(path, records, output_format: OutputFormat): ] feature_collection = {"type": "FeatureCollection", "features": features} - with open(path, "w") as f: + with open(path, "w", encoding="utf-8") as f: json.dump(feature_collection, f, indent=4) From 98403bef3438aff6d4fe5d54590f4782f88ed69f Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Tue, 26 May 2026 11:33:40 -0600 Subject: [PATCH 02/11] feat(version): bump to 0.10.3 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py 
b/setup.py index 72734eb..da41f4c 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ setup( name="nmuwd", - version="0.10.2", + version="0.10.3", author="Jake Ross", description="New Mexico Water Data Integration Engine", long_description=long_description, From 07f28c5cafa466b944d4e384609fdd1d00f0cb5b Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Tue, 26 May 2026 13:40:13 -0600 Subject: [PATCH 03/11] fix(persister): log errors when dumping files --- backend/persister.py | 84 +++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 36 deletions(-) diff --git a/backend/persister.py b/backend/persister.py index 282ba5d..e348cf3 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -109,48 +109,60 @@ def finalize(self, output_name: str): pass def dump_sites(self, path: str): - if self.sites: - path = os.path.join(path, "sites") - path = self.add_extension(path, self.config.output_format) - self.log(f"dumping sites to {os.path.abspath(path)}") - self._dump_sites_summary(path, self.sites, self.config.output_format) - else: - self.log("no sites to dump", fg="red") + try: + if self.sites: + path = os.path.join(path, "sites") + path = self.add_extension(path, self.config.output_format) + self.log(f"dumping sites to {os.path.abspath(path)}") + self._dump_sites_summary(path, self.sites, self.config.output_format) + else: + self.log("no sites to dump", fg="red") + except Exception as e: + self.warn(f"failed to dump sites: {e}") def dump_summary(self, path: str): - if self.records: - path = os.path.join(path, "summary") - path = self.add_extension(path, self.config.output_format) - self.log(f"dumping summary to {os.path.abspath(path)}") - self._dump_sites_summary(path, self.records, self.config.output_format) - else: - self.log("no records to dump", fg="red") + try: + if self.records: + path = os.path.join(path, "summary") + path = self.add_extension(path, self.config.output_format) + self.log(f"dumping summary to 
{os.path.abspath(path)}") + self._dump_sites_summary(path, self.records, self.config.output_format) + else: + self.log("no records to dump", fg="red") + except Exception as e: + self.warn(f"failed to dump summary: {e}") def dump_timeseries_unified(self, path: str): - if self.timeseries: - path = os.path.join(path, "timeseries_unified") - path = self.add_extension(path, OutputFormat.CSV.value) - self.log(f"dumping unified timeseries to {os.path.abspath(path)}") - self._dump_timeseries(path, self.timeseries) - else: - self.log("no timeseries records to dump", fg="red") - - def dump_timeseries_separated(self, path: str): - if self.timeseries: - # make timeseries path inside of config.output_path to which - # the individual site timeseries will be dumped - timeseries_path = os.path.join(path, "timeseries") - self._make_output_directory(timeseries_path) - for records in self.timeseries: - site_id = records[0].id - path = os.path.join(timeseries_path, str(site_id).replace(" ", "_")) + try: + if self.timeseries: + path = os.path.join(path, "timeseries_unified") path = self.add_extension(path, OutputFormat.CSV.value) - self.log(f"dumping {site_id} to {os.path.abspath(path)}") + self.log(f"dumping unified timeseries to {os.path.abspath(path)}") + self._dump_timeseries(path, self.timeseries) + else: + self.log("no timeseries records to dump", fg="red") + except Exception as e: + self.warn(f"failed to dump unified timeseries: {e}") - list_of_records = [records] - self._dump_timeseries(path, list_of_records) - else: - self.log("no timeseries records to dump", fg="red") + def dump_timeseries_separated(self, path: str): + try: + if self.timeseries: + # make timeseries path inside of config.output_path to which + # the individual site timeseries will be dumped + timeseries_path = os.path.join(path, "timeseries") + self._make_output_directory(timeseries_path) + for records in self.timeseries: + site_id = records[0].id + path = os.path.join(timeseries_path, str(site_id).replace(" 
", "_")) + path = self.add_extension(path, OutputFormat.CSV.value) + self.log(f"dumping {site_id} to {os.path.abspath(path)}") + + list_of_records = [records] + self._dump_timeseries(path, list_of_records) + else: + self.log("no timeseries records to dump", fg="red") + except Exception as e: + self.warn(f"failed to dump separated timeseries: {e}") def add_extension(self, path: str, extension: OutputFormat): if not extension: From e29d8ad417fee26aade5a0dac82f684650936de4 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Tue, 26 May 2026 13:40:41 -0600 Subject: [PATCH 04/11] fix(persister): use utf-8 instead of utf-8-sig --- backend/persister.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/persister.py b/backend/persister.py index e348cf3..0d176f6 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -42,7 +42,7 @@ def dump_timeseries(path, timeseries: list[list]): of timeseries separated, the inner list will contain the records for a single site and this function will be called multiple times, once for each site. 
""" - with open(path, "w", newline="", encoding="utf-8-sig") as f: + with open(path, "w", newline="", encoding="utf-8") as f: writer = csv.writer(f) headers_have_not_been_written = True for i, records in enumerate(timeseries): @@ -55,7 +55,7 @@ def dump_timeseries(path, timeseries: list[list]): def dump_sites_summary(path, records, output_format: OutputFormat): if output_format == OutputFormat.CSV: - with open(path, "w", newline="", encoding="utf-8-sig") as f: + with open(path, "w", newline="", encoding="utf-8") as f: writer = csv.writer(f) for i, site in enumerate(records): if i == 0: From 746a72203311c9eda0dc61875f8afc96f609c764 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Tue, 26 May 2026 13:50:10 -0600 Subject: [PATCH 05/11] fix(encoding): add error handling to timeseries dumps --- backend/persister.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/backend/persister.py b/backend/persister.py index 0d176f6..088d7ff 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -119,6 +119,7 @@ def dump_sites(self, path: str): self.log("no sites to dump", fg="red") except Exception as e: self.warn(f"failed to dump sites: {e}") + raise e def dump_summary(self, path: str): try: @@ -131,7 +132,8 @@ def dump_summary(self, path: str): self.log("no records to dump", fg="red") except Exception as e: self.warn(f"failed to dump summary: {e}") - + raise e + def dump_timeseries_unified(self, path: str): try: if self.timeseries: @@ -143,6 +145,7 @@ def dump_timeseries_unified(self, path: str): self.log("no timeseries records to dump", fg="red") except Exception as e: self.warn(f"failed to dump unified timeseries: {e}") + raise e def dump_timeseries_separated(self, path: str): try: @@ -163,6 +166,7 @@ def dump_timeseries_separated(self, path: str): self.log("no timeseries records to dump", fg="red") except Exception as e: self.warn(f"failed to dump separated timeseries: {e}") + raise e def add_extension(self, path: str, extension: OutputFormat): 
if not extension: From 183e8d848b0a32addbf6baca12ed5c0dc754b4fe Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Tue, 26 May 2026 13:57:18 -0600 Subject: [PATCH 06/11] fix(persister): use plain raise in except block this preserves the original traceback, which is helpful for debugging. --- backend/persister.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/persister.py b/backend/persister.py index 088d7ff..88cda70 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -119,7 +119,7 @@ def dump_sites(self, path: str): self.log("no sites to dump", fg="red") except Exception as e: self.warn(f"failed to dump sites: {e}") - raise e + raise def dump_summary(self, path: str): try: @@ -132,7 +132,7 @@ def dump_summary(self, path: str): self.log("no records to dump", fg="red") except Exception as e: self.warn(f"failed to dump summary: {e}") - raise e + raise def dump_timeseries_unified(self, path: str): try: @@ -145,7 +145,7 @@ def dump_timeseries_unified(self, path: str): self.log("no timeseries records to dump", fg="red") except Exception as e: self.warn(f"failed to dump unified timeseries: {e}") - raise e + raise def dump_timeseries_separated(self, path: str): try: @@ -166,7 +166,7 @@ def dump_timeseries_separated(self, path: str): self.log("no timeseries records to dump", fg="red") except Exception as e: self.warn(f"failed to dump separated timeseries: {e}") - raise e + raise def add_extension(self, path: str, extension: OutputFormat): if not extension: From b06937a79ef5f897b658d6c590e91a9c308bfd35 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Tue, 26 May 2026 13:58:30 -0600 Subject: [PATCH 07/11] fix(persister): remove trailing whitespace --- backend/persister.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/persister.py b/backend/persister.py index 88cda70..80731ec 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -133,7 +133,7 @@ def dump_summary(self, path: str): except 
Exception as e: self.warn(f"failed to dump summary: {e}") raise - + def dump_timeseries_unified(self, path: str): try: if self.timeseries: From 17a1beeac616ab71f88a1bb5d0cc0b54b2438a80 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Tue, 26 May 2026 15:48:10 -0600 Subject: [PATCH 08/11] fix(persister): include traceback in persister failure --- backend/persister.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/persister.py b/backend/persister.py index 80731ec..b873f11 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -118,7 +118,7 @@ def dump_sites(self, path: str): else: self.log("no sites to dump", fg="red") except Exception as e: - self.warn(f"failed to dump sites: {e}") + self.warn(f"failed to dump sites: {e}", exc_info=True) raise def dump_summary(self, path: str): @@ -131,7 +131,7 @@ def dump_summary(self, path: str): else: self.log("no records to dump", fg="red") except Exception as e: - self.warn(f"failed to dump summary: {e}") + self.warn(f"failed to dump summary: {e}", exc_info=True) raise def dump_timeseries_unified(self, path: str): @@ -144,7 +144,7 @@ def dump_timeseries_unified(self, path: str): else: self.log("no timeseries records to dump", fg="red") except Exception as e: - self.warn(f"failed to dump unified timeseries: {e}") + self.warn(f"failed to dump unified timeseries: {e}", exc_info=True) raise def dump_timeseries_separated(self, path: str): @@ -165,7 +165,7 @@ def dump_timeseries_separated(self, path: str): else: self.log("no timeseries records to dump", fg="red") except Exception as e: - self.warn(f"failed to dump separated timeseries: {e}") + self.warn(f"failed to dump separated timeseries: {e}", exc_info=True) raise def add_extension(self, path: str, extension: OutputFormat): From ed4f37198e58702fd37d69c62493b34495876587 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Tue, 26 May 2026 15:54:27 -0600 Subject: [PATCH 09/11] fix(logger): accept kwargs for log and warn --- 
backend/logger.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/logger.py b/backend/logger.py index d1b8e23..c567362 100644 --- a/backend/logger.py +++ b/backend/logger.py @@ -28,15 +28,15 @@ class Loggable: def __init__(self): self.logger = logging.getLogger(self.__class__.__name__) - def log(self, msg, level=None, fg="yellow"): + def log(self, msg, level=None, fg="yellow", **kwargs): if level is None: level = logging.INFO click.secho(f"{self.__class__.__name__:40s}{msg}", fg=fg) - self.logger.log(level, msg) + self.logger.log(level, msg, **kwargs) - def warn(self, msg, fg="red"): - self.log(msg, fg=fg, level=logging.WARNING) + def warn(self, msg, fg="red", **kwargs): + self.log(msg, fg=fg, level=logging.WARNING, **kwargs) def debug(self, msg): self.log(msg, level=logging.DEBUG, fg="blue") From 2b1abe20ce7dfc074456930e0209dc1ae261412f Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Tue, 26 May 2026 15:56:36 -0600 Subject: [PATCH 10/11] fix(persister): don't reassign variable --- backend/persister.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/persister.py b/backend/persister.py index b873f11..8887828 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -156,12 +156,12 @@ def dump_timeseries_separated(self, path: str): self._make_output_directory(timeseries_path) for records in self.timeseries: site_id = records[0].id - path = os.path.join(timeseries_path, str(site_id).replace(" ", "_")) - path = self.add_extension(path, OutputFormat.CSV.value) - self.log(f"dumping {site_id} to {os.path.abspath(path)}") + site_path = os.path.join(timeseries_path, str(site_id).replace(" ", "_")) + site_path = self.add_extension(site_path, OutputFormat.CSV.value) + self.log(f"dumping {site_id} to {os.path.abspath(site_path)}") list_of_records = [records] - self._dump_timeseries(path, list_of_records) + self._dump_timeseries(site_path, list_of_records) else: self.log("no timeseries records to 
dump", fg="red") except Exception as e: From 65104d8497b7a883d8e6fffec61336ee28cc8a15 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Wed, 27 May 2026 11:10:52 -0600 Subject: [PATCH 11/11] feat(README): update README to include info on encoding --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index fa3e66a..cabddf1 100644 --- a/README.md +++ b/README.md @@ -204,6 +204,21 @@ or die sites --usgs-api-key FAKE_API_KEY ``` +### Encoding + +The exported files are encoded with `utf-8`. When opening the files in Python or another programming language, ensure that they are opened with the same `utf-8` encoding. + +#### Working in Excel + +Because the CSV files are not encoded with `utf-8-sig`, special characters may be displayed incorrectly when the files are opened in Excel (such as μ appearing garbled). `utf-8-sig` includes a Byte Order Mark (BOM) that tells Excel the file is `utf-8` encoded. To view the characters properly, follow these steps: + +1. Go to the `Data` tab and click on `Get Data` +2. Choose `Text/CSV` and select the file to open +3. Under `File origin` select `65001: Unicode (UTF-8)` +4. Under `Delimiter` select `Comma` +5. Load the data + + ### Geographic Filters [In Development] The following flags can be used to geographically filter data: