DataIntegrationGroup · jacob-a-brown · May 27, 2026 · May 26, 2026 · May 26, 2026 · May 26, 2026
diff --git a/README.md b/README.md
@@ -204,6 +204,21 @@ or
 die sites --usgs-api-key FAKE_API_KEY
 ```
 
+### Encoding
+
+The exported files are encoded with `utf-8`. When opening the files in Python or another programming language, ensure that they are opened with the same `utf-8` encoding.
+
+#### Working in Excel
+
+Because the CSV files are encoded with `utf-8` rather than `utf-8-sig`, Excel may display special characters incorrectly when a file is opened directly (for example, μ may appear garbled). `utf-8-sig` prepends a Byte Order Mark (BOM) that tells Excel the file is `utf-8` encoded; the exported files do not include one. To view the characters properly, follow these steps:
+
+1. Go to the `Data` tab and click on `Get Data`
+2. Choose `Text/CSV` and select the file to open
+3. Under `File origin` select `65001: Unicode (UTF-8)`
+4. Under `Delimiter` select `Comma`
+5. Load the data
+
+
 ### Geographic Filters [In Development]
 
 The following flags can be used to geographically filter data:

diff --git a/backend/logger.py b/backend/logger.py
@@ -28,15 +28,15 @@ class Loggable:
     def __init__(self):
         self.logger = logging.getLogger(self.__class__.__name__)
 
-    def log(self, msg, level=None, fg="yellow"):
+    def log(self, msg, level=None, fg="yellow", **kwargs):
         if level is None:
             level = logging.INFO
 
         click.secho(f"{self.__class__.__name__:40s}{msg}", fg=fg)
-        self.logger.log(level, msg)
+        self.logger.log(level, msg, **kwargs)
 
-    def warn(self, msg, fg="red"):
-        self.log(msg, fg=fg, level=logging.WARNING)
+    def warn(self, msg, fg="red", **kwargs):
+        self.log(msg, fg=fg, level=logging.WARNING, **kwargs)
 
     def debug(self, msg):
         self.log(msg, level=logging.DEBUG, fg="blue")

diff --git a/backend/persister.py b/backend/persister.py
@@ -42,7 +42,7 @@ def dump_timeseries(path, timeseries: list[list]):
     of timeseries separated, the inner list will contain the records for a single site
     and this function will be called multiple times, once for each site.
     """
-    with open(path, "w", newline="") as f:
+    with open(path, "w", newline="", encoding="utf-8") as f:
         writer = csv.writer(f)
         headers_have_not_been_written = True
         for i, records in enumerate(timeseries):
@@ -55,7 +55,7 @@ def dump_timeseries(path, timeseries: list[list]):
 
 def dump_sites_summary(path, records, output_format: OutputFormat):
     if output_format == OutputFormat.CSV:
-        with open(path, "w", newline="") as f:
+        with open(path, "w", newline="", encoding="utf-8") as f:
             writer = csv.writer(f)
             for i, site in enumerate(records):
                 if i == 0:
@@ -83,7 +83,7 @@ def dump_sites_summary(path, records, output_format: OutputFormat):
         ]
         feature_collection = {"type": "FeatureCollection", "features": features}
 
-        with open(path, "w") as f:
+        with open(path, "w", encoding="utf-8") as f:
             json.dump(feature_collection, f, indent=4)
 
 
@@ -109,48 +109,64 @@ def finalize(self, output_name: str):
         pass
 
     def dump_sites(self, path: str):
-        if self.sites:
-            path = os.path.join(path, "sites")
-            path = self.add_extension(path, self.config.output_format)
-            self.log(f"dumping sites to {os.path.abspath(path)}")
-            self._dump_sites_summary(path, self.sites, self.config.output_format)
-        else:
-            self.log("no sites to dump", fg="red")
+        try:
+            if self.sites:
+                path = os.path.join(path, "sites")
+                path = self.add_extension(path, self.config.output_format)
+                self.log(f"dumping sites to {os.path.abspath(path)}")
+                self._dump_sites_summary(path, self.sites, self.config.output_format)
+            else:
+                self.log("no sites to dump", fg="red")
+        except Exception as e:
+            self.warn(f"failed to dump sites: {e}", exc_info=True)
+            raise
 
     def dump_summary(self, path: str):
-        if self.records:
-            path = os.path.join(path, "summary")
-            path = self.add_extension(path, self.config.output_format)
-            self.log(f"dumping summary to {os.path.abspath(path)}")
-            self._dump_sites_summary(path, self.records, self.config.output_format)
-        else:
-            self.log("no records to dump", fg="red")
+        try:
+            if self.records:
+                path = os.path.join(path, "summary")
+                path = self.add_extension(path, self.config.output_format)
+                self.log(f"dumping summary to {os.path.abspath(path)}")
+                self._dump_sites_summary(path, self.records, self.config.output_format)
+            else:
+                self.log("no records to dump", fg="red")
+        except Exception as e:
+            self.warn(f"failed to dump summary: {e}", exc_info=True)
+            raise
 
     def dump_timeseries_unified(self, path: str):
-        if self.timeseries:
-            path = os.path.join(path, "timeseries_unified")
-            path = self.add_extension(path, OutputFormat.CSV.value)
-            self.log(f"dumping unified timeseries to {os.path.abspath(path)}")
-            self._dump_timeseries(path, self.timeseries)
-        else:
-            self.log("no timeseries records to dump", fg="red")
-
-    def dump_timeseries_separated(self, path: str):
-        if self.timeseries:
-            # make timeseries path inside of config.output_path to which
-            # the individual site timeseries will be dumped
-            timeseries_path = os.path.join(path, "timeseries")
-            self._make_output_directory(timeseries_path)
-            for records in self.timeseries:
-                site_id = records[0].id
-                path = os.path.join(timeseries_path, str(site_id).replace(" ", "_"))
+        try:
+            if self.timeseries:
+                path = os.path.join(path, "timeseries_unified")
                 path = self.add_extension(path, OutputFormat.CSV.value)
-                self.log(f"dumping {site_id} to {os.path.abspath(path)}")
+                self.log(f"dumping unified timeseries to {os.path.abspath(path)}")
+                self._dump_timeseries(path, self.timeseries)
+            else:
+                self.log("no timeseries records to dump", fg="red")
+        except Exception as e:
+            self.warn(f"failed to dump unified timeseries: {e}", exc_info=True)
+            raise
 
-                list_of_records = [records]
-                self._dump_timeseries(path, list_of_records)
-        else:
-            self.log("no timeseries records to dump", fg="red")
+    def dump_timeseries_separated(self, path: str):
+        try:
+            if self.timeseries:
+                # make timeseries path inside of config.output_path to which
+                # the individual site timeseries will be dumped
+                timeseries_path = os.path.join(path, "timeseries")
+                self._make_output_directory(timeseries_path)
+                for records in self.timeseries:
+                    site_id = records[0].id
+                    site_path = os.path.join(timeseries_path, str(site_id).replace(" ", "_"))
+                    site_path = self.add_extension(site_path, OutputFormat.CSV.value)
+                    self.log(f"dumping {site_id} to {os.path.abspath(site_path)}")
+
+                    list_of_records = [records]
+                    self._dump_timeseries(site_path, list_of_records)
+            else:
+                self.log("no timeseries records to dump", fg="red")
+        except Exception as e:
+            self.warn(f"failed to dump separated timeseries: {e}", exc_info=True)
+            raise
 
     def add_extension(self, path: str, extension: OutputFormat):
         if not extension:

diff --git a/setup.py b/setup.py
@@ -22,7 +22,7 @@
 
 setup(
     name="nmuwd",
-    version="0.10.2",
+    version="0.10.3",
     author="Jake Ross",
     description="New Mexico Water Data Integration Engine",
     long_description=long_description,