diff --git a/README.md b/README.md index fa3e66a..cabddf1 100644 --- a/README.md +++ b/README.md @@ -204,6 +204,21 @@ or die sites --usgs-api-key FAKE_API_KEY ``` +### Encoding + +The exported files are encoded with `utf-8`. When opening the files in Python or another programming language ensure that they are opened with the same `utf-8` encoding. + +#### Working in Excel + +Because the csv files are not encoded with `utf-8-sig`, when opened in Excel special characters may be displayed incorrectly (such as μ appearing garbled). `utf-8-sig` includes a Byte Order Mark (BOM) that tells Excel the file is `utf-8` encoded. To view the characters properly, follow these steps: + +1. Go to the `Data` tab and click on `Get Data` +2. Choose `Text/CSV` and select the file to open +3. Under `File origin` select `65001: Unicode (UTF-8)` +4. Under `Delimiter` select `Comma` +5. Load the data + + ### Geographic Filters [In Development] The following flags can be used to geographically filter data: diff --git a/backend/logger.py b/backend/logger.py index d1b8e23..c567362 100644 --- a/backend/logger.py +++ b/backend/logger.py @@ -28,15 +28,15 @@ class Loggable: def __init__(self): self.logger = logging.getLogger(self.__class__.__name__) - def log(self, msg, level=None, fg="yellow"): + def log(self, msg, level=None, fg="yellow", **kwargs): if level is None: level = logging.INFO click.secho(f"{self.__class__.__name__:40s}{msg}", fg=fg) - self.logger.log(level, msg) + self.logger.log(level, msg, **kwargs) - def warn(self, msg, fg="red"): - self.log(msg, fg=fg, level=logging.WARNING) + def warn(self, msg, fg="red", **kwargs): + self.log(msg, fg=fg, level=logging.WARNING, **kwargs) def debug(self, msg): self.log(msg, level=logging.DEBUG, fg="blue") diff --git a/backend/persister.py b/backend/persister.py index 05d860b..8887828 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -42,7 +42,7 @@ def dump_timeseries(path, timeseries: list[list]): of timeseries separated, the inner list will contain the records for a single site and this function will be called multiple times, once for each site. """ - with open(path, "w", newline="") as f: + with open(path, "w", newline="", encoding="utf-8") as f: writer = csv.writer(f) headers_have_not_been_written = True for i, records in enumerate(timeseries): @@ -55,7 +55,7 @@ def dump_timeseries(path, timeseries: list[list]): def dump_sites_summary(path, records, output_format: OutputFormat): if output_format == OutputFormat.CSV: - with open(path, "w", newline="") as f: + with open(path, "w", newline="", encoding="utf-8") as f: writer = csv.writer(f) for i, site in enumerate(records): if i == 0: @@ -83,7 +83,7 @@ def dump_sites_summary(path, records, output_format: OutputFormat): ] feature_collection = {"type": "FeatureCollection", "features": features} - with open(path, "w") as f: + with open(path, "w", encoding="utf-8") as f: json.dump(feature_collection, f, indent=4) @@ -109,48 +109,64 @@ def finalize(self, output_name: str): pass def dump_sites(self, path: str): - if self.sites: - path = os.path.join(path, "sites") - path = self.add_extension(path, self.config.output_format) - self.log(f"dumping sites to {os.path.abspath(path)}") - self._dump_sites_summary(path, self.sites, self.config.output_format) - else: - self.log("no sites to dump", fg="red") + try: + if self.sites: + path = os.path.join(path, "sites") + path = self.add_extension(path, self.config.output_format) + self.log(f"dumping sites to {os.path.abspath(path)}") + self._dump_sites_summary(path, self.sites, self.config.output_format) + else: + self.log("no sites to dump", fg="red") + except Exception as e: + self.warn(f"failed to dump sites: {e}", exc_info=True) + raise def dump_summary(self, path: str): - if self.records: - path = os.path.join(path, "summary") - path = self.add_extension(path, self.config.output_format) - self.log(f"dumping summary to {os.path.abspath(path)}") - self._dump_sites_summary(path, self.records, self.config.output_format) - else: - self.log("no records to dump", fg="red") + try: + if self.records: + path = os.path.join(path, "summary") + path = self.add_extension(path, self.config.output_format) + self.log(f"dumping summary to {os.path.abspath(path)}") + self._dump_sites_summary(path, self.records, self.config.output_format) + else: + self.log("no records to dump", fg="red") + except Exception as e: + self.warn(f"failed to dump summary: {e}", exc_info=True) + raise def dump_timeseries_unified(self, path: str): - if self.timeseries: - path = os.path.join(path, "timeseries_unified") - path = self.add_extension(path, OutputFormat.CSV.value) - self.log(f"dumping unified timeseries to {os.path.abspath(path)}") - self._dump_timeseries(path, self.timeseries) - else: - self.log("no timeseries records to dump", fg="red") - - def dump_timeseries_separated(self, path: str): - if self.timeseries: - # make timeseries path inside of config.output_path to which - # the individual site timeseries will be dumped - timeseries_path = os.path.join(path, "timeseries") - self._make_output_directory(timeseries_path) - for records in self.timeseries: - site_id = records[0].id - path = os.path.join(timeseries_path, str(site_id).replace(" ", "_")) + try: + if self.timeseries: + path = os.path.join(path, "timeseries_unified") path = self.add_extension(path, OutputFormat.CSV.value) - self.log(f"dumping {site_id} to {os.path.abspath(path)}") + self.log(f"dumping unified timeseries to {os.path.abspath(path)}") + self._dump_timeseries(path, self.timeseries) + else: + self.log("no timeseries records to dump", fg="red") + except Exception as e: + self.warn(f"failed to dump unified timeseries: {e}", exc_info=True) + raise - list_of_records = [records] - self._dump_timeseries(path, list_of_records) - else: - self.log("no timeseries records to dump", fg="red") + def dump_timeseries_separated(self, path: str): + try: + if self.timeseries: + # make timeseries path inside of config.output_path to which + # the individual site timeseries will be dumped + timeseries_path = os.path.join(path, "timeseries") + self._make_output_directory(timeseries_path) + for records in self.timeseries: + site_id = records[0].id + site_path = os.path.join(timeseries_path, str(site_id).replace(" ", "_")) + site_path = self.add_extension(site_path, OutputFormat.CSV.value) + self.log(f"dumping {site_id} to {os.path.abspath(site_path)}") + + list_of_records = [records] + self._dump_timeseries(site_path, list_of_records) + else: + self.log("no timeseries records to dump", fg="red") + except Exception as e: + self.warn(f"failed to dump separated timeseries: {e}", exc_info=True) + raise def add_extension(self, path: str, extension: OutputFormat): if not extension: diff --git a/setup.py b/setup.py index 72734eb..da41f4c 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ setup( name="nmuwd", - version="0.10.2", + version="0.10.3", author="Jake Ross", description="New Mexico Water Data Integration Engine", long_description=long_description,