Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,21 @@ or
die sites --usgs-api-key FAKE_API_KEY
```

### Encoding

The exported files are encoded with `utf-8`. When opening the files in Python or another programming language, make sure they are opened with the same `utf-8` encoding.

#### Working in Excel

Because the CSV files are encoded with `utf-8` rather than `utf-8-sig`, special characters (such as μ) may be displayed incorrectly when the files are opened directly in Excel. The `utf-8-sig` encoding includes a Byte Order Mark (BOM) that tells Excel the file is `utf-8` encoded. To view the characters properly, follow these steps:

1. Go to the `Data` tab and click on `Get Data`
2. Choose `Text/CSV` and select the file to open
3. Under `File origin` select `65001: Unicode (UTF-8)`
4. Under `Delimiter` select `Comma`
5. Load the data


### Geographic Filters [In Development]

The following flags can be used to geographically filter data:
Expand Down
8 changes: 4 additions & 4 deletions backend/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ class Loggable:
def __init__(self):
self.logger = logging.getLogger(self.__class__.__name__)

def log(self, msg, level=None, fg="yellow"):
def log(self, msg, level=None, fg="yellow", **kwargs):
if level is None:
level = logging.INFO

click.secho(f"{self.__class__.__name__:40s}{msg}", fg=fg)
self.logger.log(level, msg)
self.logger.log(level, msg, **kwargs)

def warn(self, msg, fg="red"):
self.log(msg, fg=fg, level=logging.WARNING)
def warn(self, msg, fg="red", **kwargs):
self.log(msg, fg=fg, level=logging.WARNING, **kwargs)

def debug(self, msg):
self.log(msg, level=logging.DEBUG, fg="blue")
Expand Down
94 changes: 55 additions & 39 deletions backend/persister.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def dump_timeseries(path, timeseries: list[list]):
of timeseries separated, the inner list will contain the records for a single site
and this function will be called multiple times, once for each site.
"""
with open(path, "w", newline="") as f:
with open(path, "w", newline="", encoding="utf-8") as f:
Comment thread
jacob-a-brown marked this conversation as resolved.
Comment thread
jacob-a-brown marked this conversation as resolved.
writer = csv.writer(f)
Comment thread
jacob-a-brown marked this conversation as resolved.
headers_have_not_been_written = True
for i, records in enumerate(timeseries):
Expand All @@ -55,7 +55,7 @@ def dump_timeseries(path, timeseries: list[list]):

def dump_sites_summary(path, records, output_format: OutputFormat):
if output_format == OutputFormat.CSV:
with open(path, "w", newline="") as f:
with open(path, "w", newline="", encoding="utf-8") as f:
writer = csv.writer(f)
Comment thread
jacob-a-brown marked this conversation as resolved.
for i, site in enumerate(records):
if i == 0:
Expand Down Expand Up @@ -83,7 +83,7 @@ def dump_sites_summary(path, records, output_format: OutputFormat):
]
feature_collection = {"type": "FeatureCollection", "features": features}

with open(path, "w") as f:
with open(path, "w", encoding="utf-8") as f:
json.dump(feature_collection, f, indent=4)


Expand All @@ -109,48 +109,64 @@ def finalize(self, output_name: str):
pass

def dump_sites(self, path: str):
if self.sites:
path = os.path.join(path, "sites")
path = self.add_extension(path, self.config.output_format)
self.log(f"dumping sites to {os.path.abspath(path)}")
self._dump_sites_summary(path, self.sites, self.config.output_format)
else:
self.log("no sites to dump", fg="red")
try:
if self.sites:
path = os.path.join(path, "sites")
path = self.add_extension(path, self.config.output_format)
self.log(f"dumping sites to {os.path.abspath(path)}")
self._dump_sites_summary(path, self.sites, self.config.output_format)
else:
self.log("no sites to dump", fg="red")
except Exception as e:
self.warn(f"failed to dump sites: {e}", exc_info=True)
raise
Comment thread
jacob-a-brown marked this conversation as resolved.
Comment thread
jacob-a-brown marked this conversation as resolved.

def dump_summary(self, path: str):
if self.records:
path = os.path.join(path, "summary")
path = self.add_extension(path, self.config.output_format)
self.log(f"dumping summary to {os.path.abspath(path)}")
self._dump_sites_summary(path, self.records, self.config.output_format)
else:
self.log("no records to dump", fg="red")
try:
if self.records:
path = os.path.join(path, "summary")
path = self.add_extension(path, self.config.output_format)
self.log(f"dumping summary to {os.path.abspath(path)}")
self._dump_sites_summary(path, self.records, self.config.output_format)
else:
self.log("no records to dump", fg="red")
except Exception as e:
self.warn(f"failed to dump summary: {e}", exc_info=True)
raise
Comment thread
jacob-a-brown marked this conversation as resolved.

def dump_timeseries_unified(self, path: str):
if self.timeseries:
path = os.path.join(path, "timeseries_unified")
path = self.add_extension(path, OutputFormat.CSV.value)
self.log(f"dumping unified timeseries to {os.path.abspath(path)}")
self._dump_timeseries(path, self.timeseries)
else:
self.log("no timeseries records to dump", fg="red")

def dump_timeseries_separated(self, path: str):
if self.timeseries:
# make timeseries path inside of config.output_path to which
# the individual site timeseries will be dumped
timeseries_path = os.path.join(path, "timeseries")
self._make_output_directory(timeseries_path)
for records in self.timeseries:
site_id = records[0].id
path = os.path.join(timeseries_path, str(site_id).replace(" ", "_"))
try:
if self.timeseries:
path = os.path.join(path, "timeseries_unified")
path = self.add_extension(path, OutputFormat.CSV.value)
self.log(f"dumping {site_id} to {os.path.abspath(path)}")
self.log(f"dumping unified timeseries to {os.path.abspath(path)}")
self._dump_timeseries(path, self.timeseries)
else:
self.log("no timeseries records to dump", fg="red")
except Exception as e:
self.warn(f"failed to dump unified timeseries: {e}", exc_info=True)
raise
Comment thread
jacob-a-brown marked this conversation as resolved.

list_of_records = [records]
self._dump_timeseries(path, list_of_records)
else:
self.log("no timeseries records to dump", fg="red")
def dump_timeseries_separated(self, path: str):
try:
if self.timeseries:
# make timeseries path inside of config.output_path to which
# the individual site timeseries will be dumped
timeseries_path = os.path.join(path, "timeseries")
self._make_output_directory(timeseries_path)
for records in self.timeseries:
site_id = records[0].id
site_path = os.path.join(timeseries_path, str(site_id).replace(" ", "_"))
site_path = self.add_extension(site_path, OutputFormat.CSV.value)
self.log(f"dumping {site_id} to {os.path.abspath(site_path)}")

list_of_records = [records]
self._dump_timeseries(site_path, list_of_records)
else:
self.log("no timeseries records to dump", fg="red")
except Exception as e:
self.warn(f"failed to dump separated timeseries: {e}", exc_info=True)
raise
Comment thread
jacob-a-brown marked this conversation as resolved.

def add_extension(self, path: str, extension: OutputFormat):
if not extension:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

setup(
name="nmuwd",
version="0.10.2",
version="0.10.3",
author="Jake Ross",
description="New Mexico Water Data Integration Engine",
long_description=long_description,
Expand Down