Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ ProcessTracker - Python
Data integration process management made easy!

[![Coverage Status](https://coveralls.io/repos/github/OpenDataAlex/process_tracker_python/badge.svg?branch=master)](https://coveralls.io/github/OpenDataAlex/process_tracker_python?branch=master)
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/5600be1990974a4688a4fa4852edb5d5)](https://www.codacy.com/app/alexmeadows_2718/process_tracker_python?utm_source=github.com&utm_medium=referral&utm_content=OpenDataAlex/process_tracker_python&utm_campaign=Badge_Grade)
[![Build Status](https://travis-ci.org/OpenDataAlex/process_tracker_python.svg?branch=master)](https://travis-ci.org/OpenDataAlex/process_tracker_python)
[![Downloads](https://pepy.tech/badge/processtracker)](https://pepy.tech/project/processtracker)
[![PyPI version](https://badge.fury.io/py/processtracker.svg)](https://badge.fury.io/py/processtracker)
Expand Down
143 changes: 143 additions & 0 deletions process_tracker/utilities/aws_utilities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# AWS Utilities
# Utilities for working with AWS services

import logging

import boto3
from botocore.errorfactory import ClientError


class AwsUtilities:
def __init__(self):

self.logger = logging.getLogger(__name__)
self.logger.setLevel("DEBUG")

self.s3 = boto3.resource("s3")

def determine_bucket_name(self, path):
"""
For the given path, return the bucket name, if path is a valid s3 URL.
:param path: Valid s3 URL.
:return:
"""
if self.determine_valid_s3_path(path=path):
self.logger.debug("Parsing %s" % path)

if "s3://" in path:
path = path[path.startswith("s3://") and len("s3://") :]

self.logger.debug("Path is now %s" % path)

bucket_name = path.split("/")[0]

self.logger.debug("Bucket name is %s" % bucket_name)

elif "s3" in path and ".amazonaws.com" in path:
if path.startswith("http://"):

path = path[len("http://") :]

self.logger.debug("Path is now %s" % path)

elif path.startswith("https://"):

path = path[len("https://") :]
self.logger.debug("Path is now %s" % path)

else:
error_msg = "It appears the URL is not valid. %s" % path

self.logger.error(error_msg)
raise Exception(error_msg)

bucket_name = path.split(".")[0]
else:
error_msg = "It appears the URL is not a valid s3 path. %s" % path

self.logger.error(error_msg)
raise Exception(error_msg)

return bucket_name

def determine_file_key(self, path):
"""
Determine the key of the s3 file based on the filepath provided.
:param path: Full s3 filepath. Can be in s3:// or http(s):// format.
:type path: str
:return:
"""

if "s3://" in path:
groups = path.split("/", 3)

key = groups[3]

elif "s3" in path and ".amazonaws.com" in path:
groups = path.split(".amazonaws.com/")

key = groups[1]

else:
error_msg = "It appears the URL is not valid. %s" % path

self.logger.error(error_msg)
raise Exception(error_msg)

return key

def determine_s3_file_exists(self, path):
"""
Determine if a file exists on s3 based on given path.
:param path: Full s3 filepath. Can be in s3:// or http(s):// format.
:type path: str
:return:
"""
self.logger.info("Determining if %s exists." % path)

bucket_name = self.determine_bucket_name(path=path)

key = self.determine_file_key(path=path)

try:
self.s3.Object(bucket_name, key).load()

return True

except ClientError:
error_msg = "File %s does not exist in s3." % path
self.logger.error(error_msg)

return False

def determine_valid_s3_path(self, path):
"""
Take the provided path and determine if valid s3 URL.
:param path: Full s3 filepath. Can be in s3:// or http(s):// format.
:type path: str
:return:
"""
self.logger.debug("Validating %s" % path)
if "s3://" in path:
self.logger.debug("s3:// in path.")
return True
elif "s3" in path and ".amazonaws.com" in path:
self.logger.debug("s3 and .amazonaws.com in path")
return True
else:
self.logger.error("Path is invalid.")
return False

def get_s3_bucket(self, bucket_name):

return self.s3.Bucket(bucket_name)

def read_from_s3(self, bucket_name, filename):
"""
With a given bucket and filename, read from s3.
:param bucket:
:param file:
:return:
"""

bucket = self.get_s3_bucket(bucket_name=bucket_name)
73 changes: 64 additions & 9 deletions process_tracker/utilities/settings.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
# Settings manager and configuration, both for initialization and reading.

import configparser
import logging
import os
from pathlib import Path
import tempfile

from process_tracker.utilities.aws_utilities import AwsUtilities


class SettingsManager:
Expand All @@ -15,24 +19,57 @@ def __init__(self, config_location=None):

self.config = configparser.ConfigParser(allow_no_value=True)

self.logger = logging.getLogger(__name__)
self.logger.setLevel("DEBUG")

self.aws_utils = AwsUtilities()

exists = False

if config_location is None:
home = str(Path.home())
self.config_path = os.path.join(home, ".process_tracker/")
self.config_file = os.path.join(
self.config_path, "process_tracker_config.ini"
home = Path.home()

self.config_path = str(home.joinpath(".process_tracker/"))
self.config_file = str(
Path(self.config_path).joinpath("process_tracker_config.ini")
)

exists = os.path.isfile(self.config_file)

else:
self.config_path = config_location
self.config_file = os.path.join(
self.config_path, "process_tracker_config.ini"
)

exists = os.path.isfile(self.config_file)
if "process_tracker_config.ini" not in self.config_path:
self.logger.debug(
"process_tracker_config.ini not present. Appending to %s"
% self.config_path
)

self.config_file = self.config_path

if not self.config_file.endswith("/"):
self.config_file += "/"

self.config_file += "process_tracker_config.ini"

self.logger.debug("Config file is now %s" % self.config_file)
else:
self.logger.debug(
"process_tracker_config.ini present. Setting config_path to config_file."
)
self.config_file = self.config_path

if self.aws_utils.determine_valid_s3_path(
path=self.config_path
) and self.aws_utils.determine_s3_file_exists(path=self.config_file):

exists = True

if exists:
self.read_config_file()
else:
# How to handle if exists is false and it's s3?

self.create_config_file()

def create_config_file(self):
Expand Down Expand Up @@ -62,4 +99,22 @@ def read_config_file(self):
:return:
"""

return self.config.read(self.config_file)
if self.aws_utils.determine_valid_s3_path(
path=self.config_path
) and self.aws_utils.determine_s3_file_exists(path=self.config_file):

temp_file = tempfile.NamedTemporaryFile()
bucket_name = self.aws_utils.determine_bucket_name(path=self.config_path)

bucket = self.aws_utils.get_s3_bucket(bucket_name=bucket_name)
key = self.aws_utils.determine_file_key(path=self.config_file)

bucket.download_file(key, temp_file.name)

with open(temp_file.name, "r") as f:
self.config.readfp(f)
temp_file.close()

else:

return self.config.read(self.config_file)
8 changes: 8 additions & 0 deletions tests/fixtures/process_tracker_config.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[DEFAULT]
log_level = DEBUG
data_store_type = postgresql
data_store_username = pt_admin_test
data_store_password = arglebargle
data_store_host = localhost
data_store_port = 1234
data_store_name = process_tracker
15 changes: 11 additions & 4 deletions tests/test_process_tracker.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Tests for validating process_tracking works as expected.

from datetime import datetime, timedelta
import logging
import os
from pathlib import Path
import time
Expand Down Expand Up @@ -29,7 +30,7 @@
ProcessTargetObject,
ProcessTracking,
)
from process_tracker.models.source import Source, SourceObject
from process_tracker.models.source import Source

from process_tracker.data_store import DataStore
from process_tracker.extract_tracker import ExtractTracker
Expand All @@ -43,6 +44,8 @@
class TestProcessTracker(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.logger = logging.Logger(__name__)

cls.data_store = DataStore()
cls.session = cls.data_store.session
cls.data_store_type = cls.data_store.data_store_type
Expand Down Expand Up @@ -696,6 +699,10 @@ def test_register_extracts_by_location_local(self):

self.assertCountEqual(expected_result, given_result)

@unittest.skipIf(
"TRAVIS" in os.environ and os.environ["TRAVIS"] == "true",
"Skipping this test on Travis CI.",
)
@mock_s3
def test_register_extracts_by_location_s3(self):
"""
Expand Down Expand Up @@ -740,9 +747,9 @@ def test_register_extracts_by_location_s3(self):

key = os.path.join(test_bucket, file)

print(file)
print(key)
print(fixtures_dir)
self.logger.debug("Filename %s" % file)
self.logger.debug("File key %s" % key)
self.logger.debug("Fixtures dir %s" % fixtures_dir)

file = os.path.join(fixtures_dir, file)
client.upload_file(Filename=file, Bucket=test_bucket, Key=key)
Expand Down
Loading