Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"python.testing.pytestArgs": [
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
3 changes: 2 additions & 1 deletion conf/access.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@
}
},
"public.cps.za.dlchange": ["FooUser", "BarUser"],
"public.cps.za.test": ["TestUser"]
"public.cps.za.test": ["TestUser"],
"public.cps.za.status_change": ["TestUser"]
}
293 changes: 293 additions & 0 deletions conf/topic_schemas/status_change.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,293 @@
{
"type": "object",
"properties": {
"event_type": {
"type": "string",
"enum": [
"JobCreatedEvent",
"JobCreatedAndStartedEvent",
"JobStartedEvent",
"JobUpdatedEvent",
"JobFinishedEvent"
],
"description": "Lifecycle event type for job status changes."
},
"event_id": {
"type": "string",
"format": "uuid",
"description": "Unique identifier for the event (UUID)"
},
"job_ref": {
"type": [
"string",
"null"
],
"description": "Identifier of the job in its respective system (e.g. Spark Application Id, Glue Job Id, EMR Step Id, etc)."
},
"tenant_id": {
"type": [
"string",
"null"
],
"description": "Application ID or ServiceNow identifier"
},
Comment thread
coderabbitai[bot] marked this conversation as resolved.
"source_app": {
"type": "string",
"description": "Standardized source application name (aqueduct, unify, lum, etc)"
},
"source_app_version": {
"type": "string",
"description": "Source application version (SemVer preferred)"
},
"environment": {
"type": "string",
"description": "Environment (dev, uat, pre-prod, prod, test or others)"
},
"timestamp_event": {
"type": "integer",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The runs schema uses "type": "number" for timestamps. Here it is "integer". While epoch milliseconds are integers, the inconsistency may confuse producers.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, number can also be floating point, so integer is the correct choice here. However, it is your call: would you rather be consistent but wrong, or eventually fix it in the runs schema as well?

"minimum": 0,
"description": "Timestamp of the event in epoch milliseconds"
},
Comment thread
kevinwallimann marked this conversation as resolved.
"country": {
"type": [
"string",
"null"
],
"description": "The country the data is related to, e.g. za, ke, on-mu, nbc-tz, etc."
},
"job_id": {
"type": "string",
"format": "uuid",
"description": "Primary job identifier (UUID)."
},
"parent_job_id": {
"type": [
"string",
"null"
],
"format": "uuid",
"description": "Optional parent job identifier (UUID), to represent nested job hierarchies."
},
"initial_job_id": {
"type": [
"string",
"null"
],
"format": "uuid",
"description": "Optional initial job identifier (UUID), to represent retried or replayed jobs."
},
"job_group_id": {
"type": [
"string",
"null"
],
"format": "uuid",
"description": "Job group identifier (UUID), may or may not reference a job id."
},
"job_name": {
"type": "string",
"description": "Human-readable job name."
},
"attempt_number": {
"type": [
"integer",
"null"
],
"minimum": 1,
"description": "Attempt number for this job."
},
"platform": {
"type": [
"string",
"null"
],
"description": "Platform, e.g. aws.emr, aws.glue, aws.lambda."
},
"platform_metadata": {
"type": [
"object",
"null"
],
"description": "Platform-specific metadata (e.g. {\"cluster_id\": \"j-...\"})."
},
"input_arguments": {
"type": [
"object",
"null"
],
"description": "Arguments passed to the job."
},
"definition_id": {
"type": "string",
"description": "Definition (Pipeline, Domain, Process) identifier."
},
"definition_version": {
"type": [
"string",
"null"
],
"description": "Optional definition version."
},
"status_type": {
"type": [
"string",
"null"
],
"enum": [
"WAITING",
"RUNNING",
"SUCCEEDED",
"FAILED",
"KILLED",
null
],
"description": "High-level status type for the current lifecycle event."
},
"status_subtype": {
"type": [
"string",
"null"
],
"description": "Optional status subtype, e.g. NO_DATA or error code."
},
"status_detail": {
"type": [
"string",
"null"
],
"description": "Optional human-readable status detail, e.g. short error message."
},
"additional_context": {
"type": [
"object",
"null"
],
"description": "Additional context payload."
}
},
"required": [
"event_type",
"event_id",
"job_id",
"status_type"
],
"allOf": [
{
"if": {
"properties": {
"event_type": {
"enum": [
"JobCreatedEvent"
]
}
}
},
"then": {
"required": [
"job_name",
"source_app",
"source_app_version",
"timestamp_event",
"environment",
"definition_id",
"platform",
"input_arguments"
],
"properties": {
"status_type": {
"enum": [
"WAITING"
]
}
}
}
},
{
"if": {
"properties": {
"event_type": {
"enum": [
"JobCreatedAndStartedEvent"
]
}
}
},
"then": {
"required": [
"job_name",
"source_app",
"source_app_version",
"timestamp_event",
"environment",
"definition_id",
"platform",
"input_arguments"
],
"properties": {
"status_type": {
"enum": [
"RUNNING"
]
}
}
}
},
{
"if": {
"properties": {
"event_type": {
"const": "JobStartedEvent"
}
}
},
"then": {
"properties": {
"status_type": {
"enum": [
"RUNNING"
]
}
}
}
},
{
"if": {
"properties": {
"event_type": {
"const": "JobUpdatedEvent"
}
}
},
"then": {
"properties": {
"status_type": {
"enum": [
"WAITING",
"RUNNING"
]
}
}
}
},
{
"if": {
"properties": {
"event_type": {
"const": "JobFinishedEvent"
}
}
},
"then": {
"properties": {
"status_type": {
"enum": [
"SUCCEEDED",
"FAILED",
"KILLED"
]
}
}
}
}
]
}
4 changes: 3 additions & 1 deletion src/handlers/handler_topic.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from src.handlers.handler_token import HandlerToken
from src.utils.conf_path import CONF_DIR
from src.utils.config_loader import TopicAccessMap, load_access_config
from src.utils.constants import TOPIC_DLCHANGE, TOPIC_RUNS, TOPIC_TEST
from src.utils.constants import TOPIC_DLCHANGE, TOPIC_RUNS, TOPIC_STATUS_CHANGE, TOPIC_TEST
from src.utils.utils import build_error_response
from src.writers.writer import WriteError, Writer

Expand Down Expand Up @@ -75,6 +75,8 @@ def with_load_topic_schemas(self) -> "HandlerTopic":
self.topics[TOPIC_DLCHANGE] = json.load(file)
with open(os.path.join(topic_schemas_dir, "test.json"), "r", encoding="utf-8") as file:
self.topics[TOPIC_TEST] = json.load(file)
with open(os.path.join(topic_schemas_dir, "status_change.json"), "r", encoding="utf-8") as file:
self.topics[TOPIC_STATUS_CHANGE] = json.load(file)

logger.debug("Loaded topic schemas successfully.")
return self
Expand Down
3 changes: 2 additions & 1 deletion src/utils/config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

from boto3.resources.base import ServiceResource

from src.utils.constants import TOPIC_DLCHANGE, TOPIC_RUNS, TOPIC_TEST
from src.utils.constants import TOPIC_DLCHANGE, TOPIC_RUNS, TOPIC_STATUS_CHANGE, TOPIC_TEST

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -150,6 +150,7 @@ def load_topic_names(conf_dir: str) -> list[str]:
"runs.json": TOPIC_RUNS,
"dlchange.json": TOPIC_DLCHANGE,
"test.json": TOPIC_TEST,
"status_change.json": TOPIC_STATUS_CHANGE,
}
schemas_dir = os.path.join(conf_dir, "topic_schemas")
topics: list[str] = []
Expand Down
1 change: 1 addition & 0 deletions src/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
# Topic name constants.
TOPIC_RUNS = "public.cps.za.runs"
TOPIC_DLCHANGE = "public.cps.za.dlchange"
TOPIC_TEST = "public.cps.za.test"
TOPIC_STATUS_CHANGE = "public.cps.za.status_change"

# NOTE(review): TOPIC_STATUS_CHANGE is defined above but is not a member of this set.
# Confirm whether that is intentional; adding it presumably also requires matching
# writer/SQL insert support for the topic - TODO verify before changing.
SUPPORTED_WRITE_TOPICS: frozenset[str] = frozenset({TOPIC_RUNS, TOPIC_DLCHANGE, TOPIC_TEST})
Copy link
Copy Markdown
Collaborator

@oto-macenauer-absa oto-macenauer-absa May 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

missing the status_change topic in SUPPORTED_WRITE_TOPICS

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, also there is no addition of this new queue in src/writers/sql/inserts.sql - do we want to push it into our Postgres for analytical purposes or not? It might become quite massive over time, though.

SUPPORTED_STATS_TOPICS: frozenset[str] = frozenset({TOPIC_RUNS})
5 changes: 4 additions & 1 deletion tests/unit/handlers/test_handler_topic.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def test_load_topic_schemas_success():
"runs.json": {"type": "object", "properties": {"run_id": {"type": "string"}}},
"dlchange.json": {"type": "object", "properties": {"change_id": {"type": "string"}}},
"test.json": {"type": "object", "properties": {"event_id": {"type": "string"}}},
"status_change.json": {"type": "object", "properties": {"execution_id": {"type": "string"}}},
}

def mock_open_side_effect(file_path, *_args, **_kwargs):
Expand All @@ -99,10 +100,11 @@ def mock_open_side_effect(file_path, *_args, **_kwargs):
result = handler.with_load_topic_schemas()

assert result is handler
assert 3 == len(handler.topics)
assert 4 == len(handler.topics)
assert "public.cps.za.runs" in handler.topics
assert "public.cps.za.dlchange" in handler.topics
assert "public.cps.za.test" in handler.topics
assert "public.cps.za.status_change" in handler.topics


## get_topics_list()
Expand All @@ -112,6 +114,7 @@ def test_get_topics(event_gate_module, make_event):
assert 200 == resp["statusCode"]
body = json.loads(resp["body"])
assert "public.cps.za.test" in body
assert "public.cps.za.status_change" in body


## get_topic_schema()
Expand Down
Loading