From 4b2f1f23ca516bc51771812b8ca97b8159ceaed9 Mon Sep 17 00:00:00 2001 From: Theodore Li Date: Mon, 29 Jun 2026 10:56:52 -0700 Subject: [PATCH] feat(pii): add redaction timing metrics across sidecar and persist path - Log per-request duration in the Presidio sidecar (/analyze, /anonymize) - Add durationMs to the mask-batch endpoint log line - Emit per-execution PII redaction timing (stringCount, totalBytes, durationMs, scrubbed) --- apps/pii/server.py | 20 +++++++++++++++++++ .../app/api/guardrails/mask-batch/route.ts | 6 +++++- apps/sim/lib/logs/execution/pii-redaction.ts | 11 ++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/apps/pii/server.py b/apps/pii/server.py index 597fe8f3d90..3fbd9859e45 100644 --- a/apps/pii/server.py +++ b/apps/pii/server.py @@ -5,6 +5,8 @@ endpoints so a single PRESIDIO_URL serves both. """ +import logging +import time from typing import Any from fastapi import FastAPI @@ -133,6 +135,9 @@ def build_analyzer() -> AnalyzerEngine: analyzer = build_analyzer() anonymizer = AnonymizerEngine() +# Propagates to the root logger. NOTE(review): uvicorn's default log config attaches handlers only to its own "uvicorn*" loggers, so confirm a root handler at INFO level is configured, otherwise these timing lines are dropped. 
+logger = logging.getLogger("sim.pii") + app = FastAPI(title="Sim Presidio", docs_url=None, redoc_url=None) @@ -163,6 +168,7 @@ def supported_entities(language: str = "en") -> list[str]: @app.post("/analyze") def analyze(req: AnalyzeRequest) -> list[dict[str, Any]]: + started = time.perf_counter() results = analyzer.analyze( text=req.text, language=req.language, @@ -170,11 +176,19 @@ def analyze(req: AnalyzeRequest) -> list[dict[str, Any]]: score_threshold=req.score_threshold, return_decision_process=req.return_decision_process, ) + logger.info( + "analyze lang=%s chars=%d entities=%d duration_ms=%.1f", + req.language, + len(req.text), + len(results), + (time.perf_counter() - started) * 1000, + ) return [r.to_dict() for r in results] @app.post("/anonymize") def anonymize(req: AnonymizeRequest) -> dict[str, Any]: + started = time.perf_counter() analyzer_results = [ RecognizerResult( entity_type=r["entity_type"], @@ -197,6 +211,12 @@ def anonymize(req: AnonymizeRequest) -> dict[str, Any]: analyzer_results=analyzer_results, operators=operators, ) + logger.info( + "anonymize chars=%d spans=%d duration_ms=%.1f", + len(req.text), + len(analyzer_results), + (time.perf_counter() - started) * 1000, + ) return { "text": result.text, "items": [ diff --git a/apps/sim/app/api/guardrails/mask-batch/route.ts b/apps/sim/app/api/guardrails/mask-batch/route.ts index 696b69e749c..b04d3d21106 100644 --- a/apps/sim/app/api/guardrails/mask-batch/route.ts +++ b/apps/sim/app/api/guardrails/mask-batch/route.ts @@ -27,8 +27,12 @@ export const POST = withRouteHandler(async (request: NextRequest) => { const { texts, entityTypes, language } = parsed.data.body try { + const startedAt = performance.now() const masked = await maskPIIBatch(texts, entityTypes, language) - logger.info('Masked PII batch', { count: texts.length }) + logger.info('Masked PII batch', { + count: texts.length, + durationMs: Math.round(performance.now() - startedAt), + }) return NextResponse.json({ masked }) } catch (error) 
{ // An unreachable/misconfigured Presidio sidecar makes maskPIIBatch throw; fail diff --git a/apps/sim/lib/logs/execution/pii-redaction.ts b/apps/sim/lib/logs/execution/pii-redaction.ts index 8cd0fac5326..5b17694090a 100644 --- a/apps/sim/lib/logs/execution/pii-redaction.ts +++ b/apps/sim/lib/logs/execution/pii-redaction.ts @@ -132,6 +132,7 @@ export async function redactPIIFromExecution( ): Promise { const { entityTypes } = options const language = options.language ?? 'en' + const startedAt = performance.now() const units = REDACTABLE_KEYS.filter((key) => payload[key] !== undefined).map((key) => ({ key, @@ -151,12 +152,14 @@ export async function redactPIIFromExecution( if (collected.length === 0) return payload let masked: string[] + let scrubbed = false if (totalBytes > PII_MAX_TOTAL_BYTES) { logger.warn('Execution exceeds PII redaction ceiling; scrubbing text', { totalBytes, ceiling: PII_MAX_TOTAL_BYTES, }) masked = collected.map(() => REDACTION_FAILED_MARKER) + scrubbed = true } else { try { // Presidio runs only in the app container; the persist path also runs in @@ -168,6 +171,7 @@ export async function redactPIIFromExecution( stringCount: collected.length, }) masked = collected.map(() => REDACTION_FAILED_MARKER) + scrubbed = true } } @@ -176,5 +180,12 @@ export async function redactPIIFromExecution( for (const unit of units) { result[unit.key] = transformUnit(unit.key, unit.value, () => masked[index++]) } + + logger.info('PII redaction completed', { + stringCount: collected.length, + totalBytes, + durationMs: Math.round(performance.now() - startedAt), + scrubbed, + }) return result }