diff --git a/tests/test_jsonl_parser.py b/tests/test_jsonl_parser.py
index cf40e82..9bdd96d 100644
--- a/tests/test_jsonl_parser.py
+++ b/tests/test_jsonl_parser.py
@@ -7,60 +7,21 @@
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-from utils.jsonl_parser import (
- _extract_images,
- _extract_text,
- _infer_title,
- _normalize_content,
- _parse_tool_result,
- _process_assistant,
- _process_system,
- _process_user,
- _strip_system_tags,
- _track_file_activity,
- parse_session,
- quick_session_info,
+from utils.jsonl_helpers import (
+ extract_images,
+ extract_text,
+ infer_title,
+ normalize_content,
+ strip_system_tags,
)
+from utils.jsonl_parser import parse_session, quick_session_info
+from utils.tool_dispatch import _parse_tool_result
# ---------------------------------------------------------------------------
# Metadata helpers (match parse_session initialisation)
# ---------------------------------------------------------------------------
-def _fresh_metadata() -> dict:
- return {
- "session_id": "x",
- "models_used": set(),
- "total_input_tokens": 0,
- "total_output_tokens": 0,
- "total_cache_read_tokens": 0,
- "total_cache_creation_tokens": 0,
- "total_tool_calls": 0,
- "tool_call_counts": {},
- "first_timestamp": None,
- "last_timestamp": None,
- "version": None,
- "cwd": None,
- "git_branch": None,
- "permission_mode": None,
- "compactions": 0,
- "total_ephemeral_5m_tokens": 0,
- "total_ephemeral_1h_tokens": 0,
- "service_tiers": set(),
- "session_wall_time_seconds": None,
- "compact_boundaries": [],
- "api_errors": 0,
- "files_read": set(),
- "files_written": set(),
- "files_created": set(),
- "bash_commands": [],
- "web_fetches": [],
- "sidechain_messages": 0,
- "stop_reasons": {},
- "entry_counts": {},
- }
-
-
def _write_jsonl(entries: list) -> str:
f = tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False, encoding="utf-8")
for entry in entries:
@@ -69,6 +30,14 @@ def _write_jsonl(entries: list) -> str:
return f.name
+def _parse_entries(entries: list) -> dict:
+ path = _write_jsonl(entries)
+ try:
+ return parse_session(path)
+ finally:
+ os.unlink(path)
+
+
# ---------------------------------------------------------------------------
# _parse_tool_result
# ---------------------------------------------------------------------------
@@ -279,54 +248,54 @@ def test_slug_preserved(self):
# ---------------------------------------------------------------------------
-# _normalize_content, _extract_text, _extract_images
+# normalize_content, extract_text, extract_images
# ---------------------------------------------------------------------------
class TestNormalizeContent:
def test_plain_string(self):
- assert _normalize_content("hi") == [{"type": "text", "text": "hi"}]
+ assert normalize_content("hi") == [{"type": "text", "text": "hi"}]
def test_list_of_strings(self):
- assert _normalize_content(["a", "b"]) == [
+ assert normalize_content(["a", "b"]) == [
{"type": "text", "text": "a"},
{"type": "text", "text": "b"},
]
def test_list_of_dicts(self):
d = {"type": "text", "text": "x"}
- assert _normalize_content([d]) == [d]
+ assert normalize_content([d]) == [d]
def test_mixed_string_and_dict(self):
- out = _normalize_content(["s", {"type": "thinking", "thinking": "t"}])
+ out = normalize_content(["s", {"type": "thinking", "thinking": "t"}])
assert out[0]["type"] == "text"
assert out[1]["type"] == "thinking"
def test_none_returns_empty(self):
- assert _normalize_content(None) == []
+ assert normalize_content(None) == []
def test_wrong_type_returns_empty(self):
- assert _normalize_content(42) == []
+ assert normalize_content(42) == []
class TestExtractText:
def test_text_blocks_joined(self):
blocks = [{"type": "text", "text": "a"}, {"type": "text", "text": "b"}]
- assert _extract_text(blocks) == "a\nb"
+ assert extract_text(blocks) == "a\nb"
def test_tool_use_blocks_ignored(self):
- assert _extract_text([{"type": "tool_use", "name": "Read"}]) == ""
+ assert extract_text([{"type": "tool_use", "name": "Read"}]) == ""
def test_thinking_blocks_ignored(self):
- assert _extract_text([{"type": "thinking", "thinking": "secret"}]) == ""
+ assert extract_text([{"type": "thinking", "thinking": "secret"}]) == ""
def test_empty_content(self):
- assert _extract_text([]) == ""
+ assert extract_text([]) == ""
class TestExtractImages:
def test_base64_image_extracted(self):
- imgs = _extract_images(
+ imgs = extract_images(
[
{
"type": "image",
@@ -338,7 +307,7 @@ def test_base64_image_extracted(self):
assert imgs[0]["data"] == "AAA"
def test_nested_tool_result_image_extracted(self):
- imgs = _extract_images(
+ imgs = extract_images(
[
{
"type": "tool_result",
@@ -355,17 +324,17 @@ def test_nested_tool_result_image_extracted(self):
assert imgs[0]["data"] == "BBB"
def test_non_image_skipped(self):
- assert _extract_images([{"type": "text", "text": "x"}]) == []
+ assert extract_images([{"type": "text", "text": "x"}]) == []
# ---------------------------------------------------------------------------
-# _infer_title, _strip_system_tags
+# infer_title, strip_system_tags
# ---------------------------------------------------------------------------
class TestInferTitle:
def test_first_user_message_used(self):
- title = _infer_title(
+ title = infer_title(
[
{"role": "assistant", "text": "a"},
{"role": "user", "text": "My title line\nmore"},
@@ -375,36 +344,36 @@ def test_first_user_message_used(self):
def test_truncated_to_100_chars(self):
long_line = "x" * 120
- title = _infer_title([{"role": "user", "text": long_line}])
+ title = infer_title([{"role": "user", "text": long_line}])
assert len(title) == 100
assert title == "x" * 100
def test_no_text_messages_returns_untitled(self):
- assert _infer_title([{"role": "user", "text": ""}]) == "Untitled Session"
+ assert infer_title([{"role": "user", "text": ""}]) == "Untitled Session"
def test_sidechain_only_returns_untitled(self):
- assert _infer_title([]) == "Untitled Session"
+ assert infer_title([]) == "Untitled Session"
class TestStripSystemTags:
def test_system_reminder_removed(self):
t = "inkeep"
- assert _strip_system_tags(t) == "keep"
+ assert strip_system_tags(t) == "keep"
def test_ide_opened_file_removed(self):
t = "xy"
- assert _strip_system_tags(t) == "y"
+ assert strip_system_tags(t) == "y"
def test_user_prompt_submit_hook_removed(self):
t = "hz"
- assert _strip_system_tags(t) == "z"
+ assert strip_system_tags(t) == "z"
def test_remaining_known_opening_closing_tags_stripped(self):
t = "foobar"
- assert _strip_system_tags(t) == "foobar"
+ assert strip_system_tags(t) == "foobar"
def test_clean_text_unchanged(self):
- assert _strip_system_tags("hello world") == "hello world"
+ assert strip_system_tags("hello world") == "hello world"
# ---------------------------------------------------------------------------
@@ -414,64 +383,55 @@ def test_clean_text_unchanged(self):
class TestProcessUser:
def test_metadata_captured_from_first_entry_only(self):
- messages = []
- meta = _fresh_metadata()
- _process_user(
- {
- "type": "user",
- "version": 1,
- "cwd": "/first",
- "gitBranch": "main",
- "permissionMode": "default",
- "message": {"content": [{"type": "text", "text": "a"}]},
- },
- messages,
- meta,
- )
- _process_user(
- {
- "type": "user",
- "version": 2,
- "cwd": "/second",
- "gitBranch": "dev",
- "permissionMode": "all",
- "message": {"content": [{"type": "text", "text": "b"}]},
- },
- messages,
- meta,
+ s = _parse_entries(
+ [
+ {
+ "type": "user",
+ "version": 1,
+ "cwd": "/first",
+ "gitBranch": "main",
+ "permissionMode": "default",
+ "message": {"content": [{"type": "text", "text": "a"}]},
+ },
+ {
+ "type": "user",
+ "version": 2,
+ "cwd": "/second",
+ "gitBranch": "dev",
+ "permissionMode": "all",
+ "message": {"content": [{"type": "text", "text": "b"}]},
+ },
+ ]
)
- assert meta["version"] == 1
- assert meta["cwd"] == "/first"
- assert meta["git_branch"] == "main"
- assert meta["permission_mode"] == "default"
+ assert s["metadata"]["version"] == 1
+ assert s["metadata"]["cwd"] == "/first"
+ assert s["metadata"]["git_branch"] == "main"
+ assert s["metadata"]["permission_mode"] == "default"
def test_missing_message_key_no_crash(self):
- messages = []
- meta = _fresh_metadata()
- _process_user({"type": "user"}, messages, meta)
- assert len(messages) == 1
- assert messages[0]["text"] == ""
+ s = _parse_entries([{"type": "user"}])
+ assert len(s["messages"]) == 1
+ assert s["messages"][0]["text"] == ""
def test_tool_use_result_images_extracted(self):
- messages = []
- meta = _fresh_metadata()
- _process_user(
- {
- "message": {"content": []},
- "toolUseResult": {
- "content": [
- {
- "type": "image",
- "source": {"type": "base64", "data": "IMG"},
- }
- ],
- },
- },
- messages,
- meta,
+ s = _parse_entries(
+ [
+ {
+ "type": "user",
+ "message": {"content": []},
+ "toolUseResult": {
+ "content": [
+ {
+ "type": "image",
+ "source": {"type": "base64", "data": "IMG"},
+ }
+ ],
+ },
+ }
+ ]
)
- assert messages[0]["images"]
- assert messages[0]["images"][0]["data"] == "IMG"
+ assert s["messages"][0]["images"]
+ assert s["messages"][0]["images"][0]["data"] == "IMG"
# ---------------------------------------------------------------------------
@@ -481,149 +441,144 @@ def test_tool_use_result_images_extracted(self):
class TestProcessAssistant:
def test_content_plain_string_normalized(self):
- messages = []
- meta = _fresh_metadata()
- _process_assistant(
- {
- "message": {
- "model": "m",
- "content": "plain string body",
- "usage": {},
- },
- },
- messages,
- meta,
+ s = _parse_entries(
+ [
+ {
+ "type": "assistant",
+ "message": {
+ "model": "m",
+ "content": "plain string body",
+ "usage": {},
+ },
+ }
+ ]
)
- assert messages[0]["text"] == "plain string body"
+ assert s["messages"][0]["text"] == "plain string body"
def test_synthetic_model_not_added(self):
- meta = _fresh_metadata()
- _process_assistant(
- {
- "message": {
- "model": "",
- "content": [{"type": "text", "text": "x"}],
- "usage": {},
- },
- },
- [],
- meta,
+ s = _parse_entries(
+ [
+ {
+ "type": "assistant",
+ "message": {
+ "model": "",
+ "content": [{"type": "text", "text": "x"}],
+ "usage": {},
+ },
+ }
+ ]
)
- assert meta["models_used"] == set()
+ assert s["metadata"]["models_used"] == []
def test_thinking_blocks_joined(self):
- messages = []
- meta = _fresh_metadata()
- _process_assistant(
- {
- "message": {
- "model": "m",
- "content": [
- {"type": "thinking", "thinking": "t1"},
- {"type": "thinking", "thinking": "t2"},
- ],
- "usage": {},
- },
- },
- messages,
- meta,
+ s = _parse_entries(
+ [
+ {
+ "type": "assistant",
+ "message": {
+ "model": "m",
+ "content": [
+ {"type": "thinking", "thinking": "t1"},
+ {"type": "thinking", "thinking": "t2"},
+ ],
+ "usage": {},
+ },
+ }
+ ]
)
- assert messages[0]["thinking"] == "t1\n\nt2"
+ assert s["messages"][0]["thinking"] == "t1\n\nt2"
def test_tool_use_counts_accumulated(self):
- meta = _fresh_metadata()
- _process_assistant(
- {
- "message": {
- "model": "m",
- "content": [
- {"type": "tool_use", "name": "Read", "input": {"file_path": "/a"}},
- {"type": "tool_use", "name": "Read", "input": {"file_path": "/b"}},
- ],
- "usage": {},
- },
- },
- [],
- meta,
+ s = _parse_entries(
+ [
+ {
+ "type": "assistant",
+ "message": {
+ "model": "m",
+ "content": [
+ {"type": "tool_use", "name": "Read", "input": {"file_path": "/a"}},
+ {"type": "tool_use", "name": "Read", "input": {"file_path": "/b"}},
+ ],
+ "usage": {},
+ },
+ }
+ ]
)
- assert meta["total_tool_calls"] == 2
- assert meta["tool_call_counts"]["Read"] == 2
+ assert s["metadata"]["total_tool_calls"] == 2
+ assert s["metadata"]["tool_call_counts"]["Read"] == 2
def test_api_error_flag_increments_api_errors(self):
- meta = _fresh_metadata()
- _process_assistant(
- {
- "isApiErrorMessage": True,
- "message": {"model": "m", "content": [], "usage": {}},
- },
- [],
- meta,
+ s = _parse_entries(
+ [
+ {
+ "type": "assistant",
+ "isApiErrorMessage": True,
+ "message": {"model": "m", "content": [], "usage": {}},
+ }
+ ]
)
- assert meta["api_errors"] == 1
+ assert s["metadata"]["api_errors"] == 1
def test_stop_reason_accumulated(self):
- meta = _fresh_metadata()
- _process_assistant(
- {
- "message": {
- "model": "m",
- "content": [],
- "stop_reason": "max_tokens",
- "usage": {},
+ s = _parse_entries(
+ [
+ {
+ "type": "assistant",
+ "message": {
+ "model": "m",
+ "content": [],
+ "stop_reason": "max_tokens",
+ "usage": {},
+ },
},
- },
- [],
- meta,
- )
- _process_assistant(
- {
- "message": {
- "model": "m",
- "content": [],
- "stop_reason": "max_tokens",
- "usage": {},
+ {
+ "type": "assistant",
+ "message": {
+ "model": "m",
+ "content": [],
+ "stop_reason": "max_tokens",
+ "usage": {},
+ },
},
- },
- [],
- meta,
+ ]
)
- assert meta["stop_reasons"]["max_tokens"] == 2
+ assert s["metadata"]["stop_reasons"]["max_tokens"] == 2
def test_service_tier_added(self):
- meta = _fresh_metadata()
- _process_assistant(
- {
- "message": {
- "model": "m",
- "content": [],
- "usage": {"service_tier": "priority"},
- },
- },
- [],
- meta,
+ s = _parse_entries(
+ [
+ {
+ "type": "assistant",
+ "message": {
+ "model": "m",
+ "content": [],
+ "usage": {"service_tier": "priority"},
+ },
+ }
+ ]
)
- assert "priority" in meta["service_tiers"]
+ assert "priority" in s["metadata"]["service_tiers"]
def test_ephemeral_cache_tokens_accumulated(self):
- meta = _fresh_metadata()
- _process_assistant(
- {
- "message": {
- "model": "m",
- "content": [],
- "usage": {
- "cache_creation": {
- "ephemeral_5m_input_tokens": 7,
- "ephemeral_1h_input_tokens": 3,
+ s = _parse_entries(
+ [
+ {
+ "type": "assistant",
+ "message": {
+ "model": "m",
+ "content": [],
+ "usage": {
+ "cache_creation": {
+ "ephemeral_5m_input_tokens": 7,
+ "ephemeral_1h_input_tokens": 3,
+ },
},
},
- },
- },
- [],
- meta,
+ }
+ ]
)
- assert meta["total_ephemeral_5m_tokens"] == 7
- assert meta["total_ephemeral_1h_tokens"] == 3
+ assert s["metadata"]["total_ephemeral_5m_tokens"] == 7
+ assert s["metadata"]["total_ephemeral_1h_tokens"] == 3
# ---------------------------------------------------------------------------
@@ -632,49 +587,47 @@ def test_ephemeral_cache_tokens_accumulated(self):
class TestTrackFileActivity:
- def _meta(self):
- return {
- "files_read": set(),
- "files_written": set(),
- "files_created": set(),
- "bash_commands": [],
- "web_fetches": [],
- }
+ def _assistant_with_tool(self, name: str, tool_input: dict) -> dict:
+ return _parse_entries(
+ [
+ {
+ "type": "assistant",
+ "message": {
+ "model": "m",
+ "content": [{"type": "tool_use", "name": name, "input": tool_input}],
+ "usage": {},
+ },
+ }
+ ]
+ )
def test_read_tool_adds_to_files_read(self):
- m = self._meta()
- _track_file_activity("Read", {"file_path": "/r"}, m)
- assert "/r" in m["files_read"]
+ s = self._assistant_with_tool("Read", {"file_path": "/r"})
+ assert "/r" in s["metadata"]["files_read"]
def test_write_tool_adds_to_files_created(self):
- m = self._meta()
- _track_file_activity("Write", {"file_path": "/w"}, m)
- assert "/w" in m["files_created"]
+ s = self._assistant_with_tool("Write", {"file_path": "/w"})
+ assert "/w" in s["metadata"]["files_created"]
def test_edit_tool_adds_to_files_written(self):
- m = self._meta()
- _track_file_activity("Edit", {"file_path": "/e"}, m)
- assert "/e" in m["files_written"]
+ s = self._assistant_with_tool("Edit", {"file_path": "/e"})
+ assert "/e" in s["metadata"]["files_written"]
def test_bash_command_appended(self):
- m = self._meta()
- _track_file_activity("Bash", {"command": "ls"}, m)
- assert m["bash_commands"] == ["ls"]
+ s = self._assistant_with_tool("Bash", {"command": "ls"})
+ assert s["metadata"]["bash_commands"] == ["ls"]
def test_web_fetch_url_appended(self):
- m = self._meta()
- _track_file_activity("WebFetch", {"url": "https://a"}, m)
- assert m["web_fetches"] == ["https://a"]
+ s = self._assistant_with_tool("WebFetch", {"url": "https://a"})
+ assert s["metadata"]["web_fetches"] == ["https://a"]
def test_web_search_query_appended(self):
- m = self._meta()
- _track_file_activity("WebSearch", {"query": "qterm"}, m)
- assert m["web_fetches"] == ["qterm"]
+ s = self._assistant_with_tool("WebSearch", {"query": "qterm"})
+ assert s["metadata"]["web_fetches"] == ["qterm"]
def test_empty_file_path_not_added(self):
- m = self._meta()
- _track_file_activity("Read", {"file_path": ""}, m)
- assert m["files_read"] == set()
+ s = self._assistant_with_tool("Read", {"file_path": ""})
+ assert s["metadata"]["files_read"] == []
# ---------------------------------------------------------------------------
@@ -684,41 +637,37 @@ def test_empty_file_path_not_added(self):
class TestProcessSystem:
def test_compact_boundary_increments_compaction(self):
- messages = []
- meta = _fresh_metadata()
- _process_system(
- {
- "subtype": "compact_boundary",
- "timestamp": "2026-01-01T00:00:00Z",
- "compactMetadata": {"trigger": "size", "preTokens": 100},
- },
- messages,
- meta,
+ s = _parse_entries(
+ [
+ {
+ "type": "system",
+ "subtype": "compact_boundary",
+ "timestamp": "2026-01-01T00:00:00Z",
+ "compactMetadata": {"trigger": "size", "preTokens": 100},
+ }
+ ]
)
- assert meta["compactions"] == 1
- assert len(meta["compact_boundaries"]) == 1
- assert meta["compact_boundaries"][0]["trigger"] == "size"
+ assert s["metadata"]["compactions"] == 1
+ assert len(s["metadata"]["compact_boundaries"]) == 1
+ assert s["metadata"]["compact_boundaries"][0]["trigger"] == "size"
def test_compact_boundary_missing_metadata_no_crash(self):
- messages = []
- meta = _fresh_metadata()
- _process_system(
- {
- "subtype": "compact_boundary",
- "compactMetadata": None,
- },
- messages,
- meta,
+ s = _parse_entries(
+ [
+ {
+ "type": "system",
+ "subtype": "compact_boundary",
+ "compactMetadata": None,
+ }
+ ]
)
- assert meta["compactions"] == 1
- assert meta["compact_boundaries"] == []
+ assert s["metadata"]["compactions"] == 1
+ assert s["metadata"]["compact_boundaries"] == []
def test_other_subtype_no_compaction_increment(self):
- messages = []
- meta = _fresh_metadata()
- _process_system({"subtype": "init", "content": "c"}, messages, meta)
- assert meta["compactions"] == 0
- assert messages[0]["subtype"] == "init"
+ s = _parse_entries([{"type": "system", "subtype": "init", "content": "c"}])
+ assert s["metadata"]["compactions"] == 0
+ assert s["messages"][0]["subtype"] == "init"
# ---------------------------------------------------------------------------
@@ -1010,27 +959,25 @@ def test_assistant_missing_message_key(self):
os.unlink(path)
def test_tool_use_result_null_returns_none_in_message(self):
- messages = []
- meta = _fresh_metadata()
- _process_user(
- {
- "message": {"content": []},
- "toolUseResult": None,
- },
- messages,
- meta,
+ s = _parse_entries(
+ [
+ {
+ "type": "user",
+ "message": {"content": []},
+ "toolUseResult": None,
+ }
+ ]
)
- assert messages[0]["tool_result_parsed"] is None
+ assert s["messages"][0]["tool_result_parsed"] is None
def test_tool_use_result_string_returns_none(self):
- messages = []
- meta = _fresh_metadata()
- _process_user(
- {
- "message": {"content": []},
- "toolUseResult": "oops",
- },
- messages,
- meta,
+ s = _parse_entries(
+ [
+ {
+ "type": "user",
+ "message": {"content": []},
+ "toolUseResult": "oops",
+ }
+ ]
)
- assert messages[0]["tool_result_parsed"] is None
+ assert s["messages"][0]["tool_result_parsed"] is None
diff --git a/tests/test_null_usage_tokens.py b/tests/test_null_usage_tokens.py
index 666df15..ce36eea 100644
--- a/tests/test_null_usage_tokens.py
+++ b/tests/test_null_usage_tokens.py
@@ -18,7 +18,7 @@
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
-from utils.jsonl_parser import _process_assistant, parse_session
+from utils.jsonl_parser import parse_session
from utils.session_stats import _estimate_cost
# ---------------------------------------------------------------------------
@@ -26,40 +26,6 @@
# ---------------------------------------------------------------------------
-def _fresh_metadata() -> dict:
- """Return a minimal metadata dict matching what parse_session initialises."""
- return {
- "models_used": set(),
- "total_input_tokens": 0,
- "total_output_tokens": 0,
- "total_cache_read_tokens": 0,
- "total_cache_creation_tokens": 0,
- "total_tool_calls": 0,
- "tool_call_counts": {},
- "first_timestamp": None,
- "last_timestamp": None,
- "total_ephemeral_5m_tokens": 0,
- "total_ephemeral_1h_tokens": 0,
- "service_tiers": set(),
- "stop_reasons": {},
- "api_errors": 0,
- "files_read": set(),
- "files_written": set(),
- "files_created": set(),
- "bash_commands": [],
- "web_fetches": [],
- "sidechain_messages": 0,
- "entry_counts": {},
- "compactions": 0,
- "compact_boundaries": [],
- "session_wall_time_seconds": None,
- "version": None,
- "cwd": None,
- "git_branch": None,
- "permission_mode": None,
- }
-
-
def _assistant_entry(usage: dict) -> dict:
"""Build a minimal assistant JSONL entry with the given usage dict."""
return {
@@ -76,103 +42,124 @@ def _assistant_entry(usage: dict) -> dict:
}
+def _write_session(entries: list) -> str:
+ f = tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False, encoding="utf-8")
+ for entry in entries:
+ f.write(json.dumps(entry) + "\n")
+ f.close()
+ return f.name
+
+
+def _parse_entries(entries: list) -> dict:
+ path = _write_session(entries)
+ try:
+ return parse_session(path)
+ finally:
+ os.unlink(path)
+
+
# ---------------------------------------------------------------------------
-# _process_assistant: null fields must not raise
+# parse_session: null fields must not raise
# ---------------------------------------------------------------------------
-class TestProcessAssistantNullUsage:
- """Unit tests for _process_assistant with null token values."""
+class TestParseSessionNullUsage:
+ """parse_session must not raise on null usage fields."""
def test_null_cache_read_tokens(self):
- meta = _fresh_metadata()
- entry = _assistant_entry(
- {
- "input_tokens": 100,
- "output_tokens": 50,
- "cache_read_input_tokens": None,
- "cache_creation_input_tokens": 0,
- }
+ s = _parse_entries(
+ [
+ _assistant_entry(
+ {
+ "input_tokens": 100,
+ "output_tokens": 50,
+ "cache_read_input_tokens": None,
+ "cache_creation_input_tokens": 0,
+ }
+ )
+ ]
)
- _process_assistant(entry, [], meta)
- assert meta["total_input_tokens"] == 100
- assert meta["total_output_tokens"] == 50
- assert meta["total_cache_read_tokens"] == 0
+ assert s["metadata"]["total_input_tokens"] == 100
+ assert s["metadata"]["total_output_tokens"] == 50
+ assert s["metadata"]["total_cache_read_tokens"] == 0
def test_null_cache_creation_tokens(self):
- meta = _fresh_metadata()
- entry = _assistant_entry(
- {
- "input_tokens": 200,
- "output_tokens": 80,
- "cache_read_input_tokens": 0,
- "cache_creation_input_tokens": None,
- }
+ s = _parse_entries(
+ [
+ _assistant_entry(
+ {
+ "input_tokens": 200,
+ "output_tokens": 80,
+ "cache_read_input_tokens": 0,
+ "cache_creation_input_tokens": None,
+ }
+ )
+ ]
)
- _process_assistant(entry, [], meta)
- assert meta["total_cache_creation_tokens"] == 0
+ assert s["metadata"]["total_cache_creation_tokens"] == 0
def test_null_input_tokens(self):
- meta = _fresh_metadata()
- entry = _assistant_entry({"input_tokens": None, "output_tokens": 30})
- _process_assistant(entry, [], meta)
- assert meta["total_input_tokens"] == 0
- assert meta["total_output_tokens"] == 30
+ s = _parse_entries([_assistant_entry({"input_tokens": None, "output_tokens": 30})])
+ assert s["metadata"]["total_input_tokens"] == 0
+ assert s["metadata"]["total_output_tokens"] == 30
def test_null_output_tokens(self):
- meta = _fresh_metadata()
- entry = _assistant_entry({"input_tokens": 10, "output_tokens": None})
- _process_assistant(entry, [], meta)
- assert meta["total_input_tokens"] == 10
- assert meta["total_output_tokens"] == 0
+ s = _parse_entries([_assistant_entry({"input_tokens": 10, "output_tokens": None})])
+ assert s["metadata"]["total_input_tokens"] == 10
+ assert s["metadata"]["total_output_tokens"] == 0
def test_all_null_usage_fields(self):
- meta = _fresh_metadata()
- entry = _assistant_entry(
- {
- "input_tokens": None,
- "output_tokens": None,
- "cache_read_input_tokens": None,
- "cache_creation_input_tokens": None,
- }
+ s = _parse_entries(
+ [
+ _assistant_entry(
+ {
+ "input_tokens": None,
+ "output_tokens": None,
+ "cache_read_input_tokens": None,
+ "cache_creation_input_tokens": None,
+ }
+ )
+ ]
)
- _process_assistant(entry, [], meta)
- assert meta["total_input_tokens"] == 0
- assert meta["total_output_tokens"] == 0
- assert meta["total_cache_read_tokens"] == 0
- assert meta["total_cache_creation_tokens"] == 0
+ assert s["metadata"]["total_input_tokens"] == 0
+ assert s["metadata"]["total_output_tokens"] == 0
+ assert s["metadata"]["total_cache_read_tokens"] == 0
+ assert s["metadata"]["total_cache_creation_tokens"] == 0
def test_null_ephemeral_tokens(self):
- meta = _fresh_metadata()
- entry = _assistant_entry(
- {
- "input_tokens": 10,
- "output_tokens": 5,
- "cache_creation": {
- "ephemeral_5m_input_tokens": None,
- "ephemeral_1h_input_tokens": None,
- },
- }
+ s = _parse_entries(
+ [
+ _assistant_entry(
+ {
+ "input_tokens": 10,
+ "output_tokens": 5,
+ "cache_creation": {
+ "ephemeral_5m_input_tokens": None,
+ "ephemeral_1h_input_tokens": None,
+ },
+ }
+ )
+ ]
)
- _process_assistant(entry, [], meta)
- assert meta["total_ephemeral_5m_tokens"] == 0
- assert meta["total_ephemeral_1h_tokens"] == 0
+ assert s["metadata"]["total_ephemeral_5m_tokens"] == 0
+ assert s["metadata"]["total_ephemeral_1h_tokens"] == 0
def test_per_message_usage_dict_has_no_null(self):
"""The usage dict stored on the message itself must never contain None."""
- messages = []
- meta = _fresh_metadata()
- entry = _assistant_entry(
- {
- "input_tokens": None,
- "output_tokens": None,
- "cache_read_input_tokens": None,
- "cache_creation_input_tokens": None,
- }
+ s = _parse_entries(
+ [
+ _assistant_entry(
+ {
+ "input_tokens": None,
+ "output_tokens": None,
+ "cache_read_input_tokens": None,
+ "cache_creation_input_tokens": None,
+ }
+ )
+ ]
)
- _process_assistant(entry, messages, meta)
- assert len(messages) == 1
- usage = messages[0]["usage"]
+ assert len(s["messages"]) == 1
+ usage = s["messages"][0]["usage"]
assert usage["input_tokens"] == 0
assert usage["output_tokens"] == 0
assert usage["cache_read"] == 0
@@ -180,40 +167,26 @@ def test_per_message_usage_dict_has_no_null(self):
def test_normal_values_still_accumulate(self):
"""Sanity check: valid integer values are accumulated correctly."""
- meta = _fresh_metadata()
- for _ in range(3):
- entry = _assistant_entry(
- {
- "input_tokens": 100,
- "output_tokens": 50,
- "cache_read_input_tokens": 20,
- "cache_creation_input_tokens": 10,
- }
- )
- _process_assistant(entry, [], meta)
- assert meta["total_input_tokens"] == 300
- assert meta["total_output_tokens"] == 150
- assert meta["total_cache_read_tokens"] == 60
- assert meta["total_cache_creation_tokens"] == 30
-
-
-# ---------------------------------------------------------------------------
-# parse_session (integration): null usage survives round-trip via temp file
-# ---------------------------------------------------------------------------
-
-
-class TestParseSessionNullUsage:
- """Integration tests: parse_session must not raise on null usage fields."""
-
- def _write_session(self, entries: list) -> str:
- f = tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False, encoding="utf-8")
- for entry in entries:
- f.write(json.dumps(entry) + "\n")
- f.close()
- return f.name
+ s = _parse_entries(
+ [
+ _assistant_entry(
+ {
+ "input_tokens": 100,
+ "output_tokens": 50,
+ "cache_read_input_tokens": 20,
+ "cache_creation_input_tokens": 10,
+ }
+ )
+ for _ in range(3)
+ ]
+ )
+ assert s["metadata"]["total_input_tokens"] == 300
+ assert s["metadata"]["total_output_tokens"] == 150
+ assert s["metadata"]["total_cache_read_tokens"] == 60
+ assert s["metadata"]["total_cache_creation_tokens"] == 30
def test_null_cache_read_does_not_crash(self):
- path = self._write_session(
+ s = _parse_entries(
[
_assistant_entry(
{
@@ -225,17 +198,13 @@ def test_null_cache_read_does_not_crash(self):
)
]
)
- try:
- session = parse_session(path)
- assert session["metadata"]["total_input_tokens"] == 500
- assert session["metadata"]["total_cache_read_tokens"] == 0
- finally:
- os.unlink(path)
+ assert s["metadata"]["total_input_tokens"] == 500
+ assert s["metadata"]["total_cache_read_tokens"] == 0
def test_mixed_null_and_normal_entries(self):
"""A session with some null-usage entries and some normal ones should
accumulate only the non-null values."""
- path = self._write_session(
+ s = _parse_entries(
[
_assistant_entry(
{"input_tokens": 100, "output_tokens": 40, "cache_read_input_tokens": None}
@@ -245,13 +214,9 @@ def test_mixed_null_and_normal_entries(self):
),
]
)
- try:
- session = parse_session(path)
- assert session["metadata"]["total_input_tokens"] == 300
- assert session["metadata"]["total_output_tokens"] == 120
- assert session["metadata"]["total_cache_read_tokens"] == 30
- finally:
- os.unlink(path)
+ assert s["metadata"]["total_input_tokens"] == 300
+ assert s["metadata"]["total_output_tokens"] == 120
+ assert s["metadata"]["total_cache_read_tokens"] == 30
# ---------------------------------------------------------------------------
diff --git a/tests/test_real_session_fixtures.py b/tests/test_real_session_fixtures.py
index b6f820e..291d166 100644
--- a/tests/test_real_session_fixtures.py
+++ b/tests/test_real_session_fixtures.py
@@ -12,11 +12,8 @@
import pytest
-from utils.jsonl_parser import (
- _TOOL_RESULT_DISPATCH,
- _parse_tool_result,
- parse_session,
-)
+from utils.jsonl_parser import parse_session
+from utils.tool_dispatch import _TOOL_RESULT_DISPATCH, _parse_tool_result
FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")
diff --git a/utils/jsonl_parser.py b/utils/jsonl_parser.py
index 6186b7b..23086c3 100644
--- a/utils/jsonl_parser.py
+++ b/utils/jsonl_parser.py
@@ -16,29 +16,12 @@
extract_text as _extract_text,
infer_title as _infer_title,
normalize_content as _normalize_content,
- strip_system_tags as _strip_system_tags,
)
from utils.session_peek import quick_session_info
-from utils.tool_dispatch import _TOOL_RESULT_DISPATCH, _parse_tool_result
+from utils.tool_dispatch import _parse_tool_result
from utils.validation import validate_session_dict
-__all__ = [
- "parse_session",
- "quick_session_info",
- "_parse_tool_result",
- "_TOOL_RESULT_DISPATCH",
- "_entry_message",
- "_process_user",
- "_process_assistant",
- "_process_system",
- "_process_progress",
- "_normalize_content",
- "_extract_text",
- "_extract_images",
- "_infer_title",
- "_strip_system_tags",
- "_track_file_activity",
-]
+__all__ = ["parse_session", "quick_session_info"]
def _safe_int(val: Any) -> int:
diff --git a/utils/md_exporter.py b/utils/md_exporter.py
index ef9bdf3..b954b74 100644
--- a/utils/md_exporter.py
+++ b/utils/md_exporter.py
@@ -6,6 +6,7 @@
from models.session import MessageDict, SessionDict, ToolUseDict
from models.stats import SessionStatsDict
+from utils.jsonl_helpers import strip_system_tags
from utils.session_stats import format_duration
@@ -208,9 +209,7 @@ def _render_user(msg: MessageDict) -> str:
)
if msg.get("text"):
- from utils.jsonl_parser import _strip_system_tags
-
- lines.append(_strip_system_tags(msg["text"]))
+ lines.append(strip_system_tags(msg["text"]))
# Render structured tool result instead of raw dump
trp = msg.get("tool_result_parsed")
@@ -255,9 +254,7 @@ def _render_assistant(msg: MessageDict) -> str:
lines.append("\n\n")
if msg.get("text"):
- from utils.jsonl_parser import _strip_system_tags
-
- lines.append(_strip_system_tags(msg["text"]))
+ lines.append(strip_system_tags(msg["text"]))
for tool in msg.get("tool_uses") or []:
lines.append(_render_tool_use(tool))