From dfef6f4464e919ac4bdbbbcba201fcc839269543 Mon Sep 17 00:00:00 2001 From: phernandez Date: Sun, 7 Jun 2026 21:17:32 -0500 Subject: [PATCH 1/2] fix(core): split comma-separated tags in parse_tags Bug #7 (integration bug hunt): `bm tool write-note --tags "a,b"` stored one mangled tag "a,b" instead of splitting into ["a","b"], diverging from the MCP write_note convention. Typer collects a single `--tags "a,b"` into the one-element list ['a,b'], and parse_tags() only split a bare string on commas, leaving list elements verbatim. parse_tags() now flattens by splitting each list element on commas, so "a,b", ["a,b"], and ["a","b"] all converge to ["a","b"]. The search_notes 'tags' coercer is intentionally left unchanged (tracked separately as #910). Tests come from the integration bug hunt: a CLI/MCP convergence test under test-int/bughunt_fixes/ plus unit parametrize cases in tests/utils/test_parse_tags.py. Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: phernandez --- src/basic_memory/utils.py | 13 +++- test-int/bughunt_fixes/__init__.py | 0 ...test_parse_tags_comma_split_integration.py | 78 +++++++++++++++++++ tests/utils/test_parse_tags.py | 3 + 4 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 test-int/bughunt_fixes/__init__.py create mode 100644 test-int/bughunt_fixes/test_parse_tags_comma_split_integration.py diff --git a/src/basic_memory/utils.py b/src/basic_memory/utils.py index 7ba719871..6fa7003d5 100644 --- a/src/basic_memory/utils.py +++ b/src/basic_memory/utils.py @@ -526,8 +526,17 @@ def parse_tags(tags: Union[List[str], str, None]) -> List[str]: # Process list of tags if isinstance(tags, list): - # First strip whitespace, then strip leading '#' characters to prevent accumulation - return [tag.strip().lstrip("#") for tag in tags if tag and tag.strip()] + # Trigger: a list element may itself be a comma-separated string (e.g. typer collects + # `--tags "a,b"` into the one-element list `["a,b"]`). + # Why: keep the CLI list path and the MCP bare-string path on a single source of truth so + # `--tags "a,b"`, `--tags a --tags b`, and `tags="a,b"` all converge to the same tags. + # Outcome: flatten by splitting each element on commas before stripping '#' / whitespace. + return [ + tag.strip().lstrip("#") + for raw in tags + for tag in str(raw).split(",") + if tag and tag.strip() + ] # Process string input if isinstance(tags, str): diff --git a/test-int/bughunt_fixes/__init__.py b/test-int/bughunt_fixes/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/test-int/bughunt_fixes/test_parse_tags_comma_split_integration.py b/test-int/bughunt_fixes/test_parse_tags_comma_split_integration.py new file mode 100644 index 000000000..823753cdc --- /dev/null +++ b/test-int/bughunt_fixes/test_parse_tags_comma_split_integration.py @@ -0,0 +1,78 @@ +"""Bug: CLI `write-note --tags "a,b"` does NOT split the comma string, but the +MCP write_note(tags="a,b") DOES (parse_tags splits a bare string but treats each +list element as a single literal tag). + +Typer collects a single --tags value into a one-element list ['a,b'], and +parse_tags(['a,b']) returns ['a,b'] (no per-element comma split). The MCP tool +receives the bare string 'a,b' and parse_tags('a,b') returns ['a','b']. + +Result: the SAME comma-string input yields different tags on CLI vs MCP, even +though write_note's docstring promises comma-separated-string support. +""" + +import json +import pytest +from fastmcp import Client +from typer.testing import CliRunner +from basic_memory.cli.main import app as cli_app + +runner = CliRunner() + + +def test_cli_write_note_comma_tags_split_matches_mcp(app, app_config, test_project, config_manager): + # CLI: single --tags value containing a comma + write = runner.invoke( + cli_app, + [ + "tool", + "write-note", + "--title", + "CLI Comma Split", + "--folder", + "cli-comma-split", + "--content", + "# CLI Comma Split\n\nbody", + "--tags", + "alpha,beta", + ], + ) + assert write.exit_code == 0, write.output + permalink = json.loads(write.stdout)["permalink"] + + read = runner.invoke( + cli_app, + ["tool", "read-note", permalink, "--include-frontmatter", "--local"], + ) + assert read.exit_code == 0, read.output + content = json.loads(read.stdout)["content"] + + # Correct behavior: two distinct tags (matching MCP write_note semantics). + assert "- alpha\n" in content and "- beta\n" in content, ( + "CLI --tags 'alpha,beta' should split into two tags like MCP write_note does; " + f"got frontmatter:\n{content}" + ) + assert "alpha,beta" not in content, "comma string must not survive as a single literal tag" + + +@pytest.mark.asyncio +async def test_mcp_write_note_comma_tags_split_baseline(mcp_server, app, test_project): + """Baseline: MCP write_note DOES split comma strings (the behavior CLI should match).""" + async with Client(mcp_server) as client: + await client.call_tool( + "write_note", + { + "project": test_project.name, + "title": "MCP Comma Split", + "directory": "mcp-comma-split", + "content": "# MCP Comma Split\n\nbody", + "tags": "alpha,beta", + }, + ) + read = await client.call_tool( + "read_note", + {"project": test_project.name, "identifier": "MCP Comma Split"}, + ) + text = read.content[0].text + assert "- alpha\n" in text and "- beta\n" in text, ( + f"MCP write_note should split comma string into two tags; got:\n{text}" + ) diff --git a/tests/utils/test_parse_tags.py b/tests/utils/test_parse_tags.py index bd99d7aa9..f2d3e0b80 100644 --- a/tests/utils/test_parse_tags.py +++ b/tests/utils/test_parse_tags.py @@ -17,6 +17,9 @@ (["tag1", "tag2"], ["tag1", "tag2"]), (["tag1", "", "tag2"], ["tag1", "tag2"]), # Empty tags are filtered ([" tag1 ", " tag2 "], ["tag1", "tag2"]), # Whitespace is stripped + # Comma inside a single list element is split (CLI `--tags "a,b"` -> ["a,b"]) + (["tag1,tag2"], ["tag1", "tag2"]), + (["tag1, tag2", "tag3"], ["tag1", "tag2", "tag3"]), # String inputs ("", []), ("tag1", ["tag1"]), From 2be0731020a0ac99022688b29be1c1feb915d713 Mon Sep 17 00:00:00 2001 From: phernandez Date: Sun, 7 Jun 2026 21:47:16 -0500 Subject: [PATCH 2/2] fix(core): skip null tag entries and make comma-split test CRLF-tolerant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex review: parse_tags must not revive a null list entry (YAML 'tags: [alpha, null]') as the literal tag 'None' — skip None before stringifying. Also fix the comma-split integration test's frontmatter assertion to use splitlines() so it passes on Windows CRLF. Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: phernandez --- src/basic_memory/utils.py | 3 +++ .../test_parse_tags_comma_split_integration.py | 7 +++++-- tests/utils/test_parse_tags.py | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/basic_memory/utils.py b/src/basic_memory/utils.py index 6fa7003d5..33db10e14 100644 --- a/src/basic_memory/utils.py +++ b/src/basic_memory/utils.py @@ -531,9 +531,12 @@ def parse_tags(tags: Union[List[str], str, None]) -> List[str]: # Why: keep the CLI list path and the MCP bare-string path on a single source of truth so # `--tags "a,b"`, `--tags a --tags b`, and `tags="a,b"` all converge to the same tags. # Outcome: flatten by splitting each element on commas before stripping '#' / whitespace. + # Skip None entries (e.g. a YAML `tags: [alpha, null]`) so they are not revived as + # the literal tag "None" by str(raw); the old list branch ignored such falsy entries. return [ tag.strip().lstrip("#") for raw in tags + if raw is not None for tag in str(raw).split(",") if tag and tag.strip() ] diff --git a/test-int/bughunt_fixes/test_parse_tags_comma_split_integration.py b/test-int/bughunt_fixes/test_parse_tags_comma_split_integration.py index 823753cdc..4fd41db7b 100644 --- a/test-int/bughunt_fixes/test_parse_tags_comma_split_integration.py +++ b/test-int/bughunt_fixes/test_parse_tags_comma_split_integration.py @@ -47,7 +47,9 @@ def test_cli_write_note_comma_tags_split_matches_mcp(app, app_config, test_proje content = json.loads(read.stdout)["content"] # Correct behavior: two distinct tags (matching MCP write_note semantics). - assert "- alpha\n" in content and "- beta\n" in content, ( + # splitlines() is line-ending agnostic (Windows CRLF vs POSIX LF). + content_lines = content.splitlines() + assert "- alpha" in content_lines and "- beta" in content_lines, ( "CLI --tags 'alpha,beta' should split into two tags like MCP write_note does; " f"got frontmatter:\n{content}" ) @@ -73,6 +75,7 @@ async def test_mcp_write_note_comma_tags_split_baseline(mcp_server, app, test_pr {"project": test_project.name, "identifier": "MCP Comma Split"}, ) text = read.content[0].text - assert "- alpha\n" in text and "- beta\n" in text, ( + text_lines = text.splitlines() + assert "- alpha" in text_lines and "- beta" in text_lines, ( f"MCP write_note should split comma string into two tags; got:\n{text}" ) diff --git a/tests/utils/test_parse_tags.py b/tests/utils/test_parse_tags.py index f2d3e0b80..358c10879 100644 --- a/tests/utils/test_parse_tags.py +++ b/tests/utils/test_parse_tags.py @@ -20,6 +20,8 @@ # Comma inside a single list element is split (CLI `--tags "a,b"` -> ["a,b"]) (["tag1,tag2"], ["tag1", "tag2"]), (["tag1, tag2", "tag3"], ["tag1", "tag2", "tag3"]), + # None entries (e.g. YAML `tags: [alpha, null]`) are skipped, not revived as "None" + (["alpha", None], ["alpha"]), # String inputs ("", []), ("tag1", ["tag1"]),