Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions src/basic_memory/mcp/tools/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,47 @@ def _format_search_error_response(
`search_notes("{project}", "{query}", search_type="{search_type}")`
""").strip()

# Corrupt/missing FastEmbed model cache (interrupted download leaves a partial
# snapshot missing model_optimized.onnx; the ONNX runtime then raises NO_SUCHFILE).
# Basic Memory self-heals by re-downloading on the next load, but if the user still
# hits this, point them at the cache dir to clear manually and offer a text fallback.
error_lower = error_message.lower()
# "load model from" is the exact ONNX phrasing ("Load model from <path>.onnx failed").
# The looser "load model" matched unrelated errors, so we keep only the specific phrase
# alongside the onnxruntime / no_suchfile / model_optimized.onnx fingerprints.
if (
"onnxruntime" in error_lower
or "no_suchfile" in error_lower
or "model_optimized.onnx" in error_lower
or "load model from" in error_lower
):
# Deferred import: keeps the repository layer out of the tool's import graph
# (matches the SearchClient deferral below) and is only needed on this error path.
from basic_memory.repository.embedding_provider_factory import _resolve_cache_dir

try:
cache_dir = _resolve_cache_dir(get_container().config)
except RuntimeError:
cache_dir = _resolve_cache_dir(ConfigManager().config)
return dedent(f"""
# Search Failed - Embedding Model Missing or Corrupt

The local FastEmbed model could not be loaded for query '{query}': {error_message}

This usually means an earlier model download was interrupted and left an
incomplete file in the model cache.

## How to fix
1. Delete the FastEmbed model cache so it re-downloads on the next search:
`{cache_dir}`
2. Run your search again (the model downloads automatically on first use):
`search_notes("{project}", "{query}", search_type="{search_type}")`

## Workaround right now
- Use full-text search, which needs no embedding model:
`search_notes("{project}", "{query}", search_type="text")`
""").strip()

# FTS5 syntax errors
if "syntax error" in error_message.lower() or "fts5" in error_message.lower():
clean_query = (
Expand Down
228 changes: 202 additions & 26 deletions src/basic_memory/repository/fastembed_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

import asyncio
import math
import shutil
from pathlib import Path
from typing import TYPE_CHECKING

from loguru import logger
Expand All @@ -15,6 +17,24 @@
from fastembed import TextEmbedding # pragma: no cover


# Substrings that identify the ONNX "model artifact file is missing" load failure (as
# opposed to a config error, a download/network error, or a genuinely offline machine).
# An interrupted FastEmbed download can leave the HuggingFace snapshot dir present but
# missing ``model_optimized.onnx``; the ONNX runtime then raises ``NO_SUCHFILE`` and every
# subsequent load repeats it until the cache is cleared. Matched case-insensitively.
#
# Every entry MUST be written in lowercase: ``_is_missing_artifact_error`` lowercases the
# exception text before the substring test but does not lowercase the markers, so an
# entry containing an uppercase character could never match.
#
# IMPORTANT: this text match is necessary but NOT sufficient to trigger a purge. The error
# text alone cannot distinguish a corrupt cache from a normal cold load (model not yet
# downloaded). Purging is gated on a positive filesystem confirmation that the snapshot dir
# exists on disk but the model artifact file is missing — see ``_corrupt_model_subdirs``.
_MISSING_ARTIFACT_ERROR_MARKERS: tuple[str, ...] = (
    "no_suchfile",
    "model_optimized.onnx",
    "file doesn't exist",
    "no such file",
)


class FastEmbedEmbeddingProvider(EmbeddingProvider):
"""Local ONNX embedding provider backed by FastEmbed."""

Expand Down Expand Up @@ -53,6 +73,156 @@ def __init__(
self._model: TextEmbedding | None = None
self._model_lock = asyncio.Lock()

def _resolved_model_name(self) -> str:
"""Return the FastEmbed model name after applying our local aliases."""
return self._MODEL_ALIASES.get(self.model_name, self.model_name)

def _create_model(self) -> "TextEmbedding":
    """Construct the FastEmbed ``TextEmbedding`` for the configured model.

    ``cache_dir`` and ``threads`` are forwarded only when they are not ``None``,
    so an unset option is omitted from the constructor call altogether rather
    than being passed as ``None``.

    Returns:
        A new ``TextEmbedding`` for the alias-resolved model name.

    Raises:
        SemanticDependenciesMissingError: If ``fastembed`` cannot be imported.
    """
    try:
        from fastembed import TextEmbedding
    except ImportError as exc:  # pragma: no cover - exercised via tests with monkeypatch
        raise SemanticDependenciesMissingError(
            "fastembed package is missing. "
            "Install/update basic-memory to include semantic dependencies: "
            "pip install -U basic-memory"
        ) from exc

    # Collect only the options that were actually configured; the resulting
    # keyword set is the same one the explicit per-combination calls would pass.
    init_kwargs = {"model_name": self._resolved_model_name()}
    if self.cache_dir is not None:
        init_kwargs["cache_dir"] = self.cache_dir
    if self.threads is not None:
        init_kwargs["threads"] = self.threads
    return TextEmbedding(**init_kwargs)

def _model_cache_candidates(self) -> list[tuple[Path, str]]:
"""Resolve ``(snapshot_dir, model_file)`` pairs for this model under ``cache_dir``.

FastEmbed stores each model under ``<cache_dir>/models--<org>--<repo>`` where the
repo is the model's HuggingFace source (e.g. ``BAAI/bge-small-en-v1.5`` resolves to
``models--qdrant--bge-small-en-v1.5-onnx-q``). We resolve the source and the expected
model artifact filename from FastEmbed's own model description so corruption detection
and deletion are scoped to exactly this model's tree — never the whole cache or
unrelated models.

Note: ``TextEmbedding._list_supported_models()`` is an intentional use of an
undocumented FastEmbed API. The broad ``except`` below is a known defensive fallback:
if the lookup ever changes shape we degrade to "no candidates" (so we never purge)
rather than crashing the load path.
"""
if self.cache_dir is None:
return []

# FastEmbed matches model names case-insensitively (model_management.py:
# ``model_name.lower() == model.model.lower()``). Mirror that here so a config like
# model="baai/bge-small-en-v1.5" still resolves to the same HF source/cache subdir.
resolved_model_name = self._resolved_model_name().lower()
candidates: list[tuple[Path, str]] = []
seen: set[Path] = set()
cache_root = Path(self.cache_dir)
try:
from fastembed import TextEmbedding

for description in TextEmbedding._list_supported_models():
if description.model.lower() != resolved_model_name:
continue
hf_source = description.sources.hf
model_file = description.model_file
if not hf_source or not model_file:
continue
# HuggingFace hub names cache dirs ``models--<repo with '/' -> '--'>``.
snapshot_dir = cache_root / f"models--{hf_source.replace('/', '--')}"
if snapshot_dir not in seen:
seen.add(snapshot_dir)
candidates.append((snapshot_dir, model_file))
except Exception as exc: # pragma: no cover - defensive: never block load on lookup
logger.warning(
"Could not resolve FastEmbed model source for cache cleanup: "
"model_name={model_name} error={error}",
model_name=resolved_model_name,
error=exc,
)

return candidates

def _corrupt_model_subdirs(self) -> list[Path]:
"""Return cache subdirs that are POSITIVELY confirmed corrupt by filesystem state.

A model is corrupt when its HuggingFace cache dir exists on disk but at least one
materialized snapshot revision is missing the expected model artifact file (e.g.
``model_optimized.onnx``) — the exact fingerprint of an interrupted download. A normal
cold load (no cache dir yet) is NOT corruption and yields no entries here, so it can
never trigger a purge.

Inspection is PER-REVISION on purpose: HuggingFace keeps multiple revisions under one
``models--<repo>`` tree, so a corrupt current snapshot can coexist with an older
complete one. Checking ``rglob(model_file)`` across the whole tree would let the old
artifact mask the broken current revision and leave it self-perpetuating, so we
require every revision to carry the artifact.
"""
corrupt: list[Path] = []
for model_dir, model_file in self._model_cache_candidates():
# Trigger: the model's cache dir does not exist at all.
# Why: this is a normal cold/first load — the model simply hasn't been
# downloaded yet. Purging here would be wrong and pointless.
# Outcome: skip; not corrupt.
if not model_dir.exists():
continue
snapshots_root = model_dir / "snapshots"
revision_dirs = (
[d for d in snapshots_root.iterdir() if d.is_dir()]
if snapshots_root.is_dir()
else []
)
# Trigger: the cache dir exists but no snapshot revision has materialized.
# Why/Outcome: an interrupted download that never wrote a revision — corrupt.
if not revision_dirs:
corrupt.append(model_dir)
continue
# Trigger: any individual revision is missing the artifact (rglob covers the
# artifact at any depth within that revision, e.g. snapshots/<rev>/onnx/...).
# Why: a complete OLD revision must not mask a corrupt CURRENT one.
# Outcome: flag the model dir so the whole tree re-downloads cleanly.
if any(not any(rev.rglob(model_file)) for rev in revision_dirs):
corrupt.append(model_dir)
return corrupt

def _purge_model_subdirs(self, subdirs: list[Path]) -> bool:
"""Delete confirmed-corrupt cache subtrees so the next load re-downloads them.

Returns True when at least one targeted subdir is actually gone afterwards. On
Windows a locked file can make ``shutil.rmtree(ignore_errors=True)`` silently no-op;
reporting success in that case would let the caller retry against the same broken
cache, so each subdir only counts as removed once it has actually disappeared.
"""
removed_any = False
for subdir in subdirs:
logger.warning(
"Removing corrupt FastEmbed model cache to force re-download: {path}",
path=str(subdir),
)
shutil.rmtree(subdir, ignore_errors=True)
# Set removed only when the subdir is truly gone — a silent rmtree no-op
# (e.g. a locked file on Windows) must not be reported as a successful purge.
if not subdir.exists():
removed_any = True
return removed_any

@staticmethod
def _is_missing_artifact_error(exc: Exception) -> bool:
    """Tell whether a load failure's text carries the ONNX missing-artifact signature.

    The exception message is lowercased and searched for each entry of
    ``_MISSING_ARTIFACT_ERROR_MARKERS``. A match is only the text-level gate:
    purging additionally requires filesystem-confirmed corruption
    (``_corrupt_model_subdirs``), so a transient/offline/"from any source"
    load error never deletes a valid cache.

    Args:
        exc: The exception raised while constructing the model.

    Returns:
        True if any marker occurs in the lowercased message, otherwise False.
    """
    lowered = str(exc).lower()
    for marker in _MISSING_ARTIFACT_ERROR_MARKERS:
        if marker in lowered:
            return True
    return False

async def _load_model(self) -> "TextEmbedding":
if self._model is not None:
return self._model
Expand All @@ -61,36 +231,42 @@ async def _load_model(self) -> "TextEmbedding":
if self._model is not None:
return self._model

def _create_model() -> "TextEmbedding":
try:
from fastembed import TextEmbedding
except (
ImportError
) as exc: # pragma: no cover - exercised via tests with monkeypatch
raise SemanticDependenciesMissingError(
"fastembed package is missing. "
"Install/update basic-memory to include semantic dependencies: "
"pip install -U basic-memory"
) from exc
resolved_model_name = self._MODEL_ALIASES.get(self.model_name, self.model_name)
if self.cache_dir is not None and self.threads is not None:
return TextEmbedding(
model_name=resolved_model_name,
cache_dir=self.cache_dir,
threads=self.threads,
)
if self.cache_dir is not None:
return TextEmbedding(model_name=resolved_model_name, cache_dir=self.cache_dir)
if self.threads is not None:
return TextEmbedding(model_name=resolved_model_name, threads=self.threads)
return TextEmbedding(model_name=resolved_model_name)

self._model = await asyncio.to_thread(_create_model)
try:
self._model = await asyncio.to_thread(self._create_model)
except Exception as exc:
# Trigger: model construction raised the ONNX missing-artifact error AND a
# filesystem check positively confirms a corrupt cache subdir (the
# snapshot dir exists but the model artifact file is missing — the
# fingerprint of an interrupted download).
# Why: the raw ONNXRuntimeError is self-perpetuating — every retry hits the
# same broken snapshot until the cache is cleared. We must NOT misread a
# normal cold load (no snapshot dir, model simply not downloaded yet) or a
# transient/offline "from any source" error as corruption, because purging
# then breaks the happy path. Both the error-text gate and the positive
# filesystem confirmation are required before we delete anything.
# Outcome: confirmed corruption → purge exactly this model's subdir and retry
# once so a fresh download can land. Every other failure (including a
# retry that still fails) re-raises the ORIGINAL exception so the
# message stays actionable and we never loop.
if not self._is_missing_artifact_error(exc):
raise
corrupt_subdirs = self._corrupt_model_subdirs()
if not corrupt_subdirs:
raise
if not self._purge_model_subdirs(corrupt_subdirs):
raise
logger.info(
"Retrying FastEmbed model load after clearing corrupt cache: "
"model_name={model_name}",
model_name=self._resolved_model_name(),
)
self._model = await asyncio.to_thread(self._create_model)

logger.info(
"FastEmbed model loaded: model_name={model_name} batch_size={batch_size} "
"threads={threads} configured_parallel={configured_parallel} "
"effective_parallel={effective_parallel}",
model_name=self._MODEL_ALIASES.get(self.model_name, self.model_name),
model_name=self._resolved_model_name(),
batch_size=self.batch_size,
threads=self.threads,
configured_parallel=self.parallel,
Expand Down
36 changes: 36 additions & 0 deletions tests/mcp/test_tool_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,42 @@ def test_format_search_error_semantic_dependencies_missing(self):
assert "# Search Failed - Semantic Dependencies Missing" in result
assert "pip install -U basic-memory" in result

def test_format_search_error_corrupt_embedding_model(self):
    """An ONNX NO_SUCHFILE load failure yields the corrupt-model guidance."""
    from basic_memory.config import ConfigManager
    from basic_memory.repository.embedding_provider_factory import _resolve_cache_dir

    # Representative ONNX runtime message for a snapshot missing its model file.
    onnx_failure = (
        "[ONNXRuntimeError] : 3 : NO_SUCHFILE : Load model from "
        "/home/u/.basic-memory/fastembed_cache/models--qdrant--bge-small-en-v1.5-onnx-q/"
        "snapshots/abc/model_optimized.onnx failed. File doesn't exist"
    )

    response = _format_search_error_response(
        "test-project",
        onnx_failure,
        "semantic query",
        "hybrid",
    )

    cache_location = _resolve_cache_dir(ConfigManager().config)
    assert "# Search Failed - Embedding Model Missing or Corrupt" in response
    # The resolved cache dir must be named so the user knows what to delete.
    assert cache_location in response
    # Full-text search must be offered as the immediate workaround.
    assert 'search_type="text"' in response

def test_format_search_error_load_model_phrase_does_not_overmatch(self):
    """An error that merely says 'load model' (without 'from') skips the embedding branch.

    The embedding-model branch keys on the exact ONNX phrasing 'load model from'
    rather than the broader 'load model', so an unrelated failure like this one
    must fall through to the generic handler.
    """
    unrelated_error = "Failed to load model configuration for this project"

    response = _format_search_error_response(
        "test-project",
        unrelated_error,
        "test query",
    )

    assert "# Search Failed - Embedding Model Missing or Corrupt" not in response
    assert "# Search Failed" in response

def test_format_search_error_generic(self):
"""Test formatting for generic errors."""
result = _format_search_error_response("test-project", "unknown error", "test query")
Expand Down
Loading
Loading