From 648172439ef86b1deb9b961fc3d052da01ed50ee Mon Sep 17 00:00:00 2001 From: phernandez Date: Sun, 7 Jun 2026 14:03:43 -0500 Subject: [PATCH 1/3] fix(core): self-heal corrupt FastEmbed model cache An interrupted FastEmbed model download leaves the HuggingFace snapshot dir present but missing model_optimized.onnx. The ONNX runtime then raises NO_SUCHFILE on every load, and the failure is self-perpetuating until the cache is cleared by hand. Search surfaced only the generic 'Search Failed' message with no hint. FastEmbedEmbeddingProvider now detects a missing/corrupt-artifact load failure, deletes only this model's own models--<org>--<repo> cache subtree (resolved from FastEmbed's model description), and retries the load exactly once to force a fresh download. A second failure fails fast with the original error. The search error formatter gains an ONNX/model-load branch that names the resolved fastembed cache dir to delete and offers search_type="text" as an immediate workaround. Closes #895 Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: phernandez --- src/basic_memory/mcp/tools/search.py | 38 ++++ .../repository/fastembed_provider.py | 141 ++++++++++--- tests/mcp/test_tool_search.py | 21 ++ tests/repository/test_fastembed_provider.py | 189 ++++++++++++++++++ 4 files changed, 364 insertions(+), 25 deletions(-) diff --git a/src/basic_memory/mcp/tools/search.py b/src/basic_memory/mcp/tools/search.py index 3c67f3ac7..63d84553d 100644 --- a/src/basic_memory/mcp/tools/search.py +++ b/src/basic_memory/mcp/tools/search.py @@ -83,6 +83,44 @@ def _format_search_error_response( `search_notes("{project}", "{query}", search_type="{search_type}")` """).strip() + # Corrupt/missing FastEmbed model cache (interrupted download leaves a partial + # snapshot missing model_optimized.onnx; the ONNX runtime then raises NO_SUCHFILE). 
+ # Basic Memory self-heals by re-downloading on the next load, but if the user still + # hits this, point them at the cache dir to clear manually and offer a text fallback. + error_lower = error_message.lower() + if ( + "onnxruntime" in error_lower + or "no_suchfile" in error_lower + or "model_optimized.onnx" in error_lower + or "load model" in error_lower + ): + # Deferred import: keeps the repository layer out of the tool's import graph + # (matches the SearchClient deferral below) and is only needed on this error path. + from basic_memory.repository.embedding_provider_factory import _resolve_cache_dir + + try: + cache_dir = _resolve_cache_dir(get_container().config) + except RuntimeError: + cache_dir = _resolve_cache_dir(ConfigManager().config) + return dedent(f""" + # Search Failed - Embedding Model Missing or Corrupt + + The local FastEmbed model could not be loaded for query '{query}': {error_message} + + This usually means an earlier model download was interrupted and left an + incomplete file in the model cache. + + ## How to fix + 1. Delete the FastEmbed model cache so it re-downloads on the next search: + `{cache_dir}` + 2. 
Run your search again (the model downloads automatically on first use): + `search_notes("{project}", "{query}", search_type="{search_type}")` + + ## Workaround right now + - Use full-text search, which needs no embedding model: + `search_notes("{project}", "{query}", search_type="text")` + """).strip() + # FTS5 syntax errors if "syntax error" in error_message.lower() or "fts5" in error_message.lower(): clean_query = ( diff --git a/src/basic_memory/repository/fastembed_provider.py b/src/basic_memory/repository/fastembed_provider.py index 3d637aced..723734fb5 100644 --- a/src/basic_memory/repository/fastembed_provider.py +++ b/src/basic_memory/repository/fastembed_provider.py @@ -4,6 +4,8 @@ import asyncio import math +import shutil +from pathlib import Path from typing import TYPE_CHECKING from loguru import logger @@ -15,6 +17,19 @@ from fastembed import TextEmbedding # pragma: no cover +# Substrings that identify a missing/corrupt on-disk model artifact (as opposed to a +# config error or a genuinely offline machine). An interrupted FastEmbed download leaves +# the HuggingFace snapshot dir present but missing ``model_optimized.onnx``; the ONNX +# runtime then raises ``NO_SUCHFILE`` and every subsequent load repeats it until the +# cache is cleared. Matched case-insensitively against the exception text. +_CORRUPT_MODEL_ERROR_MARKERS = ( + "no_suchfile", + "model_optimized.onnx", + "file doesn't exist", + "no such file", +) + + class FastEmbedEmbeddingProvider(EmbeddingProvider): """Local ONNX embedding provider backed by FastEmbed.""" @@ -53,6 +68,84 @@ def __init__( self._model: TextEmbedding | None = None self._model_lock = asyncio.Lock() + def _create_model(self) -> "TextEmbedding": + try: + from fastembed import TextEmbedding + except ImportError as exc: # pragma: no cover - exercised via tests with monkeypatch + raise SemanticDependenciesMissingError( + "fastembed package is missing. 
" + "Install/update basic-memory to include semantic dependencies: " + "pip install -U basic-memory" + ) from exc + resolved_model_name = self._MODEL_ALIASES.get(self.model_name, self.model_name) + if self.cache_dir is not None and self.threads is not None: + return TextEmbedding( + model_name=resolved_model_name, + cache_dir=self.cache_dir, + threads=self.threads, + ) + if self.cache_dir is not None: + return TextEmbedding(model_name=resolved_model_name, cache_dir=self.cache_dir) + if self.threads is not None: + return TextEmbedding(model_name=resolved_model_name, threads=self.threads) + return TextEmbedding(model_name=resolved_model_name) + + def _model_cache_subdirs(self) -> list[Path]: + """Resolve the HuggingFace cache subdir(s) for this model under ``cache_dir``. + + FastEmbed stores each model under ``/models----`` where the + repo is the model's HuggingFace source (e.g. ``BAAI/bge-small-en-v1.5`` resolves to + ``models--qdrant--bge-small-en-v1.5-onnx-q``). We resolve the source from FastEmbed's + own model description so the deletion is scoped to exactly this model's tree — never + the whole cache or unrelated models. + """ + if self.cache_dir is None: + return [] + + resolved_model_name = self._MODEL_ALIASES.get(self.model_name, self.model_name) + hf_sources: set[str] = set() + try: + from fastembed import TextEmbedding + + for description in TextEmbedding._list_supported_models(): + if description.model == resolved_model_name: + hf_source = description.sources.hf + if hf_source: + hf_sources.add(hf_source) + except Exception as exc: # pragma: no cover - defensive: never block self-heal on lookup + logger.warning( + "Could not resolve FastEmbed model source for cache cleanup: " + "model_name={model_name} error={error}", + model_name=resolved_model_name, + error=exc, + ) + + cache_root = Path(self.cache_dir) + # HuggingFace hub names cache dirs ``models-- '--'>``. 
+ return [cache_root / f"models--{source.replace('/', '--')}" for source in hf_sources] + + def _purge_corrupt_model_cache(self) -> bool: + """Delete this model's on-disk cache subtree so the next load re-downloads it. + + Returns True when at least one model cache subdir existed and was removed. + """ + removed = False + for subdir in self._model_cache_subdirs(): + if subdir.exists(): + logger.warning( + "Removing corrupt FastEmbed model cache to force re-download: {path}", + path=str(subdir), + ) + shutil.rmtree(subdir, ignore_errors=True) + removed = True + return removed + + @staticmethod + def _is_corrupt_model_error(exc: Exception) -> bool: + """Return True when the load failure looks like a missing/corrupt model artifact.""" + message = str(exc).lower() + return any(marker in message for marker in _CORRUPT_MODEL_ERROR_MARKERS) + async def _load_model(self) -> "TextEmbedding": if self._model is not None: return self._model @@ -61,31 +154,29 @@ async def _load_model(self) -> "TextEmbedding": if self._model is not None: return self._model - def _create_model() -> "TextEmbedding": - try: - from fastembed import TextEmbedding - except ( - ImportError - ) as exc: # pragma: no cover - exercised via tests with monkeypatch - raise SemanticDependenciesMissingError( - "fastembed package is missing. 
" - "Install/update basic-memory to include semantic dependencies: " - "pip install -U basic-memory" - ) from exc - resolved_model_name = self._MODEL_ALIASES.get(self.model_name, self.model_name) - if self.cache_dir is not None and self.threads is not None: - return TextEmbedding( - model_name=resolved_model_name, - cache_dir=self.cache_dir, - threads=self.threads, - ) - if self.cache_dir is not None: - return TextEmbedding(model_name=resolved_model_name, cache_dir=self.cache_dir) - if self.threads is not None: - return TextEmbedding(model_name=resolved_model_name, threads=self.threads) - return TextEmbedding(model_name=resolved_model_name) - - self._model = await asyncio.to_thread(_create_model) + try: + self._model = await asyncio.to_thread(self._create_model) + except Exception as exc: + # Trigger: model construction failed with a missing/corrupt-artifact error + # (an interrupted download left a partial snapshot in the cache). + # Why: the raw ONNXRuntimeError is self-perpetuating — every retry hits the + # same broken snapshot until the cache is cleared. Scope the deletion to + # this model's own ``models--...`` subdir and retry exactly once so a + # fresh download can land. A single retry avoids an infinite re-download + # loop if the failure is not actually a cache problem. + # Outcome: on success the user transparently recovers; on a second failure we + # fail fast with the original error so the message stays actionable. 
+ if not self._is_corrupt_model_error(exc): + raise + if not self._purge_corrupt_model_cache(): + raise + logger.info( + "Retrying FastEmbed model load after clearing corrupt cache: " + "model_name={model_name}", + model_name=self._MODEL_ALIASES.get(self.model_name, self.model_name), + ) + self._model = await asyncio.to_thread(self._create_model) + logger.info( "FastEmbed model loaded: model_name={model_name} batch_size={batch_size} " "threads={threads} configured_parallel={configured_parallel} " diff --git a/tests/mcp/test_tool_search.py b/tests/mcp/test_tool_search.py index 52a3d7410..a04fa9db1 100644 --- a/tests/mcp/test_tool_search.py +++ b/tests/mcp/test_tool_search.py @@ -479,6 +479,27 @@ def test_format_search_error_semantic_dependencies_missing(self): assert "# Search Failed - Semantic Dependencies Missing" in result assert "pip install -U basic-memory" in result + def test_format_search_error_corrupt_embedding_model(self): + """Test formatting for a corrupt/missing FastEmbed model (ONNX NO_SUCHFILE).""" + from basic_memory.config import ConfigManager + from basic_memory.repository.embedding_provider_factory import _resolve_cache_dir + + result = _format_search_error_response( + "test-project", + "[ONNXRuntimeError] : 3 : NO_SUCHFILE : Load model from " + "/home/u/.basic-memory/fastembed_cache/models--qdrant--bge-small-en-v1.5-onnx-q/" + "snapshots/abc/model_optimized.onnx failed. File doesn't exist", + "semantic query", + "hybrid", + ) + + expected_cache_dir = _resolve_cache_dir(ConfigManager().config) + assert "# Search Failed - Embedding Model Missing or Corrupt" in result + # Names the actual resolved cache dir so the user knows what to delete. + assert expected_cache_dir in result + # Offers full-text search as an immediate workaround. 
+ assert 'search_type="text"' in result + def test_format_search_error_generic(self): """Test formatting for generic errors.""" result = _format_search_error_response("test-project", "unknown error", "test query") diff --git a/tests/repository/test_fastembed_provider.py b/tests/repository/test_fastembed_provider.py index a8e073d8c..f3fa4863f 100644 --- a/tests/repository/test_fastembed_provider.py +++ b/tests/repository/test_fastembed_provider.py @@ -3,6 +3,7 @@ import builtins import math import sys +from dataclasses import dataclass import pytest @@ -211,3 +212,191 @@ def embed(self, texts: list[str], **_kwargs): result = await provider.embed_documents(["zero vector"]) assert result == [[0.0, 0.0, 0.0, 0.0]] + + +# --- Self-heal of corrupt/partial model cache (#895) --- +# +# A real interrupted FastEmbed download is non-deterministic and offline-unfriendly, so we +# stub TextEmbedding to (a) advertise an HF source via _list_supported_models so the provider +# can compute the exact models---- cache subdir, and (b) raise a NO_SUCHFILE-style +# ONNX error on the first construction. This is the justified mock case called out in the task. + + +@dataclass +class _StubModelSource: + hf: str + + +@dataclass +class _StubModelDescription: + model: str + sources: _StubModelSource + + +class _SelfHealStubTextEmbedding: + """Raises a NO_SUCHFILE-style ONNX error on the first N constructions, then succeeds.""" + + fail_first_n = 1 + construct_count = 0 + HF_SOURCE = "stub-org/stub-model-onnx-q" + RESOLVED_MODEL = "stub-model" + + def __init__(self, model_name: str, cache_dir: str | None = None, threads: int | None = None): + type(self).construct_count += 1 + if type(self).construct_count <= type(self).fail_first_n: + raise RuntimeError( + "[ONNXRuntimeError] : 3 : NO_SUCHFILE : Load model from " + f"{cache_dir}/models--stub-org--stub-model-onnx-q/snapshots/abc123/" + "model_optimized.onnx failed. 
File doesn't exist" + ) + self.model_name = model_name + + def embed(self, texts: list[str], batch_size: int = 64, **kwargs): + for _ in texts: + yield _StubVector([1.0, 0.0, 0.0, 0.0]) + + @classmethod + def _list_supported_models(cls): + return [ + _StubModelDescription( + model=cls.RESOLVED_MODEL, + sources=_StubModelSource(hf=cls.HF_SOURCE), + ) + ] + + +def _install_self_heal_stub(monkeypatch): + module = type(sys)("fastembed") + setattr(module, "TextEmbedding", _SelfHealStubTextEmbedding) + monkeypatch.setitem(sys.modules, "fastembed", module) + _SelfHealStubTextEmbedding.construct_count = 0 + _SelfHealStubTextEmbedding.fail_first_n = 1 + + +@pytest.mark.asyncio +async def test_fastembed_provider_self_heals_corrupt_model_cache(monkeypatch, tmp_path): + """A NO_SUCHFILE load failure should purge the model cache subdir and retry once.""" + _install_self_heal_stub(monkeypatch) + + # Simulate the partial-download artifact: the model's HF cache subdir exists on disk + # but is incomplete. The provider must remove exactly this subdir, not the whole cache. + cache_dir = tmp_path / "fastembed_cache" + model_subdir = cache_dir / "models--stub-org--stub-model-onnx-q" + model_subdir.mkdir(parents=True) + (model_subdir / "stale.bin").write_text("partial download") + unrelated_subdir = cache_dir / "models--other--keep-me" + unrelated_subdir.mkdir(parents=True) + (unrelated_subdir / "data.bin").write_text("do not delete") + + provider = FastEmbedEmbeddingProvider( + model_name="stub-model", dimensions=4, cache_dir=str(cache_dir) + ) + + vectors = await provider.embed_documents(["recover after corrupt cache"]) + + # Construction was attempted exactly twice: the failing load, then the post-purge retry. + assert _SelfHealStubTextEmbedding.construct_count == 2 + # The corrupt model subdir was removed; the unrelated model cache was untouched. 
+ assert not model_subdir.exists() + assert unrelated_subdir.exists() + assert (unrelated_subdir / "data.bin").read_text() == "do not delete" + # The retry produced real vectors. + assert len(vectors) == 1 + assert len(vectors[0]) == 4 + + +@pytest.mark.asyncio +async def test_fastembed_provider_fails_fast_on_persistent_corrupt_cache(monkeypatch, tmp_path): + """A second consecutive NO_SUCHFILE failure must fail fast (no infinite retry loop).""" + _install_self_heal_stub(monkeypatch) + # Both constructions fail — the retry does not loop. + _SelfHealStubTextEmbedding.fail_first_n = 2 + + cache_dir = tmp_path / "fastembed_cache" + model_subdir = cache_dir / "models--stub-org--stub-model-onnx-q" + model_subdir.mkdir(parents=True) + + provider = FastEmbedEmbeddingProvider( + model_name="stub-model", dimensions=4, cache_dir=str(cache_dir) + ) + + with pytest.raises(RuntimeError, match="NO_SUCHFILE"): + await provider.embed_documents(["still broken"]) + + # Exactly one retry: two total construction attempts, then fail fast. 
+ assert _SelfHealStubTextEmbedding.construct_count == 2 + + +@pytest.mark.asyncio +async def test_fastembed_provider_does_not_purge_on_unrelated_error(monkeypatch, tmp_path): + """A non-cache load error must propagate without deleting any cache subdir.""" + + class _ConfigErrorTextEmbedding: + construct_count = 0 + + def __init__(self, model_name: str, cache_dir: str | None = None, **_kwargs): + type(self).construct_count += 1 + raise ValueError("invalid model configuration") + + @classmethod + def _list_supported_models(cls): + return [ + _StubModelDescription( + model="stub-model", + sources=_StubModelSource(hf="stub-org/stub-model-onnx-q"), + ) + ] + + module = type(sys)("fastembed") + setattr(module, "TextEmbedding", _ConfigErrorTextEmbedding) + monkeypatch.setitem(sys.modules, "fastembed", module) + + cache_dir = tmp_path / "fastembed_cache" + model_subdir = cache_dir / "models--stub-org--stub-model-onnx-q" + model_subdir.mkdir(parents=True) + (model_subdir / "keep.bin").write_text("keep") + + provider = FastEmbedEmbeddingProvider( + model_name="stub-model", dimensions=4, cache_dir=str(cache_dir) + ) + + with pytest.raises(ValueError, match="invalid model configuration"): + await provider.embed_documents(["bad config"]) + + # No retry and no deletion for errors that are not missing-artifact failures. + assert _ConfigErrorTextEmbedding.construct_count == 1 + assert model_subdir.exists() + + +@pytest.mark.asyncio +async def test_fastembed_provider_fails_fast_when_no_cache_subdir_to_purge(monkeypatch, tmp_path): + """If the corrupt error fires but no model subdir exists, fail fast without retry.""" + _install_self_heal_stub(monkeypatch) + + cache_dir = tmp_path / "fastembed_cache" + cache_dir.mkdir(parents=True) + # Intentionally do NOT create the model subdir, so there is nothing to purge. 
+ + provider = FastEmbedEmbeddingProvider( + model_name="stub-model", dimensions=4, cache_dir=str(cache_dir) + ) + + with pytest.raises(RuntimeError, match="NO_SUCHFILE"): + await provider.embed_documents(["nothing to purge"]) + + # Only the initial attempt ran — no purge means no retry. + assert _SelfHealStubTextEmbedding.construct_count == 1 + + +@pytest.mark.asyncio +async def test_fastembed_provider_fails_fast_without_cache_dir(monkeypatch): + """Without a configured cache_dir there is nothing to purge, so fail fast.""" + _install_self_heal_stub(monkeypatch) + + # cache_dir defaults to None — _model_cache_subdirs() returns no candidates. + provider = FastEmbedEmbeddingProvider(model_name="stub-model", dimensions=4) + + with pytest.raises(RuntimeError, match="NO_SUCHFILE"): + await provider.embed_documents(["no cache dir"]) + + assert _SelfHealStubTextEmbedding.construct_count == 1 From 1d90cccf92bb2ecc5ff96b8ff06de6d862ff61c9 Mon Sep 17 00:00:00 2001 From: phernandez Date: Sun, 7 Jun 2026 15:23:31 -0500 Subject: [PATCH 2/3] fix(core): make FastEmbed self-heal conservative to protect cold-load path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The self-heal introduced in this PR misclassified cold/transient model load failures as cache corruption, purged the in-progress download, and left the model unloadable for the whole session — breaking ~20 unrelated search integration tests in CI (cold model cache). Root cause: purge was driven purely by exception-text matching plus a bare subdir.exists() check, so a normal not-yet-downloaded model (or a transient 'from any source' download error) could trigger a destructive purge. Fixes: - Gate the purge on positive filesystem confirmation: the HF snapshot dir exists but the model artifact file (model_optimized.onnx) is missing from every snapshot. A cold load (no snapshot dir) is never treated as corrupt and never purges; the original error re-raises unchanged. 
- After a purge+retry that still fails, re-raise the ORIGINAL exception. - (Codex P2) Resolve the HF cache subdir case-insensitively, mirroring FastEmbed, so model='baai/bge-small-en-v1.5' still resolves correctly. - (claude-review #1) _purge_model_subdirs sets removed=True only when the subdir actually disappears after rmtree (Windows locked-file no-op safety). - (claude-review #2) Tighten the search error formatter ONNX marker from the broad 'load model' to the exact 'load model from' phrasing. - (minor) Document that TextEmbedding._list_supported_models() is an intentional undocumented-API use with a defensive fallback. Verified locally with the real model: a cold load downloads cleanly with no purge; a genuinely corrupt cache (snapshot present, artifact deleted) is detected, purged, and re-downloaded. Added regression tests for cold load, artifact-present non-purge, case-insensitive resolution, and locked-file rmtree no-op. Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: phernandez --- src/basic_memory/mcp/tools/search.py | 5 +- .../repository/fastembed_provider.py | 166 ++++++++++++------ tests/mcp/test_tool_search.py | 15 ++ tests/repository/test_fastembed_provider.py | 138 ++++++++++++++- 4 files changed, 264 insertions(+), 60 deletions(-) diff --git a/src/basic_memory/mcp/tools/search.py b/src/basic_memory/mcp/tools/search.py index 63d84553d..e47c687f4 100644 --- a/src/basic_memory/mcp/tools/search.py +++ b/src/basic_memory/mcp/tools/search.py @@ -88,11 +88,14 @@ def _format_search_error_response( # Basic Memory self-heals by re-downloading on the next load, but if the user still # hits this, point them at the cache dir to clear manually and offer a text fallback. error_lower = error_message.lower() + # "load model from" is the exact ONNX phrasing ("Load model from .onnx failed"). 
+ # The looser "load model" matched unrelated errors, so we keep only the specific phrase + # alongside the onnxruntime / no_suchfile / model_optimized.onnx fingerprints. if ( "onnxruntime" in error_lower or "no_suchfile" in error_lower or "model_optimized.onnx" in error_lower - or "load model" in error_lower + or "load model from" in error_lower ): # Deferred import: keeps the repository layer out of the tool's import graph # (matches the SearchClient deferral below) and is only needed on this error path. diff --git a/src/basic_memory/repository/fastembed_provider.py b/src/basic_memory/repository/fastembed_provider.py index 723734fb5..f532f061b 100644 --- a/src/basic_memory/repository/fastembed_provider.py +++ b/src/basic_memory/repository/fastembed_provider.py @@ -17,12 +17,17 @@ from fastembed import TextEmbedding # pragma: no cover -# Substrings that identify a missing/corrupt on-disk model artifact (as opposed to a -# config error or a genuinely offline machine). An interrupted FastEmbed download leaves -# the HuggingFace snapshot dir present but missing ``model_optimized.onnx``; the ONNX -# runtime then raises ``NO_SUCHFILE`` and every subsequent load repeats it until the -# cache is cleared. Matched case-insensitively against the exception text. -_CORRUPT_MODEL_ERROR_MARKERS = ( +# Substrings that identify the ONNX "model artifact file is missing" load failure (as +# opposed to a config error, a download/network error, or a genuinely offline machine). +# An interrupted FastEmbed download can leave the HuggingFace snapshot dir present but +# missing ``model_optimized.onnx``; the ONNX runtime then raises ``NO_SUCHFILE`` and every +# subsequent load repeats it until the cache is cleared. Matched case-insensitively. +# +# IMPORTANT: this text match is necessary but NOT sufficient to trigger a purge. The error +# text alone cannot distinguish a corrupt cache from a normal cold load (model not yet +# downloaded). 
Purging is gated on a positive filesystem confirmation that the snapshot dir +# exists on disk but the model artifact file is missing — see ``_corrupt_model_subdirs``. +_MISSING_ARTIFACT_ERROR_MARKERS = ( "no_suchfile", "model_optimized.onnx", "file doesn't exist", @@ -68,6 +73,10 @@ def __init__( self._model: TextEmbedding | None = None self._model_lock = asyncio.Lock() + def _resolved_model_name(self) -> str: + """Return the FastEmbed model name after applying our local aliases.""" + return self._MODEL_ALIASES.get(self.model_name, self.model_name) + def _create_model(self) -> "TextEmbedding": try: from fastembed import TextEmbedding @@ -77,7 +86,7 @@ def _create_model(self) -> "TextEmbedding": "Install/update basic-memory to include semantic dependencies: " "pip install -U basic-memory" ) from exc - resolved_model_name = self._MODEL_ALIASES.get(self.model_name, self.model_name) + resolved_model_name = self._resolved_model_name() if self.cache_dir is not None and self.threads is not None: return TextEmbedding( model_name=resolved_model_name, @@ -90,29 +99,47 @@ def _create_model(self) -> "TextEmbedding": return TextEmbedding(model_name=resolved_model_name, threads=self.threads) return TextEmbedding(model_name=resolved_model_name) - def _model_cache_subdirs(self) -> list[Path]: - """Resolve the HuggingFace cache subdir(s) for this model under ``cache_dir``. + def _model_cache_candidates(self) -> list[tuple[Path, str]]: + """Resolve ``(snapshot_dir, model_file)`` pairs for this model under ``cache_dir``. FastEmbed stores each model under ``/models----`` where the repo is the model's HuggingFace source (e.g. ``BAAI/bge-small-en-v1.5`` resolves to - ``models--qdrant--bge-small-en-v1.5-onnx-q``). We resolve the source from FastEmbed's - own model description so the deletion is scoped to exactly this model's tree — never - the whole cache or unrelated models. + ``models--qdrant--bge-small-en-v1.5-onnx-q``). 
We resolve the source and the expected + model artifact filename from FastEmbed's own model description so corruption detection + and deletion are scoped to exactly this model's tree — never the whole cache or + unrelated models. + + Note: ``TextEmbedding._list_supported_models()`` is an intentional use of an + undocumented FastEmbed API. The broad ``except`` below is a known defensive fallback: + if the lookup ever changes shape we degrade to "no candidates" (so we never purge) + rather than crashing the load path. """ if self.cache_dir is None: return [] - resolved_model_name = self._MODEL_ALIASES.get(self.model_name, self.model_name) - hf_sources: set[str] = set() + # FastEmbed matches model names case-insensitively (model_management.py: + # ``model_name.lower() == model.model.lower()``). Mirror that here so a config like + # model="baai/bge-small-en-v1.5" still resolves to the same HF source/cache subdir. + resolved_model_name = self._resolved_model_name().lower() + candidates: list[tuple[Path, str]] = [] + seen: set[Path] = set() + cache_root = Path(self.cache_dir) try: from fastembed import TextEmbedding for description in TextEmbedding._list_supported_models(): - if description.model == resolved_model_name: - hf_source = description.sources.hf - if hf_source: - hf_sources.add(hf_source) - except Exception as exc: # pragma: no cover - defensive: never block self-heal on lookup + if description.model.lower() != resolved_model_name: + continue + hf_source = description.sources.hf + model_file = description.model_file + if not hf_source or not model_file: + continue + # HuggingFace hub names cache dirs ``models-- '--'>``. 
+ snapshot_dir = cache_root / f"models--{hf_source.replace('/', '--')}" + if snapshot_dir not in seen: + seen.add(snapshot_dir) + candidates.append((snapshot_dir, model_file)) + except Exception as exc: # pragma: no cover - defensive: never block load on lookup logger.warning( "Could not resolve FastEmbed model source for cache cleanup: " "model_name={model_name} error={error}", @@ -120,31 +147,62 @@ def _model_cache_subdirs(self) -> list[Path]: error=exc, ) - cache_root = Path(self.cache_dir) - # HuggingFace hub names cache dirs ``models-- '--'>``. - return [cache_root / f"models--{source.replace('/', '--')}" for source in hf_sources] + return candidates - def _purge_corrupt_model_cache(self) -> bool: - """Delete this model's on-disk cache subtree so the next load re-downloads it. + def _corrupt_model_subdirs(self) -> list[Path]: + """Return cache subdirs that are POSITIVELY confirmed corrupt by filesystem state. - Returns True when at least one model cache subdir existed and was removed. + A subdir is corrupt when the HuggingFace snapshot dir exists on disk but the expected + model artifact file (e.g. ``model_optimized.onnx``) is missing from every snapshot — + the exact fingerprint of an interrupted download. A normal cold load (no snapshot dir + yet) is NOT corruption and yields no entries here, so it can never trigger a purge. """ - removed = False - for subdir in self._model_cache_subdirs(): - if subdir.exists(): - logger.warning( - "Removing corrupt FastEmbed model cache to force re-download: {path}", - path=str(subdir), - ) - shutil.rmtree(subdir, ignore_errors=True) - removed = True - return removed + corrupt: list[Path] = [] + for snapshot_dir, model_file in self._model_cache_candidates(): + # Trigger: the model's cache subdir does not exist at all. + # Why: this is a normal cold/first load — the model simply hasn't been + # downloaded yet. Purging here would be wrong and pointless. + # Outcome: skip; not corrupt. 
+ if not snapshot_dir.exists(): + continue + # The artifact lives at snapshots//; an interrupted download + # leaves the snapshot tree but no artifact. rglob covers any revision dir. + artifact_present = any(snapshot_dir.rglob(model_file)) + if not artifact_present: + corrupt.append(snapshot_dir) + return corrupt + + def _purge_model_subdirs(self, subdirs: list[Path]) -> bool: + """Delete confirmed-corrupt cache subtrees so the next load re-downloads them. + + Returns True when at least one targeted subdir is actually gone afterwards. On + Windows a locked file can make ``shutil.rmtree(ignore_errors=True)`` silently no-op; + reporting success in that case would let the caller retry against the same broken + cache, so each subdir only counts as removed once it has actually disappeared. + """ + removed_any = False + for subdir in subdirs: + logger.warning( + "Removing corrupt FastEmbed model cache to force re-download: {path}", + path=str(subdir), + ) + shutil.rmtree(subdir, ignore_errors=True) + # Set removed only when the subdir is truly gone — a silent rmtree no-op + # (e.g. a locked file on Windows) must not be reported as a successful purge. + if not subdir.exists(): + removed_any = True + return removed_any @staticmethod - def _is_corrupt_model_error(exc: Exception) -> bool: - """Return True when the load failure looks like a missing/corrupt model artifact.""" + def _is_missing_artifact_error(exc: Exception) -> bool: + """Return True when the load failure text matches the ONNX missing-artifact signature. + + This is only the text-level gate; it is necessary but NOT sufficient to purge. The + purge additionally requires filesystem-confirmed corruption (``_corrupt_model_subdirs``) + so a transient/offline/"from any source" load error never deletes a valid cache. 
+ """ message = str(exc).lower() - return any(marker in message for marker in _CORRUPT_MODEL_ERROR_MARKERS) + return any(marker in message for marker in _MISSING_ARTIFACT_ERROR_MARKERS) async def _load_model(self) -> "TextEmbedding": if self._model is not None: @@ -157,23 +215,31 @@ async def _load_model(self) -> "TextEmbedding": try: self._model = await asyncio.to_thread(self._create_model) except Exception as exc: - # Trigger: model construction failed with a missing/corrupt-artifact error - # (an interrupted download left a partial snapshot in the cache). + # Trigger: model construction raised the ONNX missing-artifact error AND a + # filesystem check positively confirms a corrupt cache subdir (the + # snapshot dir exists but the model artifact file is missing — the + # fingerprint of an interrupted download). # Why: the raw ONNXRuntimeError is self-perpetuating — every retry hits the - # same broken snapshot until the cache is cleared. Scope the deletion to - # this model's own ``models--...`` subdir and retry exactly once so a - # fresh download can land. A single retry avoids an infinite re-download - # loop if the failure is not actually a cache problem. - # Outcome: on success the user transparently recovers; on a second failure we - # fail fast with the original error so the message stays actionable. - if not self._is_corrupt_model_error(exc): + # same broken snapshot until the cache is cleared. We must NOT misread a + # normal cold load (no snapshot dir, model simply not downloaded yet) or a + # transient/offline "from any source" error as corruption, because purging + # then breaks the happy path. Both the error-text gate and the positive + # filesystem confirmation are required before we delete anything. + # Outcome: confirmed corruption → purge exactly this model's subdir and retry + # once so a fresh download can land. 
Every other failure (including a + # retry that still fails) re-raises the ORIGINAL exception so the + # message stays actionable and we never loop. + if not self._is_missing_artifact_error(exc): + raise + corrupt_subdirs = self._corrupt_model_subdirs() + if not corrupt_subdirs: raise - if not self._purge_corrupt_model_cache(): + if not self._purge_model_subdirs(corrupt_subdirs): raise logger.info( "Retrying FastEmbed model load after clearing corrupt cache: " "model_name={model_name}", - model_name=self._MODEL_ALIASES.get(self.model_name, self.model_name), + model_name=self._resolved_model_name(), ) self._model = await asyncio.to_thread(self._create_model) @@ -181,7 +247,7 @@ async def _load_model(self) -> "TextEmbedding": "FastEmbed model loaded: model_name={model_name} batch_size={batch_size} " "threads={threads} configured_parallel={configured_parallel} " "effective_parallel={effective_parallel}", - model_name=self._MODEL_ALIASES.get(self.model_name, self.model_name), + model_name=self._resolved_model_name(), batch_size=self.batch_size, threads=self.threads, configured_parallel=self.parallel, diff --git a/tests/mcp/test_tool_search.py b/tests/mcp/test_tool_search.py index a04fa9db1..2c2bc535c 100644 --- a/tests/mcp/test_tool_search.py +++ b/tests/mcp/test_tool_search.py @@ -500,6 +500,21 @@ def test_format_search_error_corrupt_embedding_model(self): # Offers full-text search as an immediate workaround. assert 'search_type="text"' in result + def test_format_search_error_load_model_phrase_does_not_overmatch(self): + """A generic error mentioning 'load model' (no 'from') must not hit the embedding branch. + + The marker was tightened from the broad 'load model' to the exact ONNX phrasing + 'load model from' so unrelated failures fall through to the generic handler. 
+ """ + result = _format_search_error_response( + "test-project", + "Failed to load model configuration for this project", + "test query", + ) + + assert "# Search Failed - Embedding Model Missing or Corrupt" not in result + assert "# Search Failed" in result + def test_format_search_error_generic(self): """Test formatting for generic errors.""" result = _format_search_error_response("test-project", "unknown error", "test query") diff --git a/tests/repository/test_fastembed_provider.py b/tests/repository/test_fastembed_provider.py index f3fa4863f..a64ded08a 100644 --- a/tests/repository/test_fastembed_provider.py +++ b/tests/repository/test_fastembed_provider.py @@ -217,9 +217,11 @@ def embed(self, texts: list[str], **_kwargs): # --- Self-heal of corrupt/partial model cache (#895) --- # # A real interrupted FastEmbed download is non-deterministic and offline-unfriendly, so we -# stub TextEmbedding to (a) advertise an HF source via _list_supported_models so the provider -# can compute the exact models---- cache subdir, and (b) raise a NO_SUCHFILE-style -# ONNX error on the first construction. This is the justified mock case called out in the task. +# stub TextEmbedding to (a) advertise an HF source + model_file via _list_supported_models so +# the provider can compute the exact models---- cache subdir and the artifact name, +# and (b) raise a NO_SUCHFILE-style ONNX error on the first construction. This is the justified +# mock case called out in the task. The purge is gated on a filesystem confirmation that the +# snapshot dir exists but the artifact is missing, so each test stages the cache accordingly. 
@dataclass @@ -231,6 +233,7 @@ class _StubModelSource: class _StubModelDescription: model: str sources: _StubModelSource + model_file: str = "model_optimized.onnx" class _SelfHealStubTextEmbedding: @@ -240,6 +243,7 @@ class _SelfHealStubTextEmbedding: construct_count = 0 HF_SOURCE = "stub-org/stub-model-onnx-q" RESOLVED_MODEL = "stub-model" + MODEL_FILE = "model_optimized.onnx" def __init__(self, model_name: str, cache_dir: str | None = None, threads: int | None = None): type(self).construct_count += 1 @@ -257,11 +261,24 @@ def embed(self, texts: list[str], batch_size: int = 64, **kwargs): @classmethod def _list_supported_models(cls): + # Include decoys so the resolver's skip branches are exercised: a model with a + # different name (name-mismatch skip) and one with an empty HF source (no-source skip). return [ + _StubModelDescription( + model="some-other-model", + sources=_StubModelSource(hf="other-org/other-model"), + model_file=cls.MODEL_FILE, + ), + _StubModelDescription( + model=cls.RESOLVED_MODEL, + sources=_StubModelSource(hf=""), + model_file=cls.MODEL_FILE, + ), _StubModelDescription( model=cls.RESOLVED_MODEL, sources=_StubModelSource(hf=cls.HF_SOURCE), - ) + model_file=cls.MODEL_FILE, + ), ] @@ -271,6 +288,7 @@ def _install_self_heal_stub(monkeypatch): monkeypatch.setitem(sys.modules, "fastembed", module) _SelfHealStubTextEmbedding.construct_count = 0 _SelfHealStubTextEmbedding.fail_first_n = 1 + _SelfHealStubTextEmbedding.RESOLVED_MODEL = "stub-model" @pytest.mark.asyncio @@ -327,6 +345,40 @@ async def test_fastembed_provider_fails_fast_on_persistent_corrupt_cache(monkeyp assert _SelfHealStubTextEmbedding.construct_count == 2 +@pytest.mark.asyncio +async def test_fastembed_provider_fails_fast_when_purge_silently_noops(monkeypatch, tmp_path): + """If rmtree silently fails (e.g. Windows locked files), do not claim success or retry. + + shutil.rmtree(ignore_errors=True) can no-op when a file is locked. 
Treating that as a + successful purge would retry against the same broken cache; instead the load must fail + fast with the original error. We inject a no-op rmtree to simulate the locked-file case. + """ + import basic_memory.repository.fastembed_provider as fastembed_provider + + _install_self_heal_stub(monkeypatch) + + cache_dir = tmp_path / "fastembed_cache" + model_subdir = cache_dir / "models--stub-org--stub-model-onnx-q" + model_subdir.mkdir(parents=True) + (model_subdir / "stale.bin").write_text("partial download") + + # Simulate a deletion that silently fails to remove the directory. + monkeypatch.setattr( + fastembed_provider.shutil, "rmtree", lambda *args, **kwargs: None, raising=True + ) + + provider = FastEmbedEmbeddingProvider( + model_name="stub-model", dimensions=4, cache_dir=str(cache_dir) + ) + + with pytest.raises(RuntimeError, match="NO_SUCHFILE"): + await provider.embed_documents(["locked cache"]) + + # rmtree no-oped, so the subdir survives and no retry was attempted. + assert model_subdir.exists() + assert _SelfHealStubTextEmbedding.construct_count == 1 + + @pytest.mark.asyncio async def test_fastembed_provider_does_not_purge_on_unrelated_error(monkeypatch, tmp_path): """A non-cache load error must propagate without deleting any cache subdir.""" @@ -369,13 +421,19 @@ def _list_supported_models(cls): @pytest.mark.asyncio -async def test_fastembed_provider_fails_fast_when_no_cache_subdir_to_purge(monkeypatch, tmp_path): - """If the corrupt error fires but no model subdir exists, fail fast without retry.""" +async def test_fastembed_provider_cold_load_does_not_purge_or_retry(monkeypatch, tmp_path): + """A cold load (snapshot dir absent) must NOT be misread as corruption. + + This is the CI happy-path regression: on a cold model cache the first load can fail + before the model is downloaded, but with no snapshot dir there is nothing corrupt to + purge. 
The original error must propagate unchanged with no retry, so a normal + not-yet-downloaded model is never deleted. + """ _install_self_heal_stub(monkeypatch) cache_dir = tmp_path / "fastembed_cache" cache_dir.mkdir(parents=True) - # Intentionally do NOT create the model subdir, so there is nothing to purge. + # Intentionally do NOT create the model subdir: this is a normal cold load. provider = FastEmbedEmbeddingProvider( model_name="stub-model", dimensions=4, cache_dir=str(cache_dir) @@ -384,8 +442,70 @@ async def test_fastembed_provider_fails_fast_when_no_cache_subdir_to_purge(monke with pytest.raises(RuntimeError, match="NO_SUCHFILE"): await provider.embed_documents(["nothing to purge"]) - # Only the initial attempt ran — no purge means no retry. + # Only the initial attempt ran — no snapshot dir means no confirmed corruption, no retry. + assert _SelfHealStubTextEmbedding.construct_count == 1 + + +@pytest.mark.asyncio +async def test_fastembed_provider_does_not_purge_when_artifact_present(monkeypatch, tmp_path): + """A NO_SUCHFILE-shaped error must NOT purge when the artifact is actually on disk. + + The error-text gate alone is not enough: if filesystem inspection finds the model + artifact present in the snapshot, the cache is not corrupt and must be left intact. + Re-raise the original error rather than deleting a healthy cache. + """ + _install_self_heal_stub(monkeypatch) + # Construction keeps failing with the NO_SUCHFILE text regardless of cache state. 
+ _SelfHealStubTextEmbedding.fail_first_n = 99 + + cache_dir = tmp_path / "fastembed_cache" + snapshot_dir = cache_dir / "models--stub-org--stub-model-onnx-q" / "snapshots" / "rev1" + snapshot_dir.mkdir(parents=True) + artifact = snapshot_dir / "model_optimized.onnx" + artifact.write_text("valid model artifact") + + provider = FastEmbedEmbeddingProvider( + model_name="stub-model", dimensions=4, cache_dir=str(cache_dir) + ) + + with pytest.raises(RuntimeError, match="NO_SUCHFILE"): + await provider.embed_documents(["artifact is fine"]) + + # No purge and no retry: the artifact is present, so the cache is not corrupt. assert _SelfHealStubTextEmbedding.construct_count == 1 + assert artifact.exists() + assert artifact.read_text() == "valid model artifact" + + +@pytest.mark.asyncio +async def test_fastembed_provider_self_heals_with_case_insensitive_model_name( + monkeypatch, tmp_path +): + """A lower-cased model name must still resolve the HF cache subdir for the purge. + + FastEmbed matches model names case-insensitively, so a config like + model="baai/bge-small-en-v1.5" is valid. The purge resolver must mirror that, otherwise + the corrupt subdir resolves to nothing and self-heal silently does nothing. + """ + _install_self_heal_stub(monkeypatch) + # Advertise the model under its canonical mixed-case name. + _SelfHealStubTextEmbedding.RESOLVED_MODEL = "Stub-Model" + + cache_dir = tmp_path / "fastembed_cache" + model_subdir = cache_dir / "models--stub-org--stub-model-onnx-q" + model_subdir.mkdir(parents=True) + (model_subdir / "stale.bin").write_text("partial download") + + # Configure the provider with the lower-cased spelling. 
+ provider = FastEmbedEmbeddingProvider( + model_name="stub-model", dimensions=4, cache_dir=str(cache_dir) + ) + + vectors = await provider.embed_documents(["recover with case-insensitive name"]) + + assert _SelfHealStubTextEmbedding.construct_count == 2 + assert not model_subdir.exists() + assert len(vectors) == 1 @pytest.mark.asyncio @@ -393,7 +513,7 @@ async def test_fastembed_provider_fails_fast_without_cache_dir(monkeypatch): """Without a configured cache_dir there is nothing to purge, so fail fast.""" _install_self_heal_stub(monkeypatch) - # cache_dir defaults to None — _model_cache_subdirs() returns no candidates. + # cache_dir defaults to None — _model_cache_candidates() returns no candidates. provider = FastEmbedEmbeddingProvider(model_name="stub-model", dimensions=4) with pytest.raises(RuntimeError, match="NO_SUCHFILE"): From 8bde4976cd4956acd771a04acb4631bef6952088 Mon Sep 17 00:00:00 2001 From: phernandez Date: Sun, 7 Jun 2026 15:58:00 -0500 Subject: [PATCH 3/3] fix(core): detect corrupt FastEmbed snapshot per-revision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _corrupt_model_subdirs scanned the whole models--{org}--{name} tree with rglob, so an older complete snapshot masked a corrupt current revision and self-heal skipped the purge — leaving the broken snapshot self-perpetuating. Inspect each snapshot revision individually and flag the model when any revision is missing the artifact (Codex review on #900). 
Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: phernandez --- .../repository/fastembed_provider.py | 43 +++++++++++++------ tests/repository/test_fastembed_provider.py | 35 +++++++++++++++ 2 files changed, 66 insertions(+), 12 deletions(-) diff --git a/src/basic_memory/repository/fastembed_provider.py b/src/basic_memory/repository/fastembed_provider.py index f532f061b..46ab9af34 100644 --- a/src/basic_memory/repository/fastembed_provider.py +++ b/src/basic_memory/repository/fastembed_provider.py @@ -152,24 +152,43 @@ def _model_cache_candidates(self) -> list[tuple[Path, str]]: def _corrupt_model_subdirs(self) -> list[Path]: """Return cache subdirs that are POSITIVELY confirmed corrupt by filesystem state. - A subdir is corrupt when the HuggingFace snapshot dir exists on disk but the expected - model artifact file (e.g. ``model_optimized.onnx``) is missing from every snapshot — - the exact fingerprint of an interrupted download. A normal cold load (no snapshot dir - yet) is NOT corruption and yields no entries here, so it can never trigger a purge. + A model is corrupt when its HuggingFace cache dir exists on disk but at least one + materialized snapshot revision is missing the expected model artifact file (e.g. + ``model_optimized.onnx``) — the exact fingerprint of an interrupted download. A normal + cold load (no cache dir yet) is NOT corruption and yields no entries here, so it can + never trigger a purge. + + Inspection is PER-REVISION on purpose: HuggingFace keeps multiple revisions under one + ``models--`` tree, so a corrupt current snapshot can coexist with an older + complete one. Checking ``rglob(model_file)`` across the whole tree would let the old + artifact mask the broken current revision and leave it self-perpetuating, so we + require every revision to carry the artifact. """ corrupt: list[Path] = [] - for snapshot_dir, model_file in self._model_cache_candidates(): - # Trigger: the model's cache subdir does not exist at all. 
+ for model_dir, model_file in self._model_cache_candidates(): + # Trigger: the model's cache dir does not exist at all. # Why: this is a normal cold/first load — the model simply hasn't been # downloaded yet. Purging here would be wrong and pointless. # Outcome: skip; not corrupt. - if not snapshot_dir.exists(): + if not model_dir.exists(): continue - # The artifact lives at snapshots//; an interrupted download - # leaves the snapshot tree but no artifact. rglob covers any revision dir. - artifact_present = any(snapshot_dir.rglob(model_file)) - if not artifact_present: - corrupt.append(snapshot_dir) + snapshots_root = model_dir / "snapshots" + revision_dirs = ( + [d for d in snapshots_root.iterdir() if d.is_dir()] + if snapshots_root.is_dir() + else [] + ) + # Trigger: the cache dir exists but no snapshot revision has materialized. + # Why/Outcome: an interrupted download that never wrote a revision — corrupt. + if not revision_dirs: + corrupt.append(model_dir) + continue + # Trigger: any individual revision is missing the artifact (rglob covers the + # artifact at any depth within that revision, e.g. snapshots//onnx/...). + # Why: a complete OLD revision must not mask a corrupt CURRENT one. + # Outcome: flag the model dir so the whole tree re-downloads cleanly. 
+ if any(not any(rev.rglob(model_file)) for rev in revision_dirs): + corrupt.append(model_dir) return corrupt def _purge_model_subdirs(self, subdirs: list[Path]) -> bool: diff --git a/tests/repository/test_fastembed_provider.py b/tests/repository/test_fastembed_provider.py index a64ded08a..a672e0778 100644 --- a/tests/repository/test_fastembed_provider.py +++ b/tests/repository/test_fastembed_provider.py @@ -477,6 +477,41 @@ async def test_fastembed_provider_does_not_purge_when_artifact_present(monkeypat assert artifact.read_text() == "valid model artifact" +@pytest.mark.asyncio +async def test_fastembed_provider_self_heals_when_current_revision_corrupt(monkeypatch, tmp_path): + """A corrupt current revision must be detected even when an older revision is complete. + + HuggingFace keeps multiple revisions under one models-- tree. Per-revision + inspection is required: a whole-tree rglob would find the OLD revision's artifact and + wrongly conclude the cache is healthy, leaving the broken current snapshot + self-perpetuating (PR #900 review). + """ + _install_self_heal_stub(monkeypatch) + + cache_dir = tmp_path / "fastembed_cache" + snapshots = cache_dir / "models--stub-org--stub-model-onnx-q" / "snapshots" + # Old revision: complete (has the artifact). + good_rev = snapshots / "rev_old" + good_rev.mkdir(parents=True) + (good_rev / "model_optimized.onnx").write_text("complete old artifact") + # Current revision: interrupted download — directory present, artifact missing. + bad_rev = snapshots / "rev_current" + bad_rev.mkdir(parents=True) + (bad_rev / "stale.partial").write_text("partial download") + + provider = FastEmbedEmbeddingProvider( + model_name="stub-model", dimensions=4, cache_dir=str(cache_dir) + ) + + vectors = await provider.embed_documents(["recover from mixed-revision cache"]) + + # The corrupt-current-revision cache was detected (not masked by the old revision), + # purged, and the retry succeeded. 
+ assert _SelfHealStubTextEmbedding.construct_count == 2 + assert not (cache_dir / "models--stub-org--stub-model-onnx-q").exists() + assert len(vectors) == 1 + + @pytest.mark.asyncio async def test_fastembed_provider_self_heals_with_case_insensitive_model_name( monkeypatch, tmp_path