From 01d654692d0f0f154b510662853514899c0acf3a Mon Sep 17 00:00:00 2001 From: Rene Cannao Date: Mon, 22 Dec 2025 18:01:48 +0000 Subject: [PATCH 1/5] Integrate sqlite-rembed for text embedding generation Add support for sqlite-rembed Rust SQLite extension to enable text embedding generation from remote AI APIs (OpenAI, Nomic, Ollama, Cohere, etc.) within ProxySQL's SQLite3 Server. Changes: 1. Build system integration for Rust static library compilation - Rust toolchain detection in deps/Makefile - Static library target: sqlite3/libsqlite_rembed.a - Linking integration in lib/Makefile and src/Makefile 2. Extension auto-registration in Admin_Bootstrap.cpp - Declare sqlite3_rembed_init() extern C function - Register via sqlite3_auto_extension() after sqlite-vec 3. Documentation updates - doc/sqlite-rembed-integration.md: comprehensive integration guide - doc/SQLite3-Server.md: usage examples and provider list 4. Source code inclusion - deps/sqlite3/sqlite-rembed-source/: upstream sqlite-rembed v0.0.1-alpha.9 The integration follows the same pattern as sqlite-vec (static linking with auto-registration). Provides rembed() function and temp.rembed_clients virtual table for embedding generation. Build requires Rust toolchain (cargo, rustc) and clang/libclang-dev. 
--- deps/Makefile | 29 +- .../.github/workflows/release.yaml | 122 +++ .../.github/workflows/test.yaml | 60 ++ deps/sqlite3/sqlite-rembed-source/.gitignore | 3 + deps/sqlite3/sqlite-rembed-source/Cargo.lock | 847 ++++++++++++++++++ deps/sqlite3/sqlite-rembed-source/Cargo.toml | 14 + .../sqlite-rembed-source/LICENSE-APACHE | 201 +++++ deps/sqlite3/sqlite-rembed-source/LICENSE-MIT | 21 + deps/sqlite3/sqlite-rembed-source/Makefile | 141 +++ deps/sqlite3/sqlite-rembed-source/README.md | 134 +++ deps/sqlite3/sqlite-rembed-source/VERSION | 1 + deps/sqlite3/sqlite-rembed-source/build.rs | 9 + .../examples/simple-search/demo.sql | 48 + .../scripts/publish-release.sh | 27 + .../sqlite-rembed-source/sqlite-dist.toml | 21 + .../sqlite-rembed-source/sqlite-rembed.h | 14 + .../sqlite-rembed-source/src/clients.rs | 516 +++++++++++ .../sqlite-rembed-source/src/clients_vtab.rs | 184 ++++ deps/sqlite3/sqlite-rembed-source/src/lib.rs | 169 ++++ deps/sqlite3/sqlite-rembed-source/test.sql | 37 + doc/SQLite3-Server.md | 41 +- doc/sqlite-rembed-integration.md | 235 +++++ lib/Admin_Bootstrap.cpp | 2 + lib/Makefile | 1 + src/Makefile | 3 +- 25 files changed, 2875 insertions(+), 5 deletions(-) create mode 100644 deps/sqlite3/sqlite-rembed-source/.github/workflows/release.yaml create mode 100644 deps/sqlite3/sqlite-rembed-source/.github/workflows/test.yaml create mode 100644 deps/sqlite3/sqlite-rembed-source/.gitignore create mode 100644 deps/sqlite3/sqlite-rembed-source/Cargo.lock create mode 100644 deps/sqlite3/sqlite-rembed-source/Cargo.toml create mode 100644 deps/sqlite3/sqlite-rembed-source/LICENSE-APACHE create mode 100644 deps/sqlite3/sqlite-rembed-source/LICENSE-MIT create mode 100644 deps/sqlite3/sqlite-rembed-source/Makefile create mode 100644 deps/sqlite3/sqlite-rembed-source/README.md create mode 100644 deps/sqlite3/sqlite-rembed-source/VERSION create mode 100644 deps/sqlite3/sqlite-rembed-source/build.rs create mode 100644 
deps/sqlite3/sqlite-rembed-source/examples/simple-search/demo.sql create mode 100755 deps/sqlite3/sqlite-rembed-source/scripts/publish-release.sh create mode 100644 deps/sqlite3/sqlite-rembed-source/sqlite-dist.toml create mode 100644 deps/sqlite3/sqlite-rembed-source/sqlite-rembed.h create mode 100644 deps/sqlite3/sqlite-rembed-source/src/clients.rs create mode 100644 deps/sqlite3/sqlite-rembed-source/src/clients_vtab.rs create mode 100644 deps/sqlite3/sqlite-rembed-source/src/lib.rs create mode 100644 deps/sqlite3/sqlite-rembed-source/test.sql create mode 100644 doc/sqlite-rembed-integration.md
diff --git a/deps/Makefile b/deps/Makefile
index 3139ab77f4..d88e48642a 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -4,6 +4,20 @@ PROXYSQL_PATH := $(shell while [ ! -f ./src/proxysql_global.cpp ]; do cd ..; don
 
 include $(PROXYSQL_PATH)/include/makefiles_vars.mk
 
+# Rust toolchain detection. Availability is enforced by the sqlite-rembed
+# recipe rather than at parse time, so goals that never invoke cargo (such as
+# cleanup goals) keep working on hosts without Rust. `command -v` is used
+# because, unlike `which`, it is specified by POSIX sh.
+RUSTC := $(shell command -v rustc 2>/dev/null)
+CARGO := $(shell command -v cargo 2>/dev/null)
+
+# SQLite environment variables for sqlite-rembed build.
+# Simply expanded (:=) with $(CURDIR): an exported recursive variable holding
+# $(shell pwd) is re-evaluated each time make builds a child environment.
+export SQLITE3_INCLUDE_DIR := $(CURDIR)/sqlite3/sqlite-amalgamation-3500400
+export SQLITE3_LIB_DIR := $(CURDIR)/sqlite3/sqlite-amalgamation-3500400
+export SQLITE3_STATIC=1
+
 # to compile libmariadb_client with support for valgrind enabled, run:
 # export USEVALGRIND=1
 
@@ -250,7 +264,20 @@ sqlite3/sqlite3/vec.o: sqlite3/sqlite3/sqlite3.o
 	cd sqlite3/sqlite3 && cp ../sqlite-vec-source/sqlite-vec.c . && cp ../sqlite-vec-source/sqlite-vec.h .
 	cd sqlite3/sqlite3 && ${CC} ${MYCFLAGS} -fPIC -c -o vec.o sqlite-vec.c -DSQLITE_CORE -DSQLITE_VEC_STATIC -DSQLITE_ENABLE_MEMORY_MANAGEMENT -DSQLITE_ENABLE_JSON1 -DSQLITE_DLL=1
 
-sqlite3: sqlite3/sqlite3/sqlite3.o sqlite3/sqlite3/vec.o
+# Static sqlite-rembed library, built with cargo and copied next to the other
+# sqlite3 artifacts. Cargo.lock is a prerequisite so that a change of pinned
+# crate versions triggers a rebuild. sqlite3.o is order-only: presumably its
+# recipe prepares the amalgamation directory that SQLITE3_INCLUDE_DIR points
+# at (TODO confirm), so it has to run first under `make -j` without forcing a
+# cargo rebuild whenever it is refreshed.
+# SQLITE3_* reach cargo through the `export` directives near the top of this file.
+sqlite3/libsqlite_rembed.a: sqlite3/sqlite-rembed-source/Cargo.toml sqlite3/sqlite-rembed-source/Cargo.lock $(shell find sqlite3/sqlite-rembed-source -type f -name '*.rs') | sqlite3/sqlite3/sqlite3.o
+	@test -n "$(RUSTC)" || { echo "rustc not found. Please install Rust toolchain" >&2; exit 1; }
+	@test -n "$(CARGO)" || { echo "cargo not found. Please install Rust toolchain" >&2; exit 1; }
+	cd sqlite3/sqlite-rembed-source && $(CARGO) build --release --features=sqlite-loadable/static --lib
+	cp sqlite3/sqlite-rembed-source/target/release/libsqlite_rembed.a $@
+
+sqlite3: sqlite3/sqlite3/sqlite3.o sqlite3/sqlite3/vec.o sqlite3/libsqlite_rembed.a
 
 
 libconfig/libconfig/out/libconfig++.a:
diff --git a/deps/sqlite3/sqlite-rembed-source/.github/workflows/release.yaml b/deps/sqlite3/sqlite-rembed-source/.github/workflows/release.yaml new file mode 100644 index 0000000000..97e26912ce --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/.github/workflows/release.yaml @@ -0,0 +1,122 @@ +name: "Release" +on: + release: + types: [published] +permissions: + contents: read +jobs: + build-linux-x86_64-extension: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + - run: make loadable-release + - uses: actions/upload-artifact@v4 + with: + name: sqlite-rembed-linux-x86_64-extension + path: dist/release/* + build-macos-x86_64-extension: + runs-on: macos-12 + steps: + - uses: actions/checkout@v4 + - run: make loadable-release + - uses: actions/upload-artifact@v4 + with: + name: sqlite-rembed-macos-x86_64-extension + path: dist/release/* + build-macos-aarch64-extension: + runs-on: macos-14 + steps: + - uses: actions/checkout@v4 + - run: make loadable-release + - uses: actions/upload-artifact@v4 + with: + name: sqlite-rembed-macos-aarch64-extension + path: dist/release/* + build-windows-x86_64-extension: + runs-on: windows-2019 + steps: + - uses: actions/checkout@v4 + - uses: 
actions-rs/toolchain@v1 + with: + toolchain: stable + - run: make loadable-release + - uses: actions/upload-artifact@v4 + with: + name: sqlite-rembed-windows-x86_64-extension + path: dist/release/* + dist: + runs-on: ubuntu-latest + needs: + [ + build-linux-x86_64-extension, + build-macos-x86_64-extension, + build-macos-aarch64-extension, + build-windows-x86_64-extension, + ] + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + - uses: actions/download-artifact@v4 + with: + name: sqlite-rembed-linux-x86_64-extension + path: dist/linux-x86_64 + - uses: actions/download-artifact@v4 + with: + name: sqlite-rembed-macos-x86_64-extension + path: dist/macos-x86_64 + - uses: actions/download-artifact@v4 + with: + name: sqlite-rembed-macos-aarch64-extension + path: dist/macos-aarch64 + - uses: actions/download-artifact@v4 + with: + name: sqlite-rembed-windows-x86_64-extension + path: dist/windows-x86_64 + - run: | + curl -L https://github.com/asg017/sqlite-dist/releases/download/v0.0.1-alpha.7/sqlite-dist-x86_64-unknown-linux-gnu.tar.xz \ + | tar xfJ - --strip-components 1 + - run: make sqlite-rembed.h + - run: ./sqlite-dist ./sqlite-dist.toml --input dist/ --output distx/ --version $(cat VERSION) + - run: | + gh release upload ${{ github.ref_name }} \ + distx/github_releases/* \ + distx/spm/* \ + distx/sqlpkg/* \ + distx/checksums.txt \ + distx/sqlite-dist-manifest.json \ + distx/install.sh + env: + GH_TOKEN: ${{ github.token }} + - name: Install node + uses: actions/setup-node@v3 + with: + node-version: "16" + registry-url: "https://registry.npmjs.org" + - run: | + npm publish --access public distx/npm/sqlite-rembed-darwin-arm64.tar.gz + npm publish --access public distx/npm/sqlite-rembed-darwin-x64.tar.gz + npm publish --access public distx/npm/sqlite-rembed-linux-x64.tar.gz + npm publish --access public distx/npm/sqlite-rembed.tar.gz + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + - uses: ruby/setup-ruby@v1 + with: + ruby-version: 3.2 + - run: 
| + for file in distx/gem/*; do + gem push "$file" + done + env: + GEM_HOST_API_KEY: ${{ secrets.GEM_HOST_API_KEY }} + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - run: pip install twine + - run: | + twine upload distx/pip/* + twine upload distx/datasette/* + twine upload distx/sqlite_utils/* + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} diff --git a/deps/sqlite3/sqlite-rembed-source/.github/workflows/test.yaml b/deps/sqlite3/sqlite-rembed-source/.github/workflows/test.yaml new file mode 100644 index 0000000000..24f63c296a --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/.github/workflows/test.yaml @@ -0,0 +1,60 @@ +name: "Test" +on: + push: + branches: + - main +permissions: + contents: read +jobs: + build-linux-x86_64-extension: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + - run: make loadable static + #- run: pip install pytest numpy; make test-loadable + - uses: actions/upload-artifact@v4 + with: + name: sqlite-rembed-linux-x86_64-extension + path: dist/* + build-macos-x86_64-extension: + runs-on: macos-12 + steps: + - uses: actions/checkout@v4 + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + - run: make loadable static + #- run: /usr/local/opt/python@3/libexec/bin/python -m pip install pytest numpy; make test-loadable python=/usr/local/opt/python@3/libexec/bin/python + - uses: actions/upload-artifact@v4 + with: + name: sqlite-rembed-macos-x86_64-extension + path: dist/* + build-macos-aarch64-extension: + runs-on: macos-14 + steps: + - uses: actions/checkout@v4 + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + - run: make loadable static + #- run: /opt/homebrew/opt/python3/libexec/bin/python -m pip install pytest numpy --break-system-packages; make test-loadable python=/opt/homebrew/opt/python3/libexec/bin/python + - uses: actions/upload-artifact@v4 + with: + name: 
sqlite-rembed-macos-aarch64-extension + path: dist/* + build-windows-x86_64-extension: + runs-on: windows-2019 + steps: + - uses: actions/checkout@v4 + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + - run: make loadable static + #- run: pip install pytest numpy; make test-loadable + - uses: actions/upload-artifact@v4 + with: + name: sqlite-rembed-windows-x86_64-extension + path: dist/* diff --git a/deps/sqlite3/sqlite-rembed-source/.gitignore b/deps/sqlite3/sqlite-rembed-source/.gitignore new file mode 100644 index 0000000000..bc97e80e27 --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/.gitignore @@ -0,0 +1,3 @@ +/target +.env +dist/ diff --git a/deps/sqlite3/sqlite-rembed-source/Cargo.lock b/deps/sqlite3/sqlite-rembed-source/Cargo.lock new file mode 100644 index 0000000000..ff31d5ae3c --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/Cargo.lock @@ -0,0 +1,847 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "base64" +version = "0.22.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bindgen" +version = "0.60.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "062dddbc1ba4aca46de6338e2bf87771414c335f7b2f2036e8f3e9befebf88e6" +dependencies = [ + "bitflags 1.3.2", + "cexpr", + "clang-sys", + "clap", + "env_logger", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "which", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cc" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clang-sys" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f803f94ecf597339c7a34eed2036ef83f86aaba937f001f7c5b5e251f043f1f9" +dependencies = [ + 
"glob", + "libc", + "libloading", +] + +[[package]] +name = "clap" +version = "3.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" +dependencies = [ + "atty", + "bitflags 1.3.2", + "clap_lex", + "indexmap", + "strsim", + "termcolor", + "textwrap", +] + +[[package]] +name = "clap_lex" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" +dependencies = [ + "os_str_bytes", +] + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "either" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" + +[[package]] +name = "env_logger" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" +dependencies = [ + "atty", + "humantime", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "flate2" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" 
+dependencies = [ + "percent-encoding", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "libloading" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" +dependencies = [ + "cfg-if", + "windows-targets", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "memchr" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" +dependencies = [ + "adler", +] + +[[package]] +name = "nom" +version = "7.1.3" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "os_str_bytes" +version = "6.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" + +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "proc-macro2" +version = "1.0.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + 
"aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" + +[[package]] +name = "ring" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +dependencies = [ + "cc", + "cfg-if", + "getrandom", + "libc", + "spin", + "untrusted", + "windows-sys", +] + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags 2.5.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "rustls" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" +dependencies = [ + "log", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pki-types" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" + +[[package]] +name = "rustls-webpki" +version = "0.102.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff448f7e92e913c4b7d4c6d8e4540a1724b319b4152b8aef6d4cf8339712b33e" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "serde" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "serde_json" +version = "1.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + +[[package]] +name = "sqlite-loadable" +version = "0.0.6-alpha.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daaaad0ad506b154a72bf01fde23235377c01256abd4bd25e17419dbfd4e28a0" +dependencies = [ + "bitflags 1.3.2", + "serde", + "serde_json", + "sqlite-loadable-macros", + "sqlite3ext-sys", +] + +[[package]] +name = "sqlite-loadable-macros" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96037a396115a2675db783f700faad878b44c8ff56c8a29c3404649a517a5e8f" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "sqlite-rembed" +version = "0.0.1-alpha.9" +dependencies = [ + "serde_json", + "sqlite-loadable", + "ureq", + "zerocopy", +] + +[[package]] +name = 
"sqlite3ext-sys" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3afdc2b3dc08f16d6eecf8aa07d19975a268603ab1cca67d3f9b4172c507cf16" +dependencies = [ + "bindgen", + "cc", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "subtle" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "textwrap" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "unicode-bidi" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-normalization" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "ureq" +version = "2.9.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d11a831e3c0b56e438a28308e7c810799e3c118417f342d30ecec080105395cd" +dependencies = [ + "base64", + "flate2", + "log", + "once_cell", + "rustls", + "rustls-pki-types", + "rustls-webpki", + "serde", + "serde_json", + "url", + "webpki-roots", +] + +[[package]] +name = "url" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "webpki-roots" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3de34ae270483955a94f4b21bdaaeb83d508bb84a01435f393818edb0012009" +dependencies = [ + 
"rustls-pki-types", +] + +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" + +[[package]] +name = "zerocopy" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" diff --git a/deps/sqlite3/sqlite-rembed-source/Cargo.toml b/deps/sqlite3/sqlite-rembed-source/Cargo.toml new file mode 100644 index 0000000000..5d0bacb2f0 --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "sqlite-rembed" +version = "0.0.1-alpha.9" +edition = "2021" + +[dependencies] +serde_json = "1.0.117" +sqlite-loadable = "0.0.6-alpha.6" +ureq = {version="2.9.7", features=["json"]} +zerocopy = "0.7.34" + +[lib] +crate-type=["cdylib", "staticlib", "lib"] + diff --git a/deps/sqlite3/sqlite-rembed-source/LICENSE-APACHE b/deps/sqlite3/sqlite-rembed-source/LICENSE-APACHE new file mode 100644 index 0000000000..f49a4e16e6 --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/deps/sqlite3/sqlite-rembed-source/LICENSE-MIT b/deps/sqlite3/sqlite-rembed-source/LICENSE-MIT new file mode 100644 index 0000000000..9736ab442a --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/LICENSE-MIT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Alex Garcia + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/deps/sqlite3/sqlite-rembed-source/Makefile b/deps/sqlite3/sqlite-rembed-source/Makefile new file mode 100644 index 0000000000..9bd7661aa4 --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/Makefile @@ -0,0 +1,141 @@ +SHELL := /bin/bash + +VERSION=$(shell cat VERSION) + +ifeq ($(shell uname -s),Darwin) +CONFIG_DARWIN=y +else ifeq ($(OS),Windows_NT) +CONFIG_WINDOWS=y +else +CONFIG_LINUX=y +endif + +LIBRARY_PREFIX=lib +ifdef CONFIG_DARWIN +LOADABLE_EXTENSION=dylib +STATIC_EXTENSION=a +endif + +ifdef CONFIG_LINUX +LOADABLE_EXTENSION=so +STATIC_EXTENSION=a +endif + + +ifdef CONFIG_WINDOWS +LOADABLE_EXTENSION=dll +LIBRARY_PREFIX= +STATIC_EXTENSION=lib +endif + +prefix=dist +TARGET_LOADABLE=$(prefix)/debug/rembed0.$(LOADABLE_EXTENSION) +TARGET_LOADABLE_RELEASE=$(prefix)/release/rembed0.$(LOADABLE_EXTENSION) + +TARGET_STATIC=$(prefix)/debug/$(LIBRARY_PREFIX)sqlite_rembed0.$(STATIC_EXTENSION) +TARGET_STATIC_RELEASE=$(prefix)/release/$(LIBRARY_PREFIX)sqlite_rembed0.$(STATIC_EXTENSION) + +TARGET_H=$(prefix)/debug/sqlite-rembed.h +TARGET_H_RELEASE=$(prefix)/release/sqlite-rembed.h + +TARGET_WHEELS=$(prefix)/debug/wheels +TARGET_WHEELS_RELEASE=$(prefix)/release/wheels + +INTERMEDIATE_PYPACKAGE_EXTENSION=python/sqlite_rembed/sqlite_rembed/rembed0.$(LOADABLE_EXTENSION) + +ifdef target +CARGO_TARGET=--target=$(target) +BUILT_LOCATION=target/$(target)/debug/$(LIBRARY_PREFIX)sqlite_rembed.$(LOADABLE_EXTENSION) +BUILT_LOCATION_RELEASE=target/$(target)/release/$(LIBRARY_PREFIX)sqlite_rembed.$(LOADABLE_EXTENSION) +BUILT_LOCATION_STATIC=target/$(target)/debug/$(LIBRARY_PREFIX)sqlite_rembed.$(STATIC_EXTENSION) +BUILT_LOCATION_STATIC_RELEASE=target/$(target)/release/$(LIBRARY_PREFIX)sqlite_rembed.$(STATIC_EXTENSION) +else +CARGO_TARGET= +BUILT_LOCATION=target/debug/$(LIBRARY_PREFIX)sqlite_rembed.$(LOADABLE_EXTENSION) +BUILT_LOCATION_RELEASE=target/release/$(LIBRARY_PREFIX)sqlite_rembed.$(LOADABLE_EXTENSION) 
+BUILT_LOCATION_STATIC=target/debug/$(LIBRARY_PREFIX)sqlite_rembed.$(STATIC_EXTENSION) +BUILT_LOCATION_STATIC_RELEASE=target/release/$(LIBRARY_PREFIX)sqlite_rembed.$(STATIC_EXTENSION) +endif + +ifdef python +PYTHON=$(python) +else +PYTHON=python3 +endif + +ifdef IS_MACOS_ARM +RENAME_WHEELS_ARGS=--is-macos-arm +else +RENAME_WHEELS_ARGS= +endif + +$(prefix): + mkdir -p $(prefix)/debug + mkdir -p $(prefix)/release + +$(TARGET_WHEELS): $(prefix) + mkdir -p $(TARGET_WHEELS) + +$(TARGET_WHEELS_RELEASE): $(prefix) + mkdir -p $(TARGET_WHEELS_RELEASE) + +$(TARGET_LOADABLE): $(prefix) $(shell find . -type f -name '*.rs') + cargo build --verbose $(CARGO_TARGET) + cp $(BUILT_LOCATION) $@ + +$(TARGET_LOADABLE_RELEASE): $(prefix) $(shell find . -type f -name '*.rs') + cargo build --verbose --release $(CARGO_TARGET) + cp $(BUILT_LOCATION_RELEASE) $@ + +$(TARGET_STATIC): $(prefix) $(shell find . -type f -name '*.rs') + cargo build --verbose $(CARGO_TARGET) --features=sqlite-loadable/static + ls target + ls target/$(target)/debug + cp $(BUILT_LOCATION_STATIC) $@ + +$(TARGET_STATIC_RELEASE): $(prefix) $(shell find . 
-type f -name '*.rs') + cargo build --verbose --release $(CARGO_TARGET) --features=sqlite-loadable/static + cp $(BUILT_LOCATION_STATIC_RELEASE) $@ + +$(TARGET_H): sqlite-rembed.h + cp $< $@ + +$(TARGET_H_RELEASE): sqlite-rembed.h + cp $< $@ + +Cargo.toml: VERSION + cargo set-version `cat VERSION` + +version: + $(MAKE) Cargo.toml + +format: + cargo fmt + +release: $(TARGET_LOADABLE_RELEASE) $(TARGET_STATIC_RELEASE) + +loadable: $(TARGET_LOADABLE) +loadable-release: $(TARGET_LOADABLE_RELEASE) + +static: $(TARGET_STATIC) $(TARGET_H) +static-release: $(TARGET_STATIC_RELEASE) $(TARGET_H_RELEASE) + +debug: loadable static +release: loadable-release static-release + +clean: + rm -rf dist/* + cargo clean + +test-loadable: + $(PYTHON) tests/test-loadable.py + +publish-release: + ./scripts/publish-release.sh + +.PHONY: clean \ + test test-loadable test-python test-npm test-deno \ + loadable loadable-release \ + static static-release \ + debug release \ + format version publish-release diff --git a/deps/sqlite3/sqlite-rembed-source/README.md b/deps/sqlite3/sqlite-rembed-source/README.md new file mode 100644 index 0000000000..d59a4fc0c8 --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/README.md @@ -0,0 +1,134 @@ +# `sqlite-rembed` + +A SQLite extension for generating text embeddings from remote APIs (OpenAI, Nomic, Cohere, llamafile, Ollama, etc.). A sister project to [`sqlite-vec`](https://github.com/asg017/sqlite-vec) and [`sqlite-lembed`](https://github.com/asg017/sqlite-lembed). A work-in-progress! + +## Usage + +```sql +.load ./rembed0 + +INSERT INTO temp.rembed_clients(name, options) + VALUES ('text-embedding-3-small', 'openai'); + +select rembed( + 'text-embedding-3-small', + 'The United States Postal Service is an independent agency...' +); +``` + +The `temp.rembed_clients` virtual table lets you "register" clients with pure `INSERT INTO` statements. 
The `name` field is a unique identifier for a given client, and `options` allows you to specify which 3rd party embedding service you want to use. + +In this case, `openai` is a pre-defined client that will default to OpenAI's `https://api.openai.com/v1/embeddings` endpoint and will source your API key from the `OPENAI_API_KEY` environment variable. The name of the client, `text-embedding-3-small`, will be used as the embeddings model. + +Other pre-defined clients include: + +| Client name | Provider | Endpoint | API Key | +| ------------ | ------------------------------------------------------------------------------------ | ---------------------------------------------- | -------------------- | +| `openai` | [OpenAI](https://platform.openai.com/docs/guides/embeddings) | `https://api.openai.com/v1/embeddings` | `OPENAI_API_KEY` | +| `nomic` | [Nomic](https://docs.nomic.ai/reference/endpoints/nomic-embed-text) | `https://api-atlas.nomic.ai/v1/embedding/text` | `NOMIC_API_KEY` | +| `cohere` | [Cohere](https://docs.cohere.com/reference/embed) | `https://api.cohere.com/v1/embed` | `CO_API_KEY` | +| `jina` | [Jina](https://api.jina.ai/redoc#tag/embeddings) | `https://api.jina.ai/v1/embeddings` | `JINA_API_KEY` | +| `mixedbread` | [MixedBread](https://www.mixedbread.ai/api-reference#quick-start-guide) | `https://api.mixedbread.ai/v1/embeddings/` | `MIXEDBREAD_API_KEY` | +| `llamafile` | [llamafile](https://github.com/Mozilla-Ocho/llamafile) | `http://localhost:8080/embedding` | None | +| `ollama` | [Ollama](https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings) | `http://localhost:11434/api/embeddings` | None | + +Different client options can be specified with `rembed_client_options()`. 
For example, if you have a different OpenAI-compatible service you want to use, then you can use: + +```sql +INSERT INTO temp.rembed_clients(name, options) VALUES + ( + 'xyz-small-1', + rembed_client_options( + 'format', 'openai', + 'url', 'https://api.xyz.com/v1/embeddings', + 'key', 'xyz-ca865ece65-hunter2' + ) + ); +``` + +Or to use a llamafile server that's on a different port: + +```sql +INSERT INTO temp.rembed_clients(name, options) VALUES + ( + 'xyz-small-1', + rembed_client_options( + 'format', 'llamafile', + 'url', 'http://localhost:9999/embedding' + ) + ); +``` + +### Using with `sqlite-vec` + +`sqlite-rembed` works well with [`sqlite-vec`](https://github.com/asg017/sqlite-vec), a SQLite extension for vector search. Embeddings generated with `rembed()` use the same BLOB format for vectors that `sqlite-vec` uses. + +Here's a sample "semantic search" application, made from a sample dataset of news article headlines. + +```sql +create table articles( + headline text +); + +-- Random NPR headlines from 2024-06-04 +insert into articles VALUES + ('Shohei Ohtani''s ex-interpreter pleads guilty to charges related to gambling and theft'), + ('The jury has been selected in Hunter Biden''s gun trial'), + ('Larry Allen, a Super Bowl champion and famed Dallas Cowboy, has died at age 52'), + ('After saying Charlotte, a lone stingray, was pregnant, aquarium now says she''s sick'), + ('An Epoch Times executive is facing money laundering charge'); + + +-- Build a vector table with embeddings of article headlines, using OpenAI's API +create virtual table vec_articles using vec0( + headline_embeddings float[1536] +); + +insert into vec_articles(rowid, headline_embeddings) + select rowid, rembed('text-embedding-3-small', headline) + from articles; + +``` + +Now we have a regular `articles` table that stores text headlines, and a `vec_articles` virtual table that stores embeddings of the article headlines, using OpenAI's `text-embedding-3-small` model. 
+ +To perform a "semantic search" on the embeddings, we can query the `vec_articles` table with an embedding of our query, and join the results back to our `articles` table to retrieve the original headlines. + +```sql +.param set :query 'firearm courtroom' + +with matches as ( + select + rowid, + distance + from vec_articles + where headline_embeddings match rembed('text-embedding-3-small', :query) + order by distance + limit 3 +) +select + headline, + distance +from matches +left join articles on articles.rowid = matches.rowid; + +/* ++--------------------------------------------------------------+------------------+ +| headline | distance | ++--------------------------------------------------------------+------------------+ +| The jury has been selected in Hunter Biden's gun trial | 1.05906391143799 | ++--------------------------------------------------------------+------------------+ +| Shohei Ohtani's ex-interpreter pleads guilty to charges rela | 1.2574303150177 | +| ted to gambling and theft | | ++--------------------------------------------------------------+------------------+ +| An Epoch Times executive is facing money laundering charge | 1.27144026756287 | ++--------------------------------------------------------------+------------------+ +*/ +``` + +Notice how "firearm courtroom" doesn't appear in any of these headlines, but it can still figure out that "Hunter Biden's gun trial" is related, and the other two justice-related articles appear on top. + +## Drawbacks + +1. **No batch support yet.** If you use `rembed()` in a batch UPDATE or INSERT in 1,000 rows, then 1,000 HTTP requests will be made. Add a :+1: to [Issue #1](https://github.com/asg017/sqlite-rembed/issues/1) if you want to see this fixed. +2. **No builtin rate limiting.** Requests are sent sequentially so this may not come up in small demos, but `sqlite-rembed` could add features that handles rate limiting/retries implicitly. 
Add a :+1: to [Issue #2](https://github.com/asg017/sqlite-rembed/issues/2) if you want to see this implemented. diff --git a/deps/sqlite3/sqlite-rembed-source/VERSION b/deps/sqlite3/sqlite-rembed-source/VERSION new file mode 100644 index 0000000000..1429ae3183 --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/VERSION @@ -0,0 +1 @@ +0.0.1-alpha.9 \ No newline at end of file diff --git a/deps/sqlite3/sqlite-rembed-source/build.rs b/deps/sqlite3/sqlite-rembed-source/build.rs new file mode 100644 index 0000000000..c5c0c3b4a1 --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/build.rs @@ -0,0 +1,9 @@ +use std::process::Command; +fn main() { + // Tolerate a missing git binary or a non-git source tree: fall back to an empty hash. + // Trim the trailing newline so the cargo directive stays on a single line. + let git_hash = Command::new("git").args(["rev-parse", "HEAD"]).output() + .map(|out| String::from_utf8_lossy(&out.stdout).trim().to_owned()) + .unwrap_or_default(); + println!("cargo:rustc-env=GIT_HASH={}", git_hash); +} diff --git a/deps/sqlite3/sqlite-rembed-source/examples/simple-search/demo.sql b/deps/sqlite3/sqlite-rembed-source/examples/simple-search/demo.sql new file mode 100644 index 0000000000..20ee88b0ed --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/examples/simple-search/demo.sql @@ -0,0 +1,48 @@ +.bail on +.mode table +.header on + +.timer on + +.load ../../dist/debug/rembed0 +.load ../../../sqlite-vec/dist/vec0 + +INSERT INTO temp.rembed_clients(name, options) + VALUES ('text-embedding-3-small', 'openai'); + +create table articles(headline text); + + +-- Random NPR headlines from 2024-06-04 +insert into articles VALUES + ('Shohei Ohtani''s ex-interpreter pleads guilty to charges related to gambling and theft'), + ('The jury has been selected in Hunter Biden''s gun trial'), + ('Larry Allen, a Super Bowl champion and famed Dallas Cowboy, has died at age 52'), + ('After saying Charlotte, a lone stingray, was pregnant, aquarium now says she''s sick'), + ('An Epoch Times executive is facing money laundering charge'); + + +-- Seed a vector table with embeddings of article headlines, using OpenAI's API +create virtual table 
vec_articles using vec0(headline_embeddings float[1536]); + +insert into vec_articles(rowid, headline_embeddings) + select rowid, rembed('text-embedding-3-small', headline) + from articles; + + +.param set :query 'firearm courtroom' + +with matches as ( + select + rowid, + distance + from vec_articles + where headline_embeddings match rembed('text-embedding-3-small', :query) + order by distance + limit 3 +) +select + headline, + distance +from matches +left join articles on articles.rowid = matches.rowid; diff --git a/deps/sqlite3/sqlite-rembed-source/scripts/publish-release.sh b/deps/sqlite3/sqlite-rembed-source/scripts/publish-release.sh new file mode 100755 index 0000000000..0bfecc192d --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/scripts/publish-release.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +set -euo pipefail -o xtrace + +if [[ -n $(git status --porcelain | grep -v VERSION | grep -v sqlite-dist.toml) ]]; then + echo "❌ There are other un-staged changes to the repository besides VERSION and sqlite-dist.toml" + exit 1 +fi + +VERSION="$(cat VERSION)" + +echo "Publishing version v$VERSION..." + +make version +git add --all +git commit -m "v$VERSION" +git tag v$VERSION +git push origin main v$VERSION + +if grep -qE "alpha|beta" VERSION; then + gh release create v$VERSION --title=v$VERSION --prerelease --notes="" +else + gh release create v$VERSION --title=v$VERSION +fi + + +echo "✅ Published! 
version v$VERSION" diff --git a/deps/sqlite3/sqlite-rembed-source/sqlite-dist.toml b/deps/sqlite3/sqlite-rembed-source/sqlite-dist.toml new file mode 100644 index 0000000000..d3671aacab --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/sqlite-dist.toml @@ -0,0 +1,21 @@ +[package] +name = "sqlite-rembed" +license = "MIT OR Apache" +homepage = "https://alexgarcia.xyz/sqlite-rembed" +repo = "https://github.com/asg017/sqlite-rembed" +description = "A SQLite extension for generating text embeddings from remote sources (OpenAI, Cohere, localhost, etc.)" +authors = ["Alex Garcia"] +git_tag_format = "v$VERSION" + +[targets] +github_releases = {} +sqlpkg = {} +spm = {} + +pip = {} +datasette = {} +sqlite_utils = {} + +npm = {} + +gem = { module_name = "SqliteRembed" } diff --git a/deps/sqlite3/sqlite-rembed-source/sqlite-rembed.h b/deps/sqlite3/sqlite-rembed-source/sqlite-rembed.h new file mode 100644 index 0000000000..b47a3f24c6 --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/sqlite-rembed.h @@ -0,0 +1,14 @@ +#ifndef _SQLITE_REMBED_H +#define _SQLITE_REMBED_H + +#ifdef __cplusplus +extern "C" { +#endif + +int sqlite3_rembed_init(sqlite3*, char**, const sqlite3_api_routines*); + +#ifdef __cplusplus +} /* end of the 'extern "C"' block */ +#endif + +#endif /* ifndef _SQLITE_REMBED_H */ diff --git a/deps/sqlite3/sqlite-rembed-source/src/clients.rs b/deps/sqlite3/sqlite-rembed-source/src/clients.rs new file mode 100644 index 0000000000..5f83b9a386 --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/src/clients.rs @@ -0,0 +1,516 @@ +use sqlite_loadable::{Error, Result}; + +pub(crate) fn try_env_var(key: &str) -> Result { + std::env::var(key) + .map_err(|_| Error::new_message(format!("{} environment variable not define. 
Alternatively, pass in an API key with rembed_client_options", DEFAULT_OPENAI_API_KEY_ENV))) +} + +#[derive(Clone)] +pub struct OpenAiClient { + model: String, + url: String, + key: String, +} +const DEFAULT_OPENAI_URL: &str = "https://api.openai.com/v1/embeddings"; +const DEFAULT_OPENAI_API_KEY_ENV: &str = "OPENAI_API_KEY"; + +impl OpenAiClient { + pub fn new>( + model: S, + url: Option, + key: Option, + ) -> Result { + Ok(Self { + model: model.into(), + url: url.unwrap_or(DEFAULT_OPENAI_URL.to_owned()), + key: match key { + Some(key) => key, + None => try_env_var(DEFAULT_OPENAI_API_KEY_ENV)?, + }, + }) + } + pub fn infer_single(&self, input: &str) -> Result> { + let body = serde_json::json!({ + "input": input, + "model": self.model + }); + + let data: serde_json::Value = ureq::post(&self.url) + .set("Content-Type", "application/json") + .set("Authorization", format!("Bearer {}", self.key).as_str()) + .send_bytes( + serde_json::to_vec(&body) + .map_err(|error| { + Error::new_message(format!("Error serializing body to JSON: {error}")) + })? + .as_ref(), + ) + .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? 
+ .into_json() + .map_err(|error| { + Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) + })?; + OpenAiClient::parse_single_response(data) + } + + pub fn parse_single_response(value: serde_json::Value) -> Result> { + value + .get("data") + .ok_or_else(|| Error::new_message("expected 'data' key in response body")) + .and_then(|v| { + v.get(0) + .ok_or_else(|| Error::new_message("expected 'data.0' path in response body")) + }) + .and_then(|v| { + v.get("embedding").ok_or_else(|| { + Error::new_message("expected 'data.0.embedding' path in response body") + }) + }) + .and_then(|v| { + v.as_array().ok_or_else(|| { + Error::new_message("expected 'data.0.embedding' path to be an array") + }) + }) + .and_then(|arr| { + arr.iter() + .map(|v| { + v.as_f64() + .ok_or_else(|| { + Error::new_message( + "expected 'data.0.embedding' array to contain floats", + ) + }) + .map(|f| f as f32) + }) + .collect() + }) + } +} + +#[derive(Clone)] +pub struct NomicClient { + model: String, + url: String, + key: String, +} +const DEFAULT_NOMIC_URL: &str = "https://api-atlas.nomic.ai/v1/embedding/text"; +const DEFAULT_NOMIC_API_KEY_ENV: &str = "NOMIC_API_KEY"; + +impl NomicClient { + pub fn new>( + model: S, + url: Option, + key: Option, + ) -> Result { + Ok(Self { + model: model.into(), + url: url.unwrap_or(DEFAULT_NOMIC_URL.to_owned()), + key: match key { + Some(key) => key, + None => try_env_var(DEFAULT_NOMIC_API_KEY_ENV)?, + }, + }) + } + + pub fn infer_single(&self, input: &str, input_type: Option<&str>) -> Result> { + let mut body = serde_json::Map::new(); + body.insert("texts".to_owned(), vec![input.to_owned()].into()); + body.insert("model".to_owned(), self.model.to_owned().into()); + + if let Some(input_type) = input_type { + body.insert("input_type".to_owned(), input_type.to_owned().into()); + } + + let data: serde_json::Value = ureq::post(&self.url) + .set("Content-Type", "application/json") + .set("Authorization", format!("Bearer {}", self.key).as_str()) 
+ .send_bytes( + serde_json::to_vec(&body) + .map_err(|error| { + Error::new_message(format!("Error serializing body to JSON: {error}")) + })? + .as_ref(), + ) + .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? + .into_json() + .map_err(|error| { + Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) + })?; + NomicClient::parse_single_response(data) + } + pub fn parse_single_response(value: serde_json::Value) -> Result> { + value + .get("embeddings") + .ok_or_else(|| Error::new_message("expected 'embeddings' key in response body")) + .and_then(|v| { + v.get(0).ok_or_else(|| { + Error::new_message("expected 'embeddings.0' path in response body") + }) + }) + .and_then(|v| { + v.as_array().ok_or_else(|| { + Error::new_message("expected 'embeddings.0' path to be an array") + }) + }) + .and_then(|arr| { + arr.iter() + .map(|v| { + v.as_f64() + .ok_or_else(|| { + Error::new_message( + "expected 'embeddings.0' array to contain floats", + ) + }) + .map(|f| f as f32) + }) + .collect() + }) + } +} + +#[derive(Clone)] +pub struct CohereClient { + url: String, + model: String, + key: String, +} +const DEFAULT_COHERE_URL: &str = "https://api.cohere.com/v1/embed"; +const DEFAULT_COHERE_API_KEY_ENV: &str = "CO_API_KEY"; + +impl CohereClient { + pub fn new>( + model: S, + url: Option, + key: Option, + ) -> Result { + Ok(Self { + model: model.into(), + url: url.unwrap_or(DEFAULT_COHERE_URL.to_owned()), + key: match key { + Some(key) => key, + None => try_env_var(DEFAULT_COHERE_API_KEY_ENV)?, + }, + }) + } + + pub fn infer_single(&self, input: &str, input_type: Option<&str>) -> Result> { + let mut body = serde_json::Map::new(); + body.insert("texts".to_owned(), vec![input.to_owned()].into()); + body.insert("model".to_owned(), self.model.to_owned().into()); + + if let Some(input_type) = input_type { + body.insert("input_type".to_owned(), input_type.to_owned().into()); + } + + let data: serde_json::Value = 
ureq::post(&self.url) + .set("Content-Type", "application/json") + .set("Accept", "application/json") + .set("Authorization", format!("Bearer {}", self.key).as_str()) + .send_bytes( + serde_json::to_vec(&body) + .map_err(|error| { + Error::new_message(format!("Error serializing body to JSON: {error}")) + })? + .as_ref(), + ) + .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? + .into_json() + .map_err(|error| { + Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) + })?; + CohereClient::parse_single_response(data) + } + pub fn parse_single_response(value: serde_json::Value) -> Result> { + value + .get("embeddings") + .ok_or_else(|| Error::new_message("expected 'embeddings' key in response body")) + .and_then(|v| { + v.get(0).ok_or_else(|| { + Error::new_message("expected 'embeddings.0' path in response body") + }) + }) + .and_then(|v| { + v.as_array().ok_or_else(|| { + Error::new_message("expected 'embeddings.0' path to be an array") + }) + }) + .and_then(|arr| { + arr.iter() + .map(|v| { + v.as_f64() + .ok_or_else(|| { + Error::new_message( + "expected 'embeddings.0' array to contain floats", + ) + }) + .map(|f| f as f32) + }) + .collect() + }) + } +} +#[derive(Clone)] +pub struct JinaClient { + url: String, + model: String, + key: String, +} +const DEFAULT_JINA_URL: &str = "https://api.jina.ai/v1/embeddings"; +const DEFAULT_JINA_API_KEY_ENV: &str = "JINA_API_KEY"; + +impl JinaClient { + pub fn new>( + model: S, + url: Option, + key: Option, + ) -> Result { + Ok(Self { + model: model.into(), + url: url.unwrap_or(DEFAULT_JINA_URL.to_owned()), + key: match key { + Some(key) => key, + None => try_env_var(DEFAULT_JINA_API_KEY_ENV)?, + }, + }) + } + + pub fn infer_single(&self, input: &str) -> Result> { + let mut body = serde_json::Map::new(); + body.insert("input".to_owned(), vec![input.to_owned()].into()); + body.insert("model".to_owned(), self.model.to_owned().into()); + + let data: serde_json::Value = 
ureq::post(&self.url) + .set("Content-Type", "application/json") + .set("Accept", "application/json") + .set("Authorization", format!("Bearer {}", self.key).as_str()) + .send_bytes( + serde_json::to_vec(&body) + .map_err(|error| { + Error::new_message(format!("Error serializing body to JSON: {error}")) + })? + .as_ref(), + ) + .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? + .into_json() + .map_err(|error| { + Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) + })?; + JinaClient::parse_single_response(data) + } + pub fn parse_single_response(value: serde_json::Value) -> Result> { + value + .get("data") + .ok_or_else(|| Error::new_message("expected 'data' key in response body")) + .and_then(|v| { + v.get(0) + .ok_or_else(|| Error::new_message("expected 'data.0' path in response body")) + }) + .and_then(|v| { + v.get("embedding").ok_or_else(|| { + Error::new_message("expected 'data.0.embedding' path in response body") + }) + }) + .and_then(|v| { + v.as_array().ok_or_else(|| { + Error::new_message("expected 'data.0.embedding' path to be an array") + }) + }) + .and_then(|arr| { + arr.iter() + .map(|v| { + v.as_f64() + .ok_or_else(|| { + Error::new_message( + "expected 'data.0.embedding' array to contain floats", + ) + }) + .map(|f| f as f32) + }) + .collect() + }) + } +} +#[derive(Clone)] +pub struct MixedbreadClient { + url: String, + model: String, + key: String, +} +const DEFAULT_MIXEDBREAD_URL: &str = "https://api.mixedbread.ai/v1/embeddings/"; +const DEFAULT_MIXEDBREAD_API_KEY_ENV: &str = "MIXEDBREAD_API_KEY"; + +impl MixedbreadClient { + pub fn new>( + model: S, + url: Option, + key: Option, + ) -> Result { + Ok(Self { + model: model.into(), + url: url.unwrap_or(DEFAULT_MIXEDBREAD_URL.to_owned()), + key: match key { + Some(key) => key, + None => try_env_var(DEFAULT_MIXEDBREAD_API_KEY_ENV)?, + }, + }) + } + + pub fn infer_single(&self, input: &str) -> Result> { + let mut body = 
serde_json::Map::new(); + body.insert("input".to_owned(), vec![input.to_owned()].into()); + body.insert("model".to_owned(), self.model.to_owned().into()); + + let data: serde_json::Value = ureq::post(&self.url) + .set("Content-Type", "application/json") + .set("Accept", "application/json") + .set("Authorization", format!("Bearer {}", self.key).as_str()) + .send_bytes( + serde_json::to_vec(&body) + .map_err(|error| { + Error::new_message(format!("Error serializing body to JSON: {error}")) + })? + .as_ref(), + ) + .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? + .into_json() + .map_err(|error| { + Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) + })?; + JinaClient::parse_single_response(data) + } + pub fn parse_single_response(value: serde_json::Value) -> Result> { + value + .get("data") + .ok_or_else(|| Error::new_message("expected 'data' key in response body")) + .and_then(|v| { + v.get(0) + .ok_or_else(|| Error::new_message("expected 'data.0' path in response body")) + }) + .and_then(|v| { + v.get("embedding").ok_or_else(|| { + Error::new_message("expected 'data.0.embedding' path in response body") + }) + }) + .and_then(|v| { + v.as_array().ok_or_else(|| { + Error::new_message("expected 'data.0.embedding' path to be an array") + }) + }) + .and_then(|arr| { + arr.iter() + .map(|v| { + v.as_f64() + .ok_or_else(|| { + Error::new_message( + "expected 'data.0.embedding' array to contain floats", + ) + }) + .map(|f| f as f32) + }) + .collect() + }) + } +} + +#[derive(Clone)] +pub struct OllamaClient { + url: String, + model: String, +} +const DEFAULT_OLLAMA_URL: &str = "http://localhost:11434/api/embeddings"; +impl OllamaClient { + pub fn new>(model: S, url: Option) -> Self { + Self { + model: model.into(), + url: url.unwrap_or(DEFAULT_OLLAMA_URL.to_owned()), + } + } + + pub fn infer_single(&self, input: &str) -> Result> { + let mut body = serde_json::Map::new(); + body.insert("prompt".to_owned(), 
input.to_owned().into()); + body.insert("model".to_owned(), self.model.to_owned().into()); + + let data: serde_json::Value = ureq::post(&self.url) + .set("Content-Type", "application/json") + .send_bytes( + serde_json::to_vec(&body) + .map_err(|error| { + Error::new_message(format!("Error serializing body to JSON: {error}")) + })? + .as_ref(), + ) + .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? + .into_json() + .map_err(|error| { + Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) + })?; + OllamaClient::parse_single_response(data) + } + pub fn parse_single_response(value: serde_json::Value) -> Result> { + value + .get("embedding") + .ok_or_else(|| Error::new_message("expected 'embedding' key in response body")) + .and_then(|v| { + v.as_array() + .ok_or_else(|| Error::new_message("expected 'embedding' path to be an array")) + }) + .and_then(|arr| { + arr.iter() + .map(|v| { + v.as_f64() + .ok_or_else(|| { + Error::new_message("expected 'embedding' array to contain floats") + }) + .map(|f| f as f32) + }) + .collect() + }) + } +} + +#[derive(Clone)] +pub struct LlamafileClient { + url: String, +} +const DEFAULT_LLAMAFILE_URL: &str = "http://localhost:8080/embedding"; + +impl LlamafileClient { + pub fn new(url: Option) -> Self { + Self { + url: url.unwrap_or(DEFAULT_LLAMAFILE_URL.to_owned()), + } + } + + pub fn infer_single(&self, input: &str) -> Result> { + let mut body = serde_json::Map::new(); + body.insert("content".to_owned(), input.to_owned().into()); + + let data: serde_json::Value = ureq::post(&self.url) + .set("Content-Type", "application/json") + .send_bytes( + serde_json::to_vec(&body) + .map_err(|error| { + Error::new_message(format!("Error serializing body to JSON: {error}")) + })? + .as_ref(), + ) + .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? 
+ .into_json() + .map_err(|error| { + Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) + })?; + OllamaClient::parse_single_response(data) + } +} + +#[derive(Clone)] +pub enum Client { + OpenAI(OpenAiClient), + Nomic(NomicClient), + Cohere(CohereClient), + Ollama(OllamaClient), + Llamafile(LlamafileClient), + Jina(JinaClient), + Mixedbread(MixedbreadClient), +} diff --git a/deps/sqlite3/sqlite-rembed-source/src/clients_vtab.rs b/deps/sqlite3/sqlite-rembed-source/src/clients_vtab.rs new file mode 100644 index 0000000000..101c95c6f9 --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/src/clients_vtab.rs @@ -0,0 +1,184 @@ +use sqlite_loadable::table::UpdateOperation; +use sqlite_loadable::{api, prelude::*, Error}; +use sqlite_loadable::{ + api::ValueType, + table::{IndexInfo, VTab, VTabArguments, VTabCursor, VTabWriteable}, + BestIndexError, Result, +}; +use std::{cell::RefCell, collections::HashMap, marker::PhantomData, mem, os::raw::c_int, rc::Rc}; + +use crate::clients::MixedbreadClient; +use crate::{ + clients::{ + Client, CohereClient, JinaClient, LlamafileClient, NomicClient, OllamaClient, OpenAiClient, + }, + CLIENT_OPTIONS_POINTER_NAME, +}; + +enum Columns { + Name, + Options, +} +fn column(index: i32) -> Option { + match index { + 0 => Some(Columns::Name), + 1 => Some(Columns::Options), + _ => None, + } +} +#[repr(C)] +pub struct ClientsTable { + /// must be first + base: sqlite3_vtab, + clients: Rc>>, +} + +impl<'vtab> VTab<'vtab> for ClientsTable { + type Aux = Rc>>; + type Cursor = ClientsCursor<'vtab>; + + fn create( + db: *mut sqlite3, + aux: Option<&Self::Aux>, + args: VTabArguments, + ) -> Result<(String, Self)> { + Self::connect(db, aux, args) + } + fn connect( + _db: *mut sqlite3, + aux: Option<&Self::Aux>, + _args: VTabArguments, + ) -> Result<(String, ClientsTable)> { + let base: sqlite3_vtab = unsafe { mem::zeroed() }; + let clients = aux.expect("Required aux").to_owned(); + + let vtab = ClientsTable { base, clients }; 
+ let sql = "create table x(name text primary key, options)".to_owned(); + + Ok((sql, vtab)) + } + fn destroy(&self) -> Result<()> { + Ok(()) + } + + fn best_index(&self, mut info: IndexInfo) -> core::result::Result<(), BestIndexError> { + info.set_estimated_cost(10000.0); + info.set_estimated_rows(10000); + info.set_idxnum(1); + Ok(()) + } + + fn open(&'vtab mut self) -> Result> { + ClientsCursor::new(self) + } +} + +impl<'vtab> VTabWriteable<'vtab> for ClientsTable { + fn update(&'vtab mut self, operation: UpdateOperation<'_>, _p_rowid: *mut i64) -> Result<()> { + match operation { + UpdateOperation::Delete(_) => { + return Err(Error::new_message( + "DELETE operations on rembed_clients is not supported yet", + )) + } + UpdateOperation::Update { _values } => { + return Err(Error::new_message( + "DELETE operations on rembed_clients is not supported yet", + )) + } + UpdateOperation::Insert { values, rowid: _ } => { + let name = api::value_text(&values[0])?; + let client = match api::value_type(&values[1]) { + ValueType::Text => match api::value_text(&values[1])? { + "openai" => Client::OpenAI(OpenAiClient::new(name, None, None)?), + "mixedbread" => { + Client::Mixedbread(MixedbreadClient::new(name, None, None)?) + } + "jina" => Client::Jina(JinaClient::new(name, None, None)?), + "nomic" => Client::Nomic(NomicClient::new(name, None, None)?), + "cohere" => Client::Cohere(CohereClient::new(name, None, None)?), + "ollama" => Client::Ollama(OllamaClient::new(name, None)), + "llamafile" => Client::Llamafile(LlamafileClient::new(None)), + text => { + return Err(Error::new_message(format!( + "'{text}' is not a valid rembed client." 
+ ))) + } + }, + ValueType::Null => unsafe { + if let Some(client) = + api::value_pointer::(&values[1], CLIENT_OPTIONS_POINTER_NAME) + { + (*client).clone() + } else { + return Err(Error::new_message("client options required")); + } + }, + _ => return Err(Error::new_message("client options required")), + }; + self.clients.borrow_mut().insert(name.to_owned(), client); + } + } + Ok(()) + } +} + +#[repr(C)] +pub struct ClientsCursor<'vtab> { + /// Base class. Must be first + base: sqlite3_vtab_cursor, + keys: Vec, + rowid: i64, + phantom: PhantomData<&'vtab ClientsTable>, +} +impl ClientsCursor<'_> { + fn new(table: &mut ClientsTable) -> Result { + let base: sqlite3_vtab_cursor = unsafe { mem::zeroed() }; + let c = table.clients.borrow(); + let keys = c.keys().map(|k| k.to_string()).collect(); + let cursor = ClientsCursor { + base, + keys, + rowid: 0, + phantom: PhantomData, + }; + Ok(cursor) + } +} + +impl VTabCursor for ClientsCursor<'_> { + fn filter( + &mut self, + _idx_num: c_int, + _idx_str: Option<&str>, + _values: &[*mut sqlite3_value], + ) -> Result<()> { + Ok(()) + } + + fn next(&mut self) -> Result<()> { + self.rowid += 1; + Ok(()) + } + + fn eof(&self) -> bool { + (self.rowid as usize) >= self.keys.len() + } + + fn column(&self, context: *mut sqlite3_context, i: c_int) -> Result<()> { + let key = self + .keys + .get(self.rowid as usize) + .expect("Internal rembed_clients logic error"); + match column(i) { + Some(Columns::Name) => api::result_text(context, key)?, + Some(Columns::Options) => (), + None => (), + }; + Ok(()) + } + + fn rowid(&self) -> Result { + Ok(self.rowid) + } +} diff --git a/deps/sqlite3/sqlite-rembed-source/src/lib.rs b/deps/sqlite3/sqlite-rembed-source/src/lib.rs new file mode 100644 index 0000000000..192452526e --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/src/lib.rs @@ -0,0 +1,169 @@ +mod clients; +mod clients_vtab; + +use std::cell::RefCell; +use std::collections::HashMap; +use std::rc::Rc; + +use clients::{Client, 
CohereClient, LlamafileClient, NomicClient, OllamaClient, OpenAiClient}; +use clients_vtab::ClientsTable; +use sqlite_loadable::{ + api, define_scalar_function, define_scalar_function_with_aux, define_virtual_table_writeablex, + prelude::*, Error, Result, +}; +use zerocopy::AsBytes; + +const FLOAT32_VECTOR_SUBTYPE: u8 = 223; +const CLIENT_OPTIONS_POINTER_NAME: &[u8] = b"sqlite-rembed-client-options\0"; + +pub fn rembed_version(context: *mut sqlite3_context, _values: &[*mut sqlite3_value]) -> Result<()> { + api::result_text(context, format!("v{}", env!("CARGO_PKG_VERSION")))?; + Ok(()) +} + +pub fn rembed_debug(context: *mut sqlite3_context, _values: &[*mut sqlite3_value]) -> Result<()> { + api::result_text( + context, + format!( + "Version: v{} +Source: {} +", + env!("CARGO_PKG_VERSION"), + env!("GIT_HASH") + ), + )?; + Ok(()) +} + +pub fn rembed_client_options( + context: *mut sqlite3_context, + values: &[*mut sqlite3_value], +) -> Result<()> { + if (values.len() % 2) != 0 { + return Err(Error::new_message( + "Must have an even number of arguments to rembed_client_options, as key/value pairs.", + )); + } + let mut options: HashMap = HashMap::new(); + let mut format: Option = None; + for pair in values.chunks(2) { + let key = api::value_text(&pair[0])?; + let value = api::value_text(&pair[1])?; + if key == "format" { + format = Some(value.to_owned()); + } else { + options.insert(key.to_owned(), value.to_owned()); + } + } + + let format = match format { + Some(format) => format, + None => { + return Err(Error::new_message("'format' key is required.")); + } + }; + let client: Client = match format.as_str() { + "openai" => Client::OpenAI(OpenAiClient::new( + options + .get("model") + .ok_or_else(|| Error::new_message("'model' option is required"))?, + options.get("url").cloned(), + options.get("key").cloned(), + )?), + "nomic" => Client::Nomic(NomicClient::new( + options + .get("model") + .ok_or_else(|| Error::new_message("'model' option is required"))?, + 
options.get("url").cloned(), + options.get("key").cloned(), + )?), + "cohere" => Client::Cohere(CohereClient::new( + options + .get("model") + .ok_or_else(|| Error::new_message("'model' option is required"))?, + options.get("url").cloned(), + options.get("key").cloned(), + )?), + "ollama" => Client::Ollama(OllamaClient::new( + options + .get("model") + .ok_or_else(|| Error::new_message("'model' option is required"))?, + options.get("url").cloned(), + )), + "llamafile" => Client::Llamafile(LlamafileClient::new(options.get("url").cloned())), + format => return Err(Error::new_message(format!("Unknown format '{format}'"))), + }; + + api::result_pointer(context, CLIENT_OPTIONS_POINTER_NAME, client); + + Ok(()) +} +pub fn rembed( + context: *mut sqlite3_context, + values: &[*mut sqlite3_value], + clients: &Rc>>, +) -> Result<()> { + let client_name = api::value_text(&values[0])?; + let input = api::value_text(&values[1])?; + let x = clients.borrow(); + let client = x.get(client_name).ok_or_else(|| { + Error::new_message(format!( + "Client with name {client_name} was not registered with rembed_clients." + )) + })?; + + let embedding = match client { + Client::OpenAI(client) => client.infer_single(input)?, + Client::Jina(client) => client.infer_single(input)?, + Client::Mixedbread(client) => client.infer_single(input)?, + Client::Ollama(client) => client.infer_single(input)?, + Client::Llamafile(client) => client.infer_single(input)?, + Client::Nomic(client) => { + let input_type = values.get(2).and_then(|v| api::value_text(v).ok()); + client.infer_single(input, input_type)? + } + Client::Cohere(client) => { + let input_type = values.get(2).and_then(|v| api::value_text(v).ok()); + client.infer_single(input, input_type)? 
+ } + }; + + api::result_blob(context, embedding.as_bytes()); + api::result_subtype(context, FLOAT32_VECTOR_SUBTYPE); + Ok(()) +} + +#[sqlite_entrypoint] +pub fn sqlite3_rembed_init(db: *mut sqlite3) -> Result<()> { + let flags = FunctionFlags::UTF8 + | FunctionFlags::DETERMINISTIC + | unsafe { FunctionFlags::from_bits_unchecked(0x001000000) }; + + let c = Rc::new(RefCell::new(HashMap::new())); + + define_scalar_function( + db, + "rembed_version", + 0, + rembed_version, + FunctionFlags::UTF8 | FunctionFlags::DETERMINISTIC, + )?; + define_scalar_function( + db, + "rembed_debug", + 0, + rembed_debug, + FunctionFlags::UTF8 | FunctionFlags::DETERMINISTIC, + )?; + define_scalar_function_with_aux(db, "rembed", 2, rembed, flags, Rc::clone(&c))?; + define_scalar_function_with_aux(db, "rembed", 3, rembed, flags, Rc::clone(&c))?; + define_scalar_function( + db, + "rembed_client_options", + -1, + rembed_client_options, + flags, + )?; + define_virtual_table_writeablex::(db, "rembed_clients", Some(Rc::clone(&c)))?; + Ok(()) +} diff --git a/deps/sqlite3/sqlite-rembed-source/test.sql b/deps/sqlite3/sqlite-rembed-source/test.sql new file mode 100644 index 0000000000..d1e8e85151 --- /dev/null +++ b/deps/sqlite3/sqlite-rembed-source/test.sql @@ -0,0 +1,37 @@ +.load dist/debug/rembed0 +.bail on +.mode box +.header on +.timer on +.echo on + +INSERT INTO temp.rembed_clients(name, options) VALUES + ('text-embedding-3-small','openai'), + ('jina-embeddings-v2-base-en','jina'), + ('mixedbread-ai/mxbai-embed-large-v1','mixedbread'), + ('nomic-embed-text-v1.5', 'nomic'), + ('embed-english-v3.0', 'cohere'), + ('snowflake-arctic-embed:s', 'ollama'), + ('llamafile', 'llamafile'), + ( + 'mxbai-embed-large-v1-f16', + rembed_client_options( + 'format', 'llamafile', + --'url', 'http://mm1:8080/v1/embeddings' + 'url', 'http://mm1:8080/embedding' + ) + ); + +select length(rembed('mixedbread-ai/mxbai-embed-large-v1', 'obama the person')); +.exit +select length(rembed('jina-embeddings-v2-base-en', 
'obama the person')); + +.exit + +select length(rembed('text-embedding-3-small', 'obama the person')); +select length(rembed('llamafile', 'obama the person')); +select length(rembed('snowflake-arctic-embed:s', 'obama the person')); +select length(rembed('embed-english-v3.0', 'obama the person', 'search_document')); +select length(rembed('mxbai-embed-large-v1-f16', 'obama the person')); + + diff --git a/doc/SQLite3-Server.md b/doc/SQLite3-Server.md index f9e187c8b3..d346179fba 100644 --- a/doc/SQLite3-Server.md +++ b/doc/SQLite3-Server.md @@ -69,6 +69,39 @@ SELECT rowid, distance FROM vec_data WHERE vector MATCH json('[0.1, 0.2, 0.3,...,0.128]'); ``` +### Embedding Generation (with sqlite-rembed) + +```sql +-- Register an embedding API client +INSERT INTO temp.rembed_clients(name, options) +VALUES ('openai', rembed_client_options('format', 'openai', 'model', 'text-embedding-3-small', 'key', 'your-api-key')); + +-- Generate text embeddings +SELECT rembed('openai', 'Hello world') as embedding; + +-- Complete AI pipeline: generate embedding and search +CREATE VIRTUAL TABLE documents USING vec0(embedding float[1536]); + +INSERT INTO documents(rowid, embedding) +VALUES (1, rembed('openai', 'First document text')); + +INSERT INTO documents(rowid, embedding) +VALUES (2, rembed('openai', 'Second document text')); + +-- Search for similar documents +SELECT rowid, distance FROM documents +WHERE embedding MATCH rembed('openai', 'Search query'); +``` + +#### Supported Embedding Providers +- **OpenAI**: `rembed_client_options('format', 'openai', 'model', 'text-embedding-3-small')` +- **Ollama** (local): `rembed_client_options('format', 'ollama', 'model', 'nomic-embed-text')` +- **Cohere**: `rembed_client_options('format', 'cohere', 'model', 'embed-english-v3.0')` +- **Nomic**: `rembed_client_options('format', 'nomic', 'model', 'nomic-embed-text-v1.5')` +- **Llamafile** (local): `rembed_client_options('format', 'llamafile')` + +See [sqlite-rembed integration documentation](./sqlite-rembed-integration.md) for full details. + ### Available Databases ```sql @@ -87,9 +120,11 @@ SHOW DATABASES; 1. **Data Analysis**: Store and analyze temporary data 2. 
**Vector Search**: Perform similarity searches with sqlite-vec -3. **Testing**: Test SQLite features with MySQL clients -4. **Prototyping**: Quick data storage and retrieval -5. **Custom Applications**: Build applications using SQLite with MySQL tools +3. **Embedding Generation**: Create text embeddings with sqlite-rembed (OpenAI, Ollama, Cohere, etc.) +4. **AI Pipelines**: Complete RAG workflows: embedding generation → vector storage → similarity search +5. **Testing**: Test SQLite features with MySQL clients +6. **Prototyping**: Quick data storage and retrieval +7. **Custom Applications**: Build applications using SQLite with MySQL tools ## Limitations diff --git a/doc/sqlite-rembed-integration.md b/doc/sqlite-rembed-integration.md new file mode 100644 index 0000000000..d05a51e539 --- /dev/null +++ b/doc/sqlite-rembed-integration.md @@ -0,0 +1,235 @@ +# sqlite-rembed Integration into ProxySQL + +## Overview + +This document describes the integration of the `sqlite-rembed` Rust SQLite extension into ProxySQL, enabling text embedding generation from remote AI APIs (OpenAI, Nomic, Ollama, Cohere, etc.) directly within ProxySQL's SQLite3 Server. + +## What is sqlite-rembed? + +`sqlite-rembed` is a Rust-based SQLite extension that provides: +- `rembed()` function for generating text embeddings via HTTP requests +- `temp.rembed_clients` virtual table for managing embedding API clients +- Support for multiple embedding providers: OpenAI, Nomic, Cohere, Ollama, Llamafile +- Automatic handling of API authentication, request formatting, and response parsing + +## Integration Architecture + +The integration follows the same pattern as `sqlite-vec` (vector search extension): + +### Static Linking Approach +1. **Rust static library**: `libsqlite_rembed.a` built from Rust source +2. **Build system integration**: Makefile targets for Rust compilation +3. **Auto-registration**: `sqlite3_auto_extension()` in ProxySQL initialization +4. 
**Single binary deployment**: No external dependencies at runtime + +### Technical Implementation + +``` +ProxySQL Binary +├── C++ Core (libproxysql.a) +├── SQLite3 (sqlite3.o) +├── sqlite-vec (vec.o) +└── sqlite-rembed (libsqlite_rembed.a) ← Rust static library +``` + +## Build Requirements + +### Rust Toolchain +```bash +# Required for building sqlite-rembed +rustc --version +cargo --version + +# Development dependencies +clang +libclang-dev +``` + +### Build Process +1. Rust toolchain detection in `deps/Makefile` +2. Static library build with `cargo build --release --features=sqlite-loadable/static --lib` +3. Linking into `libproxysql.a` via `lib/Makefile` +4. Final binary linking via `src/Makefile` + +## Code Changes Summary + +### 1. `deps/Makefile` +- Added Rust toolchain detection (`rustc`, `cargo`) +- SQLite environment variables for sqlite-rembed build +- New target: `sqlite3/libsqlite_rembed.a` +- Added dependency to `sqlite3` target + +### 2. `lib/Makefile` +- Added `SQLITE_REMBED_LIB` variable pointing to static library +- Library included in `libproxysql.a` dependencies (via src/Makefile) + +### 3. `src/Makefile` +- Added `SQLITE_REMBED_LIB` variable +- Added `$(SQLITE_REMBED_LIB)` to `LIBPROXYSQLAR` dependencies + +### 4. 
`lib/Admin_Bootstrap.cpp` +- Added `extern "C" int sqlite3_rembed_init(...)` declaration +- Added `sqlite3_auto_extension((void(*)(void))sqlite3_rembed_init)` registration +- Registered after `sqlite-vec` initialization + +## Usage Examples + +### Basic Embedding Generation +```sql +-- Register an OpenAI client +INSERT INTO temp.rembed_clients(name, options) +VALUES ('openai_client', rembed_client_options('format', 'openai', 'model', 'text-embedding-3-small', 'key', 'your-api-key')); + +-- Generate embedding +SELECT rembed('openai_client', 'Hello world') as embedding; + +-- Use with vector search +CREATE VIRTUAL TABLE docs USING vec0(embedding float[1536]); +INSERT INTO docs(rowid, embedding) +VALUES (1, rembed('openai_client', 'Document text here')); + +-- Search similar documents +SELECT rowid, distance FROM docs +WHERE embedding MATCH rembed('openai_client', 'Query text'); +``` + +### Multiple API Providers +```sql +-- OpenAI +INSERT INTO temp.rembed_clients(name, options) +VALUES ('gpt', rembed_client_options('format', 'openai', 'model', 'text-embedding-3-small', 'key', 'sk-...')); + +-- Ollama (local) +INSERT INTO temp.rembed_clients(name, options) +VALUES ('ollama', rembed_client_options('format', 'ollama', 'model', 'nomic-embed-text', 'url', 'http://localhost:11434/api/embeddings')); + +-- Cohere +INSERT INTO temp.rembed_clients(name, options) +VALUES ('cohere', rembed_client_options('format', 'cohere', 'model', 'embed-english-v3.0', 'key', 'co-...')); + +-- Nomic +INSERT INTO temp.rembed_clients(name, options) +VALUES ('nomic', rembed_client_options('format', 'nomic', 'model', 'nomic-embed-text-v1.5', 'key', 'nm-...')); +``` + +## Configuration + +### Environment Variables (for building) +```bash +export SQLITE3_INCLUDE_DIR=/path/to/sqlite-amalgamation +export SQLITE3_LIB_DIR=/path/to/sqlite-amalgamation +export SQLITE3_STATIC=1 +``` + +### Runtime Configuration +- API keys: Set via the `key` option of `rembed_client_options()`, or via the provider's environment variable (e.g. `OPENAI_API_KEY`) when no key is given +- Timeouts: Handled by underlying HTTP client (ureq) +- Model selection: Per-client configuration + +## Error Handling + +The extension provides SQLite error messages for: +- Missing client registration +- API authentication failures +- Network connectivity 
issues +- Invalid input parameters +- Provider-specific errors + +## Performance Considerations + +### HTTP Latency +- Embedding generation involves HTTP requests to remote APIs +- Consider local embedding models (Ollama, Llamafile) for lower latency +- Batch processing not currently supported (single text inputs only) + +### Caching +- No built-in caching layer +- Applications should cache embeddings when appropriate +- Consider persisting generated embeddings in a regular table (SQLite has no materialized views) + +## Limitations + +### Current Implementation +1. **Blocking HTTP requests**: Synchronous HTTP calls may block SQLite threads +2. **Single text input**: `rembed()` accepts single text string, not batches +3. **No async support**: HTTP requests are synchronous +4. **Rust dependency**: Requires Rust toolchain for building ProxySQL + +### Security Considerations +- API keys stored in `temp.rembed_clients` table (in-memory, per-connection) +- Network access required for remote APIs +- API keys are sent in the `Authorization` header and are only encrypted in transit when the endpoint URL uses HTTPS + +## Testing + +### Build Verification +```bash +# Verify Rust library builds +(cd deps && make sqlite3) + +# Verify symbol exists +nm deps/sqlite3/libsqlite_rembed.a | grep sqlite3_rembed_init + +# Test compilation (without ClickHouse) +make PROXYSQLCLICKHOUSE=0 +``` + +### Functional Testing +```sql +-- Test extension registration +SELECT rembed_version(); +SELECT rembed_debug(); + +-- Test client registration +INSERT INTO temp.rembed_clients(name, options) +VALUES ('test', rembed_client_options('format', 'ollama', 'model', 'nomic-embed-text')); + +-- Test embedding generation (requires running Ollama) +-- SELECT rembed('test', 'test text'); +``` + +## Future Enhancements + +### Planned Improvements +1. **Async HTTP**: Non-blocking requests using async Rust +2. **Batch processing**: Support for multiple texts in single call +3. **Embedding caching**: LRU cache for frequently generated embeddings +4. **More providers**: Additional embedding API support +5. 
**Configuration persistence**: Save clients across connections + +### Integration with sqlite-vec +- Complete AI pipeline: `rembed()` → vector storage → KNN search via `MATCH` +- Example: Document embedding and similarity search +- Potential for RAG (Retrieval-Augmented Generation) applications + +## Troubleshooting + +### Build Issues +1. **Missing clang**: Install `clang` and `libclang-dev` +2. **Rust not found**: Install Rust toolchain via `rustup` +3. **SQLite headers**: Ensure `sqlite-amalgamation` is extracted +4. **ClickHouse errors**: Build with `PROXYSQLCLICKHOUSE=0` + +### Runtime Issues +1. **Client not found**: Verify `temp.rembed_clients` entry exists +2. **API errors**: Check API keys, network connectivity, model availability +3. **Memory issues**: Large embeddings may exceed SQLite blob limits + +## References + +- [sqlite-rembed GitHub](https://github.com/asg017/sqlite-rembed) +- [sqlite-vec Documentation](../doc/SQLite3-Server.md) +- [SQLite Loadable Extensions](https://www.sqlite.org/loadext.html) +- [Rust C FFI](https://doc.rust-lang.org/nomicon/ffi.html) + +## Maintainers + +- Integration: [Your Name/Team] +- Original sqlite-rembed: [Alex Garcia (@asg017)](https://github.com/asg017) +- ProxySQL Team: [ProxySQL Maintainers](https://github.com/sysown/proxysql) + +## License + +- sqlite-rembed: Apache 2.0 / MIT (see `deps/sqlite3/sqlite-rembed-source/LICENSE-*`) +- ProxySQL: GPL v3 +- Integration code: Same as ProxySQL \ No newline at end of file diff --git a/lib/Admin_Bootstrap.cpp b/lib/Admin_Bootstrap.cpp index 3acf7715f5..92271f3fdf 100644 --- a/lib/Admin_Bootstrap.cpp +++ b/lib/Admin_Bootstrap.cpp @@ -93,6 +93,7 @@ using json = nlohmann::json; * @see https://github.com/asg017/sqlite-vec for sqlite-vec documentation */ extern "C" int sqlite3_vec_init(sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi); +extern "C" int sqlite3_rembed_init(sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi); #include "microhttpd.h" #if 
(defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || defined(__mips__)) && defined(__linux) @@ -609,6 +610,7 @@ bool ProxySQL_Admin::init(const bootstrap_info_t& bootstrap_info) { * for SQLite's auto-extension mechanism. */ sqlite3_auto_extension( (void(*)(void))sqlite3_vec_init); + sqlite3_auto_extension( (void(*)(void))sqlite3_rembed_init); /** * @brief Open the stats database with shared cache mode diff --git a/lib/Makefile b/lib/Makefile index db03b04009..3229254228 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -6,6 +6,7 @@ PROXYSQL_PATH := $(shell while [ ! -f ./src/proxysql_global.cpp ]; do cd ..; don include $(PROXYSQL_PATH)/include/makefiles_vars.mk include $(PROXYSQL_PATH)/include/makefiles_paths.mk +SQLITE_REMBED_LIB := $(SQLITE3_LDIR)/../libsqlite_rembed.a IDIRS := -I$(PROXYSQL_IDIR) \ -I$(JEMALLOC_IDIR) \ diff --git a/src/Makefile b/src/Makefile index d4b3fe8373..71412f1e18 100644 --- a/src/Makefile +++ b/src/Makefile @@ -130,6 +130,7 @@ ifeq ($(CENTOSVER),6) MYLIBS += -lgcrypt endif +SQLITE_REMBED_LIB := $(DEPS_PATH)/sqlite3/libsqlite_rembed.a LIBPROXYSQLAR := $(PROXYSQL_LDIR)/libproxysql.a ifeq ($(UNAME_S),Darwin) LIBPROXYSQLAR += $(JEMALLOC_LDIR)/libjemalloc.a @@ -145,7 +146,7 @@ ifeq ($(UNAME_S),Darwin) LIBPROXYSQLAR += $(LIBINJECTION_LDIR)/libinjection.a LIBPROXYSQLAR += $(EV_LDIR)/libev.a endif -LIBPROXYSQLAR += $(CITYHASH_LDIR)/libcityhash.a +LIBPROXYSQLAR += $(CITYHASH_LDIR)/libcityhash.a $(SQLITE_REMBED_LIB) ODIR := obj From 9f30d85e10c5db24c041cb50584fdb856479e26e Mon Sep 17 00:00:00 2001 From: Rene Cannao Date: Mon, 22 Dec 2025 19:52:18 +0000 Subject: [PATCH 2/5] Add tar.gz packaging for sqlite-rembed dependency - Download official sqlite-rembed-0.0.1-alpha.9.tar.gz from GitHub - Update deps/Makefile to extract from tar.gz instead of using local source - Add tar.gz to git repository, remove sqlite-rembed-source from git cache - Update documentation to remove ClickHouse troubleshooting reference - Clean targets now remove 
sqlite-rembed-*/ and sqlite-rembed-source/ Following the pattern of other ProxySQL dependencies, the Rust extension is now packaged as a compressed tarball that gets extracted during build. --- deps/Makefile | 10 +- .../sqlite-rembed-0.0.1-alpha.9.tar.gz | Bin 0 -> 16824 bytes .../.github/workflows/release.yaml | 122 --- .../.github/workflows/test.yaml | 60 -- deps/sqlite3/sqlite-rembed-source/.gitignore | 3 - deps/sqlite3/sqlite-rembed-source/Cargo.lock | 847 ------------------ deps/sqlite3/sqlite-rembed-source/Cargo.toml | 14 - .../sqlite-rembed-source/LICENSE-APACHE | 201 ----- deps/sqlite3/sqlite-rembed-source/LICENSE-MIT | 21 - deps/sqlite3/sqlite-rembed-source/Makefile | 141 --- deps/sqlite3/sqlite-rembed-source/README.md | 134 --- deps/sqlite3/sqlite-rembed-source/VERSION | 1 - deps/sqlite3/sqlite-rembed-source/build.rs | 9 - .../examples/simple-search/demo.sql | 48 - .../scripts/publish-release.sh | 27 - .../sqlite-rembed-source/sqlite-dist.toml | 21 - .../sqlite-rembed-source/sqlite-rembed.h | 14 - .../sqlite-rembed-source/src/clients.rs | 516 ----------- .../sqlite-rembed-source/src/clients_vtab.rs | 184 ---- deps/sqlite3/sqlite-rembed-source/src/lib.rs | 169 ---- deps/sqlite3/sqlite-rembed-source/test.sql | 37 - doc/sqlite-rembed-integration.md | 6 +- 22 files changed, 8 insertions(+), 2577 deletions(-) create mode 100644 deps/sqlite3/sqlite-rembed-0.0.1-alpha.9.tar.gz delete mode 100644 deps/sqlite3/sqlite-rembed-source/.github/workflows/release.yaml delete mode 100644 deps/sqlite3/sqlite-rembed-source/.github/workflows/test.yaml delete mode 100644 deps/sqlite3/sqlite-rembed-source/.gitignore delete mode 100644 deps/sqlite3/sqlite-rembed-source/Cargo.lock delete mode 100644 deps/sqlite3/sqlite-rembed-source/Cargo.toml delete mode 100644 deps/sqlite3/sqlite-rembed-source/LICENSE-APACHE delete mode 100644 deps/sqlite3/sqlite-rembed-source/LICENSE-MIT delete mode 100644 deps/sqlite3/sqlite-rembed-source/Makefile delete mode 100644 
deps/sqlite3/sqlite-rembed-source/README.md delete mode 100644 deps/sqlite3/sqlite-rembed-source/VERSION delete mode 100644 deps/sqlite3/sqlite-rembed-source/build.rs delete mode 100644 deps/sqlite3/sqlite-rembed-source/examples/simple-search/demo.sql delete mode 100755 deps/sqlite3/sqlite-rembed-source/scripts/publish-release.sh delete mode 100644 deps/sqlite3/sqlite-rembed-source/sqlite-dist.toml delete mode 100644 deps/sqlite3/sqlite-rembed-source/sqlite-rembed.h delete mode 100644 deps/sqlite3/sqlite-rembed-source/src/clients.rs delete mode 100644 deps/sqlite3/sqlite-rembed-source/src/clients_vtab.rs delete mode 100644 deps/sqlite3/sqlite-rembed-source/src/lib.rs delete mode 100644 deps/sqlite3/sqlite-rembed-source/test.sql diff --git a/deps/Makefile b/deps/Makefile index d88e48642a..560db98f1d 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -15,8 +15,8 @@ $(error "cargo not found. Please install Rust toolchain") endif # SQLite environment variables for sqlite-rembed build -export SQLITE3_INCLUDE_DIR=$(shell pwd)/sqlite3/sqlite-amalgamation-3500400 -export SQLITE3_LIB_DIR=$(shell pwd)/sqlite3/sqlite-amalgamation-3500400 +export SQLITE3_INCLUDE_DIR=$(shell pwd)/sqlite3/sqlite3 +export SQLITE3_LIB_DIR=$(shell pwd)/sqlite3/sqlite3 export SQLITE3_STATIC=1 @@ -265,7 +265,10 @@ sqlite3/sqlite3/vec.o: sqlite3/sqlite3/sqlite3.o cd sqlite3/sqlite3 && cp ../sqlite-vec-source/sqlite-vec.c . && cp ../sqlite-vec-source/sqlite-vec.h . 
cd sqlite3/sqlite3 && ${CC} ${MYCFLAGS} -fPIC -c -o vec.o sqlite-vec.c -DSQLITE_CORE -DSQLITE_VEC_STATIC -DSQLITE_ENABLE_MEMORY_MANAGEMENT -DSQLITE_ENABLE_JSON1 -DSQLITE_DLL=1 -sqlite3/libsqlite_rembed.a: sqlite3/sqlite-rembed-source/Cargo.toml $(shell find sqlite3/sqlite-rembed-source -type f -name '*.rs') +sqlite3/libsqlite_rembed.a: sqlite3/sqlite-rembed-0.0.1-alpha.9.tar.gz + cd sqlite3 && rm -rf sqlite-rembed-*/ sqlite-rembed-source/ || true + cd sqlite3 && tar -zxf sqlite-rembed-0.0.1-alpha.9.tar.gz + mv sqlite3/sqlite-rembed-0.0.1-alpha.9 sqlite3/sqlite-rembed-source cd sqlite3/sqlite-rembed-source && SQLITE3_INCLUDE_DIR=$(SQLITE3_INCLUDE_DIR) SQLITE3_LIB_DIR=$(SQLITE3_LIB_DIR) SQLITE3_STATIC=1 $(CARGO) build --release --features=sqlite-loadable/static --lib cp sqlite3/sqlite-rembed-source/target/release/libsqlite_rembed.a sqlite3/libsqlite_rembed.a @@ -361,6 +364,7 @@ cleanpart: cd mariadb-client-library && rm -rf mariadb-connector-c-*/ || true cd jemalloc && rm -rf jemalloc-*/ || true cd sqlite3 && rm -rf sqlite-amalgamation-*/ || true + cd sqlite3 && rm -rf libsqlite_rembed.a sqlite-rembed-source/ sqlite-rembed-*/ || true cd postgresql && rm -rf postgresql-*/ || true cd postgresql && rm -rf postgres-*/ || true .PHONY: cleanpart diff --git a/deps/sqlite3/sqlite-rembed-0.0.1-alpha.9.tar.gz b/deps/sqlite3/sqlite-rembed-0.0.1-alpha.9.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..b3d9ebfe838ec33db8acfd78c9cd55ba9dbcdcb5 GIT binary patch literal 16824 zcmV(!K;^$5iwFP!000001MFLClN;5U=JWiDu7MA`EYh6&#VM+wHm-!tVkjV+O0ksc zTt|u}wdiiyGbH)%`y5@yl14qYp>4y)ZOlmBXIkgIJ@50pEw9|odiw z_=Dh3N^O4PIQ@x>;sYuaCzLVHG|p2YDf{614eaIKv2Hup;3J#5?jBnAvg@bD{YDPS zs!%T$Y1w@$L()>%NP&f3sL*v5FE>*ym;4pet!&iv_H;gT^coM;Zq!u zF&Wd5o3EyBY_Ep;j8YF;%l`Vu>y!GILhBFYP#bvY_!qDLhvGlJo^{i;|HXOmGvcoa ze;@z1alA_WpV!UPJg=X(KQj|vKmLpoE#AlfEgY{G|0d0oYt#7FE#|NO8C(7*`r@yp z=BM%Jf@1xfGyBEyFCPD;Tck(TwU4UrS8G3?wbK-T zxLBoTF>Bjdy==>xP%k?$NGxigAuj$_`^{;;p3UQEK3lGDMmI)Hlo+KOEG)mby>+u* 
zwj;c$)_%S0)+2@l6q_q;msag|QB{qqbz8RJLRUWWDoj&&TCcm$o{75^xYeU-;hv^y zUc2c0yd?F;ik_RSS7qVIHQg-Z+>3iv)pO+iqq}!iwRO^Qshl_K;FneoH>u*R?e=AJ z_3@ok7B1B7%bc=ZMCotCA?HncJ%>2ggz3vY@*7E!9-Cg(CC_Ke2+)05mh6`?LhaA< z%h8Ha(_VFTJr7eiTYmP8_F8oa%1?uX6K6NoOy3z&Z1zS{|p$Dv+ax%L`@@M^b< z!y0~J&DFC*>Uy+zztQ#b=@P4V8F#&0k3sy#H$PSXd$5hF`bUM+)lL2{>~b_}yJof; zVHc}G6v3;a-m4jk|gQqervlYTa$!g?Fg8{o9-J{#j~@d#jKB zEx4}wpU2;P`^8sJ{=MIm*V8?HtG2jWef0OgS37?;DNjmJtAGCU#P03qZI?~%{QL3Q zRqblgcjd?V>gjdQ#SNUa>qR^6Zo0l-zYE*KEoT`RJpNk?>(8y8Ezx-9^Km=9yO@^G z4)ga9f1G^x)t??ed35N5u6~-9aKN1lw^{zh_HNa(j_K|e$F26hQ+0W8u}Rn1dviNNzT@IP-S?-*%hlp!p<{T zadlj^(KRSV6#YVpP2ykwG|U}uhPvMl-3Pke=}k@&o2#b50hk@3C-Hy?$V&Ve25>F>o3xPk6C{}bKO*VHc!=Txv#^73j19Z>!Y;CS(ptc zRF@x>^@oWS0Z_eV@c7HgAHVwcyUFKYe=+&f-Oc9h!P*la ze)pFzo;;rX`P;|eJo)^~$B(MXWP>A<$-OmSfBx;afBEX0-yeJV>%YRg#lv@*^{%D= z=6JpNPvOL`IsEzHrSl)Is5+hhFfHW!`OjN8cJrU_5ZL|jry$HW^L^u%Xxaz*U;(}N zYn=YPc?nxqbhEHq@!=U11M0zx)pEU9-TrR-%jLvqe}{KJhkC2?r*{>St99F4&FkRi zSM{p9+I1@q_2|=03Rix%+?}jOi`NR^uCIOi>YwoLCv9(c!uHN$Ug9vV7s)s2`9V-# zH|k)QxPyB-YHwltVzhGMsk=_?gW2#JpZzBD;T2~D|1al@zgBR5y;;e}?|>4%jQ`E9 zmvxhV*=M|j|CPsm_n!X?#oqJ3w{Tp+`s>tPj3IX}Hd*-V&?WDWw{tvH|Nn{9sK+2W zPk-5GJY4^hD{`j(DSu!8Z{hf``ffUFchje~a_g>MlvawKd0TyUotCL_U5b^zt+wfK z@nQ8vSC%eo{3yyV;&Pc_E!=YL=2eR58q4eP#eQ)8X~i!tzW;v5rGEI~Vl#mLw7PV0 zo|?;pb(a+R$1X40dfkN0lKoutKh8WCXx62FyXEy&D0^(jvwDPnn{F;I_LJsvi_8?L ziy|Tz?P(&^M;@)oCdz1?$-#R=WilZ}FKEso_~eDp(sIXTPM1BKpVp&LH?Ez9rzbYh zF(23llN5*%?t1`x0*d*GSWZf>yvo$F&Wrs z5`-6#_{gNk53n6W17gOgu&|JXM7<7{dP*1zI2{ZLD*G7W7UfA#PI}iSB~D|8pbflz zjT~cAWU82?&TvDWqbv!Mco30gC6jzuApnwPu*?z}0N+{U7^sN^bwLS`fg0DFSZDUn#&Ynw z4p|`Cm9s)*t#mdCIUhZtMrtG-4@PD*A0}PEcF}JVQ4v?QiylQz`}PmuvQ7*V?YS^+PpObZ(Y#9 zG%5dtg+$F{QpL%c!mrE)baxJDrB*Rf{w+(+1)&l!3-xS5COr3TeN)3HkTkg8Rd#_R z4Xp71ey*8iL~3j^NEH;JoxTX^am zLRQ+wp}hd$F_S~rQF7E!@L#2}rfA+yMyj2(g`-iI723TaSd%>=yZ|y#fO$p%k znPurrA*h1u7^TZ13Tp&h6f;mMP}DL9&=W2sC~UTpX(gGWL7sJ2-GT-*lY;y012#5+ zv+T-~yQ3lygBO9b*fm(3h|zf~ctVQ?TSq2HNGt1@mIi4LmgK3W&ZeY{iHRr16Ij&w 
zoY)~yuXzdEyw~z58yuo|49+q^wB#XMVu^`52?szXNOq!-+ypu9!EwEW@(5*OjD8Kq zh*~#&J@0_q9p|7OTpkwktrT)h1go)8PpNc*#gLNLjDg6ajYCI7^!1Q6dO5f`I>Lq>lQF+0yAREj8t76|tq zpp1rU!RVo0Oi{)pNE4-{#GrVIcW2#js2w-6{4I&^F*KL&4YPY)l@Fb>GD`u|LXI?O z!)#)Xa+?Dric(V-re|ilnV4Ug8yH6cn5Qfe>L} z?|GiIO4#7C8*PkuumfMGu5n0%#r?U+&}}$iw>m&0P*S)aF*aE1vJpWUP{rtYfmD9oQvOoXc%TWSiE6IM*~(&l@?Oc6l@;?7oqE$`uXWZ0X<~!0bS{Nngqcenm;YG8^Nto zLOPP2L91sFQV=7ekX@Iw$;mLyjIj_W#`Ib6VvcoU!k7+?o@jGIX)ZV#ymvyY7(9cx zi@?wbed@WD5>gyn=E%vQL3N`=W)fF=<@p}J%d(#2Do@Q%43mR*g~UNrr0_s|&A?Z^ zaFA9C7(#81B1N!W(0FC6r|9%VIj9oNfvFI>AirnrdZlLYqH!M29q8Z`#yak#&mdnC z!AZ$;t~_(v0|*oa-UoDkZ#9IBE^}ln&?Ge1SmB*aq9^@kap~^GXK?AuMTF~xg0?`( zfL$3VC!ur1)}XnZ$Tq1MBSHvh%M#U3m>s@>Cgo@dDVpqn zm-f88+>fz0?ClN&`;F~tcaO5rp+R0)$YClBL^R3~e*i5okK!bnxD%daZ6J+Yz$w8g z($^#rGJ_CRR`!AX;mq_@1`VC6U|HTp#Hjl}-T8vewB^N`(YoWCf^}mLP4wg@eQ@U^|*^24%<$ zt&AF>L;vpglb2%~%;3dN0%*E5fH4m$9vyQgV$Udk^j%;am-r8YQ#xLrJz5?$RU$jd zf|h}vFZKgYx9>{Z?{qxSnYLdVZQS#FBQ%1t8T=WaHXz{8Up|1*g1m+lD5Muf?MRJg zYm-Ga36jf#kV%p@0S}M1fPB zSmQMj$T>%e%n5RYLzNN7k&=B`w|n?{ObqnkgBul$641F(bAomW85k`Ae~v_W$xzZR zSjSQdQhI@fAa^Va0J939NVISKTC^eB=xCm9%2x&M7yr;^0F;WhS3Yvc0iH9@DFA3? zLUmZLT<}^kr6}}+1(Qvjp}5LZ0Gfeh516r+xW^{w;T1x8iUo*D;AlZ9FUvzzZ5ajM zF=R6wEDwl4tg&zElVhAP#%%z8GQak*_5Zn}6p>!56 zt<0876C?!yI7q0#1Uitk!8wX-1@|suBge=RqOZMWJ?VHf^*uB~jn(jz6lkGBr9HGm z;avhjLX%99B$6}p=s%2dN3-=&5S4%%N=oPnmkp7w?@{HMr_-m(b!d#EP!BppkQAND zFvJH#h%wqbFO!DM5hhrPf(nlK&1j74?5zT4Q<~Vm?s9J`bd1%*PeBp~JI;~_1=i&x zFxZD22q%&!9-QLf2ohoroB=}5s{qbf2wZkRJHz|FsC4+nfK!rhhVJw!K*1?znL_eV z;w36TWttc|4zL=84Vs3kWVuhUj41T_94I3U3c$vP#T;OI+4Td!_8SlPMz3mU5|^D; zu4q*-f*_NqW+>YE&(2WfK zL<{;$!;Bo0g{UFMYKaC*!Nw@sE;lk5D@tGW5JE%>W(){9bBsvioe1YU)#2CWJ~@W? 
zja{SNR}bC6srGi+xto_b1b^ZY!d()m{Ra>u-y7j~=Ow1k0bfk}yhpaW*1-mS3I=?oYGiAf?-ml;C| zC_`d|pP(|)^sFdMCd)nv-25MVSKHjSt))L#zk;{vjN@HPCP)w#DC0X&3) zYLVp*A)A1@D;PqcPn9t-O{^$~WF1~9O)ZiTt(<`IVmOI4v0a~kmDA{rG;F>~urSd+ z8TAI$T-1~ZcJ}`3N{z~`f%D^~yLraJ7VG?*Q21Eec;N~EA&3M9* z3~9vOLOGTgVcaHgLvQab>t_lD-FdPZTa&>b!wRM1so_ed2}g(lxG|>4WvXIfd1jr~ zDFP_0G|xB#6f%IavEN?YS5}RM+P$!*xm3i_OsjvDaQn+?c(VIa1?{=>GL^_VjMN6% zwMGuYGOMISS_?lF7n0?OS*cAd(}^v3p2ZFbp5;7E*3a%ry1Sv{Ke=wq)Pz8u%Q z_{nKpFRkHPPpi_!pWS%a`+3VM<4Tu{HXwIDpN?b_-d#dvQe={h8CnLhMPgzHq?N@! zGnofcvy86nd6#*E;AmEt- ziX6Lw+*hoT0AbXHx5y6QCvY`M;({?iwqjE?;KYsXD#lFWU58njWI&M|BunOG;<))osLFlei4pE9kA_)k zN9)!?BoZkQaAuan>Wml2*J8nuE@0rAo75?4?UB1urQL?2jm^c-fJyM-aYu-|I6$8r zTC*x5cfBlYd1)9>(`qx`u2UJS@MmLXvsh@B0V5?aG=`in0k5+ZkuPRR zCKS?F+NI)XM_yqyEtn^{itoDvqSm_wqY6J42VNTMq;V^CKTWs8rVU*zvvAiAs}ON2 zRZ1(DnM#Drtd=@9VQ%`V5CbV15_zfH5>U~HDb#}LDtYvxfGLGB1-;^nrsR^rM!Xhq$ zdh1<+G=?j|IY%ny;yi~NNYeXE_BD`xW3K?st_u(oz%b5K zVjZAYOdAy8;L@xlIK|SGhb)jV!NX)y@*JTd27Hs*#=0>z>b2=xbKAyP?QU4| zEW2L>N>a;Hn5NGQ0m6~)$6m*rr3nT~OLrJ}f;#;L960O7IE^`@nhX>V|Q zut*1E43i0F zW8@M(*|cSNp-^t6&Ue=(JP6W7MirwHIgKalN^m8%6arMn1=F;#f<~1f5Xb^ap_33P z_s$adzGD!j?^uZnyIydl`ZZlZ6ka)ie?Y*@Xo1Loae|4IhwE@5aG#2M1vPah z_RekGB)Pssab@*%=QH}DmO2--qb1I5f&4NS9+4~&X`V@{ta1c*Wey1;cuS+1M6SRY zmBeiD^R)-qXg%|JqUFqO=T3q)Pa^Tm;kDC@LDy1^)GW(s7cQ0PNGI}GX0b<#=TpU0 zo*8JHMV`au#yzRIunkk$S7z@4noLTEDPF5$yg3V>r*IN z8ao3e%VP$kFEEPr@V5L)00rG@yGnCk&%BIcKF9Z~=5;q^eAnZ$%d%~ozNfOqyHe|i zMy6n_NaF+`PRk6DP`DflB8&onhGiGR8=7k?c*qr)(rJcx!xT3OV#|h#_vIZiVN)a3 zQ6K!OohD&d#**#Bnf)tWZ%+UsCYQr$do*@8J@;wnHE$+IBI^S9RjKB14LMZNxTFB4 zHGse@r~UVE0GYHRQxbqc0Ld44if>Ff79_v4+Od0VgLvptu3SR3d9-&y0nTwm7i|@A zT%Ou2&b7`t!X!@tTMOFQ3Y-wr#^iW?vb@F!HVH6%QlU9Y{s0>@smdkc$WT-g%N%$V zxGZtjB?38%Fd|I$Q-y+s9O2^ggmNlnxl~?zvpzqmHeQD6@nN;?Z`CdIwk%0k-&uL^ z4SxoLMf%0Pv9x!-$dLOMG~bVq3zVT9pm~u!@NAX#LrYGw1bK`^c(?R12<9Avg?pVO9v(LffLS>h zcSht5CT?0?E9xIb`J+Xr{?H(m&}8 zC1!>^HoRDoz#%8}ajrC7k6`Xy=f2!OM0A9%0feT@?ALmGW zf9%U>MfHW(3+P_Ie{!Xwg9iD 
z$OPks$V5)xY)AkCw)po&{dw?#ZGOJT`#r)i8JnhV^4qh=YC z_Aa3;`NN0Tu?*@(;Xg4}NRwu?OSZ^l45W*YL(_sn(l^Io`##UDFOa3>F_rdklNEPa ze$$6P_*gWs3owm_C({s(gK8mnfqdY#=rG{CV2aW7Gfw~w;OvZ+3Mo09ModfB8^$tQ zuw2>Rv+G(b05OR`4oNUcpG3j2`V^*KBuG6|$>{3aER3-}m zSbmwsP#^<=f9m&t@BaQX9a8uE-f`FW-+3%ozyF-Ze|`V?5BQr|&abZ}2_Jf{W@qCh z%#$~*_=nbf0(iC*6UyfnD@^L18>LP(w~ovIQ@#6XHMf2cd8ekhIeGg2^DwXc!~eEw zU)S_)t@r(&{-G7@Hq6tZy|K5rh(10qKNe8^pnu>&{%kS)X@A@7|F!z)=)>T@_Z@fG zKQLpqV*dbKf7$;Z@OS?5_|>cE$t6BpGFou2O>d|TnmH}7VR(elOV zx93&s`s%BbH{X0m*9NxdTdh|o&(4m{zQ1^TcKqeZcTZutpanoaT6kmY)#=fTqi3&< zFOI)^cl;&{B$$5d{N2&Jljn`gs_uEAoAB0ky-bE%x>)R^8qZo<`u*jRtaeM}RvnMV zu)&|6($O5PcSmPm9lyJ%dxe0?yZeXM>FW=Nr4`4WFCMSHcz^lni?icb$4BSQUh1Q< zxA#+bR8qY{1Rh!#-hOqy;=I%4;qEW?xP`Fa(s$kY%N0tO&+V-nZXsa#k@}f$UmhR7 zI$wFir%MNgUTn0b_o{j864j=HwMA&U5e$P z1Mc^dN^yVdFjiG;ZT~wM%j%m-_t+HI2H`xINFGiQ56k}F!}Mm9s19;_`~AC@Aw-tH zE6G$>s{`N`A3Qm~czyKz^!(!J>~-txm`;(cJpv?o4!i43>Z9DoS7m=|n$?}oyvLK) zXICF>ZydEp(USjJd#7Rq^|0WV_MglfEj+l`YUR0_(f50P*1FM8ttrhOq8ea$poORt z1sEUss2%Xe-oJFllRdn~P>NcfXICDTu7^MyUp#J_2sI-*7S#lA_>VO-tQ*bmqNUxg zPh-CiRkPPGQdjEaL-7Z9W{K5Oa&}l9toK=eQU{Cqf0te7`Vm#MuN%-G$L#wIaWnB7 zr1Elq>AXJxg3!yqFSToob4&aAbjy<(Zr9_SxM_WwcGPXa@ouZm&&dnWp@#ELS}iI? 
zQqw1OC;n>MYE}Gs5^dJ}Xp@v$t(s?R9^aqcY~SM@tyanPC(AEuy#3{V)o4wJD{@)o zb*mMW^2zcck(CFJX6J*?d9pl$Xz~5^x#soHSr6@*Pc`_LWHG*~k94eoS1QMgx1x)i|Ss7lDE!Vboc#}@`cfV zw>}H!UM`+o8h%x52>&09uFC&dHyF}Gv!*L7!Oe<~OU%KI!0@PXq13>rR^6qs{QsSG z`RDoDHve;WeDvb=ap%hZ&Ub8||6y60vE}(6A;n+k|NRkvkD~5^3Lv5!Md$zd3Rj}p zakb%izv?W&-rz$-8&smuoQhZ{kslAQVD?u-JaqK-WD-3-9l62L$wBmHc-1op(evS@ z8@q$3-&a?PrW6mN)A0X;$W2XWzY`rr6ZlIvjz;6*Kb(Qb8bY z{9@FpvusM)&yUaEMabMw!Jey8r@V7v`aL(8P976_If#a%aIEltEBddaSKl0;N00Z2 zm+cuZ+i81prTYE7gJ^FE9;)8n{=Xqu!eN3ZWuK2*QMBZQA8DBb9$H=fc-kJBR;^R5cA}B*W{E(Nlv#-4Ht56*cKZ;XTBjE+C%+oFnR@>p}0wYfwtm z^>BS0bWlYfdN*!R^?wkA-L06~jlh86r;-+E&t&9G&)-I&5I0K4HX5n%^meYdKvg9> z=z&ja5OnSuVHDCtT%=<$4WiTmy@4drEhz&|MVJIS1$#?6&=x`NQxBjOH`H}M^i-O_ z-X!YQ%BM!XPT5sZz#Dcrx1nwXNsNX)k_848I;++xLEf0mD*EVdOH-Q?%TC`SIXt<* zjf=03zo+LRm*If88r`UIFCd7(-GH2jK0H=)uDf1=x(DT}wIfd0NKL|14O_n&TGt1Q zPN_zC4KpUtE7QNWKq{?YqUR-p0tL}8(cAIx1|l6tyZrklI<8rF-v@}BUxS;AmqBm4 z@fR)r+|}Q&?smfif!>*Fmg9O~D&5@QjrwXzAgrl(c4#JtAFd&XNkakmx3KG;8r*nb zSvhO|b_Wy?x^=tvAwJB>CDJ;fNk{eITs@-3z_`QOHYSJRW~sMrQsFgZFVfpq)4pnS z+NAlAdg#&vZ%$t$ome2Zq}PPVHREp?$d(8$+muBEbCN?mxt}^N@VC)hO>1}Mf#;`7 z#Mbowqc>39S-bat$Im&n)IBvQnG4zA)6x>kc<4L;1kpPu?6YQyl2M>iy)b z9y?`84)0&n&u8>=PW7ixpE~nq^ePhTjMI;PyzZHgZGv&rc2Mj6t@JK-wLtY@w|?OD z$#=&uo)G|^Qyu7?A((F9@9gJVi1B*(Gn}bvpPI|z;jHOhT(0OvSN)+;{Y%8|CwZL5 zb3#}2;mvU1R{IJ5A-uL8*ZUH6ttGZ>S^uN^Lw$IqdV|tk1jgA%^?A_krp~ORmoc7l zdTO0cY^1mKqUVEDr*&klVmkuyfMQY^(|gXA$f65m;^xrgqH6tkzuSqv1YB@GsjCrz z&7KcI{!-mIL~uKMOX)7#82$*zHp~&M#{>ry1_$9Pk#bFmc#Q>r9rUY^A8;DsIH0wk zZhtOgZ<_`97DWrs&j!^hxwKd2)D3`mP~W*82i(|Hc67e#spn4sQP2}af>F81oN*#+ zUtSM@;*!143RpF&A>rw`Vh5NwQ|G`kgmeNyKs(45nj9Vt$J2iTR@KC=W!8e`7WkX< zYLy3%LM}k?u~flEih^M-=29V*01Bq*%+UI`WHdmz>k~5EoZ_E}qjK|3bUeps^JCj* zW9Gkg-%H4Jk1+5g*cEN@3@6-}iipo6 zH5&DMChS~3h^~}%C19OBQB9c<8XY)bb@=R_!SzeH<33O%y`|C>0~d`kmq|v|E{L{n z8`4lpS6To=XW@gJ(VMqt^S;V=)3=4&v1rG<)f+%p!>RSdS&t1R+dIER?CnLTms2(9 z?O~+<*WR^mw~Zs|{jH}!nBItTOS0r^Hxs*$<2c)%lQ=n!lRbN~Ic!R%Y>p{WO^Uuu 
zqM!3`A7S_9=1I1y0FVGLl6;AsPRKKpmPw#cC;)}83I)KuY~dC6A;4U+O?@B@;*C8Y z3<)68KSlp{gf1@JMw@xRC66{w*Ilo_ZKZq`9QZ^VA5 zLC2n>#e+qk#9)H}iuYyqS563dcYNO+QJ09vm^`@fk&EFNdj%FbfR;UGFJO>BDZ9Z% zFSu$_iN~lm9WwRU`ii8wGr&?K=L%Wgh3fdhWa450KI-TQ?fuXJ5*I)M#Befl$ng3z z1`{J5vQcn>xg!>xy2v&jW1G+(vAuBs#@CTIbWxaYKc0a7hIZ7z>0^mQDCCOyPCUX; zRd~TAN=7d>>vMAOJF>6y&On`7F-f2^<@!+*`Yh5*tT33yWtYkWWPWqu=+S)GYh zc@!|{)~J(!mAraEf3;?gHnGw;BmWt@FgnOE)+?ZN8Az5I|G+ca)s&Cd{qJC55Cy0m z*zhn$o&FgPX-t=wWB}=4fV$^gqXwaO0!Y{|Qe|T&bcVQy!z|8cec(Y?gi7{-_9P5~ zVZBx(9zhrPPeHmc+RUVz@Trcv3jM^T1qYz7>IH}ocKwbrO5+{v<#y(A^EChAl3K@oBZCu1#0f48Y}Y)dBRe?#A1UaI|kJ9E1r z`_BN3%b#R=DSvo&Sy2IAzYd?*m^73*BjK~#?yUdX*;xG*0r}>yzkW47^{MlEGbi!6 zvW_v|t9YUC<|a2@N@vs=O_OOO{nb~h`_1P$(MBtq-8H<{eRF+d<(JK`#OKnooGB(~ z;GDvQVdT`1!Hxqr8r3I=A#_FfFr-h{L-zyw;cKS5V-5DSl}QPw>T zl#IYkP1&q0AdlkYB7hDbA&)Hys}hwVn8Ilw7`MqU{~~lQP*F#*Pw4a}&Z=C4>3aB@TE3Sb;GWB@inJjQy{ zM8J;SVIbN$YuG@CL0krPm0=aU$xop%v7t0xmia^(F>i?0fj9B5%H*_iZ8@zJ%gH-UE1xBtpYXT4js|2;f6 zXn%x-ZOFH98FS|Um9_PiZ2RNogVp?h4^O-2cs>|ZwKlroxNoc0Mk~)nJF8Y};|2EO zCl7jRFT?*Rx>ehCrhSJ0rRxVgF*;ShcAQl4nt`@pAU(^~g28vWVDQd$A85T`&SJs0 zu~txPSH9>Q`l26NQ-7Uj*8DHG8Qivvo8bTM#zuwz?%`Ra{WIG(+R^Ft%9zvs-R?%# z|Fg2P(Os+X-#t8!A1Mbv;H=PcZ$u|UZA@F!8tl`jr{qwlDa7rc2F&=^|NRfHc7-mv ztNCI;Ji@rX6AUgsbw&uBCV^O@9S4yIQ~VVGqXI$7BQA`e zO!agNqgLb0yyab7x>vE$QU{a}bQQ)|eKB%?uJ zpgt)rdNYorQ?Y)`Fyv18jDqJ<{Bw^PG%&yP+=*jImK8CcoF-eZByf>*fU$`;@!c); z4S+M5zSSB96F1s248HI9+T=*6LU#Z9-+wY8Klk`iUVaaJ;#Kvi&rJQF8AUfOV~+jb z*<8(@|FzQDSX;03|2;f&&;Rnhz6)jxetErr#NHgT9msO(8Z|r{evHN;d`e$_(~@0jGWb*MMl)(oJUPWk$SwBK z*nujs|DZYz+XNa^zwPd*!;orvG#A^%Z>$(H|0 z_7f+m@n8x(VO`{}jgm|!F8#v5sn~aI*?8|L&mk8do6gs?3ygocSN+C)Y~J`^>#pX; z|5~S7|GSUpu{Rju=_EFcto$-V6Kp+GD=0EB~SPn4f22+x@X0NBI95j>mAs zj9r6$u058=o?yTMzYbEIu3~#5Z(@pfOHK0KEa88HQIw{2#-1Gp(CtTVw1jSxf&!He8K5N@O^z)K(p#3ufZklz z9y(*&4MX#b{e@A1mNjxO?4cXsR(q2ptVgEtd0M!)nPS%XB-Lg+K5Bml7YqEWCL23Z z#6uU8jnm}OF5E6;G}(*2mpkuY9ohP}ReSH?L!;5C0o;20ksqaP?&2y(;}Z%5>J`R) 
zv5CwRJDqurDV*J6TnyY#9*3D1BslqETH|wIhLQJ&uefkEj6Gu$uJN>(kW!C=x?|DumtM{@mv>_+&! zIWzH<@RQ5)dQsPdgsr#j17?nt4fsjcW%NBMc19WG7bndIfeRy>NWq zfhj~Cn{91ll4dRi?RbQb>d=|d>weQIOw&K7GIvNdE8*8KC84U*Q^h zms%d$I+j-42gi@mhQY(1Eo%(s7_Ukj+O44#)CMETGCICOArDq33a0YtNMt`sQ zrLffu_j1g`4eFS`PRb7h&83jAlVt_dc3^CvPul!)UeqAx#89VXI9!*fcBc*!gqeLF zThp6xX(SV?(a5k?JWdy3xSeh;E@M4MdwR>tz&Lio(77_dVl_b7^jt(-RMKrK0R)OT zgV$}qsi9e}N=pQw31YCZRze}6w1|wYAelRedYoA51^fkb=_`}9yX%mU#hwVjR_AoAl+OM54sSmS}f(lI5 z<`HA2fn6~&6rnaT(CX1g;zfD=Ni>ss#&?XsQvp911kblmg~2Nfes>tW-*?B83bI#_{Q)7nI;m8cUFkj*X1`SxD$HJC z_Boh+7G%fszV8;W)5+bXaJ}&G?;K!PPyg28y80yn?72#O$8cSW_rbt=wr%bhs((#D z&SOy?6p&XiIDIM@{NuskR8@bIAh-&9slr~Wu$ONbu>UY&FS9`V>*Pk7yM^w`B{ro1 ze%5U`IUJwA6-P(&`R@s)FR9~qjO=ykKN!9*ZnFyIS17+i`4!5qP=1B-E0kZM{25P$ z@{`=R3#Z|CFJ24c=eC8udbRVqz#TvL?nS`id~au-hglFjLEc4(o78m%G~e~+%%r}> z_<8zmLDg{>49Am#(2AK+6@PVT#xOgRRxGl#QSujC!>&yLD1<|-O?w(FOdc`HCXu`ZXWSD^nG^YO& zprFqu)J0K2HJw+}`C`)hCa3ctiKkvsdvRxRUA+9p2T?kSH%C^jl&2}D8t|X8A1t| zS1e*Z2bD`S_S93%VAu+d@HDc|eb@Ib>(CwS!Vf%wlE4=p2WDBnJJBgj>v(~Ec;p8Y)GAS$#@ndMJI1c>sYCSP2X$NJw18mpmz{ zGP!kd9g|i(MRLo+!jx&cn@=H_P!idg2$w~h$-MOVW9W`Ub2qccjm!OrPL$(pEH5tu z*aM&pM$^CxBYx#c&xz0q6=#&9I&5kU#}50?M0a{7I*>rUO;O19_BM)%(DSo8QrM>c zD}Uor#s%!qo}Qif5erRv0& z>2@o`?Un_MDNFB8cmk5p$18nQ0k$c5MhcDXj41%aCK!#-5ITmT4<$BglZ z4Sox*?d!3vwoxN@j#{ppAXpzo&cMafn%$vg{lg6d#QbdfRLd%Q5~K%?+uUw74ZOtH z3-MB;_|j-7CPT(l5Jl-7h}dZ+lrseamuT>5mu6kk4P5*j?Q!UV2E3w}gXBssqnL#^ zXC@x(+ax|3nbtZ6jT{D7+)~kG+Q`IUm`PTdw+DG`(h|~c!$t;sEMPVj6#WEn_S>4`l zGJ9;(tw~}?dK+uytwJK{$&8`c70mIZLXC(~K@1Roq{F1ZJ+yM)C zP4oo2tahl1#QF6E3n`zC8pwRYX?*N_Qf*Q-Bd?0QtxF>SQ(!`%`>9dle@H;6WgVf? z0~Yh7R9=A8Z!>%u20xnBKtj@NE--cZNaeR35>BCi8^1NmS3r$4R?jGwr25EWW|*?2 zJd_fimNSPtfwW3zEbE(8n2zPrh~``aHzCW<7|`ezxE5|gIUvZbw|(9!?OD4wjmxlSsIXc4%)j*zNjXtMKxO^vy+U)fLmZ{z6?h*P7 z=pBt4x3337i&+_<8UK6CeH<23oE^^u@KeDd*|7Xv6`g5&JH?HtgSb0~JwX7dnM`Hi zcD^Yk>b1B<1u2P*OyjC+HJA=X1WEfC6dOUS(?Z_a7j}ki9+A|b5z`gAOeuwCcJ>$! 
zG{m$wSeE`qG)YH)ThuVhVwYxKa8@$dUvZeJP`{|%U>?DR-Gty?veOE5#q%?(NOjxkABzQT|9Tv8D+hm@QZFz-6r)9iY zt-BBp`Wp;;C}LYrB^?g?Bf3h#&l(GUaN_kDEg2cg2;g8@I{vaKyTFr&>YS!WZmDx{ zozxw}(ldJ#ihX@ZCcAnk_d%%dr*m4HWRmwxfL_dWzf^+h_p#AC<7J=Z znX~@WU0u)K|JvPL>ugr*Klktq0|~&Rr#0mdns3E&2CbNA3nt1+O3})R@|iD;$aRrq zk)wER@s(xmM9(q&0z}5cbzZ)Dvvah%Vt?4%J$iF!zkC1u=)d0X zSuFkq)ISWEB;gEG0w>-0mrszPr{C|`;xcOqX0nFW29AwhQ;c#CD35m=W%dXMLTamWp7cFo|X6JTwHgXCw;q1nf4k)(6 z3o-6ch^v1ZkIo`mW|BZpInhi6F5;* z1GZ91g=VObqO`!ED18gy407rWkF`n|;aEldsVqAofBY@M3jQ-~W(g~F2IrWhbxuXZ zho&}Z+bG~Q?E$<4S~65d=Q-K0r?7G@e`Aq^bJj2qljVWvdEdbTk*IabTNeeWm4~`# zz@2PnDl-nDMDDdX&SE<)A;`2aq2`@i+7cS zTq@mpzPlu3@%`v*1j<}ytbgV|rgbBsI~5Dgs`)6~tEmM!E1o6w+YudM5KeW(b5{_D zsL1G$CyRxL6vLu)Vz3upD8oDyWjrHwS4)i%CdGa-ZK3UwT3%tNeB0%ss|JRc?dXn1 zs^Mpa{*+xfeBmH;PrL{k+dVG0&M{I}5Tuol)k|uSgs>;GYF~{<#V0d?Ccge{xk#bK zk&}4UQK&6_-88eZV0TjzLTTMJd#V6#N`cR_N^TuGC4G}0SiB`cEOxAwhQ3)Q+jq~g zyP4V5g;MU+vyGM5>uHWzx3PC3QAPk0f&znRxygzmOXQ1#qmz_?R`)wMDfaLyWq4?$YPXqi>+dp`>cX-4AZogp@cQ|hAV^jpZ$T(8=VdvHRy>~D` z%{tCMtz_D2t+t|} z!t=F2*`2tp^Dbma6Ul7kFAxcdrSookz20Q}gG|N0yD(jQ(P`^^wcWua+^CjmBCupY z)1A33Cj`YRh*}Y*ez}_#G7B%1MqFy+R;v#7 z8-5deK8M3D-|eZL16P-{N)o60YPnj6e{nvHTwE7`X>@clIW>i8wNTT5jC#N;13bmX zZWsllI?k|d_tKk`ke$^vO2`R%B?Qa5WC^iiB3k2KBj3#KvpAXJo|+>7?;;lpI<=33 rK6%7v&@AtaWn!*o-Rh~H>ZzXUsh;Yop6aQdZ{_)aLC}Hu0B8XK_CP|K literal 0 HcmV?d00001 diff --git a/deps/sqlite3/sqlite-rembed-source/.github/workflows/release.yaml b/deps/sqlite3/sqlite-rembed-source/.github/workflows/release.yaml deleted file mode 100644 index 97e26912ce..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/.github/workflows/release.yaml +++ /dev/null @@ -1,122 +0,0 @@ -name: "Release" -on: - release: - types: [published] -permissions: - contents: read -jobs: - build-linux-x86_64-extension: - runs-on: ubuntu-20.04 - steps: - - uses: actions/checkout@v4 - - run: make loadable-release - - uses: actions/upload-artifact@v4 - with: - name: sqlite-rembed-linux-x86_64-extension - path: dist/release/* - 
build-macos-x86_64-extension: - runs-on: macos-12 - steps: - - uses: actions/checkout@v4 - - run: make loadable-release - - uses: actions/upload-artifact@v4 - with: - name: sqlite-rembed-macos-x86_64-extension - path: dist/release/* - build-macos-aarch64-extension: - runs-on: macos-14 - steps: - - uses: actions/checkout@v4 - - run: make loadable-release - - uses: actions/upload-artifact@v4 - with: - name: sqlite-rembed-macos-aarch64-extension - path: dist/release/* - build-windows-x86_64-extension: - runs-on: windows-2019 - steps: - - uses: actions/checkout@v4 - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - - run: make loadable-release - - uses: actions/upload-artifact@v4 - with: - name: sqlite-rembed-windows-x86_64-extension - path: dist/release/* - dist: - runs-on: ubuntu-latest - needs: - [ - build-linux-x86_64-extension, - build-macos-x86_64-extension, - build-macos-aarch64-extension, - build-windows-x86_64-extension, - ] - permissions: - contents: write - steps: - - uses: actions/checkout@v4 - - uses: actions/download-artifact@v4 - with: - name: sqlite-rembed-linux-x86_64-extension - path: dist/linux-x86_64 - - uses: actions/download-artifact@v4 - with: - name: sqlite-rembed-macos-x86_64-extension - path: dist/macos-x86_64 - - uses: actions/download-artifact@v4 - with: - name: sqlite-rembed-macos-aarch64-extension - path: dist/macos-aarch64 - - uses: actions/download-artifact@v4 - with: - name: sqlite-rembed-windows-x86_64-extension - path: dist/windows-x86_64 - - run: | - curl -L https://github.com/asg017/sqlite-dist/releases/download/v0.0.1-alpha.7/sqlite-dist-x86_64-unknown-linux-gnu.tar.xz \ - | tar xfJ - --strip-components 1 - - run: make sqlite-rembed.h - - run: ./sqlite-dist ./sqlite-dist.toml --input dist/ --output distx/ --version $(cat VERSION) - - run: | - gh release upload ${{ github.ref_name }} \ - distx/github_releases/* \ - distx/spm/* \ - distx/sqlpkg/* \ - distx/checksums.txt \ - distx/sqlite-dist-manifest.json \ - 
distx/install.sh - env: - GH_TOKEN: ${{ github.token }} - - name: Install node - uses: actions/setup-node@v3 - with: - node-version: "16" - registry-url: "https://registry.npmjs.org" - - run: | - npm publish --access public distx/npm/sqlite-rembed-darwin-arm64.tar.gz - npm publish --access public distx/npm/sqlite-rembed-darwin-x64.tar.gz - npm publish --access public distx/npm/sqlite-rembed-linux-x64.tar.gz - npm publish --access public distx/npm/sqlite-rembed.tar.gz - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - - uses: ruby/setup-ruby@v1 - with: - ruby-version: 3.2 - - run: | - for file in distx/gem/*; do - gem push "$file" - done - env: - GEM_HOST_API_KEY: ${{ secrets.GEM_HOST_API_KEY }} - - uses: actions/setup-python@v5 - with: - python-version: "3.12" - - run: pip install twine - - run: | - twine upload distx/pip/* - twine upload distx/datasette/* - twine upload distx/sqlite_utils/* - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} diff --git a/deps/sqlite3/sqlite-rembed-source/.github/workflows/test.yaml b/deps/sqlite3/sqlite-rembed-source/.github/workflows/test.yaml deleted file mode 100644 index 24f63c296a..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/.github/workflows/test.yaml +++ /dev/null @@ -1,60 +0,0 @@ -name: "Test" -on: - push: - branches: - - main -permissions: - contents: read -jobs: - build-linux-x86_64-extension: - runs-on: ubuntu-20.04 - steps: - - uses: actions/checkout@v4 - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - - run: make loadable static - #- run: pip install pytest numpy; make test-loadable - - uses: actions/upload-artifact@v4 - with: - name: sqlite-rembed-linux-x86_64-extension - path: dist/* - build-macos-x86_64-extension: - runs-on: macos-12 - steps: - - uses: actions/checkout@v4 - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - - run: make loadable static - #- run: /usr/local/opt/python@3/libexec/bin/python -m pip install pytest numpy; make test-loadable 
python=/usr/local/opt/python@3/libexec/bin/python - - uses: actions/upload-artifact@v4 - with: - name: sqlite-rembed-macos-x86_64-extension - path: dist/* - build-macos-aarch64-extension: - runs-on: macos-14 - steps: - - uses: actions/checkout@v4 - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - - run: make loadable static - #- run: /opt/homebrew/opt/python3/libexec/bin/python -m pip install pytest numpy --break-system-packages; make test-loadable python=/opt/homebrew/opt/python3/libexec/bin/python - - uses: actions/upload-artifact@v4 - with: - name: sqlite-rembed-macos-aarch64-extension - path: dist/* - build-windows-x86_64-extension: - runs-on: windows-2019 - steps: - - uses: actions/checkout@v4 - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - - run: make loadable static - #- run: pip install pytest numpy; make test-loadable - - uses: actions/upload-artifact@v4 - with: - name: sqlite-rembed-windows-x86_64-extension - path: dist/* diff --git a/deps/sqlite3/sqlite-rembed-source/.gitignore b/deps/sqlite3/sqlite-rembed-source/.gitignore deleted file mode 100644 index bc97e80e27..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/target -.env -dist/ diff --git a/deps/sqlite3/sqlite-rembed-source/Cargo.lock b/deps/sqlite3/sqlite-rembed-source/Cargo.lock deleted file mode 100644 index ff31d5ae3c..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/Cargo.lock +++ /dev/null @@ -1,847 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 3 - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "aho-corasick" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" -dependencies = [ - "memchr", -] - -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi", - "libc", - "winapi", -] - -[[package]] -name = "autocfg" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" - -[[package]] -name = "base64" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" - -[[package]] -name = "bindgen" -version = "0.60.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "062dddbc1ba4aca46de6338e2bf87771414c335f7b2f2036e8f3e9befebf88e6" -dependencies = [ - "bitflags 1.3.2", - "cexpr", - "clang-sys", - "clap", - "env_logger", - "lazy_static", - "lazycell", - "log", - "peeking_take_while", - "proc-macro2", - "quote", - "regex", - "rustc-hash", - "shlex", - "which", -] - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" - -[[package]] -name = "byteorder" -version = "1.5.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - -[[package]] -name = "cc" -version = "1.0.98" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" - -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "clang-sys" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f803f94ecf597339c7a34eed2036ef83f86aaba937f001f7c5b5e251f043f1f9" -dependencies = [ - "glob", - "libc", - "libloading", -] - -[[package]] -name = "clap" -version = "3.2.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" -dependencies = [ - "atty", - "bitflags 1.3.2", - "clap_lex", - "indexmap", - "strsim", - "termcolor", - "textwrap", -] - -[[package]] -name = "clap_lex" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" -dependencies = [ - "os_str_bytes", -] - -[[package]] -name = "crc32fast" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "either" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" - 
-[[package]] -name = "env_logger" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" -dependencies = [ - "atty", - "humantime", - "log", - "regex", - "termcolor", -] - -[[package]] -name = "errno" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" -dependencies = [ - "libc", - "windows-sys", -] - -[[package]] -name = "flate2" -version = "1.0.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - -[[package]] -name = "form_urlencoded" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" -dependencies = [ - "percent-encoding", -] - -[[package]] -name = "getrandom" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "glob" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "home" -version = "0.5.9" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" -dependencies = [ - "windows-sys", -] - -[[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" - -[[package]] -name = "idna" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown", -] - -[[package]] -name = "itoa" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - -[[package]] -name = "libc" -version = "0.2.155" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" - -[[package]] -name = "libloading" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" -dependencies = [ - "cfg-if", - "windows-targets", -] - -[[package]] -name = "linux-raw-sys" -version = "0.4.14" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" - -[[package]] -name = "log" -version = "0.4.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" - -[[package]] -name = "memchr" -version = "2.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" - -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "miniz_oxide" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" -dependencies = [ - "adler", -] - -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - -[[package]] -name = "once_cell" -version = "1.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" - -[[package]] -name = "os_str_bytes" -version = "6.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" - -[[package]] -name = "peeking_take_while" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" - -[[package]] -name = "percent-encoding" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" - -[[package]] -name = "proc-macro2" -version = "1.0.84" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quote" -version = "1.0.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "regex" -version = "1.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" - -[[package]] -name = "ring" -version = "0.17.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" -dependencies = [ - "cc", - "cfg-if", - "getrandom", - "libc", - "spin", - "untrusted", - "windows-sys", -] - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "rustix" -version = "0.38.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" -dependencies = [ - "bitflags 2.5.0", - "errno", - "libc", - "linux-raw-sys", - "windows-sys", -] - -[[package]] -name = "rustls" -version = "0.22.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" -dependencies = [ - "log", - "ring", - "rustls-pki-types", - "rustls-webpki", - "subtle", - "zeroize", -] - -[[package]] -name = "rustls-pki-types" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" - -[[package]] -name = "rustls-webpki" -version = "0.102.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff448f7e92e913c4b7d4c6d8e4540a1724b319b4152b8aef6d4cf8339712b33e" -dependencies = [ - "ring", - "rustls-pki-types", - "untrusted", -] - -[[package]] -name = "ryu" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" - -[[package]] -name = "serde" -version = "1.0.203" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.203" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "serde_json" -version = "1.0.117" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "shlex" -version = "1.3.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" - -[[package]] -name = "sqlite-loadable" -version = "0.0.6-alpha.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daaaad0ad506b154a72bf01fde23235377c01256abd4bd25e17419dbfd4e28a0" -dependencies = [ - "bitflags 1.3.2", - "serde", - "serde_json", - "sqlite-loadable-macros", - "sqlite3ext-sys", -] - -[[package]] -name = "sqlite-loadable-macros" -version = "0.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96037a396115a2675db783f700faad878b44c8ff56c8a29c3404649a517a5e8f" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "sqlite-rembed" -version = "0.0.1-alpha.9" -dependencies = [ - "serde_json", - "sqlite-loadable", - "ureq", - "zerocopy", -] - -[[package]] -name = "sqlite3ext-sys" -version = "0.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3afdc2b3dc08f16d6eecf8aa07d19975a268603ab1cca67d3f9b4172c507cf16" -dependencies = [ - "bindgen", - "cc", -] - -[[package]] -name = "strsim" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" - -[[package]] -name = "subtle" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - 
-[[package]] -name = "syn" -version = "2.0.66" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "textwrap" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" - -[[package]] -name = "tinyvec" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - -[[package]] -name = "unicode-bidi" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" - -[[package]] -name = "unicode-ident" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" - -[[package]] -name = "unicode-normalization" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "untrusted" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" - 
-[[package]] -name = "ureq" -version = "2.9.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d11a831e3c0b56e438a28308e7c810799e3c118417f342d30ecec080105395cd" -dependencies = [ - "base64", - "flate2", - "log", - "once_cell", - "rustls", - "rustls-pki-types", - "rustls-webpki", - "serde", - "serde_json", - "url", - "webpki-roots", -] - -[[package]] -name = "url" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", -] - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "webpki-roots" -version = "0.26.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3de34ae270483955a94f4b21bdaaeb83d508bb84a01435f393818edb0012009" -dependencies = [ - "rustls-pki-types", -] - -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" -dependencies = [ - "windows-sys", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-targets" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" - -[[package]] -name = "zerocopy" -version = "0.7.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" -dependencies = [ - "byteorder", - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "zeroize" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" diff --git a/deps/sqlite3/sqlite-rembed-source/Cargo.toml b/deps/sqlite3/sqlite-rembed-source/Cargo.toml deleted file mode 100644 index 5d0bacb2f0..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/Cargo.toml +++ /dev/null @@ -1,14 +0,0 @@ -[package] -name = "sqlite-rembed" -version = "0.0.1-alpha.9" -edition = "2021" - -[dependencies] -serde_json = "1.0.117" -sqlite-loadable = "0.0.6-alpha.6" -ureq = {version="2.9.7", features=["json"]} -zerocopy = "0.7.34" - -[lib] -crate-type=["cdylib", "staticlib", "lib"] - diff --git 
a/deps/sqlite3/sqlite-rembed-source/LICENSE-APACHE b/deps/sqlite3/sqlite-rembed-source/LICENSE-APACHE deleted file mode 100644 index f49a4e16e6..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/LICENSE-APACHE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. 
Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative 
Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
\ No newline at end of file diff --git a/deps/sqlite3/sqlite-rembed-source/LICENSE-MIT b/deps/sqlite3/sqlite-rembed-source/LICENSE-MIT deleted file mode 100644 index 9736ab442a..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/LICENSE-MIT +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2022 Alex Garcia - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/deps/sqlite3/sqlite-rembed-source/Makefile b/deps/sqlite3/sqlite-rembed-source/Makefile deleted file mode 100644 index 9bd7661aa4..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/Makefile +++ /dev/null @@ -1,141 +0,0 @@ -SHELL := /bin/bash - -VERSION=$(shell cat VERSION) - -ifeq ($(shell uname -s),Darwin) -CONFIG_DARWIN=y -else ifeq ($(OS),Windows_NT) -CONFIG_WINDOWS=y -else -CONFIG_LINUX=y -endif - -LIBRARY_PREFIX=lib -ifdef CONFIG_DARWIN -LOADABLE_EXTENSION=dylib -STATIC_EXTENSION=a -endif - -ifdef CONFIG_LINUX -LOADABLE_EXTENSION=so -STATIC_EXTENSION=a -endif - - -ifdef CONFIG_WINDOWS -LOADABLE_EXTENSION=dll -LIBRARY_PREFIX= -STATIC_EXTENSION=lib -endif - -prefix=dist -TARGET_LOADABLE=$(prefix)/debug/rembed0.$(LOADABLE_EXTENSION) -TARGET_LOADABLE_RELEASE=$(prefix)/release/rembed0.$(LOADABLE_EXTENSION) - -TARGET_STATIC=$(prefix)/debug/$(LIBRARY_PREFIX)sqlite_rembed0.$(STATIC_EXTENSION) -TARGET_STATIC_RELEASE=$(prefix)/release/$(LIBRARY_PREFIX)sqlite_rembed0.$(STATIC_EXTENSION) - -TARGET_H=$(prefix)/debug/sqlite-rembed.h -TARGET_H_RELEASE=$(prefix)/release/sqlite-rembed.h - -TARGET_WHEELS=$(prefix)/debug/wheels -TARGET_WHEELS_RELEASE=$(prefix)/release/wheels - -INTERMEDIATE_PYPACKAGE_EXTENSION=python/sqlite_rembed/sqlite_rembed/rembed0.$(LOADABLE_EXTENSION) - -ifdef target -CARGO_TARGET=--target=$(target) -BUILT_LOCATION=target/$(target)/debug/$(LIBRARY_PREFIX)sqlite_rembed.$(LOADABLE_EXTENSION) -BUILT_LOCATION_RELEASE=target/$(target)/release/$(LIBRARY_PREFIX)sqlite_rembed.$(LOADABLE_EXTENSION) -BUILT_LOCATION_STATIC=target/$(target)/debug/$(LIBRARY_PREFIX)sqlite_rembed.$(STATIC_EXTENSION) -BUILT_LOCATION_STATIC_RELEASE=target/$(target)/release/$(LIBRARY_PREFIX)sqlite_rembed.$(STATIC_EXTENSION) -else -CARGO_TARGET= -BUILT_LOCATION=target/debug/$(LIBRARY_PREFIX)sqlite_rembed.$(LOADABLE_EXTENSION) -BUILT_LOCATION_RELEASE=target/release/$(LIBRARY_PREFIX)sqlite_rembed.$(LOADABLE_EXTENSION) 
-BUILT_LOCATION_STATIC=target/debug/$(LIBRARY_PREFIX)sqlite_rembed.$(STATIC_EXTENSION) -BUILT_LOCATION_STATIC_RELEASE=target/release/$(LIBRARY_PREFIX)sqlite_rembed.$(STATIC_EXTENSION) -endif - -ifdef python -PYTHON=$(python) -else -PYTHON=python3 -endif - -ifdef IS_MACOS_ARM -RENAME_WHEELS_ARGS=--is-macos-arm -else -RENAME_WHEELS_ARGS= -endif - -$(prefix): - mkdir -p $(prefix)/debug - mkdir -p $(prefix)/release - -$(TARGET_WHEELS): $(prefix) - mkdir -p $(TARGET_WHEELS) - -$(TARGET_WHEELS_RELEASE): $(prefix) - mkdir -p $(TARGET_WHEELS_RELEASE) - -$(TARGET_LOADABLE): $(prefix) $(shell find . -type f -name '*.rs') - cargo build --verbose $(CARGO_TARGET) - cp $(BUILT_LOCATION) $@ - -$(TARGET_LOADABLE_RELEASE): $(prefix) $(shell find . -type f -name '*.rs') - cargo build --verbose --release $(CARGO_TARGET) - cp $(BUILT_LOCATION_RELEASE) $@ - -$(TARGET_STATIC): $(prefix) $(shell find . -type f -name '*.rs') - cargo build --verbose $(CARGO_TARGET) --features=sqlite-loadable/static - ls target - ls target/$(target)/debug - cp $(BUILT_LOCATION_STATIC) $@ - -$(TARGET_STATIC_RELEASE): $(prefix) $(shell find . 
-type f -name '*.rs') - cargo build --verbose --release $(CARGO_TARGET) --features=sqlite-loadable/static - cp $(BUILT_LOCATION_STATIC_RELEASE) $@ - -$(TARGET_H): sqlite-rembed.h - cp $< $@ - -$(TARGET_H_RELEASE): sqlite-rembed.h - cp $< $@ - -Cargo.toml: VERSION - cargo set-version `cat VERSION` - -version: - make Cargo.toml - -format: - cargo fmt - -release: $(TARGET_LOADABLE_RELEASE) $(TARGET_STATIC_RELEASE) - -loadable: $(TARGET_LOADABLE) -loadable-release: $(TARGET_LOADABLE_RELEASE) - -static: $(TARGET_STATIC) $(TARGET_H) -static-release: $(TARGET_STATIC_RELEASE) $(TARGET_H_RELEASE) - -debug: loadable static python datasette -release: loadable-release static-release python-release datasette-release - -clean: - rm dist/* - cargo clean - -test-loadable: - $(PYTHON) tests/test-loadable.py - -publish-release: - ./scripts/publish_release.sh - -.PHONY: clean \ - test test-loadable test-python test-npm test-deno \ - loadable loadable-release \ - static static-release \ - debug release \ - format version publish-release diff --git a/deps/sqlite3/sqlite-rembed-source/README.md b/deps/sqlite3/sqlite-rembed-source/README.md deleted file mode 100644 index d59a4fc0c8..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/README.md +++ /dev/null @@ -1,134 +0,0 @@ -# `sqlite-rembed` - -A SQLite extension for generating text embeddings from remote APIs (OpenAI, Nomic, Cohere, llamafile, Ollama, etc.). A sister project to [`sqlite-vec`](https://github.com/asg017/sqlite-vec) and [`sqlite-lembed`](https://github.com/asg017/sqlite-lembed). A work-in-progress! - -## Usage - -```sql -.load ./rembed0 - -INSERT INTO temp.rembed_clients(name, options) - VALUES ('text-embedding-3-small', 'openai'); - -select rembed( - 'text-embedding-3-small', - 'The United States Postal Service is an independent agency...' -); -``` - -The `temp.rembed_clients` virtual table lets you "register" clients with pure `INSERT INTO` statements. 
The `name` field is a unique identifier for a given client, and `options` allows you to specify which 3rd party embedding service you want to use. - -In this case, `openai` is a pre-defined client that will default to OpenAI's `https://api.openai.com/v1/embeddings` endpoint and will source your API key from the `OPENAI_API_KEY` environment variable. The name of the client, `text-embedding-3-small`, will be used as the embeddings model. - -Other pre-defined clients include: - -| Client name | Provider | Endpoint | API Key | -| ------------ | ------------------------------------------------------------------------------------ | ---------------------------------------------- | -------------------- | -| `openai` | [OpenAI](https://platform.openai.com/docs/guides/embeddings) | `https://api.openai.com/v1/embeddings` | `OPENAI_API_KEY` | -| `nomic` | [Nomic](https://docs.nomic.ai/reference/endpoints/nomic-embed-text) | `https://api-atlas.nomic.ai/v1/embedding/text` | `NOMIC_API_KEY` | -| `cohere` | [Cohere](https://docs.cohere.com/reference/embed) | `https://api.cohere.com/v1/embed` | `CO_API_KEY` | -| `jina` | [Jina](https://api.jina.ai/redoc#tag/embeddings) | `https://api.jina.ai/v1/embeddings` | `JINA_API_KEY` | -| `mixedbread` | [MixedBread](https://www.mixedbread.ai/api-reference#quick-start-guide) | `https://api.mixedbread.ai/v1/embeddings/` | `MIXEDBREAD_API_KEY` | -| `llamafile` | [llamafile](https://github.com/Mozilla-Ocho/llamafile) | `http://localhost:8080/embedding` | None | -| `ollama` | [Ollama](https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings) | `http://localhost:11434/api/embeddings` | None | - -Different client options can be specified with `remebed_client_options()`. 
For example, if you have a different OpenAI-compatible service you want to use, then you can use: - -```sql -INSERT INTO temp.rembed_clients(name, options) VALUES - ( - 'xyz-small-1', - rembed_client_options( - 'format', 'openai', - 'url', 'https://api.xyz.com/v1/embeddings', - 'key', 'xyz-ca865ece65-hunter2' - ) - ); -``` - -Or to use a llamafile server that's on a different port: - -```sql -INSERT INTO temp.rembed_clients(name, options) VALUES - ( - 'xyz-small-1', - rembed_client_options( - 'format', 'lamafile', - 'url', 'http://localhost:9999/embedding' - ) - ); -``` - -### Using with `sqlite-vec` - -`sqlite-rembed` works well with [`sqlite-vec`](https://github.com/asg017/sqlite-vec), a SQLite extension for vector search. Embeddings generated with `rembed()` use the same BLOB format for vectors that `sqlite-vec` uses. - -Here's a sample "semantic search" application, made from a sample dataset of news article headlines. - -```sql -create table articles( - headline text -); - --- Random NPR headlines from 2024-06-04 -insert into articles VALUES - ('Shohei Ohtani''s ex-interpreter pleads guilty to charges related to gambling and theft'), - ('The jury has been selected in Hunter Biden''s gun trial'), - ('Larry Allen, a Super Bowl champion and famed Dallas Cowboy, has died at age 52'), - ('After saying Charlotte, a lone stingray, was pregnant, aquarium now says she''s sick'), - ('An Epoch Times executive is facing money laundering charge'); - - --- Build a vector table with embeddings of article headlines, using OpenAI's API -create virtual table vec_articles using vec0( - headline_embeddings float[1536] -); - -insert into vec_articles(rowid, headline_embeddings) - select rowid, rembed('text-embedding-3-small', headline) - from articles; - -``` - -Now we have a regular `articles` table that stores text headlines, and a `vec_articles` virtual table that stores embeddings of the article headlines, using OpenAI's `text-embedding-3-small` model. 
- -To perform a "semantic search" on the embeddings, we can query the `vec_articles` table with an embedding of our query, and join the results back to our `articles` table to retrieve the original headlines. - -```sql -param set :query 'firearm courtroom' - -with matches as ( - select - rowid, - distance - from vec_articles - where headline_embeddings match rembed('text-embedding-3-small', :query) - order by distance - limit 3 -) -select - headline, - distance -from matches -left join articles on articles.rowid = matches.rowid; - -/* -+--------------------------------------------------------------+------------------+ -| headline | distance | -+--------------------------------------------------------------+------------------+ -| The jury has been selected in Hunter Biden's gun trial | 1.05906391143799 | -+--------------------------------------------------------------+------------------+ -| Shohei Ohtani's ex-interpreter pleads guilty to charges rela | 1.2574303150177 | -| ted to gambling and theft | | -+--------------------------------------------------------------+------------------+ -| An Epoch Times executive is facing money laundering charge | 1.27144026756287 | -+--------------------------------------------------------------+------------------+ -*/ -``` - -Notice how "firearm courtroom" doesn't appear in any of these headlines, but it can still figure out that "Hunter Biden's gun trial" is related, and the other two justice-related articles appear on top. - -## Drawbacks - -1. **No batch support yet.** If you use `rembed()` in a batch UPDATE or INSERT in 1,000 rows, then 1,000 HTTP requests will be made. Add a :+1: to [Issue #1](https://github.com/asg017/sqlite-rembed/issues/1) if you want to see this fixed. -2. **No builtin rate limiting.** Requests are sent sequentially so this may not come up in small demos, but `sqlite-rembed` could add features that handles rate limiting/retries implicitly. 
Add a :+1: to [Issue #2](https://github.com/asg017/sqlite-rembed/issues/2) if you want to see this implemented. diff --git a/deps/sqlite3/sqlite-rembed-source/VERSION b/deps/sqlite3/sqlite-rembed-source/VERSION deleted file mode 100644 index 1429ae3183..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.0.1-alpha.9 \ No newline at end of file diff --git a/deps/sqlite3/sqlite-rembed-source/build.rs b/deps/sqlite3/sqlite-rembed-source/build.rs deleted file mode 100644 index c5c0c3b4a1..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/build.rs +++ /dev/null @@ -1,9 +0,0 @@ -use std::process::Command; -fn main() { - let output = Command::new("git") - .args(["rev-parse", "HEAD"]) - .output() - .unwrap(); - let git_hash = String::from_utf8(output.stdout).unwrap(); - println!("cargo:rustc-env=GIT_HASH={}", git_hash); -} diff --git a/deps/sqlite3/sqlite-rembed-source/examples/simple-search/demo.sql b/deps/sqlite3/sqlite-rembed-source/examples/simple-search/demo.sql deleted file mode 100644 index 20ee88b0ed..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/examples/simple-search/demo.sql +++ /dev/null @@ -1,48 +0,0 @@ -.bail on -.mode table -.header on - -.timer on - -.load ../../dist/debug/rembed0 -.load ../../../sqlite-vec/dist/vec0 - -INSERT INTO temp.rembed_clients(name, options) - VALUES ('text-embedding-3-small', 'openai'); - -create table articles(headline text); - - --- Random NPR headlines from 2024-06-04 -insert into articles VALUES - ('Shohei Ohtani''s ex-interpreter pleads guilty to charges related to gambling and theft'), - ('The jury has been selected in Hunter Biden''s gun trial'), - ('Larry Allen, a Super Bowl champion and famed Dallas Cowboy, has died at age 52'), - ('After saying Charlotte, a lone stingray, was pregnant, aquarium now says she''s sick'), - ('An Epoch Times executive is facing money laundering charge'); - - --- Seed a vector table with embeddings of article headlines, using OpenAI's API -create 
virtual table vec_articles using vec0(headline_embeddings float[1536]); - -insert into vec_articles(rowid, headline_embeddings) - select rowid, rembed('text-embedding-3-small', headline) - from articles; - - -.param set :query 'firearm courtroom' - -with matches as ( - select - rowid, - distance - from vec_articles - where headline_embeddings match rembed('text-embedding-3-small', :query) - order by distance - limit 3 -) -select - headline, - distance -from matches -left join articles on articles.rowid = matches.rowid; diff --git a/deps/sqlite3/sqlite-rembed-source/scripts/publish-release.sh b/deps/sqlite3/sqlite-rembed-source/scripts/publish-release.sh deleted file mode 100755 index 0bfecc192d..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/scripts/publish-release.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -set -euo pipefail xtrace - -if [[ -n $(git status --porcelain | grep -v VERSION | grep -v sqlite-dist.toml) ]]; then - echo "❌ There are other un-staged changes to the repository besides VERSION and sqlite-dist.toml" - exit 1 -fi - -VERSION="$(cat VERSION)" - -echo "Publishing version v$VERSION..." - -make version -git add --all -git commit -m "v$VERSION" -git tag v$VERSION -git push origin main v$VERSION - -if grep -qE "alpha|beta" VERSION; then - gh release create v$VERSION --title=v$VERSION --prerelease --notes="" -else - gh release create v$VERSION --title=v$VERSION -fi - - -echo "✅ Published! 
version v$VERSION" diff --git a/deps/sqlite3/sqlite-rembed-source/sqlite-dist.toml b/deps/sqlite3/sqlite-rembed-source/sqlite-dist.toml deleted file mode 100644 index d3671aacab..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/sqlite-dist.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] -name = "sqlite-rembed" -license = "MIT OR Apache" -homepage = "https://alexgarcia.xyz/sqlite-rembed" -repo = "https://github.com/asg017/sqlite-rembed" -description = "A SQLite extension for generating text embeddings from remote sources (OpenAI, Cohere, localhost, etc.)" -authors = ["Alex Garcia"] -git_tag_format = "v$VERSION" - -[targets] -github_releases = {} -sqlpkg = {} -spm = {} - -pip = {} -datasette = {} -sqlite_utils = {} - -npm = {} - -gem = { module_name = "SqliteRembed" } diff --git a/deps/sqlite3/sqlite-rembed-source/sqlite-rembed.h b/deps/sqlite3/sqlite-rembed-source/sqlite-rembed.h deleted file mode 100644 index b47a3f24c6..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/sqlite-rembed.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _SQLITE_REMBED_H -#define _SQLITE_REMBED_H - -#ifdef __cplusplus -extern "C" { -#endif - -int sqlite3_rembed_init(sqlite3*, char**, const sqlite3_api_routines*); - -#ifdef __cplusplus -} /* end of the 'extern "C"' block */ -#endif - -#endif /* ifndef _SQLITE_REMBED_H */ diff --git a/deps/sqlite3/sqlite-rembed-source/src/clients.rs b/deps/sqlite3/sqlite-rembed-source/src/clients.rs deleted file mode 100644 index 5f83b9a386..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/src/clients.rs +++ /dev/null @@ -1,516 +0,0 @@ -use sqlite_loadable::{Error, Result}; - -pub(crate) fn try_env_var(key: &str) -> Result { - std::env::var(key) - .map_err(|_| Error::new_message(format!("{} environment variable not define. 
Alternatively, pass in an API key with rembed_client_options", DEFAULT_OPENAI_API_KEY_ENV))) -} - -#[derive(Clone)] -pub struct OpenAiClient { - model: String, - url: String, - key: String, -} -const DEFAULT_OPENAI_URL: &str = "https://api.openai.com/v1/embeddings"; -const DEFAULT_OPENAI_API_KEY_ENV: &str = "OPENAI_API_KEY"; - -impl OpenAiClient { - pub fn new>( - model: S, - url: Option, - key: Option, - ) -> Result { - Ok(Self { - model: model.into(), - url: url.unwrap_or(DEFAULT_OPENAI_URL.to_owned()), - key: match key { - Some(key) => key, - None => try_env_var(DEFAULT_OPENAI_API_KEY_ENV)?, - }, - }) - } - pub fn infer_single(&self, input: &str) -> Result> { - let body = serde_json::json!({ - "input": input, - "model": self.model - }); - - let data: serde_json::Value = ureq::post(&self.url) - .set("Content-Type", "application/json") - .set("Authorization", format!("Bearer {}", self.key).as_str()) - .send_bytes( - serde_json::to_vec(&body) - .map_err(|error| { - Error::new_message(format!("Error serializing body to JSON: {error}")) - })? - .as_ref(), - ) - .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? 
- .into_json() - .map_err(|error| { - Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) - })?; - OpenAiClient::parse_single_response(data) - } - - pub fn parse_single_response(value: serde_json::Value) -> Result> { - value - .get("data") - .ok_or_else(|| Error::new_message("expected 'data' key in response body")) - .and_then(|v| { - v.get(0) - .ok_or_else(|| Error::new_message("expected 'data.0' path in response body")) - }) - .and_then(|v| { - v.get("embedding").ok_or_else(|| { - Error::new_message("expected 'data.0.embedding' path in response body") - }) - }) - .and_then(|v| { - v.as_array().ok_or_else(|| { - Error::new_message("expected 'data.0.embedding' path to be an array") - }) - }) - .and_then(|arr| { - arr.iter() - .map(|v| { - v.as_f64() - .ok_or_else(|| { - Error::new_message( - "expected 'data.0.embedding' array to contain floats", - ) - }) - .map(|f| f as f32) - }) - .collect() - }) - } -} - -#[derive(Clone)] -pub struct NomicClient { - model: String, - url: String, - key: String, -} -const DEFAULT_NOMIC_URL: &str = "https://api-atlas.nomic.ai/v1/embedding/text"; -const DEFAULT_NOMIC_API_KEY_ENV: &str = "NOMIC_API_KEY"; - -impl NomicClient { - pub fn new>( - model: S, - url: Option, - key: Option, - ) -> Result { - Ok(Self { - model: model.into(), - url: url.unwrap_or(DEFAULT_NOMIC_URL.to_owned()), - key: match key { - Some(key) => key, - None => try_env_var(DEFAULT_NOMIC_API_KEY_ENV)?, - }, - }) - } - - pub fn infer_single(&self, input: &str, input_type: Option<&str>) -> Result> { - let mut body = serde_json::Map::new(); - body.insert("texts".to_owned(), vec![input.to_owned()].into()); - body.insert("model".to_owned(), self.model.to_owned().into()); - - if let Some(input_type) = input_type { - body.insert("input_type".to_owned(), input_type.to_owned().into()); - } - - let data: serde_json::Value = ureq::post(&self.url) - .set("Content-Type", "application/json") - .set("Authorization", format!("Bearer {}", self.key).as_str()) 
- .send_bytes( - serde_json::to_vec(&body) - .map_err(|error| { - Error::new_message(format!("Error serializing body to JSON: {error}")) - })? - .as_ref(), - ) - .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? - .into_json() - .map_err(|error| { - Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) - })?; - NomicClient::parse_single_response(data) - } - pub fn parse_single_response(value: serde_json::Value) -> Result> { - value - .get("embeddings") - .ok_or_else(|| Error::new_message("expected 'embeddings' key in response body")) - .and_then(|v| { - v.get(0).ok_or_else(|| { - Error::new_message("expected 'embeddings.0' path in response body") - }) - }) - .and_then(|v| { - v.as_array().ok_or_else(|| { - Error::new_message("expected 'embeddings.0' path to be an array") - }) - }) - .and_then(|arr| { - arr.iter() - .map(|v| { - v.as_f64() - .ok_or_else(|| { - Error::new_message( - "expected 'embeddings.0' array to contain floats", - ) - }) - .map(|f| f as f32) - }) - .collect() - }) - } -} - -#[derive(Clone)] -pub struct CohereClient { - url: String, - model: String, - key: String, -} -const DEFAULT_COHERE_URL: &str = "https://api.cohere.com/v1/embed"; -const DEFAULT_COHERE_API_KEY_ENV: &str = "CO_API_KEY"; - -impl CohereClient { - pub fn new>( - model: S, - url: Option, - key: Option, - ) -> Result { - Ok(Self { - model: model.into(), - url: url.unwrap_or(DEFAULT_COHERE_URL.to_owned()), - key: match key { - Some(key) => key, - None => try_env_var(DEFAULT_COHERE_API_KEY_ENV)?, - }, - }) - } - - pub fn infer_single(&self, input: &str, input_type: Option<&str>) -> Result> { - let mut body = serde_json::Map::new(); - body.insert("texts".to_owned(), vec![input.to_owned()].into()); - body.insert("model".to_owned(), self.model.to_owned().into()); - - if let Some(input_type) = input_type { - body.insert("input_type".to_owned(), input_type.to_owned().into()); - } - - let data: serde_json::Value = 
ureq::post(&self.url) - .set("Content-Type", "application/json") - .set("Accept", "application/json") - .set("Authorization", format!("Bearer {}", self.key).as_str()) - .send_bytes( - serde_json::to_vec(&body) - .map_err(|error| { - Error::new_message(format!("Error serializing body to JSON: {error}")) - })? - .as_ref(), - ) - .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? - .into_json() - .map_err(|error| { - Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) - })?; - CohereClient::parse_single_response(data) - } - pub fn parse_single_response(value: serde_json::Value) -> Result> { - value - .get("embeddings") - .ok_or_else(|| Error::new_message("expected 'embeddings' key in response body")) - .and_then(|v| { - v.get(0).ok_or_else(|| { - Error::new_message("expected 'embeddings.0' path in response body") - }) - }) - .and_then(|v| { - v.as_array().ok_or_else(|| { - Error::new_message("expected 'embeddings.0' path to be an array") - }) - }) - .and_then(|arr| { - arr.iter() - .map(|v| { - v.as_f64() - .ok_or_else(|| { - Error::new_message( - "expected 'embeddings.0' array to contain floats", - ) - }) - .map(|f| f as f32) - }) - .collect() - }) - } -} -#[derive(Clone)] -pub struct JinaClient { - url: String, - model: String, - key: String, -} -const DEFAULT_JINA_URL: &str = "https://api.jina.ai/v1/embeddings"; -const DEFAULT_JINA_API_KEY_ENV: &str = "JINA_API_KEY"; - -impl JinaClient { - pub fn new>( - model: S, - url: Option, - key: Option, - ) -> Result { - Ok(Self { - model: model.into(), - url: url.unwrap_or(DEFAULT_JINA_URL.to_owned()), - key: match key { - Some(key) => key, - None => try_env_var(DEFAULT_JINA_API_KEY_ENV)?, - }, - }) - } - - pub fn infer_single(&self, input: &str) -> Result> { - let mut body = serde_json::Map::new(); - body.insert("input".to_owned(), vec![input.to_owned()].into()); - body.insert("model".to_owned(), self.model.to_owned().into()); - - let data: serde_json::Value = 
ureq::post(&self.url) - .set("Content-Type", "application/json") - .set("Accept", "application/json") - .set("Authorization", format!("Bearer {}", self.key).as_str()) - .send_bytes( - serde_json::to_vec(&body) - .map_err(|error| { - Error::new_message(format!("Error serializing body to JSON: {error}")) - })? - .as_ref(), - ) - .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? - .into_json() - .map_err(|error| { - Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) - })?; - JinaClient::parse_single_response(data) - } - pub fn parse_single_response(value: serde_json::Value) -> Result> { - value - .get("data") - .ok_or_else(|| Error::new_message("expected 'data' key in response body")) - .and_then(|v| { - v.get(0) - .ok_or_else(|| Error::new_message("expected 'data.0' path in response body")) - }) - .and_then(|v| { - v.get("embedding").ok_or_else(|| { - Error::new_message("expected 'data.0.embedding' path in response body") - }) - }) - .and_then(|v| { - v.as_array().ok_or_else(|| { - Error::new_message("expected 'data.0.embedding' path to be an array") - }) - }) - .and_then(|arr| { - arr.iter() - .map(|v| { - v.as_f64() - .ok_or_else(|| { - Error::new_message( - "expected 'data.0.embedding' array to contain floats", - ) - }) - .map(|f| f as f32) - }) - .collect() - }) - } -} -#[derive(Clone)] -pub struct MixedbreadClient { - url: String, - model: String, - key: String, -} -const DEFAULT_MIXEDBREAD_URL: &str = "https://api.mixedbread.ai/v1/embeddings/"; -const DEFAULT_MIXEDBREAD_API_KEY_ENV: &str = "MIXEDBREAD_API_KEY"; - -impl MixedbreadClient { - pub fn new>( - model: S, - url: Option, - key: Option, - ) -> Result { - Ok(Self { - model: model.into(), - url: url.unwrap_or(DEFAULT_MIXEDBREAD_URL.to_owned()), - key: match key { - Some(key) => key, - None => try_env_var(DEFAULT_MIXEDBREAD_API_KEY_ENV)?, - }, - }) - } - - pub fn infer_single(&self, input: &str) -> Result> { - let mut body = 
serde_json::Map::new(); - body.insert("input".to_owned(), vec![input.to_owned()].into()); - body.insert("model".to_owned(), self.model.to_owned().into()); - - let data: serde_json::Value = ureq::post(&self.url) - .set("Content-Type", "application/json") - .set("Accept", "application/json") - .set("Authorization", format!("Bearer {}", self.key).as_str()) - .send_bytes( - serde_json::to_vec(&body) - .map_err(|error| { - Error::new_message(format!("Error serializing body to JSON: {error}")) - })? - .as_ref(), - ) - .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? - .into_json() - .map_err(|error| { - Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) - })?; - JinaClient::parse_single_response(data) - } - pub fn parse_single_response(value: serde_json::Value) -> Result> { - value - .get("data") - .ok_or_else(|| Error::new_message("expected 'data' key in response body")) - .and_then(|v| { - v.get(0) - .ok_or_else(|| Error::new_message("expected 'data.0' path in response body")) - }) - .and_then(|v| { - v.get("embedding").ok_or_else(|| { - Error::new_message("expected 'data.0.embedding' path in response body") - }) - }) - .and_then(|v| { - v.as_array().ok_or_else(|| { - Error::new_message("expected 'data.0.embedding' path to be an array") - }) - }) - .and_then(|arr| { - arr.iter() - .map(|v| { - v.as_f64() - .ok_or_else(|| { - Error::new_message( - "expected 'data.0.embedding' array to contain floats", - ) - }) - .map(|f| f as f32) - }) - .collect() - }) - } -} - -#[derive(Clone)] -pub struct OllamaClient { - url: String, - model: String, -} -const DEFAULT_OLLAMA_URL: &str = "http://localhost:11434/api/embeddings"; -impl OllamaClient { - pub fn new>(model: S, url: Option) -> Self { - Self { - model: model.into(), - url: url.unwrap_or(DEFAULT_OLLAMA_URL.to_owned()), - } - } - - pub fn infer_single(&self, input: &str) -> Result> { - let mut body = serde_json::Map::new(); - body.insert("prompt".to_owned(), 
input.to_owned().into()); - body.insert("model".to_owned(), self.model.to_owned().into()); - - let data: serde_json::Value = ureq::post(&self.url) - .set("Content-Type", "application/json") - .send_bytes( - serde_json::to_vec(&body) - .map_err(|error| { - Error::new_message(format!("Error serializing body to JSON: {error}")) - })? - .as_ref(), - ) - .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? - .into_json() - .map_err(|error| { - Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) - })?; - OllamaClient::parse_single_response(data) - } - pub fn parse_single_response(value: serde_json::Value) -> Result> { - value - .get("embedding") - .ok_or_else(|| Error::new_message("expected 'embedding' key in response body")) - .and_then(|v| { - v.as_array() - .ok_or_else(|| Error::new_message("expected 'embedding' path to be an array")) - }) - .and_then(|arr| { - arr.iter() - .map(|v| { - v.as_f64() - .ok_or_else(|| { - Error::new_message("expected 'embedding' array to contain floats") - }) - .map(|f| f as f32) - }) - .collect() - }) - } -} - -#[derive(Clone)] -pub struct LlamafileClient { - url: String, -} -const DEFAULT_LLAMAFILE_URL: &str = "http://localhost:8080/embedding"; - -impl LlamafileClient { - pub fn new(url: Option) -> Self { - Self { - url: url.unwrap_or(DEFAULT_LLAMAFILE_URL.to_owned()), - } - } - - pub fn infer_single(&self, input: &str) -> Result> { - let mut body = serde_json::Map::new(); - body.insert("content".to_owned(), input.to_owned().into()); - - let data: serde_json::Value = ureq::post(&self.url) - .set("Content-Type", "application/json") - .send_bytes( - serde_json::to_vec(&body) - .map_err(|error| { - Error::new_message(format!("Error serializing body to JSON: {error}")) - })? - .as_ref(), - ) - .map_err(|error| Error::new_message(format!("Error sending HTTP request: {error}")))? 
- .into_json() - .map_err(|error| { - Error::new_message(format!("Error parsing HTTP response as JSON: {error}")) - })?; - OllamaClient::parse_single_response(data) - } -} - -#[derive(Clone)] -pub enum Client { - OpenAI(OpenAiClient), - Nomic(NomicClient), - Cohere(CohereClient), - Ollama(OllamaClient), - Llamafile(LlamafileClient), - Jina(JinaClient), - Mixedbread(MixedbreadClient), -} diff --git a/deps/sqlite3/sqlite-rembed-source/src/clients_vtab.rs b/deps/sqlite3/sqlite-rembed-source/src/clients_vtab.rs deleted file mode 100644 index 101c95c6f9..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/src/clients_vtab.rs +++ /dev/null @@ -1,184 +0,0 @@ -use sqlite_loadable::table::UpdateOperation; -use sqlite_loadable::{api, prelude::*, Error}; -use sqlite_loadable::{ - api::ValueType, - table::{IndexInfo, VTab, VTabArguments, VTabCursor, VTabWriteable}, - BestIndexError, Result, -}; -use std::{cell::RefCell, collections::HashMap, marker::PhantomData, mem, os::raw::c_int, rc::Rc}; - -use crate::clients::MixedbreadClient; -use crate::{ - clients::{ - Client, CohereClient, JinaClient, LlamafileClient, NomicClient, OllamaClient, OpenAiClient, - }, - CLIENT_OPTIONS_POINTER_NAME, -}; - -enum Columns { - Name, - Options, -} -fn column(index: i32) -> Option { - match index { - 0 => Some(Columns::Name), - 1 => Some(Columns::Options), - _ => None, - } -} -#[repr(C)] -pub struct ClientsTable { - /// must be first - base: sqlite3_vtab, - clients: Rc>>, -} - -impl<'vtab> VTab<'vtab> for ClientsTable { - type Aux = Rc>>; - type Cursor = ClientsCursor<'vtab>; - - fn create( - db: *mut sqlite3, - aux: Option<&Self::Aux>, - args: VTabArguments, - ) -> Result<(String, Self)> { - Self::connect(db, aux, args) - } - fn connect( - _db: *mut sqlite3, - aux: Option<&Self::Aux>, - _args: VTabArguments, - ) -> Result<(String, ClientsTable)> { - let base: sqlite3_vtab = unsafe { mem::zeroed() }; - let clients = aux.expect("Required aux").to_owned(); - - let vtab = ClientsTable { base, clients 
}; - let sql = "create table x(name text primary key, options)".to_owned(); - - Ok((sql, vtab)) - } - fn destroy(&self) -> Result<()> { - Ok(()) - } - - fn best_index(&self, mut info: IndexInfo) -> core::result::Result<(), BestIndexError> { - info.set_estimated_cost(10000.0); - info.set_estimated_rows(10000); - info.set_idxnum(1); - Ok(()) - } - - fn open(&'vtab mut self) -> Result> { - ClientsCursor::new(self) - } -} - -impl<'vtab> VTabWriteable<'vtab> for ClientsTable { - fn update(&'vtab mut self, operation: UpdateOperation<'_>, _p_rowid: *mut i64) -> Result<()> { - match operation { - UpdateOperation::Delete(_) => { - return Err(Error::new_message( - "DELETE operations on rembed_clients is not supported yet", - )) - } - UpdateOperation::Update { _values } => { - return Err(Error::new_message( - "DELETE operations on rembed_clients is not supported yet", - )) - } - UpdateOperation::Insert { values, rowid: _ } => { - let name = api::value_text(&values[0])?; - let client = match api::value_type(&values[1]) { - ValueType::Text => match api::value_text(&values[1])? { - "openai" => Client::OpenAI(OpenAiClient::new(name, None, None)?), - "mixedbread" => { - Client::Mixedbread(MixedbreadClient::new(name, None, None)?) - } - "jina" => Client::Jina(JinaClient::new(name, None, None)?), - "nomic" => Client::Nomic(NomicClient::new(name, None, None)?), - "cohere" => Client::Cohere(CohereClient::new(name, None, None)?), - "ollama" => Client::Ollama(OllamaClient::new(name, None)), - "llamafile" => Client::Llamafile(LlamafileClient::new(None)), - text => { - return Err(Error::new_message(format!( - "'{text}' is not a valid rembed client." 
- ))) - } - }, - ValueType::Null => unsafe { - if let Some(client) = - api::value_pointer::(&values[1], CLIENT_OPTIONS_POINTER_NAME) - { - (*client).clone() - } else { - return Err(Error::new_message("client options required")); - } - }, - _ => return Err(Error::new_message("client options required")), - }; - self.clients.borrow_mut().insert(name.to_owned(), client); - } - } - Ok(()) - } -} - -#[repr(C)] -pub struct ClientsCursor<'vtab> { - /// Base class. Must be first - base: sqlite3_vtab_cursor, - keys: Vec, - rowid: i64, - phantom: PhantomData<&'vtab ClientsTable>, -} -impl ClientsCursor<'_> { - fn new(table: &mut ClientsTable) -> Result { - let base: sqlite3_vtab_cursor = unsafe { mem::zeroed() }; - let c = table.clients.borrow(); - let keys = c.keys().map(|k| k.to_string()).collect(); - let cursor = ClientsCursor { - base, - keys, - rowid: 0, - phantom: PhantomData, - }; - Ok(cursor) - } -} - -impl VTabCursor for ClientsCursor<'_> { - fn filter( - &mut self, - _idx_num: c_int, - _idx_str: Option<&str>, - _values: &[*mut sqlite3_value], - ) -> Result<()> { - Ok(()) - } - - fn next(&mut self) -> Result<()> { - self.rowid += 1; - Ok(()) - } - - fn eof(&self) -> bool { - (self.rowid as usize) >= self.keys.len() - } - - fn column(&self, context: *mut sqlite3_context, i: c_int) -> Result<()> { - let key = self - .keys - .get(self.rowid as usize) - .expect("Internal rembed_clients logic error"); - match column(i) { - Some(Columns::Name) => api::result_text(context, key)?, - Some(Columns::Options) => (), - None => (), - }; - Ok(()) - } - - fn rowid(&self) -> Result { - Ok(self.rowid) - } -} diff --git a/deps/sqlite3/sqlite-rembed-source/src/lib.rs b/deps/sqlite3/sqlite-rembed-source/src/lib.rs deleted file mode 100644 index 192452526e..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/src/lib.rs +++ /dev/null @@ -1,169 +0,0 @@ -mod clients; -mod clients_vtab; - -use std::cell::RefCell; -use std::collections::HashMap; -use std::rc::Rc; - -use clients::{Client, 
CohereClient, LlamafileClient, NomicClient, OllamaClient, OpenAiClient}; -use clients_vtab::ClientsTable; -use sqlite_loadable::{ - api, define_scalar_function, define_scalar_function_with_aux, define_virtual_table_writeablex, - prelude::*, Error, Result, -}; -use zerocopy::AsBytes; - -const FLOAT32_VECTOR_SUBTYPE: u8 = 223; -const CLIENT_OPTIONS_POINTER_NAME: &[u8] = b"sqlite-rembed-client-options\0"; - -pub fn rembed_version(context: *mut sqlite3_context, _values: &[*mut sqlite3_value]) -> Result<()> { - api::result_text(context, format!("v{}", env!("CARGO_PKG_VERSION")))?; - Ok(()) -} - -pub fn rembed_debug(context: *mut sqlite3_context, _values: &[*mut sqlite3_value]) -> Result<()> { - api::result_text( - context, - format!( - "Version: v{} -Source: {} -", - env!("CARGO_PKG_VERSION"), - env!("GIT_HASH") - ), - )?; - Ok(()) -} - -pub fn rembed_client_options( - context: *mut sqlite3_context, - values: &[*mut sqlite3_value], -) -> Result<()> { - if (values.len() % 2) != 0 { - return Err(Error::new_message( - "Must have an even number of arguments to rembed_client_options, as key/value pairs.", - )); - } - let mut options: HashMap = HashMap::new(); - let mut format: Option = None; - for pair in values.chunks(2) { - let key = api::value_text(&pair[0])?; - let value = api::value_text(&pair[1])?; - if key == "format" { - format = Some(value.to_owned()); - } else { - options.insert(key.to_owned(), value.to_owned()); - } - } - - let format = match format { - Some(format) => format, - None => { - return Err(Error::new_message("'format' key is required.")); - } - }; - let client: Client = match format.as_str() { - "openai" => Client::OpenAI(OpenAiClient::new( - options - .get("model") - .ok_or_else(|| Error::new_message("'model' option is required"))?, - options.get("url").cloned(), - options.get("key").cloned(), - )?), - "nomic" => Client::Nomic(NomicClient::new( - options - .get("model") - .ok_or_else(|| Error::new_message("'model' option is required"))?, - 
options.get("url").cloned(), - options.get("key").cloned(), - )?), - "cohere" => Client::Cohere(CohereClient::new( - options - .get("model") - .ok_or_else(|| Error::new_message("'model' option is required"))?, - options.get("url").cloned(), - options.get("key").cloned(), - )?), - "ollama" => Client::Ollama(OllamaClient::new( - options - .get("model") - .ok_or_else(|| Error::new_message("'model' option is required"))?, - options.get("url").cloned(), - )), - "llamafile" => Client::Llamafile(LlamafileClient::new(options.get("url").cloned())), - format => return Err(Error::new_message(format!("Unknown format '{format}'"))), - }; - - api::result_pointer(context, CLIENT_OPTIONS_POINTER_NAME, client); - - Ok(()) -} -pub fn rembed( - context: *mut sqlite3_context, - values: &[*mut sqlite3_value], - clients: &Rc>>, -) -> Result<()> { - let client_name = api::value_text(&values[0])?; - let input = api::value_text(&values[1])?; - let x = clients.borrow(); - let client = x.get(client_name).ok_or_else(|| { - Error::new_message(format!( - "Client with name {client_name} was not registered with rembed_clients." - )) - })?; - - let embedding = match client { - Client::OpenAI(client) => client.infer_single(input)?, - Client::Jina(client) => client.infer_single(input)?, - Client::Mixedbread(client) => client.infer_single(input)?, - Client::Ollama(client) => client.infer_single(input)?, - Client::Llamafile(client) => client.infer_single(input)?, - Client::Nomic(client) => { - let input_type = values.get(2).and_then(|v| api::value_text(v).ok()); - client.infer_single(input, input_type)? - } - Client::Cohere(client) => { - let input_type = values.get(2).and_then(|v| api::value_text(v).ok()); - client.infer_single(input, input_type)? 
- } - }; - - api::result_blob(context, embedding.as_bytes()); - api::result_subtype(context, FLOAT32_VECTOR_SUBTYPE); - Ok(()) -} - -#[sqlite_entrypoint] -pub fn sqlite3_rembed_init(db: *mut sqlite3) -> Result<()> { - let flags = FunctionFlags::UTF8 - | FunctionFlags::DETERMINISTIC - | unsafe { FunctionFlags::from_bits_unchecked(0x001000000) }; - - let c = Rc::new(RefCell::new(HashMap::new())); - - define_scalar_function( - db, - "rembed_version", - 0, - rembed_version, - FunctionFlags::UTF8 | FunctionFlags::DETERMINISTIC, - )?; - define_scalar_function( - db, - "rembed_debug", - 0, - rembed_debug, - FunctionFlags::UTF8 | FunctionFlags::DETERMINISTIC, - )?; - define_scalar_function_with_aux(db, "rembed", 2, rembed, flags, Rc::clone(&c))?; - define_scalar_function_with_aux(db, "rembed", 3, rembed, flags, Rc::clone(&c))?; - define_scalar_function( - db, - "rembed_client_options", - -1, - rembed_client_options, - flags, - )?; - define_virtual_table_writeablex::(db, "rembed_clients", Some(Rc::clone(&c)))?; - Ok(()) -} diff --git a/deps/sqlite3/sqlite-rembed-source/test.sql b/deps/sqlite3/sqlite-rembed-source/test.sql deleted file mode 100644 index d1e8e85151..0000000000 --- a/deps/sqlite3/sqlite-rembed-source/test.sql +++ /dev/null @@ -1,37 +0,0 @@ -.load dist/debug/rembed0 -.bail on -.mode box -.header on -.timer on -.echo on - -INSERT INTO temp.rembed_clients(name, options) VALUES - ('text-embedding-3-small','openai'), - ('jina-embeddings-v2-base-en','jina'), - ('mixedbread-ai/mxbai-embed-large-v1','mixedbread'), - ('nomic-embed-text-v1.5', 'nomic'), - ('embed-english-v3.0', 'cohere'), - ('snowflake-arctic-embed:s', 'ollama'), - ('llamafile', 'llamafile'), - ( - 'mxbai-embed-large-v1-f16', - rembed_client_options( - 'format', 'llamafile', - --'url', 'http://mm1:8080/v1/embeddings' - 'url', 'http://mm1:8080/embedding' - ) - ); - -select length(rembed('mixedbread-ai/mxbai-embed-large-v1', 'obama the person')); -.exit -select length(rembed('jina-embeddings-v2-base-en', 
'obama the person')); - -.exit - -select length(rembed('text-embedding-3-small', 'obama the person')); -select length(rembed('llamafile', 'obama the person')); -select length(rembed('snowflake-arctic-embed:s', 'obama the person')); -select length(rembed('embed-english-v3.0', 'obama the person', 'search_document')); -select length(rembed('mxbai-embed-large-v1-f16', 'obama the person')); - - diff --git a/doc/sqlite-rembed-integration.md b/doc/sqlite-rembed-integration.md index d05a51e539..2dba500bda 100644 --- a/doc/sqlite-rembed-integration.md +++ b/doc/sqlite-rembed-integration.md @@ -169,9 +169,6 @@ cd deps && make sqlite3 # Verify symbol exists nm deps/sqlite3/libsqlite_rembed.a | grep sqlite3_rembed_init - -# Test compilation (without ClickHouse) -make PROXYSQLCLICKHOUSE=0 ``` ### Functional Testing @@ -208,7 +205,6 @@ VALUES ('test', 'ollama', 'nomic-embed-text'); 1. **Missing clang**: Install `clang` and `libclang-dev` 2. **Rust not found**: Install Rust toolchain via `rustup` 3. **SQLite headers**: Ensure `sqlite-amalgamation` is extracted -4. **ClickHouse errors**: Build with `PROXYSQLCLICKHOUSE=0` ### Runtime Issues 1. 
**Client not found**: Verify `temp.rembed_clients` entry exists @@ -232,4 +228,4 @@ VALUES ('test', 'ollama', 'nomic-embed-text'); - sqlite-rembed: Apache 2.0 / MIT (see `deps/sqlite3/sqlite-rembed-source/LICENSE-*`) - ProxySQL: GPL v3 -- Integration code: Same as ProxySQL \ No newline at end of file +- Integration code: Same as ProxySQL From 194b71889b3a187c62a5b5cedee0ab8e7132f6ee Mon Sep 17 00:00:00 2001 From: Rene Cannao Date: Mon, 22 Dec 2025 19:55:18 +0000 Subject: [PATCH 3/5] Update sqlite-rembed integration documentation for tar.gz packaging - Update Integration Architecture section to include source packaging step - Add Packaging subsection detailing tar.gz distribution pattern - Update Build Process to mention tar.gz extraction - Update Code Changes Summary for deps/Makefile tar.gz handling - Update Build Verification instructions to use cleanpart and verify extraction - Add Source Distribution reference section --- doc/sqlite-rembed-integration.md | 37 +++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/doc/sqlite-rembed-integration.md b/doc/sqlite-rembed-integration.md index 2dba500bda..6164f932b3 100644 --- a/doc/sqlite-rembed-integration.md +++ b/doc/sqlite-rembed-integration.md @@ -17,10 +17,11 @@ This document describes the integration of the `sqlite-rembed` Rust SQLite exten The integration follows the same pattern as `sqlite-vec` (vector search extension): ### Static Linking Approach -1. **Rust static library**: `libsqlite_rembed.a` built from Rust source -2. **Build system integration**: Makefile targets for Rust compilation -3. **Auto-registration**: `sqlite3_auto_extension()` in ProxySQL initialization -4. **Single binary deployment**: No external dependencies at runtime +1. **Source packaging**: `sqlite-rembed-0.0.1-alpha.9.tar.gz` included in git repository +2. **Rust static library**: `libsqlite_rembed.a` built from extracted source +3. 
**Build system integration**: Makefile targets for tar.gz extraction and Rust compilation +4. **Auto-registration**: `sqlite3_auto_extension()` in ProxySQL initialization +5. **Single binary deployment**: No external dependencies at runtime ### Technical Implementation @@ -47,17 +48,25 @@ libclang-dev ### Build Process 1. Rust toolchain detection in `deps/Makefile` -2. Static library build with `cargo build --release --features=sqlite-loadable/static --lib` -3. Linking into `libproxysql.a` via `lib/Makefile` -4. Final binary linking via `src/Makefile` +2. Extract the bundled `deps/sqlite3/sqlite-rembed-0.0.1-alpha.9.tar.gz` (the official GitHub release tarball) +3. Static library build with `cargo build --release --features=sqlite-loadable/static --lib` +4. Linking into `libproxysql.a` via `lib/Makefile` +5. Final binary linking via `src/Makefile` + +### Packaging +Following ProxySQL's dependency packaging pattern, sqlite-rembed is distributed as a compressed tar.gz file: +- `deps/sqlite3/sqlite-rembed-0.0.1-alpha.9.tar.gz` - Official GitHub release tarball +- Extracted during build via `tar -zxf sqlite-rembed-0.0.1-alpha.9.tar.gz` +- Clean targets remove extracted source directories ## Code Changes Summary ### 1. `deps/Makefile` - Added Rust toolchain detection (`rustc`, `cargo`) - SQLite environment variables for sqlite-rembed build -- New target: `sqlite3/libsqlite_rembed.a` +- New target: `sqlite3/libsqlite_rembed.a` that extracts from tar.gz and builds - Added dependency to `sqlite3` target +- Clean targets remove `sqlite-rembed-*/` and `sqlite-rembed-source/` directories ### 2.
`lib/Makefile` - Added `SQLITE_REMBED_LIB` variable pointing to static library @@ -164,8 +173,12 @@ The extension provides SQLite error messages for: ### Build Verification ```bash -# Verify Rust library builds -cd deps && make sqlite3 +# Clean and rebuild with tar.gz extraction +cd deps && make cleanpart && make sqlite3 + +# Verify tar.gz extraction and Rust library build +ls deps/sqlite3/sqlite-rembed-source/ +ls deps/sqlite3/libsqlite_rembed.a # Verify symbol exists nm deps/sqlite3/libsqlite_rembed.a | grep sqlite3_rembed_init @@ -218,6 +231,10 @@ VALUES ('test', 'ollama', 'nomic-embed-text'); - [SQLite Loadable Extensions](https://www.sqlite.org/loadext.html) - [Rust C FFI](https://doc.rust-lang.org/nomicon/ffi.html) +### Source Distribution +- `deps/sqlite3/sqlite-rembed-0.0.1-alpha.9.tar.gz` - Official GitHub release tarball +- Extracted to `deps/sqlite3/sqlite-rembed-source/` during build + ## Maintainers - Integration: [Your Name/Team] From e75bd7c84a6f4c320ecd1e46730894c20a2ff104 Mon Sep 17 00:00:00 2001 From: Rene Cannao Date: Tue, 23 Dec 2025 07:04:40 +0000 Subject: [PATCH 4/5] Add comprehensive sqlite-rembed examples and documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds a complete set of well-documented examples and test scripts for the sqlite-rembed integration in ProxySQL. The sqlite-rembed extension enables text embedding generation via HTTP API calls directly within SQL queries, complementing the sqlite-vec extension for vector similarity search. Key additions: 1. sqlite-rembed-examples.sql - Primary SQL demonstration file - Standalone SQL file with 8 phases of examples - Demonstrates complete AI pipeline: client config → embedding gen → storage → search - Includes proper subquery pattern for similarity search JOINs (vec0 requirement) - Well-documented with clear explanations for each phase 2. 
sqlite-rembed-test.sh - Comprehensive test suite - 9-phase test covering all integration aspects - Color-coded output with test result tracking - Error handling and edge case testing 3. SQLITE-REMBED-TEST-README.md - Complete documentation - Detailed test suite documentation - Usage instructions, troubleshooting, CI/CD integration examples 4. Supporting bash scripts for different use cases - sqlite-rembed-examples.sh - Phase-by-phase interactive examples - sqlite-rembed-demo.sh - Single-session demonstration script Security considerations: - All hardcoded API keys replaced with YOUR_API_KEY placeholder - Clear comments instructing users to replace with actual credentials - Synthetic OpenAI endpoint used as example (public test endpoint) Technical details: - Embedding dimensions: 768 (nomic-embed-text-v1.5 model) - Embedding size: 3072 bytes (768 × 4 bytes per float) - Similarity search pattern: Uses subqueries for JOIN compatibility with vec0 - Client configuration: temp.rembed_clients virtual table (per-connection) The examples provide a solid baseline for building applications that leverage sqlite-rembed and sqlite-vec in ProxySQL for AI-powered applications. 
--- doc/SQLITE-REMBED-TEST-README.md | 245 +++++++++++++ doc/sqlite-rembed-demo.sh | 351 +++++++++++++++++++ doc/sqlite-rembed-examples.sh | 329 ++++++++++++++++++ doc/sqlite-rembed-examples.sql | 218 ++++++++++++ doc/sqlite-rembed-test.sh | 574 +++++++++++++++++++++++++++++++ 5 files changed, 1717 insertions(+) create mode 100644 doc/SQLITE-REMBED-TEST-README.md create mode 100755 doc/sqlite-rembed-demo.sh create mode 100755 doc/sqlite-rembed-examples.sh create mode 100644 doc/sqlite-rembed-examples.sql create mode 100755 doc/sqlite-rembed-test.sh diff --git a/doc/SQLITE-REMBED-TEST-README.md b/doc/SQLITE-REMBED-TEST-README.md new file mode 100644 index 0000000000..a2a472227e --- /dev/null +++ b/doc/SQLITE-REMBED-TEST-README.md @@ -0,0 +1,245 @@ +# sqlite-rembed Integration Test Suite + +## Overview + +This test suite comprehensively validates the integration of `sqlite-rembed` (Rust SQLite extension for text embedding generation) into ProxySQL. The tests verify the complete AI pipeline from client registration to embedding generation and vector similarity search. + +## Prerequisites + +### System Requirements +- **ProxySQL** compiled with `sqlite-rembed` and `sqlite-vec` extensions +- **MySQL client** (`mysql` command line tool) +- **Bash** shell environment +- **Network access** to embedding API endpoint (or local Ollama/OpenAI API) + +### ProxySQL Configuration +Ensure ProxySQL is running with SQLite3 server enabled: +```bash +cd /home/rene/proxysql-vec/src +./proxysql --sqlite3-server +``` + +### Test Configuration +The test script uses default connection parameters: +- Host: `127.0.0.1` +- Port: `6030` (default SQLite3 server port) +- User: `root` +- Password: `root` + +Modify these in the script if your configuration differs. 
+ +## Test Suite Structure + +The test suite is organized into 9 phases, each testing specific components: + +### Phase 1: Basic Connectivity and Function Verification +- ✅ ProxySQL connection +- ✅ Database listing +- ✅ `sqlite-vec` function availability +- ✅ `sqlite-rembed` function registration +- ✅ `temp.rembed_clients` virtual table existence + +### Phase 2: Client Configuration +- ✅ Create embedding API client with `rembed_client_options()` +- ✅ Verify client registration in `temp.rembed_clients` +- ✅ Test `rembed_client_options` function + +### Phase 3: Embedding Generation Tests +- ✅ Generate embeddings for short and long text +- ✅ Verify embedding data type (BLOB) and size (768 dimensions × 4 bytes) +- ✅ Error handling for non-existent clients + +### Phase 4: Table Creation and Data Storage +- ✅ Create regular table for document storage +- ✅ Create virtual vector table using `vec0` +- ✅ Insert test documents with diverse content + +### Phase 5: Embedding Generation and Storage +- ✅ Generate embeddings for all documents +- ✅ Store embeddings in vector table +- ✅ Verify embedding count matches document count +- ✅ Check embedding storage format + +### Phase 6: Similarity Search Tests +- ✅ Exact self-match (document with itself, distance = 0.0) +- ✅ Similarity search with query text +- ✅ Verify result ordering by ascending distance + +### Phase 7: Edge Cases and Error Handling +- ✅ Empty text input +- ✅ Very long text input +- ✅ SQL injection attempt safety + +### Phase 8: Performance and Concurrency +- ✅ Sequential embedding generation timing +- ✅ Basic performance validation (< 10 seconds for 3 embeddings) + +### Phase 9: Cleanup and Final Verification +- ✅ Clean up test tables +- ✅ Verify no test artifacts remain + +## Usage + +### Running the Full Test Suite +```bash +cd /home/rene/proxysql-vec/doc +./sqlite-rembed-test.sh +``` + +### Expected Output +The script provides color-coded output: +- 🟢 **Green**: Test passed +- 🔴 **Red**: Test failed +- 🔵 
**Blue**: Information and headers +- 🟡 **Yellow**: Test being executed + +### Exit Codes +- `0`: All tests passed +- `1`: One or more tests failed +- `2`: Connection issues or missing dependencies + +## Configuration + +### Modifying Connection Parameters +Edit the following variables in `sqlite-rembed-test.sh`: +```bash +PROXYSQL_HOST="127.0.0.1" +PROXYSQL_PORT="6030" +MYSQL_USER="root" +MYSQL_PASS="root" +``` + +### API Configuration +The test uses a synthetic OpenAI endpoint by default. Modify these variables to use your own API: +```bash +API_CLIENT_NAME="test-client-$(date +%s)" +API_FORMAT="openai" +API_URL="https://api.synthetic.new/openai/v1/embeddings" +API_KEY="YOUR_API_KEY" # Replace with your actual API key +API_MODEL="hf:nomic-ai/nomic-embed-text-v1.5" +VECTOR_DIMENSIONS=768 +``` + +For other providers (Ollama, Cohere, Nomic), adjust the format and URL accordingly. + +## Test Data + +### Sample Documents +The test creates 4 sample documents: +1. **Machine Learning** - "Machine learning algorithms improve with more training data..." +2. **Database Systems** - "Database management systems efficiently store, retrieve..." +3. **Artificial Intelligence** - "AI enables computers to perform tasks typically..." +4. **Vector Databases** - "Vector databases enable similarity search for embeddings..." + +### Query Texts +Test searches use: +- Self-match: Document 1 with itself +- Query: "data science and algorithms" + +## Troubleshooting + +### Common Issues + +#### 1. Connection Failed +``` +Error: Cannot connect to ProxySQL at 127.0.0.1:6030 +``` +**Solution**: Ensure ProxySQL is running with `--sqlite3-server` flag. + +#### 2. Missing Functions +``` +ERROR 1045 (28000): no such function: rembed +``` +**Solution**: Verify `sqlite-rembed` was compiled and linked into ProxySQL binary. + +#### 3. API Errors +``` +Error from embedding API +``` +**Solution**: Check network connectivity and API credentials. + +#### 4. 
Vector Table Errors +``` +ERROR 1045 (28000): A LIMIT or 'k = ?' constraint is required on vec0 knn queries. +``` +**Solution**: Every `sqlite-vec` KNN (similarity) query requires either a `LIMIT` clause or a `k = ?` constraint. + +### Debug Mode +For detailed debugging, run with trace: +```bash +bash -x ./sqlite-rembed-test.sh +``` + +## Integration with CI/CD + +The test script can be integrated into CI/CD pipelines: + +```yaml +# Example GitHub Actions workflow +name: sqlite-rembed Tests +on: [push, pull_request] +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Build ProxySQL with sqlite-rembed + run: | + cd deps && make cleanpart && make sqlite3 + cd ../lib && make + cd ../src && make + - name: Start ProxySQL + run: | + cd src && ./proxysql --sqlite3-server & + sleep 5 + - name: Run Integration Tests + run: | + cd doc && ./sqlite-rembed-test.sh +``` + +## Extending the Test Suite + +### Adding New Tests +1. Add new test function following existing pattern +2. Update phase header and test count +3.
Add to appropriate phase section + +### Testing Different Providers +Modify the API configuration block to test: +- **Ollama**: Use `format='ollama'` and local URL +- **Cohere**: Use `format='cohere'` and appropriate model +- **Nomic**: Use `format='nomic'` and Nomic API endpoint + +### Performance Testing +Extend Phase 8 for: +- Concurrent embedding generation +- Batch processing tests +- Memory usage monitoring + +## Results Interpretation + +### Success Criteria +- All connectivity tests pass +- Embeddings generated with correct dimensions +- Vector search returns ordered results +- No test artifacts remain after cleanup + +### Performance Benchmarks +- Embedding generation: < 3 seconds per request (network-dependent) +- Similarity search: < 100ms for small datasets +- Memory: Stable during sequential operations + +## References + +- [sqlite-rembed GitHub](https://github.com/asg017/sqlite-rembed) +- [sqlite-vec Documentation](./SQLite3-Server.md) +- [ProxySQL SQLite3 Server](./SQLite3-Server.md) +- [Integration Documentation](./sqlite-rembed-integration.md) + +## License + +This test suite is part of the ProxySQL project and follows the same licensing terms. + +--- +*Last Updated: 2025-12-23* +*Test Suite Version: 1.0* \ No newline at end of file diff --git a/doc/sqlite-rembed-demo.sh b/doc/sqlite-rembed-demo.sh new file mode 100755 index 0000000000..f65656a074 --- /dev/null +++ b/doc/sqlite-rembed-demo.sh @@ -0,0 +1,351 @@ +#!/bin/bash + +############################################################################### +# sqlite-rembed Demonstration Script +# +# This script demonstrates the usage of sqlite-rembed integration in ProxySQL +# using a single MySQL session to maintain connection state. +# +# The script creates a SQL file with all demonstration queries and executes +# them in a single session, ensuring temp.rembed_clients virtual table +# maintains its state throughout the demonstration.
+# +# Requirements: +# - ProxySQL running with --sqlite3-server flag on port 6030 +# - MySQL client installed +# - Network access to embedding API endpoint +# - Valid API credentials for embedding generation +# +# Usage: ./sqlite-rembed-demo.sh +# +# Author: Generated from integration testing session +# Date: $(date) +############################################################################### + +set -uo pipefail + +# Configuration - modify these values as needed +PROXYSQL_HOST="127.0.0.1" +PROXYSQL_PORT="6030" +MYSQL_USER="root" +MYSQL_PASS="root" + +# API Configuration - using synthetic OpenAI endpoint for demonstration +# IMPORTANT: Replace YOUR_API_KEY with your actual API key +API_CLIENT_NAME="demo-client-$(date +%s)" +API_FORMAT="openai" +API_URL="https://api.synthetic.new/openai/v1/embeddings" +API_KEY="YOUR_API_KEY" # Replace with your actual API key +API_MODEL="hf:nomic-ai/nomic-embed-text-v1.5" +VECTOR_DIMENSIONS=768 # Based on model output + +# Color codes for output readability +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +GREEN='\033[0;32m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +# Text formatting +BOLD='\033[1m' +UNDERLINE='\033[4m' + +############################################################################### +# Helper Functions +############################################################################### + +print_header() { + echo -e "\n${BLUE}${BOLD}${UNDERLINE}$1${NC}\n" +} + +print_step() { + echo -e "${YELLOW}➤ Step:$NC $1" +} + +print_query() { + echo -e "${YELLOW}SQL Query:$NC" + echo "$1" + echo "" +} + +print_success() { + echo -e "${GREEN}✓$NC $1" +} + +print_error() { + echo -e "${RED}✗$NC $1" +} + +# Create SQL file with demonstration queries +create_demo_sql() { + local sql_file="$1" + + cat > "$sql_file" << EOF +-------------------------------------------------------------------- +-- sqlite-rembed Demonstration Script +-- Generated: $(date) +-- ProxySQL: ${PROXYSQL_HOST}:${PROXYSQL_PORT} +-- API Endpoint: ${API_URL} 
+-------------------------------------------------------------------- + +-------------------------------------------------------------------- +-- Phase 1: Basic Connectivity and Function Verification +-------------------------------------------------------------------- +-- This phase verifies basic connectivity and confirms that sqlite-rembed +-- and sqlite-vec functions are properly registered in ProxySQL. + +SELECT 'Phase 1: Basic Connectivity' as phase; + +-- Basic ProxySQL connectivity +SELECT 1 as connectivity_test; + +-- Available databases +SHOW DATABASES; + +-- Available sqlite-vec functions +SELECT name FROM pragma_function_list WHERE name LIKE 'vec%' LIMIT 5; + +-- Available sqlite-rembed functions +SELECT name FROM pragma_function_list WHERE name LIKE 'rembed%' ORDER BY name; + +-- Check temp.rembed_clients virtual table exists +SELECT name FROM sqlite_master WHERE name='rembed_clients' AND type='table'; + +-------------------------------------------------------------------- +-- Phase 2: Client Configuration +-------------------------------------------------------------------- +-- This phase demonstrates how to configure an embedding API client using +-- the temp.rembed_clients virtual table and rembed_client_options() function. 
+ +SELECT 'Phase 2: Client Configuration' as phase; + +-- Create embedding API client +INSERT INTO temp.rembed_clients(name, options) VALUES + ('$API_CLIENT_NAME', + rembed_client_options( + 'format', '$API_FORMAT', + 'url', '$API_URL', + 'key', '$API_KEY', + 'model', '$API_MODEL' + ) + ); + +-- Verify client registration +SELECT name FROM temp.rembed_clients; + +-- View client configuration details +SELECT name, + json_extract(options, '\$.format') as format, + json_extract(options, '\$.model') as model +FROM temp.rembed_clients; + +-------------------------------------------------------------------- +-- Phase 3: Embedding Generation +-------------------------------------------------------------------- +-- This phase demonstrates text embedding generation using the rembed() function. +-- Embeddings are generated via HTTP request to the configured API endpoint. + +SELECT 'Phase 3: Embedding Generation' as phase; + +-- Generate embedding for 'Hello world' and check size +SELECT length(rembed('$API_CLIENT_NAME', 'Hello world')) as embedding_size_bytes; + +-- Generate embedding for longer technical text +SELECT length(rembed('$API_CLIENT_NAME', 'Machine learning algorithms improve with more training data and computational power.')) as embedding_size_bytes; + +-- Generate embedding for empty text (edge case) +SELECT length(rembed('$API_CLIENT_NAME', '')) as empty_embedding_size; + +-------------------------------------------------------------------- +-- Phase 4: Table Creation and Data Storage +-------------------------------------------------------------------- +-- This phase demonstrates creating regular tables for document storage +-- and virtual vector tables for embedding storage using sqlite-vec. 
+ +SELECT 'Phase 4: Table Creation and Data Storage' as phase; + +-- Create regular table for document storage +CREATE TABLE IF NOT EXISTS demo_documents ( + id INTEGER PRIMARY KEY, + title TEXT NOT NULL, + content TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- Create virtual vector table for embeddings +CREATE VIRTUAL TABLE IF NOT EXISTS demo_embeddings USING vec0( + embedding float[$VECTOR_DIMENSIONS] +); + +-- Insert sample documents +INSERT OR IGNORE INTO demo_documents (id, title, content) VALUES + (1, 'Machine Learning', 'Machine learning algorithms improve with more training data and computational power.'), + (2, 'Database Systems', 'Database management systems efficiently store, retrieve, and manipulate structured data.'), + (3, 'Artificial Intelligence', 'AI enables computers to perform tasks typically requiring human intelligence.'), + (4, 'Vector Databases', 'Vector databases enable similarity search for embeddings generated by machine learning models.'); + +-- Verify document insertion +SELECT id, title, length(content) as content_length FROM demo_documents; + +-------------------------------------------------------------------- +-- Phase 5: Embedding Generation and Storage +-------------------------------------------------------------------- +-- This phase demonstrates generating embeddings for all documents and +-- storing them in the vector table for similarity search. 
+ +SELECT 'Phase 5: Embedding Generation and Storage' as phase; + +-- Generate and store embeddings for all documents +INSERT INTO demo_embeddings(rowid, embedding) +SELECT id, rembed('$API_CLIENT_NAME', content) +FROM demo_documents; + +-- Verify embedding count +SELECT COUNT(*) as total_embeddings FROM demo_embeddings; + +-- Check embedding storage format +SELECT rowid, length(embedding) as embedding_size_bytes +FROM demo_embeddings LIMIT 2; + +-------------------------------------------------------------------- +-- Phase 6: Similarity Search +-------------------------------------------------------------------- +-- This phase demonstrates similarity search using the stored embeddings. +-- Queries show exact matches, similar documents, and distance metrics. + +SELECT 'Phase 6: Similarity Search' as phase; + +-- Exact self-match (should have distance 0.0) +SELECT d.title, d.content, e.distance +FROM demo_embeddings e +JOIN demo_documents d ON e.rowid = d.id +WHERE e.embedding MATCH rembed('$API_CLIENT_NAME', + 'Machine learning algorithms improve with more training data and computational power.') +LIMIT 3; + +-- Similarity search with query text +SELECT d.title, d.content, e.distance +FROM demo_embeddings e +JOIN demo_documents d ON e.rowid = d.id +WHERE e.embedding MATCH rembed('$API_CLIENT_NAME', + 'data science and algorithms') +LIMIT 3; + +-- Ordered similarity search (closest matches first) +SELECT d.title, e.distance +FROM demo_embeddings e +JOIN demo_documents d ON e.rowid = d.id +WHERE e.embedding MATCH rembed('$API_CLIENT_NAME', + 'artificial intelligence and neural networks') +ORDER BY e.distance ASC +LIMIT 3; + +-------------------------------------------------------------------- +-- Phase 7: Edge Cases and Error Handling +-------------------------------------------------------------------- +-- This phase demonstrates error handling and edge cases. 
+ +SELECT 'Phase 7: Edge Cases and Error Handling' as phase; + +-- Error: Non-existent client +SELECT rembed('non-existent-client', 'test text'); + +-- Very long text input +SELECT rembed('$API_CLIENT_NAME', + '$(printf '%0.sA' {1..5000})'); + +-------------------------------------------------------------------- +-- Phase 8: Cleanup and Summary +-------------------------------------------------------------------- +-- Cleaning up demonstration tables and providing summary. + +SELECT 'Phase 8: Cleanup' as phase; + +-- Clean up demonstration tables +DROP TABLE IF EXISTS demo_documents; +DROP TABLE IF EXISTS demo_embeddings; + +SELECT 'Demonstration Complete' as phase; +SELECT 'All sqlite-rembed integration examples have been executed successfully.' as summary; +SELECT 'The demonstration covered:' as coverage; +SELECT ' • Client configuration with temp.rembed_clients' as item; +SELECT ' • Embedding generation via HTTP API' as item; +SELECT ' • Vector table creation and data storage' as item; +SELECT ' • Similarity search with generated embeddings' as item; +SELECT ' • Error handling and edge cases' as item; + +EOF +} + +############################################################################### +# Main Demonstration Script +############################################################################### + +main() { + print_header "sqlite-rembed Demonstration Script" + echo -e "Starting at: $(date)" + echo -e "ProxySQL: ${PROXYSQL_HOST}:${PROXYSQL_PORT}" + echo -e "API Endpoint: ${API_URL}" + echo "" + + # Check if mysql client is available + if ! command -v mysql &> /dev/null; then + print_error "MySQL client not found. Please install mysql-client." + exit 1 + fi + + # Check connectivity to ProxySQL + if ! 
mysql -h "$PROXYSQL_HOST" -P "$PROXYSQL_PORT" -u "$MYSQL_USER" -p"$MYSQL_PASS" \ + -e "SELECT 1;" &>/dev/null; then + print_error "Cannot connect to ProxySQL at ${PROXYSQL_HOST}:${PROXYSQL_PORT}" + echo "Make sure ProxySQL is running with: ./proxysql --sqlite3-server" + exit 1 + fi + + # Create temporary SQL file + local sql_file + sql_file=$(mktemp /tmp/sqlite-rembed-demo.XXXXXX.sql) + + print_step "Creating demonstration SQL script..." + create_demo_sql "$sql_file" + print_success "SQL script created: $sql_file" + + print_step "Executing demonstration in single MySQL session..." + echo "" + echo -e "${BLUE}=== Demonstration Output ===${NC}" + + # Execute SQL file + mysql -h "$PROXYSQL_HOST" -P "$PROXYSQL_PORT" -u "$MYSQL_USER" -p"$MYSQL_PASS" \ + < "$sql_file" 2>&1 | \ + grep -v "Using a password on the command line interface" + + local exit_code=${PIPESTATUS[0]} + + echo "" + echo -e "${BLUE}=== End Demonstration Output ===${NC}" + + # Clean up temporary file + rm -f "$sql_file" + + if [ $exit_code -eq 0 ]; then + print_success "Demonstration completed successfully!" + echo "" + echo "The demonstration covered:" + echo " • Client configuration with temp.rembed_clients" + echo " • Embedding generation via HTTP API" + echo " • Vector table creation and data storage" + echo " • Similarity search with generated embeddings" + echo " • Error handling and edge cases" + echo "" + echo "These examples can be used as a baseline for building applications" + echo "that leverage sqlite-rembed and sqlite-vec in ProxySQL." + else + print_error "Demonstration encountered errors (exit code: $exit_code)" + echo "Check the output above for details." 
+ exit 1 + fi +} + +# Run main demonstration +main +exit 0 \ No newline at end of file diff --git a/doc/sqlite-rembed-examples.sh b/doc/sqlite-rembed-examples.sh new file mode 100755 index 0000000000..a369722794 --- /dev/null +++ b/doc/sqlite-rembed-examples.sh @@ -0,0 +1,330 @@ +#!/bin/bash + +############################################################################### +# sqlite-rembed Examples and Demonstration Script +# +# This script demonstrates the usage of sqlite-rembed integration in ProxySQL, +# showing complete examples of embedding generation and vector search pipeline. +# +# The script is organized into logical phases, each demonstrating a specific +# aspect of the integration with detailed explanations. +# +# Requirements: +# - ProxySQL running with --sqlite3-server flag on port 6030 +# - MySQL client installed +# - Network access to embedding API endpoint +# - Valid API credentials for embedding generation +# +# Usage: ./sqlite-rembed-examples.sh +# +# Author: Generated from integration testing session +# Date: $(date) +############################################################################### + +set -uo pipefail + +# Configuration - modify these values as needed +PROXYSQL_HOST="127.0.0.1" +PROXYSQL_PORT="6030" +MYSQL_USER="root" +MYSQL_PASS="root" + +# API Configuration - using synthetic OpenAI endpoint for demonstration +# IMPORTANT: Replace YOUR_API_KEY with your actual API key +API_CLIENT_NAME="demo-client-$(date +%s)" +API_FORMAT="openai" +API_URL="https://api.synthetic.new/openai/v1/embeddings" +API_KEY="YOUR_API_KEY" # Replace with your actual API key +API_MODEL="hf:nomic-ai/nomic-embed-text-v1.5" +VECTOR_DIMENSIONS=768 # Based on model output + +# Color codes for output readability (every name expanded below must be defined: 'set -u' is active) +RED='\033[0;31m' # Used by the startup error messages at the script entry point +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Text formatting +BOLD='\033[1m' +UNDERLINE='\033[4m' + +############################################################################### +# Helper Functions
+############################################################################### + +print_header() { + echo -e "\n${BLUE}${BOLD}${UNDERLINE}$1${NC}\n" +} + +print_step() { + echo -e "${YELLOW}➤ Step:$NC $1" +} + +print_query() { + echo -e "${YELLOW}SQL Query:$NC" + echo "$1" + echo "" +} + +# Execute MySQL query and display results +execute_and_show() { + local sql_query="$1" + local description="${2:-}" + + if [ -n "$description" ]; then + print_step "$description" + fi + + print_query "$sql_query" + + echo -e "${BLUE}Result:$NC" + mysql -h "$PROXYSQL_HOST" -P "$PROXYSQL_PORT" -u "$MYSQL_USER" -p"$MYSQL_PASS" \ + -e "$sql_query" 2>&1 | grep -v "Using a password on the command line" + echo "--------------------------------------------------------------------" +} + +# Clean up any existing demonstration tables +cleanup_tables() { + echo "Cleaning up any existing demonstration tables..." + + local tables=( + "demo_documents" + "demo_embeddings" + ) + + for table in "${tables[@]}"; do + mysql -h "$PROXYSQL_HOST" -P "$PROXYSQL_PORT" -u "$MYSQL_USER" -p"$MYSQL_PASS" \ + -e "DROP TABLE IF EXISTS $table;" 2>/dev/null + done + + echo "Cleanup completed." 
+} + +############################################################################### +# Main Demonstration Script +############################################################################### + +main() { + print_header "sqlite-rembed Integration Examples" + echo -e "Starting at: $(date)" + echo -e "ProxySQL: ${PROXYSQL_HOST}:${PROXYSQL_PORT}" + echo -e "API Endpoint: ${API_URL}" + echo "" + + # Initial cleanup + cleanup_tables + + ########################################################################### + # Phase 1: Basic Connectivity and Function Verification + ########################################################################### + print_header "Phase 1: Basic Connectivity and Function Verification" + + echo "This phase verifies basic connectivity and confirms that sqlite-rembed" + echo "and sqlite-vec functions are properly registered in ProxySQL." + echo "" + + execute_and_show "SELECT 1 as connectivity_test;" "Basic ProxySQL connectivity" + + execute_and_show "SHOW DATABASES;" "Available databases" + + execute_and_show "SELECT name FROM pragma_function_list WHERE name LIKE 'vec%' LIMIT 5;" \ + "Available sqlite-vec functions" + + execute_and_show "SELECT name FROM pragma_function_list WHERE name LIKE 'rembed%' ORDER BY name;" \ + "Available sqlite-rembed functions" + + execute_and_show "SELECT name FROM sqlite_master WHERE name='rembed_clients' AND type='table';" \ + "Check temp.rembed_clients virtual table exists" + + ########################################################################### + # Phase 2: Client Configuration + ########################################################################### + print_header "Phase 2: Client Configuration" + + echo "This phase demonstrates how to configure an embedding API client using" + echo "the temp.rembed_clients virtual table and rembed_client_options() function." 
+ echo "" + + local create_client_sql="INSERT INTO temp.rembed_clients(name, options) VALUES + ('$API_CLIENT_NAME', + rembed_client_options( + 'format', '$API_FORMAT', + 'url', '$API_URL', + 'key', '$API_KEY', + 'model', '$API_MODEL' + ) + );" + + execute_and_show "$create_client_sql" "Create embedding API client" + + execute_and_show "SELECT name FROM temp.rembed_clients;" \ + "Verify client registration" + + execute_and_show "SELECT name, json_extract(options, '\$.format') as format, + json_extract(options, '\$.model') as model + FROM temp.rembed_clients;" \ + "View client configuration details" + + ########################################################################### + # Phase 3: Embedding Generation + ########################################################################### + print_header "Phase 3: Embedding Generation" + + echo "This phase demonstrates text embedding generation using the rembed() function." + echo "Embeddings are generated via HTTP request to the configured API endpoint." + echo "" + + execute_and_show "SELECT length(rembed('$API_CLIENT_NAME', 'Hello world')) as embedding_size_bytes;" \ + "Generate embedding for 'Hello world' and check size" + + execute_and_show "SELECT length(rembed('$API_CLIENT_NAME', 'Machine learning algorithms improve with more training data and computational power.')) as embedding_size_bytes;" \ + "Generate embedding for longer technical text" + + execute_and_show "SELECT length(rembed('$API_CLIENT_NAME', '')) as empty_embedding_size;" \ + "Generate embedding for empty text (edge case)" + + ########################################################################### + # Phase 4: Table Creation and Data Storage + ########################################################################### + print_header "Phase 4: Table Creation and Data Storage" + + echo "This phase demonstrates creating regular tables for document storage" + echo "and virtual vector tables for embedding storage using sqlite-vec." 
+ echo "" + + execute_and_show "CREATE TABLE demo_documents ( + id INTEGER PRIMARY KEY, + title TEXT NOT NULL, + content TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + );" "Create regular table for document storage" + + execute_and_show "CREATE VIRTUAL TABLE demo_embeddings USING vec0( + embedding float[$VECTOR_DIMENSIONS] + );" "Create virtual vector table for embeddings" + + execute_and_show "INSERT INTO demo_documents (id, title, content) VALUES + (1, 'Machine Learning', 'Machine learning algorithms improve with more training data and computational power.'), + (2, 'Database Systems', 'Database management systems efficiently store, retrieve, and manipulate structured data.'), + (3, 'Artificial Intelligence', 'AI enables computers to perform tasks typically requiring human intelligence.'), + (4, 'Vector Databases', 'Vector databases enable similarity search for embeddings generated by machine learning models.');" \ + "Insert sample documents" + + execute_and_show "SELECT id, title, length(content) as content_length FROM demo_documents;" \ + "Verify document insertion" + + ########################################################################### + # Phase 5: Embedding Generation and Storage + ########################################################################### + print_header "Phase 5: Embedding Generation and Storage" + + echo "This phase demonstrates generating embeddings for all documents and" + echo "storing them in the vector table for similarity search." 
+ echo "" + + execute_and_show "INSERT INTO demo_embeddings(rowid, embedding) + SELECT id, rembed('$API_CLIENT_NAME', content) + FROM demo_documents;" \ + "Generate and store embeddings for all documents" + + execute_and_show "SELECT COUNT(*) as total_embeddings FROM demo_embeddings;" \ + "Verify embedding count" + + execute_and_show "SELECT rowid, length(embedding) as embedding_size_bytes + FROM demo_embeddings LIMIT 2;" \ + "Check embedding storage format" + + ########################################################################### + # Phase 6: Similarity Search (KNN runs in a subquery so vec0 sees the LIMIT; an outer JOIN hides it) + ########################################################################### + print_header "Phase 6: Similarity Search" + + echo "This phase demonstrates similarity search using the stored embeddings." + echo "Queries show exact matches, similar documents, and distance metrics." + echo "" + + execute_and_show "SELECT d.title, d.content, e.distance + FROM (SELECT rowid, distance FROM demo_embeddings + WHERE embedding MATCH rembed('$API_CLIENT_NAME', + 'Machine learning algorithms improve with more training data and computational power.') + ORDER BY distance ASC LIMIT 3) e + JOIN demo_documents d ON e.rowid = d.id;" \ + "Exact self-match (should have distance 0.0)" + + execute_and_show "SELECT d.title, d.content, e.distance + FROM (SELECT rowid, distance FROM demo_embeddings + WHERE embedding MATCH rembed('$API_CLIENT_NAME', + 'data science and algorithms') + ORDER BY distance ASC LIMIT 3) e + JOIN demo_documents d ON e.rowid = d.id;" \ + "Similarity search with query text" + + execute_and_show "SELECT d.title, e.distance + FROM (SELECT rowid, distance FROM demo_embeddings + WHERE embedding MATCH rembed('$API_CLIENT_NAME', + 'artificial intelligence and neural networks') + ORDER BY distance ASC LIMIT 3) e + JOIN demo_documents d ON e.rowid = d.id + ORDER BY e.distance ASC;" \ + "Ordered similarity search (closest matches first)" + + ########################################################################### + # Phase 7: Edge Cases and Error Handling + ########################################################################### + print_header "Phase
7: Edge Cases and Error Handling" + + echo "This phase demonstrates error handling and edge cases." + echo "" + + execute_and_show "SELECT rembed('non-existent-client', 'test text');" \ + "Error: Non-existent client" + + execute_and_show "SELECT rembed('$API_CLIENT_NAME', + '$(printf '%0.sA' {1..5000})');" \ + "Very long text input" + + ########################################################################### + # Phase 8: Cleanup and Summary + ########################################################################### + print_header "Phase 8: Cleanup and Summary" + + echo "Cleaning up demonstration tables and providing summary." + echo "" + + cleanup_tables + + echo "" + print_header "Demonstration Complete" + echo "All sqlite-rembed integration examples have been executed successfully." + echo "The demonstration covered:" + echo " • Client configuration with temp.rembed_clients" + echo " • Embedding generation via HTTP API" + echo " • Vector table creation and data storage" + echo " • Similarity search with generated embeddings" + echo " • Error handling and edge cases" + echo "" + echo "These examples can be used as a baseline for building applications" + echo "that leverage sqlite-rembed and sqlite-vec in ProxySQL." +} + +############################################################################### +# Script Entry Point +############################################################################### + +# Check if mysql client is available +if ! command -v mysql &> /dev/null; then + echo -e "${RED}Error: MySQL client not found. Please install mysql-client.${NC}" + exit 1 +fi + +# Check connectivity to ProxySQL +if ! 
mysql -h "$PROXYSQL_HOST" -P "$PROXYSQL_PORT" -u "$MYSQL_USER" -p"$MYSQL_PASS" \ + -e "SELECT 1;" &>/dev/null; then + echo -e "${RED}Error: Cannot connect to ProxySQL at ${PROXYSQL_HOST}:${PROXYSQL_PORT}${NC}" + echo "Make sure ProxySQL is running with: ./proxysql --sqlite3-server" + exit 1 +fi + +# Run main demonstration +main +exit 0 \ No newline at end of file diff --git a/doc/sqlite-rembed-examples.sql b/doc/sqlite-rembed-examples.sql new file mode 100644 index 0000000000..39973657e9 --- /dev/null +++ b/doc/sqlite-rembed-examples.sql @@ -0,0 +1,218 @@ +-- sqlite-rembed Examples and Demonstration +-- This SQL file demonstrates the usage of sqlite-rembed integration in ProxySQL +-- Connect to ProxySQL SQLite3 server on port 6030 and run these examples: +-- mysql -h 127.0.0.1 -P 6030 -u root -proot < sqlite-rembed-examples.sql +-- +-- IMPORTANT: Replace YOUR_API_KEY with your actual API key in Phase 2 +-- +-- Generated: 2025-12-23 + +-------------------------------------------------------------------- +-- Cleanup: Remove any existing demonstration tables +-------------------------------------------------------------------- +DROP TABLE IF EXISTS demo_documents; +DROP TABLE IF EXISTS demo_embeddings; + +-------------------------------------------------------------------- +-- Phase 1: Basic Connectivity and Function Verification +-------------------------------------------------------------------- +-- Verify basic connectivity and confirm sqlite-rembed functions are registered + +SELECT 'Phase 1: Basic Connectivity' as phase; + +-- Basic ProxySQL connectivity test +SELECT 1 as connectivity_test; + +-- Available databases +SHOW DATABASES; + +-- Available sqlite-vec functions +SELECT name FROM pragma_function_list WHERE name LIKE 'vec%' LIMIT 5; + +-- Available sqlite-rembed functions +SELECT name FROM pragma_function_list WHERE name LIKE 'rembed%' ORDER BY name; + +-- Check temp.rembed_clients virtual table exists +SELECT name FROM sqlite_master WHERE 
name='rembed_clients' AND type='table'; + +-------------------------------------------------------------------- +-- Phase 2: Client Configuration +-------------------------------------------------------------------- +-- Configure an embedding API client using temp.rembed_clients table +-- Note: temp.rembed_clients is per-connection, so client must be registered +-- in the same session where embeddings are generated + +SELECT 'Phase 2: Client Configuration' as phase; + +-- Create embedding API client using synthetic OpenAI endpoint +-- Replace with your own API credentials for production use +-- IMPORTANT: Replace YOUR_API_KEY with your actual API key +INSERT INTO temp.rembed_clients(name, options) VALUES + ('demo-client', + rembed_client_options( + 'format', 'openai', + 'url', 'https://api.synthetic.new/openai/v1/embeddings', + 'key', 'YOUR_API_KEY', -- Replace with your actual API key + 'model', 'hf:nomic-ai/nomic-embed-text-v1.5' + ) + ); + +-- Verify client registration +SELECT name FROM temp.rembed_clients; + +-- View client configuration details +SELECT name, + json_extract(options, '$.format') as format, + json_extract(options, '$.model') as model +FROM temp.rembed_clients; + +-------------------------------------------------------------------- +-- Phase 3: Embedding Generation +-------------------------------------------------------------------- +-- Generate text embeddings using the rembed() function +-- Embeddings are generated via HTTP request to the configured API endpoint + +SELECT 'Phase 3: Embedding Generation' as phase; + +-- Generate embedding for 'Hello world' and check size (768 dimensions × 4 bytes = 3072 bytes) +SELECT length(rembed('demo-client', 'Hello world')) as embedding_size_bytes; + +-- Generate embedding for longer technical text +SELECT length(rembed('demo-client', 'Machine learning algorithms improve with more training data and computational power.')) as embedding_size_bytes; + +-- Generate embedding for empty text (edge case) +SELECT 
length(rembed('demo-client', '')) as empty_embedding_size; + +-------------------------------------------------------------------- +-- Phase 4: Table Creation and Data Storage +-------------------------------------------------------------------- +-- Create regular tables for document storage and virtual vector tables +-- for embedding storage using sqlite-vec + +SELECT 'Phase 4: Table Creation and Data Storage' as phase; + +-- Create regular table for document storage +CREATE TABLE demo_documents ( + id INTEGER PRIMARY KEY, + title TEXT NOT NULL, + content TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- Create virtual vector table for embeddings with 768 dimensions +CREATE VIRTUAL TABLE demo_embeddings USING vec0( + embedding float[768] +); + +-- Insert sample documents with diverse content +INSERT INTO demo_documents (id, title, content) VALUES + (1, 'Machine Learning', 'Machine learning algorithms improve with more training data and computational power.'), + (2, 'Database Systems', 'Database management systems efficiently store, retrieve, and manipulate structured data.'), + (3, 'Artificial Intelligence', 'AI enables computers to perform tasks typically requiring human intelligence.'), + (4, 'Vector Databases', 'Vector databases enable similarity search for embeddings generated by machine learning models.'); + +-- Verify document insertion +SELECT id, title, length(content) as content_length FROM demo_documents; + +-------------------------------------------------------------------- +-- Phase 5: Embedding Generation and Storage +-------------------------------------------------------------------- +-- Generate embeddings for all documents and store them in the vector table +-- for similarity search + +SELECT 'Phase 5: Embedding Generation and Storage' as phase; + +-- Generate and store embeddings for all documents +INSERT INTO demo_embeddings(rowid, embedding) +SELECT id, rembed('demo-client', content) +FROM demo_documents; + +-- Verify 
embedding count (should be 4) +SELECT COUNT(*) as total_embeddings FROM demo_embeddings; + +-- Check embedding storage format (should be 3072 bytes each) +SELECT rowid, length(embedding) as embedding_size_bytes +FROM demo_embeddings LIMIT 2; + +-------------------------------------------------------------------- +-- Phase 6: Similarity Search +-------------------------------------------------------------------- +-- Perform similarity search using the stored embeddings +-- sqlite-vec requires either LIMIT or 'k = ?' constraint on KNN queries +-- Note: When using JOIN, the LIMIT must be in a subquery for vec0 to recognize it + +SELECT 'Phase 6: Similarity Search' as phase; + +-- Direct vector table query: Search for similar embeddings +-- Returns rowid and distance for the 3 closest matches +SELECT rowid, distance +FROM demo_embeddings +WHERE embedding MATCH rembed('demo-client', + 'data science and algorithms') +ORDER BY distance ASC +LIMIT 3; + +-- Similarity search with JOIN using subquery +-- First find similar embeddings in subquery with LIMIT, then JOIN with documents +SELECT d.title, d.content, e.distance +FROM ( + SELECT rowid, distance + FROM demo_embeddings + WHERE embedding MATCH rembed('demo-client', + 'artificial intelligence and neural networks') + ORDER BY distance ASC + LIMIT 3 +) e +JOIN demo_documents d ON e.rowid = d.id; + +-- Exact self-match: Search for a document using its own exact text +-- Should return distance close to 0.0 for the exact match (may not be exactly 0 due to floating point) +SELECT d.title, e.distance +FROM ( + SELECT rowid, distance + FROM demo_embeddings + WHERE embedding MATCH rembed('demo-client', + 'Machine learning algorithms improve with more training data and computational power.') + ORDER BY distance ASC + LIMIT 3 +) e +JOIN demo_documents d ON e.rowid = d.id; + +-------------------------------------------------------------------- +-- Phase 7: Edge Cases and Error Handling 
+-------------------------------------------------------------------- +-- Demonstrate error handling and edge cases + +SELECT 'Phase 7: Edge Cases and Error Handling' as phase; + +-- Error: Non-existent client +SELECT rembed('non-existent-client', 'test text'); + +-- Very long text input +SELECT rembed('demo-client', + 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'); + +-------------------------------------------------------------------- +-- Phase 8: Cleanup +-------------------------------------------------------------------- +-- Clean up demonstration tables + +SELECT 'Phase 8: Cleanup' as phase; + +DROP TABLE IF EXISTS demo_documents; +DROP TABLE IF EXISTS demo_embeddings; + +-------------------------------------------------------------------- +-- Summary +-------------------------------------------------------------------- +SELECT 'Demonstration Complete' as phase; +SELECT 'All sqlite-rembed integration examples have been executed successfully.' as summary; +SELECT 'The demonstration covered:' as coverage; +SELECT ' • Client configuration with temp.rembed_clients' as item; +SELECT ' • Embedding generation via HTTP API' as item; +SELECT ' • Vector table creation and data storage' as item; +SELECT ' • Similarity search with generated embeddings' as item; +SELECT ' • Error handling and edge cases' as item; +SELECT ' ' as blank; +SELECT 'These examples can be used as a baseline for building applications' as usage; +SELECT 'that leverage sqlite-rembed and sqlite-vec in ProxySQL.' 
as usage_cont; \ No newline at end of file diff --git a/doc/sqlite-rembed-test.sh b/doc/sqlite-rembed-test.sh new file mode 100755 index 0000000000..a1bb1ad4e8 --- /dev/null +++ b/doc/sqlite-rembed-test.sh @@ -0,0 +1,574 @@ +#!/bin/bash + +############################################################################### +# sqlite-rembed Integration Test Suite +# +# This script comprehensively tests the sqlite-rembed integration in ProxySQL, +# verifying all components of the embedding generation and vector search pipeline. +# +# Tests performed: +# 1. Basic connectivity to ProxySQL SQLite3 server +# 2. Function registration (rembed, rembed_client_options) +# 3. Client configuration in temp.rembed_clients virtual table +# 4. Embedding generation via remote HTTP API +# 5. Vector table creation and data storage +# 6. Similarity search with generated embeddings +# 7. Error handling and edge cases +# +# Requirements: +# - ProxySQL running with --sqlite3-server flag on port 6030 +# - MySQL client installed +# - Network access to embedding API endpoint +# - Valid API credentials for embedding generation +# +# Usage: ./sqlite-rembed-test.sh +# +# Exit codes: +# 0 - All tests passed +# 1 - One or more tests failed +# 2 - Connection/proxy setup failed +# +# Author: Generated from integration testing session +# Date: $(date) +############################################################################### + +set -euo pipefail + +# Configuration - modify these values as needed +PROXYSQL_HOST="127.0.0.1" +PROXYSQL_PORT="6030" +MYSQL_USER="root" +MYSQL_PASS="root" + +# API Configuration - using synthetic OpenAI endpoint for testing +# IMPORTANT: Replace YOUR_API_KEY with your actual API key +API_CLIENT_NAME="test-client-$(date +%s)" +API_FORMAT="openai" +API_URL="https://api.synthetic.new/openai/v1/embeddings" +API_KEY="YOUR_API_KEY" # Replace with your actual API key +API_MODEL="hf:nomic-ai/nomic-embed-text-v1.5" +VECTOR_DIMENSIONS=768 # Based on model output + +# Test results 
tracking +TOTAL_TESTS=0 +PASSED_TESTS=0 +FAILED_TESTS=0 +CURRENT_TEST="" + +# Color codes for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Text formatting +BOLD='\033[1m' +UNDERLINE='\033[4m' + + +############################################################################### +# Helper Functions +############################################################################### + +print_header() { # $1: section title, printed bold/underlined + echo -e "\n${BLUE}${BOLD}${UNDERLINE}$1${NC}\n" +} + +print_test() { # $1: test name; records it as CURRENT_TEST and counts it + echo -e "${YELLOW}[TEST]${NC} $1" + CURRENT_TEST="$1" + TOTAL_TESTS=$((TOTAL_TESTS + 1)) # not ((x++)): that returns status 1 when x is 0, which aborts under 'set -e' +} + +print_success() { # $1: message; counts one passed test + echo -e "${GREEN}✅ SUCCESS:${NC} $1" + PASSED_TESTS=$((PASSED_TESTS + 1)) # plain assignment always returns 0, safe under 'set -e' +} + +print_failure() { # $1: test name, $2: error detail; counts one failed test + echo -e "${RED}❌ FAILURE:${NC} $1" + echo " Error: $2" + FAILED_TESTS=$((FAILED_TESTS + 1)) # plain assignment always returns 0, safe under 'set -e' +} + +print_info() { # $1: informational message + echo -e "${BLUE}ℹ INFO:${NC} $1" +} + +# Execute MySQL query and capture results ($1: SQL text, $2: "true" for bare -s -N output; returns mysql's exit status, stderr merged into stdout) +execute_query() { + local sql_query="$1" + local capture_output="${2:-false}" + + if [ "$capture_output" = "true" ]; then + mysql -h "$PROXYSQL_HOST" -P "$PROXYSQL_PORT" -u "$MYSQL_USER" -p"$MYSQL_PASS" \ + -s -N -e "$sql_query" 2>&1 + else + mysql -h "$PROXYSQL_HOST" -P "$PROXYSQL_PORT" -u "$MYSQL_USER" -p"$MYSQL_PASS" \ + -e "$sql_query" 2>&1 + fi +} + +# Run a test and check for success ($1: name, $2: SQL, $3: optional grep pattern the output must match) +run_test() { + local test_name="$1" + local sql_query="$2" + local expected_pattern="${3:-}" + + print_test "$test_name" + + local result exit_code=0 + # '|| exit_code=$?' records a failed query instead of letting 'set -e' end the whole suite + result=$(execute_query "$sql_query" "true") || exit_code=$? + + if [ $exit_code -eq 0 ]; then + if [ -n "$expected_pattern" ] && ! grep -q -- "$expected_pattern" <<< "$result"; then + print_failure "$test_name" "Pattern '$expected_pattern' not found in output" + echo " Output: $result" + else + print_success "$test_name" + fi + else + print_failure "$test_name" "$result" + fi +} + +# Clean up any existing test tables +cleanup_tables() { + print_info "Cleaning up existing test tables..."
+ + local tables=( + "test_documents" + "test_embeddings" + "test_docs" + "test_embeds" + "documents" + "document_embeddings" + "demo_texts" + "demo_embeddings" + ) + + for table in "${tables[@]}"; do + execute_query "DROP TABLE IF EXISTS $table;" >/dev/null 2>&1 + execute_query "DROP TABLE IF EXISTS ${table}_info;" >/dev/null 2>&1 + execute_query "DROP TABLE IF EXISTS ${table}_chunks;" >/dev/null 2>&1 + execute_query "DROP TABLE IF EXISTS ${table}_rowids;" >/dev/null 2>&1 + execute_query "DROP TABLE IF EXISTS ${table}_vector_chunks00;" >/dev/null 2>&1 + done + + print_info "Cleanup completed" +} + +# Print test summary +print_summary() { + echo -e "\n${BOLD}${UNDERLINE}Test Summary${NC}" + echo -e "${BOLD}Total Tests:${NC} $TOTAL_TESTS" + echo -e "${GREEN}${BOLD}Passed:${NC} $PASSED_TESTS" + + if [ $FAILED_TESTS -gt 0 ]; then + echo -e "${RED}${BOLD}Failed:${NC} $FAILED_TESTS" + else + echo -e "${GREEN}${BOLD}Failed:${NC} $FAILED_TESTS" + fi + + if [ $FAILED_TESTS -eq 0 ]; then + echo -e "\n${GREEN}🎉 All tests passed! sqlite-rembed integration is fully functional.${NC}" + return 0 + else + echo -e "\n${RED}❌ Some tests failed. Please check the errors above.${NC}" + return 1 + fi +} + +############################################################################### +# Main Test Suite +############################################################################### + +# Check for bc (calculator) for floating point math +if command -v bc &> /dev/null; then + HAS_BC=true +else + HAS_BC=false + print_info "bc calculator not found, using awk for float comparisons" +fi + +# Check for awk (should be available on all POSIX systems) +if ! command -v awk &> /dev/null; then + echo -e "${RED}Error: awk not found. 
awk is required for this test suite.${NC}"
+    exit 2
+fi
+
+# Run the whole suite against the ProxySQL SQLite3 server, phase by phase:
+# connectivity, client configuration, embedding generation, storage,
+# similarity search, edge cases, a basic timing check and cleanup.
+# Takes no arguments; reads the PROXYSQL_*/MYSQL_*/API_*/VECTOR_DIMENSIONS
+# settings and HAS_BC from the enclosing script.
+# Returns 0 when every test passed, 1 otherwise (the status of print_summary).
+main() {
+    print_header "sqlite-rembed Integration Test Suite"
+    echo -e "Starting at: $(date)"
+    echo -e "ProxySQL: ${PROXYSQL_HOST}:${PROXYSQL_PORT}"
+    echo -e "API Endpoint: ${API_URL}"
+    echo ""
+
+    # Initial cleanup
+    cleanup_tables
+
+    ###########################################################################
+    # Phase 1: Basic Connectivity and Function Verification
+    ###########################################################################
+    print_header "Phase 1: Basic Connectivity and Function Verification"
+
+    # Test 1.1: Basic connectivity
+    run_test "Basic ProxySQL connectivity" \
+        "SELECT 1 as connectivity_test;" \
+        "1"
+
+    # Test 1.2: Check database
+    run_test "Database listing" \
+        "SHOW DATABASES;" \
+        "main"
+
+    # Test 1.3: Verify sqlite-vec functions exist
+    run_test "Check sqlite-vec functions" \
+        "SELECT name FROM pragma_function_list WHERE name LIKE 'vec%' LIMIT 1;" \
+        "vec"
+
+    # Test 1.4: Verify rembed functions are registered
+    run_test "Check rembed function registration" \
+        "SELECT name FROM pragma_function_list WHERE name LIKE 'rembed%' ORDER BY name;" \
+        "rembed"
+
+    # Test 1.5: Verify temp.rembed_clients virtual table schema
+    run_test "Check temp.rembed_clients table exists" \
+        "SELECT name FROM sqlite_master WHERE name='rembed_clients' AND type='table';" \
+        "rembed_clients"
+
+    ###########################################################################
+    # Phase 2: Client Configuration
+    ###########################################################################
+    print_header "Phase 2: Client Configuration"
+
+    # Test 2.1: Create embedding client
+    local create_client_sql="INSERT INTO temp.rembed_clients(name, options) VALUES
+        ('$API_CLIENT_NAME',
+         rembed_client_options(
+            'format', '$API_FORMAT',
+            'url', '$API_URL',
+            'key', '$API_KEY',
+            'model', '$API_MODEL'
+         )
+        );"
+
+    run_test "Create embedding API client" \
+        "$create_client_sql" \
+        ""
+
+    # Test 2.2: Verify client creation
+    run_test "Verify client in temp.rembed_clients" \
+        "SELECT name FROM temp.rembed_clients WHERE name='$API_CLIENT_NAME';" \
+        "$API_CLIENT_NAME"
+
+    # Test 2.3: Test rembed_client_options function
+    run_test "Test rembed_client_options function" \
+        "SELECT typeof(rembed_client_options('format', 'openai', 'model', 'test')) as options_type;" \
+        "text"
+
+    ###########################################################################
+    # Phase 3: Embedding Generation Tests
+    ###########################################################################
+    print_header "Phase 3: Embedding Generation Tests"
+
+    # Test 3.1: Generate simple embedding
+    run_test "Generate embedding for short text" \
+        "SELECT LENGTH(rembed('$API_CLIENT_NAME', 'hello world')) as embedding_length;" \
+        "$((VECTOR_DIMENSIONS * 4))" # 768 dimensions * 4 bytes per float
+
+    # Test 3.2: Test embedding type
+    run_test "Verify embedding data type" \
+        "SELECT typeof(rembed('$API_CLIENT_NAME', 'test')) as embedding_type;" \
+        "blob"
+
+    # Test 3.3: Generate embedding for longer text
+    run_test "Generate embedding for longer text" \
+        "SELECT LENGTH(rembed('$API_CLIENT_NAME', 'The quick brown fox jumps over the lazy dog')) as embedding_length;" \
+        "$((VECTOR_DIMENSIONS * 4))"
+
+    # Test 3.4: Error handling - non-existent client
+    # The query is expected to fail, so only the error text is checked.
+    print_test "Error handling: non-existent client"
+    local error_result
+    error_result=$(execute_query "SELECT rembed('non-existent-client', 'test');" "true")
+    if echo "$error_result" | grep -q "was not registered with rembed_clients"; then
+        print_success "Proper error for non-existent client"
+    else
+        print_failure "Error handling" "Expected error message not found: $error_result"
+    fi
+
+    ###########################################################################
+    # Phase 4: Table Creation and Data Storage
+    ###########################################################################
+    print_header "Phase 4: Table Creation and Data Storage"
+
+    # Test 4.1: Create regular table for documents
+    run_test "Create documents table" \
+        "CREATE TABLE test_documents (
+            id INTEGER PRIMARY KEY,
+            title TEXT NOT NULL,
+            content TEXT NOT NULL,
+            created_at DATETIME DEFAULT CURRENT_TIMESTAMP
+        );" \
+        ""
+
+    # Test 4.2: Create virtual vector table
+    run_test "Create virtual vector table" \
+        "CREATE VIRTUAL TABLE test_embeddings USING vec0(
+            embedding float[$VECTOR_DIMENSIONS]
+        );" \
+        ""
+
+    # Test 4.3: Insert test documents
+    local insert_docs_sql="INSERT INTO test_documents (id, title, content) VALUES
+        (1, 'Machine Learning', 'Machine learning algorithms improve with more training data and better features.'),
+        (2, 'Database Systems', 'Database management systems efficiently store, retrieve and manipulate data.'),
+        (3, 'Artificial Intelligence', 'AI enables computers to perform tasks typically requiring human intelligence.'),
+        (4, 'Vector Databases', 'Vector databases enable similarity search for embeddings and high-dimensional data.');"
+
+    run_test "Insert test documents" \
+        "$insert_docs_sql" \
+        ""
+
+    # Test 4.4: Verify document insertion
+    # Anchored so that e.g. "14" or "40" cannot satisfy the check.
+    run_test "Verify document count" \
+        "SELECT COUNT(*) as doc_count FROM test_documents;" \
+        "^4$"
+
+    ###########################################################################
+    # Phase 5: Embedding Generation and Storage
+    ###########################################################################
+    print_header "Phase 5: Embedding Generation and Storage"
+
+    # Test 5.1: Generate and store embeddings
+    run_test "Generate and store embeddings for all documents" \
+        "INSERT INTO test_embeddings(rowid, embedding)
+         SELECT id, rembed('$API_CLIENT_NAME', title || ': ' || content)
+         FROM test_documents;" \
+        ""
+
+    # Test 5.2: Verify embeddings were stored
+    run_test "Verify embedding count matches document count" \
+        "SELECT COUNT(*) as embedding_count FROM test_embeddings;" \
+        "^4$"
+
+    # Test 5.3: Check embedding data structure
+    # Not anchored: the output row also carries the rowid column.
+    run_test "Check embedding storage format" \
+        "SELECT rowid, LENGTH(embedding) as bytes FROM test_embeddings LIMIT 1;" \
+        "$((VECTOR_DIMENSIONS * 4))"
+
+    ###########################################################################
+    # Phase 6: Similarity Search Tests
+    ###########################################################################
+    print_header "Phase 6: Similarity Search Tests"
+
+    # NOTE(review): sqlite-vec KNN queries need a LIMIT or 'k = ?' constraint
+    # that reaches the vec0 table itself. In the two queries below the LIMIT
+    # sits outside a JOIN, a shape reported to fail in sqlite-rembed-demo.sh -
+    # confirm these run, or move the MATCH + LIMIT into a subquery on
+    # test_embeddings.
+
+    # Test 6.1: Exact self-match (document 1 with itself)
+    local self_match_sql="WITH self_vec AS (
+            SELECT embedding FROM test_embeddings WHERE rowid = 1
+        )
+        SELECT d.id, d.title, e.distance
+        FROM test_documents d
+        JOIN test_embeddings e ON d.id = e.rowid
+        CROSS JOIN self_vec
+        WHERE e.embedding MATCH self_vec.embedding
+        ORDER BY e.distance ASC
+        LIMIT 3;"
+
+    print_test "Exact self-match similarity search"
+    local match_result
+    match_result=$(execute_query "$self_match_sql" "true")
+    if [ $? -eq 0 ] && echo "$match_result" | grep -q "1.*Machine Learning.*0.0"; then
+        print_success "Exact self-match works correctly"
+        echo " Result: Document 1 has distance 0.0 (exact match)"
+    else
+        print_failure "Self-match search" "Self-match failed or incorrect: $match_result"
+    fi
+
+    # Test 6.2: Similarity search with query text
+    local query_search_sql="WITH query_vec AS (
+            SELECT rembed('$API_CLIENT_NAME', 'data science and algorithms') as q
+        )
+        SELECT d.id, d.title, e.distance
+        FROM test_documents d
+        JOIN test_embeddings e ON d.id = e.rowid
+        CROSS JOIN query_vec
+        WHERE e.embedding MATCH query_vec.q
+        ORDER BY e.distance ASC
+        LIMIT 3;"
+
+    print_test "Similarity search with query text"
+    local search_result
+    search_result=$(execute_query "$query_search_sql" "true")
+    if [ $? -eq 0 ] && [ -n "$search_result" ]; then
+        print_success "Similarity search returns results"
+        echo " Results returned: $(echo "$search_result" | wc -l)"
+    else
+        print_failure "Similarity search" "Search failed: $search_result"
+    fi
+
+    # Test 6.3: Verify search ordering (distances should be ascending)
+    # Reuses the output of Test 6.2; every decimal number in it is treated as
+    # a distance.
+    print_test "Verify search result ordering"
+    local distances
+    distances=$(echo "$search_result" | grep -o '[0-9]\+\.[0-9]\+' || true)
+    if [ -n "$distances" ]; then
+        # Check if distances are non-decreasing (allows equal distances)
+        local prev=-1
+        local ordered=true
+        local dist
+        for dist in $distances; do
+            if [ "$HAS_BC" = true ]; then
+                # Use bc for precise float comparison
+                if (( $(echo "$dist < $prev" | bc -l 2>/dev/null || echo "0") )); then
+                    ordered=false
+                    break
+                fi
+            else
+                # Use awk for float comparison (less precise but works)
+                if ! awk -v d="$dist" -v p="$prev" 'BEGIN { exit !(d >= p) }' 2>/dev/null; then
+                    ordered=false
+                    break
+                fi
+            fi
+            prev=$dist
+        done
+
+        if [ "$ordered" = true ]; then
+            print_success "Results ordered by ascending distance"
+        else
+            print_failure "Result ordering" "Distances not in ascending order: $distances"
+        fi
+    else
+        print_info "No distances to verify ordering"
+    fi
+
+    ###########################################################################
+    # Phase 7: Edge Cases and Error Handling
+    ###########################################################################
+    print_header "Phase 7: Edge Cases and Error Handling"
+
+    # Test 7.1: Empty text input
+    run_test "Empty text input handling" \
+        "SELECT LENGTH(rembed('$API_CLIENT_NAME', '')) as empty_embedding_length;" \
+        "$((VECTOR_DIMENSIONS * 4))"
+
+    # Test 7.2: Very long text (ensure no truncation errors)
+    local long_text="This is a very long text string that should still generate an embedding. "
+    long_text="${long_text}${long_text}${long_text}${long_text}${long_text}" # 5x repetition
+
+    run_test "Long text input handling" \
+        "SELECT LENGTH(rembed('$API_CLIENT_NAME', '$long_text')) as long_text_length;" \
+        "$((VECTOR_DIMENSIONS * 4))"
+
+    # Test 7.3: SQL injection attempt in text parameter
+    # The doubled single quotes are SQL escapes, so the payload stays literal text.
+    run_test "SQL injection attempt handling" \
+        "SELECT LENGTH(rembed('$API_CLIENT_NAME', 'test'' OR ''1''=''1')) as injection_safe_length;" \
+        "$((VECTOR_DIMENSIONS * 4))"
+
+    ###########################################################################
+    # Phase 8: Performance and Concurrency (Basic)
+    ###########################################################################
+    print_header "Phase 8: Performance and Concurrency"
+
+    # Test 8.1: Sequential embedding generation timing
+    # NOTE(review): %N (nanoseconds) is a GNU date extension - confirm the
+    # platforms this suite has to run on.
+    print_test "Sequential embedding generation timing"
+    local start_time
+    start_time=$(date +%s.%N)
+
+    execute_query "SELECT rembed('$API_CLIENT_NAME', 'performance test 1');
+                   SELECT rembed('$API_CLIENT_NAME', 'performance test 2');
+                   SELECT rembed('$API_CLIENT_NAME', 'performance test 3');" >/dev/null 2>&1
+    # Keep the status: a query that fails immediately is fast, but it is not
+    # a performance pass.
+    local perf_status=$?
+
+    local end_time
+    end_time=$(date +%s.%N)
+    local elapsed
+    local within_budget=false
+    if [ "$HAS_BC" = true ]; then
+        elapsed=$(echo "$end_time - $start_time" | bc)
+        if (( $(echo "$elapsed < 10" | bc -l) )); then
+            within_budget=true
+        fi
+    else
+        elapsed=$(awk -v s="$start_time" -v e="$end_time" 'BEGIN { printf "%.2f", e - s }' 2>/dev/null || echo "0")
+        # Simple float comparison with awk
+        if awk -v e="$elapsed" 'BEGIN { exit !(e < 10) }' 2>/dev/null; then
+            within_budget=true
+        fi
+    fi
+
+    if [ "$perf_status" -ne 0 ]; then
+        print_failure "Performance" "Embedding queries failed (mysql exit status $perf_status)"
+    elif [ "$within_budget" = true ]; then
+        print_success "Sequential embeddings generated in ${elapsed}s"
+    else
+        print_failure "Performance" "Embedding generation took too long: ${elapsed}s"
+    fi
+
+    ###########################################################################
+    # Phase 9: Cleanup and Final Verification
+    ###########################################################################
+    print_header "Phase 9: Cleanup and Final Verification"
+
+    # Test 9.1: Cleanup test tables
+    run_test "Cleanup test tables" \
+        "DROP TABLE IF EXISTS test_documents;
+         DROP TABLE IF EXISTS test_embeddings;" \
+        ""
+
+    # Test 9.2: Verify cleanup
+    # Anchored so that a leftover count such as "10" is not accepted as "0".
+    run_test "Verify tables are removed" \
+        "SELECT COUNT(*) as remaining_tests FROM sqlite_master WHERE name LIKE 'test_%';" \
+        "^0$"
+
+    ###########################################################################
+    # Final Summary
+    ###########################################################################
+    print_header "Test Suite Complete"
+
+    echo -e "Embedding API Client: ${API_CLIENT_NAME}"
+    echo -e "Vector Dimensions: ${VECTOR_DIMENSIONS}"
+    echo -e "Total Operations Tested: ${TOTAL_TESTS}"
+
+    print_summary
+    local summary_exit=$?
+
+    if [ "$summary_exit" -eq 0 ]; then
+        # Final system status. Printed only when every test passed, so the
+        # green check marks never contradict failures reported above.
+        echo -e "\n${BOLD}System Status:${NC}"
+        echo -e "ProxySQL SQLite3 Server: ${GREEN}✅ Accessible${NC}"
+        echo -e "sqlite-rembed Extension: ${GREEN}✅ Loaded${NC}"
+        echo -e "Embedding API: ${GREEN}✅ Responsive${NC}"
+        echo -e "Vector Search: ${GREEN}✅ Functional${NC}"
+
+        echo -e "\n${GREEN}${BOLD}✓ sqlite-rembed integration test suite completed successfully${NC}"
+        echo -e "All components are functioning correctly."
+    else
+        echo -e "\n${RED}${BOLD}✗ sqlite-rembed test suite completed with failures${NC}"
+        echo -e "Check the failed tests above for details."
+    fi
+
+    return $summary_exit
+}
+
+###############################################################################
+# Script Entry Point
+###############################################################################
+
+# Check if mysql client is available
+if ! command -v mysql &> /dev/null; then
+    echo -e "${RED}Error: MySQL client not found. Please install mysql-client.${NC}"
+    exit 2
+fi
+
+# Check connectivity to ProxySQL
+if !
mysql -h "$PROXYSQL_HOST" -P "$PROXYSQL_PORT" -u "$MYSQL_USER" -p"$MYSQL_PASS" \ + -e "SELECT 1;" &>/dev/null; then + echo -e "${RED}Error: Cannot connect to ProxySQL at ${PROXYSQL_HOST}:${PROXYSQL_PORT}${NC}" + echo "Make sure ProxySQL is running with: ./proxysql --sqlite3-server" + exit 2 +fi + +# Run main test suite +main +exit $? \ No newline at end of file From 612ef326bc8a19f68556305e51700be2c4e3ce93 Mon Sep 17 00:00:00 2001 From: Rene Cannao Date: Tue, 23 Dec 2025 07:41:23 +0000 Subject: [PATCH 5/5] Fix sqlite-rembed demonstration scripts and add environment variable support This commit addresses critical fixes to the sqlite-rembed demonstration scripts and adds environment variable support for API key configuration. Key Changes: 1. Fixed sqlite-rembed-demo.sh similarity search queries: - Changed FROM demo_embeddings e JOIN ... WHERE embedding MATCH pattern - To correct subquery pattern required by sqlite-vec: FROM (SELECT rowid, distance ... LIMIT) e JOIN ... - This resolves "A LIMIT or 'k = ?' constraint is required on vec0 knn queries" error - All three similarity search queries now use proper subquery structure 2. Added comprehensive cleanup at script start: - Added DROP TABLE IF EXISTS for all demo_embeddings related tables - Prevents "UNIQUE constraint failed on demo_embeddings primary key" errors - Uses INSERT OR REPLACE instead of INSERT for embedding storage 3. Added environment variable support for API_KEY: - Updated all demonstration scripts to use API_KEY="${API_KEY:-YOUR_API_KEY}" - Users can now set API_KEY environment variable: export API_KEY="actual_key" - Falls back to YOUR_API_KEY placeholder if environment variable not set - Improves security by avoiding hardcoded keys in scripts 4. 
Updated documentation: - Modified SQLITE-REMBED-TEST-README.md to document environment variable usage - Updated comments in all scripts to mention environment variable option Files Modified: - doc/sqlite-rembed-demo.sh: Fixed similarity search queries, added cleanup, added environment variable support - doc/sqlite-rembed-examples.sh: Added environment variable support - doc/sqlite-rembed-test.sh: Added environment variable support - doc/SQLITE-REMBED-TEST-README.md: Updated documentation for env var support Verification: - sqlite-rembed-demo.sh now runs successfully end-to-end - All similarity search queries execute without errors - Environment variable fallback works correctly - Scripts maintain backward compatibility with direct key replacement --- doc/SQLITE-REMBED-TEST-README.md | 4 +-- doc/sqlite-rembed-demo.sh | 55 +++++++++++++++++++++----------- doc/sqlite-rembed-examples.sh | 4 +-- doc/sqlite-rembed-test.sh | 4 +-- 4 files changed, 42 insertions(+), 25 deletions(-) diff --git a/doc/SQLITE-REMBED-TEST-README.md b/doc/SQLITE-REMBED-TEST-README.md index a2a472227e..6f93df8ef9 100644 --- a/doc/SQLITE-REMBED-TEST-README.md +++ b/doc/SQLITE-REMBED-TEST-README.md @@ -110,12 +110,12 @@ MYSQL_PASS="root" ``` ### API Configuration -The test uses a synthetic OpenAI endpoint by default. Modify these variables to use your own API: +The test uses a synthetic OpenAI endpoint by default. 
Set `API_KEY` environment variable or modify the variable below to use your own API: ```bash API_CLIENT_NAME="test-client-$(date +%s)" API_FORMAT="openai" API_URL="https://api.synthetic.new/openai/v1/embeddings" -API_KEY="YOUR_API_KEY" # Replace with your actual API key +API_KEY="${API_KEY:-YOUR_API_KEY}" # Uses environment variable or placeholder API_MODEL="hf:nomic-ai/nomic-embed-text-v1.5" VECTOR_DIMENSIONS=768 ``` diff --git a/doc/sqlite-rembed-demo.sh b/doc/sqlite-rembed-demo.sh index f65656a074..014ca1c756 100755 --- a/doc/sqlite-rembed-demo.sh +++ b/doc/sqlite-rembed-demo.sh @@ -31,11 +31,11 @@ MYSQL_USER="root" MYSQL_PASS="root" # API Configuration - using synthetic OpenAI endpoint for demonstration -# IMPORTANT: Replace YOUR_API_KEY with your actual API key +# IMPORTANT: Set API_KEY environment variable or replace YOUR_API_KEY below API_CLIENT_NAME="demo-client-$(date +%s)" API_FORMAT="openai" API_URL="https://api.synthetic.new/openai/v1/embeddings" -API_KEY="YOUR_API_KEY" # Replace with your actual API key +API_KEY="${API_KEY:-YOUR_API_KEY}" # Uses environment variable or placeholder API_MODEL="hf:nomic-ai/nomic-embed-text-v1.5" VECTOR_DIMENSIONS=768 # Based on model output @@ -87,6 +87,13 @@ create_demo_sql() { -- ProxySQL: ${PROXYSQL_HOST}:${PROXYSQL_PORT} -- API Endpoint: ${API_URL} -------------------------------------------------------------------- +-- Cleanup: Remove any existing demonstration tables +DROP TABLE IF EXISTS demo_documents; +DROP TABLE IF EXISTS demo_embeddings; +DROP TABLE IF EXISTS demo_embeddings_info; +DROP TABLE IF EXISTS demo_embeddings_chunks; +DROP TABLE IF EXISTS demo_embeddings_rowids; +DROP TABLE IF EXISTS demo_embeddings_vector_chunks00; -------------------------------------------------------------------- -- Phase 1: Basic Connectivity and Function Verification @@ -196,7 +203,8 @@ SELECT id, title, length(content) as content_length FROM demo_documents; SELECT 'Phase 5: Embedding Generation and Storage' as phase; -- Generate 
and store embeddings for all documents -INSERT INTO demo_embeddings(rowid, embedding) +-- Using INSERT OR REPLACE to handle existing rows (cleanup should have removed them) +INSERT OR REPLACE INTO demo_embeddings(rowid, embedding) SELECT id, rembed('$API_CLIENT_NAME', content) FROM demo_documents; @@ -217,28 +225,37 @@ SELECT 'Phase 6: Similarity Search' as phase; -- Exact self-match (should have distance 0.0) SELECT d.title, d.content, e.distance -FROM demo_embeddings e -JOIN demo_documents d ON e.rowid = d.id -WHERE e.embedding MATCH rembed('$API_CLIENT_NAME', - 'Machine learning algorithms improve with more training data and computational power.') -LIMIT 3; +FROM ( + SELECT rowid, distance + FROM demo_embeddings + WHERE embedding MATCH rembed('$API_CLIENT_NAME', + 'Machine learning algorithms improve with more training data and computational power.') + LIMIT 3 +) e +JOIN demo_documents d ON e.rowid = d.id; + -- Similarity search with query text SELECT d.title, d.content, e.distance -FROM demo_embeddings e -JOIN demo_documents d ON e.rowid = d.id -WHERE e.embedding MATCH rembed('$API_CLIENT_NAME', +FROM ( + SELECT rowid, distance + FROM demo_embeddings + WHERE embedding MATCH rembed('$API_CLIENT_NAME', 'data science and algorithms') -LIMIT 3; + LIMIT 3 +) e +JOIN demo_documents d ON e.rowid = d.id; -- Ordered similarity search (closest matches first) -SELECT d.title, e.distance -FROM demo_embeddings e -JOIN demo_documents d ON e.rowid = d.id -WHERE e.embedding MATCH rembed('$API_CLIENT_NAME', +SELECT d.title, d.content, e.distance +FROM ( + SELECT rowid, distance + FROM demo_embeddings + WHERE embedding MATCH rembed('$API_CLIENT_NAME', 'artificial intelligence and neural networks') -ORDER BY e.distance ASC -LIMIT 3; + LIMIT 3 +) e +JOIN demo_documents d ON e.rowid = d.id; -------------------------------------------------------------------- -- Phase 7: Edge Cases and Error Handling @@ -348,4 +365,4 @@ main() { # Run main demonstration main -exit 0 \ No newline at 
end of file +exit 0 diff --git a/doc/sqlite-rembed-examples.sh b/doc/sqlite-rembed-examples.sh index a369722794..500f9edfcd 100755 --- a/doc/sqlite-rembed-examples.sh +++ b/doc/sqlite-rembed-examples.sh @@ -30,11 +30,11 @@ MYSQL_USER="root" MYSQL_PASS="root" # API Configuration - using synthetic OpenAI endpoint for demonstration -# IMPORTANT: Replace YOUR_API_KEY with your actual API key +# IMPORTANT: Set API_KEY environment variable or replace YOUR_API_KEY below API_CLIENT_NAME="demo-client-$(date +%s)" API_FORMAT="openai" API_URL="https://api.synthetic.new/openai/v1/embeddings" -API_KEY="YOUR_API_KEY" # Replace with your actual API key +API_KEY="${API_KEY:-YOUR_API_KEY}" # Uses environment variable or placeholder API_MODEL="hf:nomic-ai/nomic-embed-text-v1.5" VECTOR_DIMENSIONS=768 # Based on model output diff --git a/doc/sqlite-rembed-test.sh b/doc/sqlite-rembed-test.sh index a1bb1ad4e8..dac942dfcd 100755 --- a/doc/sqlite-rembed-test.sh +++ b/doc/sqlite-rembed-test.sh @@ -41,11 +41,11 @@ MYSQL_USER="root" MYSQL_PASS="root" # API Configuration - using synthetic OpenAI endpoint for testing -# IMPORTANT: Replace YOUR_API_KEY with your actual API key +# IMPORTANT: Set API_KEY environment variable or replace YOUR_API_KEY below API_CLIENT_NAME="test-client-$(date +%s)" API_FORMAT="openai" API_URL="https://api.synthetic.new/openai/v1/embeddings" -API_KEY="YOUR_API_KEY" # Replace with your actual API key +API_KEY="${API_KEY:-YOUR_API_KEY}" # Uses environment variable or placeholder API_MODEL="hf:nomic-ai/nomic-embed-text-v1.5" VECTOR_DIMENSIONS=768 # Based on model output