diff --git a/doc/MCP/Architecture.md b/doc/MCP/Architecture.md new file mode 100644 index 0000000000..342db909c7 --- /dev/null +++ b/doc/MCP/Architecture.md @@ -0,0 +1,465 @@ +# MCP Architecture + +This document describes the architecture of the MCP (Model Context Protocol) module in ProxySQL, including endpoint design, tool handler implementation, and future architectural direction. + +## Overview + +The MCP module implements JSON-RPC 2.0 over HTTPS for LLM (Large Language Model) integration with ProxySQL. It provides multiple endpoints, each designed to serve specific purposes while sharing a single HTTPS server. + +### Key Concepts + +- **MCP Endpoint**: A distinct HTTPS endpoint (e.g., `/mcp/config`, `/mcp/query`) that implements MCP protocol +- **Tool Handler**: A C++ class that implements specific tools available to LLMs +- **Tool Discovery**: Dynamic discovery via `tools/list` method (MCP protocol standard) +- **Endpoint Authentication**: Per-endpoint Bearer token authentication +- **Connection Pooling**: MySQL connection pooling for efficient database access + +## Current Architecture + +### Component Diagram + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ ProxySQL Process │ +│ │ +│ ┌──────────────────────────────────────────────────────────────────────┐ │ +│ │ MCP_Threads_Handler │ │ +│ │ - Configuration variables (mcp-*) │ │ +│ │ - Status variables │ │ +│ │ - mcp_server (ProxySQL_MCP_Server) │ │ +│ │ - mysql_tool_handler (MySQL_Tool_Handler) │ │ +│ └──────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌──────────────────────────────────────────────────────────────────────┐ │ +│ │ ProxySQL_MCP_Server │ │ +│ │ (Single HTTPS Server) │ │ +│ │ │ │ +│ │ Port: mcp-port (default 6071) │ │ +│ │ SSL: Uses ProxySQL's certificates │ │ +│ └──────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌─────────────────────┼─────────────────────┐ │ +│ ▼ ▼ ▼ │ +│ 
┌───────────────────┐ ┌───────────────────┐ ┌───────────────────┐ │ +│ │ /mcp/config │ │ /mcp/observe │ │ /mcp/query │ │ +│ │ MCP_JSONRPC_ │ │ MCP_JSONRPC_ │ │ MCP_JSONRPC_ │ │ +│ │ Resource │ │ Resource │ │ Resource │ │ +│ └─────────┬─────────┘ └─────────┬─────────┘ └─────────┬─────────┘ │ +│ │ │ │ │ +│ └─────────────────────┼─────────────────────┘ │ +│ ▼ │ +│ ┌────────────────────────────────────────────┐ │ +│ │ MySQL_Tool_Handler (Shared) │ │ +│ │ │ │ +│ │ Tools: │ │ +│ │ - list_schemas │ │ +│ │ - list_tables │ │ +│ │ - describe_table │ │ +│ │ - get_constraints │ │ +│ │ - table_profile │ │ +│ │ - column_profile │ │ +│ │ - sample_rows │ │ +│ │ - run_sql_readonly │ │ +│ │ - catalog_* (6 tools) │ │ +│ └────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌────────────────────────────────────────────┐ │ +│ │ MySQL Backend │ │ +│ │ (Connection Pool) │ │ +│ └────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +### Current Limitations + +1. **All endpoints share the same tool handler** - No differentiation between endpoints +2. **Same tools available everywhere** - No specialized tools per endpoint +3. **Single connection pool** - All queries use the same MySQL connections +4. 
**Per-endpoint authentication is the only endpoint-specific behavior** - Each endpoint validates its own Bearer token (see "Per-Endpoint Authentication" below), but the tools and connection pool behind it are still shared + +### File Structure + +``` +include/ +├── MCP_Thread.h # MCP_Threads_Handler class definition +├── MCP_Endpoint.h # MCP_JSONRPC_Resource class definition +├── MySQL_Tool_Handler.h # MySQL_Tool_Handler class definition +├── MySQL_Catalog.h # SQLite catalog for LLM memory +└── ProxySQL_MCP_Server.hpp # ProxySQL_MCP_Server class definition + +lib/ +├── MCP_Thread.cpp # MCP_Threads_Handler implementation +├── MCP_Endpoint.cpp # MCP_JSONRPC_Resource implementation +├── MySQL_Tool_Handler.cpp # MySQL_Tool_Handler implementation +├── MySQL_Catalog.cpp # SQLite catalog implementation +└── ProxySQL_MCP_Server.cpp # HTTPS server implementation +``` + +### Request Flow (Current) + +``` +1. LLM Client → POST /mcp/{endpoint} → HTTPS Server (port 6071) +2. HTTPS Server → MCP_JSONRPC_Resource::render_POST() +3. MCP_JSONRPC_Resource → handle_jsonrpc_request() +4. Route based on JSON-RPC method: + - initialize/ping → Handled directly + - tools/list → handle_tools_list() + - tools/describe → handle_tools_describe() + - tools/call → handle_tools_call() → MySQL_Tool_Handler +5. MySQL_Tool_Handler → MySQL Backend (via connection pool) +6. Return JSON-RPC response +``` + +## Future Architecture: Multiple Tool Handlers + +### Goal + +Each MCP endpoint will have its own dedicated tool handler with specific tools designed for that endpoint's purpose. 
This allows for: + +- **Specialized tools** - Different tools for different purposes +- **Isolated resources** - Separate connection pools per endpoint +- **Independent authentication** - Per-endpoint credentials +- **Clear separation of concerns** - Each endpoint has a well-defined purpose + +### Target Architecture + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ ProxySQL Process │ +│ │ +│ ┌──────────────────────────────────────────────────────────────────────┐ │ +│ │ MCP_Threads_Handler │ │ +│ │ - Configuration variables │ │ +│ │ - Status variables │ │ +│ │ - mcp_server │ │ +│ │ - config_tool_handler (NEW) │ │ +│ │ - query_tool_handler (NEW) │ │ +│ │ - admin_tool_handler (NEW) │ │ +│ │ - cache_tool_handler (NEW) │ │ +│ │ - observe_tool_handler (NEW) │ │ +│ └──────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌──────────────────────────────────────────────────────────────────────┐ │ +│ │ ProxySQL_MCP_Server │ │ +│ │ (Single HTTPS Server) │ │ +│ └──────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌──────────────┬──────────────┼──────────────┬──────────────┬─────────┐ │ +│ ▼ ▼ ▼ ▼ ▼ ▼ │ +│ ┌────┐ ┌────┐ ┌────┐ ┌────┐ ┌────┐ ┌───┐│ +│ │conf│ │obs │ │qry │ │adm │ │cach│ │cat││ +│ │TH │ │TH │ │TH │ │TH │ │TH │ │log│││ +│ └─┬──┘ └─┬──┘ └─┬──┘ └─┬──┘ └─┬──┘ └─┬─┘│ +│ │ │ │ │ │ │ │ +│ │ │ │ │ │ │ │ +│ Tools: Tools: Tools: Tools: Tools: │ │ +│ - get_config - list_ - list_ - admin_ - get_ │ │ +│ - set_config stats schemas - set_ cache │ │ +│ - reload - show_ - list_ - reload - set_ │ │ +│ metrics tables - invalidate │ │ +│ - query │ │ +│ │ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +Where: +- `TH` = Tool Handler + +### Endpoint Specifications + +#### `/mcp/config` - Configuration Endpoint + +**Purpose**: Runtime configuration and management of ProxySQL + +**Tools**: +- `get_config` - Get current configuration 
values +- `set_config` - Modify configuration values +- `reload_config` - Reload configuration from disk/memory +- `list_variables` - List all available variables +- `get_status` - Get server status information + +**Use Cases**: +- LLM assistants that need to configure ProxySQL +- Automated configuration management +- Dynamic tuning based on workload + +**Authentication**: `mcp-config_endpoint_auth` (Bearer token) + +--- + +#### `/mcp/observe` - Observability Endpoint + +**Purpose**: Real-time metrics, statistics, and monitoring data + +**Tools**: +- `list_stats` - List available statistics +- `get_stats` - Get specific statistics +- `show_connections` - Show active connections +- `show_queries` - Show query statistics +- `get_health` - Get health check information +- `show_metrics` - Show performance metrics + +**Use Cases**: +- LLM assistants for monitoring and observability +- Automated alerting and health checks +- Performance analysis + +**Authentication**: `mcp-observe_endpoint_auth` (Bearer token) + +--- + +#### `/mcp/query` - Query Endpoint + +**Purpose**: Safe database exploration and query execution + +**Tools**: +- `list_schemas` - List databases +- `list_tables` - List tables in schema +- `describe_table` - Get table structure +- `get_constraints` - Get foreign keys and constraints +- `sample_rows` - Get sample data +- `run_sql_readonly` - Execute read-only SQL +- `explain_sql` - Explain query execution plan + +**Use Cases**: +- LLM assistants for database exploration +- Data analysis and discovery +- Query optimization assistance + +**Authentication**: `mcp-query_endpoint_auth` (Bearer token) + +--- + +#### `/mcp/admin` - Administration Endpoint + +**Purpose**: Administrative operations + +**Tools**: +- `admin_list_users` - List MySQL users +- `admin_create_user` - Create MySQL user +- `admin_grant_permissions` - Grant permissions +- `admin_show_processes` - Show running processes +- `admin_kill_query` - Kill a running query +- `admin_flush_cache` - 
Flush various caches +- `admin_reload` - Reload users/servers + +**Use Cases**: +- LLM assistants for administration tasks +- Automated user management +- Emergency operations + +**Authentication**: `mcp-admin_endpoint_auth` (Bearer token, most restrictive) + +--- + +#### `/mcp/cache` - Cache Endpoint + +**Purpose**: Query cache management + +**Tools**: +- `get_cache_stats` - Get cache statistics +- `invalidate_cache` - Invalidate cache entries +- `set_cache_ttl` - Set cache TTL +- `clear_cache` - Clear all cache +- `warm_cache` - Warm up cache with queries +- `get_cache_entries` - List cached queries + +**Use Cases**: +- LLM assistants for cache optimization +- Automated cache management +- Performance tuning + +**Authentication**: `mcp-cache_endpoint_auth` (Bearer token) + +--- + +### Tool Discovery Flow + +MCP clients should discover available tools dynamically: + +``` +1. Client → POST /mcp/config → {"method": "tools/list", ...} +2. Server → {"result": {"tools": [ + {"name": "get_config", "description": "..."}, + {"name": "set_config", "description": "..."}, + ... + ]}} + +3. Client → POST /mcp/query → {"method": "tools/list", ...} +4. Server → {"result": {"tools": [ + {"name": "list_schemas", "description": "..."}, + {"name": "list_tables", "description": "..."}, + ... 
+ ]}} +``` + +**Example Discovery**: + +```bash +# Discover tools on /mcp/query endpoint +curl -k -X POST https://127.0.0.1:6071/mcp/query \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -d '{"jsonrpc": "2.0", "method": "tools/list", "id": 1}' +``` + +### Tool Handler Base Class + +All tool handlers will inherit from a common base class: + +```cpp +class MCP_Tool_Handler { +public: + virtual ~MCP_Tool_Handler() = default; + + // Tool discovery + virtual json get_tool_list() = 0; + virtual json get_tool_description(const std::string& tool_name) = 0; + virtual json execute_tool(const std::string& tool_name, const json& arguments) = 0; + + // Lifecycle + virtual int init() = 0; + virtual void close() = 0; +}; +``` + +### Per-Endpoint Authentication + +Each endpoint validates its own Bearer token. The implementation is complete and supports: + +- **Bearer token** from `Authorization` header +- **Query parameter fallback** (`?token=xxx`) for simple testing +- **No authentication** when token is not configured (backward compatible) + +```cpp +bool MCP_JSONRPC_Resource::authenticate_request(const http_request& req) { + // Get the expected auth token for this endpoint + char* expected_token = nullptr; + + if (endpoint_name == "config") { + expected_token = handler->variables.mcp_config_endpoint_auth; + } else if (endpoint_name == "observe") { + expected_token = handler->variables.mcp_observe_endpoint_auth; + } else if (endpoint_name == "query") { + expected_token = handler->variables.mcp_query_endpoint_auth; + } else if (endpoint_name == "admin") { + expected_token = handler->variables.mcp_admin_endpoint_auth; + } else if (endpoint_name == "cache") { + expected_token = handler->variables.mcp_cache_endpoint_auth; + } + + // If no auth token is configured, allow the request + if (!expected_token || strlen(expected_token) == 0) { + return true; // No authentication required + } + + // Try to get Bearer token from Authorization header + 
std::string auth_header = req.get_header("Authorization"); + + if (auth_header.empty()) { + // Fallback: try getting from query parameter + const auto& args = req.get_args(); + auto it = args.find("token"); + if (it != args.end()) { + auth_header = "Bearer " + it->second; + } + } + + if (auth_header.empty()) { + return false; // No authentication provided + } + + // Check if it's a Bearer token + const std::string bearer_prefix = "Bearer "; + if (auth_header.length() <= bearer_prefix.length() || + auth_header.compare(0, bearer_prefix.length(), bearer_prefix) != 0) { + return false; // Invalid format + } + + // Extract and validate token + std::string provided_token = auth_header.substr(bearer_prefix.length()); + // Trim whitespace + size_t start = provided_token.find_first_not_of(" \t\n\r"); + size_t end = provided_token.find_last_not_of(" \t\n\r"); + if (start != std::string::npos && end != std::string::npos) { + provided_token = provided_token.substr(start, end - start + 1); + } + + return (provided_token == expected_token); +} +``` + +**Status:** ✅ **Implemented** (lib/MCP_Endpoint.cpp) + +### Connection Pooling Strategy + +Each tool handler manages its own connection pool: + +```cpp +class Config_Tool_Handler : public MCP_Tool_Handler { +private: + std::vector<MYSQL*> config_connection_pool; // For ProxySQL admin + pthread_mutex_t pool_lock; +}; +``` + +## Implementation Roadmap + +### Phase 1: Base Infrastructure + +1. Create `MCP_Tool_Handler` base class +2. Create stub implementations for all 5 tool handlers +3. Update `MCP_Threads_Handler` to manage all handlers +4. Update `ProxySQL_MCP_Server` to pass handlers to endpoints + +### Phase 2: Tool Implementation + +1. Implement Config_Tool_Handler tools +2. Implement Query_Tool_Handler tools (move from MySQL_Tool_Handler) +3. Implement Admin_Tool_Handler tools +4. Implement Cache_Tool_Handler tools +5. Implement Observe_Tool_Handler tools + +### Phase 3: Authentication & Testing + +1. 
✅ Implement per-endpoint authentication +2. ⚠️ Update test scripts to use dynamic tool discovery +3. ⚠️ Add integration tests for each endpoint +4. ⚠️ Documentation updates + +## Migration Strategy + +### Backward Compatibility + +The migration to multiple tool handlers will maintain backward compatibility: + +1. The existing `mysql_tool_handler` will be renamed to `query_tool_handler` +2. Existing tools will continue to work on `/mcp/query` +3. New endpoints will be added incrementally +4. Deprecation warnings for accessing tools on wrong endpoints + +### Gradual Migration + +``` +Step 1: Add new base class and stub handlers (no behavior change) +Step 2: Implement /mcp/config endpoint (new functionality) +Step 3: Move MySQL tools to /mcp/query (existing tools migrate) +Step 4: Implement /mcp/admin (new functionality) +Step 5: Implement /mcp/cache (new functionality) +Step 6: Implement /mcp/observe (new functionality) +Step 7: Enable per-endpoint auth +``` + +## Related Documentation + +- [VARIABLES.md](VARIABLES.md) - Configuration variables reference +- [README.md](README.md) - Module overview and setup + +## Version + +- **MCP Thread Version:** 0.1.0 +- **Architecture Version:** 1.0 (design document) +- **Last Updated:** 2025-01-12 diff --git a/doc/MCP/Database_Discovery_Agent.md b/doc/MCP/Database_Discovery_Agent.md new file mode 100644 index 0000000000..58eaf01f00 --- /dev/null +++ b/doc/MCP/Database_Discovery_Agent.md @@ -0,0 +1,800 @@ +# Database Discovery Agent Architecture + +## Overview + +This document describes the architecture for an AI-powered database discovery agent that can autonomously explore, understand, and analyze any database schema regardless of complexity or domain. The agent uses a mixture-of-experts approach where specialized LLM agents collaborate to build comprehensive understanding of database structures, data patterns, and business semantics. + +## Core Principles + +1. 
**Domain Agnostic** - No assumptions about what the database contains; everything is discovered +2. **Iterative Exploration** - Not a one-time schema dump; continuous learning through multiple cycles +3. **Collaborative Intelligence** - Multiple experts with different perspectives work together +4. **Hypothesis-Driven** - Experts form hypotheses, test them, and refine understanding +5. **Confidence-Based** - Exploration continues until a confidence threshold is reached + +## High-Level Architecture + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ ORCHESTRATOR AGENT │ +│ - Manages exploration state │ +│ - Coordinates expert agents │ +│ - Synthesizes findings │ +│ - Decides when exploration is complete │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ├─────────────────────────────────────┐ + │ │ + ▼─────────────────▼ ▼─────────────────▼ + ┌─────────────────────────┐ ┌─────────────────────────┐ ┌─────────────────────────┐ + │ STRUCTURAL EXPERT │ │ STATISTICAL EXPERT │ │ SEMANTIC EXPERT │ + │ │ │ │ │ │ + │ - Schemas & tables │ │ - Data distributions │ │ - Business meaning │ + │ - Relationships │ │ - Patterns & trends │ │ - Domain concepts │ + │ - Constraints │ │ - Outliers & anomalies │ │ - Entity types │ + │ - Indexes & keys │ │ - Correlations │ │ - User intent │ + └─────────────────────────┘ └─────────────────────────┘ └─────────────────────────┘ + │ │ │ + └───────────────────────────┼───────────────────────────┘ + │ + ▼ + ┌─────────────────────────────────┐ + │ SHARED CATALOG │ + │ (SQLite + MCP) │ + │ │ + │ Expert discoveries │ + │ Cross-expert notes │ + │ Exploration state │ + │ Hypotheses & results │ + └─────────────────────────────────┘ + │ + ▼ + ┌─────────────────────────────────┐ + │ MCP Query Endpoint │ + │ - Database access │ + │ - Catalog operations │ + │ - All tools available │ + └─────────────────────────────────┘ +``` + +## Expert Specializations + +### 1. 
Structural Expert + +**Focus:** Database topology and relationships + +**Responsibilities:** +- Map all schemas, tables, and their relationships +- Identify primary keys, foreign keys, and constraints +- Analyze index patterns and access structures +- Detect table hierarchies and dependencies +- Identify structural patterns (star schema, snowflake, hierarchical, etc.) + +**Exploration Strategy:** +```python +class StructuralExpert: + def explore(self, catalog): + # Iteration 1: Map the territory + tables = self.list_all_tables() + for table in tables: + schema = self.get_table_schema(table) + relationships = self.find_relationships(table) + + catalog.save("structure", f"table.{table}", { + "columns": schema["columns"], + "primary_key": schema["pk"], + "foreign_keys": relationships, + "indexes": schema["indexes"] + }) + + # Iteration 2: Find connection points + for table_a, table_b in potential_pairs: + joins = self.suggest_joins(table_a, table_b) + if joins: + catalog.save("relationship", f"{table_a}↔{table_b}", joins) + + # Iteration 3: Identify structural patterns + patterns = self.identify_patterns(catalog) + # "This looks like a star schema", "Hierarchical structure", etc. +``` + +**Output Examples:** +- "Found 47 tables across 3 schemas" +- "customers table has 1:many relationship with orders via customer_id" +- "Detected star schema: fact_orders with dims: customers, products, time" +- "Table hierarchy: categories → subcategories → products" + +### 2. 
Statistical Expert + +**Focus:** Data characteristics and patterns + +**Responsibilities:** +- Profile data distributions for all columns +- Identify correlations between fields +- Detect outliers and anomalies +- Find temporal patterns and trends +- Calculate data quality metrics + +**Exploration Strategy:** +```python +class StatisticalExpert: + def explore(self, catalog): + # Read structural discoveries first + tables = catalog.get_kind("structure.*") + + for table in tables: + # Profile each column + for col in table["columns"]: + stats = self.get_column_stats(table, col) + + catalog.save("statistics", f"{table}.{col}", { + "distinct_count": stats["distinct"], + "null_percentage": stats["null_pct"], + "distribution": stats["histogram"], + "top_values": stats["top_20"], + "numeric_range": stats["min_max"] if numeric else None, + "anomalies": stats["outliers"] + }) + + # Find correlations + correlations = self.find_correlations(tables) + catalog.save("patterns", "correlations", correlations) +``` + +**Output Examples:** +- "orders.status has 4 values: pending (23%), confirmed (45%), shipped (28%), cancelled (4%)" +- "Strong correlation (0.87) between order_items.quantity and order_total" +- "Outlier detected: customer_age has values > 150 (likely data error)" +- "Temporal pattern: 80% of orders placed M-F, 9am-5pm" + +### 3. 
Semantic Expert + +**Focus:** Business meaning and domain understanding + +**Responsibilities:** +- Infer business domain from data patterns +- Identify entity types and their roles +- Interpret relationships in business terms +- Understand user intent and use cases +- Document business rules and constraints + +**Exploration Strategy:** +```python +class SemanticExpert: + def explore(self, catalog): + # Synthesize findings from other experts + structure = catalog.get_kind("structure.*") + stats = catalog.get_kind("statistics.*") + + for table in structure: + # Infer domain from table name, columns, and data + domain = self.infer_domain(table, stats) + # "This is an ecommerce database" + + # Understand entities + entity_type = self.identify_entity(table) + # "customers table = Customer entities" + + # Understand relationships + for rel in catalog.get_relationships(table): + business_rel = self.interpret_relationship(rel) + # "customer has many orders" + catalog.save("semantic", f"rel.{table}.{other}", { + "relationship": business_rel, + "cardinality": "one-to-many", + "business_rule": "A customer can place multiple orders" + }) + + # Identify business processes + processes = self.infer_processes(structure, stats) + # "Order fulfillment flow: orders → order_items → products" + catalog.save("semantic", "processes", processes) +``` + +**Output Examples:** +- "Domain inference: E-commerce platform (B2C)" +- "Entity: customers represents individual shoppers, not businesses" +- "Business process: Order lifecycle = pending → confirmed → shipped → delivered" +- "Business rule: Customer cannot be deleted if they have active orders" + +### 4. 
Query Expert + +**Focus:** Efficient data access patterns + +**Responsibilities:** +- Analyze query optimization opportunities +- Recommend index usage strategies +- Determine optimal join orders +- Design sampling strategies for exploration +- Identify performance bottlenecks + +**Exploration Strategy:** +```python +class QueryExpert: + def explore(self, catalog): + # Analyze query patterns from structural expert + structure = catalog.get_kind("structure.*") + + for table in structure: + # Suggest optimal access patterns + access_patterns = self.analyze_access_patterns(table) + catalog.save("query", f"access.{table}", { + "best_index": access_patterns["optimal_index"], + "join_order": access_patterns["optimal_join_order"], + "sampling_strategy": access_patterns["sample_method"] + }) +``` + +**Output Examples:** +- "For customers table, use idx_email for lookups, idx_created_at for time ranges" +- "Join order: customers → orders → order_items (not reverse)" +- "Sample strategy: Use TABLESAMPLE for large tables, LIMIT 1000 for small" + +## Orchestrator: The Conductor + +The Orchestrator agent coordinates all experts and manages the overall discovery process. + +```python +class DiscoveryOrchestrator: + """Coordinates the collaborative discovery process""" + + def __init__(self, mcp_endpoint): + self.mcp = MCPClient(mcp_endpoint) + self.catalog = CatalogClient(self.mcp) + + self.experts = [ + StructuralExpert(self.catalog), + StatisticalExpert(self.catalog), + SemanticExpert(self.catalog), + QueryExpert(self.catalog) + ] + + self.state = { + "iteration": 0, + "phase": "initial", + "confidence": 0.0, + "coverage": 0.0, # % of database explored + "expert_contributions": {e.name: 0 for e in self.experts} + } + + def discover(self, max_iterations=50, target_confidence=0.95): + """Main discovery loop""" + + while self.state["iteration"] < max_iterations: + self.state["iteration"] += 1 + + # 1. ASSESS: What's the current state? 
+ assessment = self.assess_progress() + + # 2. PLAN: Which expert should work on what? + tasks = self.plan_next_tasks(assessment) + # Example: [ + # {"expert": "structural", "task": "explore_orders_table", "priority": 0.8}, + # {"expert": "semantic", "task": "interpret_customer_entity", "priority": 0.7}, + # {"expert": "statistical", "task": "analyze_price_distribution", "priority": 0.6} + # ] + + # 3. EXECUTE: Experts work in parallel + results = self.execute_tasks_parallel(tasks) + + # 4. SYNTHESIZE: Combine findings + synthesis = self.synthesize_findings(results) + + # 5. COLLABORATE: Experts share insights + self.facilitate_collaboration(synthesis) + + # 6. REFLECT: Are we done? + self.update_state(synthesis) + + if self.should_stop(): + break + + # 7. FINALIZE: Create comprehensive understanding + return self.create_final_report() + + def plan_next_tasks(self, assessment): + """Decide what each expert should do next""" + + prompt = f""" + You are orchestrating database discovery. Current state: + {assessment} + + Expert findings: + {self.format_expert_findings()} + + Plan the next exploration tasks. Consider: + 1. Which expert can contribute most valuable insights now? + 2. What areas need more exploration? + 3. Which expert findings should be verified or extended? 
+ + Output JSON array of tasks, each with: + - expert: which expert should do it + - task: what they should do + - priority: 0-1 (higher = more important) + - dependencies: [array of catalog keys this depends on] + """ + + return self.llm_call(prompt) + + def facilitate_collaboration(self, synthesis): + """Experts exchange notes and build on each other's work""" + + # Find points where experts should collaborate + collaborations = self.find_collaboration_opportunities(synthesis) + + for collab in collaborations: + # Example: Structural found relationship, Semantic should interpret it + prompt = f""" + EXPERT COLLABORATION: + + {collab['expert_a']} found: {collab['finding_a']} + + {collab['expert_b']}: Please interpret this finding from your perspective. + Consider: How does this affect your understanding? What follow-up is needed? + + Catalog context: {self.get_relevant_context(collab)} + """ + + response = self.llm_call(prompt, expert=collab['expert_b']) + self.catalog.save("collaboration", collab['id'], response) + + def create_final_report(self): + """Synthesize all discoveries into comprehensive understanding""" + + prompt = f""" + Create a comprehensive database understanding report from all expert findings. + + Include: + 1. Executive Summary + 2. Database Structure Overview + 3. Business Domain Analysis + 4. Key Insights & Patterns + 5. Data Quality Assessment + 6. 
Usage Recommendations + + Catalog data: + {self.catalog.export_all()} + """ + + return self.llm_call(prompt) +``` + +## Discovery Phases + +### Phase 1: Blind Exploration (Iterations 1-10) + +**Characteristics:** +- All experts work independently on basic discovery +- No domain assumptions +- Systematic data collection +- Build foundational knowledge + +**Expert Activities:** +- **Structural**: Map all tables, columns, relationships, constraints +- **Statistical**: Profile all columns, find distributions, cardinality +- **Semantic**: Identify entity types from naming patterns, infer basic domain +- **Query**: Analyze access patterns, identify indexes + +**Output:** +- Complete table inventory +- Column profiles for all fields +- Basic relationship mapping +- Initial domain hypothesis + +### Phase 2: Pattern Recognition (Iterations 11-30) + +**Characteristics:** +- Experts begin collaborating +- Patterns emerge from data +- Domain becomes clearer +- Hypotheses form + +**Expert Activities:** +- **Structural**: Identifies structural patterns (star schema, hierarchies) +- **Statistical**: Finds correlations, temporal patterns, outliers +- **Semantic**: Interprets relationships in business terms +- **Query**: Optimizes based on discovered patterns + +**Example Collaboration:** +``` +Structural → Catalog: "Found customers→orders relationship (customer_id)" +Semantic reads: "This indicates customers place orders (ecommerce)" +Statistical reads: "Analyzing order patterns by customer..." 
+Query: "Optimizing customer-centric queries using customer_id index" +``` + +**Output:** +- Domain identification (e.g., "This is an ecommerce database") +- Business entity definitions +- Relationship interpretations +- Pattern documentation + +### Phase 3: Hypothesis-Driven Exploration (Iterations 31-45) + +**Characteristics:** +- Experts form and test hypotheses +- Deep dives into specific areas +- Validation of assumptions +- Filling knowledge gaps + +**Example Hypotheses:** +- "This is a SaaS metrics database" → Test for subscription patterns +- "There are seasonal trends in orders" → Analyze temporal distributions +- "Data quality issues in customer emails" → Validate email formats +- "Unused indexes exist" → Check index usage statistics + +**Expert Activities:** +- All experts design experiments to test hypotheses +- Catalog stores hypothesis results (confirmed/refined/refuted) +- Collaboration to refine understanding based on evidence + +**Output:** +- Validated business insights +- Refined domain understanding +- Data quality assessment +- Performance optimization recommendations + +### Phase 4: Synthesis & Validation (Iterations 46-50) + +**Characteristics:** +- All experts collaborate to validate findings +- Resolve contradictions +- Fill remaining gaps +- Create unified understanding + +**Expert Activities:** +- Cross-expert validation of key findings +- Synthesis of comprehensive understanding +- Documentation of uncertainties +- Recommendations for further analysis + +**Output:** +- Final comprehensive report +- Confidence scores for each finding +- Remaining uncertainties +- Actionable recommendations + +## Domain-Agnostic Discovery Examples + +### Example 1: Law Firm Database + +**Phase 1-5 (Blind):** +``` +Structural: "Found: cases, clients, attorneys, documents, time_entries, billing_rates" +Statistical: "time_entries has 1.2M rows, highly skewed distribution, 15% null values" +Semantic: "Entity types: Cases (legal matters), Clients 
(people/companies), Attorneys" +Query: "Best access path: case_id → time_entries (indexed)" +``` + +**Phase 6-15 (Patterns):** +``` +Collaboration: + Structural → Semantic: "cases have many-to-many with attorneys (case_attorneys table)" + Semantic: "Multiple attorneys per case = legal teams" + Statistical: "time_entries correlate with case_stage progression (r=0.72)" + Query: "Filter by case_date_first for time range queries (30% faster)" + +Domain Inference: + Semantic: "Legal practice management system" + Structural: "Found invoices, payments tables - confirms practice management" + Statistical: "Billing patterns: hourly rates, contingency fees detected" +``` + +**Phase 16-30 (Hypotheses):** +``` +Hypothesis: "Firm specializes in specific case types" +→ Statistical: "Analyze case_type distribution" +→ Found: "70% personal_injury, 20% corporate_litigation, 10% family_law" + +Hypothesis: "Document workflow exists" +→ Structural: "Found document_versions, approvals, court_filings tables" +→ Semantic: "Document approval workflow for court submissions" + +Hypothesis: "Attorney productivity varies by case type" +→ Statistical: "Analyze time_entries per attorney per case_type" +→ Found: "Personal injury cases require 3.2x more attorney hours" +``` + +**Phase 31-40 (Synthesis):** +``` +Final Understanding: +"Mid-sized personal injury law firm (50-100 attorneys) +with practice management system including: +- Case management with document workflows +- Time tracking and billing (hourly + contingency) +- 70% focus on personal injury cases +- Average case duration: 18 months +- Key metrics: case duration, settlement amounts, + attorney productivity, document approval cycle time" +``` + +### Example 2: Scientific Research Database + +**Phase 1-5 (Blind):** +``` +Structural: "experiments, samples, measurements, researchers, publications, protocols" +Statistical: "High precision numeric data (10 decimal places), temporal patterns in experiments" +Semantic: "Research lab data 
management system" +Query: "Measurements table largest (45M rows), needs partitioning" +``` + +**Phase 6-15 (Patterns):** +``` +Domain: "Biology/medicine research (gene_sequences, drug_compounds detected)" +Patterns: "Experiments follow protocol → samples → measurements → analysis pipeline" +Structural: "Linear workflow: protocols → experiments → samples → measurements → analysis → publications" +Statistical: "High correlation between protocol_type and measurement_outcome" +``` + +**Phase 16-30 (Hypotheses):** +``` +Hypothesis: "Longitudinal study design" +→ Structural: "Found repeated_measurements, time_points tables" +→ Confirmed: "Same subjects measured over time" + +Hypothesis: "Control groups present" +→ Statistical: "Found clustering in measurements (treatment vs control)" +→ Confirmed: "Experimental design includes control groups" + +Hypothesis: "Statistical significance testing" +→ Statistical: "Found p_value distributions, confidence intervals in results" +→ Confirmed: "Clinical trial data with statistical validation" +``` + +**Phase 31-40 (Synthesis):** +``` +Final Understanding: +"Clinical trial data management system for pharmaceutical research +- Drug compound testing with control/treatment groups +- Longitudinal design (repeated measurements over time) +- Statistical validation pipeline +- Regulatory reporting (publication tracking) +- Sample tracking from collection to analysis" +``` + +### Example 3: E-commerce Database + +**Phase 1-5 (Blind):** +``` +Structural: "customers, orders, order_items, products, categories, inventory, reviews" +Statistical: "orders has 5.4M rows, steady growth trend, seasonal patterns" +Semantic: "Online retail platform" +Query: "orders table requires date-based partitioning" +``` + +**Phase 6-15 (Patterns):** +``` +Domain: "B2C ecommerce platform" +Relationships: "customers → orders (1:N), orders → order_items (1:N), order_items → products (N:1)" +Business flow: "Browse → Add to Cart → Checkout → Payment → Fulfillment" 
+Statistical: "Order value distribution: Long tail, $50 median, $280 mean" +``` + +**Phase 16-30 (Hypotheses):** +``` +Hypothesis: "Customer segments exist" +→ Statistical: "Cluster customers by order frequency, total spend, recency" +→ Found: "3 segments: Casual (70%), Regular (25%), VIP (5%)" + +Hypothesis: "Product categories affect return rates" +→ Statistical: "analyze returns by category" +→ Found: "Clothing: 12% return rate, Electronics: 3% return rate" + +Hypothesis: "Seasonal buying patterns" +→ Statistical: "Time series analysis of orders by month/day/week" +→ Found: "Peak: Nov-Dec (holidays), Dip: Jan, Slow: Feb-Mar" +``` + +**Phase 31-40 (Synthesis):** +``` +Final Understanding: +"Consumer ecommerce platform with: +- 5.4M orders, steady growth, strong seasonality +- 3 customer segments (Casual/Regular/VIP) with different behaviors +- 15% overall return rate (varies by category) +- Peak season: Nov-Dec (4.3x normal volume) +- Key metrics: conversion rate, AOV, customer lifetime value, return rate" +``` + +## Catalog Schema + +The catalog serves as shared memory for all experts. 
Key entry types: + +### Structure Entries +```json +{ + "kind": "structure", + "key": "table.customers", + "document": { + "columns": ["customer_id", "name", "email", "created_at"], + "primary_key": "customer_id", + "foreign_keys": [{"column": "region_id", "references": "regions(id)"}], + "row_count": 125000 + }, + "tags": "customers,table" +} +``` + +### Statistics Entries +```json +{ + "kind": "statistics", + "key": "customers.created_at", + "document": { + "distinct_count": 118500, + "null_percentage": 0.0, + "min": "2020-01-15", + "max": "2025-01-10", + "distribution": "uniform_growth" + }, + "tags": "customers,created_at,temporal" +} +``` + +### Semantic Entries +```json +{ + "kind": "semantic", + "key": "entity.customers", + "document": { + "entity_type": "Customer", + "definition": "Individual shoppers who place orders", + "business_role": "Revenue generator", + "lifecycle": "Registered → Active → Inactive → Churned" + }, + "tags": "semantic,entity,customers" +} +``` + +### Relationship Entries +```json +{ + "kind": "relationship", + "key": "customers↔orders", + "document": { + "type": "one_to_many", + "join_key": "customer_id", + "business_meaning": "Customers place multiple orders", + "cardinality_estimates": { + "min_orders_per_customer": 1, + "max_orders_per_customer": 247, + "avg_orders_per_customer": 4.3 + } + }, + "tags": "relationship,customers,orders" +} +``` + +### Hypothesis Entries +```json +{ + "kind": "hypothesis", + "key": "vip_segment_behavior", + "document": { + "hypothesis": "VIP customers have higher order frequency and AOV", + "status": "confirmed", + "confidence": 0.92, + "evidence": [ + "VIP avg 12.4 orders/year vs 2.1 for regular", + "VIP avg AOV $156 vs $45 for regular" + ] + }, + "tags": "hypothesis,customer_segments,confirmed" +} +``` + +### Collaboration Entries +```json +{ + "kind": "collaboration", + "key": "semantic_interpretation_001", + "document": { + "trigger": "Structural expert found orders.status enum", + "expert": 
"semantic", + "interpretation": "Order lifecycle: pending → confirmed → shipped → delivered", + "follow_up_tasks": ["Analyze time_in_status durations", "Find bottleneck status"] + }, + "tags": "collaboration,structural,semantic,order_lifecycle" +} +``` + +## Stopping Criteria + +The orchestrator evaluates whether to continue exploration based on: + +1. **Confidence Threshold** - Overall confidence in understanding exceeds target (e.g., 0.95) +2. **Coverage Threshold** - Sufficient percentage of database explored (e.g., 95% of tables analyzed) +3. **Diminishing Returns** - Last N iterations produced minimal new insights +4. **Resource Limits** - Maximum iterations reached or time budget exceeded +5. **Expert Consensus** - All experts indicate satisfactory understanding + +```python +def should_stop(self): + # High confidence in core understanding + if self.state["confidence"] >= 0.95: + return True, "Confidence threshold reached" + + # Good coverage of database + if self.state["coverage"] >= 0.95: + return True, "Coverage threshold reached" + + # Diminishing returns + if self.state["recent_insights"] < 2: + self.state["diminishing_returns"] += 1 + if self.state["diminishing_returns"] >= 3: + return True, "Diminishing returns" + + # Expert consensus + if all(expert.satisfied() for expert in self.experts): + return True, "Expert consensus achieved" + + return False, "Continue exploration" +``` + +## Implementation Considerations + +### Scalability + +For large databases (hundreds/thousands of tables): +- **Parallel Exploration**: Experts work simultaneously on different table subsets +- **Incremental Coverage**: Prioritize important tables (many relationships, high cardinality) +- **Smart Sampling**: Use statistical sampling instead of full scans for large tables +- **Progressive Refinement**: Start with overview, drill down iteratively + +### Performance + +- **Caching**: Cache catalog queries to avoid repeated reads +- **Batch Operations**: Group multiple tool calls 
when possible +- **Index-Aware**: Let Query Expert guide exploration to use indexed columns +- **Connection Pooling**: Reuse database connections (already implemented in MCP) + +### Error Handling + +- **Graceful Degradation**: If one expert fails, others continue +- **Retry Logic**: Transient errors trigger retries with backoff +- **Partial Results**: Catalog stores partial findings if interrupted +- **Validation**: Experts cross-validate each other's findings + +### Extensibility + +- **Pluggable Experts**: New expert types can be added easily +- **Domain-Specific Experts**: Specialized experts for healthcare, finance, etc. +- **Custom Tools**: Additional MCP tools for specific analysis needs +- **Expert Configuration**: Experts can be configured/enabled based on needs + +## Usage Example + +```python +from discovery_agent import DiscoveryOrchestrator + +# Initialize agent +agent = DiscoveryOrchestrator( + mcp_endpoint="https://localhost:6071/mcp/query", + auth_token="your_token" +) + +# Run discovery +report = agent.discover( + max_iterations=50, + target_confidence=0.95 +) + +# Access findings +print(report["summary"]) +print(report["domain"]) +print(report["key_insights"]) + +# Query catalog for specific information +customers_analysis = agent.catalog.search("customers") +relationships = agent.catalog.get_kind("relationship") +``` + +## Related Documentation + +- [Architecture.md](Architecture.md) - Overall MCP architecture +- [README.md](README.md) - Module overview and setup +- [VARIABLES.md](VARIABLES.md) - Configuration variables reference + +## Version History + +- **1.0** (2025-01-12) - Initial architecture design diff --git a/doc/MCP/FTS_Implementation_Plan.md b/doc/MCP/FTS_Implementation_Plan.md new file mode 100644 index 0000000000..4a06d4aaec --- /dev/null +++ b/doc/MCP/FTS_Implementation_Plan.md @@ -0,0 +1,582 @@ +# Full Text Search (FTS) Implementation Plan + +## Overview + +This document describes the implementation of Full Text Search (FTS) 
capabilities for the ProxySQL MCP Query endpoint. The FTS system enables AI agents to quickly search indexed data before querying the full MySQL database, using SQLite's FTS5 extension. + +## Requirements + +1. **Indexing Strategy**: Optional WHERE clauses, no incremental updates (full rebuild on reindex) +2. **Search Scope**: Agent decides - single table or cross-table search +3. **Storage**: All rows (no limits) +4. **Catalog Integration**: Cross-reference between FTS and catalog - agent can use FTS to get top N IDs, then query real database +5. **Use Case**: FTS as another tool in the agent's toolkit + +## Architecture + +### Components + +``` +MCP Query Endpoint + ↓ +Query_Tool_Handler (routes tool calls) + ↓ +MySQL_Tool_Handler (implements tools) + ↓ +MySQL_FTS (new class - manages FTS database) + ↓ +SQLite FTS5 (mcp_fts.db) +``` + +### Database Design + +**Separate SQLite database**: `mcp_fts.db` (configurable via `mcp-fts_path` variable) + +**Tables**: +- `fts_indexes` - Metadata for all indexes +- `fts_data_<sanitized_name>` - Content tables (one per index) +- `fts_search_<sanitized_name>` - FTS5 virtual tables (one per index) + +## Tools (6 total) + +### 1. fts_index_table + +Create and populate an FTS index for a MySQL table. + +**Parameters**: +| Name | Type | Required | Description | +|------|------|----------|-------------| +| schema | string | Yes | Schema name | +| table | string | Yes | Table name | +| columns | string | Yes | JSON array of column names to index | +| primary_key | string | Yes | Primary key column name | +| where_clause | string | No | Optional WHERE clause for filtering | + +**Response**: +```json +{ + "success": true, + "schema": "sales", + "table": "orders", + "row_count": 15000, + "indexed_at": 1736668800 +} +``` + +**Implementation Logic**: +1. Validate parameters (table exists, columns are valid) +2. Check if index already exists +3. Create dynamic tables: `fts_data_<schema>_<table>` and `fts_search_<schema>_<table>
` +4. Fetch all rows from MySQL using `execute_query()` +5. For each row: + - Concatenate indexed column values into searchable content + - Store original row data as JSON metadata + - Insert into data table (triggers sync to FTS) +6. Update `fts_indexes` metadata +7. Return result + +### 2. fts_search + +Search indexed data using FTS5. + +**Parameters**: +| Name | Type | Required | Description | +|------|------|----------|-------------| +| query | string | Yes | FTS5 search query | +| schema | string | No | Filter by schema | +| table | string | No | Filter by table | +| limit | integer | No | Max results (default: 100) | +| offset | integer | No | Pagination offset (default: 0) | + +**Response**: +```json +{ + "success": true, + "query": "urgent order", + "total_matches": 234, + "results": [ + { + "schema": "sales", + "table": "orders", + "primary_key_value": "12345", + "snippet": "Customer has <mark>urgent</mark> <mark>order</mark>...", + "metadata": "{\"order_id\":12345,\"customer_id\":987,...}" + } + ] +} +``` + +**Implementation Logic**: +1. Build FTS5 query with MATCH syntax +2. Apply schema/table filters if specified +3. Execute search with ranking (bm25) +4. Return results with snippets highlighting matches +5. Support pagination + +### 3. fts_list_indexes + +List all FTS indexes with metadata. + +**Parameters**: None + +**Response**: +```json +{ + "success": true, + "indexes": [ + { + "schema": "sales", + "table": "orders", + "columns": ["order_id", "customer_name", "notes"], + "primary_key": "order_id", + "row_count": 15000, + "indexed_at": 1736668800 + } + ] +} +``` + +**Implementation Logic**: +1. Query `fts_indexes` table +2. Return all indexes with metadata + +### 4. fts_delete_index + +Remove an FTS index. 
+ +**Parameters**: +| Name | Type | Required | Description | +|------|------|----------|-------------| +| schema | string | Yes | Schema name | +| table | string | Yes | Table name | + +**Response**: +```json +{ + "success": true, + "schema": "sales", + "table": "orders", + "message": "Index deleted successfully" +} +``` + +**Implementation Logic**: +1. Validate index exists +2. Drop FTS search table +3. Drop data table +4. Remove metadata from `fts_indexes` + +### 5. fts_reindex + +Refresh an index with fresh data (full rebuild). + +**Parameters**: +| Name | Type | Required | Description | +|------|------|----------|-------------| +| schema | string | Yes | Schema name | +| table | string | Yes | Table name | + +**Response**: Same as `fts_index_table` + +**Implementation Logic**: +1. Fetch existing index metadata from `fts_indexes` +2. Delete existing data from tables +3. Call `index_table()` logic with stored metadata +4. Update `indexed_at` timestamp + +### 6. fts_rebuild_all + +Rebuild ALL FTS indexes with fresh data. + +**Parameters**: None + +**Response**: +```json +{ + "success": true, + "rebuilt_count": 5, + "failed": [], + "indexes": [ + { + "schema": "sales", + "table": "orders", + "row_count": 15200, + "status": "success" + } + ] +} +``` + +**Implementation Logic**: +1. Get all indexes from `fts_indexes` table +2. For each index: + - Call `reindex()` with stored metadata + - Track success/failure +3. 
Return summary with rebuilt count and any failures + +## Database Schema + +### fts_indexes (metadata table) +```sql +CREATE TABLE IF NOT EXISTS fts_indexes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + schema_name TEXT NOT NULL, + table_name TEXT NOT NULL, + columns TEXT NOT NULL, -- JSON array of column names + primary_key TEXT NOT NULL, + where_clause TEXT, + row_count INTEGER DEFAULT 0, + indexed_at INTEGER DEFAULT (strftime('%s', 'now')), + UNIQUE(schema_name, table_name) +); + +CREATE INDEX IF NOT EXISTS idx_fts_indexes_schema ON fts_indexes(schema_name); +CREATE INDEX IF NOT EXISTS idx_fts_indexes_table ON fts_indexes(table_name); +``` + +### Per-Index Tables (created dynamically) + +For each indexed table, create the following objects, where `<schema>_<table>` is the sanitized schema and table name: +```sql +-- Data table (stores actual content) +CREATE TABLE fts_data_<schema>_<table> ( + rowid INTEGER PRIMARY KEY, + content TEXT NOT NULL, -- Concatenated searchable text + metadata TEXT -- JSON with original row data +); + +-- FTS5 virtual table (external content) +CREATE VIRTUAL TABLE fts_search_<schema>_<table> USING fts5( + content, + metadata, + content='fts_data_<schema>_<table>', + content_rowid='rowid', + tokenize='porter unicode61' +); + +-- Triggers for automatic sync +CREATE TRIGGER fts_ai_<schema>_<table> AFTER INSERT ON fts_data_<schema>_<table> BEGIN + INSERT INTO fts_search_<schema>_<table>(rowid, content, metadata) + VALUES (new.rowid, new.content, new.metadata); +END; + +CREATE TRIGGER fts_ad_<schema>_<table> AFTER DELETE ON fts_data_<schema>_<table> BEGIN + INSERT INTO fts_search_<schema>_<table>(fts_search_<schema>_<table>, rowid, content, metadata) + VALUES ('delete', old.rowid, old.content, old.metadata); +END; + +CREATE TRIGGER fts_au_<schema>_<table> AFTER UPDATE ON fts_data_<schema>_<table> BEGIN + INSERT INTO fts_search_<schema>_<table>(fts_search_<schema>_<table>, rowid, content, metadata) + VALUES ('delete', old.rowid, old.content, old.metadata); + INSERT INTO fts_search_<schema>_<table>(rowid, content, metadata) + VALUES (new.rowid, new.content, new.metadata); +END; +``` + +## Implementation Steps + +### Phase 1: Foundation + +**Step 1: Create MySQL_FTS class** +- Create `include/MySQL_FTS.h` - Class header with method declarations +- Create 
`lib/MySQL_FTS.cpp` - Implementation +- Follow `MySQL_Catalog` pattern for SQLite management + +**Step 2: Add configuration variable** +- Modify `include/MCP_Thread.h` - Add `mcp_fts_path` to variables struct +- Modify `lib/MCP_Thread.cpp` - Add `fts_path` to `mcp_thread_variables_names` array +- Handle `fts_path` in get/set variable functions +- Default value: `"mcp_fts.db"` + +**Step 3: Integrate FTS into MySQL_Tool_Handler** +- Add `MySQL_FTS* fts` member to `include/MySQL_Tool_Handler.h` +- Initialize in constructor with `fts_path` +- Clean up in destructor +- Add FTS tool method declarations + +### Phase 2: Core Indexing + +**Step 4: Implement fts_index_table tool** +```cpp +// In MySQL_FTS class +std::string index_table( + const std::string& schema, + const std::string& table, + const std::string& columns, // JSON array + const std::string& primary_key, + const std::string& where_clause, + MySQL_Tool_Handler* mysql_handler +); +``` + +Logic: +- Parse columns JSON array +- Create sanitized table name (replace `.` and other special characters with `_`) +- Create `fts_data_*` and `fts_search_*` tables +- Fetch data: `mysql_handler->execute_query(sql)` +- Build content by concatenating column values +- Insert in batches for performance +- Update metadata + +**Step 5: Implement fts_list_indexes tool** +```cpp +std::string list_indexes(); +``` +Query `fts_indexes` and return JSON array. + +**Step 6: Implement fts_delete_index tool** +```cpp +std::string delete_index(const std::string& schema, const std::string& table); +``` +Drop tables and remove metadata. 
+ +### Phase 3: Search Functionality + +**Step 7: Implement fts_search tool** +```cpp +std::string search( + const std::string& query, + const std::string& schema, + const std::string& table, + int limit, + int offset +); +``` + +SQL query template: +```sql +SELECT + d.schema_name, + d.table_name, + d.primary_key_value, + snippet(fts_search, 2, '<mark>', '</mark>', '...', 30) as snippet, + d.metadata +FROM fts_search s +JOIN fts_data d ON s.rowid = d.rowid +WHERE fts_search MATCH ? +ORDER BY bm25(fts_search) +LIMIT ? OFFSET ? +``` + +**Step 8: Implement fts_reindex tool** +```cpp +std::string reindex( + const std::string& schema, + const std::string& table, + MySQL_Tool_Handler* mysql_handler +); +``` +Fetch metadata, delete old data, rebuild. + +**Step 9: Implement fts_rebuild_all tool** +```cpp +std::string rebuild_all(MySQL_Tool_Handler* mysql_handler); +``` +Loop through all indexes and rebuild each. + +### Phase 4: Tool Registration + +**Step 10: Register tools in Query_Tool_Handler** +- Modify `lib/Query_Tool_Handler.cpp` +- Add to `get_tool_list()`: + ```cpp + tools.push_back(create_tool_schema( + "fts_index_table", + "Create/populate FTS index for a table", + {"schema", "table", "columns", "primary_key"}, + {{"where_clause", "string"}} + )); + // Repeat for all 6 tools + ``` +- Add routing in `execute_tool()`: + ```cpp + else if (tool_name == "fts_index_table") { + std::string schema = get_json_string(arguments, "schema"); + std::string table = get_json_string(arguments, "table"); + std::string columns = get_json_string(arguments, "columns"); + std::string primary_key = get_json_string(arguments, "primary_key"); + std::string where_clause = get_json_string(arguments, "where_clause"); + result_str = mysql_handler->fts_index_table(schema, table, columns, primary_key, where_clause); + } + // Repeat for other tools + ``` + +**Step 11: Update ProxySQL_MCP_Server** +- Modify `lib/ProxySQL_MCP_Server.cpp` +- Pass `fts_path` when creating MySQL_Tool_Handler +- Initialize FTS: 
`mysql_handler->get_fts()->init()` + +### Phase 5: Build and Test + +**Step 12: Update build system** +- Modify `Makefile` +- Add `lib/MySQL_FTS.cpp` to compilation sources +- Verify link against sqlite3 + +**Step 13: Testing** +- Test all 6 tools via MCP endpoint +- Verify JSON responses +- Test with actual MySQL data +- Test cross-table search +- Test WHERE clause filtering + +## Critical Files + +### New Files to Create +- `include/MySQL_FTS.h` - FTS class header +- `lib/MySQL_FTS.cpp` - FTS class implementation + +### Files to Modify +- `include/MySQL_Tool_Handler.h` - Add FTS member and tool method declarations +- `lib/MySQL_Tool_Handler.cpp` - Add FTS tool wrappers, initialize FTS +- `lib/Query_Tool_Handler.cpp` - Register and route FTS tools +- `include/MCP_Thread.h` - Add `mcp_fts_path` variable +- `lib/MCP_Thread.cpp` - Handle `fts_path` configuration +- `lib/ProxySQL_MCP_Server.cpp` - Pass `fts_path` to MySQL_Tool_Handler +- `Makefile` - Add MySQL_FTS.cpp to build + +## Code Patterns to Follow + +### MySQL_FTS Class Structure (similar to MySQL_Catalog) + +```cpp +class MySQL_FTS { +private: + SQLite3DB* db; + std::string db_path; + + int init_schema(); + int create_tables(); + int create_index_tables(const std::string& schema, const std::string& table); + std::string get_data_table_name(const std::string& schema, const std::string& table); + std::string get_fts_table_name(const std::string& schema, const std::string& table); + +public: + MySQL_FTS(const std::string& path); + ~MySQL_FTS(); + + int init(); + void close(); + + // Tool methods + std::string index_table(...); + std::string search(...); + std::string list_indexes(); + std::string delete_index(...); + std::string reindex(...); + std::string rebuild_all(...); + + bool index_exists(const std::string& schema, const std::string& table); + SQLite3DB* get_db() { return db; } +}; +``` + +### Error Handling Pattern + +```cpp +json result; +result["success"] = false; +result["error"] = "Descriptive error 
message"; +return result.dump(); + +// Logging +proxy_error("FTS error: %s\n", error_msg); +proxy_info("FTS index created: %s.%s\n", schema.c_str(), table.c_str()); +``` + +### SQLite Operations Pattern + +```cpp +db->wrlock(); +// Write operations +db->wrunlock(); + +db->rdlock(); +// Read operations +db->rdunlock(); + +// Prepared statements +sqlite3_stmt* stmt = NULL; +db->prepare_v2(sql, &stmt); +(*proxy_sqlite3_bind_text)(stmt, 1, value.c_str(), -1, SQLITE_TRANSIENT); +SAFE_SQLITE3_STEP2(stmt); +(*proxy_sqlite3_finalize)(stmt); +``` + +### JSON Response Pattern + +```cpp +// Use nlohmann/json +json result; +result["success"] = true; +result["data"] = data_array; +return result.dump(); +``` + +## Configuration Variable + +| Variable | Default | Description | +|----------|---------|-------------| +| `mcp-fts_path` | `mcp_fts.db` | Path to FTS SQLite database (relative or absolute) | + +**Usage**: +```sql +SET mcp-fts_path='/var/lib/proxysql/mcp_fts.db'; +``` + +## Agent Workflow Example + +```python +# Agent narrows down results using FTS +fts_results = call_tool("fts_search", { + "query": "urgent customer complaint", + "limit": 10 +}) + +# Extract primary keys from FTS results +order_ids = [r["primary_key_value"] for r in fts_results["results"]] + +# Query MySQL for full data +full_data = call_tool("run_sql_readonly", { + "sql": f"SELECT * FROM orders WHERE order_id IN ({','.join(order_ids)})" +}) +``` + +## Threading Considerations + +- SQLite3DB provides thread-safe read-write locks +- Use `wrlock()` for writes (index operations) +- Use `rdlock()` for reads (search operations) +- Follow the catalog pattern for locking + +## Performance Considerations + +1. **Batch inserts**: When indexing, insert rows in batches (100-1000 at a time) +2. **Table naming**: Sanitize schema/table names for SQLite table names +3. **Memory usage**: Large tables may require streaming results +4. 
**Index size**: Monitor FTS database size + +## Testing Checklist + +- [ ] Create index on single table +- [ ] Create index with WHERE clause +- [ ] Search single table +- [ ] Search across all tables +- [ ] List indexes +- [ ] Delete index +- [ ] Reindex single table +- [ ] Rebuild all indexes +- [ ] Test with NULL values +- [ ] Test with special characters in data +- [ ] Test pagination +- [ ] Test schema/table filtering + +## Notes + +- Follow existing patterns from `MySQL_Catalog` for SQLite management +- Use SQLite3DB read-write locks for thread safety +- Return JSON responses using nlohmann/json library +- Handle NULL values properly (use empty string as in execute_query) +- Use prepared statements for SQL safety +- Log errors using `proxy_error()` and info using `proxy_info()` +- Table name sanitization: replace `.` and special chars with `_` diff --git a/doc/MCP/Tool_Discovery_Guide.md b/doc/MCP/Tool_Discovery_Guide.md new file mode 100644 index 0000000000..aaa2f38ff3 --- /dev/null +++ b/doc/MCP/Tool_Discovery_Guide.md @@ -0,0 +1,475 @@ +# MCP Tool Discovery Guide + +This guide explains how to discover and interact with MCP tools available on the Query endpoint. + +## Overview + +The MCP (Model Context Protocol) Query endpoint provides dynamic tool discovery through the `tools/list` method. This allows clients to: + +1. Discover all available tools at runtime +2. Get detailed schemas for each tool (parameters, requirements, descriptions) +3. 
Dynamically adapt to new tools without code changes + +## Endpoint Information + +- **URL**: `https://127.0.0.1:6071/mcp/query` +- **Protocol**: JSON-RPC 2.0 over HTTPS +- **Authentication**: Bearer token (optional, if configured) + +## Getting the Tool List + +### Basic Request + +```bash +curl -k -X POST https://127.0.0.1:6071/mcp/query \ + -H "Content-Type: application/json" \ + -d '{ + "jsonrpc": "2.0", + "method": "tools/list", + "id": 1 + }' | jq +``` + +### With Authentication + +If authentication is configured: + +```bash +curl -k -X POST https://127.0.0.1:6071/mcp/query \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -d '{ + "jsonrpc": "2.0", + "method": "tools/list", + "id": 1 + }' | jq +``` + +### Using Query Parameter (Alternative) + +If header authentication is not available: + +```bash +curl -k -X POST "https://127.0.0.1:6071/mcp/query?token=YOUR_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "jsonrpc": "2.0", + "method": "tools/list", + "id": 1 + }' | jq +``` + +## Response Format + +```json +{ + "id": "1", + "jsonrpc": "2.0", + "result": { + "tools": [ + { + "name": "tool_name", + "description": "Tool description", + "inputSchema": { + "type": "object", + "properties": { + "param_name": { + "type": "string|integer", + "description": "Parameter description" + } + }, + "required": ["param1", "param2"] + } + } + ] + } +} +``` + +## Available Query Endpoint Tools + +### Inventory Tools + +#### list_schemas +List all available schemas/databases. + +**Parameters:** +- `page_token` (string, optional) - Pagination token +- `page_size` (integer, optional) - Results per page (default: 50) + +#### list_tables +List tables in a schema. 
+ +**Parameters:** +- `schema` (string, **required**) - Schema name +- `page_token` (string, optional) - Pagination token +- `page_size` (integer, optional) - Results per page (default: 50) +- `name_filter` (string, optional) - Filter table names by pattern + +### Structure Tools + +#### describe_table +Get detailed table schema including columns, types, keys, and indexes. + +**Parameters:** +- `schema` (string, **required**) - Schema name +- `table` (string, **required**) - Table name + +#### get_constraints +Get constraints (foreign keys, unique constraints, etc.) for a table. + +**Parameters:** +- `schema` (string, **required**) - Schema name +- `table` (string, optional) - Table name + +### Profiling Tools + +#### table_profile +Get table statistics including row count, size estimates, and data distribution. + +**Parameters:** +- `schema` (string, **required**) - Schema name +- `table` (string, **required**) - Table name +- `mode` (string, optional) - Profile mode: "quick" or "full" (default: "quick") + +#### column_profile +Get column statistics including distinct values, null count, and top values. + +**Parameters:** +- `schema` (string, **required**) - Schema name +- `table` (string, **required**) - Table name +- `column` (string, **required**) - Column name +- `max_top_values` (integer, optional) - Maximum top values to return (default: 20) + +### Sampling Tools + +#### sample_rows +Get sample rows from a table (with hard cap on rows returned). + +**Parameters:** +- `schema` (string, **required**) - Schema name +- `table` (string, **required**) - Table name +- `columns` (string, optional) - Comma-separated column names +- `where` (string, optional) - WHERE clause filter +- `order_by` (string, optional) - ORDER BY clause +- `limit` (integer, optional) - Maximum rows (default: 20) + +#### sample_distinct +Sample distinct values from a column. 
+ +**Parameters:** +- `schema` (string, **required**) - Schema name +- `table` (string, **required**) - Table name +- `column` (string, **required**) - Column name +- `where` (string, optional) - WHERE clause filter +- `limit` (integer, optional) - Maximum values (default: 50) + +### Query Tools + +#### run_sql_readonly +Execute a read-only SQL query with safety guardrails enforced. + +**Parameters:** +- `sql` (string, **required**) - SQL query to execute +- `max_rows` (integer, optional) - Maximum rows to return (default: 200) +- `timeout_sec` (integer, optional) - Query timeout (default: 2) + +**Safety rules:** +- Must start with SELECT +- No dangerous keywords (DROP, DELETE, INSERT, UPDATE, etc.) +- SELECT * requires LIMIT clause + +#### explain_sql +Explain a query execution plan using EXPLAIN or EXPLAIN ANALYZE. + +**Parameters:** +- `sql` (string, **required**) - SQL query to explain + +### Relationship Inference Tools + +#### suggest_joins +Suggest table joins based on heuristic analysis of column names and types. + +**Parameters:** +- `schema` (string, **required**) - Schema name +- `table_a` (string, **required**) - First table +- `table_b` (string, optional) - Second table (if omitted, checks all) +- `max_candidates` (integer, optional) - Maximum join candidates (default: 5) + +#### find_reference_candidates +Find tables that might be referenced by a foreign key column. + +**Parameters:** +- `schema` (string, **required**) - Schema name +- `table` (string, **required**) - Table name +- `column` (string, **required**) - Column name +- `max_tables` (integer, optional) - Maximum tables to check (default: 50) + +### Catalog Tools (LLM Memory) + +#### catalog_upsert +Store or update an entry in the catalog (LLM external memory). 
+ +**Parameters:** +- `kind` (string, **required**) - Entry kind (e.g., "table", "relationship", "insight") +- `key` (string, **required**) - Unique identifier +- `document` (string, **required**) - JSON document with data +- `tags` (string, optional) - Comma-separated tags +- `links` (string, optional) - Comma-separated related keys + +#### catalog_get +Retrieve an entry from the catalog. + +**Parameters:** +- `kind` (string, **required**) - Entry kind +- `key` (string, **required**) - Entry key + +#### catalog_search +Search the catalog for entries matching a query. + +**Parameters:** +- `query` (string, **required**) - Search query +- `kind` (string, optional) - Filter by kind +- `tags` (string, optional) - Filter by tags +- `limit` (integer, optional) - Maximum results (default: 20) +- `offset` (integer, optional) - Results offset (default: 0) + +#### catalog_list +List catalog entries by kind. + +**Parameters:** +- `kind` (string, optional) - Filter by kind +- `limit` (integer, optional) - Maximum results (default: 50) +- `offset` (integer, optional) - Results offset (default: 0) + +#### catalog_merge +Merge multiple catalog entries into a single consolidated entry. + +**Parameters:** +- `keys` (string, **required**) - Comma-separated keys to merge +- `target_key` (string, **required**) - Target key for merged entry +- `kind` (string, optional) - Entry kind (default: "domain") +- `instructions` (string, optional) - Merge instructions + +#### catalog_delete +Delete an entry from the catalog. 
+ +**Parameters:** +- `kind` (string, **required**) - Entry kind +- `key` (string, **required**) - Entry key + +## Calling a Tool + +### Request Format + +```bash +curl -k -X POST https://127.0.0.1:6071/mcp/query \ + -H "Content-Type: application/json" \ + -d '{ + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "name": "list_tables", + "arguments": { + "schema": "testdb" + } + }, + "id": 2 + }' | jq +``` + +### Response Format + +```json +{ + "id": "2", + "jsonrpc": "2.0", + "result": { + "success": true, + "data": [...] + } +} +``` + +### Error Response + +```json +{ + "id": "2", + "jsonrpc": "2.0", + "result": { + "success": false, + "error": "Error message" + } +} +``` + +## Python Examples + +### Basic Tool Discovery + +```python +import requests +import json + +# Get tool list +response = requests.post( + "https://127.0.0.1:6071/mcp/query", + json={ + "jsonrpc": "2.0", + "method": "tools/list", + "id": 1 + }, + verify=False # For self-signed cert +) + +tools = response.json()["result"]["tools"] + +# Print all tools +for tool in tools: + print(f"\n{tool['name']}") + print(f" Description: {tool['description']}") + print(f" Required: {tool['inputSchema'].get('required', [])}") +``` + +### Calling a Tool + +```python +def call_tool(tool_name, arguments): + response = requests.post( + "https://127.0.0.1:6071/mcp/query", + json={ + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "name": tool_name, + "arguments": arguments + }, + "id": 2 + }, + verify=False + ) + return response.json()["result"] + +# List tables +result = call_tool("list_tables", {"schema": "testdb"}) +print(json.dumps(result, indent=2)) + +# Describe a table +result = call_tool("describe_table", { + "schema": "testdb", + "table": "customers" +}) +print(json.dumps(result, indent=2)) + +# Run a query +result = call_tool("run_sql_readonly", { + "sql": "SELECT * FROM customers LIMIT 10" +}) +print(json.dumps(result, indent=2)) +``` + +### Complete Example: Database Discovery + 
+```python +import requests +import json + +class MCPQueryClient: + def __init__(self, host="127.0.0.1", port=6071, token=None): + self.url = f"https://{host}:{port}/mcp/query" + self.headers = { + "Content-Type": "application/json", + **({"Authorization": f"Bearer {token}"} if token else {}) + } + + def list_tools(self): + response = requests.post( + self.url, + json={"jsonrpc": "2.0", "method": "tools/list", "id": 1}, + headers=self.headers, + verify=False + ) + return response.json()["result"]["tools"] + + def call_tool(self, name, arguments): + response = requests.post( + self.url, + json={ + "jsonrpc": "2.0", + "method": "tools/call", + "params": {"name": name, "arguments": arguments}, + "id": 2 + }, + headers=self.headers, + verify=False + ) + return response.json()["result"] + + def explore_schema(self, schema): + """Explore a schema: list tables and their structures""" + print(f"\n=== Exploring schema: {schema} ===\n") + + # List tables + tables = self.call_tool("list_tables", {"schema": schema}) + for table in tables.get("data", []): + table_name = table["name"] + print(f"\nTable: {table_name}") + print(f" Type: {table['type']}") + print(f" Rows: {table.get('row_count', 'unknown')}") + + # Describe table + schema_info = self.call_tool("describe_table", { + "schema": schema, + "table": table_name + }) + + if schema_info.get("success"): + print(f" Columns: {', '.join([c['name'] for c in schema_info['data']['columns']])}") + +# Usage +client = MCPQueryClient() +client.explore_schema("testdb") +``` + +## Using the Test Script + +The test script provides a convenient way to discover and test tools: + +```bash +# List all discovered tools (without testing) +./scripts/mcp/test_mcp_tools.sh --list-only + +# Test only query endpoint +./scripts/mcp/test_mcp_tools.sh --endpoint query + +# Test specific tool with verbose output +./scripts/mcp/test_mcp_tools.sh --endpoint query --tool list_tables -v + +# Test all endpoints +./scripts/mcp/test_mcp_tools.sh +``` + +## 
Other Endpoints + +The same discovery pattern works for all MCP endpoints: + +- **Config**: `/mcp/config` - Configuration management tools +- **Query**: `/mcp/query` - Database exploration and query tools +- **Admin**: `/mcp/admin` - Administrative operations +- **Cache**: `/mcp/cache` - Cache management tools +- **Observe**: `/mcp/observe` - Monitoring and metrics tools + +Simply change the endpoint URL: + +```bash +curl -k -X POST https://127.0.0.1:6071/mcp/config \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc": "2.0", "method": "tools/list", "id": 1}' +``` + +## Related Documentation + +- [Architecture.md](Architecture.md) - Overall MCP architecture +- [Database_Discovery_Agent.md](Database_Discovery_Agent.md) - AI agent architecture +- [README.md](README.md) - Module overview diff --git a/doc/MCP/VARIABLES.md b/doc/MCP/VARIABLES.md new file mode 100644 index 0000000000..92edc552e6 --- /dev/null +++ b/doc/MCP/VARIABLES.md @@ -0,0 +1,279 @@ +# MCP Variables + +This document describes all configuration variables for the MCP (Model Context Protocol) module in ProxySQL. + +## Overview + +The MCP module provides JSON-RPC 2.0 over HTTPS for LLM integration with ProxySQL. It includes endpoints for configuration, observation, querying, administration, caching, and a MySQL Tool Handler for database exploration. + +All variables are stored in the `global_variables` table with the `mcp-` prefix and can be modified at runtime through the admin interface. 
+ +## Variable Reference + +### Server Configuration + +#### `mcp-enabled` +- **Type:** Boolean +- **Default:** `false` +- **Description:** Enable or disable the MCP HTTPS server +- **Runtime:** Yes (requires restart of MCP server to take effect) +- **Example:** + ```sql + SET mcp-enabled=true; + LOAD MCP VARIABLES TO RUNTIME; + ``` + +#### `mcp-port` +- **Type:** Integer +- **Default:** `6071` +- **Description:** HTTPS port for the MCP server +- **Range:** 1024-65535 +- **Runtime:** Yes (requires restart of MCP server to take effect) +- **Example:** + ```sql + SET mcp-port=7071; + LOAD MCP VARIABLES TO RUNTIME; + ``` + +#### `mcp-timeout_ms` +- **Type:** Integer +- **Default:** `30000` (30 seconds) +- **Description:** Request timeout in milliseconds for all MCP endpoints +- **Range:** 1000-300000 (1 second to 5 minutes) +- **Runtime:** Yes +- **Example:** + ```sql + SET mcp-timeout_ms=60000; + LOAD MCP VARIABLES TO RUNTIME; + ``` + +### Endpoint Authentication + +The following variables control authentication (Bearer tokens) for specific MCP endpoints. If left empty, no authentication is required for that endpoint. 
+ +#### `mcp-config_endpoint_auth` +- **Type:** String +- **Default:** `""` (empty) +- **Description:** Bearer token for `/mcp/config` endpoint +- **Runtime:** Yes +- **Example:** + ```sql + SET mcp-config_endpoint_auth='my-secret-token'; + LOAD MCP VARIABLES TO RUNTIME; + ``` + +#### `mcp-observe_endpoint_auth` +- **Type:** String +- **Default:** `""` (empty) +- **Description:** Bearer token for `/mcp/observe` endpoint +- **Runtime:** Yes +- **Example:** + ```sql + SET mcp-observe_endpoint_auth='observe-token'; + LOAD MCP VARIABLES TO RUNTIME; + ``` + +#### `mcp-query_endpoint_auth` +- **Type:** String +- **Default:** `""` (empty) +- **Description:** Bearer token for `/mcp/query` endpoint +- **Runtime:** Yes +- **Example:** + ```sql + SET mcp-query_endpoint_auth='query-token'; + LOAD MCP VARIABLES TO RUNTIME; + ``` + +#### `mcp-admin_endpoint_auth` +- **Type:** String +- **Default:** `""` (empty) +- **Description:** Bearer token for `/mcp/admin` endpoint +- **Runtime:** Yes +- **Example:** + ```sql + SET mcp-admin_endpoint_auth='admin-token'; + LOAD MCP VARIABLES TO RUNTIME; + ``` + +#### `mcp-cache_endpoint_auth` +- **Type:** String +- **Default:** `""` (empty) +- **Description:** Bearer token for `/mcp/cache` endpoint +- **Runtime:** Yes +- **Example:** + ```sql + SET mcp-cache_endpoint_auth='cache-token'; + LOAD MCP VARIABLES TO RUNTIME; + ``` + +### MySQL Tool Handler Configuration + +The MySQL Tool Handler provides LLM-based tools for MySQL database exploration, including: +- **inventory** - List databases and tables +- **structure** - Get table schema +- **profiling** - Analyze query performance +- **sampling** - Sample table data +- **query** - Execute SQL queries +- **relationships** - Infer table relationships +- **catalog** - Catalog operations + +#### `mcp-mysql_hosts` +- **Type:** String (comma-separated) +- **Default:** `"127.0.0.1"` +- **Description:** Comma-separated list of MySQL host addresses +- **Runtime:** Yes +- **Example:** + ```sql + SET 
mcp-mysql_hosts='192.168.1.10,192.168.1.11,192.168.1.12'; + LOAD MCP VARIABLES TO RUNTIME; + ``` + +#### `mcp-mysql_ports` +- **Type:** String (comma-separated) +- **Default:** `"3306"` +- **Description:** Comma-separated list of MySQL ports (corresponds to `mcp-mysql_hosts`) +- **Runtime:** Yes +- **Example:** + ```sql + SET mcp-mysql_ports='3306,3307,3308'; + LOAD MCP VARIABLES TO RUNTIME; + ``` + +#### `mcp-mysql_user` +- **Type:** String +- **Default:** `""` (empty) +- **Description:** MySQL username for tool handler connections +- **Runtime:** Yes +- **Example:** + ```sql + SET mcp-mysql_user='mcp_user'; + LOAD MCP VARIABLES TO RUNTIME; + ``` + +#### `mcp-mysql_password` +- **Type:** String +- **Default:** `""` (empty) +- **Description:** MySQL password for tool handler connections +- **Runtime:** Yes +- **Note:** Password is stored in plaintext in `global_variables`. Use restrictive MySQL user permissions. +- **Example:** + ```sql + SET mcp-mysql_password='secure-password'; + LOAD MCP VARIABLES TO RUNTIME; + ``` + +#### `mcp-mysql_schema` +- **Type:** String +- **Default:** `""` (empty) +- **Description:** Default database/schema to use for tool operations +- **Runtime:** Yes +- **Example:** + ```sql + SET mcp-mysql_schema='mydb'; + LOAD MCP VARIABLES TO RUNTIME; + ``` + +### Catalog Configuration + +#### `mcp-catalog_path` +- **Type:** String (file path) +- **Default:** `"mcp_catalog.db"` +- **Description:** Path to the SQLite catalog database (relative to ProxySQL datadir) +- **Runtime:** Yes +- **Example:** + ```sql + SET mcp-catalog_path='/path/to/mcp_catalog.db'; + LOAD MCP VARIABLES TO RUNTIME; + ``` + +## Management Commands + +### View Variables + +```sql +-- View all MCP variables +SHOW MCP VARIABLES; + +-- View specific variable +SELECT variable_name, variable_value +FROM global_variables +WHERE variable_name LIKE 'mcp-%'; +``` + +### Modify Variables + +```sql +-- Set a variable +SET mcp-enabled=true; + +-- Load to runtime +LOAD MCP VARIABLES TO 
RUNTIME; + +-- Save to disk +SAVE MCP VARIABLES TO DISK; +``` + +### Checksum Commands + +```sql +-- Checksum of disk variables +CHECKSUM DISK MCP VARIABLES; + +-- Checksum of memory variables +CHECKSUM MEM MCP VARIABLES; + +-- Checksum of memory variables (MEMORY is an alias for MEM) +CHECKSUM MEMORY MCP VARIABLES; +``` + +## Variable Persistence + +Variables can be persisted across three layers: + +1. **Disk** (`disk.global_variables`) - Persistent storage +2. **Memory** (`main.global_variables`) - Editable working configuration (not applied until loaded to runtime) +3. **Runtime** (`runtime_global_variables`) - Currently active values + +``` +LOAD MCP VARIABLES FROM DISK → Disk to Memory +LOAD MCP VARIABLES TO RUNTIME → Memory to Runtime +SAVE MCP VARIABLES TO DISK → Memory to Disk +SAVE MCP VARIABLES FROM RUNTIME → Runtime to Memory +``` + +## Status Variables + +The following read-only status variables are available: + +| Variable | Description | +|----------|-------------| +| `mcp_total_requests` | Total number of MCP requests received | +| `mcp_failed_requests` | Total number of failed MCP requests | +| `mcp_active_connections` | Current number of active MCP connections | + +To view status variables: + +```sql +SELECT * FROM stats_mysql_global WHERE variable_name LIKE 'mcp_%'; +``` + +## Security Considerations + +1. **Authentication:** Always set authentication tokens for production environments +2. **HTTPS:** The MCP server uses HTTPS with SSL certificates from the ProxySQL datadir +3. **MySQL Permissions:** Create a dedicated MySQL user with limited permissions for the tool handler: + - `SELECT` permissions for inventory/structure tools + - `PROCESS` permission for profiling + - Limited `SELECT` on specific tables for sampling/query tools +4. 
**Network Access:** Consider firewall rules to restrict access to `mcp-port` + +## Version + +- **MCP Thread Version:** 0.1.0 +- **Protocol:** JSON-RPC 2.0 over HTTPS + +## Related Documentation + +- [MCP Module README](README.md) - Module overview and setup +- [MCP Endpoints](ENDPOINTS.md) - API endpoint documentation +- [MySQL Tool Handler](TOOL_HANDLER.md) - Tool-specific documentation diff --git a/doc/MCP/Vector_Embeddings_Implementation_Plan.md b/doc/MCP/Vector_Embeddings_Implementation_Plan.md new file mode 100644 index 0000000000..0be878068a --- /dev/null +++ b/doc/MCP/Vector_Embeddings_Implementation_Plan.md @@ -0,0 +1,884 @@ +# Vector Embeddings Implementation Plan + +## Overview + +This document describes the implementation of Vector Embeddings capabilities for the ProxySQL MCP Query endpoint. The Embeddings system enables AI agents to perform semantic similarity searches on database content using sqlite-vec for vector storage and sqlite-rembed for embedding generation. + +## Requirements + +1. **Embedding Generation**: Use sqlite-rembed (placeholder for future GenAI module) +2. **Vector Storage**: Use sqlite-vec extension (already compiled into ProxySQL) +3. **Search Type**: Semantic similarity search using vector distance +4. **Integration**: Work alongside FTS and Catalog for comprehensive search +5. **Use Case**: Find semantically similar content, not just keyword matches + +## Architecture + +``` +MCP Query Endpoint (JSON-RPC 2.0 over HTTPS) + ↓ +Query_Tool_Handler (routes tool calls) + ↓ +MySQL_Tool_Handler (implements tools) + ↓ +MySQL_Embeddings (new class - manages embeddings database) + ↓ +SQLite with sqlite-vec (mcp_embeddings.db) + ↓ +sqlite-rembed (embedding generation) + ↓ +External APIs (OpenAI, Ollama, Cohere, etc.) 
+``` + +## Database Design + +### Separate SQLite Database +**Path**: `mcp_embeddings.db` (configurable via `mcp-embedding_path` variable) + +### Schema + +#### embedding_indexes (metadata table) +```sql +CREATE TABLE IF NOT EXISTS embedding_indexes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + schema_name TEXT NOT NULL, + table_name TEXT NOT NULL, + columns TEXT NOT NULL, -- JSON array: ["col1", "col2"] + primary_key TEXT NOT NULL, -- PK column name for identification + where_clause TEXT, -- Optional WHERE filter + model_name TEXT NOT NULL, -- e.g., "text-embedding-3-small" + vector_dim INTEGER NOT NULL, -- e.g., 1536 for OpenAI small + embedding_strategy TEXT NOT NULL, -- "concat", "average", "separate" + row_count INTEGER DEFAULT 0, + indexed_at INTEGER DEFAULT (strftime('%s', 'now')), + UNIQUE(schema_name, table_name) +); + +CREATE INDEX IF NOT EXISTS idx_embedding_indexes_schema ON embedding_indexes(schema_name); +CREATE INDEX IF NOT EXISTS idx_embedding_indexes_table ON embedding_indexes(table_name); +CREATE INDEX IF NOT EXISTS idx_embedding_indexes_model ON embedding_indexes(model_name); +``` + +#### Per-Index vec0 Tables (created dynamically) + +For each indexed table, create a sqlite-vec virtual table: + +```sql +-- For OpenAI text-embedding-3-small (1536 dimensions) +CREATE VIRTUAL TABLE embeddings_<schema>_<table> USING vec0( + vector float[1536], + pk_value TEXT, + metadata TEXT +); +``` + +**Table Components**: +- `vector` - The embedding vector (required by vec0) +- `pk_value` - Primary key value for MySQL lookup +- `metadata` - JSON with original row data + +**Sanitization**: +- Replace `.` and special characters with `_` +- Example: `testdb.orders` → `embeddings_testdb_orders` + +## Tools (6 total) + +### 1. embed_index_table + +Generate embeddings and create a vector index for a MySQL table. 
+ +**Parameters**: +| Name | Type | Required | Description | +|------|------|----------|-------------| +| schema | string | Yes | Schema name | +| table | string | Yes | Table name | +| columns | string | Yes | JSON array of column names to embed | +| primary_key | string | Yes | Primary key column name | +| where_clause | string | No | Optional WHERE clause for filtering rows | +| model | string | Yes | Embedding model name (e.g., "text-embedding-3-small") | +| strategy | string | No | Embedding strategy: "concat" (default), "average", "separate" | + +**Embedding Strategies**: + +| Strategy | Description | When to Use | +|----------|-------------|-------------| +| `concat` | Concatenate all columns with spaces, generate one embedding | Most common, semantic meaning of combined content | +| `average` | Generate embedding per column, average them | Multiple independent columns | +| `separate` | Store embeddings separately per column | Need column-specific similarity | + +**Response**: +```json +{ + "success": true, + "schema": "testdb", + "table": "orders", + "model": "text-embedding-3-small", + "vector_dim": 1536, + "row_count": 5000, + "indexed_at": 1736668800 +} +``` + +**Implementation Logic**: +1. Validate parameters (table exists, columns valid) +2. Check if index already exists +3. Create vec0 table: `embeddings_<schema>_<table>` (using the vector dimension resolved in step 4) +4. Get vector dimension from model (or default to 1536) +5. Configure sqlite-rembed client (if not already configured) +6. Fetch all rows from MySQL using `execute_query()` +7. For each row: + - Build content string based on strategy + - Call `rembed()` to generate embedding + - Store vector + metadata in vec0 table +8. Update `embedding_indexes` metadata +9. 
Return result + +**Code Example (concat strategy)**: +```sql +-- Configure rembed client +INSERT INTO temp.rembed_clients(name, format, model, key) +VALUES ('mcp_embeddings', 'openai', 'text-embedding-3-small', 'sk-...'); + +-- Generate and insert embeddings +INSERT INTO embeddings_testdb_orders(rowid, vector, pk_value, metadata) +SELECT + ROWID, + rembed('mcp_embeddings', + COALESCE(customer_name, '') || ' ' || + COALESCE(product_name, '') || ' ' || + COALESCE(notes, '')) as vector, + CAST(order_id AS TEXT) as pk_value, + json_object( + 'order_id', order_id, + 'customer_name', customer_name, + 'notes', notes + ) as metadata +FROM testdb.orders +WHERE active = 1; +``` + +### 2. embed_search + +Perform semantic similarity search using vector embeddings. + +**Parameters**: +| Name | Type | Required | Description | +|------|------|----------|-------------| +| query | string | Yes | Search query text | +| schema | string | No | Filter by schema | +| table | string | No | Filter by table | +| limit | integer | No | Max results (default: 10) | +| min_distance | float | No | Distance cutoff: despite the name, this is the maximum allowed distance; only results with a distance below it are returned (default: 1.0) | + +**Response**: +```json +{ + "success": true, + "query": "customer complaining about late delivery", + "query_embedding_dim": 1536, + "total_matches": 25, + "results": [ + { + "schema": "testdb", + "table": "orders", + "primary_key_value": "12345", + "distance": 0.234, + "metadata": { + "order_id": 12345, + "customer_name": "John Doe", + "notes": "Customer upset about delivery delay" + } + } + ] +} +``` + +**Implementation Logic**: +1. Generate embedding for query text using `rembed()` +2. Build SQL with vector similarity search +3. Apply schema/table filters if specified +4. Execute KNN search with distance threshold +5. Return ranked results with metadata + +**SQL Query Template**: +```sql +SELECT + e.pk_value as primary_key_value, + e.distance, + e.metadata +FROM embeddings_testdb_orders e +WHERE e.vector MATCH rembed('mcp_embeddings', ?) 
+ AND e.distance < ? +ORDER BY e.distance ASC +LIMIT ?; +``` + +**Distance Metrics** (sqlite-vec supports): +- L2 (Euclidean) - default +- Cosine - for normalized vectors +- Hamming - for binary vectors + +### 3. embed_list_indexes + +List all embedding indexes with metadata. + +**Parameters**: None + +**Response**: +```json +{ + "success": true, + "indexes": [ + { + "schema": "testdb", + "table": "orders", + "columns": ["customer_name", "product_name", "notes"], + "primary_key": "order_id", + "model": "text-embedding-3-small", + "vector_dim": 1536, + "strategy": "concat", + "row_count": 5000, + "indexed_at": 1736668800 + } + ] +} +``` + +**Implementation Logic**: +1. Query `embedding_indexes` table +2. Return all indexes with metadata + +### 4. embed_delete_index + +Remove an embedding index. + +**Parameters**: +| Name | Type | Required | Description | +|------|------|----------|-------------| +| schema | string | Yes | Schema name | +| table | string | Yes | Table name | + +**Response**: +```json +{ + "success": true, + "schema": "testdb", + "table": "orders", + "message": "Embedding index deleted successfully" +} +``` + +**Implementation Logic**: +1. Validate index exists +2. Drop vec0 table +3. Remove metadata from `embedding_indexes` + +### 5. embed_reindex + +Refresh an embedding index with fresh data (full rebuild). + +**Parameters**: +| Name | Type | Required | Description | +|------|------|----------|-------------| +| schema | string | Yes | Schema name | +| table | string | Yes | Table name | + +**Response**: Same as `embed_index_table` + +**Implementation Logic**: +1. Fetch existing index metadata from `embedding_indexes` +2. Drop existing vec0 table +3. Re-create vec0 table +4. Call `embed_index_table` logic with stored metadata +5. Update `indexed_at` timestamp + +### 6. embed_rebuild_all + +Rebuild ALL embedding indexes with fresh data. 
+ +**Parameters**: None + +**Response**: +```json +{ + "success": true, + "rebuilt_count": 3, + "failed": [ + { + "schema": "testdb", + "table": "products", + "error": "API rate limit exceeded" + } + ], + "indexes": [ + { + "schema": "testdb", + "table": "orders", + "row_count": 5100, + "status": "success" + } + ] +} +``` + +**Implementation Logic**: +1. Get all indexes from `embedding_indexes` table +2. For each index: + - Call `reindex()` with stored metadata + - Track success/failure +3. Return summary with rebuilt count and any failures + +## Implementation Steps + +### Phase 1: Foundation + +**Step 1: Create MySQL_Embeddings class** +- Create `include/MySQL_Embeddings.h` - Class header with method declarations +- Create `lib/MySQL_Embeddings.cpp` - Implementation +- Follow `MySQL_FTS` and `MySQL_Catalog` patterns + +**Step 2: Add configuration variable** +- Modify `include/MCP_Thread.h` - Add `mcp_embedding_path` to variables struct +- Modify `lib/MCP_Thread.cpp` - Add to `mcp_thread_variables_names` array +- Handle `embedding_path` in get/set variable functions +- Default value: `"mcp_embeddings.db"` + +**Step 3: Integrate Embeddings into MySQL_Tool_Handler** +- Add `MySQL_Embeddings* embeddings` member to `include/MySQL_Tool_Handler.h` +- Initialize in constructor with `embedding_path` +- Clean up in destructor +- Add Embeddings tool method declarations + +### Phase 2: Core Indexing + +**Step 4: Implement embed_index_table tool** +```cpp +// In MySQL_Embeddings class +std::string index_table( + const std::string& schema, + const std::string& table, + const std::string& columns, // JSON array + const std::string& primary_key, + const std::string& where_clause, + const std::string& model, + const std::string& strategy, + MySQL_Tool_Handler* mysql_handler +); +``` + +Key implementation details: +- Parse columns JSON array +- Create sanitized table name +- Create vec0 table with appropriate dimensions +- Configure sqlite-rembed client if needed +- Fetch data 
from MySQL +- Generate embeddings using `rembed()` function +- Insert into vec0 table +- Update metadata + +**GenAI Module Placeholder**: +```cpp +// For future GenAI module integration +// Currently uses sqlite-rembed +std::vector<float> generate_embedding( + const std::string& text, + const std::string& model +) { + // PLACEHOLDER: Will call GenAI module when merged + // Currently: Use sqlite-rembed + + char* error = NULL; + std::string sql = "SELECT rembed('mcp_embeddings', ?) as embedding"; + + // Execute query, parse JSON array + // Return std::vector<float> +} +``` + +**Step 5: Implement embed_list_indexes tool** +```cpp +std::string list_indexes(); +``` +Query `embedding_indexes` and return JSON array. + +**Step 6: Implement embed_delete_index tool** +```cpp +std::string delete_index(const std::string& schema, const std::string& table); +``` +Drop vec0 table and remove metadata. + +### Phase 3: Search Functionality + +**Step 7: Implement embed_search tool** +```cpp +std::string search( + const std::string& query, + const std::string& schema, + const std::string& table, + int limit, + float min_distance +); +``` + +SQL query template: +```sql +SELECT + e.pk_value, + e.distance, + e.metadata +FROM embeddings_<schema>_<table> e +WHERE e.vector MATCH rembed('mcp_embeddings', ?) + AND e.distance < ? +ORDER BY e.distance ASC +LIMIT ?; +``` + +**Step 8: Implement embed_reindex tool** +```cpp +std::string reindex( + const std::string& schema, + const std::string& table, + MySQL_Tool_Handler* mysql_handler +); +``` +Fetch metadata, rebuild embeddings. + +**Step 9: Implement embed_rebuild_all tool** +```cpp +std::string rebuild_all(MySQL_Tool_Handler* mysql_handler); +``` +Loop through all indexes and rebuild each. 
+ +### Phase 4: Tool Registration + +**Step 10: Register tools in Query_Tool_Handler** +- Modify `lib/Query_Tool_Handler.cpp` +- Add to `get_tool_list()`: + ```cpp + tools.push_back(create_tool_schema( + "embed_index_table", + "Generate embeddings and create vector index for a table", + {"schema", "table", "columns", "primary_key", "model"}, + {{"where_clause", "string"}, {"strategy", "string"}} + )); + // Repeat for all 6 tools + ``` +- Add routing in `execute_tool()`: + ```cpp + else if (tool_name == "embed_index_table") { + std::string schema = get_json_string(arguments, "schema"); + std::string table = get_json_string(arguments, "table"); + std::string columns = get_json_string(arguments, "columns"); + std::string primary_key = get_json_string(arguments, "primary_key"); + std::string where_clause = get_json_string(arguments, "where_clause"); + std::string model = get_json_string(arguments, "model"); + std::string strategy = get_json_string(arguments, "strategy", "concat"); + result_str = mysql_handler->embed_index_table(schema, table, columns, primary_key, where_clause, model, strategy); + } + // Repeat for other tools + ``` + +**Step 11: Update ProxySQL_MCP_Server** +- Modify `lib/ProxySQL_MCP_Server.cpp` +- Pass `embedding_path` when creating MySQL_Tool_Handler +- Initialize Embeddings: `mysql_handler->get_embeddings()->init()` + +### Phase 5: Build and Test + +**Step 12: Update build system** +- Modify `Makefile` +- Add `lib/MySQL_Embeddings.cpp` to compilation sources +- Verify link against sqlite3 (already includes vec.o) + +**Step 13: Testing** +- Test all 6 embed tools via MCP endpoint +- Verify JSON responses +- Test with actual MySQL data +- Test cross-table semantic search +- Test different embedding strategies +- Test with sqlite-rembed configured + +## Critical Files + +### New Files to Create +- `include/MySQL_Embeddings.h` - Embeddings class header +- `lib/MySQL_Embeddings.cpp` - Embeddings class implementation + +### Files to Modify +- 
`include/MySQL_Tool_Handler.h` - Add embeddings member and tool method declarations +- `lib/MySQL_Tool_Handler.cpp` - Add embeddings tool wrappers, initialize embeddings +- `lib/Query_Tool_Handler.cpp` - Register and route embeddings tools +- `include/MCP_Thread.h` - Add `mcp_embedding_path` variable +- `lib/MCP_Thread.cpp` - Handle `embedding_path` configuration +- `lib/ProxySQL_MCP_Server.cpp` - Pass `embedding_path` to MySQL_Tool_Handler +- `Makefile` - Add MySQL_Embeddings.cpp to build + +## Code Patterns to Follow + +### MySQL_Embeddings Class Structure + +```cpp +class MySQL_Embeddings { +private: + SQLite3DB* db; + std::string db_path; + + // Schema management + int init_schema(); + int create_tables(); + int create_embedding_table(const std::string& schema, + const std::string& table, + int vector_dim); + std::string get_table_name(const std::string& schema, + const std::string& table); + + // Embedding generation (placeholder for GenAI) + std::vector<float> generate_embedding(const std::string& text, + const std::string& model); + + // Content building strategies + std::string build_content(const json& row, + const std::vector<std::string>& columns, + const std::string& strategy); + +public: + MySQL_Embeddings(const std::string& path); + ~MySQL_Embeddings(); + + int init(); + void close(); + + // Tool methods + std::string index_table(...); + std::string search(...); + std::string list_indexes(); + std::string delete_index(...); + std::string reindex(...); + std::string rebuild_all(...); + + bool index_exists(const std::string& schema, const std::string& table); + SQLite3DB* get_db() { return db; } +}; +``` + +### sqlite-rembed Configuration + +```cpp +// Configure rembed client during initialization +int MySQL_Embeddings::init() { + // ... open database ... 
+ + // Check if mcp rembed client exists + char* error = NULL; + std::string check_sql = "SELECT name FROM temp.rembed_clients WHERE name='mcp_embeddings'"; + + // If not exists, create default client + // (Requires API key to be configured separately by user) + + return 0; +} +``` + +### Vector Insert Example + +```cpp +// Insert embedding with content concatenation +std::string sql = + "INSERT INTO embeddings_testdb_orders(rowid, vector, pk_value, metadata) " + "SELECT " + " ROWID, " + " rembed('mcp_embeddings', ?) as vector, " + " CAST(order_id AS TEXT) as pk_value, " + " json_object('order_id', order_id, 'customer_name', customer_name) as metadata " + "FROM testdb.orders " + "WHERE active = 1"; + +// Execute with prepared statement +sqlite3_stmt* stmt; +db->prepare_v2(sql.c_str(), &stmt); +(*proxy_sqlite3_bind_text)(stmt, 1, content.c_str(), -1, SQLITE_TRANSIENT); +SAFE_SQLITE3_STEP2(stmt); +(*proxy_sqlite3_finalize)(stmt); +``` + +### Similarity Search Example + +```cpp +// Generate query embedding +std::vector<float> query_vec = generate_embedding(query_text, model_name); +std::string query_vec_json = vector_to_json(query_vec); + +// Build search SQL (the JSON vector must be passed as a quoted SQL string literal) +std::ostringstream sql; +sql << "SELECT pk_value, distance, metadata " + << "FROM embeddings_testdb_orders " + << "WHERE vector MATCH '" << query_vec_json << "' " + << "AND distance < " << min_distance << " " + << "ORDER BY distance ASC " + << "LIMIT " << limit; + +// Execute and return results +``` + +## Configuration Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `mcp-embedding_path` | `mcp_embeddings.db` | Path to embeddings SQLite database | +| `mcp-rembed-client` | (none) | Default sqlite-rembed client name (user must configure) | + +**sqlite-rembed Configuration** (must be done by user): +```sql +-- Configure OpenAI client +INSERT INTO temp.rembed_clients(name, format, model, key) +VALUES ('mcp_embeddings', 'openai', 'text-embedding-3-small', 'sk-...'); + +-- Or local 
Ollama +INSERT INTO temp.rembed_clients(name, format, model, key) +VALUES ('mcp_embeddings', 'ollama', 'nomic-embed-text', ''); + +-- Or Cohere +INSERT INTO temp.rembed_clients(name, format, model, key) +VALUES ('mcp_embeddings', 'cohere', 'embed-english-v3.0', '...'); +``` + +## Model Support + +### Common Embedding Models + +| Model | Dimensions | Provider | Format | +|-------|------------|----------|--------| +| text-embedding-3-small | 1536 | OpenAI | openai | +| text-embedding-3-large | 3072 | OpenAI | openai | +| nomic-embed-text-v1.5 | 768 | Nomic | nomic | +| all-MiniLM-L6-v2 | 384 | Local (Ollama) | ollama | +| mxbai-embed-large-v1 | 1024 | MixedBread (Ollama) | ollama | + +### Vector Dimension Reference + +```cpp +// Map model names to dimensions +std::map<std::string, int> model_dimensions = { + {"text-embedding-3-small", 1536}, + {"text-embedding-3-large", 3072}, + {"nomic-embed-text-v1.5", 768}, + {"all-MiniLM-L6-v2", 384}, + {"mxbai-embed-large-v1", 1024} +}; +``` + +## Agent Workflow Examples + +### Example 1: Semantic Search + +```python +# Agent finds semantically similar content +embed_results = call_tool("embed_search", { + "query": "customer unhappy with shipping delay", + "limit": 10 +}) + +# Extract primary keys +order_ids = [r["primary_key_value"] for r in embed_results["results"]] + +# Query MySQL for full data +full_orders = call_tool("run_sql_readonly", { + "sql": f"SELECT * FROM orders WHERE order_id IN ({','.join(order_ids)})" +}) +``` + +### Example 2: Combined FTS + Embeddings + +```python +# FTS for exact keyword match +keyword_results = call_tool("fts_search", { + "query": "refund request", + "limit": 50 +}) + +# Embeddings for semantic similarity +semantic_results = call_tool("embed_search", { + "query": "customer wants money back", + "limit": 50 +}) + +# Combine and deduplicate for best results +all_ids = set( + [r["primary_key_value"] for r in keyword_results["results"]] + + [r["primary_key_value"] for r in semantic_results["results"]] +) +``` + 
+### Example 3: RAG (Retrieval Augmented Generation) + +```python +# 1. Search for relevant documents +docs = call_tool("embed_search", { + "query": user_question, + "table": "knowledge_base", + "limit": 5 +}) + +# 2. Build context from retrieved documents +context = "\n".join([d["metadata"]["content"] for d in docs["results"]]) + +# 3. Generate answer using context +answer = call_llm({ + "prompt": f"Context: {context}\n\nQuestion: {user_question}\n\nAnswer:" +}) +``` + +## Comparison: FTS vs Embeddings + +| Aspect | FTS (fts_*) | Embeddings (embed_*) | +|--------|-------------|---------------------| +| **Search Type** | Lexical (keyword matching) | Semantic (similarity matching) | +| **Query Example** | "urgent order" | "customer complaint about late delivery" | +| **Technology** | SQLite FTS5 | sqlite-vec | +| **Storage** | Text content | Vector embeddings (float arrays) | +| **External API** | None | sqlite-rembed / GenAI module | +| **Speed** | Very fast | Fast (but API call latency) | +| **Use Cases** | Exact phrase matching, filters | Similar content, semantic understanding | +| **Strengths** | Fast, precise, works offline | Finds related content, handles synonyms | +| **Weaknesses** | Misses semantic matches | Requires API, slower, needs setup | + +## Performance Considerations + +### Embedding Generation +- **API Rate Limits**: OpenAI has rate limits (e.g., 3000 RPM) +- **Batch Processing**: sqlite-rembed doesn't support batching yet +- **Latency**: Each embedding = 1 HTTP call (50-500ms) +- **Cost**: OpenAI charges per token (e.g., $0.00002/1K tokens) + +### Vector Storage +- **Storage**: 1536 floats × 4 bytes = ~6KB per embedding +- **10,000 rows** = ~60MB for embeddings +- **Memory**: sqlite-vec loads vectors into memory for search + +### Search Performance +- **KNN Search**: O(n × d) where n=rows, d=dimensions +- **Typical**: < 100ms for 10K rows, < 1s for 1M rows +- **Limit**: Use LIMIT or `k = ?` constraint (required by vec0) + +## Best Practices + 
+### When to Use Embeddings +- **Semantic search**: Find similar meanings, not just keywords +- **Content recommendation**: "Users who liked X also liked Y" +- **Duplicate detection**: Find similar documents +- **Categorization**: Cluster similar content +- **RAG**: Retrieve relevant context for the LLM + +### When to Use FTS +- **Exact matching**: Log search, code search +- **Filters**: Combined with WHERE clauses +- **Speed critical**: Sub-millisecond response needed +- **Offline**: No external API access + +### Column Selection +- **Choose meaningful columns**: Text that captures semantic meaning +- **Avoid IDs/numbers**: Order ID, timestamps (low semantic value) +- **Combine textually**: `title + description + notes` +- **Preprocess**: Remove HTML, special characters + +### Strategy Selection +- **concat**: Default, works for most use cases +- **average**: When columns have independent meaning +- **separate**: When you need column-specific similarity + +## Testing Checklist + +### Basic Functionality +- [ ] Create embedding index (single table) +- [ ] Create embedding index with WHERE clause +- [ ] Create embedding index with average strategy +- [ ] Search single table +- [ ] Search across all tables +- [ ] List indexes +- [ ] Delete index +- [ ] Reindex single table +- [ ] Rebuild all indexes + +### Edge Cases +- [ ] Empty result sets +- [ ] NULL values in columns +- [ ] Special characters in text +- [ ] Very long text (>10K chars) +- [ ] Non-ASCII text (Unicode) +- [ ] API rate limiting +- [ ] API errors +- [ ] Invalid model names + +### Integration +- [ ] Works alongside FTS +- [ ] Works with catalog +- [ ] SQLite-vec extension loaded +- [ ] sqlite-rembed client configured +- [ ] Cross-table semantic search + +## GenAI Module Integration (Future) + +### Placeholder Interface + +```cpp +// When GenAI module is merged, replace sqlite-rembed calls +#ifdef HAVE_GENAI_MODULE + #include "GenAI_Module.h" +#endif + +std::vector<float> MySQL_Embeddings::generate_embedding( + const
std::string& text, + const std::string& model +) { +#ifdef HAVE_GENAI_MODULE + // Use GenAI module + return GenAI_Module::generate_embedding(text, model); +#else + // Use sqlite-rembed + std::string sql = "SELECT rembed('mcp_embeddings', ?) as embedding"; + // ... execute and parse ... + return parse_vector_from_json(result); +#endif +} +``` + +### Configuration for GenAI + +When the GenAI module is available, add the following configuration variables: +```sql +SET mcp-genai-provider='local'; -- or 'openai', 'ollama', etc. +SET mcp-genai-model='nomic-embed-text-v1.5'; +``` + +## Troubleshooting + +### Common Issues + +**Issue**: "Error: no such table: temp.rembed_clients" +- **Cause**: sqlite-rembed extension not loaded +- **Fix**: Ensure sqlite-rembed is compiled and auto-registered + +**Issue**: "Error: rembed client not found" +- **Cause**: sqlite-rembed client not configured +- **Fix**: Run INSERT into temp.rembed_clients + +**Issue**: "Error: vector dimension mismatch" +- **Cause**: Model output doesn't match vec0 table dimensions +- **Fix**: Ensure vector_dim matches model output + +**Issue**: API rate limit exceeded +- **Cause**: Too many embedding requests +- **Fix**: Add delays, batch processing (when available), or use a local model + +## Notes + +- Follow existing patterns from `MySQL_FTS` and `MySQL_Catalog` for SQLite management +- Use SQLite3DB read-write locks for thread safety +- Return JSON responses using the nlohmann/json library +- Handle NULL values properly (use empty string as in execute_query) +- Use prepared statements for SQL safety +- Log errors using `proxy_error()` and info using `proxy_info()` +- Table name sanitization: replace `.` and special chars with `_` +- Always use LIMIT or `k = ?` in vec0 KNN queries (sqlite-vec requirement) +- Configure sqlite-rembed client before indexing +- Consider API costs and rate limits when planning bulk indexing diff --git a/genai_prototype/genai_demo_event b/genai_prototype/genai_demo_event new file mode 100755 index
0000000000..f7de009b9a Binary files /dev/null and b/genai_prototype/genai_demo_event differ diff --git a/include/Admin_Tool_Handler.h b/include/Admin_Tool_Handler.h new file mode 100644 index 0000000000..78308f2d0a --- /dev/null +++ b/include/Admin_Tool_Handler.h @@ -0,0 +1,50 @@ +#ifndef CLASS_ADMIN_TOOL_HANDLER_H +#define CLASS_ADMIN_TOOL_HANDLER_H + +#include "MCP_Tool_Handler.h" +#include + +// Forward declaration +class MCP_Threads_Handler; + +/** + * @brief Administration Tool Handler for /mcp/admin endpoint + * + * This handler provides tools for administrative operations on ProxySQL. + * These tools allow LLMs to perform management tasks like user management, + * process control, and server administration. + * + * Tools provided (stub implementation): + * - admin_list_users: List MySQL users + * - admin_show_processes: Show running processes + * - admin_kill_query: Kill a running query + * - admin_flush_cache: Flush various caches + * - admin_reload: Reload users/servers configuration + */ +class Admin_Tool_Handler : public MCP_Tool_Handler { +private: + MCP_Threads_Handler* mcp_handler; ///< Pointer to MCP handler + pthread_mutex_t handler_lock; ///< Mutex for thread-safe operations + +public: + /** + * @brief Constructor + * @param handler Pointer to MCP_Threads_Handler + */ + Admin_Tool_Handler(MCP_Threads_Handler* handler); + + /** + * @brief Destructor + */ + ~Admin_Tool_Handler() override; + + // MCP_Tool_Handler interface implementation + json get_tool_list() override; + json get_tool_description(const std::string& tool_name) override; + json execute_tool(const std::string& tool_name, const json& arguments) override; + int init() override; + void close() override; + std::string get_handler_name() const override { return "admin"; } +}; + +#endif /* CLASS_ADMIN_TOOL_HANDLER_H */ diff --git a/include/Cache_Tool_Handler.h b/include/Cache_Tool_Handler.h new file mode 100644 index 0000000000..271dee65b6 --- /dev/null +++ b/include/Cache_Tool_Handler.h @@ 
-0,0 +1,49 @@ +#ifndef CLASS_CACHE_TOOL_HANDLER_H +#define CLASS_CACHE_TOOL_HANDLER_H + +#include "MCP_Tool_Handler.h" +#include + +// Forward declaration +class MCP_Threads_Handler; + +/** + * @brief Cache Tool Handler for /mcp/cache endpoint + * + * This handler provides tools for managing ProxySQL's query cache. + * + * Tools provided (stub implementation): + * - get_cache_stats: Get cache statistics + * - invalidate_cache: Invalidate cache entries + * - set_cache_ttl: Set cache TTL + * - clear_cache: Clear all cache + * - warm_cache: Warm up cache with queries + * - get_cache_entries: List cached queries + */ +class Cache_Tool_Handler : public MCP_Tool_Handler { +private: + MCP_Threads_Handler* mcp_handler; ///< Pointer to MCP handler + pthread_mutex_t handler_lock; ///< Mutex for thread-safe operations + +public: + /** + * @brief Constructor + * @param handler Pointer to MCP_Threads_Handler + */ + Cache_Tool_Handler(MCP_Threads_Handler* handler); + + /** + * @brief Destructor + */ + ~Cache_Tool_Handler() override; + + // MCP_Tool_Handler interface implementation + json get_tool_list() override; + json get_tool_description(const std::string& tool_name) override; + json execute_tool(const std::string& tool_name, const json& arguments) override; + int init() override; + void close() override; + std::string get_handler_name() const override { return "cache"; } +}; + +#endif /* CLASS_CACHE_TOOL_HANDLER_H */ diff --git a/include/Config_Tool_Handler.h b/include/Config_Tool_Handler.h new file mode 100644 index 0000000000..f67e173dde --- /dev/null +++ b/include/Config_Tool_Handler.h @@ -0,0 +1,85 @@ +#ifndef CLASS_CONFIG_TOOL_HANDLER_H +#define CLASS_CONFIG_TOOL_HANDLER_H + +#include "MCP_Tool_Handler.h" +#include + +// Forward declaration +class MCP_Threads_Handler; + +/** + * @brief Configuration Tool Handler for /mcp/config endpoint + * + * This handler provides tools for runtime configuration and management + * of ProxySQL. 
It allows LLMs to view and modify ProxySQL configuration, + * reload variables, and manage the server state. + * + * Tools provided: + * - get_config: Get current configuration values + * - set_config: Modify configuration values + * - reload_config: Reload configuration from disk/memory + * - list_variables: List all available variables + * - get_status: Get server status information + */ +class Config_Tool_Handler : public MCP_Tool_Handler { +private: + MCP_Threads_Handler* mcp_handler; ///< Pointer to MCP handler for variable access + pthread_mutex_t handler_lock; ///< Mutex for thread-safe operations + + /** + * @brief Get a configuration variable value + * @param var_name Variable name (without 'mcp-' prefix) + * @return JSON with variable value + */ + json handle_get_config(const std::string& var_name); + + /** + * @brief Set a configuration variable value + * @param var_name Variable name (without 'mcp-' prefix) + * @param var_value New value + * @return JSON with success status + */ + json handle_set_config(const std::string& var_name, const std::string& var_value); + + /** + * @brief Reload configuration + * @param scope "disk", "memory", or "runtime" + * @return JSON with success status + */ + json handle_reload_config(const std::string& scope); + + /** + * @brief List all configuration variables + * @param filter Optional filter pattern + * @return JSON with variables list + */ + json handle_list_variables(const std::string& filter); + + /** + * @brief Get server status + * @return JSON with status information + */ + json handle_get_status(); + +public: + /** + * @brief Constructor + * @param handler Pointer to MCP_Threads_Handler + */ + Config_Tool_Handler(MCP_Threads_Handler* handler); + + /** + * @brief Destructor + */ + ~Config_Tool_Handler() override; + + // MCP_Tool_Handler interface implementation + json get_tool_list() override; + json get_tool_description(const std::string& tool_name) override; + json execute_tool(const std::string& tool_name, 
const json& arguments) override; + int init() override; + void close() override; + std::string get_handler_name() const override { return "config"; } +}; + +#endif /* CLASS_CONFIG_TOOL_HANDLER_H */ diff --git a/include/MCP_Endpoint.h b/include/MCP_Endpoint.h new file mode 100644 index 0000000000..7e7bd5f050 --- /dev/null +++ b/include/MCP_Endpoint.h @@ -0,0 +1,143 @@ +#ifndef CLASS_MCP_ENDPOINT_H +#define CLASS_MCP_ENDPOINT_H + +#include "proxysql.h" +#include "cpp.h" +#include +#include + +// Forward declarations +class MCP_Threads_Handler; +class MCP_Tool_Handler; + +// Include httpserver after proxysql.h +#include "httpserver.hpp" + +// Include JSON library +#include "../deps/json/json.hpp" +using json = nlohmann::json; +#define PROXYJSON + +/** + * @brief MCP JSON-RPC 2.0 Resource class + * + * This class extends httpserver::http_resource to provide JSON-RPC 2.0 + * endpoints for MCP protocol communication. Each endpoint handles + * POST requests with JSON-RPC 2.0 formatted payloads. + * + * Each endpoint has its own dedicated tool handler that provides + * endpoint-specific tools. + */ +class MCP_JSONRPC_Resource : public httpserver::http_resource { +private: + MCP_Threads_Handler* handler; ///< Pointer to MCP handler for variable access + MCP_Tool_Handler* tool_handler; ///< Pointer to endpoint's dedicated tool handler + std::string endpoint_name; ///< Endpoint name (config, query, admin, etc.) + + /** + * @brief Authenticate the incoming request + * + * Placeholder for future authentication implementation. + * Currently always returns true. + * + * @param req The HTTP request + * @return true if authenticated, false otherwise + */ + bool authenticate_request(const httpserver::http_request& req); + + /** + * @brief Handle JSON-RPC 2.0 request + * + * Processes the JSON-RPC request and returns an appropriate response. 
+ * + * @param req The HTTP request + * @return HTTP response with JSON-RPC response + */ + std::shared_ptr handle_jsonrpc_request( + const httpserver::http_request& req + ); + + /** + * @brief Create a JSON-RPC 2.0 success response + * + * @param result The result data to include + * @param id The request ID (defaults to "1") + * @return JSON string representing the response + */ + std::string create_jsonrpc_response( + const std::string& result, + const std::string& id = "1" + ); + + /** + * @brief Create a JSON-RPC 2.0 error response + * + * @param code The error code (JSON-RPC standard or custom) + * @param message The error message + * @param id The request ID (defaults to an empty string) + * @return JSON string representing the error response + */ + std::string create_jsonrpc_error( + int code, + const std::string& message, + const std::string& id = "" + ); + + /** + * @brief Handle tools/list method + * + * Returns the tools exposed on this endpoint; presumably taken from tool_handler (TODO confirm). + * + * @return JSON with tools array + */ + json handle_tools_list(); + + /** + * @brief Handle tools/describe method + * + * Returns detailed information about a specific tool. + * + * @param req_json The JSON-RPC request + * @return JSON with tool description + */ + json handle_tools_describe(const json& req_json); + + /** + * @brief Handle tools/call method + * + * Executes a tool with the provided arguments.
+ * + * @param req_json The JSON-RPC request + * @return JSON with tool execution result + */ + json handle_tools_call(const json& req_json); + +public: + /** + * @brief Constructor for MCP_JSONRPC_Resource + * + * @param h Pointer to the MCP_Threads_Handler instance + * @param th Pointer to the endpoint's dedicated tool handler + * @param name The name of this endpoint (e.g., "config", "query") + */ + MCP_JSONRPC_Resource(MCP_Threads_Handler* h, MCP_Tool_Handler* th, const std::string& name); + + /** + * @brief Destructor + */ + ~MCP_JSONRPC_Resource(); + + /** + * @brief Handle POST requests + * + * Processes incoming JSON-RPC 2.0 POST requests. + * + * @param req The HTTP request + * @return HTTP response with JSON-RPC response + */ + const std::shared_ptr render_POST( + const httpserver::http_request& req + ) override; +}; + +#endif /* CLASS_MCP_ENDPOINT_H */ diff --git a/include/MCP_Thread.h b/include/MCP_Thread.h new file mode 100644 index 0000000000..acf68dfb47 --- /dev/null +++ b/include/MCP_Thread.h @@ -0,0 +1,198 @@ +#ifndef __CLASS_MCP_THREAD_H +#define __CLASS_MCP_THREAD_H + +#define MCP_THREAD_VERSION "0.1.0" + +#include +#include +#include + +// Forward declarations +class ProxySQL_MCP_Server; +class MySQL_Tool_Handler; +class MCP_Tool_Handler; +class Config_Tool_Handler; +class Query_Tool_Handler; +class Admin_Tool_Handler; +class Cache_Tool_Handler; +class Observe_Tool_Handler; + +/** + * @brief MCP Threads Handler class for managing MCP module configuration + * + * This class handles the MCP (Model Context Protocol) module's configuration + * variables and lifecycle. It provides methods for initializing, shutting down, + * and managing module variables that are accessible via the admin interface. + * + * This is a standalone class independent from MySQL/PostgreSQL thread handlers. 
+ */ +class MCP_Threads_Handler +{ +private: + int shutdown_; + pthread_rwlock_t rwlock; ///< Read-write lock for thread-safe access + +public: + /** + * @brief Structure holding MCP module configuration variables + * + * These variables are stored in the global_variables table with the + * 'mcp-' prefix and can be modified at runtime. + */ + struct { + bool mcp_enabled; ///< Enable/disable MCP server + int mcp_port; ///< HTTPS port for MCP server (default: 6071) + char* mcp_config_endpoint_auth; ///< Authentication for /mcp/config endpoint + char* mcp_observe_endpoint_auth; ///< Authentication for /mcp/observe endpoint + char* mcp_query_endpoint_auth; ///< Authentication for /mcp/query endpoint + char* mcp_admin_endpoint_auth; ///< Authentication for /mcp/admin endpoint + char* mcp_cache_endpoint_auth; ///< Authentication for /mcp/cache endpoint + int mcp_timeout_ms; ///< Request timeout in milliseconds (default: 30000) + // MySQL Tool Handler configuration + char* mcp_mysql_hosts; ///< Comma-separated list of MySQL hosts + char* mcp_mysql_ports; ///< Comma-separated list of MySQL ports + char* mcp_mysql_user; ///< MySQL username for tool connections + char* mcp_mysql_password; ///< MySQL password for tool connections + char* mcp_mysql_schema; ///< Default schema/database + char* mcp_catalog_path; ///< Path to catalog SQLite database + } variables; + + /** + * @brief Structure holding MCP module status variables (read-only counters) + */ + struct { + unsigned long long total_requests; ///< Total number of requests received + unsigned long long failed_requests; ///< Total number of failed requests + unsigned long long active_connections; ///< Current number of active connections + } status_variables; + + /** + * @brief Pointer to the HTTPS server instance + * + * This is managed by the MCP_Thread module and provides HTTPS + * endpoints for MCP protocol communication. 
+ */ + ProxySQL_MCP_Server* mcp_server; + + /** + * @brief Pointer to the MySQL Tool Handler instance + * + * This provides tools for LLM-based MySQL database exploration, + * including inventory, structure, profiling, sampling, query, + * relationship inference, and catalog operations. + * + * @deprecated Use query_tool_handler instead. Kept for backward compatibility. + */ + MySQL_Tool_Handler* mysql_tool_handler; + + /** + * @brief Pointers to the new dedicated tool handlers for each endpoint + * + * Each endpoint now has its own dedicated tool handler: + * - config_tool_handler: /mcp/config endpoint + * - query_tool_handler: /mcp/query endpoint + * - admin_tool_handler: /mcp/admin endpoint + * - cache_tool_handler: /mcp/cache endpoint + * - observe_tool_handler: /mcp/observe endpoint + */ + Config_Tool_Handler* config_tool_handler; + Query_Tool_Handler* query_tool_handler; + Admin_Tool_Handler* admin_tool_handler; + Cache_Tool_Handler* cache_tool_handler; + Observe_Tool_Handler* observe_tool_handler; + + + /** + * @brief Default constructor for MCP_Threads_Handler + * + * Initializes member variables to default values and sets up + * synchronization primitives. + */ + MCP_Threads_Handler(); + + /** + * @brief Destructor for MCP_Threads_Handler + * + * Cleans up allocated resources including strings and server instance. + */ + ~MCP_Threads_Handler(); + + /** + * @brief Acquire write lock on variables + * + * Locks the module for write access to prevent race conditions + * when modifying variables. + */ + void wrlock(); + + /** + * @brief Release write lock on variables + * + * Unlocks the module after write operations are complete. + */ + void wrunlock(); + + /** + * @brief Initialize the MCP module + * + * Sets up the module with default configuration values and starts + * the HTTPS server if enabled. Must be called before using any + * other methods. 
+ */ + void init(); + + /** + * @brief Shutdown the MCP module + * + * Stops the HTTPS server and performs cleanup. Called during + * ProxySQL shutdown. + */ + void shutdown(); + + /** + * @brief Get the value of a variable as a string + * + * @param name The name of the variable (without 'mcp-' prefix) + * @param val Output buffer to store the value + * @return 0 on success, -1 if variable not found + */ + int get_variable(const char* name, char* val); + + /** + * @brief Set the value of a variable + * + * @param name The name of the variable (without 'mcp-' prefix) + * @param value The new value to set + * @return 0 on success, -1 if variable not found or value invalid + */ + int set_variable(const char* name, const char* value); + + /** + * @brief Check if a variable exists + * + * @param name The name of the variable (without 'mcp-' prefix) + * @return true if the variable exists, false otherwise + */ + bool has_variable(const char* name); + + /** + * @brief Get a list of all variable names + * + * @return Dynamically allocated array of strings, terminated by NULL + * + * @note The caller is responsible for freeing the array and its elements. + */ + char** get_variables_list(); + + /** + * @brief Print the version information + * + * Outputs the MCP module version to stderr. + */ + void print_version(); +}; + +// Global instance of the MCP Threads Handler +extern MCP_Threads_Handler *GloMCPH; + +#endif // __CLASS_MCP_THREAD_H diff --git a/include/MCP_Tool_Handler.h b/include/MCP_Tool_Handler.h new file mode 100644 index 0000000000..6e2039daba --- /dev/null +++ b/include/MCP_Tool_Handler.h @@ -0,0 +1,188 @@ +#ifndef CLASS_MCP_TOOL_HANDLER_H +#define CLASS_MCP_TOOL_HANDLER_H + +#include "cpp.h" +#include +#include + +// Include JSON library +#include "../deps/json/json.hpp" +using json = nlohmann::json; +#define PROXYJSON + +/** + * @brief Base class for all MCP Tool Handlers + * + * This class defines the interface that all tool handlers must implement. 
+ * Each endpoint (config, query, admin, cache, observe) will have its own + * dedicated tool handler that provides specific tools for that endpoint's purpose. + * + * Tool handlers are responsible for: + * - Providing a list of available tools (get_tool_list) + * - Providing detailed tool descriptions (get_tool_description) + * - Executing tool calls with arguments (execute_tool) + * - Managing their own resources (connections, state, etc.) + * - Proper initialization and cleanup + */ +class MCP_Tool_Handler { +public: + /** + * @brief Virtual destructor for proper cleanup in derived classes + */ + virtual ~MCP_Tool_Handler() = default; + + /** + * @brief Get the list of available tools + * + * This method is called in response to the MCP tools/list method. + * Each derived class implements this to return its specific tools. + * + * @return JSON object with tools array + * + * Example return format: + * { + * "tools": [ + * { + * "name": "tool_name", + * "description": "Tool description", + * "inputSchema": {...} + * }, + * ... + * ] + * } + */ + virtual json get_tool_list() = 0; + + /** + * @brief Get detailed description of a specific tool + * + * This method is called in response to the MCP tools/describe method. + * Returns detailed information about a single tool including + * full schema for inputs and outputs. + * + * @param tool_name The name of the tool to describe + * @return JSON object with tool description + * + * Example return format: + * { + * "name": "tool_name", + * "description": "Detailed description", + * "inputSchema": { + * "type": "object", + * "properties": {...}, + * "required": [...] + * } + * } + */ + virtual json get_tool_description(const std::string& tool_name) = 0; + + /** + * @brief Execute a tool with provided arguments + * + * This method is called in response to the MCP tools/call method. + * Executes the requested tool with the provided arguments. 
+ * + * @param tool_name The name of the tool to execute + * @param arguments JSON object containing tool arguments + * @return JSON object with execution result or error + * + * Example return format (success): + * { + * "success": true, + * "result": {...} + * } + * + * Example return format (error): + * { + * "success": false, + * "error": "Error message" + * } + */ + virtual json execute_tool(const std::string& tool_name, const json& arguments) = 0; + + /** + * @brief Initialize the tool handler + * + * Called during ProxySQL startup or when MCP module is enabled. + * Implementations should initialize connections, load configuration, + * and prepare any resources needed for tool execution. + * + * @return 0 on success, -1 on error + */ + virtual int init() = 0; + + /** + * @brief Close and cleanup the tool handler + * + * Called during ProxySQL shutdown or when MCP module is disabled. + * Implementations should close connections, free resources, + * and perform any necessary cleanup. + */ + virtual void close() = 0; + + /** + * @brief Get the handler name + * + * Returns the name of this handler for logging and debugging purposes. + * + * @return Handler name (e.g., "query", "config", "admin") + */ + virtual std::string get_handler_name() const = 0; + +protected: + /** + * @brief Helper method to create a tool description JSON + * + * Standard format for tool descriptions used across all handlers. 
+ * + * @param name Tool name + * @param description Tool description + * @param input_schema JSON schema for input validation + * @return JSON object with tool description + */ + json create_tool_description( + const std::string& name, + const std::string& description, + const json& input_schema + ) { + json tool; + tool["name"] = name; + tool["description"] = description; + if (!input_schema.is_null()) { + tool["inputSchema"] = input_schema; + } + return tool; + } + + /** + * @brief Helper method to create a success response + * + * @param result The result data, stored under the "result" key + * @return JSON object with "success" set to true and "result" set to the given data + */ + json create_success_response(const json& result) { + json response; + response["success"] = true; + response["result"] = result; + return response; + } + + /** + * @brief Helper method to create an error response + * + * @param message Error message, stored under the "error" key + * @param code Optional error code; stored under "code" only when non-negative (the default -1 omits it) + * @return JSON object with "success" set to false, "error" set to the message, and optionally "code" + */ + json create_error_response(const std::string& message, int code = -1) { + json response; + response["success"] = false; + response["error"] = message; + if (code >= 0) { + response["code"] = code; + } + return response; + } +}; + +#endif /* CLASS_MCP_TOOL_HANDLER_H */ diff --git a/include/MySQL_Catalog.h b/include/MySQL_Catalog.h new file mode 100644 index 0000000000..233895c010 --- /dev/null +++ b/include/MySQL_Catalog.h @@ -0,0 +1,159 @@ +#ifndef CLASS_MYSQL_CATALOG_H +#define CLASS_MYSQL_CATALOG_H + +#include "sqlite3db.h" +#include +#include +#include + +/** + * @brief MySQL Catalog for LLM Exploration Memory + * + * This class manages a dedicated SQLite database that stores: + * - Table summaries created by the LLM + * - Domain summaries + * - Join relationships discovered + * - Query patterns and answerability catalog + * + * The catalog serves as the LLM's "external memory" for database exploration.
+ */ +class MySQL_Catalog { +private: + SQLite3DB* db; + std::string db_path; + + /** + * @brief Initialize catalog schema + * @return 0 on success, -1 on error + */ + int init_schema(); + + /** + * @brief Create catalog tables + * @return 0 on success, -1 on error + */ + int create_tables(); + +public: + /** + * @brief Constructor + * @param path Path to the catalog database file + */ + MySQL_Catalog(const std::string& path); + + /** + * @brief Destructor + */ + ~MySQL_Catalog(); + + /** + * @brief Initialize the catalog database + * @return 0 on success, -1 on error + */ + int init(); + + /** + * @brief Close the catalog database + */ + void close(); + + /** + * @brief Catalog upsert - create or update a catalog entry + * + * @param kind The kind of entry ("table", "view", "domain", "metric", "note") + * @param key Unique key (e.g., "db.sales.orders") + * @param document JSON document with summary/details + * @param tags Optional comma-separated tags + * @param links Optional comma-separated links to related keys + * @return 0 on success, -1 on error + */ + int upsert( + const std::string& kind, + const std::string& key, + const std::string& document, + const std::string& tags = "", + const std::string& links = "" + ); + + /** + * @brief Get a catalog entry by kind and key + * + * @param kind The kind of entry + * @param key The unique key + * @param document Output: JSON document + * @return 0 on success, -1 if not found + */ + int get( + const std::string& kind, + const std::string& key, + std::string& document + ); + + /** + * @brief Search catalog entries + * + * @param query Search query (searches in key, document, tags) + * @param kind Optional filter by kind + * @param tags Optional filter by tags (comma-separated) + * @param limit Max results (default 20) + * @param offset Pagination offset (default 0) + * @return JSON array of matching entries + */ + std::string search( + const std::string& query, + const std::string& kind = "", + const std::string& tags 
= "", + int limit = 20, + int offset = 0 + ); + + /** + * @brief List catalog entries with pagination + * + * @param kind Optional filter by kind + * @param limit Max results per page (default 50) + * @param offset Pagination offset (default 0) + * @return JSON array of entries with total count + */ + std::string list( + const std::string& kind = "", + int limit = 50, + int offset = 0 + ); + + /** + * @brief Merge multiple entries into a new summary + * + * @param keys Array of keys to merge + * @param target_key Key for the merged summary + * @param kind Kind for the merged entry (default "domain") + * @param instructions Optional instructions for merging + * @return 0 on success, -1 on error + */ + int merge( + const std::vector& keys, + const std::string& target_key, + const std::string& kind = "domain", + const std::string& instructions = "" + ); + + /** + * @brief Delete a catalog entry + * + * @param kind The kind of entry + * @param key The unique key + * @return 0 on success, -1 if not found + */ + int remove( + const std::string& kind, + const std::string& key + ); + + /** + * @brief Get database handle for direct access + * @return SQLite3DB pointer + */ + SQLite3DB* get_db() { return db; } +}; + +#endif /* CLASS_MYSQL_CATALOG_H */ diff --git a/include/MySQL_Tool_Handler.h b/include/MySQL_Tool_Handler.h new file mode 100644 index 0000000000..fa42b91a50 --- /dev/null +++ b/include/MySQL_Tool_Handler.h @@ -0,0 +1,394 @@ +#ifndef CLASS_MYSQL_TOOL_HANDLER_H +#define CLASS_MYSQL_TOOL_HANDLER_H + +#include "MySQL_Catalog.h" +#include "cpp.h" +#include +#include +#include +#include +#include + +// Forward declaration for MYSQL (mysql.h is included via proxysql.h/cpp.h) +typedef struct st_mysql MYSQL; + +/** + * @brief MySQL Tool Handler for LLM Database Exploration + * + * This class provides tools for an LLM to safely explore a MySQL database: + * - Discovery tools (list_schemas, list_tables, describe_table) + * - Profiling tools (table_profile, column_profile) 
+ * - Sampling tools (sample_rows, sample_distinct) + * - Query tools (run_sql_readonly, explain_sql) + * - Relationship tools (suggest_joins, find_reference_candidates) + * - Catalog tools (external memory for LLM discoveries) + */ +class MySQL_Tool_Handler { +private: + // Connection configuration + std::vector mysql_hosts; ///< List of MySQL host addresses + std::vector mysql_ports; ///< List of MySQL port numbers + std::string mysql_user; ///< MySQL username for authentication + std::string mysql_password; ///< MySQL password for authentication + std::string mysql_schema; ///< Default schema/database name + + // Connection pool + /** + * @brief Represents a single MySQL connection in the pool + * + * Contains the MYSQL handle, connection details, and availability status. + */ + struct MySQLConnection { + MYSQL* mysql; ///< MySQL connection handle (NULL if not connected) + std::string host; ///< Host address for this connection + int port; ///< Port number for this connection + bool in_use; ///< True if connection is currently checked out + }; + std::vector connection_pool; ///< Pool of MySQL connections + pthread_mutex_t pool_lock; ///< Mutex protecting connection pool access + int pool_size; ///< Number of connections in the pool + + // Catalog for LLM memory + MySQL_Catalog* catalog; ///< SQLite catalog for LLM discoveries + + // Query guardrails + int max_rows; ///< Maximum rows to return (default 200) + int timeout_ms; ///< Query timeout in milliseconds (default 2000) + bool allow_select_star; ///< Allow SELECT * without LIMIT (default false) + + /** + * @brief Initialize connection pool to backend MySQL servers + * @return 0 on success, -1 on error + */ + int init_connection_pool(); + + /** + * @brief Get a connection from the pool + * @return Pointer to MYSQL connection, or NULL if none available + */ + MYSQL* get_connection(); + + /** + * @brief Return a connection to the pool + * @param mysql The MYSQL connection to return + */ + void 
return_connection(MYSQL* mysql); + + /** + * @brief Execute a query and return results as JSON + * @param query SQL query to execute + * @return JSON with results or error + */ + std::string execute_query(const std::string& query); + + /** + * @brief Validate SQL is read-only + * @param query SQL to validate + * @return true if safe, false otherwise + */ + bool validate_readonly_query(const std::string& query); + + /** + * @brief Check if SQL contains dangerous keywords + * @param query SQL to check + * @return true if dangerous, false otherwise + */ + bool is_dangerous_query(const std::string& query); + + /** + * @brief Sanitize SQL to prevent injection + * @param query SQL to sanitize + * @return Sanitized query + */ + std::string sanitize_query(const std::string& query); + +public: + /** + * @brief Constructor + * @param hosts Comma-separated list of MySQL hosts + * @param ports Comma-separated list of MySQL ports + * @param user MySQL username + * @param password MySQL password + * @param schema Default schema/database + * @param catalog_path Path to catalog database + */ + MySQL_Tool_Handler( + const std::string& hosts, + const std::string& ports, + const std::string& user, + const std::string& password, + const std::string& schema, + const std::string& catalog_path + ); + + /** + * @brief Destructor + */ + ~MySQL_Tool_Handler(); + + /** + * @brief Initialize the tool handler + * @return 0 on success, -1 on error + */ + int init(); + + /** + * @brief Close connections and cleanup + */ + void close(); + + // ========== Inventory Tools ========== + + /** + * @brief List available schemas/databases + * @param page_token Pagination token (optional) + * @param page_size Page size (default 50) + * @return JSON array of schemas with metadata + */ + std::string list_schemas(const std::string& page_token = "", int page_size = 50); + + /** + * @brief List tables in a schema + * @param schema Schema name (empty for all schemas) + * @param page_token Pagination token 
(optional) + * @param page_size Page size (default 50) + * @param name_filter Optional name pattern filter + * @return JSON array of tables with size estimates + */ + std::string list_tables( + const std::string& schema = "", + const std::string& page_token = "", + int page_size = 50, + const std::string& name_filter = "" + ); + + // ========== Structure Tools ========== + + /** + * @brief Get detailed table schema + * @param schema Schema name + * @param table Table name + * @return JSON with columns, types, keys, indexes + */ + std::string describe_table(const std::string& schema, const std::string& table); + + /** + * @brief Get constraints (FK, unique, etc.) + * @param schema Schema name + * @param table Table name (empty for all tables in schema) + * @return JSON array of constraints + */ + std::string get_constraints(const std::string& schema, const std::string& table = ""); + + /** + * @brief Get view definition + * @param schema Schema name + * @param view View name + * @return JSON with view details + */ + std::string describe_view(const std::string& schema, const std::string& view); + + // ========== Profiling Tools ========== + + /** + * @brief Get quick table profile + * @param schema Schema name + * @param table Table name + * @param mode Profile mode ("quick" or "full") + * @return JSON with table statistics + */ + std::string table_profile( + const std::string& schema, + const std::string& table, + const std::string& mode = "quick" + ); + + /** + * @brief Get column profile (distinct values, nulls, etc.) 
+ * @param schema Schema name + * @param table Table name + * @param column Column name + * @param max_top_values Max distinct values to return (default 20) + * @return JSON with column statistics + */ + std::string column_profile( + const std::string& schema, + const std::string& table, + const std::string& column, + int max_top_values = 20 + ); + + // ========== Sampling Tools ========== + + /** + * @brief Sample rows from a table (with hard cap) + * @param schema Schema name + * @param table Table name + * @param columns Optional comma-separated column list + * @param where Optional WHERE clause + * @param order_by Optional ORDER BY clause + * @param limit Max rows (hard cap default 20) + * @return JSON array of rows + */ + std::string sample_rows( + const std::string& schema, + const std::string& table, + const std::string& columns = "", + const std::string& where = "", + const std::string& order_by = "", + int limit = 20 + ); + + /** + * @brief Sample distinct values from a column + * @param schema Schema name + * @param table Table name + * @param column Column name + * @param where Optional WHERE clause + * @param limit Max distinct values (default 50) + * @return JSON array of distinct values + */ + std::string sample_distinct( + const std::string& schema, + const std::string& table, + const std::string& column, + const std::string& where = "", + int limit = 50 + ); + + // ========== Query Tools ========== + + /** + * @brief Execute read-only SQL with guardrails + * @param sql SQL query + * @param max_rows Max rows (enforced, default 200) + * @param timeout_sec Timeout in seconds (enforced, default 2) + * @return JSON with query results or error + */ + std::string run_sql_readonly( + const std::string& sql, + int max_rows = 200, + int timeout_sec = 2 + ); + + /** + * @brief Explain a query (EXPLAIN/EXPLAIN ANALYZE) + * @param sql SQL query to explain + * @return JSON with execution plan + */ + std::string explain_sql(const std::string& sql); + + // 
========== Relationship Inference Tools ========== + + /** + * @brief Suggest joins between two tables (heuristic-based) + * @param schema Schema name + * @param table_a First table + * @param table_b Second table (empty for auto-detect) + * @param max_candidates Max suggestions (default 5) + * @return JSON array of join candidates with confidence + */ + std::string suggest_joins( + const std::string& schema, + const std::string& table_a, + const std::string& table_b = "", + int max_candidates = 5 + ); + + /** + * @brief Find tables referenced by a column (e.g., orders.customer_id) + * @param schema Schema name + * @param table Table name + * @param column Column name + * @param max_tables Max results (default 50) + * @return JSON array of candidate references + */ + std::string find_reference_candidates( + const std::string& schema, + const std::string& table, + const std::string& column, + int max_tables = 50 + ); + + // ========== Catalog Tools (LLM Memory) ========== + + /** + * @brief Upsert catalog entry + * @param kind Entry kind + * @param key Unique key + * @param document JSON document + * @param tags Comma-separated tags + * @param links Comma-separated links + * @return JSON result + */ + std::string catalog_upsert( + const std::string& kind, + const std::string& key, + const std::string& document, + const std::string& tags = "", + const std::string& links = "" + ); + + /** + * @brief Get catalog entry + * @param kind Entry kind + * @param key Unique key + * @return JSON document or error + */ + std::string catalog_get(const std::string& kind, const std::string& key); + + /** + * @brief Search catalog + * @param query Search query + * @param kind Optional kind filter + * @param tags Optional tag filter + * @param limit Max results (default 20) + * @param offset Pagination offset (default 0) + * @return JSON array of matching entries + */ + std::string catalog_search( + const std::string& query, + const std::string& kind = "", + const std::string& tags = 
"", + int limit = 20, + int offset = 0 + ); + + /** + * @brief List catalog entries + * @param kind Optional kind filter + * @param limit Max results per page (default 50) + * @param offset Pagination offset (default 0) + * @return JSON with total count and results array + */ + std::string catalog_list( + const std::string& kind = "", + int limit = 50, + int offset = 0 + ); + + /** + * @brief Merge catalog entries + * @param keys JSON array of keys to merge + * @param target_key Target key for merged entry + * @param kind Kind for merged entry (default "domain") + * @param instructions Optional instructions + * @return JSON result + */ + std::string catalog_merge( + const std::string& keys, + const std::string& target_key, + const std::string& kind = "domain", + const std::string& instructions = "" + ); + + /** + * @brief Delete catalog entry + * @param kind Entry kind + * @param key Unique key + * @return JSON result + */ + std::string catalog_delete(const std::string& kind, const std::string& key); +}; + +#endif /* CLASS_MYSQL_TOOL_HANDLER_H */ diff --git a/include/Observe_Tool_Handler.h b/include/Observe_Tool_Handler.h new file mode 100644 index 0000000000..d8bc5d3037 --- /dev/null +++ b/include/Observe_Tool_Handler.h @@ -0,0 +1,49 @@ +#ifndef CLASS_OBSERVE_TOOL_HANDLER_H +#define CLASS_OBSERVE_TOOL_HANDLER_H + +#include "MCP_Tool_Handler.h" +#include + +// Forward declaration +class MCP_Threads_Handler; + +/** + * @brief Observability Tool Handler for /mcp/observe endpoint + * + * This handler provides tools for real-time metrics, statistics, and monitoring. 
+ * + * Tools provided (stub implementation): + * - list_stats: List available statistics + * - get_stats: Get specific statistics + * - show_connections: Show active connections + * - show_queries: Show query statistics + * - get_health: Get health check information + * - show_metrics: Show performance metrics + */ +class Observe_Tool_Handler : public MCP_Tool_Handler { +private: + MCP_Threads_Handler* mcp_handler; ///< Pointer to MCP handler + pthread_mutex_t handler_lock; ///< Mutex for thread-safe operations + +public: + /** + * @brief Constructor + * @param handler Pointer to MCP_Threads_Handler + */ + Observe_Tool_Handler(MCP_Threads_Handler* handler); + + /** + * @brief Destructor + */ + ~Observe_Tool_Handler() override; + + // MCP_Tool_Handler interface implementation + json get_tool_list() override; + json get_tool_description(const std::string& tool_name) override; + json execute_tool(const std::string& tool_name, const json& arguments) override; + int init() override; + void close() override; + std::string get_handler_name() const override { return "observe"; } +}; + +#endif /* CLASS_OBSERVE_TOOL_HANDLER_H */ diff --git a/include/ProxySQL_MCP_Server.hpp b/include/ProxySQL_MCP_Server.hpp new file mode 100644 index 0000000000..e4ed237db3 --- /dev/null +++ b/include/ProxySQL_MCP_Server.hpp @@ -0,0 +1,68 @@ +#ifndef CLASS_PROXYSQL_MCP_SERVER_H +#define CLASS_PROXYSQL_MCP_SERVER_H + +#include "proxysql.h" +#include "cpp.h" +#include +#include +#include +#include + +// Forward declaration +class MCP_Threads_Handler; + +// Include httpserver after proxysql.h +#include "httpserver.hpp" + +/** + * @brief ProxySQL MCP Server class + * + * This class wraps an HTTPS server using libhttpserver to provide + * MCP (Model Context Protocol) endpoints. It supports multiple + * MCP server endpoints with their own authentication. 
+ */ +class ProxySQL_MCP_Server { +private: + std::unique_ptr ws; + int port; + pthread_t thread_id; + + // Endpoint resources + std::vector>> _endpoints; + + MCP_Threads_Handler* handler; + +public: + /** + * @brief Constructor for ProxySQL_MCP_Server + * + * Creates a new HTTPS server instance on the specified port. + * + * @param p The port number to listen on + * @param h Pointer to the MCP_Threads_Handler instance + */ + ProxySQL_MCP_Server(int p, MCP_Threads_Handler* h); + + /** + * @brief Destructor for ProxySQL_MCP_Server + * + * Stops the webserver and cleans up resources. + */ + ~ProxySQL_MCP_Server(); + + /** + * @brief Start the HTTPS server + * + * Starts the webserver in a dedicated thread. + */ + void start(); + + /** + * @brief Stop the HTTPS server + * + * Stops the webserver and waits for the thread to complete. + */ + void stop(); +}; + +#endif /* CLASS_PROXYSQL_MCP_SERVER_H */ diff --git a/include/Query_Tool_Handler.h b/include/Query_Tool_Handler.h new file mode 100644 index 0000000000..da067a6863 --- /dev/null +++ b/include/Query_Tool_Handler.h @@ -0,0 +1,99 @@ +#ifndef CLASS_QUERY_TOOL_HANDLER_H +#define CLASS_QUERY_TOOL_HANDLER_H + +#include "MCP_Tool_Handler.h" +#include "MySQL_Tool_Handler.h" +#include + +/** + * @brief Query Tool Handler for /mcp/query endpoint + * + * This handler provides tools for safe database exploration and query execution. + * It wraps the existing MySQL_Tool_Handler to provide MCP protocol compliance. 
+ * + * Tools provided: + * - list_schemas: List databases + * - list_tables: List tables in schema + * - describe_table: Get table structure + * - get_constraints: Get foreign keys and constraints + * - table_profile: Get table statistics + * - column_profile: Get column statistics + * - sample_rows: Get sample data + * - sample_distinct: Sample distinct values + * - run_sql_readonly: Execute read-only SQL + * - explain_sql: Explain query execution plan + * - suggest_joins: Suggest table joins + * - find_reference_candidates: Find foreign key references + * - catalog_upsert: Store data in catalog + * - catalog_get: Retrieve from catalog + * - catalog_search: Search catalog + * - catalog_list: List catalog entries + * - catalog_merge: Merge catalog entries + * - catalog_delete: Delete from catalog + */ +class Query_Tool_Handler : public MCP_Tool_Handler { +private: + MySQL_Tool_Handler* mysql_handler; ///< Underlying MySQL tool handler + bool owns_handler; ///< Whether we created the handler + + /** + * @brief Create tool list schema for a tool + * @param tool_name Name of the tool + * @param description Description of the tool + * @param required_params Required parameter names + * @param optional_params Optional parameter names with types + * @return JSON schema object + */ + json create_tool_schema( + const std::string& tool_name, + const std::string& description, + const std::vector& required_params, + const std::map& optional_params + ); + +public: + /** + * @brief Constructor with existing MySQL_Tool_Handler + * @param handler Existing MySQL_Tool_Handler to wrap + */ + Query_Tool_Handler(MySQL_Tool_Handler* handler); + + /** + * @brief Constructor creating new MySQL_Tool_Handler + * @param hosts Comma-separated list of MySQL hosts + * @param ports Comma-separated list of MySQL ports + * @param user MySQL username + * @param password MySQL password + * @param schema Default schema/database + * @param catalog_path Path to catalog database + */ + 
Query_Tool_Handler( + const std::string& hosts, + const std::string& ports, + const std::string& user, + const std::string& password, + const std::string& schema, + const std::string& catalog_path + ); + + /** + * @brief Destructor + */ + ~Query_Tool_Handler() override; + + // MCP_Tool_Handler interface implementation + json get_tool_list() override; + json get_tool_description(const std::string& tool_name) override; + json execute_tool(const std::string& tool_name, const json& arguments) override; + int init() override; + void close() override; + std::string get_handler_name() const override { return "query"; } + + /** + * @brief Get the underlying MySQL_Tool_Handler + * @return Pointer to MySQL_Tool_Handler + */ + MySQL_Tool_Handler* get_mysql_handler() const { return mysql_handler; } +}; + +#endif /* CLASS_QUERY_TOOL_HANDLER_H */ diff --git a/include/proxysql_admin.h b/include/proxysql_admin.h index 90b046032f..b97aada64b 100644 --- a/include/proxysql_admin.h +++ b/include/proxysql_admin.h @@ -483,6 +483,10 @@ class ProxySQL_Admin { void flush_ldap_variables___runtime_to_database(SQLite3DB *db, bool replace, bool del, bool onlyifempty, bool runtime=false); void flush_ldap_variables___database_to_runtime(SQLite3DB *db, bool replace, const std::string& checksum = "", const time_t epoch = 0); + // MCP (Model Context Protocol) + void flush_mcp_variables___runtime_to_database(SQLite3DB* db, bool replace, bool del, bool onlyifempty, bool runtime = false, bool use_lock = true); + void flush_mcp_variables___database_to_runtime(SQLite3DB* db, bool replace, const std::string& checksum = "", const time_t epoch = 0, bool lock = true); + public: /** * @brief Mutex taken by 'ProxySQL_Admin::admin_session_handler'. 
It's used prevent multiple @@ -773,6 +777,11 @@ class ProxySQL_Admin { void load_pgsql_servers_to_runtime(const incoming_pgsql_servers_t& incoming_pgsql_servers = {}, const runtime_pgsql_servers_checksum_t& peer_runtime_pgsql_server = {}, const pgsql_servers_v2_checksum_t& peer_pgsql_server_v2 = {}); + // MCP (Model Context Protocol) + void init_mcp_variables(); + void load_mcp_variables_to_runtime(const std::string& checksum = "", const time_t epoch = 0) { flush_mcp_variables___database_to_runtime(admindb, true, checksum, epoch); } + void save_mcp_variables_from_runtime() { flush_mcp_variables___runtime_to_database(admindb, true, true, false); } + char* load_pgsql_query_rules_to_runtime(SQLite3_result* SQLite3_query_rules_resultset = NULL, SQLite3_result* SQLite3_query_rules_fast_routing_resultset = NULL, const std::string& checksum = "", const time_t epoch = 0); diff --git a/lib/Admin_Bootstrap.cpp b/lib/Admin_Bootstrap.cpp index 92271f3fdf..f27f09f1fc 100644 --- a/lib/Admin_Bootstrap.cpp +++ b/lib/Admin_Bootstrap.cpp @@ -1208,6 +1208,7 @@ bool ProxySQL_Admin::init(const bootstrap_info_t& bootstrap_info) { flush_clickhouse_variables___database_to_runtime(admindb,true); #endif /* PROXYSQLCLICKHOUSE */ flush_sqliteserver_variables___database_to_runtime(admindb,true); + flush_mcp_variables___database_to_runtime(admindb, true); if (GloVars.__cmd_proxysql_admin_socket) { set_variable((char *)"mysql_ifaces",GloVars.__cmd_proxysql_admin_socket); diff --git a/lib/Admin_FlushVariables.cpp b/lib/Admin_FlushVariables.cpp index e9d6c343ae..26b954a638 100644 --- a/lib/Admin_FlushVariables.cpp +++ b/lib/Admin_FlushVariables.cpp @@ -25,6 +25,9 @@ using json = nlohmann::json; #include "proxysql.h" #include "proxysql_config.h" #include "proxysql_restapi.h" +#include "MCP_Thread.h" +#include "MySQL_Tool_Handler.h" +#include "ProxySQL_MCP_Server.hpp" #include "proxysql_utils.h" #include "prometheus_helpers.h" #include "cpp.h" @@ -139,6 +142,7 @@ extern PgSQL_Logger* 
GloPgSQL_Logger; extern MySQL_STMT_Manager_v14 *GloMyStmt; extern MySQL_Monitor *GloMyMon; extern PgSQL_Threads_Handler* GloPTH; +extern MCP_Threads_Handler* GloMCPH; extern GenAI_Threads_Handler* GloGATH; extern void (*flush_logs_function)(); @@ -1314,5 +1318,246 @@ void ProxySQL_Admin::flush_admin_variables___runtime_to_database(SQLite3DB *db, free(varnames[i]); } free(varnames); +}
+
+// MCP (Model Context Protocol) VARIABLES
+
+/**
+ * @brief Create and start the MCP HTTPS server on the configured mcp_port.
+ *
+ * Only logs an error and returns when the global SSL key or certificate is
+ * not loaded. Callers invoke this only while GloMCPH->mcp_server is NULL.
+ */
+static void mcp_start_server() {
+	if (!GloVars.global.ssl_key_pem_mem || !GloVars.global.ssl_cert_pem_mem) {
+		proxy_error("MCP: Cannot start server - SSL certificates not loaded. Please configure ssl_key_fp and ssl_cert_fp.\n");
+		return;
+	}
+	int port = GloMCPH->variables.mcp_port;
+	proxy_info("MCP: Starting HTTPS server on port %d\n", port);
+	GloMCPH->mcp_server = new ProxySQL_MCP_Server(port, GloMCPH);
+	if (GloMCPH->mcp_server) {
+		GloMCPH->mcp_server->start();
+		proxy_info("MCP: Server started successfully\n");
+	} else {
+		proxy_error("MCP: Failed to create server instance\n");
+	}
+}
+
+/**
+ * @brief Delete the MCP HTTPS server instance, if one exists.
+ */
+static void mcp_stop_server() {
+	if (GloMCPH->mcp_server == NULL) {
+		return;
+	}
+	proxy_info("MCP: Stopping HTTPS server\n");
+	delete GloMCPH->mcp_server;
+	GloMCPH->mcp_server = NULL;
+	proxy_info("MCP: Server stopped successfully\n");
+}
+
+/**
+ * @brief Replace GloMCPH->mysql_tool_handler with one built from the current
+ *        mcp_mysql_* / mcp_catalog_path variables.
+ *
+ * On init() failure the new handler is deleted and the pointer left NULL.
+ * NOTE(review): the old handler is deleted while the HTTPS server is running;
+ * confirm nothing can still be using it concurrently.
+ */
+static void mcp_reinit_mysql_tool_handler() {
+	if (GloMCPH->mysql_tool_handler) {
+		delete GloMCPH->mysql_tool_handler;
+		GloMCPH->mysql_tool_handler = NULL;
+	}
+
+	proxy_info("MCP: Reinitializing MySQL Tool Handler with current configuration\n");
+	GloMCPH->mysql_tool_handler = new MySQL_Tool_Handler(
+		GloMCPH->variables.mcp_mysql_hosts ? GloMCPH->variables.mcp_mysql_hosts : "",
+		GloMCPH->variables.mcp_mysql_ports ? GloMCPH->variables.mcp_mysql_ports : "",
+		GloMCPH->variables.mcp_mysql_user ? GloMCPH->variables.mcp_mysql_user : "",
+		GloMCPH->variables.mcp_mysql_password ? GloMCPH->variables.mcp_mysql_password : "",
+		GloMCPH->variables.mcp_mysql_schema ? GloMCPH->variables.mcp_mysql_schema : "",
+		GloMCPH->variables.mcp_catalog_path ? GloMCPH->variables.mcp_catalog_path : ""
+	);
+
+	if (GloMCPH->mysql_tool_handler->init() != 0) {
+		proxy_error("MCP: Failed to reinitialize MySQL Tool Handler\n");
+		delete GloMCPH->mysql_tool_handler;
+		GloMCPH->mysql_tool_handler = NULL;
+	} else {
+		proxy_info("MCP: MySQL Tool Handler reinitialized successfully\n");
+	}
+}
+
+/**
+ * @brief Load every 'mcp-%' row of global_variables into the MCP handler.
+ *
+ * Each value is handed to GloMCPH->set_variable() with the 'mcp-' prefix
+ * removed, runtime_global_variables is then repopulated, and finally the
+ * HTTPS server is started or stopped according to mcp_enabled.
+ *
+ * @param db       Database the variables are read from.
+ * @param replace  Only logged by this function.
+ * @param checksum Not used by this function.
+ * @param epoch    Not used by this function.
+ * @param lock     When true the admin write lock is taken around the update.
+ */
+void ProxySQL_Admin::flush_mcp_variables___database_to_runtime(SQLite3DB* db, bool replace, const std::string& checksum, const time_t epoch, bool lock) {
+	proxy_debug(PROXY_DEBUG_ADMIN, 4, "Flushing MCP variables. Replace:%d\n", replace);
+	if (GloMCPH == NULL) {
+		proxy_debug(PROXY_DEBUG_ADMIN, 4, "MCP handler not initialized, skipping MCP variables\n");
+		return;
+	}
+	char* error = NULL;
+	int cols = 0;
+	int affected_rows = 0;
+	SQLite3_result* resultset = NULL;
+	char* q = (char*)"SELECT variable_name, variable_value FROM global_variables WHERE variable_name LIKE 'mcp-%'";
+	db->execute_statement(q, &error, &cols, &affected_rows, &resultset);
+	if (error) {
+		proxy_error("Error on %s : %s\n", q, error);
+		return;
+	}
+	if (resultset == NULL) {
+		return;
+	}
+
+	if (lock) wrlock();
+	for (std::vector<SQLite3_row*>::iterator it = resultset->rows.begin(); it != resultset->rows.end(); ++it) {
+		SQLite3_row* r = *it;
+		// The query only returns names matching 'mcp-%', so skipping four
+		// characters always lands on the bare variable name.
+		GloMCPH->set_variable(r->fields[0] + 4, r->fields[1]);
+	}
+
+	// Populate runtime_global_variables
+	// Note: Checksum generation is skipped for MCP until the feature is complete
+	{
+		pthread_mutex_lock(&GloVars.checksum_mutex);
+		// NOTE(review): the admin lock is released and re-acquired here even
+		// when lock == false; this presumably relies on such callers already
+		// holding it - confirm against the call sites.
+		wrunlock(); // Release outer lock before calling runtime_to_database
+		flush_mcp_variables___runtime_to_database(admindb, false, false, false, true, true);
+		wrlock(); // Re-acquire outer lock
+		pthread_mutex_unlock(&GloVars.checksum_mutex);
+	}
+
+	// Handle server start/stop based on mcp_enabled
+	bool enabled = GloMCPH->variables.mcp_enabled;
+	proxy_info("MCP: mcp_enabled=%d after loading variables\n", enabled);
+
+	if (enabled) {
+		if (GloMCPH->mcp_server == NULL) {
+			mcp_start_server();
+		} else {
+			// A running server is left untouched here; port changes are not applied.
+			proxy_info("MCP: Server already running, updating configuration...\n");
+		}
+	} else {
+		mcp_stop_server();
+	}
+
+	if (lock) wrunlock();
+	delete resultset;
+}
+
+/**
+ * @brief Write the MCP handler's current variables into global_variables and,
+ *        optionally, runtime_global_variables.
+ *
+ * @param db          Database to write to.
+ * @param replace     NOTE(review): currently not honored - rows are always
+ *                    written with REPLACE INTO, as in the original code.
+ * @param del         Delete existing 'mcp-%' rows from global_variables first.
+ * @param onlyifempty Return without writing when global_variables already
+ *                    holds at least one 'mcp-%' row.
+ * @param runtime     Also rebuild the 'mcp-%' rows of runtime_global_variables
+ *                    and bring the HTTPS server / MySQL tool handler in line
+ *                    with the current variables.
+ * @param use_lock    Take GloMCPH's write lock while variables are read.
+ */
+void ProxySQL_Admin::flush_mcp_variables___runtime_to_database(SQLite3DB* db, bool replace, bool del, bool onlyifempty, bool runtime, bool use_lock) {
+	proxy_info("MCP: flush_mcp_variables___runtime_to_database called. runtime=%d, use_lock=%d\n", runtime, use_lock);
+	proxy_debug(PROXY_DEBUG_ADMIN, 4, "Flushing MCP variables. Replace:%d, Delete:%d, Only_If_Empty:%d\n", replace, del, onlyifempty);
+	if (GloMCPH == NULL) {
+		proxy_debug(PROXY_DEBUG_ADMIN, 4, "MCP handler not initialized, skipping MCP variables\n");
+		return;
+	}
+	if (onlyifempty) {
+		char* error = NULL;
+		int cols = 0;
+		int affected_rows = 0;
+		SQLite3_result* resultset = NULL;
+		char* q = (char*)"SELECT COUNT(*) FROM global_variables WHERE variable_name LIKE 'mcp-%'";
+		db->execute_statement(q, &error, &cols, &affected_rows, &resultset);
+		if (error) {
+			proxy_error("Error on %s : %s\n", q, error);
+			return;
+		}
+		int matching_rows = 0;
+		// Guard the dereference: the result pointer is only trusted when set.
+		if (resultset) {
+			for (std::vector<SQLite3_row*>::iterator it = resultset->rows.begin(); it != resultset->rows.end(); ++it) {
+				SQLite3_row* r = *it;
+				matching_rows += atoi(r->fields[0]);
+			}
+			delete resultset;
+		}
+		if (matching_rows) {
+			proxy_debug(PROXY_DEBUG_ADMIN, 4, "Table global_variables has MCP variables - skipping\n");
+			return;
+		}
+	}
+	if (del) {
+		proxy_debug(PROXY_DEBUG_ADMIN, 4, "Deleting MCP variables from global_variables\n");
+		db->execute("DELETE FROM global_variables WHERE variable_name LIKE 'mcp-%'");
+	}
+
+	int rc;
+	sqlite3_stmt* statement1 = NULL;
+	rc = db->prepare_v2("REPLACE INTO global_variables(variable_name, variable_value) VALUES(?1, ?2)", &statement1);
+	ASSERT_SQLITE_OK(rc, db);
+	// Values are bound rather than formatted into the SQL text, so a value
+	// containing a quote character cannot break or alter the statement.
+	sqlite3_stmt* statement2 = NULL;
+	if (runtime) {
+		rc = db->prepare_v2("INSERT INTO runtime_global_variables(variable_name, variable_value) VALUES(?1, ?2)", &statement2);
+		ASSERT_SQLITE_OK(rc, db);
+	}
+	if (use_lock) {
+		GloMCPH->wrlock();
+	}
+	if (runtime) {
+		db->execute("DELETE FROM runtime_global_variables WHERE variable_name LIKE 'mcp-%'");
+	}
+	char** varnames = GloMCPH->get_variables_list();
+	int var_count = 0;
+	for (int i = 0; varnames[i]; i++) {
+		var_count++;
+	}
+	proxy_info("MCP: Processing %d variables\n", var_count);
+	for (int i = 0; varnames[i]; i++) {
+		// Zero-initialised so the buffer is a valid empty string even if
+		// get_variable() writes nothing for this name.
+		char val[256] = { 0 };
+		GloMCPH->get_variable(varnames[i], val);
+		const std::string qualified_name = std::string("mcp-") + varnames[i];
+		rc = (*proxy_sqlite3_bind_text)(statement1, 1, qualified_name.c_str(), -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, db);
+		rc = (*proxy_sqlite3_bind_text)(statement1, 2, val, -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, db);
+		SAFE_SQLITE3_STEP2(statement1);
+		rc = (*proxy_sqlite3_clear_bindings)(statement1); ASSERT_SQLITE_OK(rc, db);
+		rc = (*proxy_sqlite3_reset)(statement1); ASSERT_SQLITE_OK(rc, db);
+		if (runtime) {
+			if (i < 3) {
+				proxy_info("MCP: Inserting variable %d: %s = %s\n", i, qualified_name.c_str(), val);
+			}
+			rc = (*proxy_sqlite3_bind_text)(statement2, 1, qualified_name.c_str(), -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, db);
+			rc = (*proxy_sqlite3_bind_text)(statement2, 2, val, -1, SQLITE_TRANSIENT); ASSERT_SQLITE_OK(rc, db);
+			SAFE_SQLITE3_STEP2(statement2);
+			rc = (*proxy_sqlite3_clear_bindings)(statement2); ASSERT_SQLITE_OK(rc, db);
+			rc = (*proxy_sqlite3_reset)(statement2); ASSERT_SQLITE_OK(rc, db);
+		}
+	}
+	proxy_info("MCP: Finished processing %d variables\n", var_count);
+
+	// Handle server start/stop based on mcp_enabled when runtime=true
+	// This ensures the server state matches the enabled flag after loading to runtime
+	if (runtime) {
+		bool enabled = GloMCPH->variables.mcp_enabled;
+		proxy_info("MCP: mcp_enabled=%d, managing server state\n", enabled);
+
+		if (enabled) {
+			if (GloMCPH->mcp_server == NULL) {
+				mcp_start_server();
+			} else {
+				// Server is already running: rebuild the tool handler so it
+				// picks up any change in the MySQL connection variables.
+				proxy_info("MCP: Server already running, checking MySQL tool handler reinitialization\n");
+				mcp_reinit_mysql_tool_handler();
+			}
+		} else {
+			mcp_stop_server();
+		}
+	}
+
+	if (use_lock) {
+		proxy_info("MCP: Releasing lock\n");
+		GloMCPH->wrunlock();
+	}
+	(*proxy_sqlite3_finalize)(statement1);
+	if (statement2) {
+		(*proxy_sqlite3_finalize)(statement2);
+	}
+	for (int i = 0; varnames[i]; i++) {
+		free(varnames[i]);
+	}
+	free(varnames);
 }
diff --git a/lib/Admin_Handler.cpp b/lib/Admin_Handler.cpp index 0611d31918..0070d6ce90 100644 --- a/lib/Admin_Handler.cpp +++ b/lib/Admin_Handler.cpp @@ -42,6 +42,7 @@ using json = nlohmann::json; #include "ProxySQL_Statistics.hpp" #include "MySQL_Logger.hpp" #include "PgSQL_Logger.hpp" +#include "MCP_Thread.h" #include "GenAI_Thread.h" #include "SQLite3_Server.h" #include "Web_Interface.hpp" @@ -152,6 +153,7 @@ extern PgSQL_Logger* GloPgSQL_Logger; extern MySQL_STMT_Manager_v14 *GloMyStmt; extern MySQL_Monitor *GloMyMon; extern PgSQL_Threads_Handler* GloPTH; +extern MCP_Threads_Handler* GloMCPH; extern GenAI_Threads_Handler* GloGATH; extern void (*flush_logs_function)(); @@ -272,6 +274,17 @@ const std::vector SAVE_PGSQL_VARIABLES_TO_MEMORY = { "SAVE PGSQL VARIABLES FROM RUNTIME" , "SAVE PGSQL VARIABLES FROM RUN" }; +const std::vector LOAD_MCP_VARIABLES_FROM_MEMORY = { + "LOAD MCP VARIABLES FROM MEMORY" , + "LOAD MCP VARIABLES FROM MEM" , + "LOAD MCP VARIABLES TO RUNTIME" , + "LOAD MCP VARIABLES TO RUN" }; + +const std::vector SAVE_MCP_VARIABLES_TO_MEMORY = { + "SAVE MCP VARIABLES TO MEMORY" , + "SAVE MCP VARIABLES TO MEM" , + "SAVE MCP VARIABLES FROM RUNTIME" , + "SAVE MCP VARIABLES FROM
RUN" }; // GenAI const std::vector LOAD_GENAI_VARIABLES_FROM_MEMORY = { "LOAD GENAI VARIABLES FROM MEMORY" , @@ -887,6 +900,8 @@ bool is_valid_global_variable(const char *var_name) { } else if (strlen(var_name) > 11 && !strncmp(var_name, "clickhouse-", 11) && GloClickHouseServer && GloClickHouseServer->has_variable(var_name + 11)) { return true; #endif /* PROXYSQLCLICKHOUSE */ + } else if (strlen(var_name) > 4 && !strncmp(var_name, "mcp-", 4) && GloMCPH && GloMCPH->has_variable(var_name + 4)) { + return true; } else { return false; } @@ -940,7 +955,15 @@ bool admin_handler_command_set(char *query_no_space, unsigned int query_no_space free(buff); run_query = false; } else { - const char *update_format = (char *)"UPDATE global_variables SET variable_value=%s WHERE variable_name='%s'"; + // Check if the value is a boolean literal that needs to be quoted as a string + // to prevent SQLite from interpreting it as a boolean keyword (storing 1 or 0) + bool is_boolean = (strcasecmp(var_value, "true") == 0 || strcasecmp(var_value, "false") == 0); + const char *update_format; + if (is_boolean) { + update_format = (char *)"UPDATE global_variables SET variable_value='%s' WHERE variable_name='%s'"; + } else { + update_format = (char *)"UPDATE global_variables SET variable_value=%s WHERE variable_name='%s'"; + } // Computed length is more than needed since it also counts the format modifiers (%s). 
size_t query_len = strlen(update_format) + strlen(var_name) + strlen(var_value) + 1; char *query = (char *)l_alloc(query_len); @@ -1782,6 +1805,66 @@ bool admin_handler_command_load_or_save(char *query_no_space, unsigned int query } } + // MCP (Model Context Protocol) VARIABLES - DISK commands + if ((query_no_space_length > 19) && ((!strncasecmp("SAVE MCP VARIABLES ", query_no_space, 19)) || (!strncasecmp("LOAD MCP VARIABLES ", query_no_space, 19)))) { + const std::string modname = "mcp_variables"; + tuple, vector>& t = load_save_disk_commands[modname]; + if (is_admin_command_or_alias(get<1>(t), query_no_space, query_no_space_length)) { + l_free(*ql, *q); + *q = l_strdup("INSERT OR REPLACE INTO main.global_variables SELECT * FROM disk.global_variables WHERE variable_name LIKE 'mcp-%'"); + *ql = strlen(*q) + 1; + return true; + } + if (is_admin_command_or_alias(get<2>(t), query_no_space, query_no_space_length)) { + l_free(*ql, *q); + *q = l_strdup("INSERT OR REPLACE INTO disk.global_variables SELECT * FROM main.global_variables WHERE variable_name LIKE 'mcp-%'"); + *ql = strlen(*q) + 1; + return true; + } + } + + // MCP (Model Context Protocol) LOAD/SAVE handlers + if (is_admin_command_or_alias(LOAD_MCP_VARIABLES_FROM_MEMORY, query_no_space, query_no_space_length)) { + ProxySQL_Admin* SPA = (ProxySQL_Admin*)pa; + SPA->load_mcp_variables_to_runtime(); + proxy_debug(PROXY_DEBUG_ADMIN, 4, "Loaded mcp variables to RUNTIME\n"); + SPA->send_ok_msg_to_client(sess, NULL, 0, query_no_space); + return false; + } + if (is_admin_command_or_alias(SAVE_MCP_VARIABLES_TO_MEMORY, query_no_space, query_no_space_length)) { + ProxySQL_Admin* SPA = (ProxySQL_Admin*)pa; + SPA->save_mcp_variables_from_runtime(); + proxy_debug(PROXY_DEBUG_ADMIN, 4, "Saved mcp variables from RUNTIME\n"); + SPA->send_ok_msg_to_client(sess, NULL, 0, query_no_space); + return false; + }
+
+	// LOAD MCP VARIABLES FROM CONFIG: read the "mcp" section of the config
+	// file into global_variables. The length is taken from the literal itself
+	// so the guard cannot drift from the command text (the literal is 30
+	// characters long; a hard-coded 31 never matched).
+	if ((query_no_space_length == strlen("LOAD MCP VARIABLES FROM CONFIG")) && (!strncasecmp("LOAD MCP VARIABLES FROM CONFIG", query_no_space, query_no_space_length))) {
+		proxy_info("Received %s command\n", query_no_space);
+		// Declared up front because every branch below replies through it.
+		ProxySQL_Admin *SPA=(ProxySQL_Admin *)pa;
+		if (GloVars.configfile_open) {
+			proxy_debug(PROXY_DEBUG_ADMIN, 4, "Loading from file %s\n", GloVars.config_file);
+			if (GloVars.confFile->OpenFile(NULL)==true) {
+				int rows=0;
+				rows=SPA->proxysql_config().Read_Global_Variables_from_configfile("mcp");
+				proxy_debug(PROXY_DEBUG_ADMIN, 4, "Loaded mcp global variables from CONFIG\n");
+				SPA->send_ok_msg_to_client(sess, NULL, rows, query_no_space);
+				GloVars.confFile->CloseFile();
+			} else {
+				proxy_debug(PROXY_DEBUG_ADMIN, 4, "Unable to open or parse config file %s\n", GloVars.config_file);
+				char *s=(char *)"Unable to open or parse config file %s";
+				// strlen(s) already counts the two "%s" characters, so the
+				// buffer has room for the expanded path plus the terminator.
+				char *m=(char *)malloc(strlen(s)+strlen(GloVars.config_file)+1);
+				sprintf(m,s,GloVars.config_file);
+				SPA->send_error_msg_to_client(sess, m);
+				free(m);
+			}
+		} else {
+			proxy_debug(PROXY_DEBUG_ADMIN, 4, "Unknown config file\n");
+			SPA->send_error_msg_to_client(sess, (char *)"Config file unknown");
+		}
+		return false;
+	}
+
if ((query_no_space_length > 14) && (!strncasecmp("LOAD COREDUMP ", query_no_space, 14))) { if ( is_admin_command_or_alias(LOAD_COREDUMP_FROM_MEMORY, query_no_space, query_no_space_length) ) { @@ -3672,6 +3755,23 @@ void admin_session_handler(S* sess, void *_pa, PtrSize_t *pkt) { SPA->admindb->execute_statement(q, &error, &cols, &affected_rows, &resultset); } + // MCP (Model Context Protocol) VARIABLES CHECKSUM + if (strlen(query_no_space)==strlen("CHECKSUM DISK MCP VARIABLES") && !strncasecmp("CHECKSUM DISK MCP VARIABLES", query_no_space, strlen(query_no_space))){ + char *q=(char *)"SELECT * FROM global_variables WHERE variable_name LIKE 'mcp-%' ORDER BY variable_name"; + tablename=(char *)"MCP VARIABLES"; + SPA->configdb->execute_statement(q, &error, &cols, &affected_rows, &resultset); + } + + if ((strlen(query_no_space)==strlen("CHECKSUM MEMORY MCP VARIABLES") && !strncasecmp("CHECKSUM MEMORY MCP VARIABLES",
query_no_space, strlen(query_no_space))) + || + (strlen(query_no_space)==strlen("CHECKSUM MEM MCP VARIABLES") && !strncasecmp("CHECKSUM MEM MCP VARIABLES", query_no_space, strlen(query_no_space))) + || + (strlen(query_no_space)==strlen("CHECKSUM MCP VARIABLES") && !strncasecmp("CHECKSUM MCP VARIABLES", query_no_space, strlen(query_no_space)))){ + char *q=(char *)"SELECT * FROM global_variables WHERE variable_name LIKE 'mcp-%' ORDER BY variable_name"; + tablename=(char *)"MCP VARIABLES"; + SPA->admindb->execute_statement(q, &error, &cols, &affected_rows, &resultset); + } + if (error) { proxy_error("Error: %s\n", error); char buf[1024]; @@ -3960,6 +4060,13 @@ void admin_session_handler(S* sess, void *_pa, PtrSize_t *pkt) { goto __run_query; } + if (query_no_space_length == strlen("SHOW MCP VARIABLES") && !strncasecmp("SHOW MCP VARIABLES", query_no_space, query_no_space_length)) { + l_free(query_length, query); + query = l_strdup("SELECT variable_name AS Variable_name, variable_value AS Value FROM global_variables WHERE variable_name LIKE 'mcp-%' ORDER BY variable_name"); + query_length = strlen(query) + 1; + goto __run_query; + } + strA=(char *)"SHOW CREATE TABLE "; strB=(char *)"SELECT name AS 'table' , REPLACE(REPLACE(sql,' , ', X'2C0A20202020'),'CREATE TABLE %s (','CREATE TABLE %s ('||X'0A20202020') AS 'Create Table' FROM %s.sqlite_master WHERE type='table' AND name='%s'"; strAl=strlen(strA); diff --git a/lib/Admin_Tool_Handler.cpp b/lib/Admin_Tool_Handler.cpp new file mode 100644 index 0000000000..db8d582537 --- /dev/null +++ b/lib/Admin_Tool_Handler.cpp @@ -0,0 +1,155 @@ +#include "../deps/json/json.hpp" +using json = nlohmann::json; +#define PROXYJSON + +#include "Admin_Tool_Handler.h" +#include "MCP_Thread.h" +#include "proxysql_debug.h" + +Admin_Tool_Handler::Admin_Tool_Handler(MCP_Threads_Handler* handler) + : mcp_handler(handler) +{ + pthread_mutex_init(&handler_lock, NULL); + proxy_debug(PROXY_DEBUG_GENERIC, 3, "Admin_Tool_Handler created\n"); +} + 
+Admin_Tool_Handler::~Admin_Tool_Handler() { + close(); + pthread_mutex_destroy(&handler_lock); + proxy_debug(PROXY_DEBUG_GENERIC, 3, "Admin_Tool_Handler destroyed\n"); +} + +int Admin_Tool_Handler::init() { + proxy_info("Admin_Tool_Handler initialized\n"); + return 0; +} + +void Admin_Tool_Handler::close() { + proxy_debug(PROXY_DEBUG_GENERIC, 2, "Admin_Tool_Handler closed\n"); +} + +json Admin_Tool_Handler::get_tool_list() { + json tools = json::array(); + + // Stub tools for administrative operations + tools.push_back(create_tool_description( + "admin_list_users", + "List all MySQL users configured in ProxySQL", + { + {"type", "object"}, + {"properties", {}} + } + )); + + tools.push_back(create_tool_description( + "admin_show_processes", + "Show running MySQL processes", + { + {"type", "object"}, + {"properties", {}} + } + )); + + tools.push_back(create_tool_description( + "admin_kill_query", + "Kill a running query by process ID", + { + {"type", "object"}, + {"properties", { + {"process_id", { + {"type", "integer"}, + {"description", "Process ID to kill"} + }} + }}, + {"required", {"process_id"}} + } + )); + + tools.push_back(create_tool_description( + "admin_flush_cache", + "Flush ProxySQL query cache", + { + {"type", "object"}, + {"properties", { + {"cache_type", { + {"type", "string"}, + {"enum", {"query_cache", "host_cache", "all"}}, + {"description", "Type of cache to flush"} + }} + }}, + {"required", {"cache_type"}} + } + )); + + tools.push_back(create_tool_description( + "admin_reload", + "Reload ProxySQL configuration (users, servers, etc.)", + { + {"type", "object"}, + {"properties", { + {"target", { + {"type", "string"}, + {"enum", {"users", "servers", "all"}}, + {"description", "What to reload"} + }} + }}, + {"required", {"target"}} + } + )); + + json result; + result["tools"] = tools; + return result; +} + +json Admin_Tool_Handler::get_tool_description(const std::string& tool_name) { + json tools_list = get_tool_list(); + for (const auto& tool : 
tools_list["tools"]) { + if (tool["name"] == tool_name) { + return tool; + } + } + return create_error_response("Tool not found: " + tool_name); +} + +json Admin_Tool_Handler::execute_tool(const std::string& tool_name, const json& arguments) { + pthread_mutex_lock(&handler_lock); + + json result; + + // Stub implementation - returns placeholder responses + if (tool_name == "admin_list_users") { + result = create_success_response(json{ + {"message", "admin_list_users functionality to be implemented"}, + {"users", json::array()} + }); + } else if (tool_name == "admin_show_processes") { + result = create_success_response(json{ + {"message", "admin_show_processes functionality to be implemented"}, + {"processes", json::array()} + }); + } else if (tool_name == "admin_kill_query") { + int process_id = arguments.value("process_id", 0); + result = create_success_response(json{ + {"message", "admin_kill_query functionality to be implemented"}, + {"process_id", process_id} + }); + } else if (tool_name == "admin_flush_cache") { + std::string cache_type = arguments.value("cache_type", "all"); + result = create_success_response(json{ + {"message", "admin_flush_cache functionality to be implemented"}, + {"cache_type", cache_type} + }); + } else if (tool_name == "admin_reload") { + std::string target = arguments.value("target", "all"); + result = create_success_response(json{ + {"message", "admin_reload functionality to be implemented"}, + {"target", target} + }); + } else { + result = create_error_response("Unknown tool: " + tool_name); + } + + pthread_mutex_unlock(&handler_lock); + return result; +} diff --git a/lib/Cache_Tool_Handler.cpp b/lib/Cache_Tool_Handler.cpp new file mode 100644 index 0000000000..c809001b0d --- /dev/null +++ b/lib/Cache_Tool_Handler.cpp @@ -0,0 +1,177 @@ +#include "../deps/json/json.hpp" +using json = nlohmann::json; +#define PROXYJSON + +#include "Cache_Tool_Handler.h" +#include "MCP_Thread.h" +#include "proxysql_debug.h" + 
+Cache_Tool_Handler::Cache_Tool_Handler(MCP_Threads_Handler* handler) + : mcp_handler(handler) +{ + pthread_mutex_init(&handler_lock, NULL); + proxy_debug(PROXY_DEBUG_GENERIC, 3, "Cache_Tool_Handler created\n"); +} + +Cache_Tool_Handler::~Cache_Tool_Handler() { + close(); + pthread_mutex_destroy(&handler_lock); + proxy_debug(PROXY_DEBUG_GENERIC, 3, "Cache_Tool_Handler destroyed\n"); +} + +int Cache_Tool_Handler::init() { + proxy_info("Cache_Tool_Handler initialized\n"); + return 0; +} + +void Cache_Tool_Handler::close() { + proxy_debug(PROXY_DEBUG_GENERIC, 2, "Cache_Tool_Handler closed\n"); +} + +json Cache_Tool_Handler::get_tool_list() { + json tools = json::array(); + + // Stub tools for cache management + tools.push_back(create_tool_description( + "get_cache_stats", + "Get ProxySQL query cache statistics", + { + {"type", "object"}, + {"properties", {}} + } + )); + + tools.push_back(create_tool_description( + "invalidate_cache", + "Invalidate specific cache entries", + { + {"type", "object"}, + {"properties", { + {"pattern", { + {"type", "string"}, + {"description", "Pattern matching queries to invalidate"} + }} + }}, + {"required", {"pattern"}} + } + )); + + tools.push_back(create_tool_description( + "set_cache_ttl", + "Set time-to-live for cache entries", + { + {"type", "object"}, + {"properties", { + {"ttl_ms", { + {"type", "integer"}, + {"description", "TTL in milliseconds"} + }} + }}, + {"required", {"ttl_ms"}} + } + )); + + tools.push_back(create_tool_description( + "clear_cache", + "Clear all entries from the query cache", + { + {"type", "object"}, + {"properties", {}} + } + )); + + tools.push_back(create_tool_description( + "warm_cache", + "Warm up cache with specified queries", + { + {"type", "object"}, + {"properties", { + {"queries", { + {"type", "array"}, + {"description", "Array of SQL queries to execute"} + }} + }}, + {"required", {"queries"}} + } + )); + + tools.push_back(create_tool_description( + "get_cache_entries", + "List currently cached 
queries", + { + {"type", "object"}, + {"properties", { + {"limit", { + {"type", "integer"}, + {"description", "Maximum number of entries to return"} + }} + }} + } + )); + + json result; + result["tools"] = tools; + return result; +} + +json Cache_Tool_Handler::get_tool_description(const std::string& tool_name) { + json tools_list = get_tool_list(); + for (const auto& tool : tools_list["tools"]) { + if (tool["name"] == tool_name) { + return tool; + } + } + return create_error_response("Tool not found: " + tool_name); +} + +json Cache_Tool_Handler::execute_tool(const std::string& tool_name, const json& arguments) { + pthread_mutex_lock(&handler_lock); + + json result; + + // Stub implementation - returns placeholder responses + if (tool_name == "get_cache_stats") { + result = create_success_response(json{ + {"message", "get_cache_stats functionality to be implemented"}, + {"stats", { + {"entries", 0}, + {"hit_rate", 0.0}, + {"memory_usage", 0} + }} + }); + } else if (tool_name == "invalidate_cache") { + std::string pattern = arguments.value("pattern", ""); + result = create_success_response(json{ + {"message", "invalidate_cache functionality to be implemented"}, + {"pattern", pattern} + }); + } else if (tool_name == "set_cache_ttl") { + int ttl_ms = arguments.value("ttl_ms", 0); + result = create_success_response(json{ + {"message", "set_cache_ttl functionality to be implemented"}, + {"ttl_ms", ttl_ms} + }); + } else if (tool_name == "clear_cache") { + result = create_success_response(json{ + {"message", "clear_cache functionality to be implemented"} + }); + } else if (tool_name == "warm_cache") { + json queries = arguments.value("queries", json::array()); + result = create_success_response(json{ + {"message", "warm_cache functionality to be implemented"}, + {"query_count", queries.size()} + }); + } else if (tool_name == "get_cache_entries") { + int limit = arguments.value("limit", 100); + result = create_success_response(json{ + {"message", "get_cache_entries 
functionality to be implemented"}, + {"entries", json::array()}, + {"limit", limit} + }); + } else { + result = create_error_response("Unknown tool: " + tool_name); + } + + pthread_mutex_unlock(&handler_lock); + return result; +} diff --git a/lib/Config_Tool_Handler.cpp b/lib/Config_Tool_Handler.cpp new file mode 100644 index 0000000000..865ba13dff --- /dev/null +++ b/lib/Config_Tool_Handler.cpp @@ -0,0 +1,264 @@ +#include "../deps/json/json.hpp" +using json = nlohmann::json; +#define PROXYJSON + +#include "Config_Tool_Handler.h" +#include "MCP_Thread.h" +#include "proxysql_debug.h" +#include "proxysql_utils.h" + +#include + +Config_Tool_Handler::Config_Tool_Handler(MCP_Threads_Handler* handler) + : mcp_handler(handler) +{ + pthread_mutex_init(&handler_lock, NULL); + proxy_debug(PROXY_DEBUG_GENERIC, 3, "Config_Tool_Handler created\n"); +} + +Config_Tool_Handler::~Config_Tool_Handler() { + close(); + pthread_mutex_destroy(&handler_lock); + proxy_debug(PROXY_DEBUG_GENERIC, 3, "Config_Tool_Handler destroyed\n"); +} + +int Config_Tool_Handler::init() { + proxy_info("Config_Tool_Handler initialized\n"); + return 0; +} + +void Config_Tool_Handler::close() { + proxy_debug(PROXY_DEBUG_GENERIC, 2, "Config_Tool_Handler closed\n"); +} + +json Config_Tool_Handler::get_tool_list() { + json tools = json::array(); + + // get_config + tools.push_back(create_tool_description( + "get_config", + "Get the current value of a ProxySQL MCP configuration variable", + { + {"type", "object"}, + {"properties", { + {"variable_name", { + {"type", "string"}, + {"description", "Variable name (without 'mcp-' prefix)"} + }} + }}, + {"required", {"variable_name"}} + } + )); + + // set_config + tools.push_back(create_tool_description( + "set_config", + "Set the value of a ProxySQL MCP configuration variable", + { + {"type", "object"}, + {"properties", { + {"variable_name", { + {"type", "string"}, + {"description", "Variable name (without 'mcp-' prefix)"} + }}, + {"value", { + {"type", "string"}, + 
{"description", "New value for the variable"} + }} + }}, + {"required", {"variable_name", "value"}} + } + )); + + // reload_config + tools.push_back(create_tool_description( + "reload_config", + "Reload ProxySQL MCP configuration from disk/memory to runtime", + { + {"type", "object"}, + {"properties", { + {"scope", { + {"type", "string"}, + {"enum", {"disk", "memory", "runtime"}}, + {"description", "Reload scope: 'disk' (from disk to memory), 'memory' (not applicable), 'runtime' (from memory to runtime)"} + }} + }}, + {"required", {"scope"}} + } + )); + + // list_variables + tools.push_back(create_tool_description( + "list_variables", + "List all ProxySQL MCP configuration variables", + { + {"type", "object"}, + {"properties", { + {"filter", { + {"type", "string"}, + {"description", "Optional filter pattern (e.g., 'mysql_%' for MySQL-related variables)"} + }} + }} + } + )); + + // get_status + tools.push_back(create_tool_description( + "get_status", + "Get ProxySQL MCP server status information", + { + {"type", "object"}, + {"properties", {}} + } + )); + + json result; + result["tools"] = tools; + return result; +} + +json Config_Tool_Handler::get_tool_description(const std::string& tool_name) { + // For now, just return the basic description from the list + // In a full implementation, this would provide more detailed schema info + json tools_list = get_tool_list(); + for (const auto& tool : tools_list["tools"]) { + if (tool["name"] == tool_name) { + return tool; + } + } + return create_error_response("Tool not found: " + tool_name); +} + +json Config_Tool_Handler::execute_tool(const std::string& tool_name, const json& arguments) { + pthread_mutex_lock(&handler_lock); + + json result; + + try { + if (tool_name == "get_config") { + std::string var_name = arguments.value("variable_name", ""); + result = handle_get_config(var_name); + } else if (tool_name == "set_config") { + std::string var_name = arguments.value("variable_name", ""); + std::string var_value = 
arguments.value("value", ""); + result = handle_set_config(var_name, var_value); + } else if (tool_name == "reload_config") { + std::string scope = arguments.value("scope", "runtime"); + result = handle_reload_config(scope); + } else if (tool_name == "list_variables") { + std::string filter = arguments.value("filter", ""); + result = handle_list_variables(filter); + } else if (tool_name == "get_status") { + result = handle_get_status(); + } else { + result = create_error_response("Unknown tool: " + tool_name); + } + } catch (const std::exception& e) { + result = create_error_response(std::string("Exception: ") + e.what()); + } + + pthread_mutex_unlock(&handler_lock); + return result; +} + +json Config_Tool_Handler::handle_get_config(const std::string& var_name) { + if (!mcp_handler) { + return create_error_response("MCP handler not initialized"); + } + + char val[1024]; + if (mcp_handler->get_variable(var_name.c_str(), val) == 0) { + json result; + result["variable_name"] = var_name; + result["value"] = val; + return create_success_response(result); + } else { + return create_error_response("Variable not found: " + var_name); + } +} + +json Config_Tool_Handler::handle_set_config(const std::string& var_name, const std::string& var_value) { + if (!mcp_handler) { + return create_error_response("MCP handler not initialized"); + } + + if (mcp_handler->set_variable(var_name.c_str(), var_value.c_str()) == 0) { + json result; + result["variable_name"] = var_name; + result["value"] = var_value; + result["message"] = "Variable set successfully. 
Use 'reload_config' to load to runtime."; + return create_success_response(result); + } else { + return create_error_response("Failed to set variable: " + var_name); + } +} + +json Config_Tool_Handler::handle_reload_config(const std::string& scope) { + if (!mcp_handler) { + return create_error_response("MCP handler not initialized"); + } + + // This is a stub - actual implementation would call Admin_FlushVariables + // For now, return success with a message + json result; + result["scope"] = scope; + result["message"] = "Configuration reload functionality to be implemented"; + return create_success_response(result); +} + +json Config_Tool_Handler::handle_list_variables(const std::string& filter) { + if (!mcp_handler) { + return create_error_response("MCP handler not initialized"); + } + + char** vars = mcp_handler->get_variables_list(); + if (!vars) { + return create_error_response("Failed to get variables list"); + } + + json variables = json::array(); + + // Filter and list variables + for (int i = 0; vars[i] != NULL; i++) { + std::string var_name = vars[i]; + + // Apply filter if provided + if (!filter.empty()) { + // Simple pattern matching (expand to full SQL LIKE pattern later) + if (var_name.find(filter) == std::string::npos) { + continue; + } + } + + char val[1024]; + if (mcp_handler->get_variable(var_name.c_str(), val) == 0) { + json var; + var["name"] = var_name; + var["value"] = val; + variables.push_back(var); + } + + free(vars[i]); + } + free(vars); + + json result; + result["variables"] = variables; + result["count"] = variables.size(); + return create_success_response(result); +} + +json Config_Tool_Handler::handle_get_status() { + if (!mcp_handler) { + return create_error_response("MCP handler not initialized"); + } + + json status; + status["enabled"] = mcp_handler->variables.mcp_enabled; + status["port"] = mcp_handler->variables.mcp_port; + status["total_requests"] = mcp_handler->status_variables.total_requests; + status["failed_requests"] = 
mcp_handler->status_variables.failed_requests; + status["active_connections"] = mcp_handler->status_variables.active_connections; + + return create_success_response(status); +} diff --git a/lib/MCP_Endpoint.cpp b/lib/MCP_Endpoint.cpp new file mode 100644 index 0000000000..f5484a94a9 --- /dev/null +++ b/lib/MCP_Endpoint.cpp @@ -0,0 +1,345 @@ +#include "../deps/json/json.hpp" +using json = nlohmann::json; +#define PROXYJSON + +#include "MCP_Endpoint.h" +#include "MCP_Thread.h" +#include "MySQL_Tool_Handler.h" +#include "MCP_Tool_Handler.h" +#include "proxysql_debug.h" +#include "cpp.h" + +using namespace httpserver; + +MCP_JSONRPC_Resource::MCP_JSONRPC_Resource(MCP_Threads_Handler* h, MCP_Tool_Handler* th, const std::string& name) + : handler(h), tool_handler(th), endpoint_name(name) +{ + proxy_debug(PROXY_DEBUG_GENERIC, 3, "Created MCP JSON-RPC resource for endpoint '%s'\n", name.c_str()); +} + +MCP_JSONRPC_Resource::~MCP_JSONRPC_Resource() { + proxy_debug(PROXY_DEBUG_GENERIC, 3, "Destroyed MCP JSON-RPC resource for endpoint '%s'\n", endpoint_name.c_str()); +} + +bool MCP_JSONRPC_Resource::authenticate_request(const httpserver::http_request& req) { + if (!handler) { + proxy_error("MCP authentication on %s: handler is NULL\n", endpoint_name.c_str()); + return false; + } + + // Get the expected auth token for this endpoint + char* expected_token = nullptr; + + if (endpoint_name == "config") { + expected_token = handler->variables.mcp_config_endpoint_auth; + } else if (endpoint_name == "observe") { + expected_token = handler->variables.mcp_observe_endpoint_auth; + } else if (endpoint_name == "query") { + expected_token = handler->variables.mcp_query_endpoint_auth; + } else if (endpoint_name == "admin") { + expected_token = handler->variables.mcp_admin_endpoint_auth; + } else if (endpoint_name == "cache") { + expected_token = handler->variables.mcp_cache_endpoint_auth; + } else { + proxy_error("MCP authentication on %s: unknown endpoint\n", endpoint_name.c_str()); + 
return false; + } + + // If no auth token is configured, allow the request (no authentication required) + if (!expected_token || strlen(expected_token) == 0) { + proxy_debug(PROXY_DEBUG_GENERIC, 4, "MCP authentication on %s: no auth configured, allowing request\n", endpoint_name.c_str()); + return true; + } + + // Try to get Bearer token from Authorization header + std::string auth_header = req.get_header("Authorization"); + + if (auth_header.empty()) { + // Try getting from query parameter as fallback + const std::map& args = req.get_args(); + auto it = args.find("token"); + if (it != args.end()) { + auth_header = "Bearer " + it->second; + } + } + + if (auth_header.empty()) { + proxy_debug(PROXY_DEBUG_GENERIC, 4, "MCP authentication on %s: no Authorization header or token param\n", endpoint_name.c_str()); + return false; + } + + // Check if it's a Bearer token + const std::string bearer_prefix = "Bearer "; + if (auth_header.length() <= bearer_prefix.length() || + auth_header.compare(0, bearer_prefix.length(), bearer_prefix) != 0) { + proxy_debug(PROXY_DEBUG_GENERIC, 4, "MCP authentication on %s: invalid Authorization header format\n", endpoint_name.c_str()); + return false; + } + + // Extract the token + std::string provided_token = auth_header.substr(bearer_prefix.length()); + + // Trim whitespace + size_t start = provided_token.find_first_not_of(" \t\n\r"); + size_t end = provided_token.find_last_not_of(" \t\n\r"); + if (start != std::string::npos && end != std::string::npos) { + provided_token = provided_token.substr(start, end - start + 1); + } + + // Compare tokens + bool authenticated = (provided_token == expected_token); + + if (authenticated) { + proxy_debug(PROXY_DEBUG_GENERIC, 4, "MCP authentication on %s: success\n", endpoint_name.c_str()); + } else { + proxy_debug(PROXY_DEBUG_GENERIC, 4, "MCP authentication on %s: failed (token mismatch)\n", endpoint_name.c_str()); + } + + return authenticated; +} + +std::string 
MCP_JSONRPC_Resource::create_jsonrpc_response( + const std::string& result, + const std::string& id +) { + json j; + j["jsonrpc"] = "2.0"; + j["result"] = json::parse(result); + j["id"] = id; + return j.dump(); +} + +std::string MCP_JSONRPC_Resource::create_jsonrpc_error( + int code, + const std::string& message, + const std::string& id +) { + json j; + j["jsonrpc"] = "2.0"; + json error; + error["code"] = code; + error["message"] = message; + j["error"] = error; + j["id"] = id; + return j.dump(); +} + +std::shared_ptr MCP_JSONRPC_Resource::handle_jsonrpc_request( + const httpserver::http_request& req +) { + // Update statistics + if (handler) { + handler->status_variables.total_requests++; + } + + // Get request body + std::string req_body = req.get_content(); + std::string req_path = req.get_path(); + + proxy_debug(PROXY_DEBUG_GENERIC, 2, "MCP request on %s: %s\n", req_path.c_str(), req_body.c_str()); + + // Validate JSON + json req_json; + try { + req_json = json::parse(req_body); + } catch (json::parse_error& e) { + proxy_error("MCP request on %s: Invalid JSON - %s\n", req_path.c_str(), e.what()); + if (handler) { + handler->status_variables.failed_requests++; + } + auto response = std::shared_ptr(new string_response( + create_jsonrpc_error(-32700, "Parse error", ""), + http::http_utils::http_bad_request + )); + response->with_header("Content-Type", "application/json"); + return response; + } + + // Validate JSON-RPC 2.0 basic structure + if (!req_json.contains("jsonrpc") || req_json["jsonrpc"] != "2.0") { + proxy_error("MCP request on %s: Missing or invalid jsonrpc version\n", req_path.c_str()); + if (handler) { + handler->status_variables.failed_requests++; + } + auto response = std::shared_ptr(new string_response( + create_jsonrpc_error(-32600, "Invalid Request", ""), + http::http_utils::http_bad_request + )); + response->with_header("Content-Type", "application/json"); + return response; + } + + if (!req_json.contains("method")) { + proxy_error("MCP request 
on %s: Missing method field\n", req_path.c_str()); + if (handler) { + handler->status_variables.failed_requests++; + } + auto response = std::shared_ptr(new string_response( + create_jsonrpc_error(-32600, "Invalid Request", ""), + http::http_utils::http_bad_request + )); + response->with_header("Content-Type", "application/json"); + return response; + } + + // Get request ID (optional but recommended) + std::string req_id = ""; + if (req_json.contains("id")) { + if (req_json["id"].is_string()) { + req_id = req_json["id"].get(); + } else if (req_json["id"].is_number()) { + req_id = std::to_string(req_json["id"].get()); + } + } + + // Get method name + std::string method = req_json["method"].get(); + proxy_debug(PROXY_DEBUG_GENERIC, 2, "MCP method '%s' requested on endpoint '%s'\n", method.c_str(), endpoint_name.c_str()); + + // Handle different methods + json result; + + if (method == "tools/call" || method == "tools/list" || method == "tools/describe") { + // Route tool-related methods to the endpoint's tool handler + if (!tool_handler) { + proxy_error("MCP request on %s: Tool Handler not initialized\n", req_path.c_str()); + if (handler) { + handler->status_variables.failed_requests++; + } + auto response = std::shared_ptr(new string_response( + create_jsonrpc_error(-32000, "Tool Handler not initialized for endpoint: " + endpoint_name, req_id), + http::http_utils::http_internal_server_error + )); + response->with_header("Content-Type", "application/json"); + return response; + } + + // Route to appropriate tool handler method + if (method == "tools/list") { + result = handle_tools_list(); + } else if (method == "tools/describe") { + result = handle_tools_describe(req_json); + } else if (method == "tools/call") { + result = handle_tools_call(req_json); + } + } else if (method == "initialize" || method == "ping") { + // Handle MCP protocol methods + if (method == "initialize") { + result["protocolVersion"] = "2024-11-05"; + result["capabilities"] = json::object(); + 
result["serverInfo"] = { + {"name", "proxysql-mcp-mysql-tools"}, + {"version", MCP_THREAD_VERSION} + }; + } else if (method == "ping") { + result["status"] = "ok"; + } + } else { + // Unknown method + proxy_info("MCP: Unknown method '%s' on endpoint '%s'\n", method.c_str(), endpoint_name.c_str()); + auto response = std::shared_ptr(new string_response( + create_jsonrpc_error(-32601, "Method not found", req_id), + http::http_utils::http_not_found + )); + response->with_header("Content-Type", "application/json"); + return response; + } + + auto response = std::shared_ptr(new string_response( + create_jsonrpc_response(result.dump(), req_id), + http::http_utils::http_ok + )); + response->with_header("Content-Type", "application/json"); + return response; +} + +const std::shared_ptr MCP_JSONRPC_Resource::render_POST( + const httpserver::http_request& req +) { + std::string req_path = req.get_path(); + proxy_debug(PROXY_DEBUG_GENERIC, 2, "Received MCP POST request on %s\n", req_path.c_str()); + + // Check Content-Type header + std::string content_type = req.get_header(http::http_utils::http_header_content_type); + if (content_type.empty() || + (content_type.find("application/json") == std::string::npos && + content_type.find("text/json") == std::string::npos)) { + proxy_error("MCP request on %s: Invalid Content-Type '%s'\n", req_path.c_str(), content_type.c_str()); + if (handler) { + handler->status_variables.failed_requests++; + } + auto response = std::shared_ptr(new string_response( + create_jsonrpc_error(-32600, "Invalid Request: Content-Type must be application/json", ""), + http::http_utils::http_unsupported_media_type + )); + response->with_header("Content-Type", "application/json"); + return response; + } + + // Authenticate request + if (!authenticate_request(req)) { + proxy_error("MCP request on %s: Authentication failed\n", req_path.c_str()); + if (handler) { + handler->status_variables.failed_requests++; + } + auto response = std::shared_ptr(new 
string_response( + create_jsonrpc_error(-32001, "Unauthorized", ""), + http::http_utils::http_unauthorized + )); + response->with_header("Content-Type", "application/json"); + return response; + } + + // Handle the JSON-RPC request + return handle_jsonrpc_request(req); +} + +// Helper method to handle tools/list +json MCP_JSONRPC_Resource::handle_tools_list() { + if (!tool_handler) { + json result; + result["error"] = "Tool handler not initialized"; + return result; + } + return tool_handler->get_tool_list(); +} + +// Helper method to handle tools/describe +json MCP_JSONRPC_Resource::handle_tools_describe(const json& req_json) { + if (!tool_handler) { + json result; + result["error"] = "Tool handler not initialized"; + return result; + } + + if (!req_json.contains("params") || !req_json["params"].contains("name")) { + json result; + result["error"] = "Missing tool name"; + return result; + } + + std::string tool_name = req_json["params"]["name"].get(); + return tool_handler->get_tool_description(tool_name); +} + +// Helper method to handle tools/call +json MCP_JSONRPC_Resource::handle_tools_call(const json& req_json) { + if (!tool_handler) { + json result; + result["error"] = "Tool handler not initialized"; + return result; + } + + if (!req_json.contains("params") || !req_json["params"].contains("name")) { + json result; + result["error"] = "Missing tool name"; + return result; + } + + std::string tool_name = req_json["params"]["name"].get(); + json arguments = req_json["params"].contains("arguments") ? 
req_json["params"]["arguments"] : json::object(); + + proxy_debug(PROXY_DEBUG_GENERIC, 2, "MCP tool call: %s with args: %s\n", tool_name.c_str(), arguments.dump().c_str()); + + return tool_handler->execute_tool(tool_name, arguments); +} diff --git a/lib/MCP_Thread.cpp b/lib/MCP_Thread.cpp new file mode 100644 index 0000000000..9d8a578608 --- /dev/null +++ b/lib/MCP_Thread.cpp @@ -0,0 +1,364 @@ +#include "MCP_Thread.h" +#include "MySQL_Tool_Handler.h" +#include "Config_Tool_Handler.h" +#include "Query_Tool_Handler.h" +#include "Admin_Tool_Handler.h" +#include "Cache_Tool_Handler.h" +#include "Observe_Tool_Handler.h" +#include "proxysql_debug.h" +#include "ProxySQL_MCP_Server.hpp" + +#include +#include +#include +#include + +// Define the array of variable names for the MCP module +static const char* mcp_thread_variables_names[] = { + "enabled", + "port", + "config_endpoint_auth", + "observe_endpoint_auth", + "query_endpoint_auth", + "admin_endpoint_auth", + "cache_endpoint_auth", + "timeout_ms", + // MySQL Tool Handler configuration + "mysql_hosts", + "mysql_ports", + "mysql_user", + "mysql_password", + "mysql_schema", + "catalog_path", + NULL +}; + +MCP_Threads_Handler::MCP_Threads_Handler() { + shutdown_ = 0; + + // Initialize the rwlock + pthread_rwlock_init(&rwlock, NULL); + + // Initialize variables with default values + variables.mcp_enabled = false; + variables.mcp_port = 6071; + variables.mcp_config_endpoint_auth = strdup(""); + variables.mcp_observe_endpoint_auth = strdup(""); + variables.mcp_query_endpoint_auth = strdup(""); + variables.mcp_admin_endpoint_auth = strdup(""); + variables.mcp_cache_endpoint_auth = strdup(""); + variables.mcp_timeout_ms = 30000; + // MySQL Tool Handler default values + variables.mcp_mysql_hosts = strdup("127.0.0.1"); + variables.mcp_mysql_ports = strdup("3306"); + variables.mcp_mysql_user = strdup(""); + variables.mcp_mysql_password = strdup(""); + variables.mcp_mysql_schema = strdup(""); + variables.mcp_catalog_path = 
strdup("mcp_catalog.db"); + + status_variables.total_requests = 0; + status_variables.failed_requests = 0; + status_variables.active_connections = 0; + + mcp_server = NULL; + mysql_tool_handler = NULL; + + // Initialize new tool handlers + config_tool_handler = NULL; + query_tool_handler = NULL; + admin_tool_handler = NULL; + cache_tool_handler = NULL; + observe_tool_handler = NULL; +} + +MCP_Threads_Handler::~MCP_Threads_Handler() { + if (variables.mcp_config_endpoint_auth) + free(variables.mcp_config_endpoint_auth); + if (variables.mcp_observe_endpoint_auth) + free(variables.mcp_observe_endpoint_auth); + if (variables.mcp_query_endpoint_auth) + free(variables.mcp_query_endpoint_auth); + if (variables.mcp_admin_endpoint_auth) + free(variables.mcp_admin_endpoint_auth); + if (variables.mcp_cache_endpoint_auth) + free(variables.mcp_cache_endpoint_auth); + // Free MySQL Tool Handler variables + if (variables.mcp_mysql_hosts) + free(variables.mcp_mysql_hosts); + if (variables.mcp_mysql_ports) + free(variables.mcp_mysql_ports); + if (variables.mcp_mysql_user) + free(variables.mcp_mysql_user); + if (variables.mcp_mysql_password) + free(variables.mcp_mysql_password); + if (variables.mcp_mysql_schema) + free(variables.mcp_mysql_schema); + if (variables.mcp_catalog_path) + free(variables.mcp_catalog_path); + + if (mcp_server) { + delete mcp_server; + mcp_server = NULL; + } + + if (mysql_tool_handler) { + delete mysql_tool_handler; + mysql_tool_handler = NULL; + } + + // Clean up new tool handlers + if (config_tool_handler) { + delete config_tool_handler; + config_tool_handler = NULL; + } + if (query_tool_handler) { + delete query_tool_handler; + query_tool_handler = NULL; + } + if (admin_tool_handler) { + delete admin_tool_handler; + admin_tool_handler = NULL; + } + if (cache_tool_handler) { + delete cache_tool_handler; + cache_tool_handler = NULL; + } + if (observe_tool_handler) { + delete observe_tool_handler; + observe_tool_handler = NULL; + } + + // Destroy the rwlock + 
pthread_rwlock_destroy(&rwlock); +} + +void MCP_Threads_Handler::init() { + proxy_info("Initializing MCP Threads Handler\n"); + // For now, this is a simple initialization + // The HTTPS server will be started when mcp_enabled is set to true + // and will be managed through ProxySQL_Admin + print_version(); +} + +void MCP_Threads_Handler::shutdown() { + proxy_info("Shutting down MCP Threads Handler\n"); + shutdown_ = 1; + + // Stop the HTTPS server if it's running + if (mcp_server) { + delete mcp_server; + mcp_server = NULL; + } +} + +void MCP_Threads_Handler::wrlock() { + pthread_rwlock_wrlock(&rwlock); +} + +void MCP_Threads_Handler::wrunlock() { + pthread_rwlock_unlock(&rwlock); +} + +int MCP_Threads_Handler::get_variable(const char* name, char* val) { + if (!name || !val) + return -1; + + if (!strcmp(name, "enabled")) { + sprintf(val, "%s", variables.mcp_enabled ? "true" : "false"); + return 0; + } + if (!strcmp(name, "port")) { + sprintf(val, "%d", variables.mcp_port); + return 0; + } + if (!strcmp(name, "config_endpoint_auth")) { + sprintf(val, "%s", variables.mcp_config_endpoint_auth ? variables.mcp_config_endpoint_auth : ""); + return 0; + } + if (!strcmp(name, "observe_endpoint_auth")) { + sprintf(val, "%s", variables.mcp_observe_endpoint_auth ? variables.mcp_observe_endpoint_auth : ""); + return 0; + } + if (!strcmp(name, "query_endpoint_auth")) { + sprintf(val, "%s", variables.mcp_query_endpoint_auth ? variables.mcp_query_endpoint_auth : ""); + return 0; + } + if (!strcmp(name, "admin_endpoint_auth")) { + sprintf(val, "%s", variables.mcp_admin_endpoint_auth ? variables.mcp_admin_endpoint_auth : ""); + return 0; + } + if (!strcmp(name, "cache_endpoint_auth")) { + sprintf(val, "%s", variables.mcp_cache_endpoint_auth ? 
variables.mcp_cache_endpoint_auth : ""); + return 0; + } + if (!strcmp(name, "timeout_ms")) { + sprintf(val, "%d", variables.mcp_timeout_ms); + return 0; + } + // MySQL Tool Handler configuration + if (!strcmp(name, "mysql_hosts")) { + sprintf(val, "%s", variables.mcp_mysql_hosts ? variables.mcp_mysql_hosts : ""); + return 0; + } + if (!strcmp(name, "mysql_ports")) { + sprintf(val, "%s", variables.mcp_mysql_ports ? variables.mcp_mysql_ports : ""); + return 0; + } + if (!strcmp(name, "mysql_user")) { + sprintf(val, "%s", variables.mcp_mysql_user ? variables.mcp_mysql_user : ""); + return 0; + } + if (!strcmp(name, "mysql_password")) { + sprintf(val, "%s", variables.mcp_mysql_password ? variables.mcp_mysql_password : ""); + return 0; + } + if (!strcmp(name, "mysql_schema")) { + sprintf(val, "%s", variables.mcp_mysql_schema ? variables.mcp_mysql_schema : ""); + return 0; + } + if (!strcmp(name, "catalog_path")) { + sprintf(val, "%s", variables.mcp_catalog_path ? variables.mcp_catalog_path : ""); + return 0; + } + + return -1; +} + +int MCP_Threads_Handler::set_variable(const char* name, const char* value) { + if (!name || !value) + return -1; + + if (!strcmp(name, "enabled")) { + if (strcasecmp(value, "true") == 0 || strcasecmp(value, "1") == 0) { + variables.mcp_enabled = true; + return 0; + } + if (strcasecmp(value, "false") == 0 || strcasecmp(value, "0") == 0) { + variables.mcp_enabled = false; + return 0; + } + return -1; + } + if (!strcmp(name, "port")) { + int port = atoi(value); + if (port > 0 && port < 65536) { + variables.mcp_port = port; + return 0; + } + return -1; + } + if (!strcmp(name, "config_endpoint_auth")) { + if (variables.mcp_config_endpoint_auth) + free(variables.mcp_config_endpoint_auth); + variables.mcp_config_endpoint_auth = strdup(value); + return 0; + } + if (!strcmp(name, "observe_endpoint_auth")) { + if (variables.mcp_observe_endpoint_auth) + free(variables.mcp_observe_endpoint_auth); + variables.mcp_observe_endpoint_auth = strdup(value); + 
return 0; + } + if (!strcmp(name, "query_endpoint_auth")) { + if (variables.mcp_query_endpoint_auth) + free(variables.mcp_query_endpoint_auth); + variables.mcp_query_endpoint_auth = strdup(value); + return 0; + } + if (!strcmp(name, "admin_endpoint_auth")) { + if (variables.mcp_admin_endpoint_auth) + free(variables.mcp_admin_endpoint_auth); + variables.mcp_admin_endpoint_auth = strdup(value); + return 0; + } + if (!strcmp(name, "cache_endpoint_auth")) { + if (variables.mcp_cache_endpoint_auth) + free(variables.mcp_cache_endpoint_auth); + variables.mcp_cache_endpoint_auth = strdup(value); + return 0; + } + if (!strcmp(name, "timeout_ms")) { + int timeout = atoi(value); + if (timeout >= 0) { + variables.mcp_timeout_ms = timeout; + return 0; + } + return -1; + } + // MySQL Tool Handler configuration + if (!strcmp(name, "mysql_hosts")) { + if (variables.mcp_mysql_hosts) + free(variables.mcp_mysql_hosts); + variables.mcp_mysql_hosts = strdup(value); + return 0; + } + if (!strcmp(name, "mysql_ports")) { + if (variables.mcp_mysql_ports) + free(variables.mcp_mysql_ports); + variables.mcp_mysql_ports = strdup(value); + return 0; + } + if (!strcmp(name, "mysql_user")) { + if (variables.mcp_mysql_user) + free(variables.mcp_mysql_user); + variables.mcp_mysql_user = strdup(value); + return 0; + } + if (!strcmp(name, "mysql_password")) { + if (variables.mcp_mysql_password) + free(variables.mcp_mysql_password); + variables.mcp_mysql_password = strdup(value); + return 0; + } + if (!strcmp(name, "mysql_schema")) { + if (variables.mcp_mysql_schema) + free(variables.mcp_mysql_schema); + variables.mcp_mysql_schema = strdup(value); + return 0; + } + if (!strcmp(name, "catalog_path")) { + if (variables.mcp_catalog_path) + free(variables.mcp_catalog_path); + variables.mcp_catalog_path = strdup(value); + return 0; + } + + return -1; +} + +bool MCP_Threads_Handler::has_variable(const char* name) { + if (!name) + return false; + + for (int i = 0; mcp_thread_variables_names[i]; i++) { + if 
(!strcmp(name, mcp_thread_variables_names[i])) { + return true; + } + } + return false; +} + +char** MCP_Threads_Handler::get_variables_list() { + // Count variables + int count = 0; + while (mcp_thread_variables_names[count]) { + count++; + } + + // Allocate array + char** list = (char**)malloc(sizeof(char*) * (count + 1)); + if (!list) + return NULL; + + // Fill array + for (int i = 0; i < count; i++) { + list[i] = strdup(mcp_thread_variables_names[i]); + } + list[count] = NULL; + + return list; +} + +void MCP_Threads_Handler::print_version() { + fprintf(stderr, "MCP Threads Handler rev. %s -- %s -- %s\n", MCP_THREAD_VERSION, __FILE__, __TIMESTAMP__); +} diff --git a/lib/Makefile b/lib/Makefile index 10c7a25ef8..231036b57f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -80,7 +80,11 @@ _OBJ_CXX := ProxySQL_GloVars.oo network.oo debug.oo configfile.oo Query_Cache.oo MySQL_Set_Stmt_Parser.oo PgSQL_Set_Stmt_Parser.oo \ PgSQL_Variables_Validator.oo PgSQL_ExplicitTxnStateMgr.oo \ PgSQL_PreparedStatement.oo PgSQL_Extended_Query_Message.oo \ - pgsql_tokenizer.oo + pgsql_tokenizer.oo \ + MCP_Thread.oo ProxySQL_MCP_Server.oo MCP_Endpoint.oo \ + MySQL_Catalog.oo MySQL_Tool_Handler.oo \ + Config_Tool_Handler.oo Query_Tool_Handler.oo \ + Admin_Tool_Handler.oo Cache_Tool_Handler.oo Observe_Tool_Handler.oo OBJ_CXX := $(patsubst %,$(ODIR)/%,$(_OBJ_CXX)) HEADERS := ../include/*.h ../include/*.hpp diff --git a/lib/MySQL_Catalog.cpp b/lib/MySQL_Catalog.cpp new file mode 100644 index 0000000000..86f085c607 --- /dev/null +++ b/lib/MySQL_Catalog.cpp @@ -0,0 +1,356 @@ +#include "MySQL_Catalog.h" +#include "cpp.h" +#include "proxysql.h" +#include +#include + +MySQL_Catalog::MySQL_Catalog(const std::string& path) + : db(NULL), db_path(path) +{ +} + +MySQL_Catalog::~MySQL_Catalog() { + close(); +} + +int MySQL_Catalog::init() { + // Initialize database connection + db = new SQLite3DB(); + char path_buf[db_path.size() + 1]; + strcpy(path_buf, db_path.c_str()); + int rc = 
db->open(path_buf, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE); + if (rc != SQLITE_OK) { + proxy_error("Failed to open catalog database at %s: %d\n", db_path.c_str(), rc); + return -1; + } + + // Initialize schema + return init_schema(); +} + +void MySQL_Catalog::close() { + if (db) { + delete db; + db = NULL; + } +} + +int MySQL_Catalog::init_schema() { + // Enable foreign keys + db->execute("PRAGMA foreign_keys = ON"); + + // Create tables + int rc = create_tables(); + if (rc) { + proxy_error("Failed to create catalog tables\n"); + return -1; + } + + proxy_info("MySQL Catalog database initialized at %s\n", db_path.c_str()); + return 0; +} + +int MySQL_Catalog::create_tables() { + // Main catalog table + const char* create_catalog_table = + "CREATE TABLE IF NOT EXISTS catalog (" + " id INTEGER PRIMARY KEY AUTOINCREMENT," + " kind TEXT NOT NULL," // table, view, domain, metric, note + " key TEXT NOT NULL," // e.g., "db.sales.orders" + " document TEXT NOT NULL," // JSON content + " tags TEXT," // comma-separated tags + " links TEXT," // comma-separated related keys + " created_at INTEGER DEFAULT (strftime('%s', 'now'))," + " updated_at INTEGER DEFAULT (strftime('%s', 'now'))," + " UNIQUE(kind, key)" + ");"; + + if (!db->execute(create_catalog_table)) { + proxy_error("Failed to create catalog table\n"); + return -1; + } + + // Indexes for search + db->execute("CREATE INDEX IF NOT EXISTS idx_catalog_kind ON catalog(kind)"); + db->execute("CREATE INDEX IF NOT EXISTS idx_catalog_tags ON catalog(tags)"); + db->execute("CREATE INDEX IF NOT EXISTS idx_catalog_created ON catalog(created_at)"); + + // Full-text search table for better search (optional enhancement) + db->execute("CREATE VIRTUAL TABLE IF NOT EXISTS catalog_fts USING fts5(" + " kind, key, document, tags, content='catalog', content_rowid='id'" + ");"); + + // Triggers to keep FTS in sync + db->execute("DROP TRIGGER IF EXISTS catalog_ai"); + db->execute("DROP TRIGGER IF EXISTS catalog_ad"); + + 
db->execute("CREATE TRIGGER IF NOT EXISTS catalog_ai AFTER INSERT ON catalog BEGIN" + " INSERT INTO catalog_fts(rowid, kind, key, document, tags)" + " VALUES (new.id, new.kind, new.key, new.document, new.tags);" + "END;"); + + db->execute("CREATE TRIGGER IF NOT EXISTS catalog_ad AFTER DELETE ON catalog BEGIN" + " INSERT INTO catalog_fts(catalog_fts, rowid, kind, key, document, tags)" + " VALUES ('delete', old.id, old.kind, old.key, old.document, old.tags);" + "END;"); + + // Merge operations log + const char* create_merge_log = + "CREATE TABLE IF NOT EXISTS merge_log (" + " id INTEGER PRIMARY KEY AUTOINCREMENT," + " target_key TEXT NOT NULL," + " source_keys TEXT NOT NULL," // JSON array + " instructions TEXT," + " created_at INTEGER DEFAULT (strftime('%s', 'now'))" + ");"; + + db->execute(create_merge_log); + + return 0; +} + +int MySQL_Catalog::upsert( + const std::string& kind, + const std::string& key, + const std::string& document, + const std::string& tags, + const std::string& links +) { + sqlite3_stmt* stmt = NULL; + + const char* upsert_sql = + "INSERT INTO catalog(kind, key, document, tags, links, updated_at) " + "VALUES(?1, ?2, ?3, ?4, ?5, strftime('%s', 'now')) " + "ON CONFLICT(kind, key) DO UPDATE SET " + " document = ?3," + " tags = ?4," + " links = ?5," + " updated_at = strftime('%s', 'now')"; + + int rc = db->prepare_v2(upsert_sql, &stmt); + if (rc != SQLITE_OK) { + proxy_error("Failed to prepare catalog upsert: %d\n", rc); + return -1; + } + + (*proxy_sqlite3_bind_text)(stmt, 1, kind.c_str(), -1, SQLITE_TRANSIENT); + (*proxy_sqlite3_bind_text)(stmt, 2, key.c_str(), -1, SQLITE_TRANSIENT); + (*proxy_sqlite3_bind_text)(stmt, 3, document.c_str(), -1, SQLITE_TRANSIENT); + (*proxy_sqlite3_bind_text)(stmt, 4, tags.c_str(), -1, SQLITE_TRANSIENT); + (*proxy_sqlite3_bind_text)(stmt, 5, links.c_str(), -1, SQLITE_TRANSIENT); + + SAFE_SQLITE3_STEP2(stmt); + (*proxy_sqlite3_finalize)(stmt); + + proxy_debug(PROXY_DEBUG_GENERIC, 3, "Catalog upsert: kind=%s, 
key=%s\n", kind.c_str(), key.c_str()); + return 0; +} + +int MySQL_Catalog::get( + const std::string& kind, + const std::string& key, + std::string& document +) { + sqlite3_stmt* stmt = NULL; + + const char* get_sql = + "SELECT document FROM catalog " + "WHERE kind = ?1 AND key = ?2"; + + int rc = db->prepare_v2(get_sql, &stmt); + if (rc != SQLITE_OK) { + proxy_error("Failed to prepare catalog get: %d\n", rc); + return -1; + } + + (*proxy_sqlite3_bind_text)(stmt, 1, kind.c_str(), -1, SQLITE_TRANSIENT); + (*proxy_sqlite3_bind_text)(stmt, 2, key.c_str(), -1, SQLITE_TRANSIENT); + + rc = (*proxy_sqlite3_step)(stmt); + + if (rc == SQLITE_ROW) { + const char* doc = (const char*)(*proxy_sqlite3_column_text)(stmt, 0); + if (doc) { + document = doc; + } + (*proxy_sqlite3_finalize)(stmt); + return 0; + } + + (*proxy_sqlite3_finalize)(stmt); + return -1; +} + +std::string MySQL_Catalog::search( + const std::string& query, + const std::string& kind, + const std::string& tags, + int limit, + int offset +) { + std::ostringstream sql; + sql << "SELECT kind, key, document, tags, links FROM catalog WHERE 1=1"; + + // Add kind filter + if (!kind.empty()) { + sql << " AND kind = '" << kind << "'"; + } + + // Add tags filter + if (!tags.empty()) { + sql << " AND tags LIKE '%" << tags << "%'"; + } + + // Add search query + if (!query.empty()) { + sql << " AND (key LIKE '%" << query << "%' " + << "OR document LIKE '%" << query << "%' " + << "OR tags LIKE '%" << query << "%')"; + } + + sql << " ORDER BY updated_at DESC LIMIT " << limit << " OFFSET " << offset; + + char* error = NULL; + int cols = 0, affected = 0; + SQLite3_result* resultset = NULL; + + db->execute_statement(sql.str().c_str(), &error, &cols, &affected, &resultset); + if (error) { + proxy_error("Catalog search error: %s\n", error); + return "[]"; + } + + // Build JSON result + std::ostringstream json; + json << "["; + bool first = true; + + if (resultset) { + for (std::vector::iterator it = resultset->rows.begin(); + it != 
resultset->rows.end(); ++it) { + SQLite3_row* row = *it; + if (!first) json << ","; + first = false; + + json << "{" + << "\"kind\":\"" << (row->fields[0] ? row->fields[0] : "") << "\"," + << "\"key\":\"" << (row->fields[1] ? row->fields[1] : "") << "\"," + << "\"document\":" << (row->fields[2] ? row->fields[2] : "null") << "," + << "\"tags\":\"" << (row->fields[3] ? row->fields[3] : "") << "\"," + << "\"links\":\"" << (row->fields[4] ? row->fields[4] : "") << "\"" + << "}"; + } + delete resultset; + } + + json << "]"; + return json.str(); +} + +std::string MySQL_Catalog::list( + const std::string& kind, + int limit, + int offset +) { + std::ostringstream sql; + sql << "SELECT kind, key, document, tags, links FROM catalog"; + + if (!kind.empty()) { + sql << " WHERE kind = '" << kind << "'"; + } + + sql << " ORDER BY kind, key ASC LIMIT " << limit << " OFFSET " << offset; + + // Get total count + std::ostringstream count_sql; + count_sql << "SELECT COUNT(*) FROM catalog"; + if (!kind.empty()) { + count_sql << " WHERE kind = '" << kind << "'"; + } + + char* error = NULL; + int cols = 0, affected = 0; + SQLite3_result* resultset = NULL; + int total = 0; + + SQLite3_result* count_result = db->execute_statement(count_sql.str().c_str(), &error, &cols, &affected); + if (count_result && !count_result->rows.empty()) { + total = atoi(count_result->rows[0]->fields[0]); + } + delete count_result; + + resultset = NULL; + db->execute_statement(sql.str().c_str(), &error, &cols, &affected, &resultset); + + // Build JSON result with total count + std::ostringstream json; + json << "{\"total\":" << total << ",\"results\":["; + + bool first = true; + if (resultset) { + for (std::vector::iterator it = resultset->rows.begin(); + it != resultset->rows.end(); ++it) { + SQLite3_row* row = *it; + if (!first) json << ","; + first = false; + + json << "{" + << "\"kind\":\"" << (row->fields[0] ? row->fields[0] : "") << "\"," + << "\"key\":\"" << (row->fields[1] ? 
row->fields[1] : "") << "\"," + << "\"document\":" << (row->fields[2] ? row->fields[2] : "null") << "," + << "\"tags\":\"" << (row->fields[3] ? row->fields[3] : "") << "\"," + << "\"links\":\"" << (row->fields[4] ? row->fields[4] : "") << "\"" + << "}"; + } + delete resultset; + } + + json << "]}"; + return json.str(); +} + +int MySQL_Catalog::merge( + const std::vector& keys, + const std::string& target_key, + const std::string& kind, + const std::string& instructions +) { + // Fetch all source entries + std::string source_docs = ""; + for (const auto& key : keys) { + std::string doc; + // Try different kinds for flexible merging + if (get("table", key, doc) == 0 || get("view", key, doc) == 0) { + source_docs += doc + "\n\n"; + } + } + + // Create merged document + std::string merged_doc = "{"; + merged_doc += "\"source_keys\":["; + + for (size_t i = 0; i < keys.size(); i++) { + if (i > 0) merged_doc += ","; + merged_doc += "\"" + keys[i] + "\""; + } + merged_doc += "],"; + merged_doc += "\"instructions\":" + std::string(instructions.empty() ? 
"\"\"" : "\"" + instructions + "\""); + merged_doc += "}"; + + return upsert(kind, target_key, merged_doc, "", ""); +} + +int MySQL_Catalog::remove( + const std::string& kind, + const std::string& key +) { + std::ostringstream sql; + sql << "DELETE FROM catalog WHERE kind = '" << kind << "' AND key = '" << key << "'"; + + if (!db->execute(sql.str().c_str())) { + proxy_error("Catalog remove error\n"); + return -1; + } + + return 0; +} diff --git a/lib/MySQL_Tool_Handler.cpp b/lib/MySQL_Tool_Handler.cpp new file mode 100644 index 0000000000..b7132b09da --- /dev/null +++ b/lib/MySQL_Tool_Handler.cpp @@ -0,0 +1,984 @@ +#include "MySQL_Tool_Handler.h" +#include "proxysql_debug.h" +#include "cpp.h" +#include +#include +#include +#include + +// MySQL client library +#include + +// JSON library +#include "../deps/json/json.hpp" +using json = nlohmann::json; +#define PROXYJSON + +MySQL_Tool_Handler::MySQL_Tool_Handler( + const std::string& hosts, + const std::string& ports, + const std::string& user, + const std::string& password, + const std::string& schema, + const std::string& catalog_path +) + : catalog(NULL), + max_rows(200), + timeout_ms(2000), + allow_select_star(false), + pool_size(0) +{ + // Initialize the pool mutex + pthread_mutex_init(&pool_lock, NULL); + + // Parse hosts + std::istringstream h(hosts); + std::string host; + while (std::getline(h, host, ',')) { + // Trim whitespace + host.erase(0, host.find_first_not_of(" \t")); + host.erase(host.find_last_not_of(" \t") + 1); + if (!host.empty()) { + mysql_hosts.push_back(host); + } + } + + // Parse ports + std::istringstream p(ports); + std::string port; + while (std::getline(p, port, ',')) { + port.erase(0, port.find_first_not_of(" \t")); + port.erase(port.find_last_not_of(" \t") + 1); + if (!port.empty()) { + mysql_ports.push_back(atoi(port.c_str())); + } + } + + // Ensure ports array matches hosts array size + while (mysql_ports.size() < mysql_hosts.size()) { + mysql_ports.push_back(3306); // Default MySQL 
port + } + + mysql_user = user; + mysql_password = password; + mysql_schema = schema; + + // Create catalog + catalog = new MySQL_Catalog(catalog_path); +} + +MySQL_Tool_Handler::~MySQL_Tool_Handler() { + close(); + if (catalog) { + delete catalog; + } + // Destroy the pool mutex + pthread_mutex_destroy(&pool_lock); +} + +int MySQL_Tool_Handler::init() { + // Initialize catalog + if (catalog->init()) { + return -1; + } + + // Initialize connection pool + if (init_connection_pool()) { + return -1; + } + + proxy_info("MySQL Tool Handler initialized for schema '%s'\n", mysql_schema.c_str()); + return 0; +} + +/** + * @brief Close all MySQL connections and cleanup resources + * + * Thread-safe method that closes all connections in the pool, + * clears the connection vector, and resets the pool size. + */ +void MySQL_Tool_Handler::close() { + // Close all connections in the pool + pthread_mutex_lock(&pool_lock); + for (auto& conn : connection_pool) { + if (conn.mysql) { + mysql_close(conn.mysql); + conn.mysql = NULL; + } + } + connection_pool.clear(); + pool_size = 0; + pthread_mutex_unlock(&pool_lock); +} + +/** + * @brief Initialize the MySQL connection pool + * + * Creates one MySQL connection per configured host:port pair. + * Uses mysql_init() and mysql_real_connect() to establish connections. + * Sets 5-second timeouts for connect, read, and write operations. + * Thread-safe: acquires pool_lock during initialization. 
+ * + * @return 0 on success, -1 on error (logs specific error via proxy_error) + */ +int MySQL_Tool_Handler::init_connection_pool() { + // Create one connection per host/port pair + size_t num_connections = std::min(mysql_hosts.size(), mysql_ports.size()); + + if (num_connections == 0) { + proxy_error("MySQL_Tool_Handler: No hosts configured\n"); + return -1; + } + + pthread_mutex_lock(&pool_lock); + + for (size_t i = 0; i < num_connections; i++) { + MySQLConnection conn; + conn.host = mysql_hosts[i]; + conn.port = mysql_ports[i]; + conn.in_use = false; + + // Initialize MySQL connection + conn.mysql = mysql_init(NULL); + if (!conn.mysql) { + proxy_error("MySQL_Tool_Handler: mysql_init failed for %s:%d\n", + conn.host.c_str(), conn.port); + pthread_mutex_unlock(&pool_lock); + return -1; + } + + // Set connection timeout + unsigned int timeout = 5; + mysql_options(conn.mysql, MYSQL_OPT_CONNECT_TIMEOUT, &timeout); + mysql_options(conn.mysql, MYSQL_OPT_READ_TIMEOUT, &timeout); + mysql_options(conn.mysql, MYSQL_OPT_WRITE_TIMEOUT, &timeout); + + // Connect to MySQL server + if (!mysql_real_connect( + conn.mysql, + conn.host.c_str(), + mysql_user.c_str(), + mysql_password.c_str(), + mysql_schema.empty() ? NULL : mysql_schema.c_str(), + conn.port, + NULL, + CLIENT_MULTI_STATEMENTS + )) { + proxy_error("MySQL_Tool_Handler: mysql_real_connect failed for %s:%d: %s\n", + conn.host.c_str(), conn.port, mysql_error(conn.mysql)); + mysql_close(conn.mysql); + pthread_mutex_unlock(&pool_lock); + return -1; + } + + connection_pool.push_back(conn); + pool_size++; + + proxy_info("MySQL_Tool_Handler: Connected to %s:%d\n", + conn.host.c_str(), conn.port); + } + + pthread_mutex_unlock(&pool_lock); + + proxy_info("MySQL_Tool_Handler: Connection pool initialized with %d connection(s)\n", pool_size); + return 0; +} + +/** + * @brief Get an available connection from the pool + * + * Thread-safe method that searches for a connection not currently in use. 
+ * Marks the connection as in_use before returning. + * + * @return Pointer to MYSQL connection, or NULL if no available connection + * (logs error via proxy_error if pool exhausted) + */ +MYSQL* MySQL_Tool_Handler::get_connection() { + MYSQL* conn = NULL; + + pthread_mutex_lock(&pool_lock); + + // Find an available connection + for (auto& c : connection_pool) { + if (!c.in_use) { + c.in_use = true; + conn = c.mysql; + break; + } + } + + pthread_mutex_unlock(&pool_lock); + + if (!conn) { + proxy_error("MySQL_Tool_Handler: No available connection in pool\n"); + } + + return conn; +} + +/** + * @brief Return a connection to the pool for reuse + * + * Thread-safe method that marks a previously obtained connection + * as available for other operations. Does not close the connection. + * + * @param mysql The MYSQL connection to return to the pool + */ +void MySQL_Tool_Handler::return_connection(MYSQL* mysql) { + pthread_mutex_lock(&pool_lock); + + // Find the connection and mark as available + for (auto& c : connection_pool) { + if (c.mysql == mysql) { + c.in_use = false; + break; + } + } + + pthread_mutex_unlock(&pool_lock); +} + +/** + * @brief Execute a SQL query and return results as JSON + * + * Thread-safe method that: + * 1. Gets a connection from the pool + * 2. Executes the query via mysql_query() + * 3. Fetches results via mysql_store_result() + * 4. Converts rows/columns to JSON format + * 5. 
Returns the connection to the pool + * + * @param query SQL query to execute + * @return JSON string with format: + * - Success: {"success":true, "columns":[...], "rows":[...], "row_count":N} + * - Failure: {"success":false, "error":"...", "sql_error":code} + */ +std::string MySQL_Tool_Handler::execute_query(const std::string& query) { + fprintf(stderr, "DEBUG execute_query: Starting, query=%s\n", query.c_str()); + + json result; + result["success"] = false; + + MYSQL* mysql = get_connection(); + fprintf(stderr, "DEBUG execute_query: Got connection\n"); + + if (!mysql) { + result["error"] = "No available database connection"; + return result.dump(); + } + + // Execute query + fprintf(stderr, "DEBUG execute_query: About to call mysql_query\n"); + if (mysql_query(mysql, query.c_str()) != 0) { + fprintf(stderr, "DEBUG execute_query: mysql_query failed\n"); + result["error"] = mysql_error(mysql); + result["sql_error"] = mysql_errno(mysql); + return_connection(mysql); + return result.dump(); + } + fprintf(stderr, "DEBUG execute_query: mysql_query succeeded\n"); + + // Store result + MYSQL_RES* res = mysql_store_result(mysql); + fprintf(stderr, "DEBUG execute_query: Got result set\n"); + + if (!res) { + // No result set (e.g., INSERT, UPDATE, etc.) + result["success"] = true; + result["rows_affected"] = (int)mysql_affected_rows(mysql); + return_connection(mysql); + return result.dump(); + } + + // Get column names (convert to lowercase for consistency) + json columns = json::array(); + std::vector lowercase_columns; + MYSQL_FIELD* field; + fprintf(stderr, "DEBUG execute_query: About to fetch fields\n"); + int field_count = 0; + while ((field = mysql_fetch_field(res))) { + field_count++; + fprintf(stderr, "DEBUG execute_query: Processing field %d, name=%p\n", field_count, (void*)field->name); + // Check if field name is null (can happen in edge cases) + // Use placeholder name to maintain column index alignment + std::string col_name = field->name ? 
field->name : "unknown_field"; + // Convert to lowercase + std::transform(col_name.begin(), col_name.end(), col_name.begin(), ::tolower); + columns.push_back(col_name); + lowercase_columns.push_back(col_name); + } + fprintf(stderr, "DEBUG execute_query: Processed %d fields\n", field_count); + + // Get rows + json rows = json::array(); + MYSQL_ROW row; + unsigned int num_fields = mysql_num_fields(res); + while ((row = mysql_fetch_row(res))) { + json json_row = json::object(); + for (unsigned int i = 0; i < num_fields; i++) { + // Use empty string for NULL values instead of nullptr + // to avoid std::string construction from null issues + json_row[lowercase_columns[i]] = row[i] ? row[i] : ""; + } + rows.push_back(json_row); + } + + mysql_free_result(res); + return_connection(mysql); + + result["success"] = true; + result["columns"] = columns; + result["rows"] = rows; + result["row_count"] = (int)rows.size(); + + return result.dump(); +} + +std::string MySQL_Tool_Handler::sanitize_query(const std::string& query) { + // Basic SQL injection prevention + std::string sanitized = query; + + // Remove comments + std::regex comment_regex("--[^\\n]*\\n|/\\*.*?\\*/"); + sanitized = std::regex_replace(sanitized, comment_regex, " "); + + // Trim + sanitized.erase(0, sanitized.find_first_not_of(" \t\n\r")); + sanitized.erase(sanitized.find_last_not_of(" \t\n\r") + 1); + + return sanitized; +} + +bool MySQL_Tool_Handler::is_dangerous_query(const std::string& query) { + std::string upper = query; + std::transform(upper.begin(), upper.end(), upper.begin(), ::toupper); + fprintf(stderr, "DEBUG is_dangerous_query: Checking query '%s'\n", upper.c_str()); + + // List of dangerous keywords + static const char* dangerous[] = { + "DROP", "DELETE", "INSERT", "UPDATE", "TRUNCATE", + "ALTER", "CREATE", "GRANT", "REVOKE", "EXECUTE", + "SCRIPT", "INTO OUTFILE", "LOAD_FILE", "LOAD DATA", + "SLEEP", "BENCHMARK", "WAITFOR", "DELAY" + }; + + for (const char* word : dangerous) { + if 
(upper.find(word) != std::string::npos) { + fprintf(stderr, "DEBUG is_dangerous_query: Found dangerous keyword '%s'\n", word); + proxy_debug(PROXY_DEBUG_GENERIC, 3, "Dangerous keyword found: %s\n", word); + return true; + } + } + + fprintf(stderr, "DEBUG is_dangerous_query: No dangerous keywords found\n"); + return false; +} + +bool MySQL_Tool_Handler::validate_readonly_query(const std::string& query) { + std::string upper = query; + std::transform(upper.begin(), upper.end(), upper.begin(), ::toupper); + + // Must start with SELECT + if (upper.substr(0, 6) != "SELECT") { + return false; + } + + // Check for dangerous keywords + if (is_dangerous_query(query)) { + return false; + } + + // Check for SELECT * without LIMIT + if (!allow_select_star) { + std::regex select_star_regex("\\bSELECT\\s+\\*\\s+FROM", std::regex_constants::icase); + if (std::regex_search(upper, select_star_regex)) { + // Allow if there's a LIMIT clause + if (upper.find("LIMIT ") == std::string::npos) { + proxy_debug(PROXY_DEBUG_GENERIC, 3, "SELECT * without LIMIT rejected\n"); + return false; + } + } + } + + return true; +} + +std::string MySQL_Tool_Handler::list_schemas(const std::string& page_token, int page_size) { + // Build query to list schemas + std::string query = + "SELECT schema_name, " + " (SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = s.schema_name) as table_count " + "FROM information_schema.schemata s " + "WHERE schema_name NOT IN ('information_schema', 'performance_schema', 'mysql', 'sys') " + "ORDER BY schema_name " + "LIMIT " + std::to_string(page_size); + + // Execute the query + std::string response = execute_query(query); + + // Parse the response and format it for the tool + json result; + try { + json query_result = json::parse(response); + if (query_result["success"] == true) { + result = json::array(); + for (const auto& row : query_result["rows"]) { + json schema_entry; + schema_entry["name"] = row["schema_name"]; + schema_entry["table_count"] = 
row["table_count"]; + result.push_back(schema_entry); + } + } else { + result["error"] = query_result["error"]; + } + } catch (const std::exception& e) { + result["error"] = std::string("Failed to parse query result: ") + e.what(); + } + + return result.dump(); +} + +std::string MySQL_Tool_Handler::list_tables( + const std::string& schema, + const std::string& page_token, + int page_size, + const std::string& name_filter +) { + fprintf(stderr, "DEBUG: list_tables called with schema='%s', page_token='%s', page_size=%d, name_filter='%s'\n", + schema.c_str(), page_token.c_str(), page_size, name_filter.c_str()); + fprintf(stderr, "DEBUG: mysql_schema='%s'\n", mysql_schema.c_str()); + + // Build query to list tables with metadata + std::string sql = + "SELECT " + " t.table_name, " + " t.table_type, " + " COALESCE(t.table_rows, 0) as row_count, " + " COALESCE(t.data_length, 0) + COALESCE(t.index_length, 0) as total_size, " + " t.create_time, " + " t.update_time " + "FROM information_schema.tables t " + "WHERE t.table_schema = '" + (schema.empty() ? 
mysql_schema : schema) + "' ";
+
+	fprintf(stderr, "DEBUG: Built WHERE clause\n");
+
+	if (!name_filter.empty()) {
+		sql += " AND t.table_name LIKE '%" + name_filter + "%'";
+	}
+
+	fprintf(stderr, "DEBUG: Built name_filter clause\n");
+
+	sql += " ORDER BY t.table_name LIMIT " + std::to_string(page_size);
+
+	fprintf(stderr, "DEBUG: Built SQL query: %s\n", sql.c_str());
+
+	proxy_debug(PROXY_DEBUG_GENERIC, 3, "list_tables query: %s\n", sql.c_str());
+
+	fprintf(stderr, "DEBUG: About to call execute_query\n");
+
+	// Execute the query
+	std::string response = execute_query(sql);
+
+	fprintf(stderr, "DEBUG: execute_query returned, response length=%zu\n", response.length());
+
+	// Debug: print raw response
+	proxy_debug(PROXY_DEBUG_GENERIC, 3, "list_tables raw response: %s\n", response.c_str());
+	fprintf(stderr, "DEBUG: list_tables raw response: %s\n", response.c_str());
+
+	// Parse and format the response
+	json result;
+	try {
+		fprintf(stderr, "DEBUG list_tables: About to parse response\n");
+		json query_result = json::parse(response);
+		fprintf(stderr, "DEBUG list_tables: Parsed response successfully\n");
+		if (query_result["success"] == true) {
+			fprintf(stderr, "DEBUG list_tables: Query successful, processing rows\n");
+			result = json::array();
+			for (const auto& row : query_result["rows"]) {
+				fprintf(stderr, "DEBUG list_tables: Processing row\n");
+				json table_entry;
+				fprintf(stderr, "DEBUG list_tables: About to access table_name\n");
+				table_entry["name"] = row["table_name"];
+				fprintf(stderr, "DEBUG list_tables: About to access table_type\n");
+				table_entry["type"] = row["table_type"];
+				fprintf(stderr, "DEBUG list_tables: About to access row_count\n");
+				table_entry["row_count"] = row["row_count"];
+				fprintf(stderr, "DEBUG list_tables: About to access total_size\n");
+				table_entry["total_size"] = row["total_size"];
+				fprintf(stderr, "DEBUG list_tables: About to access create_time\n");
+				table_entry["create_time"] = row["create_time"];
+				fprintf(stderr, "DEBUG list_tables: About to access update_time (may be null)\n");
+				table_entry["update_time"] = row["update_time"];
+				fprintf(stderr, "DEBUG list_tables: All fields accessed, pushing entry\n");
+				result.push_back(table_entry);
+			}
+		} else {
+			fprintf(stderr, "DEBUG list_tables: Query failed, extracting error\n");
+			result["error"] = query_result["error"];
+		}
+	} catch (const std::exception& e) {
+		fprintf(stderr, "DEBUG list_tables: Exception caught: %s\n", e.what());
+		result["error"] = std::string("Failed to parse query result: ") + e.what();
+	}
+
+	return result.dump();
+}
+
+/**
+ * @brief Describe a table: its columns, primary key and indexes.
+ *
+ * Runs three information_schema queries through execute_query(). A query that
+ * reports failure contributes an empty array; a response that cannot be parsed
+ * additionally sets an "error" member instead of throwing.
+ *
+ * @param schema Schema to look in; mysql_schema is used when empty.
+ * @param table  Table name.
+ * @return Serialized JSON object with "schema", "table", "columns",
+ *         "primary_key", "indexes" and "constraints" (placeholder, always empty).
+ */
+std::string MySQL_Tool_Handler::describe_table(const std::string& schema, const std::string& table) {
+	json result;
+	result["schema"] = schema;
+	result["table"] = table;
+
+	// Render a value as a single-quoted SQL string literal. Quotes and backslashes
+	// are doubled so a caller-supplied name cannot terminate the literal.
+	auto sql_quote = [](const std::string& value) {
+		std::string quoted = "'";
+		for (char c : value) {
+			if (c == '\'' || c == '\\') {
+				quoted += c;
+			}
+			quoted += c;
+		}
+		quoted += "'";
+		return quoted;
+	};
+
+	// Run one metadata query and hand back its "rows"; an empty array is returned
+	// when the query failed or the response is not the expected JSON.
+	auto fetch_rows = [this, &result](const std::string& sql) -> json {
+		try {
+			json parsed = json::parse(execute_query(sql));
+			if (parsed["success"] == true) {
+				return parsed["rows"];
+			}
+		} catch (const std::exception& e) {
+			result["error"] = std::string("Failed to parse query result: ") + e.what();
+		}
+		return json::array();
+	};
+
+	const std::string schema_lit = sql_quote(schema.empty() ? mysql_schema : schema);
+	const std::string table_lit = sql_quote(table);
+
+	// Query to get columns
+	const std::string columns_query =
+		"SELECT "
+		" column_name, "
+		" data_type, "
+		" column_type, "
+		" is_nullable, "
+		" column_default, "
+		" column_comment, "
+		" character_set_name, "
+		" collation_name "
+		"FROM information_schema.columns "
+		"WHERE table_schema = " + schema_lit + " "
+		"AND table_name = " + table_lit + " "
+		"ORDER BY ordinal_position";
+
+	result["columns"] = json::array();
+	const json column_rows = fetch_rows(columns_query);
+	for (const auto& row : column_rows) {
+		json col;
+		col["name"] = row["column_name"];
+		col["data_type"] = row["data_type"];
+		col["column_type"] = row["column_type"];
+		col["nullable"] = (row["is_nullable"] == "YES");
+		col["default"] = row["column_default"];
+		col["comment"] = row["column_comment"];
+		col["charset"] = row["character_set_name"];
+		col["collation"] = row["collation_name"];
+		result["columns"].push_back(col);
+	}
+
+	// Query to get primary key
+	const std::string pk_query =
+		"SELECT k.column_name "
+		"FROM information_schema.table_constraints t "
+		"JOIN information_schema.key_column_usage k "
+		" ON t.constraint_name = k.constraint_name "
+		" AND t.table_schema = k.table_schema "
+		"WHERE t.table_schema = " + schema_lit + " "
+		"AND t.table_name = " + table_lit + " "
+		"AND t.constraint_type = 'PRIMARY KEY' "
+		"ORDER BY k.ordinal_position";
+
+	result["primary_key"] = json::array();
+	const json pk_rows = fetch_rows(pk_query);
+	for (const auto& row : pk_rows) {
+		result["primary_key"].push_back(row["column_name"]);
+	}
+
+	// Query to get indexes
+	const std::string indexes_query =
+		"SELECT "
+		" index_name, "
+		" column_name, "
+		" seq_in_index, "
+		" index_type, "
+		" non_unique, "
+		" nullable "
+		"FROM information_schema.statistics "
+		"WHERE table_schema = " + schema_lit + " "
+		"AND table_name = " + table_lit + " "
+		"ORDER BY index_name, seq_in_index";
+
+	result["indexes"] = json::array();
+	const json index_rows = fetch_rows(indexes_query);
+	for (const auto& row : index_rows) {
+		json idx;
+		idx["name"] = row["index_name"];
+		idx["column"] = row["column_name"];
+		idx["seq_in_index"] = row["seq_in_index"];
+		idx["type"] = row["index_type"];
+		idx["unique"] = (row["non_unique"] == "0");
+		idx["nullable"] = (row["nullable"] == "YES");
+		result["indexes"].push_back(idx);
+	}
+
+	result["constraints"] = json::array(); // Placeholder for constraints
+
+	return result.dump();
+}
+
+/**
+ * @brief Placeholder: always returns an empty JSON array; both arguments are ignored.
+ */
+std::string MySQL_Tool_Handler::get_constraints(const std::string& schema, const std::string& table) {
+	// Get foreign keys, unique constraints, check constraints
+	json result = json::array();
+	return result.dump();
+}
+
+/**
+ * @brief Placeholder: echoes schema/view with an empty definition and column list.
+ */
+std::string MySQL_Tool_Handler::describe_view(const std::string& schema, const std::string& view) {
+	// Get view definition and columns
+	json result;
+	result["schema"] = schema;
+	result["view"] = view;
+	result["definition"] = "";
+	result["columns"] = json::array();
+	return result.dump();
+}
+
+/**
+ * @brief Placeholder: returns a zeroed profile skeleton; no query is issued and
+ *        the mode argument is not used.
+ */
+std::string MySQL_Tool_Handler::table_profile(
+	const std::string& schema,
+	const std::string& table,
+	const std::string& mode
+) {
+	// Get table profile including:
+	// - Estimated row count and size
+	// - Time columns detected
+	// - ID columns detected
+	// - Column null percentages
+	// - Top N distinct values for low-cardinality columns
+	// - Min/max for numeric/date columns
+
+	json result;
+	result["schema"] = schema;
+	result["table"] = table;
+	result["row_estimate"] = 0;
+	result["size_estimate"] = 0;
+	result["time_columns"] = json::array();
+	result["id_columns"] = json::array();
+	result["column_stats"] = json::object();
+
+	return result.dump();
+}
+
+std::string
MySQL_Tool_Handler::column_profile(
+	const std::string& schema,
+	const std::string& table,
+	const std::string& column,
+	int max_top_values
+) {
+	// Placeholder: returns a zeroed profile skeleton, no query is issued and
+	// max_top_values is not used yet. Intended content:
+	// - Null count and percentage
+	// - Distinct count (approximate)
+	// - Top N values (capped)
+	// - Min/max for numeric/date types
+
+	json result;
+	result["schema"] = schema;
+	result["table"] = table;
+	result["column"] = column;
+	result["null_count"] = 0;
+	result["distinct_count"] = 0;
+	result["top_values"] = json::array();
+	result["min_value"] = nullptr;
+	result["max_value"] = nullptr;
+
+	return result.dump();
+}
+
+/**
+ * @brief Return up to 20 rows of a table.
+ *
+ * columns, where and order_by are spliced into the statement verbatim.
+ * NOTE(review): those fragments are not validated here - confirm that callers
+ * (or execute_query) restrict what they may contain.
+ *
+ * @return Serialized JSON: the row array on success, {"error": ...} otherwise.
+ */
+std::string MySQL_Tool_Handler::sample_rows(
+	const std::string& schema,
+	const std::string& table,
+	const std::string& columns,
+	const std::string& where,
+	const std::string& order_by,
+	int limit
+) {
+	// Build and execute sampling query with hard cap
+	int actual_limit = std::min(limit, 20); // Hard cap at 20 rows
+
+	std::string sql = "SELECT ";
+	sql += columns.empty() ? "*" : columns;
+	sql += " FROM " + (schema.empty() ? mysql_schema : schema) + "." + table;
+
+	if (!where.empty()) {
+		sql += " WHERE " + where;
+	}
+
+	if (!order_by.empty()) {
+		sql += " ORDER BY " + order_by;
+	}
+
+	sql += " LIMIT " + std::to_string(actual_limit);
+
+	proxy_debug(PROXY_DEBUG_GENERIC, 3, "sample_rows query: %s\n", sql.c_str());
+
+	// Execute the query
+	std::string response = execute_query(sql);
+
+	// Parse and return the results
+	json result;
+	try {
+		json query_result = json::parse(response);
+		if (query_result["success"] == true) {
+			result = query_result["rows"];
+		} else {
+			result["error"] = query_result["error"];
+		}
+	} catch (const std::exception& e) {
+		result["error"] = std::string("Failed to parse query result: ") + e.what();
+	}
+
+	return result.dump();
+}
+
+/**
+ * @brief Return the most frequent values of a column (at most 50) with their counts.
+ *
+ * @return Serialized JSON: array of {value, count} rows, or {"error": ...}.
+ */
+std::string MySQL_Tool_Handler::sample_distinct(
+	const std::string& schema,
+	const std::string& table,
+	const std::string& column,
+	const std::string& where,
+	int limit
+) {
+	// Build query to sample distinct values
+	int actual_limit = std::min(limit, 50);
+
+	std::string sql = "SELECT DISTINCT " + column + " as value, COUNT(*) as count ";
+	sql += " FROM " + (schema.empty() ? mysql_schema : schema) + "." + table;
+
+	if (!where.empty()) {
+		sql += " WHERE " + where;
+	}
+
+	sql += " GROUP BY " + column + " ORDER BY count DESC LIMIT " + std::to_string(actual_limit);
+
+	proxy_debug(PROXY_DEBUG_GENERIC, 3, "sample_distinct query: %s\n", sql.c_str());
+
+	// Execute the query
+	std::string response = execute_query(sql);
+
+	// Parse and return the results
+	json result;
+	try {
+		json query_result = json::parse(response);
+		if (query_result["success"] == true) {
+			result = query_result["rows"];
+		} else {
+			result["error"] = query_result["error"];
+		}
+	} catch (const std::exception& e) {
+		result["error"] = std::string("Failed to parse query result: ") + e.what();
+	}
+
+	return result.dump();
+}
+
+/**
+ * @brief Execute a caller-supplied statement after read-only validation.
+ *
+ * @param sql         Statement to run; rejected unless validate_readonly_query() accepts it.
+ * @param max_rows    Row cap used when a LIMIT is appended (never more than 200).
+ * @param timeout_sec Not used by this function.
+ * @return Serialized JSON with "success" plus rows/row_count/columns, or "error"
+ *         (and "sql_error" when the backend supplied one).
+ */
+std::string MySQL_Tool_Handler::run_sql_readonly(
+	const std::string& sql,
+	int max_rows,
+	int timeout_sec
+) {
+	json result;
+	result["success"] = false;
+
+	// Validate query is read-only
+	if (!validate_readonly_query(sql)) {
+		result["error"] = "Query validation failed: not SELECT-only or contains dangerous keywords";
+		return result.dump();
+	}
+
+	// Add LIMIT if not present and not an aggregate query
+	std::string query = sql;
+	std::string upper = sql;
+	std::transform(upper.begin(), upper.end(), upper.begin(), ::toupper);
+
+	// Plain substring checks: text inside string literals or comments also matches.
+	bool has_limit = upper.find("LIMIT ") != std::string::npos;
+	bool is_aggregate = upper.find("GROUP BY") != std::string::npos ||
+		upper.find("COUNT(") != std::string::npos ||
+		upper.find("SUM(") != std::string::npos ||
+		upper.find("AVG(") != std::string::npos;
+
+	// NOTE(review): the cap is skipped whenever allow_select_star is set - confirm
+	// that flag is really meant to disable the automatic LIMIT.
+	if (!has_limit && !is_aggregate && !allow_select_star) {
+		query += " LIMIT " + std::to_string(std::min(max_rows, 200));
+	}
+
+	// Execute the query
+	std::string response = execute_query(query);
+
+	// Parse and return the results
+	try {
+		json query_result = json::parse(response);
+		if (query_result["success"] == true) {
+			result["success"] = true;
+			result["rows"] = query_result["rows"];
+			result["row_count"] = query_result["row_count"];
+			result["columns"] = query_result["columns"];
+		} else {
+			result["error"] = query_result["error"];
+			if (query_result.contains("sql_error")) {
+				result["sql_error"] = query_result["sql_error"];
+			}
+		}
+	} catch (const std::exception& e) {
+		result["error"] = std::string("Failed to parse query result: ") + e.what();
+	}
+
+	return result.dump();
+}
+
+/**
+ * @brief Return the execution plan of a read-only statement.
+ *
+ * The statement goes through the same validate_readonly_query() gate as
+ * run_sql_readonly() before "EXPLAIN " is prepended: text such as
+ * "ANALYZE ..." would otherwise become EXPLAIN ANALYZE, which runs the statement.
+ *
+ * @param sql Statement to explain.
+ * @return Serialized JSON: plan rows on success, {"error": ...} otherwise.
+ */
+std::string MySQL_Tool_Handler::explain_sql(const std::string& sql) {
+	json result;
+
+	if (!validate_readonly_query(sql)) {
+		result["error"] = "Query validation failed: not SELECT-only or contains dangerous keywords";
+		return result.dump();
+	}
+
+	// Run EXPLAIN on the query
+	std::string query = "EXPLAIN " + sql;
+
+	// Execute the query
+	std::string response = execute_query(query);
+
+	// Parse and return the results
+	try {
+		json query_result = json::parse(response);
+		if (query_result["success"] == true) {
+			result = query_result["rows"];
+		} else {
+			result["error"] = query_result["error"];
+		}
+	} catch (const std::exception& e) {
+		result["error"] = std::string("Failed to parse query result: ") + e.what();
+	}
+
+	return result.dump();
+}
+
+std::string MySQL_Tool_Handler::suggest_joins(
+	const std::string& schema,
+	const std::string& table_a,
+	const std::string& table_b,
+	int max_candidates
+) {
+	// Heuristic-based join suggestion:
+	// 1. Check for matching column names (id, user_id, etc.)
+	// 2. Check for matching data types
+	// 3.
Check index presence on potential join columns
+
+	json result = json::array();
+	return result.dump();
+}
+
+/**
+ * @brief Placeholder: always returns an empty JSON array; all arguments are ignored.
+ */
+std::string MySQL_Tool_Handler::find_reference_candidates(
+	const std::string& schema,
+	const std::string& table,
+	const std::string& column,
+	int max_tables
+) {
+	// Find tables that might be referenced by this column
+	// Look for primary keys with matching names in other tables
+
+	json result = json::array();
+	return result.dump();
+}
+
+// Catalog tools (LLM memory)
+
+/**
+ * @brief Store or update one catalog entry.
+ *
+ * @return Serialized JSON with "success"; "kind"/"key" on success, "error" otherwise.
+ */
+std::string MySQL_Tool_Handler::catalog_upsert(
+	const std::string& kind,
+	const std::string& key,
+	const std::string& document,
+	const std::string& tags,
+	const std::string& links
+) {
+	int rc = catalog->upsert(kind, key, document, tags, links);
+
+	json result;
+	result["success"] = (rc == 0);
+	if (rc == 0) {
+		result["kind"] = kind;
+		result["key"] = key;
+	} else {
+		result["error"] = "Failed to upsert catalog entry";
+	}
+
+	return result.dump();
+}
+
+/**
+ * @brief Fetch one catalog entry.
+ *
+ * catalog_upsert() stores the document text without checking that it is JSON,
+ * so the stored text is parsed without exceptions and returned as a plain
+ * string when it is not valid JSON.
+ *
+ * @return Serialized JSON with "success" and either the entry or "error".
+ */
+std::string MySQL_Tool_Handler::catalog_get(const std::string& kind, const std::string& key) {
+	std::string document;
+	int rc = catalog->get(kind, key, document);
+
+	json result;
+	result["success"] = (rc == 0);
+	if (rc == 0) {
+		result["kind"] = kind;
+		result["key"] = key;
+		json parsed = json::parse(document, nullptr, false);
+		if (parsed.is_discarded()) {
+			result["document"] = document;
+		} else {
+			result["document"] = parsed;
+		}
+	} else {
+		result["error"] = "Entry not found";
+	}
+
+	return result.dump();
+}
+
+/**
+ * @brief Search the catalog.
+ *
+ * @return Serialized JSON with "query" and "results"; when the catalog output
+ *         cannot be parsed, "results" is an empty array and "error" is set.
+ */
+std::string MySQL_Tool_Handler::catalog_search(
+	const std::string& query,
+	const std::string& kind,
+	const std::string& tags,
+	int limit,
+	int offset
+) {
+	std::string results = catalog->search(query, kind, tags, limit, offset);
+
+	json result;
+	result["query"] = query;
+	json parsed = json::parse(results, nullptr, false);
+	if (parsed.is_discarded()) {
+		result["results"] = json::array();
+		result["error"] = "Failed to parse catalog search results";
+	} else {
+		result["results"] = parsed;
+	}
+
+	return result.dump();
+}
+
+/**
+ * @brief List catalog entries, optionally restricted to one kind.
+ *
+ * @return Serialized JSON with "kind" ("all" when no kind was given) and
+ *         "results"; when the catalog output cannot be parsed, "results" is an
+ *         empty array and "error" is set.
+ */
+std::string MySQL_Tool_Handler::catalog_list(
+	const std::string& kind,
+	int limit,
+	int offset
+) {
+	std::string results = catalog->list(kind, limit, offset);
+
+	json result;
+	result["kind"] = kind.empty() ? "all" : kind;
+	json parsed = json::parse(results, nullptr, false);
+	if (parsed.is_discarded()) {
+		result["results"] = json::array();
+		result["error"] = "Failed to parse catalog list results";
+	} else {
+		result["results"] = parsed;
+	}
+
+	return result.dump();
+}
+
+/**
+ * @brief Merge several catalog entries into target_key.
+ *
+ * @param keys         JSON text holding an array of string keys.
+ * @param target_key   Key of the merged entry.
+ * @param kind         Passed through to the catalog.
+ * @param instructions Passed through to the catalog.
+ * @return Serialized JSON with "success", "target_key" and, once keys was
+ *         accepted, "merged_keys"; "error" describes any failure.
+ */
+std::string MySQL_Tool_Handler::catalog_merge(
+	const std::string& keys,
+	const std::string& target_key,
+	const std::string& kind,
+	const std::string& instructions
+) {
+	json result;
+	result["success"] = false;
+	result["target_key"] = target_key;
+
+	// Parse keys JSON array without throwing on malformed caller input
+	json keys_json = json::parse(keys, nullptr, false);
+	if (!keys_json.is_array()) {
+		result["error"] = "keys must be a JSON array of strings";
+		return result.dump();
+	}
+
+	std::vector<std::string> key_list;
+	for (const auto& k : keys_json) {
+		if (!k.is_string()) {
+			result["error"] = "keys must be a JSON array of strings";
+			return result.dump();
+		}
+		key_list.push_back(k.get<std::string>());
+	}
+
+	int rc = catalog->merge(key_list, target_key, kind, instructions);
+
+	result["success"] = (rc == 0);
+	result["merged_keys"] = keys_json;
+	if (rc != 0) {
+		result["error"] = "Failed to merge catalog entries";
+	}
+
+	return result.dump();
+}
+
+/**
+ * @brief Remove one catalog entry.
+ *
+ * @return Serialized JSON with "success", "kind" and "key".
+ */
+std::string MySQL_Tool_Handler::catalog_delete(const std::string& kind, const std::string& key) {
+	int rc = catalog->remove(kind, key);
+
+	json result;
+	result["success"] = (rc == 0);
+	result["kind"] = kind;
+	result["key"] = key;
+
+	return result.dump();
+}
diff --git a/lib/Observe_Tool_Handler.cpp b/lib/Observe_Tool_Handler.cpp
new file mode 100644
index 0000000000..cc865aa169
--- /dev/null
+++ b/lib/Observe_Tool_Handler.cpp
@@ -0,0 +1,175 @@
+#include "../deps/json/json.hpp"
+using json = nlohmann::json;
+#define PROXYJSON
+
+#include "Observe_Tool_Handler.h"
+#include "MCP_Thread.h"
+#include "proxysql_debug.h"
+
+Observe_Tool_Handler::Observe_Tool_Handler(MCP_Threads_Handler* handler)
+	: mcp_handler(handler)
+{
+	pthread_mutex_init(&handler_lock, NULL);
+	proxy_debug(PROXY_DEBUG_GENERIC, 3, "Observe_Tool_Handler created\n");
+}
+
+Observe_Tool_Handler::~Observe_Tool_Handler() {
+	close();
+	pthread_mutex_destroy(&handler_lock);
+	proxy_debug(PROXY_DEBUG_GENERIC, 3, "Observe_Tool_Handler destroyed\n");
+}
+
+int Observe_Tool_Handler::init() {
+	proxy_info("Observe_Tool_Handler initialized\n");
+	return 0;
+}
+
+void Observe_Tool_Handler::close() {
+	proxy_debug(PROXY_DEBUG_GENERIC, 2, "Observe_Tool_Handler closed\n");
+}
+
+json Observe_Tool_Handler::get_tool_list() {
+	json tools = json::array();
+
+	// Stub tools
for observability
+	tools.push_back(create_tool_description(
+		"list_stats",
+		"List all available ProxySQL statistics",
+		{
+			{"type", "object"},
+			{"properties", {
+				{"filter", {
+					{"type", "string"},
+					{"description", "Filter pattern for stat names"}
+				}}
+			}}
+		}
+	));
+
+	tools.push_back(create_tool_description(
+		"get_stats",
+		"Get specific statistics by name",
+		{
+			{"type", "object"},
+			{"properties", {
+				{"stat_names", {
+					{"type", "array"},
+					{"description", "Array of stat names to retrieve"}
+				}}
+			}},
+			{"required", {"stat_names"}}
+		}
+	));
+
+	// Tools without parameters spell out json::object(): a bare {} nested in an
+	// initializer list is not guaranteed to serialize as an empty object.
+	tools.push_back(create_tool_description(
+		"show_connections",
+		"Show active connection information",
+		{
+			{"type", "object"},
+			{"properties", json::object()}
+		}
+	));
+
+	tools.push_back(create_tool_description(
+		"show_queries",
+		"Show query execution statistics",
+		{
+			{"type", "object"},
+			{"properties", {
+				{"limit", {
+					{"type", "integer"},
+					{"description", "Maximum number of queries to return"}
+				}}
+			}}
+		}
+	));
+
+	tools.push_back(create_tool_description(
+		"get_health",
+		"Get ProxySQL health check status",
+		{
+			{"type", "object"},
+			{"properties", json::object()}
+		}
+	));
+
+	tools.push_back(create_tool_description(
+		"show_metrics",
+		"Show performance metrics",
+		{
+			{"type", "object"},
+			{"properties", {
+				{"category", {
+					{"type", "string"},
+					{"enum", {"query", "connection", "cache", "all"}},
+					{"description", "Metrics category to show"}
+				}}
+			}}
+		}
+	));
+
+	json result;
+	result["tools"] = tools;
+	return result;
+}
+
+/**
+ * @brief Look up one tool description by name.
+ *
+ * @return The matching entry of get_tool_list(), or an error response when no
+ *         tool has that name.
+ */
+json Observe_Tool_Handler::get_tool_description(const std::string& tool_name) {
+	json tools_list = get_tool_list();
+	for (const auto& tool : tools_list["tools"]) {
+		if (tool["name"] == tool_name) {
+			return tool;
+		}
+	}
+	return create_error_response("Tool not found: " + tool_name);
+}
+
+/**
+ * @brief Execute one observability tool (all tools are currently stubs).
+ *
+ * The call is serialized with handler_lock. Reading arguments can throw (for
+ * example when arguments is not an object or a value has the wrong type), so
+ * the dispatch runs inside a try block and the lock is released on every path.
+ *
+ * @param tool_name Name of the tool to run.
+ * @param arguments Tool arguments.
+ * @return Success response with placeholder data, or an error response for an
+ *         unknown tool or invalid arguments.
+ */
+json Observe_Tool_Handler::execute_tool(const std::string& tool_name, const json& arguments) {
+	pthread_mutex_lock(&handler_lock);
+
+	json result;
+
+	try {
+		// Stub implementation - returns placeholder responses
+		if (tool_name == "list_stats") {
+			std::string filter = arguments.value("filter", "");
+			result = create_success_response(json{
+				{"message", "list_stats functionality to be implemented"},
+				{"filter", filter},
+				{"stats", json::array()}
+			});
+		} else if (tool_name == "get_stats") {
+			// Read for validation only; the stub does not use the names yet
+			json stat_names = arguments.value("stat_names", json::array());
+			(void)stat_names;
+			result = create_success_response(json{
+				{"message", "get_stats functionality to be implemented"},
+				{"stats", json::object()}
+			});
+		} else if (tool_name == "show_connections") {
+			result = create_success_response(json{
+				{"message", "show_connections functionality to be implemented"},
+				{"connections", json::array()}
+			});
+		} else if (tool_name == "show_queries") {
+			int limit = arguments.value("limit", 100);
+			result = create_success_response(json{
+				{"message", "show_queries functionality to be implemented"},
+				{"queries", json::array()},
+				{"limit", limit}
+			});
+		} else if (tool_name == "get_health") {
+			result = create_success_response(json{
+				{"message", "get_health functionality to be implemented"},
+				{"health", "unknown"}
+			});
+		} else if (tool_name == "show_metrics") {
+			std::string category = arguments.value("category", "all");
+			result = create_success_response(json{
+				{"message", "show_metrics functionality to be implemented"},
+				{"category", category},
+				{"metrics", json::object()}
+			});
+		} else {
+			result = create_error_response("Unknown tool: " + tool_name);
+		}
+	} catch (const std::exception& e) {
+		result = create_error_response("Invalid arguments for tool " + tool_name + ": " + e.what());
+	}
+
+	pthread_mutex_unlock(&handler_lock);
+	return result;
+}
diff --git a/lib/ProxySQL_Admin.cpp b/lib/ProxySQL_Admin.cpp
index 1cb678b488..58409f6c22 100644
--- a/lib/ProxySQL_Admin.cpp
+++ b/lib/ProxySQL_Admin.cpp
@@ -42,6 +42,7 @@ using json = nlohmann::json;
 #include "ProxySQL_Statistics.hpp"
 #include "MySQL_Logger.hpp"
 #include "PgSQL_Logger.hpp"
+#include "MCP_Thread.h"
 #include "SQLite3_Server.h"
 #include "Web_Interface.hpp"
 
@@ -323,6 +324,7 @@ extern PgSQL_Logger* GloPgSQL_Logger;
 extern MySQL_STMT_Manager_v14
*GloMyStmt;
 extern MySQL_Monitor *GloMyMon;
 extern PgSQL_Threads_Handler* GloPTH;
+extern MCP_Threads_Handler* GloMCPH;
 
 extern void (*flush_logs_function)();
 
@@ -1587,6 +1589,7 @@ bool ProxySQL_Admin::GenericRefreshStatistics(const char *query_no_space, unsign
 				flush_sqliteserver_variables___runtime_to_database(admindb, false, false, false, true);
 				flush_ldap_variables___runtime_to_database(admindb, false, false, false, true);
 				flush_pgsql_variables___runtime_to_database(admindb, false, false, false, true);
+				flush_mcp_variables___runtime_to_database(admindb, false, false, false, true, false);
 				pthread_mutex_unlock(&GloVars.checksum_mutex);
 			}
 			if (runtime_mysql_servers) {
@@ -2610,6 +2613,7 @@ ProxySQL_Admin::ProxySQL_Admin() :
 	generate_load_save_disk_commands("pgsql_users", "PGSQL USERS");
 	generate_load_save_disk_commands("pgsql_servers", "PGSQL SERVERS");
 	generate_load_save_disk_commands("pgsql_variables", "PGSQL VARIABLES");
+	generate_load_save_disk_commands("mcp_variables", "MCP VARIABLES");
 	generate_load_save_disk_commands("genai_variables", "GENAI VARIABLES");
 	generate_load_save_disk_commands("scheduler", "SCHEDULER");
 	generate_load_save_disk_commands("restapi", "RESTAPI");
@@ -2839,6 +2843,14 @@ void ProxySQL_Admin::init_pgsql_variables() {
 	flush_pgsql_variables___database_to_runtime(admindb, true);
 }
 
+// Seed the MCP variables in configdb and admindb from the runtime values, then
+// load them from admindb back into the runtime. Does nothing when the MCP
+// module (GloMCPH) is not available.
+void ProxySQL_Admin::init_mcp_variables() {
+	if (GloMCPH) {
+		flush_mcp_variables___runtime_to_database(configdb, false, false, false, false, false);
+		flush_mcp_variables___runtime_to_database(admindb, false, true, false, false, false);
+		flush_mcp_variables___database_to_runtime(admindb, true, "", 0);
+	}
+}
+
 void ProxySQL_Admin::init_genai_variables() {
 	flush_genai_variables___runtime_to_database(configdb, false, false, false);
 	flush_genai_variables___runtime_to_database(admindb, false, true, false);
diff --git a/lib/ProxySQL_MCP_Server.cpp b/lib/ProxySQL_MCP_Server.cpp
new file mode 100644
index 0000000000..fc58f6405c
--- /dev/null
+++
b/lib/ProxySQL_MCP_Server.cpp
@@ -0,0 +1,192 @@
+#include "../deps/json/json.hpp"
+using json = nlohmann::json;
+#define PROXYJSON
+
+#include "ProxySQL_MCP_Server.hpp"
+#include "MCP_Endpoint.h"
+#include "MCP_Thread.h"
+#include "MySQL_Tool_Handler.h"
+#include "MCP_Tool_Handler.h"
+#include "Config_Tool_Handler.h"
+#include "Query_Tool_Handler.h"
+#include "Admin_Tool_Handler.h"
+#include "Cache_Tool_Handler.h"
+#include "Observe_Tool_Handler.h"
+#include "proxysql_utils.h"
+
+using namespace httpserver;
+
+extern ProxySQL_Admin *GloAdmin;
+
+/**
+ * @brief Thread function for the MCP server
+ *
+ * This function runs in a dedicated thread and starts the webserver.
+ *
+ * @param arg Pointer to the webserver instance
+ * @return NULL
+ */
+static void *mcp_server_thread(void *arg) {
+	set_thread_name("MCP_Server", GloVars.set_thread_name);
+	httpserver::webserver * ws = (httpserver::webserver *)arg;
+	ws->start(true);
+	return NULL;
+}
+
+/**
+ * @brief Build the MCP HTTPS server, its tool handlers and its endpoints.
+ *
+ * Nothing is served until start() is called. When the SSL key or certificate
+ * is missing the constructor returns early, leaving ws unset and creating no
+ * tool handlers.
+ *
+ * @param p Port for the HTTPS server
+ * @param h MCP threads handler that supplies the configuration variables and
+ *          receives the tool handler pointers created here
+ */
+ProxySQL_MCP_Server::ProxySQL_MCP_Server(int p, MCP_Threads_Handler* h)
+	: port(p), handler(h), thread_id(0)
+{
+	proxy_info("Creating ProxySQL MCP Server on port %d\n", port);
+
+	// Get SSL certificates from ProxySQL
+	// NOTE(review): ssl_key/ssl_cert are never released here - confirm whether
+	// get_SSL_pem_mem() hands over ownership of the buffers.
+	char* ssl_key = NULL;
+	char* ssl_cert = NULL;
+	GloVars.get_SSL_pem_mem(&ssl_key, &ssl_cert);
+
+	// Check if SSL certificates are available
+	if (!ssl_key || !ssl_cert) {
+		proxy_error("Cannot start MCP server: SSL certificates not loaded. Please configure ssl_key_fp and ssl_cert_fp.\n");
+		return;
+	}
+
+	// Create HTTPS webserver using existing ProxySQL TLS certificates
+	// Use raw_https_mem_key/raw_https_mem_cert to pass in-memory PEM buffers
+	// NOTE(review): the unique_ptr template arguments in this function appear to
+	// have been stripped from this copy of the patch - restore from the original.
+	ws = std::unique_ptr(new webserver(
+		create_webserver(port)
+			.use_ssl()
+			.raw_https_mem_key(std::string(ssl_key))
+			.raw_https_mem_cert(std::string(ssl_cert))
+			.no_post_process()
+	));
+
+	// Initialize tool handlers for each endpoint
+	proxy_info("Initializing MCP tool handlers...\n");
+
+	// 1. Config Tool Handler
+	// On init failure the handler is deleted and the pointer reset, so the
+	// /mcp/config resource below is then created with a NULL tool handler.
+	handler->config_tool_handler = new Config_Tool_Handler(handler);
+	if (handler->config_tool_handler->init() == 0) {
+		proxy_info("Config Tool Handler initialized\n");
+	} else {
+		proxy_error("Failed to initialize Config Tool Handler\n");
+		delete handler->config_tool_handler;
+		handler->config_tool_handler = NULL;
+	}
+
+	// 2. Query Tool Handler (wraps MySQL_Tool_Handler for backward compatibility)
+	// The MySQL handler is only created when the threads handler does not hold one yet.
+	if (!handler->mysql_tool_handler) {
+		proxy_info("Initializing MySQL Tool Handler...\n");
+		handler->mysql_tool_handler = new MySQL_Tool_Handler(
+			handler->variables.mcp_mysql_hosts ? handler->variables.mcp_mysql_hosts : "",
+			handler->variables.mcp_mysql_ports ? handler->variables.mcp_mysql_ports : "",
+			handler->variables.mcp_mysql_user ? handler->variables.mcp_mysql_user : "",
+			handler->variables.mcp_mysql_password ? handler->variables.mcp_mysql_password : "",
+			handler->variables.mcp_mysql_schema ? handler->variables.mcp_mysql_schema : "",
+			handler->variables.mcp_catalog_path ? handler->variables.mcp_catalog_path : ""
+		);
+
+		if (handler->mysql_tool_handler->init() != 0) {
+			proxy_error("Failed to initialize MySQL Tool Handler\n");
+			delete handler->mysql_tool_handler;
+			handler->mysql_tool_handler = NULL;
+		} else {
+			proxy_info("MySQL Tool Handler initialized successfully\n");
+		}
+	}
+
+	// Create Query_Tool_Handler that wraps the MySQL_Tool_Handler
+	// NOTE(review): when no MySQL handler is available, query_tool_handler is not
+	// assigned in this function, yet /mcp/query is still registered below.
+	// NOTE(review): a failing init() is not reported here or for the admin, cache
+	// and observe handlers below.
+	if (handler->mysql_tool_handler) {
+		handler->query_tool_handler = new Query_Tool_Handler(handler->mysql_tool_handler);
+		if (handler->query_tool_handler->init() == 0) {
+			proxy_info("Query Tool Handler initialized\n");
+		}
+	}
+
+	// 3. Admin Tool Handler
+	handler->admin_tool_handler = new Admin_Tool_Handler(handler);
+	if (handler->admin_tool_handler->init() == 0) {
+		proxy_info("Admin Tool Handler initialized\n");
+	}
+
+	// 4. Cache Tool Handler
+	handler->cache_tool_handler = new Cache_Tool_Handler(handler);
+	if (handler->cache_tool_handler->init() == 0) {
+		proxy_info("Cache Tool Handler initialized\n");
+	}
+
+	// 5. Observe Tool Handler
+	handler->observe_tool_handler = new Observe_Tool_Handler(handler);
+	if (handler->observe_tool_handler->init() == 0) {
+		proxy_info("Observe Tool Handler initialized\n");
+	}
+
+	// Register MCP endpoints
+	// Each endpoint gets its own dedicated tool handler
+	// The raw pointer is registered with the webserver first; ownership of each
+	// resource is then moved into _endpoints.
+	std::unique_ptr config_resource =
+		std::unique_ptr(new MCP_JSONRPC_Resource(handler, handler->config_tool_handler, "config"));
+	ws->register_resource("/mcp/config", config_resource.get(), true);
+	_endpoints.push_back({"/mcp/config", std::move(config_resource)});
+
+	std::unique_ptr observe_resource =
+		std::unique_ptr(new MCP_JSONRPC_Resource(handler, handler->observe_tool_handler, "observe"));
+	ws->register_resource("/mcp/observe", observe_resource.get(), true);
+	_endpoints.push_back({"/mcp/observe", std::move(observe_resource)});
+
+	std::unique_ptr query_resource =
+		std::unique_ptr(new MCP_JSONRPC_Resource(handler, handler->query_tool_handler, "query"));
+	ws->register_resource("/mcp/query", query_resource.get(), true);
+	_endpoints.push_back({"/mcp/query", std::move(query_resource)});
+
+	std::unique_ptr admin_resource =
+		std::unique_ptr(new MCP_JSONRPC_Resource(handler, handler->admin_tool_handler, "admin"));
+	ws->register_resource("/mcp/admin", admin_resource.get(), true);
+	_endpoints.push_back({"/mcp/admin", std::move(admin_resource)});
+
+	std::unique_ptr cache_resource =
+		std::unique_ptr(new MCP_JSONRPC_Resource(handler, handler->cache_tool_handler, "cache"));
+	ws->register_resource("/mcp/cache", cache_resource.get(), true);
+	_endpoints.push_back({"/mcp/cache", std::move(cache_resource)});
+
+	proxy_info("Registered 5 MCP endpoints with dedicated tool handlers: /mcp/config, /mcp/observe, /mcp/query, /mcp/admin, /mcp/cache\n");
+}
+
+/**
+ * @brief Stop the server and release the MySQL tool handler.
+ *
+ * NOTE(review): only mysql_tool_handler is deleted here; the other tool
+ * handlers created by the constructor, and the Query_Tool_Handler that still
+ * points at the deleted MySQL handler, are not touched - confirm that
+ * MCP_Threads_Handler releases them.
+ */
+ProxySQL_MCP_Server::~ProxySQL_MCP_Server() {
+	stop();
+
+	// Clean up MySQL Tool Handler
+	if (handler && handler->mysql_tool_handler) {
+		proxy_info("Cleaning up MySQL Tool Handler...\n");
+		delete handler->mysql_tool_handler;
+		handler->mysql_tool_handler = NULL;
+	}
+}
+
+/**
+ * @brief Start serving requests in a dedicated thread.
+ *
+ * Does nothing (besides logging) when the webserver was never created, which
+ * happens when the constructor found no SSL certificates.
+ */
+void ProxySQL_MCP_Server::start() {
+	if (!ws) {
+		proxy_error("Cannot start MCP server: webserver not initialized\n");
+		return;
+	}
+
+	proxy_info("Starting MCP HTTPS server on port %d\n", port);
+
+	// Start the server in a dedicated thread.
+	// pthread_create() reports failure through its return value and does not set
+	// errno, so the returned code is what gets translated for the log.
+	int rc = pthread_create(&thread_id, NULL, mcp_server_thread, ws.get());
+	if (rc != 0) {
+		proxy_error("Failed to create MCP server thread: %s\n", strerror(rc));
+		// thread_id is undefined after a failed create; reset it so stop() does not join it
+		thread_id = 0;
+		return;
+	}
+
+	proxy_info("MCP HTTPS server started successfully\n");
+}
+
+/**
+ * @brief Stop the webserver and wait for the server thread to finish.
+ *
+ * Safe to call when the server was never started.
+ */
+void ProxySQL_MCP_Server::stop() {
+	if (ws) {
+		proxy_info("Stopping MCP HTTPS server\n");
+		ws->stop();
+
+		if (thread_id) {
+			pthread_join(thread_id, NULL);
+			thread_id = 0;
+		}
+
+		proxy_info("MCP HTTPS server stopped\n");
+	}
+}
diff --git a/lib/Query_Tool_Handler.cpp b/lib/Query_Tool_Handler.cpp
new file mode 100644
index 0000000000..d638b86fb4
--- /dev/null
+++ b/lib/Query_Tool_Handler.cpp
@@ -0,0 +1,417 @@
+#include "../deps/json/json.hpp"
+using json = nlohmann::json;
+#define PROXYJSON
+
+#include "Query_Tool_Handler.h"
+#include "proxysql_debug.h"
+
+#include
+#include
+
+/**
+ * @brief Wrap an existing MySQL_Tool_Handler; the wrapper does not own it.
+ */
+Query_Tool_Handler::Query_Tool_Handler(MySQL_Tool_Handler* handler)
+	: mysql_handler(handler), owns_handler(false)
+{
+	proxy_debug(PROXY_DEBUG_GENERIC, 3, "Query_Tool_Handler created (wrapping existing handler)\n");
+}
+
+/**
+ * @brief Create and own a new MySQL_Tool_Handler built from the given settings.
+ */
+Query_Tool_Handler::Query_Tool_Handler(
+	const std::string& hosts,
+	const std::string& ports,
+	const std::string& user,
+	const std::string& password,
+	const std::string& schema,
+	const std::string& catalog_path)
+	: owns_handler(true)
+{
+	mysql_handler = new MySQL_Tool_Handler(hosts, ports, user, password, schema, catalog_path);
+	proxy_debug(PROXY_DEBUG_GENERIC, 3, "Query_Tool_Handler created (with new handler)\n");
+}
+
+/**
+ * @brief Close the handler and delete the wrapped MySQL handler when it is owned.
+ */
+Query_Tool_Handler::~Query_Tool_Handler() {
+	close();
+	if (owns_handler && mysql_handler) {
+		delete mysql_handler;
+		mysql_handler = NULL;
+	}
+	proxy_debug(PROXY_DEBUG_GENERIC, 3, "Query_Tool_Handler destroyed\n");
+}
+
+/**
+ * @brief Initialize the handler.
+ *
+ * Only a MySQL handler created by this object is initialized here. A borrowed
+ * handler is left to its owner, mirroring close(): the MCP server initializes
+ * the shared MySQL handler before wrapping it, so initializing it again here
+ * would run its init() twice.
+ *
+ * @return 0 on success, -1 when there is no MySQL handler, otherwise the
+ *         result of the owned handler's init().
+ */
+int Query_Tool_Handler::init() {
+	if (!mysql_handler) {
+		return -1;
+	}
+	if (!owns_handler) {
+		return 0;
+	}
+	return mysql_handler->init();
+}
+
+/**
+ * @brief Close the wrapped MySQL handler, but only when this object owns it.
+ */
+void Query_Tool_Handler::close() {
+	if (owns_handler && mysql_handler) {
+		mysql_handler->close();
+	}
+}
+
+/**
+ * @brief Build one tool description with a generated JSON schema.
+ *
+ * @param tool_name       Tool name.
+ * @param description     Human readable description.
+ * @param required_params Names of required parameters; each is typed "string".
+ * @param optional_params Optional parameter name -> JSON schema type.
+ * @return Result of create_tool_description() for the generated schema.
+ */
+json Query_Tool_Handler::create_tool_schema(
+	const std::string& tool_name,
+	const std::string& description,
+	const std::vector<std::string>& required_params,
+	const std::map<std::string, std::string>& optional_params)
+{
+	json properties = json::object();
+
+	for (const auto& param : required_params) {
+		properties[param] = {
+			{"type", "string"},
+			{"description", param + " parameter"}
+		};
+	}
+
+	for (const auto& param : optional_params) {
+		properties[param.first] = {
+			{"type", param.second},
+			{"description", param.first + " parameter"}
+		};
+	}
+
+	json schema;
+	schema["type"] = "object";
+	schema["properties"] = properties;
+	// "required" is only emitted when there is something to require
+	if (!required_params.empty()) {
+		schema["required"] = required_params;
+	}
+
+	return create_tool_description(tool_name, description, schema);
+}
+
+json Query_Tool_Handler::get_tool_list() {
+	json tools = json::array();
+
+	// Inventory tools
+	tools.push_back(create_tool_schema(
+		"list_schemas",
+		"List all available schemas/databases",
+		{},
+		{{"page_token", "string"}, {"page_size", "integer"}}
+	));
+
+	tools.push_back(create_tool_schema(
+		"list_tables",
+		"List tables in a schema",
+		{"schema"},
+		{{"page_token", "string"}, {"page_size", "integer"}, {"name_filter", "string"}}
+	));
+
+	// Structure tools
+	tools.push_back(create_tool_schema(
+		"describe_table",
+		"Get detailed table schema including columns, types, keys, and indexes",
+		{"schema", "table"},
+		{}
+	));
+
+	tools.push_back(create_tool_schema(
+		"get_constraints",
+		"Get constraints (foreign keys, unique constraints, etc.) 
for a table",
+		{"schema"},
+		{{"table", "string"}}
+	));
+
+	// Profiling tools
+	tools.push_back(create_tool_schema(
+		"table_profile",
+		"Get table statistics including row count, size estimates, and data distribution",
+		{"schema", "table"},
+		{{"mode", "string"}}
+	));
+
+	tools.push_back(create_tool_schema(
+		"column_profile",
+		"Get column statistics including distinct values, null count, and top values",
+		{"schema", "table", "column"},
+		{{"max_top_values", "integer"}}
+	));
+
+	// Sampling tools
+	tools.push_back(create_tool_schema(
+		"sample_rows",
+		"Get sample rows from a table (with hard cap on rows returned)",
+		{"schema", "table"},
+		{{"columns", "string"}, {"where", "string"}, {"order_by", "string"}, {"limit", "integer"}}
+	));
+
+	tools.push_back(create_tool_schema(
+		"sample_distinct",
+		"Sample distinct values from a column",
+		{"schema", "table", "column"},
+		{{"where", "string"}, {"limit", "integer"}}
+	));
+
+	// Query tools
+	tools.push_back(create_tool_schema(
+		"run_sql_readonly",
+		"Execute a read-only SQL query with safety guardrails enforced",
+		{"sql"},
+		{{"max_rows", "integer"}, {"timeout_sec", "integer"}}
+	));
+
+	tools.push_back(create_tool_schema(
+		"explain_sql",
+		"Explain a query execution plan using EXPLAIN or EXPLAIN ANALYZE",
+		{"sql"},
+		{}
+	));
+
+	// Relationship inference tools
+	tools.push_back(create_tool_schema(
+		"suggest_joins",
+		"Suggest table joins based on heuristic analysis of column names and types",
+		{"schema", "table_a"},
+		{{"table_b", "string"}, {"max_candidates", "integer"}}
+	));
+
+	tools.push_back(create_tool_schema(
+		"find_reference_candidates",
+		"Find tables that might be referenced by a foreign key column",
+		{"schema", "table", "column"},
+		{{"max_tables", "integer"}}
+	));
+
+	// Catalog tools (LLM memory)
+	tools.push_back(create_tool_schema(
+		"catalog_upsert",
+		"Store or update an entry in the catalog (LLM external memory)",
+		{"kind", "key", "document"},
+		{{"tags", "string"}, {"links", "string"}}
+	));
+
+	tools.push_back(create_tool_schema(
+		"catalog_get",
+		"Retrieve an entry from the catalog",
+		{"kind", "key"},
+		{}
+	));
+
+	tools.push_back(create_tool_schema(
+		"catalog_search",
+		"Search the catalog for entries matching a query",
+		{"query"},
+		{{"kind", "string"}, {"tags", "string"}, {"limit", "integer"}, {"offset", "integer"}}
+	));
+
+	tools.push_back(create_tool_schema(
+		"catalog_list",
+		"List catalog entries by kind",
+		{},
+		{{"kind", "string"}, {"limit", "integer"}, {"offset", "integer"}}
+	));
+
+	tools.push_back(create_tool_schema(
+		"catalog_merge",
+		"Merge multiple catalog entries into a single consolidated entry",
+		{"keys", "target_key"},
+		{{"kind", "string"}, {"instructions", "string"}}
+	));
+
+	tools.push_back(create_tool_schema(
+		"catalog_delete",
+		"Delete an entry from the catalog",
+		{"kind", "key"},
+		{}
+	));
+
+	json result;
+	result["tools"] = tools;
+	return result;
+}
+
+/**
+ * @brief Look up one tool description by name.
+ *
+ * @return The matching entry of get_tool_list(), or an error response when no
+ *         tool has that name.
+ */
+json Query_Tool_Handler::get_tool_description(const std::string& tool_name) {
+	json tools_list = get_tool_list();
+	for (const auto& tool : tools_list["tools"]) {
+		if (tool["name"] == tool_name) {
+			return tool;
+		}
+	}
+	return create_error_response("Tool not found: " + tool_name);
+}
+
+// Helper function to safely extract a string value from JSON.
+// Returns default_val when the key is missing or null. A string is returned
+// as-is; any other value (number, array, object, ...) is returned as its JSON
+// text, so a structured argument can still be forwarded to a string parameter.
+// Argument values are deliberately not logged: they may hold SQL text or
+// catalog documents.
+static std::string get_json_string(const json& j, const std::string& key, const std::string& default_val = "") {
+	auto it = j.find(key);
+	if (it == j.end() || it->is_null()) {
+		return default_val;
+	}
+	if (it->is_string()) {
+		return it->get<std::string>();
+	}
+	return it->dump();
+}
+
+// Helper function to safely extract int value from JSON.
+// Returns default_val when the key is missing or null. Numbers are converted
+// directly and a string made up entirely of an integer (e.g. "50") is accepted
+// too. Any other value raises the library's type error, as it did before, so
+// the caller can report the bad argument.
+static int get_json_int(const json& j, const std::string& key, int default_val = 0) {
+	auto it = j.find(key);
+	if (it == j.end() || it->is_null()) {
+		return default_val;
+	}
+	if (it->is_string()) {
+		const std::string text = it->get<std::string>();
+		try {
+			size_t used = 0;
+			int parsed = std::stoi(text, &used);
+			if (used == text.size()) {
+				return parsed;
+			}
+		} catch (const std::exception&) {
+			// Not an integer: fall through to the type error below
+		}
+	}
+	return it->get<int>();
+}
+
+json Query_Tool_Handler::execute_tool(const std::string& tool_name, const json& arguments) {
+	fprintf(stderr, "DEBUG: execute_tool tool_name=%s, arguments=%s\n", tool_name.c_str(), arguments.dump().c_str());
+
+	if (!mysql_handler) {
+		return create_error_response("MySQL handler not initialized");
+	}
+
+	std::string result_str;
+
+	try {
+		// Inventory tools
+		if (tool_name == "list_schemas") {
+			std::string page_token = get_json_string(arguments, "page_token");
+			int page_size = get_json_int(arguments, "page_size", 50);
+			result_str = mysql_handler->list_schemas(page_token, page_size);
+		}
+		else if (tool_name == "list_tables") {
+			std::string schema = get_json_string(arguments, "schema");
+			std::string page_token = get_json_string(arguments, "page_token");
+			int page_size = get_json_int(arguments, "page_size", 50);
+			std::string name_filter = get_json_string(arguments, "name_filter");
+			result_str = mysql_handler->list_tables(schema, page_token, page_size, name_filter);
+		}
+		// Structure tools
+		else if (tool_name == "describe_table") {
+			std::string schema = get_json_string(arguments, "schema");
+			std::string table = get_json_string(arguments, "table");
+			result_str = mysql_handler->describe_table(schema, table);
+		}
+		else if (tool_name == "get_constraints") {
+			std::string schema = get_json_string(arguments, "schema");
+			std::string table = get_json_string(arguments, "table");
+			result_str = mysql_handler->get_constraints(schema, table);
+		}
+		// Profiling tools
+		else if (tool_name == 
"table_profile") { + std::string schema = get_json_string(arguments, "schema"); + std::string table = get_json_string(arguments, "table"); + std::string mode = get_json_string(arguments, "mode", "quick"); + result_str = mysql_handler->table_profile(schema, table, mode); + } + else if (tool_name == "column_profile") { + std::string schema = get_json_string(arguments, "schema"); + std::string table = get_json_string(arguments, "table"); + std::string column = get_json_string(arguments, "column"); + int max_top_values = get_json_int(arguments, "max_top_values", 20); + result_str = mysql_handler->column_profile(schema, table, column, max_top_values); + } + // Sampling tools + else if (tool_name == "sample_rows") { + std::string schema = get_json_string(arguments, "schema"); + std::string table = get_json_string(arguments, "table"); + std::string columns = get_json_string(arguments, "columns"); + std::string where = get_json_string(arguments, "where"); + std::string order_by = get_json_string(arguments, "order_by"); + int limit = get_json_int(arguments, "limit", 20); + result_str = mysql_handler->sample_rows(schema, table, columns, where, order_by, limit); + } + else if (tool_name == "sample_distinct") { + std::string schema = get_json_string(arguments, "schema"); + std::string table = get_json_string(arguments, "table"); + std::string column = get_json_string(arguments, "column"); + std::string where = get_json_string(arguments, "where"); + int limit = get_json_int(arguments, "limit", 50); + result_str = mysql_handler->sample_distinct(schema, table, column, where, limit); + } + // Query tools + else if (tool_name == "run_sql_readonly") { + std::string sql = get_json_string(arguments, "sql"); + int max_rows = get_json_int(arguments, "max_rows", 200); + int timeout_sec = get_json_int(arguments, "timeout_sec", 2); + result_str = mysql_handler->run_sql_readonly(sql, max_rows, timeout_sec); + } + else if (tool_name == "explain_sql") { + std::string sql = 
get_json_string(arguments, "sql"); + result_str = mysql_handler->explain_sql(sql); + } + // Relationship inference tools + else if (tool_name == "suggest_joins") { + std::string schema = get_json_string(arguments, "schema"); + std::string table_a = get_json_string(arguments, "table_a"); + std::string table_b = get_json_string(arguments, "table_b"); + int max_candidates = get_json_int(arguments, "max_candidates", 5); + result_str = mysql_handler->suggest_joins(schema, table_a, table_b, max_candidates); + } + else if (tool_name == "find_reference_candidates") { + std::string schema = get_json_string(arguments, "schema"); + std::string table = get_json_string(arguments, "table"); + std::string column = get_json_string(arguments, "column"); + int max_tables = get_json_int(arguments, "max_tables", 50); + result_str = mysql_handler->find_reference_candidates(schema, table, column, max_tables); + } + // Catalog tools + else if (tool_name == "catalog_upsert") { + std::string kind = get_json_string(arguments, "kind"); + std::string key = get_json_string(arguments, "key"); + std::string document = get_json_string(arguments, "document"); + std::string tags = get_json_string(arguments, "tags"); + std::string links = get_json_string(arguments, "links"); + result_str = mysql_handler->catalog_upsert(kind, key, document, tags, links); + } + else if (tool_name == "catalog_get") { + std::string kind = get_json_string(arguments, "kind"); + std::string key = get_json_string(arguments, "key"); + result_str = mysql_handler->catalog_get(kind, key); + } + else if (tool_name == "catalog_search") { + std::string query = get_json_string(arguments, "query"); + std::string kind = get_json_string(arguments, "kind"); + std::string tags = get_json_string(arguments, "tags"); + int limit = get_json_int(arguments, "limit", 20); + int offset = get_json_int(arguments, "offset", 0); + result_str = mysql_handler->catalog_search(query, kind, tags, limit, offset); + } + else if (tool_name == 
"catalog_list") { + std::string kind = get_json_string(arguments, "kind"); + int limit = get_json_int(arguments, "limit", 50); + int offset = get_json_int(arguments, "offset", 0); + result_str = mysql_handler->catalog_list(kind, limit, offset); + } + else if (tool_name == "catalog_merge") { + std::string keys = get_json_string(arguments, "keys"); + std::string target_key = get_json_string(arguments, "target_key"); + std::string kind = get_json_string(arguments, "kind", "domain"); + std::string instructions = get_json_string(arguments, "instructions"); + result_str = mysql_handler->catalog_merge(keys, target_key, kind, instructions); + } + else if (tool_name == "catalog_delete") { + std::string kind = get_json_string(arguments, "kind"); + std::string key = get_json_string(arguments, "key"); + result_str = mysql_handler->catalog_delete(kind, key); + } + else { + return create_error_response("Unknown tool: " + tool_name); + } + + // Parse the result and return + try { + json result_json = json::parse(result_str); + return create_success_response(result_json); + } catch (const json::parse_error& e) { + // If parsing fails, return as string + json result; + result["data"] = result_str; + return create_success_response(result); + } + + } catch (const std::exception& e) { + return create_error_response(std::string("Exception: ") + e.what()); + } +} diff --git a/proxysql-ca.pem b/proxysql-ca.pem new file mode 100644 index 0000000000..68a417bb98 --- /dev/null +++ b/proxysql-ca.pem @@ -0,0 +1,18 @@ +-----BEGIN CERTIFICATE----- +MIIC8zCCAdugAwIBAgIEaWLxIjANBgkqhkiG9w0BAQsFADAxMS8wLQYDVQQDDCZQ +cm94eVNRTF9BdXRvX0dlbmVyYXRlZF9DQV9DZXJ0aWZpY2F0ZTAeFw0yNjAxMTEw +MDM4NThaFw0zNjAxMDkwMDM4NThaMDExLzAtBgNVBAMMJlByb3h5U1FMX0F1dG9f +R2VuZXJhdGVkX0NBX0NlcnRpZmljYXRlMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8A +MIIBCgKCAQEAqNVkkQPrGTuUxpXupBMLTBPATs7/xZ2lsGOy3tT7MansRicPv8hl +7KFd8HLm+JmGmW0tRibvrGfM4WJP4R5EXcR+ZVncGPuM4AUR1Vfz3EQIszPmyEM0 
+le/L7FTf/j/MZywA2LypiLOfj2ehZwZRD/aC7iKhRSQ6sG8Ed3V2mD7CAtRhbJOq +pZSvqjIpci873przhQrEHC+npwP0f6km4mHySx3K5LAeU0eSB+h2dhr13RtsDUA8 +zIG89yD+PJLFGIZBG2inCjtCae3IG4okCqsiO5DcrL+eAnZwQ5gNFZxKs9SLyz4d +zbYg5bRRO/CNFTZPc0gnOHEBI0XiLksYFQIDAQABoxMwETAPBgNVHRMBAf8EBTAD +AQH/MA0GCSqGSIb3DQEBCwUAA4IBAQAI4RutTG3qKX1jJDMelGbY5UGXRtFll/WG +GdjnBI4V1q891yNbSn5zyzun5icqyXm3ruYNhBuAU7glI30+8wsQRAwAU938ZV3H +iHtLJ2GvrlzzuAb8yqKob2a64VvFGcsXgTu9dMNDTzbVG2ySo4GTmpkJ9wQDsdct +1rzgbLkK078zA0F1zj2GLW+ixKfirMtMzOyXTlRLkWd2Bkzxlco6LPL9+6oiwPjm +prqte2eOhfYkyOk9oJ6Nzyce2lkAldY+tSeOg9tc1asY15mFnssp48dXashYp1eU +ld7R1Jg5/o7sgIgOs6SAYbIsrY4v//I8tmuynU37rFlTD3vB4nnt +-----END CERTIFICATE----- diff --git a/proxysql-cert.pem b/proxysql-cert.pem new file mode 100644 index 0000000000..93bcf330c0 --- /dev/null +++ b/proxysql-cert.pem @@ -0,0 +1,18 @@ +-----BEGIN CERTIFICATE----- +MIIC9DCCAdygAwIBAgIEaWLxIjANBgkqhkiG9w0BAQsFADAxMS8wLQYDVQQDDCZQ +cm94eVNRTF9BdXRvX0dlbmVyYXRlZF9DQV9DZXJ0aWZpY2F0ZTAeFw0yNjAxMTEw +MDM4NThaFw0zNjAxMDkwMDM4NThaMDUxMzAxBgNVBAMMKlByb3h5U1FMX0F1dG9f +R2VuZXJhdGVkX1NlcnZlcl9DZXJ0aWZpY2F0ZTCCASIwDQYJKoZIhvcNAQEBBQAD +ggEPADCCAQoCggEBAKjVZJED6xk7lMaV7qQTC0wTwE7O/8WdpbBjst7U+zGp7EYn +D7/IZeyhXfBy5viZhpltLUYm76xnzOFiT+EeRF3EfmVZ3Bj7jOAFEdVX89xECLMz +5shDNJXvy+xU3/4/zGcsANi8qYizn49noWcGUQ/2gu4ioUUkOrBvBHd1dpg+wgLU +YWyTqqWUr6oyKXIvO96a84UKxBwvp6cD9H+pJuJh8ksdyuSwHlNHkgfodnYa9d0b +bA1APMyBvPcg/jySxRiGQRtopwo7QmntyBuKJAqrIjuQ3Ky/ngJ2cEOYDRWcSrPU +i8s+Hc22IOW0UTvwjRU2T3NIJzhxASNF4i5LGBUCAwEAAaMQMA4wDAYDVR0TAQH/ +BAIwADANBgkqhkiG9w0BAQsFAAOCAQEAnk0MVxaLgzRn5SswunDdCypcRiexzISE +iMsEss78W7t43kzyfucVS0RPMdj/IFubfjV1UaCl/nl1wNILTsL2hTICovfHGFrx +BvawfnYZazxs60Y6Qig+/Q3SLvldH0dU/6ZUJfVMYevDWJ9qd6oHBCQGU/wldBje +EXrs/K2XjI66sP5qzeRoLIY5cXkMvFPy1/Oy5eqIbYqjxw4iNTSVQNV0LRE3h5Lm +FxMT+V/B4QV+x9rqcoFZJi1qGEM42mI8ctCs7kAgROry+Nzk0qVrgmSOYsTuXM6P +s3ueYOhh32VFYH0bmpkKsYakfcCjNYFTb3pRaxxaHdjxPkI3LMbSoQ== +-----END CERTIFICATE----- diff --git a/proxysql-key.pem b/proxysql-key.pem new file mode 
100644 index 0000000000..3593494168 --- /dev/null +++ b/proxysql-key.pem @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEpQIBAAKCAQEAqNVkkQPrGTuUxpXupBMLTBPATs7/xZ2lsGOy3tT7MansRicP +v8hl7KFd8HLm+JmGmW0tRibvrGfM4WJP4R5EXcR+ZVncGPuM4AUR1Vfz3EQIszPm +yEM0le/L7FTf/j/MZywA2LypiLOfj2ehZwZRD/aC7iKhRSQ6sG8Ed3V2mD7CAtRh +bJOqpZSvqjIpci873przhQrEHC+npwP0f6km4mHySx3K5LAeU0eSB+h2dhr13Rts +DUA8zIG89yD+PJLFGIZBG2inCjtCae3IG4okCqsiO5DcrL+eAnZwQ5gNFZxKs9SL +yz4dzbYg5bRRO/CNFTZPc0gnOHEBI0XiLksYFQIDAQABAoIBAEIyaRvyzVs3YT37 +y3XJgcRyehRsVRzGkxB2BswX9eWjGmDnL+WiTVRacNq2MpmGmJ/PjtDSs2aFzG8S +fP9nPqcFRAm5EfM5riKn2jYsJhFXG5In53Td5OBlBS/El464tQw+1JYmYtKWmxk/ +KKmccGwx22RDb7gMXHaREM9F3xoR3SpHxsvz1D/YauciRf7hgwm7i5dikCY0kg58 +GI59/HAZgwq/xY9fJ6Z67fPTXLMn1frkmD74yEinNP4ms4gbFSeZvKx8S5Es1N0a +f68Ba1ZYispW+8idVWEKsdrku9DCEELQbIc6dWxDA4AjXCYVZJDbnjYtNgqM+beI +dUIMcIECgYEA6PFFdGjgjRn2jixXp2wA5ViKEuxPvjdCwPMxz+42MrhSb3DQz+aN +rEE3WzJy5nL1NRFVY7MLcWNUjh4iaE9LTClAtZX5Vws0gAeNbA0fPBmydgYuiErQ +qyA3DwFRETv9IFg3sk0j9uC7a2lqcvrbf/sW2CkvH4XygXbYQctQRssCgYEAuYuc +dtw4sUZPmQw6VlYgSp2r7DQqh49wU2JifbpZqMk+gOW/6AhKERkNJDI33l+OOt70 +tMpBeXa7Ew7qUyYzGKEEJcK3H2dZ6DkY+rnsZaHehPeEsxJNBB2LYswYNkvGXkY+ +99y3rMGygIhVs3C6Z5SKwMGJIKVkog88ZzdJYJ8CgYEAkp/r/A5X6flBvNQkiHnv +Rm2o26hruWvHVPS/kgZ7jwl+ui7lATg6TQbv9TOYJ36M4k561TrKJSFFA//r4ISo +/NOqq6IvRJ8E+OHIHw9Tbd0u/CN//sI4/r5UadmGUbbU6hsdU9pCnQ9waXf9TUqi +B7jg9EdYJhuGPf+0uBVl/mkCgYEAqC6QKHz9NlLRG50l09RFeNzqVTQDyNSPsEVh +mS0sz/16FkQqaxv4Zv8aFlEeqwZaWap2jNk39+1TLLc8Vxos/ooUxFV2v5Rivkfj +CIE2cfkDRetF8TsJbE2LZoYw/CY7LIDn2qvKIWGBd1gctoXbsL/H9Wh374t7aBn/ +Wl+Wt2kCgYEAnKsy5A2YybPzMsZzRlbNjYiNeOJIH1UM+6I8g0q/F7TzzNiM80Co +DRvkAADqv6KU2Bh9EVYJR0q9CmvYru5MoAMSgt5yLm2lpvSU3iDTyuS4Py5raH5O +Ud5//1fXYVC84n6nN5KdhsHozmADaJeh0qpDx45nhq3+ZL4yCHw6QeY= +-----END RSA PRIVATE KEY----- diff --git a/scripts/mcp/README.md b/scripts/mcp/README.md new file mode 100644 index 0000000000..926a492a85 --- /dev/null +++ b/scripts/mcp/README.md @@ -0,0 +1,571 @@ +# MCP Module Testing Suite + 
+This directory contains scripts to test the ProxySQL MCP (Model Context Protocol) module with MySQL connection pool and exploration tools. + +## Table of Contents + +1. [Architecture Overview](#architecture-overview) +2. [Components](#components) +3. [Testing Flow](#testing-flow) +4. [Quick Start (Copy/Paste)](#quick-start-copypaste) +5. [Detailed Documentation](#detailed-documentation) +6. [Troubleshooting](#troubleshooting) + +--- + +## Architecture Overview + +### What is MCP? + +MCP (Model Context Protocol) is a JSON-RPC 2.0 protocol that allows AI/LLM applications to: +- **Discover** database schemas (list tables, describe columns, view relationships) +- **Explore** data safely (sample rows, run read-only queries with guardrails) +- **Remember** discoveries in an external catalog (SQLite-based memory for LLM) + +### Component Architecture + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ ProxySQL MCP Module │ +├─────────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌─────────────────────────────────────────────────────────────┐ │ +│ │ ProxySQL Admin Interface (Port 6032) │ │ +│ │ Configure: mcp-enabled, mcp-mysql_hosts, mcp-port, etc. │ │ +│ └──────────────────────────┬──────────────────────────────────┘ │ +│ │ │ +│ ┌──────────────────────────▼──────────────────────────────────┐ │ +│ │ MCP HTTPS Server (Port 6071) │ │ +│ │ │ │ +│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │ +│ │ │ /config │ │ /query │ │ /admin │ │ │ +│ │ │ endpoint │ │ endpoint │ │ endpoint │ │ │ +│ │ └──────┬──────┘ └──────┬──────┘ └─────────────┘ │ │ +│ └─────────┼─────────────────┼─────────────────────────────────┘ │ +│ │ │ │ +│ ┌─────────▼─────────────────▼─────────────────────────────────┐ │ +│ │ MySQL_Tool_Handler │ │ +│ │ ┌─────────────────────────────────────────────────────┐ │ │ +│ │ │ MySQL Connection Pool │ │ │ +│ │ │ ┌─────┐ ┌─────┐ ┌─────┐ ┌─────┐ │ │ │ +│ │ │ │Conn1│ │Conn2│ │Conn3│ │ ... 
│ (to MySQL) │ │ │ +│ │ │ └──┬──┘ └──┬──┘ └──┬──┘ └──┬──┘ │ │ │ +│ │ │ └──────┴──────┴──────┴──────┘ │ │ │ +│ │ └─────────────────────────────────────────────────────┘ │ │ +│ │ │ │ +│ │ Tool Methods: │ │ +│ │ • list_schemas, list_tables, describe_table │ │ +│ │ • sample_rows, sample_distinct, run_sql_readonly │ │ +│ │ • catalog_upsert, catalog_get, catalog_search │ │ +│ └──────────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌─────────────────────────────────────────────────────────────┐ │ +│ │ MySQL_Catalog (SQLite Memory) │ │ +│ │ • LLM discoveries catalog (FTS searchable) │ │ +│ │ • Tables: catalog_entries, catalog_links │ │ +│ └──────────────────────────────────────────────────────────────┘ │ +│ │ +└──────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────────────────────────────────┐ +│ MySQL Server (Port 3306) │ +│ • Test Database: testdb │ +│ • Tables: customers, orders, products, etc. │ +└──────────────────────────────────────────────────────────────────────┘ +``` + +### MCP Tools Available + +| Category | Tools | Purpose | +|----------|-------|---------| +| **Inventory** | `list_schemas`, `list_tables` | Discover available databases and tables | +| **Structure** | `describe_table`, `get_constraints` | Get schema details (columns, keys, indexes) | +| **Sampling** | `sample_rows`, `sample_distinct` | Sample data safely with row limits | +| **Query** | `run_sql_readonly`, `explain_sql` | Execute SELECT queries with guardrails | +| **Catalog** | `catalog_upsert`, `catalog_get`, `catalog_search` | Store/retrieve LLM discoveries | + +--- + +## Components + +### 1. ProxySQL MCP Module + +**Location:** Built into ProxySQL (`lib/MCP_*.cpp`) + +**Purpose:** Exposes HTTPS endpoints that implement JSON-RPC 2.0 protocol for LLM integration. 
+ +**Key Configuration Variables:** + +| Variable | Default | Description | +|----------|---------|-------------| +| `mcp-enabled` | false | Enable/disable MCP server | +| `mcp-port` | 6071 | HTTPS port for MCP endpoints | +| `mcp-mysql_hosts` | 127.0.0.1 | MySQL server(s) for tool execution | +| `mcp-mysql_ports` | 3306 | MySQL port(s) | +| `mcp-mysql_user` | (empty) | MySQL username for connections | +| `mcp-mysql_password` | (empty) | MySQL password | +| `mcp-mysql_schema` | (empty) | Default schema for queries | +| `mcp-catalog_path` | mcp_catalog.db | SQLite catalog database path (relative to datadir) | + +**Endpoints:** +- `POST https://localhost:6071/config` - Initialize, ping, tools/list +- `POST https://localhost:6071/query` - Execute tools (tools/call) + +### 2. MySQL Connection Pool + +**Location:** `lib/MySQL_Tool_Handler.cpp` + +**Purpose:** Manages reusable connections to backend MySQL servers for tool execution. + +**Features:** +- Thread-safe connection pooling with `pthread_mutex_t` +- One connection per configured `host:port` pair +- Automatic connection on first use +- 5-second timeouts for connect/read/write operations + +### 3. MySQL Catalog (LLM Memory) + +**Location:** `lib/MySQL_Catalog.cpp` + +**Purpose:** External memory for LLM to store discoveries with full-text search. + +**Features:** +- SQLite-based storage (`mcp_catalog.db`) +- Full-text search (FTS) on document content +- Link tracking between related entries +- Entry kinds: table, domain, column, relationship, pattern + +### 4. 
Test Scripts + +| Script | Purpose | What it Does | +|--------|---------|--------------| +| `setup_test_db.sh` | Database setup | Creates test MySQL database with sample data (customers, orders, products) | +| `configure_mcp.sh` | ProxySQL configuration | Sets MCP variables and loads to runtime | +| `test_mcp_tools.sh` | Tool testing | Tests all 15 MCP tools via JSON-RPC | +| `test_catalog.sh` | Catalog testing | Tests catalog CRUD and FTS search | +| `stress_test.sh` | Load testing | Concurrent connection stress test | + +--- + +## Testing Flow + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ Step 1: Setup Test Database │ +│ ───────────────────────────────────────────────────────────────── │ +│ ./setup_test_db.sh start --mode native │ +│ │ +│ → Creates 'testdb' database on your MySQL server │ +│ → Creates tables: customers, orders, products, order_items │ +│ → Inserts sample data (5 customers, 5 products, 5 orders) │ +│ → Creates view: customer_orders │ +│ → Creates stored procedure: get_customer_stats │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ Step 2: Configure ProxySQL MCP Module │ +│ ───────────────────────────────────────────────────────────────── │ +│ ./configure_mcp.sh --host 127.0.0.1 --port 3306 --user root \ │ +│ --password your_password --enable │ +│ │ +│ → Sets mcp-mysql_hosts=127.0.0.1 │ +│ → Sets mcp-mysql_ports=3306 │ +│ → Sets mcp-mysql_user=root │ +│ → Sets mcp-mysql_password=your_password │ +│ → Sets mcp-mysql_schema=testdb │ +│ → Sets mcp-enabled=true │ +│ → Loads MCP VARIABLES TO RUNTIME │ +│ │ +│ Result: │ +│ → MySQL_Tool_Handler initializes connection pool │ +│ → Connection established to MySQL server │ +│ → HTTPS server starts on port 6071 │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ 
Step 3: Test MCP Tools │ +│ ───────────────────────────────────────────────────────────────── │ +│ ./test_mcp_tools.sh │ +│ │ +│ → Sends JSON-RPC requests to https://localhost:6071/query │ +│ → Tests tools: list_schemas, list_tables, describe_table, etc. │ +│ → Verifies responses are valid JSON with expected data │ +│ │ +│ Example Request: │ +│ POST /query │ +│ { │ +│ "jsonrpc": "2.0", │ +│ "method": "tools/call", │ +│ "params": { │ +│ "name": "list_tables", │ +│ "arguments": {"schema": "testdb"} │ +│ }, │ +│ "id": 1 │ +│ } │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ Step 4: Verify Connection Pool │ +│ ───────────────────────────────────────────────────────────────── │ +│ grep "MySQL_Tool_Handler" /path/to/proxysql.log │ +│ │ +│ Expected logs: │ +│ MySQL_Tool_Handler: Connected to 127.0.0.1:3306 │ +│ MySQL_Tool_Handler: Connection pool initialized with 1 connection(s)│ +│ MySQL Tool Handler initialized for schema 'testdb' │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Quick Start (Copy/Paste) + +### Prerequisites - Set Environment Variables + +```bash +# Add to ~/.bashrc or run before testing +export PROXYSQL_ADMIN_PASSWORD=admin # Your ProxySQL admin password +export MYSQL_PASSWORD=your_mysql_password # Your MySQL root password +``` + +### Option A: Using Real MySQL (Recommended) + +```bash +cd /path/to/proxysql/scripts/mcp + +# 1. Setup test database on your MySQL server +./setup_test_db.sh start --mode native + +# 2. Configure and enable ProxySQL MCP module +./configure_mcp.sh --host 127.0.0.1 --port 3306 --user root --enable + +# 3. Run all MCP tool tests +./test_mcp_tools.sh + +# 4. Run catalog tests +./test_catalog.sh + +# 5. Run stress test (10 concurrent requests) +./stress_test.sh -n 10 + +# 6.
Clean up (drop test database when done) +./setup_test_db.sh reset --mode native +``` + +### Option B: Using Docker + +```bash +cd /path/to/proxysql/scripts/mcp + +# 1. Start test MySQL container +./setup_test_db.sh start --mode docker + +# 2. Configure and enable ProxySQL MCP module +./configure_mcp.sh --host 127.0.0.1 --port 3307 --user root --password test123 --enable + +# 3. Run all MCP tool tests +./test_mcp_tools.sh + +# 4. Stop test MySQL container when done +./setup_test_db.sh stop --mode docker +``` + +--- + +## Detailed Documentation + +### setup_test_db.sh - Database Setup + +**Purpose:** Creates a test MySQL database with sample schema and data for MCP testing. + +**What it does:** +- Creates `testdb` database with 4 tables: `customers`, `orders`, `products`, `order_items` +- Inserts sample data (5 customers, 5 products, 5 orders with items) +- Creates a view (`customer_orders`) and stored procedure (`get_customer_stats`) +- Generates `init_testdb.sql` for reproducibility + +**Commands:** +```bash +./setup_test_db.sh start [--mode native|docker] # Create test database +./setup_test_db.sh status [--mode native|docker] # Check database status +./setup_test_db.sh connect [--mode native|docker] # Connect to MySQL shell +./setup_test_db.sh reset [--mode native|docker] # Drop/recreate database +./setup_test_db.sh --help # Show help +``` + +**Native Mode (your MySQL server):** +```bash +# With defaults (127.0.0.1:3306, root user) +./setup_test_db.sh start --mode native + +# With custom credentials +./setup_test_db.sh start --mode native --host localhost --port 3307 \ + --user myuser --password mypass +``` + +**Docker Mode (isolated container):** +```bash +./setup_test_db.sh start --mode docker +# Container port: 3307, root user, password: test123 +``` + +### configure_mcp.sh - ProxySQL Configuration + +**Purpose:** Configures ProxySQL MCP module variables via admin interface. + +**What it does:** +1.
Connects to ProxySQL admin interface (default: 127.0.0.1:6032) +2. Sets MCP configuration variables: + - `mcp-mysql_hosts` - Where to find MySQL server + - `mcp-mysql_ports` - MySQL port + - `mcp-mysql_user` - MySQL username + - `mcp-mysql_password` - MySQL password + - `mcp-mysql_schema` - Default database + - `mcp-enabled` - Enable/disable MCP server +3. Loads variables to RUNTIME (activates the configuration) +4. Optionally tests MCP server connectivity + +**Commands:** +```bash +./configure_mcp.sh --enable # Enable with defaults +./configure_mcp.sh --disable # Disable MCP server +./configure_mcp.sh --status # Show current configuration +./configure_mcp.sh --help # Show help +``` + +**Options:** +```bash +--host HOST MySQL host (default: 127.0.0.1) +--port PORT MySQL port (default: 3307 for Docker, 3306 for native) +--user USER MySQL user (default: root) +--password PASS MySQL password +--database DB Default database (default: testdb) +--mcp-port PORT MCP HTTPS port (default: 6071) +``` + +**Full Example:** +```bash +./configure_mcp.sh \ + --host 127.0.0.1 \ + --port 3306 \ + --user root \ + --password your_password \ + --database testdb \ + --enable +``` + +**What happens when you run `--enable`:** +1. Sets `mcp-mysql_hosts='127.0.0.1'` in ProxySQL +2. Sets `mcp-mysql_ports='3306'` in ProxySQL +3. Sets `mcp-mysql_user='root'` in ProxySQL +4. Sets `mcp-mysql_password='your_password'` in ProxySQL +5. Sets `mcp-mysql_schema='testdb'` in ProxySQL +6. Sets `mcp-enabled='true'` in ProxySQL +7. Runs `LOAD MCP VARIABLES TO RUNTIME` +8. `MySQL_Tool_Handler` initializes connection pool to MySQL +9. HTTPS server starts listening on port 6071 + +### test_mcp_tools.sh - Tool Testing + +**Purpose:** Tests all MCP tools via HTTPS/JSON-RPC to verify the connection pool and tools work. 
+ +**What it does:** +- Sends JSON-RPC 2.0 requests to MCP `/query` endpoint +- Tests 15 tools across 6 categories +- Validates JSON responses +- Reports pass/fail statistics + +**Tools Tested:** + +| Category | Tools | What it Verifies | +|----------|-------|-------------------| +| Inventory | `list_schemas`, `list_tables` | Connection works, can query information_schema | +| Structure | `describe_table`, `get_constraints`, `describe_view` | Can read schema details | +| Profiling | `table_profile`, `column_profile` | Aggregation queries work | +| Sampling | `sample_rows`, `sample_distinct` | Can sample data with limits | +| Query | `run_sql_readonly`, `explain_sql` | Query guardrails and execution | +| Catalog | `catalog_upsert`, `catalog_get`, `catalog_search` | Catalog CRUD works | + +**Commands:** +```bash +./test_mcp_tools.sh # Test all tools +./test_mcp_tools.sh --tool list_schemas # Test single tool +./test_mcp_tools.sh --skip-tool 'catalog_*' # Skip catalog tests (quoted so the shell does not expand the glob) +./test_mcp_tools.sh -v # Verbose output +``` + +**Example Test Flow:** +```bash +$ ./test_mcp_tools.sh --tool list_tables + +[TEST] Testing tool: list_tables +[INFO] ✓ list_tables + +Test Summary +Total tests: 1 +Passed: 1 +Failed: 0 +``` + +### test_catalog.sh - Catalog Testing + +**Purpose:** Tests the SQLite catalog (LLM memory) functionality. + +**What it does:** +- Tests catalog CRUD operations (Create, Read, Update, Delete) +- Tests full-text search (FTS) +- Tests entry linking between related discoveries + +**Tests:** +1. `CAT001`: Upsert table schema entry +2. `CAT002`: Upsert domain knowledge entry +3. `CAT003`: Get table entry +4. `CAT004`: Get domain entry +5. `CAT005`: Search catalog +6. `CAT006`: List entries by kind +7. `CAT007`: Update existing entry +8. `CAT008`: Verify update +9. `CAT009`: FTS search with wildcard +10. `CAT010`: Delete entry +11. `CAT011`: Verify deletion +12.
`CAT012`: Cleanup domain entry + +### stress_test.sh - Load Testing + +**Purpose:** Tests concurrent connection handling by the connection pool. + +**What it does:** +- Launches N concurrent requests to MCP server +- Measures response times +- Reports success rate and requests/second + +**Commands:** +```bash +./stress_test.sh -n 10 # 10 concurrent requests +./stress_test.sh -n 50 -d 100 # 50 requests, 100ms delay +./stress_test.sh -t list_tables -v # Test specific tool +``` + +--- + +## Troubleshooting + +### MCP server not starting + +**Check ProxySQL logs:** +```bash +tail -f /path/to/proxysql.log | grep -i mcp +``` + +**Verify configuration:** +```sql +mysql -h 127.0.0.1 -P 6032 -u admin -padmin +SHOW VARIABLES LIKE 'mcp-%'; +``` + +**Expected output:** +``` +Variable_name Value +mcp-enabled true +mcp-port 6071 +mcp-mysql_hosts 127.0.0.1 +mcp-mysql_ports 3306 +... +``` + +### Connection pool failing + +**Verify MySQL is accessible:** +```bash +mysql -h 127.0.0.1 -P 3306 -u root -pyourpassword testdb -e "SELECT 1" +``` + +**Check for connection pool errors in logs:** +```bash +grep "MySQL_Tool_Handler" /path/to/proxysql.log +``` + +**Expected logs on success:** +``` +MySQL_Tool_Handler: Connected to 127.0.0.1:3306 +MySQL_Tool_Handler: Connection pool initialized with 1 connection(s) +MySQL Tool Handler initialized for schema 'testdb' +``` + +### Test failures + +**Common causes:** +1. **MySQL not accessible** - Check credentials, host, port +2. **Database not created** - Run `./setup_test_db.sh start` first +3. **MCP not enabled** - Run `./configure_mcp.sh --enable` +4. **Wrong port** - Docker uses 3307, native uses 3306 +5. **Firewall** - Ensure ports 6032, 6071, and MySQL port are open + +**Enable verbose output:** +```bash +./test_mcp_tools.sh -v +``` + +### Clean slate + +**To reset everything and start over:** + +```bash +# 1. Disable MCP +./configure_mcp.sh --disable + +# 2. Drop test database +./setup_test_db.sh reset --mode native + +# 3. 
Start fresh +./setup_test_db.sh start --mode native +./configure_mcp.sh --enable +``` + +--- + +## Default Configuration Reference + +| Variable | Default | Description | +|----------|---------|-------------| +| `mcp-enabled` | false | Enable MCP server | +| `mcp-port` | 6071 | HTTPS port for MCP | +| `mcp-config_endpoint_auth` | (empty) | Auth token for /config endpoint | +| `mcp-observe_endpoint_auth` | (empty) | Auth token for /observe endpoint | +| `mcp-query_endpoint_auth` | (empty) | Auth token for /query endpoint | +| `mcp-admin_endpoint_auth` | (empty) | Auth token for /admin endpoint | +| `mcp-cache_endpoint_auth` | (empty) | Auth token for /cache endpoint | +| `mcp-timeout_ms` | 30000 | Query timeout in milliseconds | +| `mcp-mysql_hosts` | 127.0.0.1 | MySQL server host(s) | +| `mcp-mysql_ports` | 3306 | MySQL server port(s) | +| `mcp-mysql_user` | (empty) | MySQL username | +| `mcp-mysql_password` | (empty) | MySQL password | +| `mcp-mysql_schema` | (empty) | Default schema | +| `mcp-catalog_path` | mcp_catalog.db | Catalog database path (relative to datadir) | + +--- + +## Environment Variables Reference + +```bash +# ProxySQL Admin Configuration (for configure_mcp.sh) +export PROXYSQL_ADMIN_HOST=${PROXYSQL_ADMIN_HOST:-127.0.0.1} +export PROXYSQL_ADMIN_PORT=${PROXYSQL_ADMIN_PORT:-6032} +export PROXYSQL_ADMIN_USER=${PROXYSQL_ADMIN_USER:-admin} +export PROXYSQL_ADMIN_PASSWORD=${PROXYSQL_ADMIN_PASSWORD:-admin} + +# MySQL Configuration (for setup_test_db.sh and configure_mcp.sh) +export MYSQL_HOST=${MYSQL_HOST:-127.0.0.1} +export MYSQL_PORT=${MYSQL_PORT:-3306} +export MYSQL_USER=${MYSQL_USER:-root} +export MYSQL_PASSWORD=${MYSQL_PASSWORD:-} +export TEST_DB_NAME=${TEST_DB_NAME:-testdb} + +# MCP Server Configuration (for test scripts) +export MCP_HOST=${MCP_HOST:-127.0.0.1} +export MCP_PORT=${MCP_PORT:-6071} +``` diff --git a/scripts/mcp/configure_mcp.sh b/scripts/mcp/configure_mcp.sh new file mode 100755 index 0000000000..3cfcd6a549 --- /dev/null +++ 
b/scripts/mcp/configure_mcp.sh @@ -0,0 +1,348 @@ +#!/bin/bash +# +# configure_mcp.sh - Configure ProxySQL MCP module +# +# Usage: +# ./configure_mcp.sh [options] +# +# Options: +# -h, --host HOST MySQL host (default: 127.0.0.1) +# -P, --port PORT MySQL port (default: 3307) +# -u, --user USER MySQL user (default: root) +# -p, --password PASS MySQL password (default: test123) +# -d, --database DB MySQL database (default: testdb) +# --mcp-port PORT MCP server port (default: 6071) +# --enable Enable MCP server +# --disable Disable MCP server +# --status Show current MCP configuration +# + +set -e + +# Default configuration (can be overridden by environment variables) +MYSQL_HOST="${MYSQL_HOST:-127.0.0.1}" +MYSQL_PORT="${MYSQL_PORT:-3307}" +MYSQL_USER="${MYSQL_USER:-root}" +MYSQL_PASSWORD="${MYSQL_PASSWORD=test123}" # Use = instead of :- to allow empty passwords +MYSQL_DATABASE="${TEST_DB_NAME:-testdb}" +MCP_PORT="${MCP_PORT:-6071}" +MCP_ENABLED="false" + +# ProxySQL admin configuration +PROXYSQL_ADMIN_HOST="${PROXYSQL_ADMIN_HOST:-127.0.0.1}" +PROXYSQL_ADMIN_PORT="${PROXYSQL_ADMIN_PORT:-6032}" +PROXYSQL_ADMIN_USER="${PROXYSQL_ADMIN_USER:-admin}" +PROXYSQL_ADMIN_PASSWORD="${PROXYSQL_ADMIN_PASSWORD:-admin}" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +log_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +log_step() { + echo -e "${BLUE}[STEP]${NC} $1" +} + +# Execute MySQL command via ProxySQL admin +exec_admin() { + mysql -h "${PROXYSQL_ADMIN_HOST}" -P "${PROXYSQL_ADMIN_PORT}" \ + -u "${PROXYSQL_ADMIN_USER}" -p"${PROXYSQL_ADMIN_PASSWORD}" \ + -e "$1" 2>&1 +} + +# Execute MySQL command via ProxySQL admin (silent mode) +exec_admin_silent() { + mysql -h "${PROXYSQL_ADMIN_HOST}" -P "${PROXYSQL_ADMIN_PORT}" \ + -u "${PROXYSQL_ADMIN_USER}" -p"${PROXYSQL_ADMIN_PASSWORD}" \ + -e "$1" 2>/dev/null +} + +# Check if 
ProxySQL admin is accessible +check_proxysql_admin() { + log_step "Checking ProxySQL admin connection..." + if exec_admin_silent "SELECT 1" >/dev/null 2>&1; then + log_info "Connected to ProxySQL admin at ${PROXYSQL_ADMIN_HOST}:${PROXYSQL_ADMIN_PORT}" + return 0 + else + log_error "Cannot connect to ProxySQL admin at ${PROXYSQL_ADMIN_HOST}:${PROXYSQL_ADMIN_PORT}" + log_error "Please ensure ProxySQL is running" + return 1 + fi +} + +# Check if MySQL is accessible +check_mysql_connection() { + log_step "Checking MySQL connection..." + if mysql -h "${MYSQL_HOST}" -P "${MYSQL_PORT}" \ + -u "${MYSQL_USER}" -p"${MYSQL_PASSWORD}" \ + -e "SELECT 1" >/dev/null 2>&1; then + log_info "Connected to MySQL at ${MYSQL_HOST}:${MYSQL_PORT}" + return 0 + else + log_error "Cannot connect to MySQL at ${MYSQL_HOST}:${MYSQL_PORT}" + log_error "Please ensure MySQL is running and credentials are correct" + return 1 + fi +} + +# Configure MCP variables +configure_mcp() { + local enable="$1" + + log_step "Configuring MCP variables..." 
+ + local errors=0 sq="'" # sq: a literal single quote; doubled inside each value below so it cannot terminate the SQL string early + + # Set each variable individually to catch errors + exec_admin_silent "SET mcp-mysql_hosts='${MYSQL_HOST//$sq/$sq$sq}';" || { log_error "Failed to set mcp-mysql_hosts"; errors=$((errors + 1)); } + exec_admin_silent "SET mcp-mysql_ports='${MYSQL_PORT//$sq/$sq$sq}';" || { log_error "Failed to set mcp-mysql_ports"; errors=$((errors + 1)); } + exec_admin_silent "SET mcp-mysql_user='${MYSQL_USER//$sq/$sq$sq}';" || { log_error "Failed to set mcp-mysql_user"; errors=$((errors + 1)); } + exec_admin_silent "SET mcp-mysql_password='${MYSQL_PASSWORD//$sq/$sq$sq}';" || { log_error "Failed to set mcp-mysql_password"; errors=$((errors + 1)); } + exec_admin_silent "SET mcp-mysql_schema='${MYSQL_DATABASE//$sq/$sq$sq}';" || { log_error "Failed to set mcp-mysql_schema"; errors=$((errors + 1)); } + exec_admin_silent "SET mcp-catalog_path='mcp_catalog.db';" || { log_error "Failed to set mcp-catalog_path"; errors=$((errors + 1)); } + exec_admin_silent "SET mcp-port='${MCP_PORT//$sq/$sq$sq}';" || { log_error "Failed to set mcp-port"; errors=$((errors + 1)); } + exec_admin_silent "SET mcp-enabled='${enable}';" || { log_error "Failed to set mcp-enabled"; errors=$((errors + 1)); } + + if [ $errors -gt 0 ]; then + log_error "Failed to configure $errors MCP variable(s)" + return 1 + fi + + log_info "MCP variables configured:" + echo " mcp-mysql_hosts = ${MYSQL_HOST}" + echo " mcp-mysql_ports = ${MYSQL_PORT}" + echo " mcp-mysql_user = ${MYSQL_USER}" + echo " mcp-mysql_password = ********" # masked: never echo the credential to the terminal or CI logs + echo " mcp-mysql_schema = ${MYSQL_DATABASE}" + echo " mcp-catalog_path = mcp_catalog.db (relative to datadir)" + echo " mcp-port = ${MCP_PORT}" + echo " mcp-enabled = ${enable}" +} + +# Load MCP variables to runtime +load_to_runtime() { + log_step "Loading MCP variables to RUNTIME..." 
+ if exec_admin_silent "LOAD MCP VARIABLES TO RUNTIME;" >/dev/null 2>&1; then + log_info "MCP variables loaded to RUNTIME" + else + log_error "Failed to load MCP variables to RUNTIME" + return 1 + fi +} + +# Show current MCP configuration +show_status() { + log_step "Current MCP configuration:" + echo "" + exec_admin_silent "SHOW VARIABLES LIKE 'mcp-%';" | column -t + echo "" +} + +# Test MCP server connectivity +test_mcp_server() { + log_step "Testing MCP server connectivity..." + + # Wait a moment for server to start + sleep 2 + + # Test ping endpoint + local response + response=$(curl -k -s -X POST "https://${PROXYSQL_ADMIN_HOST}:${MCP_PORT}/mcp/config" \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","method":"ping","id":1}' 2>/dev/null || echo "") + + if [ -n "$response" ]; then + log_info "MCP server is responding" + echo " Response: $response" + else + log_warn "MCP server not responding (may still be starting)" + fi +} + +# Parse command line arguments +parse_args() { + while [[ $# -gt 0 ]]; do + case $1 in + -h|--host) + MYSQL_HOST="$2" + shift 2 + ;; + -P|--port) + MYSQL_PORT="$2" + shift 2 + ;; + -u|--user) + MYSQL_USER="$2" + shift 2 + ;; + -p|--password) + MYSQL_PASSWORD="$2" + shift 2 + ;; + -d|--database) + MYSQL_DATABASE="$2" + shift 2 + ;; + --mcp-port) + MCP_PORT="$2" + shift 2 + ;; + --enable) + MCP_ENABLED="true" + shift + ;; + --disable) + MCP_ENABLED="false" + shift + ;; + --status) + show_status + exit 0 + ;; + *) + echo "Unknown option: $1" + echo "Use --help for usage information" + exit 1 + ;; + esac + done +} + +# Show usage +show_usage() { + cat < +# ./setup_test_db.sh [options] +# +# Commands: +# start Setup/start test database +# stop Stop test database (Docker only) +# status Check status +# connect Connect to test database shell +# reset Drop/recreate test database +# +# Options: +# --mode MODE Mode: docker or native (default: auto-detect) +# --host HOST MySQL host (native mode, default: 127.0.0.1) +# --port PORT 
MySQL port (native mode, default: 3306) +# --user USER MySQL user (native mode, default: root) +# --password PASS MySQL password +# --database DB Database name (default: testdb) +# -h, --help Show help +# + +set -e + +# Default Docker configuration +CONTAINER_NAME="proxysql_mcp_test_mysql" +DOCKER_PORT="3307" +DOCKER_ROOT_PASSWORD="test123" +DOCKER_DATABASE="testdb" +DOCKER_VERSION="8.4" + +# Default native MySQL configuration +NATIVE_HOST="127.0.0.1" +NATIVE_PORT="3306" +NATIVE_USER="root" +NATIVE_PASSWORD="" +DATABASE_NAME="testdb" + +# Mode: auto, docker, or native +MODE="auto" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +log_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +log_step() { + echo -e "${BLUE}[STEP]${NC} $1" +} + +# Detect which mode to use +detect_mode() { + if [ "${MODE}" != "auto" ]; then + echo "${MODE}" + return 0 + fi + + # Check if Docker is available + if command -v docker &> /dev/null; then + # Check if user can run docker + if docker info &> /dev/null; then + echo "docker" + return 0 + fi + fi + + # Check if mysql client can connect locally + if command -v mysql &> /dev/null; then + # Try to connect with default credentials + if MYSQL_PWD="" mysql -h "${NATIVE_HOST}" -P "${NATIVE_PORT}" -u "${NATIVE_USER}" -e "SELECT 1" &> /dev/null; then + echo "native" + return 0 + fi + fi + + # Fall back to Docker + echo "docker" + return 0 +} + +# Execute MySQL command (native mode) +exec_mysql_native() { + local sql="$1" + local db="${2:-mysql}" + + if [ -z "${NATIVE_PASSWORD}" ]; then + mysql -h "${NATIVE_HOST}" -P "${NATIVE_PORT}" -u "${NATIVE_USER}" "${db}" -e "${sql}" + else + MYSQL_PWD="${NATIVE_PASSWORD}" mysql -h "${NATIVE_HOST}" -P "${NATIVE_PORT}" -u "${NATIVE_USER}" "${db}" -e "${sql}" + fi +} + +# Create init SQL file +create_init_sql() { + cat > 
"${SCRIPT_DIR}/init_testdb.sql" <<'EOSQL' +-- Test Database Schema for MCP Testing + +CREATE DATABASE IF NOT EXISTS testdb; +USE testdb; + +CREATE TABLE IF NOT EXISTS customers ( + id INT PRIMARY KEY AUTO_INCREMENT, + name VARCHAR(100), + email VARCHAR(100), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + INDEX idx_email (email) +); + +CREATE TABLE IF NOT EXISTS orders ( + id INT PRIMARY KEY AUTO_INCREMENT, + customer_id INT NOT NULL, + order_date DATE, + total DECIMAL(10,2), + status VARCHAR(20), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (customer_id) REFERENCES customers(id), + INDEX idx_customer (customer_id), + INDEX idx_status (status) +); + +CREATE TABLE IF NOT EXISTS products ( + id INT PRIMARY KEY AUTO_INCREMENT, + name VARCHAR(200), + category VARCHAR(50), + price DECIMAL(10,2), + stock INT DEFAULT 0, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + INDEX idx_category (category) +); + +CREATE TABLE IF NOT EXISTS order_items ( + id INT PRIMARY KEY AUTO_INCREMENT, + order_id INT NOT NULL, + product_id INT NOT NULL, + quantity INT DEFAULT 1, + price DECIMAL(10,2), + FOREIGN KEY (order_id) REFERENCES orders(id), + FOREIGN KEY (product_id) REFERENCES products(id) +); + +-- Insert sample customers +INSERT INTO customers (name, email) VALUES + ('Alice Johnson', 'alice@example.com'), + ('Bob Smith', 'bob@example.com'), + ('Charlie Brown', 'charlie@example.com'), + ('Diana Prince', 'diana@example.com'), + ('Eve Davis', 'eve@example.com'); + +-- Insert sample products +INSERT INTO products (name, category, price, stock) VALUES + ('Laptop', 'Electronics', 999.99, 50), + ('Mouse', 'Electronics', 29.99, 200), + ('Keyboard', 'Electronics', 79.99, 150), + ('Desk Chair', 'Furniture', 199.99, 75), + ('Coffee Mug', 'Kitchen', 12.99, 500); + +-- Insert sample orders +INSERT INTO orders (customer_id, order_date, total, status) VALUES + (1, '2024-01-15', 1029.98, 'completed'), + (2, '2024-01-16', 79.99, 'shipped'), + (1, '2024-01-17', 212.98, 
'pending'), + (3, '2024-01-18', 199.99, 'completed'), + (4, '2024-01-19', 1099.98, 'shipped'); + +-- Insert sample order items +INSERT INTO order_items (order_id, product_id, quantity, price) VALUES + (1, 1, 1, 999.99), + (1, 2, 1, 29.99), + (2, 3, 1, 79.99), + (3, 1, 1, 999.99), + (3, 3, 1, 79.99), + (3, 5, 3, 38.97), + (4, 4, 1, 199.99), + (5, 1, 1, 999.99), + (5, 4, 1, 199.99); + +-- Create a view +CREATE OR REPLACE VIEW customer_orders AS +SELECT + c.id AS customer_id, + c.name AS customer_name, + COUNT(o.id) AS order_count, + SUM(o.total) AS total_spent +FROM customers c +LEFT JOIN orders o ON c.id = o.customer_id +GROUP BY c.id, c.name; + +-- Create a stored procedure (dropped first so replaying this script does not abort on "already exists") +DROP PROCEDURE IF EXISTS get_customer_stats; +DELIMITER // +CREATE PROCEDURE get_customer_stats(IN customer_id INT) +BEGIN + SELECT + c.name, + COUNT(o.id) AS order_count, + COALESCE(SUM(o.total), 0) AS total_spent + FROM customers c + LEFT JOIN orders o ON c.id = o.customer_id + WHERE c.id = customer_id; +END // +DELIMITER ; +EOSQL + + log_info "Created ${SCRIPT_DIR}/init_testdb.sql" +} + +# ========== Docker Mode Functions ========== + +start_docker() { + log_step "Starting Docker MySQL container..." + + if ! command -v docker &> /dev/null; then + log_error "Docker is not installed" + exit 1 + fi + + # Check if container already exists + if docker ps -a --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then + log_warn "Container '${CONTAINER_NAME}' already exists" + read -p "Remove and recreate? (y/N): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + docker rm -f "${CONTAINER_NAME}" > /dev/null 2>&1 || true + else + log_info "Starting existing container..." + docker start "${CONTAINER_NAME}" + return 0 + fi + fi + + # Create init SQL if needed + if [ ! 
-f "${SCRIPT_DIR}/init_testdb.sql" ]; then + create_init_sql + fi + + # Create and start container (MySQL 8.4 removed --default-authentication-plugin and ships mysql_native_password disabled, so the plugin is enabled explicitly; this flag requires an 8.4+ image) + docker run -d \ + --name "${CONTAINER_NAME}" \ + -p "${DOCKER_PORT}:3306" \ + -e MYSQL_ROOT_PASSWORD="${DOCKER_ROOT_PASSWORD}" \ + -e MYSQL_DATABASE="${DOCKER_DATABASE}" \ + -v "${SCRIPT_DIR}/init_testdb.sql:/docker-entrypoint-initdb.d/01-init.sql:ro" \ + mysql:${DOCKER_VERSION} \ + --mysql-native-password=ON + + log_info "Waiting for MySQL to be ready..." + for i in {1..30}; do + if docker exec "${CONTAINER_NAME}" mysqladmin ping -h localhost --silent 2>/dev/null; then + log_info "MySQL is ready!" + break + fi + sleep 1 + done + + show_docker_info +} + +stop_docker() { + log_step "Stopping Docker MySQL container..." + + if docker ps --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then + docker stop "${CONTAINER_NAME}" + log_info "Container stopped" + else + log_warn "Container '${CONTAINER_NAME}' is not running" + fi + + if docker ps -a --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then + read -p "Remove container '${CONTAINER_NAME}'? (y/N): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + docker rm "${CONTAINER_NAME}" + log_info "Container removed" + fi + fi +} + +status_docker() { + if docker ps --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then + echo -e "${GREEN}●${NC} Docker container '${CONTAINER_NAME}' is ${GREEN}running${NC}" + show_docker_info + show_docker_tables + elif docker ps -a --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then + echo -e "${YELLOW}○${NC} Docker container '${CONTAINER_NAME}' exists but is ${YELLOW}stopped${NC}" + echo "Start with: $0 start --mode docker" + else + echo -e "${RED}✗${NC} Docker container '${CONTAINER_NAME}' does not exist" + echo "Create with: $0 start --mode docker" + fi +} + +connect_docker() { + if ! 
docker ps --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then + log_error "Container '${CONTAINER_NAME}' is not running" + exit 1 + fi + docker exec -it "${CONTAINER_NAME}" mysql -uroot -p"${DOCKER_ROOT_PASSWORD}" "${DOCKER_DATABASE}" +} + +reset_docker() { + log_step "Resetting Docker MySQL database..." + if ! docker ps --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then + log_error "Container '${CONTAINER_NAME}' is not running" + exit 1 + fi + + docker exec -i "${CONTAINER_NAME}" mysql -uroot -p"${DOCKER_ROOT_PASSWORD}" <<'EOSQL' +DROP DATABASE IF EXISTS testdb; +CREATE DATABASE testdb; +EOSQL + + # Re-run init script + if [ -f "${SCRIPT_DIR}/init_testdb.sql" ]; then + docker exec -i "${CONTAINER_NAME}" mysql -uroot -p"${DOCKER_ROOT_PASSWORD}" "${DOCKER_DATABASE}" < "${SCRIPT_DIR}/init_testdb.sql" + fi + + log_info "Database reset complete" +} + +show_docker_info() { + echo "" + echo "Connection Details:" + echo " Host: 127.0.0.1" + echo " Port: ${DOCKER_PORT}" + echo " User: root" + echo " Password: ${DOCKER_ROOT_PASSWORD}" + echo " Database: ${DOCKER_DATABASE}" + echo "" + echo "To configure ProxySQL MCP:" + echo " ./configure_mcp.sh --host 127.0.0.1 --port ${DOCKER_PORT}" +} + +show_docker_tables() { + echo "Database Info:" + docker exec "${CONTAINER_NAME}" mysql -uroot -p"${DOCKER_ROOT_PASSWORD}" -e " + SELECT + table_name AS 'Table', + table_rows AS 'Rows', + ROUND((data_length + index_length) / 1024, 2) AS 'Size (KB)' + FROM information_schema.tables + WHERE table_schema = '${DOCKER_DATABASE}' + ORDER BY table_name; + " 2>/dev/null | column -t +} + +# ========== Native Mode Functions ========== + +start_native() { + log_step "Setting up native MySQL database..." + + if ! command -v mysql &> /dev/null; then + log_error "mysql client is not installed" + exit 1 + fi + + # Test connection + if ! 
test_native_connection; then + log_error "Cannot connect to MySQL server" + log_error "Please ensure MySQL is running and credentials are correct" + log_error " Host: ${NATIVE_HOST}" + log_error " Port: ${NATIVE_PORT}" + log_error " User: ${NATIVE_USER}" + exit 1 + fi + + # Create init SQL and run it + create_init_sql + + log_info "Creating database and tables..." + if [ -z "${NATIVE_PASSWORD}" ]; then + mysql -h "${NATIVE_HOST}" -P "${NATIVE_PORT}" -u "${NATIVE_USER}" < "${SCRIPT_DIR}/init_testdb.sql" + else + MYSQL_PWD="${NATIVE_PASSWORD}" mysql -h "${NATIVE_HOST}" -P "${NATIVE_PORT}" -u "${NATIVE_USER}" < "${SCRIPT_DIR}/init_testdb.sql" + fi + + show_native_info +} + +stop_native() { + log_warn "Native mode: Database is not stopped (it's managed by MySQL server)" + log_info "To remove the test database, use: $0 reset --mode native" +} + +status_native() { + if test_native_connection; then + echo -e "${GREEN}●${NC} Native MySQL connection ${GREEN}successful${NC}" + show_native_info + show_native_tables + else + echo -e "${RED}✗${NC} Cannot connect to MySQL at ${NATIVE_HOST}:${NATIVE_PORT}" + echo " Host: ${NATIVE_HOST}" + echo " Port: ${NATIVE_PORT}" + echo " User: ${NATIVE_USER}" + fi +} + +connect_native() { + local db="${DATABASE_NAME}" + + if [ -z "${NATIVE_PASSWORD}" ]; then + mysql -h "${NATIVE_HOST}" -P "${NATIVE_PORT}" -u "${NATIVE_USER}" "${db}" + else + MYSQL_PWD="${NATIVE_PASSWORD}" mysql -h "${NATIVE_HOST}" -P "${NATIVE_PORT}" -u "${NATIVE_USER}" "${db}" + fi +} + +reset_native() { + log_step "Resetting native MySQL database..." + + if ! test_native_connection; then + log_error "Cannot connect to MySQL server" + exit 1 + fi + + read -p "Drop database '${DATABASE_NAME}'? (y/N): " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + log_info "Aborted" + return 0 + fi + + exec_mysql_native "DROP DATABASE IF EXISTS ${DATABASE_NAME};" + + log_info "Database dropped. 
Recreate with: $0 start --mode native" +} + +test_native_connection() { + if [ -z "${NATIVE_PASSWORD}" ]; then + MYSQL_PWD="" mysql -h "${NATIVE_HOST}" -P "${NATIVE_PORT}" -u "${NATIVE_USER}" -e "SELECT 1" &> /dev/null + else + MYSQL_PWD="${NATIVE_PASSWORD}" mysql -h "${NATIVE_HOST}" -P "${NATIVE_PORT}" -u "${NATIVE_USER}" -e "SELECT 1" &> /dev/null + fi +} + +show_native_info() { + echo "" + echo "Connection Details:" + echo " Host: ${NATIVE_HOST}" + echo " Port: ${NATIVE_PORT}" + echo " User: ${NATIVE_USER}" + echo " Password: ${NATIVE_PASSWORD:-}" + echo " Database: ${DATABASE_NAME}" + echo "" + echo "To configure ProxySQL MCP:" + echo " ./configure_mcp.sh --host ${NATIVE_HOST} --port ${NATIVE_PORT}" +} + +show_native_tables() { + echo "Database Info:" + exec_mysql_native " + SELECT + table_name AS 'Table', + table_rows AS 'Rows', + ROUND((data_length + index_length) / 1024, 2) AS 'Size (KB)' + FROM information_schema.tables + WHERE table_schema = '${DATABASE_NAME}' + ORDER BY table_name; + " 2>/dev/null | column -t +} + +# ========== Main Functions ========== + +show_usage() { + cat < + +Commands: + start Setup/start test database + stop Stop test database (Docker only) + status Check status + connect Connect to test database shell + reset Drop/recreate test database + create-sql Create init_testdb.sql file + +Options: + --mode MODE Mode: docker, native, or auto (default: auto) + --host HOST MySQL host for native mode (default: 127.0.0.1) + --port PORT MySQL port (default: 3306) + --user USER MySQL user (default: root) + --password PASS MySQL password + --database DB Database name (default: testdb) + -h, --help Show this help + +Environment Variables: + MYSQL_HOST MySQL host (native mode) + MYSQL_PORT MySQL port (native mode) + MYSQL_USER MySQL user + MYSQL_PASSWORD MySQL password + TEST_DB_NAME Test database name + +Examples: + # Auto-detect mode and setup + $0 start + + # Use native MySQL explicitly + $0 start --mode native + $0 start --mode native --host 
localhost --port 3306 + + # Check status + $0 status + + # Connect to test database + $0 connect + + # Drop and recreate test database + $0 reset + + # Stop Docker container + $0 stop --mode docker +EOF +} + +# Main script +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Load environment variables if set +[ -n "${MYSQL_HOST}" ] && NATIVE_HOST="${MYSQL_HOST}" +[ -n "${MYSQL_PORT}" ] && NATIVE_PORT="${MYSQL_PORT}" +[ -n "${MYSQL_USER}" ] && NATIVE_USER="${MYSQL_USER}" +[ -n "${MYSQL_PASSWORD}" ] && NATIVE_PASSWORD="${MYSQL_PASSWORD}" +[ -n "${TEST_DB_NAME}" ] && DATABASE_NAME="${TEST_DB_NAME}" + +# Print environment variables +log_info "Environment Variables:" +echo " MYSQL_HOST=${MYSQL_HOST:-}" +echo " MYSQL_PORT=${MYSQL_PORT:-}" +echo " MYSQL_USER=${MYSQL_USER:-}" +echo " MYSQL_PASSWORD=${MYSQL_PASSWORD:-}" +echo " TEST_DB_NAME=${TEST_DB_NAME:-}" +echo "" + +# Parse arguments +COMMAND="" +while [[ $# -gt 0 ]]; do + case $1 in + -h|--help) + show_usage + exit 0 + ;; + --mode) + MODE="$2" + shift 2 + ;; + --host) + NATIVE_HOST="$2" + shift 2 + ;; + --port) + if [ "$2" = "3307" ]; then + DOCKER_PORT="$2" + else + NATIVE_PORT="$2" + fi + shift 2 + ;; + --user) + NATIVE_USER="$2" + shift 2 + ;; + --password) + NATIVE_PASSWORD="$2" + shift 2 + ;; + --database) + DATABASE_NAME="$2" + DOCKER_DATABASE="$2" + shift 2 + ;; + start|stop|status|connect|reset|create-sql) + COMMAND="$1" + shift + # Continue parsing options after command + while [[ $# -gt 0 ]]; do + case $1 in + --mode) + MODE="$2" + shift 2 + ;; + --host) + NATIVE_HOST="$2" + shift 2 + ;; + --port) + if [ "$2" = "3307" ]; then + DOCKER_PORT="$2" + else + NATIVE_PORT="$2" + fi + shift 2 + ;; + --user) + NATIVE_USER="$2" + shift 2 + ;; + --password) + NATIVE_PASSWORD="$2" + shift 2 + ;; + --database) + DATABASE_NAME="$2" + DOCKER_DATABASE="$2" + shift 2 + ;; + *) + log_error "Unknown option: $1" + show_usage + exit 1 + ;; + esac + done + break + ;; + *) + log_error "Unknown option or command: $1" + 
show_usage + exit 1 + ;; + esac +done + +# Check if command was provided +if [ -z "${COMMAND}" ]; then + show_usage + exit 1 +fi + +# Detect mode if auto +DETECTED_MODE=$(detect_mode) +if [ "${MODE}" = "auto" ]; then + MODE="${DETECTED_MODE}" +fi + +# Execute command based on mode +case "${COMMAND}" in + start) + if [ "${MODE}" = "docker" ]; then + start_docker + else + start_native + fi + ;; + stop) + if [ "${MODE}" = "docker" ]; then + stop_docker + else + stop_native + fi + ;; + status) + if [ "${MODE}" = "docker" ]; then + status_docker + else + status_native + fi + ;; + connect) + if [ "${MODE}" = "docker" ]; then + connect_docker + else + connect_native + fi + ;; + reset) + if [ "${MODE}" = "docker" ]; then + reset_docker + else + reset_native + fi + ;; + create-sql) + create_init_sql + ;; +esac diff --git a/scripts/mcp/stress_test.sh b/scripts/mcp/stress_test.sh new file mode 100755 index 0000000000..a04459681b --- /dev/null +++ b/scripts/mcp/stress_test.sh @@ -0,0 +1,286 @@ +#!/bin/bash +# +# stress_test.sh - Concurrent connection stress test for MCP tools +# +# Usage: +# ./stress_test.sh [options] +# +# Options: +# -n, --num-requests N Number of concurrent requests (default: 10) +# -t, --tool NAME Tool to test (default: sample_rows) +# -d, --delay SEC Delay between requests in ms (default: 0) +# -v, --verbose Show individual responses +# -h, --help Show help +# + +set -e + +# Configuration +MCP_HOST="${MCP_HOST:-127.0.0.1}" +MCP_PORT="${MCP_PORT:-6071}" +MCP_URL="https://${MCP_HOST}:${MCP_PORT}/query" + +# Test options +NUM_REQUESTS="${NUM_REQUESTS:-10}" +TOOL_NAME="${TOOL_NAME:-sample_rows}" +DELAY_MS="${DELAY_MS:-0}" +VERBOSE=false + +# Statistics +TOTAL_REQUESTS=0 +SUCCESSFUL_REQUESTS=0 +FAILED_REQUESTS=0 +TOTAL_TIME=0 + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +log_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +log_warn() { + echo -e 
"${YELLOW}[WARN]${NC} $1" +} + +# Execute MCP request +mcp_request() { + local id="$1" + + local payload + payload=$(cat </dev/null) + + local end_time + end_time=$(date +%s%N) + + local duration + duration=$(( (end_time - start_time) / 1000000 )) # Convert to milliseconds + + local body + body=$(echo "$response" | head -n -1) + + local code + code=$(echo "$response" | tail -n 1) + + echo "${body}|${duration}|${code}" +} + +# Run concurrent requests +run_stress_test() { + log_info "Running stress test with ${NUM_REQUESTS} concurrent requests..." + log_info "Tool: ${TOOL_NAME}" + log_info "Target: ${MCP_URL}" + echo "" + + # Create temp directory for results + local tmpdir + tmpdir=$(mktemp -d) + trap "rm -rf ${tmpdir}" EXIT + + local pids=() + + # Launch requests in background + for i in $(seq 1 "${NUM_REQUESTS}"); do + ( + if [ -n "${DELAY_MS}" ] && [ "${DELAY_MS}" -gt 0 ]; then + sleep $(( (RANDOM % ${DELAY_MS}) / 1000 )).$(( (RANDOM % 1000) )) + fi + + local result + result=$(mcp_request "${i}") + + local body + local duration + local code + + body=$(echo "${result}" | cut -d'|' -f1) + duration=$(echo "${result}" | cut -d'|' -f2) + code=$(echo "${result}" | cut -d'|' -f3) + + echo "${body}" > "${tmpdir}/response_${i}.json" + echo "${duration}" > "${tmpdir}/duration_${i}.txt" + echo "${code}" > "${tmpdir}/code_${i}.txt" + ) & + pids+=($!) 
+ done + + # Wait for all requests to complete + local start_time + start_time=$(date +%s) + + for pid in "${pids[@]}"; do + wait ${pid} || true + done + + local end_time + end_time=$(date +%s) + + local total_wall_time + total_wall_time=$((end_time - start_time)) + + # Collect results + for i in $(seq 1 "${NUM_REQUESTS}"); do + TOTAL_REQUESTS=$((TOTAL_REQUESTS + 1)) + + local code + code=$(cat "${tmpdir}/code_${i}.txt" 2>/dev/null || echo "000") + + if [ "${code}" = "200" ]; then + SUCCESSFUL_REQUESTS=$((SUCCESSFUL_REQUESTS + 1)) + else + FAILED_REQUESTS=$((FAILED_REQUESTS + 1)) + fi + + local duration + duration=$(cat "${tmpdir}/duration_${i}.txt" 2>/dev/null || echo "0") + TOTAL_TIME=$((TOTAL_TIME + duration)) + + if [ "${VERBOSE}" = "true" ]; then + local body + body=$(cat "${tmpdir}/response_${i}.json" 2>/dev/null || echo "{}") + echo "Request ${i}: [${code}] ${duration}ms" + if [ "${code}" != "200" ]; then + echo " Response: ${body}" + fi + fi + done + + # Calculate statistics + local avg_time + if [ ${TOTAL_REQUESTS} -gt 0 ]; then + avg_time=$((TOTAL_TIME / TOTAL_REQUESTS)) + else + avg_time=0 + fi + + local requests_per_second + if [ ${total_wall_time} -gt 0 ]; then + requests_per_second=$(awk "BEGIN {printf \"%.2f\", ${NUM_REQUESTS} / ${total_wall_time}}") + else + requests_per_second="N/A" + fi + + # Print summary + echo "" + echo "======================================" + echo "Stress Test Results" + echo "======================================" + echo "Concurrent requests: ${NUM_REQUESTS}" + echo "Total wall time: ${total_wall_time}s" + echo "" + echo "Total requests: ${TOTAL_REQUESTS}" + echo -e "Successful: ${GREEN}${SUCCESSFUL_REQUESTS}${NC}" + echo -e "Failed: ${RED}${FAILED_REQUESTS}${NC}" + echo "" + echo "Average response time: ${avg_time}ms" + echo "Requests/second: ${requests_per_second}" + echo "" + + # Calculate success rate + if [ ${TOTAL_REQUESTS} -gt 0 ]; then + local success_rate + success_rate=$(awk "BEGIN {printf \"%.1f\", 
(${SUCCESSFUL_REQUESTS} * 100) / ${TOTAL_REQUESTS}}") + echo "Success rate: ${success_rate}%" + echo "" + + if [ ${FAILED_REQUESTS} -eq 0 ]; then + log_info "All requests succeeded!" + return 0 + else + log_error "Some requests failed!" + return 1 + fi + else + log_error "No requests were completed!" + return 1 + fi +} + +# Parse command line arguments +parse_args() { + while [[ $# -gt 0 ]]; do + case $1 in + -n|--num-requests) + NUM_REQUESTS="$2" + shift 2 + ;; + -t|--tool) + TOOL_NAME="$2" + shift 2 + ;; + -d|--delay) + DELAY_MS="$2" + shift 2 + ;; + -v|--verbose) + VERBOSE=true + shift + ;; + -h|--help) + cat </dev/null) + + echo "${response}" +} + +# Test catalog operations +test_catalog() { + local test_id="$1" + local operation="$2" + local payload="$3" + local expected="$4" + + log_test "${test_id}: ${operation}" + + local response + response=$(mcp_request "${payload}") + + if [ "${VERBOSE}" = "true" ]; then + echo "Payload: ${payload}" + echo "Response: ${response}" + fi + + if echo "${response}" | grep -q "${expected}"; then + log_info "✓ ${test_id}" + return 0 + else + log_error "✗ ${test_id}" + if [ "${VERBOSE}" = "true" ]; then + echo "Expected to find: ${expected}" + fi + return 1 + fi +} + +# Main test flow +run_catalog_tests() { + echo "======================================" + echo "Catalog (LLM Memory) Test Suite" + echo "======================================" + echo "" + echo "Testing catalog operations for LLM memory persistence" + echo "" + + local passed=0 + local failed=0 + + # Test 1: Upsert a table schema entry + local payload1 + payload1='{ + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "name": "catalog_upsert", + "arguments": { + "kind": "table", + "key": "testdb.customers", + "document": "{\"table\": \"customers\", \"columns\": [{\"name\": \"id\", \"type\": \"INT\"}, {\"name\": \"name\", \"type\": \"VARCHAR\"}], \"row_count\": 5}", + "tags": "schema,testdb", + "links": "testdb.orders:customer_id" + } + }, + "id": 1 +}' + + 
if test_catalog "CAT001" "Upsert table schema" "${payload1}" '"success"[[:space:]]*:[[:space:]]*true'; then + passed=$((passed + 1)) + else + failed=$((failed + 1)) + fi + + # Test 2: Upsert a domain knowledge entry + local payload2 + payload2='{ + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "name": "catalog_upsert", + "arguments": { + "kind": "domain", + "key": "customer_management", + "document": "{\"description\": \"Customer management domain\", \"entities\": [\"customers\", \"orders\", \"products\"], \"relationships\": [\"customer has many orders\", \"order belongs to customer\"]}", + "tags": "domain,business", + "links": "" + } + }, + "id": 2 +}' + + if test_catalog "CAT002" "Upsert domain knowledge" "${payload2}" '"success"[[:space:]]*:[[:space:]]*true'; then + passed=$((passed + 1)) + else + failed=$((failed + 1)) + fi + + # Test 3: Get the upserted table entry + local payload3 + payload3='{ + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "name": "catalog_get", + "arguments": { + "kind": "table", + "key": "testdb.customers" + } + }, + "id": 3 +}' + + if test_catalog "CAT003" "Get table entry" "${payload3}" '"columns"'; then + passed=$((passed + 1)) + else + failed=$((failed + 1)) + fi + + # Test 4: Get the upserted domain entry + local payload4 + payload4='{ + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "name": "catalog_get", + "arguments": { + "kind": "domain", + "key": "customer_management" + } + }, + "id": 4 +}' + + if test_catalog "CAT004" "Get domain entry" "${payload4}" '"entities"'; then + passed=$((passed + 1)) + else + failed=$((failed + 1)) + fi + + # Test 5: Search for table entries + local payload5 + payload5='{ + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "name": "catalog_search", + "arguments": { + "query": "customers", + "limit": 10 + } + }, + "id": 5 +}' + + if test_catalog "CAT005" "Search catalog" "${payload5}" '"results"'; then + passed=$((passed + 1)) + else + failed=$((failed 
+ 1)) + fi + + # Test 6: List entries by kind + local payload6 + payload6='{ + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "name": "catalog_list", + "arguments": { + "kind": "table", + "limit": 10 + } + }, + "id": 6 +}' + + if test_catalog "CAT006" "List by kind" "${payload6}" '"results"'; then + passed=$((passed + 1)) + else + failed=$((failed + 1)) + fi + + # Test 7: Update existing entry + local payload7 + payload7='{ + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "name": "catalog_upsert", + "arguments": { + "kind": "table", + "key": "testdb.customers", + "document": "{\"table\": \"customers\", \"columns\": [{\"name\": \"id\", \"type\": \"INT\"}, {\"name\": \"name\", \"type\": \"VARCHAR\"}, {\"name\": \"email\", \"type\": \"VARCHAR\"}], \"row_count\": 5, \"updated\": true}", + "tags": "schema,testdb,updated", + "links": "testdb.orders:customer_id" + } + }, + "id": 7 +}' + + if test_catalog "CAT007" "Update existing entry" "${payload7}" '"success"[[:space:]]*:[[:space:]]*true'; then + passed=$((passed + 1)) + else + failed=$((failed + 1)) + fi + + # Test 8: Verify update + local payload8 + payload8='{ + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "name": "catalog_get", + "arguments": { + "kind": "table", + "key": "testdb.customers" + } + }, + "id": 8 +}' + + if test_catalog "CAT008" "Verify update" "${payload8}" '"updated"[[:space:]]*:[[:space:]]*true'; then + passed=$((passed + 1)) + else + failed=$((failed + 1)) + fi + + # Test 9: Test FTS search with special characters + local payload9 + payload9='{ + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "name": "catalog_search", + "arguments": { + "query": "customer*", + "limit": 10 + } + }, + "id": 9 +}' + + if test_catalog "CAT009" "FTS search with wildcard" "${payload9}" '"results"'; then + passed=$((passed + 1)) + else + failed=$((failed + 1)) + fi + + # Test 10: Delete entry + local payload10 + payload10='{ + "jsonrpc": "2.0", + "method": "tools/call", 
+ "params": { + "name": "catalog_delete", + "arguments": { + "kind": "table", + "key": "testdb.customers" + } + }, + "id": 10 +}' + + if test_catalog "CAT010" "Delete entry" "${payload10}" '"success"[[:space:]]*:[[:space:]]*true'; then + passed=$((passed + 1)) + else + failed=$((failed + 1)) + fi + + # Test 11: Verify deletion + local payload11 + payload11='{ + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "name": "catalog_get", + "arguments": { + "kind": "table", + "key": "testdb.customers" + } + }, + "id": 11 +}' + + # This should return an error since we deleted it + log_test "CAT011: Verify deletion (should fail)" + local response11 + response11=$(mcp_request "${payload11}") + + if echo "${response11}" | grep -q '"error"'; then + log_info "✓ CAT011" + passed=$((passed + 1)) + else + log_error "✗ CAT011" + failed=$((failed + 1)) + fi + + # Test 12: Cleanup - delete domain entry + local payload12 + payload12='{ + "jsonrpc": "2.0", + "method": "tools/call", + "params": { + "name": "catalog_delete", + "arguments": { + "kind": "domain", + "key": "customer_management" + } + }, + "id": 12 +}' + + if test_catalog "CAT012" "Cleanup domain entry" "${payload12}" '"success"[[:space:]]*:[[:space:]]*true'; then + passed=$((passed + 1)) + else + failed=$((failed + 1)) + fi + + # Print summary + echo "" + echo "======================================" + echo "Test Summary" + echo "======================================" + echo "Total tests: $((passed + failed))" + echo -e "Passed: ${GREEN}${passed}${NC}" + echo -e "Failed: ${RED}${failed}${NC}" + echo "" + + if [ ${failed} -gt 0 ]; then + log_error "Some tests failed!" + return 1 + else + log_info "All catalog tests passed!" 
+ return 0 + fi +} + +# Parse command line arguments +parse_args() { + while [[ $# -gt 0 ]]; do + case $1 in + -v|--verbose) + VERBOSE=true + shift + ;; + -h|--help) + cat </dev/null) + + local body + body=$(echo "$response" | head -n -1) + local code + code=$(echo "$response" | tail -n 1) + + if [ "${VERBOSE}" = "true" ]; then + echo "Request: ${payload}" >&2 + echo "Response (${code}): ${body}" >&2 + fi + + echo "${body}" + return 0 +} + +# Check if MCP server is accessible +check_mcp_server() { + log_test "Checking MCP server accessibility..." + + local config_url + config_url=$(get_endpoint_url "config") + local response + response=$(mcp_request "${config_url}" '{"jsonrpc":"2.0","method":"ping","id":1}') + + if echo "${response}" | grep -q "result"; then + log_info "MCP server is accessible" + return 0 + else + log_error "MCP server is not accessible" + log_error "Response: ${response}" + return 1 + fi +} + +# Discover tools from an endpoint +discover_tools() { + local endpoint="$1" + local url + url=$(get_endpoint_url "${endpoint}") + + log_verbose "Discovering tools from endpoint: ${endpoint}" + + local payload='{"jsonrpc":"2.0","method":"tools/list","id":1}' + local response + response=$(mcp_request "${url}" "${payload}") + + # Extract tool names from response + local tools_json="" + + if command -v jq >/dev/null 2>&1; then + # Use jq for reliable JSON parsing + tools_json=$(echo "${response}" | jq -r '.result.tools[].name' 2>/dev/null || echo "") + else + # Fallback to grep/sed + tools_json=$(echo "${response}" | grep -o '"name"[[:space:]]*:[[:space:]]*"[^"]*"' | sed 's/.*: "\(.*\)"/\1/') + fi + + # Store discovered tools in temp file + # Format: endpoint:tool_name + while IFS= read -r tool_name; do + if [ -n "${tool_name}" ]; then + echo "${endpoint}:${tool_name}" >> "${DISCOVERED_TOOLS_FILE}" + fi + done <<< "${tools_json}" + + log_verbose "Discovered tools from ${endpoint}: ${tools_json}" +} + +# Check if a tool is discovered on an endpoint 
+is_tool_discovered() { + local endpoint="$1" + local tool="$2" + local key="${endpoint}:${tool}" + + if grep -q "^${key}$" "${DISCOVERED_TOOLS_FILE}" 2>/dev/null; then + return 0 + fi + return 1 +} + +# Get discovered tools for an endpoint +get_discovered_tools() { + local endpoint="$1" + grep "^${endpoint}:" "${DISCOVERED_TOOLS_FILE}" 2>/dev/null | sed "s/^${endpoint}://" || true +} + +# Count discovered tools for an endpoint +count_discovered_tools() { + local endpoint="$1" + get_discovered_tools "${endpoint}" | wc -l +} + +# Assert that JSON contains expected value +assert_json_contains() { + local response="$1" + local field="$2" + local expected="$3" + + if echo "${response}" | grep -q "\"${field}\"[[:space:]]*:[[:space:]]*${expected}"; then + return 0 + fi + + # Try with jq if available + if command -v jq >/dev/null 2>&1; then + local actual + actual=$(echo "${response}" | jq -r "${field}" 2>/dev/null) + if [ "${actual}" = "${expected}" ]; then + return 0 + fi + fi + + return 1 +} + +# Test a tool +test_tool() { + local endpoint="$1" + local tool_name="$2" + local arguments="$3" + local expected_field="$4" + local expected_value="$5" + + TOTAL_TESTS=$((TOTAL_TESTS + 1)) + + log_test "Testing tool: ${tool_name} (endpoint: ${endpoint})" + + local url + url=$(get_endpoint_url "${endpoint}") + + local payload + payload=$(cat </dev/null || echo "0") + echo "" + echo "Total tools discovered: ${total}" + echo "" +} + +# Parse command line arguments +parse_args() { + while [[ $# -gt 0 ]]; do + case $1 in + -v|--verbose) + VERBOSE=true + shift + ;; + -q|--quiet) + QUIET=true + shift + ;; + --endpoint) + TEST_ENDPOINT="$2" + shift 2 + ;; + --tool) + TEST_TOOL="$2" + shift 2 + ;; + --skip-tool) + SKIP_TOOLS+=("$2") + shift 2 + ;; + --list-only) + LIST_ONLY=true + shift + ;; + -h|--help) + cat < "${DISCOVERED_TOOLS_FILE}" # Clear the file + + if [ -n "${TEST_ENDPOINT}" ]; then + discover_tools "${TEST_ENDPOINT}" + else + for endpoint in "${ENDPOINTS[@]}"; do + 
discover_tools "${endpoint}" + done + fi +} + +# Run all tests +run_all_tests() { + echo "======================================" + echo "MCP Tools Test Suite (Dynamic Discovery)" + echo "======================================" + echo "" + echo "MCP Host: ${MCP_HOST}" + echo "MCP Port: ${MCP_PORT}" + echo "" + + # Print environment variables if set + if [ -n "${MCP_HOST}" ] || [ -n "${MCP_PORT}" ]; then + log_info "Environment Variables:" + [ -n "${MCP_HOST}" ] && echo " MCP_HOST=${MCP_HOST}" + [ -n "${MCP_PORT}" ] && echo " MCP_PORT=${MCP_PORT}" + echo "" + fi + + # Check MCP server + if ! check_mcp_server; then + log_error "MCP server is not accessible. Please run:" + echo " ./configure_mcp.sh --enable" + exit 1 + fi + + # Discover all tools + discover_all_tools + + # Print discovery report + print_discovery_report + + # Exit if list-only mode + if [ "${LIST_ONLY}" = "true" ]; then + exit 0 + fi + + echo "======================================" + echo "Running Tests" + echo "======================================" + echo "" + + # Run tests + local num_tests=${#TEST_ENDPOINTS[@]} + for ((i=0; iinit(); + proxy_info("MCP module initialized\n"); +} + void ProxySQL_Main_init_Admin_module(const bootstrap_info_t& bootstrap_info) { // cluster module needs to be initialized before @@ -1267,6 +1276,12 @@ void ProxySQL_Main_shutdown_all_modules() { std::cerr << "GloPTH shutdown in "; #endif } + if (GloMCPH) { + cpu_timer t; + delete GloMCPH; + GloMCPH = NULL; +#ifdef DEBUG + std::cerr << "GloMCPH shutdown in "; if (GloGATH) { cpu_timer t; delete GloGATH; @@ -1439,6 +1454,7 @@ void ProxySQL_Main_init_phase2___not_started(const bootstrap_info_t& boostrap_in LoadPlugins(); ProxySQL_Main_init_main_modules(); + ProxySQL_Main_init_MCP_module(); ProxySQL_Main_init_Admin_module(boostrap_info); GloMTH->print_version(); @@ -1537,6 +1553,14 @@ void ProxySQL_Main_init_phase3___start_all() { #endif } + { + cpu_timer t; + ProxySQL_Main_init_MCP_module(); +#ifdef DEBUG + std::cerr << 
"Main phase3 : MCP module initialized in "; +#endif + } + unsigned int iter = 0; do { sleep_iter(++iter); } while (load_ != 1); load_ = 0; diff --git a/src/proxysql.cfg b/src/proxysql.cfg index 70f8ec27c2..8ffee0b7fd 100644 --- a/src/proxysql.cfg +++ b/src/proxysql.cfg @@ -57,6 +57,16 @@ mysql_variables= sessions_sort=true } +mcp_variables= +{ + mcp_enabled=false + mcp_port=6071 + mcp_config_endpoint_auth="" + mcp_observe_endpoint_auth="" + mcp_query_endpoint_auth="" + mcp_admin_endpoint_auth="" + mcp_cache_endpoint_auth="" + mcp_timeout_ms=30000 # GenAI module configuration genai_variables= { diff --git a/test/tap/proxysql-ca.pem b/test/tap/proxysql-ca.pem new file mode 100644 index 0000000000..256a3158d4 --- /dev/null +++ b/test/tap/proxysql-ca.pem @@ -0,0 +1,18 @@ +-----BEGIN CERTIFICATE----- +MIIC8zCCAdugAwIBAgIEaWQj8TANBgkqhkiG9w0BAQsFADAxMS8wLQYDVQQDDCZQ +cm94eVNRTF9BdXRvX0dlbmVyYXRlZF9DQV9DZXJ0aWZpY2F0ZTAeFw0yNjAxMTEy +MjI4MDFaFw0zNjAxMDkyMjI4MDFaMDExLzAtBgNVBAMMJlByb3h5U1FMX0F1dG9f +R2VuZXJhdGVkX0NBX0NlcnRpZmljYXRlMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8A +MIIBCgKCAQEAm+yYXZdv9Q1ifx7QRxR7icJMyOqnEIcFTT4zpStJx586mKrtNLbl +dWf8wpxVLoEbmwTcfrKTL7ys7QZEQiX1JVEYkCWjlhy90uo2czOhag91WgBdJe9D +9x9wGLUscgxj8bxQU0tT0ZjRVcvGMf45frFw26f2PPaHJ5eCyU1hRx9PGp6XUct8 +xDWPUrUU4ilxdsgxIjNLGKrXT3HgmaiePEn+wn0ASKkaiSrtE5VwYkmCnbv3qBQ8 +/hT2K1W81zfpvQIa6gMEOs3FExfhuEIGWs7PcipT7XSK6n+fZY40jdN3NVRLQvfE +8z+mHXEqDM+SNTZuG2W7QegSaEZncaXVUQIDAQABoxMwETAPBgNVHRMBAf8EBTAD +AQH/MA0GCSqGSIb3DQEBCwUAA4IBAQAmP+o3MGKoNpjnxW1tkjcUZaDuAjPVBJoX +EzjVahV0Hnb9ALptIeGXkpTP9LcvOgOMFMWNRFdQTyUfgiajCBVOjc0LgkbWfpiS +UV9QEbtN9uXdzxMO0ZvAAbZsB+TAfRo6zQeU++vWVochnn/J4J0ax641Gq1tSH2M +If4KUhTLP1fZoGKllm2pr/YJr56e+nsy3gVmolR9o5P+2aYfDd0TPy8tgH+uPHTZ +o1asy6oB/8a47nQVUU82ljJgoe1iVYwYRchLjYQLCJCoYN6AMPxpPxQVME4AgBrx +OHyDVPBvWU/NgN3banbrlRTJtCtp3spoKO8oGtAvPqGV0h1860mw +-----END CERTIFICATE----- diff --git a/test/tap/proxysql-cert.pem b/test/tap/proxysql-cert.pem new file mode 100644 index 0000000000..0aff3a8fff 
--- /dev/null +++ b/test/tap/proxysql-cert.pem @@ -0,0 +1,18 @@ +-----BEGIN CERTIFICATE----- +MIIC9DCCAdygAwIBAgIEaWQj8TANBgkqhkiG9w0BAQsFADAxMS8wLQYDVQQDDCZQ +cm94eVNRTF9BdXRvX0dlbmVyYXRlZF9DQV9DZXJ0aWZpY2F0ZTAeFw0yNjAxMTEy +MjI4MDFaFw0zNjAxMDkyMjI4MDFaMDUxMzAxBgNVBAMMKlByb3h5U1FMX0F1dG9f +R2VuZXJhdGVkX1NlcnZlcl9DZXJ0aWZpY2F0ZTCCASIwDQYJKoZIhvcNAQEBBQAD +ggEPADCCAQoCggEBAJvsmF2Xb/UNYn8e0EcUe4nCTMjqpxCHBU0+M6UrScefOpiq +7TS25XVn/MKcVS6BG5sE3H6yky+8rO0GREIl9SVRGJAlo5YcvdLqNnMzoWoPdVoA +XSXvQ/cfcBi1LHIMY/G8UFNLU9GY0VXLxjH+OX6xcNun9jz2hyeXgslNYUcfTxqe +l1HLfMQ1j1K1FOIpcXbIMSIzSxiq109x4JmonjxJ/sJ9AEipGokq7ROVcGJJgp27 +96gUPP4U9itVvNc36b0CGuoDBDrNxRMX4bhCBlrOz3IqU+10iup/n2WONI3TdzVU +S0L3xPM/ph1xKgzPkjU2bhtlu0HoEmhGZ3Gl1VECAwEAAaMQMA4wDAYDVR0TAQH/ +BAIwADANBgkqhkiG9w0BAQsFAAOCAQEAL2fQnE9vUK7/t6tECL7LMSs2Y5pBUZsA +sCQigyU7CQ9e6GTG5lPonWVX4pOfriDEWOkAuWlgRSxZpbvPJBpqN1CpR1tFBpMn +2H7gXZGkx+O2fvVvBMPFxusZZRoFfKWwO7Vr+YU3q8pai4ra3lFMMzzrIKku65pt +Vv2U4Sb4RsdXYDsjiAUSsPNqJsQTvum5QTEzqMSUSrKEvpOtVVvGr7KULZt4md/C +GQcuZujr2VTiclDhAP7rvMhmWE8FhGCcBce+k3/PMq9ui+NsMLGmWvp4BUmr8mD3 +xTwclMHIahUrxFEgp/AA+NspGCFm48xyvSpmfttAW83JYDs7R5fJEQ== +-----END CERTIFICATE----- diff --git a/test/tap/proxysql-key.pem b/test/tap/proxysql-key.pem new file mode 100644 index 0000000000..c5c9eed8a6 --- /dev/null +++ b/test/tap/proxysql-key.pem @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEogIBAAKCAQEAm+yYXZdv9Q1ifx7QRxR7icJMyOqnEIcFTT4zpStJx586mKrt +NLbldWf8wpxVLoEbmwTcfrKTL7ys7QZEQiX1JVEYkCWjlhy90uo2czOhag91WgBd +Je9D9x9wGLUscgxj8bxQU0tT0ZjRVcvGMf45frFw26f2PPaHJ5eCyU1hRx9PGp6X +Uct8xDWPUrUU4ilxdsgxIjNLGKrXT3HgmaiePEn+wn0ASKkaiSrtE5VwYkmCnbv3 +qBQ8/hT2K1W81zfpvQIa6gMEOs3FExfhuEIGWs7PcipT7XSK6n+fZY40jdN3NVRL +QvfE8z+mHXEqDM+SNTZuG2W7QegSaEZncaXVUQIDAQABAoIBABbreNwtEgp5/LQF +8gS4yI4P7xyLjaI6zrczgQDy84Xx7HmbioG4rtMKxZdPxp+u38FyPf0rv8IBIIQ4 +6xi0HqxtFsi9l6XNtMOHpRhbCwudmRjxO8ADQ0DUsLQZEZ70Hk7e6QnNZVVGeuL7 +MLeRkJ8Eczv+nQ4KCQTzWwi/JKEBCOoYtPDwkecydbxMsOVM5204rXwmQxW9l2Sr 
+uGrtfWp5C+xW041spRGskV/7jNhNNKethO1obQlBN6LJKD48p8uEvH+FuHWndm/E +F5GgttSLOemeJrjpXjE4RCdRCT/ZSyE120mxv7YgctMGC1ouFWolgc4hGzJURBtu +H/8KbXcCgYEAzjEp8b9I4QUCopc+bYO5FAVN+I5e/uvVFbgu1QLhknK488DIj2XH +uKj52lGMOkdtgdEQdpk/9fYd0kwn2k7U8/6mb5kQqtuzSll6UCC+OwaCbke3DPp1 +JXmGapUYVIZ8TIxnVaZcKSWv3VqjuwV2GQqOcaSSbAt3BQ5whIzn4F8CgYEAwZbj +IHx0GmrvxjF0JpC1duk65zMKWyLddYeAIuq9hgB7jCVOqmmDElTcZOWKboMUvVg7 +SvteIZjQLB93ktqHf40n1hfmYMaSNLJYxe/JMXWYEDL9++qBPz0rLpScZGxOmNyj +jIl8pwilATs2ZAjQEfy5qL1GeOHe/X6N896vaE8CgYBNNfHL+eIziOnEsrgI0GOU +0Kuy4LVH5k3DtVWsJEkNyvHhLRatQ+K3DmeJTjIhfK/QBdaRYq+lzgS6xBPEVvK9 +b2Upsvqf0Gdh9wGrUaeKeNSMsUQlkwAdCVXBQZV7yWRwUb88PnCSY+9oB1H6bYAc +vmw6t/KwjNaDyTVvHUiTJwKBgHZ2hvZSMhoYZjG6AYG3+9OQVWM1cJjkdPB+woKb +cu6VTQUtrz3I41RMabG0ZUnLHN3hKCdyOuAESx81Ak7zOwdqsX3pkiiWWtG0cW5u +lYeWlj8TdSi7D+xK2ine9vTc8hvIqKxPVeBBAfgG6/m7Cth29oWzjXRbg8FLuEIL +evsxAoGASKbnZznS0tI8mLBrnZWISlpbdiXwHcIOcuF06rEVHTFHd+Ab5eRCFwY9 +idQnAEUUUK8FTHvj5pdPNYv3s9koRF2FHgBilF4k3ESMR2yoPuUQHQ0M7uySy2+c +u7owHRtq0phoywgtZnbKpg1h0kafTkYdRG3eF3I8pBy7jDGrG4k= +-----END RSA PRIVATE KEY----- diff --git a/test/tap/tests/mcp_module-t.cpp b/test/tap/tests/mcp_module-t.cpp new file mode 100644 index 0000000000..18b85a0632 --- /dev/null +++ b/test/tap/tests/mcp_module-t.cpp @@ -0,0 +1,435 @@ +/** + * @file mcp_module-t.cpp + * @brief TAP test for the MCP module + * + * This test verifies the functionality of the MCP (Model Context Protocol) module in ProxySQL. 
+ * It tests: + * - LOAD/SAVE commands for MCP variables across all variants + * - Variable access (SET and SELECT) for MCP variables + * - Variable persistence across storage layers (memory, disk, runtime) + * - CHECKSUM commands for MCP variables + * - SHOW VARIABLES for MCP module + * + * @date 2025-01-11 + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "mysql.h" +#include "mysqld_error.h" + +#include "tap.h" +#include "command_line.h" +#include "utils.h" + +using std::string; + +/** + * @brief Helper function to add LOAD/SAVE command variants for MCP module + * + * This function generates all the standard LOAD/SAVE command variants that + * ProxySQL supports for module variables. + * + * @param queries Vector to append the generated commands to + */ +void add_mcp_load_save_commands(std::vector& queries) { + // LOAD commands - Memory variants + queries.push_back("LOAD MCP VARIABLES TO MEMORY"); + queries.push_back("LOAD MCP VARIABLES TO MEM"); + + // LOAD from disk + queries.push_back("LOAD MCP VARIABLES FROM DISK"); + + // LOAD from memory + queries.push_back("LOAD MCP VARIABLES FROM MEMORY"); + queries.push_back("LOAD MCP VARIABLES FROM MEM"); + + // LOAD to runtime + queries.push_back("LOAD MCP VARIABLES TO RUNTIME"); + queries.push_back("LOAD MCP VARIABLES TO RUN"); + + // SAVE from memory + queries.push_back("SAVE MCP VARIABLES FROM MEMORY"); + queries.push_back("SAVE MCP VARIABLES FROM MEM"); + + // SAVE to disk + queries.push_back("SAVE MCP VARIABLES TO DISK"); + + // SAVE to memory + queries.push_back("SAVE MCP VARIABLES TO MEMORY"); + queries.push_back("SAVE MCP VARIABLES TO MEM"); + + // SAVE from runtime + queries.push_back("SAVE MCP VARIABLES FROM RUNTIME"); + queries.push_back("SAVE MCP VARIABLES FROM RUN"); +} + +/** + * @brief Get the value of an MCP variable as a string + * + * @param admin MySQL connection to admin interface + * @param var_name Variable name (without mcp- prefix) + * @return std::string 
The variable value, or empty string on error + */ +std::string get_mcp_variable(MYSQL* admin, const std::string& var_name) { + std::string query = "SELECT @@mcp-" + var_name; + if (mysql_query(admin, query.c_str()) != 0) { + return ""; + } + + MYSQL_RES* res = mysql_store_result(admin); + if (!res) { + return ""; + } + + MYSQL_ROW row = mysql_fetch_row(res); + std::string value = row && row[0] ? row[0] : ""; + + mysql_free_result(res); + return value; +} + +/** + * @brief Test variable access operations (SET and SELECT) + * + * Tests setting and retrieving MCP variables to ensure they work correctly. + */ +int test_variable_access(MYSQL* admin) { + int test_num = 0; + + // Test 1: Get default value of mcp_enabled + std::string enabled_default = get_mcp_variable(admin, "enabled"); + ok(enabled_default == "false", + "Default value of mcp_enabled is 'false', got '%s'", enabled_default.c_str()); + + // Test 2: Get default value of mcp_port + std::string port_default = get_mcp_variable(admin, "port"); + ok(port_default == "6071", + "Default value of mcp_port is '6071', got '%s'", port_default.c_str()); + + // Test 3: Set mcp_enabled to true + MYSQL_QUERY(admin, "SET mcp-enabled=true"); + std::string enabled_new = get_mcp_variable(admin, "enabled"); + ok(enabled_new == "true", + "After SET, mcp_enabled is 'true', got '%s'", enabled_new.c_str()); + + // Test 4: Set mcp_port to a new value + MYSQL_QUERY(admin, "SET mcp-port=8080"); + std::string port_new = get_mcp_variable(admin, "port"); + ok(port_new == "8080", + "After SET, mcp_port is '8080', got '%s'", port_new.c_str()); + + // Test 5: Set mcp_config_endpoint_auth + MYSQL_QUERY(admin, "SET mcp-config_endpoint_auth='token123'"); + std::string auth_config = get_mcp_variable(admin, "config_endpoint_auth"); + ok(auth_config == "token123", + "After SET, mcp_config_endpoint_auth is 'token123', got '%s'", auth_config.c_str()); + + // Test 6: Set mcp_timeout_ms + MYSQL_QUERY(admin, "SET mcp-timeout_ms=60000"); + std::string 
timeout = get_mcp_variable(admin, "timeout_ms"); + ok(timeout == "60000", + "After SET, mcp_timeout_ms is '60000', got '%s'", timeout.c_str()); + + // Test 7: Verify SHOW VARIABLES LIKE pattern + MYSQL_QUERY(admin, "SHOW VARIABLES LIKE 'mcp-%'"); + MYSQL_RES* res = mysql_store_result(admin); + int num_rows = mysql_num_rows(res); + ok(num_rows == 14, + "SHOW VARIABLES LIKE 'mcp-%%' returns 14 rows, got %d", num_rows); + mysql_free_result(res); + + // Test 8: Restore default values + MYSQL_QUERY(admin, "SET mcp-enabled=false"); + MYSQL_QUERY(admin, "SET mcp-port=6071"); + MYSQL_QUERY(admin, "SET mcp-config_endpoint_auth=''"); + MYSQL_QUERY(admin, "SET mcp-timeout_ms=30000"); + MYSQL_QUERY(admin, "SET mcp-mysql_hosts='127.0.0.1'"); + MYSQL_QUERY(admin, "SET mcp-mysql_ports='3306'"); + MYSQL_QUERY(admin, "SET mcp-mysql_user=''"); + MYSQL_QUERY(admin, "SET mcp-mysql_password=''"); + MYSQL_QUERY(admin, "SET mcp-mysql_schema=''"); + MYSQL_QUERY(admin, "SET mcp-catalog_path='mcp_catalog.db'"); + ok(1, "Restored default values for MCP variables"); + + return test_num; +} + +/** + * @brief Test variable persistence across storage layers + * + * Tests that variables are correctly copied between: + * - Memory (main.global_variables) + * - Disk (disk.global_variables) + * - Runtime (GloMCPH handler object) + */ +int test_variable_persistence(MYSQL* admin) { + int test_num = 0; + + diag("=== Part 3: Testing variable persistence across storage layers ==="); + diag("Testing variable persistence: Set values, save to disk, modify, load from disk"); + + // Test 1: Set values and save to disk + diag("Test 1: Setting mcp-enabled=true, mcp-port=7070, mcp-timeout_ms=90000"); + MYSQL_QUERY(admin, "SET mcp-enabled=true"); + MYSQL_QUERY(admin, "SET mcp-port=7070"); + MYSQL_QUERY(admin, "SET mcp-timeout_ms=90000"); + diag("Test 1: Saving variables to disk with 'SAVE MCP VARIABLES TO DISK'"); + MYSQL_QUERY(admin, "SAVE MCP VARIABLES TO DISK"); + ok(1, "Set mcp_enabled=true, mcp_port=7070, 
mcp_timeout_ms=90000 and saved to disk"); + + // Test 2: Modify values in memory + diag("Test 2: Modifying values in memory (mcp-enabled=false, mcp-port=8080)"); + MYSQL_QUERY(admin, "SET mcp-enabled=false"); + MYSQL_QUERY(admin, "SET mcp-port=8080"); + std::string enabled_mem = get_mcp_variable(admin, "enabled"); + std::string port_mem = get_mcp_variable(admin, "port"); + diag("Test 2: After modification - mcp_enabled='%s', mcp_port='%s'", enabled_mem.c_str(), port_mem.c_str()); + ok(enabled_mem == "false" && port_mem == "8080", + "Modified in memory: mcp_enabled='false', mcp_port='8080'"); + + // Test 3: Load from disk and verify original values restored + diag("Test 3: Loading variables from disk with 'LOAD MCP VARIABLES FROM DISK'"); + MYSQL_QUERY(admin, "LOAD MCP VARIABLES FROM DISK"); + std::string enabled_disk = get_mcp_variable(admin, "enabled"); + std::string port_disk = get_mcp_variable(admin, "port"); + std::string timeout_disk = get_mcp_variable(admin, "timeout_ms"); + diag("Test 3: After LOAD FROM DISK - mcp_enabled='%s', mcp_port='%s', mcp_timeout_ms='%s'", + enabled_disk.c_str(), port_disk.c_str(), timeout_disk.c_str()); + ok(enabled_disk == "true" && port_disk == "7070" && timeout_disk == "90000", + "After LOAD FROM DISK: mcp_enabled='true', mcp_port='7070', mcp_timeout_ms='90000'"); + + // Test 4: Save to memory and verify + diag("Test 4: Executing 'SAVE MCP VARIABLES TO MEMORY'"); + MYSQL_QUERY(admin, "SAVE MCP VARIABLES TO MEMORY"); + ok(1, "SAVE MCP VARIABLES TO MEMORY executed"); + + // Test 5: Load from memory + diag("Test 5: Executing 'LOAD MCP VARIABLES FROM MEMORY'"); + MYSQL_QUERY(admin, "LOAD MCP VARIABLES FROM MEMORY"); + ok(1, "LOAD MCP VARIABLES FROM MEMORY executed"); + + // Test 6: Test SAVE from runtime + diag("Test 6: Executing 'SAVE MCP VARIABLES FROM RUNTIME'"); + MYSQL_QUERY(admin, "SAVE MCP VARIABLES FROM RUNTIME"); + ok(1, "SAVE MCP VARIABLES FROM RUNTIME executed"); + + // Test 7: Test LOAD to runtime + diag("Test 7: 
Executing 'LOAD MCP VARIABLES TO RUNTIME'"); + MYSQL_QUERY(admin, "LOAD MCP VARIABLES TO RUNTIME"); + ok(1, "LOAD MCP VARIABLES TO RUNTIME executed"); + + // Test 8: Restore default values + diag("Test 8: Restoring default values"); + MYSQL_QUERY(admin, "SET mcp-enabled=false"); + MYSQL_QUERY(admin, "SET mcp-port=6071"); + MYSQL_QUERY(admin, "SET mcp-config_endpoint_auth=''"); + MYSQL_QUERY(admin, "SET mcp-observe_endpoint_auth=''"); + MYSQL_QUERY(admin, "SET mcp-query_endpoint_auth=''"); + MYSQL_QUERY(admin, "SET mcp-admin_endpoint_auth=''"); + MYSQL_QUERY(admin, "SET mcp-cache_endpoint_auth=''"); + MYSQL_QUERY(admin, "SET mcp-timeout_ms=30000"); + MYSQL_QUERY(admin, "SET mcp-mysql_hosts='127.0.0.1'"); + MYSQL_QUERY(admin, "SET mcp-mysql_ports='3306'"); + MYSQL_QUERY(admin, "SET mcp-mysql_user=''"); + MYSQL_QUERY(admin, "SET mcp-mysql_password=''"); + MYSQL_QUERY(admin, "SET mcp-mysql_schema=''"); + MYSQL_QUERY(admin, "SET mcp-catalog_path='mcp_catalog.db'"); + MYSQL_QUERY(admin, "SAVE MCP VARIABLES TO DISK"); + ok(1, "Restored default values and saved to disk"); + + return test_num; +} + +/** + * @brief Test CHECKSUM commands for MCP variables + * + * Tests all CHECKSUM variants to ensure they work correctly. 
+ */ +int test_checksum_commands(MYSQL* admin) { + int test_num = 0; + + diag("=== Part 4: Testing CHECKSUM commands ==="); + diag("Testing CHECKSUM commands for MCP variables"); + + // Test 1: CHECKSUM DISK MCP VARIABLES + diag("Test 1: Executing 'CHECKSUM DISK MCP VARIABLES'"); + int rc1 = mysql_query(admin, "CHECKSUM DISK MCP VARIABLES"); + diag("Test 1: Query returned with rc=%d", rc1); + ok(rc1 == 0, "CHECKSUM DISK MCP VARIABLES"); + if (rc1 == 0) { + MYSQL_RES* res = mysql_store_result(admin); + int num_rows = mysql_num_rows(res); + diag("Test 1: Result has %d row(s)", num_rows); + ok(num_rows == 1, "CHECKSUM DISK MCP VARIABLES returns 1 row"); + mysql_free_result(res); + } else { + diag("Test 1: Query failed with error: %s", mysql_error(admin)); + skip(1, "Skipping row count check due to error"); + } + + // Test 2: CHECKSUM MEM MCP VARIABLES + diag("Test 2: Executing 'CHECKSUM MEM MCP VARIABLES'"); + int rc2 = mysql_query(admin, "CHECKSUM MEM MCP VARIABLES"); + diag("Test 2: Query returned with rc=%d", rc2); + ok(rc2 == 0, "CHECKSUM MEM MCP VARIABLES"); + if (rc2 == 0) { + MYSQL_RES* res = mysql_store_result(admin); + int num_rows = mysql_num_rows(res); + diag("Test 2: Result has %d row(s)", num_rows); + ok(num_rows == 1, "CHECKSUM MEM MCP VARIABLES returns 1 row"); + mysql_free_result(res); + } else { + diag("Test 2: Query failed with error: %s", mysql_error(admin)); + skip(1, "Skipping row count check due to error"); + } + + // Test 3: CHECKSUM MEMORY MCP VARIABLES (alias for MEM) + diag("Test 3: Executing 'CHECKSUM MEMORY MCP VARIABLES' (alias for MEM)"); + int rc3 = mysql_query(admin, "CHECKSUM MEMORY MCP VARIABLES"); + diag("Test 3: Query returned with rc=%d", rc3); + ok(rc3 == 0, "CHECKSUM MEMORY MCP VARIABLES"); + if (rc3 == 0) { + MYSQL_RES* res = mysql_store_result(admin); + int num_rows = mysql_num_rows(res); + diag("Test 3: Result has %d row(s)", num_rows); + ok(num_rows == 1, "CHECKSUM MEMORY MCP VARIABLES returns 1 row"); + 
mysql_free_result(res); + } else { + diag("Test 3: Query failed with error: %s", mysql_error(admin)); + skip(1, "Skipping row count check due to error"); + } + + // Test 4: CHECKSUM MCP VARIABLES (defaults to DISK) + diag("Test 4: Executing 'CHECKSUM MCP VARIABLES' (defaults to DISK)"); + int rc4 = mysql_query(admin, "CHECKSUM MCP VARIABLES"); + diag("Test 4: Query returned with rc=%d", rc4); + ok(rc4 == 0, "CHECKSUM MCP VARIABLES"); + if (rc4 == 0) { + MYSQL_RES* res = mysql_store_result(admin); + int num_rows = mysql_num_rows(res); + diag("Test 4: Result has %d row(s)", num_rows); + ok(num_rows == 1, "CHECKSUM MCP VARIABLES returns 1 row"); + mysql_free_result(res); + } else { + diag("Test 4: Query failed with error: %s", mysql_error(admin)); + skip(1, "Skipping row count check due to error"); + } + + return test_num; +} + +/** + * @brief Main test function + * + * Orchestrates all MCP module tests. + */ +int main() { + CommandLine cl; + + if (cl.getEnv()) { + diag("Failed to get the required environmental variables."); + return EXIT_FAILURE; + } + + // Initialize connection to admin interface + MYSQL* admin = mysql_init(NULL); + if (!admin) { + fprintf(stderr, "File %s, line %d, Error: mysql_init failed\n", __FILE__, __LINE__); + return EXIT_FAILURE; + } + + if (!mysql_real_connect(admin, cl.host, cl.admin_username, cl.admin_password, + NULL, cl.admin_port, NULL, 0)) { + fprintf(stderr, "File %s, line %d, Error: %s\n", __FILE__, __LINE__, mysql_error(admin)); + return EXIT_FAILURE; + } + + diag("Connected to ProxySQL admin interface at %s:%d", cl.host, cl.admin_port); + + // Build the list of LOAD/SAVE commands to test + std::vector queries; + add_mcp_load_save_commands(queries); + + // Each command test = 2 tests (execution + optional result check) + // LOAD/SAVE commands: 14 commands + // Variable access tests: 8 tests + // Persistence tests: 8 tests + // CHECKSUM tests: 8 tests (4 commands × 2) + int num_load_save_tests = (int)queries.size() * 2; // Each 
command + result check + int total_tests = num_load_save_tests + 8 + 8 + 8; + + plan(total_tests); + + int test_count = 0; + + // ============================================================================ + // Part 1: Test LOAD/SAVE commands + // ============================================================================ + diag("=== Part 1: Testing LOAD/SAVE MCP VARIABLES commands ==="); + for (const auto& query : queries) { + MYSQL* admin_local = mysql_init(NULL); + if (!admin_local) { + diag("Failed to initialize MySQL connection"); + continue; + } + + if (!mysql_real_connect(admin_local, cl.host, cl.admin_username, cl.admin_password, + NULL, cl.admin_port, NULL, 0)) { + diag("Failed to connect to admin interface"); + mysql_close(admin_local); + continue; + } + + int rc = run_q(admin_local, query.c_str()); + ok(rc == 0, "Command executed successfully: %s", query.c_str()); + + // For SELECT/SHOW/CHECKSUM style commands, verify result set + if (strncasecmp(query.c_str(), "SELECT ", 7) == 0 || + strncasecmp(query.c_str(), "SHOW ", 5) == 0 || + strncasecmp(query.c_str(), "CHECKSUM ", 9) == 0) { + MYSQL_RES* res = mysql_store_result(admin_local); + unsigned long long num_rows = mysql_num_rows(res); + ok(num_rows != 0, "Command returned rows: %s", query.c_str()); + mysql_free_result(res); + } else { + // For non-query commands, just mark the test as passed + ok(1, "Command completed: %s", query.c_str()); + } + + mysql_close(admin_local); + } + + // ============================================================================ + // Part 2: Test variable access (SET and SELECT) + // ============================================================================ + diag("=== Part 2: Testing variable access (SET and SELECT) ==="); + test_count += test_variable_access(admin); + + // ============================================================================ + // Part 3: Test variable persistence across layers + // 
============================================================================ + diag("=== Part 3: Testing variable persistence across storage layers ==="); + test_count += test_variable_persistence(admin); + + // ============================================================================ + // Part 4: Test CHECKSUM commands + // ============================================================================ + diag("=== Part 4: Testing CHECKSUM commands ==="); + test_count += test_checksum_commands(admin); + + // ============================================================================ + // Cleanup + // ============================================================================ + mysql_close(admin); + + diag("=== All MCP module tests completed ==="); + + return exit_status(); +}