建立一個SQL代理 - AI Agent 開發特訓營：短期實現智能自動化

🤖 使用 LangGraph 建構 SQL Agent：從零到生產的完整指南在現代數據驅動的世界中，讓機器理解自然語言並自動生成 SQL 查詢已經成為一個重要需求。本文將帶您從頭開始建構一個強大的 SQL Agent，它能夠接受自然語言問題，自動分析資料庫結構，生成並執行 SQL 查詢，最後提供易懂的答案。 🎯 什麼是 SQL Agent？ SQL Agent 是一個智能代理程序，能夠：接受自然語言查詢分析資料庫結構自動生成 SQL 語句執行查詢並處理錯誤提供人類可讀的回應讓我們開始實作吧！ ⚙️ 環境設置首先，我們需要安裝必要的套件： # 安裝必要的套件 !pip install -q langgraph langchain_community langchain-openai requests sqlite3 # 匯入必要的模組 import os import requests import sqlite3 from langchain_community.utilities import SQLDatabase from langchain_community.agent_toolkits import SQLDatabaseToolkit from langchain_openai import ChatOpenAI from langgraph.prebuilt import create_react_agent from langchain_core.messages import HumanMessage # 設置 OpenAI API Key os.environ["OPENAI_API_KEY"] = "your-openai-api-key-here" # 請替換為您的 API Key print("✅ 套件安裝完成！") 🗄️ 準備示例資料庫我們將使用 Chinook 資料庫，這是一個代表數位媒體商店的示例資料庫： # 下載 Chinook 示例資料庫 def download_chinook_db(): url = "https://storage.googleapis.com/benchmarks-artifacts/chinook/Chinook.db" print("📥 正在下載 Chinook 資料庫...") response = requests.get(url) if response.status_code == 200: with open("Chinook.db", "wb") as file: file.write(response.content) print("✅ 資料庫下載成功！") return True else: print(f"❌ 下載失敗，狀態碼：{response.status_code}") return False # 執行下載 if download_chinook_db(): # 建立資料庫連接 db = SQLDatabase.from_uri("sqlite:///Chinook.db") print(f"📊 資料庫方言：{db.dialect}") print(f"📋 可用表格：{db.get_usable_table_names()}") # 查看示例資料 sample_query = "SELECT * FROM Artist LIMIT 5;" result = db.run(sample_query) print(f"🎵 藝術家表格示例：\n{result}") 🔧 初始化 LLM 和工具接下來，我們初始化語言模型和資料庫互動工具： # 初始化語言模型 def initialize_llm(): """初始化 ChatOpenAI 模型""" try: llm = ChatOpenAI( model="gpt-4", temperature=0, # 降低隨機性以獲得更一致的結果 ) # 測試連接 test_response = llm.invoke([HumanMessage(content="Hello!")]) print("✅ LLM 初始化成功！") return llm except Exception as e: print(f"❌ LLM 初始化失敗：{e}") return None # 建立資料庫工具包 def create_database_toolkit(db, llm): """建立 SQL 資料庫工具包""" toolkit = SQLDatabaseToolkit(db=db, llm=llm) tools = toolkit.get_tools() print("🛠️ 可用工具：") for i, tool in enumerate(tools, 1): print(f"{i}. {tool.name}: {tool.description[:100]}...") return tools # 執行初始化 llm = initialize_llm() if llm: tools = create_database_toolkit(db, llm) 🚀 建構預建 SQL Agent 讓我們首先建立一個預建的 SQL Agent 來快速開始： # 定義系統提示 def create_system_prompt(): """建立系統提示""" return f""" 你是一個專門與 SQL 資料庫互動的智能代理。給定一個輸入問題，請建立語法正確的 {db.dialect} 查詢來執行，然後查看查詢結果並返回答案。重要指示： 1. 除非用戶指定特定數量，否則請將查詢限制在最多 5 個結果 2. 可以按相關欄位排序以返回最有趣的示例 3. 永遠不要查詢表格的所有欄位，只查詢與問題相關的欄位 4. 執行查詢前務必檢查查詢的正確性 5. 如果執行查詢時出錯，請重寫查詢並重試 6. 不要執行任何 DML 語句（INSERT、UPDATE、DELETE、DROP 等） 7. 開始時應該總是查看資料庫中的表格以了解可以查詢什麼 8. 然後查詢最相關表格的 schema 記住：安全第一，準確第二！ """ # 建立預建 Agent def create_prebuilt_agent(llm, tools): """建立預建的 React Agent""" system_prompt = create_system_prompt() agent = create_react_agent( llm, tools, prompt=system_prompt, ) print("🤖 預建 SQL Agent 建立成功！") return agent # 執行 Agent 建立 if llm and tools: prebuilt_agent = create_prebuilt_agent(llm, tools) 🧪 測試預建 Agent 現在讓我們測試我們的預建 Agent： # 定義測試函數 def test_sql_agent(agent, question): """測試 SQL Agent 的功能""" print(f"❓ 問題：{question}") print("=" * 50) try: # 執行查詢 messages = [{"role": "user", "content": question}] # 串流執行並顯示過程 step_count = 0 for step in agent.stream({"messages": messages}, stream_mode="values"): step_count += 1 last_message = step["messages"][-1] print(f"\n📍 步驟 {step_count}:") if hasattr(last_message, 'content'): if last_message.content: print(f"💬 回應：{last_message.content[:200]}...") if hasattr(last_message, 'tool_calls') and last_message.tool_calls: for tool_call in last_message.tool_calls: print(f"🔧 工具呼叫：{tool_call.get('name', 'Unknown')}") print("\n" + "=" * 50) print("✅ 查詢完成！") except Exception as e: print(f"❌ 執行錯誤：{e}") # 測試不同類型的問題 test_questions = [ "哪個銷售代理在 2009 年的銷售額最高？", "列出前 3 名最受歡迎的音樂類型", "哪個國家的客戶最多？" ] # 執行測試 if 'prebuilt_agent' in locals(): for question in test_questions: test_sql_agent(prebuilt_agent, question) print("\n" + "🔄" * 20 + "\n") 🎨 建構自定義 SQL Agent 雖然預建 Agent 很方便，但自定義 Agent 能提供更精確的控制。讓我們建構一個： from typing import Literal from langchain_core.messages import AIMessage from langchain_core.runnables import RunnableConfig from langgraph.graph import END, START, MessagesState, StateGraph from langgraph.prebuilt import ToolNode # 取得特定工具 def setup_custom_tools(tools): """設置自定義工具""" tool_dict = {tool.name: tool for tool in tools} get_schema_tool = tool_dict["sql_db_schema"] get_schema_node = ToolNode([get_schema_tool], name="get_schema") run_query_tool = tool_dict["sql_db_query"] run_query_node = ToolNode([run_query_tool], name="run_query") list_tables_tool = tool_dict["sql_db_list_tables"] query_checker_tool = tool_dict["sql_db_query_checker"] return { 'get_schema_tool': get_schema_tool, 'get_schema_node': get_schema_node, 'run_query_tool': run_query_tool, 'run_query_node': run_query_node, 'list_tables_tool': list_tables_tool, 'query_checker_tool': query_checker_tool } # 建立專用節點函數 def create_custom_nodes(llm, custom_tools): """建立自定義節點""" # 1. 列出表格節點 def list_tables(state: MessagesState): tool_call = { "name": "sql_db_list_tables", "args": {}, "id": "list_tables_call", "type": "tool_call", } tool_call_message = AIMessage(content="", tool_calls=[tool_call]) tool_message = custom_tools['list_tables_tool'].invoke(tool_call) response = AIMessage(f"📋 可用表格：{tool_message.content}") return {"messages": [tool_call_message, tool_message, response]} # 2. 呼叫 schema 節點 def call_get_schema(state: MessagesState): llm_with_tools = llm.bind_tools([custom_tools['get_schema_tool']], tool_choice="any") response = llm_with_tools.invoke(state["messages"]) return {"messages": [response]} # 3. 生成查詢節點 def generate_query(state: MessagesState): system_message = { "role": "system", "content": f""" 你是專門設計來與 SQL 資料庫互動的代理。給定輸入問題，建立語法正確的 {db.dialect} 查詢來執行。限制查詢結果最多 5 個。只查詢與問題相關的欄位。不要執行任何 DML 語句。 """ } llm_with_tools = llm.bind_tools([custom_tools['run_query_tool']]) response = llm_with_tools.invoke([system_message] + state["messages"]) return {"messages": [response]} # 4. 檢查查詢節點 def check_query(state: MessagesState): system_message = { "role": "system", "content": f""" 你是一個細心的 SQL 專家。檢查 {db.dialect} 查詢的常見錯誤，包括： - NULL 值的 NOT IN 使用 - 應該使用 UNION ALL 而非 UNION - 排他性範圍的 BETWEEN 使用 - 謂詞中的資料類型不匹配 - 標識符的正確引用 - 函數參數數量正確 - 正確的資料類型轉換 - 正確的 JOIN 欄位如果有錯誤，重寫查詢。如果沒有錯誤，重現原始查詢。 """ } tool_call = state["messages"][-1].tool_calls[0] user_message = {"role": "user", "content": tool_call["args"]["query"]} llm_with_tools = llm.bind_tools([custom_tools['run_query_tool']], tool_choice="any") response = llm_with_tools.invoke([system_message, user_message]) response.id = state["messages"][-1].id return {"messages": [response]} return list_tables, call_get_schema, generate_query, check_query # 設置自定義工具和節點 if 'tools' in locals(): custom_tools = setup_custom_tools(tools) list_tables, call_get_schema, generate_query, check_query = create_custom_nodes(llm, custom_tools) print("✅ 自定義節點建立完成！") 🤖 SQL Agent 自定義工作流程詳解 📊 節點功能說明 1. 🚀 START → list_tables 觸發: 用戶輸入自然語言問題功能: 自動列出資料庫中所有可用的表格輸出: 表格列表 (如: Album, Artist, Customer, Employee...) 2. 📋 list_tables → call_get_schema 功能: 強制 LLM 呼叫 schema 工具策略: 使用 tool_choice="any" 確保工具被呼叫目的: 為下一步獲取詳細的表格結構做準備 3. 🔧 call_get_schema → get_schema 功能: 實際執行 schema 查詢工具輸入: 相關表格名稱列表輸出: 詳細的表格結構、欄位類型、樣本資料 4. 📊 get_schema → generate_query 功能: 基於問題和 schema 資訊生成 SQL 查詢考量因素: 用戶的自然語言問題表格結構和關聯性資料類型匹配查詢優化 5. ⚡ generate_query → should_continue (條件分支) 判斷邏輯:if not last_message.tool_calls: return END # 沒有生成 SQL，直接結束 else: return "check_query" # 有 SQL 查詢，進入檢查階段 6. 🤔 should_continue 分支點路徑 A: END (結束) 條件: LLM 沒有生成工具呼叫情況: 問題太模糊無法生成查詢 LLM 直接提供答案而無需查詢資料庫發生錯誤導致無法繼續路徑 B: check_query (檢查查詢) 條件: LLM 生成了 SQL 查詢工具呼叫進入: 查詢驗證和優化流程 7. ✅ check_query → run_query 功能: SQL 查詢安全性和正確性檢查檢查項目: 語法錯誤安全性問題 (防止 SQL 注入) 效能考量 (避免過於複雜的查詢) 邏輯正確性輸出: 修正後的 SQL 查詢 8. 🚀 run_query → generate_query (回環) 功能: 執行 SQL 查詢並處理結果可能結果: ✅ 成功: 返回查詢結果，流程結束 ❌ 失敗: 回到 generate_query 重新生成查詢 🔄 工作流程特點 🎯 確定性執行順序與預建 Agent 不同，自定義 Agent 強制執行特定的步驟順序，確保：必定先查看資料庫結構必定獲取相關表格的 schema 必定檢查生成的查詢具備錯誤重試機制 🛡️ 安全性保障強制查詢檢查: 每個 SQL 查詢都必須通過安全檢查結構化驗證: 按步驟驗證每個階段的輸出錯誤隔離: 錯誤發生在特定節點，便於除錯 ⚡ 效能優化智能分支: 只在需要時執行查詢檢查快速結束: 無需查詢時直接結束，節省資源重試機制: 查詢失敗時自動重新生成 🔧 可擴展性每個節點都是獨立的功能單元，可以輕鬆：添加新的處理步驟修改特定節點的邏輯插入額外的驗證或處理步驟 📈 與預建 Agent 的比較特性預建 Agent 自定義 Agent 控制精度依賴提示強制執行順序錯誤處理通用處理節點特定處理可維護性較低高 (模組化) 除錯能力困難容易 (節點級別) 客製化有限完全可控開發複雜度低中等 🎨 自定義建議您可以根據需求增加額外節點： # 範例：添加結果格式化節點 builder.add_node("format_results", format_results) builder.add_edge("run_query", "format_results") builder.add_edge("format_results", END) # 範例：添加查詢快取節點 builder.add_node("check_cache", check_cache) builder.add_edge("generate_query", "check_cache") builder.add_conditional_edges("check_cache", cache_decision) 這種結構化的方法讓您對 SQL Agent 的每個步驟都有完全的控制權！ 🔗 組裝自定義 Agent 圖結構現在讓我們組裝完整的圖結構： # 建立條件邊函數 def should_continue(state: MessagesState) -> Literal[END, "check_query"]: """決定是否繼續到查詢檢查階段""" messages = state["messages"] last_message = messages[-1] # 如果沒有工具呼叫，結束流程 if not last_message.tool_calls: return END else: return "check_query" # 建構狀態圖 def build_custom_agent(): """建構自定義 SQL Agent""" builder = StateGraph(MessagesState) # 新增節點 builder.add_node("list_tables", list_tables) builder.add_node("call_get_schema", call_get_schema) builder.add_node("get_schema", custom_tools['get_schema_node']) builder.add_node("generate_query", generate_query) builder.add_node("check_query", check_query) builder.add_node("run_query", custom_tools['run_query_node']) # 定義邊的連接 builder.add_edge(START, "list_tables") builder.add_edge("list_tables", "call_get_schema") builder.add_edge("call_get_schema", "get_schema") builder.add_edge("get_schema", "generate_query") builder.add_conditional_edges("generate_query", should_continue) builder.add_edge("check_query", "run_query") builder.add_edge("run_query", "generate_query") # 編譯圖 agent = builder.compile() print("🎯 自定義 SQL Agent 建構完成！") return agent # 建立自定義 Agent if 'custom_tools' in locals(): custom_agent = build_custom_agent() 🧪 比較兩種 Agent 的效能讓我們比較預建和自定義 Agent 的表現： import time def compare_agents(question): """比較兩種 Agent 的效能""" print(f"🔬 比較測試：{question}") print("=" * 60) agents = { "預建 Agent": prebuilt_agent, "自定義 Agent": custom_agent } results = {} for agent_name, agent in agents.items(): print(f"\n🤖 測試 {agent_name}：") print("-" * 30) start_time = time.time() try: messages = [{"role": "user", "content": question}] # 只取最後一個回應 final_response = None step_count = 0 for step in agent.stream({"messages": messages}, stream_mode="values"): step_count += 1 final_response = step["messages"][-1] end_time = time.time() duration = end_time - start_time results[agent_name] = { 'response': final_response.content if hasattr(final_response, 'content') else "無回應", 'duration': duration, 'steps': step_count, 'success': True } print(f"⏱️ 執行時間：{duration:.2f} 秒") print(f"📊 執行步驟：{step_count} 步") print(f"💬 最終回應：{results[agent_name]['response'][:100]}...") except Exception as e: end_time = time.time() duration = end_time - start_time results[agent_name] = { 'response': f"錯誤：{str(e)}", 'duration': duration, 'steps': 0, 'success': False } print(f"❌ 執行失敗：{e}") # 顯示比較結果 print(f"\n📈 比較結果：") print("-" * 30) for agent_name, result in results.items(): status = "✅" if result['success'] else "❌" print(f"{status} {agent_name}: {result['duration']:.2f}秒, {result['steps']}步驟") return results # 執行比較測試 if 'prebuilt_agent' in locals() and 'custom_agent' in locals(): comparison_results = compare_agents("列出銷售額前 3 名的藝術家") 📊 進階功能：錯誤處理和監控讓我們加入錯誤處理和監控功能： import logging from functools import wraps # 設置日誌 def setup_logging(): """設置 SQL Agent 日誌""" logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) logger = logging.getLogger('sql_agent') return logger # 效能監控裝飾器 def monitor_performance(func): """監控 Agent 效能的裝飾器""" @wraps(func) def wrapper(*args, **kwargs): start_time = time.time() try: result = func(*args, **kwargs) success = True error = None except Exception as e: result = None success = False error = str(e) logger.error(f"Function {func.__name__} failed: {error}") end_time = time.time() duration = end_time - start_time # 記錄效能指標 logger.info(f"Function: {func.__name__}, Duration: {duration:.2f}s, Success: {success}") if not success: raise Exception(error) return result return wrapper # 建立強化版 Agent 測試函數 @monitor_performance def enhanced_agent_test(agent, question): """強化版 Agent 測試""" messages = [{"role": "user", "content": question}] responses = [] for step in agent.stream({"messages": messages}, stream_mode="values"): responses.append(step["messages"][-1]) return responses[-1] if responses else None # 初始化日誌 logger = setup_logging() # 測試強化版功能 if 'custom_agent' in locals(): print("🔧 測試強化版功能：") test_questions = [ "哪個員工負責最多客戶？", "2009年每月的銷售趨勢如何？", "最受歡迎的音樂專輯是什麼？" ] for question in test_questions: try: print(f"\n❓ 問題：{question}") result = enhanced_agent_test(custom_agent, question) print(f"✅ 回應：{result.content[:100] if hasattr(result, 'content') else 'No content'}...") except Exception as e: print(f"❌ 錯誤：{e}") 🚀 部署就緒的 Agent 類別最後，讓我們建立一個生產就緒的 Agent 類別： class ProductionSQLAgent: """生產就緒的 SQL Agent 類別""" def __init__(self, database_uri, openai_api_key, model="gpt-4"): """初始化 SQL Agent""" self.db = SQLDatabase.from_uri(database_uri) self.llm = ChatOpenAI( api_key=openai_api_key, model=model, temperature=0 ) # 建立工具包 toolkit = SQLDatabaseToolkit(db=self.db, llm=self.llm) self.tools = toolkit.get_tools() # 建立 Agent self.agent = self._build_agent() # 設置監控 self.logger = logging.getLogger(f'sql_agent_{id(self)}') self.query_count = 0 self.total_time = 0 def _build_agent(self): """建構 Agent""" system_prompt = f""" 你是一個專業的 SQL 資料庫分析師。資料庫類型：{self.db.dialect} 可用表格：{', '.join(self.db.get_usable_table_names())} 職責： 1. 理解自然語言問題 2. 分析資料庫結構 3. 生成安全的 SQL 查詢 4. 提供清晰的答案安全原則： - 永遠不執行 DML 語句 - 限制結果數量 - 檢查查詢安全性 """ return create_react_agent( self.llm, self.tools, prompt=system_prompt ) @monitor_performance def query(self, question: str, max_results: int = 5): """執行查詢""" self.query_count += 1 start_time = time.time() try: # 加入結果限制到問題中 enhanced_question = f"{question} (限制結果數量為 {max_results})" messages = [{"role": "user", "content": enhanced_question}] # 執行查詢 final_response = None for step in self.agent.stream({"messages": messages}, stream_mode="values"): final_response = step["messages"][-1] end_time = time.time() duration = end_time - start_time self.total_time += duration response_content = final_response.content if hasattr(final_response, 'content') else "無法取得回應" self.logger.info(f"Query successful: {question[:50]}... | Duration: {duration:.2f}s") return { 'success': True, 'response': response_content, 'duration': duration, 'query_number': self.query_count } except Exception as e: self.logger.error(f"Query failed: {question[:50]}... | Error: {str(e)}") return { 'success': False, 'error': str(e), 'duration': time.time() - start_time, 'query_number': self.query_count } def get_stats(self): """取得統計資訊""" avg_time = self.total_time / self.query_count if self.query_count > 0 else 0 return { 'total_queries': self.query_count, 'total_time': self.total_time, 'average_time': avg_time, 'available_tables': self.db.get_usable_table_names() } # 建立生產級 Agent def create_production_agent(): """建立生產級 SQL Agent""" try: production_agent = ProductionSQLAgent( database_uri="sqlite:///Chinook.db", openai_api_key=os.environ.get("OPENAI_API_KEY", "your-api-key-here") ) print("🏭 生產級 SQL Agent 建立成功！") return production_agent except Exception as e: print(f"❌ 建立失敗：{e}") return None # 測試生產級 Agent if os.environ.get("OPENAI_API_KEY"): prod_agent = create_production_agent() if prod_agent: # 執行測試查詢 test_queries = [ "顯示銷售額最高的 3 個國家", "哪些藝術家有超過 10 張專輯？", "2008 年和 2009 年的銷售額比較" ] print("\n🧪 生產級 Agent 測試：") print("=" * 50) for query in test_queries: print(f"\n❓ 查詢：{query}") result = prod_agent.query(query) if result['success']: print(f"✅ 成功 ({result['duration']:.2f}s)") print(f"📝 回應：{result['response'][:150]}...") else: print(f"❌ 失敗：{result['error']}") # 顯示統計 stats = prod_agent.get_stats() print(f"\n📊 統計資訊：") print(f"總查詢數：{stats['total_queries']}") print(f"平均時間：{stats['average_time']:.2f}秒") print(f"可用表格：{len(stats['available_tables'])} 個") 🎯 總結和最佳實踐通過本教學，我們從零開始建構了一個完整的 SQL Agent 系統。以下是關鍵要點： # 最佳實踐檢查清單 def best_practices_checklist(): """SQL Agent 最佳實踐檢查清單""" practices = { "安全性": [ "✅ 限制資料庫權限", "✅ 禁止 DML 操作", "✅ 輸入驗證和清理", "✅ 查詢複雜度限制", "✅ 錯誤訊息過濾" ], "效能": [ "✅ 結果數量限制", "✅ 查詢超時設定", "✅ 連接池管理", "✅ 快取策略", "✅ 監控和日誌" ], "可靠性": [ "✅ 錯誤處理機制", "✅ 重試邏輯", "✅ 查詢驗證", "✅ 結果格式化", "✅ 健康檢查" ], "可維護性": [ "✅ 模組化設計", "✅ 配置管理", "✅ 版本控制", "✅ 文件完整", "✅ 測試覆蓋" ] } print("📋 SQL Agent 最佳實踐檢查清單：") print("=" * 40) for category, items in practices.items(): print(f"\n🏷️ {category}：") for item in items: print(f" {item}") return practices # 顯示檢查清單 best_practices_checklist() print(""" 🎉 恭喜！您已經成功建構了一個完整的 SQL Agent 系統！ 🔜 下一步建議： 1. 部署到雲端平台（AWS、GCP、Azure） 2. 整合到現有應用程式中 3. 添加更多安全性措施 4. 實施 A/B 測試 5. 收集用戶反饋並持續改進記住：一個好的 AI 系統需要持續的監控、測試和優化。祝您的 SQL Agent 專案成功！🚀 """) 🔗 額外資源 # 有用的資源連結 resources = { "官方文件": [ "https://python.langchain.com/docs/tutorials/sql_qa", "https://langchain-ai.github.io/langgraph/", "https://docs.anthropic.com/claude/docs" ], "範例專案": [ "https://github.com/langchain-ai/langgraph/tree/main/examples", "https://github.com/langchain-ai/langchain/tree/main/templates" ], "學習資源": [ "LangChain SQL Tutorial", "LangGraph Documentation", "SQL Database Best Practices" ] } print("📚 更多學習資源：") for category, links in resources.items(): print(f"\n🔗 {category}：") for link in links: print(f" • {link}") 🛠️ 疑難排解指南在使用過程中可能遇到的常見問題和解決方案： class TroubleshootingGuide: """SQL Agent 疑難排解指南""" @staticmethod def common_issues(): """常見問題和解決方案""" issues = { "連接問題": { "症狀": "無法連接到資料庫", "可能原因": ["API Key 錯誤", "網路問題", "資料庫路徑錯誤"], "解決方案": [ "檢查 OpenAI API Key 是否正確設定", "確認資料庫檔案存在", "檢查網路連接", "驗證資料庫 URI 格式" ] }, "查詢錯誤": { "症狀": "SQL 查詢執行失敗", "可能原因": ["語法錯誤", "欄位名錯誤", "表格不存在"], "解決方案": [ "檢查資料庫 schema", "驗證表格名稱", "測試簡單查詢", "檢查資料類型匹配" ] }, "效能問題": { "症狀": "回應時間過長", "可能原因": ["複雜查詢", "大量資料", "模型回應慢"], "解決方案": [ "限制結果數量", "優化查詢邏輯", "使用索引", "考慮快取策略" ] }, "記憶體問題": { "症狀": "記憶體不足錯誤", "可能原因": ["結果集太大", "模型佔用過多記憶體"], "解決方案": [ "增加 LIMIT 限制", "分批處理資料", "清理不需要的變數", "考慮使用串流處理" ] } } print("🔧 SQL Agent 疑難排解指南：") print("=" * 50) for issue_type, details in issues.items(): print(f"\n🚨 {issue_type}：") print(f" 症狀：{details['症狀']}") print(f" 可能原因：{', '.join(details['可能原因'])}") print(f" 解決方案：") for solution in details['解決方案']: print(f" • {solution}") return issues @staticmethod def debug_agent(agent, question): """除錯 Agent 執行過程""" print(f"🔍 除錯模式：{question}") print("-" * 40) try: messages = [{"role": "user", "content": question}] step_count = 0 for step in agent.stream({"messages": messages}, stream_mode="values"): step_count += 1 last_message = step["messages"][-1] print(f"\n📍 步驟 {step_count}：") print(f" 類型：{type(last_message).__name__}") if hasattr(last_message, 'content') and last_message.content: print(f" 內容：{last_message.content[:100]}...") if hasattr(last_message, 'tool_calls') and last_message.tool_calls: print(f" 工具呼叫：") for tool_call in last_message.tool_calls: print(f" • {tool_call.get('name', 'Unknown')}") if 'args' in tool_call: print(f" 參數：{str(tool_call['args'])[:100]}...") # 檢查錯誤 if hasattr(last_message, 'content') and 'error' in last_message.content.lower(): print(f" ⚠️ 可能的錯誤：{last_message.content}") print(f"\n✅ 除錯完成，總共 {step_count} 個步驟") except Exception as e: print(f"❌ 除錯過程中發生錯誤：{e}") import traceback traceback.print_exc() # 使用疑難排解指南 troubleshooter = TroubleshootingGuide() troubleshooter.common_issues() # 如果需要除錯特定問題 if 'custom_agent' in locals(): print("\n" + "="*60) troubleshooter.debug_agent(custom_agent, "測試查詢：列出所有藝術家") 🚀 進階擴展功能讓我們實作一些進階功能來增強我們的 SQL Agent： class AdvancedSQLAgent(ProductionSQLAgent): """進階 SQL Agent 擴展功能""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.query_history = [] self.performance_metrics = { 'successful_queries': 0, 'failed_queries': 0, 'total_execution_time': 0, 'average_response_time': 0 } def explain_query(self, question: str): """解釋查詢過程""" print(f"🔍 查詢解釋：{question}") print("-" * 40) # 模擬查詢規劃 print("📋 查詢規劃階段：") print(" 1. 分析自然語言問題") print(" 2. 識別相關資料表") print(" 3. 確定所需欄位") print(" 4. 規劃 JOIN 策略") print(" 5. 設定過濾條件") # 執行查詢並記錄過程 result = self.query(question) if result['success']: print(f"\n✅ 查詢成功完成") print(f"⏱️ 執行時間：{result['duration']:.2f} 秒") else: print(f"\n❌ 查詢失敗：{result.get('error', 'Unknown error')}") return result def batch_query(self, questions: list): """批次處理多個查詢""" print(f"📦 批次查詢：{len(questions)} 個問題") print("=" * 50) results = [] total_start_time = time.time() for i, question in enumerate(questions, 1): print(f"\n🔄 處理查詢 {i}/{len(questions)}：{question[:50]}...") result = self.query(question) results.append({ 'question': question, 'result': result }) status = "✅" if result['success'] else "❌" print(f"{status} 完成 ({result['duration']:.2f}s)") total_time = time.time() - total_start_time success_count = sum(1 for r in results if r['result']['success']) print(f"\n📊 批次處理結果：") print(f" 總查詢數：{len(questions)}") print(f" 成功數：{success_count}") print(f" 成功率：{success_count/len(questions)*100:.1f}%") print(f" 總時間：{total_time:.2f} 秒") print(f" 平均時間：{total_time/len(questions):.2f} 秒/查詢") return results def suggest_optimizations(self): """建議查詢優化""" print("💡 查詢優化建議：") print("-" * 30) suggestions = [ "使用適當的索引來加速查詢", "避免 SELECT * ，只選擇需要的欄位", "使用 LIMIT 限制結果數量", "考慮使用 EXISTS 而非 IN 子查詢", "避免在 WHERE 子句中使用函數", "使用 JOIN 而非多個子查詢", "考慮資料類型的匹配性" ] for i, suggestion in enumerate(suggestions, 1): print(f" {i}. {suggestion}") return suggestions def generate_report(self): """生成效能報告""" stats = self.get_stats() report = f""" 📊 SQL Agent 效能報告 {'='*40} 基本統計： • 總查詢數：{stats['total_queries']} • 總執行時間：{stats['total_time']:.2f} 秒 • 平均回應時間：{stats['average_time']:.2f} 秒資料庫資訊： • 可用表格數：{len(stats['available_tables'])} • 資料庫類型：{self.db.dialect} 效能指標： • 成功查詢：{self.performance_metrics['successful_queries']} • 失敗查詢：{self.performance_metrics['failed_queries']} • 成功率：{self.performance_metrics['successful_queries']/(self.performance_metrics['successful_queries']+self.performance_metrics['failed_queries'])*100 if (self.performance_metrics['successful_queries']+self.performance_metrics['failed_queries']) > 0 else 0:.1f}% 建議： • 監控長時間執行的查詢 • 定期檢查資料庫效能 • 考慮實施查詢快取 • 優化常用查詢路徑 """ print(report) return report # 建立進階 SQL Agent def create_advanced_agent(): """建立進階 SQL Agent""" if os.environ.get("OPENAI_API_KEY"): try: advanced_agent = AdvancedSQLAgent( database_uri="sqlite:///Chinook.db", openai_api_key=os.environ.get("OPENAI_API_KEY") ) print("🚀 進階 SQL Agent 建立成功！") return advanced_agent except Exception as e: print(f"❌ 建立失敗：{e}") return None else: print("⚠️ 請設定 OPENAI_API_KEY 環境變數") return None # 測試進階功能 advanced_agent = create_advanced_agent() if advanced_agent: print("\n🧪 進階功能測試：") # 測試查詢解釋 advanced_agent.explain_query("哪個客戶購買最多？") # 測試批次查詢 batch_questions = [ "列出前 5 名暢銷專輯", "哪個城市的客戶最多？", "2009 年最受歡迎的音樂類型" ] print("\n" + "="*60) batch_results = advanced_agent.batch_query(batch_questions) # 顯示優化建議 print("\n" + "="*60) advanced_agent.suggest_optimizations() # 生成報告 print("\n" + "="*60) advanced_agent.generate_report() 🔒 安全性增強實作額外的安全性措施： ```pythonimport reimport hashlibfrom datetime import datetime, timedelta class SecureSQLAgent(AdvancedSQLAgent): """安全增強版 SQL Agent""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.blocked_patterns = [ r'\bDROP\b', r'\bDELETE\b', r'\bINSERT\b', r'\bUPDATE\b', r'\bALTER\b', r'\bCREATE\b', r'\bTRUNCATE\b', r'\bGRANT\b', r'\bREVOKE\b', r'--', r'/\*', r'\*/', r'\bEXEC\b' ] self.query_cache = {} self.rate_limit = {} self.max_queries_per_minute = 10 def _is_query_safe(self, query: str) -> tuple[bool, str]: """檢查查詢是否安全""" query_upper = query.upper() # 檢查危險模式 for pattern in self.blocked_patterns: if re.search(pattern, query_upper, re.IGNORECASE): return False, f"查詢包含被禁止的模式: {pattern}" # 檢查查詢長度 if len(query) > 1000: return False, "查詢長度超過限制" # 檢查是否只有 SELECT 語句 if not query_upper.strip().startswith('SELECT'): return False, "只允許 SELECT 查詢" return True, "查詢安全" def _check_rate_limit(self, user_id: str = "default") -> bool: """檢查速率限制""" now = datetime.now() minute_ago = now - timedelta(minutes=1) if user_id not in self.rate_limit: self.rate_limit[user_id] = [] # 清除一分鐘前的記錄 self.rate_limit[user_id] = [ timestamp for timestamp in self.rate_limit[user_id] if timestamp > minute_ago ] # 檢查是否超過限制 if len(self.rate_limit[user_id]) >= self.max_queries_per_minute: return False # 記錄當前查詢 self.rate_limit[user_id].append(now) return True def _get_query_hash(self, question: str) -> str: """生成查詢雜湊值""" return hashlib.md5(question.encode()).hexdigest() def secure_query(self, question: str, user_id: str = "default"): """安全查詢執行""" # 檢查速率限制 if not self._check_rate_limit(user_id): return { 'success': False, 'error': '查詢頻率超過限制，請稍後再試', 'duration': 0 } # 檢查快取 query_hash = self._get_query_hash(question) if query_hash in self.query_cache: cached_result = self.query_cache[query_hash] print("📦 使用快取結果") return { **cached_result, 'from_cache': True } # 執行查詢 result = self.query(question) # 如果成功，加入快取 if result['success']: self.query_cache[query_hash] = { 'response': result['response'], 'timestamp': datetime.now(), 'success': True } return result def audit_log(self, action: str, details: dict): """記錄審計日誌""" log_entry = { 'timestamp': datetime.now().isoformat(), 'action': action, 'details': details } print(f"📋 審計日誌：{log_entry}") # 在實際應用中，這裡應該寫入到安全的日誌系統 return log_entry def security_report(self): """生成安全報告""" report = f""" 🔒 安全報告{'='*30} 快取統計： • 快取條目數：{len(self.query_cache)} • 快取命中可節省的查詢數速率限制： • 每分鐘最大查詢數：{self.max_queries_per_minute} • 目前活躍用戶：{len(self.rate_limit)} 安全檢查： • 被禁止的 SQL 模式：{len(self.blocked_patterns)} 個 • 查詢長度限制：1000 字元 • 只允許 SELECT 語句建議： • 定期審查查詢日誌 • 監控異常查詢模式 • 更新安全規則 """ print(report) return report 建立安全版 Agent def create_secure_agent(): """建立安全版 SQL Agent""" if os.environ.get("OPENAI_API_KEY"): try: secure_agent = SecureSQLAgent( database_uri="sqlite:///Chinook.db", openai_api_key=os.environ.get("OPENAI_API_KEY") ) print("🔒 安全版 SQL Agent 建立成功！") return secure_agent except Exception as e: print(f"❌ 建立失敗：{e}") return None else: print("⚠️ 請設定 OPENAI_API_KEY 環境變數") return None 測試安全功能 secure_agent = create_secure_agent() if secure_agent: print("\n🔒 安全功能測試：") # 測試正常查詢 print("\n1. 測試正常查詢：") result1 = secure_agent.secure_query("列出前 3 名客戶") print(f"結果：{'✅ 成功' if result1['success'] else '❌ 失敗'}") # 測試快取 print("\n2. 測試查詢快取：") result2 = secure_agent.secure_query("列出前 3 名客戶") # 相同查詢 if result2.get('from_cache'): print("✅ 成功使用快取") # 測試速率限制（模擬多次查詢） print("\n3. 測試速率限制：") for i in range(12): # 超過限制的查詢 result = secure_agent.secure_query(f"測試查詢 {i}") if not result['success'] and '頻率超過限制' in result['error']: print(f"✅ 速率限制在第 {i+1} 次查詢時觸發") break # 生成安全報告 print("\n" + "="*60) secure_agent.security_report() print(""" 🎉 教學完成！您已經學會了：✅ 建構基本的 SQL Agent✅ 實作自定義工作流程✅ 添加錯誤處理和監控✅ 實施安全性措施✅ 效能優化和快取✅ 批次處理和報告生成 🚀 現在您可以：部署到生產環境整合到您的應用程式根據需求進行客製化擴展更多功能記住：持續監控、測試和改進是成功的關鍵！ Happy coding! 🎯""")