[{"data":1,"prerenderedAt":221},["ShallowReactive",2],{"content-developer\u002Fdecisions\u002F007-pluggable-llm":3,"surround-\u002Fdeveloper\u002Fdecisions\u002F007-pluggable-llm":212},{"id":4,"title":5,"body":6,"description":204,"extension":205,"meta":206,"navigation":207,"path":208,"seo":209,"stem":210,"__hash__":211},"content\u002F3.developer\u002Fdecisions\u002F8.007-pluggable-llm.md","ADR-007: Pluggable LLM Provider",{"type":7,"value":8,"toc":198},"minimark",[9,26,31,35,56,59,84,88,99,109,128,139,143,148,179,184],[10,11,12,20],"ul",{},[13,14,15,19],"li",{},[16,17,18],"strong",{},"Status:"," Accepted",[13,21,22,25],{},[16,23,24],{},"Date:"," 2026-03-24",[27,28,30],"h2",{"id":29},"context","Context",[32,33,34],"p",{},"The Agent Pipeline, Knowledge Graph, and semantic file system all require LLM capabilities — text generation, structured output, embeddings, and tool calling. Owlat needs to support multiple deployment scenarios:",[36,37,38,44,50],"ol",{},[13,39,40,43],{},[16,41,42],{},"Cloud users"," who want the best available models (GPT-4o, Claude, etc.) 
via API keys",[13,45,46,49],{},[16,47,48],{},"Self-hosters"," who need fully offline operation with local models (via Ollama, vLLM, or similar)",[13,51,52,55],{},[16,53,54],{},"Enterprise users"," who route through internal API gateways with custom endpoints",[32,57,58],{},"The options considered:",[36,60,61,67,73],{},[13,62,63,66],{},[16,64,65],{},"Hardcode OpenAI"," — simplest, but locks out self-hosters who cannot or will not use cloud APIs",[13,68,69,72],{},[16,70,71],{},"LangChain\u002FLlamaIndex"," — heavy frameworks with large dependency trees, complex abstractions, and features Owlat does not need (chains, memory management, vector store adapters)",[13,74,75,78,79,83],{},[16,76,77],{},"Vercel AI SDK with provider abstraction"," — lightweight, already in the dependency tree (",[80,81,82],"code",{},"@ai-sdk\u002Fopenai","), provider-agnostic, supports structured output and tool calling natively",[27,85,87],{"id":86},"decision","Decision",[32,89,90,91,98],{},"Use the ",[92,93,97],"a",{"href":94,"rel":95},"https:\u002F\u002Fsdk.vercel.ai\u002F",[96],"nofollow","Vercel AI SDK"," as the LLM orchestration layer, wrapped in a thin provider abstraction configured via environment variables.",[100,101,106],"pre",{"className":102,"code":104,"language":105},[103],"language-text","LLM_PROVIDER=openai          # or: anthropic, ollama, custom\nLLM_BASE_URL=                 # for ollama: http:\u002F\u002Flocalhost:11434\u002Fv1\nLLM_API_KEY=                  # not needed for ollama\nLLM_MODEL=gpt-4o             # or: claude-sonnet-4-20250514, llama3, etc.\nLLM_EMBEDDING_MODEL=          # optional, defaults to provider's default\n","text",[80,107,104],{"__ignoreMap":108},"",[32,110,111,112,115,116,119,120,123,124,127],{},"The AI SDK's ",[80,113,114],{},"createOpenAI()"," factory accepts a ",[80,117,118],{},"baseURL"," parameter, which means ",[16,121,122],{},"any OpenAI-compatible API"," (Ollama, vLLM, LiteLLM, Azure OpenAI) works without additional provider code. 
For Anthropic, the AI SDK has a dedicated ",[80,125,126],{},"@ai-sdk\u002Fanthropic"," provider.",[32,129,130,131,134,135,138],{},"All LLM calls go through a single ",[80,132,133],{},"getLLMProvider()"," function in ",[80,136,137],{},"apps\u002Fapi\u002Fconvex\u002Flib\u002FllmProvider.ts"," that reads these environment variables and returns a configured provider instance.",[27,140,142],{"id":141},"consequences","Consequences",[32,144,145],{},[16,146,147],{},"Enables:",[10,149,150,153,156,162,165],{},[13,151,152],{},"Self-hosters run Ollama locally for fully offline, zero-cost AI features",[13,154,155],{},"Cloud users choose their preferred provider (OpenAI, Anthropic, or any compatible API)",[13,157,158,159],{},"Enterprise users point at internal gateways or proxy endpoints via ",[80,160,161],{},"LLM_BASE_URL",[13,163,164],{},"Single configuration surface — five environment variables control all LLM behavior",[13,166,167,168,170,171,174,175,178],{},"AI SDK is already a dependency (",[80,169,82],{}," in both ",[80,172,173],{},"apps\u002Fapi"," and ",[80,176,177],{},"apps\u002Fweb",")",[32,180,181],{},[16,182,183],{},"Trade-offs:",[10,185,186,189,192,195],{},[13,187,188],{},"Quality varies significantly between providers — local models may produce lower-quality classifications and drafts than GPT-4o or Claude",[13,190,191],{},"Embedding dimensions differ across models — vector indexes need to be configured for the chosen embedding model's dimensions",[13,193,194],{},"No built-in RAG chains — retrieval-augmented generation is implemented as explicit Convex function steps (query vector index, pass results to prompt), which is more verbose but more debuggable",[13,196,197],{},"AI SDK updates may introduce breaking changes, though the abstraction layer isolates application code",{"title":108,"searchDepth":199,"depth":199,"links":200},2,[201,202,203],{"id":29,"depth":199,"text":30},{"id":86,"depth":199,"text":87},{"id":141,"depth":199,"text":142},"Why Owlat uses the 
Vercel AI SDK with a provider abstraction layer instead of hardcoding a single LLM vendor.","md",{},true,"\u002Fdeveloper\u002Fdecisions\u002F007-pluggable-llm",{"title":5,"description":204},"3.developer\u002Fdecisions\u002F8.007-pluggable-llm","_DIXZ4eyFXq9D04K1XiMfPvLgc3fh1VQlrdqlR5zeY4",[213,217],{"title":214,"path":215,"stem":216,"children":-1},"ADR-006: Self-Hosted Convex","\u002Fdeveloper\u002Fdecisions\u002F006-self-hosted-convex","3.developer\u002Fdecisions\u002F7.006-self-hosted-convex",{"title":218,"path":219,"stem":220,"children":-1},"ADR-008: Agent Process Architecture","\u002Fdeveloper\u002Fdecisions\u002F008-process-architecture","3.developer\u002Fdecisions\u002F9.008-process-architecture",1774391043023]