Custom Providers

You can add support for any LLM backend by implementing the StreamingModelProvider trait.

// The trait as defined in the llm crate:
pub trait StreamingModelProvider: Send + Sync {
    fn stream_response(&self, context: &Context) -> LlmResponseStream;
    fn display_name(&self) -> String;
    fn context_window(&self) -> Option<u32>;
    fn model(&self) -> Option<LlmModel> { None }
}
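
The crate also defines the LlmResponseStream alias. Its exact shape isn't shown here, but from the Box::pin and yield Ok(...) calls in the example below, something along these lines is a reasonable assumption (check the llm crate for the real definition):

use std::pin::Pin;
use futures::Stream;
use llm::{LlmError, LlmResponse};

// Assumed shape only; the real alias lives in the llm crate.
pub type LlmResponseStream =
    Pin<Box<dyn Stream<Item = Result<LlmResponse, LlmError>> + Send>>;

A complete provider implementation then looks like this: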
use llm::{
    StreamingModelProvider, LlmResponseStream, LlmResponse,
    Context, LlmModel,
};

pub struct MyProvider {
    api_key: String,
    model: String,
}

impl MyProvider {
    pub fn new(api_key: String, model: String) -> Self {
        Self { api_key, model }
    }
}

impl StreamingModelProvider for MyProvider {
    fn stream_response(&self, context: &Context) -> LlmResponseStream {
        let messages = context.messages().clone();
        let tools = context.tools().clone();
        let api_key = self.api_key.clone();
        Box::pin(async_stream::stream! {
            // 1. Signal response start
            yield Ok(LlmResponse::start("msg-001"));
            // 2. Call your API and stream content chunks
            let response = call_my_api(&api_key, &messages, &tools).await;
            for chunk in response.chunks() {
                yield Ok(LlmResponse::text(&chunk));
            }
            // 3. Report token usage
            yield Ok(LlmResponse::usage(response.input_tokens, response.output_tokens));
            // 4. Signal completion
            yield Ok(LlmResponse::done());
        })
    }

    fn display_name(&self) -> String {
        format!("my-provider:{}", self.model)
    }

    fn context_window(&self) -> Option<u32> {
        Some(128_000)
    }
}
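
The stream! macro in this sketch comes from the async-stream crate, so an implementation like this needs roughly these dependencies in Cargo.toml (version numbers illustrative):

[dependencies]
async-stream = "0.3"
futures = "0.3"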

Your stream_response must emit events in this order:

  1. LlmResponse::start(message_id) — once at the beginning
  2. Content chunks — any mix of:
    • LlmResponse::text(chunk) — text output
    • LlmResponse::reasoning(chunk) — extended thinking
    • LlmResponse::tool_request_start(id, name), then tool_request_arg(id, chunk), then tool_request_complete(tool_call) — tool calls
  3. LlmResponse::usage(input, output) — token counts (optional but recommended)
  4. LlmResponse::done() or LlmResponse::done_with_stop_reason(reason) — must be last

If an error occurs mid-stream, yield LlmResponse::Error { message }.
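
For instance, if the hypothetical call_my_api from the sketch above returned a Result, step 2 could become:

// Inside the stream! block: surface failures as stream events.
match call_my_api(&api_key, &messages, &tools).await {
    Ok(response) => {
        for chunk in response.chunks() {
            yield Ok(LlmResponse::text(&chunk));
        }
    }
    Err(err) => {
        yield Ok(LlmResponse::Error { message: err.to_string() });
        return; // ends the stream early
    }
}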

When the model wants to call a tool:

// Signal the tool call is starting
yield Ok(LlmResponse::tool_request_start("call-1", "read_file"));
// Stream the JSON arguments in chunks
yield Ok(LlmResponse::tool_request_arg("call-1", "{\"path\":"));
yield Ok(LlmResponse::tool_request_arg("call-1", "\"src/main.rs\"}"));
// Signal the tool call is complete
yield Ok(LlmResponse::tool_request_complete(
    "call-1", "read_file", "{\"path\":\"src/main.rs\"}"
));
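
In practice the argument deltas arrive incrementally from your backend. A sketch of forwarding them from inside the stream! block, where next_arg_chunk, id, and name are hypothetical stand-ins for your API's streaming interface:

let mut args = String::new();
yield Ok(LlmResponse::tool_request_start(&id, &name));
// Accumulate the full JSON while forwarding each delta.
while let Some(delta) = response.next_arg_chunk().await {
    args.push_str(&delta);
    yield Ok(LlmResponse::tool_request_arg(&id, &delta));
}
yield Ok(LlmResponse::tool_request_complete(&id, &name, &args));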

The agent runtime handles dispatching the tool call to MCP servers and feeding the result back.

If you want your provider to be loadable from environment variables:

use llm::{ProviderFactory, LlmError};

impl ProviderFactory for MyProvider {
    async fn from_env() -> llm::Result<Self> {
        let api_key = std::env::var("MY_API_KEY")
            .map_err(|_| LlmError::MissingApiKey("MY_API_KEY".into()))?;
        // The model is filled in later via with_model.
        Ok(Self::new(api_key, String::new()))
    }

    fn with_model(mut self, model: &str) -> Self {
        self.model = model.to_string();
        self
    }
}
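
Construction from the environment then composes with with_model; a sketch, assuming an async context where ? can convert the error:

let provider = MyProvider::from_env().await?.with_model("my-model-v2");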
Finally, pass the provider to the agent runtime:

use aether_core::core::agent;

let provider = MyProvider::new(api_key, "my-model-v2".into());
let (tx, rx, handle) = agent(provider)
    .system_prompt(Prompt::text("Hello"))
    .spawn()
    .await?;

The agent() function accepts any impl StreamingModelProvider + 'static.