Custom Providers

You can add support for any LLM backend by implementing the StreamingModelProvider trait.

// The trait as defined in the llm crate:
pub trait StreamingModelProvider: Send + Sync {
    fn stream_response(&self, context: &Context) -> LlmResponseStream;
    fn display_name(&self) -> String;
    fn context_window(&self) -> Option<u32>;
    fn model(&self) -> Option<LlmModel> { None }
}
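
The crate also defines the LlmResponseStream alias. Its exact shape isn't shown here, but from the Box::pin and yield Ok(...) calls in the example below, something along these lines is a reasonable assumption (check the llm crate for the real definition):

use std::pin::Pin;
use futures::Stream;
use llm::{LlmError, LlmResponse};

// Assumed shape only; the real alias lives in the llm crate.
pub type LlmResponseStream =
    Pin<Box<dyn Stream<Item = Result<LlmResponse, LlmError>> + Send>>;

A complete provider implementation then looks like this: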
use llm::{
    StreamingModelProvider, LlmResponseStream, LlmResponse,
    Context, LlmModel,
};

pub struct MyProvider {
    api_key: String,
    model: String,
}

impl MyProvider {
    pub fn new(api_key: String, model: String) -> Self {
        Self { api_key, model }
    }
}

impl StreamingModelProvider for MyProvider {
    fn stream_response(&self, context: &Context) -> LlmResponseStream {
        let messages = context.messages().clone();
        let tools = context.tools().clone();
        let api_key = self.api_key.clone();
        Box::pin(async_stream::stream! {
            // 1. Signal response start
            yield Ok(LlmResponse::start("msg-001"));
            // 2. Call your API and stream content chunks
            let response = call_my_api(&api_key, &messages, &tools).await;
            for chunk in response.chunks() {
                yield Ok(LlmResponse::text(&chunk));
            }
            // 3. Report token usage
            yield Ok(LlmResponse::usage(response.input_tokens, response.output_tokens));
            // 4. Signal completion
            yield Ok(LlmResponse::done());
        })
    }

    fn display_name(&self) -> String {
        format!("my-provider:{}", self.model)
    }

    fn context_window(&self) -> Option<u32> {
        Some(128_000)
    }
}
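
The stream! macro in this sketch comes from the async-stream crate, so an implementation like this needs roughly these dependencies in Cargo.toml (version numbers illustrative):

[dependencies]
async-stream = "0.3"
futures = "0.3"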

Your stream_response must emit events in this order:

  1. LlmResponse::start(message_id) — once at the beginning
  2. Content chunks — any mix of:
    • LlmResponse::text(chunk) — text output
    • LlmResponse::reasoning(chunk) — extended thinking
    • LlmResponse::tool_request_start(id, name), then tool_request_arg(id, chunk), then tool_request_complete(tool_call) — tool calls
  3. LlmResponse::usage(input, output) — token counts (optional but recommended)
  4. LlmResponse::done() or LlmResponse::done_with_stop_reason(reason) — must be last

If an error occurs mid-stream, yield LlmResponse::Error { message }.
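
For instance, if the hypothetical call_my_api from the sketch above returned a Result, step 2 could become:

// Inside the stream! block: surface failures as stream events.
match call_my_api(&api_key, &messages, &tools).await {
    Ok(response) => {
        for chunk in response.chunks() {
            yield Ok(LlmResponse::text(&chunk));
        }
    }
    Err(err) => {
        yield Ok(LlmResponse::Error { message: err.to_string() });
        return; // ends the stream early
    }
}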

When the model wants to call a tool:

// Signal the tool call is starting
yield Ok(LlmResponse::tool_request_start("call-1", "read_file"));
// Stream the JSON arguments in chunks
yield Ok(LlmResponse::tool_request_arg("call-1", "{\"path\":"));
yield Ok(LlmResponse::tool_request_arg("call-1", "\"src/main.rs\"}"));
// Signal the tool call is complete
yield Ok(LlmResponse::tool_request_complete(
    "call-1", "read_file", "{\"path\":\"src/main.rs\"}"
));
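
In practice the argument deltas arrive incrementally from your backend. A sketch of forwarding them from inside the stream! block, where next_arg_chunk, id, and name are hypothetical stand-ins for your API's streaming interface:

let mut args = String::new();
yield Ok(LlmResponse::tool_request_start(&id, &name));
// Accumulate the full JSON while forwarding each delta.
while let Some(delta) = response.next_arg_chunk().await {
    args.push_str(&delta);
    yield Ok(LlmResponse::tool_request_arg(&id, &delta));
}
yield Ok(LlmResponse::tool_request_complete(&id, &name, &args));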

The agent runtime handles dispatching the tool call to MCP servers and feeding the result back.

If you want your provider to be loadable from environment variables:

use llm::{ProviderFactory, LlmError};

impl ProviderFactory for MyProvider {
    async fn from_env() -> llm::Result<Self> {
        let api_key = std::env::var("MY_API_KEY")
            .map_err(|_| LlmError::MissingApiKey("MY_API_KEY".into()))?;
        // The model is filled in later via with_model.
        Ok(Self::new(api_key, String::new()))
    }

    fn with_model(mut self, model: &str) -> Self {
        self.model = model.to_string();
        self
    }
}
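
Construction from the environment then composes with with_model; a sketch, assuming an async context where ? can convert the error:

let provider = MyProvider::from_env().await?.with_model("my-model-v2");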
Finally, pass the provider to the agent runtime:

use aether_core::core::agent;

let provider = MyProvider::new(api_key, "my-model-v2".into());
let (tx, rx, handle) = agent(provider)
    .system_prompt(Prompt::text("Hello"))
    .spawn()
    .await?;

The agent() function accepts any impl StreamingModelProvider + 'static.