Embeddings API Reference

Complete API documentation for the Soul Kernel embeddings crate.

Module Overview

use embeddings::{
    // Core types
    EmbeddingService,
    EmbeddingConfig,
    EmbeddingProvider,
    EmbeddingCache,
    EmbeddingError,
    Result,
    
    // Factory function
    create_embedding_service,
    
    // Provider implementations
    MockEmbeddingService,
    OpenAIEmbeddingService,
};

Core Trait

EmbeddingService

The main trait that all embedding providers implement.
#[async_trait]
pub trait EmbeddingService: Send + Sync {
    /// Generate embedding vector for the given text
    async fn generate_embedding(&self, text: &str) -> Result<Vec<f32>>;
    
    /// Generate embeddings for multiple texts (batch operation)
    async fn generate_embeddings(&self, texts: &[String]) -> Result<Vec<Vec<f32>>>;
    
    /// Get the dimension of embeddings produced by this service
    fn dimension(&self) -> usize;
    
    /// Get the model name/identifier
    fn model_name(&self) -> &str;
    
    /// Check if the service supports batch operations
    fn supports_batch(&self) -> bool {
        false
    }
    
    /// Get maximum batch size (if batch is supported)
    fn max_batch_size(&self) -> Option<usize> {
        None
    }
}
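
Any custom provider can be plugged in by implementing this trait. A minimal sketch, assuming only the signatures above (the ConstantEmbedding type is hypothetical and exists purely for illustration):

use async_trait::async_trait;
use embeddings::{EmbeddingService, Result};

/// Hypothetical provider that returns the same vector for every input.
struct ConstantEmbedding {
    dimension: usize,
}

#[async_trait]
impl EmbeddingService for ConstantEmbedding {
    async fn generate_embedding(&self, _text: &str) -> Result<Vec<f32>> {
        // Return a zero vector of the configured dimension.
        Ok(vec![0.0; self.dimension])
    }

    async fn generate_embeddings(&self, texts: &[String]) -> Result<Vec<Vec<f32>>> {
        // Naive batch: call the single-text path for each input.
        let mut out = Vec::with_capacity(texts.len());
        for text in texts {
            out.push(self.generate_embedding(text).await?);
        }
        Ok(out)
    }

    fn dimension(&self) -> usize {
        self.dimension
    }

    fn model_name(&self) -> &str {
        "constant"
    }
}

supports_batch and max_batch_size keep their defaults (false and None) unless the provider overrides them.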

Configuration

EmbeddingConfig

Configuration for embedding services.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbeddingConfig {
    pub provider: EmbeddingProvider,
    pub model: String,
    pub dimension: usize,
    pub cache_size: usize,
}

impl EmbeddingConfig {
    /// Create config for OpenAI
    pub fn openai(api_key: String) -> Self;
    
    /// Create config for local model
    pub fn local(model_path: PathBuf, dimension: usize) -> Self;
}

impl Default for EmbeddingConfig {
    fn default() -> Self {
        Self {
            provider: EmbeddingProvider::Mock,
            model: "mock".to_string(),
            dimension: 384,
            cache_size: 1000,
        }
    }
}
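
A short sketch of the three ways to build a config (the local model path is a placeholder, and the exact model and dimension that openai() selects are not part of the signature above):

use std::path::PathBuf;
use embeddings::{EmbeddingConfig, EmbeddingProvider};

fn example_configs() {
    // Default: mock provider, 384 dimensions, 1000-entry cache.
    let mock = EmbeddingConfig::default();
    assert!(matches!(mock.provider, EmbeddingProvider::Mock));

    // OpenAI: constructed from an API key.
    let _openai = EmbeddingConfig::openai("sk-example".to_string());

    // Local: point at model weights (placeholder path) and give the embedding dimension.
    let _local = EmbeddingConfig::local(PathBuf::from("models/minilm"), 384);
}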

EmbeddingProvider

Supported embedding providers.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum EmbeddingProvider {
    /// Mock provider for testing
    Mock,
    
    /// OpenAI embeddings API
    OpenAI { 
        api_key: String,
    },
    
    /// Local model using Candle
    Local { 
        model_path: PathBuf,
    },
}
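
Because of the #[serde(tag = "type")] attribute, the provider serializes as an internally tagged value. A sketch of what round-tripping through JSON looks like (assumes serde_json is available; the on-disk format used elsewhere in Soul Kernel is not specified here):

use embeddings::EmbeddingProvider;

fn provider_json() -> serde_json::Result<()> {
    // Internally tagged: the variant name goes into the "type" field.
    let mock = serde_json::to_string(&EmbeddingProvider::Mock)?;
    assert_eq!(mock, r#"{"type":"Mock"}"#);

    let openai = serde_json::to_string(&EmbeddingProvider::OpenAI {
        api_key: "sk-example".to_string(),
    })?;
    assert_eq!(openai, r#"{"type":"OpenAI","api_key":"sk-example"}"#);

    // Deserialization works the same way in reverse.
    let parsed: EmbeddingProvider = serde_json::from_str(&openai)?;
    assert!(matches!(parsed, EmbeddingProvider::OpenAI { .. }));
    Ok(())
}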

Caching

EmbeddingCache

LRU cache for embeddings to reduce API calls.
pub struct EmbeddingCache {
    cache: Arc<Mutex<LruCache<String, Vec<f32>>>>,
}

impl EmbeddingCache {
    /// Create a new cache with the specified capacity
    pub fn new(capacity: usize) -> Self;
    
    /// Get an embedding from the cache
    pub fn get(&self, text: &str) -> Option<Vec<f32>>;
    
    /// Store an embedding in the cache
    pub fn put(&self, text: &str, embedding: Vec<f32>);
    
    /// Clear all entries from the cache
    pub fn clear(&self);
    
    /// Get cache statistics
    pub fn stats(&self) -> CacheStats;
}

#[derive(Debug, Clone)]
pub struct CacheStats {
    pub capacity: usize,
    pub size: usize,
}
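
A short sketch of direct cache use and the stats it reports, assuming standard LRU eviction (capacity is the configured maximum, size is the current entry count):

use embeddings::EmbeddingCache;

fn cache_demo() {
    let cache = EmbeddingCache::new(2);

    cache.put("alpha", vec![0.1, 0.2]);
    cache.put("beta", vec![0.3, 0.4]);
    // Capacity is 2, so a third insert evicts the least recently used entry.
    cache.put("gamma", vec![0.5, 0.6]);

    assert!(cache.get("alpha").is_none()); // evicted
    assert!(cache.get("gamma").is_some());

    let stats = cache.stats();
    assert_eq!(stats.capacity, 2);
    assert_eq!(stats.size, 2);
}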

Error Types

EmbeddingError

Error types for embedding operations.
#[derive(Debug, thiserror::Error)]
pub enum EmbeddingError {
    #[error("Model not found: {0}")]
    ModelNotFound(String),
    
    #[error("API error: {0}")]
    ApiError(String),
    
    #[error("Rate limit exceeded")]
    RateLimit,
    
    #[error("Invalid input: {0}")]
    InvalidInput(String),
    
    #[error("Not supported: {0}")]
    NotSupported(String),
    
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
}

pub type Result<T> = std::result::Result<T, EmbeddingError>;

Factory Function

create_embedding_service

Create an embedding service from configuration.
pub async fn create_embedding_service(
    config: &EmbeddingConfig
) -> Result<Box<dyn EmbeddingService>>;

Provider Implementations

MockEmbeddingService

Mock provider for testing.
pub struct MockEmbeddingService {
    dimension: usize,
}

impl MockEmbeddingService {
    pub fn new(dimension: usize) -> Self;
}
Characteristics:
  • Deterministic output based on text hash
  • Configurable dimensions (default: 384)
  • ~50μs generation time
  • Normalized vectors
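
Because the mock output is derived from a hash of the input text, the same text always yields the same vector. A minimal check, assuming only the constructor above:

use embeddings::{EmbeddingService, MockEmbeddingService};

async fn mock_is_deterministic() -> embeddings::Result<()> {
    let mock = MockEmbeddingService::new(384);

    let a = mock.generate_embedding("same input").await?;
    let b = mock.generate_embedding("same input").await?;

    // Same text, same hash, same vector.
    assert_eq!(a, b);
    assert_eq!(a.len(), mock.dimension());
    Ok(())
}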

OpenAIEmbeddingService

OpenAI API integration.
pub struct OpenAIEmbeddingService {
    api_key: String,
    model: String,
    dimension: usize,
    client: Arc<ureq::Agent>,
}

impl OpenAIEmbeddingService {
    pub fn new(api_key: String, model: String) -> Result<Self>;
}
Supported Models:
  • text-embedding-3-small (1536 dimensions)
  • text-embedding-3-large (3072 dimensions)
  • text-embedding-ada-002 (1536 dimensions)
Features:
  • Batch processing (up to 100 texts)
  • Automatic retry with exponential backoff
  • Rate limit handling
  • Conditional compilation with openai feature
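
The service can also be constructed directly instead of going through the factory. A short sketch, assuming the openai feature is enabled and using one of the supported models above:

use embeddings::{EmbeddingService, OpenAIEmbeddingService};

async fn embed_with_openai(api_key: String) -> embeddings::Result<Vec<f32>> {
    let service = OpenAIEmbeddingService::new(api_key, "text-embedding-3-small".to_string())?;
    assert_eq!(service.dimension(), 1536);

    service.generate_embedding("direct construction example").await
}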

Usage Examples

Basic Usage

use embeddings::{create_embedding_service, EmbeddingConfig};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Create service
    let config = EmbeddingConfig::default();
    let service = create_embedding_service(&config).await?;
    
    // Generate embedding
    let text = "Hello, world!";
    let embedding = service.generate_embedding(text).await?;
    
    println!("Embedding dimension: {}", embedding.len());
    Ok(())
}

With OpenAI

use embeddings::{create_embedding_service, EmbeddingConfig};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Load API key from environment
    let api_key = std::env::var("OPENAI_API_KEY")?;
    
    // Configure OpenAI provider
    let config = EmbeddingConfig::openai(api_key);
    let service = create_embedding_service(&config).await?;
    
    // Batch processing
    let texts = vec![
        "First document".to_string(),
        "Second document".to_string(),
    ];
    
    let embeddings = service.generate_embeddings(&texts).await?;
    
    for (text, embedding) in texts.iter().zip(embeddings.iter()) {
        println!("{}: {} dimensions", text, embedding.len());
    }
    
    Ok(())
}

With Caching

use embeddings::{EmbeddingCache, create_embedding_service, EmbeddingConfig};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let config = EmbeddingConfig::default();
    let service = create_embedding_service(&config).await?;
    let cache = EmbeddingCache::new(100);
    
    let text = "Cached text";
    
    // First call - generates embedding
    let embedding = if let Some(cached) = cache.get(text) {
        cached
    } else {
        let embedding = service.generate_embedding(text).await?;
        cache.put(text, embedding.clone());
        embedding
    };
    
    // Second call - uses cache
    let cached = cache.get(text).expect("Should be cached");
    assert_eq!(embedding, cached);
    
    Ok(())
}

Feature Flags

[dependencies]
embeddings = { version = "0.1.0", features = ["openai"] }
Available features:
  • openai - Enable OpenAI provider (adds ureq, url dependencies)
  • local - Enable local models (adds candle dependencies) [planned]

Environment Variables

When using OpenAI provider:
  • OPENAI_API_KEY - Required API key
  • OPENAI_EMBEDDING_MODEL - Model name (default: text-embedding-3-small)
  • EMBEDDING_CACHE_SIZE - Cache capacity (default: 1000)
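
A sketch of runtime provider selection driven by these variables (the fallback to the mock provider is an assumption for illustration, not something the factory does for you):

use embeddings::{create_embedding_service, EmbeddingConfig, EmbeddingService};

async fn service_from_env() -> embeddings::Result<Box<dyn EmbeddingService>> {
    // Use OpenAI when a key is present, otherwise fall back to the mock provider.
    let config = match std::env::var("OPENAI_API_KEY") {
        Ok(api_key) => EmbeddingConfig::openai(api_key),
        Err(_) => EmbeddingConfig::default(),
    };
    create_embedding_service(&config).await
}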

Performance Considerations

  1. Batch Operations - Process multiple texts together for better throughput
  2. Caching - Use EmbeddingCache to avoid redundant API calls
  3. Model Selection - Choose appropriate model for quality/cost tradeoff
  4. Connection Pooling - OpenAI provider reuses HTTP connections
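
Putting the first two points together with the trait's batch introspection methods, a sketch of a helper that batches according to the provider's reported limits (the 100-text fallback is an assumption mirroring the OpenAI limit above):

use embeddings::{EmbeddingService, Result};

async fn embed_all(service: &dyn EmbeddingService, texts: &[String]) -> Result<Vec<Vec<f32>>> {
    // Fall back to one-at-a-time if the provider has no batch support.
    if !service.supports_batch() {
        let mut out = Vec::with_capacity(texts.len());
        for text in texts {
            out.push(service.generate_embedding(text).await?);
        }
        return Ok(out);
    }

    // Otherwise send chunks no larger than the provider's maximum batch size.
    let chunk = service.max_batch_size().unwrap_or(100);
    let mut out = Vec::with_capacity(texts.len());
    for batch in texts.chunks(chunk) {
        out.extend(service.generate_embeddings(batch).await?);
    }
    Ok(out)
}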

Thread Safety

All types are thread-safe:
  • EmbeddingService trait requires Send + Sync
  • EmbeddingCache uses Arc<Mutex<_>> internally
  • Providers can be shared across threads
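
Because the trait requires Send + Sync, a boxed service can be wrapped in an Arc and shared across tasks. A minimal sketch:

use std::sync::Arc;
use embeddings::{create_embedding_service, EmbeddingConfig, EmbeddingService};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let config = EmbeddingConfig::default();
    let service: Arc<dyn EmbeddingService> =
        Arc::from(create_embedding_service(&config).await?);

    // Clone the Arc into each task; the underlying provider is shared.
    let mut handles = Vec::new();
    for i in 0..4 {
        let service = Arc::clone(&service);
        handles.push(tokio::spawn(async move {
            service.generate_embedding(&format!("document {i}")).await
        }));
    }

    for handle in handles {
        let embedding = handle.await??;
        println!("got {} dimensions", embedding.len());
    }
    Ok(())
}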

Error Handling

Errors can be matched by variant to separate recoverable failures, such as rate limiting, from fatal ones.
use embeddings::EmbeddingError;

match service.generate_embedding(text).await {
    Ok(embedding) => {
        // Process embedding
    }
    Err(EmbeddingError::RateLimit) => {
        // Implement backoff strategy
    }
    Err(EmbeddingError::ApiError(msg)) => {
        // Handle API errors; msg carries the provider's message
    }
    Err(e) => {
        // Handle other errors
    }
}

Change Log

  • 2025-06-13: Initial API documentation
  • 2025-06-13: Added OpenAI provider details
  • 2025-06-13: Documented caching and error handling