Memory Storage Tutorial

Learn how to use Soul Kernel’s memory storage system to persist and query memories.

Prerequisites

  • Rust 1.79+ installed
  • Basic understanding of async Rust
  • Soul Kernel workspace set up

Overview

In this tutorial, you’ll learn how to:
  1. Initialize the storage system
  2. Store memory events
  3. Query memories using vector similarity
  4. Filter and retrieve specific memories
  5. Handle synchronization scenarios

Setting Up

First, add the storage crate to your Cargo.toml:
[dependencies]
storage = { path = "../kernel/storage" }
tokio = { version = "1.42", features = ["full"] }
chrono = "0.4"
uuid = { version = "1.11", features = ["v4"] }

Basic Memory Storage

Let’s start with a simple example that stores and retrieves memories:
use storage::{HybridMemoryStore, MemoryStore, MemoryEvent, MemoryEventType};
use tokio;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Open (or create) the on-disk store; `None` selects default options.
    let store = HybridMemoryStore::new("tutorial_memories.db", None).await?;

    // Apply schema migrations — safe on an already-migrated database.
    store.migrate().await?;

    // Build a single observation with a toy 4-dimensional embedding.
    let event = MemoryEvent::new(
        "tutorial_device".to_string(),
        MemoryEventType::Observation,
        "The user's favorite color is blue".to_string(),
        vec![0.1, 0.8, 0.2, 0.1], // Simplified embedding
    );

    // Persist the event and keep the generated identifier.
    let id = store.insert_event(&event).await?;
    println!("Stored memory with ID: {}", id);

    // Round-trip: fetch the same event back by its id.
    if let Some(found) = store.get_event(&id).await? {
        println!("Retrieved: {}", found.content);
    }

    Ok(())
}

Working with Embeddings

In a real application, you’ll generate embeddings using an LLM. Here’s a more realistic example:
use storage::{MemoryEvent, MemoryEventType};

// Simulate embedding generation (in practice, use an LLM)
// Simulate embedding generation (in practice, use an LLM)
//
// Returns a 384-dimensional, L2-normalized vector derived from the bytes
// of `text`. For empty input the vector is all zeros (there is nothing to
// normalize). This is only a demo stand-in for a real embedding API.
fn generate_embedding(text: &str) -> Vec<f32> {
    const DIM: usize = 384;
    // This would call your LLM API
    // For demo, we'll create a simple hash-based embedding
    let mut embedding = vec![0.0_f32; DIM];
    for (i, byte) in text.bytes().enumerate() {
        embedding[i % DIM] += byte as f32 / 255.0;
    }
    // Normalize to unit length. Guard against a zero norm (e.g. empty
    // input), which previously divided by zero and filled the vector
    // with NaN values.
    let norm: f32 = embedding.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm > 0.0 {
        embedding.iter_mut().for_each(|x| *x /= norm);
    }
    embedding
}

/// Persist one user/assistant exchange as two `Interaction` events,
/// both attributed to the device id "assistant_1". Each event's
/// embedding is computed from the raw text, while the stored content
/// carries a human-readable prefix.
async fn store_conversation(
    store: &impl MemoryStore,
    user_input: &str,
    assistant_response: &str,
) -> Result<(), Box<dyn std::error::Error>> {
    // (stored content, text to embed) for each side of the exchange.
    let turns = [
        (format!("User said: {}", user_input), user_input),
        (format!("I responded: {}", assistant_response), assistant_response),
    ];

    for (content, raw) in turns {
        let event = MemoryEvent::new(
            "assistant_1".to_string(),
            MemoryEventType::Interaction,
            content,
            generate_embedding(raw),
        );
        store.insert_event(&event).await?;
    }

    Ok(())
}
Find memories similar to a query:
use storage::{MemoryQuery, MemoryFilter};

/// Print the `top_k` stored memories most similar to `query_text`,
/// keeping only matches scoring above 0.5.
async fn find_similar_memories(
    store: &impl MemoryStore,
    query_text: &str,
    top_k: usize,
) -> Result<(), Box<dyn std::error::Error>> {
    // Embed the query text; no metadata filter is applied.
    let request = MemoryQuery {
        embedding: generate_embedding(query_text),
        top_k,
        score_threshold: Some(0.5), // Only return matches > 50% similarity
        filter: None,
    };

    let matches = store.query_embeddings(&request).await?;
    println!("Found {} similar memories:", matches.events.len());

    // Ranks are 1-based for display.
    let mut rank = 1;
    for hit in &matches.events {
        println!(
            "{}. [Score: {:.2}] {}", 
            rank, 
            hit.score, 
            hit.event.content
        );
        rank += 1;
    }

    Ok(())
}

Advanced Filtering

Filter memories by type, author, or time range:
use storage::{MemoryQuery, MemoryFilter, MemoryEventType};
use chrono::{Utc, Duration};

/// List up to 20 `Observation` events recorded in the last 24 hours.
async fn find_recent_observations(
    store: &impl MemoryStore,
) -> Result<(), Box<dyn std::error::Error>> {
    // Filter by event type and time window; the all-zero embedding is
    // neutral, so ranking carries no semantic preference.
    let cutoff = Utc::now() - Duration::hours(24);
    let query = MemoryQuery {
        embedding: vec![0.0; 384], // Neutral embedding
        top_k: 20,
        score_threshold: None,
        filter: Some(MemoryFilter {
            event_types: Some(vec![MemoryEventType::Observation]),
            authors: None,
            after: Some(cutoff),
            before: None,
        }),
    };

    for hit in store.query_embeddings(&query).await?.events {
        println!("{}: {}", 
            hit.event.timestamp.format("%Y-%m-%d %H:%M"),
            hit.event.content
        );
    }

    Ok(())
}

Memory Context Building

Build context from related memories:
/// Assemble a human-readable context string from the memories most
/// relevant to `topic`.
///
/// Queries the store with an embedding of `topic` (top 10 results,
/// minimum 60% similarity) and formats each hit as a bullet line with
/// its relevance percentage.
async fn build_context(
    store: &impl MemoryStore,
    topic: &str,
) -> Result<String, Box<dyn std::error::Error>> {
    // Find memories related to the topic
    let query = MemoryQuery {
        embedding: generate_embedding(topic),
        top_k: 10,
        score_threshold: Some(0.6),
        filter: None,
    };

    let results = store.query_embeddings(&query).await?;

    // Build context string
    let mut context = String::new();
    context.push_str(&format!("Context about '{}':\n", topic));

    // The enumerate index was unused (compiler warning); iterate directly.
    for result in &results.events {
        context.push_str(&format!(
            "- {} (relevance: {:.0}%)\n", 
            result.event.content,
            result.score * 100.0
        ));
    }

    Ok(context)
}

Synchronization Support

Implement basic sync functionality:
use storage::MemoryEvent;

/// Collect (up to 1000) events newer than `last_sync_timestamp` so they
/// can be handed to a sync service, and return them to the caller.
async fn sync_memories(
    store: &impl MemoryStore,
    last_sync_timestamp: i64,
) -> Result<Vec<MemoryEvent>, Box<dyn std::error::Error>> {
    // Pull everything recorded after the given timestamp, capped at 1000.
    let pending = store.get_events_since(last_sync_timestamp, 1000).await?;
    println!("Found {} events to sync", pending.len());

    // In a real implementation, you would:
    // 1. Send these events to your sync service
    // 2. Receive events from other devices
    // 3. Apply CRDT merge logic
    // 4. Store merged events

    Ok(pending)
}

Performance Optimization

Tips for optimal performance:
use storage::{HybridMemoryStore, MemoryStore, MemoryEvent, MemoryEventType};
use std::sync::Arc;

/// Insert 100 synthetic `System` events concurrently, then compact.
///
/// Demonstrates fanning inserts out over `tokio::spawn` — each task
/// takes its own `Arc` handle to the store — and running `compact()`
/// once the bulk load has finished.
///
/// Note: the snippet previously used `MemoryEventType::System` without
/// importing `MemoryEventType`, so it would not compile as printed; the
/// import is now included above.
async fn batch_insert_example(
    store: Arc<HybridMemoryStore>,
) -> Result<(), Box<dyn std::error::Error>> {
    // Batch multiple inserts
    let memories: Vec<MemoryEvent> = (0..100)
        .map(|i| MemoryEvent::new(
            "batch_device".to_string(),
            MemoryEventType::System,
            format!("Batch event {}", i),
            vec![i as f32 / 100.0; 4],
        ))
        .collect();
    
    // Insert in parallel (be mindful of connection limits)
    let mut handles = vec![];
    
    for memory in memories {
        let store_clone = store.clone();
        let handle = tokio::spawn(async move {
            store_clone.insert_event(&memory).await
        });
        handles.push(handle);
    }
    
    // Wait for all inserts; the outer `?` surfaces join (panic) errors,
    // the inner one surfaces storage errors.
    for handle in handles {
        handle.await??;
    }
    
    // Compact storage after bulk operations
    store.compact().await?;
    
    Ok(())
}

Error Handling

Properly handle storage errors:
use storage::{StorageError, MemoryStore};

async fn safe_memory_operation(
    store: &impl MemoryStore,
    memory_id: &uuid::Uuid,
) -> Result<(), Box<dyn std::error::Error>> {
    match store.get_event(memory_id).await {
        Ok(Some(memory)) => {
            println!("Found memory: {}", memory.content);
        }
        Ok(None) => {
            println!("Memory not found");
        }
        Err(StorageError::Database(e)) => {
            eprintln!("Database error: {}", e);
            // Handle database-specific errors
        }
        Err(StorageError::NotFound(msg)) => {
            eprintln!("Not found: {}", msg);
            // Handle not found case
        }
        Err(e) => {
            eprintln!("Storage error: {}", e);
            // Handle other errors
        }
    }
    
    Ok(())
}

Complete Example

Here’s a complete example that demonstrates all concepts:
// From: kernel/storage/examples/demo_storage.rs

use storage::{HybridMemoryStore, MemoryStore, MemoryEvent, MemoryEventType, MemoryQuery};
use tokio;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Use an in-memory database so the demo leaves no files behind.
    let store = HybridMemoryStore::in_memory().await?;
    store.migrate().await?;

    // Seed the store: (device, type, content, embedding) per memory.
    let seed = vec![
        (
            "device_1",
            MemoryEventType::Observation,
            "I see a red apple on the kitchen table",
            vec![0.8, 0.2, 0.1, 0.0],
        ),
        (
            "device_1",
            MemoryEventType::Interaction,
            "User asked me to remember the apple for later",
            vec![0.7, 0.3, 0.1, 0.0],
        ),
        (
            "device_2",
            MemoryEventType::Observation,
            "The weather is sunny and warm today",
            vec![0.0, 0.1, 0.9, 0.0],
        ),
    ];

    for (device, kind, content, embedding) in seed {
        let event = MemoryEvent::new(
            device.to_string(),
            kind,
            content.to_string(),
            embedding,
        );
        store.insert_event(&event).await?;
    }

    // Search for food-related memories
    let food_query = MemoryQuery {
        embedding: vec![0.9, 0.1, 0.0, 0.0],
        top_k: 3,
        score_threshold: Some(0.5),
        filter: None,
    };

    println!("Food-related memories:");
    for hit in store.query_embeddings(&food_query).await?.events {
        println!("- [{}] {}", hit.score, hit.event.content);
    }

    Ok(())
}

Next Steps

Explore the MemoryStore trait documentation in the storage crate for the full API, and see the synchronization section above as a starting point for multi-device CRDT merging.

Troubleshooting

Common Issues

  1. Database locked errors: Ensure you’re not opening multiple connections to the same SQLite file
  2. Performance issues: Check embedding dimensions and consider using batch operations
  3. Memory growth: Implement retention policies and use compact() regularly

Performance Tips

  • Keep embeddings under 1024 dimensions
  • Use batch inserts for bulk operations
  • Index frequently queried fields
  • Run compact() during off-peak hours

Change Log

  • 2025-06-13: Initial tutorial created with storage implementation examples