
Session 7: Multi-Provider AI Orchestration

Building a provider-agnostic AI layer supporting OpenAI, Anthropic, Google, Grok, Azure, and custom endpoints

Dec 1, 2025 · 14 min read · AI, OpenAI, Anthropic, Multi-Provider

Why Multi-Provider?

Locking users into a single AI provider is a business risk and a user experience limitation. Different models excel at different tasks. Users have provider preferences. Enterprise customers often require Azure or self-hosted options for compliance.

🤖 OpenAI · 🧠 Anthropic · 🔮 Google · Grok · ☁️ Azure · 🏠 Custom

The Orchestrator Pattern

The AI Orchestrator provides a unified interface regardless of which provider is being used:

// api/src/services/aiOrchestrator.ts

export interface AIMessage {
  role: 'system' | 'user' | 'assistant';
  content: string;
}

export interface AIResponse {
  content: string;
  usage: {
    promptTokens: number;
    completionTokens: number;
    totalTokens: number;
  };
  model: string;
  provider: string;
}

export interface ChatOptions {
  userId: string;
  messages: AIMessage[];
  model?: string;
  temperature?: number;
  maxTokens?: number;
}

class AIOrchestrator {
  private providers: Map<string, AIProvider>;

  constructor() {
    this.providers = new Map();
    this.registerProviders();
  }

  async chat(options: ChatOptions): Promise<AIResponse> {
    // 1. Get user's preferred provider
    const userConfig = await this.getUserAIConfig(options.userId);
    const provider = this.providers.get(userConfig.provider);

    if (!provider) {
      throw new Error(`Provider ${userConfig.provider} not available`);
    }

    // 2. Execute chat with appropriate provider
    const response = await provider.chat({
      messages: options.messages,
      model: options.model || userConfig.defaultModel,
      temperature: options.temperature ?? 0.7,
      maxTokens: options.maxTokens ?? 2000,
      apiKey: await this.decryptApiKey(userConfig.encryptedApiKey),
    });

    // 3. Track usage
    await this.trackUsage(options.userId, response);

    return response;
  }
}
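The `registerProviders` call in the constructor isn't shown. The lookup it enables can be demonstrated with a stub; everything here (including `EchoProvider` and the `Demo*` names) is a self-contained stand-in for illustration, not the project's code:

```typescript
// Simplified shapes for a self-contained demo (the real interfaces are above).
interface DemoMessage { role: 'system' | 'user' | 'assistant'; content: string }
interface DemoResponse { content: string; provider: string }
interface DemoProvider {
  name: string;
  chat(messages: DemoMessage[]): Promise<DemoResponse>;
}

// EchoProvider stands in for the real OpenAI/Anthropic classes.
class EchoProvider implements DemoProvider {
  name = 'echo';
  async chat(messages: DemoMessage[]): Promise<DemoResponse> {
    // Echo the last message back, tagged with the provider name.
    return { content: messages[messages.length - 1].content, provider: this.name };
  }
}

// The registry pattern registerProviders() relies on: name -> instance.
const providers = new Map<string, DemoProvider>();
for (const p of [new EchoProvider()]) providers.set(p.name, p);

// chat() then resolves the provider by name, exactly as the orchestrator does.
providers.get('echo')!.chat([{ role: 'user', content: 'hello' }])
  .then(r => console.log(`${r.provider}: ${r.content}`)); // echo: hello
```

Because callers only ever touch the registry through the `chat()` entry point, swapping providers requires no changes on the calling side.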

Provider Implementation

Each provider implements a common interface but handles the API differences internally:

// Provider interface
interface AIProvider {
  name: string;
  models: string[];
  chat(options: ProviderChatOptions): Promise<AIResponse>;
}

// OpenAI implementation
class OpenAIProvider implements AIProvider {
  name = 'openai';
  models = ['gpt-4-turbo', 'gpt-4', 'gpt-3.5-turbo'];

  async chat(options: ProviderChatOptions): Promise<AIResponse> {
    const response = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${options.apiKey}`,
      },
      body: JSON.stringify({
        model: options.model,
        messages: options.messages,
        temperature: options.temperature,
        max_tokens: options.maxTokens,
      }),
    });

    if (!response.ok) {
      // Surface the HTTP status so the retry logic can tell auth,
      // rate-limit, and server errors apart.
      throw Object.assign(new Error(`OpenAI request failed: ${response.status}`), {
        status: response.status,
      });
    }

    const data = await response.json();

    return {
      content: data.choices[0].message.content,
      usage: {
        promptTokens: data.usage.prompt_tokens,
        completionTokens: data.usage.completion_tokens,
        totalTokens: data.usage.total_tokens,
      },
      model: data.model,
      provider: 'openai',
    };
  }
}

// Anthropic implementation
class AnthropicProvider implements AIProvider {
  name = 'anthropic';
  models = ['claude-3-opus', 'claude-3-sonnet', 'claude-3-haiku'];

  async chat(options: ProviderChatOptions): Promise<AIResponse> {
    // Extract system message (Anthropic handles it differently)
    const systemMessage = options.messages.find(m => m.role === 'system');
    const otherMessages = options.messages.filter(m => m.role !== 'system');

    const response = await fetch('https://api.anthropic.com/v1/messages', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': options.apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify({
        model: options.model,
        system: systemMessage?.content,
        messages: otherMessages.map(m => ({
          role: m.role,
          content: m.content,
        })),
        temperature: options.temperature,
        max_tokens: options.maxTokens,
      }),
    });

    if (!response.ok) {
      // Propagate the status code for the retry logic, as with OpenAI above.
      throw Object.assign(new Error(`Anthropic request failed: ${response.status}`), {
        status: response.status,
      });
    }

    const data = await response.json();

    return {
      content: data.content[0].text,
      usage: {
        promptTokens: data.usage.input_tokens,
        completionTokens: data.usage.output_tokens,
        totalTokens: data.usage.input_tokens + data.usage.output_tokens,
      },
      model: data.model,
      provider: 'anthropic',
    };
  }
}
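Azure and custom endpoints round out the provider list. A hedged sketch of an Azure variant follows; the function names and the `api-version` value are assumptions based on the general shape of Azure OpenAI's API, not the project's actual code. The key differences: the deployment name in the URL selects the model (no `model` field in the body), and authentication uses an `api-key` header instead of a Bearer token.

```typescript
// Hypothetical: build the Azure OpenAI chat completions URL from the
// resource name and deployment stored in UserAIConfig.
function buildAzureChatUrl(
  resourceName: string,
  deployment: string,
  apiVersion = '2024-02-01'
): string {
  return `https://${resourceName}.openai.azure.com/openai/deployments/` +
    `${deployment}/chat/completions?api-version=${apiVersion}`;
}

class AzureOpenAIProvider {
  name = 'azure';

  async chat(options: {
    resourceName: string;
    deployment: string;
    apiKey: string;
    messages: { role: string; content: string }[];
    temperature?: number;
    maxTokens?: number;
  }) {
    const response = await fetch(buildAzureChatUrl(options.resourceName, options.deployment), {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'api-key': options.apiKey, // Azure uses api-key, not a Bearer token
      },
      body: JSON.stringify({
        // No model field: the deployment name in the URL selects the model.
        messages: options.messages,
        temperature: options.temperature,
        max_tokens: options.maxTokens,
      }),
    });
    if (!response.ok) {
      throw Object.assign(new Error(`Azure request failed: ${response.status}`), {
        status: response.status,
      });
    }
    return response.json();
  }
}
```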

User Configuration

Users configure their preferred AI provider in settings. API keys are encrypted with the user's DEK (data encryption key):

// Platform schema
model UserAIConfig {
  id                String   @id @default(uuid())
  userId            String   @unique @map("user_id")

  provider          String   @default("openai")
  defaultModel      String   @default("gpt-4-turbo")

  // Encrypted API key
  encryptedApiKey   String?  @map("encrypted_api_key")
  apiKeyIv          String?  @map("api_key_iv")

  // Provider-specific settings
  customEndpoint    String?  @map("custom_endpoint")
  azureResourceName String?  @map("azure_resource_name")
  azureDeployment   String?  @map("azure_deployment")

  createdAt         DateTime @default(now())
  updatedAt         DateTime @updatedAt

  user              User     @relation(fields: [userId], references: [id])

  @@map("user_ai_configs")
}
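The schema stores both a ciphertext and an IV, which suggests an authenticated cipher. A sketch of the seal/unseal pair, assuming AES-256-GCM and a 32-byte DEK (the post doesn't specify the cipher, so treat this as one plausible implementation):

```typescript
import { createCipheriv, createDecipheriv, randomBytes } from 'node:crypto';

// Seal an API key with the user's 32-byte DEK; returns values matching
// the encryptedApiKey / apiKeyIv columns above.
function encryptApiKey(apiKey: string, dek: Buffer): { encryptedApiKey: string; apiKeyIv: string } {
  const iv = randomBytes(12); // 96-bit IV, the standard size for GCM
  const cipher = createCipheriv('aes-256-gcm', dek, iv);
  const ciphertext = Buffer.concat([cipher.update(apiKey, 'utf8'), cipher.final()]);
  // Append the 16-byte auth tag so decryption can verify integrity.
  const payload = Buffer.concat([ciphertext, cipher.getAuthTag()]);
  return { encryptedApiKey: payload.toString('base64'), apiKeyIv: iv.toString('base64') };
}

function decryptApiKey(encryptedApiKey: string, apiKeyIv: string, dek: Buffer): string {
  const payload = Buffer.from(encryptedApiKey, 'base64');
  const ciphertext = payload.subarray(0, payload.length - 16);
  const tag = payload.subarray(payload.length - 16);
  const decipher = createDecipheriv('aes-256-gcm', dek, Buffer.from(apiKeyIv, 'base64'));
  decipher.setAuthTag(tag);
  return Buffer.concat([decipher.update(ciphertext), decipher.final()]).toString('utf8');
}
```

GCM's auth tag means a tampered ciphertext fails loudly at decryption time instead of yielding a corrupted key.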

Token Tracking & Cost Estimation

Every AI call is tracked for usage analysis and cost estimation:

model TokenUsage {
  id              String   @id @default(uuid())
  userId          String   @map("user_id")

  provider        String
  model           String
  feature         String   // 'skill', 'report', 'chat', etc.
  featureId       String?  @map("feature_id")

  promptTokens    Int      @map("prompt_tokens")
  completionTokens Int     @map("completion_tokens")
  totalTokens     Int      @map("total_tokens")

  estimatedCost   Float?   @map("estimated_cost")
  latencyMs       Int?     @map("latency_ms")

  createdAt       DateTime @default(now())

  @@map("token_usage")
}

// Cost calculation (per 1K tokens)
const COST_PER_1K_TOKENS: Record<string, Record<string, { prompt: number; completion: number }>> = {
  'openai': {
    'gpt-4-turbo': { prompt: 0.01, completion: 0.03 },
    'gpt-4': { prompt: 0.03, completion: 0.06 },
    'gpt-3.5-turbo': { prompt: 0.0005, completion: 0.0015 },
  },
  'anthropic': {
    'claude-3-opus': { prompt: 0.015, completion: 0.075 },
    'claude-3-sonnet': { prompt: 0.003, completion: 0.015 },
    'claude-3-haiku': { prompt: 0.00025, completion: 0.00125 },
  },
  // ... other providers
};

function estimateCost(
  provider: string,
  model: string,
  promptTokens: number,
  completionTokens: number
): number {
  const rates = COST_PER_1K_TOKENS[provider]?.[model];
  if (!rates) return 0;

  return (
    (promptTokens / 1000) * rates.prompt +
    (completionTokens / 1000) * rates.completion
  );
}
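As a quick sanity check of the arithmetic, using the gpt-4-turbo rates from the table above:

```typescript
// gpt-4-turbo rates from the table above, per 1K tokens.
const rates = { prompt: 0.01, completion: 0.03 };

// A call with 1,200 prompt tokens and 400 completion tokens:
const cost = (1200 / 1000) * rates.prompt + (400 / 1000) * rates.completion;
console.log(cost.toFixed(4)); // "0.0240" — $0.012 prompt + $0.012 completion
```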

Context Builder

The Context Builder assembles user data for AI prompts, respecting privacy and scope limits:

// api/src/services/contextBuilder.ts

type ContextScope = 'minimal' | 'standard' | 'comprehensive';

interface ContextOptions {
  scope: ContextScope;
  dateRange?: { start: Date; end: Date };
  includePillars?: boolean;
  includeFeels?: boolean;
  includeHabits?: boolean;
}

async function buildContext(
  userId: string,
  options: ContextOptions
): Promise<UserContext> {
  const userDek = await getUserDEK(userId);

  const context: UserContext = {
    user: await getUserProfile(userId),
    timestamp: new Date().toISOString(),
  };

  // Minimal: just user name and current date
  if (options.scope === 'minimal') {
    return context;
  }

  // Standard: include recent data
  if (options.scope === 'standard' || options.scope === 'comprehensive') {
    const dateRange = options.dateRange || {
      start: subDays(new Date(), 7),
      end: new Date(),
    };

    if (options.includePillars !== false) {
      context.pillars = await getPillarsWithScores(userId, userDek);
    }

    if (options.includeFeels !== false) {
      context.feels = await getFeelsEntries(userId, dateRange, userDek);
    }

    context.entries = await getDailyEntries(userId, dateRange, userDek);
  }

  // Comprehensive: include 12-week history
  if (options.scope === 'comprehensive') {
    const extendedRange = {
      start: subWeeks(new Date(), 12),
      end: new Date(),
    };

    context.weeklyRecaps = await getWeeklyRecaps(userId, extendedRange, userDek);
    context.patterns = await identifyPatterns(userId, extendedRange, userDek);

    if (options.includeHabits !== false) {
      context.habits = await getHabitHistory(userId, extendedRange, userDek);
    }
  }

  return context;
}
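One practical reason for the scope tiers is token budget. A rough pre-flight estimate can guard against oversized prompts; the ~4-characters-per-token heuristic and both function names here are assumptions for illustration, not part of the post's code:

```typescript
// Rough heuristic: ~4 characters per token for English text. This is an
// approximation for budgeting only, not the provider's real tokenizer.
function estimateContextTokens(context: unknown): number {
  return Math.ceil(JSON.stringify(context).length / 4);
}

// Hypothetical guard: callers could drop to a smaller scope when the
// assembled context would blow the prompt budget.
function fitsScopeBudget(context: unknown, maxTokens = 2000): boolean {
  return estimateContextTokens(context) <= maxTokens;
}
```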

Privacy by Design

All user data is decrypted just-in-time during context building. The decrypted context exists only in memory during the API call. No plaintext user data is ever logged or stored.

Fallback & Retry Logic

// Simple promise-based delay used by the retry loop below
const sleep = (ms: number) => new Promise<void>(resolve => setTimeout(resolve, ms));

// Defined as a method on AIOrchestrator so it can delegate to chat()
async chatWithRetry(
  options: ChatOptions,
  maxRetries = 3
): Promise<AIResponse> {
  let lastError: Error | null = null;

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      return await this.chat(options);
    } catch (error: any) {
      lastError = error;

      // Don't retry on auth errors
      if (error.status === 401 || error.status === 403) {
        throw error;
      }

      // Rate limit - wait and retry
      if (error.status === 429) {
        const retryAfter = Number(error.headers?.['retry-after']) || attempt * 2;
        await sleep(retryAfter * 1000);
        continue;
      }

      // Server errors - retry with backoff
      if (error.status >= 500) {
        await sleep(Math.pow(2, attempt) * 1000);
        continue;
      }

      throw error;
    }
  }

  throw lastError || new Error('Max retries exceeded');
}
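The loop above retries within a single provider; cross-provider fallback, which the section title also covers, can be layered on top. A minimal sketch with hypothetical names (the post doesn't show this part):

```typescript
type ChatFn = () => Promise<string>;

// Try each candidate provider in order; move to the next only when a
// call fails, and rethrow the last error if every provider is down.
async function chatWithFallback(
  attempts: Array<{ name: string; chat: ChatFn }>
): Promise<string> {
  let lastError: unknown = null;
  for (const { chat } of attempts) {
    try {
      return await chat();
    } catch (error) {
      lastError = error; // remember the failure, then try the next provider
    }
  }
  throw lastError ?? new Error('No providers configured');
}
```

In practice each entry would wrap a `chatWithRetry` call for one provider, so transient failures are retried in place and only persistent outages trigger a provider switch.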

AI Orchestration Stats

Providers supported: 6
Models available: 15+
Context scopes: 3
Implementation time: ~3 hours

Key Takeaways

  • Provider abstraction enables flexibility and vendor independence
  • Encrypted API key storage protects user credentials
  • Token tracking enables cost management and optimization
  • Context scopes balance data richness with token efficiency
  • Retry logic handles transient failures gracefully