"""Response schemas for the gateway runtime usage endpoint.""" from __future__ import annotations from datetime import datetime from uuid import UUID from sqlmodel import SQLModel RUNTIME_ANNOTATION_TYPES = (datetime, UUID) class RuntimeUsageWindow(SQLModel): """Rolling 5-hour usage window metadata.""" key: str # "5h" started_at: datetime resets_at: datetime reset_in_ms: int # milliseconds until oldest event ages out class RuntimeUsageCurrent(SQLModel): """Aggregated totals within the current window.""" total_cost_usd: float total_tokens: int # input + output across all sessions total_calls: int token_limit: int | None = None # configured limit; None = unknown token_pct: int | None = None # 0–100; None when limit unknown cost_limit_usd: float | None = None cost_pct: int | None = None class RuntimeUsageBurnRate(SQLModel): """Recent token and cost velocity (last 60 minutes of the window).""" tokens_per_minute: float cost_usd_per_minute: float class RuntimeUsagePredictions(SQLModel): """Estimates derived from current burn rate and configured limits.""" time_to_limit_ms: int | None = None # None when limit or burn rate unknown safe: bool # True if time_to_limit > reset_in_ms (will reset before hitting limit) class ModelUsageEntry(SQLModel): """Usage and cost breakdown for one provider/model combination.""" provider: str # normalised: "anthropic", "openai", "ollama", "unknown" account_key: str # e.g. "claude-default", "openai-work", "ollama-local" model: str # normalised model slug, e.g. "claude-sonnet-4-6" input_tokens: int output_tokens: int cache_read_tokens: int cache_write_tokens: int total_tokens: int cost_usd: float calls: int unpriced: bool # True = unknown paid model; False = priced or intentionally free (Ollama) class TopSession(SQLModel): """Summary row for one session, sorted by cost descending.""" session_id: str label: str | None = None model: str | None = None cost_usd: float total_tokens: int updated_at: str | None = None class RuntimeUsageResponse(SQLModel): """Complete runtime usage payload returned by GET /gateways/{id}/runtime-usage.""" generated_at: datetime gateway_id: UUID window: RuntimeUsageWindow current: RuntimeUsageCurrent burn_rate: RuntimeUsageBurnRate predictions: RuntimeUsagePredictions per_model: dict[str, ModelUsageEntry] # key = "provider/model" top_sessions: list[TopSession]