# Pipeline/backend/app/schemas/runtime_usage.py

"""Response schemas for the gateway runtime usage endpoint."""

from __future__ import annotations

from datetime import datetime
from uuid import UUID

from sqlmodel import SQLModel

# Module-level reference to the two imported annotation types.
# NOTE(review): presumably this exists so `datetime` and `UUID` count as
# runtime-used imports — annotations are lazy strings under
# `from __future__ import annotations`, yet the models below still need the
# real classes to resolve their field types. Confirm before removing.
RUNTIME_ANNOTATION_TYPES = (datetime, UUID)


class RuntimeUsageWindow(SQLModel):
    """Metadata describing the rolling 5-hour usage window.

    Used as the type of the ``window`` field on ``RuntimeUsageResponse``.
    All four fields are required (none declares a default).
    """

    key: str  # window identifier, e.g. "5h"
    # NOTE(review): presumably the start of the window and the moment it
    # resets; timezone-awareness is not specified here — confirm with producer.
    started_at: datetime
    resets_at: datetime
    reset_in_ms: int  # milliseconds until oldest event ages out


class RuntimeUsageCurrent(SQLModel):
    """Aggregated usage totals within the current window.

    The three ``total_*`` fields are required. The limit and percentage
    fields default to ``None``.
    """

    total_cost_usd: float
    total_tokens: int  # input + output across all sessions
    total_calls: int
    token_limit: int | None = None  # configured limit; None = unknown
    token_pct: int | None = None   # 0–100; None when limit unknown
    # NOTE(review): presumably the cost counterparts of token_limit and
    # token_pct (same None-means-unknown convention) — confirm with producer.
    cost_limit_usd: float | None = None
    cost_pct: int | None = None


class RuntimeUsageBurnRate(SQLModel):
    """Recent token and cost velocity (last 60 minutes of the window).

    Used as the type of the ``burn_rate`` field on ``RuntimeUsageResponse``.
    Both rates are required floats.
    """

    tokens_per_minute: float
    cost_usd_per_minute: float


class RuntimeUsagePredictions(SQLModel):
    """Estimates derived from current burn rate and configured limits.

    ``safe`` is required even though ``time_to_limit_ms`` may be ``None``.
    """

    time_to_limit_ms: int | None = None  # None when limit or burn rate unknown
    # True if time_to_limit > reset_in_ms (will reset before hitting limit).
    # NOTE(review): the value used when time_to_limit_ms is None is decided by
    # the producer and is not visible in this module — confirm.
    safe: bool


class ModelUsageEntry(SQLModel):
    """Usage and cost breakdown for one provider/model combination.

    Instances are the values of ``RuntimeUsageResponse.per_model``.
    Every field is required.
    """

    provider: str         # normalised: "anthropic", "openai", "ollama", "unknown"
    account_key: str      # e.g. "claude-default", "openai-work", "ollama-local"
    model: str            # normalised model slug, e.g. "claude-sonnet-4-6"
    input_tokens: int
    output_tokens: int
    cache_read_tokens: int
    cache_write_tokens: int
    # NOTE(review): whether total_tokens includes the cache token counts is
    # not determinable from this schema — confirm with producer.
    total_tokens: int
    cost_usd: float
    calls: int
    unpriced: bool  # True = unknown paid model; False = priced or intentionally free (Ollama)


class TopSession(SQLModel):
    """Summary row for one session.

    Rows are carried in ``RuntimeUsageResponse.top_sessions``, sorted by cost
    descending. The ordering is applied by the producer, not by this schema.
    """

    session_id: str
    label: str | None = None
    model: str | None = None
    cost_usd: float
    total_tokens: int
    # NOTE(review): a string, unlike the datetime timestamps elsewhere in this
    # module; the format is not specified here (presumably ISO 8601) — confirm.
    updated_at: str | None = None


class ProviderUsageScrapeResult(SQLModel):
    """Structured result from one provider-native usage scrape (e.g. Claude CLI /usage).

    Returned by GET /gateways/{id}/provider-usage.
    The identity and freshness fields (``provider`` through
    ``freshness_ttl_seconds``) are required. Every usage, debug and error
    field defaults to ``None``. Partial data is still useful and expected
    when CLI output format changes or the session is quiet.
    """

    # --- required: which scrape this is and how fresh it is ---
    provider: str          # "anthropic", "openai", "google"
    source_name: str       # "claude_cli_tmux", "gemini_scrape", etc.
    scraped_at: datetime
    fresh: bool            # True if within the freshness window
    # NOTE(review): presumably the length of that freshness window — confirm.
    freshness_ttl_seconds: int

    # --- optional: current-window usage ---
    current_pct: float | None = None       # 0–100 % of current window used
    remaining_ms: int | None = None        # ms until window resets
    remaining_label: str | None = None     # human-readable "2h 47m"

    # --- optional: weekly usage ---
    weekly_messages_used: int | None = None
    weekly_messages_limit: int | None = None
    weekly_tokens_used: int | None = None
    weekly_cost_usd: float | None = None

    # --- optional: diagnostics ---
    raw_text: str | None = None   # included when DEBUG_SCRAPER_RAW=true
    error: str | None = None      # set when scrape or parse failed


class ProviderUsageResponse(SQLModel):
    """Response envelope for GET /gateways/{id}/provider-usage.

    Wraps a list of ``ProviderUsageScrapeResult`` items. All fields are required.
    """

    gateway_id: UUID
    generated_at: datetime
    # NOTE(review): presumably reflects whether provider scraping is turned on
    # for this gateway/deployment — confirm with the endpoint implementation.
    scraper_enabled: bool
    results: list[ProviderUsageScrapeResult]


class RuntimeUsageResponse(SQLModel):
    """Complete runtime usage payload returned by GET /gateways/{id}/runtime-usage.

    Composes the window, totals, burn-rate and prediction sub-models with a
    per-model breakdown and a list of session summaries. All fields are
    required.
    """

    generated_at: datetime
    gateway_id: UUID
    window: RuntimeUsageWindow
    current: RuntimeUsageCurrent
    burn_rate: RuntimeUsageBurnRate
    predictions: RuntimeUsagePredictions
    per_model: dict[str, ModelUsageEntry]  # key = "provider/model"
    top_sessions: list[TopSession]