"""Response schemas for the gateway runtime usage endpoint.""" from __future__ import annotations from datetime import datetime from uuid import UUID from sqlmodel import Field, SQLModel RUNTIME_ANNOTATION_TYPES = (datetime, UUID) class RuntimeUsageWindow(SQLModel): """Rolling 5-hour usage window metadata.""" key: str # "5h" started_at: datetime resets_at: datetime reset_in_ms: int # milliseconds until oldest event ages out source: str = "local_jsonl_estimate" # source of this window confidence: str = "low" # confidence level for this window class RuntimeUsageCurrent(SQLModel): """Aggregated totals within the current window.""" total_cost_usd: float total_tokens: int # input + output across all sessions total_output_tokens: int = 0 # output tokens only — used with output_token_limit total_calls: int # ── Legacy fields (kept for backwards compat) ──────────────────────────── # token_limit is ambiguous (could be total or output); use typed fields below # when the limit kind is known. token_limit: int | None = None token_pct: int | None = None cost_limit_usd: float | None = None cost_pct: int | None = None token_limit_source: str | None = None cost_limit_source: str | None = None # ── Typed limits (Phase 4) ──────────────────────────────────────────────── # Each field pairs a limit with a percent computed from matching units only. # Output-token limit: compared against output tokens only, never input/cache. output_token_limit: int | None = None output_token_limit_pct: int | None = None output_token_limit_source: str | None = None # Total-token limit: compared against input + output combined. total_token_limit: int | None = None total_token_limit_pct: int | None = None total_token_limit_source: str | None = None # Message/request limit: compared against call count, never token totals. message_limit: int | None = None message_pct: int | None = None message_limit_source: str | None = None class RuntimeUsageBurnRate(SQLModel): """Recent token and cost velocity (last 60 minutes of the window).""" tokens_per_minute: float # input + output combined output_tokens_per_minute: float = 0.0 # output tokens only cost_usd_per_minute: float class RuntimeUsagePredictions(SQLModel): """Estimates derived from current burn rate and configured limits.""" time_to_limit_ms: int | None = None # None when limit or burn rate unknown safe: bool # True if time_to_limit > reset_in_ms (will reset before hitting limit) limit_kind: str = "total_tokens" # which limit drove this prediction class ModelUsageEntry(SQLModel): """Usage and cost breakdown for one provider/model combination.""" provider: str # normalised: "anthropic", "openai", "ollama", "unknown" account_key: str # e.g. "claude-default", "openai-work", "ollama-local" model: str # normalised model slug, e.g. "claude-sonnet-4-6" input_tokens: int output_tokens: int cache_read_tokens: int cache_write_tokens: int total_tokens: int cost_usd: float calls: int unpriced: bool # True = unknown paid model; False = priced or intentionally free (Ollama) source: str = "local_jsonl_estimate" # source of this data class TopSession(SQLModel): """Summary row for one session, sorted by cost descending.""" session_id: str label: str | None = None model: str | None = None cost_usd: float total_tokens: int updated_at: str | None = None source: str = "local_jsonl_estimate" # source of this session data class ProviderUsageWindow(SQLModel): """One provider-native usage window (session/week/model-specific).""" key: str # current_session | weekly_all_models | weekly_sonnet | extra_usage label: str pct_used: float | None = None remaining_ms: int | None = None remaining_label: str | None = None extra_text: str | None = None source: str = "provider_native" confidence: str = "high" class ProviderUsageScrapeResult(SQLModel): """Structured result from one provider-native usage scrape (e.g. Claude CLI /usage). Returned by GET /gateways/{id}/provider-usage. All fields are optional — partial data is still useful and expected when CLI output format changes or the session is quiet. """ provider: str # "anthropic", "openai", "google" source_name: str # "claude_cli_tmux", "gemini_scrape", etc. scraped_at: datetime fresh: bool # True if within the freshness window freshness_ttl_seconds: int windows: list[ProviderUsageWindow] = Field(default_factory=list) current_pct: float | None = None # 0–100 % of current window used remaining_ms: int | None = None # ms until window resets remaining_label: str | None = None # human-readable "2h 47m" weekly_messages_used: int | None = None weekly_messages_limit: int | None = None weekly_tokens_used: int | None = None weekly_cost_usd: float | None = None raw_text: str | None = None # included when DEBUG_SCRAPER_RAW=true error: str | None = None # set when scrape or parse failed # Source and confidence for the scraped data source: str | None = None # e.g. "provider_native" or "provider_api_rate_limit" confidence: str | None = None # e.g. "high" or "medium" class ProviderUsageResponse(SQLModel): """Response envelope for GET /gateways/{id}/provider-usage.""" gateway_id: UUID generated_at: datetime scraper_enabled: bool results: list[ProviderUsageScrapeResult] class ClaudeStatuslineUsageIn(SQLModel): """Sanitized Claude Code status-line payload posted by a local collector. Claude Code passes a much larger JSON object to status-line commands. The collector should forward only these low-risk fields so Pipeline never needs raw prompts, file paths beyond the current workspace, or credentials. """ session_id: str | None = None model: dict[str, object] | None = None workspace: dict[str, object] | None = None rate_limits: dict[str, object] | None = None class RuntimeUsageResponse(SQLModel): """Complete runtime usage payload returned by GET /gateways/{id}/runtime-usage.""" generated_at: datetime gateway_id: UUID window: RuntimeUsageWindow current: RuntimeUsageCurrent burn_rate: RuntimeUsageBurnRate predictions: RuntimeUsagePredictions per_model: dict[str, ModelUsageEntry] # key = "provider/model" top_sessions: list[TopSession]