Pipeline/backend/app/schemas/runtime_usage.py

122 lines
3.8 KiB
Python
Raw Normal View History

"""Response schemas for the gateway runtime usage endpoint."""
from __future__ import annotations
from datetime import datetime
from uuid import UUID
from sqlmodel import SQLModel
# Tuple that references the imported annotation types at runtime.
# NOTE(review): presumably this keeps `datetime` and `UUID` from being treated
# as unused / type-checking-only imports while annotations are postponed via
# `from __future__ import annotations` -- confirm before removing.
RUNTIME_ANNOTATION_TYPES = (datetime, UUID)
class RuntimeUsageWindow(SQLModel):
    """Metadata describing the rolling 5-hour usage window."""

    # Window key, e.g. "5h".
    key: str
    started_at: datetime
    resets_at: datetime
    # Milliseconds remaining until the oldest event ages out of the window.
    reset_in_ms: int
class RuntimeUsageCurrent(SQLModel):
    """Totals aggregated over the current usage window."""

    total_cost_usd: float
    # Input plus output tokens, summed across all sessions.
    total_tokens: int
    total_calls: int
    # Configured token limit; None means the limit is unknown.
    token_limit: int | None = None
    # Percentage of the token limit used (0-100); None when the limit is unknown.
    token_pct: int | None = None
    cost_limit_usd: float | None = None
    cost_pct: int | None = None
class RuntimeUsageBurnRate(SQLModel):
    """Token and cost velocity over the last 60 minutes of the window."""

    tokens_per_minute: float
    cost_usd_per_minute: float
class RuntimeUsagePredictions(SQLModel):
    """Projections computed from the current burn rate and configured limits."""

    # None when either the limit or the burn rate is unknown.
    time_to_limit_ms: int | None = None
    # True if time_to_limit > reset_in_ms, i.e. the window will reset before
    # the limit is hit.
    safe: bool
class ModelUsageEntry(SQLModel):
    """Usage and cost figures for a single provider/model combination."""

    # Normalised provider name: "anthropic", "openai", "ollama", "unknown".
    provider: str
    # Account key, e.g. "claude-default", "openai-work", "ollama-local".
    account_key: str
    # Normalised model slug, e.g. "claude-sonnet-4-6".
    model: str
    input_tokens: int
    output_tokens: int
    cache_read_tokens: int
    cache_write_tokens: int
    total_tokens: int
    cost_usd: float
    calls: int
    # True  = unknown paid model.
    # False = priced, or intentionally free (Ollama).
    unpriced: bool
class TopSession(SQLModel):
    """Per-session summary row; rows are sorted by cost, highest first."""

    session_id: str
    label: str | None = None
    model: str | None = None
    cost_usd: float
    total_tokens: int
    # Carried as a string here, not as a datetime.
    updated_at: str | None = None
class ProviderUsageScrapeResult(SQLModel):
    """Parsed result of one provider-native usage scrape, e.g. Claude CLI /usage.

    Returned by GET /gateways/{id}/provider-usage.

    Every field after ``freshness_ttl_seconds`` defaults to None: partial data
    is still useful, and is expected when the CLI output format changes or the
    session is quiet.
    """

    # Provider name: "anthropic", "openai", "google".
    provider: str
    # Scrape source: "claude_cli_tmux", "gemini_scrape", etc.
    source_name: str
    scraped_at: datetime
    # True if the scrape is within the freshness window.
    fresh: bool
    freshness_ttl_seconds: int

    # Percentage (0-100) of the current window already used.
    current_pct: float | None = None
    # Milliseconds until the window resets.
    remaining_ms: int | None = None
    # Human-readable form of the remaining time, e.g. "2h 47m".
    remaining_label: str | None = None

    weekly_messages_used: int | None = None
    weekly_messages_limit: int | None = None
    weekly_tokens_used: int | None = None
    weekly_cost_usd: float | None = None

    # Included when DEBUG_SCRAPER_RAW=true.
    raw_text: str | None = None
    # Set when the scrape or the parse failed.
    error: str | None = None
class ProviderUsageResponse(SQLModel):
    """Envelope returned by GET /gateways/{id}/provider-usage."""

    gateway_id: UUID
    generated_at: datetime
    scraper_enabled: bool
    # One entry per provider-native scrape.
    results: list[ProviderUsageScrapeResult]
class RuntimeUsageResponse(SQLModel):
    """Full payload served by GET /gateways/{id}/runtime-usage."""

    generated_at: datetime
    gateway_id: UUID
    window: RuntimeUsageWindow
    current: RuntimeUsageCurrent
    burn_rate: RuntimeUsageBurnRate
    predictions: RuntimeUsagePredictions
    # Keyed by "provider/model".
    per_model: dict[str, ModelUsageEntry]
    top_sessions: list[TopSession]