Pipeline/backend/app/schemas/runtime_usage.py

122 lines
3.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Response schemas for the gateway runtime usage endpoint."""
from __future__ import annotations
from datetime import datetime
from uuid import UUID
from sqlmodel import SQLModel
# Tuple holding a runtime reference to the datetime and UUID classes.
# NOTE(review): with `from __future__ import annotations` in effect these two
# names are otherwise used only inside lazily-evaluated annotations, so this
# presumably exists to keep the imports "used" (for linters) and resolvable
# when the models' annotations are evaluated -- confirm before removing.
RUNTIME_ANNOTATION_TYPES = (datetime, UUID)
class RuntimeUsageWindow(SQLModel):
    """Rolling 5-hour usage window metadata."""

    # Window key, e.g. "5h".
    key: str

    # Start of the window and the moment it resets.
    started_at: datetime
    resets_at: datetime

    # Milliseconds until the oldest event ages out of the window.
    reset_in_ms: int
class RuntimeUsageCurrent(SQLModel):
    """Aggregated totals within the current window."""

    # --- Required totals -------------------------------------------------
    total_cost_usd: float
    # Input + output tokens, summed across all sessions.
    total_tokens: int
    total_calls: int

    # --- Token limit (optional) ------------------------------------------
    # Configured token limit; None means the limit is unknown.
    token_limit: int | None = None
    # Token usage as a percentage, 0-100; None when the limit is unknown.
    token_pct: int | None = None

    # --- Cost limit (optional) -------------------------------------------
    # NOTE(review): presumably the cost counterparts of token_limit and
    # token_pct (same None semantics, same 0-100 range) -- confirm against
    # the code that populates this model.
    cost_limit_usd: float | None = None
    cost_pct: int | None = None
class RuntimeUsageBurnRate(SQLModel):
    """Recent token and cost velocity (last 60 minutes of the window)."""

    # Both rates are expressed per minute; neither field has a default.
    tokens_per_minute: float
    cost_usd_per_minute: float
class RuntimeUsagePredictions(SQLModel):
    """Estimates derived from current burn rate and configured limits."""

    # Estimated milliseconds until the limit is reached; None when either
    # the limit or the burn rate is unknown.
    time_to_limit_ms: int | None = None

    # True if time_to_limit > reset_in_ms, i.e. the window will reset before
    # the limit is hit. Required field (no default).
    # NOTE(review): the value used when time_to_limit_ms is None is decided
    # by the producer and is not visible in this schema -- confirm.
    safe: bool
class ModelUsageEntry(SQLModel):
    """Usage and cost breakdown for one provider/model combination."""

    # --- Identity ----------------------------------------------------------
    # Normalised provider name: "anthropic", "openai", "ollama", "unknown".
    provider: str
    # Account identifier, e.g. "claude-default", "openai-work", "ollama-local".
    account_key: str
    # Normalised model slug, e.g. "claude-sonnet-4-6".
    model: str

    # --- Token counters ----------------------------------------------------
    input_tokens: int
    output_tokens: int
    cache_read_tokens: int
    cache_write_tokens: int
    total_tokens: int

    # --- Cost and call volume ----------------------------------------------
    cost_usd: float
    calls: int

    # True  = unknown paid model (no price available).
    # False = priced, or intentionally free (Ollama).
    unpriced: bool
class TopSession(SQLModel):
    """Summary row for one session, sorted by cost descending."""

    session_id: str

    # Optional descriptive fields; both default to None.
    label: str | None = None
    model: str | None = None

    # Required usage totals for the session.
    cost_usd: float
    total_tokens: int

    # NOTE(review): typed as a string, unlike the datetime timestamps used by
    # the other schemas in this module; the expected string format is not
    # visible here -- confirm with the producer.
    updated_at: str | None = None
class ProviderUsageScrapeResult(SQLModel):
    """Structured result from one provider-native usage scrape (e.g. Claude CLI /usage).
    Returned by GET /gateways/{id}/provider-usage.
    All fields are optional — partial data is still useful and expected
    when CLI output format changes or the session is quiet.
    """

    # --- Always present ----------------------------------------------------
    # NOTE(review): the five fields in this group have no default and are
    # therefore required; the "all fields are optional" remark in the
    # docstring only holds for the fields further down.

    # Provider name: "anthropic", "openai", "google".
    provider: str
    # Name of the scrape source: "claude_cli_tmux", "gemini_scrape", etc.
    source_name: str
    scraped_at: datetime
    # True if the scrape is within the freshness window.
    fresh: bool
    freshness_ttl_seconds: int

    # --- Current window (optional) -----------------------------------------
    # Percentage of the current window used, 0-100.
    current_pct: float | None = None
    # Milliseconds until the window resets.
    remaining_ms: int | None = None
    # Human-readable form of the remaining time, e.g. "2h 47m".
    remaining_label: str | None = None

    # --- Weekly figures (optional) -----------------------------------------
    weekly_messages_used: int | None = None
    weekly_messages_limit: int | None = None
    weekly_tokens_used: int | None = None
    weekly_cost_usd: float | None = None

    # --- Diagnostics (optional) --------------------------------------------
    # Raw scraped text; included when DEBUG_SCRAPER_RAW=true.
    raw_text: str | None = None
    # Set when the scrape or the parse failed.
    error: str | None = None
class ProviderUsageResponse(SQLModel):
    """Response envelope for GET /gateways/{id}/provider-usage."""

    # Envelope metadata: which gateway this is for and when it was generated.
    gateway_id: UUID
    generated_at: datetime
    scraper_enabled: bool

    # One ProviderUsageScrapeResult per entry.
    results: list[ProviderUsageScrapeResult]
class RuntimeUsageResponse(SQLModel):
    """Complete runtime usage payload returned by GET /gateways/{id}/runtime-usage."""

    # Payload metadata.
    generated_at: datetime
    gateway_id: UUID

    # Nested sections, each described by its own schema class.
    window: RuntimeUsageWindow
    current: RuntimeUsageCurrent
    burn_rate: RuntimeUsageBurnRate
    predictions: RuntimeUsagePredictions

    # Per-model breakdown; each key has the form "provider/model".
    per_model: dict[str, ModelUsageEntry]

    # Session summary rows (see TopSession).
    top_sessions: list[TopSession]