86 lines
2.5 KiB
Python
86 lines
2.5 KiB
Python
"""Response schemas for the gateway runtime usage endpoint."""
|
||
|
||
from __future__ import annotations
|
||
|
||
from datetime import datetime
|
||
from uuid import UUID
|
||
|
||
from sqlmodel import SQLModel
|
||
|
||
# Tuple of the two non-builtin types used in field annotations below.
# NOTE(review): presumably this exists to keep `datetime` and `UUID` referenced
# at runtime, so the imports are not treated as annotation-only under
# `from __future__ import annotations` — confirm before removing.
RUNTIME_ANNOTATION_TYPES = (datetime, UUID)
|
||
|
||
|
||
class RuntimeUsageWindow(SQLModel):
    """Rolling 5-hour usage window metadata.

    Describes the time span that the aggregated usage figures refer to and
    when that span is due to reset.
    """

    # Window identifier, e.g. "5h".
    key: str
    # Start of the window.
    started_at: datetime
    # Moment at which the window resets.
    resets_at: datetime
    # Milliseconds until the oldest event ages out of the window.
    reset_in_ms: int
|
||
|
||
|
||
class RuntimeUsageCurrent(SQLModel):
    """Aggregated totals within the current window.

    The limit and percentage fields are optional and default to ``None``.
    """

    # Total spend in US dollars.
    total_cost_usd: float
    # Input + output tokens across all sessions.
    total_tokens: int
    # Number of calls counted in the window.
    total_calls: int
    # Configured token limit; None = unknown.
    token_limit: int | None = None
    # Token usage as a percentage, 0–100; None when the limit is unknown.
    token_pct: int | None = None
    # Cost limit in US dollars.
    # NOTE(review): presumably None means "unknown", mirroring token_limit — confirm.
    cost_limit_usd: float | None = None
    # NOTE(review): presumably 0–100 and None when the cost limit is unknown,
    # mirroring token_pct — confirm against the producer.
    cost_pct: int | None = None
|
||
|
||
|
||
class RuntimeUsageBurnRate(SQLModel):
    """Recent token and cost velocity (last 60 minutes of the window)."""

    # Tokens consumed per minute.
    tokens_per_minute: float
    # Spend in US dollars per minute.
    cost_usd_per_minute: float
|
||
|
||
|
||
class RuntimeUsagePredictions(SQLModel):
    """Estimates derived from current burn rate and configured limits."""

    # Estimated milliseconds until the limit is reached;
    # None when the limit or the burn rate is unknown.
    time_to_limit_ms: int | None = None
    # True if time_to_limit > reset_in_ms, i.e. the window will reset before
    # the limit is hit. Required field: it has no default.
    safe: bool
|
||
|
||
|
||
class ModelUsageEntry(SQLModel):
    """Usage and cost breakdown for one provider/model combination."""

    # Normalised provider name: "anthropic", "openai", "ollama", "unknown".
    provider: str
    # Account identifier, e.g. "claude-default", "openai-work", "ollama-local".
    account_key: str
    # Normalised model slug, e.g. "claude-sonnet-4-6".
    model: str

    # Token counters, one per category.
    input_tokens: int
    output_tokens: int
    cache_read_tokens: int
    cache_write_tokens: int
    # NOTE(review): which of the counters above are included in this total is
    # not visible here — confirm against the aggregation code.
    total_tokens: int

    # Spend in US dollars for this provider/model.
    cost_usd: float
    # Number of calls attributed to this provider/model.
    calls: int
    # True = unknown paid model; False = priced or intentionally free (Ollama).
    unpriced: bool
|
||
|
||
|
||
class TopSession(SQLModel):
    """Summary row for one session, sorted by cost descending.

    Only ``session_id``, ``cost_usd`` and ``total_tokens`` are required; the
    remaining fields default to ``None``.
    """

    # Identifier of the session this row summarises.
    session_id: str
    # Optional label for the session.
    label: str | None = None
    # Optional model name associated with the session.
    model: str | None = None
    # Spend in US dollars for the session.
    cost_usd: float
    # Token count for the session.
    total_tokens: int
    # NOTE(review): typed as a string, unlike the datetime timestamps used by
    # the other schemas in this module — confirm the expected format.
    updated_at: str | None = None
|
||
|
||
|
||
class RuntimeUsageResponse(SQLModel):
    """Complete runtime usage payload returned by GET /gateways/{id}/runtime-usage.

    Top-level envelope that groups the window metadata, aggregated totals,
    burn rate, predictions, per-model breakdown and top sessions.
    """

    # Timestamp attached to this payload.
    generated_at: datetime
    # Identifier of the gateway the payload describes.
    gateway_id: UUID

    # Nested sections of the report.
    window: RuntimeUsageWindow
    current: RuntimeUsageCurrent
    burn_rate: RuntimeUsageBurnRate
    predictions: RuntimeUsagePredictions

    # Per-model breakdown; key = "provider/model".
    per_model: dict[str, ModelUsageEntry]
    # Session summary rows.
    top_sessions: list[TopSession]
|