Pipeline/backend/app/schemas/runtime_usage.py

86 lines
2.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Response schemas for the gateway runtime usage endpoint."""
from __future__ import annotations
from datetime import datetime
from uuid import UUID
from sqlmodel import SQLModel
# Tuple holding the two imported types that the schemas below use in field
# annotations.
# NOTE(review): presumably this exists so `datetime` and `UUID` stay referenced
# at runtime (annotations are lazy strings under `from __future__ import
# annotations`, and the models must still resolve them) — confirm before
# removing.
RUNTIME_ANNOTATION_TYPES = (datetime, UUID)
class RuntimeUsageWindow(SQLModel):
    """Rolling 5-hour usage window metadata."""

    # Window identifier string; the value noted by the original author is "5h".
    key: str

    # Start and reset timestamps of the window.
    started_at: datetime
    resets_at: datetime

    # Milliseconds remaining until the oldest event ages out of the window.
    reset_in_ms: int
class RuntimeUsageCurrent(SQLModel):
    """Aggregated totals within the current window."""

    # --- Required totals ---------------------------------------------------
    total_cost_usd: float
    # Input plus output tokens, summed across all sessions.
    total_tokens: int
    total_calls: int

    # --- Optional limit information (every field defaults to None) ---------
    # Configured token limit; None means the limit is unknown.
    token_limit: int | None = None
    # Whole-number percentage in the range 0-100; None when the limit is
    # unknown.
    token_pct: int | None = None
    # NOTE(review): the two cost fields carried no comments originally;
    # presumably they are the cost counterparts of token_limit / token_pct —
    # confirm against the code that fills this schema.
    cost_limit_usd: float | None = None
    cost_pct: int | None = None
class RuntimeUsageBurnRate(SQLModel):
    """Recent token and cost velocity (last 60 minutes of the window)."""

    # Both fields are required per-minute rates: tokens and USD respectively.
    tokens_per_minute: float
    cost_usd_per_minute: float
class RuntimeUsagePredictions(SQLModel):
    """Estimates derived from current burn rate and configured limits."""

    # Milliseconds until the limit is reached; None when either the limit or
    # the burn rate is unknown.
    time_to_limit_ms: int | None = None

    # True if time_to_limit > reset_in_ms, i.e. the window will reset before
    # the limit is hit.
    # This field has no default, so it is required even though it follows a
    # defaulted field.
    safe: bool
class ModelUsageEntry(SQLModel):
    """Usage and cost breakdown for one provider/model combination."""

    # --- Identity ----------------------------------------------------------
    # Normalised provider name: "anthropic", "openai", "ollama", "unknown".
    provider: str
    # Account identifier, e.g. "claude-default", "openai-work", "ollama-local".
    account_key: str
    # Normalised model slug, e.g. "claude-sonnet-4-6".
    model: str

    # --- Token counters ----------------------------------------------------
    input_tokens: int
    output_tokens: int
    cache_read_tokens: int
    cache_write_tokens: int
    # NOTE(review): which of the counters above are folded into total_tokens
    # is not visible in this file — confirm against the aggregation code.
    total_tokens: int

    # --- Cost and call volume ----------------------------------------------
    cost_usd: float
    calls: int
    # True = unknown paid model; False = priced or intentionally free (Ollama).
    unpriced: bool
class TopSession(SQLModel):
    """Summary row for one session, sorted by cost descending."""

    session_id: str

    # Optional descriptive fields; each defaults to None.
    label: str | None = None
    model: str | None = None

    # Required figures (no defaults, despite following defaulted fields).
    cost_usd: float
    total_tokens: int

    # Optional and typed as a plain string, unlike the datetime timestamps
    # used by the other schemas in this module.
    # NOTE(review): the expected string format is not visible here — confirm.
    updated_at: str | None = None
class RuntimeUsageResponse(SQLModel):
    """Complete runtime usage payload returned by GET /gateways/{id}/runtime-usage."""

    # --- Envelope ----------------------------------------------------------
    generated_at: datetime
    gateway_id: UUID

    # --- Nested sections, one per schema defined in this module ------------
    window: RuntimeUsageWindow
    current: RuntimeUsageCurrent
    burn_rate: RuntimeUsageBurnRate
    predictions: RuntimeUsagePredictions

    # --- Breakdowns --------------------------------------------------------
    # Mapping whose key is the string "provider/model".
    per_model: dict[str, ModelUsageEntry]
    top_sessions: list[TopSession]