2026-05-20 20:15:02 -05:00
|
|
|
|
"""Response schemas for the gateway runtime usage endpoint."""
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
from uuid import UUID
|
|
|
|
|
|
|
2026-05-21 01:32:59 -05:00
|
|
|
|
from sqlmodel import Field, SQLModel
|
2026-05-20 20:15:02 -05:00
|
|
|
|
|
|
|
|
|
|
# Tuple holding runtime references to the imported annotation types.
# NOTE(review): presumably this keeps `datetime` and `UUID` counted as
# runtime-used imports (annotations are postponed by the `__future__` import),
# so they stay available when the schema annotations are resolved — confirm.
RUNTIME_ANNOTATION_TYPES = (datetime, UUID)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RuntimeUsageWindow(SQLModel):
    """Metadata describing the rolling 5-hour usage window."""

    # Window identifier, e.g. "5h".
    key: str
    started_at: datetime
    resets_at: datetime
    # Milliseconds until the oldest event ages out of the window.
    reset_in_ms: int
    # Where this window's data came from.
    source: str = "local_jsonl_estimate"
    # Confidence level attached to this window.
    confidence: str = "low"
|
2026-05-20 20:15:02 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RuntimeUsageCurrent(SQLModel):
    """Totals aggregated over the current usage window."""

    total_cost_usd: float
    # Input + output tokens across all sessions.
    total_tokens: int
    # Output tokens only; this is the figure paired with output_token_limit.
    total_output_tokens: int = 0
    total_calls: int

    # -- Legacy fields (kept for backwards compatibility) ---------------------
    # token_limit is ambiguous (it could mean total or output tokens); prefer
    # the typed fields further down whenever the limit kind is known.
    token_limit: int | None = None
    token_pct: int | None = None
    cost_limit_usd: float | None = None
    cost_pct: int | None = None
    token_limit_source: str | None = None
    cost_limit_source: str | None = None

    # -- Typed limits (Phase 4) -----------------------------------------------
    # Every group pairs a limit with a percentage computed from matching units
    # only.

    # Output-token limit: measured against output tokens only, never
    # input/cache tokens.
    output_token_limit: int | None = None
    output_token_limit_pct: int | None = None
    output_token_limit_source: str | None = None

    # Total-token limit: measured against input + output combined.
    total_token_limit: int | None = None
    total_token_limit_pct: int | None = None
    total_token_limit_source: str | None = None

    # Message/request limit: measured against the call count, never token
    # totals.
    message_limit: int | None = None
    message_pct: int | None = None
    message_limit_source: str | None = None
|
|
|
|
|
|
|
2026-05-20 20:15:02 -05:00
|
|
|
|
|
|
|
|
|
|
class RuntimeUsageBurnRate(SQLModel):
    """Token and cost velocity over the last 60 minutes of the window."""

    # Input + output tokens combined.
    tokens_per_minute: float
    # Output tokens only.
    output_tokens_per_minute: float = 0.0
    cost_usd_per_minute: float
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RuntimeUsagePredictions(SQLModel):
    """Projections derived from the current burn rate and configured limits."""

    # None when either the limit or the burn rate is unknown.
    time_to_limit_ms: int | None = None
    # True if time_to_limit > reset_in_ms, i.e. the window will reset before
    # the limit is hit.
    safe: bool
    # Names the limit that drove this prediction.
    limit_kind: str = "total_tokens"
|
2026-05-20 20:15:02 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ModelUsageEntry(SQLModel):
    """Usage and cost figures for a single provider/model combination."""

    # Normalised provider name: "anthropic", "openai", "ollama", "unknown".
    provider: str
    # Account identifier, e.g. "claude-default", "openai-work", "ollama-local".
    account_key: str
    # Normalised model slug, e.g. "claude-sonnet-4-6".
    model: str
    input_tokens: int
    output_tokens: int
    cache_read_tokens: int
    cache_write_tokens: int
    total_tokens: int
    cost_usd: float
    calls: int
    # True = unknown paid model; False = priced or intentionally free (Ollama).
    unpriced: bool
    # Where this entry's data came from.
    source: str = "local_jsonl_estimate"
|
2026-05-20 20:15:02 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TopSession(SQLModel):
    """One session's summary row; rows are sorted by cost, highest first."""

    session_id: str
    label: str | None = None
    model: str | None = None
    cost_usd: float
    total_tokens: int
    updated_at: str | None = None
    # Where this session's data came from.
    source: str = "local_jsonl_estimate"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ProviderUsageWindow(SQLModel):
    """A single provider-native usage window (session/week/model-specific)."""

    # One of: current_session | weekly_all_models | weekly_sonnet | extra_usage
    key: str
    label: str
    pct_used: float | None = None
    remaining_ms: int | None = None
    remaining_label: str | None = None
    extra_text: str | None = None
    source: str = "provider_native"
    confidence: str = "high"
|
2026-05-20 20:15:02 -05:00
|
|
|
|
|
|
|
|
|
|
|
2026-05-20 20:55:05 -05:00
|
|
|
|
class ProviderUsageScrapeResult(SQLModel):
    """Structured outcome of one provider-native usage scrape (e.g. Claude CLI /usage).

    Returned by GET /gateways/{id}/provider-usage.
    The identification and freshness fields are required; every usage metric
    is optional, because partial data is still useful and expected when the
    CLI output format changes or the session is quiet.
    """

    # Provider name: "anthropic", "openai", "google".
    provider: str
    # Scrape source: "claude_cli_tmux", "gemini_scrape", etc.
    source_name: str
    scraped_at: datetime
    # True if the scrape is within the freshness window.
    fresh: bool
    freshness_ttl_seconds: int

    windows: list[ProviderUsageWindow] = Field(default_factory=list)

    # 0–100 % of the current window used.
    current_pct: float | None = None
    # Milliseconds until the window resets.
    remaining_ms: int | None = None
    # Human-readable remaining time, e.g. "2h 47m".
    remaining_label: str | None = None

    weekly_messages_used: int | None = None
    weekly_messages_limit: int | None = None
    weekly_tokens_used: int | None = None
    weekly_cost_usd: float | None = None

    # Included when DEBUG_SCRAPER_RAW=true.
    raw_text: str | None = None
    # Set when the scrape or the parse failed.
    error: str | None = None

    # Source and confidence for the scraped data.
    # source: e.g. "provider_native" or "provider_api_rate_limit".
    source: str | None = None
    # confidence: e.g. "high" or "medium".
    confidence: str | None = None
|
|
|
|
|
|
|
2026-05-20 20:55:05 -05:00
|
|
|
|
|
|
|
|
|
|
class ProviderUsageResponse(SQLModel):
    """Envelope returned by GET /gateways/{id}/provider-usage."""

    gateway_id: UUID
    generated_at: datetime
    scraper_enabled: bool
    results: list[ProviderUsageScrapeResult]
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-21 04:25:31 -05:00
|
|
|
|
class ClaudeStatuslineUsageIn(SQLModel):
    """Sanitized Claude Code status-line payload posted by a local collector.

    Claude Code hands status-line commands a much larger JSON object. The
    collector should forward only these low-risk fields, so that Pipeline
    never needs raw prompts, file paths beyond the current workspace, or
    credentials.
    """

    session_id: str | None = None
    model: dict[str, object] | None = None
    workspace: dict[str, object] | None = None
    rate_limits: dict[str, object] | None = None
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-05-20 20:15:02 -05:00
|
|
|
|
class RuntimeUsageResponse(SQLModel):
    """Full runtime usage payload served by GET /gateways/{id}/runtime-usage."""

    generated_at: datetime
    gateway_id: UUID
    window: RuntimeUsageWindow
    current: RuntimeUsageCurrent
    burn_rate: RuntimeUsageBurnRate
    predictions: RuntimeUsagePredictions
    # Keyed by "provider/model".
    per_model: dict[str, ModelUsageEntry]
    top_sessions: list[TopSession]
|