Pipeline/backend/app/schemas/runtime_usage.py

186 lines
6.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Response schemas for the gateway runtime usage endpoint."""
from __future__ import annotations
from datetime import datetime
from uuid import UUID
from sqlmodel import Field, SQLModel
# Tuple holding runtime references to the imported annotation types.
# NOTE(review): presumably this keeps `datetime` and `UUID` from being
# reported as unused (or moved under TYPE_CHECKING) while annotations are
# lazy strings via `from __future__ import annotations` — confirm.
RUNTIME_ANNOTATION_TYPES = (datetime, UUID)
class RuntimeUsageWindow(SQLModel):
    """Metadata describing the rolling 5-hour usage window."""

    # Window identifier, e.g. "5h".
    key: str
    started_at: datetime
    resets_at: datetime
    # Milliseconds remaining until the oldest event ages out of the window.
    reset_in_ms: int
    # Origin of the data behind this window.
    source: str = "local_jsonl_estimate"
    # Confidence level attached to this window.
    confidence: str = "low"
class RuntimeUsageCurrent(SQLModel):
    """Totals aggregated over the current usage window."""

    total_cost_usd: float
    # Input plus output tokens, summed across all sessions.
    total_tokens: int
    # Output tokens only; this is the figure paired with output_token_limit.
    total_output_tokens: int = 0
    total_calls: int

    # --- Legacy fields (retained for backwards compatibility) -------------
    # token_limit is ambiguous (it may refer to total or to output tokens);
    # prefer the typed fields further down whenever the limit kind is known.
    token_limit: int | None = None
    token_pct: int | None = None
    cost_limit_usd: float | None = None
    cost_pct: int | None = None
    token_limit_source: str | None = None
    cost_limit_source: str | None = None

    # --- Typed limits (Phase 4) -------------------------------------------
    # Every group couples a limit with a percentage that is computed from
    # matching units only.

    # Output-token limit: measured against output tokens alone, never
    # against input or cache tokens.
    output_token_limit: int | None = None
    output_token_limit_pct: int | None = None
    output_token_limit_source: str | None = None

    # Total-token limit: measured against input and output combined.
    total_token_limit: int | None = None
    total_token_limit_pct: int | None = None
    total_token_limit_source: str | None = None

    # Message/request limit: measured against the call count, never against
    # token totals.
    message_limit: int | None = None
    message_pct: int | None = None
    message_limit_source: str | None = None
class RuntimeUsageBurnRate(SQLModel):
    """Token and cost velocity over the last 60 minutes of the window."""

    # Input and output tokens combined.
    tokens_per_minute: float
    # Output tokens only.
    output_tokens_per_minute: float = 0.0
    cost_usd_per_minute: float
class RuntimeUsagePredictions(SQLModel):
    """Projections based on the current burn rate and the configured limits."""

    # Left as None when either the limit or the burn rate is unknown.
    time_to_limit_ms: int | None = None
    # True if time_to_limit > reset_in_ms, i.e. the window will reset before
    # the limit is reached.
    safe: bool
    # Names the limit that drove this prediction.
    limit_kind: str = "total_tokens"
class ModelUsageEntry(SQLModel):
    """Usage and cost figures for a single provider/model pairing."""

    # Normalised provider name: "anthropic", "openai", "ollama", "unknown".
    provider: str
    # Account identifier, e.g. "claude-default", "openai-work", "ollama-local".
    account_key: str
    # Normalised model slug, e.g. "claude-sonnet-4-6".
    model: str
    input_tokens: int
    output_tokens: int
    cache_read_tokens: int
    cache_write_tokens: int
    total_tokens: int
    cost_usd: float
    calls: int
    # True  -> unknown paid model.
    # False -> priced, or intentionally free (Ollama).
    unpriced: bool
    # Origin of this entry's data.
    source: str = "local_jsonl_estimate"
class TopSession(SQLModel):
    """One session's summary row; rows are sorted by cost, highest first."""

    session_id: str
    label: str | None = None
    model: str | None = None
    cost_usd: float
    total_tokens: int
    updated_at: str | None = None
    # Origin of this session's data.
    source: str = "local_jsonl_estimate"
class ProviderUsageWindow(SQLModel):
    """A single provider-native usage window (session, week or per-model)."""

    # One of: current_session | weekly_all_models | weekly_sonnet | extra_usage
    key: str
    label: str
    pct_used: float | None = None
    remaining_ms: int | None = None
    remaining_label: str | None = None
    extra_text: str | None = None
    source: str = "provider_native"
    confidence: str = "high"
class ProviderUsageScrapeResult(SQLModel):
    """Structured outcome of a single provider-native usage scrape.

    An example source is the Claude CLI ``/usage`` output. Instances are
    returned by GET /gateways/{id}/provider-usage.

    The usage measurements all default to None (or an empty list): partial
    data is still useful and is expected whenever the CLI output format
    changes or the session is quiet.
    """

    # Provider name: "anthropic", "openai", "google".
    provider: str
    # Scraper identifier: "claude_cli_tmux", "gemini_scrape", etc.
    source_name: str
    scraped_at: datetime
    # True when the scrape falls within the freshness window.
    fresh: bool
    freshness_ttl_seconds: int
    windows: list[ProviderUsageWindow] = Field(default_factory=list)
    # Percent of the current window used, on a 0-100 scale.
    current_pct: float | None = None
    # Milliseconds until the window resets.
    remaining_ms: int | None = None
    # Human-readable remaining time, e.g. "2h 47m".
    remaining_label: str | None = None
    weekly_messages_used: int | None = None
    weekly_messages_limit: int | None = None
    weekly_tokens_used: int | None = None
    weekly_cost_usd: float | None = None
    # Included when DEBUG_SCRAPER_RAW=true.
    raw_text: str | None = None
    # Populated when the scrape or the parse failed.
    error: str | None = None

    # Source and confidence of the scraped data.
    # source: e.g. "provider_native" or "provider_api_rate_limit".
    source: str | None = None
    # confidence: e.g. "high" or "medium".
    confidence: str | None = None
class ProviderUsageResponse(SQLModel):
    """Envelope returned by GET /gateways/{id}/provider-usage."""

    gateway_id: UUID
    generated_at: datetime
    scraper_enabled: bool
    results: list[ProviderUsageScrapeResult]
class ClaudeStatuslineUsageIn(SQLModel):
    """Sanitized Claude Code status-line payload sent by a local collector.

    Status-line commands receive a far larger JSON object from Claude Code.
    The collector is expected to forward only the low-risk fields declared
    here, so that Pipeline never needs raw prompts, file paths beyond the
    current workspace, or credentials.
    """

    session_id: str | None = None
    model: dict[str, object] | None = None
    workspace: dict[str, object] | None = None
    rate_limits: dict[str, object] | None = None
class RuntimeUsageResponse(SQLModel):
    """Full runtime usage payload for GET /gateways/{id}/runtime-usage."""

    generated_at: datetime
    gateway_id: UUID
    window: RuntimeUsageWindow
    current: RuntimeUsageCurrent
    burn_rate: RuntimeUsageBurnRate
    predictions: RuntimeUsagePredictions
    # Keyed by "provider/model".
    per_model: dict[str, ModelUsageEntry]
    top_sessions: list[TopSession]