Pipeline/backend/app/schemas/runtime_usage.py

"""Response schemas for the gateway runtime usage endpoint."""

from __future__ import annotations

from datetime import datetime
from uuid import UUID

from sqlmodel import Field, SQLModel

# Runtime reference to the two non-builtin types used in field annotations below.
# NOTE(review): presumably this keeps `datetime` and `UUID` referenced at runtime so
# import-sorting/linting tools do not move them behind TYPE_CHECKING, where the
# models could no longer resolve their postponed annotations — confirm before removing.
RUNTIME_ANNOTATION_TYPES = (datetime, UUID)


class RuntimeUsageWindow(SQLModel):
    """Rolling 5-hour usage window metadata.

    ``key``, ``started_at``, ``resets_at`` and ``reset_in_ms`` are required.
    ``source`` and ``confidence`` fall back to "local_jsonl_estimate" / "low"
    when the producer does not set them.
    """

    key: str  # "5h"
    started_at: datetime
    resets_at: datetime
    reset_in_ms: int  # milliseconds until oldest event ages out
    # Provenance label and trust level for the window boundaries. Both are free-form
    # strings here; the schema does not restrict them to a fixed set of values.
    source: str = "local_jsonl_estimate"  # source of this window
    confidence: str = "low"  # confidence level for this window


class RuntimeUsageCurrent(SQLModel):
    """Aggregated totals within the current window.

    ``total_cost_usd``, ``total_tokens`` and ``total_calls`` are required.
    ``total_output_tokens`` defaults to 0, and every limit, percent and
    ``*_source`` field defaults to ``None``, so a payload with no known limits
    is still valid.
    """

    total_cost_usd: float
    total_tokens: int       # input + output across all sessions
    total_output_tokens: int = 0  # output tokens only — used with output_token_limit
    total_calls: int

    # ── Legacy fields (kept for backwards compat) ────────────────────────────
    # token_limit is ambiguous (could be total or output); use typed fields below
    # when the limit kind is known.
    # Percent fields are typed as int, so fractional percentages cannot be carried;
    # presumably the producer rounds before populating them — TODO confirm.
    token_limit: int | None = None
    token_pct: int | None = None
    cost_limit_usd: float | None = None
    cost_pct: int | None = None
    token_limit_source: str | None = None
    cost_limit_source: str | None = None

    # ── Typed limits (Phase 4) ────────────────────────────────────────────────
    # Each field pairs a limit with a percent computed from matching units only.
    # Each group is a (limit, percent, source) triple; all three stay None when
    # that kind of limit is unknown.

    # Output-token limit: compared against output tokens only, never input/cache.
    output_token_limit: int | None = None
    output_token_limit_pct: int | None = None
    output_token_limit_source: str | None = None

    # Total-token limit: compared against input + output combined.
    total_token_limit: int | None = None
    total_token_limit_pct: int | None = None
    total_token_limit_source: str | None = None

    # Message/request limit: compared against call count, never token totals.
    # NOTE(review): the percent field is `message_pct`, not `message_limit_pct`,
    # unlike the two token groups above; the name is part of the API contract.
    message_limit: int | None = None
    message_pct: int | None = None
    message_limit_source: str | None = None


class RuntimeUsageBurnRate(SQLModel):
    """Recent token and cost velocity (last 60 minutes of the window).

    All three values are per-minute rates. Only ``output_tokens_per_minute``
    has a default (0.0); the other two must be supplied.
    """

    tokens_per_minute: float          # input + output combined
    output_tokens_per_minute: float = 0.0  # output tokens only
    cost_usd_per_minute: float


class RuntimeUsagePredictions(SQLModel):
    """Estimates derived from current burn rate and configured limits.

    ``safe`` is the only required field; ``time_to_limit_ms`` may be ``None``
    and ``limit_kind`` defaults to "total_tokens".
    """

    time_to_limit_ms: int | None = None  # None when limit or burn rate unknown
    safe: bool  # True if time_to_limit > reset_in_ms (will reset before hitting limit)
    # Free-form string; only the default value is visible in this module.
    # NOTE(review): other values presumably name the other limit kinds
    # (output tokens, messages) — confirm against the producer.
    limit_kind: str = "total_tokens"  # which limit drove this prediction


class ModelUsageEntry(SQLModel):
    """Usage and cost breakdown for one provider/model combination.

    Every field except ``source`` is required, so producers must supply
    explicit zeros for token categories that do not apply.
    """

    provider: str         # normalised: "anthropic", "openai", "ollama", "unknown"
    account_key: str      # e.g. "claude-default", "openai-work", "ollama-local"
    model: str            # normalised model slug, e.g. "claude-sonnet-4-6"
    input_tokens: int
    output_tokens: int
    cache_read_tokens: int
    cache_write_tokens: int
    # NOTE(review): this schema does not state whether the cache token counts are
    # included in total_tokens — confirm against the producer before summing.
    total_tokens: int
    cost_usd: float
    calls: int
    unpriced: bool  # True = unknown paid model; False = priced or intentionally free (Ollama)
    source: str = "local_jsonl_estimate"  # source of this data


class TopSession(SQLModel):
    """Summary row for one session, sorted by cost descending.

    The ordering is a property of the list these rows are returned in; this
    model holds a single row and enforces no ordering itself.
    """

    session_id: str
    label: str | None = None
    model: str | None = None
    cost_usd: float
    total_tokens: int
    # Carried as a plain string, so no timestamp format is validated here.
    # NOTE(review): presumably an ISO-8601 timestamp — confirm against the producer.
    updated_at: str | None = None
    source: str = "local_jsonl_estimate"  # source of this session data


class ProviderUsageWindow(SQLModel):
    """One provider-native usage window (session/week/model-specific).

    Only ``key`` and ``label`` are required; the measurement fields default to
    ``None``, and ``source`` / ``confidence`` default to "provider_native" / "high".
    """

    key: str  # current_session | weekly_all_models | weekly_sonnet | extra_usage
    label: str
    pct_used: float | None = None
    remaining_ms: int | None = None
    remaining_label: str | None = None
    extra_text: str | None = None
    # Provenance defaults for this window; both are unconstrained strings.
    source: str = "provider_native"
    confidence: str = "high"


class ProviderUsageScrapeResult(SQLModel):
    """Structured result from one provider-native usage scrape (e.g. Claude CLI /usage).

    Returned by GET /gateways/{id}/provider-usage.
    The identity and freshness fields (``provider`` through
    ``freshness_ttl_seconds``) are required. Every measurement field is
    optional — partial data is still useful and expected
    when CLI output format changes or the session is quiet.
    """

    provider: str          # "anthropic", "openai", "google"
    source_name: str       # "claude_cli_tmux", "gemini_scrape", etc.
    scraped_at: datetime
    fresh: bool            # True if within the freshness window
    freshness_ttl_seconds: int

    # default_factory gives each instance its own empty list when none is supplied.
    windows: list[ProviderUsageWindow] = Field(default_factory=list)

    current_pct: float | None = None       # 0–100 % of current window used
    remaining_ms: int | None = None        # ms until window resets
    remaining_label: str | None = None     # human-readable "2h 47m"

    weekly_messages_used: int | None = None
    weekly_messages_limit: int | None = None
    weekly_tokens_used: int | None = None
    weekly_cost_usd: float | None = None

    raw_text: str | None = None   # included when DEBUG_SCRAPER_RAW=true
    error: str | None = None      # set when scrape or parse failed

    # Source and confidence for the scraped data
    # (`source` is a separate field from `source_name` above.)
    source: str | None = None     # e.g. "provider_native" or "provider_api_rate_limit"
    confidence: str | None = None # e.g. "high" or "medium"


class ProviderUsageResponse(SQLModel):
    """Response envelope for GET /gateways/{id}/provider-usage.

    All four fields are required; ``results`` has no default, so an empty
    list must be passed explicitly when there is nothing to report.
    """

    gateway_id: UUID
    generated_at: datetime
    scraper_enabled: bool
    results: list[ProviderUsageScrapeResult]


class ClaudeStatuslineUsageIn(SQLModel):
    """Sanitized Claude Code status-line payload posted by a local collector.

    Claude Code passes a much larger JSON object to status-line commands. The
    collector should forward only these low-risk fields so Pipeline never needs
    raw prompts, file paths beyond the current workspace, or credentials.

    Every field is optional. The three mapping fields are typed
    ``dict[str, object]``, so this schema does not constrain their nested
    contents; consumers must check inner values themselves.
    """

    session_id: str | None = None
    model: dict[str, object] | None = None
    workspace: dict[str, object] | None = None
    rate_limits: dict[str, object] | None = None


class RuntimeUsageResponse(SQLModel):
    """Complete runtime usage payload returned by GET /gateways/{id}/runtime-usage.

    Top-level envelope composing the window, totals, burn-rate and prediction
    models with per-model and per-session breakdowns. Every field is required.
    """

    generated_at: datetime
    gateway_id: UUID
    window: RuntimeUsageWindow
    current: RuntimeUsageCurrent
    burn_rate: RuntimeUsageBurnRate
    predictions: RuntimeUsagePredictions
    per_model: dict[str, ModelUsageEntry]  # key = "provider/model"
    top_sessions: list[TopSession]
-												feat(runtime-usage): add read-only usage core service, schemas, and API endpoint (batch 1, #30)

											
										
										
											2026-05-20 20:15:02 -05:00
+								"""Response schemas for the gateway runtime usage endpoint."""
 								from __future__ import annotations
 								from datetime import datetime
 								from uuid import UUID
-												feat(usage): fix local window estimation + provider-native windows + pricing updates (#37 #38 #40)

Phase 3 (#38): Fix reset_in_ms=0 bug
- Add _oldest_active_ts() to find oldest session timestamp in 5h window
- _build_window() now anchors fallback to oldest_event_ts + 5h instead of now - 5h
- Add _parse_rate_limit_reset_value() and _extract_rate_limit_reset_at() for proper rate-limit reset parsing
- Source/confidence labeling now based on reset provenance

Phase 2 (#37): Provider-native usage windows
- ParsedClaudeUsageWindow dataclass with section-aware parsing
- Frontend ProviderNativeUsageWindow interface and provider-native usage section
- sessions.list call now has 8s timeout to avoid gateway blocking

Phase 5 (#40): Pricing fixes
- Opus cache_write corrected .75 → .75
- Added GPT-4.1/mini/nano, GPT-4.5 pricing
- Pricing override loader supports both shapes (rates_usd_per_million wrapper and direct dict)

											
										
										
											2026-05-21 01:32:59 -05:00
+								from sqlmodel import Field, SQLModel
-												feat(runtime-usage): add read-only usage core service, schemas, and API endpoint (batch 1, #30)

											
										
										
											2026-05-20 20:15:02 -05:00
 								RUNTIME_ANNOTATION_TYPES = (datetime, UUID)
 								class RuntimeUsageWindow(SQLModel):
 								    """Rolling 5-hour usage window metadata."""
 								    key: str  # "5h"
 								    started_at: datetime
 								    resets_at: datetime
 								    reset_in_ms: int  # milliseconds until oldest event ages out
-												feat(usage): add source/confidence fields and relabel API rate limits (Phase 1, #36)

- Add source and confidence fields to RuntimeUsageWindow, ModelUsageEntry,
  TopSession, RuntimeUsageCurrent, and ProviderUsageScrapeResult schemas
- _build_window() assigns source based on data origin:
  provider_native > provider_api_rate_limit > local_jsonl_estimate
- _build_current() tags token_limit_source and cost_limit_source
- Frontend relabels 'Current session'/'All models' to 'API rate limit'
- Shows source label and confidence in usage strip
- Changes 'did not return active usage windows' to 'did not return
  API rate-limit windows for percent + reset diagnostics'

											
										
										
											2026-05-21 01:01:05 -05:00
+								    source: str = "local_jsonl_estimate"  # source of this window
 								    confidence: str = "low"  # confidence level for this window
-												feat(runtime-usage): add read-only usage core service, schemas, and API endpoint (batch 1, #30)

											
										
										
											2026-05-20 20:15:02 -05:00
 								class RuntimeUsageCurrent(SQLModel):
 								    """Aggregated totals within the current window."""
 								    total_cost_usd: float
-												feat(usage): separate limit types — typed limits for output tokens, total tokens, messages (#39)

- Add typed limit fields to RuntimeUsageCurrent: output_token_limit,
  total_token_limit, message_limit with matching pct and source
- Add total_output_tokens and output_tokens_per_minute to burn rate
- _build_current() now computes each pct from matching units only
- Legacy token_limit backfilled from typed limits for backwards compat
- Frontend aggregateRuntimeUsage() tracks typed limits separately
- limit_kind field on predictions indicates which limit drove time-to-limit

											
										
										
											2026-05-21 01:43:28 -05:00
+								    total_tokens: int       # input + output across all sessions
 								    total_output_tokens: int = 0  # output tokens only — used with output_token_limit
-												feat(runtime-usage): add read-only usage core service, schemas, and API endpoint (batch 1, #30)

											
										
										
											2026-05-20 20:15:02 -05:00
+								    total_calls: int
-												feat(usage): separate limit types — typed limits for output tokens, total tokens, messages (#39)

- Add typed limit fields to RuntimeUsageCurrent: output_token_limit,
  total_token_limit, message_limit with matching pct and source
- Add total_output_tokens and output_tokens_per_minute to burn rate
- _build_current() now computes each pct from matching units only
- Legacy token_limit backfilled from typed limits for backwards compat
- Frontend aggregateRuntimeUsage() tracks typed limits separately
- limit_kind field on predictions indicates which limit drove time-to-limit

											
										
										
											2026-05-21 01:43:28 -05:00
 								    # ── Legacy fields (kept for backwards compat) ────────────────────────────
 								    # token_limit is ambiguous (could be total or output); use typed fields below
 								    # when the limit kind is known.
 								    token_limit: int | None = None
 								    token_pct: int | None = None
-												feat(runtime-usage): add read-only usage core service, schemas, and API endpoint (batch 1, #30)

											
										
										
											2026-05-20 20:15:02 -05:00
+								    cost_limit_usd: float | None = None
 								    cost_pct: int | None = None
-												feat(usage): add source/confidence fields and relabel API rate limits (Phase 1, #36)

- Add source and confidence fields to RuntimeUsageWindow, ModelUsageEntry,
  TopSession, RuntimeUsageCurrent, and ProviderUsageScrapeResult schemas
- _build_window() assigns source based on data origin:
  provider_native > provider_api_rate_limit > local_jsonl_estimate
- _build_current() tags token_limit_source and cost_limit_source
- Frontend relabels 'Current session'/'All models' to 'API rate limit'
- Shows source label and confidence in usage strip
- Changes 'did not return active usage windows' to 'did not return
  API rate-limit windows for percent + reset diagnostics'

											
										
										
											2026-05-21 01:01:05 -05:00
+								    token_limit_source: str | None = None
 								    cost_limit_source: str | None = None
-												feat(runtime-usage): add read-only usage core service, schemas, and API endpoint (batch 1, #30)

											
										
										
											2026-05-20 20:15:02 -05:00
-												feat(usage): separate limit types — typed limits for output tokens, total tokens, messages (#39)

- Add typed limit fields to RuntimeUsageCurrent: output_token_limit,
  total_token_limit, message_limit with matching pct and source
- Add total_output_tokens and output_tokens_per_minute to burn rate
- _build_current() now computes each pct from matching units only
- Legacy token_limit backfilled from typed limits for backwards compat
- Frontend aggregateRuntimeUsage() tracks typed limits separately
- limit_kind field on predictions indicates which limit drove time-to-limit

											
										
										
											2026-05-21 01:43:28 -05:00
+								    # ── Typed limits (Phase 4) ────────────────────────────────────────────────
 								    # Each field pairs a limit with a percent computed from matching units only.
 								    # Output-token limit: compared against output tokens only, never input/cache.
 								    output_token_limit: int | None = None
 								    output_token_limit_pct: int | None = None
 								    output_token_limit_source: str | None = None
 								    # Total-token limit: compared against input + output combined.
 								    total_token_limit: int | None = None
 								    total_token_limit_pct: int | None = None
 								    total_token_limit_source: str | None = None
 								    # Message/request limit: compared against call count, never token totals.
 								    message_limit: int | None = None
 								    message_pct: int | None = None
 								    message_limit_source: str | None = None
-												feat(runtime-usage): add read-only usage core service, schemas, and API endpoint (batch 1, #30)

											
										
										
											2026-05-20 20:15:02 -05:00
 								class RuntimeUsageBurnRate(SQLModel):
 								    """Recent token and cost velocity (last 60 minutes of the window)."""
-												feat(usage): separate limit types — typed limits for output tokens, total tokens, messages (#39)

- Add typed limit fields to RuntimeUsageCurrent: output_token_limit,
  total_token_limit, message_limit with matching pct and source
- Add total_output_tokens and output_tokens_per_minute to burn rate
- _build_current() now computes each pct from matching units only
- Legacy token_limit backfilled from typed limits for backwards compat
- Frontend aggregateRuntimeUsage() tracks typed limits separately
- limit_kind field on predictions indicates which limit drove time-to-limit

											
										
										
											2026-05-21 01:43:28 -05:00
+								    tokens_per_minute: float          # input + output combined
 								    output_tokens_per_minute: float = 0.0  # output tokens only
-												feat(runtime-usage): add read-only usage core service, schemas, and API endpoint (batch 1, #30)

											
										
										
											2026-05-20 20:15:02 -05:00
+								    cost_usd_per_minute: float
 								class RuntimeUsagePredictions(SQLModel):
 								    """Estimates derived from current burn rate and configured limits."""
 								    time_to_limit_ms: int | None = None  # None when limit or burn rate unknown
 								    safe: bool  # True if time_to_limit > reset_in_ms (will reset before hitting limit)
-												feat(usage): separate limit types — typed limits for output tokens, total tokens, messages (#39)

- Add typed limit fields to RuntimeUsageCurrent: output_token_limit,
  total_token_limit, message_limit with matching pct and source
- Add total_output_tokens and output_tokens_per_minute to burn rate
- _build_current() now computes each pct from matching units only
- Legacy token_limit backfilled from typed limits for backwards compat
- Frontend aggregateRuntimeUsage() tracks typed limits separately
- limit_kind field on predictions indicates which limit drove time-to-limit

											
										
										
											2026-05-21 01:43:28 -05:00
+								    limit_kind: str = "total_tokens"  # which limit drove this prediction
-												feat(runtime-usage): add read-only usage core service, schemas, and API endpoint (batch 1, #30)

											
										
										
											2026-05-20 20:15:02 -05:00
 								class ModelUsageEntry(SQLModel):
 								    """Usage and cost breakdown for one provider/model combination."""
 								    provider: str         # normalised: "anthropic", "openai", "ollama", "unknown"
 								    account_key: str      # e.g. "claude-default", "openai-work", "ollama-local"
 								    model: str            # normalised model slug, e.g. "claude-sonnet-4-6"
 								    input_tokens: int
 								    output_tokens: int
 								    cache_read_tokens: int
 								    cache_write_tokens: int
 								    total_tokens: int
 								    cost_usd: float
 								    calls: int
 								    unpriced: bool  # True = unknown paid model; False = priced or intentionally free (Ollama)
-												feat(usage): add source/confidence fields and relabel API rate limits (Phase 1, #36)

- Add source and confidence fields to RuntimeUsageWindow, ModelUsageEntry,
  TopSession, RuntimeUsageCurrent, and ProviderUsageScrapeResult schemas
- _build_window() assigns source based on data origin:
  provider_native > provider_api_rate_limit > local_jsonl_estimate
- _build_current() tags token_limit_source and cost_limit_source
- Frontend relabels 'Current session'/'All models' to 'API rate limit'
- Shows source label and confidence in usage strip
- Changes 'did not return active usage windows' to 'did not return
  API rate-limit windows for percent + reset diagnostics'

											
										
										
											2026-05-21 01:01:05 -05:00
+								    source: str = "local_jsonl_estimate"  # source of this data
-												feat(runtime-usage): add read-only usage core service, schemas, and API endpoint (batch 1, #30)

											
										
										
											2026-05-20 20:15:02 -05:00
 								class TopSession(SQLModel):
 								    """Summary row for one session, sorted by cost descending."""
 								    session_id: str
 								    label: str | None = None
 								    model: str | None = None
 								    cost_usd: float
 								    total_tokens: int
 								    updated_at: str | None = None
-												feat(usage): add source/confidence fields and relabel API rate limits (Phase 1, #36)

- Add source and confidence fields to RuntimeUsageWindow, ModelUsageEntry,
  TopSession, RuntimeUsageCurrent, and ProviderUsageScrapeResult schemas
- _build_window() assigns source based on data origin:
  provider_native > provider_api_rate_limit > local_jsonl_estimate
- _build_current() tags token_limit_source and cost_limit_source
- Frontend relabels 'Current session'/'All models' to 'API rate limit'
- Shows source label and confidence in usage strip
- Changes 'did not return active usage windows' to 'did not return
  API rate-limit windows for percent + reset diagnostics'

											
										
										
											2026-05-21 01:01:05 -05:00
+								    source: str = "local_jsonl_estimate"  # source of this session data
 								class ProviderUsageWindow(SQLModel):
 								    """One provider-native usage window (session/week/model-specific)."""
 								    key: str  # current_session | weekly_all_models | weekly_sonnet | extra_usage
 								    label: str
 								    pct_used: float | None = None
 								    remaining_ms: int | None = None
 								    remaining_label: str | None = None
 								    extra_text: str | None = None
 								    source: str = "provider_native"
 								    confidence: str = "high"
-												feat(runtime-usage): add read-only usage core service, schemas, and API endpoint (batch 1, #30)

											
										
										
											2026-05-20 20:15:02 -05:00
-												feat(runtime-usage): add provider usage scrapers as optional local adapters (batch 3, #32)

											
										
										
											2026-05-20 20:55:05 -05:00
+								class ProviderUsageScrapeResult(SQLModel):
 								    """Structured result from one provider-native usage scrape (e.g. Claude CLI /usage).
 								    Returned by GET /gateways/{id}/provider-usage.
 								    All fields are optional — partial data is still useful and expected
 								    when CLI output format changes or the session is quiet.
 								    """
 								    provider: str          # "anthropic", "openai", "google"
 								    source_name: str       # "claude_cli_tmux", "gemini_scrape", etc.
 								    scraped_at: datetime
 								    fresh: bool            # True if within the freshness window
 								    freshness_ttl_seconds: int
-												feat(usage): fix local window estimation + provider-native windows + pricing updates (#37 #38 #40)

Phase 3 (#38): Fix reset_in_ms=0 bug
- Add _oldest_active_ts() to find oldest session timestamp in 5h window
- _build_window() now anchors fallback to oldest_event_ts + 5h instead of now - 5h
- Add _parse_rate_limit_reset_value() and _extract_rate_limit_reset_at() for proper rate-limit reset parsing
- Source/confidence labeling now based on reset provenance

Phase 2 (#37): Provider-native usage windows
- ParsedClaudeUsageWindow dataclass with section-aware parsing
- Frontend ProviderNativeUsageWindow interface and provider-native usage section
- sessions.list call now has 8s timeout to avoid gateway blocking

Phase 5 (#40): Pricing fixes
- Opus cache_write corrected .75 → .75
- Added GPT-4.1/mini/nano, GPT-4.5 pricing
- Pricing override loader supports both shapes (rates_usd_per_million wrapper and direct dict)

											
										
										
											2026-05-21 01:32:59 -05:00
+								    windows: list[ProviderUsageWindow] = Field(default_factory=list)
-												feat(usage): add source/confidence fields and relabel API rate limits (Phase 1, #36)

- Add source and confidence fields to RuntimeUsageWindow, ModelUsageEntry,
  TopSession, RuntimeUsageCurrent, and ProviderUsageScrapeResult schemas
- _build_window() assigns source based on data origin:
  provider_native > provider_api_rate_limit > local_jsonl_estimate
- _build_current() tags token_limit_source and cost_limit_source
- Frontend relabels 'Current session'/'All models' to 'API rate limit'
- Shows source label and confidence in usage strip
- Changes 'did not return active usage windows' to 'did not return
  API rate-limit windows for percent + reset diagnostics'

											
										
										
											2026-05-21 01:01:05 -05:00
-												feat(runtime-usage): add provider usage scrapers as optional local adapters (batch 3, #32)

											
										
										
											2026-05-20 20:55:05 -05:00
+								    current_pct: float | None = None       # 0–100 % of current window used
 								    remaining_ms: int | None = None        # ms until window resets
 								    remaining_label: str | None = None     # human-readable "2h 47m"
 								    weekly_messages_used: int | None = None
 								    weekly_messages_limit: int | None = None
 								    weekly_tokens_used: int | None = None
 								    weekly_cost_usd: float | None = None
 								    raw_text: str | None = None   # included when DEBUG_SCRAPER_RAW=true
 								    error: str | None = None      # set when scrape or parse failed
-												feat(usage): add source/confidence fields and relabel API rate limits (Phase 1, #36)

- Add source and confidence fields to RuntimeUsageWindow, ModelUsageEntry,
  TopSession, RuntimeUsageCurrent, and ProviderUsageScrapeResult schemas
- _build_window() assigns source based on data origin:
  provider_native > provider_api_rate_limit > local_jsonl_estimate
- _build_current() tags token_limit_source and cost_limit_source
- Frontend relabels 'Current session'/'All models' to 'API rate limit'
- Shows source label and confidence in usage strip
- Changes 'did not return active usage windows' to 'did not return
  API rate-limit windows for percent + reset diagnostics'

											
										
										
											2026-05-21 01:01:05 -05:00
+								    # Source and confidence for the scraped data
 								    source: str | None = None     # e.g. "provider_native" or "provider_api_rate_limit"
 								    confidence: str | None = None # e.g. "high" or "medium"
-												feat(runtime-usage): add provider usage scrapers as optional local adapters (batch 3, #32)

											
										
										
											2026-05-20 20:55:05 -05:00
 								class ProviderUsageResponse(SQLModel):
 								    """Response envelope for GET /gateways/{id}/provider-usage."""
 								    gateway_id: UUID
 								    generated_at: datetime
 								    scraper_enabled: bool
 								    results: list[ProviderUsageScrapeResult]
-												fix: ai reauth

											
										
										
											2026-05-21 04:25:31 -05:00
+								class ClaudeStatuslineUsageIn(SQLModel):
 								    """Sanitized Claude Code status-line payload posted by a local collector.
 								    Claude Code passes a much larger JSON object to status-line commands. The
 								    collector should forward only these low-risk fields so Pipeline never needs
 								    raw prompts, file paths beyond the current workspace, or credentials.
 								    """
 								    session_id: str | None = None
 								    model: dict[str, object] | None = None
 								    workspace: dict[str, object] | None = None
 								    rate_limits: dict[str, object] | None = None
-												feat(runtime-usage): add read-only usage core service, schemas, and API endpoint (batch 1, #30)

											
										
										
											2026-05-20 20:15:02 -05:00
+								class RuntimeUsageResponse(SQLModel):
 								    """Complete runtime usage payload returned by GET /gateways/{id}/runtime-usage."""
 								    generated_at: datetime
 								    gateway_id: UUID
 								    window: RuntimeUsageWindow
 								    current: RuntimeUsageCurrent
 								    burn_rate: RuntimeUsageBurnRate
 								    predictions: RuntimeUsagePredictions
 								    per_model: dict[str, ModelUsageEntry]  # key = "provider/model"
 								    top_sessions: list[TopSession]