feat(usage): add source/confidence fields and relabel API rate limits (Phase 1, #36)

- Add source and confidence fields to RuntimeUsageWindow, ModelUsageEntry, TopSession, RuntimeUsageCurrent, and ProviderUsageScrapeResult schemas
- _build_window() assigns source based on data origin: provider_native > provider_api_rate_limit > local_jsonl_estimate
- _build_current() tags token_limit_source and cost_limit_source
- Frontend relabels 'Current session'/'All models' to 'API rate limit'
- Shows source label and confidence in usage strip
- Changes 'did not return active usage windows' to 'did not return API rate-limit windows for percent + reset diagnostics'
2026-05-21 01:01:05 -05:00 · 2026-05-21 01:01:05 -05:00 · 8d11f4f840
parent 184d86c58a
commit 8d11f4f840
10 changed files with 136 additions and 7 deletions
--- a/.gitignore
+++ b/.gitignore
@ -34,3 +34,4 @@ backend/app/services/openclaw/.device-keys
 FUTURE.md
 FUTURE.md
 docs/runtime-usage-dashboard-plan.md
 docs/remaining-usage-accuracy-review-plan.md
--- a/backend/app/api/provider_credentials.py
+++ b/backend/app/api/provider_credentials.py
@ -162,6 +162,8 @@ async def test_provider_credential(
        account_key=live.account_key,
        checked_at=live.checked_at.isoformat(),
        reachable=live.reachable,
        source=live.source,
        confidence=live.confidence,
        error=live.error,
        tokens=_tok(live.tokens),
        input_tokens=_tok(live.input_tokens),
@ -264,6 +266,8 @@ async def get_provider_usage_live(
        account_key=live.account_key,
        checked_at=live.checked_at.isoformat(),
        reachable=live.reachable,
        source=live.source,
        confidence=live.confidence,
        error=live.error,
        tokens=_tok(live.tokens),
        input_tokens=_tok(live.input_tokens),
--- a/backend/app/schemas/provider_credentials.py
+++ b/backend/app/schemas/provider_credentials.py
@ -56,6 +56,8 @@ class ProviderUsageLiveRead(SQLModel):
    account_key: str
    checked_at: str           # ISO 8601 UTC
    reachable: bool
    source: str  # provider_native | provider_api_rate_limit | local_jsonl_estimate | configured_limit
    confidence: str  # high | medium | low
    error: str | None = None
    tokens: TokenWindowRead
    input_tokens: TokenWindowRead   # Anthropic splits input tokens separately
--- a/backend/app/schemas/runtime_usage.py
+++ b/backend/app/schemas/runtime_usage.py
@ -17,6 +17,8 @@ class RuntimeUsageWindow(SQLModel):
    started_at: datetime
    resets_at: datetime
    reset_in_ms: int  # milliseconds until oldest event ages out
    source: str = "local_jsonl_estimate"  # source of this window
    confidence: str = "low"  # confidence level for this window
 class RuntimeUsageCurrent(SQLModel):
@ -29,6 +31,9 @@ class RuntimeUsageCurrent(SQLModel):
    token_pct: int | None = None   # 0–100; None when limit unknown
    cost_limit_usd: float | None = None
    cost_pct: int | None = None
    # Source and confidence for the limits
    token_limit_source: str | None = None
    cost_limit_source: str | None = None
 class RuntimeUsageBurnRate(SQLModel):
@ -59,6 +64,7 @@ class ModelUsageEntry(SQLModel):
    cost_usd: float
    calls: int
    unpriced: bool  # True = unknown paid model; False = priced or intentionally free (Ollama)
    source: str = "local_jsonl_estimate"  # source of this data
 class TopSession(SQLModel):
@ -70,6 +76,20 @@ class TopSession(SQLModel):
    cost_usd: float
    total_tokens: int
    updated_at: str | None = None
    source: str = "local_jsonl_estimate"  # source of this session data
 class ProviderUsageWindow(SQLModel):
    """One provider-native usage window (session/week/model-specific).

    Only ``key`` and ``label`` are required. Every measurement field is
    optional and defaults to ``None``; ``source`` and ``confidence`` default
    to ``"provider_native"`` and ``"high"`` respectively.
    """
    # Identifier of the window; the known values are listed inline.
    key: str  # current_session | weekly_all_models | weekly_sonnet | extra_usage
    # Label for this window (required, no default).
    label: str
    # NOTE(review): presumably a 0–100 percentage, mirroring
    # ProviderUsageScrapeResult.current_pct — confirm against the scraper.
    pct_used: float | None = None
    # NOTE(review): presumably milliseconds until the window resets, mirroring
    # ProviderUsageScrapeResult.remaining_ms — confirm.
    remaining_ms: int | None = None
    # NOTE(review): presumably a human-readable form of remaining_ms
    # (e.g. "2h 47m") — confirm.
    remaining_label: str | None = None
    # Additional free-form text for the window; exact semantics are not
    # visible from this schema.
    extra_text: str | None = None
    # Origin of the data; defaults to "provider_native".
    source: str = "provider_native"
    # Confidence level for the data; defaults to "high".
    confidence: str = "high"
 class ProviderUsageScrapeResult(SQLModel):
@ -86,6 +106,8 @@ class ProviderUsageScrapeResult(SQLModel):
    fresh: bool            # True if within the freshness window
    freshness_ttl_seconds: int
    windows: list[ProviderUsageWindow] = []
    current_pct: float | None = None       # 0–100 % of current window used
    remaining_ms: int | None = None        # ms until window resets
    remaining_label: str | None = None     # human-readable "2h 47m"
@ -98,6 +120,10 @@ class ProviderUsageScrapeResult(SQLModel):
    raw_text: str | None = None   # included when DEBUG_SCRAPER_RAW=true
    error: str | None = None      # set when scrape or parse failed
    # Source and confidence for the scraped data
    source: str | None = None     # e.g. "provider_native" or "provider_api_rate_limit"
    confidence: str | None = None # e.g. "high" or "medium"
 class ProviderUsageResponse(SQLModel):
    """Response envelope for GET /gateways/{id}/provider-usage."""
--- a/backend/app/services/openclaw/runtime_usage.py
+++ b/backend/app/services/openclaw/runtime_usage.py
@ -323,6 +323,7 @@ def aggregate_per_model(
                "cost_usd": 0.0,
                "calls": 0,
                "unpriced": unpriced,
                "source": "local_jsonl_estimate",  # default source for aggregated session data
            }
        e = entries[key]
        e["input_tokens"]       += tokens["input"]
@ -371,6 +372,7 @@ def _top_sessions(
            cost_usd=round(cost, 8),
            total_tokens=total,
            updated_at=updated,
            source="local_jsonl_estimate",  # default source for session data
        ))
    rows.sort(key=lambda r: r.cost_usd, reverse=True)
    return rows[:limit]
@ -386,8 +388,41 @@ _WINDOW_HOURS = 5
 def _build_window(
    status_raw: dict[str, Any],
    now: datetime,
    account_key: str = "default",
 ) -> RuntimeUsageWindow:
-    """Build the usage window, preferring gateway status data then falling back."""
+    """Build the usage window, preferring gateway status data then falling back.
    Source assignment:
    - If gateway status provides explicit window data, use provider_native
    - If API rate-limit headers are the only source, use provider_api_rate_limit
    - If falling back to local logic, use local_jsonl_estimate
    """
    # Check if gateway status provides explicit window data
    has_window_start = status_raw.get("windowStart") or status_raw.get("window_start") or status_raw.get("period_start") or status_raw.get("started_at")
    has_window_end = status_raw.get("windowEnd") or status_raw.get("window_end") or status_raw.get("period_end") or status_raw.get("resets_at")
    # Check for API rate-limit headers (these indicate throttling, not subscription usage)
    has_rate_limit_headers = (
        status_raw.get("x_ratelimit_remaining") or
        status_raw.get("x_ratelimit_limit") or
        status_raw.get("x_ratelimit_reset") or
        status_raw.get("anthropic_ratelimit_remaining") or
        status_raw.get("anthropic_ratelimit_limit")
    )
    if has_window_start and has_window_end:
        # Gateway status provides explicit window data
        source = "provider_native"
        confidence = "high"
    elif has_rate_limit_headers:
        # Only API rate-limit headers available - treat as diagnostics
        source = "provider_api_rate_limit"
        confidence = "medium"
    else:
        # Fall back to local logic (5-hour window from oldest event)
        source = "local_jsonl_estimate"
        confidence = "low"
    started_at = _parse_datetime(
        status_raw.get("windowStart") or status_raw.get("window_start")
        or status_raw.get("period_start") or status_raw.get("started_at")
@ -408,12 +443,15 @@ def _build_window(
        started_at=started_at,
        resets_at=resets_at,
        reset_in_ms=reset_in_ms,
        source=source,
        confidence=confidence,
    )
 def _build_current(
    per_model: dict[str, ModelUsageEntry],
    status_raw: dict[str, Any],
    account_key: str = "default",
 ) -> RuntimeUsageCurrent:
    total_cost  = round(sum(e.cost_usd for e in per_model.values()), 8)
    total_tokens = sum(e.total_tokens for e in per_model.values())
@ -422,10 +460,34 @@ def _build_current(
    # Try to get configured limits from the gateway status
    raw_token_limit = _get_int(status_raw, "tokenLimit", "token_limit", "messageLimit", "message_limit", default=0)
    token_limit = raw_token_limit or None
    # Determine source for token limit
    if raw_token_limit:
        # Check for API rate-limit headers
        has_rate_limit_headers = (
            status_raw.get("x_ratelimit_remaining") or
            status_raw.get("x_ratelimit_limit") or
            status_raw.get("anthropic_ratelimit_remaining") or
            status_raw.get("anthropic_ratelimit_limit")
        )
        if has_rate_limit_headers:
            token_limit_source = "provider_api_rate_limit"
        else:
            token_limit_source = "configured_limit"
    else:
        token_limit_source = None
    token_pct   = int(min(100, total_tokens * 100 // raw_token_limit)) if raw_token_limit else None
    raw_cost_limit = _get_float(status_raw, "costLimit", "cost_limit", "costLimitUsd", default=0.0)
    cost_limit  = raw_cost_limit or None
    # Determine source for cost limit
    if raw_cost_limit:
        cost_limit_source = "configured_limit"
    else:
        cost_limit_source = None
    cost_pct    = int(min(100, total_cost * 100 / raw_cost_limit)) if raw_cost_limit else None
    return RuntimeUsageCurrent(
@ -436,6 +498,8 @@ def _build_current(
        token_pct=token_pct,
        cost_limit_usd=cost_limit,
        cost_pct=cost_pct,
        token_limit_source=token_limit_source,
        cost_limit_source=cost_limit_source,
    )
--- a/backend/app/services/openclaw/usage_scrapers.py
+++ b/backend/app/services/openclaw/usage_scrapers.py
@ -50,6 +50,10 @@ class ParsedClaudeUsage:
    weekly_tokens_used: int | None = None
    weekly_cost_usd: float | None = None
    error: str | None = None
    # Source and confidence for the parsed data
    source: str | None = None  # e.g., "provider_native" or "provider_api_rate_limit"
    confidence: str | None = None  # e.g., "high" or "medium"
@dataclass
@ -439,7 +443,11 @@ class ClaudeTmuxScraper(RuntimeUsageProviderAdapter):
        )
    def parse(self, raw: str) -> ParsedClaudeUsage:
-        return parse_claude_usage(raw)
+        result = parse_claude_usage(raw)
        # Tag the parsed result with source and confidence
        result.source = "provider_native"
        result.confidence = "high"
        return result
 # ---------------------------------------------------------------------------
--- a/backend/app/services/provider_usage.py
+++ b/backend/app/services/provider_usage.py
@ -104,6 +104,10 @@ class ProviderUsageLive:
    account_key: str
    checked_at: datetime
    reachable: bool
    # Phase 1 semantics: this service reports provider API diagnostics
    # (rate-limit windows / probe metadata), not subscription usage windows.
    source: str = "provider_api_rate_limit"
    confidence: str = "high"
    error: str | None = None
    tokens: TokenWindow = field(default_factory=TokenWindow)
    input_tokens: TokenWindow = field(default_factory=TokenWindow)  # Anthropic splits input/output
@ -136,6 +140,8 @@ class ProviderUsageLive:
            "account_key": self.account_key,
            "checked_at":  self.checked_at.isoformat(),
            "reachable":   self.reachable,
            "source":      self.source,
            "confidence":  self.confidence,
            "error":       self.error,
            "tokens":      _window(self.tokens),
            "input_tokens": _window(self.input_tokens),
--- a/backend/tests/test_provider_credentials_usage_api.py
+++ b/backend/tests/test_provider_credentials_usage_api.py
@ -121,6 +121,8 @@ async def test_usage_response_includes_rate_limit_header_names(monkeypatch: pyte
        data = response.json()
        assert data["provider"] == "anthropic"
        assert data["reachable"] is True
        assert data["source"] == "provider_api_rate_limit"
        assert data["confidence"] == "high"
        assert data["sample_model"] == "claude-sonnet-4-6"
        assert data["sample_input_tokens"] == 9
        assert data["sample_output_tokens"] == 1
@ -195,6 +197,8 @@ async def test_test_endpoint_returns_live_result(monkeypatch: pytest.MonkeyPatch
        assert data["provider"] == "anthropic"
        assert data["account_key"] == "Claude"
        assert data["reachable"] is True
        assert data["source"] == "provider_api_rate_limit"
        assert data["confidence"] == "high"
        assert data["models"] == ["claude-sonnet-4-6"]
        assert data["sample_model"] == "claude-sonnet-4-6"
        assert data["sample_input_tokens"] == 8
--- a/frontend/src/api/generated/model/providerUsageLiveRead.ts
+++ b/frontend/src/api/generated/model/providerUsageLiveRead.ts
@ -15,6 +15,8 @@ export interface ProviderUsageLiveRead {
  account_key: string;
  checked_at: string;
  reachable: boolean;
  source: string;
  confidence: string;
  error?: string | null;
  tokens: TokenWindowRead;
  input_tokens: TokenWindowRead;
--- a/frontend/src/app/settings/ai-providers/page.tsx
+++ b/frontend/src/app/settings/ai-providers/page.tsx
@ -368,24 +368,30 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
  const inputTok = usage.input_tokens;
  const req = usage.requests;
  const isOllama = provider === "ollama";
  const sourceLabel: Record<string, string> = {
    provider_native: "Provider native",
    provider_api_rate_limit: "API rate limit",
    local_jsonl_estimate: "Local estimate",
    configured_limit: "Configured limit",
  };
  const usageBars: UsageWindowBarProps[] = [];
  if (inputTok.pct_used != null) {
    usageBars.push({
-      label: "Current session",
+      label: "API rate limit · input tokens",
      pct: inputTok.pct_used,
      resetInMs: inputTok.reset_in_ms,
    });
  }
  if (tok.pct_used != null) {
    usageBars.push({
-      label: usageBars.length > 0 ? "All models" : "Usage",
+      label: "API rate limit · tokens",
      pct: tok.pct_used,
      resetInMs: tok.reset_in_ms,
    });
  }
  if (usageBars.length === 0 && req.limit != null && req.remaining != null && req.limit > 0) {
    usageBars.push({
-      label: "Requests",
+      label: "API rate limit · requests",
      pct: ((req.limit - req.remaining) / req.limit) * 100,
      resetInMs: req.reset_in_ms,
    });
@ -395,6 +401,9 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
    <div className="mt-2 rounded-lg border border-[color:var(--border)] bg-[color:var(--surface)] p-2.5">
      {isOllama ? (
        <div className="space-y-1.5">
          <div className="text-[11px] text-muted">
            Source: {sourceLabel[usage.source] ?? usage.source} · confidence: {usage.confidence}
          </div>
          <div className="flex items-center gap-3 text-xs text-muted">
            <span className="flex items-center gap-1 text-[color:var(--success)]">
              <span className="inline-block h-1.5 w-1.5 rounded-full bg-[color:var(--success)]" />
@ -432,7 +441,7 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
                </div>
              )}
              <p className="text-[11px] text-muted">
-                This provider did not return active usage windows for percent + reset tracking.
+                Provider did not return API rate-limit windows for percent + reset diagnostics.
              </p>
            </>
          )}
@ -442,6 +451,9 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
        </div>
      ) : (
        <div className="space-y-1.5">
          <div className="text-[11px] text-muted">
            Source: {sourceLabel[usage.source] ?? usage.source} · confidence: {usage.confidence}
          </div>
          {usageBars.length > 0 ? (
            <div className="space-y-2">
              {usageBars.map((bar) => (
@ -467,7 +479,7 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
                </div>
              )}
              <p className="text-[11px] text-muted">
-                Connected — provider did not return usage windows for percent + reset tracking.
+                Connected — provider did not return API rate-limit windows for percent + reset diagnostics.
              </p>
            </>
          )}