diff --git a/.gitignore b/.gitignore index cf7579e..1c2003a 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,4 @@ backend/app/services/openclaw/.device-keys FUTURE.md FUTURE.md docs/runtime-usage-dashboard-plan.md +docs/remaining-usage-accuracy-review-plan.md diff --git a/backend/app/api/provider_credentials.py b/backend/app/api/provider_credentials.py index 1a4db21..bbdb7d5 100644 --- a/backend/app/api/provider_credentials.py +++ b/backend/app/api/provider_credentials.py @@ -162,6 +162,8 @@ async def test_provider_credential( account_key=live.account_key, checked_at=live.checked_at.isoformat(), reachable=live.reachable, + source=live.source, + confidence=live.confidence, error=live.error, tokens=_tok(live.tokens), input_tokens=_tok(live.input_tokens), @@ -264,6 +266,8 @@ async def get_provider_usage_live( account_key=live.account_key, checked_at=live.checked_at.isoformat(), reachable=live.reachable, + source=live.source, + confidence=live.confidence, error=live.error, tokens=_tok(live.tokens), input_tokens=_tok(live.input_tokens), diff --git a/backend/app/schemas/provider_credentials.py b/backend/app/schemas/provider_credentials.py index 25a40d4..e35e49d 100644 --- a/backend/app/schemas/provider_credentials.py +++ b/backend/app/schemas/provider_credentials.py @@ -56,6 +56,8 @@ class ProviderUsageLiveRead(SQLModel): account_key: str checked_at: str # ISO 8601 UTC reachable: bool + source: str # provider_native | provider_api_rate_limit | local_jsonl_estimate | configured_limit + confidence: str # high | medium | low error: str | None = None tokens: TokenWindowRead input_tokens: TokenWindowRead # Anthropic splits input tokens separately diff --git a/backend/app/schemas/runtime_usage.py b/backend/app/schemas/runtime_usage.py index d378cd9..abeb943 100644 --- a/backend/app/schemas/runtime_usage.py +++ b/backend/app/schemas/runtime_usage.py @@ -17,6 +17,8 @@ class RuntimeUsageWindow(SQLModel): started_at: datetime resets_at: datetime reset_in_ms: int # milliseconds until oldest event ages out + source: str = "local_jsonl_estimate" # source of this window + confidence: str = "low" # confidence level for this window class RuntimeUsageCurrent(SQLModel): @@ -29,6 +31,9 @@ class RuntimeUsageCurrent(SQLModel): token_pct: int | None = None # 0–100; None when limit unknown cost_limit_usd: float | None = None cost_pct: int | None = None + # Source and confidence for the limits + token_limit_source: str | None = None + cost_limit_source: str | None = None class RuntimeUsageBurnRate(SQLModel): @@ -59,6 +64,7 @@ class ModelUsageEntry(SQLModel): cost_usd: float calls: int unpriced: bool # True = unknown paid model; False = priced or intentionally free (Ollama) + source: str = "local_jsonl_estimate" # source of this data class TopSession(SQLModel): @@ -70,6 +76,20 @@ class TopSession(SQLModel): cost_usd: float total_tokens: int updated_at: str | None = None + source: str = "local_jsonl_estimate" # source of this session data + + +class ProviderUsageWindow(SQLModel): + """One provider-native usage window (session/week/model-specific).""" + + key: str # current_session | weekly_all_models | weekly_sonnet | extra_usage + label: str + pct_used: float | None = None + remaining_ms: int | None = None + remaining_label: str | None = None + extra_text: str | None = None + source: str = "provider_native" + confidence: str = "high" class ProviderUsageScrapeResult(SQLModel): @@ -86,6 +106,8 @@ class ProviderUsageScrapeResult(SQLModel): fresh: bool # True if within the freshness window freshness_ttl_seconds: int + windows: list[ProviderUsageWindow] = [] + current_pct: float | None = None # 0–100 % of current window used remaining_ms: int | None = None # ms until window resets remaining_label: str | None = None # human-readable "2h 47m" @@ -98,6 +120,10 @@ class ProviderUsageScrapeResult(SQLModel): raw_text: str | None = None # included when DEBUG_SCRAPER_RAW=true error: str | None = None # set when scrape or parse failed + # Source and confidence for the scraped data + source: str | None = None # e.g. "provider_native" or "provider_api_rate_limit" + confidence: str | None = None # e.g. "high" or "medium" + class ProviderUsageResponse(SQLModel): """Response envelope for GET /gateways/{id}/provider-usage.""" diff --git a/backend/app/services/openclaw/runtime_usage.py b/backend/app/services/openclaw/runtime_usage.py index a9ed204..0ca9067 100644 --- a/backend/app/services/openclaw/runtime_usage.py +++ b/backend/app/services/openclaw/runtime_usage.py @@ -323,6 +323,7 @@ def aggregate_per_model( "cost_usd": 0.0, "calls": 0, "unpriced": unpriced, + "source": "local_jsonl_estimate", # default source for aggregated session data } e = entries[key] e["input_tokens"] += tokens["input"] @@ -371,6 +372,7 @@ def _top_sessions( cost_usd=round(cost, 8), total_tokens=total, updated_at=updated, + source="local_jsonl_estimate", # default source for session data )) rows.sort(key=lambda r: r.cost_usd, reverse=True) return rows[:limit] @@ -386,8 +388,41 @@ _WINDOW_HOURS = 5 def _build_window( status_raw: dict[str, Any], now: datetime, + account_key: str = "default", ) -> RuntimeUsageWindow: - """Build the usage window, preferring gateway status data then falling back.""" + """Build the usage window, preferring gateway status data then falling back. + + Source assignment: + - If gateway status provides explicit window data, use provider_native + - If API rate-limit headers are the only source, use provider_api_rate_limit + - If falling back to local logic, use local_jsonl_estimate + """ + # Check if gateway status provides explicit window data + has_window_start = status_raw.get("windowStart") or status_raw.get("window_start") or status_raw.get("period_start") or status_raw.get("started_at") + has_window_end = status_raw.get("windowEnd") or status_raw.get("window_end") or status_raw.get("period_end") or status_raw.get("resets_at") + + # Check for API rate-limit headers (these indicate throttling, not subscription usage) + has_rate_limit_headers = ( + status_raw.get("x_ratelimit_remaining") or + status_raw.get("x_ratelimit_limit") or + status_raw.get("x_ratelimit_reset") or + status_raw.get("anthropic_ratelimit_remaining") or + status_raw.get("anthropic_ratelimit_limit") + ) + + if has_window_start and has_window_end: + # Gateway status provides explicit window data + source = "provider_native" + confidence = "high" + elif has_rate_limit_headers: + # Only API rate-limit headers available - treat as diagnostics + source = "provider_api_rate_limit" + confidence = "medium" + else: + # Fall back to local logic (5-hour window from oldest event) + source = "local_jsonl_estimate" + confidence = "low" + started_at = _parse_datetime( status_raw.get("windowStart") or status_raw.get("window_start") or status_raw.get("period_start") or status_raw.get("started_at") @@ -408,12 +443,15 @@ def _build_window( started_at=started_at, resets_at=resets_at, reset_in_ms=reset_in_ms, + source=source, + confidence=confidence, ) def _build_current( per_model: dict[str, ModelUsageEntry], status_raw: dict[str, Any], + account_key: str = "default", ) -> RuntimeUsageCurrent: total_cost = round(sum(e.cost_usd for e in per_model.values()), 8) total_tokens = sum(e.total_tokens for e in per_model.values()) @@ -422,10 +460,34 @@ def _build_current( # Try to get configured limits from the gateway status raw_token_limit = _get_int(status_raw, "tokenLimit", "token_limit", "messageLimit", "message_limit", default=0) token_limit = raw_token_limit or None + + # Determine source for token limit + if raw_token_limit: + # Check for API rate-limit headers + has_rate_limit_headers = ( + status_raw.get("x_ratelimit_remaining") or + status_raw.get("x_ratelimit_limit") or + status_raw.get("anthropic_ratelimit_remaining") or + status_raw.get("anthropic_ratelimit_limit") + ) + if has_rate_limit_headers: + token_limit_source = "provider_api_rate_limit" + else: + token_limit_source = "configured_limit" + else: + token_limit_source = None + token_pct = int(min(100, total_tokens * 100 // raw_token_limit)) if raw_token_limit else None raw_cost_limit = _get_float(status_raw, "costLimit", "cost_limit", "costLimitUsd", default=0.0) cost_limit = raw_cost_limit or None + + # Determine source for cost limit + if raw_cost_limit: + cost_limit_source = "configured_limit" + else: + cost_limit_source = None + cost_pct = int(min(100, total_cost * 100 / raw_cost_limit)) if raw_cost_limit else None return RuntimeUsageCurrent( @@ -436,6 +498,8 @@ def _build_current( token_pct=token_pct, cost_limit_usd=cost_limit, cost_pct=cost_pct, + token_limit_source=token_limit_source, + cost_limit_source=cost_limit_source, ) diff --git a/backend/app/services/openclaw/usage_scrapers.py b/backend/app/services/openclaw/usage_scrapers.py index d5c00f6..2c7c100 100644 --- a/backend/app/services/openclaw/usage_scrapers.py +++ b/backend/app/services/openclaw/usage_scrapers.py @@ -50,6 +50,10 @@ class ParsedClaudeUsage: weekly_tokens_used: int | None = None weekly_cost_usd: float | None = None error: str | None = None + + # Source and confidence for the parsed data + source: str | None = None # e.g., "provider_native" or "provider_api_rate_limit" + confidence: str | None = None # e.g., "high" or "medium" @dataclass @@ -439,7 +443,11 @@ class ClaudeTmuxScraper(RuntimeUsageProviderAdapter): ) def parse(self, raw: str) -> ParsedClaudeUsage: - return parse_claude_usage(raw) + result = parse_claude_usage(raw) + # Tag the parsed result with source and confidence + result.source = "provider_native" + result.confidence = "high" + return result # --------------------------------------------------------------------------- diff --git a/backend/app/services/provider_usage.py b/backend/app/services/provider_usage.py index 0525c95..683e86f 100644 --- a/backend/app/services/provider_usage.py +++ b/backend/app/services/provider_usage.py @@ -104,6 +104,10 @@ class ProviderUsageLive: account_key: str checked_at: datetime reachable: bool + # Phase 1 semantics: this service reports provider API diagnostics + # (rate-limit windows / probe metadata), not subscription usage windows. + source: str = "provider_api_rate_limit" + confidence: str = "high" error: str | None = None tokens: TokenWindow = field(default_factory=TokenWindow) input_tokens: TokenWindow = field(default_factory=TokenWindow) # Anthropic splits input/output @@ -136,6 +140,8 @@ class ProviderUsageLive: "account_key": self.account_key, "checked_at": self.checked_at.isoformat(), "reachable": self.reachable, + "source": self.source, + "confidence": self.confidence, "error": self.error, "tokens": _window(self.tokens), "input_tokens": _window(self.input_tokens), diff --git a/backend/tests/test_provider_credentials_usage_api.py b/backend/tests/test_provider_credentials_usage_api.py index 5754844..5434bbf 100644 --- a/backend/tests/test_provider_credentials_usage_api.py +++ b/backend/tests/test_provider_credentials_usage_api.py @@ -121,6 +121,8 @@ async def test_usage_response_includes_rate_limit_header_names(monkeypatch: pyte data = response.json() assert data["provider"] == "anthropic" assert data["reachable"] is True + assert data["source"] == "provider_api_rate_limit" + assert data["confidence"] == "high" assert data["sample_model"] == "claude-sonnet-4-6" assert data["sample_input_tokens"] == 9 assert data["sample_output_tokens"] == 1 @@ -195,6 +197,8 @@ async def test_test_endpoint_returns_live_result(monkeypatch: pytest.MonkeyPatch assert data["provider"] == "anthropic" assert data["account_key"] == "Claude" assert data["reachable"] is True + assert data["source"] == "provider_api_rate_limit" + assert data["confidence"] == "high" assert data["models"] == ["claude-sonnet-4-6"] assert data["sample_model"] == "claude-sonnet-4-6" assert data["sample_input_tokens"] == 8 diff --git a/frontend/src/api/generated/model/providerUsageLiveRead.ts b/frontend/src/api/generated/model/providerUsageLiveRead.ts index 1bd1967..70e8772 100644 --- a/frontend/src/api/generated/model/providerUsageLiveRead.ts +++ b/frontend/src/api/generated/model/providerUsageLiveRead.ts @@ -15,6 +15,8 @@ export interface ProviderUsageLiveRead { account_key: string; checked_at: string; reachable: boolean; + source: string; + confidence: string; error?: string | null; tokens: TokenWindowRead; input_tokens: TokenWindowRead; diff --git a/frontend/src/app/settings/ai-providers/page.tsx b/frontend/src/app/settings/ai-providers/page.tsx index 2b50337..0315248 100644 --- a/frontend/src/app/settings/ai-providers/page.tsx +++ b/frontend/src/app/settings/ai-providers/page.tsx @@ -368,24 +368,30 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider const inputTok = usage.input_tokens; const req = usage.requests; const isOllama = provider === "ollama"; + const sourceLabel: Record = { + provider_native: "Provider native", + provider_api_rate_limit: "API rate limit", + local_jsonl_estimate: "Local estimate", + configured_limit: "Configured limit", + }; const usageBars: UsageWindowBarProps[] = []; if (inputTok.pct_used != null) { usageBars.push({ - label: "Current session", + label: "API rate limit · input tokens", pct: inputTok.pct_used, resetInMs: inputTok.reset_in_ms, }); } if (tok.pct_used != null) { usageBars.push({ - label: usageBars.length > 0 ? "All models" : "Usage", + label: "API rate limit · tokens", pct: tok.pct_used, resetInMs: tok.reset_in_ms, }); } if (usageBars.length === 0 && req.limit != null && req.remaining != null && req.limit > 0) { usageBars.push({ - label: "Requests", + label: "API rate limit · requests", pct: ((req.limit - req.remaining) / req.limit) * 100, resetInMs: req.reset_in_ms, }); @@ -395,6 +401,9 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
{isOllama ? (
+
+ Source: {sourceLabel[usage.source] ?? usage.source} · confidence: {usage.confidence} +
@@ -432,7 +441,7 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
)}

- This provider did not return active usage windows for percent + reset tracking. + Provider did not return API rate-limit windows for percent + reset diagnostics.

)} @@ -442,6 +451,9 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
) : (
+
+ Source: {sourceLabel[usage.source] ?? usage.source} · confidence: {usage.confidence} +
{usageBars.length > 0 ? (
{usageBars.map((bar) => ( @@ -467,7 +479,7 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
)}

- Connected — provider did not return usage windows for percent + reset tracking. + Connected — provider did not return API rate-limit windows for percent + reset diagnostics.

)}