feat(usage): add source/confidence fields and relabel API rate limits (Phase 1, #36)

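- Add source and confidence fields to the RuntimeUsageWindow,
  ProviderUsageLiveRead, and ProviderUsageScrapeResult schemas; add a
  source field to ModelUsageEntry and TopSession; add token_limit_source
  and cost_limit_source to RuntimeUsageCurrent; add the new
  ProviderUsageWindow schema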
- _build_window() assigns source based on data origin:
  provider_native > provider_api_rate_limit > local_jsonl_estimate
- _build_current() tags token_limit_source and cost_limit_source
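- Frontend relabels the 'Current session', 'All models'/'Usage', and
  'Requests' bars to 'API rate limit · input tokens',
  'API rate limit · tokens', and 'API rate limit · requests'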
- Shows source label and confidence in usage strip
- Changes 'did not return active usage windows' to 'did not return
  API rate-limit windows for percent + reset diagnostics'
This commit is contained in:
null 2026-05-21 01:01:05 -05:00
parent 184d86c58a
commit 8d11f4f840
10 changed files with 136 additions and 7 deletions

1
.gitignore vendored
View File

@ -34,3 +34,4 @@ backend/app/services/openclaw/.device-keys
FUTURE.md FUTURE.md
FUTURE.md FUTURE.md
docs/runtime-usage-dashboard-plan.md docs/runtime-usage-dashboard-plan.md
docs/remaining-usage-accuracy-review-plan.md

View File

@ -162,6 +162,8 @@ async def test_provider_credential(
account_key=live.account_key, account_key=live.account_key,
checked_at=live.checked_at.isoformat(), checked_at=live.checked_at.isoformat(),
reachable=live.reachable, reachable=live.reachable,
source=live.source,
confidence=live.confidence,
error=live.error, error=live.error,
tokens=_tok(live.tokens), tokens=_tok(live.tokens),
input_tokens=_tok(live.input_tokens), input_tokens=_tok(live.input_tokens),
@ -264,6 +266,8 @@ async def get_provider_usage_live(
account_key=live.account_key, account_key=live.account_key,
checked_at=live.checked_at.isoformat(), checked_at=live.checked_at.isoformat(),
reachable=live.reachable, reachable=live.reachable,
source=live.source,
confidence=live.confidence,
error=live.error, error=live.error,
tokens=_tok(live.tokens), tokens=_tok(live.tokens),
input_tokens=_tok(live.input_tokens), input_tokens=_tok(live.input_tokens),

View File

@ -56,6 +56,8 @@ class ProviderUsageLiveRead(SQLModel):
account_key: str account_key: str
checked_at: str # ISO 8601 UTC checked_at: str # ISO 8601 UTC
reachable: bool reachable: bool
source: str # provider_native | provider_api_rate_limit | local_jsonl_estimate | configured_limit
confidence: str # high | medium | low
error: str | None = None error: str | None = None
tokens: TokenWindowRead tokens: TokenWindowRead
input_tokens: TokenWindowRead # Anthropic splits input tokens separately input_tokens: TokenWindowRead # Anthropic splits input tokens separately

View File

@ -17,6 +17,8 @@ class RuntimeUsageWindow(SQLModel):
started_at: datetime started_at: datetime
resets_at: datetime resets_at: datetime
reset_in_ms: int # milliseconds until oldest event ages out reset_in_ms: int # milliseconds until oldest event ages out
source: str = "local_jsonl_estimate" # source of this window
confidence: str = "low" # confidence level for this window
class RuntimeUsageCurrent(SQLModel): class RuntimeUsageCurrent(SQLModel):
@ -29,6 +31,9 @@ class RuntimeUsageCurrent(SQLModel):
token_pct: int | None = None # 0-100; None when limit unknown token_pct: int | None = None # 0-100; None when limit unknown
cost_limit_usd: float | None = None cost_limit_usd: float | None = None
cost_pct: int | None = None cost_pct: int | None = None
# Source and confidence for the limits
token_limit_source: str | None = None
cost_limit_source: str | None = None
class RuntimeUsageBurnRate(SQLModel): class RuntimeUsageBurnRate(SQLModel):
@ -59,6 +64,7 @@ class ModelUsageEntry(SQLModel):
cost_usd: float cost_usd: float
calls: int calls: int
unpriced: bool # True = unknown paid model; False = priced or intentionally free (Ollama) unpriced: bool # True = unknown paid model; False = priced or intentionally free (Ollama)
source: str = "local_jsonl_estimate" # source of this data
class TopSession(SQLModel): class TopSession(SQLModel):
@ -70,6 +76,20 @@ class TopSession(SQLModel):
cost_usd: float cost_usd: float
total_tokens: int total_tokens: int
updated_at: str | None = None updated_at: str | None = None
source: str = "local_jsonl_estimate" # source of this session data
class ProviderUsageWindow(SQLModel):
"""One provider-native usage window (session/week/model-specific)."""
key: str # current_session | weekly_all_models | weekly_sonnet | extra_usage
label: str
pct_used: float | None = None
remaining_ms: int | None = None
remaining_label: str | None = None
extra_text: str | None = None
source: str = "provider_native"
confidence: str = "high"
class ProviderUsageScrapeResult(SQLModel): class ProviderUsageScrapeResult(SQLModel):
@ -86,6 +106,8 @@ class ProviderUsageScrapeResult(SQLModel):
fresh: bool # True if within the freshness window fresh: bool # True if within the freshness window
freshness_ttl_seconds: int freshness_ttl_seconds: int
windows: list[ProviderUsageWindow] = []
current_pct: float | None = None # 0-100 % of current window used current_pct: float | None = None # 0-100 % of current window used
remaining_ms: int | None = None # ms until window resets remaining_ms: int | None = None # ms until window resets
remaining_label: str | None = None # human-readable "2h 47m" remaining_label: str | None = None # human-readable "2h 47m"
@ -98,6 +120,10 @@ class ProviderUsageScrapeResult(SQLModel):
raw_text: str | None = None # included when DEBUG_SCRAPER_RAW=true raw_text: str | None = None # included when DEBUG_SCRAPER_RAW=true
error: str | None = None # set when scrape or parse failed error: str | None = None # set when scrape or parse failed
# Source and confidence for the scraped data
source: str | None = None # e.g. "provider_native" or "provider_api_rate_limit"
confidence: str | None = None # e.g. "high" or "medium"
class ProviderUsageResponse(SQLModel): class ProviderUsageResponse(SQLModel):
"""Response envelope for GET /gateways/{id}/provider-usage.""" """Response envelope for GET /gateways/{id}/provider-usage."""

View File

@ -323,6 +323,7 @@ def aggregate_per_model(
"cost_usd": 0.0, "cost_usd": 0.0,
"calls": 0, "calls": 0,
"unpriced": unpriced, "unpriced": unpriced,
"source": "local_jsonl_estimate", # default source for aggregated session data
} }
e = entries[key] e = entries[key]
e["input_tokens"] += tokens["input"] e["input_tokens"] += tokens["input"]
@ -371,6 +372,7 @@ def _top_sessions(
cost_usd=round(cost, 8), cost_usd=round(cost, 8),
total_tokens=total, total_tokens=total,
updated_at=updated, updated_at=updated,
source="local_jsonl_estimate", # default source for session data
)) ))
rows.sort(key=lambda r: r.cost_usd, reverse=True) rows.sort(key=lambda r: r.cost_usd, reverse=True)
return rows[:limit] return rows[:limit]
@ -386,8 +388,41 @@ _WINDOW_HOURS = 5
def _build_window( def _build_window(
status_raw: dict[str, Any], status_raw: dict[str, Any],
now: datetime, now: datetime,
account_key: str = "default",
) -> RuntimeUsageWindow: ) -> RuntimeUsageWindow:
"""Build the usage window, preferring gateway status data then falling back.""" """Build the usage window, preferring gateway status data then falling back.
Source assignment:
- If gateway status provides explicit window data, use provider_native
- If API rate-limit headers are the only source, use provider_api_rate_limit
- If falling back to local logic, use local_jsonl_estimate
"""
# Check if gateway status provides explicit window data
has_window_start = status_raw.get("windowStart") or status_raw.get("window_start") or status_raw.get("period_start") or status_raw.get("started_at")
has_window_end = status_raw.get("windowEnd") or status_raw.get("window_end") or status_raw.get("period_end") or status_raw.get("resets_at")
# Check for API rate-limit headers (these indicate throttling, not subscription usage)
has_rate_limit_headers = (
status_raw.get("x_ratelimit_remaining") or
status_raw.get("x_ratelimit_limit") or
status_raw.get("x_ratelimit_reset") or
status_raw.get("anthropic_ratelimit_remaining") or
status_raw.get("anthropic_ratelimit_limit")
)
if has_window_start and has_window_end:
# Gateway status provides explicit window data
source = "provider_native"
confidence = "high"
elif has_rate_limit_headers:
# Only API rate-limit headers available - treat as diagnostics
source = "provider_api_rate_limit"
confidence = "medium"
else:
# Fall back to local logic (5-hour window from oldest event)
source = "local_jsonl_estimate"
confidence = "low"
started_at = _parse_datetime( started_at = _parse_datetime(
status_raw.get("windowStart") or status_raw.get("window_start") status_raw.get("windowStart") or status_raw.get("window_start")
or status_raw.get("period_start") or status_raw.get("started_at") or status_raw.get("period_start") or status_raw.get("started_at")
@ -408,12 +443,15 @@ def _build_window(
started_at=started_at, started_at=started_at,
resets_at=resets_at, resets_at=resets_at,
reset_in_ms=reset_in_ms, reset_in_ms=reset_in_ms,
source=source,
confidence=confidence,
) )
def _build_current( def _build_current(
per_model: dict[str, ModelUsageEntry], per_model: dict[str, ModelUsageEntry],
status_raw: dict[str, Any], status_raw: dict[str, Any],
account_key: str = "default",
) -> RuntimeUsageCurrent: ) -> RuntimeUsageCurrent:
total_cost = round(sum(e.cost_usd for e in per_model.values()), 8) total_cost = round(sum(e.cost_usd for e in per_model.values()), 8)
total_tokens = sum(e.total_tokens for e in per_model.values()) total_tokens = sum(e.total_tokens for e in per_model.values())
@ -422,10 +460,34 @@ def _build_current(
# Try to get configured limits from the gateway status # Try to get configured limits from the gateway status
raw_token_limit = _get_int(status_raw, "tokenLimit", "token_limit", "messageLimit", "message_limit", default=0) raw_token_limit = _get_int(status_raw, "tokenLimit", "token_limit", "messageLimit", "message_limit", default=0)
token_limit = raw_token_limit or None token_limit = raw_token_limit or None
# Determine source for token limit
if raw_token_limit:
# Check for API rate-limit headers
has_rate_limit_headers = (
status_raw.get("x_ratelimit_remaining") or
status_raw.get("x_ratelimit_limit") or
status_raw.get("anthropic_ratelimit_remaining") or
status_raw.get("anthropic_ratelimit_limit")
)
if has_rate_limit_headers:
token_limit_source = "provider_api_rate_limit"
else:
token_limit_source = "configured_limit"
else:
token_limit_source = None
token_pct = int(min(100, total_tokens * 100 // raw_token_limit)) if raw_token_limit else None token_pct = int(min(100, total_tokens * 100 // raw_token_limit)) if raw_token_limit else None
raw_cost_limit = _get_float(status_raw, "costLimit", "cost_limit", "costLimitUsd", default=0.0) raw_cost_limit = _get_float(status_raw, "costLimit", "cost_limit", "costLimitUsd", default=0.0)
cost_limit = raw_cost_limit or None cost_limit = raw_cost_limit or None
# Determine source for cost limit
if raw_cost_limit:
cost_limit_source = "configured_limit"
else:
cost_limit_source = None
cost_pct = int(min(100, total_cost * 100 / raw_cost_limit)) if raw_cost_limit else None cost_pct = int(min(100, total_cost * 100 / raw_cost_limit)) if raw_cost_limit else None
return RuntimeUsageCurrent( return RuntimeUsageCurrent(
@ -436,6 +498,8 @@ def _build_current(
token_pct=token_pct, token_pct=token_pct,
cost_limit_usd=cost_limit, cost_limit_usd=cost_limit,
cost_pct=cost_pct, cost_pct=cost_pct,
token_limit_source=token_limit_source,
cost_limit_source=cost_limit_source,
) )

View File

@ -50,6 +50,10 @@ class ParsedClaudeUsage:
weekly_tokens_used: int | None = None weekly_tokens_used: int | None = None
weekly_cost_usd: float | None = None weekly_cost_usd: float | None = None
error: str | None = None error: str | None = None
# Source and confidence for the parsed data
source: str | None = None # e.g., "provider_native" or "provider_api_rate_limit"
confidence: str | None = None # e.g., "high" or "medium"
@dataclass @dataclass
@ -439,7 +443,11 @@ class ClaudeTmuxScraper(RuntimeUsageProviderAdapter):
) )
def parse(self, raw: str) -> ParsedClaudeUsage: def parse(self, raw: str) -> ParsedClaudeUsage:
return parse_claude_usage(raw) result = parse_claude_usage(raw)
# Tag the parsed result with source and confidence
result.source = "provider_native"
result.confidence = "high"
return result
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------

View File

@ -104,6 +104,10 @@ class ProviderUsageLive:
account_key: str account_key: str
checked_at: datetime checked_at: datetime
reachable: bool reachable: bool
# Phase 1 semantics: this service reports provider API diagnostics
# (rate-limit windows / probe metadata), not subscription usage windows.
source: str = "provider_api_rate_limit"
confidence: str = "high"
error: str | None = None error: str | None = None
tokens: TokenWindow = field(default_factory=TokenWindow) tokens: TokenWindow = field(default_factory=TokenWindow)
input_tokens: TokenWindow = field(default_factory=TokenWindow) # Anthropic splits input/output input_tokens: TokenWindow = field(default_factory=TokenWindow) # Anthropic splits input/output
@ -136,6 +140,8 @@ class ProviderUsageLive:
"account_key": self.account_key, "account_key": self.account_key,
"checked_at": self.checked_at.isoformat(), "checked_at": self.checked_at.isoformat(),
"reachable": self.reachable, "reachable": self.reachable,
"source": self.source,
"confidence": self.confidence,
"error": self.error, "error": self.error,
"tokens": _window(self.tokens), "tokens": _window(self.tokens),
"input_tokens": _window(self.input_tokens), "input_tokens": _window(self.input_tokens),

View File

@ -121,6 +121,8 @@ async def test_usage_response_includes_rate_limit_header_names(monkeypatch: pyte
data = response.json() data = response.json()
assert data["provider"] == "anthropic" assert data["provider"] == "anthropic"
assert data["reachable"] is True assert data["reachable"] is True
assert data["source"] == "provider_api_rate_limit"
assert data["confidence"] == "high"
assert data["sample_model"] == "claude-sonnet-4-6" assert data["sample_model"] == "claude-sonnet-4-6"
assert data["sample_input_tokens"] == 9 assert data["sample_input_tokens"] == 9
assert data["sample_output_tokens"] == 1 assert data["sample_output_tokens"] == 1
@ -195,6 +197,8 @@ async def test_test_endpoint_returns_live_result(monkeypatch: pytest.MonkeyPatch
assert data["provider"] == "anthropic" assert data["provider"] == "anthropic"
assert data["account_key"] == "Claude" assert data["account_key"] == "Claude"
assert data["reachable"] is True assert data["reachable"] is True
assert data["source"] == "provider_api_rate_limit"
assert data["confidence"] == "high"
assert data["models"] == ["claude-sonnet-4-6"] assert data["models"] == ["claude-sonnet-4-6"]
assert data["sample_model"] == "claude-sonnet-4-6" assert data["sample_model"] == "claude-sonnet-4-6"
assert data["sample_input_tokens"] == 8 assert data["sample_input_tokens"] == 8

View File

@ -15,6 +15,8 @@ export interface ProviderUsageLiveRead {
account_key: string; account_key: string;
checked_at: string; checked_at: string;
reachable: boolean; reachable: boolean;
source: string;
confidence: string;
error?: string | null; error?: string | null;
tokens: TokenWindowRead; tokens: TokenWindowRead;
input_tokens: TokenWindowRead; input_tokens: TokenWindowRead;

View File

@ -368,24 +368,30 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
const inputTok = usage.input_tokens; const inputTok = usage.input_tokens;
const req = usage.requests; const req = usage.requests;
const isOllama = provider === "ollama"; const isOllama = provider === "ollama";
const sourceLabel: Record<string, string> = {
provider_native: "Provider native",
provider_api_rate_limit: "API rate limit",
local_jsonl_estimate: "Local estimate",
configured_limit: "Configured limit",
};
const usageBars: UsageWindowBarProps[] = []; const usageBars: UsageWindowBarProps[] = [];
if (inputTok.pct_used != null) { if (inputTok.pct_used != null) {
usageBars.push({ usageBars.push({
label: "Current session", label: "API rate limit · input tokens",
pct: inputTok.pct_used, pct: inputTok.pct_used,
resetInMs: inputTok.reset_in_ms, resetInMs: inputTok.reset_in_ms,
}); });
} }
if (tok.pct_used != null) { if (tok.pct_used != null) {
usageBars.push({ usageBars.push({
label: usageBars.length > 0 ? "All models" : "Usage", label: "API rate limit · tokens",
pct: tok.pct_used, pct: tok.pct_used,
resetInMs: tok.reset_in_ms, resetInMs: tok.reset_in_ms,
}); });
} }
if (usageBars.length === 0 && req.limit != null && req.remaining != null && req.limit > 0) { if (usageBars.length === 0 && req.limit != null && req.remaining != null && req.limit > 0) {
usageBars.push({ usageBars.push({
label: "Requests", label: "API rate limit · requests",
pct: ((req.limit - req.remaining) / req.limit) * 100, pct: ((req.limit - req.remaining) / req.limit) * 100,
resetInMs: req.reset_in_ms, resetInMs: req.reset_in_ms,
}); });
@ -395,6 +401,9 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
<div className="mt-2 rounded-lg border border-[color:var(--border)] bg-[color:var(--surface)] p-2.5"> <div className="mt-2 rounded-lg border border-[color:var(--border)] bg-[color:var(--surface)] p-2.5">
{isOllama ? ( {isOllama ? (
<div className="space-y-1.5"> <div className="space-y-1.5">
<div className="text-[11px] text-muted">
Source: {sourceLabel[usage.source] ?? usage.source} · confidence: {usage.confidence}
</div>
<div className="flex items-center gap-3 text-xs text-muted"> <div className="flex items-center gap-3 text-xs text-muted">
<span className="flex items-center gap-1 text-[color:var(--success)]"> <span className="flex items-center gap-1 text-[color:var(--success)]">
<span className="inline-block h-1.5 w-1.5 rounded-full bg-[color:var(--success)]" /> <span className="inline-block h-1.5 w-1.5 rounded-full bg-[color:var(--success)]" />
@ -432,7 +441,7 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
</div> </div>
)} )}
<p className="text-[11px] text-muted"> <p className="text-[11px] text-muted">
This provider did not return active usage windows for percent + reset tracking. Provider did not return API rate-limit windows for percent + reset diagnostics.
</p> </p>
</> </>
)} )}
@ -442,6 +451,9 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
</div> </div>
) : ( ) : (
<div className="space-y-1.5"> <div className="space-y-1.5">
<div className="text-[11px] text-muted">
Source: {sourceLabel[usage.source] ?? usage.source} · confidence: {usage.confidence}
</div>
{usageBars.length > 0 ? ( {usageBars.length > 0 ? (
<div className="space-y-2"> <div className="space-y-2">
{usageBars.map((bar) => ( {usageBars.map((bar) => (
@ -467,7 +479,7 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
</div> </div>
)} )}
<p className="text-[11px] text-muted"> <p className="text-[11px] text-muted">
Connected provider did not return usage windows for percent + reset tracking. Connected provider did not return API rate-limit windows for percent + reset diagnostics.
</p> </p>
</> </>
)} )}