feat(usage): add source/confidence fields and relabel API rate limits (Phase 1, #36)
- Add source and confidence fields to RuntimeUsageWindow, ModelUsageEntry, TopSession, RuntimeUsageCurrent, and ProviderUsageScrapeResult schemas
- _build_window() assigns source based on data origin: provider_native > provider_api_rate_limit > local_jsonl_estimate
- _build_current() tags token_limit_source and cost_limit_source
- Frontend relabels 'Current session'/'All models' to 'API rate limit'
- Shows source label and confidence in usage strip
- Changes 'did not return active usage windows' to 'did not return API rate-limit windows for percent + reset diagnostics'
This commit is contained in:
parent
184d86c58a
commit
8d11f4f840
|
|
@ -34,3 +34,4 @@ backend/app/services/openclaw/.device-keys
|
|||
FUTURE.md
|
||||
FUTURE.md
|
||||
docs/runtime-usage-dashboard-plan.md
|
||||
docs/remaining-usage-accuracy-review-plan.md
|
||||
|
|
|
|||
|
|
@ -162,6 +162,8 @@ async def test_provider_credential(
|
|||
account_key=live.account_key,
|
||||
checked_at=live.checked_at.isoformat(),
|
||||
reachable=live.reachable,
|
||||
source=live.source,
|
||||
confidence=live.confidence,
|
||||
error=live.error,
|
||||
tokens=_tok(live.tokens),
|
||||
input_tokens=_tok(live.input_tokens),
|
||||
|
|
@ -264,6 +266,8 @@ async def get_provider_usage_live(
|
|||
account_key=live.account_key,
|
||||
checked_at=live.checked_at.isoformat(),
|
||||
reachable=live.reachable,
|
||||
source=live.source,
|
||||
confidence=live.confidence,
|
||||
error=live.error,
|
||||
tokens=_tok(live.tokens),
|
||||
input_tokens=_tok(live.input_tokens),
|
||||
|
|
|
|||
|
|
@ -56,6 +56,8 @@ class ProviderUsageLiveRead(SQLModel):
|
|||
account_key: str
|
||||
checked_at: str # ISO 8601 UTC
|
||||
reachable: bool
|
||||
source: str # provider_native | provider_api_rate_limit | local_jsonl_estimate | configured_limit
|
||||
confidence: str # high | medium | low
|
||||
error: str | None = None
|
||||
tokens: TokenWindowRead
|
||||
input_tokens: TokenWindowRead # Anthropic splits input tokens separately
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ class RuntimeUsageWindow(SQLModel):
|
|||
started_at: datetime
|
||||
resets_at: datetime
|
||||
reset_in_ms: int # milliseconds until oldest event ages out
|
||||
source: str = "local_jsonl_estimate" # source of this window
|
||||
confidence: str = "low" # confidence level for this window
|
||||
|
||||
|
||||
class RuntimeUsageCurrent(SQLModel):
|
||||
|
|
@ -29,6 +31,9 @@ class RuntimeUsageCurrent(SQLModel):
|
|||
token_pct: int | None = None # 0–100; None when limit unknown
|
||||
cost_limit_usd: float | None = None
|
||||
cost_pct: int | None = None
|
||||
# Source and confidence for the limits
|
||||
token_limit_source: str | None = None
|
||||
cost_limit_source: str | None = None
|
||||
|
||||
|
||||
class RuntimeUsageBurnRate(SQLModel):
|
||||
|
|
@ -59,6 +64,7 @@ class ModelUsageEntry(SQLModel):
|
|||
cost_usd: float
|
||||
calls: int
|
||||
unpriced: bool # True = unknown paid model; False = priced or intentionally free (Ollama)
|
||||
source: str = "local_jsonl_estimate" # source of this data
|
||||
|
||||
|
||||
class TopSession(SQLModel):
|
||||
|
|
@ -70,6 +76,20 @@ class TopSession(SQLModel):
|
|||
cost_usd: float
|
||||
total_tokens: int
|
||||
updated_at: str | None = None
|
||||
source: str = "local_jsonl_estimate" # source of this session data
|
||||
|
||||
|
||||
class ProviderUsageWindow(SQLModel):
|
||||
"""One provider-native usage window (session/week/model-specific)."""
|
||||
|
||||
key: str # current_session | weekly_all_models | weekly_sonnet | extra_usage
|
||||
label: str
|
||||
pct_used: float | None = None
|
||||
remaining_ms: int | None = None
|
||||
remaining_label: str | None = None
|
||||
extra_text: str | None = None
|
||||
source: str = "provider_native"
|
||||
confidence: str = "high"
|
||||
|
||||
|
||||
class ProviderUsageScrapeResult(SQLModel):
|
||||
|
|
@ -86,6 +106,8 @@ class ProviderUsageScrapeResult(SQLModel):
|
|||
fresh: bool # True if within the freshness window
|
||||
freshness_ttl_seconds: int
|
||||
|
||||
windows: list[ProviderUsageWindow] = []
|
||||
|
||||
current_pct: float | None = None # 0–100 % of current window used
|
||||
remaining_ms: int | None = None # ms until window resets
|
||||
remaining_label: str | None = None # human-readable "2h 47m"
|
||||
|
|
@ -98,6 +120,10 @@ class ProviderUsageScrapeResult(SQLModel):
|
|||
raw_text: str | None = None # included when DEBUG_SCRAPER_RAW=true
|
||||
error: str | None = None # set when scrape or parse failed
|
||||
|
||||
# Source and confidence for the scraped data
|
||||
source: str | None = None # e.g. "provider_native" or "provider_api_rate_limit"
|
||||
confidence: str | None = None # e.g. "high" or "medium"
|
||||
|
||||
|
||||
class ProviderUsageResponse(SQLModel):
|
||||
"""Response envelope for GET /gateways/{id}/provider-usage."""
|
||||
|
|
|
|||
|
|
@ -323,6 +323,7 @@ def aggregate_per_model(
|
|||
"cost_usd": 0.0,
|
||||
"calls": 0,
|
||||
"unpriced": unpriced,
|
||||
"source": "local_jsonl_estimate", # default source for aggregated session data
|
||||
}
|
||||
e = entries[key]
|
||||
e["input_tokens"] += tokens["input"]
|
||||
|
|
@ -371,6 +372,7 @@ def _top_sessions(
|
|||
cost_usd=round(cost, 8),
|
||||
total_tokens=total,
|
||||
updated_at=updated,
|
||||
source="local_jsonl_estimate", # default source for session data
|
||||
))
|
||||
rows.sort(key=lambda r: r.cost_usd, reverse=True)
|
||||
return rows[:limit]
|
||||
|
|
@ -386,8 +388,41 @@ _WINDOW_HOURS = 5
|
|||
def _build_window(
|
||||
status_raw: dict[str, Any],
|
||||
now: datetime,
|
||||
account_key: str = "default",
|
||||
) -> RuntimeUsageWindow:
|
||||
"""Build the usage window, preferring gateway status data then falling back."""
|
||||
"""Build the usage window, preferring gateway status data then falling back.
|
||||
|
||||
Source assignment:
|
||||
- If gateway status provides explicit window data, use provider_native
|
||||
- If API rate-limit headers are the only source, use provider_api_rate_limit
|
||||
- If falling back to local logic, use local_jsonl_estimate
|
||||
"""
|
||||
# Check if gateway status provides explicit window data
|
||||
has_window_start = status_raw.get("windowStart") or status_raw.get("window_start") or status_raw.get("period_start") or status_raw.get("started_at")
|
||||
has_window_end = status_raw.get("windowEnd") or status_raw.get("window_end") or status_raw.get("period_end") or status_raw.get("resets_at")
|
||||
|
||||
# Check for API rate-limit headers (these indicate throttling, not subscription usage)
|
||||
has_rate_limit_headers = (
|
||||
status_raw.get("x_ratelimit_remaining") or
|
||||
status_raw.get("x_ratelimit_limit") or
|
||||
status_raw.get("x_ratelimit_reset") or
|
||||
status_raw.get("anthropic_ratelimit_remaining") or
|
||||
status_raw.get("anthropic_ratelimit_limit")
|
||||
)
|
||||
|
||||
if has_window_start and has_window_end:
|
||||
# Gateway status provides explicit window data
|
||||
source = "provider_native"
|
||||
confidence = "high"
|
||||
elif has_rate_limit_headers:
|
||||
# Only API rate-limit headers available - treat as diagnostics
|
||||
source = "provider_api_rate_limit"
|
||||
confidence = "medium"
|
||||
else:
|
||||
# Fall back to local logic (5-hour window from oldest event)
|
||||
source = "local_jsonl_estimate"
|
||||
confidence = "low"
|
||||
|
||||
started_at = _parse_datetime(
|
||||
status_raw.get("windowStart") or status_raw.get("window_start")
|
||||
or status_raw.get("period_start") or status_raw.get("started_at")
|
||||
|
|
@ -408,12 +443,15 @@ def _build_window(
|
|||
started_at=started_at,
|
||||
resets_at=resets_at,
|
||||
reset_in_ms=reset_in_ms,
|
||||
source=source,
|
||||
confidence=confidence,
|
||||
)
|
||||
|
||||
|
||||
def _build_current(
|
||||
per_model: dict[str, ModelUsageEntry],
|
||||
status_raw: dict[str, Any],
|
||||
account_key: str = "default",
|
||||
) -> RuntimeUsageCurrent:
|
||||
total_cost = round(sum(e.cost_usd for e in per_model.values()), 8)
|
||||
total_tokens = sum(e.total_tokens for e in per_model.values())
|
||||
|
|
@ -422,10 +460,34 @@ def _build_current(
|
|||
# Try to get configured limits from the gateway status
|
||||
raw_token_limit = _get_int(status_raw, "tokenLimit", "token_limit", "messageLimit", "message_limit", default=0)
|
||||
token_limit = raw_token_limit or None
|
||||
|
||||
# Determine source for token limit
|
||||
if raw_token_limit:
|
||||
# Check for API rate-limit headers
|
||||
has_rate_limit_headers = (
|
||||
status_raw.get("x_ratelimit_remaining") or
|
||||
status_raw.get("x_ratelimit_limit") or
|
||||
status_raw.get("anthropic_ratelimit_remaining") or
|
||||
status_raw.get("anthropic_ratelimit_limit")
|
||||
)
|
||||
if has_rate_limit_headers:
|
||||
token_limit_source = "provider_api_rate_limit"
|
||||
else:
|
||||
token_limit_source = "configured_limit"
|
||||
else:
|
||||
token_limit_source = None
|
||||
|
||||
token_pct = int(min(100, total_tokens * 100 // raw_token_limit)) if raw_token_limit else None
|
||||
|
||||
raw_cost_limit = _get_float(status_raw, "costLimit", "cost_limit", "costLimitUsd", default=0.0)
|
||||
cost_limit = raw_cost_limit or None
|
||||
|
||||
# Determine source for cost limit
|
||||
if raw_cost_limit:
|
||||
cost_limit_source = "configured_limit"
|
||||
else:
|
||||
cost_limit_source = None
|
||||
|
||||
cost_pct = int(min(100, total_cost * 100 / raw_cost_limit)) if raw_cost_limit else None
|
||||
|
||||
return RuntimeUsageCurrent(
|
||||
|
|
@ -436,6 +498,8 @@ def _build_current(
|
|||
token_pct=token_pct,
|
||||
cost_limit_usd=cost_limit,
|
||||
cost_pct=cost_pct,
|
||||
token_limit_source=token_limit_source,
|
||||
cost_limit_source=cost_limit_source,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -50,6 +50,10 @@ class ParsedClaudeUsage:
|
|||
weekly_tokens_used: int | None = None
|
||||
weekly_cost_usd: float | None = None
|
||||
error: str | None = None
|
||||
|
||||
# Source and confidence for the parsed data
|
||||
source: str | None = None # e.g., "provider_native" or "provider_api_rate_limit"
|
||||
confidence: str | None = None # e.g., "high" or "medium"
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -439,7 +443,11 @@ class ClaudeTmuxScraper(RuntimeUsageProviderAdapter):
|
|||
)
|
||||
|
||||
def parse(self, raw: str) -> ParsedClaudeUsage:
|
||||
return parse_claude_usage(raw)
|
||||
result = parse_claude_usage(raw)
|
||||
# Tag the parsed result with source and confidence
|
||||
result.source = "provider_native"
|
||||
result.confidence = "high"
|
||||
return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -104,6 +104,10 @@ class ProviderUsageLive:
|
|||
account_key: str
|
||||
checked_at: datetime
|
||||
reachable: bool
|
||||
# Phase 1 semantics: this service reports provider API diagnostics
|
||||
# (rate-limit windows / probe metadata), not subscription usage windows.
|
||||
source: str = "provider_api_rate_limit"
|
||||
confidence: str = "high"
|
||||
error: str | None = None
|
||||
tokens: TokenWindow = field(default_factory=TokenWindow)
|
||||
input_tokens: TokenWindow = field(default_factory=TokenWindow) # Anthropic splits input/output
|
||||
|
|
@ -136,6 +140,8 @@ class ProviderUsageLive:
|
|||
"account_key": self.account_key,
|
||||
"checked_at": self.checked_at.isoformat(),
|
||||
"reachable": self.reachable,
|
||||
"source": self.source,
|
||||
"confidence": self.confidence,
|
||||
"error": self.error,
|
||||
"tokens": _window(self.tokens),
|
||||
"input_tokens": _window(self.input_tokens),
|
||||
|
|
|
|||
|
|
@ -121,6 +121,8 @@ async def test_usage_response_includes_rate_limit_header_names(monkeypatch: pyte
|
|||
data = response.json()
|
||||
assert data["provider"] == "anthropic"
|
||||
assert data["reachable"] is True
|
||||
assert data["source"] == "provider_api_rate_limit"
|
||||
assert data["confidence"] == "high"
|
||||
assert data["sample_model"] == "claude-sonnet-4-6"
|
||||
assert data["sample_input_tokens"] == 9
|
||||
assert data["sample_output_tokens"] == 1
|
||||
|
|
@ -195,6 +197,8 @@ async def test_test_endpoint_returns_live_result(monkeypatch: pytest.MonkeyPatch
|
|||
assert data["provider"] == "anthropic"
|
||||
assert data["account_key"] == "Claude"
|
||||
assert data["reachable"] is True
|
||||
assert data["source"] == "provider_api_rate_limit"
|
||||
assert data["confidence"] == "high"
|
||||
assert data["models"] == ["claude-sonnet-4-6"]
|
||||
assert data["sample_model"] == "claude-sonnet-4-6"
|
||||
assert data["sample_input_tokens"] == 8
|
||||
|
|
|
|||
|
|
@ -15,6 +15,8 @@ export interface ProviderUsageLiveRead {
|
|||
account_key: string;
|
||||
checked_at: string;
|
||||
reachable: boolean;
|
||||
source: string;
|
||||
confidence: string;
|
||||
error?: string | null;
|
||||
tokens: TokenWindowRead;
|
||||
input_tokens: TokenWindowRead;
|
||||
|
|
|
|||
|
|
@ -368,24 +368,30 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
|
|||
const inputTok = usage.input_tokens;
|
||||
const req = usage.requests;
|
||||
const isOllama = provider === "ollama";
|
||||
const sourceLabel: Record<string, string> = {
|
||||
provider_native: "Provider native",
|
||||
provider_api_rate_limit: "API rate limit",
|
||||
local_jsonl_estimate: "Local estimate",
|
||||
configured_limit: "Configured limit",
|
||||
};
|
||||
const usageBars: UsageWindowBarProps[] = [];
|
||||
if (inputTok.pct_used != null) {
|
||||
usageBars.push({
|
||||
label: "Current session",
|
||||
label: "API rate limit · input tokens",
|
||||
pct: inputTok.pct_used,
|
||||
resetInMs: inputTok.reset_in_ms,
|
||||
});
|
||||
}
|
||||
if (tok.pct_used != null) {
|
||||
usageBars.push({
|
||||
label: usageBars.length > 0 ? "All models" : "Usage",
|
||||
label: "API rate limit · tokens",
|
||||
pct: tok.pct_used,
|
||||
resetInMs: tok.reset_in_ms,
|
||||
});
|
||||
}
|
||||
if (usageBars.length === 0 && req.limit != null && req.remaining != null && req.limit > 0) {
|
||||
usageBars.push({
|
||||
label: "Requests",
|
||||
label: "API rate limit · requests",
|
||||
pct: ((req.limit - req.remaining) / req.limit) * 100,
|
||||
resetInMs: req.reset_in_ms,
|
||||
});
|
||||
|
|
@ -395,6 +401,9 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
|
|||
<div className="mt-2 rounded-lg border border-[color:var(--border)] bg-[color:var(--surface)] p-2.5">
|
||||
{isOllama ? (
|
||||
<div className="space-y-1.5">
|
||||
<div className="text-[11px] text-muted">
|
||||
Source: {sourceLabel[usage.source] ?? usage.source} · confidence: {usage.confidence}
|
||||
</div>
|
||||
<div className="flex items-center gap-3 text-xs text-muted">
|
||||
<span className="flex items-center gap-1 text-[color:var(--success)]">
|
||||
<span className="inline-block h-1.5 w-1.5 rounded-full bg-[color:var(--success)]" />
|
||||
|
|
@ -432,7 +441,7 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
|
|||
</div>
|
||||
)}
|
||||
<p className="text-[11px] text-muted">
|
||||
This provider did not return active usage windows for percent + reset tracking.
|
||||
Provider did not return API rate-limit windows for percent + reset diagnostics.
|
||||
</p>
|
||||
</>
|
||||
)}
|
||||
|
|
@ -442,6 +451,9 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
|
|||
</div>
|
||||
) : (
|
||||
<div className="space-y-1.5">
|
||||
<div className="text-[11px] text-muted">
|
||||
Source: {sourceLabel[usage.source] ?? usage.source} · confidence: {usage.confidence}
|
||||
</div>
|
||||
{usageBars.length > 0 ? (
|
||||
<div className="space-y-2">
|
||||
{usageBars.map((bar) => (
|
||||
|
|
@ -467,7 +479,7 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
|
|||
</div>
|
||||
)}
|
||||
<p className="text-[11px] text-muted">
|
||||
Connected — provider did not return usage windows for percent + reset tracking.
|
||||
Connected — provider did not return API rate-limit windows for percent + reset diagnostics.
|
||||
</p>
|
||||
</>
|
||||
)}
|
||||
|
|
|
|||
Loading…
Reference in New Issue