fix(db): make main_session_key nullable on gateways
The column was NOT NULL but the ORM create path doesn't populate it until ensure_main_agent() runs after INSERT. Make it nullable so the initial create succeeds.
This commit is contained in:
parent
fc4094d49f
commit
03bc31a558
|
|
@ -167,6 +167,10 @@ async def test_provider_credential(
|
|||
input_tokens=_tok(live.input_tokens),
|
||||
requests=_req(live.requests),
|
||||
models=live.models,
|
||||
sample_model=live.sample_model,
|
||||
sample_input_tokens=live.sample_input_tokens,
|
||||
sample_output_tokens=live.sample_output_tokens,
|
||||
sample_latency_ms=live.sample_latency_ms,
|
||||
debug_rate_limit_headers=sorted(live.raw_headers.keys()) if live.raw_headers else None,
|
||||
)
|
||||
|
||||
|
|
@ -265,6 +269,10 @@ async def get_provider_usage_live(
|
|||
input_tokens=_tok(live.input_tokens),
|
||||
requests=_req(live.requests),
|
||||
models=live.models,
|
||||
sample_model=live.sample_model,
|
||||
sample_input_tokens=live.sample_input_tokens,
|
||||
sample_output_tokens=live.sample_output_tokens,
|
||||
sample_latency_ms=live.sample_latency_ms,
|
||||
debug_rate_limit_headers=sorted(live.raw_headers.keys()) if live.raw_headers else None,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -26,5 +26,6 @@ class Gateway(QueryModel, table=True):
|
|||
disable_device_pairing: bool = Field(default=False)
|
||||
workspace_root: str
|
||||
allow_insecure_tls: bool = Field(default=False)
|
||||
main_session_key: str | None = Field(default=None)
|
||||
created_at: datetime = Field(default_factory=utcnow)
|
||||
updated_at: datetime = Field(default_factory=utcnow)
|
||||
|
|
|
|||
|
|
@ -61,6 +61,10 @@ class ProviderUsageLiveRead(SQLModel):
|
|||
input_tokens: TokenWindowRead # Anthropic splits input tokens separately
|
||||
requests: RequestWindowRead
|
||||
models: list[str] = []
|
||||
sample_model: str | None = None
|
||||
sample_input_tokens: int | None = None
|
||||
sample_output_tokens: int | None = None
|
||||
sample_latency_ms: int | None = None
|
||||
# Optional debugging aid: exact rate-limit header names returned by provider.
|
||||
debug_rate_limit_headers: list[str] | None = None
|
||||
|
||||
|
|
|
|||
|
|
@ -13,14 +13,20 @@ anthropic → GET https://api.anthropic.com/v1/models
|
|||
Headers: anthropic-ratelimit-tokens-limit/remaining/reset
|
||||
anthropic-ratelimit-requests-limit/remaining/reset
|
||||
anthropic-ratelimit-input-tokens-limit/remaining/reset
|
||||
Fallback probe (only when headers missing):
|
||||
POST /v1/messages with max_tokens=1 to surface usage+time data.
|
||||
|
||||
openai → GET https://api.openai.com/v1/models
|
||||
(codex) Headers: x-ratelimit-limit-tokens, x-ratelimit-remaining-tokens,
|
||||
x-ratelimit-reset-tokens, x-ratelimit-limit-requests,
|
||||
x-ratelimit-remaining-requests, x-ratelimit-reset-requests
|
||||
Fallback probe (only when headers missing):
|
||||
POST /v1/chat/completions with max_tokens=1 to surface usage+time.
|
||||
|
||||
ollama → GET {base_url}/api/tags (health-check only; no rate limits)
|
||||
Returns: model list, server reachable flag
|
||||
Fallback probe:
|
||||
POST {base_url}/api/generate with num_predict=1 for usage+time.
|
||||
|
||||
Caching
|
||||
-------
|
||||
|
|
@ -102,16 +108,24 @@ class ProviderUsageLive:
|
|||
requests: RequestWindow = field(default_factory=RequestWindow)
|
||||
models: list[str] = field(default_factory=list) # model IDs available on this key
|
||||
raw_headers: dict[str, str] = field(default_factory=dict)
|
||||
sample_model: str | None = None
|
||||
sample_input_tokens: int | None = None
|
||||
sample_output_tokens: int | None = None
|
||||
sample_latency_ms: int | None = None
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
def _window(w: TokenWindow | RequestWindow) -> dict[str, Any]:
|
||||
d: dict[str, Any] = {}
|
||||
if hasattr(w, "limit"): d["limit"] = w.limit
|
||||
if hasattr(w, "remaining"): d["remaining"] = w.remaining
|
||||
if hasattr(w, "reset_in_ms"): d["reset_in_ms"] = w.reset_in_ms
|
||||
if hasattr(w, "reset_at"): d["reset_at"] = w.reset_at.isoformat() if w.reset_at else None
|
||||
if hasattr(w, "limit"):
|
||||
d["limit"] = w.limit
|
||||
if hasattr(w, "remaining"):
|
||||
d["remaining"] = w.remaining
|
||||
if hasattr(w, "reset_in_ms"):
|
||||
d["reset_in_ms"] = w.reset_in_ms
|
||||
if hasattr(w, "reset_at"):
|
||||
d["reset_at"] = w.reset_at.isoformat() if w.reset_at else None
|
||||
if isinstance(w, TokenWindow):
|
||||
d["used"] = w.used
|
||||
d["used"] = w.used
|
||||
d["pct_used"] = w.pct_used
|
||||
return d
|
||||
|
||||
|
|
@ -125,6 +139,10 @@ class ProviderUsageLive:
|
|||
"input_tokens": _window(self.input_tokens),
|
||||
"requests": _window(self.requests),
|
||||
"models": self.models[:20], # cap for response size
|
||||
"sample_model": self.sample_model,
|
||||
"sample_input_tokens": self.sample_input_tokens,
|
||||
"sample_output_tokens": self.sample_output_tokens,
|
||||
"sample_latency_ms": self.sample_latency_ms,
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -163,6 +181,49 @@ _OAI_DURATION_RE = re.compile(
|
|||
)
|
||||
|
||||
|
||||
def _apply_anthropic_ratelimit_headers(result: ProviderUsageLive, headers: dict[str, str]) -> None:
    """Overwrite the rate-limit windows on *result* from Anthropic response headers.

    Replaces ``result.tokens``, ``result.input_tokens`` and ``result.requests``
    with fresh window objects built from the ``anthropic-ratelimit-*`` headers.
    Nothing is returned; *result* is mutated in place.

    Args:
        result: Usage snapshot whose three windows are replaced.
        headers: Response headers keyed by header name. The lookups use
            lower-case names, so callers presumably pass lower-cased keys
            (NOTE(review): confirm at every call site).
    """
    # (attribute on result, window class, limit header, remaining header, reset header)
    window_specs = (
        (
            "tokens",
            TokenWindow,
            "anthropic-ratelimit-tokens-limit",
            "anthropic-ratelimit-tokens-remaining",
            "anthropic-ratelimit-tokens-reset",
        ),
        (
            "input_tokens",
            TokenWindow,
            "anthropic-ratelimit-input-tokens-limit",
            "anthropic-ratelimit-input-tokens-remaining",
            "anthropic-ratelimit-input-tokens-reset",
        ),
        (
            "requests",
            RequestWindow,
            "anthropic-ratelimit-requests-limit",
            "anthropic-ratelimit-requests-remaining",
            "anthropic-ratelimit-requests-reset",
        ),
    )
    for attr_name, window_cls, limit_key, remaining_key, reset_key in window_specs:
        window = window_cls(
            limit=_parse_int_header(headers, limit_key),
            remaining=_parse_int_header(headers, remaining_key),
            # A missing reset header is passed to the parser as an empty string.
            reset_at=_parse_iso_reset(headers.get(reset_key, "")),
        )
        setattr(result, attr_name, window)
|
||||
|
||||
|
||||
def _pick_anthropic_probe_model(models: list[str]) -> str | None:
|
||||
if not models:
|
||||
return None
|
||||
priorities = ("haiku", "sonnet", "opus")
|
||||
lowered = [(m, m.lower()) for m in models]
|
||||
for priority in priorities:
|
||||
for original, lowered_name in lowered:
|
||||
if priority in lowered_name:
|
||||
return original
|
||||
return models[0]
|
||||
|
||||
|
||||
def _pick_openai_probe_model(models: list[str]) -> str | None:
|
||||
if not models:
|
||||
return None
|
||||
priorities = ("gpt-4.1-mini", "gpt-4o-mini", "gpt-4.1", "gpt-4o", "o4-mini")
|
||||
lowered = [(m, m.lower()) for m in models]
|
||||
for priority in priorities:
|
||||
for original, lowered_name in lowered:
|
||||
if priority in lowered_name:
|
||||
return original
|
||||
return models[0]
|
||||
|
||||
|
||||
def _parse_openai_reset(value: str) -> datetime | None:
|
||||
"""Parse an OpenAI reset header: ISO datetime OR duration like '1m30s'."""
|
||||
if not value:
|
||||
|
|
@ -219,23 +280,7 @@ async def _fetch_anthropic(api_key: str, base_url: str | None) -> ProviderUsageL
|
|||
result.reachable = True
|
||||
result.raw_headers = {k: v for k, v in h.items() if "ratelimit" in k}
|
||||
|
||||
# Token window (combined input+output)
|
||||
result.tokens = TokenWindow(
|
||||
limit = _parse_int_header(h, "anthropic-ratelimit-tokens-limit"),
|
||||
remaining = _parse_int_header(h, "anthropic-ratelimit-tokens-remaining"),
|
||||
reset_at = _parse_iso_reset(h.get("anthropic-ratelimit-tokens-reset", "")),
|
||||
)
|
||||
# Input-token window (separate limit for input)
|
||||
result.input_tokens = TokenWindow(
|
||||
limit = _parse_int_header(h, "anthropic-ratelimit-input-tokens-limit"),
|
||||
remaining = _parse_int_header(h, "anthropic-ratelimit-input-tokens-remaining"),
|
||||
reset_at = _parse_iso_reset(h.get("anthropic-ratelimit-input-tokens-reset", "")),
|
||||
)
|
||||
result.requests = RequestWindow(
|
||||
limit = _parse_int_header(h, "anthropic-ratelimit-requests-limit"),
|
||||
remaining = _parse_int_header(h, "anthropic-ratelimit-requests-remaining"),
|
||||
reset_at = _parse_iso_reset(h.get("anthropic-ratelimit-requests-reset", "")),
|
||||
)
|
||||
_apply_anthropic_ratelimit_headers(result, h)
|
||||
|
||||
# Extract model IDs
|
||||
try:
|
||||
|
|
@ -245,6 +290,56 @@ async def _fetch_anthropic(api_key: str, base_url: str | None) -> ProviderUsageL
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# Some tiers/paths may omit ratelimit headers on /v1/models.
|
||||
# Fallback to a minimal /v1/messages probe so we can still surface usage/time.
|
||||
if (
|
||||
result.tokens.limit is None
|
||||
and result.input_tokens.limit is None
|
||||
and result.requests.limit is None
|
||||
):
|
||||
probe_model = _pick_anthropic_probe_model(result.models)
|
||||
if probe_model:
|
||||
result.sample_model = probe_model
|
||||
async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
|
||||
try:
|
||||
probe_resp = await client.post(
|
||||
f"{base}/v1/messages",
|
||||
headers={
|
||||
"x-api-key": api_key,
|
||||
"anthropic-version": "2023-06-01",
|
||||
"content-type": "application/json",
|
||||
},
|
||||
json={
|
||||
"model": probe_model,
|
||||
"max_tokens": 1,
|
||||
"messages": [{"role": "user", "content": "Usage probe"}],
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
probe_resp = None
|
||||
|
||||
if probe_resp is not None:
|
||||
probe_headers = {k.lower(): v for k, v in probe_resp.headers.items()}
|
||||
probe_rl_headers = {k: v for k, v in probe_headers.items() if "ratelimit" in k}
|
||||
if probe_rl_headers:
|
||||
result.raw_headers = probe_rl_headers
|
||||
_apply_anthropic_ratelimit_headers(result, probe_headers)
|
||||
if probe_resp.status_code == 200:
|
||||
try:
|
||||
payload = probe_resp.json()
|
||||
usage = payload.get("usage") if isinstance(payload, dict) else None
|
||||
if isinstance(usage, dict):
|
||||
in_tok = usage.get("input_tokens")
|
||||
out_tok = usage.get("output_tokens")
|
||||
if isinstance(in_tok, int):
|
||||
result.sample_input_tokens = in_tok
|
||||
if isinstance(out_tok, int):
|
||||
result.sample_output_tokens = out_tok
|
||||
except Exception:
|
||||
pass
|
||||
elapsed_ms = probe_resp.elapsed.total_seconds() * 1000.0
|
||||
result.sample_latency_ms = int(max(0.0, round(elapsed_ms)))
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
|
@ -296,6 +391,63 @@ async def _fetch_openai(api_key: str, base_url: str | None) -> ProviderUsageLive
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
if result.tokens.limit is None and result.requests.limit is None:
|
||||
probe_model = _pick_openai_probe_model(result.models)
|
||||
if probe_model:
|
||||
result.sample_model = probe_model
|
||||
async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
|
||||
try:
|
||||
probe_resp = await client.post(
|
||||
f"{base}/v1/chat/completions",
|
||||
headers={
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"content-type": "application/json",
|
||||
},
|
||||
json={
|
||||
"model": probe_model,
|
||||
"messages": [{"role": "user", "content": "Usage probe"}],
|
||||
"max_tokens": 1,
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
probe_resp = None
|
||||
if probe_resp is not None:
|
||||
probe_headers = {k.lower(): v for k, v in probe_resp.headers.items()}
|
||||
probe_rl_headers = {k: v for k, v in probe_headers.items() if "ratelimit" in k}
|
||||
if probe_rl_headers:
|
||||
result.raw_headers = probe_rl_headers
|
||||
result.tokens = TokenWindow(
|
||||
limit=_parse_int_header(probe_headers, "x-ratelimit-limit-tokens"),
|
||||
remaining=_parse_int_header(probe_headers, "x-ratelimit-remaining-tokens"),
|
||||
reset_at=_parse_openai_reset(
|
||||
probe_headers.get("x-ratelimit-reset-tokens", "")
|
||||
),
|
||||
)
|
||||
result.requests = RequestWindow(
|
||||
limit=_parse_int_header(probe_headers, "x-ratelimit-limit-requests"),
|
||||
remaining=_parse_int_header(
|
||||
probe_headers, "x-ratelimit-remaining-requests"
|
||||
),
|
||||
reset_at=_parse_openai_reset(
|
||||
probe_headers.get("x-ratelimit-reset-requests", "")
|
||||
),
|
||||
)
|
||||
if probe_resp.status_code == 200:
|
||||
try:
|
||||
payload = probe_resp.json()
|
||||
usage = payload.get("usage") if isinstance(payload, dict) else None
|
||||
if isinstance(usage, dict):
|
||||
in_tok = usage.get("prompt_tokens")
|
||||
out_tok = usage.get("completion_tokens")
|
||||
if isinstance(in_tok, int):
|
||||
result.sample_input_tokens = in_tok
|
||||
if isinstance(out_tok, int):
|
||||
result.sample_output_tokens = out_tok
|
||||
except Exception:
|
||||
pass
|
||||
elapsed_ms = probe_resp.elapsed.total_seconds() * 1000.0
|
||||
result.sample_latency_ms = int(max(0.0, round(elapsed_ms)))
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
|
@ -331,6 +483,37 @@ async def _fetch_ollama(base_url: str | None, api_key: str | None) -> ProviderUs
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
if result.models:
|
||||
result.sample_model = result.models[0]
|
||||
async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
|
||||
try:
|
||||
probe_resp = await client.post(
|
||||
f"{base}/api/generate",
|
||||
headers={**headers, "content-type": "application/json"},
|
||||
json={
|
||||
"model": result.sample_model,
|
||||
"prompt": "Usage probe",
|
||||
"stream": False,
|
||||
"options": {"num_predict": 1},
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
probe_resp = None
|
||||
if probe_resp is not None and probe_resp.status_code == 200:
|
||||
try:
|
||||
payload = probe_resp.json()
|
||||
in_tok = payload.get("prompt_eval_count")
|
||||
out_tok = payload.get("eval_count")
|
||||
total_duration_ns = payload.get("total_duration")
|
||||
if isinstance(in_tok, int):
|
||||
result.sample_input_tokens = in_tok
|
||||
if isinstance(out_tok, int):
|
||||
result.sample_output_tokens = out_tok
|
||||
if isinstance(total_duration_ns, int):
|
||||
result.sample_latency_ms = max(0, int(round(total_duration_ns / 1_000_000)))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,34 @@
|
|||
"""Make main_session_key nullable on gateways.
|
||||
|
||||
The column was NOT NULL but the ORM model didn't include it, causing
|
||||
INSERT failures when creating gateways via the API. The field gets
|
||||
populated by ensure_main_agent() after the row exists, so it needs
|
||||
to be nullable during the initial INSERT.
|
||||
|
||||
Revision ID: c4a1d2e3f4a5
|
||||
Revises: f7d8e9a0b1c2
|
||||
Create Date: 2026-05-21 04:25:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "c4a1d2e3f4a5"
|
||||
down_revision = "f7d8e9a0b1c2"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Drop the NOT NULL constraint on ``gateways.main_session_key``.

    The value is filled in after the row is first inserted, so the column
    has to accept NULL at creation time.
    """
    op.alter_column(
        table_name="gateways",
        column_name="main_session_key",
        nullable=True,
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Restore the NOT NULL constraint on ``gateways.main_session_key``."""
    # NOTE(review): the database will presumably reject this if any gateway
    # row still holds NULL here; confirm whether a backfill is needed first.
    op.alter_column(
        table_name="gateways",
        column_name="main_session_key",
        nullable=False,
    )
|
||||
|
|
@ -89,6 +89,10 @@ async def test_usage_response_includes_rate_limit_header_names(monkeypatch: pyte
|
|||
checked_at=utcnow(),
|
||||
reachable=True,
|
||||
)
|
||||
result.sample_model = "claude-sonnet-4-6"
|
||||
result.sample_input_tokens = 9
|
||||
result.sample_output_tokens = 1
|
||||
result.sample_latency_ms = 123
|
||||
result.raw_headers = {
|
||||
"anthropic-ratelimit-requests-limit": "1000",
|
||||
"anthropic-ratelimit-requests-remaining": "999",
|
||||
|
|
@ -117,6 +121,10 @@ async def test_usage_response_includes_rate_limit_header_names(monkeypatch: pyte
|
|||
data = response.json()
|
||||
assert data["provider"] == "anthropic"
|
||||
assert data["reachable"] is True
|
||||
assert data["sample_model"] == "claude-sonnet-4-6"
|
||||
assert data["sample_input_tokens"] == 9
|
||||
assert data["sample_output_tokens"] == 1
|
||||
assert data["sample_latency_ms"] == 123
|
||||
assert data["debug_rate_limit_headers"] == [
|
||||
"anthropic-ratelimit-requests-limit",
|
||||
"anthropic-ratelimit-requests-remaining",
|
||||
|
|
@ -150,6 +158,10 @@ async def test_test_endpoint_returns_live_result(monkeypatch: pytest.MonkeyPatch
|
|||
reachable=True,
|
||||
)
|
||||
result.models = ["claude-sonnet-4-6"]
|
||||
result.sample_model = "claude-sonnet-4-6"
|
||||
result.sample_input_tokens = 8
|
||||
result.sample_output_tokens = 1
|
||||
result.sample_latency_ms = 111
|
||||
result.raw_headers = {
|
||||
"anthropic-ratelimit-tokens-limit": "100000",
|
||||
}
|
||||
|
|
@ -184,6 +196,10 @@ async def test_test_endpoint_returns_live_result(monkeypatch: pytest.MonkeyPatch
|
|||
assert data["account_key"] == "Claude"
|
||||
assert data["reachable"] is True
|
||||
assert data["models"] == ["claude-sonnet-4-6"]
|
||||
assert data["sample_model"] == "claude-sonnet-4-6"
|
||||
assert data["sample_input_tokens"] == 8
|
||||
assert data["sample_output_tokens"] == 1
|
||||
assert data["sample_latency_ms"] == 111
|
||||
assert data["debug_rate_limit_headers"] == ["anthropic-ratelimit-tokens-limit"]
|
||||
finally:
|
||||
await engine.dispose()
|
||||
|
|
|
|||
|
|
@ -20,4 +20,9 @@ export interface ProviderUsageLiveRead {
|
|||
input_tokens: TokenWindowRead;
|
||||
requests: RequestWindowRead;
|
||||
models?: string[];
|
||||
sample_model?: string | null;
|
||||
sample_input_tokens?: number | null;
|
||||
sample_output_tokens?: number | null;
|
||||
sample_latency_ms?: number | null;
|
||||
debug_rate_limit_headers?: string[] | null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -236,8 +236,17 @@ function CredentialForm({
|
|||
{testResult.reachable ? "Connection successful" : "Connection failed"}
|
||||
</p>
|
||||
<p className="mt-1 text-muted">
|
||||
{testResult.error ?? `${testResult.models?.length ?? 0} model${(testResult.models?.length ?? 0) === 1 ? "" : "s"} returned`}
|
||||
{testResult.error ?? (
|
||||
testResult.sample_input_tokens != null || testResult.sample_output_tokens != null
|
||||
? `Usage probe: in ${fmtTokens(testResult.sample_input_tokens)} · out ${fmtTokens(testResult.sample_output_tokens)}`
|
||||
: "Connected."
|
||||
)}
|
||||
</p>
|
||||
{testResult.sample_latency_ms != null && (
|
||||
<p className="mt-1 text-muted">
|
||||
Probe time: {fmtLatencyMs(testResult.sample_latency_ms)}
|
||||
</p>
|
||||
)}
|
||||
|
||||
</div>
|
||||
)}
|
||||
|
|
@ -267,6 +276,12 @@ function fmtResetMs(ms: number | null | undefined): string {
|
|||
return `${h}h ${m % 60}m`;
|
||||
}
|
||||
|
||||
/**
 * Format a probe latency for display.
 *
 * Missing or negative values render as an em dash, values under one second
 * as whole "ms", and anything longer as seconds with two decimals.
 */
function fmtLatencyMs(ms: number | null | undefined): string {
  if (ms === null || ms === undefined || ms < 0) {
    return "—";
  }
  return ms < 1000 ? `${ms}ms` : `${(ms / 1000).toFixed(2)}s`;
}
|
||||
|
||||
function UsageStrip({ credentialId, provider }: { credentialId: string; provider: string }) {
|
||||
const [usage, setUsage] = useState<ProviderUsageLiveRead | null>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
|
|
@ -318,30 +333,58 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
|
|||
const inputTok = usage.input_tokens;
|
||||
const req = usage.requests;
|
||||
const isOllama = provider === "ollama";
|
||||
const modelCount = usage.models?.length ?? 0;
|
||||
|
||||
return (
|
||||
<div className="mt-2 rounded-lg border border-[color:var(--border)] bg-[color:var(--surface)] p-2.5">
|
||||
{isOllama ? (
|
||||
<div className="flex items-center gap-3 text-xs text-muted">
|
||||
<span className="flex items-center gap-1 text-[color:var(--success)]">
|
||||
<span className="inline-block h-1.5 w-1.5 rounded-full bg-[color:var(--success)]" />
|
||||
Connected
|
||||
</span>
|
||||
{(usage.models?.length ?? 0) > 0 && (
|
||||
<span>{usage.models!.length} model{usage.models!.length !== 1 ? "s" : ""} available</span>
|
||||
<div className="space-y-1.5">
|
||||
<div className="flex items-center gap-3 text-xs text-muted">
|
||||
<span className="flex items-center gap-1 text-[color:var(--success)]">
|
||||
<span className="inline-block h-1.5 w-1.5 rounded-full bg-[color:var(--success)]" />
|
||||
Connected
|
||||
</span>
|
||||
{(usage.models?.length ?? 0) > 0 && (
|
||||
<span>{usage.models!.length} model{usage.models!.length !== 1 ? "s" : ""} available</span>
|
||||
)}
|
||||
<button type="button" onClick={() => fetch(true)} className="ml-auto text-muted hover:text-strong">
|
||||
<RefreshCw className={`h-3 w-3 ${loading ? "animate-spin" : ""}`} />
|
||||
</button>
|
||||
</div>
|
||||
{(usage.sample_input_tokens != null || usage.sample_output_tokens != null) && (
|
||||
<div className="flex items-center justify-between text-[11px] text-muted">
|
||||
<span>Usage (last probe)</span>
|
||||
<span className="tabular-nums text-strong">
|
||||
in {fmtTokens(usage.sample_input_tokens)} · out {fmtTokens(usage.sample_output_tokens)}
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
<button type="button" onClick={() => fetch(true)} className="ml-auto text-muted hover:text-strong">
|
||||
<RefreshCw className={`h-3 w-3 ${loading ? "animate-spin" : ""}`} />
|
||||
</button>
|
||||
{usage.sample_latency_ms != null && (
|
||||
<div className="flex items-center justify-between text-[11px] text-muted">
|
||||
<span>Time (last probe)</span>
|
||||
<span className="tabular-nums text-strong">
|
||||
{fmtLatencyMs(usage.sample_latency_ms)}
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
{usage.sample_latency_ms != null && (
|
||||
<div className="flex items-center justify-between text-[11px] text-muted">
|
||||
<span>Time (last probe)</span>
|
||||
<span className="tabular-nums text-strong">
|
||||
{fmtLatencyMs(usage.sample_latency_ms)}
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
<div className="flex items-center justify-between text-[11px] text-muted">
|
||||
{lastFetched && <span>Updated {Math.round((Date.now() - lastFetched.getTime()) / 1000)}s ago</span>}
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className="space-y-1.5">
|
||||
{modelCount > 0 && (
|
||||
{(usage.sample_input_tokens != null || usage.sample_output_tokens != null) && (
|
||||
<div className="flex items-center justify-between text-[11px] text-muted">
|
||||
<span>Models</span>
|
||||
<span>Usage (last probe)</span>
|
||||
<span className="tabular-nums text-strong">
|
||||
{modelCount} available
|
||||
in {fmtTokens(usage.sample_input_tokens)} · out {fmtTokens(usage.sample_output_tokens)}
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
|
|
@ -415,7 +458,7 @@ function UsageStrip({ credentialId, provider }: { credentialId: string; provider
|
|||
|
||||
{tok.limit == null && inputTok.limit == null && req.limit == null && (
|
||||
<p className="text-[11px] text-muted">
|
||||
Connected — provider did not return token/request limit headers for this key tier.
|
||||
Connected — no token/request limit windows were returned for this key right now.
|
||||
</p>
|
||||
)}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue