feat(usage): per-account limit rows and aggregate labeling (#41)

- Add PerGatewayUsage interface and buildPerGatewayUsage() function
- Each gateway's most constrained limit shown independently, sorted by exhaustion
- Per-account rows render BEFORE aggregate summary
- Aggregate cards labeled 'Combined' when multiple gateways exist
- 'Soonest reset' and 'Combined limit' labels replace misleading single-gateway labels
- Filter out gateways with no configured limit to reduce noise
This commit is contained in:
null 2026-05-21 01:57:13 -05:00
parent 5c3c09edba
commit cd688ced26
2 changed files with 170 additions and 9 deletions

View File

@ -47,7 +47,9 @@ import {
type ProviderNativeUsageWindow,
RuntimeUsageSection,
aggregateRuntimeUsage,
buildPerGatewayUsage,
type AggregatedRuntimeUsage,
type PerGatewayUsage,
} from "@/components/dashboard/RuntimeUsageSection";
import { GatewayHealthPanel } from "@/components/dashboard/GatewayHealthPanel";
import { GatewayCronPanel } from "@/components/dashboard/GatewayCronPanel";
@ -647,8 +649,11 @@ export default function DashboardPage() {
() => gatewayStatusesQuery.data ?? [],
[gatewayStatusesQuery.data],
);
// Runtime usage — query all gateways in parallel, aggregate
const runtimeUsageQuery = useQuery<AggregatedRuntimeUsage, ApiError>({
// Runtime usage — query all gateways in parallel, aggregate + per-gateway
const runtimeUsageQuery = useQuery<
{ aggregate: AggregatedRuntimeUsage; perGateway: PerGatewayUsage[] },
ApiError
>({
queryKey: [
"dashboard",
"runtime-usage",
@ -674,11 +679,19 @@ export default function DashboardPage() {
r.status === "fulfilled" && r.value !== null,
)
.map((r) => r.value);
return aggregateRuntimeUsage(valid);
const labels: Record<string, string> = {};
for (const t of gatewayTargets) {
labels[t.gatewayId] = t.boardName;
}
return {
aggregate: aggregateRuntimeUsage(valid),
perGateway: buildPerGatewayUsage(valid, labels),
};
},
});
const runtimeUsage = runtimeUsageQuery.data ?? null;
const runtimeUsage = runtimeUsageQuery.data?.aggregate ?? null;
const perGatewayUsage = runtimeUsageQuery.data?.perGateway ?? [];
const providerUsageQuery = useQuery<ProviderNativeUsageWindow[], ApiError>({
queryKey: [
"dashboard",
@ -1186,6 +1199,7 @@ export default function DashboardPage() {
<RuntimeUsageSection
usage={runtimeUsage}
providerUsageWindows={providerUsageWindows}
perGatewayUsage={perGatewayUsage}
isLoading={runtimeUsageQuery.isLoading || providerUsageQuery.isLoading}
hasGateways={hasConfiguredGateways}
/>

View File

@ -39,6 +39,97 @@ export interface AggregatedRuntimeUsage {
topSessions: TopSession[];
}
// One gateway's most-constrained limit — used for per-account rows.
// Rows of this shape are produced by buildPerGatewayUsage() and rendered in the
// "Per-account limits" list of RuntimeUsageSection.
export interface PerGatewayUsage {
// Stringified gateway_id of the response the row was built from.
gatewayId: string;
gatewayLabel: string; // human name shown in the row
// NOTE(review): buildPerGatewayUsage() currently sets this to the same value as gatewayId.
accountKey: string;
// NOTE(review): buildPerGatewayUsage() in this file only emits "output_tokens",
// "total_tokens" or "messages" (and filters out "none"); "cost" is never produced there.
limitKind: string; // "output_tokens" | "total_tokens" | "messages" | "cost" | "none"
limitLabel: string; // e.g. "output tokens", "messages"
// Configured ceiling of the binding limit; null when no limit applies.
limitValue: number | null;
// Amount consumed against the binding limit, in that limit's own unit (tokens or calls).
usedValue: number;
pctUsed: number | null; // 0–100, derived from the most constraining limit
// Predicted time until the limit is reached; the row renders 0 as "at limit" and hides the hint when null.
timeToLimitMs: number | null;
// Copied from the response's predictions; when false the time-left text is shown in the danger colour.
safe: boolean;
// Milliseconds until the usage window resets.
resetInMs: number;
// Spend reported for this gateway in the current window, in USD.
totalCostUsd: number;
}
/**
 * Build the per-account constraint rows from the raw per-gateway usage responses.
 *
 * Every limit configured on a gateway (output tokens, total tokens, the legacy
 * token limit, messages) is checked, and the one with the highest reported
 * percentage used becomes that gateway's binding limit. When two limits report
 * the same percentage, the one checked first wins.
 *
 * @param responses - Raw usage responses, one per gateway.
 * @param labels - Display label keyed by gateway id. Ids without an entry fall
 *   back to the first eight characters of the id.
 * @returns One row per gateway that has a configured limit, most exhausted first.
 */
export function buildPerGatewayUsage(
  responses: RuntimeUsageResponse[],
  labels: Record<string, string>, // gatewayId → display label
): PerGatewayUsage[] {
  // A limit as reported by the API, before deciding whether it is usable.
  type LimitProbe = {
    kind: string;
    label: string;
    limit: number | null | undefined;
    used: number;
    pct: number | null | undefined;
    eligible: boolean;
  };
  // A usable limit with every value resolved.
  type BindingLimit = { kind: string; label: string; limit: number; used: number; pct: number };

  const rows: PerGatewayUsage[] = [];

  for (const response of responses) {
    const gatewayId = String(response.gateway_id);
    const displayName = labels[gatewayId] ?? gatewayId.slice(0, 8);
    const usage = response.current;

    // The legacy token limit only counts when neither typed token limit is configured.
    const lacksTypedTokenLimit = !usage.output_token_limit && !usage.total_token_limit;

    // Checked in this order; the order decides which limit wins a tie.
    const probes: LimitProbe[] = [
      {
        kind: "output_tokens",
        label: "output tokens",
        limit: usage.output_token_limit,
        used: usage.total_output_tokens ?? 0,
        pct: usage.output_token_limit_pct,
        eligible: true,
      },
      {
        kind: "total_tokens",
        label: "total tokens",
        limit: usage.total_token_limit,
        used: usage.total_tokens,
        pct: usage.total_token_limit_pct,
        eligible: true,
      },
      {
        kind: "total_tokens",
        label: "tokens",
        limit: usage.token_limit,
        used: usage.total_tokens,
        pct: usage.token_pct,
        eligible: lacksTypedTokenLimit,
      },
      {
        kind: "messages",
        label: "messages",
        limit: usage.message_limit,
        used: usage.total_calls,
        pct: usage.message_pct,
        eligible: true,
      },
    ];

    // Most constrained = highest pct used.
    let binding: BindingLimit | null = null;
    for (const probe of probes) {
      // Skip limits that are not configured (missing or zero) or whose percentage is null.
      if (!probe.eligible || !probe.limit || probe.pct === null) continue;
      const candidate: BindingLimit = {
        kind: probe.kind,
        label: probe.label,
        limit: probe.limit,
        used: probe.used,
        pct: probe.pct ?? 0,
      };
      // Replace only when the current pick is not at least as exhausted.
      if (binding === null || !(binding.pct >= candidate.pct)) {
        binding = candidate;
      }
    }

    const timeToLimitMs = response.predictions.time_to_limit_ms ?? null;
    const { safe } = response.predictions;
    const resetInMs = response.window.reset_in_ms;
    const totalCostUsd = usage.total_cost_usd;

    // Only surface gateways that have a configured limit — anything else is noise.
    if (binding === null) continue;

    rows.push({
      gatewayId,
      gatewayLabel: displayName,
      accountKey: gatewayId,
      limitKind: binding.kind,
      limitLabel: binding.label,
      limitValue: binding.limit,
      usedValue: binding.used ?? 0,
      pctUsed: binding.pct,
      timeToLimitMs,
      safe,
      resetInMs,
      totalCostUsd,
    });
  }

  // Most exhausted first.
  return rows.sort((left, right) => (right.pctUsed ?? 0) - (left.pctUsed ?? 0));
}
export interface ProviderNativeUsageWindow {
key: string;
label: string;
@ -278,6 +369,7 @@ function StatCard({ label, value, sub, tone = "default", icon }: StatCardProps)
// Props accepted by RuntimeUsageSection.
interface RuntimeUsageSectionProps {
// Aggregated usage across gateways; the dashboard page passes null when the query has no data.
usage: AggregatedRuntimeUsage | null;
// Provider-native usage windows shown alongside the runtime figures.
providerUsageWindows: ProviderNativeUsageWindow[];
// Per-account rows, rendered in the order given. More than one row switches the
// aggregate cards to their "Combined …" / "Soonest reset" labels.
perGatewayUsage: PerGatewayUsage[];
// The dashboard page passes true while either usage query is still loading.
isLoading: boolean;
// NOTE(review): the code that reads this flag is not visible in this hunk — presumably
// distinguishes "no gateways configured" from "no data yet"; confirm against the component body.
hasGateways: boolean;
}
@ -285,6 +377,7 @@ interface RuntimeUsageSectionProps {
export function RuntimeUsageSection({
usage,
providerUsageWindows,
perGatewayUsage,
isLoading,
hasGateways,
}: RuntimeUsageSectionProps) {
@ -313,7 +406,8 @@ export function RuntimeUsageSection({
const right = order[b.key] ?? 99;
return left === right ? a.gatewayLabel.localeCompare(b.gatewayLabel) : left - right;
});
const noData = !hasRuntimeData && providerRows.length === 0;
const noData = !hasRuntimeData && providerRows.length === 0 && perGatewayUsage.length === 0;
const showCombinedLabel = perGatewayUsage.length > 1;
return (
<DashboardSection title="Runtime Usage">
@ -321,7 +415,7 @@ export function RuntimeUsageSection({
<DashboardEmptyState message="Loading usage data…" />
) : noData ? (
<DashboardEmptyState message="No usage data yet. Usage appears after the first model call." />
) : hasRuntimeData || providerRows.length > 0 ? (
) : hasRuntimeData || providerRows.length > 0 || perGatewayUsage.length > 0 ? (
<div className="space-y-3">
{providerRows.length > 0 && (
<div className="space-y-2 rounded-lg border border-[color:var(--border)] bg-[color:var(--surface-muted)] p-3">
@ -371,23 +465,76 @@ export function RuntimeUsageSection({
</div>
)}
{/* Per-account constraint rows — most exhausted first */}
{perGatewayUsage.length > 0 && (
<div className="rounded-lg border border-[color:var(--border)] bg-[color:var(--surface-muted)] p-3">
<p className="mb-2 text-[11px] font-semibold uppercase tracking-wider text-muted">
Per-account limits
</p>
<div className="space-y-2">
{perGatewayUsage.map((row) => {
const pct = row.pctUsed ?? 0;
const barTone =
pct >= 90
? "bg-[color:var(--danger)]"
: pct >= 75
? "bg-[color:var(--warning)]"
: "bg-[color:var(--success)]";
const dangerLabel =
pct >= 90 ? "text-[color:var(--danger)]"
: pct >= 75 ? "text-[color:var(--warning)]"
: "text-muted";
return (
<div
key={row.gatewayId}
className="rounded-md border border-[color:var(--border)] bg-[color:var(--surface)] p-2"
>
<div className="mb-1 flex items-center justify-between gap-2">
<p className="text-xs font-medium text-strong truncate">{row.gatewayLabel}</p>
<div className="flex items-center gap-2 shrink-0">
{row.timeToLimitMs !== null && (
<span className={`text-[11px] tabular-nums ${!row.safe ? "text-[color:var(--danger)]" : "text-muted"}`}>
{row.timeToLimitMs === 0 ? "at limit" : `${fmtMs(row.timeToLimitMs)} left`}
</span>
)}
<span className={`text-[11px] tabular-nums ${dangerLabel}`}>
{row.pctUsed === null ? "—" : `${Math.round(pct)}%`}
</span>
</div>
</div>
<div className="h-1.5 w-full overflow-hidden rounded-full bg-[color:var(--surface-strong)]">
<div
className={`h-full rounded-full transition-all ${barTone}`}
style={{ width: `${Math.max(0, Math.min(100, pct))}%` }}
/>
</div>
<p className="mt-1 text-[11px] text-muted">
{row.limitLabel} · resets in {fmtMs(row.resetInMs)} · {fmtCost(row.totalCostUsd)} spent
</p>
</div>
);
})}
</div>
</div>
)}
{usage && hasRuntimeData && (
<div className="grid grid-cols-2 gap-2 sm:grid-cols-4">
<StatCard
label="5h Spend"
label={showCombinedLabel ? "Combined spend" : "5h Spend"}
value={fmtCost(usage.totalCostUsd)}
sub={`${fmtTokens(usage.totalTokens)} tokens`}
tone={usage.costLimitUsd && usage.totalCostUsd / usage.costLimitUsd > 0.8 ? "warning" : "default"}
icon={<Zap className="h-3 w-3" />}
/>
<StatCard
label="Reset In"
label={showCombinedLabel ? "Soonest reset" : "Reset In"}
value={fmtMs(usage.resetInMs)}
sub={usage.resetsAt ? new Date(usage.resetsAt).toLocaleTimeString([], { hour: "2-digit", minute: "2-digit" }) : undefined}
icon={<Clock className="h-3 w-3" />}
/>
<StatCard
label="Time to Limit"
label={showCombinedLabel ? "Combined limit" : "Time to Limit"}
value={usage.timeToLimitMs === null ? "—" : usage.timeToLimitMs === 0 ? "At limit" : fmtMs(usage.timeToLimitMs)}
sub={(() => {
if (usage.outputTokenLimit) {