feat(usage): per-account limit rows and aggregate labeling (#41)

- Add PerGatewayUsage interface and buildPerGatewayUsage() function
- Each gateway's most constrained limit is shown independently, sorted by exhaustion (most exhausted first)
- Per-account rows render BEFORE the aggregate summary
- Aggregate cards are labeled 'Combined' when more than one per-gateway row is shown
- 'Soonest reset' and 'Combined limit' labels replace the misleading single-gateway labels 'Reset In' and 'Time to Limit'
- Gateways with no configured limit are filtered out of the per-account rows to reduce noise
2026-05-21 01:57:13 -05:00 · 2026-05-21 01:57:13 -05:00 · cd688ced26
parent 5c3c09edba
commit cd688ced26
2 changed files with 170 additions and 9 deletions
--- a/frontend/src/app/dashboard/page.tsx
+++ b/frontend/src/app/dashboard/page.tsx
@ -47,7 +47,9 @@ import {
  type ProviderNativeUsageWindow,
  RuntimeUsageSection,
  aggregateRuntimeUsage,
+  buildPerGatewayUsage,
  type AggregatedRuntimeUsage,
+  type PerGatewayUsage,
 } from "@/components/dashboard/RuntimeUsageSection";
 import { GatewayHealthPanel } from "@/components/dashboard/GatewayHealthPanel";
 import { GatewayCronPanel } from "@/components/dashboard/GatewayCronPanel";
@ -647,8 +649,11 @@ export default function DashboardPage() {
    () => gatewayStatusesQuery.data ?? [],
    [gatewayStatusesQuery.data],
  );
-  // Runtime usage — query all gateways in parallel, aggregate
-  const runtimeUsageQuery = useQuery<AggregatedRuntimeUsage, ApiError>({
+  // Runtime usage — query all gateways in parallel, aggregate + per-gateway
+  const runtimeUsageQuery = useQuery<
+    { aggregate: AggregatedRuntimeUsage; perGateway: PerGatewayUsage[] },
+    ApiError
+  >({
    queryKey: [
      "dashboard",
      "runtime-usage",
@ -674,11 +679,19 @@ export default function DashboardPage() {
            r.status === "fulfilled" && r.value !== null,
        )
        .map((r) => r.value);
-      return aggregateRuntimeUsage(valid);
+      const labels: Record<string, string> = {};
+      for (const t of gatewayTargets) {
+        labels[t.gatewayId] = t.boardName;
+      }
+      return {
+        aggregate: aggregateRuntimeUsage(valid),
+        perGateway: buildPerGatewayUsage(valid, labels),
+      };
    },
  });

-  const runtimeUsage = runtimeUsageQuery.data ?? null;
+  const runtimeUsage = runtimeUsageQuery.data?.aggregate ?? null;
+  const perGatewayUsage = runtimeUsageQuery.data?.perGateway ?? [];
  const providerUsageQuery = useQuery<ProviderNativeUsageWindow[], ApiError>({
    queryKey: [
      "dashboard",
@ -1186,6 +1199,7 @@ export default function DashboardPage() {
                <RuntimeUsageSection
                  usage={runtimeUsage}
                  providerUsageWindows={providerUsageWindows}
+                  perGatewayUsage={perGatewayUsage}
                  isLoading={runtimeUsageQuery.isLoading || providerUsageQuery.isLoading}
                  hasGateways={hasConfiguredGateways}
                />
--- a/frontend/src/components/dashboard/RuntimeUsageSection.tsx
+++ b/frontend/src/components/dashboard/RuntimeUsageSection.tsx
@ -39,6 +39,97 @@ export interface AggregatedRuntimeUsage {
  topSessions: TopSession[];
 }

+// One gateway's most-constrained limit (the configured limit with the highest percent used) — rendered as one row in the "Per-account limits" list.
+export interface PerGatewayUsage {
+  gatewayId: string; // String(gateway_id) from the API response; also used as the React key of the row
+  gatewayLabel: string;    // human name shown in the row; falls back to the first 8 chars of gatewayId when no label is supplied
+  accountKey: string; // currently always set to the same value as gatewayId
+  limitKind: string;       // "output_tokens" | "total_tokens" | "messages" | "none" — NOTE(review): "cost" was listed here but buildPerGatewayUsage never produces it
+  limitLabel: string;      // e.g. "output tokens", "total tokens", "tokens" (legacy limit), "messages"; "no limit" when limitKind is "none"
+  limitValue: number | null; // value of the binding limit; null when no limit is configured
+  usedValue: number; // usage counted against the binding limit; 0 when there is none
+  pctUsed: number | null;  // nominally 0–100, copied from the API percent of the binding limit; not clamped here (the row's bar clamps it when rendering)
+  timeToLimitMs: number | null; // predictions.time_to_limit_ms from the API, or null when absent
+  safe: boolean; // predictions.safe from the API, passed through unchanged
+  resetInMs: number; // window.reset_in_ms from the API
+  totalCostUsd: number; // current.total_cost_usd from the API
+}
+
+// Build per-gateway constraint rows from raw API responses: one row per response that has at least one configured token or message limit, sorted by percent used (highest first). Returns a new array; the inputs are not mutated.
+export function buildPerGatewayUsage(
+  responses: RuntimeUsageResponse[],
+  labels: Record<string, string>,   // gatewayId → display label; ids missing from the map fall back to the first 8 chars of the id
+): PerGatewayUsage[] {
+  return responses
+    .map((r): PerGatewayUsage => {
+      const gid = String(r.gateway_id);
+      const label = labels[gid] ?? gid.slice(0, 8);
+      const c = r.current;
+
+      // Collect every configured limit as a candidate; a limit counts only when it is truthy (so 0 means "not configured") and its pct is not null
+      type Candidate = { kind: string; label: string; limit: number; used: number; pct: number };
+      const candidates: Candidate[] = [];
+
+      if (c.output_token_limit && c.output_token_limit_pct !== null) {
+        candidates.push({
+          kind: "output_tokens", label: "output tokens",
+          limit: c.output_token_limit,
+          used: c.total_output_tokens ?? 0,
+          pct: c.output_token_limit_pct ?? 0,
+        });
+      }
+      if (c.total_token_limit && c.total_token_limit_pct !== null) {
+        candidates.push({
+          kind: "total_tokens", label: "total tokens",
+          limit: c.total_token_limit,
+          used: c.total_tokens,
+          pct: c.total_token_limit_pct ?? 0,
+        });
+      }
+      // Legacy token_limit is considered only when neither typed token limit is set; it is reported with kind "total_tokens" but the shorter label "tokens"
+      if (!c.output_token_limit && !c.total_token_limit && c.token_limit && c.token_pct !== null) {
+        candidates.push({
+          kind: "total_tokens", label: "tokens",
+          limit: c.token_limit,
+          used: c.total_tokens,
+          pct: c.token_pct ?? 0,
+        });
+      }
+      if (c.message_limit && c.message_pct !== null) {
+        candidates.push({
+          kind: "messages", label: "messages",
+          limit: c.message_limit,
+          used: c.total_calls,
+          pct: c.message_pct ?? 0,
+        });
+      }
+
+      // Most constrained = highest pct used; on a tie the earlier candidate wins (output tokens, then total/legacy tokens, then messages)
+      const binding = candidates.length > 0
+        ? candidates.reduce((a, b) => a.pct >= b.pct ? a : b)
+        : null;
+
+      return {
+        gatewayId: gid,
+        gatewayLabel: label,
+        accountKey: gid, // same value as gatewayId
+        limitKind: binding?.kind ?? "none",
+        limitLabel: binding?.label ?? "no limit",
+        limitValue: binding?.limit ?? null,
+        usedValue: binding?.used ?? 0,
+        pctUsed: binding?.pct ?? null,
+        timeToLimitMs: r.predictions.time_to_limit_ms ?? null, // taken from the response's predictions regardless of which limit was chosen as binding
+        safe: r.predictions.safe,
+        resetInMs: r.window.reset_in_ms,
+        totalCostUsd: c.total_cost_usd,
+      };
+    })
+    // Only surface rows that have a configured limit — a row with limitKind "none" would be noise
+    .filter((row) => row.limitKind !== "none")
+    // Most exhausted first; every remaining row has a binding limit, so pctUsed is a number here and the ?? 0 only satisfies the type checker
+    .sort((a, b) => (b.pctUsed ?? 0) - (a.pctUsed ?? 0));
+}
+
 export interface ProviderNativeUsageWindow {
  key: string;
  label: string;
@ -278,6 +369,7 @@ function StatCard({ label, value, sub, tone = "default", icon }: StatCardProps)
 interface RuntimeUsageSectionProps {
  usage: AggregatedRuntimeUsage | null;
  providerUsageWindows: ProviderNativeUsageWindow[];
+  perGatewayUsage: PerGatewayUsage[]; // per-gateway limit rows, rendered above the aggregate stat cards; more than one row switches the cards to "Combined" labels
  isLoading: boolean;
  hasGateways: boolean;
 }
@ -285,6 +377,7 @@ interface RuntimeUsageSectionProps {
 export function RuntimeUsageSection({
  usage,
  providerUsageWindows,
+  perGatewayUsage,
  isLoading,
  hasGateways,
 }: RuntimeUsageSectionProps) {
@ -313,7 +406,8 @@ export function RuntimeUsageSection({
    const right = order[b.key] ?? 99;
    return left === right ? a.gatewayLabel.localeCompare(b.gatewayLabel) : left - right;
  });
-  const noData = !hasRuntimeData && providerRows.length === 0;
+  const noData = !hasRuntimeData && providerRows.length === 0 && perGatewayUsage.length === 0;
+  const showCombinedLabel = perGatewayUsage.length > 1;

  return (
    <DashboardSection title="Runtime Usage">
@ -321,7 +415,7 @@ export function RuntimeUsageSection({
        <DashboardEmptyState message="Loading usage data…" />
      ) : noData ? (
        <DashboardEmptyState message="No usage data yet. Usage appears after the first model call." />
-      ) : hasRuntimeData || providerRows.length > 0 ? (
+      ) : hasRuntimeData || providerRows.length > 0 || perGatewayUsage.length > 0 ? (
        <div className="space-y-3">
          {providerRows.length > 0 && (
            <div className="space-y-2 rounded-lg border border-[color:var(--border)] bg-[color:var(--surface-muted)] p-3">
@ -371,23 +465,76 @@ export function RuntimeUsageSection({
            </div>
          )}

+          {/* Per-account constraint rows — most exhausted first */}
+          {perGatewayUsage.length > 0 && (
+            <div className="rounded-lg border border-[color:var(--border)] bg-[color:var(--surface-muted)] p-3">
+              <p className="mb-2 text-[11px] font-semibold uppercase tracking-wider text-muted">
+                Per-account limits
+              </p>
+              <div className="space-y-2">
+                {perGatewayUsage.map((row) => {
+                  const pct = row.pctUsed ?? 0;
+                  const barTone =
+                    pct >= 90
+                      ? "bg-[color:var(--danger)]"
+                      : pct >= 75
+                        ? "bg-[color:var(--warning)]"
+                        : "bg-[color:var(--success)]";
+                  const dangerLabel =
+                    pct >= 90 ? "text-[color:var(--danger)]"
+                    : pct >= 75 ? "text-[color:var(--warning)]"
+                    : "text-muted";
+                  return (
+                    <div
+                      key={row.gatewayId}
+                      className="rounded-md border border-[color:var(--border)] bg-[color:var(--surface)] p-2"
+                    >
+                      <div className="mb-1 flex items-center justify-between gap-2">
+                        <p className="text-xs font-medium text-strong truncate">{row.gatewayLabel}</p>
+                        <div className="flex items-center gap-2 shrink-0">
+                          {row.timeToLimitMs !== null && (
+                            <span className={`text-[11px] tabular-nums ${!row.safe ? "text-[color:var(--danger)]" : "text-muted"}`}>
+                              {row.timeToLimitMs === 0 ? "at limit" : `${fmtMs(row.timeToLimitMs)} left`}
+                            </span>
+                          )}
+                          <span className={`text-[11px] tabular-nums ${dangerLabel}`}>
+                            {row.pctUsed === null ? "—" : `${Math.round(pct)}%`}
+                          </span>
+                        </div>
+                      </div>
+                      <div className="h-1.5 w-full overflow-hidden rounded-full bg-[color:var(--surface-strong)]">
+                        <div
+                          className={`h-full rounded-full transition-all ${barTone}`}
+                          style={{ width: `${Math.max(0, Math.min(100, pct))}%` }}
+                        />
+                      </div>
+                      <p className="mt-1 text-[11px] text-muted">
+                        {row.limitLabel} · resets in {fmtMs(row.resetInMs)} · {fmtCost(row.totalCostUsd)} spent
+                      </p>
+                    </div>
+                  );
+                })}
+              </div>
+            </div>
+          )}
+
          {usage && hasRuntimeData && (
            <div className="grid grid-cols-2 gap-2 sm:grid-cols-4">
              <StatCard
-                label="5h Spend"
+                label={showCombinedLabel ? "Combined spend" : "5h Spend"}
                value={fmtCost(usage.totalCostUsd)}
                sub={`${fmtTokens(usage.totalTokens)} tokens`}
                tone={usage.costLimitUsd && usage.totalCostUsd / usage.costLimitUsd > 0.8 ? "warning" : "default"}
                icon={<Zap className="h-3 w-3" />}
              />
              <StatCard
-                label="Reset In"
+                label={showCombinedLabel ? "Soonest reset" : "Reset In"}
                value={fmtMs(usage.resetInMs)}
                sub={usage.resetsAt ? new Date(usage.resetsAt).toLocaleTimeString([], { hour: "2-digit", minute: "2-digit" }) : undefined}
                icon={<Clock className="h-3 w-3" />}
              />
              <StatCard
-                label="Time to Limit"
+                label={showCombinedLabel ? "Combined limit" : "Time to Limit"}
                value={usage.timeToLimitMs === null ? "—" : usage.timeToLimitMs === 0 ? "At limit" : fmtMs(usage.timeToLimitMs)}
                sub={(() => {
                  if (usage.outputTokenLimit) {