Pipeline/backend/app/services/openclaw/admin_service.py

705 lines
26 KiB
Python
Raw Permalink Normal View History

"""Gateway admin lifecycle service."""
from __future__ import annotations
from abc import ABC, abstractmethod
2026-05-20 04:13:32 -05:00
import re
from typing import TYPE_CHECKING
from uuid import UUID
from fastapi import HTTPException, status
from sqlmodel import col
2026-05-20 04:13:32 -05:00
from sqlmodel import select
from app.core.auth import AuthContext
from app.core.logging import TRACE_LEVEL
from app.core.time import utcnow
from app.db import crud
from app.models.activity_events import ActivityEvent
from app.models.agents import Agent
from app.models.approvals import Approval
2026-05-20 04:13:32 -05:00
from app.models.boards import Board
from app.models.board_webhooks import BoardWebhook
from app.models.gateways import Gateway
from app.models.tasks import Task
2026-05-20 04:13:32 -05:00
from app.schemas.gateways import (
GatewayAgentImportCandidate,
GatewayAgentImportPreviewResponse,
GatewayAgentImportReconcileSummary,
GatewayAgentImportResponse,
GatewayAgentReconcileError,
GatewayTemplatesSyncResult,
)
from app.services.openclaw.constants import DEFAULT_HEARTBEAT_CONFIG
from app.services.openclaw.db_service import OpenClawDBService
from app.services.openclaw.error_messages import normalize_gateway_error_message
from app.services.openclaw.gateway_compat import check_gateway_version_compatibility
from app.services.openclaw.gateway_rpc import GatewayConfig as GatewayClientConfig
from app.services.openclaw.gateway_rpc import OpenClawGatewayError, openclaw_call
2026-05-20 04:13:32 -05:00
from app.services.openclaw.internal.agent_key import agent_key as resolve_agent_key
from app.services.openclaw.lifecycle_orchestrator import AgentLifecycleOrchestrator
from app.services.openclaw.provisioning_db import (
GatewayTemplateSyncOptions,
OpenClawProvisioningService,
)
from app.services.openclaw.session_service import GatewayTemplateSyncQuery
from app.services.openclaw.shared import GatewayAgentIdentity
if TYPE_CHECKING:
from sqlmodel.ext.asyncio.session import AsyncSession
from app.models.users import User
2026-05-20 04:13:32 -05:00
_LEAD_SESSION_KEY_RE = re.compile(
r"^agent:lead-(?P<board_id>[0-9a-fA-F-]{36}):main$"
)
def _as_dict_list(value: object) -> list[dict[str, object]]:
if isinstance(value, dict):
for key in ("agents", "sessions", "items", "data"):
nested = value.get(key)
if isinstance(nested, list):
return [item for item in nested if isinstance(item, dict)]
return []
if isinstance(value, list):
return [item for item in value if isinstance(item, dict)]
return []
def _derive_session_key_for_runtime_agent(
runtime_agent_id: str,
session_by_agent_key: dict[str, str],
) -> str:
existing = session_by_agent_key.get(runtime_agent_id)
if existing:
return existing
return f"agent:{runtime_agent_id}:main"
def _parse_lead_board_id(session_key: str) -> UUID | None:
    """Extract the board id embedded in a lead-agent session key.

    Returns ``None`` when the key does not match ``_LEAD_SESSION_KEY_RE`` or
    when the captured text is not a valid UUID.
    """
    hit = _LEAD_SESSION_KEY_RE.match(session_key)
    if hit is None:
        return None
    try:
        board_id = UUID(hit["board_id"])
    except ValueError:
        # The pattern only checks length and character set, not UUID layout.
        return None
    return board_id
class AbstractGatewayMainAgentManager(ABC):
    """Strategy interface for naming and profiling a gateway's main agent."""

    @abstractmethod
    def build_main_agent_name(self, gateway: Gateway) -> str:
        """Return the display name to give the main agent of ``gateway``."""
        raise NotImplementedError

    @abstractmethod
    def build_identity_profile(self) -> dict[str, str]:
        """Return the identity-profile mapping to store on a main agent."""
        raise NotImplementedError
class DefaultGatewayMainAgentManager(AbstractGatewayMainAgentManager):
    """Stock naming/profile strategy used when no custom manager is supplied."""

    def build_main_agent_name(self, gateway: Gateway) -> str:
        """Name the main agent after the gateway: ``"<gateway.name> Gateway Agent"``."""
        return f"{gateway.name} Gateway Agent"

    def build_identity_profile(self) -> dict[str, str]:
        """Return a freshly built profile dict (role, communication style, emoji)."""
        profile: dict[str, str] = {
            "role": "Gateway Agent",
            "communication_style": "direct, concise, practical",
            "emoji": ":compass:",
        }
        return profile
class GatewayAdminLifecycleService(OpenClawDBService):
"""Write-side gateway lifecycle service (CRUD, main agent, template sync)."""
def __init__(
    self,
    session: AsyncSession,
    *,
    main_agent_manager: AbstractGatewayMainAgentManager | None = None,
) -> None:
    """Bind the service to ``session`` and choose the main-agent strategy.

    Args:
        session: Database session handed to the base service.
        main_agent_manager: Naming/profile strategy for gateway-main agents.
            A falsy value selects ``DefaultGatewayMainAgentManager``.
    """
    super().__init__(session)
    chosen_manager = (
        main_agent_manager if main_agent_manager else DefaultGatewayMainAgentManager()
    )
    self._main_agent_manager = chosen_manager
@property
def main_agent_manager(self) -> AbstractGatewayMainAgentManager:
    """Strategy object used to name and profile gateway-main agents."""
    return self._main_agent_manager


@main_agent_manager.setter
def main_agent_manager(self, value: AbstractGatewayMainAgentManager) -> None:
    """Replace the main-agent strategy used by subsequent calls."""
    self._main_agent_manager = value
async def require_gateway(
    self,
    *,
    gateway_id: UUID,
    organization_id: UUID,
) -> Gateway:
    """Load a gateway by id, scoped to one organization.

    Args:
        gateway_id: Primary key of the gateway to load.
        organization_id: Organization the gateway must belong to.

    Returns:
        The matching gateway row.

    Raises:
        HTTPException: 404 when no gateway with that id exists in the
            organization.
    """
    scoped_query = Gateway.objects.by_id(gateway_id).filter(
        col(Gateway.organization_id) == organization_id
    )
    found = await scoped_query.first(self.session)
    if found is not None:
        return found
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="Gateway not found",
    )
async def find_main_agent(self, gateway: Gateway) -> Agent | None:
    """Return the first agent on ``gateway`` that has no board, or ``None``."""
    gateway_agents = Agent.objects.filter_by(gateway_id=gateway.id)
    boardless_agents = gateway_agents.filter(col(Agent.board_id).is_(None))
    return await boardless_agents.first(self.session)
async def upsert_main_agent_record(self, gateway: Gateway) -> tuple[Agent, bool]:
    """Create or repair the database row for the gateway's main agent.

    The row is only staged on the session with ``add``; this method does not
    flush or commit.

    Args:
        gateway: Gateway whose main agent row should exist and be consistent.

    Returns:
        ``(agent, changed)`` where ``changed`` is true when a row was created
        or at least one field had to be corrected.
    """
    expected_session_key = GatewayAgentIdentity.session_key(gateway)
    agent = await self.find_main_agent(gateway)
    expected_name = self.main_agent_manager.build_main_agent_name(gateway)
    profile = self.main_agent_manager.build_identity_profile()

    dirty = agent is None
    if agent is None:
        agent = Agent(
            name=expected_name,
            status="provisioning",
            board_id=None,
            gateway_id=gateway.id,
            is_board_lead=False,
            openclaw_session_id=expected_session_key,
            heartbeat_config=DEFAULT_HEARTBEAT_CONFIG.copy(),
            identity_profile=profile,
        )
        self.session.add(agent)

    # A main agent is never attached to a board and is never a board lead.
    if agent.board_id is not None:
        agent.board_id = None
        dirty = True
    if agent.is_board_lead:
        agent.is_board_lead = False
        dirty = True

    # Fields that must always mirror the gateway / naming strategy.
    for field_name, wanted in (
        ("gateway_id", gateway.id),
        ("name", expected_name),
        ("openclaw_session_id", expected_session_key),
    ):
        if getattr(agent, field_name) != wanted:
            setattr(agent, field_name, wanted)
            dirty = True

    # Fields that are only filled in when missing; existing values are kept.
    if agent.heartbeat_config is None:
        agent.heartbeat_config = DEFAULT_HEARTBEAT_CONFIG.copy()
        dirty = True
    if agent.identity_profile is None:
        agent.identity_profile = profile
        dirty = True
    if not agent.status:
        agent.status = "provisioning"
        dirty = True

    if dirty:
        agent.updated_at = utcnow()
        self.session.add(agent)
    return agent, dirty
async def gateway_has_main_agent_entry(self, gateway: Gateway) -> bool:
    """Probe the gateway runtime for the main agent's entry.

    Issues an ``agents.files.list`` call for the gateway's main agent id.

    Returns:
        ``False`` when the gateway has no URL, or when the gateway error text
        contains "not found", "unknown agent" or "no such agent"
        (case-insensitive). ``True`` when the call succeeds or fails with any
        other gateway error.
    """
    if not gateway.url:
        return False

    rpc_config = GatewayClientConfig(
        url=gateway.url,
        token=gateway.token,
        allow_insecure_tls=gateway.allow_insecure_tls,
        disable_device_pairing=gateway.disable_device_pairing,
    )
    main_agent_id = GatewayAgentIdentity.openclaw_agent_id(gateway)
    try:
        await openclaw_call("agents.files.list", {"agentId": main_agent_id}, config=rpc_config)
    except OpenClawGatewayError as exc:
        lowered = str(exc).lower()
        missing_markers = ("not found", "unknown agent", "no such agent")
        # Only an explicit "agent is missing" message counts as absent; any
        # other gateway failure is reported as "entry present".
        return not any(marker in lowered for marker in missing_markers)
    return True
async def assert_gateway_runtime_compatible(
    self,
    *,
    url: str,
    token: str | None,
    allow_insecure_tls: bool = False,
    disable_device_pairing: bool = False,
) -> None:
    """Validate that a gateway runtime passes the version compatibility check.

    Args:
        url: Gateway endpoint to check.
        token: Optional gateway token.
        allow_insecure_tls: Forwarded to the gateway client config.
        disable_device_pairing: Forwarded to the gateway client config.

    Raises:
        HTTPException: 502 when the compatibility check itself fails with a
            gateway error; 422 when the check reports an incompatible runtime.
    """
    probe_config = GatewayClientConfig(
        url=url,
        token=token,
        allow_insecure_tls=allow_insecure_tls,
        disable_device_pairing=disable_device_pairing,
    )
    try:
        verdict = await check_gateway_version_compatibility(probe_config)
    except OpenClawGatewayError as exc:
        reason = normalize_gateway_error_message(str(exc))
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Gateway compatibility check failed: {reason}",
        ) from exc

    if verdict.compatible:
        return
    raise HTTPException(
        status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
        detail=verdict.message or "Gateway runtime version is not supported.",
    )
async def provision_main_agent_record(
    self,
    gateway: Gateway,
    agent: Agent,
    *,
    user: User | None,
    action: str,
    notify: bool,
) -> Agent:
    """Run the lifecycle orchestrator for a gateway-main agent row.

    Args:
        gateway: Gateway that owns the agent.
        agent: Main agent row to provision.
        user: Acting user, if any; forwarded to the orchestrator.
        action: Lifecycle action name forwarded to the orchestrator.
        notify: Forwarded as the orchestrator's ``wake`` flag.

    Returns:
        The agent returned by the orchestrator.

    Raises:
        HTTPException: Re-raised unchanged after being logged.
    """
    lifecycle = AgentLifecycleOrchestrator(self.session)
    try:
        result = await lifecycle.run_lifecycle(
            gateway=gateway,
            agent_id=agent.id,
            board=None,
            user=user,
            action=action,
            auth_token=None,
            force_bootstrap=False,
            reset_session=False,
            wake=notify,
            deliver_wakeup=True,
            wakeup_verb=None,
            clear_confirm_token=False,
            raise_gateway_errors=True,
        )
    except HTTPException:
        self.logger.error(
            "gateway.main_agent.provision_failed gateway_id=%s agent_id=%s action=%s",
            gateway.id,
            agent.id,
            action,
        )
        raise
    else:
        self.logger.info(
            "gateway.main_agent.provision_success gateway_id=%s agent_id=%s action=%s",
            gateway.id,
            result.id,
            action,
        )
        return result
async def ensure_main_agent(
    self,
    gateway: Gateway,
    auth: AuthContext,
    *,
    action: str = "provision",
) -> Agent:
    """Upsert the gateway's main agent row and provision it with notification.

    Args:
        gateway: Gateway whose main agent should exist.
        auth: Auth context; its ``user`` is forwarded to provisioning.
        action: Lifecycle action name, ``"provision"`` by default.

    Returns:
        The provisioned agent.
    """
    self.logger.log(
        TRACE_LEVEL,
        "gateway.main_agent.ensure.start gateway_id=%s action=%s",
        gateway.id,
        action,
    )
    main_agent, _changed = await self.upsert_main_agent_record(gateway)
    provisioned = await self.provision_main_agent_record(
        gateway,
        main_agent,
        user=auth.user,
        action=action,
        notify=True,
    )
    return provisioned
async def ensure_gateway_agents_exist(self, gateways: list[Gateway]) -> None:
    """Make sure every gateway has a provisioned main agent.

    For each gateway the main agent row is upserted, then provisioned
    (without notification and with no acting user) when any of these hold:

    * the upsert created or corrected the row,
    * the row has no ``agent_token_hash``,
    * the gateway runtime reports no entry for the main agent.

    The runtime probe is a gateway round-trip, so it is evaluated last and
    skipped whenever the local checks already require provisioning.

    Args:
        gateways: Gateways to check, processed sequentially in order.
    """
    for gateway in gateways:
        agent, record_changed = await self.upsert_main_agent_record(gateway)
        # ``or`` short-circuits: the RPC probe only runs when the cheap local
        # checks did not already decide that provisioning is needed.
        needs_provision = (
            record_changed
            or not agent.agent_token_hash
            or not await self.gateway_has_main_agent_entry(gateway)
        )
        if not needs_provision:
            continue
        await self.provision_main_agent_record(
            gateway,
            agent,
            user=None,
            action="provision",
            notify=False,
        )
2026-05-20 04:13:32 -05:00
async def preview_gateway_agent_import(
    self,
    *,
    gateway: Gateway,
) -> GatewayAgentImportPreviewResponse:
    """Return runtime agents that can be imported into Pipeline metadata.

    Lists the agents known to the gateway runtime (``agents.list``), matches
    each one against the agent rows already stored for this gateway (first by
    session key, then by runtime agent key) and reports which ones are not
    tracked yet.

    Args:
        gateway: Gateway whose runtime should be inspected.

    Returns:
        A preview with one candidate per runtime agent that has a non-blank
        string ``id``, sorted case-insensitively by runtime id, plus
        discovered / importable / already-tracked counts.

    Raises:
        HTTPException: 502 when the runtime query fails with a gateway error.
    """
    rpc_config = GatewayClientConfig(
        url=gateway.url,
        token=gateway.token,
        allow_insecure_tls=gateway.allow_insecure_tls,
        disable_device_pairing=gateway.disable_device_pairing,
    )
    try:
        listing = await openclaw_call("agents.list", config=rpc_config)
    except OpenClawGatewayError as exc:
        reason = normalize_gateway_error_message(str(exc))
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Gateway runtime query failed: {reason}",
        ) from exc

    # sessions.list with any non-trivial limit blocks the gateway's Node.js
    # event loop on large session stores, making the gateway unresponsive. The
    # fallback key "agent:{id}:main" is the standard format used for every
    # agent, so omitting the sessions lookup has no practical effect on
    # matching accuracy. The mapping therefore stays empty.
    session_by_agent_key: dict[str, str] = {}

    tracked_agents = await Agent.objects.filter_by(gateway_id=gateway.id).all(self.session)
    tracked_by_session_key: dict[str, Agent] = {}
    for tracked_agent in tracked_agents:
        stored_key = (tracked_agent.openclaw_session_id or "").strip()
        if stored_key:
            tracked_by_session_key[stored_key] = tracked_agent
    tracked_by_runtime_key = {
        resolve_agent_key(tracked_agent): tracked_agent for tracked_agent in tracked_agents
    }

    # (runtime id, display name, session key, board id parsed from the key)
    discovered: list[tuple[str, str | None, str, UUID | None]] = []
    for entry in _as_dict_list(listing):
        raw_id = entry.get("id")
        if not isinstance(raw_id, str):
            continue
        runtime_id = raw_id.strip()
        if not runtime_id:
            continue
        raw_name = entry.get("name")
        display_name = raw_name.strip() if isinstance(raw_name, str) else None
        session_key = _derive_session_key_for_runtime_agent(runtime_id, session_by_agent_key)
        discovered.append(
            (runtime_id, display_name, session_key, _parse_lead_board_id(session_key))
        )

    lead_board_ids: set[UUID] = {
        parsed_id for *_, parsed_id in discovered if parsed_id is not None
    }

    # Only boards that belong to this gateway and organization count as a
    # valid inferred board for a lead agent.
    board_by_id: dict[UUID, Board] = {}
    if lead_board_ids:
        board_query = select(Board).where(
            col(Board.id).in_(lead_board_ids),
            col(Board.organization_id) == gateway.organization_id,
            col(Board.gateway_id) == gateway.id,
        )
        board_rows = (await self.session.exec(board_query)).all()
        board_by_id = {board.id: board for board in board_rows}

    candidates: list[GatewayAgentImportCandidate] = []
    for runtime_id, display_name, session_key, parsed_board_id in discovered:
        match = tracked_by_session_key.get(session_key)
        if match is None:
            match = tracked_by_runtime_key.get(runtime_id)

        if parsed_board_id is not None and parsed_board_id in board_by_id:
            inferred_board_id = parsed_board_id
        else:
            inferred_board_id = None

        candidates.append(
            GatewayAgentImportCandidate(
                gateway_agent_id=runtime_id,
                gateway_agent_name=display_name,
                session_key=session_key,
                existing_agent_id=None if match is None else match.id,
                importable=match is None,
                inferred_board_id=inferred_board_id,
                inferred_is_board_lead=inferred_board_id is not None,
            )
        )

    candidates.sort(key=lambda item: item.gateway_agent_id.lower())
    total = len(candidates)
    importable_total = sum(1 for item in candidates if item.importable)
    return GatewayAgentImportPreviewResponse(
        gateway_id=gateway.id,
        discovered_count=total,
        importable_count=importable_total,
        already_tracked_count=total - importable_total,
        candidates=candidates,
    )
async def import_gateway_agents(
    self,
    *,
    gateway: Gateway,
    gateway_agent_ids: list[str],
    reconcile_after_import: bool = False,
    rotate_tokens: bool = True,
    reset_sessions: bool = True,
    force_bootstrap: bool = True,
) -> GatewayAgentImportResponse:
    """Import selected runtime agents as Pipeline agent rows.

    A fresh import preview is computed, and each requested runtime id that
    maps to an importable candidate becomes a new ``Agent`` row with status
    ``"offline"``. Ids that are unknown to the preview or already tracked are
    reported as skipped. The session is committed before any reconcile runs.

    Args:
        gateway: Gateway the agents are imported for.
        gateway_agent_ids: Runtime agent ids to import. Values are stripped;
            blanks and repeats are ignored, keeping first-seen order.
        reconcile_after_import: When true and at least one row was imported,
            run ``reconcile_imported_agents`` for the new rows.
        rotate_tokens: Forwarded to the reconcile step.
        reset_sessions: Forwarded to the reconcile step.
        force_bootstrap: Forwarded to the reconcile step.

    Returns:
        Counts and ids of imported and skipped agents, plus the reconcile
        summary when a reconcile ran (otherwise ``None``).
    """
    preview = await self.preview_gateway_agent_import(gateway=gateway)
    candidates_by_runtime_id = {item.gateway_agent_id: item for item in preview.candidates}

    # dict.fromkeys de-duplicates while preserving first-seen order.
    stripped_ids = (raw_id.strip() for raw_id in gateway_agent_ids)
    requested_ids = [runtime_id for runtime_id in dict.fromkeys(stripped_ids) if runtime_id]

    new_rows: list[Agent] = []
    skipped_gateway_agent_ids: list[str] = []
    for runtime_id in requested_ids:
        candidate = candidates_by_runtime_id.get(runtime_id)
        if candidate is None or not candidate.importable:
            skipped_gateway_agent_ids.append(runtime_id)
            continue
        row = Agent(
            name=candidate.gateway_agent_name or f"Imported {runtime_id}",
            board_id=candidate.inferred_board_id,
            gateway_id=gateway.id,
            status="offline",
            heartbeat_config=DEFAULT_HEARTBEAT_CONFIG.copy(),
            openclaw_session_id=candidate.session_key,
            is_board_lead=candidate.inferred_is_board_lead,
        )
        self.session.add(row)
        new_rows.append(row)

    # One flush for the whole batch instead of one per row. Ids are read
    # after the flush and before the commit, as the per-row version did.
    if new_rows:
        await self.session.flush()
    imported_agent_ids: list[UUID] = [row.id for row in new_rows]
    await self.session.commit()

    reconcile_summary: GatewayAgentImportReconcileSummary | None = None
    if reconcile_after_import and imported_agent_ids:
        reconcile_summary = await self.reconcile_imported_agents(
            gateway=gateway,
            agent_ids=imported_agent_ids,
            rotate_tokens=rotate_tokens,
            reset_sessions=reset_sessions,
            force_bootstrap=force_bootstrap,
        )

    return GatewayAgentImportResponse(
        gateway_id=gateway.id,
        imported_count=len(imported_agent_ids),
        skipped_count=len(skipped_gateway_agent_ids),
        imported_agent_ids=imported_agent_ids,
        skipped_gateway_agent_ids=skipped_gateway_agent_ids,
        reconcile=reconcile_summary,
    )
async def reconcile_imported_agents(
    self,
    *,
    gateway: Gateway,
    agent_ids: list[UUID],
    rotate_tokens: bool,
    reset_sessions: bool,
    force_bootstrap: bool,
) -> GatewayAgentImportReconcileSummary:
    """Run post-import lifecycle reconcile for imported agent rows.

    Each agent found for ``agent_ids`` on this gateway is pushed through the
    lifecycle orchestrator with action ``"update"`` unless it is skipped:

    * its board is missing or not mapped to this gateway/organization,
    * it has no board and is not the gateway-main session,
    * ``rotate_tokens`` is false.

    A failure for one agent is recorded and does not stop the others.

    Args:
        gateway: Gateway the agents belong to.
        agent_ids: Ids of agent rows to reconcile; ids that do not resolve to
            a row on this gateway are not counted as attempted.
        rotate_tokens: Must be true for any agent to be reconciled.
        reset_sessions: Forwarded to the orchestrator as ``reset_session``.
        force_bootstrap: Forwarded to the orchestrator.

    Returns:
        Summary with attempted / updated / skipped counts and one error entry
        per skipped or failed agent.
    """
    if not agent_ids:
        return GatewayAgentImportReconcileSummary(
            attempted=0,
            updated=0,
            skipped=0,
            errors=[],
        )

    agents = (
        await self.session.exec(
            select(Agent).where(
                col(Agent.id).in_(agent_ids),
                col(Agent.gateway_id) == gateway.id,
            )
        )
    ).all()

    boards_by_id: dict[UUID, Board] = {}
    board_ids = {agent.board_id for agent in agents if agent.board_id is not None}
    if board_ids:
        board_rows = (
            await self.session.exec(
                select(Board).where(
                    col(Board.id).in_(board_ids),
                    col(Board.gateway_id) == gateway.id,
                    col(Board.organization_id) == gateway.organization_id,
                )
            )
        ).all()
        boards_by_id = {board.id: board for board in board_rows}

    updated = 0
    # Every skipped or failed agent contributes exactly one entry here, so the
    # skipped count is the length of this list.
    errors: list[GatewayAgentReconcileError] = []

    def record_skip(agent_id: UUID, message: str) -> None:
        errors.append(GatewayAgentReconcileError(agent_id=agent_id, message=message))

    main_session_key = GatewayAgentIdentity.session_key(gateway)
    for agent in agents:
        # Read the id up front so the error handlers below never have to touch
        # ORM state after a failed lifecycle run.
        agent_id = agent.id

        board: Board | None = None
        if agent.board_id is not None:
            board = boards_by_id.get(agent.board_id)
            if board is None:
                record_skip(
                    agent_id,
                    "Skipping reconcile: board is missing or not mapped to gateway.",
                )
                continue
        elif (agent.openclaw_session_id or "").strip() != main_session_key:
            record_skip(
                agent_id,
                "Skipping reconcile: boardless imported agent is not the gateway-main "
                "session and cannot be reprovisioned automatically.",
            )
            continue

        if not rotate_tokens:
            record_skip(
                agent_id,
                "Skipping reconcile: rotate_tokens=false is unsupported for imported "
                "agents without an existing backend token hash.",
            )
            continue

        try:
            await AgentLifecycleOrchestrator(self.session).run_lifecycle(
                gateway=gateway,
                agent_id=agent_id,
                board=board,
                user=None,
                action="update",
                auth_token=None,
                force_bootstrap=force_bootstrap,
                reset_session=reset_sessions,
                wake=False,
                deliver_wakeup=False,
                wakeup_verb="updated",
                clear_confirm_token=False,
                raise_gateway_errors=True,
            )
        except HTTPException as exc:
            detail = exc.detail if isinstance(exc.detail, str) else str(exc.detail)
            record_skip(agent_id, f"Reconcile failed: {detail}")
        except Exception as exc:
            # Deliberately broad: one agent's failure must not abort the batch.
            # Log the traceback, since the summary only carries str(exc).
            self.logger.exception(
                "gateway.agent_import.reconcile_failed gateway_id=%s agent_id=%s",
                gateway.id,
                agent_id,
            )
            record_skip(agent_id, f"Reconcile failed: {exc}")
        else:
            updated += 1

    return GatewayAgentImportReconcileSummary(
        attempted=len(agents),
        updated=updated,
        skipped=len(errors),
        errors=errors,
    )
async def clear_agent_foreign_keys(self, *, agent_id: UUID) -> None:
    """Detach an agent from the rows that reference it, without committing.

    Tasks assigned to the agent are unassigned (in-progress ones are also
    moved back to ``"inbox"`` with ``in_progress_at`` cleared), and the agent
    reference is nulled on activity events, approvals and board webhooks.
    Every update is issued with ``commit=False``.

    Args:
        agent_id: Id of the agent being detached.
    """
    stamp = utcnow()
    assigned_to_agent = col(Task.assigned_agent_id) == agent_id

    # In-progress tasks first: they also need their status reset. The second
    # update then handles every other task still assigned to the agent.
    await crud.update_where(
        self.session,
        Task,
        assigned_to_agent,
        col(Task.status) == "in_progress",
        assigned_agent_id=None,
        status="inbox",
        in_progress_at=None,
        updated_at=stamp,
        commit=False,
    )
    await crud.update_where(
        self.session,
        Task,
        assigned_to_agent,
        col(Task.status) != "in_progress",
        assigned_agent_id=None,
        updated_at=stamp,
        commit=False,
    )

    # These two models only need the reference nulled.
    for model in (ActivityEvent, Approval):
        await crud.update_where(
            self.session,
            model,
            col(model.agent_id) == agent_id,
            agent_id=None,
            commit=False,
        )

    await crud.update_where(
        self.session,
        BoardWebhook,
        col(BoardWebhook.agent_id) == agent_id,
        agent_id=None,
        updated_at=stamp,
        commit=False,
    )
async def sync_templates(
    self,
    gateway: Gateway,
    *,
    query: GatewayTemplateSyncQuery,
    auth: AuthContext,
) -> GatewayTemplatesSyncResult:
    """Ensure the gateway's main agent exists, then sync its templates.

    Args:
        gateway: Gateway whose templates are synced.
        query: Sync flags; each field is copied into the provisioning options.
        auth: Auth context; its ``user`` is recorded in the sync options.

    Returns:
        The result produced by the provisioning service.
    """
    self.logger.log(
        TRACE_LEVEL,
        "gateway.templates.sync.start gateway_id=%s include_main=%s",
        gateway.id,
        query.include_main,
    )
    await self.ensure_gateway_agents_exist([gateway])

    provisioning = OpenClawProvisioningService(self.session)
    options = GatewayTemplateSyncOptions(
        user=auth.user,
        include_main=query.include_main,
        lead_only=query.lead_only,
        reset_sessions=query.reset_sessions,
        rotate_tokens=query.rotate_tokens,
        force_bootstrap=query.force_bootstrap,
        overwrite=query.overwrite,
        board_id=query.board_id,
    )
    sync_result = await provisioning.sync_gateway_templates(gateway, options)

    self.logger.info("gateway.templates.sync.success gateway_id=%s", gateway.id)
    return sync_result