feat: db migration script

This commit is contained in:
null 2026-05-20 02:49:21 -05:00
parent 6a5669a42e
commit 58014b286d
2 changed files with 174 additions and 0 deletions

View File

@ -59,6 +59,59 @@ def run_migrations() -> None:
logger.info("Running database migrations.")
command.upgrade(_alembic_config(), "head")
logger.info("Database migrations complete.")
_warn_on_schema_drift()
def _warn_on_schema_drift() -> None:
    """Log an error-level warning if the live schema is missing model tables or columns.

    This catches the case where Alembic's version table reports 'head' but
    columns were never actually applied (e.g. a migration was inserted into the
    chain after the DB had already advanced past it). The service continues to
    start; the warning is intentionally loud so it surfaces in logs immediately.

    The check is best-effort: a failure to connect or to inspect is logged as
    ``schema_drift_check_failed`` and is never raised to the caller.
    """
    from sqlalchemy import create_engine
    from sqlalchemy import inspect as sa_inspect

    # Inspection needs a synchronous driver. Rewrite only the leading scheme:
    # a blanket str.replace() would also rewrite a matching substring anywhere
    # else in the URL (for example inside a password or a query parameter).
    sync_url = settings.database_url
    for scheme in ("postgresql+asyncpg://", "postgresql://", "postgres://"):
        if sync_url.startswith(scheme):
            sync_url = "postgresql+psycopg://" + sync_url[len(scheme):]
            break
    # Defensive: collapse a doubled driver suffix if the configured URL
    # already carries one.
    if "postgresql+psycopg+psycopg" in sync_url:
        sync_url = sync_url.replace("postgresql+psycopg+psycopg", "postgresql+psycopg")

    engine = None
    missing: list[str] = []
    try:
        engine = create_engine(sync_url, pool_pre_ping=True)
        inspector = sa_inspect(engine)
        for table_key, table in SQLModel.metadata.tables.items():
            # The metadata key is "schema.table" for schema-qualified tables,
            # so query the inspector with the bare name plus the schema.
            if not inspector.has_table(table.name, schema=table.schema):
                missing.append(f"TABLE {table_key}")
                continue
            db_cols = {
                col["name"]
                for col in inspector.get_columns(table.name, schema=table.schema)
            }
            missing.extend(
                f"COLUMN {table_key}.{col.name}"
                for col in table.columns
                if col.name not in db_cols
            )
    except Exception as exc:
        # Deliberately broad: this check must never prevent startup.
        logger.error("schema_drift_check_failed", error=str(exc))
    finally:
        if engine is not None:
            engine.dispose()

    # Anything collected before a mid-inspection failure is still reported.
    if missing:
        logger.error(
            "schema_drift_detected",
            missing=missing,
            hint="DB schema does not match models. Run scripts/check_schema.py for details.",
        )
async def init_db() -> None:

View File

@ -0,0 +1,121 @@
"""check_schema.py — verify the live DB schema matches SQLModel model definitions.
Connects to the database, inspects every table and column defined in the
SQLModel metadata, and reports anything that is absent in the real schema.
Exits 1 if drift is found, 0 if clean.
Usage:
python scripts/check_schema.py
DATABASE_URL=postgresql+psycopg://... python scripts/check_schema.py
This catches the class of problem where Alembic's version table says the DB is
up-to-date but columns were never actually created (e.g. a migration was
inserted into the chain after the DB had already advanced past it).
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
# Make the app package importable when run directly.
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
def _resolve_database_url() -> str | None:
    """Return the database URL from the environment or the project ``.env`` file.

    A non-empty ``DATABASE_URL`` in the process environment wins. Otherwise the
    ``.env`` file located one directory above this script's directory is
    scanned and the value of its first ``DATABASE_URL`` assignment is returned.

    Returns:
        The URL string, or ``None`` when neither source provides one.
    """
    url = os.getenv("DATABASE_URL")
    if url:
        return url

    env_file = Path(__file__).resolve().parents[1] / ".env"
    if not env_file.is_file():
        return None

    for raw_line in env_file.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        # Accept the shell-style "export KEY=value" form as well.
        if line.startswith("export "):
            line = line[len("export "):].lstrip()
        key, sep, value = line.partition("=")
        if not sep or key.strip() != "DATABASE_URL":
            continue
        value = value.strip()
        # .env values are often quoted; the quotes are not part of the URL.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        return value
    return None
def _to_sync_url(url: str) -> str:
    """Convert an async-driver database URL to a sync equivalent for inspection.

    Only the leading scheme is rewritten; the rest of the URL is kept verbatim.
    URLs whose scheme is not listed below (including ones that already name a
    sync driver) are returned unchanged.
    """
    replacements = (
        ("postgresql+asyncpg://", "postgresql+psycopg://"),
        # aiosqlite is the async SQLite driver, so its URLs use the
        # "sqlite+aiosqlite" scheme. Swapping only the scheme keeps the
        # path's slashes, and therefore relative vs. absolute, intact.
        ("sqlite+aiosqlite://", "sqlite://"),
        ("postgresql://", "postgresql+psycopg://"),
        ("postgres://", "postgresql+psycopg://"),
    )
    for old, new in replacements:
        if url.startswith(old):
            return new + url[len(old):]
    return url
def main() -> int:
    """Compare the live database schema with the SQLModel metadata and report drift.

    The report is printed to stdout.

    Returns:
        0 when every model table and column exists in the database; 1 when
        drift is found, no database URL can be resolved, or the database
        cannot be reached or inspected.
    """
    from sqlalchemy import create_engine
    from sqlalchemy import inspect as sa_inspect
    from sqlmodel import SQLModel

    # Import all models so SQLModel.metadata is fully populated.
    import app.models as _  # noqa: F401

    database_url = _resolve_database_url()
    if not database_url:
        print("ERROR: DATABASE_URL not set and not found in .env")
        return 1

    sync_url = _to_sync_url(database_url)
    engine = None
    try:
        engine = create_engine(sync_url, pool_pre_ping=True)
        inspector = sa_inspect(engine)
    except Exception as exc:
        print(f"ERROR: could not connect to database: {exc}")
        # The engine can already exist when only the inspector setup failed.
        if engine is not None:
            engine.dispose()
        return 1

    missing_tables: list[str] = []
    missing_columns: list[str] = []
    try:
        for table_key, table in SQLModel.metadata.tables.items():
            # The metadata key is "schema.table" for schema-qualified tables,
            # so query the inspector with the bare name plus the schema.
            if not inspector.has_table(table.name, schema=table.schema):
                missing_tables.append(table_key)
                continue
            db_columns = {
                col["name"]
                for col in inspector.get_columns(table.name, schema=table.schema)
            }
            model_columns = {col.name for col in table.columns}
            missing_columns.extend(
                f"{table_key}.{col}" for col in sorted(model_columns - db_columns)
            )
    except Exception as exc:
        print(f"ERROR: failed to inspect schema: {exc}")
        return 1
    finally:
        engine.dispose()

    if not missing_tables and not missing_columns:
        model_table_count = len(SQLModel.metadata.tables)
        print(f"OK: all {model_table_count} model tables and their columns exist in the database")
        return 0

    print("SCHEMA DRIFT DETECTED\n")
    if missing_tables:
        print("Missing tables (exist in models, not in DB):")
        for t in sorted(missing_tables):
            print(f" - {t}")
        print()
    if missing_columns:
        print("Missing columns (exist in models, not in DB):")
        for c in missing_columns:
            print(f" - {c}")
        print()
    print("Possible causes:")
    print(" 1. A migration was inserted into the chain after the DB had already")
    print(" advanced past it — run the migration manually or add the columns.")
    print(" 2. A new model field has no migration yet — run:")
    print(" alembic revision --autogenerate -m 'add <field>'")
    print(" 3. The DB was populated from an older schema — run:")
    print(" alembic upgrade head")
    return 1
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())