feat: db migration script
This commit is contained in:
parent
6a5669a42e
commit
58014b286d
|
|
@ -59,6 +59,59 @@ def run_migrations() -> None:
|
|||
logger.info("Running database migrations.")
|
||||
command.upgrade(_alembic_config(), "head")
|
||||
logger.info("Database migrations complete.")
|
||||
_warn_on_schema_drift()
|
||||
|
||||
|
||||
def _warn_on_schema_drift() -> None:
    """Log an error if the live schema is missing model tables or columns.

    This catches the case where Alembic's version table reports 'head' but
    columns were never actually applied (e.g. a migration was inserted into the
    chain after the DB had already advanced past it). The check never raises:
    any failure is logged as ``schema_drift_check_failed`` and the service
    continues to start. Drift is logged at error level so it is loud in logs.
    """
    from sqlalchemy import create_engine
    from sqlalchemy import inspect as sa_inspect

    # Schema inspection needs a synchronous driver. Rewrite only the scheme
    # prefix so text later in the URL (credentials, query string) is never
    # touched. A URL that already names a driver other than asyncpg (for
    # example "postgresql+psycopg://") matches none of these and is used
    # unchanged, so no double "+psycopg" prefix can be produced.
    sync_url = settings.database_url
    for prefix in ("postgresql+asyncpg://", "postgresql://", "postgres://"):
        if sync_url.startswith(prefix):
            sync_url = "postgresql+psycopg://" + sync_url[len(prefix):]
            break

    engine = None  # stays None if create_engine() itself fails
    missing: list[str] = []
    try:
        engine = create_engine(sync_url, pool_pre_ping=True)
        inspector = sa_inspect(engine)
        for table_name, table in SQLModel.metadata.tables.items():
            if not inspector.has_table(table_name):
                missing.append(f"TABLE {table_name}")
                continue
            db_cols = {col["name"] for col in inspector.get_columns(table_name)}
            missing.extend(
                f"COLUMN {table_name}.{col.name}"
                for col in table.columns
                if col.name not in db_cols
            )
    except Exception as exc:
        # Best-effort check: report the failure but never block startup.
        logger.error("schema_drift_check_failed", error=str(exc))
    finally:
        if engine is not None:
            engine.dispose()

    # Anything collected before a mid-inspection failure is still reported.
    if missing:
        logger.error(
            "schema_drift_detected",
            missing=missing,
            hint="DB schema does not match models. Run scripts/check_schema.py for details.",
        )
|
||||
|
||||
|
||||
async def init_db() -> None:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,121 @@
|
|||
"""check_schema.py — verify the live DB schema matches SQLModel model definitions.
|
||||
|
||||
Connects to the database, inspects every table and column defined in the
|
||||
SQLModel metadata, and reports anything that is absent in the real schema.
|
||||
Exits 1 if drift is found, 0 if clean.
|
||||
|
||||
Usage:
|
||||
python scripts/check_schema.py
|
||||
DATABASE_URL=postgresql+psycopg://... python scripts/check_schema.py
|
||||
|
||||
This catches the class of problem where Alembic's version table says the DB is
|
||||
up-to-date but columns were never actually created (e.g. a migration was
|
||||
inserted into the chain after the DB had already advanced past it).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Make the app package importable when run directly.
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
||||
|
||||
|
||||
def _resolve_database_url() -> str | None:
|
||||
url = os.getenv("DATABASE_URL")
|
||||
if url:
|
||||
return url
|
||||
env_file = Path(__file__).resolve().parents[1] / ".env"
|
||||
if env_file.exists():
|
||||
for line in env_file.read_text().splitlines():
|
||||
line = line.strip()
|
||||
if line.startswith("DATABASE_URL=") and not line.startswith("#"):
|
||||
return line.split("=", 1)[1].strip()
|
||||
return None
|
||||
|
||||
|
||||
def _to_sync_url(url: str) -> str:
    """Convert an async-driver database URL to a sync equivalent for inspection.

    Only the scheme prefix is rewritten; the rest of the URL is kept verbatim.
    URLs that match none of the known prefixes are returned unchanged.

    Args:
        url: A SQLAlchemy-style database URL.

    Returns:
        The URL with a synchronous driver in its scheme.
    """
    replacements = (
        ("postgresql+asyncpg://", "postgresql+psycopg://"),
        # aiosqlite is a SQLite driver, so the async form is
        # "sqlite+aiosqlite://". Swap only the scheme so the path's own
        # slashes (three for relative, four for absolute) are preserved.
        ("sqlite+aiosqlite://", "sqlite://"),
        ("postgresql://", "postgresql+psycopg://"),
        ("postgres://", "postgresql+psycopg://"),
    )
    for old, new in replacements:
        if url.startswith(old):
            return new + url[len(old):]
    return url
|
||||
|
||||
|
||||
def main() -> int:
    """Compare the live database schema with the SQLModel metadata.

    Every table and column defined on the models is looked up in the
    database; anything absent is listed in a report on stdout. Error
    messages go to stderr.

    Returns:
        0 when every model table and column exists in the database; 1 when
        drift is found, when no database URL could be resolved, or when the
        database could not be reached or inspected.
    """
    from sqlalchemy import create_engine
    from sqlalchemy import inspect as sa_inspect
    from sqlmodel import SQLModel

    # Import all models so SQLModel.metadata is fully populated.
    import app.models as _  # noqa: F401

    database_url = _resolve_database_url()
    if not database_url:
        print("ERROR: DATABASE_URL not set and not found in .env", file=sys.stderr)
        return 1

    sync_url = _to_sync_url(database_url)

    try:
        engine = create_engine(sync_url, pool_pre_ping=True)
    except Exception as exc:
        print(f"ERROR: could not connect to database: {exc}", file=sys.stderr)
        return 1

    missing_tables: list[str] = []
    missing_columns: list[str] = []

    # From here on the engine exists, so every exit path must dispose it,
    # including a failure while creating the inspector.
    try:
        try:
            inspector = sa_inspect(engine)
        except Exception as exc:
            print(f"ERROR: could not connect to database: {exc}", file=sys.stderr)
            return 1

        try:
            for table_name, table in SQLModel.metadata.tables.items():
                if not inspector.has_table(table_name):
                    missing_tables.append(table_name)
                    continue

                db_columns = {col["name"] for col in inspector.get_columns(table_name)}
                model_columns = {col.name for col in table.columns}
                missing_columns.extend(
                    f"{table_name}.{col}" for col in sorted(model_columns - db_columns)
                )
        except Exception as exc:
            print(f"ERROR: failed to inspect schema: {exc}", file=sys.stderr)
            return 1
    finally:
        engine.dispose()

    if not missing_tables and not missing_columns:
        model_table_count = len(SQLModel.metadata.tables)
        print(f"OK: all {model_table_count} model tables and their columns exist in the database")
        return 0

    print("SCHEMA DRIFT DETECTED\n")
    if missing_tables:
        print("Missing tables (exist in models, not in DB):")
        for t in sorted(missing_tables):
            print(f" - {t}")
        print()
    if missing_columns:
        print("Missing columns (exist in models, not in DB):")
        for c in missing_columns:
            print(f" - {c}")
        print()
    print("Possible causes:")
    print(" 1. A migration was inserted into the chain after the DB had already")
    print(" advanced past it — run the migration manually or add the columns.")
    print(" 2. A new model field has no migration yet — run:")
    print(" alembic revision --autogenerate -m 'add <field>'")
    print(" 3. The DB was populated from an older schema — run:")
    print(" alembic upgrade head")
    return 1
|
||||
|
||||
|
||||
# Script entry point: the process exit status is main()'s return value
# (0 = schema clean, 1 = drift or error).
if __name__ == "__main__":
    sys.exit(main())
|
||||
Loading…
Reference in New Issue