# Closer/seed/fix_depth5_grammar.py

#!/usr/bin/env python3
"""
Q5 / Q2 — fix grammar errors in depth-5 (and a handful of depth-4) questions.

Two families of template-generated errors:

  1. emotional_intimacy — "When does [plural/compound noun] affect..."
     Subject-verb disagreement: "does" should be "do".
     Also: "your deeper fears" (awkward) → "your deep fears";
           "emotional needs you hide" (missing article) → "the emotional needs you hide";
           "the parts of you you protect" (double-you) → "the parts of yourself you protect".

  2. stress_080 – stress_150 (every 5th) — "after [topic] affects the way we treat each other"
     Reads as machine-made and wordy: "the way" is filler. Fixed to
     "when [topic] affects how we treat each other".

Edits BOTH the source JSON and the shipped asset DB (data only — Room hash untouched).
build_db.py is NOT run.  One-off migration kept in repo for traceability.
"""
import json
import os
import sqlite3

# Directory containing this script; every other path is resolved against it.
HERE = os.path.dirname(os.path.abspath(__file__))

# Shipped asset database, reached by going one level above this script's
# directory and down into the app's assets.
DB_PATH = os.path.join(
    HERE, os.pardir, "app", "src", "main", "assets", "database", "app.db"
)

# Source question files that live in the "questions" folder next to this script.
EI_JSON, ST_JSON = (
    os.path.join(HERE, "questions", filename)
    for filename in ("emotional_intimacy.json", "stress.json")
)

# ---------------------------------------------------------------------------
# 1. Exact rewrites keyed by question id
#
# Each value is the complete corrected sentence that replaces the question's
# current text. "d4" / "d5" in the group comments below refer to the
# depth-4 / depth-5 questions mentioned in the module docstring.
# ---------------------------------------------------------------------------
REWRITES: dict[str, str] = {
    # --- deeper fears block (076 d4 + 077-080 d5) ---
    # "your deeper fears" -> "your deep fears"; 077 also gets does -> do.
    "emotional_intimacy_076": "What do you wish I understood about your deep fears?",
    "emotional_intimacy_077": "When do your deep fears affect how close you feel to me?",
    "emotional_intimacy_078": "What would help you share more honestly about your deep fears?",
    "emotional_intimacy_079": "How can I respond to your deep fears in a way that feels safe instead of fixing you?",
    "emotional_intimacy_080": "What is one boundary or reassurance that would help with your deep fears?",

    # --- emotional needs you hide block (081 d4 + 082-085 d5) ---
    # Adds the missing article: "the emotional needs you hide".
    "emotional_intimacy_081": "What do you wish I understood about the emotional needs you hide?",
    "emotional_intimacy_082": "When do the emotional needs you hide affect how close you feel to me?",
    "emotional_intimacy_083": "What would help you share more honestly about the emotional needs you hide?",
    "emotional_intimacy_084": "How can I respond to the emotional needs you hide in a way that feels safe instead of fixing you?",
    "emotional_intimacy_085": "What is one boundary or reassurance that would help with the emotional needs you hide?",

    # --- moments you feel unseen (087 d5 only — does→do + article) ---
    "emotional_intimacy_087": "When do the moments you feel unseen affect how close you feel to me?",

    # --- childhood patterns / shame (092, 097 d5 — does→do) ---
    "emotional_intimacy_092": "When do childhood patterns affect how close you feel to me?",
    "emotional_intimacy_097": "When do shame and tenderness affect how close you feel to me?",

    # --- parts of yourself block (146 d4 + 147-150 d5) ---
    # "the parts of you you protect" -> "the parts of yourself you protect".
    "emotional_intimacy_146": "What do you wish I understood about the parts of yourself you protect?",
    "emotional_intimacy_147": "When do the parts of yourself you protect affect how close you feel to me?",
    "emotional_intimacy_148": "What would help you share more honestly about the parts of yourself you protect?",
    "emotional_intimacy_149": "How can I respond to the parts of yourself you protect in a way that feels safe instead of fixing you?",
    "emotional_intimacy_150": "What is one boundary or reassurance that would help with the parts of yourself you protect?",
}

# Stress block: "after [topic] affects the way we treat each other"
# → "when [topic] affects how we treat each other"
STRESS_SUFFIX_OLD = " affects the way we treat each other?"
STRESS_SUFFIX_NEW = " affects how we treat each other?"
STRESS_PREFIX_OLD = "What repair do we need after "
STRESS_PREFIX_NEW = "What repair do we need when "

STRESS_IDS = [f"stress_{n:03d}" for n in range(80, 155, 5)]


# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------

def fix_stress_text(text: str) -> str | None:
    if text.startswith(STRESS_PREFIX_OLD) and text.endswith(STRESS_SUFFIX_OLD):
        topic = text[len(STRESS_PREFIX_OLD) : -len(STRESS_SUFFIX_OLD)]
        return f"{STRESS_PREFIX_NEW}{topic}{STRESS_SUFFIX_NEW}"
    return None


# ---------------------------------------------------------------------------
# JSON pass
# ---------------------------------------------------------------------------

def migrate_json(json_path: str, rewrites: dict, stress_ids: list[str]) -> int:
    """Apply the grammar fixes to one question JSON file, in place.

    Args:
        json_path: Path of a JSON file whose top-level object holds a
            ``"questions"`` list; each entry is an object with ``"id"`` and
            ``"text"`` keys.
        rewrites: Mapping of question id -> complete replacement text.
        stress_ids: Ids whose text is passed through ``fix_stress_text``.

    Returns:
        The number of questions whose text actually changed. The file is
        rewritten only when that number is non-zero, so running the
        migration a second time is a no-op that reports 0.
    """
    # Read and write as UTF-8 explicitly: the dump below uses
    # ensure_ascii=False, so with the platform default encoding any
    # non-ASCII character could be mis-decoded or fail to encode.
    with open(json_path, encoding="utf-8") as f:
        data = json.load(f)

    stress_id_set = set(stress_ids)  # O(1) membership inside the loop
    changed = 0
    for q in data.get("questions", []):
        qid = q["id"]
        old_text = q.get("text")
        new_text = old_text
        if qid in rewrites:
            new_text = rewrites[qid]
        if qid in stress_id_set:
            fixed = fix_stress_text(new_text)
            if fixed is not None:
                new_text = fixed
        # Count a question once, and only if its text really differs.
        if new_text != old_text:
            q["text"] = new_text
            changed += 1

    if changed:
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
            f.write("\n")
    return changed


# ---------------------------------------------------------------------------
# DB pass
# ---------------------------------------------------------------------------

def migrate_db(rewrites: dict, stress_ids: list[str]) -> int:
    con = sqlite3.connect(DB_PATH)
    cur = con.cursor()
    changed = 0

    for qid, new_text in rewrites.items():
        cur.execute("UPDATE question SET text=? WHERE id=?", (new_text, qid))
        if cur.rowcount:
            changed += 1

    for qid in stress_ids:
        row = cur.execute("SELECT text FROM question WHERE id=?", (qid,)).fetchone()
        if row:
            fixed = fix_stress_text(row[0])
            if fixed:
                cur.execute("UPDATE question SET text=? WHERE id=?", (fixed, qid))
                if cur.rowcount:
                    changed += 1

    con.commit()
    con.close()
    return changed


if __name__ == "__main__":
    # Source JSON first (one file per category), then the shipped asset DB.
    n = migrate_json(EI_JSON, REWRITES, [])
    print(f"JSON emotional_intimacy: {n} fixed")
    n = migrate_json(ST_JSON, {}, STRESS_IDS)
    print(f"JSON stress:             {n} fixed")
    n = migrate_db(REWRITES, STRESS_IDS)
    print(f"DB total:                {n} fixed")

    # Quick sanity check: verify no original bad text remains
    con = sqlite3.connect(DB_PATH)
    try:
        cur = con.cursor()
        hash_row = cur.execute(
            "SELECT identity_hash FROM room_master_table"
        ).fetchone()
        # Tolerate an empty room_master_table instead of crashing on [0].
        h = hash_row[0] if hash_row else None
        # The corrected wording "the emotional needs you hide" still contains
        # "needs you hide", so that pattern must exclude the fixed form or
        # the rewritten rows would always be reported as bad.
        bad = cur.execute(
            "SELECT id FROM question WHERE text LIKE '%When does%affect%'"
            " OR (text LIKE '%needs you hide%'"
            " AND text NOT LIKE '%the emotional needs you hide%')"
            " OR text LIKE '%of you you protect%'"
            " OR text LIKE '%deeper fears%'"
        ).fetchall()
    finally:
        con.close()
    print(f"Room hash: {h}")
    if bad:
        print(f"WARNING — still-bad rows: {[r[0] for r in bad]}")
    else:
        print("Verified: 0 bad rows remaining")