Closer/seed/fix_question_quality_q2.py

163 lines
6.7 KiB
Python

#!/usr/bin/env python3
"""
Q2 - fix remaining template grammar errors and abstract filler.
Edits BOTH the source JSON files and the shipped asset DB (data only, schema
untouched). build_db.py is NOT run. One-off migration kept for traceability.
"""
from __future__ import annotations
import json
import sqlite3
from pathlib import Path
# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Folder scanned by migrate_json() for *.json question files.
QUESTIONS_DIR = ROOT.joinpath("seed", "questions")
# SQLite file opened by migrate_db(), bad_rows() and the __main__ driver.
DB_PATH = ROOT.joinpath("app", "src", "main", "assets", "database", "app.db")
# Question id -> replacement text. migrate_json() writes these into the
# "text" field of matching JSON entries and migrate_db() into the `text`
# column of the `question` table, so both stores end up with the same wording.
REWRITES = {
    # Emotional intimacy: replace placeholder-ish phrasing with more natural language.
    "emotional_intimacy_076": "What would help me understand the fears you carry privately?",
    "emotional_intimacy_077": "When do those private fears make closeness feel harder?",
    "emotional_intimacy_078": "What would help you share those private fears more honestly?",
    "emotional_intimacy_079": "How can I respond to those fears in a way that feels safe instead of fixing you?",
    "emotional_intimacy_080": "What reassurance would help when those fears come up?",
    "emotional_intimacy_081": "What do you want me to notice about needs you usually keep private?",
    "emotional_intimacy_082": "When do private emotional needs make closeness feel harder?",
    "emotional_intimacy_083": "What would help you name those needs more honestly?",
    "emotional_intimacy_084": "How can I respond to those private needs in a way that feels safe instead of fixing you?",
    "emotional_intimacy_085": "What reassurance would help when those private needs come up?",
    # Subject/verb agreement: plural subjects take "do"; a singular
    # gerund phrase ("needing extra support") takes "does".
    "parenting_093": "How do in-laws and family expectations affect how close you feel to me?",
    "parenting_138": "How does needing extra support affect how close you feel to me?",
    "physical_intimacy_013": "When do warm greetings help you feel connected to me?",
    "stress_121": "When do coping habits make you feel distant from me?",
    "stress_141": "When do stress recovery routines make you feel distant from me?",
    # Stress prompts: replace abstract template wording and plural-subject +
    # singular-verb combinations (see the matching entries in BAD_SNIPPETS).
    "stress_020": "What should we avoid doing when asking for help feels hard?",
    "stress_025": "What should we avoid doing when low-energy days pile up?",
    "stress_035": "What should we avoid doing when unexpected problems pile up?",
    "stress_045": "What should we avoid doing when busy weeks feel intense?",
    "stress_050": "What should we avoid doing when one of us needs support?",
    "stress_065": "What should we avoid doing when health worries feel heavy?",
    "stress_070": "What should we avoid doing when burnout signs are showing?",
    "stress_075": "What should we avoid doing when we are resetting after stress?",
    "stress_154": "Which kind of help can feel like too much when asking for help is hard?",
    "stress_157": "When unexpected problems pile up, what usually helps first?",
    "stress_159": "How should we handle a serious talk when busy weeks feel intense?",
    "stress_160": "Which kind of support can feel like too much when you need to feel supported?",
    "stress_163": "When health worries feel heavy, what usually helps first?",
    "stress_165": "How should we handle a serious talk when we are resetting after stress?",
    "stress_177": "How should we handle a serious talk when one of us feels alone in stress?",
    "stress_197": "What should we protect when unexpected problems pile up?",
    "stress_200": "Which supports help when one of us needs to feel supported?",
    "stress_204": "Which supports help when burnout signs are showing?",
    "stress_205": "What should we protect when we are resetting after stress?",
    "stress_223": "How well do we talk when health worries feel heavy?",
    "stress_239": "When asking for help feels hard, what helps more?",
    "stress_242": "When unexpected problems pile up, what helps more?",
    "stress_245": "When one of us needs support, what helps more?",
    "stress_248": "When health worries feel heavy, what helps more?",
}
# Fragments of wording that should no longer appear after the migration.
# bad_rows() lower-cases each one and looks for it as a substring of the
# lower-cased `text` of every active question row.
BAD_SNIPPETS = (
    # Phrasing replaced in the emotional-intimacy prompts.
    "deeper fears",
    "deep fears",
    "emotional needs you hide",
    "needs you hide",
    "of you you protect",
    # Singular "does" in front of a plural subject.
    "does in-laws",
    "does special needs",
    "does warm greetings",
    "does coping habits",
    "does stress recovery routines",
    # Abstract "<topic> is high" filler in the stress prompts.
    "asking for help is high",
    "low energy days is high",
    "unexpected problems is high",
    "busy weeks is high",
    "feeling supported is high",
    "feeling alone in stress is high",
    "health worries is high",
    "burnout signs is high",
    "resetting after stress is high",
    # Abstract "<topic> is present" / "<topic> shows up" filler.
    "asking for help is present",
    "feeling supported is present",
    "unexpected problems shows up",
    "health worries shows up",
)
def migrate_json(
    questions_dir: "Path | str | None" = None,
    rewrites: "dict[str, str] | None" = None,
) -> int:
    """Apply the question-text rewrites to the source JSON files.

    Every ``*.json`` file directly inside the questions directory is loaded.
    Each entry of its top-level ``"questions"`` list whose ``"id"`` is a key of
    the rewrite table gets its ``"text"`` replaced. Only files that contained
    at least one matching id are written back (2-space indent, non-ASCII
    characters kept as-is, trailing newline).

    Args:
        questions_dir: Directory to scan. Defaults to ``QUESTIONS_DIR``.
        rewrites: Mapping of question id to replacement text. Defaults to
            ``REWRITES``.

    Returns:
        The number of question entries whose text was assigned. An entry is
        counted even if it already held the replacement text.

    Raises:
        RuntimeError: If at least one rewrite id was not found in any file.
            The check runs after the scan, so files that did match have
            already been rewritten when this is raised.
    """
    if questions_dir is None:
        questions_dir = QUESTIONS_DIR
    if rewrites is None:
        rewrites = REWRITES

    changed = 0
    remaining_ids = set(rewrites)
    for path in sorted(Path(questions_dir).glob("*.json")):
        # Explicit UTF-8: the dump below uses ensure_ascii=False, so with the
        # platform default encoding a non-UTF-8 locale could garble or reject
        # non-ASCII question text.
        with path.open(encoding="utf-8") as f:
            data = json.load(f)

        file_changed = False
        for q in data.get("questions", []):
            qid = q.get("id")
            if qid in rewrites:
                q["text"] = rewrites[qid]
                remaining_ids.discard(qid)
                changed += 1
                file_changed = True

        # Leave untouched files byte-identical by not re-serialising them.
        if file_changed:
            with path.open("w", encoding="utf-8") as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
                f.write("\n")

    if remaining_ids:
        raise RuntimeError(f"Missing JSON ids: {sorted(remaining_ids)}")
    return changed
def migrate_db(
    db_path: "Path | str | None" = None,
    rewrites: "dict[str, str] | None" = None,
) -> int:
    """Apply the question-text rewrites to the ``question`` table of the DB.

    Runs one parameterised ``UPDATE question SET text=? WHERE id=?`` per
    rewrite (in sorted id order) and commits once at the end, so an error
    part-way through leaves the database unchanged.

    Args:
        db_path: SQLite file to update. Defaults to ``DB_PATH``.
        rewrites: Mapping of question id to replacement text. Defaults to
            ``REWRITES``.

    Returns:
        The summed row count reported by the UPDATE statements. Ids that do
        not exist in the table contribute 0 and are not treated as an error.

    Raises:
        FileNotFoundError: If ``db_path`` is not an existing file.
    """
    if db_path is None:
        db_path = DB_PATH
    if rewrites is None:
        rewrites = REWRITES

    db_path = Path(db_path)
    # sqlite3.connect() would silently create an empty database at a missing
    # path; fail early instead of leaving a stray file behind.
    if not db_path.is_file():
        raise FileNotFoundError(f"Database file not found: {db_path}")

    con = sqlite3.connect(db_path)
    try:
        cur = con.cursor()
        changed = 0
        for qid, text in sorted(rewrites.items()):
            cur.execute("UPDATE question SET text=? WHERE id=?", (text, qid))
            changed += cur.rowcount
        con.commit()
        return changed
    finally:
        # Closing without a commit discards any uncommitted updates.
        con.close()
def bad_rows(
    db_path: "Path | str | None" = None,
    snippets: "tuple[str, ...] | list[str] | None" = None,
) -> list[tuple[str, str]]:
    """Return active questions whose text still contains a bad snippet.

    A row matches when its lower-cased ``text`` contains any lower-cased
    snippet as a substring. Snippets are bound as ``LIKE`` parameters, so a
    ``%`` or ``_`` inside a snippet acts as a wildcard rather than a literal.

    Args:
        db_path: SQLite file to inspect. Defaults to ``DB_PATH``.
        snippets: Substrings to look for. Defaults to ``BAD_SNIPPETS``.

    Returns:
        ``(id, text)`` tuples for rows with ``status='active'``, ordered by id.
        Empty when ``snippets`` is empty.

    Raises:
        FileNotFoundError: If ``db_path`` is not an existing file.
    """
    if db_path is None:
        db_path = DB_PATH
    if snippets is None:
        snippets = BAD_SNIPPETS

    snippets = tuple(snippets)
    # With nothing to search for the WHERE clause would become "AND ()",
    # which is a syntax error; nothing can match anyway.
    if not snippets:
        return []

    db_path = Path(db_path)
    # sqlite3.connect() would silently create an empty database at a missing
    # path; fail early instead of leaving a stray file behind.
    if not db_path.is_file():
        raise FileNotFoundError(f"Database file not found: {db_path}")

    # Only "?" placeholders are interpolated into the SQL text; the snippet
    # values themselves are passed as bound parameters.
    where = " OR ".join("LOWER(text) LIKE ?" for _ in snippets)
    params = [f"%{snippet.lower()}%" for snippet in snippets]
    con = sqlite3.connect(db_path)
    try:
        return con.execute(
            f"SELECT id, text FROM question WHERE status='active' AND ({where}) ORDER BY id",
            params,
        ).fetchall()
    finally:
        con.close()
if __name__ == "__main__":
    # Rewrite the JSON sources first, then the shipped SQLite asset.
    json_changed = migrate_json()
    db_changed = migrate_db()

    # Read identity_hash from room_master_table for the report below.
    # NOTE(review): presumably printed so the operator can confirm the schema
    # hash is unchanged by this data-only edit - confirm.
    con = sqlite3.connect(DB_PATH)
    try:
        hash_row = con.execute("SELECT identity_hash FROM room_master_table").fetchone()
        room_hash = hash_row[0]
    finally:
        con.close()

    # Re-scan the DB for any wording that should have been replaced.
    bad = bad_rows()

    report = [
        f"JSON rows changed: {json_changed}",
        f"DB rows changed: {db_changed}",
        f"Room hash: {room_hash}",
    ]
    report.append(
        f"WARNING - still-bad rows: {bad}"
        if bad
        else "Verified: 0 Q2 bad snippets remaining"
    )
    print("\n".join(report))