'use strict'; // Security note: xlsx (SheetJS Community Edition) has known prototype-pollution // and ReDoS CVEs (no OSS fix available as of 2026). Mitigations applied here: // 1. cellFormula: false — never parse/execute formulas // 2. cellHTML: false — never parse HTML markup // 3. 10 MB file-size cap (enforced by caller via express.raw limit) // 4. XLSX magic-bytes check before parsing // 5. Endpoint requires authenticated session; no anonymous uploads // 6. All cells treated as plain string data; no formula result access const xlsx = require('xlsx'); const crypto = require('crypto'); const { getDb, ensureUserDefaultCategories } = require('../db/database'); // ─── Constants ──────────────────────────────────────────────────────────────── const SESSION_TTL_MS = 24 * 60 * 60 * 1000; // 24 h const MAX_ROWS = 5_000; const LABEL_PATTERNS = { autopay: /\bauto(?:pay)?\b/i, past_due: /\bpast\s*due\b/i, double_pay: /\bdouble\s*pay\b/i, skipped: /\b(?:skip(?:ped)?|n\/?a|not\s*applicable)\b/i, }; const HEADER_PATTERNS = { bill_name: /^(?:bill|name|bill\s*name|description|payee|vendor|service)$/i, amount: /^(?:amount|amt|expected|expected\s*amount|cost|price|payment|paid|value)$/i, due_date: /^(?:due\s*date|due|due\s*day)$/i, paid_date: /^(?:paid\s*date|date\s*paid|payment\s*date|date\s*cleared|cleared\s*date)$/i, date: /^(?:date|due\s*date|due|paid\s*date|when|day)$/i, category: /^(?:category|cat|type|group)$/i, notes: /^(?:notes?|comment|label|status|memo|remark)$/i, }; const CATEGORY_KEYWORDS = [ { words: ['electric', 'power', 'utility', 'utilities', 'water', 'sewer', 'gas'], categories: ['Utilities'] }, { words: ['netflix', 'hulu', 'disney', 'spotify', 'subscription', 'streaming'], categories: ['Streaming', 'Entertainment', 'Subscriptions'] }, { words: ['capital one', 'credit card', 'discover', 'visa', 'mastercard', 'amex'], categories: ['Credit Cards', 'Credit Card', 'Cards'] }, { words: ['rent', 'mortgage'], categories: ['Housing'] }, { words: ['insurance'], categories: ['Insurance'] }, { words: ['loan', 'car payment', 'auto loan'], categories: ['Loans'] }, ]; // Sheet names that clearly are not month/bill-data tabs const NON_MONTH_SHEET_RE = /^(?:summary|totals?|dashboard|info|notes?|categories|settings?|overview|index|readme|instructions?|help|template|data|master|all|annual|yearly|archive|backup|charts?|graphs?|sheet\d+|.*tax(?:es)?|.*debt.*to.*als?|home\s*ownership\s*expenses)$/i; // Full month-name lookup (abbrev and full forms) const MONTH_LOOKUP = { jan: 1, january: 1, januaru: 1, feb: 2, february: 2, febuary: 2, mar: 3, march: 3, apr: 4, april: 4, may: 5, jun: 6, june: 6, jul: 7, july: 7, aug: 8, august: 8, sep: 9, sept: 9, september: 9, oct: 10, october: 10, nov: 11, november: 11, novevmber: 11, dec: 12, december: 12, }; const MONTH_KEYS_BY_LENGTH = Object.keys(MONTH_LOOKUP).sort((a, b) => b.length - a.length); // ─── Sheet Name Parsing ─────────────────────────────────────────────────────── /** * Detect year and month from a worksheet tab name. * Returns { year, month, is_non_month_sheet }. * year and month may be null if not determinable. * Exported for testing. */ function parseSheetName(name) { const clean = (name || '').trim(); // Known non-data sheet names → skip if (NON_MONTH_SHEET_RE.test(clean)) { return { year: null, month: null, is_non_month_sheet: true }; } // YYYY-MM or YYYY/MM (e.g. "2026-01", "2026/5") let m = clean.match(/^(\d{4})[\/\-](\d{1,2})$/); if (m) { const yr = parseInt(m[1], 10), mo = parseInt(m[2], 10); if (yr >= 2000 && yr <= 2100 && mo >= 1 && mo <= 12) return { year: yr, month: mo, is_non_month_sheet: false }; } // MM-YYYY or MM/YYYY (e.g. "01-2026", "1/2026") m = clean.match(/^(\d{1,2})[\/\-](\d{4})$/); if (m) { const mo = parseInt(m[1], 10), yr = parseInt(m[2], 10); if (yr >= 2000 && yr <= 2100 && mo >= 1 && mo <= 12) return { year: yr, month: mo, is_non_month_sheet: false }; } // Extract 4-digit year if present anywhere in the name. Some real workbook // tabs use names like "July2017", so this cannot rely on word boundaries. const yearM = clean.match(/(20\d{2})/); const year = yearM ? parseInt(yearM[1], 10) : null; // Extract a month name (full, abbreviated, common typos, or compact // MonthYYYY / YYYYMonth forms from legacy spreadsheets). const lower = clean.toLowerCase(); const compact = lower.replace(/[^a-z0-9]/g, ''); for (const key of MONTH_KEYS_BY_LENGTH) { const wordMatch = new RegExp(`(^|[^a-z])${key}([^a-z]|$)`, 'i').test(lower); const compactMonthYear = year && ( compact.includes(`${key}${year}`) || compact.includes(`${year}${key}`) ); if (wordMatch || compactMonthYear) { return { year, month: MONTH_LOOKUP[key], is_non_month_sheet: false }; } } // No month found — year might still be present (ambiguous) return { year, month: null, is_non_month_sheet: false }; } // ─── XLSX Parsing ───────────────────────────────────────────────────────────── function isXlsxBuffer(buffer) { // XLSX = ZIP file: magic bytes PK\x03\x04 return ( Buffer.isBuffer(buffer) && buffer.length >= 4 && buffer[0] === 0x50 && buffer[1] === 0x4b && buffer[2] === 0x03 && buffer[3] === 0x04 ); } function parseXlsxBuffer(buffer) { if (!isXlsxBuffer(buffer)) { const err = new Error('Invalid file format. Only XLSX files are supported.'); err.status = 400; throw err; } let workbook; try { workbook = xlsx.read(buffer, { type: 'buffer', cellFormula: false, cellHTML: false, cellNF: false, cellStyles: false, dense: false, }); } catch { const err = new Error('Could not read XLSX file. The file may be corrupted or in an unsupported format.'); err.status = 400; throw err; } if (!workbook.SheetNames.length) { const err = new Error('XLSX file contains no sheets.'); err.status = 400; throw err; } return workbook; } function getSheetRows(workbook, sheetName) { const sheet = workbook.Sheets[sheetName]; if (!sheet) return []; // raw:false → formatted string values; no formula results can leak through return xlsx.utils.sheet_to_json(sheet, { header: 1, defval: null, raw: false }); } // ─── Header Detection ───────────────────────────────────────────────────────── function detectHeaders(firstRow) { if (!Array.isArray(firstRow)) return {}; const map = {}; firstRow.forEach((cell, idx) => { if (cell == null) return; const val = String(cell).trim(); for (const [field, pattern] of Object.entries(HEADER_PATTERNS)) { if (pattern.test(val) && !(field in map)) map[field] = idx; } }); return map; } // ─── Row Classification ─────────────────────────────────────────────────────── function isBlankRow(cells) { return cells.every((c) => c == null || String(c).trim() === ''); } function isLikelyHeaderRow(cells) { const nonEmpty = cells.filter((c) => c != null && String(c).trim() !== ''); if (nonEmpty.length === 0) return false; let matches = 0; for (const cell of nonEmpty) { for (const pattern of Object.values(HEADER_PATTERNS)) { if (pattern.test(String(cell).trim())) { matches++; break; } } } return matches >= Math.ceil(nonEmpty.length * 0.5); } function isLikelyTotalRow(cells) { return cells.some( (c) => c != null && /^(?:total|subtotal|sum|grand\s*total)$/i.test(String(c).trim()), ); } // ─── Cell-Value Parsers ─────────────────────────────────────────────────────── function parseAmount(raw) { if (raw == null) return null; const str = String(raw).trim(); if (!str) return null; const cleaned = str.replace(/[$,\s]/g, '').replace(/^\((.+)\)$/, '-$1'); const val = parseFloat(cleaned); return Number.isFinite(val) ? val : null; } function parseDate(raw) { if (raw == null) return null; const str = String(raw).trim(); if (!str) return null; // MM/DD/YYYY or M/D/YYYY let m = str.match(/^(\d{1,2})\/(\d{1,2})\/(\d{4})$/); if (m) { const [, mon, day, yr] = m; return { year: parseInt(yr, 10), month: parseInt(mon, 10), day: parseInt(day, 10), iso: `${yr}-${mon.padStart(2, '0')}-${day.padStart(2, '0')}`, }; } // YYYY-MM-DD m = str.match(/^(\d{4})-(\d{2})-(\d{2})$/); if (m) { const [, yr, mon, day] = m; return { year: parseInt(yr, 10), month: parseInt(mon, 10), day: parseInt(day, 10), iso: str }; } // MM/DD (no year) m = str.match(/^(\d{1,2})\/(\d{1,2})$/); if (m) { return { year: null, month: parseInt(m[1], 10), day: parseInt(m[2], 10), iso: null }; } // Month name: "May 2026" or "May" m = str.match(/^(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\s*(\d{4})?$/i); if (m) { const key = m[1].toLowerCase(); const month = MONTH_LOOKUP[key] ?? MONTH_LOOKUP[key.slice(0, 3)]; if (month) return { year: m[2] ? parseInt(m[2], 10) : null, month, day: null, iso: null }; } return null; } function resolveDateIso(parsedDate, fallbackYear) { if (!parsedDate?.month || !parsedDate?.day) return null; const year = parsedDate.year ?? fallbackYear ?? null; if (!year || year < 2000 || year > 2100) return null; if (parsedDate.month < 1 || parsedDate.month > 12 || parsedDate.day < 1 || parsedDate.day > 31) return null; return `${year}-${String(parsedDate.month).padStart(2, '0')}-${String(parsedDate.day).padStart(2, '0')}`; } function detectLabels(text) { if (!text) return []; return Object.entries(LABEL_PATTERNS) .filter(([, pattern]) => pattern.test(text)) .map(([label]) => label); } // ─── Name Normalization ─────────────────────────────────────────────────────── function normalizeName(name) { return String(name) .trim() .toLowerCase() .replace(/\s+/g, ' ') .replace(/[.,!?;:'"()]/g, '') .trim(); } // ─── Bill Matching ──────────────────────────────────────────────────────────── function canonicalizeName(name) { const normalized = normalizeName(name) .replace(/\bcap\b/g, 'capital') .replace(/\bcc\b/g, 'credit card') .replace(/\bco\b/g, 'company') .replace(/\bsvc\b/g, 'service') .replace(/\bservices\b/g, 'service'); return normalized .split(' ') .filter((t) => t && !['the', 'bill', 'payment', 'pay', 'paid'].includes(t)) .join(' ') .trim(); } function nameTokens(name) { return canonicalizeName(name) .split(' ') .filter((t) => t.length > 1 && !['card', 'credit'].includes(t)); } function matchScore(detectedName, bill) { const target = canonicalizeName(detectedName); const candidate = canonicalizeName(bill.name); if (!target || !candidate) return null; if (candidate === target) { return { match_confidence: 'high', match_reason: 'exact_normalized_match', score: 100 }; } const tTokens = nameTokens(target); const bTokens = nameTokens(candidate); if (!tTokens.length || !bTokens.length) return null; const tSet = new Set(tTokens); const bSet = new Set(bTokens); const overlap = tTokens.filter((t) => bSet.has(t)); const overlapRatio = overlap.length / Math.max(tSet.size, bSet.size); const sameTokens = overlap.length === tSet.size && overlap.length === bSet.size; if (sameTokens) { return { match_confidence: 'medium', match_reason: 'token_reorder_match', score: 86 }; } if (candidate.includes(target) || target.includes(candidate)) { const targetIsShortSubset = tTokens.length === 1 && bTokens.length > 1; const extraTokens = bTokens.filter((t) => !tSet.has(t)); const isKnownStrongSubset = tTokens[0] === 'capital' && extraTokens.includes('one'); return { match_confidence: targetIsShortSubset && !isKnownStrongSubset ? 'low' : 'medium', match_reason: isKnownStrongSubset ? 'contains_match: Capital -> Capital One' : 'contains_match', score: isKnownStrongSubset ? 82 : (targetIsShortSubset ? 45 : 72), }; } if (overlap.length > 0 && overlapRatio >= 0.5) { return { match_confidence: 'medium', match_reason: 'token_overlap_match', score: Math.round(60 + overlapRatio * 20) }; } if (overlap.length > 0) { return { match_confidence: 'low', match_reason: 'weak_token_overlap', score: 35 }; } return null; } function findBillMatches(detectedName, userBills) { if (!detectedName || !userBills.length) return []; const matches = []; for (const bill of userBills) { const scored = matchScore(detectedName, bill); if (!scored) continue; matches.push({ bill_id: bill.id, bill_name: bill.name, category_id: bill.category_id ?? null, category: bill.category_name || null, expected_amount: bill.expected_amount, due_day: bill.due_day ?? null, match_confidence: scored.match_confidence, match_reason: scored.match_reason, score: scored.score, }); } const ORDER = { high: 0, medium: 1, low: 2 }; matches.sort((a, b) => ORDER[a.match_confidence] - ORDER[b.match_confidence] || b.score - a.score); return matches; } // ─── Recommendation Helpers ────────────────────────────────────────────────── function normalizeCategoryName(name) { return normalizeName(name).replace(/&/g, 'and'); } function findCategoryByName(name, categories) { if (!name) return null; const target = normalizeCategoryName(name); return categories.find((c) => normalizeCategoryName(c.name) === target) || null; } function inferCategory({ billName, detectedCategory, notesText, topMatch }, categories) { if (!categories.length) return null; const explicit = findCategoryByName(detectedCategory, categories); if (explicit) return { category: explicit, reason: 'category_column_match' }; if (topMatch?.category_id && topMatch?.category) { return { category: { id: topMatch.category_id, name: topMatch.category }, reason: `matched_bill_category: ${topMatch.bill_name}`, }; } const text = canonicalizeName([billName, detectedCategory, notesText].filter(Boolean).join(' ')); for (const rule of CATEGORY_KEYWORDS) { if (!rule.words.some((word) => text.includes(canonicalizeName(word)))) continue; for (const catName of rule.categories) { const category = findCategoryByName(catName, categories); if (category) return { category, reason: `keyword_category_match: ${catName}` }; } } return null; } function isPaymentDateHeader(header) { return /\b(?:paid|payment|pay)\b/i.test(String(header || '')); } function isDueDateHeader(header) { return /\bdue\b/i.test(String(header || '')); } function amountsDiffer(a, b) { if (a == null || b == null) return false; return Math.abs(Number(a) - Number(b)) >= 0.01; } function buildRecommendation({ billName, detectedAmount, parsedDate, dateHeader, detectedCategory, notesText, possibleMatches, categories, warnings, errors, paymentDateIso, }) { const recWarnings = [...warnings]; const topMatch = possibleMatches[0] || null; const highMatches = possibleMatches.filter((m) => m.match_confidence === 'high'); const mediumMatches = possibleMatches.filter((m) => m.match_confidence === 'medium'); const dateDay = parsedDate?.day; const dueDay = Number.isInteger(dateDay) && dateDay >= 1 && dateDay <= 31 ? dateDay : null; const paymentDate = isPaymentDateHeader(dateHeader); if (dueDay && paymentDate && !isDueDateHeader(dateHeader)) { recWarnings.push('Date appears to be a payment date, not a due date'); } let action = 'create_new_bill'; let confidence = 'low'; let reason = 'no_bill_match'; let selectedMatch = null; if (errors.includes('No bill name detected')) { action = 'skip_row'; confidence = 'high'; reason = 'blank_or_summary_row'; } else if (highMatches.length === 1) { action = 'match_existing_bill'; confidence = 'high'; reason = highMatches[0].match_reason; selectedMatch = highMatches[0]; } else if (highMatches.length > 1) { action = 'ambiguous'; confidence = 'low'; reason = 'multiple_exact_matches'; recWarnings.push('Multiple exact matches — choose one'); } else if (mediumMatches.length === 1) { action = 'match_existing_bill'; confidence = paymentDate ? 'low' : 'medium'; reason = mediumMatches[0].match_reason; selectedMatch = mediumMatches[0]; recWarnings.push('Suggested match should be reviewed before apply'); } else if (possibleMatches.length > 0) { action = 'ambiguous'; confidence = 'low'; reason = possibleMatches.length > 1 ? 'multiple_possible_matches' : possibleMatches[0].match_reason; recWarnings.push('Possible weak match found — choose a bill, create a new bill, or skip'); } const categoryGuess = inferCategory({ billName, detectedCategory, notesText, topMatch: selectedMatch || (possibleMatches.length === 1 ? possibleMatches[0] : null), }, categories); if (action === 'create_new_bill' && !categoryGuess) { recWarnings.push('Category uncertain — no existing category matched clearly'); } if (selectedMatch && dueDay && selectedMatch.due_day && dueDay !== selectedMatch.due_day) { recWarnings.push('Spreadsheet due day differs from current bill due day'); } if (selectedMatch && amountsDiffer(detectedAmount, selectedMatch.expected_amount)) { recWarnings.push('Spreadsheet amount differs from current bill expected amount'); } return { action, bill_id: selectedMatch?.bill_id ?? null, bill_name: selectedMatch?.bill_name ?? (action === 'create_new_bill' ? billName : null), category_id: categoryGuess?.category?.id ?? null, category_name: categoryGuess?.category?.name ?? null, due_day: dueDay, expected_amount: action === 'create_new_bill' && detectedAmount != null ? detectedAmount : null, actual_amount: detectedAmount, payment_amount: paymentDateIso && detectedAmount != null ? detectedAmount : null, payment_date: paymentDateIso, confidence, reason: categoryGuess?.reason && action === 'create_new_bill' ? `${reason}; ${categoryGuess.reason}` : reason, warnings: [...new Set(recWarnings)], }; } // ─── Year/Month Resolution ──────────────────────────────────────────────────── /** * Determine the final year/month for a row and where they came from. * Priority: full row date > partial row date (sheet/default year) > sheet name > default > ambiguous. * Never falls back to today's date. * Exported for testing. */ function resolveYearMonth(parsedDate, sheetYear, sheetMonth, defaultYear, defaultMonth) { // Full date with year in the row cell if (parsedDate?.year && parsedDate?.month) { return { year: parsedDate.year, month: parsedDate.month, source: 'row_date', warnings: [] }; } // Partial date (month/day, no year) in row — supplement year from sheet or default if (parsedDate?.month && !parsedDate?.year) { const year = sheetYear ?? defaultYear ?? null; const source = year ? (sheetYear ? 'sheet_name' : 'default') : null; const w = year ? [] : ['Year unknown — row date has no year; provide default_year']; return { year, month: parsedDate.month, source: source || 'ambiguous', warnings: w }; } // No date in row — use sheet tab name values if (sheetMonth) { const year = sheetYear ?? defaultYear ?? null; const hasYear = year !== null; return { year, month: sheetMonth, source: hasYear ? 'sheet_name' : 'ambiguous', warnings: hasYear ? [] : ['Year unknown — sheet name has no year; provide default_year'], }; } // No month from row or sheet — last resort: request defaults if (defaultMonth) { const year = defaultYear ?? null; return { year, month: defaultMonth, source: 'default', warnings: year ? [] : ['Year unknown — provide default_year'], }; } // Nothing available return { year: null, month: null, source: 'ambiguous', warnings: ['No year or month could be determined'] }; } // ─── Row Analysis Helpers ───────────────────────────────────────────────────── function findFirstAmountCell(cells, skipIndices) { for (let i = 0; i < cells.length; i++) { if (skipIndices.has(i) || cells[i] == null) continue; const v = parseAmount(cells[i]); if (v !== null && v > 0) return cells[i]; } return null; } function collectNotesCells(cells, headerMap, billName) { const skipIndices = new Set(Object.values(headerMap)); const parts = []; for (let i = 0; i < cells.length; i++) { if (skipIndices.has(i) || cells[i] == null) continue; const val = String(cells[i]).trim(); if (!val) continue; if (parseAmount(val) !== null) continue; if (billName && normalizeName(val) === normalizeName(billName)) continue; const parsed = parseDate(val); if (parsed && (parsed.year || parsed.month)) continue; parts.push(val); } return parts.join(' ').trim() || null; } // ─── Single-Row Analyzer ────────────────────────────────────────────────────── function analyzeRow(rowIndex, cells, headerMap, headerLabels, userBills, categories, sheetName, sheetYear, sheetMonth, defaultYear, defaultMonth, rowIdPrefix) { const get = (field) => { const idx = headerMap[field]; return idx !== undefined ? cells[idx] : undefined; }; const rawBillName = get('bill_name') ?? cells[0]; const billName = rawBillName ? String(rawBillName).trim() || null : null; const skipIndices = new Set(Object.values(headerMap)); const rawAmount = get('amount') ?? findFirstAmountCell(cells, skipIndices); const detectedAmount = parseAmount(rawAmount); const parsedDueDate = parseDate(get('due_date')); const parsedGenericDate = parseDate(get('date')); const parsedDate = parsedDueDate ?? parsedGenericDate; const dateHeader = headerMap.due_date !== undefined ? headerLabels[headerMap.due_date] : (headerMap.date !== undefined ? headerLabels[headerMap.date] : null); const { year: detectedYear, month: detectedMonth, source: yearMonthSource, warnings: ymWarnings, } = resolveYearMonth(parsedDate, sheetYear, sheetMonth, defaultYear, defaultMonth); const detectedDate = parsedDate?.iso ?? resolveDateIso(parsedDate, sheetYear ?? defaultYear ?? null); const parsedPaidDate = parseDate(get('paid_date')); const paidDateYear = parsedPaidDate?.year ?? sheetYear ?? detectedYear ?? defaultYear ?? null; const detectedPaidDate = resolveDateIso(parsedPaidDate, paidDateYear); const rawCategory = get('category'); const detectedCategory = rawCategory ? String(rawCategory).trim() || null : null; const notesText = collectNotesCells(cells, headerMap, billName); const allText = cells.filter((c) => c != null && typeof c === 'string').map((c) => c.trim()).join(' '); const detectedLabels = detectLabels(allText); const rawValues = cells.map((c) => (c != null ? String(c) : null)); const warnings = [...ymWarnings]; const errors = []; if (!billName) errors.push('No bill name detected'); if (detectedAmount === null) warnings.push('No amount detected'); const possibleMatches = billName ? findBillMatches(billName, userBills) : []; const recommendation = buildRecommendation({ billName, detectedAmount, parsedDate, dateHeader, detectedCategory, notesText, possibleMatches, categories, warnings, errors, paymentDateIso: detectedPaidDate, }); const proposedAction = recommendation.action === 'ambiguous' ? 'mark_ambiguous' : recommendation.action; const confidence = recommendation.confidence; const requiresUserDecision = recommendation.action === 'ambiguous'; const rowId = rowIdPrefix ? `${rowIdPrefix}_r${rowIndex + 1}` : `row_${rowIndex + 1}`; return { row_id: rowId, source_row_number: rowIndex + 1, sheet_name: sheetName, year_month_source: yearMonthSource, raw_values: rawValues, detected_bill_name: billName, detected_category: detectedCategory, detected_amount: detectedAmount, detected_date: detectedDate, detected_paid_date: detectedPaidDate, detected_payment_amount: detectedPaidDate && detectedAmount != null ? detectedAmount : null, detected_year: detectedYear, detected_month: detectedMonth, detected_notes: notesText, detected_labels: detectedLabels, proposed_action: proposedAction, confidence, warnings: recommendation.warnings, errors, possible_bill_matches: possibleMatches, requires_user_decision: requiresUserDecision, recommendation, }; } // ─── Sheet Row Parser ───────────────────────────────────────────────────────── /** * Parse all data rows from one sheet's rawRows array. * sheetYear/sheetMonth come from parseSheetName(); rowIdPrefix is null for single-sheet mode. */ function parseSheetRows({ name, rawRows, year: sheetYear, month: sheetMonth, rowIdPrefix }, userBills, categories, defaultYear, defaultMonth) { if (!rawRows.length) return { rows: [], headerRow: null }; const firstRow = rawRows[0] || []; const headerMap = detectHeaders(firstRow); const headerLabels = firstRow.map((c) => (c != null ? String(c).trim() : null)); const hasHeaders = Object.keys(headerMap).length > 0; const startRow = hasHeaders ? 1 : 0; const rows = []; for (let i = startRow; i < rawRows.length; i++) { const cells = rawRows[i] || []; if (isBlankRow(cells)) continue; if (isLikelyHeaderRow(cells) && i > 0) continue; if (isLikelyTotalRow(cells)) continue; rows.push(analyzeRow( i, cells, headerMap, headerLabels, userBills, categories, name, sheetYear, sheetMonth, defaultYear, defaultMonth, rowIdPrefix, )); } return { rows, headerRow: hasHeaders ? firstRow.map((c) => (c != null ? String(c) : null)) : null, }; } // ─── Import Session Management ──────────────────────────────────────────────── function makeSessionId() { return crypto.randomBytes(16).toString('hex'); } function saveImportSession(db, userId, sessionData) { const id = makeSessionId(); const now = new Date().toISOString(); const expiresAt = new Date(Date.now() + SESSION_TTL_MS).toISOString(); db.prepare(` INSERT INTO import_sessions (id, user_id, created_at, expires_at, preview_json) VALUES (?, ?, ?, ?, ?) `).run(id, userId, now, expiresAt, JSON.stringify(sessionData)); return id; } function loadImportSession(db, sessionId, userId) { const row = db.prepare(` SELECT preview_json FROM import_sessions WHERE id = ? AND user_id = ? AND expires_at > datetime('now') `).get(sessionId, userId); if (!row) { const err = new Error('Import session not found or expired. Please re-upload your file.'); err.status = 404; throw err; } return JSON.parse(row.preview_json); } function deleteImportSession(db, sessionId) { db.prepare('DELETE FROM import_sessions WHERE id = ?').run(sessionId); } function pruneExpiredSessions(db) { db.prepare("DELETE FROM import_sessions WHERE expires_at <= datetime('now')").run(); } // ─── Preview ────────────────────────────────────────────────────────────────── async function previewSpreadsheet(userId, buffer, options = {}) { const db = getDb(); pruneExpiredSessions(db); ensureUserDefaultCategories(userId); const workbook = parseXlsxBuffer(buffer); const sheetNames = workbook.SheetNames; const parseAll = options.parse_all_sheets === true; const defaultYear = options.default_year ?? null; const defaultMonth = options.default_month ?? null; // Load user's active bills once for matching across all sheets const userBills = db.prepare(` SELECT b.id, b.name, b.category_id, b.due_day, b.expected_amount, b.autopay_enabled, c.name AS category_name FROM bills b LEFT JOIN categories c ON c.id = b.category_id WHERE b.active = 1 AND b.user_id = ? ORDER BY b.name `).all(userId); const categories = db.prepare('SELECT id, name FROM categories WHERE user_id = ? ORDER BY name').all(userId); // ── Multi-sheet mode ───────────────────────────────────────────────────────── if (parseAll) { const sheetsMeta = []; let allRows = []; let totalParsed = 0; for (let si = 0; si < sheetNames.length; si++) { const name = sheetNames[si]; const { year, month, is_non_month_sheet } = parseSheetName(name); if (is_non_month_sheet) { sheetsMeta.push({ name, detected_year: null, detected_month: null, is_non_month_sheet: true, status: 'skipped', row_count: 0, }); continue; } const rawRows = getSheetRows(workbook, name); const rowIdPrefix = `s${si}`; const { rows, headerRow } = parseSheetRows( { name, rawRows, year, month, rowIdPrefix }, userBills, categories, defaultYear, defaultMonth, ); totalParsed += rows.length; if (totalParsed > MAX_ROWS) { const err = new Error(`Spreadsheet too large. Combined rows across all sheets exceed ${MAX_ROWS}.`); err.status = 400; throw err; } const resolvedYear = year ?? (month ? defaultYear : null); let status; if (!month) { status = 'ambiguous'; } else if (resolvedYear) { status = 'parsed'; } else { status = 'parsed_month_only'; } sheetsMeta.push({ name, detected_year: resolvedYear, detected_month: month, is_non_month_sheet: false, status, row_count: rows.length, header_row: headerRow, }); allRows = allRows.concat(rows); } const sessionData = { user_id: userId, original_filename: options.original_filename || null, sheet_name: 'multiple', rows: allRows, default_year: defaultYear, default_month: defaultMonth, created_at: new Date().toISOString(), }; const importSessionId = saveImportSession(db, userId, sessionData); return { import_session_id: importSessionId, workbook: { sheet_names: sheetNames, parse_mode: 'all_sheets', sheets: sheetsMeta, total_row_count: allRows.length, }, rows: allRows, }; } // ── Single-sheet mode (existing behaviour) ──────────────────────────────── const selectedSheet = options.sheet_name && sheetNames.includes(options.sheet_name) ? options.sheet_name : sheetNames[0]; const rawRows = getSheetRows(workbook, selectedSheet); if (rawRows.length === 0) { const err = new Error('Selected sheet is empty.'); err.status = 400; throw err; } if (rawRows.length > MAX_ROWS + 1) { const err = new Error(`Spreadsheet too large. Maximum ${MAX_ROWS} data rows supported.`); err.status = 400; throw err; } const { year: sheetYear, month: sheetMonth } = parseSheetName(selectedSheet); const { rows: previewRows, headerRow } = parseSheetRows( { name: selectedSheet, rawRows, year: sheetYear, month: sheetMonth, rowIdPrefix: null }, userBills, categories, defaultYear, defaultMonth, ); const sessionData = { user_id: userId, original_filename: options.original_filename || null, sheet_name: selectedSheet, rows: previewRows, default_year: defaultYear, default_month: defaultMonth, created_at: new Date().toISOString(), }; const importSessionId = saveImportSession(db, userId, sessionData); return { import_session_id: importSessionId, workbook: { sheet_names: sheetNames, selected_sheet: selectedSheet, parse_mode: 'single_sheet', row_count: previewRows.length, header_row: headerRow, }, rows: previewRows, }; } // ─── Apply ──────────────────────────────────────────────────────────────────── const VALID_ACTIONS = new Set([ 'match_existing_bill', 'create_new_bill', 'skip_row', 'create_payment', 'update_monthly_state', 'add_monthly_note', ]); function importValidationError(message, detail = {}) { const err = new Error(message); err.status = 400; err.code = 'IMPORT_VALIDATION_ERROR'; err.details = [detail].filter(Boolean); return err; } function decisionValidationError(idx, d, field, message) { return importValidationError(`Decision [${idx}]: ${message}`, { row_id: d?.row_id ?? null, field, message, }); } function coerceOptionalNumber(d, idx, field, { min = null, max = null } = {}) { if (d[field] === undefined || d[field] === null || d[field] === '') return null; const value = Number(d[field]); if (!Number.isFinite(value)) { throw decisionValidationError(idx, d, field, `${field} must be a finite number`); } if (min !== null && value < min) { throw decisionValidationError(idx, d, field, `${field} must be at least ${min}`); } if (max !== null && value > max) { throw decisionValidationError(idx, d, field, `${field} must be no greater than ${max}`); } d[field] = value; return value; } function coerceOptionalInteger(d, idx, field, { min = null, max = null } = {}) { if (d[field] === undefined || d[field] === null || d[field] === '') return null; const value = Number(d[field]); if (!Number.isInteger(value)) { throw decisionValidationError(idx, d, field, `${field} must be an integer`); } if (min !== null && value < min) { throw decisionValidationError(idx, d, field, `${field} must be at least ${min}`); } if (max !== null && value > max) { throw decisionValidationError(idx, d, field, `${field} must be no greater than ${max}`); } d[field] = value; return value; } function validateOptionalIsoDate(d, idx, field) { if (d[field] === undefined || d[field] === null || d[field] === '') return null; const value = String(d[field]).trim(); if (!/^\d{4}-\d{2}-\d{2}$/.test(value)) { throw decisionValidationError(idx, d, field, `${field} must use YYYY-MM-DD format`); } d[field] = value; return value; } function validateDecision(d, idx) { if (!d.row_id) throw decisionValidationError(idx, d, 'row_id', 'row_id is required'); if (!VALID_ACTIONS.has(d.action)) throw decisionValidationError(idx, d, 'action', `unsupported action '${d.action}'`); if (d.action === 'create_new_bill' && !d.bill_name) throw decisionValidationError(idx, d, 'bill_name', 'bill_name is required for create_new_bill'); if (['match_existing_bill', 'update_monthly_state', 'add_monthly_note', 'create_payment'].includes(d.action)) { if (d.bill_id === undefined || d.bill_id === null || d.bill_id === '') throw decisionValidationError(idx, d, 'bill_id', `bill_id is required for ${d.action}`); coerceOptionalInteger(d, idx, 'bill_id', { min: 1 }); } coerceOptionalInteger(d, idx, 'due_day', { min: 1, max: 31 }); coerceOptionalInteger(d, idx, 'year', { min: 2000, max: 2100 }); coerceOptionalInteger(d, idx, 'month', { min: 1, max: 12 }); coerceOptionalInteger(d, idx, 'category_id', { min: 1 }); coerceOptionalNumber(d, idx, 'actual_amount', { min: 0 }); coerceOptionalNumber(d, idx, 'expected_amount', { min: 0 }); coerceOptionalNumber(d, idx, 'payment_amount', { min: 0 }); validateOptionalIsoDate(d, idx, 'payment_date'); if (d.action === 'create_payment' && !d.payment_amount && !d.actual_amount) { throw decisionValidationError(idx, d, 'payment_amount', 'payment_amount or actual_amount is required for create_payment'); } } function resolveYear(decision, previewRow, sessionData) { return decision.year ?? previewRow?.detected_year ?? sessionData.default_year ?? null; } function resolveMonth(decision, previewRow, sessionData) { return decision.month ?? previewRow?.detected_month ?? sessionData.default_month ?? null; } function upsertMonthlyState(db, billId, year, month, amount, notes, isSkipped, allowOverwrite) { const existing = db.prepare(` SELECT id, actual_amount, notes, is_skipped FROM monthly_bill_state WHERE bill_id = ? AND year = ? AND month = ? `).get(billId, year, month); if (!existing) { db.prepare(` INSERT INTO monthly_bill_state (bill_id, year, month, actual_amount, notes, is_skipped) VALUES (?, ?, ?, ?, ?, ?) `).run(billId, year, month, amount, notes, isSkipped ?? 0); return { result: 'created' }; } const amountConflict = (amount !== null && existing.actual_amount !== null && existing.actual_amount !== amount); const notesConflict = (notes !== null && existing.notes !== null && existing.notes !== notes); if ((amountConflict || notesConflict) && !allowOverwrite) { return { result: 'skipped_conflict', note: 'Monthly state already exists with different values — use overwrite:true to replace' }; } const newAmount = allowOverwrite ? amount : (existing.actual_amount !== null ? existing.actual_amount : amount); const newNotes = allowOverwrite ? notes : (existing.notes !== null ? existing.notes : notes); const newSkipped = isSkipped !== null ? isSkipped : existing.is_skipped; db.prepare(` UPDATE monthly_bill_state SET actual_amount = ?, notes = ?, is_skipped = ?, updated_at = datetime('now') WHERE bill_id = ? AND year = ? AND month = ? `).run(newAmount, newNotes, newSkipped, billId, year, month); return { result: allowOverwrite ? 'overwritten' : 'updated' }; } function createPaymentFromImport(db, billId, amount, paidDate, notes, allowOverwrite) { if (!paidDate || amount == null || amount <= 0) return null; const dup = db.prepare(` SELECT id FROM payments WHERE bill_id = ? AND paid_date = ? AND amount = ? AND deleted_at IS NULL `).get(billId, paidDate, amount); if (dup && !allowOverwrite) return 'skipped_duplicate'; db.prepare(` INSERT INTO payments (bill_id, amount, paid_date, method, notes) VALUES (?, ?, ?, ?, ?) `).run(billId, amount, paidDate, null, notes); return 'created'; } function resolvePaymentInfo(decision, previewRow, amount) { const paymentDate = decision.payment_date ?? previewRow?.detected_paid_date ?? null; const paymentAmount = decision.payment_amount ?? previewRow?.detected_payment_amount ?? (paymentDate ? amount : null); return { paymentDate, paymentAmount }; } function importRelatedPaidMonthsForNewBill(db, newBillId, billName, sourceRowId, sessionData, appliedRowIds, allowOverwrite, summary) { const targetName = normalizeName(billName); if (!targetName) return 0; let imported = 0; for (const row of sessionData.rows || []) { if (row.row_id === sourceRowId || appliedRowIds.has(row.row_id)) continue; if (normalizeName(row.detected_bill_name || '') !== targetName) continue; if (!row.detected_year || !row.detected_month) continue; const amount = row.detected_amount ?? null; const notes = row.detected_notes ?? null; const paymentDate = row.detected_paid_date ?? null; const paymentAmount = row.detected_payment_amount ?? (paymentDate ? amount : null); if (amount == null && !paymentDate) continue; const detail = { row_id: row.row_id, action: 'create_new_bill_related_month', result: 'imported', bill_id: newBillId }; if (amount != null) { const st = upsertMonthlyState(db, newBillId, row.detected_year, row.detected_month, amount, notes, null, allowOverwrite); detail.monthly_state = st.result; } const paymentResult = createPaymentFromImport(db, newBillId, paymentAmount, paymentDate, notes, allowOverwrite); if (paymentResult) { detail.payment = paymentResult; detail.paid_date = paymentDate; detail.payment_amount = paymentAmount; } summary.created++; if (summary.skipped > 0) summary.skipped--; summary.details.push(detail); imported++; } return imported; } function applyOneDecision(db, userId, decision, previewRow, sessionData, allowOverwrite, summary, appliedRowIds) { const { row_id, action } = decision; if (action === 'skip_row') { summary.skipped++; summary.details.push({ row_id, action, result: 'skipped' }); return; } try { const year = resolveYear(decision, previewRow, sessionData); const month = resolveMonth(decision, previewRow, sessionData); const amount = decision.actual_amount ?? previewRow?.detected_amount ?? null; const notes = decision.notes ?? previewRow?.detected_notes ?? null; if (action === 'create_new_bill') { const name = (decision.bill_name || previewRow?.detected_bill_name || '').trim(); if (!name) throw new Error('Bill name required'); const existing = db.prepare( "SELECT id FROM bills WHERE name = ? COLLATE NOCASE AND active = 1 AND user_id = ?", ).get(name, userId); if (existing && !allowOverwrite) { summary.skipped++; summary.details.push({ row_id, action, result: 'skipped_duplicate', note: `Bill "${name}" already exists (id=${existing.id})` }); return; } let categoryId = decision.category_id ?? null; if (categoryId) { const cat = db.prepare('SELECT id FROM categories WHERE id = ? AND user_id = ?').get(categoryId, userId); categoryId = cat?.id ?? null; } const catName = categoryId ? null : (decision.category || previewRow?.detected_category); if (!categoryId && catName) { const cat = db.prepare('SELECT id FROM categories WHERE name = ? COLLATE NOCASE AND user_id = ?').get(catName, userId); categoryId = cat?.id ?? null; } const dueDay = decision.due_day ?? 1; const expectedAmount = decision.expected_amount ?? amount ?? 0; const ins = db.prepare(` INSERT INTO bills (user_id, name, category_id, due_day, bucket, expected_amount, billing_cycle, active) VALUES (?, ?, ?, ?, ?, ?, 'monthly', 1) `).run(userId, name, categoryId, dueDay, dueDay <= 14 ? '1st' : '15th', expectedAmount); const newBillId = ins.lastInsertRowid; summary.created++; const detail = { row_id, action, result: 'created', bill_id: newBillId, bill_name: name }; if (year && month) { const st = upsertMonthlyState(db, newBillId, year, month, amount, notes, null, allowOverwrite); detail.monthly_state = st.result; } const { paymentDate, paymentAmount } = resolvePaymentInfo(decision, previewRow, amount); const paymentResult = createPaymentFromImport( db, newBillId, paymentAmount, paymentDate, decision.payment_notes ?? notes, allowOverwrite, ); if (paymentResult) { detail.payment = paymentResult; detail.paid_date = paymentDate; detail.payment_amount = paymentAmount; } const relatedImported = importRelatedPaidMonthsForNewBill( db, newBillId, name, row_id, sessionData, appliedRowIds, allowOverwrite, summary, ); if (relatedImported > 0) detail.related_months_imported = relatedImported; summary.details.push(detail); } else if (['match_existing_bill', 'update_monthly_state', 'add_monthly_note'].includes(action)) { const billId = decision.bill_id; const bill = db.prepare('SELECT id, name FROM bills WHERE id = ? AND active = 1 AND user_id = ?').get(billId, userId); if (!bill) throw new Error(`Bill id=${billId} not found or inactive`); if (!year || !month) { summary.ambiguous++; summary.details.push({ row_id, action, result: 'ambiguous', error: 'year and month required for monthly state' }); return; } const noteToStore = action === 'add_monthly_note' ? (decision.notes ?? previewRow?.detected_notes ?? null) : notes; const amountToStore = action === 'add_monthly_note' ? null : amount; const isSkipped = decision.is_skipped ?? null; const st = upsertMonthlyState(db, billId, year, month, amountToStore, noteToStore, isSkipped, allowOverwrite); if (st.result === 'skipped_conflict') { summary.skipped++; } else if (st.result === 'created') { summary.created++; } else { summary.updated++; } const detail = { row_id, action, result: st.result, bill_id: billId }; if (st.note) detail.note = st.note; const { paymentDate, paymentAmount } = resolvePaymentInfo(decision, previewRow, amount); if (paymentDate && paymentAmount != null && paymentAmount > 0) { const paymentResult = createPaymentFromImport( db, billId, paymentAmount, paymentDate, decision.payment_notes ?? noteToStore, allowOverwrite, ); if (paymentResult) { detail.payment = paymentResult; detail.paid_date = paymentDate; detail.payment_amount = paymentAmount; } } summary.details.push(detail); } else if (action === 'create_payment') { const billId = decision.bill_id; const bill = db.prepare('SELECT id FROM bills WHERE id = ? AND active = 1 AND user_id = ?').get(billId, userId); if (!bill) throw new Error(`Bill id=${billId} not found or inactive`); const payAmount = decision.payment_amount ?? amount; const payDate = decision.payment_date ?? (year && month ? `${year}-${String(month).padStart(2, '0')}-01` : null); if (!payAmount || !payDate) { summary.ambiguous++; summary.details.push({ row_id, action, result: 'ambiguous', error: 'payment_amount and payment_date required' }); return; } const dup = db.prepare(` SELECT id FROM payments WHERE bill_id = ? AND paid_date = ? AND amount = ? AND deleted_at IS NULL `).get(billId, payDate, payAmount); if (dup && !allowOverwrite) { summary.skipped++; summary.details.push({ row_id, action, result: 'skipped_duplicate', note: 'Identical payment already exists' }); return; } db.prepare(` INSERT INTO payments (bill_id, amount, paid_date, method, notes) VALUES (?, ?, ?, ?, ?) `).run(billId, payAmount, payDate, decision.payment_method ?? null, decision.payment_notes ?? null); summary.created++; summary.details.push({ row_id, action, result: 'created', bill_id: billId, paid_date: payDate, amount: payAmount }); } } catch (err) { summary.errored++; summary.details.push({ row_id, action, result: 'error', error: err.message }); } } async function applyImportDecisions(userId, importSessionId, decisions, opts = {}) { const db = getDb(); ensureUserDefaultCategories(userId); const sessionData = loadImportSession(db, importSessionId, userId); const rowLookup = Object.fromEntries(sessionData.rows.map((r) => [r.row_id, r])); const appliedRowIds = new Set(decisions.map((d) => d.row_id)); for (let i = 0; i < decisions.length; i++) { validateDecision(decisions[i], i); if (!rowLookup[decisions[i].row_id]) { throw decisionValidationError(i, decisions[i], 'row_id', `row_id '${decisions[i].row_id}' was not found in this import session`); } } const allowOverwrite = opts.overwrite === true; const reviewedSkippedCount = Number.isInteger(Number(opts.reviewed_skipped_count)) ? Math.max(0, Number(opts.reviewed_skipped_count)) : 0; const summary = { created: 0, updated: 0, skipped: reviewedSkippedCount, errored: 0, ambiguous: 0, details: [] }; const applyAll = db.transaction(() => { for (const decision of decisions) { applyOneDecision(db, userId, decision, rowLookup[decision.row_id], sessionData, allowOverwrite, summary, appliedRowIds); } }); applyAll(); try { db.prepare(` INSERT INTO import_history ( user_id, imported_at, source_filename, file_type, sheet_name, rows_parsed, rows_created, rows_updated, rows_skipped, rows_ambiguous, rows_errored, options_json, summary_json ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) `).run( userId, new Date().toISOString(), sessionData.original_filename, 'xlsx', sessionData.sheet_name, sessionData.rows.length, summary.created, summary.updated, summary.skipped, summary.ambiguous, summary.errored, JSON.stringify(opts), JSON.stringify(summary.details), ); } catch (histErr) { console.error('[import] history write failed:', histErr.message); } deleteImportSession(db, importSessionId); return { success: true, rows_parsed: sessionData.rows.length, rows_created: summary.created, rows_updated: summary.updated, rows_skipped: summary.skipped, rows_ambiguous: summary.ambiguous, rows_errored: summary.errored, details: summary.details, }; } // ─── Import History ─────────────────────────────────────────────────────────── function getImportHistory(userId) { const db = getDb(); return db.prepare(` SELECT id, imported_at, source_filename, file_type, sheet_name, rows_parsed, rows_created, rows_updated, rows_skipped, rows_ambiguous, rows_errored FROM import_history WHERE user_id = ? ORDER BY imported_at DESC LIMIT 100 `).all(userId); } module.exports = { previewSpreadsheet, applyImportDecisions, getImportHistory, // Exported for testing parseSheetName, resolveYearMonth, parseAmount, parseDate, normalizeName, findBillMatches, buildRecommendation, };