BillTracker/services/merchantStoreMatchService.js

158 lines
5.3 KiB
JavaScript
Raw Permalink Normal View History

'use strict';
const { categorizeTransaction } = require('./spendingService');
// Mirrors the pack's match_order_recommendation: regional descriptor variants
// (most specific — tied to a city/county) are checked first, then online
// billing descriptors (PAYPAL/STRIPE/APPLE.COM/BILL/etc.), then canonical
// national merchants as the fallback.
const ENTRY_KIND_ORDER = {
regional_descriptor_variant: 0,
online_billing_descriptor_variant: 1,
canonical_merchant: 2,
};
let _entries = null;
function maxPatternLength(patterns) {
return patterns.reduce((max, p) => Math.max(max, p.length), 0);
}
// Lazily load and pre-sort all merchant_store_matches rows. Cached for the
// lifetime of the process — this is static reference data seeded once by the
// v1.05 migration.
function loadMerchantMatchEntries(db) {
if (_entries) return _entries;
const rows = db.prepare(`
SELECT id, entry_kind, canonical_merchant_id, canonical_name, display_name,
category, merchant_type, scope, priority, match_patterns, negative_patterns,
locality_city, locality_state
FROM merchant_store_matches
`).all();
_entries = rows.map(row => ({
...row,
match_patterns: JSON.parse(row.match_patterns || '[]'),
negative_patterns: JSON.parse(row.negative_patterns || '[]'),
}));
_entries.sort((a, b) => {
const orderA = ENTRY_KIND_ORDER[a.entry_kind] ?? 99;
const orderB = ENTRY_KIND_ORDER[b.entry_kind] ?? 99;
if (orderA !== orderB) return orderA - orderB;
if (b.priority !== a.priority) return b.priority - a.priority;
return maxPatternLength(b.match_patterns) - maxPatternLength(a.match_patterns);
});
return _entries;
}
// Apply the pack's normalization rules: uppercase, "&" -> "AND", strip
// apostrophes/punctuation, collapse whitespace. match_patterns in the pack
// are pre-normalized to this same shape (e.g. "THE CHILDREN S PLACE").
function normalizeForMatch(value) {
return String(value || '')
.toUpperCase()
.replace(/&/g, ' AND ')
.replace(/[']/g, '')
.replace(/[^A-Z0-9\s]/g, ' ')
.replace(/\s+/g, ' ')
.trim();
}
// Find the best merchant/store match for a raw transaction description.
// Returns { entry_id, canonical_name, display_name, category, scope, priority } or null.
function findMerchantMatch(db, description) {
const normalized = normalizeForMatch(description);
if (!normalized) return null;
const entries = loadMerchantMatchEntries(db);
for (const entry of entries) {
if (entry.negative_patterns.some(p => normalized.includes(p))) continue;
if (entry.match_patterns.some(p => p && normalized.includes(p))) {
return {
entry_id: entry.id,
canonical_name: entry.canonical_name,
display_name: entry.display_name,
category: entry.category,
scope: entry.scope,
priority: entry.priority,
};
}
}
return null;
}
// Find-or-create a spending category by name for this user, matching the
// COLLATE NOCASE convention used by ensureUserDefaultCategories (db/database.js).
function findOrCreateCategory(db, userId, name) {
const existing = db.prepare('SELECT id FROM categories WHERE user_id = ? AND name = ? COLLATE NOCASE')
.get(userId, name);
if (existing) return existing.id;
const result = db.prepare('INSERT INTO categories (user_id, name) VALUES (?, ?)').run(userId, name);
return result.lastInsertRowid;
}
// Scan the user's uncategorized outflows, apply merchant/store pack matches,
// and categorize them (writing spending_category_rules so future syncs hit
// the cheaper rule path instead of rescanning the full pack).
//
// With { dryRun: true }, computes the same matches without writing anything
// (no categories created, no transactions updated) — used to preview an
// auto-categorize run before applying it.
//
// Returns { updated: number, categories: [{ name, count }], changes: [{ transaction_id, display_name, category }] }.
function applyMerchantStoreMatches(db, userId, { dryRun = false } = {}) {
const txRows = db.prepare(`
SELECT id, payee, description, memo
FROM transactions
WHERE user_id = ?
AND amount < 0
AND ignored = 0
AND match_status != 'matched'
AND spending_category_id IS NULL
`).all(userId);
if (txRows.length === 0) return { updated: 0, categories: [], changes: [] };
const categoryCounts = new Map(); // category name -> count
const changes = [];
let updated = 0;
const apply = () => {
for (const tx of txRows) {
const match = findMerchantMatch(db, tx.payee || tx.description || tx.memo || '');
if (!match) continue;
if (!dryRun) {
const categoryId = findOrCreateCategory(db, userId, match.category);
categorizeTransaction(db, userId, tx.id, categoryId, true);
}
updated++;
changes.push({ transaction_id: tx.id, display_name: match.display_name, category: match.category });
categoryCounts.set(match.category, (categoryCounts.get(match.category) || 0) + 1);
}
};
if (dryRun) {
apply();
} else {
db.transaction(apply)();
}
return {
updated,
categories: [...categoryCounts.entries()].map(([name, count]) => ({ name, count })),
changes,
};
}
module.exports = {
loadMerchantMatchEntries,
normalizeForMatch,
findMerchantMatch,
findOrCreateCategory,
applyMerchantStoreMatches,
};