// BillTracker/services/subscriptionService.js
// Listing metadata: 572 lines, 23 KiB, JavaScript

'use strict';
const { insertBill, validateBillData } = require('./billsService');
// Allowed values for a bill's `subscription_type`. Exported from this module and
// used by createSubscriptionFromRecommendation, which stores 'other' for any
// value that is not in this list.
const SUBSCRIPTION_TYPES = [
'streaming', 'software', 'cloud', 'music', 'news',
'fitness', 'gaming', 'utilities', 'insurance',
'food', 'education', 'shopping', 'security', 'other',
];
// Multiplier applied by monthlyEquivalent to turn one charge on the given cycle
// into a per-month amount (e.g. weekly = 52 charges spread over 12 months).
// Both 'annual' and 'annually' are accepted; 'irregular' is counted as one
// charge per month.
const MONTHLY_FACTORS = {
weekly: 52 / 12,
biweekly: 26 / 12,
monthly: 1,
quarterly: 1 / 3,
annual: 1 / 12,
annually: 1 / 12,
irregular: 1,
};
// Transactions that are clearly not subscriptions — skip before grouping.
// Tested in getSubscriptionRecommendations against the output of
// normalizeMerchant (already lower-cased), which is why no `i` flag is needed.
// Whole-word matches only (`\b`), so "atm" does not match inside another word.
const SKIP_MERCHANT_RE = /\b(atm|withdrawal|transfer|deposit|zelle|venmo|wire|refund|rebate|interest charge)\b/;
// Fallback keyword list used when catalog lookup finds no match.
// Each entry is [subscription_type, keywords]. inferType walks the entries in
// order and returns the type of the first entry that has any keyword occurring
// as a plain substring of the normalized merchant text, so entry order decides
// ties. Keywords are written in normalized form (lower-case, punctuation turned
// into spaces — e.g. 'at t' for "AT&T").
// NOTE(review): matching is by substring, so short keywords such as 'max',
// 'att' or 'gym' can also hit inside longer unrelated words.
const TYPE_KEYWORDS = [
['streaming', ['netflix', 'hulu', 'disney', 'max', 'paramount', 'peacock', 'youtube tv', 'sling', 'espn', 'fubo', 'starz', 'crunchyroll', 'dazn']],
['music', ['spotify', 'apple music', 'tidal', 'pandora', 'siriusxm', 'soundcloud', 'deezer', 'iheart']],
['software', ['adobe', 'microsoft', 'github', 'notion', 'figma', 'canva', 'openai', 'chatgpt', 'grammarly', 'zoom', 'slack', 'cursor', 'ynab']],
['cloud', ['dropbox', 'icloud', 'google one', 'google storage', 'backblaze', 'box storage']],
['news', ['nyt', 'new york times', 'economist', 'athletic', 'washington post', 'wsj', 'bloomberg', 'substack', 'patreon', 'medium']],
['fitness', ['peloton', 'planet fitness', 'gym', 'fitbit', 'strava', 'headspace', 'calm', 'noom', 'classpass', 'whoop']],
['gaming', ['xbox', 'playstation', 'steam', 'nintendo', 'roblox', 'discord nitro', 'ea play', 'ubisoft']],
['utilities', ['verizon', 'at t', 'att', 'comcast', 'xfinity', 'spectrum', 'tmobile', 't mobile']],
['insurance', ['insurance', 'geico', 'progressive', 'state farm', 'allstate']],
['food', ['hellofresh', 'blue apron', 'doordash', 'instacart', 'uber eats', 'grubhub', 'factor', 'hungryroot']],
['education', ['duolingo', 'masterclass', 'coursera', 'skillshare', 'audible', 'kindle unlimited', 'blinkist']],
['shopping', ['amazon prime', 'walmart plus', 'costco', 'target circle', 'chewy']],
['security', ['nordvpn', 'expressvpn', '1password', 'dashlane', 'norton', 'mcafee', 'surfshark']],
];
// ── Catalog ───────────────────────────────────────────────────────────────────
/**
 * Read every row of `subscription_catalog`, ordered by ascending rank.
 *
 * Best-effort: any error raised while preparing or running the query is
 * swallowed and an empty list is returned instead.
 *
 * @param {object} db - database handle exposing `prepare(sql).all()`.
 * @returns {object[]} catalog rows, or an empty array when the query fails.
 */
function loadCatalog(db) {
  const sql = 'SELECT id, rank, name, category, subscription_type, domain, website FROM subscription_catalog ORDER BY rank ASC';
  let entries;
  try {
    const statement = db.prepare(sql);
    entries = statement.all();
  } catch {
    entries = [];
  }
  return entries;
}
// Build a normalized-name → subscription_type lookup from the whole catalog so
// inferType can recognize every catalog service, not just the hardcoded
// keyword list. Entries without a type (or typed 'other') are skipped, as are
// names that normalize to fewer than 3 characters. When two entries normalize
// to the same name, the one that comes first in `catalog` wins.
function buildCatalogTypeMap(catalog) {
  const typeByName = new Map();
  for (const { name, subscription_type: type } of catalog) {
    const hasUsefulType = Boolean(type) && type !== 'other';
    if (hasUsefulType) {
      const normalized = normalizeCatalogName(name);
      const alreadyKnown = typeByName.has(normalized);
      if (normalized.length >= 3 && !alreadyKnown) {
        typeByName.set(normalized, type);
      }
    }
  }
  return typeByName;
}
// Normalized catalog name with every whitespace run removed
// (e.g. "Apple Music" → "applemusic"), used to match merchant text in which
// the words have been run together.
function compactCatalogKey(value) {
  const normalized = normalizeCatalogName(value);
  return normalized.split(/\s+/).join('');
}
/**
 * Extract the hostname from a full URL or from a bare "host[/path]" string.
 *
 * @param {*} value - URL-ish value; falsy values yield ''.
 * @returns {string} the parsed hostname, or the raw string when it cannot be
 *   parsed as a URL even after a scheme is prepended.
 */
function hostFromUrl(value) {
  if (!value) return '';
  const text = String(value);
  // Only skip the scheme prefix when the text really starts with "http://" or
  // "https://" (any letter case). A plain `startsWith('http')` check also
  // matches bare hosts such as "httpbin.org/get", which then fail to parse and
  // come back with their path still attached.
  const hasScheme = /^https?:\/\//i.test(text);
  try {
    return new URL(hasScheme ? text : `https://${text}`).hostname;
  } catch {
    // Unparseable input: hand the raw text back so callers can still try to
    // derive something from it.
    return text;
  }
}
// Derive space-separated match keys from a catalog entry's `domain` and the
// host of its `website`. For each host with at least two dot-separated labels
// (after lower-casing and stripping a leading "www." and any path) this adds:
//   - all labels joined by spaces        ("tv.youtube.com" → "tv youtube com")
//   - the last two labels joined          ("youtube com")
//   - the first label alone, if it has 5+ characters
// Duplicates are dropped and only keys of 4+ characters are returned.
function catalogDomainKeys(entry) {
  const collected = new Set();
  const sources = [entry.domain, hostFromUrl(entry.website)];
  sources.forEach(source => {
    if (!source) return;
    const host = String(source)
      .toLowerCase()
      .replace(/^www\./, '')
      .replace(/\/.*$/, '');
    const parts = host.split('.').filter(part => part !== '');
    if (parts.length < 2) return;
    collected.add(parts.join(' '));
    collected.add(parts.slice(-2).join(' '));
    const [first] = parts;
    if (first.length >= 5) collected.add(first);
  });
  return Array.from(collected).filter(key => key.length >= 4);
}
// Canonical form of a catalog service name: lower-case, "+" spelled out as
// "plus" (so "Walmart+" only matches "walmart plus" transactions), every run of
// non-alphanumeric characters collapsed to one space, and trimmed.
function normalizeCatalogName(value) {
  const lowered = String(value || '').toLowerCase();
  const plusSpelledOut = lowered.replace(/\+/g, ' plus ');
  const alphanumericOnly = plusSpelledOut.replace(/[^a-z0-9]+/g, ' ');
  const singleSpaced = alphanumericOnly.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
// Given a normalized merchant string, find the best matching catalog entry.
//
// An entry can match by service name (normalized, at least 3 characters) or by
// one of its domain keys (dots replaced with spaces). A key matches when it
// occurs in the merchant text as written, or — for keys of 5+ characters once
// spaces are removed — in the merchant text with spaces removed.
//
// Scoring: name matches score 1000 + key length, domain matches 500 + key
// length, so any name match outranks any domain match and longer keys outrank
// shorter ones. On equal scores the entry that comes first in `catalog` is kept.
// Returns null when the catalog or the merchant text is empty, or nothing matches.
function lookupCatalog(catalog, merchantText) {
  if (!catalog.length || !merchantText) return null;

  const compactMerchant = compactCatalogKey(merchantText);
  const appearsInMerchant = (spacedKey, compactKey) => (
    merchantText.includes(spacedKey)
    || (compactKey.length >= 5 && compactMerchant.includes(compactKey))
  );

  let winner = null;
  let winningScore = 0;
  const consider = (entry, score) => {
    if (score > winningScore) {
      winner = entry;
      winningScore = score;
    }
  };

  for (const entry of catalog) {
    const nameKey = normalizeCatalogName(entry.name);
    const nameCompact = compactCatalogKey(entry.name);
    if (nameKey.length >= 3 && appearsInMerchant(nameKey, nameCompact)) {
      consider(entry, 1000 + nameKey.length);
    }
    for (const domainKey of catalogDomainKeys(entry)) {
      const domainCompact = domainKey.replace(/\s+/g, '');
      if (appearsInMerchant(domainKey, domainCompact)) {
        consider(entry, 500 + domainKey.length);
      }
    }
  }
  return winner;
}
// ── Helpers ───────────────────────────────────────────────────────────────────
// Canonical form of a transaction's merchant text, applied in this order:
//   1. lower-case
//   2. "+" → " plus " (so "WALMART+" lines up with the catalog's "walmart plus")
//   3. every character that is not a-z, 0-9 or whitespace → space
//   4. bank/processor filler words removed as whole words
//   5. whitespace runs collapsed, ends trimmed
function normalizeMerchant(value) {
  const steps = [
    [/\+/g, ' plus '],
    [/[^a-z0-9\s]/g, ' '],
    [/\b(pos|debit|card|payment|purchase|recurring|online|inc|llc|co|www)\b/g, ' '],
    [/\s+/g, ' '],
  ];
  const lowered = String(value || '').toLowerCase();
  const cleaned = steps.reduce(
    (text, [pattern, replacement]) => text.replace(pattern, replacement),
    lowered,
  );
  return cleaned.trim();
}
// Upper-case the first character of every whitespace-separated word and join
// the words with single spaces. A falsy value becomes "Subscription"; the rest
// of each word is left as it is.
function titleCase(value) {
  const words = String(value || 'Subscription').split(/\s+/);
  const capitalized = [];
  for (const word of words) {
    if (word === '') continue;
    capitalized.push(`${word[0].toUpperCase()}${word.slice(1)}`);
  }
  return capitalized.join(' ');
}
/**
 * Decide the subscription type for a merchant.
 *
 * Resolution order:
 *   1. the matched catalog entry's own `subscription_type`, when it has one;
 *   2. the first catalogTypeMap key (in map iteration order) found as a
 *      substring of the normalized merchant text;
 *   3. the first TYPE_KEYWORDS entry with a keyword found as a substring;
 *   4. 'other'.
 *
 * @param {string} merchantText - raw or normalized merchant text.
 * @param {object|null} catalogEntry - matched catalog row, if any.
 * @param {Map<string, string>|null} [catalogTypeMap] - normalized name → type.
 * @returns {string} the inferred subscription type.
 */
function inferType(merchantText, catalogEntry, catalogTypeMap = null) {
  const catalogType = catalogEntry?.subscription_type;
  if (catalogType) return catalogType;

  const haystack = normalizeMerchant(merchantText);
  const mentions = needle => haystack.includes(needle);

  if (catalogTypeMap) {
    for (const [nameKey, type] of catalogTypeMap.entries()) {
      if (mentions(nameKey)) return type;
    }
  }

  const keywordHit = TYPE_KEYWORDS.find(([, words]) => words.some(mentions));
  return keywordHit ? keywordHit[0] : 'other';
}
// Trim a catalog row down to the fields returned as `catalog_match`.
// Returns null when there is no entry; a missing type becomes 'other' and a
// missing website becomes null.
function catalogMatchPayload(catalogEntry) {
  if (!catalogEntry) return null;
  const { id, name, category, subscription_type: type, website } = catalogEntry;
  return {
    id,
    name,
    category,
    subscription_type: type || 'other',
    website: website || null,
  };
}
/**
 * Convert one charge on a billing cycle into its per-month cost, rounded to
 * cents.
 *
 * The factor is looked up in MONTHLY_FACTORS by `cycleType` (or, if that is
 * empty, by `billingCycle`). When that key is unknown, `billingCycle` values
 * 'quarterly' and 'annually' are tried as a fallback; otherwise the factor is 1.
 *
 * @param {number|string} amount - charge amount; empty or non-numeric values count as 0.
 * @param {string} [cycleType] - e.g. 'weekly', 'monthly', 'annual'.
 * @param {string} [billingCycle] - e.g. 'monthly', 'quarterly', 'annually'.
 * @returns {number} monthly cost rounded to two decimals.
 */
function monthlyEquivalent(amount, cycleType, billingCycle) {
  const billing = String(billingCycle || '').toLowerCase();
  const key = String(cycleType || billingCycle || 'monthly').toLowerCase();
  let fallback = key;
  if (billing === 'quarterly') fallback = 'quarterly';
  else if (billing === 'annually') fallback = 'annual';

  // Own-property lookup only: a cycle named like an Object.prototype member
  // ("constructor", "tostring", ...) must not resolve to an inherited value,
  // which would turn the result into NaN.
  const factorFor = name => (
    Object.prototype.hasOwnProperty.call(MONTHLY_FACTORS, name) ? MONTHLY_FACTORS[name] : undefined
  );
  const factor = factorFor(key) ?? factorFor(fallback) ?? 1;

  const value = Number(amount || 0);
  // Non-numeric amounts are treated as "no charge" rather than leaking NaN
  // into the totals built from this value.
  if (!Number.isFinite(value)) return 0;
  return Math.round(value * factor * 100) / 100;
}
/**
 * Compute the next date (today or later, in local time) on which a bill is due.
 *
 * - Monthly-style cycles (anything that is not quarterly/annual): the due day
 *   in the current month, or in the next month when that day has passed.
 * - Quarterly / annual ('annual' or 'annually'): `bill.cycle_day` is read as
 *   the anchor month (1-12); the result is the first date on or after today in
 *   the series anchor, anchor + step, anchor + 2*step, ... with step 3 or 12.
 *
 * A due day beyond the end of a month is clamped to that month's last day
 * (e.g. 31 → Feb 29 in 2024).
 *
 * @param {object} bill - reads `due_day`, `cycle_type`, `billing_cycle`, `cycle_day`.
 * @param {Date} [now] - reference "today"; defaults to the current time.
 * @returns {string} local calendar date as 'YYYY-MM-DD'.
 */
function nextDueDate(bill, now = new Date()) {
  const dueDay = Math.min(Math.max(Number(bill.due_day) || 1, 1), 31);
  const cycle = String(bill.cycle_type || bill.billing_cycle || 'monthly').toLowerCase();
  const today = new Date(now.getFullYear(), now.getMonth(), now.getDate());

  // Due date inside the given month; `monthIndex` may run past 11 and is
  // normalized by Date. The day is clamped so that it never spills into the
  // following month.
  const dueDateIn = (year, monthIndex) => {
    const lastDayOfMonth = new Date(year, monthIndex + 1, 0).getDate();
    return new Date(year, monthIndex, Math.min(dueDay, lastDayOfMonth));
  };

  let date;
  if (cycle === 'quarterly' || cycle === 'annual' || cycle === 'annually') {
    const anchorMonth = Math.min(Math.max(Number(bill.cycle_day) || 1, 1), 12) - 1;
    const step = cycle === 'quarterly' ? 3 : 12;
    // Start one year back so that occurrences earlier in the current year than
    // the anchor month are not skipped (anchor November, today February →
    // February, not November).
    const startYear = now.getFullYear() - 1;
    let monthIndex = anchorMonth;
    date = dueDateIn(startYear, monthIndex);
    while (date < today) {
      monthIndex += step;
      date = dueDateIn(startYear, monthIndex);
    }
  } else {
    date = dueDateIn(now.getFullYear(), now.getMonth());
    if (date < today) {
      date = dueDateIn(now.getFullYear(), now.getMonth() + 1);
    }
  }

  // Format from local components: toISOString() would convert local midnight
  // to UTC and shift the calendar day in time zones ahead of UTC.
  const yyyy = String(date.getFullYear()).padStart(4, '0');
  const mm = String(date.getMonth() + 1).padStart(2, '0');
  const dd = String(date.getDate()).padStart(2, '0');
  return `${yyyy}-${mm}-${dd}`;
}
// Add the computed, presentation-level fields to a bill row:
// boolean `is_subscription` / `active`, monthly and yearly cost equivalents,
// the next due date, and a subscription type (the stored one, or one inferred
// from the bill's name and category name when none is stored).
function decorateSubscription(bill) {
  const {
    expected_amount: expectedAmount,
    cycle_type: cycleType,
    billing_cycle: billingCycle,
  } = bill;
  const monthly = monthlyEquivalent(expectedAmount, cycleType, billingCycle);
  const yearly = Math.round(monthly * 12 * 100) / 100;
  const dueDate = nextDueDate(bill);
  const subscriptionType = bill.subscription_type
    || inferType(`${bill.name} ${bill.category_name || ''}`, null);

  return {
    ...bill,
    is_subscription: Boolean(bill.is_subscription),
    active: Boolean(bill.active),
    monthly_equivalent: monthly,
    yearly_equivalent: yearly,
    next_due_date: dueDate,
    subscription_type: subscriptionType,
  };
}
/**
 * Load a user's subscription bills (non-deleted rows with `is_subscription = 1`)
 * and decorate each one with decorateSubscription.
 *
 * Every row also carries `category_name` (from the joined, non-deleted
 * category) and `has_merchant_rule` (1 when a `bill_merchant_rules` row exists
 * for the bill, else 0). Rows are ordered active first, then by due day, then
 * by name without regard to case.
 *
 * @param {object} db - database handle exposing `prepare(sql).all()`.
 * @param {number|string} userId - owner of the bills.
 * @returns {object[]} decorated subscription rows.
 */
function getSubscriptions(db, userId) {
  const sql = `
SELECT b.*, c.name AS category_name,
CASE WHEN EXISTS(
SELECT 1 FROM bill_merchant_rules WHERE bill_id = b.id AND user_id = b.user_id
) THEN 1 ELSE 0 END AS has_merchant_rule
FROM bills b
LEFT JOIN categories c ON c.id = b.category_id AND c.user_id = b.user_id AND c.deleted_at IS NULL
WHERE b.user_id = ?
AND b.deleted_at IS NULL
AND b.is_subscription = 1
ORDER BY b.active DESC, b.due_day ASC, b.name COLLATE NOCASE ASC
`;
  const bills = db.prepare(sql).all(userId);
  return bills.map(bill => decorateSubscription(bill));
}
/**
 * Aggregate decorated subscriptions into headline numbers.
 *
 * Only items with a truthy `active` flag count toward the totals; everything
 * else is reported as paused. `top_type` is the subscription type with the
 * largest combined monthly cost among active items (items without a type are
 * grouped under 'other'), or null when nothing is active.
 *
 * @param {object[]} subscriptions - items carrying `active`,
 *   `monthly_equivalent` and `subscription_type`.
 * @returns {{active_count: number, paused_count: number, monthly_total: number,
 *   yearly_total: number, top_type: ({type: string, monthly_total: number}|null)}}
 */
function getSubscriptionSummary(subscriptions) {
  const toCents = amount => Math.round(amount * 100) / 100;

  const totalsByType = new Map();
  let activeCount = 0;
  let monthlySum = 0;
  for (const subscription of subscriptions) {
    if (!subscription.active) continue;
    const monthly = Number(subscription.monthly_equivalent || 0);
    const type = subscription.subscription_type || 'other';
    activeCount += 1;
    monthlySum += monthly;
    totalsByType.set(type, (totalsByType.get(type) || 0) + monthly);
  }

  // Largest monthly total first; the first-seen type stays ahead on a tie.
  const ranked = [...totalsByType.entries()].sort((a, b) => b[1] - a[1]);
  const leader = ranked.length > 0 ? ranked[0] : null;

  return {
    active_count: activeCount,
    paused_count: subscriptions.length - activeCount,
    monthly_total: toCents(monthlySum),
    yearly_total: toCents(monthlySum * 12),
    top_type: leader ? { type: leader[0], monthly_total: toCents(leader[1]) } : null,
  };
}
// Normalized names of all of the user's non-deleted bills, in query order.
// Names that normalize to an empty string are left out.
function existingBillNames(db, userId) {
  const statement = db.prepare('SELECT name FROM bills WHERE user_id = ? AND deleted_at IS NULL');
  const names = [];
  for (const { name } of statement.all(userId)) {
    const normalized = normalizeMerchant(name);
    if (normalized) names.push(normalized);
  }
  return names;
}
// Absolute value of a transaction amount divided by 100, with the sign dropped
// and the result rounded to two decimals (-1999 → 19.99).
// Empty/falsy amounts count as 0.
// NOTE(review): the /100 implies amounts are stored in cents — confirm against
// the transactions schema.
function dollarsFromTransactionAmount(amount) {
  const magnitude = Math.abs(Number(amount || 0));
  const scaled = (magnitude / 100) * 100;
  return Math.round(scaled) / 100;
}
// Map a detected cycle type onto the `billing_cycle` value stored on a bill.
// Only 'quarterly', 'annual' and 'monthly' have a counterpart; every other
// value (including weekly and biweekly) is stored as 'irregular'.
function billingCycleForCycleType(cycleType) {
  switch (cycleType) {
    case 'quarterly':
      return 'quarterly';
    case 'annual':
      return 'annually';
    case 'monthly':
      return 'monthly';
    default:
      return 'irregular';
  }
}
// ── Decline store ─────────────────────────────────────────────────────────────
/**
 * Collect the decline keys a user has stored in `declined_subscription_hints`.
 *
 * Best-effort: if the query cannot be prepared or run, the error is swallowed
 * and an empty set is returned.
 *
 * @param {object} db - database handle exposing `prepare(sql).all()`.
 * @param {number|string} userId - owner of the decline records.
 * @returns {Set<string>} decline keys (empty on failure).
 */
function getDeclinedKeys(db, userId) {
  const keys = new Set();
  try {
    const statement = db.prepare('SELECT decline_key FROM declined_subscription_hints WHERE user_id = ?');
    for (const { decline_key: key } of statement.all(userId)) {
      keys.add(key);
    }
  } catch {
    return new Set();
  }
  return keys;
}
/**
 * Record that a user dismissed a recommendation.
 *
 * Inserts (user_id, decline_key) into `declined_subscription_hints`; an
 * existing identical pair is left untouched by the ON CONFLICT clause, so
 * repeating the call is harmless. Errors from the database propagate.
 *
 * @param {object} db - database handle exposing `prepare(sql).run()`.
 * @param {number|string} userId - user declining the recommendation.
 * @param {string} declineKey - key of the declined recommendation.
 * @returns {void}
 */
function declineRecommendation(db, userId, declineKey) {
  const insertHint = db.prepare(`
INSERT INTO declined_subscription_hints (user_id, decline_key)
VALUES (?, ?)
ON CONFLICT(user_id, decline_key) DO NOTHING
`);
  insertHint.run(userId, declineKey);
}
// ── Recommendations ───────────────────────────────────────────────────────────
/**
 * Suggest subscriptions the user is probably paying for but has not yet added
 * as bills, based on their transaction history.
 *
 * Pipeline:
 *   1. Load unmatched, non-ignored, negative-amount transactions from the last
 *      420 days.
 *   2. Group them by normalized merchant + whole-dollar amount bucket, skipping
 *      merchants shorter than 3 characters, merchants matching
 *      SKIP_MERCHANT_RE, and amounts under 1.
 *   3. Drop groups the user declined or whose merchant overlaps an existing
 *      bill name.
 *   4. Emit a 'known_service' recommendation for a catalog match seen once;
 *      otherwise require 2+ charges with a biweekly, monthly, quarterly or
 *      annual rhythm and a stable amount ('confirmed' with a catalog match,
 *      'pattern' without).
 *   5. Keep one recommendation per catalog entry / merchant (highest
 *      confidence, then most occurrences) and return at most 20.
 *
 * @param {object} db - database handle exposing `prepare(sql).all()`.
 * @param {number|string} userId - user whose transactions are analysed.
 * @returns {object[]} recommendations as built by buildRecommendation.
 */
function getSubscriptionRecommendations(db, userId) {
  const catalog = loadCatalog(db);
  const catalogTypeMap = buildCatalogTypeMap(catalog);
  const existingNames = existingBillNames(db, userId);
  const declined = getDeclinedKeys(db, userId);
  const rows = db.prepare(`
SELECT
t.id, t.amount, t.currency, t.description, t.payee, t.memo, t.category,
COALESCE(t.posted_date, substr(t.transacted_at, 1, 10)) AS tx_date,
ds.provider AS data_source_provider,
ds.name AS data_source_name
FROM transactions t
LEFT JOIN data_sources ds ON ds.id = t.data_source_id AND ds.user_id = t.user_id
WHERE t.user_id = ?
AND t.ignored = 0
AND t.match_status = 'unmatched'
AND t.amount < 0
AND COALESCE(t.posted_date, substr(t.transacted_at, 1, 10)) >= date('now', '-420 days')
ORDER BY tx_date ASC
`).all(userId);

  // The catalog match depends only on the merchant text, so resolve it once
  // per merchant. Previously a merchant with no match triggered a full catalog
  // scan for every one of its transactions.
  const catalogByMerchant = new Map();
  const catalogEntryFor = merchant => {
    if (!catalogByMerchant.has(merchant)) {
      catalogByMerchant.set(merchant, lookupCatalog(catalog, merchant));
    }
    return catalogByMerchant.get(merchant);
  };

  // Group by merchant + amount bucket — consistent amounts are the foundation of
  // subscription detection. Catalog lookup names the service and boosts confidence
  // but does not change the grouping; deduplication at the end ensures one entry
  // per known service.
  const groups = new Map();
  for (const tx of rows) {
    const merchant = normalizeMerchant(tx.payee || tx.description || tx.memo);
    if (!merchant || merchant.length < 3) continue;
    if (SKIP_MERCHANT_RE.test(merchant)) continue;
    const amount = dollarsFromTransactionAmount(tx.amount);
    if (amount < 1) continue;
    const key = `${merchant}:${Math.round(amount)}`;
    if (!groups.has(key)) {
      groups.set(key, { merchant, items: [], catalogEntry: catalogEntryFor(merchant) });
    }
    groups.get(key).items.push({ ...tx, amount_dollars: amount });
  }

  const recommendations = [];
  for (const group of groups.values()) {
    const { merchant, catalogEntry } = group;
    const declineKey = catalogEntry ? `catalog:${catalogEntry.id}` : `merchant:${merchant}`;
    if (declined.has(declineKey)) continue;
    // Already tracked: an existing bill name contains the merchant or vice versa.
    if (existingNames.some(n => n.includes(merchant) || merchant.includes(n))) continue;

    const sorted = group.items
      .filter(item => item.tx_date)
      .sort((a, b) => String(a.tx_date).localeCompare(String(b.tx_date)));
    if (sorted.length === 0) continue;

    const averageAmount = sorted.reduce((sum, item) => sum + item.amount_dollars, 0) / sorted.length;
    // Largest deviation of any single charge from the group's average.
    const maxDelta = sorted.length > 1
      ? Math.max(...sorted.map(item => Math.abs(item.amount_dollars - averageAmount)))
      : 0;
    const last = sorted[sorted.length - 1];

    // Tier 1: catalog match with 1 occurrence
    if (catalogEntry && sorted.length === 1) {
      recommendations.push(buildRecommendation({
        merchant, catalogEntry, sorted, averageAmount, maxDelta, last,
        cycleType: 'monthly', avgGap: 30, confidence: 90, tier: 'known_service', declineKey, catalogTypeMap,
      }));
      continue;
    }
    if (sorted.length < 2) continue;

    // Whole-day gaps between consecutive charges.
    const gaps = [];
    for (let i = 1; i < sorted.length; i++) {
      gaps.push(Math.round(
        (new Date(`${sorted[i].tx_date}T00:00:00`) - new Date(`${sorted[i - 1].tx_date}T00:00:00`)) / 86400000
      ));
    }
    const avgGap = gaps.reduce((sum, g) => sum + g, 0) / gaps.length;
    const cycleType = avgGap >= 320 ? 'annual'
      : avgGap >= 75 ? 'quarterly'
      : avgGap >= 10 && avgGap <= 18 ? 'biweekly'
      : avgGap <= 9 ? 'weekly'
      : 'monthly';
    // Reject rhythms that fall between the recognized cycles, and weekly
    // charges altogether.
    if (cycleType === 'monthly' && (avgGap < 24 || avgGap > 38)) continue;
    if (cycleType === 'quarterly' && (avgGap < 75 || avgGap > 105)) continue;
    if (cycleType === 'weekly') continue;
    // Amount must be stable: within 3 or 18% of the average, whichever is larger.
    if (maxDelta > Math.max(3, averageAmount * 0.18)) continue;

    let confidence;
    if (catalogEntry) {
      confidence = Math.min(99, 68 + sorted.length * 8 + (maxDelta <= 1 ? 8 : 0));
    } else {
      confidence = Math.min(96, 58 + sorted.length * 9 + (maxDelta <= 1 ? 10 : 0));
    }
    const tier = catalogEntry ? 'confirmed' : 'pattern';
    recommendations.push(buildRecommendation({
      merchant, catalogEntry, sorted, averageAmount, maxDelta, last,
      cycleType, avgGap, confidence, tier, declineKey, catalogTypeMap,
    }));
  }

  // Deduplicate by catalog entry — if multiple amount buckets matched the same
  // known service, keep only the highest-confidence one.
  const seen = new Map();
  const deduped = [];
  for (const rec of recommendations.sort((a, b) => b.confidence - a.confidence || b.occurrence_count - a.occurrence_count)) {
    const key = rec.catalog_match ? `catalog:${rec.catalog_match.id}` : `merchant:${rec.merchant}`;
    if (!seen.has(key)) {
      seen.set(key, true);
      deduped.push(rec);
    }
  }
  return deduped.slice(0, 20);
}
/**
 * Assemble the recommendation object for one group of charges.
 *
 * The name comes from the catalog entry when there is one, otherwise from the
 * title-cased merchant. `id` is a base64url encoding of
 * "merchant:rounded average amount:last charge date". `due_day` is the day of
 * month of the most recent charge (1 if it cannot be read). `reasons` is a
 * list of human-readable justifications.
 *
 * @param {object} details - merchant, catalogEntry, sorted (charges by date,
 *   oldest first), averageAmount, maxDelta, last (most recent charge),
 *   cycleType, avgGap, confidence, tier, declineKey, catalogTypeMap.
 * @returns {object} recommendation payload.
 */
function buildRecommendation({ merchant, catalogEntry, sorted, averageAmount, maxDelta, last, cycleType, avgGap, confidence, tier, declineKey, catalogTypeMap }) {
  const occurrences = sorted.length;
  const subscriptionType = inferType(merchant, catalogEntry, catalogTypeMap);

  const reasons = [];
  if (catalogEntry) {
    reasons.push(`Matches known service: ${catalogEntry.name}`);
  }
  if (occurrences > 1) {
    reasons.push(`${occurrences} similar charges`, `About ${Math.round(avgGap)} days apart`);
  }
  reasons.push(`${last.currency || 'USD'} ${averageAmount.toFixed(2)} average`);

  const idSeed = `${merchant}:${Math.round(averageAmount)}:${last.tx_date}`;
  const lastChargeDay = Number(String(last.tx_date).slice(8, 10));

  return {
    id: Buffer.from(idSeed).toString('base64url'),
    name: catalogEntry ? catalogEntry.name : titleCase(merchant),
    subscription_type: subscriptionType,
    expected_amount: Math.round(averageAmount * 100) / 100,
    monthly_equivalent: monthlyEquivalent(averageAmount, cycleType, cycleType),
    cycle_type: cycleType,
    billing_cycle: billingCycleForCycleType(cycleType),
    due_day: lastChargeDay || 1,
    last_seen_date: last.tx_date,
    occurrence_count: occurrences,
    confidence,
    tier,
    catalog_match: catalogMatchPayload(catalogEntry),
    transaction_ids: sorted.map(charge => charge.id),
    merchant,
    decline_key: declineKey,
    source: last.data_source_name || 'Transaction history',
    reasons,
  };
}
/**
 * Search a user's non-ignored, negative-amount transactions whose description,
 * payee, memo or category contains the query text.
 *
 * The text is matched literally: `%`, `_` and `\` typed by the user are escaped
 * so they do not act as LIKE wildcards.
 *
 * Rows come back with unmatched transactions first, then newest first; the
 * final list is re-ordered so rows whose merchant matches a catalog entry come
 * before the rest (the sort is stable, so the SQL order is kept within each
 * group).
 *
 * @param {object} db - database handle exposing `prepare(sql).all()`.
 * @param {number|string} userId - owner of the transactions.
 * @param {{q?: string, limit?: (number|string)}} [query] - `q` must be at least
 *   2 characters after trimming; `limit` defaults to 50 and is clamped to 1-100.
 * @returns {object[]} transaction rows extended with `amount_dollars`,
 *   `merchant`, `is_known_subscription` and `catalog_match`; [] when `q` is
 *   too short.
 */
function searchSubscriptionTransactions(db, userId, query = {}) {
  const q = String(query.q || '').trim();
  if (q.length < 2) return [];
  const limit = Math.max(1, Math.min(parseInt(query.limit || '50', 10) || 50, 100));
  // Escape LIKE metacharacters (and the escape character itself) so that a
  // search for "50%" or "a_b" matches those literal characters.
  const like = `%${q.replace(/[\\%_]/g, '\\$&')}%`;
  const catalog = loadCatalog(db);
  const rows = db.prepare(`
SELECT
t.id, t.user_id, t.data_source_id, t.account_id, t.provider_transaction_id,
t.source_type, t.transaction_type, t.posted_date, t.transacted_at, t.amount,
t.currency, t.description, t.payee, t.memo, t.category, t.matched_bill_id,
t.match_status, t.ignored, t.created_at, t.updated_at,
ds.type AS data_source_type, ds.provider AS data_source_provider,
ds.name AS data_source_name, ds.status AS data_source_status,
fa.name AS account_name, fa.org_name AS account_org_name,
fa.account_type AS account_type,
b.name AS matched_bill_name
FROM transactions t
LEFT JOIN data_sources ds ON ds.id = t.data_source_id AND ds.user_id = t.user_id
LEFT JOIN financial_accounts fa ON fa.id = t.account_id AND fa.user_id = t.user_id
LEFT JOIN bills b ON b.id = t.matched_bill_id AND b.user_id = t.user_id AND b.deleted_at IS NULL
WHERE t.user_id = ?
AND t.ignored = 0
AND t.amount < 0
AND (t.description LIKE ? ESCAPE '\\' OR t.payee LIKE ? ESCAPE '\\' OR t.memo LIKE ? ESCAPE '\\' OR t.category LIKE ? ESCAPE '\\')
ORDER BY
CASE WHEN t.match_status = 'unmatched' THEN 0 ELSE 1 END,
COALESCE(t.posted_date, substr(t.transacted_at, 1, 10), t.created_at) DESC,
t.id DESC
LIMIT ?
`).all(userId, like, like, like, like, limit);
  return rows.map(row => {
    const merchant = normalizeMerchant(row.payee || row.description || row.memo);
    const catalogEntry = lookupCatalog(catalog, merchant);
    return {
      ...row,
      amount_dollars: dollarsFromTransactionAmount(row.amount),
      merchant,
      is_known_subscription: !!catalogEntry,
      catalog_match: catalogMatchPayload(catalogEntry),
    };
  }).sort((a, b) => Number(b.is_known_subscription) - Number(a.is_known_subscription));
}
/**
 * Create a subscription bill from an accepted recommendation and link the
 * transactions that produced it.
 *
 * Steps:
 *   1. Build a bill draft from the payload and validate it with
 *      validateBillData; the first validation error is thrown as an Error with
 *      `status = 400` and `field` set.
 *   2. Insert the bill with insertBill.
 *   3. For up to 50 integer `payload.transaction_ids` that belong to the user
 *      and are not ignored: mark the transaction as matched to the new bill
 *      and, when it has a date, record a payment for it. Transactions that are
 *      already matched are left alone and get no payment row.
 *
 * NOTE(review): the bill insert happens before, and outside of, the
 * transaction that links payments, so a failure while linking leaves the bill
 * in place — confirm that this is intended.
 *
 * @param {object} db - database handle (`prepare`, `transaction`).
 * @param {number|string} userId - owner of the new bill.
 * @param {object} [payload] - recommendation fields: name, category_id,
 *   due_day, expected_amount, cycle_type, subscription_type, last_seen_date,
 *   catalog_match, merchant, transaction_ids.
 * @returns {object} the created bill, decorated by decorateSubscription.
 * @throws {Error} with `status` 400 when the draft fails validation.
 */
function createSubscriptionFromRecommendation(db, userId, payload = {}) {
  // Month (1-12) of the last charge, used as the anchor month for quarterly and
  // annual cycles. Only the date part of last_seen_date is parsed; a missing or
  // unparseable value falls back to the current month instead of "NaN".
  const seenDate = new Date(`${String(payload.last_seen_date || '').slice(0, 10)}T00:00:00`);
  const anchorMonth = (Number.isNaN(seenDate.getTime()) ? new Date() : seenDate).getMonth() + 1;

  const cycleType = payload.cycle_type || 'monthly';
  let cycleDay;
  if (payload.cycle_type === 'annual' || payload.cycle_type === 'quarterly') {
    cycleDay = String(anchorMonth);
  } else if (payload.cycle_type === 'weekly' || payload.cycle_type === 'biweekly') {
    cycleDay = 'monday';
  } else {
    cycleDay = String(payload.due_day || 1);
  }

  const draft = {
    name: payload.name,
    category_id: payload.category_id || null,
    due_day: payload.due_day,
    expected_amount: payload.expected_amount,
    billing_cycle: billingCycleForCycleType(cycleType),
    cycle_type: cycleType,
    cycle_day: cycleDay,
    is_subscription: 1,
    subscription_type: SUBSCRIPTION_TYPES.includes(payload.subscription_type) ? payload.subscription_type : 'other',
    reminder_days_before: 3,
    subscription_source: payload.catalog_match ? 'catalog_match' : 'simplefin_recommendation',
    subscription_detected_at: new Date().toISOString(),
    notes: payload.merchant ? `Detected from recurring merchant: ${payload.merchant}` : null,
  };

  const validation = validateBillData(draft);
  if (validation.errors.length > 0) {
    const err = new Error(validation.errors[0].message);
    err.field = validation.errors[0].field;
    err.status = 400;
    throw err;
  }

  const created = insertBill(db, userId, validation.normalized);

  // Only integer ids, capped at 50, are considered for linking.
  const ids = Array.isArray(payload.transaction_ids)
    ? payload.transaction_ids.map(id => Number(id)).filter(Number.isInteger).slice(0, 50)
    : [];
  if (ids.length > 0) {
    const placeholders = ids.map(() => '?').join(',');
    const txRows = db.prepare(`
SELECT id, amount, posted_date, transacted_at
FROM transactions
WHERE user_id = ? AND id IN (${placeholders}) AND ignored = 0
`).all(userId, ...ids);
    const updateTx = db.prepare(`
UPDATE transactions
SET matched_bill_id = ?, match_status = 'matched', updated_at = CURRENT_TIMESTAMP
WHERE id = ? AND user_id = ? AND ignored = 0 AND match_status != 'matched'
`);
    const insertPayment = db.prepare(`
INSERT OR IGNORE INTO payments (bill_id, amount, paid_date, payment_source, transaction_id)
VALUES (?, ?, ?, 'auto_match', ?)
`);
    db.transaction(() => {
      for (const tx of txRows) {
        const paidDate = tx.posted_date || (tx.transacted_at ? String(tx.transacted_at).slice(0, 10) : null);
        const info = updateTx.run(created.id, tx.id, userId);
        // The UPDATE skips transactions that are already matched. When it
        // reports zero changed rows the transaction belongs to another bill,
        // so no payment may be booked against the new one. (A driver that
        // reports no `changes` count keeps the previous always-insert behavior.)
        const linked = info?.changes !== 0;
        if (linked && paidDate) {
          insertPayment.run(created.id, dollarsFromTransactionAmount(tx.amount), paidDate, tx.id);
        }
      }
    })();
  }
  return decorateSubscription(created);
}
// Public API of the subscription service. Helpers that are not listed here
// (for example inferType, nextDueDate and buildRecommendation) stay private to
// this module.
module.exports = {
SUBSCRIPTION_TYPES,
createSubscriptionFromRecommendation,
declineRecommendation,
decorateSubscription,
getSubscriptionRecommendations,
getSubscriptionSummary,
getSubscriptions,
lookupCatalog,
loadCatalog,
monthlyEquivalent,
normalizeMerchant,
searchSubscriptionTransactions,
};