feat: subscription catalog migration, 200-row seed, improved detection

db/database.js:
- Added monitored to COLUMN_WHITELIST
- runSubscriptionCatalogMigration() creates table + seeds 200 rows
- Migration v0.65 in both legacy reconciliation and main migrations

services/subscriptionService.js:
- SUBSCRIPTION_TYPES expanded 10→14 (food, education, shopping, security)
- TYPE_KEYWORDS updated with 30 new keywords across categories
- loadCatalog() reads the full catalog (200 seeded entries) once per recommendation call and returns [] gracefully on older DBs that lack the subscription_catalog table
- lookupCatalog() longest-match wins, handles embedded domains
- inferType() catalog hit takes priority over keyword guessing
- Two-tier detection: catalog 1-hit → possible (62), 2+ → pattern/confirmed with boost (68-99)
- Canonical names from catalog, type auto-filled
- buildRecommendation() extracted as shared helper with tier + catalog_match fields
- createSubscriptionFromRecommendation sets subscription_source to catalog_match
This commit is contained in:
null 2026-05-29 01:51:42 -05:00
parent 7682758aa8
commit 820fedd58e
3 changed files with 432 additions and 63 deletions

View File

@ -52,6 +52,8 @@ const COLUMN_WHITELIST = new Set([
'subscription_source', 'subscription_detected_at', 'deleted_at', 'subscription_source', 'subscription_detected_at', 'deleted_at',
// sessions table columns // sessions table columns
'created_at', 'created_at',
// financial_accounts table columns
'monitored',
]); ]);
// Security validation function for column names // Security validation function for column names
@ -69,6 +71,240 @@ function isValidSqlDefinition(def) {
return /^[\w\s\(\)\',!@#$%^&*+=\[\]<>\-.]+$/i.test(def); return /^[\w\s\(\)\',!@#$%^&*+=\[\]<>\-.]+$/i.test(def);
} }
// ── Subscription catalog seed ─────────────────────────────────────────────────
// Seed data for the subscription_catalog table. Each row is a positional tuple:
// [rank, name, category, subscription_type, website, domain]
// The order must stay in sync with the column list of the INSERT statement in
// runSubscriptionCatalogMigration(), which spreads each row into insert.run().
// Notes:
// - rank runs 1..200 in listing order.
// - domain is NOT unique: several services share one (e.g. apple.com,
//   amazon.com, spotify.com, one.google.com), so a domain alone does not
//   identify a single catalog entry.
// - Some domains include a subdomain (e.g. tv.youtube.com, plus.espn.com).
const SUBSCRIPTION_CATALOG_ROWS = [
[1,'Netflix','Video Streaming','streaming','https://www.netflix.com/','netflix.com'],
[2,'Amazon Prime Video','Video Streaming','streaming','https://www.primevideo.com/','primevideo.com'],
[3,'Hulu','Video Streaming','streaming','https://www.hulu.com/','hulu.com'],
[4,'Disney+','Video Streaming','streaming','https://www.disneyplus.com/','disneyplus.com'],
[5,'Max','Video Streaming','streaming','https://www.max.com/','max.com'],
[6,'Peacock','Video Streaming','streaming','https://www.peacocktv.com/','peacocktv.com'],
[7,'Paramount+','Video Streaming','streaming','https://www.paramountplus.com/','paramountplus.com'],
[8,'Apple TV+','Video Streaming','streaming','https://tv.apple.com/','tv.apple.com'],
[9,'YouTube Premium','Video Streaming','streaming','https://www.youtube.com/premium','youtube.com'],
[10,'ESPN+','Sports Streaming','streaming','https://plus.espn.com/','plus.espn.com'],
[11,'YouTube TV','Live TV Streaming','streaming','https://tv.youtube.com/','tv.youtube.com'],
[12,'Sling TV','Live TV Streaming','streaming','https://www.sling.com/','sling.com'],
[13,'Fubo','Live TV Streaming','streaming','https://www.fubo.tv/','fubo.tv'],
[14,'DirecTV Stream','Live TV Streaming','streaming','https://streamtv.directv.com/','streamtv.directv.com'],
[15,'Philo','Live TV Streaming','streaming','https://www.philo.com/','philo.com'],
[16,'Starz','Video Streaming','streaming','https://www.starz.com/','starz.com'],
[17,'MGM+','Video Streaming','streaming','https://www.mgmplus.com/','mgmplus.com'],
[18,'AMC+','Video Streaming','streaming','https://www.amcplus.com/','amcplus.com'],
[19,'BET+','Video Streaming','streaming','https://www.bet.plus/','bet.plus'],
[20,'Crunchyroll','Video Streaming','streaming','https://www.crunchyroll.com/','crunchyroll.com'],
[21,'HIDIVE','Video Streaming','streaming','https://www.hidive.com/','hidive.com'],
[22,'Shudder','Video Streaming','streaming','https://www.shudder.com/','shudder.com'],
[23,'Acorn TV','Video Streaming','streaming','https://acorn.tv/','acorn.tv'],
[24,'BritBox','Video Streaming','streaming','https://www.britbox.com/','britbox.com'],
[25,'The Criterion Channel','Video Streaming','streaming','https://www.criterionchannel.com/','criterionchannel.com'],
[26,'MUBI','Video Streaming','streaming','https://mubi.com/','mubi.com'],
[27,'Discovery+','Video Streaming','streaming','https://www.discoveryplus.com/','discoveryplus.com'],
[28,'Hallmark+','Video Streaming','streaming','https://www.hallmarkplus.com/','hallmarkplus.com'],
[29,'PBS Passport','Video Streaming','streaming','https://www.pbs.org/passport/','pbs.org'],
[30,'MagellanTV','Video Streaming','streaming','https://www.magellantv.com/','magellantv.com'],
[31,'Curiosity Stream','Video Streaming','streaming','https://curiositystream.com/','curiositystream.com'],
[32,'Nebula','Video Streaming','streaming','https://nebula.tv/','nebula.tv'],
[33,'WOW Presents Plus','Video Streaming','streaming','https://www.wowpresentsplus.com/','wowpresentsplus.com'],
[34,'ViX Premium','Video Streaming','streaming','https://vix.com/','vix.com'],
[35,'FloSports','Sports Streaming','streaming','https://www.flosports.tv/','flosports.tv'],
[36,'DAZN','Sports Streaming','streaming','https://www.dazn.com/','dazn.com'],
[37,'MLB.TV','Sports Streaming','streaming','https://www.mlb.com/live-stream-games/subscribe','mlb.com'],
[38,'NBA League Pass','Sports Streaming','streaming','https://www.nba.com/watch/league-pass-stream','nba.com'],
[39,'NHL Power Play on ESPN+','Sports Streaming','streaming','https://www.espn.com/espnplus/catalog/nhl','espn.com'],
[40,'NFL+','Sports Streaming','streaming','https://www.nfl.com/plus/','nfl.com'],
[41,'Spotify Premium','Music & Audio','music','https://www.spotify.com/premium/','spotify.com'],
[42,'Apple Music','Music & Audio','music','https://www.apple.com/apple-music/','apple.com'],
[43,'Amazon Music Unlimited','Music & Audio','music','https://music.amazon.com/','music.amazon.com'],
[44,'Pandora Premium','Music & Audio','music','https://www.pandora.com/upgrade','pandora.com'],
[45,'SiriusXM','Music & Audio','music','https://www.siriusxm.com/','siriusxm.com'],
[46,'TIDAL','Music & Audio','music','https://tidal.com/','tidal.com'],
[47,'Qobuz','Music & Audio','music','https://www.qobuz.com/us-en/music/streaming/offers','qobuz.com'],
[48,'SoundCloud Go+','Music & Audio','music','https://soundcloud.com/go','soundcloud.com'],
[49,'Deezer','Music & Audio','music','https://www.deezer.com/us/offers','deezer.com'],
[50,'iHeartRadio Plus','Music & Audio','music','https://www.iheart.com/plus/','iheart.com'],
[51,'Audible','Audiobooks','education','https://www.audible.com/','audible.com'],
[52,'Spotify Audiobooks','Audiobooks','education','https://www.spotify.com/us/audiobooks/','spotify.com'],
[53,'Everand','Audiobooks & Ebooks','education','https://www.everand.com/','everand.com'],
[54,'Scribd','Documents & Ebooks','education','https://www.scribd.com/','scribd.com'],
[55,'Kindle Unlimited','Ebooks','education','https://www.amazon.com/kindle-dbs/hz/subscribe/ku','amazon.com'],
[56,'Kobo Plus','Ebooks & Audiobooks','education','https://www.kobo.com/us/en/plus','kobo.com'],
[57,'Libro.fm','Audiobooks','education','https://libro.fm/','libro.fm'],
[58,'Blinkist','Books & Learning','education','https://www.blinkist.com/','blinkist.com'],
[59,'Pocket Casts Plus','Podcasts','music','https://pocketcasts.com/plus/','pocketcasts.com'],
[60,'Wondery+','Podcasts','music','https://wondery.com/plus/','wondery.com'],
[61,'The New York Times','News & Magazines','news','https://www.nytimes.com/subscription','nytimes.com'],
[62,'The Wall Street Journal','News & Magazines','news','https://www.wsj.com/news/subscribe','wsj.com'],
[63,'The Washington Post','News & Magazines','news','https://subscribe.washingtonpost.com/','subscribe.washingtonpost.com'],
[64,'The Atlantic','News & Magazines','news','https://www.theatlantic.com/subscribe/','theatlantic.com'],
[65,'The New Yorker','News & Magazines','news','https://www.newyorker.com/subscribe','newyorker.com'],
[66,'Bloomberg.com','News & Magazines','news','https://www.bloomberg.com/subscriptions','bloomberg.com'],
[67,'Financial Times','News & Magazines','news','https://www.ft.com/products','ft.com'],
[68,'The Economist','News & Magazines','news','https://www.economist.com/subscribe','economist.com'],
[69,'TIME','News & Magazines','news','https://time.com/subscribe/','time.com'],
[70,'WIRED','News & Magazines','news','https://www.wired.com/subscribe/','wired.com'],
[71,'Consumer Reports','News & Magazines','news','https://www.consumerreports.org/join/','consumerreports.org'],
[72,'Politico Pro','News & Magazines','news','https://www.politicopro.com/','politicopro.com'],
[73,'The Athletic','Sports Media','streaming','https://theathletic.com/','theathletic.com'],
[74,'Substack','Creator Media','news','https://substack.com/','substack.com'],
[75,'Medium','Creator Media','news','https://medium.com/membership','medium.com'],
[76,'Patreon','Creator Media','news','https://www.patreon.com/','patreon.com'],
[77,'Apple News+','News & Magazines','news','https://www.apple.com/apple-news/','apple.com'],
[78,'Readly','News & Magazines','news','https://us.readly.com/','us.readly.com'],
[79,'PressReader','News & Magazines','news','https://www.pressreader.com/','pressreader.com'],
[80,'The Information','News & Magazines','news','https://www.theinformation.com/subscribe','theinformation.com'],
[81,'Microsoft 365','Software & Productivity','software','https://www.microsoft.com/microsoft-365','microsoft.com'],
[82,'Google One','Cloud & Storage','cloud','https://one.google.com/','one.google.com'],
[83,'iCloud+','Cloud & Storage','cloud','https://www.apple.com/icloud/','apple.com'],
[84,'Dropbox','Cloud & Storage','cloud','https://www.dropbox.com/plans','dropbox.com'],
[85,'Box','Cloud & Storage','cloud','https://www.box.com/pricing','box.com'],
[86,'Adobe Creative Cloud','Software & Design','software','https://www.adobe.com/creativecloud.html','adobe.com'],
[87,'Canva Pro','Software & Design','software','https://www.canva.com/pro/','canva.com'],
[88,'Figma','Software & Design','software','https://www.figma.com/pricing/','figma.com'],
[89,'Notion','Software & Productivity','software','https://www.notion.so/pricing','notion.so'],
[90,'Evernote','Software & Productivity','software','https://evernote.com/compare-plans','evernote.com'],
[91,'Todoist','Software & Productivity','software','https://todoist.com/pricing','todoist.com'],
[92,'Grammarly','Writing & AI','software','https://www.grammarly.com/plans','grammarly.com'],
[93,'ChatGPT','AI','software','https://chatgpt.com/pricing','chatgpt.com'],
[94,'Claude','AI','software','https://claude.ai/upgrade','claude.ai'],
[95,'Perplexity','AI','software','https://www.perplexity.ai/pro','perplexity.ai'],
[96,'Gemini Advanced','AI','software','https://one.google.com/about/google-ai-plans/','one.google.com'],
[97,'GitHub Copilot','Developer Tools','software','https://github.com/features/copilot/plans','github.com'],
[98,'Cursor','Developer Tools','software','https://www.cursor.com/pricing','cursor.com'],
[99,'Replit','Developer Tools','software','https://replit.com/pricing','replit.com'],
[100,'Setapp','Software & Productivity','software','https://setapp.com/','setapp.com'],
[101,'1Password','Security','security','https://1password.com/pricing','1password.com'],
[102,'Dashlane','Security','security','https://www.dashlane.com/pricing','dashlane.com'],
[103,'NordVPN','Security','security','https://nordvpn.com/pricing/','nordvpn.com'],
[104,'ExpressVPN','Security','security','https://www.expressvpn.com/','expressvpn.com'],
[105,'Surfshark','Security','security','https://surfshark.com/pricing','surfshark.com'],
[106,'Norton 360','Security','security','https://us.norton.com/products','us.norton.com'],
[107,'McAfee+','Security','security','https://www.mcafee.com/en-us/consumer-support/pricing.html','mcafee.com'],
[108,'QuickBooks Online','Finance Software','software','https://quickbooks.intuit.com/pricing/','quickbooks.intuit.com'],
[109,'TurboTax Live','Finance Software','software','https://turbotax.intuit.com/personal-taxes/online/live/','turbotax.intuit.com'],
[110,'YNAB','Finance Software','software','https://www.ynab.com/pricing','ynab.com'],
[111,'Rocket Money Premium','Finance Software','software','https://www.rocketmoney.com/premium','rocketmoney.com'],
[112,'Copilot Money','Finance Software','software','https://copilot.money/','copilot.money'],
[113,'Calendly','Software & Productivity','software','https://calendly.com/pricing','calendly.com'],
[114,'Zoom Workplace','Software & Productivity','software','https://www.zoom.com/en/pricing/','zoom.com'],
[115,'Slack','Software & Productivity','software','https://slack.com/pricing','slack.com'],
[116,'Xbox Game Pass','Gaming','gaming','https://www.xbox.com/en-US/xbox-game-pass','xbox.com'],
[117,'PlayStation Plus','Gaming','gaming','https://www.playstation.com/en-us/ps-plus/','playstation.com'],
[118,'Nintendo Switch Online','Gaming','gaming','https://www.nintendo.com/us/switch/online/','nintendo.com'],
[119,'Apple Arcade','Gaming','gaming','https://www.apple.com/apple-arcade/','apple.com'],
[120,'EA Play','Gaming','gaming','https://www.ea.com/ea-play','ea.com'],
[121,'Ubisoft+','Gaming','gaming','https://www.ubisoft.com/en-us/ubisoft-plus','ubisoft.com'],
[122,'NVIDIA GeForce NOW','Gaming','gaming','https://www.nvidia.com/en-us/geforce-now/memberships/','nvidia.com'],
[123,'Roblox Premium','Gaming','gaming','https://www.roblox.com/premium/membership','roblox.com'],
[124,'Fortnite Crew','Gaming','gaming','https://www.fortnite.com/fortnite-crew-subscription','fortnite.com'],
[125,'Minecraft Realms','Gaming','gaming','https://www.minecraft.net/realms','minecraft.net'],
[126,'Twitch Turbo','Creator & Social','news','https://www.twitch.tv/turbo','twitch.tv'],
[127,'Discord Nitro','Creator & Social','news','https://discord.com/nitro','discord.com'],
[128,'X Premium','Creator & Social','news','https://help.x.com/en/using-x/x-premium','help.x.com'],
[129,'Snapchat+','Creator & Social','news','https://www.snapchat.com/plus','snapchat.com'],
[130,'TikTok Live Subscription','Creator & Social','news','https://www.tiktok.com/live/creators/en-US/subscription/','tiktok.com'],
[131,'Meta Verified','Creator & Social','news','https://about.meta.com/technologies/meta-verified/','about.meta.com'],
[132,'LinkedIn Premium','Career & Social','news','https://premium.linkedin.com/','premium.linkedin.com'],
[133,'Tinder Gold','Dating','other','https://tinder.com/feature/plus','tinder.com'],
[134,'Bumble Premium','Dating','other','https://bumble.com/en/the-buzz/bumble-premium','bumble.com'],
[135,'Hinge+','Dating','other','https://hinge.co/hinge-plus','hinge.co'],
[136,'Amazon Prime','Shopping & Delivery','shopping','https://www.amazon.com/amazonprime','amazon.com'],
[137,'Walmart+','Shopping & Delivery','shopping','https://www.walmart.com/plus','walmart.com'],
[138,'Target Circle 360','Shopping & Delivery','shopping','https://www.target.com/circle/target-circle-360','target.com'],
[139,'Costco','Warehouse Clubs','shopping','https://www.costco.com/join-costco.html','costco.com'],
[140,'Sam\'s Club','Warehouse Clubs','shopping','https://www.samsclub.com/join','samsclub.com'],
[141,'BJ\'s Wholesale Club','Warehouse Clubs','shopping','https://www.bjs.com/membership','bjs.com'],
[142,'Instacart+','Grocery & Delivery','food','https://www.instacart.com/instacart-plus','instacart.com'],
[143,'DoorDash DashPass','Food Delivery','food','https://www.doordash.com/dashpass/','doordash.com'],
[144,'Uber One','Food & Rides','food','https://www.uber.com/us/en/u/uber-one/','uber.com'],
[145,'Grubhub+','Food Delivery','food','https://www.grubhub.com/plus','grubhub.com'],
[146,'Shipt','Grocery & Delivery','food','https://www.shipt.com/membership/','shipt.com'],
[147,'Kroger Boost','Grocery & Delivery','food','https://www.kroger.com/pr/boost','kroger.com'],
[148,'Thrive Market','Grocery & Delivery','food','https://thrivemarket.com/','thrivemarket.com'],
[149,'Misfits Market','Grocery & Delivery','food','https://www.misfitsmarket.com/','misfitsmarket.com'],
[150,'Imperfect Foods','Grocery & Delivery','food','https://www.imperfectfoods.com/','imperfectfoods.com'],
[151,'Chewy Autoship','Pet Retail','shopping','https://www.chewy.com/app/content/autoship','chewy.com'],
[152,'Petco Vital Care Premier','Pet Retail','shopping','https://www.petco.com/shop/en/petcostore/c/vitalcare','petco.com'],
[153,'PetSmart Treats Rewards VIPP','Pet Retail','shopping','https://www.petsmart.com/treats-rewards-vipp.html','petsmart.com'],
[154,'GameStop Pro','Retail Memberships','shopping','https://www.gamestop.com/pro/','gamestop.com'],
[155,'Barnes & Noble Membership','Retail Memberships','shopping','https://www.barnesandnoble.com/membership','barnesandnoble.com'],
[156,'HelloFresh','Food & Meal Kits','food','https://www.hellofresh.com/','hellofresh.com'],
[157,'Blue Apron','Food & Meal Kits','food','https://www.blueapron.com/','blueapron.com'],
[158,'Home Chef','Food & Meal Kits','food','https://www.homechef.com/','homechef.com'],
[159,'Marley Spoon','Food & Meal Kits','food','https://marleyspoon.com/','marleyspoon.com'],
[160,'Dinnerly','Food & Meal Kits','food','https://dinnerly.com/','dinnerly.com'],
[161,'EveryPlate','Food & Meal Kits','food','https://www.everyplate.com/','everyplate.com'],
[162,'Green Chef','Food & Meal Kits','food','https://www.greenchef.com/','greenchef.com'],
[163,'Purple Carrot','Food & Meal Kits','food','https://www.purplecarrot.com/','purplecarrot.com'],
[164,'Sunbasket','Food & Meal Kits','food','https://sunbasket.com/','sunbasket.com'],
[165,'Factor','Prepared Meals','food','https://www.factor75.com/','factor75.com'],
[166,'CookUnity','Prepared Meals','food','https://www.cookunity.com/','cookunity.com'],
[167,'Fresh N Lean','Prepared Meals','food','https://www.freshnlean.com/','freshnlean.com'],
[168,'Hungryroot','Food & Meal Kits','food','https://www.hungryroot.com/','hungryroot.com'],
[169,'Daily Harvest','Prepared Meals','food','https://www.daily-harvest.com/','daily-harvest.com'],
[170,'Tovala','Prepared Meals','food','https://www.tovala.com/','tovala.com'],
[171,'MistoBox','Coffee & Tea','food','https://mistobox.com/','mistobox.com'],
[172,'Trade Coffee','Coffee & Tea','food','https://www.drinktrade.com/','drinktrade.com'],
[173,'Atlas Coffee Club','Coffee & Tea','food','https://atlascoffeeclub.com/','atlascoffeeclub.com'],
[174,'Bean Box','Coffee & Tea','food','https://beanbox.com/','beanbox.com'],
[175,'Universal Yums','Snacks','food','https://www.universalyums.com/','universalyums.com'],
[176,'Peloton App','Fitness & Wellness','fitness','https://www.onepeloton.com/app','onepeloton.com'],
[177,'ClassPass','Fitness & Wellness','fitness','https://classpass.com/','classpass.com'],
[178,'Apple Fitness+','Fitness & Wellness','fitness','https://www.apple.com/apple-fitness-plus/','apple.com'],
[179,'Strava','Fitness & Wellness','fitness','https://www.strava.com/subscribe','strava.com'],
[180,'Fitbit Premium','Fitness & Wellness','fitness','https://www.fitbit.com/global/us/products/services/premium','fitbit.com'],
[181,'MyFitnessPal Premium','Fitness & Wellness','fitness','https://www.myfitnesspal.com/premium','myfitnesspal.com'],
[182,'Noom','Fitness & Wellness','fitness','https://www.noom.com/','noom.com'],
[183,'WW','Fitness & Wellness','fitness','https://www.weightwatchers.com/us/plans','weightwatchers.com'],
[184,'Headspace','Meditation & Wellness','fitness','https://www.headspace.com/','headspace.com'],
[185,'Calm','Meditation & Wellness','fitness','https://www.calm.com/premium','calm.com'],
[186,'Sleep Cycle Premium','Sleep & Wellness','fitness','https://www.sleepcycle.com/premium/','sleepcycle.com'],
[187,'Oura Membership','Fitness & Wellness','fitness','https://ouraring.com/membership','ouraring.com'],
[188,'Whoop','Fitness & Wellness','fitness','https://www.whoop.com/us/en/membership/','whoop.com'],
[189,'Aaptiv','Fitness & Wellness','fitness','https://aaptiv.com/','aaptiv.com'],
[190,'Fitbod','Fitness & Wellness','fitness','https://fitbod.me/pricing/','fitbod.me'],
[191,'Alo Moves','Fitness & Wellness','fitness','https://www.alomoves.com/','alomoves.com'],
[192,'Obe Fitness','Fitness & Wellness','fitness','https://obefitness.com/','obefitness.com'],
[193,'Centr','Fitness & Wellness','fitness','https://centr.com/','centr.com'],
[194,'Future','Fitness & Wellness','fitness','https://www.future.co/','future.co'],
[195,'Tonal Membership','Fitness & Wellness','fitness','https://www.tonal.com/membership/','tonal.com'],
[196,'Duolingo Super','Education','education','https://www.duolingo.com/super','duolingo.com'],
[197,'MasterClass','Education','education','https://www.masterclass.com/','masterclass.com'],
[198,'Coursera Plus','Education','education','https://www.coursera.org/courseraplus','coursera.org'],
[199,'Skillshare','Education','education','https://www.skillshare.com/','skillshare.com'],
[200,'Book of the Month','Books & Subscription Boxes','education','https://www.bookofthemonth.com/','bookofthemonth.com'],
];
/**
 * Create the subscription_catalog table (plus its rank/type indexes) when
 * missing, then seed it from SUBSCRIPTION_CATALOG_ROWS if it holds no rows.
 *
 * The schema statements use IF NOT EXISTS and seeding is skipped whenever the
 * table already contains at least one row, so repeated calls do not duplicate
 * data.
 *
 * @param {object} database - handle exposing exec(), prepare() and transaction()
 */
function runSubscriptionCatalogMigration(database) {
  database.exec(`
CREATE TABLE IF NOT EXISTS subscription_catalog (
id INTEGER PRIMARY KEY AUTOINCREMENT,
rank INTEGER NOT NULL,
name TEXT NOT NULL,
category TEXT NOT NULL,
subscription_type TEXT NOT NULL,
website TEXT,
domain TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS idx_subscription_catalog_rank ON subscription_catalog(rank);
CREATE INDEX IF NOT EXISTS idx_subscription_catalog_type ON subscription_catalog(subscription_type);
`);

  // Only an empty table gets seeded; anything already present is left alone.
  const { n: rowCount } = database.prepare('SELECT COUNT(*) as n FROM subscription_catalog').get();
  if (rowCount !== 0) return;

  const insertRow = database.prepare(
    'INSERT INTO subscription_catalog (rank, name, category, subscription_type, website, domain) VALUES (?,?,?,?,?,?)'
  );
  // Wrap all inserts in one transaction so the seed is applied as a unit.
  const seedAll = database.transaction((seedRows) => {
    for (const values of seedRows) {
      insertRow.run(...values);
    }
  });
  seedAll(SUBSCRIPTION_CATALOG_ROWS);
  console.log(`[migration] subscription_catalog: seeded ${SUBSCRIPTION_CATALOG_ROWS.length} rows`);
}
function seedManualDataSources(database = db) { function seedManualDataSources(database = db) {
if (!database) return; if (!database) return;
const hasDataSources = database.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='data_sources'").get(); const hasDataSources = database.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='data_sources'").get();
@ -1100,6 +1336,16 @@ function reconcileLegacyMigrations() {
console.log('[migration] financial_accounts: monitored column added'); console.log('[migration] financial_accounts: monitored column added');
} }
} }
},
{
version: 'v0.65',
description: 'subscription_catalog: top-200 known subscription services',
check: function() {
return !!db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='subscription_catalog'").get();
},
run: function() {
runSubscriptionCatalogMigration(db);
}
} }
]; ];
@ -1875,6 +2121,14 @@ function runMigrations() {
console.log('[migration] financial_accounts: monitored column added'); console.log('[migration] financial_accounts: monitored column added');
} }
} }
},
{
version: 'v0.65',
description: 'subscription_catalog: top-200 known subscription services',
dependsOn: ['v0.64'],
run: function() {
runSubscriptionCatalogMigration(db);
}
} }
]; ];

View File

@ -1,6 +1,6 @@
{ {
"name": "bill-tracker", "name": "bill-tracker",
"version": "0.33.1", "version": "0.33.2",
"description": "Monthly bill tracking system", "description": "Monthly bill tracking system",
"main": "server.js", "main": "server.js",
"scripts": { "scripts": {

View File

@ -1,6 +1,12 @@
'use strict';
const { insertBill, validateBillData } = require('./billsService'); const { insertBill, validateBillData } = require('./billsService');
const SUBSCRIPTION_TYPES = ['streaming', 'software', 'cloud', 'music', 'news', 'fitness', 'gaming', 'utilities', 'insurance', 'other']; const SUBSCRIPTION_TYPES = [
'streaming', 'software', 'cloud', 'music', 'news',
'fitness', 'gaming', 'utilities', 'insurance',
'food', 'education', 'shopping', 'security', 'other',
];
const MONTHLY_FACTORS = { const MONTHLY_FACTORS = {
weekly: 52 / 12, weekly: 52 / 12,
@ -12,23 +18,65 @@ const MONTHLY_FACTORS = {
irregular: 1, irregular: 1,
}; };
// Fallback keyword list used when catalog lookup finds no match
const TYPE_KEYWORDS = [ const TYPE_KEYWORDS = [
['streaming', ['netflix', 'hulu', 'disney', 'max', 'paramount', 'peacock', 'youtube tv', 'sling']], ['streaming', ['netflix', 'hulu', 'disney', 'max', 'paramount', 'peacock', 'youtube tv', 'sling', 'espn', 'fubo', 'starz', 'crunchyroll', 'dazn']],
['music', ['spotify', 'apple music', 'tidal', 'pandora']], ['music', ['spotify', 'apple music', 'tidal', 'pandora', 'siriusxm', 'soundcloud', 'deezer', 'iheart']],
['software', ['adobe', 'microsoft', 'github', 'notion', 'linear', 'figma', 'canva', 'openai', 'chatgpt']], ['software', ['adobe', 'microsoft', 'github', 'notion', 'figma', 'canva', 'openai', 'chatgpt', 'grammarly', 'zoom', 'slack', 'cursor', 'ynab']],
['cloud', ['dropbox', 'icloud', 'google storage', 'backblaze', 'aws', 'cloudflare']], ['cloud', ['dropbox', 'icloud', 'google one', 'google storage', 'backblaze', 'box storage']],
['news', ['nyt', 'new york times', 'economist', 'athletic', 'washington post']], ['news', ['nyt', 'new york times', 'economist', 'athletic', 'washington post', 'wsj', 'bloomberg', 'substack', 'patreon', 'medium']],
['fitness', ['peloton', 'planet fitness', 'gym', 'fitbit']], ['fitness', ['peloton', 'planet fitness', 'gym', 'fitbit', 'strava', 'headspace', 'calm', 'noom', 'classpass', 'whoop']],
['gaming', ['xbox', 'playstation', 'steam', 'nintendo']], ['gaming', ['xbox', 'playstation', 'steam', 'nintendo', 'roblox', 'discord nitro', 'ea play', 'ubisoft']],
['utilities', ['verizon', 'at t', 'comcast', 'xfinity', 'spectrum', 'tmobile']], ['utilities', ['verizon', 'at t', 'att', 'comcast', 'xfinity', 'spectrum', 'tmobile', 't mobile']],
['insurance', ['insurance', 'geico', 'progressive', 'state farm', 'allstate']], ['insurance', ['insurance', 'geico', 'progressive', 'state farm', 'allstate']],
['food', ['hellofresh', 'blue apron', 'doordash', 'instacart', 'uber eats', 'grubhub', 'factor', 'hungryroot']],
['education', ['duolingo', 'masterclass', 'coursera', 'skillshare', 'audible', 'kindle unlimited', 'blinkist']],
['shopping', ['amazon prime', 'walmart plus', 'costco', 'target circle', 'chewy']],
['security', ['nordvpn', 'expressvpn', '1password', 'dashlane', 'norton', 'mcafee', 'surfshark']],
]; ];
// ── Catalog ───────────────────────────────────────────────────────────────────
/**
 * Load every subscription catalog entry, ordered by rank (ascending).
 *
 * Databases that predate the catalog migration have no subscription_catalog
 * table; that case is expected and quietly yields an empty list. Any other
 * failure also yields an empty list (the lookup stays best-effort), but it is
 * logged so real database problems are not hidden.
 *
 * @param {object} db - database handle exposing prepare(sql).all()
 * @returns {Array<object>} rows with id, rank, name, category,
 *   subscription_type and domain; [] when the catalog cannot be read
 */
function loadCatalog(db) {
  try {
    return db
      .prepare('SELECT id, rank, name, category, subscription_type, domain FROM subscription_catalog ORDER BY rank ASC')
      .all();
  } catch (err) {
    const message = String(err?.message ?? err);
    // "no such table" is the normal signal for an old DB; stay silent for it.
    if (!/no such table/i.test(message)) {
      console.warn(`[subscriptions] failed to load subscription_catalog: ${message}`);
    }
    return [];
  }
}
/**
 * Turn a catalog name into a lowercase key made of [a-z0-9] tokens joined by
 * single spaces. Every other character acts as a separator, and falsy input
 * yields ''.
 *
 * @param {*} value - raw name
 * @returns {string} normalized key
 */
function normalizeCatalogName(value) {
  const lowered = String(value || '').toLowerCase();
  const tokens = lowered.split(/[^a-z0-9]+/).filter((token) => token !== '');
  return tokens.join(' ');
}
// Given a normalized merchant string, find the best matching catalog entry.
// Matches on the normalized service name or the normalized domain
// (every non-alphanumeric character, including '.' and '-', becomes a space).
/**
 * Find the catalog entry whose name or domain key best matches a merchant.
 *
 * A key only counts when it starts at a token boundary of merchantText, so a
 * short key cannot hit the tail of an unrelated word ("box" in "xbox live",
 * "ft com" in "lyft com"). Prefix hits such as "netflix" in "netflixcom" are
 * still accepted. Among all hits the longest key wins; on equal length the
 * earlier catalog entry is kept.
 *
 * @param {Array<object>} catalog - entries with `name` and optional `domain`
 * @param {string} merchantText - merchant text, expected to be already
 *   normalized (lowercase, single-space separated)
 * @returns {object|null} the best matching entry, or null when nothing matches
 */
function lookupCatalog(catalog, merchantText) {
  if (!catalog?.length || !merchantText) return null;

  // A leading pad lets one includes() call express "starts at a token boundary".
  const haystack = ` ${merchantText}`;
  const startsAtToken = (key) => haystack.includes(` ${key}`);

  let best = null;
  let bestLen = 0;
  for (const entry of catalog) {
    const nameKey = normalizeCatalogName(entry.name);
    // Normalize domains like names so '-' and upper-case cannot block a match.
    const domainKey = entry.domain ? normalizeCatalogName(entry.domain) : '';
    if (nameKey.length >= 3 && nameKey.length > bestLen && startsAtToken(nameKey)) {
      best = entry;
      bestLen = nameKey.length;
    }
    if (domainKey.length >= 4 && domainKey.length > bestLen && startsAtToken(domainKey)) {
      best = entry;
      bestLen = domainKey.length;
    }
  }
  return best;
}
// ── Helpers ───────────────────────────────────────────────────────────────────
function normalizeMerchant(value) { function normalizeMerchant(value) {
return String(value || '') return String(value || '')
.toLowerCase() .toLowerCase()
.replace(/[^a-z0-9\s]/g, ' ') .replace(/[^a-z0-9\s]/g, ' ')
.replace(/\b(pos|debit|card|payment|purchase|recurring|online|inc|llc|co)\b/g, ' ') .replace(/\b(pos|debit|card|payment|purchase|recurring|online|inc|llc|co|www)\b/g, ' ')
.replace(/\s+/g, ' ') .replace(/\s+/g, ' ')
.trim(); .trim();
} }
@ -41,8 +89,9 @@ function titleCase(value) {
.join(' '); .join(' ');
} }
function inferType(text) { function inferType(merchantText, catalogEntry) {
const haystack = normalizeMerchant(text); if (catalogEntry?.subscription_type) return catalogEntry.subscription_type;
const haystack = normalizeMerchant(merchantText);
for (const [type, words] of TYPE_KEYWORDS) { for (const [type, words] of TYPE_KEYWORDS) {
if (words.some(word => haystack.includes(word))) return type; if (words.some(word => haystack.includes(word))) return type;
} }
@ -54,8 +103,8 @@ function monthlyEquivalent(amount, cycleType, billingCycle) {
const fallback = String(billingCycle || '').toLowerCase() === 'quarterly' const fallback = String(billingCycle || '').toLowerCase() === 'quarterly'
? 'quarterly' ? 'quarterly'
: String(billingCycle || '').toLowerCase() === 'annually' : String(billingCycle || '').toLowerCase() === 'annually'
? 'annual' ? 'annual'
: key; : key;
const factor = MONTHLY_FACTORS[key] ?? MONTHLY_FACTORS[fallback] ?? 1; const factor = MONTHLY_FACTORS[key] ?? MONTHLY_FACTORS[fallback] ?? 1;
return Math.round(Number(amount || 0) * factor * 100) / 100; return Math.round(Number(amount || 0) * factor * 100) / 100;
} }
@ -67,7 +116,6 @@ function nextDueDate(bill, now = new Date()) {
if (date < new Date(now.getFullYear(), now.getMonth(), now.getDate())) { if (date < new Date(now.getFullYear(), now.getMonth(), now.getDate())) {
date = new Date(now.getFullYear(), now.getMonth() + 1, dueDay); date = new Date(now.getFullYear(), now.getMonth() + 1, dueDay);
} }
if (cycle === 'quarterly' || cycle === 'annual') { if (cycle === 'quarterly' || cycle === 'annual') {
const startMonth = Math.min(Math.max(Number(bill.cycle_day) || 1, 1), 12) - 1; const startMonth = Math.min(Math.max(Number(bill.cycle_day) || 1, 1), 12) - 1;
const step = cycle === 'quarterly' ? 3 : 12; const step = cycle === 'quarterly' ? 3 : 12;
@ -76,7 +124,6 @@ function nextDueDate(bill, now = new Date()) {
date = new Date(date.getFullYear(), date.getMonth() + step, dueDay); date = new Date(date.getFullYear(), date.getMonth() + step, dueDay);
} }
} }
return date.toISOString().slice(0, 10); return date.toISOString().slice(0, 10);
} }
@ -89,7 +136,7 @@ function decorateSubscription(bill) {
monthly_equivalent: monthly, monthly_equivalent: monthly,
yearly_equivalent: Math.round(monthly * 12 * 100) / 100, yearly_equivalent: Math.round(monthly * 12 * 100) / 100,
next_due_date: nextDueDate(bill), next_due_date: nextDueDate(bill),
subscription_type: bill.subscription_type || inferType(`${bill.name} ${bill.category_name || ''}`), subscription_type: bill.subscription_type || inferType(`${bill.name} ${bill.category_name || ''}`, null),
}; };
} }
@ -136,19 +183,24 @@ function dollarsFromTransactionAmount(amount) {
function billingCycleForCycleType(cycleType) { function billingCycleForCycleType(cycleType) {
if (cycleType === 'quarterly') return 'quarterly'; if (cycleType === 'quarterly') return 'quarterly';
if (cycleType === 'annual') return 'annually'; if (cycleType === 'annual') return 'annually';
if (cycleType === 'monthly') return 'monthly'; if (cycleType === 'monthly') return 'monthly';
return 'irregular'; return 'irregular';
} }
// ── Recommendations ───────────────────────────────────────────────────────────
function getSubscriptionRecommendations(db, userId) { function getSubscriptionRecommendations(db, userId) {
const catalog = loadCatalog(db);
const existingNames = existingBillNames(db, userId); const existingNames = existingBillNames(db, userId);
// Scan all transaction sources, not just SimpleFIN
const rows = db.prepare(` const rows = db.prepare(`
SELECT SELECT
t.id, t.amount, t.currency, t.description, t.payee, t.memo, t.category, t.id, t.amount, t.currency, t.description, t.payee, t.memo, t.category,
COALESCE(t.posted_date, substr(t.transacted_at, 1, 10)) AS tx_date, COALESCE(t.posted_date, substr(t.transacted_at, 1, 10)) AS tx_date,
ds.provider AS data_source_provider, ds.provider AS data_source_provider,
ds.name AS data_source_name ds.name AS data_source_name
FROM transactions t FROM transactions t
LEFT JOIN data_sources ds ON ds.id = t.data_source_id AND ds.user_id = t.user_id LEFT JOIN data_sources ds ON ds.id = t.data_source_id AND ds.user_id = t.user_id
WHERE t.user_id = ? WHERE t.user_id = ?
@ -156,10 +208,10 @@ function getSubscriptionRecommendations(db, userId) {
AND t.match_status = 'unmatched' AND t.match_status = 'unmatched'
AND t.amount < 0 AND t.amount < 0
AND COALESCE(t.posted_date, substr(t.transacted_at, 1, 10)) >= date('now', '-420 days') AND COALESCE(t.posted_date, substr(t.transacted_at, 1, 10)) >= date('now', '-420 days')
AND (ds.provider = 'simplefin' OR t.source_type = 'provider_sync')
ORDER BY tx_date ASC ORDER BY tx_date ASC
`).all(userId); `).all(userId);
// Group by merchant + amount bucket
const groups = new Map(); const groups = new Map();
for (const tx of rows) { for (const tx of rows) {
const merchant = normalizeMerchant(tx.payee || tx.description || tx.memo); const merchant = normalizeMerchant(tx.payee || tx.description || tx.memo);
@ -167,60 +219,121 @@ function getSubscriptionRecommendations(db, userId) {
const amount = dollarsFromTransactionAmount(tx.amount); const amount = dollarsFromTransactionAmount(tx.amount);
if (amount < 1) continue; if (amount < 1) continue;
const key = `${merchant}:${Math.round(amount)}`; const key = `${merchant}:${Math.round(amount)}`;
const group = groups.get(key) || { merchant, amountBucket: Math.round(amount), items: [] }; if (!groups.has(key)) {
groups.set(key, { merchant, amountBucket: Math.round(amount), items: [], catalogEntry: null });
}
const group = groups.get(key);
group.items.push({ ...tx, amount_dollars: amount }); group.items.push({ ...tx, amount_dollars: amount });
groups.set(key, group); if (!group.catalogEntry) group.catalogEntry = lookupCatalog(catalog, merchant);
} }
const recommendations = []; const recommendations = [];
for (const group of groups.values()) {
if (group.items.length < 2) continue;
if (existingNames.some(name => name.includes(group.merchant) || group.merchant.includes(name))) continue;
const sorted = group.items.filter(item => item.tx_date).sort((a, b) => String(a.tx_date).localeCompare(String(b.tx_date))); for (const group of groups.values()) {
if (sorted.length < 2) continue; const { merchant, catalogEntry } = group;
const gaps = [];
for (let i = 1; i < sorted.length; i++) { // Skip if already a known bill
gaps.push(Math.round((new Date(`${sorted[i].tx_date}T00:00:00`) - new Date(`${sorted[i - 1].tx_date}T00:00:00`)) / 86400000)); if (existingNames.some(name => name.includes(merchant) || merchant.includes(name))) continue;
}
const avgGap = gaps.reduce((sum, gap) => sum + gap, 0) / gaps.length; const sorted = group.items
const cycleType = avgGap >= 320 ? 'annual' : avgGap >= 75 ? 'quarterly' : avgGap >= 10 && avgGap <= 18 ? 'biweekly' : avgGap <= 9 ? 'weekly' : 'monthly'; .filter(item => item.tx_date)
if (cycleType === 'monthly' && (avgGap < 24 || avgGap > 38)) continue; .sort((a, b) => String(a.tx_date).localeCompare(String(b.tx_date)));
if (cycleType === 'quarterly' && (avgGap < 75 || avgGap > 105)) continue;
if (sorted.length === 0) continue;
const averageAmount = sorted.reduce((sum, item) => sum + item.amount_dollars, 0) / sorted.length; const averageAmount = sorted.reduce((sum, item) => sum + item.amount_dollars, 0) / sorted.length;
const maxDelta = Math.max(...sorted.map(item => Math.abs(item.amount_dollars - averageAmount))); const maxDelta = sorted.length > 1
? Math.max(...sorted.map(item => Math.abs(item.amount_dollars - averageAmount)))
: 0;
const last = sorted[sorted.length - 1];
// ── Tier 1: catalog match with 1 occurrence (possible subscription) ──────
if (catalogEntry && sorted.length === 1) {
const confidence = 62;
recommendations.push(buildRecommendation({
merchant, catalogEntry, sorted, averageAmount, maxDelta, last,
cycleType: 'monthly', avgGap: 30, confidence, tier: 'possible',
}));
continue;
}
// ── Tier 2: 2+ occurrences — pattern detection ────────────────────────────
if (sorted.length < 2) continue;
const gaps = [];
for (let i = 1; i < sorted.length; i++) {
gaps.push(Math.round(
(new Date(`${sorted[i].tx_date}T00:00:00`) - new Date(`${sorted[i - 1].tx_date}T00:00:00`)) / 86400000
));
}
const avgGap = gaps.reduce((sum, g) => sum + g, 0) / gaps.length;
const cycleType = avgGap >= 320 ? 'annual'
: avgGap >= 75 ? 'quarterly'
: avgGap >= 10 && avgGap <= 18 ? 'biweekly'
: avgGap <= 9 ? 'weekly'
: 'monthly';
if (cycleType === 'monthly' && (avgGap < 24 || avgGap > 38)) continue;
if (cycleType === 'quarterly' && (avgGap < 75 || avgGap > 105)) continue;
if (maxDelta > Math.max(3, averageAmount * 0.18)) continue; if (maxDelta > Math.max(3, averageAmount * 0.18)) continue;
const last = sorted[sorted.length - 1]; // Confidence: catalog match raises the floor and ceiling
recommendations.push({ let confidence;
id: Buffer.from(`${group.merchant}:${group.amountBucket}:${last.tx_date}`).toString('base64url'), if (catalogEntry) {
name: titleCase(group.merchant), confidence = Math.min(99, 68 + sorted.length * 8 + (maxDelta <= 1 ? 8 : 0));
subscription_type: inferType(group.merchant), } else {
expected_amount: Math.round(averageAmount * 100) / 100, confidence = Math.min(96, 58 + sorted.length * 9 + (maxDelta <= 1 ? 10 : 0));
monthly_equivalent: monthlyEquivalent(averageAmount, cycleType, cycleType), }
cycle_type: cycleType,
billing_cycle: billingCycleForCycleType(cycleType), const tier = catalogEntry ? 'confirmed' : 'pattern';
due_day: Number(String(last.tx_date).slice(8, 10)) || 1, recommendations.push(buildRecommendation({
last_seen_date: last.tx_date, merchant, catalogEntry, sorted, averageAmount, maxDelta, last,
occurrence_count: sorted.length, cycleType, avgGap, confidence, tier,
confidence: Math.min(96, 58 + sorted.length * 9 + (maxDelta <= 1 ? 10 : 0)), }));
transaction_ids: sorted.map(item => item.id),
merchant: group.merchant,
source: last.data_source_name || 'SimpleFIN',
reasons: [
`${sorted.length} similar SimpleFIN charges`,
`About ${Math.round(avgGap)} days apart`,
`${last.currency || 'USD'} ${averageAmount.toFixed(2)} average charge`,
],
});
} }
return recommendations.sort((a, b) => b.confidence - a.confidence || b.occurrence_count - a.occurrence_count).slice(0, 20); return recommendations
.sort((a, b) => b.confidence - a.confidence || b.occurrence_count - a.occurrence_count)
.slice(0, 20);
}
/**
 * Assemble the recommendation object returned to callers for one merchant group.
 *
 * Shared by both detection tiers: the single-occurrence catalog hit and the
 * multi-occurrence pattern match. Extra fields on the argument object (for
 * example `maxDelta`) are accepted and ignored.
 *
 * @param {object} params
 * @param {string} params.merchant - Normalized merchant key for the group.
 * @param {?{id: *, name: string, category: string}} params.catalogEntry -
 *   Matched catalog row, or null/undefined when the merchant is not in the catalog.
 * @param {Array<object>} params.sorted - The group's transactions; the caller
 *   passes them ordered by `tx_date` ascending. Each item needs an `id`.
 * @param {number} params.averageAmount - Mean charge in dollars.
 * @param {object} params.last - Most recent transaction (`tx_date`, optional
 *   `currency` and `data_source_name`).
 * @param {string} params.cycleType - Detected cadence ('monthly', 'annual', ...).
 * @param {number} params.avgGap - Mean days between charges; only reported
 *   when there is more than one charge.
 * @param {number} params.confidence - Score computed by the caller.
 * @param {string} params.tier - Detection tier label supplied by the caller.
 * @returns {object} Recommendation record, including `tier` and `catalog_match`.
 */
function buildRecommendation({ merchant, catalogEntry, sorted, averageAmount, last, cycleType, avgGap, confidence, tier }) {
  // Prefer the catalog's canonical name over a title-cased merchant string.
  const name = catalogEntry ? catalogEntry.name : titleCase(merchant);
  const subscriptionType = inferType(merchant, catalogEntry);

  const occurrenceCount = sorted.length;
  const hasRepeatCharges = occurrenceCount > 1;

  const reasons = [];
  if (catalogEntry) {
    reasons.push(`Matches known service: ${catalogEntry.name}`);
  }
  // Count and spacing only mean something when there is more than one charge.
  if (hasRepeatCharges) {
    reasons.push(`${occurrenceCount} similar charges`);
    reasons.push(`About ${Math.round(avgGap)} days apart`);
  }
  reasons.push(`${last.currency || 'USD'} ${averageAmount.toFixed(2)} average`);

  const catalogMatch = catalogEntry
    ? { id: catalogEntry.id, name: catalogEntry.name, category: catalogEntry.category }
    : null;

  return {
    // Opaque id derived from merchant, rounded amount and last-seen date.
    id: Buffer.from(`${merchant}:${Math.round(averageAmount)}:${last.tx_date}`).toString('base64url'),
    name,
    subscription_type: subscriptionType,
    expected_amount: Math.round(averageAmount * 100) / 100,
    // NOTE(review): cycleType is passed for both trailing arguments, as in the
    // original; monthlyEquivalent's signature is not visible here — confirm.
    monthly_equivalent: monthlyEquivalent(averageAmount, cycleType, cycleType),
    cycle_type: cycleType,
    billing_cycle: billingCycleForCycleType(cycleType),
    // Day-of-month taken from the YYYY-MM-DD date; falls back to 1 if unparseable.
    due_day: Number(String(last.tx_date).slice(8, 10)) || 1,
    last_seen_date: last.tx_date,
    occurrence_count: occurrenceCount,
    confidence,
    tier,
    catalog_match: catalogMatch,
    transaction_ids: sorted.map((item) => item.id),
    merchant,
    source: last.data_source_name || 'Transaction history',
    reasons,
  };
}
function createSubscriptionFromRecommendation(db, userId, payload = {}) { function createSubscriptionFromRecommendation(db, userId, payload = {}) {
const seenDate = payload.last_seen_date || new Date().toISOString().slice(0, 10); const seenDate = payload.last_seen_date || new Date().toISOString().slice(0, 10);
const source = payload.catalog_match
? 'catalog_match'
: 'simplefin_recommendation';
const draft = { const draft = {
name: payload.name, name: payload.name,
category_id: payload.category_id || null, category_id: payload.category_id || null,
@ -234,9 +347,9 @@ function createSubscriptionFromRecommendation(db, userId, payload = {}) {
is_subscription: 1, is_subscription: 1,
subscription_type: SUBSCRIPTION_TYPES.includes(payload.subscription_type) ? payload.subscription_type : 'other', subscription_type: SUBSCRIPTION_TYPES.includes(payload.subscription_type) ? payload.subscription_type : 'other',
reminder_days_before: 3, reminder_days_before: 3,
subscription_source: 'simplefin_recommendation', subscription_source: source,
subscription_detected_at: new Date().toISOString(), subscription_detected_at: new Date().toISOString(),
notes: payload.merchant ? `Detected from recurring SimpleFIN merchant: ${payload.merchant}` : null, notes: payload.merchant ? `Detected from recurring merchant: ${payload.merchant}` : null,
}; };
const validation = validateBillData(draft); const validation = validateBillData(draft);
@ -270,5 +383,7 @@ module.exports = {
getSubscriptionRecommendations, getSubscriptionRecommendations,
getSubscriptionSummary, getSubscriptionSummary,
getSubscriptions, getSubscriptions,
lookupCatalog,
loadCatalog,
monthlyEquivalent, monthlyEquivalent,
}; };