From 820fedd58ee6c0e7a9330d50bdeadd9292f8065e Mon Sep 17 00:00:00 2001 From: null Date: Fri, 29 May 2026 01:51:42 -0500 Subject: [PATCH] feat: subscription catalog migration, 200-row seed, improved detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit db/database.js: - Added monitored to COLUMN_WHITELIST - runSubscriptionCatalogMigration() creates table + seeds 200 rows - Migration v0.65 in both legacy reconciliation and main migrations services/subscriptionService.js: - SUBSCRIPTION_TYPES expanded 10→14 (food, education, shopping, security) - TYPE_KEYWORDS updated with 30 new keywords across categories - loadCatalog() loads 200 entries per recommendation call, graceful [] on old DBs - lookupCatalog() longest-match wins, handles embedded domains - inferType() catalog hit takes priority over keyword guessing - Two-tier detection: catalog 1-hit → possible (62), 2+ → pattern/confirmed with boost (68-99) - Canonical names from catalog, type auto-filled - buildRecommendation() extracted as shared helper with tier + catalog_match fields - createSubscriptionFromRecommendation sets subscription_source to catalog_match --- db/database.js | 254 ++++++++++++++++++++++++++++++++ package.json | 2 +- services/subscriptionService.js | 239 ++++++++++++++++++++++-------- 3 files changed, 432 insertions(+), 63 deletions(-) diff --git a/db/database.js b/db/database.js index e5c7d85..281e75b 100644 --- a/db/database.js +++ b/db/database.js @@ -52,6 +52,8 @@ const COLUMN_WHITELIST = new Set([ 'subscription_source', 'subscription_detected_at', 'deleted_at', // sessions table columns 'created_at', + // financial_accounts table columns + 'monitored', ]); // Security validation function for column names @@ -69,6 +71,240 @@ function isValidSqlDefinition(def) { return /^[\w\s\(\)\',!@#$%^&*+=\[\]<>\-.]+$/i.test(def); } +// ── Subscription catalog seed ───────────────────────────────────────────────── +// [rank, name, category, subscription_type, website, domain] +const SUBSCRIPTION_CATALOG_ROWS = [ + [1,'Netflix','Video Streaming','streaming','https://www.netflix.com/','netflix.com'], + [2,'Amazon Prime Video','Video Streaming','streaming','https://www.primevideo.com/','primevideo.com'], + [3,'Hulu','Video Streaming','streaming','https://www.hulu.com/','hulu.com'], + [4,'Disney+','Video Streaming','streaming','https://www.disneyplus.com/','disneyplus.com'], + [5,'Max','Video Streaming','streaming','https://www.max.com/','max.com'], + [6,'Peacock','Video Streaming','streaming','https://www.peacocktv.com/','peacocktv.com'], + [7,'Paramount+','Video Streaming','streaming','https://www.paramountplus.com/','paramountplus.com'], + [8,'Apple TV+','Video Streaming','streaming','https://tv.apple.com/','tv.apple.com'], + [9,'YouTube Premium','Video Streaming','streaming','https://www.youtube.com/premium','youtube.com'], + [10,'ESPN+','Sports Streaming','streaming','https://plus.espn.com/','plus.espn.com'], + [11,'YouTube TV','Live TV Streaming','streaming','https://tv.youtube.com/','tv.youtube.com'], + [12,'Sling TV','Live TV Streaming','streaming','https://www.sling.com/','sling.com'], + [13,'Fubo','Live TV Streaming','streaming','https://www.fubo.tv/','fubo.tv'], + [14,'DirecTV Stream','Live TV Streaming','streaming','https://streamtv.directv.com/','streamtv.directv.com'], + [15,'Philo','Live TV Streaming','streaming','https://www.philo.com/','philo.com'], + [16,'Starz','Video Streaming','streaming','https://www.starz.com/','starz.com'], + [17,'MGM+','Video Streaming','streaming','https://www.mgmplus.com/','mgmplus.com'], + [18,'AMC+','Video Streaming','streaming','https://www.amcplus.com/','amcplus.com'], + [19,'BET+','Video Streaming','streaming','https://www.bet.plus/','bet.plus'], + [20,'Crunchyroll','Video Streaming','streaming','https://www.crunchyroll.com/','crunchyroll.com'], + [21,'HIDIVE','Video Streaming','streaming','https://www.hidive.com/','hidive.com'], + [22,'Shudder','Video Streaming','streaming','https://www.shudder.com/','shudder.com'], + [23,'Acorn TV','Video Streaming','streaming','https://acorn.tv/','acorn.tv'], + [24,'BritBox','Video Streaming','streaming','https://www.britbox.com/','britbox.com'], + [25,'The Criterion Channel','Video Streaming','streaming','https://www.criterionchannel.com/','criterionchannel.com'], + [26,'MUBI','Video Streaming','streaming','https://mubi.com/','mubi.com'], + [27,'Discovery+','Video Streaming','streaming','https://www.discoveryplus.com/','discoveryplus.com'], + [28,'Hallmark+','Video Streaming','streaming','https://www.hallmarkplus.com/','hallmarkplus.com'], + [29,'PBS Passport','Video Streaming','streaming','https://www.pbs.org/passport/','pbs.org'], + [30,'MagellanTV','Video Streaming','streaming','https://www.magellantv.com/','magellantv.com'], + [31,'Curiosity Stream','Video Streaming','streaming','https://curiositystream.com/','curiositystream.com'], + [32,'Nebula','Video Streaming','streaming','https://nebula.tv/','nebula.tv'], + [33,'WOW Presents Plus','Video Streaming','streaming','https://www.wowpresentsplus.com/','wowpresentsplus.com'], + [34,'ViX Premium','Video Streaming','streaming','https://vix.com/','vix.com'], + [35,'FloSports','Sports Streaming','streaming','https://www.flosports.tv/','flosports.tv'], + [36,'DAZN','Sports Streaming','streaming','https://www.dazn.com/','dazn.com'], + [37,'MLB.TV','Sports Streaming','streaming','https://www.mlb.com/live-stream-games/subscribe','mlb.com'], + [38,'NBA League Pass','Sports Streaming','streaming','https://www.nba.com/watch/league-pass-stream','nba.com'], + [39,'NHL Power Play on ESPN+','Sports Streaming','streaming','https://www.espn.com/espnplus/catalog/nhl','espn.com'], + [40,'NFL+','Sports Streaming','streaming','https://www.nfl.com/plus/','nfl.com'], + [41,'Spotify Premium','Music & Audio','music','https://www.spotify.com/premium/','spotify.com'], + [42,'Apple Music','Music & Audio','music','https://www.apple.com/apple-music/','apple.com'], + [43,'Amazon Music Unlimited','Music & Audio','music','https://music.amazon.com/','music.amazon.com'], + [44,'Pandora Premium','Music & Audio','music','https://www.pandora.com/upgrade','pandora.com'], + [45,'SiriusXM','Music & Audio','music','https://www.siriusxm.com/','siriusxm.com'], + [46,'TIDAL','Music & Audio','music','https://tidal.com/','tidal.com'], + [47,'Qobuz','Music & Audio','music','https://www.qobuz.com/us-en/music/streaming/offers','qobuz.com'], + [48,'SoundCloud Go+','Music & Audio','music','https://soundcloud.com/go','soundcloud.com'], + [49,'Deezer','Music & Audio','music','https://www.deezer.com/us/offers','deezer.com'], + [50,'iHeartRadio Plus','Music & Audio','music','https://www.iheart.com/plus/','iheart.com'], + [51,'Audible','Audiobooks','education','https://www.audible.com/','audible.com'], + [52,'Spotify Audiobooks','Audiobooks','education','https://www.spotify.com/us/audiobooks/','spotify.com'], + [53,'Everand','Audiobooks & Ebooks','education','https://www.everand.com/','everand.com'], + [54,'Scribd','Documents & Ebooks','education','https://www.scribd.com/','scribd.com'], + [55,'Kindle Unlimited','Ebooks','education','https://www.amazon.com/kindle-dbs/hz/subscribe/ku','amazon.com'], + [56,'Kobo Plus','Ebooks & Audiobooks','education','https://www.kobo.com/us/en/plus','kobo.com'], + [57,'Libro.fm','Audiobooks','education','https://libro.fm/','libro.fm'], + [58,'Blinkist','Books & Learning','education','https://www.blinkist.com/','blinkist.com'], + [59,'Pocket Casts Plus','Podcasts','music','https://pocketcasts.com/plus/','pocketcasts.com'], + [60,'Wondery+','Podcasts','music','https://wondery.com/plus/','wondery.com'], + [61,'The New York Times','News & Magazines','news','https://www.nytimes.com/subscription','nytimes.com'], + [62,'The Wall Street Journal','News & Magazines','news','https://www.wsj.com/news/subscribe','wsj.com'], + [63,'The Washington Post','News & Magazines','news','https://subscribe.washingtonpost.com/','subscribe.washingtonpost.com'], + [64,'The Atlantic','News & Magazines','news','https://www.theatlantic.com/subscribe/','theatlantic.com'], + [65,'The New Yorker','News & Magazines','news','https://www.newyorker.com/subscribe','newyorker.com'], + [66,'Bloomberg.com','News & Magazines','news','https://www.bloomberg.com/subscriptions','bloomberg.com'], + [67,'Financial Times','News & Magazines','news','https://www.ft.com/products','ft.com'], + [68,'The Economist','News & Magazines','news','https://www.economist.com/subscribe','economist.com'], + [69,'TIME','News & Magazines','news','https://time.com/subscribe/','time.com'], + [70,'WIRED','News & Magazines','news','https://www.wired.com/subscribe/','wired.com'], + [71,'Consumer Reports','News & Magazines','news','https://www.consumerreports.org/join/','consumerreports.org'], + [72,'Politico Pro','News & Magazines','news','https://www.politicopro.com/','politicopro.com'], + [73,'The Athletic','Sports Media','streaming','https://theathletic.com/','theathletic.com'], + [74,'Substack','Creator Media','news','https://substack.com/','substack.com'], + [75,'Medium','Creator Media','news','https://medium.com/membership','medium.com'], + [76,'Patreon','Creator Media','news','https://www.patreon.com/','patreon.com'], + [77,'Apple News+','News & Magazines','news','https://www.apple.com/apple-news/','apple.com'], + [78,'Readly','News & Magazines','news','https://us.readly.com/','us.readly.com'], + [79,'PressReader','News & Magazines','news','https://www.pressreader.com/','pressreader.com'], + [80,'The Information','News & Magazines','news','https://www.theinformation.com/subscribe','theinformation.com'], + [81,'Microsoft 365','Software & Productivity','software','https://www.microsoft.com/microsoft-365','microsoft.com'], + [82,'Google One','Cloud & Storage','cloud','https://one.google.com/','one.google.com'], + [83,'iCloud+','Cloud & Storage','cloud','https://www.apple.com/icloud/','apple.com'], + [84,'Dropbox','Cloud & Storage','cloud','https://www.dropbox.com/plans','dropbox.com'], + [85,'Box','Cloud & Storage','cloud','https://www.box.com/pricing','box.com'], + [86,'Adobe Creative Cloud','Software & Design','software','https://www.adobe.com/creativecloud.html','adobe.com'], + [87,'Canva Pro','Software & Design','software','https://www.canva.com/pro/','canva.com'], + [88,'Figma','Software & Design','software','https://www.figma.com/pricing/','figma.com'], + [89,'Notion','Software & Productivity','software','https://www.notion.so/pricing','notion.so'], + [90,'Evernote','Software & Productivity','software','https://evernote.com/compare-plans','evernote.com'], + [91,'Todoist','Software & Productivity','software','https://todoist.com/pricing','todoist.com'], + [92,'Grammarly','Writing & AI','software','https://www.grammarly.com/plans','grammarly.com'], + [93,'ChatGPT','AI','software','https://chatgpt.com/pricing','chatgpt.com'], + [94,'Claude','AI','software','https://claude.ai/upgrade','claude.ai'], + [95,'Perplexity','AI','software','https://www.perplexity.ai/pro','perplexity.ai'], + [96,'Gemini Advanced','AI','software','https://one.google.com/about/google-ai-plans/','one.google.com'], + [97,'GitHub Copilot','Developer Tools','software','https://github.com/features/copilot/plans','github.com'], + [98,'Cursor','Developer Tools','software','https://www.cursor.com/pricing','cursor.com'], + [99,'Replit','Developer Tools','software','https://replit.com/pricing','replit.com'], + [100,'Setapp','Software & Productivity','software','https://setapp.com/','setapp.com'], + [101,'1Password','Security','security','https://1password.com/pricing','1password.com'], + [102,'Dashlane','Security','security','https://www.dashlane.com/pricing','dashlane.com'], + [103,'NordVPN','Security','security','https://nordvpn.com/pricing/','nordvpn.com'], + [104,'ExpressVPN','Security','security','https://www.expressvpn.com/','expressvpn.com'], + [105,'Surfshark','Security','security','https://surfshark.com/pricing','surfshark.com'], + [106,'Norton 360','Security','security','https://us.norton.com/products','us.norton.com'], + [107,'McAfee+','Security','security','https://www.mcafee.com/en-us/consumer-support/pricing.html','mcafee.com'], + [108,'QuickBooks Online','Finance Software','software','https://quickbooks.intuit.com/pricing/','quickbooks.intuit.com'], + [109,'TurboTax Live','Finance Software','software','https://turbotax.intuit.com/personal-taxes/online/live/','turbotax.intuit.com'], + [110,'YNAB','Finance Software','software','https://www.ynab.com/pricing','ynab.com'], + [111,'Rocket Money Premium','Finance Software','software','https://www.rocketmoney.com/premium','rocketmoney.com'], + [112,'Copilot Money','Finance Software','software','https://copilot.money/','copilot.money'], + [113,'Calendly','Software & Productivity','software','https://calendly.com/pricing','calendly.com'], + [114,'Zoom Workplace','Software & Productivity','software','https://www.zoom.com/en/pricing/','zoom.com'], + [115,'Slack','Software & Productivity','software','https://slack.com/pricing','slack.com'], + [116,'Xbox Game Pass','Gaming','gaming','https://www.xbox.com/en-US/xbox-game-pass','xbox.com'], + [117,'PlayStation Plus','Gaming','gaming','https://www.playstation.com/en-us/ps-plus/','playstation.com'], + [118,'Nintendo Switch Online','Gaming','gaming','https://www.nintendo.com/us/switch/online/','nintendo.com'], + [119,'Apple Arcade','Gaming','gaming','https://www.apple.com/apple-arcade/','apple.com'], + [120,'EA Play','Gaming','gaming','https://www.ea.com/ea-play','ea.com'], + [121,'Ubisoft+','Gaming','gaming','https://www.ubisoft.com/en-us/ubisoft-plus','ubisoft.com'], + [122,'NVIDIA GeForce NOW','Gaming','gaming','https://www.nvidia.com/en-us/geforce-now/memberships/','nvidia.com'], + [123,'Roblox Premium','Gaming','gaming','https://www.roblox.com/premium/membership','roblox.com'], + [124,'Fortnite Crew','Gaming','gaming','https://www.fortnite.com/fortnite-crew-subscription','fortnite.com'], + [125,'Minecraft Realms','Gaming','gaming','https://www.minecraft.net/realms','minecraft.net'], + [126,'Twitch Turbo','Creator & Social','news','https://www.twitch.tv/turbo','twitch.tv'], + [127,'Discord Nitro','Creator & Social','news','https://discord.com/nitro','discord.com'], + [128,'X Premium','Creator & Social','news','https://help.x.com/en/using-x/x-premium','help.x.com'], + [129,'Snapchat+','Creator & Social','news','https://www.snapchat.com/plus','snapchat.com'], + [130,'TikTok Live Subscription','Creator & Social','news','https://www.tiktok.com/live/creators/en-US/subscription/','tiktok.com'], + [131,'Meta Verified','Creator & Social','news','https://about.meta.com/technologies/meta-verified/','about.meta.com'], + [132,'LinkedIn Premium','Career & Social','news','https://premium.linkedin.com/','premium.linkedin.com'], + [133,'Tinder Gold','Dating','other','https://tinder.com/feature/plus','tinder.com'], + [134,'Bumble Premium','Dating','other','https://bumble.com/en/the-buzz/bumble-premium','bumble.com'], + [135,'Hinge+','Dating','other','https://hinge.co/hinge-plus','hinge.co'], + [136,'Amazon Prime','Shopping & Delivery','shopping','https://www.amazon.com/amazonprime','amazon.com'], + [137,'Walmart+','Shopping & Delivery','shopping','https://www.walmart.com/plus','walmart.com'], + [138,'Target Circle 360','Shopping & Delivery','shopping','https://www.target.com/circle/target-circle-360','target.com'], + [139,'Costco','Warehouse Clubs','shopping','https://www.costco.com/join-costco.html','costco.com'], + [140,'Sam\'s Club','Warehouse Clubs','shopping','https://www.samsclub.com/join','samsclub.com'], + [141,'BJ\'s Wholesale Club','Warehouse Clubs','shopping','https://www.bjs.com/membership','bjs.com'], + [142,'Instacart+','Grocery & Delivery','food','https://www.instacart.com/instacart-plus','instacart.com'], + [143,'DoorDash DashPass','Food Delivery','food','https://www.doordash.com/dashpass/','doordash.com'], + [144,'Uber One','Food & Rides','food','https://www.uber.com/us/en/u/uber-one/','uber.com'], + [145,'Grubhub+','Food Delivery','food','https://www.grubhub.com/plus','grubhub.com'], + [146,'Shipt','Grocery & Delivery','food','https://www.shipt.com/membership/','shipt.com'], + [147,'Kroger Boost','Grocery & Delivery','food','https://www.kroger.com/pr/boost','kroger.com'], + [148,'Thrive Market','Grocery & Delivery','food','https://thrivemarket.com/','thrivemarket.com'], + [149,'Misfits Market','Grocery & Delivery','food','https://www.misfitsmarket.com/','misfitsmarket.com'], + [150,'Imperfect Foods','Grocery & Delivery','food','https://www.imperfectfoods.com/','imperfectfoods.com'], + [151,'Chewy Autoship','Pet Retail','shopping','https://www.chewy.com/app/content/autoship','chewy.com'], + [152,'Petco Vital Care Premier','Pet Retail','shopping','https://www.petco.com/shop/en/petcostore/c/vitalcare','petco.com'], + [153,'PetSmart Treats Rewards VIPP','Pet Retail','shopping','https://www.petsmart.com/treats-rewards-vipp.html','petsmart.com'], + [154,'GameStop Pro','Retail Memberships','shopping','https://www.gamestop.com/pro/','gamestop.com'], + [155,'Barnes & Noble Membership','Retail Memberships','shopping','https://www.barnesandnoble.com/membership','barnesandnoble.com'], + [156,'HelloFresh','Food & Meal Kits','food','https://www.hellofresh.com/','hellofresh.com'], + [157,'Blue Apron','Food & Meal Kits','food','https://www.blueapron.com/','blueapron.com'], + [158,'Home Chef','Food & Meal Kits','food','https://www.homechef.com/','homechef.com'], + [159,'Marley Spoon','Food & Meal Kits','food','https://marleyspoon.com/','marleyspoon.com'], + [160,'Dinnerly','Food & Meal Kits','food','https://dinnerly.com/','dinnerly.com'], + [161,'EveryPlate','Food & Meal Kits','food','https://www.everyplate.com/','everyplate.com'], + [162,'Green Chef','Food & Meal Kits','food','https://www.greenchef.com/','greenchef.com'], + [163,'Purple Carrot','Food & Meal Kits','food','https://www.purplecarrot.com/','purplecarrot.com'], + [164,'Sunbasket','Food & Meal Kits','food','https://sunbasket.com/','sunbasket.com'], + [165,'Factor','Prepared Meals','food','https://www.factor75.com/','factor75.com'], + [166,'CookUnity','Prepared Meals','food','https://www.cookunity.com/','cookunity.com'], + [167,'Fresh N Lean','Prepared Meals','food','https://www.freshnlean.com/','freshnlean.com'], + [168,'Hungryroot','Food & Meal Kits','food','https://www.hungryroot.com/','hungryroot.com'], + [169,'Daily Harvest','Prepared Meals','food','https://www.daily-harvest.com/','daily-harvest.com'], + [170,'Tovala','Prepared Meals','food','https://www.tovala.com/','tovala.com'], + [171,'MistoBox','Coffee & Tea','food','https://mistobox.com/','mistobox.com'], + [172,'Trade Coffee','Coffee & Tea','food','https://www.drinktrade.com/','drinktrade.com'], + [173,'Atlas Coffee Club','Coffee & Tea','food','https://atlascoffeeclub.com/','atlascoffeeclub.com'], + [174,'Bean Box','Coffee & Tea','food','https://beanbox.com/','beanbox.com'], + [175,'Universal Yums','Snacks','food','https://www.universalyums.com/','universalyums.com'], + [176,'Peloton App','Fitness & Wellness','fitness','https://www.onepeloton.com/app','onepeloton.com'], + [177,'ClassPass','Fitness & Wellness','fitness','https://classpass.com/','classpass.com'], + [178,'Apple Fitness+','Fitness & Wellness','fitness','https://www.apple.com/apple-fitness-plus/','apple.com'], + [179,'Strava','Fitness & Wellness','fitness','https://www.strava.com/subscribe','strava.com'], + [180,'Fitbit Premium','Fitness & Wellness','fitness','https://www.fitbit.com/global/us/products/services/premium','fitbit.com'], + [181,'MyFitnessPal Premium','Fitness & Wellness','fitness','https://www.myfitnesspal.com/premium','myfitnesspal.com'], + [182,'Noom','Fitness & Wellness','fitness','https://www.noom.com/','noom.com'], + [183,'WW','Fitness & Wellness','fitness','https://www.weightwatchers.com/us/plans','weightwatchers.com'], + [184,'Headspace','Meditation & Wellness','fitness','https://www.headspace.com/','headspace.com'], + [185,'Calm','Meditation & Wellness','fitness','https://www.calm.com/premium','calm.com'], + [186,'Sleep Cycle Premium','Sleep & Wellness','fitness','https://www.sleepcycle.com/premium/','sleepcycle.com'], + [187,'Oura Membership','Fitness & Wellness','fitness','https://ouraring.com/membership','ouraring.com'], + [188,'Whoop','Fitness & Wellness','fitness','https://www.whoop.com/us/en/membership/','whoop.com'], + [189,'Aaptiv','Fitness & Wellness','fitness','https://aaptiv.com/','aaptiv.com'], + [190,'Fitbod','Fitness & Wellness','fitness','https://fitbod.me/pricing/','fitbod.me'], + [191,'Alo Moves','Fitness & Wellness','fitness','https://www.alomoves.com/','alomoves.com'], + [192,'Obe Fitness','Fitness & Wellness','fitness','https://obefitness.com/','obefitness.com'], + [193,'Centr','Fitness & Wellness','fitness','https://centr.com/','centr.com'], + [194,'Future','Fitness & Wellness','fitness','https://www.future.co/','future.co'], + [195,'Tonal Membership','Fitness & Wellness','fitness','https://www.tonal.com/membership/','tonal.com'], + [196,'Duolingo Super','Education','education','https://www.duolingo.com/super','duolingo.com'], + [197,'MasterClass','Education','education','https://www.masterclass.com/','masterclass.com'], + [198,'Coursera Plus','Education','education','https://www.coursera.org/courseraplus','coursera.org'], + [199,'Skillshare','Education','education','https://www.skillshare.com/','skillshare.com'], + [200,'Book of the Month','Books & Subscription Boxes','education','https://www.bookofthemonth.com/','bookofthemonth.com'], +]; + +function runSubscriptionCatalogMigration(database) { + database.exec(` + CREATE TABLE IF NOT EXISTS subscription_catalog ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + rank INTEGER NOT NULL, + name TEXT NOT NULL, + category TEXT NOT NULL, + subscription_type TEXT NOT NULL, + website TEXT, + domain TEXT, + created_at TEXT NOT NULL DEFAULT (datetime('now')) + ); + CREATE INDEX IF NOT EXISTS idx_subscription_catalog_rank ON subscription_catalog(rank); + CREATE INDEX IF NOT EXISTS idx_subscription_catalog_type ON subscription_catalog(subscription_type); + `); + + const existing = database.prepare('SELECT COUNT(*) as n FROM subscription_catalog').get(); + if (existing.n === 0) { + const insert = database.prepare( + 'INSERT INTO subscription_catalog (rank, name, category, subscription_type, website, domain) VALUES (?,?,?,?,?,?)' + ); + const insertMany = database.transaction((rows) => { + for (const row of rows) insert.run(...row); + }); + insertMany(SUBSCRIPTION_CATALOG_ROWS); + console.log(`[migration] subscription_catalog: seeded ${SUBSCRIPTION_CATALOG_ROWS.length} rows`); + } +} + function seedManualDataSources(database = db) { if (!database) return; const hasDataSources = database.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='data_sources'").get(); @@ -1100,6 +1336,16 @@ function reconcileLegacyMigrations() { console.log('[migration] financial_accounts: monitored column added'); } } + }, + { + version: 'v0.65', + description: 'subscription_catalog: top-200 known subscription services', + check: function() { + return !!db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='subscription_catalog'").get(); + }, + run: function() { + runSubscriptionCatalogMigration(db); + } } ]; @@ -1875,6 +2121,14 @@ function runMigrations() { console.log('[migration] financial_accounts: monitored column added'); } } + }, + { + version: 'v0.65', + description: 'subscription_catalog: top-200 known subscription services', + dependsOn: ['v0.64'], + run: function() { + runSubscriptionCatalogMigration(db); + } } ]; diff --git a/package.json b/package.json index 5eb7d18..bc91a71 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "bill-tracker", - "version": "0.33.1", + "version": "0.33.2", "description": "Monthly bill tracking system", "main": "server.js", "scripts": { diff --git a/services/subscriptionService.js b/services/subscriptionService.js index 6a8198e..ab69b24 100644 --- a/services/subscriptionService.js +++ b/services/subscriptionService.js @@ -1,6 +1,12 @@ +'use strict'; + const { insertBill, validateBillData } = require('./billsService'); -const SUBSCRIPTION_TYPES = ['streaming', 'software', 'cloud', 'music', 'news', 'fitness', 'gaming', 'utilities', 'insurance', 'other']; +const SUBSCRIPTION_TYPES = [ + 'streaming', 'software', 'cloud', 'music', 'news', + 'fitness', 'gaming', 'utilities', 'insurance', + 'food', 'education', 'shopping', 'security', 'other', +]; const MONTHLY_FACTORS = { weekly: 52 / 12, @@ -12,23 +18,65 @@ const MONTHLY_FACTORS = { irregular: 1, }; +// Fallback keyword list used when catalog lookup finds no match const TYPE_KEYWORDS = [ - ['streaming', ['netflix', 'hulu', 'disney', 'max', 'paramount', 'peacock', 'youtube tv', 'sling']], - ['music', ['spotify', 'apple music', 'tidal', 'pandora']], - ['software', ['adobe', 'microsoft', 'github', 'notion', 'linear', 'figma', 'canva', 'openai', 'chatgpt']], - ['cloud', ['dropbox', 'icloud', 'google storage', 'backblaze', 'aws', 'cloudflare']], - ['news', ['nyt', 'new york times', 'economist', 'athletic', 'washington post']], - ['fitness', ['peloton', 'planet fitness', 'gym', 'fitbit']], - ['gaming', ['xbox', 'playstation', 'steam', 'nintendo']], - ['utilities', ['verizon', 'at t', 'comcast', 'xfinity', 'spectrum', 'tmobile']], + ['streaming', ['netflix', 'hulu', 'disney', 'max', 'paramount', 'peacock', 'youtube tv', 'sling', 'espn', 'fubo', 'starz', 'crunchyroll', 'dazn']], + ['music', ['spotify', 'apple music', 'tidal', 'pandora', 'siriusxm', 'soundcloud', 'deezer', 'iheart']], + ['software', ['adobe', 'microsoft', 'github', 'notion', 'figma', 'canva', 'openai', 'chatgpt', 'grammarly', 'zoom', 'slack', 'cursor', 'ynab']], + ['cloud', ['dropbox', 'icloud', 'google one', 'google storage', 'backblaze', 'box storage']], + ['news', ['nyt', 'new york times', 'economist', 'athletic', 'washington post', 'wsj', 'bloomberg', 'substack', 'patreon', 'medium']], + ['fitness', ['peloton', 'planet fitness', 'gym', 'fitbit', 'strava', 'headspace', 'calm', 'noom', 'classpass', 'whoop']], + ['gaming', ['xbox', 'playstation', 'steam', 'nintendo', 'roblox', 'discord nitro', 'ea play', 'ubisoft']], + ['utilities', ['verizon', 'at t', 'att', 'comcast', 'xfinity', 'spectrum', 'tmobile', 't mobile']], ['insurance', ['insurance', 'geico', 'progressive', 'state farm', 'allstate']], + ['food', ['hellofresh', 'blue apron', 'doordash', 'instacart', 'uber eats', 'grubhub', 'factor', 'hungryroot']], + ['education', ['duolingo', 'masterclass', 'coursera', 'skillshare', 'audible', 'kindle unlimited', 'blinkist']], + ['shopping', ['amazon prime', 'walmart plus', 'costco', 'target circle', 'chewy']], + ['security', ['nordvpn', 'expressvpn', '1password', 'dashlane', 'norton', 'mcafee', 'surfshark']], ]; +// ── Catalog ─────────────────────────────────────────────────────────────────── + +function loadCatalog(db) { + try { + return db.prepare('SELECT id, rank, name, category, subscription_type, domain FROM subscription_catalog ORDER BY rank ASC').all(); + } catch { + return []; + } +} + +function normalizeCatalogName(value) { + return String(value || '').toLowerCase().replace(/[^a-z0-9]+/g, ' ').trim(); +} + +// Given a normalized merchant string, find the best matching catalog entry. +// Matches on service name (normalized) or domain (dot replaced with space). +function lookupCatalog(catalog, merchantText) { + if (!catalog.length || !merchantText) return null; + let best = null; + let bestLen = 0; + for (const entry of catalog) { + const nameKey = normalizeCatalogName(entry.name); + const domainKey = entry.domain ? entry.domain.replace(/\./g, ' ') : ''; + if (nameKey.length >= 3 && merchantText.includes(nameKey) && nameKey.length > bestLen) { + best = entry; + bestLen = nameKey.length; + } + if (domainKey.length >= 4 && merchantText.includes(domainKey) && domainKey.length > bestLen) { + best = entry; + bestLen = domainKey.length; + } + } + return best; +} + +// ── Helpers ─────────────────────────────────────────────────────────────────── + function normalizeMerchant(value) { return String(value || '') .toLowerCase() .replace(/[^a-z0-9\s]/g, ' ') - .replace(/\b(pos|debit|card|payment|purchase|recurring|online|inc|llc|co)\b/g, ' ') + .replace(/\b(pos|debit|card|payment|purchase|recurring|online|inc|llc|co|www)\b/g, ' ') .replace(/\s+/g, ' ') .trim(); } @@ -41,8 +89,9 @@ function titleCase(value) { .join(' '); } -function inferType(text) { - const haystack = normalizeMerchant(text); +function inferType(merchantText, catalogEntry) { + if (catalogEntry?.subscription_type) return catalogEntry.subscription_type; + const haystack = normalizeMerchant(merchantText); for (const [type, words] of TYPE_KEYWORDS) { if (words.some(word => haystack.includes(word))) return type; } @@ -54,8 +103,8 @@ function monthlyEquivalent(amount, cycleType, billingCycle) { const fallback = String(billingCycle || '').toLowerCase() === 'quarterly' ? 'quarterly' : String(billingCycle || '').toLowerCase() === 'annually' - ? 'annual' - : key; + ? 'annual' + : key; const factor = MONTHLY_FACTORS[key] ?? MONTHLY_FACTORS[fallback] ?? 1; return Math.round(Number(amount || 0) * factor * 100) / 100; } @@ -67,7 +116,6 @@ function nextDueDate(bill, now = new Date()) { if (date < new Date(now.getFullYear(), now.getMonth(), now.getDate())) { date = new Date(now.getFullYear(), now.getMonth() + 1, dueDay); } - if (cycle === 'quarterly' || cycle === 'annual') { const startMonth = Math.min(Math.max(Number(bill.cycle_day) || 1, 1), 12) - 1; const step = cycle === 'quarterly' ? 3 : 12; @@ -76,7 +124,6 @@ function nextDueDate(bill, now = new Date()) { date = new Date(date.getFullYear(), date.getMonth() + step, dueDay); } } - return date.toISOString().slice(0, 10); } @@ -89,7 +136,7 @@ function decorateSubscription(bill) { monthly_equivalent: monthly, yearly_equivalent: Math.round(monthly * 12 * 100) / 100, next_due_date: nextDueDate(bill), - subscription_type: bill.subscription_type || inferType(`${bill.name} ${bill.category_name || ''}`), + subscription_type: bill.subscription_type || inferType(`${bill.name} ${bill.category_name || ''}`, null), }; } @@ -136,19 +183,24 @@ function dollarsFromTransactionAmount(amount) { function billingCycleForCycleType(cycleType) { if (cycleType === 'quarterly') return 'quarterly'; - if (cycleType === 'annual') return 'annually'; - if (cycleType === 'monthly') return 'monthly'; + if (cycleType === 'annual') return 'annually'; + if (cycleType === 'monthly') return 'monthly'; return 'irregular'; } +// ── Recommendations ─────────────────────────────────────────────────────────── + function getSubscriptionRecommendations(db, userId) { + const catalog = loadCatalog(db); const existingNames = existingBillNames(db, userId); + + // Scan all transaction sources, not just SimpleFIN const rows = db.prepare(` SELECT t.id, t.amount, t.currency, t.description, t.payee, t.memo, t.category, COALESCE(t.posted_date, substr(t.transacted_at, 1, 10)) AS tx_date, ds.provider AS data_source_provider, - ds.name AS data_source_name + ds.name AS data_source_name FROM transactions t LEFT JOIN data_sources ds ON ds.id = t.data_source_id AND ds.user_id = t.user_id WHERE t.user_id = ? @@ -156,10 +208,10 @@ function getSubscriptionRecommendations(db, userId) { AND t.match_status = 'unmatched' AND t.amount < 0 AND COALESCE(t.posted_date, substr(t.transacted_at, 1, 10)) >= date('now', '-420 days') - AND (ds.provider = 'simplefin' OR t.source_type = 'provider_sync') ORDER BY tx_date ASC `).all(userId); + // Group by merchant + amount bucket const groups = new Map(); for (const tx of rows) { const merchant = normalizeMerchant(tx.payee || tx.description || tx.memo); @@ -167,60 +219,121 @@ function getSubscriptionRecommendations(db, userId) { const amount = dollarsFromTransactionAmount(tx.amount); if (amount < 1) continue; const key = `${merchant}:${Math.round(amount)}`; - const group = groups.get(key) || { merchant, amountBucket: Math.round(amount), items: [] }; + if (!groups.has(key)) { + groups.set(key, { merchant, amountBucket: Math.round(amount), items: [], catalogEntry: null }); + } + const group = groups.get(key); group.items.push({ ...tx, amount_dollars: amount }); - groups.set(key, group); + if (!group.catalogEntry) group.catalogEntry = lookupCatalog(catalog, merchant); } const recommendations = []; - for (const group of groups.values()) { - if (group.items.length < 2) continue; - if (existingNames.some(name => name.includes(group.merchant) || group.merchant.includes(name))) continue; - const sorted = group.items.filter(item => item.tx_date).sort((a, b) => String(a.tx_date).localeCompare(String(b.tx_date))); - if (sorted.length < 2) continue; - const gaps = []; - for (let i = 1; i < sorted.length; i++) { - gaps.push(Math.round((new Date(`${sorted[i].tx_date}T00:00:00`) - new Date(`${sorted[i - 1].tx_date}T00:00:00`)) / 86400000)); - } - const avgGap = gaps.reduce((sum, gap) => sum + gap, 0) / gaps.length; - const cycleType = avgGap >= 320 ? 'annual' : avgGap >= 75 ? 'quarterly' : avgGap >= 10 && avgGap <= 18 ? 'biweekly' : avgGap <= 9 ? 'weekly' : 'monthly'; - if (cycleType === 'monthly' && (avgGap < 24 || avgGap > 38)) continue; - if (cycleType === 'quarterly' && (avgGap < 75 || avgGap > 105)) continue; + for (const group of groups.values()) { + const { merchant, catalogEntry } = group; + + // Skip if already a known bill + if (existingNames.some(name => name.includes(merchant) || merchant.includes(name))) continue; + + const sorted = group.items + .filter(item => item.tx_date) + .sort((a, b) => String(a.tx_date).localeCompare(String(b.tx_date))); + + if (sorted.length === 0) continue; const averageAmount = sorted.reduce((sum, item) => sum + item.amount_dollars, 0) / sorted.length; - const maxDelta = Math.max(...sorted.map(item => Math.abs(item.amount_dollars - averageAmount))); + const maxDelta = sorted.length > 1 + ? Math.max(...sorted.map(item => Math.abs(item.amount_dollars - averageAmount))) + : 0; + const last = sorted[sorted.length - 1]; + + // ── Tier 1: catalog match with 1 occurrence (possible subscription) ────── + if (catalogEntry && sorted.length === 1) { + const confidence = 62; + recommendations.push(buildRecommendation({ + merchant, catalogEntry, sorted, averageAmount, maxDelta, last, + cycleType: 'monthly', avgGap: 30, confidence, tier: 'possible', + })); + continue; + } + + // ── Tier 2: 2+ occurrences — pattern detection ──────────────────────────── + if (sorted.length < 2) continue; + + const gaps = []; + for (let i = 1; i < sorted.length; i++) { + gaps.push(Math.round( + (new Date(`${sorted[i].tx_date}T00:00:00`) - new Date(`${sorted[i - 1].tx_date}T00:00:00`)) / 86400000 + )); + } + const avgGap = gaps.reduce((sum, g) => sum + g, 0) / gaps.length; + const cycleType = avgGap >= 320 ? 'annual' + : avgGap >= 75 ? 'quarterly' + : avgGap >= 10 && avgGap <= 18 ? 'biweekly' + : avgGap <= 9 ? 'weekly' + : 'monthly'; + + if (cycleType === 'monthly' && (avgGap < 24 || avgGap > 38)) continue; + if (cycleType === 'quarterly' && (avgGap < 75 || avgGap > 105)) continue; if (maxDelta > Math.max(3, averageAmount * 0.18)) continue; - const last = sorted[sorted.length - 1]; - recommendations.push({ - id: Buffer.from(`${group.merchant}:${group.amountBucket}:${last.tx_date}`).toString('base64url'), - name: titleCase(group.merchant), - subscription_type: inferType(group.merchant), - expected_amount: Math.round(averageAmount * 100) / 100, - monthly_equivalent: monthlyEquivalent(averageAmount, cycleType, cycleType), - cycle_type: cycleType, - billing_cycle: billingCycleForCycleType(cycleType), - due_day: Number(String(last.tx_date).slice(8, 10)) || 1, - last_seen_date: last.tx_date, - occurrence_count: sorted.length, - confidence: Math.min(96, 58 + sorted.length * 9 + (maxDelta <= 1 ? 10 : 0)), - transaction_ids: sorted.map(item => item.id), - merchant: group.merchant, - source: last.data_source_name || 'SimpleFIN', - reasons: [ - `${sorted.length} similar SimpleFIN charges`, - `About ${Math.round(avgGap)} days apart`, - `${last.currency || 'USD'} ${averageAmount.toFixed(2)} average charge`, - ], - }); + // Confidence: catalog match raises the floor and ceiling + let confidence; + if (catalogEntry) { + confidence = Math.min(99, 68 + sorted.length * 8 + (maxDelta <= 1 ? 8 : 0)); + } else { + confidence = Math.min(96, 58 + sorted.length * 9 + (maxDelta <= 1 ? 10 : 0)); + } + + const tier = catalogEntry ? 'confirmed' : 'pattern'; + recommendations.push(buildRecommendation({ + merchant, catalogEntry, sorted, averageAmount, maxDelta, last, + cycleType, avgGap, confidence, tier, + })); } - return recommendations.sort((a, b) => b.confidence - a.confidence || b.occurrence_count - a.occurrence_count).slice(0, 20); + return recommendations + .sort((a, b) => b.confidence - a.confidence || b.occurrence_count - a.occurrence_count) + .slice(0, 20); +} + +function buildRecommendation({ merchant, catalogEntry, sorted, averageAmount, maxDelta, last, cycleType, avgGap, confidence, tier }) { + const name = catalogEntry ? catalogEntry.name : titleCase(merchant); + const subscriptionType = inferType(merchant, catalogEntry); + + const reasons = []; + if (catalogEntry) reasons.push(`Matches known service: ${catalogEntry.name}`); + if (sorted.length > 1) reasons.push(`${sorted.length} similar charges`); + if (sorted.length > 1) reasons.push(`About ${Math.round(avgGap)} days apart`); + reasons.push(`${last.currency || 'USD'} ${averageAmount.toFixed(2)} average`); + + return { + id: Buffer.from(`${merchant}:${Math.round(averageAmount)}:${last.tx_date}`).toString('base64url'), + name, + subscription_type: subscriptionType, + expected_amount: Math.round(averageAmount * 100) / 100, + monthly_equivalent: monthlyEquivalent(averageAmount, cycleType, cycleType), + cycle_type: cycleType, + billing_cycle: billingCycleForCycleType(cycleType), + due_day: Number(String(last.tx_date).slice(8, 10)) || 1, + last_seen_date: last.tx_date, + occurrence_count: sorted.length, + confidence, + tier, + catalog_match: catalogEntry ? { id: catalogEntry.id, name: catalogEntry.name, category: catalogEntry.category } : null, + transaction_ids: sorted.map(item => item.id), + merchant, + source: last.data_source_name || 'Transaction history', + reasons, + }; } function createSubscriptionFromRecommendation(db, userId, payload = {}) { const seenDate = payload.last_seen_date || new Date().toISOString().slice(0, 10); + const source = payload.catalog_match + ? 'catalog_match' + : 'simplefin_recommendation'; + const draft = { name: payload.name, category_id: payload.category_id || null, @@ -234,9 +347,9 @@ function createSubscriptionFromRecommendation(db, userId, payload = {}) { is_subscription: 1, subscription_type: SUBSCRIPTION_TYPES.includes(payload.subscription_type) ? payload.subscription_type : 'other', reminder_days_before: 3, - subscription_source: 'simplefin_recommendation', + subscription_source: source, subscription_detected_at: new Date().toISOString(), - notes: payload.merchant ? `Detected from recurring SimpleFIN merchant: ${payload.merchant}` : null, + notes: payload.merchant ? `Detected from recurring merchant: ${payload.merchant}` : null, }; const validation = validateBillData(draft); @@ -270,5 +383,7 @@ module.exports = { getSubscriptionRecommendations, getSubscriptionSummary, getSubscriptions, + lookupCatalog, + loadCatalog, monthlyEquivalent, };