Rene Fichtmueller 2348238888 feat: add NADDOD, QSFPTEK, and AddOn Networks scrapers
Three new fetch-based price scrapers for compatible optics vendors:
- NADDOD: WooCommerce, USD, ~800+ SKUs
- QSFPTEK: Custom PHP shop, USD, ~1000+ SKUs
- AddOn Networks: Magento/custom, USD, ~2500 SKUs

All registered in scheduler (8-12h intervals) and index.ts --flags.
Build: 0 TypeScript errors.
2026-03-30 21:20:23 +02:00

203 lines
8.1 KiB
TypeScript

/**
* TIP Scraper Engine — Main entry point.
*
* Usage:
* tsx src/index.ts — Start scheduler (production mode)
* tsx src/index.ts --all — Run all scrapers once
* tsx src/index.ts --fs — Run FS.com scraper once
* tsx src/index.ts --cisco — Run Cisco TMG scraper once
* tsx src/index.ts --optcore — Run Optcore scraper once
* tsx src/index.ts --news — Run news aggregator once
* tsx src/index.ts --flexoptix — Run Flexoptix catalog scraper once
* tsx src/index.ts --vendors — Run Flexoptix vendor list scraper once
* tsx src/index.ts --flexoptix-vendors — Seed Flexoptix supported-vendor list
* tsx src/index.ts --10gtek — Run 10Gtek scraper once
* tsx src/index.ts --champion — Run Champion ONE scraper once
* tsx src/index.ts --fluxlight — Run Fluxlight scraper once
* tsx src/index.ts --sfpcables — Run SFPcables scraper once
* tsx src/index.ts --gbics — Run GBICS.com scraper once
* tsx src/index.ts --prolabs — Run ProLabs scraper once
* tsx src/index.ts --juniper — Run Juniper HCT scraper once
* tsx src/index.ts --switches — Seed switch/router database
* tsx src/index.ts --whitebox — Seed whitebox switch database (Edgecore, Celestica, etc.)
* tsx src/index.ts --switches-ext — Seed extended switches (Fortinet, MikroTik, Industrial, etc.)
* tsx src/index.ts --sonic-hcl — Scrape SONiC Hardware Compatibility List
* tsx src/index.ts --edgecore — Scrape Edgecore product catalog
* tsx src/index.ts --ufispace — Scrape UfiSpace product catalog
* tsx src/index.ts --switch-assets — Scrape switch assets via URL patterns
* tsx src/index.ts --switch-crawl — Crawl switch assets (Cheerio, static HTML vendors)
* tsx src/index.ts --switch-crawl-pw — Crawl switch assets (Playwright, JS-heavy vendors)
* tsx src/index.ts --fetch-only — Run only fetch-based scrapers (no Playwright)
* tsx src/index.ts --atgbics — Run ATGBICS scraper once
* tsx src/index.ts --naddod — Run NADDOD scraper once
* tsx src/index.ts --qsfptek — Run QSFPTEK scraper once
* tsx src/index.ts --addon — Run AddOn Networks scraper once
*/
import { createScheduler, registerSchedules, registerWorkers } from "./scheduler";
import { scrapeFs } from "./scrapers/fs-com";
import { scrapeCiscoTmg } from "./scrapers/cisco-tmg";
import { scrapeOptcore } from "./scrapers/optcore";
import { scrapeNews } from "./scrapers/news";
import { scrapeFlexoptixCatalog } from "./scrapers/flexoptix-catalog";
import { scrapeFlexoptixVendors } from "./scrapers/flexoptix-vendors";
import { scrape10Gtek } from "./scrapers/tenGtek";
import { scrapeChampionOne } from "./scrapers/champion-one";
import { scrapeFluxlight } from "./scrapers/fluxlight";
import { scrapeSfpCables } from "./scrapers/sfpcables";
import { scrapeGbics } from "./scrapers/gbics";
import { scrapeJuniperHct } from "./scrapers/juniper-hct";
import { seedSwitches } from "./scrapers/switch-seed";
import { seedWhiteboxSwitches } from "./scrapers/whitebox-seed";
import { seedFlexoptixVendors } from "./scrapers/flexoptix-supported-vendors";
import { scrapeSonicHcl } from "./scrapers/sonic-hcl";
import { scrapeEdgecore } from "./scrapers/edgecore";
import { scrapeUfiSpace } from "./scrapers/ufispace";
import { seedExtendedSwitches } from "./scrapers/switch-seed-extended";
import { seedBulkSwitches } from "./scrapers/switch-seed-bulk";
import { scrapeSwitchAssets } from "./scrapers/switch-assets";
import { crawlSwitchAssets } from "./scrapers/switch-assets-crawler";
import { crawlSwitchAssetsPlaywright } from "./scrapers/switch-assets-playwright";
import { scrapeAtgbics } from "./scrapers/atgbics";
import { scrapeProLabs } from "./scrapers/prolabs";
import { scrapeNaddod } from "./scrapers/naddod";
import { scrapeQsfptek } from "./scrapers/qsfptek";
import { scrapeAddonNetworks } from "./scrapers/addon-networks";
import { pool } from "./utils/db";
// CLI arguments with the runtime binary and script path stripped off.
const args = process.argv.slice(2);
// `--all`: every one-shot job is selected.
const isAll = args.some((arg) => arg === "--all");
// `--fetch-only`: select the fetch-based jobs and suppress the Playwright ones.
const isFetchOnly = args.some((arg) => arg === "--fetch-only");
/**
 * One-shot mode: runs each job selected on the command line, strictly in
 * sequence and in a fixed order, then closes the database pool.
 *
 * Selection rules:
 * - fetch-based jobs run when their own flag, `--all`, or `--fetch-only` is given;
 * - switch-asset and Crawlee jobs run when their own flag or `--all` is given;
 * - Playwright jobs run when their own flag or `--all` is given, but never
 *   when `--fetch-only` is present.
 *
 * A rejection from any job propagates to the caller; the remaining jobs and
 * the final `pool.end()` are then skipped.
 */
async function runOnce(): Promise<void> {
  type Job = readonly [flag: string, run: () => unknown];

  // Value of an optional `--vendor=<name>` argument; looked up lazily so it is
  // only evaluated for jobs that actually run.
  const vendorFilter = () =>
    args.find((a) => a.startsWith("--vendor="))?.split("=")[1];

  // Fetch-based scrapers (no Playwright/Chromium needed — fast, reliable)
  const fetchJobs: Job[] = [
    ["--flexoptix", () => scrapeFlexoptixCatalog()],
    ["--vendors", () => scrapeFlexoptixVendors()],
    ["--10gtek", () => scrape10Gtek()],
    ["--champion", () => scrapeChampionOne()],
    ["--fluxlight", () => scrapeFluxlight()],
    ["--sfpcables", () => scrapeSfpCables()],
    ["--gbics", () => scrapeGbics()],
    ["--prolabs", () => scrapeProLabs()],
    ["--naddod", () => scrapeNaddod()],
    ["--qsfptek", () => scrapeQsfptek()],
    ["--addon", () => scrapeAddonNetworks()],
    ["--juniper", () => scrapeJuniperHct()],
    ["--switches", () => seedSwitches()],
    ["--whitebox", () => seedWhiteboxSwitches()],
    ["--flexoptix-vendors", () => seedFlexoptixVendors()],
    ["--switches-ext", () => seedExtendedSwitches()],
    ["--switches-bulk", () => seedBulkSwitches()],
    ["--sonic-hcl", () => scrapeSonicHcl()],
    ["--news", () => scrapeNews()],
  ];

  // Jobs that are NOT part of `--fetch-only`: switch-asset scrapers, followed
  // by the Crawlee-based scrapers (Cheerio, no Playwright needed).
  const explicitOrAllJobs: Job[] = [
    ["--switch-assets", () => scrapeSwitchAssets(vendorFilter())],
    ["--switch-crawl", () => crawlSwitchAssets(vendorFilter())],
    ["--edgecore", () => scrapeEdgecore()],
    ["--ufispace", () => scrapeUfiSpace()],
  ];

  // Playwright-based scrapers (need Chromium installed)
  const playwrightJobs: Job[] = [
    ["--fs", () => scrapeFs()],
    ["--cisco", () => scrapeCiscoTmg()],
    ["--optcore", () => scrapeOptcore()],
    ["--switch-crawl-pw", () => crawlSwitchAssetsPlaywright(vendorFilter())],
    ["--atgbics", () => scrapeAtgbics()],
  ];

  const requested = (flag: string): boolean => args.includes(flag) || isAll;

  for (const [flag, run] of fetchJobs) {
    if (requested(flag) || isFetchOnly) {
      await run();
    }
  }

  for (const [flag, run] of explicitOrAllJobs) {
    if (requested(flag)) {
      await run();
    }
  }

  if (!isFetchOnly) {
    for (const [flag, run] of playwrightJobs) {
      if (requested(flag)) {
        await run();
      }
    }
  }

  await pool.end();
}
/**
 * Scheduler mode: creates the scheduler, registers schedules and workers on
 * it, and installs SIGINT/SIGTERM handlers that stop the scheduler, close the
 * database pool and exit the process.
 *
 * The returned promise resolves once the scheduler is set up and the signal
 * handlers are installed; the process then stays alive until a signal arrives.
 * Errors thrown during setup propagate to the caller.
 */
async function runScheduler(): Promise<void> {
  console.log("=== TIP Scraper Engine ===\n");
  console.log("Mode: Scheduler (pg-boss)\n");

  const boss = await createScheduler();
  await registerSchedules(boss);
  await registerWorkers(boss);

  console.log("\nScheduler running. Press Ctrl+C to stop.\n");

  // Graceful shutdown.
  // A second signal (e.g. Ctrl+C pressed twice, or SIGTERM after SIGINT) must
  // not run the teardown again while the first one is still in flight.
  let shuttingDown = false;
  const shutdown = async (): Promise<void> => {
    if (shuttingDown) {
      return;
    }
    shuttingDown = true;

    console.log("\nShutting down...");
    let exitCode = 0;
    try {
      await boss.stop();
      await pool.end();
    } catch (err: unknown) {
      // Report teardown failures explicitly instead of leaving an unhandled
      // rejection, and signal the failure through the exit status.
      console.error("Shutdown failed:", err);
      exitCode = 1;
    }
    process.exit(exitCode);
  };

  // Signal listeners ignore the returned promise; `void` makes that explicit
  // (all rejections are already handled inside `shutdown`).
  process.on("SIGINT", () => void shutdown());
  process.on("SIGTERM", () => void shutdown());
}
/**
 * Flags that select one-shot mode. If at least one of them is present on the
 * command line the process runs `runOnce()`; otherwise it starts the
 * scheduler. Every flag checked inside `runOnce()` must be listed here,
 * otherwise passing it on its own silently starts the scheduler instead.
 */
const ALL_FLAGS = [
  "--all",
  "--fs",
  "--cisco",
  "--optcore",
  "--news",
  "--flexoptix",
  "--vendors",
  "--10gtek",
  "--champion",
  "--fluxlight",
  "--sfpcables",
  "--gbics",
  "--prolabs",
  "--naddod",
  "--qsfptek",
  "--addon",
  "--juniper",
  "--switches",
  "--whitebox",
  "--switches-ext",
  // Handled by runOnce() but previously missing from this list.
  "--switches-bulk",
  "--flexoptix-vendors",
  "--sonic-hcl",
  "--edgecore",
  "--ufispace",
  "--switch-assets",
  "--switch-crawl",
  "--switch-crawl-pw",
  "--fetch-only",
  "--atgbics",
];

if (args.some((a) => ALL_FLAGS.includes(a))) {
  // One-shot mode: run the selected jobs once.
  runOnce().catch((err) => {
    console.error("Fatal:", err);
    process.exit(1);
  });
} else {
  // No one-shot flag given: start the long-running scheduler.
  runScheduler().catch((err) => {
    console.error("Fatal:", err);
    process.exit(1);
  });
}