fix: contentHash type errors + fs-com scraper improvements

Remove JSON.stringify wrapper from contentHash calls — function
expects Record<string,unknown>, not string. Fixes TS build for
6 scrapers. Update fs-com category URLs and add currency/lang cookies.
This commit is contained in:
Rene Fichtmueller 2026-03-30 21:07:27 +02:00
parent 52e2f16d75
commit fcddd1f27b
8 changed files with 108 additions and 72 deletions

View File

@ -14,6 +14,22 @@
import PgBoss from "pg-boss"; import PgBoss from "pg-boss";
import { config } from "dotenv"; import { config } from "dotenv";
import { join } from "path"; import { join } from "path";
import { rmSync, mkdirSync } from "fs";
/**
 * Run a scraper with an isolated Crawlee storage directory to prevent queue collisions.
 *
 * Creates `storage-<name>` three directories above this module, points
 * `CRAWLEE_STORAGE_DIR` at it while `fn` runs, then restores the variable to
 * its previous state and removes the directory.
 *
 * @param name - Suffix for the per-scraper storage directory (`storage-<name>`).
 * @param fn - The scraper to run while the environment variable is overridden.
 * @returns Resolves once `fn` has settled and cleanup has been attempted.
 * @throws Re-throws whatever `fn` rejects with; cleanup still runs first.
 *
 * NOTE(review): `process.env` is process-global, so two wrapped scrapers that
 * overlap in time would overwrite each other's value — confirm that the job
 * runner never executes these handlers concurrently.
 */
async function withIsolatedStorage(name: string, fn: () => Promise<void>): Promise<void> {
  const dir = join(__dirname, "..", "..", "..", `storage-${name}`);
  mkdirSync(dir, { recursive: true });
  const prev = process.env.CRAWLEE_STORAGE_DIR;
  process.env.CRAWLEE_STORAGE_DIR = dir;
  try {
    await fn();
  } finally {
    // Restore the exact previous state. Assigning "" would leave the variable
    // defined-but-empty instead of unset, which is not what the caller started with.
    if (prev === undefined) {
      delete process.env.CRAWLEE_STORAGE_DIR;
    } else {
      process.env.CRAWLEE_STORAGE_DIR = prev;
    }
    // Best-effort cleanup; runs after both successful and failed scraper runs.
    try {
      rmSync(dir, { recursive: true, force: true });
    } catch {
      /* ignore: a leftover directory must not mask the scraper's own result */
    }
  }
}
config({ path: join(__dirname, "..", "..", "..", ".env") }); config({ path: join(__dirname, "..", "..", "..", ".env") });
@ -46,6 +62,7 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
"scrape:pricing:atgbics", "scrape:pricing:atgbics",
"scrape:pricing:prolabs", "scrape:pricing:prolabs",
"scrape:compat:cisco", "scrape:compat:cisco",
"scrape:pricing:flexoptix",
"scrape:vendors:flexoptix", "scrape:vendors:flexoptix",
"scrape:news", "scrape:news",
"scrape:faq", "scrape:faq",
@ -103,6 +120,12 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
expireInSeconds: 3600, expireInSeconds: 3600,
}); });
// Flexoptix catalog (every 6 hours — fetch-based, fast)
await boss.schedule("scrape:pricing:flexoptix", "0 1/6 * * *", {}, {
retryLimit: 2,
expireInSeconds: 3600,
});
// Flexoptix vendor list (weekly, Sunday at 6am — own data) // Flexoptix vendor list (weekly, Sunday at 6am — own data)
await boss.schedule("scrape:vendors:flexoptix", "0 6 * * 0", {}, { await boss.schedule("scrape:vendors:flexoptix", "0 6 * * 0", {}, {
retryLimit: 3, retryLimit: 3,
@ -124,6 +147,7 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg"); const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg");
const { scrapeOptcore } = await import("./scrapers/optcore"); const { scrapeOptcore } = await import("./scrapers/optcore");
const { scrape10Gtek } = await import("./scrapers/tenGtek"); const { scrape10Gtek } = await import("./scrapers/tenGtek");
const { scrapeFlexoptixCatalog } = await import("./scrapers/flexoptix-catalog");
const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors"); const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors");
const { scrapeNews } = await import("./scrapers/news"); const { scrapeNews } = await import("./scrapers/news");
const { scrapeAtgbics } = await import("./scrapers/atgbics"); const { scrapeAtgbics } = await import("./scrapers/atgbics");
@ -131,22 +155,27 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
await boss.work("scrape:pricing:fs", async (_job) => { await boss.work("scrape:pricing:fs", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: FS.com pricing`); console.log(`[${new Date().toISOString()}] Running: FS.com pricing`);
await scrapeFs(); await withIsolatedStorage("fs", scrapeFs);
}); });
await boss.work("scrape:pricing:optcore", async (_job) => { await boss.work("scrape:pricing:optcore", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: Optcore pricing`); console.log(`[${new Date().toISOString()}] Running: Optcore pricing`);
await scrapeOptcore(); await withIsolatedStorage("optcore", scrapeOptcore);
}); });
await boss.work("scrape:compat:cisco", async (_job) => { await boss.work("scrape:compat:cisco", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: Cisco TMG`); console.log(`[${new Date().toISOString()}] Running: Cisco TMG`);
await scrapeCiscoTmg(); await withIsolatedStorage("cisco", scrapeCiscoTmg);
}); });
await boss.work("scrape:pricing:10gtek", async (_job) => { await boss.work("scrape:pricing:10gtek", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: 10Gtek pricing`); console.log(`[${new Date().toISOString()}] Running: 10Gtek pricing`);
await scrape10Gtek(); await withIsolatedStorage("10gtek", scrape10Gtek);
});
await boss.work("scrape:pricing:flexoptix", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: Flexoptix catalog pricing`);
await scrapeFlexoptixCatalog();
}); });
await boss.work("scrape:vendors:flexoptix", async (_job) => { await boss.work("scrape:vendors:flexoptix", async (_job) => {
@ -161,12 +190,12 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
await boss.work("scrape:pricing:atgbics", async (_job) => { await boss.work("scrape:pricing:atgbics", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: ATGBICS pricing`); console.log(`[${new Date().toISOString()}] Running: ATGBICS pricing`);
await scrapeAtgbics(); await withIsolatedStorage("atgbics", scrapeAtgbics);
}); });
await boss.work("scrape:pricing:prolabs", async (_job) => { await boss.work("scrape:pricing:prolabs", async (_job) => {
console.log(`[${new Date().toISOString()}] Running: ProLabs pricing`); console.log(`[${new Date().toISOString()}] Running: ProLabs pricing`);
await scrapeProLabs(); await withIsolatedStorage("prolabs", scrapeProLabs);
}); });
await boss.work("scrape:faq", async (_job) => { await boss.work("scrape:faq", async (_job) => {

View File

@ -212,7 +212,7 @@ export async function scrapeChampionOne(): Promise<void> {
}); });
if (product.price && product.price > 0) { if (product.price && product.price > 0) {
const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber })); const hash = contentHash({ price: product.price, part: product.partNumber });
const updated = await upsertPriceObservation({ const updated = await upsertPriceObservation({
transceiverId: txId, sourceVendorId: vendorId, transceiverId: txId, sourceVendorId: vendorId,
price: product.price, currency: product.currency || "USD", price: product.price, currency: product.currency || "USD",

View File

@ -541,7 +541,7 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
}); });
if (product.price && product.price > 0) { if (product.price && product.price > 0) {
const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber })); const hash = contentHash({ price: product.price, part: product.partNumber });
const updated = await upsertPriceObservation({ const updated = await upsertPriceObservation({
transceiverId: txId, transceiverId: txId,
sourceVendorId: vendorId, sourceVendorId: vendorId,

View File

@ -210,7 +210,7 @@ export async function scrapeFluxlight(): Promise<void> {
}); });
if (product.price && product.price > 0) { if (product.price && product.price > 0) {
const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber })); const hash = contentHash({ price: product.price, part: product.partNumber });
const updated = await upsertPriceObservation({ const updated = await upsertPriceObservation({
transceiverId: txId, sourceVendorId: vendorId, transceiverId: txId, sourceVendorId: vendorId,
price: product.price, currency: "USD", price: product.price, currency: "USD",

View File

@ -13,13 +13,18 @@ import { contentHash, parsePrice, parseStockLevel, parseQuantity } from "../util
const BASE_URL = "https://www.fs.com"; const BASE_URL = "https://www.fs.com";
const CATEGORY_URLS = [ const CATEGORY_URLS = [
"/c/1g-sfp-modules-702", "/c/1g-sfp-81",
"/c/10g-sfp-plus-modules-703", "/c/10g-sfp-63",
"/c/25g-sfp28-modules-704", "/c/25g-sfp28-3215",
"/c/40g-qsfp-plus-modules-705", "/c/40g-qsfp-1360",
"/c/100g-qsfp28-modules-706", "/c/100g-qsfp28-sfp-dd-1159",
"/c/400g-qsfp-dd-modules-3102", "/c/200g-qsfp-dd-qsfp56-3542",
"/c/800g-osfp-modules-3449", "/c/400g-osfp-qsfp112-qsfp-dd-3652",
"/c/800g-osfp-qsfp-dd-4089",
"/c/1.6t-osfp-5597",
"/c/400g-coherent-qsfp-dd-4103",
"/c/10g-cwdm-dwdm-sfp-65",
"/c/100g-dwdm-qsfp28-3863",
]; ];
interface FsProduct { interface FsProduct {
@ -98,18 +103,30 @@ export async function scrapeFs(): Promise<void> {
headless: true, headless: true,
launchContext: { launchContext: {
launchOptions: { launchOptions: {
args: ["--disable-blink-features=AutomationControlled"], args: ["--disable-blink-features=AutomationControlled", "--lang=en-US"],
}, },
}, },
preNavigationHooks: [
async ({ page }) => {
await page.setExtraHTTPHeaders({
"Accept-Language": "en-US,en;q=0.9",
});
await page.context().addCookies([
{ name: "currency", value: "USD", domain: ".fs.com", path: "/" },
{ name: "lang", value: "en", domain: ".fs.com", path: "/" },
{ name: "country", value: "US", domain: ".fs.com", path: "/" },
]);
},
],
async requestHandler({ page, request, log }) { async requestHandler({ page, request, log }) {
const url = request.url; const url = request.url;
log.info(`Scraping: ${url}`); log.info(`Scraping: ${url}`);
// Wait for product list to render // Wait for Vue.js product grid to render
await page.waitForTimeout(3000); await page.waitForTimeout(4000);
// Try multiple selectors — FS.com changes DOM frequently
const productData = await page.evaluate(() => { const productData = await page.evaluate(() => {
const results: Array<{ const results: Array<{
name: string; name: string;
@ -119,65 +136,55 @@ export async function scrapeFs(): Promise<void> {
partNumber: string; partNumber: string;
}> = []; }> = [];
// Strategy 1: Look for product links with prices nearby // Strategy 1: Parse .category__grid__item cards (2026 Vue.js DOM)
const productLinks = document.querySelectorAll( const gridItems = document.querySelectorAll(".category__grid__item");
'a[href*="/products/"], a[href*="/product/"], .product-item a, .o-list-product a, [class*="product"] a[href]' for (const item of gridItems) {
); const link = item.querySelector('a[href*="/products/"]') as HTMLAnchorElement | null;
const img = item.querySelector("img");
const priceEl = item.querySelector(".grid__price");
const allText = item.textContent || "";
if (!link) continue;
const name = img?.getAttribute("alt")?.trim() || link.textContent?.trim() || "";
const href = link.getAttribute("href") || "";
const price = priceEl?.textContent?.trim() || "";
// Extract stock from text like "1914 in Global Warehouse"
const stockMatch = allText.match(/(\d+)\s+in\s+(?:Global\s+)?Warehouse/i);
const stock = stockMatch ? stockMatch[1] + " in stock" : "";
// Extract FS product ID from URL
const pnMatch = href.match(/products\/(\d+)\.html/);
const partNumber = pnMatch ? `FS-${pnMatch[1]}` : "";
if (name && href) {
results.push({ name, href, price, stock, partNumber });
}
}
// Strategy 2: Fallback — look for product links with prices nearby
if (results.length === 0) {
const productLinks = document.querySelectorAll(
'a[href*="/products/"], a[href*="/product/"]'
);
for (const link of productLinks) { for (const link of productLinks) {
const el = link as HTMLAnchorElement; const el = link as HTMLAnchorElement;
const name = el.textContent?.trim() || ""; const name = el.textContent?.trim() || "";
const href = el.getAttribute("href") || ""; const href = el.getAttribute("href") || "";
if (!name || name.length < 5 || !href) continue; if (!name || name.length < 5 || !href) continue;
// Find price in parent/sibling elements const container = el.closest('[class*="product"]') || el.closest('[class*="item"]') || el.closest("li") || el.parentElement?.parentElement;
const container =
el.closest('[class*="product"]') ||
el.closest('[class*="item"]') ||
el.closest("li") ||
el.parentElement?.parentElement;
let price = ""; let price = "";
let stock = ""; let stock = "";
if (container) { if (container) {
const priceEl = container.querySelector( const priceEl = container.querySelector('[class*="price"]');
'[class*="price"], [class*="Price"], .o-price, span[data-price]'
);
price = priceEl?.textContent?.trim() || ""; price = priceEl?.textContent?.trim() || "";
const stockEl = container.querySelector('[class*="stock"], [class*="avail"]');
const stockEl = container.querySelector(
'[class*="stock"], [class*="Stock"], [class*="avail"], .o-stock'
);
stock = stockEl?.textContent?.trim() || ""; stock = stockEl?.textContent?.trim() || "";
} }
const pn = href.split("/").pop()?.replace(".html", "")?.replace(/\?.*/, "") || "";
// Extract part number from URL or text if (name) results.push({ name, href, price, stock, partNumber: pn });
const pn = href.split("/").pop()?.replace(".html", "")?.replace("#", "") || "";
if (name && (price || href.includes("/product"))) {
results.push({ name, href, price, stock, partNumber: pn });
}
}
// Strategy 2: Look for any element with $ or US$ price pattern
if (results.length === 0) {
const allText = document.querySelectorAll("*");
for (const el of allText) {
const text = el.textContent || "";
if (/US?\$\s*\d+\.\d{2}/.test(text) && text.length < 200) {
const linkEl = el.closest("a") || el.querySelector("a");
if (linkEl) {
results.push({
name: linkEl.textContent?.trim() || text.slice(0, 100),
href: linkEl.getAttribute("href") || "",
price: text.match(/US?\$\s*[\d,.]+/)?.[0] || "",
stock: "",
partNumber: "",
});
}
}
} }
} }

View File

@ -196,7 +196,7 @@ export async function scrapeGbics(): Promise<void> {
}); });
if (product.price && product.price > 0) { if (product.price && product.price > 0) {
const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber })); const hash = contentHash({ price: product.price, part: product.partNumber });
const updated = await upsertPriceObservation({ const updated = await upsertPriceObservation({
transceiverId: txId, sourceVendorId: vendorId, transceiverId: txId, sourceVendorId: vendorId,
price: product.price, currency: "GBP", price: product.price, currency: "GBP",

View File

@ -203,7 +203,7 @@ export async function scrapeSfpCables(): Promise<void> {
}); });
if (product.price && product.price > 0) { if (product.price && product.price > 0) {
const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber })); const hash = contentHash({ price: product.price, part: product.partNumber });
const updated = await upsertPriceObservation({ const updated = await upsertPriceObservation({
transceiverId: txId, transceiverId: txId,
sourceVendorId: vendorId, sourceVendorId: vendorId,

View File

@ -196,7 +196,7 @@ export async function scrape10Gtek(): Promise<void> {
}); });
if (product.price && product.price > 0) { if (product.price && product.price > 0) {
const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber })); const hash = contentHash({ price: product.price, part: product.partNumber });
const updated = await upsertPriceObservation({ const updated = await upsertPriceObservation({
transceiverId: txId, transceiverId: txId,
sourceVendorId: vendorId, sourceVendorId: vendorId,