transceiver-db/packages/scraper/src/scrapers/distributor-leads.ts
Rene Fichtmueller 7f1c701ba1 feat: 6 prediction signal scrapers + forecast engine
New scrapers (all registered in pg-boss, 50 total jobs):
  - sec-edgar.ts       : SEC EDGAR XBRL API — hyperscaler CapEx from 10-Q/10-K
  - github-signals.ts  : GitHub Search/Stats API — tech adoption metrics weekly
  - ebay-velocity.ts   : eBay completed listings — sold count + price distribution
  - ai-clusters.ts     : RSS feeds (6 sources) — AI cluster & DC announcements
  - distributor-leads.ts : Mouser, Digi-Key, RS Components — lead time + stock
  - standards-tracker.ts : IEEE 802.3, OIF, IETF — draft/ballot/published status

New utilities:
  - forecast-engine.ts : Weighted signal aggregator → demand_index + price_direction
    6 signal types, 4 horizons (3/9/12/18 months), 5 technologies tracked

New DB tables (migration 022):
  hyperscaler_capex, distributor_lead_times, github_tech_signals,
  marketplace_velocity, ai_cluster_announcements, standards_activity,
  forecast_signals

Schedules:
  - EDGAR: weekly Mon 06:00
  - GitHub: weekly Sun 05:00
  - eBay velocity: every 12h
  - AI clusters: every 4h (news-speed)
  - Distributor leads: daily 03:30
  - Standards: weekly Wed 04:00
  - Forecast engine: daily 08:00 (after all nightly scrapers)
2026-04-02 02:02:44 +02:00

242 lines
10 KiB
TypeScript

/**
* Distributor Lead Time & Stock Monitor
*
* Scrapes Mouser Electronics, Digi-Key, and RS Components for transceiver
* availability, stock levels, and lead times.
*
* Lead time is a 2-4 month supply constraint indicator:
* Lead time > 12 weeks → price increase likely in 6-8 weeks
* Lead time < 4 weeks → excess supply, price pressure downward
* In Stock / large qty → commodity phase
*
* No API keys required — uses public catalog search pages.
*/
import * as cheerio from "cheerio";
import { pool } from "../utils/db";
import { logger } from "../utils/logger";
/**
 * Request headers sent with every distributor page fetch in this file.
 * The User-Agent string presents the scraper as a desktop Chrome browser.
 */
const HEADERS = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
// NOTE(review): "application/xhtml" does not look like a registered media type —
// presumably "application/xml;q=0.9" was intended; confirm before changing.
"Accept": "text/html,application/xhtml+xml,application/xhtml",
"Accept-Language": "en-US,en;q=0.9",
};
/**
 * One scraped product row, in the shape written to `distributor_lead_times`.
 */
interface LeadEntry {
/** Source identifier set by the scraper: "mouser", "digikey" or "rs-components". */
distributor: string;
/** Form factor label taken from the search config (hard-coded for RS Components), e.g. "QSFP28". */
formFactor: string;
/** Speed class label taken from the search config (hard-coded for RS Components), e.g. "100G". */
speedLabel: string;
/** Part / stock number as shown by the distributor. */
partNumber: string;
/** Product description text; the scrapers truncate it to 200 characters. */
productName: string;
/** Whether the listing text indicated the part is in stock. */
inStock: boolean;
/**
 * Quantity on hand, or null when unknown. The Mouser scraper stores 1 as a
 * placeholder when the page says "in stock" without a parsable quantity.
 */
stockQty: number | null;
/**
 * Lead time in weeks, or null when it could not be determined. The Digi-Key
 * and RS scrapers store 0 for in-stock parts.
 */
leadTimeWeeks: number | null;
/**
 * Parsed unit price, or null when no price was found. RS Components values are
 * GBP stored as-is despite the field name.
 */
priceUsd: number | null;
/** Link to the product page on the distributor's site. */
productUrl: string;
}
// ─── Mouser ────────────────────────────────────────────────────────────────
/**
 * Mouser catalog search pages to scrape, one per speed class.
 * `form` and `speed` are copied onto every LeadEntry produced from that page.
 */
const MOUSER_SEARCHES = [
// NOTE(review): only this search adds `instock=y` — presumably it restricts results
// to stocked parts, which would hide lead times for 100G; confirm this is intended.
{ url: "https://www.mouser.com/c/optoelectronics/fiber-optic-components/fiber-optic-transceivers-pluggable/?q=100G&instock=y", form: "QSFP28", speed: "100G" },
{ url: "https://www.mouser.com/c/optoelectronics/fiber-optic-components/fiber-optic-transceivers-pluggable/?q=400G", form: "QSFP-DD", speed: "400G" },
{ url: "https://www.mouser.com/c/optoelectronics/fiber-optic-components/fiber-optic-transceivers-pluggable/?q=800G", form: "OSFP", speed: "800G" },
// NOTE(review): a literal "+" in a query string is commonly decoded as a space, so
// this likely searches "SFP 10G" rather than "SFP+ 10G" — confirm / use %2B if needed.
{ url: "https://www.mouser.com/c/optoelectronics/fiber-optic-components/fiber-optic-transceivers-pluggable/?q=SFP+10G", form: "SFP+", speed: "10G" },
];
/**
 * Fetches one Mouser catalog search page and extracts a lead entry per product row.
 *
 * Rows are matched with `.product-table tr[data-part-number]`; a row is skipped
 * when it has neither a part number nor a product name.
 *
 * @param entry - Search page URL plus the form factor / speed label stamped on each result.
 * @returns The entries parsed from the page (possibly empty).
 * @throws Error `Mouser <status>` on a non-OK response; fetch and timeout errors propagate.
 */
async function scrapeMouser(entry: { url: string; form: string; speed: string }): Promise<LeadEntry[]> {
  const origin = "https://www.mouser.com";

  // Resolve hrefs against the site origin so relative, absolute and
  // protocol-relative links all yield a usable URL (plain concatenation
  // corrupts anything that is not a root-relative path).
  const toAbsoluteUrl = (href: string): string => {
    if (!href) return origin;
    try {
      return new URL(href, origin).toString();
    } catch {
      return `${origin}${href}`;
    }
  };

  const res = await fetch(entry.url, { headers: HEADERS, signal: AbortSignal.timeout(20000) });
  if (!res.ok) throw new Error(`Mouser ${res.status}`);

  const $ = cheerio.load(await res.text());
  const results: LeadEntry[] = [];

  $(".product-table tr[data-part-number]").each((_, el) => {
    const row = $(el);
    const link = row.find(".part-description a").first();
    const partNumber = row.attr("data-part-number") ?? "";
    const productName = link.text().trim();
    if (!partNumber && !productName) return;

    const priceText = row.find(".pricing-buy-price").first().text().replace(/,/g, "").trim();
    // Unparsable (NaN) and zero prices are both stored as null.
    const price = parseFloat(priceText.replace(/[^0-9.]/g, "")) || null;

    const availText = row.find(".avail-text").first().text().trim();
    const qtyMatch = availText.match(/([\d,]+)\s*In Stock/i);
    const inStock = qtyMatch != null || availText.toLowerCase().includes("in stock");

    // The quantity group can be a lone "," (e.g. "..., In Stock"), which parses
    // to NaN; fall back to the placeholder instead of sending NaN to the DB.
    const parsedQty = qtyMatch ? parseInt((qtyMatch[1] ?? "").replace(/,/g, ""), 10) : NaN;
    const stockQty = Number.isNaN(parsedQty) ? (inStock ? 1 : null) : parsedQty;

    // Lead time stays null unless the availability text states it in weeks.
    const leadMatch = availText.match(/(\d+)\s*(?:week|wk)/i);
    const leadTimeWeeks = leadMatch ? parseInt(leadMatch[1] ?? "", 10) : null;

    results.push({
      distributor: "mouser",
      formFactor: entry.form,
      speedLabel: entry.speed,
      partNumber,
      productName: productName.substring(0, 200),
      inStock,
      stockQty,
      leadTimeWeeks,
      priceUsd: price,
      productUrl: toAbsoluteUrl(link.attr("href") ?? "").substring(0, 500),
    });
  });

  return results;
}
// ─── Digi-Key ──────────────────────────────────────────────────────────────
/**
 * Digi-Key filter pages to scrape, one per speed class.
 * `form` and `speed` are copied onto every LeadEntry produced from that page.
 */
const DIGIKEY_SEARCHES = [
// NOTE(review): the `s=` value is an opaque token — presumably encoded filter state
// copied from the site; the 400G URL has none. Confirm it is still valid/needed.
{ url: "https://www.digikey.com/en/products/filter/fiber-optic-transceivers-pluggable/814?q=100G&s=N4IgjCBcoA2oBhUBjEAzAhgGwM4FMBXAFwHsoBOABgDoA2AJgFYBOGoA", form: "QSFP28", speed: "100G" },
{ url: "https://www.digikey.com/en/products/filter/fiber-optic-transceivers-pluggable/814?q=400G", form: "QSFP-DD", speed: "400G" },
];
/**
 * Fetches one Digi-Key filter page and extracts a lead entry per table row.
 *
 * Only rows with at least four cells are considered; the first four cells are
 * read as part number, description, price and quantity, in that order.
 * Rows whose part number is shorter than three characters are skipped.
 *
 * @param entry - Filter page URL plus the form factor / speed label stamped on each result.
 * @returns The entries parsed from the page (possibly empty).
 * @throws Error `Digi-Key <status>` on a non-OK response; fetch and timeout errors propagate.
 */
async function scrapeDigikey(entry: { url: string; form: string; speed: string }): Promise<LeadEntry[]> {
  const origin = "https://www.digikey.com";

  // Resolve hrefs against the site origin so relative, absolute and
  // protocol-relative links all yield a usable URL (plain concatenation
  // corrupts anything that is not a root-relative path).
  const toAbsoluteUrl = (href: string): string => {
    if (!href) return origin;
    try {
      return new URL(href, origin).toString();
    } catch {
      return `${origin}${href}`;
    }
  };

  const res = await fetch(entry.url, { headers: HEADERS, signal: AbortSignal.timeout(20000) });
  if (!res.ok) throw new Error(`Digi-Key ${res.status}`);

  const $ = cheerio.load(await res.text());
  const results: LeadEntry[] = [];

  // Digi-Key uses React-rendered tables — grab what's in the SSR HTML
  $("tr[data-partid], .product-table tbody tr").each((_, el) => {
    const cells = $(el).find("td");
    if (cells.length < 4) return;

    const partCell = cells.eq(0);
    const partLink = partCell.find("a").first();
    const partNumber = partLink.text().trim() || partCell.text().trim();
    if (!partNumber || partNumber.length < 3) return;

    const productName = cells.eq(1).text().trim();

    const priceText = cells.eq(2).text().replace(/,/g, "").trim();
    // Unparsable (NaN) and zero prices are both stored as null.
    const price = parseFloat(priceText.replace(/[^0-9.]/g, "")) || null;

    // All digits in the quantity cell are concatenated; no digits means 0.
    const qtyText = cells.eq(3).text().replace(/,/g, "").trim();
    const qty = parseInt(qtyText.replace(/[^0-9]/g, ""), 10) || 0;

    results.push({
      distributor: "digikey",
      formFactor: entry.form,
      speedLabel: entry.speed,
      partNumber: partNumber.substring(0, 100),
      productName: productName.substring(0, 200),
      inStock: qty > 0,
      stockQty: qty || null,
      // Stocked parts are recorded with a zero-week lead time; otherwise unknown.
      leadTimeWeeks: qty === 0 ? null : 0,
      priceUsd: price,
      // Truncated to 500 chars like the Mouser and RS scrapers.
      productUrl: toAbsoluteUrl(partLink.attr("href") ?? "").substring(0, 500),
    });
  });

  return results;
}
// ─── RS Components ─────────────────────────────────────────────────────────
/**
 * Fetches the RS Components (UK) 400G transceiver listing and extracts a lead
 * entry per `.product-info-wrap` element. Items without a product title are skipped.
 *
 * Every entry is labelled QSFP-DD / 400G. Prices on this site are GBP and are
 * stored unconverted in `priceUsd`.
 *
 * @returns The entries parsed from the page (possibly empty).
 * @throws Error `RS <status>` on a non-OK response; fetch and timeout errors propagate.
 */
async function scrapeRsComponents(): Promise<LeadEntry[]> {
  const origin = "https://uk.rs-online.com";
  const url = "https://uk.rs-online.com/web/c/optoelectronics/fibre-optic-components/fibre-optic-transceivers/?applied-dimensions=4294958026&sortby=P_PRICE&st=400G";

  // Word-bounded so "unavailable" no longer counts as "available"; the negative
  // pattern rejects phrases such as "out of stock" or "not in stock" that
  // otherwise contain a positive keyword elsewhere in the same text.
  const positiveStock = /\b(?:in stock|available)\b/i;
  const negativeStock = /\b(?:unavailable|out of stock|(?:not|no longer)\s+(?:currently\s+)?(?:in stock|available))\b/i;

  // Resolve hrefs against the site origin so relative, absolute and
  // protocol-relative links all yield a usable URL (plain concatenation
  // corrupts anything that is not a root-relative path).
  const toAbsoluteUrl = (href: string): string => {
    if (!href) return origin;
    try {
      return new URL(href, origin).toString();
    } catch {
      return `${origin}${href}`;
    }
  };

  const res = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(20000) });
  if (!res.ok) throw new Error(`RS ${res.status}`);

  const $ = cheerio.load(await res.text());
  const results: LeadEntry[] = [];

  $(".product-info-wrap").each((_, el) => {
    const item = $(el);
    const titleLink = item.find(".product-title a").first();
    const productName = titleLink.text().trim();
    if (!productName) return;

    const partNumber = item
      .find(".product-number")
      .first()
      .text()
      .trim()
      .replace("RS Stock No.", "")
      .trim();

    const priceText = item.find(".price-info .price").first().text().trim();
    // Unparsable (NaN) and zero prices are both stored as null.
    const price = parseFloat(priceText.replace(/[^0-9.]/g, "")) || null;

    const stockText = item.find(".stock-status").first().text().trim();
    const inStock = positiveStock.test(stockText) && !negativeStock.test(stockText);

    results.push({
      distributor: "rs-components",
      formFactor: "QSFP-DD",
      speedLabel: "400G",
      partNumber,
      productName: productName.substring(0, 200),
      inStock,
      stockQty: null,
      // The page gives no lead time; in-stock items are recorded as zero weeks.
      leadTimeWeeks: inStock ? 0 : null,
      priceUsd: price, // GBP, stored as-is — currency conversion handled in forecast
      productUrl: toAbsoluteUrl(titleLink.attr("href") ?? "").substring(0, 500),
    });
  });

  return results;
}
/**
 * Parameterized INSERT shared by all distributors. It is a plain INSERT with
 * no ON CONFLICT clause, so each call adds a new row.
 */
const INSERT_LEAD_SQL = `
  INSERT INTO distributor_lead_times
    (distributor, form_factor, speed_label, part_number, product_name,
     in_stock, stock_qty, lead_time_weeks, price_usd, product_url)
  VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)
`;

/** Resolves after `ms` milliseconds; used to space out requests to a distributor. */
function pauseBeforeRequest(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/** Writes a single scraped entry to `distributor_lead_times`; query errors propagate. */
async function insertLeadEntry(e: LeadEntry): Promise<void> {
  await pool.query(INSERT_LEAD_SQL, [
    e.distributor, e.formFactor, e.speedLabel, e.partNumber, e.productName,
    e.inStock, e.stockQty, e.leadTimeWeeks, e.priceUsd, e.productUrl,
  ]);
}

/**
 * Runs every configured distributor scrape in sequence and stores the results.
 *
 * Each search is preceded by a fixed delay and isolated in its own try/catch:
 * a failed fetch, parse or insert is logged as a warning, abandons the rest of
 * that search, and the run continues with the next one. Rows inserted before a
 * failure still count towards the final total. The function itself does not throw
 * for per-search failures.
 *
 * Stored rows are capped per search (50 for Mouser, 30 for Digi-Key and RS);
 * the per-search log line reports the number of products parsed, not stored.
 */
export async function scrapeDistributorLeads(): Promise<void> {
  logger.info("Distributor lead time scraper starting");
  let recorded = 0;

  // Mouser
  for (const s of MOUSER_SEARCHES) {
    try {
      await pauseBeforeRequest(3000);
      logger.info(`Mouser: ${s.speed} ${s.form}`);
      const entries = await scrapeMouser(s);
      for (const e of entries.slice(0, 50)) { // limit to 50 per search
        await insertLeadEntry(e);
        recorded++;
      }
      logger.info(`Mouser ${s.speed}: ${entries.length} products`);
    } catch (err) {
      logger.warn(`Mouser scrape failed: ${s.speed}`, { err });
    }
  }

  // Digi-Key
  for (const s of DIGIKEY_SEARCHES) {
    try {
      await pauseBeforeRequest(4000);
      logger.info(`Digi-Key: ${s.speed} ${s.form}`);
      const entries = await scrapeDigikey(s);
      for (const e of entries.slice(0, 30)) {
        await insertLeadEntry(e);
        recorded++;
      }
      logger.info(`Digi-Key ${s.speed}: ${entries.length} products`);
    } catch (err) {
      logger.warn(`Digi-Key scrape failed: ${s.speed}`, { err });
    }
  }

  // RS Components
  try {
    await pauseBeforeRequest(3000);
    const rsEntries = await scrapeRsComponents();
    for (const e of rsEntries.slice(0, 30)) {
      await insertLeadEntry(e);
      recorded++;
    }
    logger.info(`RS Components: ${rsEntries.length} products`);
  } catch (err) {
    logger.warn("RS Components scrape failed", { err });
  }

  logger.info(`Distributor lead time scraper done — ${recorded} records`);
}