New scrapers (all registered in pg-boss, 50 total jobs):
- sec-edgar.ts : SEC EDGAR XBRL API — hyperscaler CapEx from 10-Q/10-K
- github-signals.ts : GitHub Search/Stats API — tech adoption metrics weekly
- ebay-velocity.ts : eBay completed listings — sold count + price distribution
- ai-clusters.ts : RSS feeds (6 sources) — AI cluster & DC announcements
- distributor-leads.ts : Mouser, Digi-Key, RS Components — lead time + stock
- standards-tracker.ts : IEEE 802.3, OIF, IETF — draft/ballot/published status
New utilities:
- forecast-engine.ts : Weighted signal aggregator → demand_index + price_direction
6 signal types, 4 horizons (3/9/12/18 months), 5 technologies tracked
New DB tables (migration 022):
hyperscaler_capex, distributor_lead_times, github_tech_signals,
marketplace_velocity, ai_cluster_announcements, standards_activity,
forecast_signals
Schedules:
- EDGAR: weekly Mon 06:00
- GitHub: weekly Sun 05:00
- eBay velocity: every 12h
- AI clusters: every 4h (news-speed)
- Distributor leads: daily 03:30
- Standards: weekly Wed 04:00
- Forecast engine: daily 08:00 (after all nightly scrapers)
242 lines
10 KiB
TypeScript
242 lines
10 KiB
TypeScript
/**
 * Distributor Lead Time & Stock Monitor
 *
 * Scrapes Mouser Electronics, Digi-Key, and RS Components for transceiver
 * availability, stock levels, and lead times.
 *
 * Lead time is a 2-4 month supply constraint indicator:
 *   Lead time > 12 weeks  → price increase likely in 6-8 weeks
 *   Lead time < 4 weeks   → excess supply, price pressure downward
 *   In Stock / large qty  → commodity phase
 *
 * No API keys required — uses public catalog search pages.
 */
|
|
|
|
import * as cheerio from "cheerio";
|
|
import { pool } from "../utils/db";
|
|
import { logger } from "../utils/logger";
|
|
|
|
/**
 * Request headers sent with every distributor catalog page fetch.
 *
 * The values imitate a desktop Chrome 120 browser so that the public HTML
 * search pages are served instead of a bot/error page.
 */
const HEADERS = {
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  // The previous value ended in `application/xhtml`, which is not a registered
  // media type. Use the conventional browser form (XML fallback plus a
  // low-priority wildcard) so the header is consistent with the Chrome
  // User-Agent above.
  "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  "Accept-Language": "en-US,en;q=0.9",
};
|
|
|
|
/**
 * One product observation scraped from a distributor catalog page.
 * Each instance is written as a single row of `distributor_lead_times`.
 */
interface LeadEntry {
  /** Distributor slug; this file emits "mouser", "digikey" and "rs-components". */
  distributor: string;
  /** Transceiver form factor of the search that produced the row, e.g. "QSFP28". */
  formFactor: string;
  /** Speed grade of the search that produced the row, e.g. "100G". */
  speedLabel: string;

  /** Part / stock number as shown by the distributor. */
  partNumber: string;
  /** Product title or description text. */
  productName: string;
  /** Link to the product detail page. */
  productUrl: string;

  /** Whether the listing reports the part as in stock. */
  inStock: boolean;
  /** Units in stock, or null when the page gives no usable quantity. */
  stockQty: number | null;
  /** Lead time in weeks, or null when it is not known. */
  leadTimeWeeks: number | null;

  /**
   * Listed unit price, or null when none could be parsed.
   * NOTE(review): the RS Components scraper stores GBP here unconverted (per its
   * inline comment) — confirm consumers handle the currency.
   */
  priceUsd: number | null;
}
|
|
|
|
// ─── Mouser ────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Mouser catalog searches to scrape, one per tracked speed grade.
 * Every search targets the same pluggable fiber-optic transceiver category and
 * differs only in its query string; `form` and `speed` are stamped on each
 * scraped row.
 *
 * NOTE(review): only the 100G search carries `instock=y`, so out-of-stock 100G
 * parts are presumably never returned — confirm this asymmetry is intended.
 * NOTE(review): in `q=SFP+10G` the raw `+` is normally decoded as a space in a
 * form-encoded query string — confirm whether "SFP+" (`%2B`) was meant.
 */
const MOUSER_SEARCHES = [
  { query: "q=100G&instock=y", form: "QSFP28", speed: "100G" },
  { query: "q=400G", form: "QSFP-DD", speed: "400G" },
  { query: "q=800G", form: "OSFP", speed: "800G" },
  { query: "q=SFP+10G", form: "SFP+", speed: "10G" },
].map(({ query, form, speed }) => ({
  url: `https://www.mouser.com/c/optoelectronics/fiber-optic-components/fiber-optic-transceivers-pluggable/?${query}`,
  form,
  speed,
}));
|
|
|
|
/**
 * Scrape one Mouser catalog search page and turn each product row into a
 * {@link LeadEntry}.
 *
 * Rows are the `tr[data-part-number]` elements inside `.product-table`; a row
 * is skipped when it has neither a part number nor a product name.
 *
 * @param entry - Search descriptor: the page URL plus the form factor and
 *   speed label stamped on every resulting entry.
 * @returns The parsed entries, in page order.
 * @throws Error `Mouser <status>` when the HTTP response is not OK; network
 *   and timeout (20 s) errors from `fetch` propagate unchanged.
 */
async function scrapeMouser(entry: { url: string; form: string; speed: string }): Promise<LeadEntry[]> {
  const res = await fetch(entry.url, { headers: HEADERS, signal: AbortSignal.timeout(20000) });
  if (!res.ok) throw new Error(`Mouser ${res.status}`);
  const $ = cheerio.load(await res.text());

  const MOUSER_ORIGIN = "https://www.mouser.com";

  // Resolve an href against the Mouser origin so that absolute links are kept
  // intact instead of being glued onto the origin a second time.
  const toProductUrl = (href: string): string => {
    try {
      return new URL(href, MOUSER_ORIGIN).toString();
    } catch {
      return `${MOUSER_ORIGIN}${href}`;
    }
  };

  const results: LeadEntry[] = [];

  $(".product-table tr[data-part-number]").each((_, el) => {
    const row = $(el);
    const descriptionLink = row.find(".part-description a").first();

    const partNumber = row.attr("data-part-number") ?? "";
    const productName = descriptionLink.text().trim();
    if (!partNumber && !productName) return;

    const productUrl = toProductUrl(descriptionLink.attr("href") ?? "");

    // Strip currency symbols and thousands separators; NaN and 0 both become null.
    const priceText = row.find(".pricing-buy-price").first().text().trim();
    const price = parseFloat(priceText.replace(/[^0-9.]/g, "")) || null;

    const availText = row.find(".avail-text").first().text().trim();
    const qtyMatch = availText.match(/([\d,]+)\s*In Stock/i);
    const parsedQty = qtyMatch ? parseInt(qtyMatch[1].replace(/,/g, ""), 10) : null;

    // An explicit quantity decides stock status (so "0 In Stock" is NOT in
    // stock); otherwise fall back to the bare "in stock" phrase.
    const inStock = parsedQty !== null
      ? parsedQty > 0
      : availText.toLowerCase().includes("in stock");

    // With no quantity on the page, an in-stock row is recorded as 1 unit
    // (a placeholder meaning "at least one").
    const stockQty = parsedQty !== null ? parsedQty : (inStock ? 1 : null);

    // An explicit "<n> week(s)/wk" wins. Otherwise in-stock parts get 0 weeks,
    // matching the Digi-Key and RS Components scrapers in this file, and
    // non-stocked parts stay null (unknown).
    const leadMatch = availText.match(/(\d+)\s*(?:week|wk)/i);
    let leadTimeWeeks: number | null = null;
    if (leadMatch) leadTimeWeeks = parseInt(leadMatch[1], 10);
    else if (inStock) leadTimeWeeks = 0;

    results.push({
      distributor: "mouser",
      formFactor: entry.form,
      speedLabel: entry.speed,
      partNumber,
      productName: productName.substring(0, 200),
      inStock,
      stockQty,
      leadTimeWeeks,
      priceUsd: price,
      productUrl: productUrl.substring(0, 500),
    });
  });

  return results;
}
|
|
|
|
// ─── Digi-Key ──────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Digi-Key catalog searches to scrape. Both target the pluggable fiber-optic
 * transceiver filter page (category 814) and differ only in the query string;
 * `form` and `speed` are stamped on each scraped row.
 *
 * NOTE(review): the `s=` value on the 100G search looks like an opaque
 * filter-state token copied from the site — confirm it is still valid.
 */
const DIGIKEY_SEARCHES = [
  {
    query: "q=100G&s=N4IgjCBcoA2oBhUBjEAzAhgGwM4FMBXAFwHsoBOABgDoA2AJgFYBOGoA",
    form: "QSFP28",
    speed: "100G",
  },
  {
    query: "q=400G",
    form: "QSFP-DD",
    speed: "400G",
  },
].map(({ query, form, speed }) => ({
  url: `https://www.digikey.com/en/products/filter/fiber-optic-transceivers-pluggable/814?${query}`,
  form,
  speed,
}));
|
|
|
|
/**
 * Scrape one Digi-Key filter page and turn each product row into a
 * {@link LeadEntry}.
 *
 * Digi-Key renders its tables with React, so only rows already present in the
 * server-rendered HTML are seen. Rows with fewer than four cells, or with a
 * part number shorter than three characters, are skipped.
 *
 * NOTE(review): cells 0-3 are read as part number, description, price and
 * quantity respectively — confirm against the current page layout.
 *
 * @param entry - Search descriptor: the page URL plus the form factor and
 *   speed label stamped on every resulting entry.
 * @returns The parsed entries, in page order.
 * @throws Error `Digi-Key <status>` when the HTTP response is not OK; network
 *   and timeout (20 s) errors from `fetch` propagate unchanged.
 */
async function scrapeDigikey(entry: { url: string; form: string; speed: string }): Promise<LeadEntry[]> {
  const res = await fetch(entry.url, { headers: HEADERS, signal: AbortSignal.timeout(20000) });
  if (!res.ok) throw new Error(`Digi-Key ${res.status}`);
  const $ = cheerio.load(await res.text());

  const DIGIKEY_ORIGIN = "https://www.digikey.com";

  // Resolve an href against the Digi-Key origin so that absolute links are
  // kept intact instead of being glued onto the origin a second time.
  const toProductUrl = (href: string): string => {
    try {
      return new URL(href, DIGIKEY_ORIGIN).toString();
    } catch {
      return `${DIGIKEY_ORIGIN}${href}`;
    }
  };

  const results: LeadEntry[] = [];

  // Digi-Key uses React-rendered tables — grab what's in the SSR HTML
  $("tr[data-partid], .product-table tbody tr").each((_, el) => {
    const cells = $(el).find("td");
    if (cells.length < 4) return;

    const partLink = $(cells[0]).find("a").first();
    const partNumber = partLink.text().trim() || $(cells[0]).text().trim();
    if (!partNumber || partNumber.length < 3) return;

    const productName = $(cells[1]).text().trim();

    // Strip currency symbols and thousands separators; NaN and 0 both become null.
    const priceText = $(cells[2]).text().trim();
    const price = parseFloat(priceText.replace(/[^0-9.]/g, "")) || null;

    // Take the FIRST run of digits (after dropping thousands separators).
    // Concatenating every digit in the cell would merge unrelated numbers,
    // e.g. "0 In Stock, 20 Weeks" would read as 20 units.
    const qtyMatch = $(cells[3]).text().replace(/,/g, "").match(/\d+/);
    const qty = qtyMatch ? parseInt(qtyMatch[0], 10) : 0;

    results.push({
      distributor: "digikey",
      formFactor: entry.form,
      speedLabel: entry.speed,
      partNumber: partNumber.substring(0, 100),
      productName: productName.substring(0, 200),
      inStock: qty > 0,
      stockQty: qty || null,
      // Stocked parts are treated as immediately available; otherwise unknown.
      leadTimeWeeks: qty === 0 ? null : 0,
      priceUsd: price,
      // Capped at 500 characters like the Mouser and RS Components scrapers.
      productUrl: toProductUrl(partLink.attr("href") ?? "").substring(0, 500),
    });
  });

  return results;
}
|
|
|
|
// ─── RS Components ─────────────────────────────────────────────────────────
|
|
|
|
/**
 * Scrape the RS Components (UK) 400G fibre-optic transceiver listing and turn
 * each product card into a {@link LeadEntry}.
 *
 * Cards are the `.product-info-wrap` elements; a card without a product title
 * is skipped. Every entry is labelled QSFP-DD / 400G because the search URL is
 * fixed to `st=400G`. The page exposes no quantity, so `stockQty` is always
 * null.
 *
 * @returns The parsed entries, in page order.
 * @throws Error `RS <status>` when the HTTP response is not OK; network and
 *   timeout (20 s) errors from `fetch` propagate unchanged.
 */
async function scrapeRsComponents(): Promise<LeadEntry[]> {
  const url = "https://uk.rs-online.com/web/c/optoelectronics/fibre-optic-components/fibre-optic-transceivers/?applied-dimensions=4294958026&sortby=P_PRICE&st=400G";
  const res = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(20000) });
  if (!res.ok) throw new Error(`RS ${res.status}`);
  const $ = cheerio.load(await res.text());

  const RS_ORIGIN = "https://uk.rs-online.com";

  // Resolve an href against the RS origin so that absolute links are kept
  // intact instead of being glued onto the origin a second time.
  const toProductUrl = (href: string): string => {
    try {
      return new URL(href, RS_ORIGIN).toString();
    } catch {
      return `${RS_ORIGIN}${href}`;
    }
  };

  // Word boundaries stop "available" from matching inside "Unavailable", and
  // the negative pattern rejects phrases such as "Not available",
  // "Not in stock" or "Available for back order".
  const POSITIVE_STOCK = /\b(?:in stock|available)\b/i;
  const NEGATIVE_STOCK = /\bnot\b|\bno longer\b|\bout of stock\b|\bunavailable\b|\bback[\s-]?order/i;

  const results: LeadEntry[] = [];

  $(".product-info-wrap").each((_, el) => {
    const card = $(el);
    const titleLink = card.find(".product-title a");

    const productName = titleLink.first().text().trim();
    if (!productName) return;

    const partNumber = card
      .find(".product-number")
      .first()
      .text()
      .replace("RS Stock No.", "")
      .trim();
    const productUrl = toProductUrl(titleLink.attr("href") ?? "");

    // Strip currency symbols and thousands separators; NaN and 0 both become null.
    const priceText = card.find(".price-info .price").first().text().trim();
    const price = parseFloat(priceText.replace(/[^0-9.]/g, "")) || null;

    const stockText = card.find(".stock-status").first().text().trim();
    const inStock = POSITIVE_STOCK.test(stockText) && !NEGATIVE_STOCK.test(stockText);

    results.push({
      distributor: "rs-components",
      formFactor: "QSFP-DD",
      speedLabel: "400G",
      partNumber,
      productName: productName.substring(0, 200),
      inStock,
      stockQty: null,
      // Stocked parts are treated as immediately available; otherwise unknown.
      leadTimeWeeks: inStock ? 0 : null,
      priceUsd: price, // GBP, stored as-is — currency conversion handled in forecast
      productUrl: productUrl.substring(0, 500),
    });
  });

  return results;
}
|
|
|
|
/** Resolve after `ms` milliseconds; used to space out requests to each site. */
function pauseMs(ms: number): Promise<void> {
  return new Promise<void>((resolve) => setTimeout(resolve, ms));
}

/** Insert a single scraped entry as one row of `distributor_lead_times`. */
async function insertLeadEntry(e: LeadEntry): Promise<void> {
  await pool.query(`
    INSERT INTO distributor_lead_times
      (distributor, form_factor, speed_label, part_number, product_name,
       in_stock, stock_qty, lead_time_weeks, price_usd, product_url)
    VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)
  `, [e.distributor, e.formFactor, e.speedLabel, e.partNumber, e.productName,
      e.inStock, e.stockQty, e.leadTimeWeeks, e.priceUsd, e.productUrl]);
}

/**
 * Run every distributor scrape (Mouser, Digi-Key, RS Components) in sequence
 * and store the results in `distributor_lead_times`.
 *
 * Each search is isolated in its own try/catch: a failed fetch, parse or
 * insert is logged as a warning and the run moves on to the next search, so
 * this function does not reject because of a single distributor. Within one
 * search, the first failing insert abandons the remaining rows of that search;
 * rows already inserted still count towards the final total.
 *
 * Per-search row caps: 50 for Mouser, 30 for Digi-Key and RS Components. The
 * per-search "products" log line reports how many rows were scraped, which
 * may exceed the number stored.
 */
export async function scrapeDistributorLeads(): Promise<void> {
  logger.info("Distributor lead time scraper starting");
  let recorded = 0;

  // Mouser
  for (const s of MOUSER_SEARCHES) {
    try {
      await pauseMs(3000);
      logger.info(`Mouser: ${s.speed} ${s.form}`);
      const entries = await scrapeMouser(s);
      for (const e of entries.slice(0, 50)) { // limit to 50 per search
        await insertLeadEntry(e);
        recorded++;
      }
      logger.info(`Mouser ${s.speed}: ${entries.length} products`);
    } catch (err) {
      logger.warn(`Mouser scrape failed: ${s.speed}`, { err });
    }
  }

  // Digi-Key
  for (const s of DIGIKEY_SEARCHES) {
    try {
      await pauseMs(4000);
      logger.info(`Digi-Key: ${s.speed} ${s.form}`);
      const entries = await scrapeDigikey(s);
      for (const e of entries.slice(0, 30)) {
        await insertLeadEntry(e);
        recorded++;
      }
      logger.info(`Digi-Key ${s.speed}: ${entries.length} products`);
    } catch (err) {
      logger.warn(`Digi-Key scrape failed: ${s.speed}`, { err });
    }
  }

  // RS Components
  try {
    await pauseMs(3000);
    const rsEntries = await scrapeRsComponents();
    for (const e of rsEntries.slice(0, 30)) {
      await insertLeadEntry(e);
      recorded++;
    }
    logger.info(`RS Components: ${rsEntries.length} products`);
  } catch (err) {
    logger.warn("RS Components scrape failed", { err });
  }

  logger.info(`Distributor lead time scraper done — ${recorded} records`);
}
|