fix: add fscom targeted detail verification mode

This commit is contained in:
Rene Fichtmueller 2026-05-09 11:15:36 +02:00
parent e73defe6bb
commit f64dbf7b6b

View File

@ -72,6 +72,7 @@ const MAX_DETAIL_PAGES_PER_RUN = parseInt(process.env["FS_MAX_DETAIL_PAGES_PER_R
const STOCK_FRESH_HOURS = parseInt(process.env["FS_STOCK_FRESH_HOURS"] ?? "12", 10); const STOCK_FRESH_HOURS = parseInt(process.env["FS_STOCK_FRESH_HOURS"] ?? "12", 10);
const FORCE_REVALIDATE = process.env["TIP_FORCE_REVALIDATE"] === "1"; const FORCE_REVALIDATE = process.env["TIP_FORCE_REVALIDATE"] === "1";
const ONLY_MISSING_IMAGES = process.env["FS_ONLY_MISSING_IMAGES"] === "1"; const ONLY_MISSING_IMAGES = process.env["FS_ONLY_MISSING_IMAGES"] === "1";
const DB_DETAIL_ONLY = process.env["FS_DB_DETAIL_ONLY"] === "1";
const PROXY_URLS = (process.env["PROXY_URLS"] ?? "") const PROXY_URLS = (process.env["PROXY_URLS"] ?? "")
.split(",") .split(",")
@ -241,8 +242,18 @@ function detectSpeed(text: string): { speed: string; speedGbps: number } | undef
} }
/**
 * Extracts the first distance mention (metres or kilometres) from free text.
 *
 * The number may be a plain integer, a decimal ("1.5") or a comma-grouped
 * value ("1,000"); grouping commas are removed from the result. The unit is
 * matched case-insensitively and returned in lower case.
 *
 * @param text Text to scan, e.g. a product name.
 * @returns The distance and unit joined without whitespace (e.g. "10km",
 *          "1000m"), or `undefined` when no distance is found.
 */
function detectReach(text: string): string | undefined {
  const hit = /(\d{1,3}(?:,\d{3})+|\d+(?:\.\d+)?)\s*(m|km)\b/i.exec(text);
  if (hit === null) {
    return undefined;
  }

  // Group 1 is the numeric part, group 2 the unit; both are present on any match.
  const distance = hit[1].replace(/,/g, "");
  const unit = hit[2].toLowerCase();
  return distance + unit;
}
/**
 * Infers the transmission medium of a product from free-form text.
 *
 * Checks run in this order and the first hit wins:
 *  1. "AOC"    – active optical cable wording.
 *  2. "Copper" – copper / DAC / twinax / RJ45 / BASE-T wording.
 *  3. "SMF"    – explicit single-mode wording (single-mode, SMF, OS2).
 *  4. "MMF"    – explicit multimode wording (multimode, MMF, OM1–OM5).
 *  5. "SMF"    – tokens treated as single-mode hints (CWDM/DWDM, BiDi,
 *                LX/LR/ER/ZR/DR/FR/PSM suffixes).
 *  6. "MMF"    – tokens treated as multimode hints (SX/SR suffixes).
 *
 * All matching is case-insensitive.
 *
 * @param text Text to classify, e.g. product name, part number and spec values.
 * @returns "AOC", "Copper", "SMF" or "MMF", or `undefined` when nothing matches.
 */
function detectFiberType(text: string): string | undefined {
  if (/active\s+optical|\baoc\b/i.test(text)) return "AOC";
  if (/copper|dac|twinax|direct\s+attach|rj-?45|base-t/i.test(text)) return "Copper";

  // Explicit media wording must outrank the suffix heuristics below: a text
  // such as "40GBASE-SR BiDi ... MMF" names its fibre type outright and would
  // otherwise be labelled SMF because of the "bidi" hint.
  if (/single.?mode|\bsmf\b|os2/i.test(text)) return "SMF";
  if (/multi.?mode|\bmmf\b|om[1-5]/i.test(text)) return "MMF";

  // Heuristic hints, used only when the text does not name the medium.
  if (/cwdm|dwdm|\bcw-|dw-|bidi|\blx\b|\blr\d*\b|\ber\d*\b|\bzr\d*\b|\bdr\d*\b|\bfr\d*\b|\bpsm\d*\b/i.test(text)) {
    return "SMF";
  }
  if (/\bsx\b|\bsr\d*\b/i.test(text)) return "MMF";

  return undefined;
}
// ── Types ────────────────────────────────────────────────────────────────────── // ── Types ──────────────────────────────────────────────────────────────────────
@ -777,8 +788,45 @@ export async function scrapeFs(): Promise<void> {
console.log(`Vendor ID: ${vendorId}`); console.log(`Vendor ID: ${vendorId}`);
// ── Phase 1: Discover product URLs ───────────────────────────────────────── // ── Phase 1: Discover product URLs ─────────────────────────────────────────
let productMap: Map<string, ProductSummary>;
if (DB_DETAIL_ONLY) {
console.log("\n[Phase 1] DB detail-only mode — using existing FS.COM product URLs with missing verification signals…");
const dbRows = await pool.query(
`
SELECT t.part_number, t.product_page_url
FROM transceivers t
JOIN vendors v ON v.id = t.vendor_id
WHERE v.name = 'FS.COM'
AND t.product_page_url IS NOT NULL
AND t.product_page_url != ''
AND t.product_page_url LIKE '%/products/%'
AND (
COALESCE(t.price_verified, false) = false
OR COALESCE(t.image_verified, false) = false
OR COALESCE(t.details_verified, false) = false
OR COALESCE(t.fiber_type, '') = ''
OR COALESCE(t.reach_label, '') = ''
)
ORDER BY
COALESCE(t.price_verified, false) DESC,
COALESCE(t.image_verified, false) DESC,
COALESCE(t.details_verified, false) ASC,
t.part_number
LIMIT $1
`,
[MAX_DETAIL_PAGES_PER_RUN]
);
productMap = new Map(
dbRows.rows.map((row) => {
const url = normalizeFsProductUrl(row.product_page_url as string);
const partNumber = row.part_number as string;
return [url, { url, name: partNumber, partNumber }];
})
);
} else {
console.log("\n[Phase 1] Collecting product URLs from category listing pages…"); console.log("\n[Phase 1] Collecting product URLs from category listing pages…");
const productMap = await collectProductUrls(proxyConfiguration); productMap = await collectProductUrls(proxyConfiguration);
}
if (productMap.size === 0) { if (productMap.size === 0) {
console.warn("[Phase 1] No products discovered — check selectors or proxy."); console.warn("[Phase 1] No products discovered — check selectors or proxy.");
@ -860,16 +908,19 @@ export async function scrapeFs(): Promise<void> {
const speedInfo = detectSpeed(detail.name); const speedInfo = detectSpeed(detail.name);
const reach = detectReach(detail.name); const reach = detectReach(detail.name);
const parsed = parseSpecTable(detail.specs); const parsed = parseSpecTable(detail.specs);
const textForInference = `${detail.name} ${detail.partNumber} ${Object.values(detail.specs).join(" ")}`;
const fiberType = parsed.fiberType ?? detectFiberType(textForInference);
const transceiverId = await findOrCreateScrapedTransceiver({ const transceiverId = await findOrCreateScrapedTransceiver({
partNumber: detail.partNumber, partNumber: detail.partNumber,
vendorId, vendorId,
productUrl: detail.url,
formFactor: ff, formFactor: ff,
speedGbps: speedInfo?.speedGbps, speedGbps: speedInfo?.speedGbps,
speed: speedInfo?.speed, speed: speedInfo?.speed,
reachLabel: reach ?? parsed.reachLabel, reachLabel: reach ?? parsed.reachLabel,
reachMeters: parsed.reachMeters, reachMeters: parsed.reachMeters,
fiberType: parsed.fiberType, fiberType,
wavelengths: parsed.wavelengths, wavelengths: parsed.wavelengths,
imageUrl: detail.imageUrl, imageUrl: detail.imageUrl,
category: "DataCenter", category: "DataCenter",
@ -922,7 +973,7 @@ export async function scrapeFs(): Promise<void> {
if (Object.keys(detail.specs).length > 0) { if (Object.keys(detail.specs).length > 0) {
const updated = await updateVerifiedSpecs({ const updated = await updateVerifiedSpecs({
transceiverId, transceiverId,
fiberType: parsed.fiberType, fiberType,
connector: parsed.connector, connector: parsed.connector,
wavelengths: parsed.wavelengths, wavelengths: parsed.wavelengths,
reachMeters: parsed.reachMeters, reachMeters: parsed.reachMeters,
@ -933,7 +984,7 @@ export async function scrapeFs(): Promise<void> {
domSupport: parsed.domSupport, domSupport: parsed.domSupport,
imageUrl: detail.imageUrl, imageUrl: detail.imageUrl,
datasheetUrl: detail.datasheetUrl, datasheetUrl: detail.datasheetUrl,
source: "fs.com", source: detail.url,
}); });
if (updated) specsUpdated++; if (updated) specsUpdated++;
} }