fix: add fscom targeted detail verification mode
This commit is contained in:
parent
e73defe6bb
commit
f64dbf7b6b
@ -72,6 +72,7 @@ const MAX_DETAIL_PAGES_PER_RUN = parseInt(process.env["FS_MAX_DETAIL_PAGES_PER_R
|
|||||||
const STOCK_FRESH_HOURS = parseInt(process.env["FS_STOCK_FRESH_HOURS"] ?? "12", 10);
|
// Read from FS_STOCK_FRESH_HOURS as a base-10 integer, defaulting to 12 when the variable is unset.
// NOTE(review): a non-numeric value makes parseInt return NaN — confirm consumers tolerate that.
const STOCK_FRESH_HOURS = parseInt(process.env["FS_STOCK_FRESH_HOURS"] ?? "12", 10);
|
||||||
const FORCE_REVALIDATE = process.env["TIP_FORCE_REVALIDATE"] === "1";
|
// True only when TIP_FORCE_REVALIDATE is exactly the string "1"; any other value (or unset) is false.
const FORCE_REVALIDATE = process.env["TIP_FORCE_REVALIDATE"] === "1";
|
||||||
const ONLY_MISSING_IMAGES = process.env["FS_ONLY_MISSING_IMAGES"] === "1";
|
// True only when FS_ONLY_MISSING_IMAGES is exactly the string "1".
const ONLY_MISSING_IMAGES = process.env["FS_ONLY_MISSING_IMAGES"] === "1";
|
||||||
|
// True only when FS_DB_DETAIL_ONLY is exactly the string "1". When set, Phase 1 of scrapeFs
// builds its product map from existing database rows instead of calling collectProductUrls.
const DB_DETAIL_ONLY = process.env["FS_DB_DETAIL_ONLY"] === "1";
|
||||||
|
|
||||||
const PROXY_URLS = (process.env["PROXY_URLS"] ?? "")
|
const PROXY_URLS = (process.env["PROXY_URLS"] ?? "")
|
||||||
.split(",")
|
.split(",")
|
||||||
@ -241,8 +242,18 @@ function detectSpeed(text: string): { speed: string; speedGbps: number } | undef
|
|||||||
}
|
}
|
||||||
|
|
||||||
function detectReach(text: string): string | undefined {
|
// Extracts the first distance token (a number followed by "m" or "km") from `text`.
// Returns the number and the lower-cased unit joined without a space (e.g. "10km"),
// or undefined when no such token is present.
function detectReach(text: string): string | undefined {
|
||||||
const m = text.match(/(\d+)\s*(m|km)\b/i);
|
// The number is either a thousands-separated integer ("1,000") or a plain integer/decimal ("0.5").
// Optional whitespace may sit between the number and the unit; the unit must end on a word boundary.
// NOTE(review): the `i` flag means an upper-case "M" also matches, so a token such as "100M"
// is reported as "100m" — confirm inputs never use "M" for megabit speeds.
const m = text.match(/(\d{1,3}(?:,\d{3})+|\d+(?:\.\d+)?)\s*(m|km)\b/i);
|
||||||
return m ? `${m[1]}${m[2].toLowerCase()}` : undefined;
|
// Thousands separators are stripped from the captured number, so "1,000 m" yields "1000m".
return m ? `${m[1].replace(/,/g, "")}${m[2].toLowerCase()}` : undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Infers a fiber/media label from free text using case-insensitive keyword patterns.
// The checks run in a fixed order and the first match wins: "AOC", then "Copper",
// then "SMF", then "MMF". Returns undefined when none of the patterns match.
// In scrapeFs this is used as the fallback when the parsed spec table has no fiber type.
function detectFiberType(text: string): string | undefined {
|
||||||
|
// Active optical cables are tested first, so text that also contains a later keyword still yields "AOC".
if (/active\s+optical|\baoc\b/i.test(text)) return "AOC";
|
||||||
|
// NOTE(review): "copper", "dac", "twinax" and "base-t" carry no word boundaries, so they also
// match inside longer tokens — confirm that is intended.
if (/copper|dac|twinax|direct\s+attach|rj-?45|base-t/i.test(text)) return "Copper";
|
||||||
|
// Single-mode indicators: explicit wording, OS2, CWDM/DWDM, "cw-"/"dw-" prefixes, BiDi, and the
// optic codes LX, LR, ER, ZR, DR, FR, PSM (each optionally followed by digits).
// NOTE(review): "^cw-" looks redundant, since "\bcw-" already matches at the start of the string.
if (/single.?mode|\bsmf\b|os2|cwdm|dwdm|\bcw-|^cw-|dw-|bidi|\blx\b|\blr\d*\b|\ber\d*\b|\bzr\d*\b|\bdr\d*\b|\bfr\d*\b|\bpsm\d*\b/i.test(text)) {
|
||||||
|
return "SMF";
|
||||||
|
}
|
||||||
|
// Multi-mode indicators: explicit wording, MMF, OM1–OM5, and the optic codes SX and SR
// (SR optionally followed by digits).
if (/multi.?mode|\bmmf\b|om[1-5]|\bsx\b|\bsr\d*\b/i.test(text)) return "MMF";
|
||||||
|
// No recognizable keyword: the fiber type is left undetermined.
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Types ──────────────────────────────────────────────────────────────────────
|
// ── Types ──────────────────────────────────────────────────────────────────────
|
||||||
@ -777,8 +788,45 @@ export async function scrapeFs(): Promise<void> {
|
|||||||
console.log(`Vendor ID: ${vendorId}`);
|
console.log(`Vendor ID: ${vendorId}`);
|
||||||
|
|
||||||
// ── Phase 1: Discover product URLs ─────────────────────────────────────────
|
// ── Phase 1: Discover product URLs ─────────────────────────────────────────
|
||||||
|
let productMap: Map<string, ProductSummary>;
|
||||||
|
if (DB_DETAIL_ONLY) {
|
||||||
|
console.log("\n[Phase 1] DB detail-only mode — using existing FS.COM product URLs with missing verification signals…");
|
||||||
|
const dbRows = await pool.query(
|
||||||
|
`
|
||||||
|
SELECT t.part_number, t.product_page_url
|
||||||
|
FROM transceivers t
|
||||||
|
JOIN vendors v ON v.id = t.vendor_id
|
||||||
|
WHERE v.name = 'FS.COM'
|
||||||
|
AND t.product_page_url IS NOT NULL
|
||||||
|
AND t.product_page_url != ''
|
||||||
|
AND t.product_page_url LIKE '%/products/%'
|
||||||
|
AND (
|
||||||
|
COALESCE(t.price_verified, false) = false
|
||||||
|
OR COALESCE(t.image_verified, false) = false
|
||||||
|
OR COALESCE(t.details_verified, false) = false
|
||||||
|
OR COALESCE(t.fiber_type, '') = ''
|
||||||
|
OR COALESCE(t.reach_label, '') = ''
|
||||||
|
)
|
||||||
|
ORDER BY
|
||||||
|
COALESCE(t.price_verified, false) DESC,
|
||||||
|
COALESCE(t.image_verified, false) DESC,
|
||||||
|
COALESCE(t.details_verified, false) ASC,
|
||||||
|
t.part_number
|
||||||
|
LIMIT $1
|
||||||
|
`,
|
||||||
|
[MAX_DETAIL_PAGES_PER_RUN]
|
||||||
|
);
|
||||||
|
productMap = new Map(
|
||||||
|
dbRows.rows.map((row) => {
|
||||||
|
const url = normalizeFsProductUrl(row.product_page_url as string);
|
||||||
|
const partNumber = row.part_number as string;
|
||||||
|
return [url, { url, name: partNumber, partNumber }];
|
||||||
|
})
|
||||||
|
);
|
||||||
|
} else {
|
||||||
console.log("\n[Phase 1] Collecting product URLs from category listing pages…");
|
console.log("\n[Phase 1] Collecting product URLs from category listing pages…");
|
||||||
const productMap = await collectProductUrls(proxyConfiguration);
|
productMap = await collectProductUrls(proxyConfiguration);
|
||||||
|
}
|
||||||
|
|
||||||
if (productMap.size === 0) {
|
if (productMap.size === 0) {
|
||||||
console.warn("[Phase 1] No products discovered — check selectors or proxy.");
|
console.warn("[Phase 1] No products discovered — check selectors or proxy.");
|
||||||
@ -860,16 +908,19 @@ export async function scrapeFs(): Promise<void> {
|
|||||||
const speedInfo = detectSpeed(detail.name);
|
const speedInfo = detectSpeed(detail.name);
|
||||||
const reach = detectReach(detail.name);
|
const reach = detectReach(detail.name);
|
||||||
const parsed = parseSpecTable(detail.specs);
|
const parsed = parseSpecTable(detail.specs);
|
||||||
|
const textForInference = `${detail.name} ${detail.partNumber} ${Object.values(detail.specs).join(" ")}`;
|
||||||
|
const fiberType = parsed.fiberType ?? detectFiberType(textForInference);
|
||||||
|
|
||||||
const transceiverId = await findOrCreateScrapedTransceiver({
|
const transceiverId = await findOrCreateScrapedTransceiver({
|
||||||
partNumber: detail.partNumber,
|
partNumber: detail.partNumber,
|
||||||
vendorId,
|
vendorId,
|
||||||
|
productUrl: detail.url,
|
||||||
formFactor: ff,
|
formFactor: ff,
|
||||||
speedGbps: speedInfo?.speedGbps,
|
speedGbps: speedInfo?.speedGbps,
|
||||||
speed: speedInfo?.speed,
|
speed: speedInfo?.speed,
|
||||||
reachLabel: reach ?? parsed.reachLabel,
|
reachLabel: reach ?? parsed.reachLabel,
|
||||||
reachMeters: parsed.reachMeters,
|
reachMeters: parsed.reachMeters,
|
||||||
fiberType: parsed.fiberType,
|
fiberType,
|
||||||
wavelengths: parsed.wavelengths,
|
wavelengths: parsed.wavelengths,
|
||||||
imageUrl: detail.imageUrl,
|
imageUrl: detail.imageUrl,
|
||||||
category: "DataCenter",
|
category: "DataCenter",
|
||||||
@ -922,7 +973,7 @@ export async function scrapeFs(): Promise<void> {
|
|||||||
if (Object.keys(detail.specs).length > 0) {
|
if (Object.keys(detail.specs).length > 0) {
|
||||||
const updated = await updateVerifiedSpecs({
|
const updated = await updateVerifiedSpecs({
|
||||||
transceiverId,
|
transceiverId,
|
||||||
fiberType: parsed.fiberType,
|
fiberType,
|
||||||
connector: parsed.connector,
|
connector: parsed.connector,
|
||||||
wavelengths: parsed.wavelengths,
|
wavelengths: parsed.wavelengths,
|
||||||
reachMeters: parsed.reachMeters,
|
reachMeters: parsed.reachMeters,
|
||||||
@ -933,7 +984,7 @@ export async function scrapeFs(): Promise<void> {
|
|||||||
domSupport: parsed.domSupport,
|
domSupport: parsed.domSupport,
|
||||||
imageUrl: detail.imageUrl,
|
imageUrl: detail.imageUrl,
|
||||||
datasheetUrl: detail.datasheetUrl,
|
datasheetUrl: detail.datasheetUrl,
|
||||||
source: "fs.com",
|
source: detail.url,
|
||||||
});
|
});
|
||||||
if (updated) specsUpdated++;
|
if (updated) specsUpdated++;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user