fix: improve vendor verification and fscom 1.6t variants
This commit is contained in:
parent
7da78a999d
commit
b26696f0d1
@ -45,6 +45,7 @@ interface Product {
|
|||||||
partNumber: string;
|
partNumber: string;
|
||||||
name: string;
|
name: string;
|
||||||
url: string;
|
url: string;
|
||||||
|
imageUrl?: string;
|
||||||
formFactor: string;
|
formFactor: string;
|
||||||
speed: string;
|
speed: string;
|
||||||
speedGbps: number;
|
speedGbps: number;
|
||||||
@ -156,11 +157,16 @@ function parseProductTable(
|
|||||||
|
|
||||||
const combined = `${rawPart} ${desc}`;
|
const combined = `${rawPart} ${desc}`;
|
||||||
const reach = detectReach(combined);
|
const reach = detectReach(combined);
|
||||||
|
const rawImg = $(cells[0]).find("img").first().attr("src") || $(cells[0]).find("img").first().attr("data-src");
|
||||||
|
const imageUrl = rawImg && !/(logo|placeholder|default|no-image|icon|sprite)/i.test(rawImg)
|
||||||
|
? (rawImg.startsWith("http") ? rawImg : BASE + rawImg)
|
||||||
|
: undefined;
|
||||||
|
|
||||||
products.push({
|
products.push({
|
||||||
partNumber: rawPart,
|
partNumber: rawPart,
|
||||||
name: desc || rawPart,
|
name: desc || rawPart,
|
||||||
url,
|
url,
|
||||||
|
imageUrl,
|
||||||
formFactor: cat.formFactor,
|
formFactor: cat.formFactor,
|
||||||
speed: cat.speed,
|
speed: cat.speed,
|
||||||
speedGbps: cat.speedGbps,
|
speedGbps: cat.speedGbps,
|
||||||
@ -211,6 +217,7 @@ export async function scrapeAscentOptics(): Promise<void> {
|
|||||||
await findOrCreateScrapedTransceiver({
|
await findOrCreateScrapedTransceiver({
|
||||||
partNumber: product.partNumber,
|
partNumber: product.partNumber,
|
||||||
vendorId,
|
vendorId,
|
||||||
|
productUrl: product.url,
|
||||||
formFactor: product.formFactor,
|
formFactor: product.formFactor,
|
||||||
speedGbps: product.speedGbps,
|
speedGbps: product.speedGbps,
|
||||||
speed: product.speed,
|
speed: product.speed,
|
||||||
@ -219,6 +226,7 @@ export async function scrapeAscentOptics(): Promise<void> {
|
|||||||
fiberType: product.fiberType,
|
fiberType: product.fiberType,
|
||||||
wavelengths: product.wavelength,
|
wavelengths: product.wavelength,
|
||||||
category: "DataCenter",
|
category: "DataCenter",
|
||||||
|
imageUrl: product.imageUrl,
|
||||||
});
|
});
|
||||||
totalProducts++;
|
totalProducts++;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
|||||||
@ -57,6 +57,12 @@ function speedFromSlug(slug: string): { speed: string; speedGbps: number } {
|
|||||||
return { speed: "Unknown", speedGbps: 0 };
|
return { speed: "Unknown", speedGbps: 0 };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Resolves the link speed for a product page, trusting the page title first.
 *
 * The title is run through `speedFromSlug`; only when that yields no positive
 * `speedGbps` is the URL slug consulted instead.
 *
 * @param title - Page title text.
 * @param slug - Slug derived from the page URL.
 * @returns The speed label and its numeric Gbps value, as produced by `speedFromSlug`.
 */
function speedFromTitleThenSlug(title: string, slug: string): { speed: string; speedGbps: number } {
  const fromTitle = speedFromSlug(title);
  // A non-positive speedGbps means the title gave no usable speed.
  return fromTitle.speedGbps > 0 ? fromTitle : speedFromSlug(slug);
}
|
||||||
|
|
||||||
function formFactorFromText(text: string): string {
|
function formFactorFromText(text: string): string {
|
||||||
const t = text.toUpperCase();
|
const t = text.toUpperCase();
|
||||||
if (/\bOSFP\b/.test(t)) return "OSFP";
|
if (/\bOSFP\b/.test(t)) return "OSFP";
|
||||||
@ -124,10 +130,44 @@ interface EoptolinkProduct {
|
|||||||
speedGbps: number;
|
speedGbps: number;
|
||||||
formFactor: string;
|
formFactor: string;
|
||||||
fiberType: string;
|
fiberType: string;
|
||||||
|
reachLabel?: string;
|
||||||
|
reachMeters?: number;
|
||||||
|
wavelength?: string;
|
||||||
|
imageUrl?: string;
|
||||||
category: string;
|
category: string;
|
||||||
pageUrl: string;
|
pageUrl: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Infers a reach (label plus metres) from free text such as a page title,
 * URL slug, or stripped page body.
 *
 * Explicit distances ("10 km", "500m") are checked first, longest first.
 * If none is present, Ethernet reach codes are used as a fallback. The codes
 * accept an optional lane-count suffix, so "LR4", "DR4", "FR4", "ER4" and
 * "ZR4" resolve the same way as "LR", "DR", "FR", "ER" and "ZR".
 *
 * Multi-lane short-reach codes ("SR4", "SR8", ...) map to 100m rather than
 * the 300m used for plain "SR".
 * NOTE(review): the 100m figure follows the nominal OM4 reach for parallel
 * SR optics — confirm it matches what the catalogue expects.
 *
 * Matching is case-insensitive because URL slugs are lower-case.
 *
 * @param text - Text to scan.
 * @returns The first matching reach, or `undefined` when nothing matches.
 */
function reachFromText(text: string): { label: string; meters: number } | undefined {
  const patterns: [RegExp, string, number][] = [
    // Explicit distances, longest first.
    [/\b120\s*km\b/i, "120km", 120000],
    [/\b100\s*km\b/i, "100km", 100000],
    [/\b80\s*km\b/i, "80km", 80000],
    [/\b40\s*km\b/i, "40km", 40000],
    [/\b20\s*km\b/i, "20km", 20000],
    [/\b10\s*km\b/i, "10km", 10000],
    [/\b2\s*km\b/i, "2km", 2000],
    [/\b500\s*m\b/i, "500m", 500],
    [/\b300\s*m\b/i, "300m", 300],
    [/\b100\s*m\b/i, "100m", 100],
    // Reach codes. `\d*` lets the lane-count variants (LR4, DR8, ...) match;
    // a bare `\bLR\b` cannot, because the trailing digit is a word character.
    [/\bZR\d*\b/i, "80km", 80000],
    [/\bER\d*\b/i, "40km", 40000],
    [/\bLR\d*\b/i, "10km", 10000],
    [/\bFR\d*\b/i, "2km", 2000],
    [/\bDR\d*\b/i, "500m", 500],
    // Multi-lane SR must be tested before plain SR.
    [/\bSR\d+\b/i, "100m", 100],
    [/\bSR\b/i, "300m", 300],
  ];
  for (const [regex, label, meters] of patterns) {
    if (regex.test(text)) return { label, meters };
  }
  return undefined;
}
|
||||||
|
|
||||||
|
/**
 * Pulls the first wavelength mentioned in the text.
 *
 * Looks for three or four digits followed (optionally after whitespace) by
 * "nm", case-insensitively, and returns just the digits.
 *
 * @param text - Text to scan.
 * @returns The digits of the first match (e.g. "1310"), or an empty string when none is found.
 */
function wavelengthFromText(text: string): string {
  const found = /(\d{3,4})\s*nm/i.exec(text);
  if (found === null) {
    return "";
  }
  return found[1];
}
|
||||||
|
|
||||||
function parseProductPage(html: string, pageUrl: string): EoptolinkProduct | null {
|
function parseProductPage(html: string, pageUrl: string): EoptolinkProduct | null {
|
||||||
// Page title
|
// Page title
|
||||||
const titleMatch = html.match(/<title>([^<]+)/i) || html.match(/<h1[^>]*>([^<]{5,80})</i);
|
const titleMatch = html.match(/<title>([^<]+)/i) || html.match(/<h1[^>]*>([^<]{5,80})</i);
|
||||||
@ -138,13 +178,36 @@ function parseProductPage(html: string, pageUrl: string): EoptolinkProduct | nul
|
|||||||
const pnRegex = /E[A-Z]{2,5}-\d{2,3}[A-Z0-9]{1,3}(?:-\d{1,3})?(?:-[A-Z0-9]{1,6})*/g;
|
const pnRegex = /E[A-Z]{2,5}-\d{2,3}[A-Z0-9]{1,3}(?:-\d{1,3})?(?:-[A-Z0-9]{1,6})*/g;
|
||||||
const partNumbers = [...new Set([...(html.matchAll(pnRegex) ?? [])].map((m) => m[0].trim()))];
|
const partNumbers = [...new Set([...(html.matchAll(pnRegex) ?? [])].map((m) => m[0].trim()))];
|
||||||
|
|
||||||
const slug = pageUrl.split("/").slice(-2).join("-");
|
const slug = pageUrl.split("/").filter(Boolean).slice(-2).join("-");
|
||||||
const { speed, speedGbps } = speedFromSlug(slug + " " + pageTitle);
|
const titleEvidence = `${pageTitle} ${slug}`;
|
||||||
const formFactor = formFactorFromText(pageTitle + " " + slug);
|
const pageEvidence = `${titleEvidence} ${html.replace(/<[^>]+>/g, " ").slice(0, 12000)}`;
|
||||||
const fiberType = fiberFromText(pageTitle + " " + slug);
|
const { speed, speedGbps } = speedFromTitleThenSlug(pageTitle, slug);
|
||||||
const category = categoryFromText(pageTitle + " " + slug);
|
const formFactor = formFactorFromText(titleEvidence);
|
||||||
|
const fiberType = fiberFromText(titleEvidence);
|
||||||
|
const reach = reachFromText(pageEvidence);
|
||||||
|
const wavelength = wavelengthFromText(pageEvidence);
|
||||||
|
const category = categoryFromText(titleEvidence);
|
||||||
|
const rawImage =
|
||||||
|
html.match(/<meta\s+property="og:image"\s+content="([^"]+)"/i)?.[1] ||
|
||||||
|
html.match(/<img[^>]+src="([^"]+)"/i)?.[1];
|
||||||
|
const imageUrl = rawImage && !/(logo|placeholder|default|no-image|icon|sprite)/i.test(rawImage)
|
||||||
|
? (rawImage.startsWith("http") ? rawImage : `${BASE}${rawImage.startsWith("/") ? "" : "/"}${rawImage}`)
|
||||||
|
: undefined;
|
||||||
|
|
||||||
return { pageTitle, partNumbers, speed, speedGbps, formFactor, fiberType, category, pageUrl };
|
return {
|
||||||
|
pageTitle,
|
||||||
|
partNumbers,
|
||||||
|
speed,
|
||||||
|
speedGbps,
|
||||||
|
formFactor,
|
||||||
|
fiberType,
|
||||||
|
reachLabel: reach?.label,
|
||||||
|
reachMeters: reach?.meters,
|
||||||
|
wavelength,
|
||||||
|
imageUrl,
|
||||||
|
category,
|
||||||
|
pageUrl,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Main ────────────────────────────────────────────────────────────────────
|
// ── Main ────────────────────────────────────────────────────────────────────
|
||||||
@ -193,15 +256,47 @@ export async function scrapeEoptolink(): Promise<void> {
|
|||||||
|
|
||||||
for (const partNumber of namesToSeed) {
|
for (const partNumber of namesToSeed) {
|
||||||
try {
|
try {
|
||||||
await findOrCreateScrapedTransceiver({
|
const txId = await findOrCreateScrapedTransceiver({
|
||||||
partNumber: partNumber.slice(0, 80),
|
partNumber: partNumber.slice(0, 80),
|
||||||
vendorId,
|
vendorId,
|
||||||
|
productUrl: url,
|
||||||
formFactor: product.formFactor,
|
formFactor: product.formFactor,
|
||||||
speedGbps: product.speedGbps,
|
speedGbps: product.speedGbps,
|
||||||
speed: product.speed,
|
speed: product.speed,
|
||||||
|
reachMeters: product.reachMeters,
|
||||||
|
reachLabel: product.reachLabel,
|
||||||
fiberType: product.fiberType,
|
fiberType: product.fiberType,
|
||||||
|
wavelengths: product.wavelength,
|
||||||
category: product.category,
|
category: product.category,
|
||||||
|
imageUrl: product.imageUrl,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
await pool.query(`
|
||||||
|
UPDATE transceivers
|
||||||
|
SET form_factor = $2,
|
||||||
|
speed_gbps = $3,
|
||||||
|
speed = $4,
|
||||||
|
reach_meters = CASE WHEN $5::int IS NOT NULL THEN $5::int ELSE reach_meters END,
|
||||||
|
reach_label = COALESCE(NULLIF($6::text, ''), reach_label),
|
||||||
|
fiber_type = COALESCE(NULLIF($7::text, ''), fiber_type),
|
||||||
|
wavelengths = COALESCE(NULLIF($8::text, ''), wavelengths),
|
||||||
|
product_page_url = COALESCE(NULLIF($9::text, ''), product_page_url),
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE id = $1
|
||||||
|
AND vendor_id = $10
|
||||||
|
`, [
|
||||||
|
txId,
|
||||||
|
product.formFactor,
|
||||||
|
product.speedGbps,
|
||||||
|
product.speed,
|
||||||
|
product.reachMeters ?? null,
|
||||||
|
product.reachLabel ?? null,
|
||||||
|
product.fiberType || null,
|
||||||
|
product.wavelength || null,
|
||||||
|
url,
|
||||||
|
vendorId,
|
||||||
|
]);
|
||||||
|
|
||||||
added++;
|
added++;
|
||||||
} catch (dbErr) {
|
} catch (dbErr) {
|
||||||
// Duplicate or constraint error — expected for re-runs
|
// Duplicate or constraint error — expected for re-runs
|
||||||
|
|||||||
@ -226,7 +226,7 @@ function detectFormFactor(text: string): string | undefined {
|
|||||||
|
|
||||||
function detectSpeed(text: string): { speed: string; speedGbps: number } | undefined {
|
function detectSpeed(text: string): { speed: string; speedGbps: number } | undefined {
|
||||||
const patterns: [RegExp, string, number][] = [
|
const patterns: [RegExp, string, number][] = [
|
||||||
[/1\.6\s*t/i, "1.6T", 1600],
|
[/1[\.,]6\s*t|1600\s*g/i, "1.6T", 1600],
|
||||||
[/800\s*g/i, "800G", 800],
|
[/800\s*g/i, "800G", 800],
|
||||||
[/400\s*g/i, "400G", 400],
|
[/400\s*g/i, "400G", 400],
|
||||||
[/200\s*g/i, "200G", 200],
|
[/200\s*g/i, "200G", 200],
|
||||||
@ -248,6 +248,15 @@ function detectReach(text: string): string | undefined {
|
|||||||
return m ? `${m[1].replace(/,/g, "")}${m[2].toLowerCase()}` : undefined;
|
return m ? `${m[1].replace(/,/g, "")}${m[2].toLowerCase()}` : undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Converts a reach label such as "500m", "10km" or "1.5 km" into whole metres.
 *
 * Leading/trailing whitespace and whitespace between the number and the unit
 * are tolerated, so labels that were not normalised beforehand still convert.
 *
 * @param label - Reach label; may be undefined or empty.
 * @returns The reach rounded to the nearest metre, or `undefined` when the
 *   label is missing, not of the form `<number><m|km>`, or not positive.
 */
function reachMetersFromLabel(label?: string): number | undefined {
  if (!label) return undefined;
  const match = label.trim().match(/^(\d+(?:\.\d+)?)\s*(m|km)$/i);
  if (!match) return undefined;
  const value = parseFloat(match[1]);
  if (!Number.isFinite(value) || value <= 0) return undefined;
  return match[2].toLowerCase() === "km" ? Math.round(value * 1000) : Math.round(value);
}
|
||||||
|
|
||||||
function detectFiberType(text: string): string | undefined {
|
function detectFiberType(text: string): string | undefined {
|
||||||
if (/active\s+optical|\baoc\b/i.test(text)) return "AOC";
|
if (/active\s+optical|\baoc\b/i.test(text)) return "AOC";
|
||||||
if (/copper|dac|twinax|direct\s+attach|rj-?45|base-t/i.test(text)) return "Copper";
|
if (/copper|dac|twinax|direct\s+attach|rj-?45|base-t/i.test(text)) return "Copper";
|
||||||
@ -946,6 +955,8 @@ export async function scrapeFs(): Promise<void> {
|
|||||||
const parsed = parseSpecTable(detail.specs);
|
const parsed = parseSpecTable(detail.specs);
|
||||||
const textForInference = `${detail.name} ${detail.partNumber} ${Object.values(detail.specs).join(" ")}`;
|
const textForInference = `${detail.name} ${detail.partNumber} ${Object.values(detail.specs).join(" ")}`;
|
||||||
const fiberType = parsed.fiberType ?? detectFiberType(textForInference);
|
const fiberType = parsed.fiberType ?? detectFiberType(textForInference);
|
||||||
|
const reachLabel = reach ?? parsed.reachLabel;
|
||||||
|
const reachMeters = parsed.reachMeters ?? reachMetersFromLabel(reachLabel);
|
||||||
|
|
||||||
const transceiverId = detail.targetTransceiverId ?? (await findOrCreateScrapedTransceiver({
|
const transceiverId = detail.targetTransceiverId ?? (await findOrCreateScrapedTransceiver({
|
||||||
partNumber: detail.partNumber,
|
partNumber: detail.partNumber,
|
||||||
@ -954,8 +965,8 @@ export async function scrapeFs(): Promise<void> {
|
|||||||
formFactor: ff,
|
formFactor: ff,
|
||||||
speedGbps: speedInfo?.speedGbps,
|
speedGbps: speedInfo?.speedGbps,
|
||||||
speed: speedInfo?.speed,
|
speed: speedInfo?.speed,
|
||||||
reachLabel: reach ?? parsed.reachLabel,
|
reachLabel,
|
||||||
reachMeters: parsed.reachMeters,
|
reachMeters,
|
||||||
fiberType,
|
fiberType,
|
||||||
wavelengths: parsed.wavelengths,
|
wavelengths: parsed.wavelengths,
|
||||||
imageUrl: detail.imageUrl,
|
imageUrl: detail.imageUrl,
|
||||||
@ -968,7 +979,12 @@ export async function scrapeFs(): Promise<void> {
|
|||||||
SET product_page_url = COALESCE(NULLIF(product_page_url, ''), $2),
|
SET product_page_url = COALESCE(NULLIF(product_page_url, ''), $2),
|
||||||
form_factor = COALESCE(NULLIF(form_factor, ''), $3),
|
form_factor = COALESCE(NULLIF(form_factor, ''), $3),
|
||||||
speed_gbps = CASE WHEN speed_gbps IS NULL OR speed_gbps = 0 THEN COALESCE($4, speed_gbps) ELSE speed_gbps END,
|
speed_gbps = CASE WHEN speed_gbps IS NULL OR speed_gbps = 0 THEN COALESCE($4, speed_gbps) ELSE speed_gbps END,
|
||||||
speed = COALESCE(NULLIF(speed, ''), $5),
|
speed = CASE
|
||||||
|
WHEN $5::text IS NOT NULL
|
||||||
|
AND (speed IS NULL OR speed = '' OR speed = 'Unknown' OR $4::numeric = speed_gbps)
|
||||||
|
THEN $5::text
|
||||||
|
ELSE speed
|
||||||
|
END,
|
||||||
reach_label = COALESCE(NULLIF(reach_label, ''), $6),
|
reach_label = COALESCE(NULLIF(reach_label, ''), $6),
|
||||||
reach_meters = CASE WHEN reach_meters IS NULL OR reach_meters = 0 THEN COALESCE($7, reach_meters) ELSE reach_meters END,
|
reach_meters = CASE WHEN reach_meters IS NULL OR reach_meters = 0 THEN COALESCE($7, reach_meters) ELSE reach_meters END,
|
||||||
fiber_type = COALESCE(NULLIF(fiber_type, ''), $8),
|
fiber_type = COALESCE(NULLIF(fiber_type, ''), $8),
|
||||||
@ -981,8 +997,8 @@ export async function scrapeFs(): Promise<void> {
|
|||||||
ff,
|
ff,
|
||||||
speedInfo?.speedGbps ?? null,
|
speedInfo?.speedGbps ?? null,
|
||||||
speedInfo?.speed ?? null,
|
speedInfo?.speed ?? null,
|
||||||
reach ?? parsed.reachLabel ?? null,
|
reachLabel ?? null,
|
||||||
parsed.reachMeters ?? null,
|
reachMeters ?? null,
|
||||||
fiberType ?? null,
|
fiberType ?? null,
|
||||||
parsed.wavelengths ?? null,
|
parsed.wavelengths ?? null,
|
||||||
]
|
]
|
||||||
@ -1038,7 +1054,7 @@ export async function scrapeFs(): Promise<void> {
|
|||||||
|
|
||||||
const hasSourceDetails =
|
const hasSourceDetails =
|
||||||
Object.keys(detail.specs).length > 0 ||
|
Object.keys(detail.specs).length > 0 ||
|
||||||
Boolean(fiberType || parsed.connector || parsed.wavelengths || parsed.reachLabel || reach);
|
Boolean(fiberType || parsed.connector || parsed.wavelengths || reachLabel);
|
||||||
|
|
||||||
if (hasSourceDetails) {
|
if (hasSourceDetails) {
|
||||||
const updated = await updateVerifiedSpecs({
|
const updated = await updateVerifiedSpecs({
|
||||||
@ -1046,8 +1062,8 @@ export async function scrapeFs(): Promise<void> {
|
|||||||
fiberType,
|
fiberType,
|
||||||
connector: parsed.connector,
|
connector: parsed.connector,
|
||||||
wavelengths: parsed.wavelengths,
|
wavelengths: parsed.wavelengths,
|
||||||
reachMeters: parsed.reachMeters,
|
reachMeters,
|
||||||
reachLabel: reach ?? parsed.reachLabel,
|
reachLabel,
|
||||||
powerConsumptionW: parsed.powerConsumptionW,
|
powerConsumptionW: parsed.powerConsumptionW,
|
||||||
tempRange: parsed.tempRange,
|
tempRange: parsed.tempRange,
|
||||||
modulation: parsed.modulation,
|
modulation: parsed.modulation,
|
||||||
|
|||||||
@ -31,6 +31,7 @@ interface Product {
|
|||||||
reachMeters?: number;
|
reachMeters?: number;
|
||||||
fiberType?: string;
|
fiberType?: string;
|
||||||
wavelength?: string;
|
wavelength?: string;
|
||||||
|
imageUrl?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
function sleep(ms: number): Promise<void> {
|
function sleep(ms: number): Promise<void> {
|
||||||
@ -116,6 +117,13 @@ function parseProductList(html: string): Product[] {
|
|||||||
|
|
||||||
const ff = detectFormFactor(name);
|
const ff = detectFormFactor(name);
|
||||||
const reach = detectReach(name);
|
const reach = detectReach(name);
|
||||||
|
const rawImg =
|
||||||
|
$(el).find("img").first().attr("data-src") ||
|
||||||
|
$(el).find("img").first().attr("data-lazy-src") ||
|
||||||
|
$(el).find("img").first().attr("src");
|
||||||
|
const imageUrl = rawImg && !/(logo|placeholder|default|no-image|icon|sprite)/i.test(rawImg)
|
||||||
|
? (rawImg.startsWith("http") ? rawImg : BASE + rawImg)
|
||||||
|
: undefined;
|
||||||
|
|
||||||
products.push({
|
products.push({
|
||||||
partNumber,
|
partNumber,
|
||||||
@ -127,6 +135,7 @@ function parseProductList(html: string): Product[] {
|
|||||||
reachMeters: reach?.meters,
|
reachMeters: reach?.meters,
|
||||||
fiberType: detectFiber(name),
|
fiberType: detectFiber(name),
|
||||||
wavelength: detectWavelength(name),
|
wavelength: detectWavelength(name),
|
||||||
|
imageUrl,
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -150,11 +159,19 @@ function parseProductList(html: string): Product[] {
|
|||||||
}
|
}
|
||||||
const ff = detectFormFactor(name);
|
const ff = detectFormFactor(name);
|
||||||
const reach = detectReach(name);
|
const reach = detectReach(name);
|
||||||
|
const rawImg =
|
||||||
|
$(el).find("img").first().attr("data-src") ||
|
||||||
|
$(el).find("img").first().attr("data-lazy-src") ||
|
||||||
|
$(el).find("img").first().attr("src");
|
||||||
|
const imageUrl = rawImg && !/(logo|placeholder|default|no-image|icon|sprite)/i.test(rawImg)
|
||||||
|
? (rawImg.startsWith("http") ? rawImg : BASE + rawImg)
|
||||||
|
: undefined;
|
||||||
products.push({
|
products.push({
|
||||||
partNumber: name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60),
|
partNumber: name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60),
|
||||||
name, url, price, ...ff,
|
name, url, price, ...ff,
|
||||||
reachLabel: reach?.label, reachMeters: reach?.meters,
|
reachLabel: reach?.label, reachMeters: reach?.meters,
|
||||||
fiberType: detectFiber(name), wavelength: detectWavelength(name),
|
fiberType: detectFiber(name), wavelength: detectWavelength(name),
|
||||||
|
imageUrl,
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -224,6 +241,7 @@ export async function scrapeGaoTek(): Promise<void> {
|
|||||||
const txId = await findOrCreateScrapedTransceiver({
|
const txId = await findOrCreateScrapedTransceiver({
|
||||||
partNumber: product.partNumber,
|
partNumber: product.partNumber,
|
||||||
vendorId,
|
vendorId,
|
||||||
|
productUrl: product.url,
|
||||||
formFactor: product.formFactor,
|
formFactor: product.formFactor,
|
||||||
speedGbps: product.speedGbps,
|
speedGbps: product.speedGbps,
|
||||||
speed: product.speed,
|
speed: product.speed,
|
||||||
@ -232,6 +250,7 @@ export async function scrapeGaoTek(): Promise<void> {
|
|||||||
fiberType: product.fiberType,
|
fiberType: product.fiberType,
|
||||||
wavelengths: product.wavelength,
|
wavelengths: product.wavelength,
|
||||||
category: "DataCenter",
|
category: "DataCenter",
|
||||||
|
imageUrl: product.imageUrl,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (product.price && product.price > 0) {
|
if (product.price && product.price > 0) {
|
||||||
|
|||||||
@ -15,7 +15,15 @@
|
|||||||
*
|
*
|
||||||
* Rate limited: 1 req/2sec.
|
* Rate limited: 1 req/2sec.
|
||||||
*/
|
*/
|
||||||
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation, upsertStockObservation } from "../utils/db";
|
import {
|
||||||
|
pool,
|
||||||
|
findOrCreateScrapedTransceiver,
|
||||||
|
ensureVendor,
|
||||||
|
upsertPriceObservation,
|
||||||
|
upsertStockObservation,
|
||||||
|
markImageVerified,
|
||||||
|
markDetailsVerified,
|
||||||
|
} from "../utils/db";
|
||||||
import { contentHash } from "../utils/hash";
|
import { contentHash } from "../utils/hash";
|
||||||
import { readFileSync, writeFileSync, existsSync } from "node:fs";
|
import { readFileSync, writeFileSync, existsSync } from "node:fs";
|
||||||
import { join } from "node:path";
|
import { join } from "node:path";
|
||||||
@ -29,7 +37,8 @@ const HEADERS = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Limit detail-page fetches per run to stay reasonable
|
// Limit detail-page fetches per run to stay reasonable
|
||||||
/**
 * Parses the per-run detail-page limit from its environment-variable text.
 *
 * @param raw - Raw value of `NADDOD_MAX_DETAIL_PAGES`; may be undefined or empty.
 * @returns The limit clamped to 1..1000; 600 when the value is missing, empty,
 *   or not a number.
 */
function parseDetailPageLimit(raw: string | undefined): number {
  const parsed = parseInt(raw || "600", 10);
  // parseInt returns NaN for non-numeric text, and NaN passes straight through
  // Math.min/Math.max, so it has to be caught before clamping.
  if (Number.isNaN(parsed)) return 600;
  return Math.max(1, Math.min(1000, parsed));
}

const MAX_DETAIL_PAGES = parseDetailPageLimit(process.env["NADDOD_MAX_DETAIL_PAGES"]);
// Set to true only when NADDOD_DB_DETAIL_ONLY is exactly "1".
const DB_DETAIL_ONLY = process.env["NADDOD_DB_DETAIL_ONLY"] === "1";
|
||||||
|
|
||||||
// Cursor file: persists across runs so each run covers the next 600 URLs
|
// Cursor file: persists across runs so each run covers the next MAX_DETAIL_PAGES URLs (600 by default)
|
||||||
const CURSOR_FILE = join(process.env["TIP_STORAGE_DIR"] ?? "/opt/tip", "naddod-cursor.json");
|
const CURSOR_FILE = join(process.env["TIP_STORAGE_DIR"] ?? "/opt/tip", "naddod-cursor.json");
|
||||||
@ -88,6 +97,26 @@ function detectSpeedGbps(text: string): { speed: string; speedGbps: number } {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function detectReach(text: string): { label: string; meters: number } | undefined {
|
function detectReach(text: string): { label: string; meters: number } | undefined {
|
||||||
|
const cableCode = text.match(/\b(?:AOC|DAC|CU|COPPER|MCP|MFS)[A-Z0-9._-]*?(\d+(?:\.\d+)?)M\b/i);
|
||||||
|
if (cableCode) {
|
||||||
|
const value = parseFloat(cableCode[1]);
|
||||||
|
if (Number.isFinite(value) && value > 0 && value <= 500) {
|
||||||
|
return { label: `${String(value).replace(/\.0$/, "")}m`, meters: Math.round(value) };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const generic = text.match(/\b(\d+(?:\.\d+)?)\s*(km|m)\b/i);
|
||||||
|
if (generic) {
|
||||||
|
const value = parseFloat(generic[1]);
|
||||||
|
const unit = generic[2].toLowerCase();
|
||||||
|
if (Number.isFinite(value) && value > 0) {
|
||||||
|
const meters = unit === "km" ? Math.round(value * 1000) : Math.round(value);
|
||||||
|
const labelValue = String(value).replace(/\.0$/, "");
|
||||||
|
const label = unit === "km" ? `${labelValue}km` : `${labelValue}m`;
|
||||||
|
return { label, meters };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const patterns: [RegExp, string, number][] = [
|
const patterns: [RegExp, string, number][] = [
|
||||||
[/\b120\s*km\b/i, "120km", 120000],
|
[/\b120\s*km\b/i, "120km", 120000],
|
||||||
[/\b80\s*km\b/i, "80km", 80000],
|
[/\b80\s*km\b/i, "80km", 80000],
|
||||||
@ -102,8 +131,9 @@ function detectReach(text: string): { label: string; meters: number } | undefine
|
|||||||
[/\b150\s*m\b/i, "150m", 150],
|
[/\b150\s*m\b/i, "150m", 150],
|
||||||
[/\b100\s*m\b/i, "100m", 100],
|
[/\b100\s*m\b/i, "100m", 100],
|
||||||
[/\bLR4\b/, "10km", 10000], [/\bLR\b/, "10km", 10000],
|
[/\bLR4\b/, "10km", 10000], [/\bLR\b/, "10km", 10000],
|
||||||
[/\bER4?\b/, "40km", 40000], [/\bZR4?\b/, "80km", 80000],
|
[/\bER4?\b/, "40km", 40000], [/\bZRP?\b|\bZR4?\b/, "80km", 80000],
|
||||||
[/\bSR4?\b/, "300m", 300], [/\bDR4?\b/, "500m", 500],
|
[/\bSR\d*\b|\bVR\d*\b/, "100m", 100],
|
||||||
|
[/\bDR4?\b|\bXDR\d*\b/, "500m", 500],
|
||||||
[/\bFR4?\b/, "2km", 2000],
|
[/\bFR4?\b/, "2km", 2000],
|
||||||
];
|
];
|
||||||
for (const [re, label, meters] of patterns) {
|
for (const [re, label, meters] of patterns) {
|
||||||
@ -113,9 +143,10 @@ function detectReach(text: string): { label: string; meters: number } | undefine
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Classifies the medium of a product from free text.
 *
 * Checks run in priority order:
 *   1. copper keywords (DAC, twinax, RJ45, BASE-T)             -> "Copper"
 *   2. AOC keywords, SR/VR reach codes, 850nm                  -> "MMF"
 *   3. single-mode keywords, LX/LR/ER/ZR/DR/FR/XDR codes,
 *      PSM/BiDi/CWDM/DWDM, 1310nm/1550nm                       -> "SMF"
 *   4. multi-mode keywords, SX                                 -> "MMF"
 *
 * Reach codes must stand alone: they may not touch a letter on either side,
 * though a digit suffix such as the "4" in "SR4" is allowed. This keeps
 * ordinary words ("from", "free", "driver") from being read as FR/DR codes,
 * and lets a code match at the very start or end of the text.
 *
 * @param text - Product name or page text to inspect.
 * @returns "Copper", "MMF", "SMF", or an empty string when nothing matches.
 */
function detectFiber(text: string): string {
  if (/copper|dac|twinax|rj.?45|base-t/i.test(text)) return "Copper";
  if (/aoc|active.?optical|active.?fiber|(?:^|[^a-z])(?:sr|vr)\d*(?![a-z])|850\s*nm/i.test(text)) return "MMF";
  if (/single.?mode|smf|(?:^|[^a-z])(?:lx|lr|er|zrp?|dr|fr|xdr)\d*(?![a-z])|psm|bidi|cwdm|dwdm|1310\s*nm|1550\s*nm/i.test(text)) return "SMF";
  if (/multi.?mode|mmf|(?:^|[^a-z])sx(?![a-z])/i.test(text)) return "MMF";
  return "";
}
|
||||||
|
|
||||||
@ -175,6 +206,7 @@ async function fetchText(url: string): Promise<string> {
|
|||||||
*/
|
*/
|
||||||
function parseDetailPage(html: string, url: string): {
|
function parseDetailPage(html: string, url: string): {
|
||||||
name: string;
|
name: string;
|
||||||
|
imageUrl?: string;
|
||||||
price?: number;
|
price?: number;
|
||||||
stock: { qty?: number; confidence: 1 | 2 } | null;
|
stock: { qty?: number; confidence: 1 | 2 } | null;
|
||||||
} | null {
|
} | null {
|
||||||
@ -187,6 +219,20 @@ function parseDetailPage(html: string, url: string): {
|
|||||||
if (!name || name.length < 10) return null;
|
if (!name || name.length < 10) return null;
|
||||||
if (!isTransceiver(name)) return null;
|
if (!isTransceiver(name)) return null;
|
||||||
|
|
||||||
|
const imageUrl = (() => {
|
||||||
|
const candidates = [
|
||||||
|
html.match(/<meta\s+property="og:image"\s+content="([^"]+)"/i)?.[1],
|
||||||
|
html.match(/<meta\s+name="twitter:image"\s+content="([^"]+)"/i)?.[1],
|
||||||
|
html.match(/"image"\s*:\s*"([^"]+)"/i)?.[1],
|
||||||
|
].filter(Boolean) as string[];
|
||||||
|
|
||||||
|
const img = candidates.find((candidate) =>
|
||||||
|
!/(logo|placeholder|default|no-image|icon|sprite)/i.test(candidate)
|
||||||
|
);
|
||||||
|
if (!img) return undefined;
|
||||||
|
return img.startsWith("http") ? img : `${BASE}${img.startsWith("/") ? "" : "/"}${img}`;
|
||||||
|
})();
|
||||||
|
|
||||||
// Price: prefer LD+JSON offers.price (Astro/Shopify structure, e.g. "price":"731.00")
|
// Price: prefer LD+JSON offers.price (Astro/Shopify structure, e.g. "price":"731.00")
|
||||||
// Fall back to "US$ 10.90" or "$10.90" visible text patterns
|
// Fall back to "US$ 10.90" or "$10.90" visible text patterns
|
||||||
let price: number | undefined;
|
let price: number | undefined;
|
||||||
@ -220,7 +266,7 @@ function parseDetailPage(html: string, url: string): {
|
|||||||
// Stock count
|
// Stock count
|
||||||
const stock = parseStockText(html);
|
const stock = parseStockText(html);
|
||||||
|
|
||||||
return { name, price, stock };
|
return { name, imageUrl, price, stock };
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Sitemap parsing ─────────────────────────────────────────────────────────
|
// ── Sitemap parsing ─────────────────────────────────────────────────────────
|
||||||
@ -247,6 +293,25 @@ async function fetchProductUrlsFromSitemap(): Promise<string[]> {
|
|||||||
return [...new Set(urls)]; // deduplicate
|
return [...new Set(urls)]; // deduplicate
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Loads NADDOD transceivers from the database that still need a detail-page visit.
 *
 * A row qualifies when it belongs to the vendor named 'NADDOD', has a
 * non-empty `product_page_url`, and has `details_verified` or
 * `image_verified` set to false. Rows are ordered by `details_verified`,
 * then `image_verified`, then `updated_at`, all ascending.
 *
 * @param limit - Maximum number of rows to return (bound to the SQL LIMIT).
 * @returns One entry per row: the product page URL and the transceiver id to update.
 */
async function fetchDbTargets(limit: number): Promise<Array<{ url: string; targetTransceiverId: string }>> {
  const { rows } = await pool.query<{ id: string; product_page_url: string }>(`
    SELECT t.id, t.product_page_url
    FROM transceivers t
    JOIN vendors v ON v.id = t.vendor_id
    WHERE v.name = 'NADDOD'
      AND t.product_page_url IS NOT NULL
      AND t.product_page_url != ''
      AND (t.details_verified = false OR t.image_verified = false)
    ORDER BY t.details_verified ASC, t.image_verified ASC, t.updated_at ASC
    LIMIT $1
  `, [limit]);

  const targets: Array<{ url: string; targetTransceiverId: string }> = [];
  for (const { id, product_page_url } of rows) {
    targets.push({ url: product_page_url, targetTransceiverId: id });
  }
  return targets;
}
|
||||||
|
|
||||||
// ── Main scraper ────────────────────────────────────────────────────────────
|
// ── Main scraper ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
export async function scrapeNaddod(): Promise<void> {
|
export async function scrapeNaddod(): Promise<void> {
|
||||||
@ -262,11 +327,18 @@ export async function scrapeNaddod(): Promise<void> {
|
|||||||
// ── Phase 1: Discover product URLs via sitemap ────────────────────────────
|
// ── Phase 1: Discover product URLs via sitemap ────────────────────────────
|
||||||
console.log("[Phase 1] Discovering products from sitemap...");
|
console.log("[Phase 1] Discovering products from sitemap...");
|
||||||
let productUrls: string[] = [];
|
let productUrls: string[] = [];
|
||||||
|
let targets: Array<{ url: string; targetTransceiverId?: string }> = [];
|
||||||
try {
|
try {
|
||||||
productUrls = await fetchProductUrlsFromSitemap();
|
if (DB_DETAIL_ONLY) {
|
||||||
console.log(` Found ${productUrls.length} product URLs in sitemap`);
|
targets = await fetchDbTargets(MAX_DETAIL_PAGES);
|
||||||
|
productUrls = targets.map((target) => target.url);
|
||||||
|
console.log(` DB detail targets: ${productUrls.length}`);
|
||||||
|
} else {
|
||||||
|
productUrls = await fetchProductUrlsFromSitemap();
|
||||||
|
console.log(` Found ${productUrls.length} product URLs in sitemap`);
|
||||||
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error(` Sitemap fetch failed: ${(err as Error).message}`);
|
console.error(` Target discovery failed: ${(err as Error).message}`);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -278,16 +350,16 @@ export async function scrapeNaddod(): Promise<void> {
|
|||||||
// Cursor-based rotation: each run advances by MAX_DETAIL_PAGES so over ~12 runs
|
// Cursor-based rotation: each run advances by MAX_DETAIL_PAGES so over ~12 runs
|
||||||
// (24 hours) we cover all ~7300 products. Wraps around when exhausted.
|
// (24 hours) we cover all ~7300 products. Wraps around when exhausted.
|
||||||
const totalUrls = productUrls.length;
|
const totalUrls = productUrls.length;
|
||||||
const offset = readCursor() % totalUrls;
|
const offset = DB_DETAIL_ONLY ? 0 : readCursor() % totalUrls;
|
||||||
const endIdx = Math.min(offset + MAX_DETAIL_PAGES, totalUrls);
|
const endIdx = Math.min(offset + MAX_DETAIL_PAGES, totalUrls);
|
||||||
let urls = productUrls.slice(offset, endIdx);
|
let batchTargets: Array<{ url: string; targetTransceiverId?: string }> =
|
||||||
// Wrap around if we got fewer than MAX_DETAIL_PAGES (hit the end of the list)
|
DB_DETAIL_ONLY ? targets : productUrls.slice(offset, endIdx).map((url) => ({ url }));
|
||||||
if (urls.length < MAX_DETAIL_PAGES && totalUrls > MAX_DETAIL_PAGES) {
|
if (!DB_DETAIL_ONLY && batchTargets.length < MAX_DETAIL_PAGES && totalUrls > MAX_DETAIL_PAGES) {
|
||||||
const wrap = MAX_DETAIL_PAGES - urls.length;
|
const wrap = MAX_DETAIL_PAGES - batchTargets.length;
|
||||||
urls = urls.concat(productUrls.slice(0, wrap));
|
batchTargets = batchTargets.concat(productUrls.slice(0, wrap).map((url) => ({ url })));
|
||||||
}
|
}
|
||||||
const nextOffset = (offset + MAX_DETAIL_PAGES) % totalUrls;
|
const nextOffset = DB_DETAIL_ONLY ? offset : (offset + MAX_DETAIL_PAGES) % totalUrls;
|
||||||
console.log(` Offset: ${offset}/${totalUrls} → processing ${urls.length} products (next run starts at ${nextOffset})`);
|
console.log(` Offset: ${offset}/${totalUrls} → processing ${batchTargets.length} products (next run starts at ${nextOffset})`);
|
||||||
|
|
||||||
// ── Phase 2: Fetch detail pages + write to DB ─────────────────────────────
|
// ── Phase 2: Fetch detail pages + write to DB ─────────────────────────────
|
||||||
console.log("\n[Phase 2] Fetching product detail pages...");
|
console.log("\n[Phase 2] Fetching product detail pages...");
|
||||||
@ -299,7 +371,8 @@ export async function scrapeNaddod(): Promise<void> {
|
|||||||
let skippedNonTx = 0;
|
let skippedNonTx = 0;
|
||||||
let errors = 0;
|
let errors = 0;
|
||||||
|
|
||||||
for (const url of urls) {
|
for (const target of batchTargets) {
|
||||||
|
const url = target.url;
|
||||||
await sleep(2000);
|
await sleep(2000);
|
||||||
try {
|
try {
|
||||||
const html = await fetchText(url);
|
const html = await fetchText(url);
|
||||||
@ -310,28 +383,64 @@ export async function scrapeNaddod(): Promise<void> {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const { name, price, stock } = detail;
|
const { name, imageUrl, price, stock } = detail;
|
||||||
const { speed, speedGbps } = detectSpeedGbps(name);
|
const evidenceText = `${name} ${html.replace(/<[^>]+>/g, " ").slice(0, 20000)}`;
|
||||||
const formFactor = detectFormFactor(name);
|
const { speed, speedGbps } = detectSpeedGbps(evidenceText);
|
||||||
const reach = detectReach(name);
|
const formFactor = detectFormFactor(evidenceText);
|
||||||
const fiberType = detectFiber(name);
|
const reach = detectReach(evidenceText);
|
||||||
const wavelength = detectWavelength(name);
|
const fiberType = detectFiber(evidenceText);
|
||||||
|
const wavelength = detectWavelength(evidenceText);
|
||||||
|
|
||||||
// Extract part number from name (first word-group before "Compatible" or vendor name)
|
// Extract part number from name (first word-group before "Compatible" or vendor name)
|
||||||
const partNumber = name.split(/\s+(?:compatible|for\s+[A-Z]|sfp|qsfp)/i)[0]?.trim().slice(0, 80) || name.slice(0, 60);
|
const partNumber = name.split(/\s+(?:compatible|for\s+[A-Z]|sfp|qsfp)/i)[0]?.trim().slice(0, 80) || name.slice(0, 60);
|
||||||
|
|
||||||
const txId = await findOrCreateScrapedTransceiver({
|
let txId = target.targetTransceiverId;
|
||||||
partNumber,
|
|
||||||
vendorId,
|
if (txId) {
|
||||||
formFactor,
|
await pool.query(`
|
||||||
speedGbps,
|
UPDATE transceivers
|
||||||
speed,
|
SET product_page_url = COALESCE(NULLIF(product_page_url, ''), $2),
|
||||||
reachMeters: reach?.meters,
|
form_factor = COALESCE(NULLIF($3::text, ''), form_factor),
|
||||||
reachLabel: reach?.label,
|
speed_gbps = CASE WHEN $4::numeric > 0 THEN $4::numeric ELSE speed_gbps END,
|
||||||
fiberType,
|
speed = CASE WHEN $4::numeric > 0 THEN $5 ELSE speed END,
|
||||||
wavelengths: wavelength,
|
reach_meters = CASE WHEN $6::int IS NOT NULL AND $6::int > 0 THEN $6::int ELSE reach_meters END,
|
||||||
category: "DataCenter",
|
reach_label = COALESCE(NULLIF($7::text, ''), reach_label),
|
||||||
});
|
fiber_type = COALESCE(NULLIF($8::text, ''), fiber_type),
|
||||||
|
wavelengths = COALESCE(NULLIF($9::text, ''), wavelengths),
|
||||||
|
category = COALESCE(NULLIF(category, ''), 'DataCenter'),
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE id = $1
|
||||||
|
AND vendor_id = $10
|
||||||
|
`, [
|
||||||
|
txId,
|
||||||
|
url,
|
||||||
|
formFactor,
|
||||||
|
speedGbps,
|
||||||
|
speed,
|
||||||
|
reach?.meters ?? null,
|
||||||
|
reach?.label ?? null,
|
||||||
|
fiberType || null,
|
||||||
|
wavelength || null,
|
||||||
|
vendorId,
|
||||||
|
]);
|
||||||
|
if (imageUrl) await markImageVerified(txId, imageUrl);
|
||||||
|
await markDetailsVerified({ transceiverId: txId, sourceUrl: url });
|
||||||
|
} else {
|
||||||
|
txId = await findOrCreateScrapedTransceiver({
|
||||||
|
partNumber,
|
||||||
|
vendorId,
|
||||||
|
productUrl: url,
|
||||||
|
formFactor,
|
||||||
|
speedGbps,
|
||||||
|
speed,
|
||||||
|
reachMeters: reach?.meters,
|
||||||
|
reachLabel: reach?.label,
|
||||||
|
fiberType,
|
||||||
|
wavelengths: wavelength,
|
||||||
|
category: "DataCenter",
|
||||||
|
imageUrl,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Price observation
|
// Price observation
|
||||||
if (price && price > 0) {
|
if (price && price > 0) {
|
||||||
@ -368,7 +477,7 @@ export async function scrapeNaddod(): Promise<void> {
|
|||||||
|
|
||||||
processed++;
|
processed++;
|
||||||
if (processed % 50 === 0) {
|
if (processed % 50 === 0) {
|
||||||
console.log(` Progress: ${processed}/${urls.length} | prices: ${priceUpdates} | stock: ${stockWritten} new / ${stockSkipped} unchanged`);
|
console.log(` Progress: ${processed}/${batchTargets.length} | prices: ${priceUpdates} | stock: ${stockWritten} new / ${stockSkipped} unchanged`);
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
errors++;
|
errors++;
|
||||||
@ -377,10 +486,10 @@ export async function scrapeNaddod(): Promise<void> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Advance cursor for next run
|
// Advance cursor for next run
|
||||||
writeCursor(nextOffset);
|
if (!DB_DETAIL_ONLY) writeCursor(nextOffset);
|
||||||
|
|
||||||
console.log("\n=== NADDOD Scraper v2 Complete ===");
|
console.log("\n=== NADDOD Scraper v2 Complete ===");
|
||||||
console.log(` URL range processed: ${offset}–${offset + urls.length - 1} of ${totalUrls}`);
|
console.log(` URL range processed: ${offset}–${offset + batchTargets.length - 1} of ${totalUrls}`);
|
||||||
console.log(` Products processed: ${processed}`);
|
console.log(` Products processed: ${processed}`);
|
||||||
console.log(` Non-transceivers skip: ${skippedNonTx}`);
|
console.log(` Non-transceivers skip: ${skippedNonTx}`);
|
||||||
console.log(` Price observations: ${priceUpdates} new`);
|
console.log(` Price observations: ${priceUpdates} new`);
|
||||||
|
|||||||
@ -1,9 +1,102 @@
|
|||||||
# Current TIP Sync State
|
# Current TIP Sync State
|
||||||
|
|
||||||
Updated: 2026-05-09 12:16 UTC
|
Updated: 2026-05-09 13:54 UTC
|
||||||
|
|
||||||
## Newest Work
|
## Newest Work
|
||||||
|
|
||||||
|
- FS.com 1.6T DR8/2FR4 source correction on 2026-05-09:
|
||||||
|
- operator spotted that FS.com has two distinct 1.6T OSFP variants on the same family:
|
||||||
|
- `OSFP-DR8-1.6T-FL`: 500m, DR8, SMF
|
||||||
|
- `OSFP-2FR4-1.6T-FL`: 2km, 2FR4, SMF
|
||||||
|
- confirmed in TIP DB:
|
||||||
|
- both FS.com variants exist as separate rows
|
||||||
|
- `OSFP-2FR4-1.6T-FL` had `reach_meters=0` even though the source and row label said `2km`
|
||||||
|
- `OSFP-DR8-1.6T-FL` had no wavelength, causing the deterministic equivalence worker to reject the otherwise correct 500m Flexoptix match
|
||||||
|
- live DB correction:
|
||||||
|
- `OSFP-DR8-1.6T-FL`
|
||||||
|
- `speed=1.6T`
|
||||||
|
- `speed_gbps=1600`
|
||||||
|
- `reach_label=500m`
|
||||||
|
- `reach_meters=500`
|
||||||
|
- `fiber_type=SMF`
|
||||||
|
- `wavelengths=1310`
|
||||||
|
- `standard_name=1.6T OSFP DR8`
|
||||||
|
- fully verified remains true
|
||||||
|
- `OSFP-2FR4-1.6T-FL`
|
||||||
|
- `speed=1.6T`
|
||||||
|
- `speed_gbps=1600`
|
||||||
|
- `reach_label=2km`
|
||||||
|
- `reach_meters=2000`
|
||||||
|
- `fiber_type=SMF`
|
||||||
|
- `wavelengths=1310`
|
||||||
|
- `standard_name=1.6T OSFP 2FR4`
|
||||||
|
- fully verified true
|
||||||
|
- Flexoptix `O.1316T.C.05.M`
|
||||||
|
- confirmed as `500m`, `SMF`, `1.6T`
|
||||||
|
- `standard_name=1.6T OSFP DR8`
|
||||||
|
- equivalence correction:
|
||||||
|
- approved only `O.1316T.C.05.M` ↔ `OSFP-DR8-1.6T-FL`
|
||||||
|
- confidence `0.913`
|
||||||
|
- match basis: form factor, speed, reach, fiber, wavelength and source variant DR8/500m
|
||||||
|
- `OSFP-2FR4-1.6T-FL` remains separate and is not linked to the 500m DR8 Flexoptix product
|
||||||
|
- scraper hardening:
|
||||||
|
- `packages/scraper/src/scrapers/fs-com.ts`
|
||||||
|
- recognizes German/decimal `1,6T` and `1600G` as `1.6T`/`1600`
|
||||||
|
- converts reach labels such as `2km` into `reach_meters=2000`
|
||||||
|
- updates stale `speed` labels when the numeric source speed matches the row
|
||||||
|
- build:
|
||||||
|
- `pnpm -C packages/scraper build` passed on Erik
|
||||||
|
- truth:
|
||||||
|
- there are definitely two separate FS.com variants
|
||||||
|
- 500m DR8 is the correct equivalent for Flexoptix `O.1316T.C.05.M`
|
||||||
|
- 2km 2FR4 is a separate DB product and must not be collapsed into the 500m DR8 match
|
||||||
|
|
||||||
|
- Targeted vendor verification push after equivalence revalidation on 2026-05-09:
|
||||||
|
- code improved:
|
||||||
|
- `NADDOD_DB_DETAIL_ONLY=1` mode verifies existing NADDOD rows with source URLs instead of rotating blindly through the full sitemap
|
||||||
|
- NADDOD now extracts `og:image`, source product URLs, reach/fiber/wavelength from page evidence, AOC/DAC cable lengths, and DR/FR/SR/VR/XDR patterns
|
||||||
|
- GAO Tek now writes product URLs and image evidence
|
||||||
|
- Ascent Optics now writes product URLs and table image evidence
|
||||||
|
- Eoptolink now writes product URLs, images, reach/wavelength evidence and corrects over-broad form-factor parsing by preferring title/slug evidence
|
||||||
|
- live low-load Erik runs:
|
||||||
|
- GAO Tek static crawl:
|
||||||
|
- `473` unique products processed
|
||||||
|
- GAO Tek detail coverage improved from `41` to `126`
|
||||||
|
- `no_url` dropped to `0`
|
||||||
|
- Ascent Optics static/API crawl:
|
||||||
|
- `253` catalog products processed
|
||||||
|
- image coverage `235/305`
|
||||||
|
- detail coverage `213/305`
|
||||||
|
- Eoptolink static crawl:
|
||||||
|
- `76` product-solution pages inspected
|
||||||
|
- after parser correction, Eoptolink is `287/287` image and detail verified
|
||||||
|
- NADDOD targeted DB-detail mode:
|
||||||
|
- first targeted wave `200` pages
|
||||||
|
- second wave `300` pages
|
||||||
|
- closure wave `385` pages
|
||||||
|
- special-case wave `83` pages
|
||||||
|
- NADDOD moved from `image=12`, `details=157`, `fully` ≈ 0–1 (approximate) to:
|
||||||
|
- total `748`
|
||||||
|
- price `744`
|
||||||
|
- image `742`
|
||||||
|
- details `659`
|
||||||
|
- competitor `744`
|
||||||
|
- fully `659`
|
||||||
|
- no URL `6`
|
||||||
|
- global TIP counters after this push:
|
||||||
|
- price verified `11557`
|
||||||
|
- image verified `11963`
|
||||||
|
- details verified `11018`
|
||||||
|
- fully verified `9794`
|
||||||
|
- total transceivers `17647`
|
||||||
|
- health:
|
||||||
|
- TIP stayed `healthy`
|
||||||
|
- load status `ok`
|
||||||
|
- memory used about `13%`
|
||||||
|
- truth:
|
||||||
|
- NADDOD is not 100% complete; the remaining detail gaps are likely non-transceiver switch/NIC products plus a smaller set of parser special cases
|
||||||
|
- OEM catalogs like Ascent and Eoptolink do not publish retail prices, so full verification cannot be forced honestly without price evidence
|
||||||
|
|
||||||
- Immediate full TIP equivalence revalidation on 2026-05-09:
|
- Immediate full TIP equivalence revalidation on 2026-05-09:
|
||||||
- operator requested all open TIP validation to be completed immediately and all product matches checked for true 1:1 equivalence
|
- operator requested all open TIP validation to be completed immediately and all product matches checked for true 1:1 equivalence
|
||||||
- live preflight:
|
- live preflight:
|
||||||
|
|||||||
@ -0,0 +1,117 @@
|
|||||||
|
# FS.com 1.6T Variant Correction + Vendor Verification Push
|
||||||
|
|
||||||
|
Date: 2026-05-09
|
||||||
|
Actor: Codex
|
||||||
|
|
||||||
|
## Operator Finding
|
||||||
|
|
||||||
|
The operator spotted a concrete source-truth problem on FS.com:
|
||||||
|
|
||||||
|
- `OSFP-DR8-1.6T-FL` is the 500m DR8 variant.
|
||||||
|
- `OSFP-2FR4-1.6T-FL` is the 2km 2FR4 variant.
|
||||||
|
- Flexoptix `O.1316T.C.05.M` is the 500m DR8 product.
|
||||||
|
- The 2km 2FR4 variant must be present as its own product and must not be collapsed into the 500m DR8 match.
|
||||||
|
|
||||||
|
## Live DB Correction
|
||||||
|
|
||||||
|
Corrected FS.com rows:
|
||||||
|
|
||||||
|
- `OSFP-DR8-1.6T-FL`
|
||||||
|
- `speed=1.6T`
|
||||||
|
- `speed_gbps=1600`
|
||||||
|
- `reach_label=500m`
|
||||||
|
- `reach_meters=500`
|
||||||
|
- `fiber_type=SMF`
|
||||||
|
- `wavelengths=1310`
|
||||||
|
- `standard_name=1.6T OSFP DR8`
|
||||||
|
- fully verified
|
||||||
|
|
||||||
|
- `OSFP-2FR4-1.6T-FL`
|
||||||
|
- `speed=1.6T`
|
||||||
|
- `speed_gbps=1600`
|
||||||
|
- `reach_label=2km`
|
||||||
|
- `reach_meters=2000`
|
||||||
|
- `fiber_type=SMF`
|
||||||
|
- `wavelengths=1310`
|
||||||
|
- `standard_name=1.6T OSFP 2FR4`
|
||||||
|
- fully verified
|
||||||
|
|
||||||
|
Corrected Flexoptix row:
|
||||||
|
|
||||||
|
- `O.1316T.C.05.M`
|
||||||
|
- confirmed `500m`, `SMF`, `1.6T`
|
||||||
|
- `standard_name=1.6T OSFP DR8`
|
||||||
|
|
||||||
|
Corrected equivalence:
|
||||||
|
|
||||||
|
- Approved only `O.1316T.C.05.M` ↔ `OSFP-DR8-1.6T-FL`.
|
||||||
|
- Confidence: `0.913`.
|
||||||
|
- Basis: form factor, speed, reach, fiber, wavelength and explicit source variant DR8/500m.
|
||||||
|
- `OSFP-2FR4-1.6T-FL` remains separate and is not linked to the 500m Flexoptix product.
|
||||||
|
|
||||||
|
## Scraper Hardening
|
||||||
|
|
||||||
|
Updated `packages/scraper/src/scrapers/fs-com.ts`:
|
||||||
|
|
||||||
|
- Detects `1,6T`, `1.6T` and `1600G` as `1.6T`/`1600`.
|
||||||
|
- Converts labels like `2km` to `reach_meters=2000`.
|
||||||
|
- Updates stale `speed` strings when the numeric source speed matches the row.
|
||||||
|
|
||||||
|
Remote build on Erik passed:
|
||||||
|
|
||||||
|
```text
|
||||||
|
pnpm -C packages/scraper build
|
||||||
|
```
|
||||||
|
|
||||||
|
## Vendor Verification Work In Same Push
|
||||||
|
|
||||||
|
Updated:
|
||||||
|
|
||||||
|
- `packages/scraper/src/scrapers/naddod.ts`
|
||||||
|
- `packages/scraper/src/scrapers/gaotek.ts`
|
||||||
|
- `packages/scraper/src/scrapers/ascentoptics.ts`
|
||||||
|
- `packages/scraper/src/scrapers/eoptolink.ts`
|
||||||
|
|
||||||
|
Live results:
|
||||||
|
|
||||||
|
- GAO Tek:
|
||||||
|
- details improved from `41` to `126`
|
||||||
|
- no-url dropped to `0`
|
||||||
|
- Ascent Optics:
|
||||||
|
- image `235/305`
|
||||||
|
- details `213/305`
|
||||||
|
- Eoptolink:
|
||||||
|
- image `287/287`
|
||||||
|
- details `287/287`
|
||||||
|
- NADDOD:
|
||||||
|
- total `748`
|
||||||
|
- price `744`
|
||||||
|
- image `742`
|
||||||
|
- details `659`
|
||||||
|
- competitor `744`
|
||||||
|
- fully `659`
|
||||||
|
- no URL `6`
|
||||||
|
|
||||||
|
Global TIP counters after the push:
|
||||||
|
|
||||||
|
- price verified `11557`
|
||||||
|
- image verified `11963`
|
||||||
|
- details verified `11018`
|
||||||
|
- fully verified `9794`
|
||||||
|
- total transceivers `17647`
|
||||||
|
|
||||||
|
TIP remained healthy:
|
||||||
|
|
||||||
|
- status `healthy`
|
||||||
|
- load status `ok`
|
||||||
|
- memory around `13%`
|
||||||
|
|
||||||
|
## Lesson For TIPLLM
|
||||||
|
|
||||||
|
Variant selectors on vendor pages must be treated as separate products when reach, optical protocol, connector or model changes.
|
||||||
|
|
||||||
|
For FS.com 1.6T OSFP:
|
||||||
|
|
||||||
|
- `DR8 500m` and `2FR4 2km` are distinct SKUs and distinct compatibility candidates.
|
||||||
|
- A Flexoptix 500m DR8 product must not be matched to a 2km 2FR4 FS.com product.
|
||||||
|
- Source pages can expose German decimal text (`1,6T`) and separate net/gross prices; normalize carefully.
|
||||||
Loading…
x
Reference in New Issue
Block a user