fix: enrich flexoptix detail verification
This commit is contained in:
parent
5522bb2152
commit
549b4430df
@ -182,6 +182,18 @@ function detectReach(text: string): { label: string; meters: number } | undefine
|
|||||||
for (const [regex, label, meters] of patterns) {
|
for (const [regex, label, meters] of patterns) {
|
||||||
if (regex.test(text)) return { label, meters };
|
if (regex.test(text)) return { label, meters };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const generic = text.match(/\b(\d{1,3}(?:,\d{3})+|\d+(?:\.\d+)?)\s*(km|m)\b/i);
|
||||||
|
if (generic) {
|
||||||
|
const value = parseFloat(generic[1].replace(/,/g, ""));
|
||||||
|
const unit = generic[2].toLowerCase();
|
||||||
|
if (Number.isFinite(value) && value > 0) {
|
||||||
|
const meters = unit === "km" ? Math.round(value * 1000) : Math.max(1, Math.round(value));
|
||||||
|
const normalized = Number.isInteger(value) ? String(value) : String(value).replace(/0+$/, "").replace(/\.$/, "");
|
||||||
|
const label = unit === "km" ? `${normalized}km` : `${normalized}m`;
|
||||||
|
return { label, meters };
|
||||||
|
}
|
||||||
|
}
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -194,8 +206,10 @@ function detectFiber(text: string): string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function detectWavelength(text: string): string {
|
function detectWavelength(text: string): string {
|
||||||
const match = text.match(/(\d{3,4})\s*nm/i);
|
const matches = [...text.matchAll(/(?:λ|lambda)?\s*(\d{3,4})\s*nm/gi)]
|
||||||
if (match) return match[1];
|
.map((match) => match[1])
|
||||||
|
.filter((value, index, values) => values.indexOf(value) === index);
|
||||||
|
if (matches.length) return matches.join("/");
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -561,6 +575,7 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
|
|||||||
const txId = await findOrCreateScrapedTransceiver({
|
const txId = await findOrCreateScrapedTransceiver({
|
||||||
partNumber: product.partNumber,
|
partNumber: product.partNumber,
|
||||||
vendorId,
|
vendorId,
|
||||||
|
productUrl: product.url,
|
||||||
formFactor: product.formFactor,
|
formFactor: product.formFactor,
|
||||||
speedGbps: product.speedGbps,
|
speedGbps: product.speedGbps,
|
||||||
speed: product.speed,
|
speed: product.speed,
|
||||||
|
|||||||
275
packages/scraper/src/scrapers/flexoptix-detail-pages.ts
Normal file
275
packages/scraper/src/scrapers/flexoptix-detail-pages.ts
Normal file
@ -0,0 +1,275 @@
|
|||||||
|
/**
|
||||||
|
* Flexoptix detail-page verifier.
|
||||||
|
*
|
||||||
|
* Targeted pass for Flexoptix rows that already have product_page_url but are
|
||||||
|
* missing price/image/details signals. Uses static product HTML only.
|
||||||
|
*/
|
||||||
|
import * as cheerio from "cheerio";
|
||||||
|
import {
|
||||||
|
ensureVendor,
|
||||||
|
markDetailsVerified,
|
||||||
|
markImageVerified,
|
||||||
|
pool,
|
||||||
|
upsertPriceObservation,
|
||||||
|
} from "../utils/db";
|
||||||
|
import { contentHash } from "../utils/hash";
|
||||||
|
|
||||||
|
/**
 * HTTP request headers sent with every product-page fetch: a bot-identifying
 * User-Agent plus an Accept header asking for HTML/XHTML.
 */
const HEADERS = {
  "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; internal-flexoptix-detail)",
  Accept: "text/html,application/xhtml+xml",
};
|
||||||
|
|
||||||
|
/**
 * Values extracted from one static product page by `parseDetail`.
 * Optional fields are left undefined when the page gives no usable evidence.
 */
type DetailPatch = {
  // --- always present ---
  /** Cleaned page title (meta "title" content, else the <title> text). */
  title: string;
  /** Cleaned meta description (falls back to og:description, else ""). */
  description: string;
  /** Price currency from the page metadata; "EUR" when the page has none. */
  currency: string;

  // --- present only when detected ---
  /** Parsed price; only kept when it is > 0 and < 100000. */
  price?: number;
  /** og:image / twitter:image URL, unless it looks like a placeholder or logo. */
  imageUrl?: string;
  /** Human-readable reach such as "10km" or "300m". */
  reachLabel?: string;
  /** Reach converted to metres. */
  reachMeters?: number;
  /** "Copper", "AOC", "SMF", "MMF" or "SMF/MMF". */
  fiberType?: string;
  /** Distinct wavelengths in nm joined with "/", e.g. "1310/1550". */
  wavelengths?: string;
  /** Upper-cased connector token, e.g. "LC-DUPLEX" or "RJ-45". */
  connector?: string;
  /** Upper-cased Ethernet standard name, e.g. "10GBASE-SR". */
  standardName?: string;
};
|
||||||
|
|
||||||
|
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param ms Delay in milliseconds, passed straight to `setTimeout`.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Normalises a text fragment taken from page metadata.
 *
 * Decodes one layer of HTML character references (numeric `&#NN;` / `&#xHH;`
 * and the named `&amp;`, `&quot;`, `&nbsp;`), collapses every whitespace run
 * to a single space and trims the result.
 *
 * @param value Raw text; `undefined` is treated as the empty string.
 * @returns The cleaned text (possibly empty).
 */
function cleanText(value: string | undefined): string {
  const namedEntities: Record<string, string> = { amp: "&", quot: '"', nbsp: " " };

  return (value ?? "")
    // Single pass so that already-decoded output is never decoded a second
    // time (e.g. "&amp;quot;" becomes "&quot;", not '"').
    .replace(
      /&(?:#[xX]([0-9a-fA-F]+)|#(\d+)|(amp|quot|nbsp));/g,
      (entity: string, hex?: string, dec?: string, name?: string): string => {
        if (name !== undefined) return namedEntities[name] ?? entity;
        const codePoint = hex !== undefined ? parseInt(hex, 16) : parseInt(dec ?? "", 10);
        // String.fromCodePoint throws RangeError above U+10FFFF; keep such
        // malformed references verbatim instead of aborting the whole parse.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
      }
    )
    .replace(/\s+/g, " ")
    .trim();
}
|
||||||
|
|
||||||
|
/**
 * Formats a number for use in a reach label.
 *
 * JavaScript's number-to-string conversion already yields the shortest
 * round-trippable representation, so it never contains trailing fractional
 * zeros. Stripping trailing "0" characters by hand is therefore unnecessary,
 * and it would corrupt exponent notation (1e-10 would become "1e-1").
 *
 * @param value Number to format.
 * @returns `String(value)`, e.g. 10 -> "10", 0.5 -> "0.5".
 */
function formatNumber(value: number): string {
  return String(value);
}
|
||||||
|
|
||||||
|
/**
 * Extracts a transmission reach from free text.
 *
 * Precedence:
 *   1. a range such as "2-10 km" (the second number is used),
 *   2. an "up to" / "max" / "distance" prefixed value,
 *   3. the first plain "<number> km|m" value,
 *   4. `{ label: "N/A", meters: 0 }` for adapter/converter style products.
 *
 * @param text Combined title/description text.
 * @returns The reach label (number + lower-cased unit) and its value in
 *          metres, or `undefined` when nothing matched.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  // Thousands separators ("1,000") are removed before parsing.
  const parseAmount = (raw: string): number => parseFloat(raw.replace(/,/g, ""));
  // Metre values are rounded and never reported below 1.
  const toMeters = (amount: number, unit: string): number =>
    unit === "km" ? Math.round(amount * 1000) : Math.max(1, Math.round(amount));

  const span = text.match(/\b(\d{1,3}(?:,\d{3})+|\d+(?:\.\d+)?)\s*-\s*(\d{1,3}(?:,\d{3})+|\d+(?:\.\d+)?)\s*(km|m)\b/i);
  if (span) {
    const upper = parseAmount(span[2]);
    const unit = span[3].toLowerCase();
    return { label: `${formatNumber(upper)}${unit}`, meters: toMeters(upper, unit) };
  }

  const single =
    text.match(/\b(?:up to|max\.?|distance[:\s]*)\s*(\d{1,3}(?:,\d{3})+|\d+(?:\.\d+)?)\s*(km|m)\b/i) ??
    text.match(/\b(\d{1,3}(?:,\d{3})+|\d+(?:\.\d+)?)\s*(km|m)\b/i);
  if (single) {
    const amount = parseAmount(single[1]);
    const unit = single[2].toLowerCase();
    if (Number.isFinite(amount) && amount > 0) {
      return { label: `${formatNumber(amount)}${unit}`, meters: toMeters(amount, unit) };
    }
  }

  // Products without an optical reach still get an explicit marker.
  return /\badapter|converter|serial to ip/i.test(text) ? { label: "N/A", meters: 0 } : undefined;
}
|
||||||
|
|
||||||
|
/**
 * Classifies the transmission medium mentioned in free text.
 *
 * Copper keywords win over everything else, then active optical cables; after
 * that the text is checked independently for single-mode and multi-mode hints.
 *
 * @param text Combined title/description text.
 * @returns "Copper", "AOC", "SMF", "MMF", "SMF/MMF", or `undefined` when no
 *          keyword is present.
 */
function detectFiber(text: string): string | undefined {
  if (/active electrical cable|\baec\b|copper|dac|twinax|rj45|base-t|serial to ip/i.test(text)) {
    return "Copper";
  }
  if (/active optical cable|\baoc\b/i.test(text)) {
    return "AOC";
  }

  const modes: string[] = [];
  if (/single.?mode|\bsmf\b|\blx\b|\blr\b|\ber\b|\bzr\b|bidi|cwdm|dwdm/i.test(text)) {
    modes.push("SMF");
  }
  if (/multi.?mode|\bmmf\b|\bsx\b|\bsr\b/i.test(text)) {
    modes.push("MMF");
  }
  return modes.length > 0 ? modes.join("/") : undefined;
}
|
||||||
|
|
||||||
|
/**
 * Collects every "<3-4 digits> nm" wavelength mentioned in the text.
 *
 * @param text Combined title/description text.
 * @returns The distinct values in order of first appearance, joined with "/"
 *          (e.g. "1310/1550"), or `undefined` when none is found.
 */
function detectWavelengths(text: string): string | undefined {
  // A Set keeps insertion order, so duplicates drop out while the first
  // occurrence keeps its position.
  const distinct = new Set<string>();
  for (const hit of text.matchAll(/(?:λ|lambda)?\s*(\d{3,4})\s*nm/gi)) {
    distinct.add(hit[1]);
  }
  if (distinct.size === 0) {
    return undefined;
  }
  return Array.from(distinct).join("/");
}
|
||||||
|
|
||||||
|
/**
 * Finds the first known connector type mentioned in the text.
 *
 * Patterns are tried in priority order (MTP/MPO, LC, CS, SN, RJ45), not in
 * order of appearance. The match is upper-cased and "RJ45" is normalised to
 * "RJ-45".
 *
 * @param text Combined title/description text.
 * @returns The connector token (e.g. "LC-DUPLEX", "MTP/MPO-12", "RJ-45") or
 *          `undefined` when none is found.
 */
function detectConnector(text: string): string | undefined {
  const connectors = [
    /MTP\/MPO[-\w/]*/i,
    // "LC" must start a token (or follow a multiplicity prefix such as "2x")
    // and must not continue as a longer word. Without these anchors the
    // case-insensitive pattern matched inside ordinary words, e.g.
    // "Alcatel-Lucent" -> "LCATEL-LUCENT" or "welcome" -> "LCOME".
    /(?:\b|(?<=\dx))LC(?!\w)[-\w/]*/i,
    /\bCS\b/i,
    /\bSN\b/i,
    /\bRJ-?45\b/i,
  ];
  for (const regex of connectors) {
    const match = text.match(regex);
    if (match) return match[0].toUpperCase().replace("RJ45", "RJ-45");
  }
  return undefined;
}
|
||||||
|
|
||||||
|
/**
 * Extracts the first IEEE-style "…BASE-…" standard name from the text.
 *
 * @param text Combined title/description text.
 * @returns The upper-cased name (e.g. "10GBASE-SR", "1000BASE-LX") or
 *          `undefined` when the text contains none.
 */
function detectStandard(text: string): string | undefined {
  const found = /\b(?:\d+(?:\.\d+)?[GT]?BASE-[A-Z0-9.+-]+|[A-Z0-9]+GBASE-[A-Z0-9.+-]+)\b/i.exec(text);
  if (found === null) {
    return undefined;
  }
  return found[0].toUpperCase();
}
|
||||||
|
|
||||||
|
/**
 * Parses the static HTML of one product page into a `DetailPatch`.
 *
 * Title, description, price, currency and image come from `<meta>` tags (the
 * title falls back to `<title>`). Reach, fiber type, wavelengths, connector
 * and standard name are detected from the title, the description and the text
 * of all `.description-list-item` elements combined.
 *
 * @param html Raw HTML of the product page.
 * @returns The extracted values; optional fields are undefined when absent.
 */
function parseDetail(html: string): DetailPatch {
  const $ = cheerio.load(html);
  const metaContent = (selector: string): string | undefined => $(selector).attr("content");

  const title = cleanText(metaContent('meta[name="title"]') || $("title").text());
  const description = cleanText(
    metaContent('meta[name="description"]') || metaContent('meta[property="og:description"]') || ""
  );
  const listText = cleanText($(".description-list-item").text());
  const combined = `${title} ${description} ${listText}`;

  // Price: accepted only when it parses to a value strictly between 0 and 100000.
  const rawPrice = metaContent('meta[property="product:price:amount"]');
  const parsedPrice = rawPrice ? parseFloat(rawPrice) : undefined;
  const price =
    parsedPrice !== undefined && parsedPrice > 0 && parsedPrice < 100000 ? parsedPrice : undefined;

  const currency = metaContent('meta[property="product:price:currency"]') || "EUR";

  // Image: social-preview URL, rejected when it looks like a stand-in graphic.
  const rawImage =
    metaContent('meta[property="og:image"]') || metaContent('meta[name="twitter:image"]') || undefined;
  const imageUrl = rawImage && !/placeholder|no-image|logo/i.test(rawImage) ? rawImage : undefined;

  const reach = detectReach(combined);

  return {
    title,
    description,
    price,
    currency,
    imageUrl,
    reachLabel: reach?.label,
    reachMeters: reach?.meters,
    fiberType: detectFiber(combined),
    wavelengths: detectWavelengths(combined),
    connector: detectConnector(combined),
    standardName: detectStandard(combined),
  };
}
|
||||||
|
|
||||||
|
/**
 * Runs one verification pass over Flexoptix product pages.
 *
 * Selects Flexoptix transceiver rows whose `product_page_url` is a
 * `https://www.flexoptix.net/…​.html` page, fetches each page sequentially,
 * parses it with `parseDetail`, fills still-empty detail columns, and records
 * image, price and details verification through the db helpers.
 *
 * Environment:
 * - `FLEXOPTIX_DETAIL_LIMIT`: maximum rows per run, clamped to 1..1000;
 *   defaults to 300 when unset, empty or not a number.
 * - `FLEXOPTIX_DETAIL_ONLY_MISSING`: any value other than "false" restricts
 *   the pass to rows that lack a verification flag or a reach/fiber/wavelength
 *   value.
 *
 * Per-row failures are counted and logged; they do not abort the pass.
 */
async function run(): Promise<void> {
  const vendorId = await ensureVendor("Flexoptix", "reseller", "https://www.flexoptix.net", "https://www.flexoptix.net/en/");

  // A non-numeric value parses to NaN, which would propagate through
  // Math.min/Math.max and reach the database as "LIMIT NaN"; fall back to the
  // default instead.
  const requestedLimit = parseInt(process.env["FLEXOPTIX_DETAIL_LIMIT"] ?? "", 10);
  const limit = Number.isNaN(requestedLimit) ? 300 : Math.max(1, Math.min(1000, requestedLimit));
  const onlyMissing = process.env["FLEXOPTIX_DETAIL_ONLY_MISSING"] !== "false";

  const rows = await pool.query(
    `
    SELECT t.id, t.part_number, t.product_page_url
    FROM transceivers t
    JOIN vendors v ON v.id = t.vendor_id
    WHERE UPPER(v.name) LIKE '%FLEXOPTIX%'
      AND t.product_page_url IS NOT NULL
      AND t.product_page_url != ''
      AND t.product_page_url LIKE 'https://www.flexoptix.net/%'
      AND t.product_page_url LIKE '%.html%'
      AND (
        $2::boolean = false
        OR t.price_verified = false OR t.price_verified IS NULL
        OR t.image_verified = false OR t.image_verified IS NULL
        OR t.details_verified = false OR t.details_verified IS NULL
        OR t.reach_label IS NULL OR t.reach_label = ''
        OR t.fiber_type IS NULL OR t.fiber_type = ''
        OR t.wavelengths IS NULL OR t.wavelengths = ''
      )
    ORDER BY t.price_verified DESC, t.image_verified DESC, t.details_verified ASC, t.part_number
    LIMIT $1
    `,
    [limit, onlyMissing]
  );

  console.log(`=== Flexoptix detail verifier: ${rows.rows.length} products ===`);

  let fetched = 0;
  let failed = 0;
  let prices = 0;
  let images = 0;
  let details = 0;

  for (const row of rows.rows) {
    try {
      const resp = await fetch(row.product_page_url, {
        headers: HEADERS,
        signal: AbortSignal.timeout(20000),
      });
      if (!resp.ok) {
        failed++;
        console.warn(` ${row.part_number}: HTTP ${resp.status}`);
        continue;
      }

      const html = await resp.text();
      const patch = parseDetail(html);

      // Only fills columns that are currently NULL/empty (reach_meters also
      // when it is 0); existing values are never overwritten.
      await pool.query(
        `
        UPDATE transceivers
        SET reach_label = COALESCE(NULLIF(reach_label, ''), $2),
            reach_meters = CASE WHEN reach_meters IS NULL OR reach_meters = 0 THEN COALESCE($3, reach_meters) ELSE reach_meters END,
            fiber_type = COALESCE(NULLIF(fiber_type, ''), $4),
            wavelengths = COALESCE(NULLIF(wavelengths, ''), $5),
            connector = COALESCE(NULLIF(connector, ''), $6),
            standard_name = COALESCE(NULLIF(standard_name, ''), $7),
            product_page_url = COALESCE(NULLIF(product_page_url, ''), $8),
            updated_at = NOW()
        WHERE id = $1
        `,
        [
          row.id,
          patch.reachLabel || null,
          patch.reachMeters ?? null,
          patch.fiberType || null,
          patch.wavelengths || null,
          patch.connector || null,
          patch.standardName || null,
          row.product_page_url,
        ]
      );

      if (patch.imageUrl) {
        const marked = await markImageVerified(row.id, patch.imageUrl);
        if (marked) images++;
      }

      if (patch.price) {
        const updated = await upsertPriceObservation({
          transceiverId: row.id,
          sourceVendorId: vendorId,
          price: patch.price,
          currency: patch.currency,
          stockLevel: "in_stock",
          url: row.product_page_url,
          contentHash: contentHash({ price: patch.price, part: row.part_number }),
        });
        if (updated) prices++;
      }

      const verified = await markDetailsVerified({
        transceiverId: row.id,
        sourceUrl: row.product_page_url,
      });
      if (verified) details++;
      fetched++;

      if (fetched % 25 === 0) {
        console.log(` processed ${fetched}/${rows.rows.length}`);
      }
    } catch (error) {
      failed++;
      // Anything can be thrown; only Error instances carry `.message`.
      const message = error instanceof Error ? error.message : String(error);
      console.warn(` ${row.part_number}: ${message.slice(0, 100)}`);
    } finally {
      // Politeness delay between requests. In `finally` so it also applies
      // after a non-OK response (the `continue` above) — exactly the case
      // where the site may be rate limiting.
      await sleep(800);
    }
  }

  console.log(`=== Flexoptix detail verifier complete: fetched=${fetched}, failed=${failed}, newPrices=${prices}, imagesMarked=${images}, detailsMarked=${details} ===`);
}
|
||||||
|
|
||||||
|
// CLI entry point: run the verifier only when this file is executed directly
// (not when it is imported), then close the database pool. Any failure is
// logged and the process exits with status 1.
if (require.main === module) {
  void (async () => {
    try {
      await run();
      await pool.end();
    } catch (error) {
      console.error("Fatal:", error);
      pool.end();
      process.exit(1);
    }
  })();
}
|
||||||
@ -1,9 +1,87 @@
|
|||||||
# Current TIP Sync State
|
# Current TIP Sync State
|
||||||
|
|
||||||
Updated: 2026-05-09 06:15 UTC
|
Updated: 2026-05-09 07:34 UTC
|
||||||
|
|
||||||
## Newest Work
|
## Newest Work
|
||||||
|
|
||||||
|
- TIP Flexoptix completion push on 2026-05-09:
|
||||||
|
- operator said "feuer frei" (German for "fire at will", i.e. go ahead) after confirming Flexoptix was not yet complete
|
||||||
|
- TIPLLM training pool was updated immediately with the truth rule:
|
||||||
|
- not all Flexoptix products are complete
|
||||||
|
- active catalog coverage must be separated from historical/extra DB rows
|
||||||
|
- never claim 100% verification without exact counters and fresh source timestamps
|
||||||
|
- code improved:
|
||||||
|
- `packages/scraper/src/scrapers/flexoptix-catalog.ts`
|
||||||
|
- generic reach parsing now handles values such as `50 m`, `1,000 m`, decimal/range forms
|
||||||
|
- wavelength parsing now handles multiple `λ... nm` values
|
||||||
|
- product URL is now passed into `findOrCreateScrapedTransceiver`
|
||||||
|
- `packages/scraper/src/scrapers/flexoptix-detail-pages.ts`
|
||||||
|
- new targeted Flexoptix detail-page verifier
|
||||||
|
- fetches only Flexoptix `.html` product pages with missing price/image/detail fields
|
||||||
|
- parses static product page metadata:
|
||||||
|
- title
|
||||||
|
- description
|
||||||
|
- `og:image`
|
||||||
|
- `product:price:amount`
|
||||||
|
- reach
|
||||||
|
- fiber type
|
||||||
|
- wavelengths
|
||||||
|
- connector
|
||||||
|
- standard name
|
||||||
|
- writes only DB evidence from Flexoptix pages, no external AI
|
||||||
|
- live run results on Erik:
|
||||||
|
- `pnpm -C packages/scraper build` passed
|
||||||
|
- improved catalog run completed:
|
||||||
|
- `Total unique products after GraphQL: 615`
|
||||||
|
- `Flexoptix Catalog Complete: 615 products, 0 prices`
|
||||||
|
- details improved from:
|
||||||
|
- `details_verified: 500`
|
||||||
|
- `price+image+details: 496`
|
||||||
|
- `fully_verified: 496`
|
||||||
|
- after catalog parser improvement:
|
||||||
|
- `details_verified: 606`
|
||||||
|
- `price+image+details: 602`
|
||||||
|
- `fully_verified: 602`
|
||||||
|
- detail verifier run:
|
||||||
|
- target: `191` real `.html` product pages
|
||||||
|
- fetched: `191`
|
||||||
|
- failed: `0`
|
||||||
|
- new/updated price observations: `177`
|
||||||
|
- images marked: `187`
|
||||||
|
- details marked: `185`
|
||||||
|
- after detail verifier and explicit BiDi correction:
|
||||||
|
- total Flexoptix rows: `744`
|
||||||
|
- HTML product-like rows: `626`
|
||||||
|
- price verified: `626`
|
||||||
|
- image verified: `622`
|
||||||
|
- details verified: `624`
|
||||||
|
- price+image+details verified: `620`
|
||||||
|
- fully verified: `620`
|
||||||
|
- filter/category rows with no verification: `108`
|
||||||
|
- other non-product/generic rows with no verification: `10`
|
||||||
|
- manual evidence correction:
|
||||||
|
- four BiDi SFP products had `1,000 m` in the Flexoptix title
|
||||||
|
- updated from source evidence:
|
||||||
|
- `S.B1312.M.DIL`
|
||||||
|
- `S.B1312.M.DL`
|
||||||
|
- `S.B1512.M.DIL`
|
||||||
|
- `S.B1512.M.DL`
|
||||||
|
- set:
|
||||||
|
- `reach_label=1000m`
|
||||||
|
- `reach_meters=1000`
|
||||||
|
- `fiber_type=MMF`
|
||||||
|
- `details_verified=true`
|
||||||
|
- remaining truth:
|
||||||
|
- active/product-like Flexoptix rows are much closer to complete
|
||||||
|
- not all `744` Flexoptix rows can honestly be 100% verified because `118` are filter/category/generic/non-product URLs rather than concrete product pages
|
||||||
|
- remaining HTML product-like gaps observed before SSH became unavailable:
|
||||||
|
- `4` product-like rows without image verification
|
||||||
|
- `2` FLEXBOX/accessory-like rows without reach/details
|
||||||
|
- operational note:
|
||||||
|
- Erik SSH became unavailable with `connection refused` after the last verification checks
|
||||||
|
- public TIP HTTPS still responded through Cloudflare
|
||||||
|
- no further live commands were started after SSH refused
|
||||||
|
|
||||||
- TIP Flexoptix price truth recheck on 2026-05-09:
|
- TIP Flexoptix price truth recheck on 2026-05-09:
|
||||||
- operator question:
|
- operator question:
|
||||||
- are all Flexoptix prices, images and information present
|
- are all Flexoptix prices, images and information present
|
||||||
|
|||||||
98
sync/history/2026-05-09-flexoptix-completion-push.md
Normal file
98
sync/history/2026-05-09-flexoptix-completion-push.md
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
# Flexoptix Completion Push
|
||||||
|
|
||||||
|
Date: 2026-05-09
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
|
||||||
|
Push Flexoptix products as far as possible toward complete automated verification without manually approving incomplete data.
|
||||||
|
|
||||||
|
## Code Changes
|
||||||
|
|
||||||
|
- `packages/scraper/src/scrapers/flexoptix-catalog.ts`
|
||||||
|
- Added generic reach parsing for values such as `50 m`, `1,000 m`, decimal values, and ranges.
|
||||||
|
- Improved wavelength parsing for multiple `λ... nm` values.
|
||||||
|
- Passed `productUrl` into `findOrCreateScrapedTransceiver`.
|
||||||
|
|
||||||
|
- `packages/scraper/src/scrapers/flexoptix-detail-pages.ts`
|
||||||
|
- Added a targeted Flexoptix detail-page verifier.
|
||||||
|
- Fetches only real Flexoptix `.html` product pages with missing signals.
|
||||||
|
- Parses static product page evidence:
|
||||||
|
- title
|
||||||
|
- description
|
||||||
|
- `og:image`
|
||||||
|
- `product:price:amount`
|
||||||
|
- reach
|
||||||
|
- fiber type
|
||||||
|
- wavelengths
|
||||||
|
- connector
|
||||||
|
- standard name
|
||||||
|
|
||||||
|
## Live Runs
|
||||||
|
|
||||||
|
- Built on Erik:
|
||||||
|
- `pnpm -C packages/scraper build`
|
||||||
|
- Ran improved Flexoptix catalog scraper:
|
||||||
|
- `Total unique products after GraphQL: 615`
|
||||||
|
- `Flexoptix Catalog Complete: 615 products, 0 prices`
|
||||||
|
- Ran detail-page verifier:
|
||||||
|
- target: `191`
|
||||||
|
- fetched: `191`
|
||||||
|
- failed: `0`
|
||||||
|
- new/updated price observations: `177`
|
||||||
|
- images marked: `187`
|
||||||
|
- details marked: `185`
|
||||||
|
|
||||||
|
## Verification Improvement
|
||||||
|
|
||||||
|
Before the completion push:
|
||||||
|
|
||||||
|
- details verified: `500`
|
||||||
|
- price + image + details verified: `496`
|
||||||
|
- fully verified: `496`
|
||||||
|
|
||||||
|
After catalog parser improvement:
|
||||||
|
|
||||||
|
- details verified: `606`
|
||||||
|
- price + image + details verified: `602`
|
||||||
|
- fully verified: `602`
|
||||||
|
|
||||||
|
After detail verifier and explicit BiDi correction:
|
||||||
|
|
||||||
|
- total Flexoptix rows: `744`
|
||||||
|
- HTML product-like rows: `626`
|
||||||
|
- price verified: `626`
|
||||||
|
- image verified: `622`
|
||||||
|
- details verified: `624`
|
||||||
|
- price + image + details verified: `620`
|
||||||
|
- fully verified: `620`
|
||||||
|
- filter/category rows with no verification: `108`
|
||||||
|
- other non-product/generic rows with no verification: `10`
|
||||||
|
|
||||||
|
## Source Evidence Correction
|
||||||
|
|
||||||
|
Four BiDi SFP products had `1,000 m` in the Flexoptix page title. They were corrected from Flexoptix source evidence:
|
||||||
|
|
||||||
|
- `S.B1312.M.DIL`
|
||||||
|
- `S.B1312.M.DL`
|
||||||
|
- `S.B1512.M.DIL`
|
||||||
|
- `S.B1512.M.DL`
|
||||||
|
|
||||||
|
Set:
|
||||||
|
|
||||||
|
- `reach_label=1000m`
|
||||||
|
- `reach_meters=1000`
|
||||||
|
- `fiber_type=MMF`
|
||||||
|
- `details_verified=true`
|
||||||
|
|
||||||
|
## Remaining Truth
|
||||||
|
|
||||||
|
Do not claim all `744` Flexoptix rows are complete. The remaining unverified rows are mostly filter/category/generic URLs rather than concrete product pages.
|
||||||
|
|
||||||
|
Remaining product-like gaps observed before SSH became unavailable:
|
||||||
|
|
||||||
|
- `4` product-like rows without image verification
|
||||||
|
- `2` FLEXBOX/accessory-like rows without reach/details
|
||||||
|
|
||||||
|
## Operational Note
|
||||||
|
|
||||||
|
After the last verification checks, SSH to Erik returned `connection refused`. Public TIP HTTPS still responded through Cloudflare. No further live commands were started after SSH refused.
|
||||||
Loading…
x
Reference in New Issue
Block a user