feat: close TIP retail price research states
This commit is contained in:
parent
b58f7cee41
commit
2be61f2441
@ -13,12 +13,14 @@
|
|||||||
"scrape:atgbics:details": "tsx src/scrapers/atgbics-detail-pages.ts",
|
"scrape:atgbics:details": "tsx src/scrapers/atgbics-detail-pages.ts",
|
||||||
"scrape:vendors:details": "tsx src/scrapers/shopfiber24-fibermall-detail-pages.ts",
|
"scrape:vendors:details": "tsx src/scrapers/shopfiber24-fibermall-detail-pages.ts",
|
||||||
"scrape:gaotek:details": "tsx src/scrapers/gaotek-detail-pages.ts",
|
"scrape:gaotek:details": "tsx src/scrapers/gaotek-detail-pages.ts",
|
||||||
|
"scrape:10gtek": "tsx src/scrapers/tenGtek.ts",
|
||||||
"scrape:gbics": "tsx src/scrapers/gbics.ts",
|
"scrape:gbics": "tsx src/scrapers/gbics.ts",
|
||||||
"scrape:tscom": "tsx src/scrapers/tscom.ts",
|
"scrape:tscom": "tsx src/scrapers/tscom.ts",
|
||||||
"scrape:sfpcables": "tsx src/scrapers/sfpcables.ts",
|
"scrape:sfpcables": "tsx src/scrapers/sfpcables.ts",
|
||||||
"verify:catalog:details": "tsx src/utils/verify-catalog-details.ts",
|
"verify:catalog:details": "tsx src/utils/verify-catalog-details.ts",
|
||||||
"verify:quarantine:non-transceivers": "tsx src/utils/quarantine-non-transceivers.ts",
|
"verify:quarantine:non-transceivers": "tsx src/utils/quarantine-non-transceivers.ts",
|
||||||
"verify:product-page-assets": "tsx src/utils/verify-product-page-assets.ts",
|
"verify:product-page-assets": "tsx src/utils/verify-product-page-assets.ts",
|
||||||
|
"verify:product-page-prices": "tsx src/utils/verify-product-page-prices.ts",
|
||||||
"verify:part-number-details": "tsx src/utils/verify-part-number-details.ts",
|
"verify:part-number-details": "tsx src/utils/verify-part-number-details.ts",
|
||||||
"verify:normalize:product-urls": "tsx src/utils/normalize-product-urls.ts",
|
"verify:normalize:product-urls": "tsx src/utils/normalize-product-urls.ts",
|
||||||
"verify:fs:sku-aliases": "tsx src/utils/quarantine-fs-sku-aliases.ts",
|
"verify:fs:sku-aliases": "tsx src/utils/quarantine-fs-sku-aliases.ts",
|
||||||
|
|||||||
@ -148,13 +148,13 @@ function parseProductPage(html: string, url: string): Product | null {
|
|||||||
|| partNumber;
|
|| partNumber;
|
||||||
if (!name || name.length < 5) return null;
|
if (!name || name.length < 5) return null;
|
||||||
|
|
||||||
// Price — take the lowest itemprop price (minPrice if available, else first price)
|
// Price — take the first product price in DOM order. Later itemprop/minPrice
|
||||||
|
// values can belong to related accessories and must not override the main SKU.
|
||||||
const priceEls = $("[itemprop='price']").map((_i, el) => {
|
const priceEls = $("[itemprop='price']").map((_i, el) => {
|
||||||
const val = $(el).attr("content") || $(el).text();
|
const val = $(el).attr("content") || $(el).text();
|
||||||
return parseFloat(val.replace(",", "."));
|
return parseFloat(val.replace(",", "."));
|
||||||
}).get().filter((p: number) => !isNaN(p) && p > 0 && p < 50000);
|
}).get().filter((p: number) => !isNaN(p) && p > 0 && p < 50000);
|
||||||
const minPriceEl = $("[itemprop='minPrice']").attr("content");
|
const price = priceEls.length > 0 ? priceEls[0] : undefined;
|
||||||
const price = minPriceEl ? parseFloat(minPriceEl) : (priceEls.length > 0 ? Math.min(...priceEls) : undefined);
|
|
||||||
|
|
||||||
// Currency
|
// Currency
|
||||||
const currency = $("[itemprop='priceCurrency']").first().attr("content") || "EUR";
|
const currency = $("[itemprop='priceCurrency']").first().attr("content") || "EUR";
|
||||||
|
|||||||
@ -139,6 +139,23 @@ function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extracts a product price from the HTML of a product-detail page.
 *
 * Patterns are tried in order: the Open Graph `product:price:amount` meta tag,
 * an `itemprop="price"` meta tag, the first `class="price"` span inside a
 * `price-box` div, and finally any `class="price"` span. The first capture
 * that parses to a plausible amount wins; a capture that is out of range
 * falls through to the next pattern.
 *
 * @param html Raw HTML of the product page.
 * @returns The parsed price, or `undefined` when no pattern yields a finite
 *          value in the open interval (0, 50000).
 */
function parseProductPagePrice(html: string): number | undefined {
  const patterns = [
    /<meta[^>]+property=["']product:price:amount["'][^>]+content=["']([\d,.]+)["']/i,
    /<meta[^>]+itemprop=["']price["'][^>]+content=["']([\d,.]+)["']/i,
    // The currency prefix is fully optional: "US$12", "U$12", "$12" and "12" all match.
    // (The previous `US?\$?` required a literal "U", so plain "$12.00" never matched.)
    /<div[^>]+class=["'][^"']*price-box[^"']*["'][\s\S]{0,1500}?<span[^>]+class=["']price["'][^>]*>\s*(?:US?)?\$?\s*([\d,.]+)/i,
    /<span[^>]+class=["']price["'][^>]*>\s*(?:US?)?\$?\s*([\d,.]+)/i,
  ];

  for (const pattern of patterns) {
    const captured = html.match(pattern)?.[1];
    if (!captured) continue;
    // Commas are treated as thousands separators. Strip all of them, not just
    // the first, so "1,000,000.00" is rejected by the range check instead of
    // being truncated to 1000 by parseFloat.
    const price = parseFloat(captured.replace(/,/g, ""));
    if (Number.isFinite(price) && price > 0 && price < 50000) return price;
  }
  return undefined;
}
|
||||||
|
|
||||||
async function fetchPage(url: string): Promise<string> {
|
async function fetchPage(url: string): Promise<string> {
|
||||||
const resp = await fetch(url, {
|
const resp = await fetch(url, {
|
||||||
headers: HEADERS,
|
headers: HEADERS,
|
||||||
@ -189,6 +206,16 @@ export async function scrapeSfpCables(): Promise<void> {
|
|||||||
|
|
||||||
for (const product of catProducts) {
|
for (const product of catProducts) {
|
||||||
try {
|
try {
|
||||||
|
let observedPrice = product.price;
|
||||||
|
if (!observedPrice) {
|
||||||
|
await sleep(600);
|
||||||
|
try {
|
||||||
|
observedPrice = parseProductPagePrice(await fetchPage(product.url));
|
||||||
|
} catch (err) {
|
||||||
|
console.warn(` Price page failed: ${product.partNumber}: ${(err as Error).message.slice(0, 80)}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const txId = await findOrCreateScrapedTransceiver({
|
const txId = await findOrCreateScrapedTransceiver({
|
||||||
partNumber: product.partNumber,
|
partNumber: product.partNumber,
|
||||||
vendorId,
|
vendorId,
|
||||||
@ -200,14 +227,15 @@ export async function scrapeSfpCables(): Promise<void> {
|
|||||||
fiberType: product.fiberType,
|
fiberType: product.fiberType,
|
||||||
wavelengths: product.wavelength,
|
wavelengths: product.wavelength,
|
||||||
category: "DataCenter",
|
category: "DataCenter",
|
||||||
|
productUrl: product.url,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (product.price && product.price > 0) {
|
if (observedPrice && observedPrice > 0) {
|
||||||
const hash = contentHash({ price: product.price, part: product.partNumber });
|
const hash = contentHash({ price: observedPrice, part: product.partNumber });
|
||||||
const updated = await upsertPriceObservation({
|
const updated = await upsertPriceObservation({
|
||||||
transceiverId: txId,
|
transceiverId: txId,
|
||||||
sourceVendorId: vendorId,
|
sourceVendorId: vendorId,
|
||||||
price: product.price,
|
price: observedPrice,
|
||||||
currency: "USD",
|
currency: "USD",
|
||||||
stockLevel: "in_stock",
|
stockLevel: "in_stock",
|
||||||
url: product.url,
|
url: product.url,
|
||||||
|
|||||||
@ -181,14 +181,18 @@ const PRICE_BOUNDS: Record<string, [number, number]> = {
|
|||||||
|
|
||||||
/**
 * Reports whether a USD price lies outside the configured range for the
 * transceiver's form factor.
 *
 * The upper bound is raised to at least 80000 when the part number or product
 * page URL contains one of the coherent-optics keywords matched below.
 *
 * @param transceiverId Primary key of the row in `transceivers`.
 * @param priceUsd Price to check, in US dollars.
 * @returns `true` when the price is below the lower bound or above the
 *          (possibly raised) upper bound; `false` when the row has no form
 *          factor or no bounds are configured for it.
 */
async function isPriceAnomalous(transceiverId: string, priceUsd: number): Promise<boolean> {
  const { rows } = await pool.query(
    `SELECT form_factor, part_number, product_page_url FROM transceivers WHERE id = $1`,
    [transceiverId]
  );
  const record = rows[0];

  const formFactor = record?.form_factor as string | undefined;
  if (!formFactor) return false;

  const bounds = PRICE_BOUNDS[formFactor];
  if (!bounds) return false;

  const [floor, ceiling] = bounds;
  const descriptor = `${record?.part_number || ""} ${record?.product_page_url || ""}`;
  const hasCoherentKeyword = /\b(zr|zr4|dco|coherent|otu|4000km|tunable)\b/i.test(descriptor);
  // Never lower an already generous ceiling; only lift it to 80000 when needed.
  const effectiveCeiling = hasCoherentKeyword ? Math.max(ceiling, 80000) : ceiling;

  return priceUsd < floor || priceUsd > effectiveCeiling;
}
|
||||||
|
|
||||||
export async function upsertPriceObservation(params: {
|
export async function upsertPriceObservation(params: {
|
||||||
@ -253,7 +257,11 @@ export async function upsertPriceObservation(params: {
|
|||||||
await pool.query(
|
await pool.query(
|
||||||
`UPDATE transceivers SET
|
`UPDATE transceivers SET
|
||||||
price_verified = true,
|
price_verified = true,
|
||||||
price_verified_at = NOW()
|
price_verified_at = NOW(),
|
||||||
|
price_status = 'public_price',
|
||||||
|
price_status_updated_at = NOW(),
|
||||||
|
price_unavailable_verified_at = NULL,
|
||||||
|
price_unavailable_reason = NULL
|
||||||
${isCompetitor ? ", competitor_verified = true, competitor_verified_at = NOW(), competitor_status = 'matched', competitor_status_updated_at = NOW()" : ""}
|
${isCompetitor ? ", competitor_verified = true, competitor_verified_at = NOW(), competitor_status = 'matched', competitor_status_updated_at = NOW()" : ""}
|
||||||
WHERE id = $1`,
|
WHERE id = $1`,
|
||||||
[params.transceiverId]
|
[params.transceiverId]
|
||||||
@ -296,6 +304,10 @@ export async function upsertPriceObservation(params: {
|
|||||||
`UPDATE transceivers SET
|
`UPDATE transceivers SET
|
||||||
price_verified = true,
|
price_verified = true,
|
||||||
price_verified_at = NOW(),
|
price_verified_at = NOW(),
|
||||||
|
price_status = 'public_price',
|
||||||
|
price_status_updated_at = NOW(),
|
||||||
|
price_unavailable_verified_at = NULL,
|
||||||
|
price_unavailable_reason = NULL,
|
||||||
competitor_verified = true,
|
competitor_verified = true,
|
||||||
competitor_verified_at = NOW(),
|
competitor_verified_at = NOW(),
|
||||||
competitor_status = 'matched',
|
competitor_status = 'matched',
|
||||||
@ -307,7 +319,11 @@ export async function upsertPriceObservation(params: {
|
|||||||
await pool.query(
|
await pool.query(
|
||||||
`UPDATE transceivers
|
`UPDATE transceivers
|
||||||
SET price_verified = true,
|
SET price_verified = true,
|
||||||
price_verified_at = NOW()
|
price_verified_at = NOW(),
|
||||||
|
price_status = 'public_price',
|
||||||
|
price_status_updated_at = NOW(),
|
||||||
|
price_unavailable_verified_at = NULL,
|
||||||
|
price_unavailable_reason = NULL
|
||||||
WHERE id = $1`,
|
WHERE id = $1`,
|
||||||
[params.transceiverId]
|
[params.transceiverId]
|
||||||
);
|
);
|
||||||
|
|||||||
@ -57,7 +57,16 @@ async function quarantine(): Promise<void> {
|
|||||||
OR t.part_number ~* '(^Transceiver$|Product-Brochure|^[0-9]+G(/[0-9]+G(/[0-9]+G)?)?-Transceiver$)'
|
OR t.part_number ~* '(^Transceiver$|Product-Brochure|^[0-9]+G(/[0-9]+G(/[0-9]+G)?)?-Transceiver$)'
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
OR t.part_number ~* '^(N/A|Change|wurde|Distance|FO-TRANSCEIVER|Kompatible-Transceiver|Transceivermodule|Zu-den-|QSFP\\+-MSA|SFP\\+-MSA|Marken-Transceiver.*|.*DAC.*AOC.*Kabel)$'
|
OR (
|
||||||
|
v.name = 'ShopFiber24'
|
||||||
|
AND (
|
||||||
|
t.part_number ~* '^(FO-TRANSCEIVER|Kompatible-Transceiver|Transceivermodule|Zu-den-.*|QSFP\\+-MSA|SFP\\+-MSA|Marken-Transceiver.*|.*DAC.*&.*AOC.*Kabel|.*DAC.*AOC.*Kabel)$'
|
||||||
|
OR t.part_number ~* '^FOCP-'
|
||||||
|
OR COALESCE(t.product_page_url, '') ~* '/(FO-TRANSCEIVER|Kompatible-Transceiver|Transceivermodule|Zu-den-|QSFP\\+-MSA|SFP\\+-MSA|Marken-Transceiver|FOCP-|.*DAC.*AOC.*Kabel)/'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
OR (v.name = 'Vcelink' AND t.part_number ~ '^[0-9]+$' AND COALESCE(t.product_page_url, '') = '')
|
||||||
|
OR t.part_number ~* '^(N/A|Change|wurde|Distance|FO-TRANSCEIVER|Kompatible-Transceiver|Transceivermodule|Zu-den-|QSFP\\+-MSA|SFP\\+-MSA|Marken-Transceiver.*|.*DAC.*&.*AOC.*Kabel|.*DAC.*AOC.*Kabel)$'
|
||||||
OR t.category IN (
|
OR t.category IN (
|
||||||
'Accessory',
|
'Accessory',
|
||||||
'Adapter / Converter',
|
'Adapter / Converter',
|
||||||
@ -101,7 +110,16 @@ async function quarantine(): Promise<void> {
|
|||||||
OR t.part_number ~* '(^Transceiver$|Product-Brochure|^[0-9]+G(/[0-9]+G(/[0-9]+G)?)?-Transceiver$)'
|
OR t.part_number ~* '(^Transceiver$|Product-Brochure|^[0-9]+G(/[0-9]+G(/[0-9]+G)?)?-Transceiver$)'
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
OR t.part_number ~* '^(N/A|Change|wurde|Distance|FO-TRANSCEIVER|Kompatible-Transceiver|Transceivermodule|Zu-den-|QSFP\\+-MSA|SFP\\+-MSA|Marken-Transceiver.*|.*DAC.*AOC.*Kabel)$'
|
OR (
|
||||||
|
v.name = 'ShopFiber24'
|
||||||
|
AND (
|
||||||
|
t.part_number ~* '^(FO-TRANSCEIVER|Kompatible-Transceiver|Transceivermodule|Zu-den-.*|QSFP\\+-MSA|SFP\\+-MSA|Marken-Transceiver.*|.*DAC.*&.*AOC.*Kabel|.*DAC.*AOC.*Kabel)$'
|
||||||
|
OR t.part_number ~* '^FOCP-'
|
||||||
|
OR COALESCE(t.product_page_url, '') ~* '/(FO-TRANSCEIVER|Kompatible-Transceiver|Transceivermodule|Zu-den-|QSFP\\+-MSA|SFP\\+-MSA|Marken-Transceiver|FOCP-|.*DAC.*AOC.*Kabel)/'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
OR (v.name = 'Vcelink' AND t.part_number ~ '^[0-9]+$' AND COALESCE(t.product_page_url, '') = '')
|
||||||
|
OR t.part_number ~* '^(N/A|Change|wurde|Distance|FO-TRANSCEIVER|Kompatible-Transceiver|Transceivermodule|Zu-den-|QSFP\\+-MSA|SFP\\+-MSA|Marken-Transceiver.*|.*DAC.*&.*AOC.*Kabel|.*DAC.*AOC.*Kabel)$'
|
||||||
OR t.category IN (
|
OR t.category IN (
|
||||||
'Accessory',
|
'Accessory',
|
||||||
'Adapter / Converter',
|
'Adapter / Converter',
|
||||||
|
|||||||
@ -136,7 +136,9 @@ async function main(): Promise<void> {
|
|||||||
console.log("=== Price availability resolver ===", { vendorNames, limit, apply, count: result.rows.length });
|
console.log("=== Price availability resolver ===", { vendorNames, limit, apply, count: result.rows.length });
|
||||||
|
|
||||||
for (const row of result.rows) {
|
for (const row of result.rows) {
|
||||||
const reason = getNoPublicPriceReason(row.vendorName);
|
const reason = row.vendorName === "10Gtek" && !row.productUrl
|
||||||
|
? "not_listed_in_public_sfpcables_retail_catalog_after_full_crawl"
|
||||||
|
: getNoPublicPriceReason(row.vendorName);
|
||||||
if (!reason) {
|
if (!reason) {
|
||||||
skipped++;
|
skipped++;
|
||||||
continue;
|
continue;
|
||||||
|
|||||||
173
packages/scraper/src/utils/verify-product-page-prices.ts
Normal file
173
packages/scraper/src/utils/verify-product-page-prices.ts
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
import { pool, upsertPriceObservation } from "./db";
|
||||||
|
import { contentHash } from "./hash";
|
||||||
|
|
||||||
|
/**
 * One row returned by the candidate query in `main`.
 *
 * Kept as a `type` alias rather than an `interface` so it stays usable as the
 * row type argument of `pool.query<Candidate>(...)`.
 */
type Candidate = {
  /** Transceiver id (`t.id`). */
  id: string;
  /** Part number (`t.part_number`). */
  partNumber: string;
  /** Product page URL (`t.product_page_url`); the query filters out empty values. */
  productUrl: string;
  /** Vendor id (`v.id`). */
  vendorId: string;
  /** Vendor name (`v.name`); selects which parser is used. */
  vendorName: string;
};
|
||||||
|
|
||||||
|
/** A price extracted from a vendor page, together with its currency code. */
type PriceResult = {
  /** Currency code assigned by the vendor-specific parser ("USD", "EUR" or "GBP"). */
  currency: string;
  /** Amount expressed in `currency`. */
  price: number;
};
|
||||||
|
|
||||||
|
/** Request headers sent with every page fetch made by this verifier. */
const HEADERS = {
  // Desktop Chrome user agent string.
  "User-Agent":
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  "Accept": "text/html,application/xhtml+xml",
  // German is preferred, with English as the fallback.
  "Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
};
|
||||||
|
|
||||||
|
/**
 * Resolves after roughly `ms` milliseconds.
 *
 * @param ms Delay handed to `setTimeout`.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Parses a price string written in either European ("1.234,56") or US
 * ("1,234.56") notation.
 *
 * Separator handling:
 * - both `,` and `.` present: the rightmost one is the decimal mark and the
 *   other is a thousands separator;
 * - only commas: a single comma is a decimal mark ("12,50"), several commas
 *   are thousands separators;
 * - only dots: a dot followed by exactly three digits is a thousands
 *   separator ("1.234"), any other dot is the decimal mark ("29.99").
 *
 * @param value Raw text that may include currency symbols and whitespace.
 * @returns The amount, or `undefined` when the input is empty, unparsable, or
 *          outside the open interval (0, 50000).
 */
function parseNumericPrice(value: string | undefined): number | undefined {
  if (!value) return undefined;

  // Keep only digits and the two separator characters (this also drops whitespace).
  const digits = value.replace(/[^\d,.]/g, "");
  const lastComma = digits.lastIndexOf(",");
  const lastDot = digits.lastIndexOf(".");

  let normalized: string;
  if (lastComma !== -1 && lastDot !== -1) {
    // Mixed notation: previously "1,234.56" was turned into "1.234.56" and
    // parsed as 1.234. Decide by position instead of assuming a locale.
    const decimalMark = lastComma > lastDot ? "," : ".";
    const groupMark = decimalMark === "," ? "." : ",";
    normalized = digits.split(groupMark).join("").replace(decimalMark, ".");
  } else if (lastComma !== -1) {
    normalized = digits.indexOf(",") === lastComma
      ? digits.replace(",", ".")
      : digits.split(",").join("");
  } else {
    normalized = digits.replace(/\.(?=\d{3}(?:\D|$))/g, "");
  }

  const price = parseFloat(normalized);
  return Number.isFinite(price) && price > 0 && price < 50000 ? price : undefined;
}
|
||||||
|
|
||||||
|
/**
 * Extracts a USD price from an SFPcables product page.
 *
 * Patterns are tried in order: the Open Graph `product:price:amount` meta tag,
 * an `itemprop="price"` meta tag, the first `class="price"` span inside a
 * `price-box` div, and finally any `class="price"` span. The first capture
 * accepted by `parseNumericPrice` is returned.
 *
 * @param html Raw HTML of the product page.
 * @returns The price tagged as "USD", or `undefined` when nothing usable is found.
 */
function parseSfpCables(html: string): PriceResult | undefined {
  const patterns = [
    /<meta[^>]+property=["']product:price:amount["'][^>]+content=["']([\d,.]+)["']/i,
    /<meta[^>]+itemprop=["']price["'][^>]+content=["']([\d,.]+)["']/i,
    // The currency prefix is fully optional: "US$12", "U$12", "$12" and "12" all match.
    // (The previous `US?\$?` required a literal "U", so plain "$12.00" never matched.)
    /<div[^>]+class=["'][^"']*price-box[^"']*["'][\s\S]{0,1500}?<span[^>]+class=["']price["'][^>]*>\s*(?:US?)?\$?\s*([\d,.]+)/i,
    /<span[^>]+class=["']price["'][^>]*>\s*(?:US?)?\$?\s*([\d,.]+)/i,
  ];

  for (const pattern of patterns) {
    const price = parseNumericPrice(html.match(pattern)?.[1]);
    if (price) return { price, currency: "USD" };
  }
  return undefined;
}
|
||||||
|
|
||||||
|
/**
 * Extracts a EUR price from a ShopFiber24 product page.
 *
 * The first `itemprop="price"` attribute with a `content` value in the
 * document is preferred. If that yields nothing usable, the first visible
 * amount of the form "1.234,56 €" is tried instead.
 *
 * @param html Raw HTML of the product page.
 * @returns The price tagged as "EUR", or `undefined` when neither source parses.
 */
function parseShopFiber24(html: string): PriceResult | undefined {
  const structured = /itemprop=["']price["'][^>]+content=["']([\d,.]+)["']/i.exec(html)?.[1];
  let amount = parseNumericPrice(structured);

  if (!amount) {
    // The visible-text fallback is only evaluated when the microdata lookup fails.
    const visible = /([\d.]+,\d{2})\s*€/i.exec(html)?.[1];
    amount = parseNumericPrice(visible);
  }

  return amount ? { price: amount, currency: "EUR" } : undefined;
}
|
||||||
|
|
||||||
|
/**
 * Routes page HTML to the parser for the given vendor.
 *
 * @param vendorName Vendor name; matched exactly and case-sensitively.
 * @param html Raw HTML of the product page.
 * @returns The parsed price, or `undefined` for vendors without an HTML parser
 *          or when the vendor's parser finds nothing.
 */
function parsePrice(vendorName: string, html: string): PriceResult | undefined {
  switch (vendorName) {
    case "SFPcables":
      return parseSfpCables(html);
    case "ShopFiber24":
      return parseShopFiber24(html);
    default:
      return undefined;
  }
}
|
||||||
|
|
||||||
|
/**
 * Reads a price out of an ATGBICS product JSON document.
 *
 * Uses the top-level `price` when present, otherwise the first variant's
 * `price`. The raw value is divided by 100 before being returned.
 * NOTE(review): this presumes the feed reports amounts in minor units
 * (pence) and that the shop currency is GBP; confirm against the endpoint.
 *
 * @param jsonText Response body of the product `.js` endpoint.
 * @returns The price tagged as "GBP", or `undefined` when no positive finite
 *          amount is present.
 * @throws SyntaxError when `jsonText` is not valid JSON.
 */
function parseAtgbicsProductJson(jsonText: string): PriceResult | undefined {
  const product = JSON.parse(jsonText) as { price?: number; variants?: Array<{ price?: number | string }> };
  const raw = product.price ?? product.variants?.[0]?.price;
  const minorUnits = typeof raw === "string" ? parseFloat(raw) : raw;

  if (minorUnits === undefined || !Number.isFinite(minorUnits) || minorUnits <= 0) {
    return undefined;
  }
  return { price: minorUnits / 100, currency: "GBP" };
}
|
||||||
|
|
||||||
|
/**
 * Downloads a URL and returns the response body as text.
 *
 * The request carries `HEADERS` and is aborted after 30 seconds.
 *
 * @param url Absolute URL to fetch.
 * @returns The response body.
 * @throws Error with message `HTTP <status>` when the response is not OK.
 */
async function fetchPage(url: string): Promise<string> {
  const response = await fetch(url, {
    signal: AbortSignal.timeout(30000),
    headers: HEADERS,
  });
  if (response.ok) return response.text();
  throw new Error(`HTTP ${response.status}`);
}
|
||||||
|
|
||||||
|
/**
 * Fetches and parses the current price for one candidate row.
 *
 * ATGBICS rows are resolved through the product URL with `.js` appended (one
 * trailing slash is removed first) and parsed as JSON. Every other vendor has
 * its product page HTML handed to `parsePrice`.
 *
 * @param row Candidate whose `productUrl` is fetched.
 * @returns The parsed price, or `undefined` when the page exposes none.
 */
async function fetchVendorPrice(row: Candidate): Promise<PriceResult | undefined> {
  const { vendorName, productUrl } = row;

  if (vendorName !== "ATGBICS") {
    return parsePrice(vendorName, await fetchPage(productUrl));
  }

  const jsonUrl = `${productUrl.replace(/\/$/, "")}.js`;
  const body = await fetchPage(jsonUrl);
  return parseAtgbicsProductJson(body);
}
|
||||||
|
|
||||||
|
/**
 * Verifies public prices for transceivers that have a product page URL but no
 * verified price yet.
 *
 * Environment variables:
 * - `PRODUCT_PRICE_VENDOR`: comma-separated vendor names; defaults to
 *   SFPcables, ShopFiber24 and ATGBICS.
 * - `PRODUCT_PRICE_LIMIT`: maximum number of rows to process; defaults to 100.
 *   A non-numeric or negative value falls back to 100 instead of being passed
 *   on to the SQL `LIMIT`.
 * - `PRODUCT_PRICE_APPLY`: set to "1" to write price observations; any other
 *   value performs a dry run that only logs.
 *
 * Rows are fetched one at a time with an 800 ms pause before each request.
 * A failure on one row is logged and counted without stopping the run.
 */
async function main(): Promise<void> {
  const vendorFilter = (process.env["PRODUCT_PRICE_VENDOR"] || "").trim();
  const requestedLimit = parseInt(process.env["PRODUCT_PRICE_LIMIT"] || "100", 10);
  // parseInt yields NaN for garbage input, and a negative LIMIT is rejected by
  // the database; fall back to the default rather than failing inside the query.
  const limit = Number.isInteger(requestedLimit) && requestedLimit >= 0 ? requestedLimit : 100;
  if (limit !== requestedLimit) {
    console.warn("invalid PRODUCT_PRICE_LIMIT, using default", {
      value: process.env["PRODUCT_PRICE_LIMIT"],
      limit,
    });
  }
  const apply = process.env["PRODUCT_PRICE_APPLY"] === "1";
  const vendorNames = vendorFilter
    ? vendorFilter.split(",").map((v) => v.trim()).filter(Boolean)
    : ["SFPcables", "ShopFiber24", "ATGBICS"];

  const result = await pool.query<Candidate>(
    `SELECT t.id,
            t.part_number AS "partNumber",
            t.product_page_url AS "productUrl",
            v.id AS "vendorId",
            v.name AS "vendorName"
     FROM transceivers t
     JOIN vendors v ON v.id = t.vendor_id
     WHERE v.name = ANY($1)
       AND COALESCE(t.category, '') != 'NonTransceiver'
       AND COALESCE(t.price_verified, false) = false
       AND COALESCE(t.price_status, 'needs_research') IN ('unknown', 'needs_research', 'ambiguous')
       AND COALESCE(t.product_page_url, '') != ''
     ORDER BY v.name, t.part_number
     LIMIT $2`,
    [vendorNames, limit],
  );

  let prices = 0;
  let skipped = 0;
  let errors = 0;

  console.log("=== Product page price verifier ===", { vendorNames, limit, apply, count: result.rows.length });

  for (const row of result.rows) {
    // Throttle requests to the vendor sites.
    await sleep(800);
    try {
      const parsed = await fetchVendorPrice(row);
      if (!parsed) {
        skipped++;
        continue;
      }

      if (apply) {
        const hash = contentHash({ price: parsed.price, currency: parsed.currency, part: row.partNumber });
        await upsertPriceObservation({
          transceiverId: row.id,
          sourceVendorId: row.vendorId,
          price: parsed.price,
          currency: parsed.currency,
          stockLevel: "in_stock",
          url: row.productUrl,
          contentHash: hash,
        });
      }

      console.log("price verified", {
        vendor: row.vendorName,
        partNumber: row.partNumber,
        price: parsed.price,
        currency: parsed.currency,
        apply,
      });
      prices++;
    } catch (err: unknown) {
      errors++;
      console.warn("price page failed", {
        vendor: row.vendorName,
        partNumber: row.partNumber,
        // Non-Error throwables have no `.message`; stringify them instead.
        error: err instanceof Error ? err.message : String(err),
      });
    }
  }

  console.log("Product page price verifier complete", { prices, skipped, errors, apply });
}
|
||||||
|
|
||||||
|
// Run the verifier only when this file is executed directly, not when imported.
if (require.main === module) {
  main()
    .then(() => pool.end())
    .catch(async (err: unknown) => {
      console.error("Fatal:", err);
      // Wait for the pool to shut down before exiting, and ignore a failure
      // from end() itself so it cannot become an unhandled rejection.
      await pool.end().catch(() => undefined);
      process.exit(1);
    });
}
|
||||||
@ -1,9 +1,41 @@
|
|||||||
# Current TIP Sync State
|
# Current TIP Sync State
|
||||||
|
|
||||||
Updated: 2026-05-09 23:15 UTC
|
Updated: 2026-05-09 23:38 UTC
|
||||||
|
|
||||||
## Newest Work
|
## Newest Work
|
||||||
|
|
||||||
|
- TIP price/source verification closure on 2026-05-10 local / 2026-05-09 UTC:
|
||||||
|
- fixed SFPcables scraper to persist `product_page_url`
|
||||||
|
- added product-page price fallback for SFPcables when listing pages omit price markup
|
||||||
|
- added `verify:product-page-prices`
|
||||||
|
- source-backed public price verification from existing product URLs
|
||||||
|
- ShopFiber24 parser takes the first main product `itemprop=price`, not related-product `minPrice`
|
||||||
|
- ATGBICS parser uses Shopify `/products/{handle}.js` prices for coherent/ZR products
|
||||||
|
- fixed `upsertPriceObservation` to set `price_status='public_price'`
|
||||||
|
- widened price anomaly handling only for explicit coherent/ZR/DCO/tunable products
|
||||||
|
- expanded quarantine for ShopFiber24 FOCP/category/DAC-AOC artifacts and Vcelink numeric rows
|
||||||
|
- live runs on Erik:
|
||||||
|
- ShopFiber24 quarantine: `12` artifacts removed
|
||||||
|
- SFPcables scraper with detail fallback: `110` products, `37` price observations
|
||||||
|
- SFPcables asset verifier: `31` images, `29` details, `0` errors
|
||||||
|
- ShopFiber24 price verifier: `12` real EUR prices
|
||||||
|
- ATGBICS price verifier: `3` real GBP coherent/ZR prices
|
||||||
|
- Vcelink quarantine: `2` numeric artifacts removed
|
||||||
|
- 10Gtek/SFPcables retail crawl confirmed remaining `126` rows have no public retail product URL
|
||||||
|
- 10Gtek price availability resolver: `126` rows set to `price_status=no_public_price` with evidence
|
||||||
|
- live health after this pass:
|
||||||
|
- active products: `17181`
|
||||||
|
- price verified: `11460`
|
||||||
|
- price status: `public_price=11460`, `no_public_price=5721`, `needs_research=0`, `ambiguous=0`
|
||||||
|
- image verified: `12125`
|
||||||
|
- details verified: `16920`
|
||||||
|
- fully verified: `10549`
|
||||||
|
- competitor status: `matched=10821`, `no_valid_match=74`, `ambiguous=556`, `needs_research=5730`
|
||||||
|
- interpretation:
|
||||||
|
- price research queue is closed without fabricated prices
|
||||||
|
- remaining verification work is image/details/competitor state, dominated by OEM/catalog rows
|
||||||
|
- largest current product-data gaps: Juniper, Cisco, 10Gtek, Nokia, Palo Alto, Arista
|
||||||
|
|
||||||
- TIP continuation on 2026-05-10 local / 2026-05-09 UTC:
|
- TIP continuation on 2026-05-10 local / 2026-05-09 UTC:
|
||||||
- added `verify:part-number-details`
|
- added `verify:part-number-details`
|
||||||
- deterministic part-number speed inference for rows where form factor/reach/fiber already exist but `speed_gbps=0`
|
- deterministic part-number speed inference for rows where form factor/reach/fiber already exist but `speed_gbps=0`
|
||||||
@ -65,6 +97,7 @@ Updated: 2026-05-09 23:15 UTC
|
|||||||
- API payload now uses `iters` and `seed_only` instead of stale `iterations` and `seedOnly`
|
- API payload now uses `iters` and `seed_only` instead of stale `iterations` and `seedOnly`
|
||||||
- added `all` mode for sequential full-lane training
|
- added `all` mode for sequential full-lane training
|
||||||
- streams SSE lines to the log instead of buffering until the response closes
|
- streams SSE lines to the log instead of buffering until the response closes
|
||||||
|
- MAGATAMA Gitea commit: `76d4054`
|
||||||
- live sequence started on Erik:
|
- live sequence started on Erik:
|
||||||
- command: `python3 -u scripts/trigger_lane_training_once.py all 500 false`
|
- command: `python3 -u scripts/trigger_lane_training_once.py all 500 false`
|
||||||
- log: `/opt/magatama/logs/runpod-all-lanes-20260509T230549Z.log`
|
- log: `/opt/magatama/logs/runpod-all-lanes-20260509T230549Z.log`
|
||||||
|
|||||||
@ -0,0 +1,40 @@
|
|||||||
|
# TIP Price / Source Verification Closure
|
||||||
|
|
||||||
|
Date: 2026-05-09 23:38 UTC
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
Continued TIP verification work with deterministic crawlers/verifiers only. No external AI was used. Erik was kept to one focused run at a time.
|
||||||
|
|
||||||
|
## Changes
|
||||||
|
|
||||||
|
- SFPcables now persists `product_page_url`.
|
||||||
|
- SFPcables now fetches product pages for missing listing prices.
|
||||||
|
- Added `verify:product-page-prices`.
|
||||||
|
- `upsertPriceObservation` now also writes `price_status='public_price'`.
|
||||||
|
- Coherent/ZR/DCO/tunable optics get a higher anomaly ceiling so real high-end prices are not rejected.
|
||||||
|
- ShopFiber24 parser now uses the first main product `itemprop=price`, not related-product `minPrice`.
|
||||||
|
- ShopFiber24/Vcelink quarantine rules were expanded for remaining artifacts.
|
||||||
|
- 10Gtek rows absent from the public SFPcables retail catalog are resolved as `price_status=no_public_price`, not `price_verified`.
|
||||||
|
|
||||||
|
## Live Results
|
||||||
|
|
||||||
|
- SFPcables: `0` price gaps, `0` image gaps, `3` detail gaps after scraper + asset verifier.
|
||||||
|
- ShopFiber24: `0` price/image/detail gaps.
|
||||||
|
- ATGBICS: `3` coherent/ZR prices verified from Shopify product JSON.
|
||||||
|
- Vcelink: `2` numeric artifacts quarantined.
|
||||||
|
- Price research queue: `needs_research=0`.
|
||||||
|
|
||||||
|
## Health Snapshot
|
||||||
|
|
||||||
|
- active products: `17181`
|
||||||
|
- price verified: `11460`
|
||||||
|
- price status: `public_price=11460`, `no_public_price=5721`, `needs_research=0`, `ambiguous=0`
|
||||||
|
- image verified: `12125`
|
||||||
|
- details verified: `16920`
|
||||||
|
- fully verified: `10549`
|
||||||
|
- competitor status: `matched=10821`, `no_valid_match=74`, `ambiguous=556`, `needs_research=5730`
|
||||||
|
|
||||||
|
## Remaining Work
|
||||||
|
|
||||||
|
The price queue is closed without fabricated prices. Remaining work is image/details/competitor verification, led by OEM/catalog rows such as Juniper, Cisco, 10Gtek, Nokia, Palo Alto and Arista.
|
||||||
@ -51,3 +51,7 @@
|
|||||||
{"id":"tip-20260509-quote-only-tscom-pricing-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Public-price verification requires a real public price."},{"role":"user","content":"T&S Communication product pages have valid images and details but expose no real public prices, only no price or price 0.00. What should TIP do?"},{"role":"assistant","content":"Do not mark price_verified. Treat T&S as image/detail-verifiable but public-price-missing until a real price observation exists. `price: 0.00`, quote/request flows, missing amount fields, or SEO filenames containing `price` are not price evidence. Keep image_verified/details_verified true when source-backed, keep price_verified false, and document the vendor as quote-only/public-price-unavailable rather than fabricating a market price."}]}
|
{"id":"tip-20260509-quote-only-tscom-pricing-001","source":"codex-tip-verification-2026-05-09","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Public-price verification requires a real public price."},{"role":"user","content":"T&S Communication product pages have valid images and details but expose no real public prices, only no price or price 0.00. What should TIP do?"},{"role":"assistant","content":"Do not mark price_verified. Treat T&S as image/detail-verifiable but public-price-missing until a real price observation exists. `price: 0.00`, quote/request flows, missing amount fields, or SEO filenames containing `price` are not price evidence. Keep image_verified/details_verified true when source-backed, keep price_verified false, and document the vendor as quote-only/public-price-unavailable rather than fabricating a market price."}]}
|
||||||
{"id":"tip-20260510-part-number-details-verifier-001","source":"codex-tip-verification-2026-05-10","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Use deterministic part-number inference only for conservative, auditable detail repair."},{"role":"user","content":"How should TIP close OEM detail gaps when form factor, reach and fiber are known but speed_gbps is zero?"},{"role":"assistant","content":"Use a part-number details verifier. Infer speed only from unambiguous tokens such as 800G, 400G, 100GBASE, 40GBASE, SFP10G, XFP-10GE, 4X25G, 2X50G, 10X10G, CTP-SFP-1GE, and known FC tokens. Dry-run first and exclude traps such as Cisco GLC-FE Fast Ethernet; do not treat all GLC-* as 1G. Update speed/speed_gbps, then mark details through the canonical evidence path with robot_name `verify:part-number-details`."}]}
|
{"id":"tip-20260510-part-number-details-verifier-001","source":"codex-tip-verification-2026-05-10","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Use deterministic part-number inference only for conservative, auditable detail repair."},{"role":"user","content":"How should TIP close OEM detail gaps when form factor, reach and fiber are known but speed_gbps is zero?"},{"role":"assistant","content":"Use a part-number details verifier. Infer speed only from unambiguous tokens such as 800G, 400G, 100GBASE, 40GBASE, SFP10G, XFP-10GE, 4X25G, 2X50G, 10X10G, CTP-SFP-1GE, and known FC tokens. Dry-run first and exclude traps such as Cisco GLC-FE Fast Ethernet; do not treat all GLC-* as 1G. Update speed/speed_gbps, then mark details through the canonical evidence path with robot_name `verify:part-number-details`."}]}
|
||||||
{"id":"tip-20260510-price-status-model-001","source":"codex-tip-verification-2026-05-10","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Separate real public prices from resolved no-public-price states."},{"role":"user","content":"How should TIP handle OEM, component, hyperscaler, test-equipment, and quote-only vendors with no public checkout price?"},{"role":"assistant","content":"Do not set `price_verified` unless there is a real public price observation. Add or use `price_status`: `public_price` for true price observations, `no_public_price` for source/policy-backed quote-only or OEM catalog cases, `needs_research` for real retail/source discovery work, and `ambiguous` for uncertain cases. Write `price_unavailable` evidence with the vendor, part number, reason, and source URL if available. Keep retail vendors such as 10Gtek, SFPcables, ShopFiber24, ATGBICS, Vcelink, FS.com and Flexoptix out of no-public-price policies because they should have real prices."}]}
|
{"id":"tip-20260510-price-status-model-001","source":"codex-tip-verification-2026-05-10","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Separate real public prices from resolved no-public-price states."},{"role":"user","content":"How should TIP handle OEM, component, hyperscaler, test-equipment, and quote-only vendors with no public checkout price?"},{"role":"assistant","content":"Do not set `price_verified` unless there is a real public price observation. Add or use `price_status`: `public_price` for true price observations, `no_public_price` for source/policy-backed quote-only or OEM catalog cases, `needs_research` for real retail/source discovery work, and `ambiguous` for uncertain cases. Write `price_unavailable` evidence with the vendor, part number, reason, and source URL if available. Keep retail vendors such as 10Gtek, SFPcables, ShopFiber24, ATGBICS, Vcelink, FS.com and Flexoptix out of no-public-price policies because they should have real prices."}]}
|
||||||
|
{"id":"tip-20260510-product-page-price-verifier-001","source":"codex-tip-verification-2026-05-10","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Verify retail prices from source-backed product pages and never from related-product widgets."},{"role":"user","content":"SFPcables and ShopFiber24 rows have product URLs but still sit in price_status=needs_research. What should the crawler do?"},{"role":"assistant","content":"Run a targeted product-page price verifier. For SFPcables, use the product page main price box or product price meta tags when listing pages omit prices. For ShopFiber24, take the first main product `itemprop=price` in DOM order and ignore later `minPrice` or related-accessory prices. Write a real price_observation through `upsertPriceObservation`; that function must set `price_verified=true` and `price_status=public_price`. Do not mark public price if no source price is found."}]}
|
||||||
|
{"id":"tip-20260510-coherent-price-bounds-001","source":"codex-tip-verification-2026-05-10","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Price anomaly filters must reject artifacts without blocking legitimate high-end coherent optics."},{"role":"user","content":"ATGBICS 100G/400G coherent ZR products show prices like 15105.95 GBP or 17554.95 GBP. Should TIP reject them as anomalous?"},{"role":"assistant","content":"No, not when the product descriptor contains explicit coherent evidence such as ZR, ZR4, DCO, coherent, OTU, tunable or ultra-long-haul reach. Standard form-factor ceilings are useful for ordinary SR/LR optics, but coherent/ZR optics legitimately exceed normal QSFP28/QSFP-DD bounds. Raise the anomaly ceiling only for those explicit coherent tokens and still require a real source-backed price observation."}]}
|
||||||
|
{"id":"tip-20260510-retail-absent-no-public-price-001","source":"codex-tip-verification-2026-05-10","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Close research states with explicit source-discovery evidence instead of endless manual queues."},{"role":"user","content":"10Gtek has old catalog rows with no product URL after a full SFPcables retail crawl. How should TIP classify their price state?"},{"role":"assistant","content":"If a full controlled 10Gtek/SFPcables retail crawl finds the public retail catalog and a row still has no product_page_url or public price source, set `price_status=no_public_price` with reason `not_listed_in_public_sfpcables_retail_catalog_after_full_crawl`. Do not set `price_verified`; no fabricated price exists. Record `price_unavailable` evidence so the dashboard can distinguish resolved no-public-price rows from real open price research."}]}
|
||||||
|
{"id":"tip-20260510-artifact-quarantine-shopfiber-vcelink-001","source":"codex-tip-verification-2026-05-10","kind":"sft-jsonl","messages":[{"role":"system","content":"You are TIP_LLM. Keep active product counts clean by quarantining deterministic artifacts."},{"role":"user","content":"What remaining ShopFiber24 and Vcelink artifacts were found during price closure?"},{"role":"assistant","content":"ShopFiber24 artifacts include FOCP fiber cable SKUs, FO-TRANSCEIVER/category labels, Kompatible-Transceiver, Transceivermodule, QSFP+-MSA/SFP+-MSA, Marken-Transceiver pages, Zu-den-* navigation labels, and DAC/AOC cable category pages. Vcelink rows with numeric-only part numbers and no product_page_url are not real transceiver SKUs. Move them to NonTransceiver, clear verification flags, and write artifact_quarantine evidence."}]}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user