API/finder:
- Add modular chassis support: sibling linecards fetched when is_linecard=true
- Add chassis linecards when system_type=modular
- Extend switch response: system_type, is_linecard, chassis_model, slot_type, flexbox_compat_mode, flexbox_notes, description, switching_capacity_tbps, total_ports, category, lifecycle_status, features, use_cases, linecards[]

API/transceivers:
- Filter price_observations with COALESCE(is_anomalous, false) = false (direct prices + comparable market prices)

Scraper/db:
- Add PRICE_BOUNDS map (per form-factor min/max USD sanity bounds)
- Add isPriceAnomalous() — marks DB price_observations as is_anomalous=true
- Add competitor_verified flag: set true when valid competitor price stored
- upsertPriceObservation: skip prices outside sanity bounds, set competitor_verified

Scraper/hash:
- contentHash() now accepts Record<string,unknown> | string (union type) to support both structured objects and legacy string callers

Scrapers (skylane, tscom, wiitek):
- Fix contentHash() call signature: pass objects not JSON.stringify strings
- Fix wiitek: remove invalid 'name' param, fix t.id → transceiverId

Migrations:
- Add is_anomalous, competitor_verified, competitor_verified_at, image_primary columns
- Recreate sync_fully_verified trigger to include competitor_verified
- Add is_linecard, chassis_model, system_type, slot_type, flexbox_compat_mode, flexbox_notes to switches table
90 lines
4.0 KiB
TypeScript
90 lines
4.0 KiB
TypeScript
/**
 * Wiitek Scraper — wiitek.com
 *
 * Chinese manufacturer/reseller, direct factory prices.
 * Covers: CSFP, SFP-DD, CFP, CFP2, OSFP224, QSFP112 and all standard form factors.
 * Good for price benchmarking.
 *
 * Schedule: every 8h
 */
|
|
import * as cheerio from "cheerio";
|
|
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver } from "../utils/db";
|
|
import { contentHash, parsePrice } from "../utils/hash";
|
|
import { logger } from "../utils/logger";
|
|
|
|
// Origin of the Wiitek storefront. Category paths are appended to it to build
// listing-page URLs, and it is passed to ensureVendor() as the vendor's site.
const BASE = "https://www.wiitek.com";
|
|
|
|
/**
 * (URL slug, form factor) pairs for every Wiitek category listing that is scraped.
 * The slug is the part of the listing path that precedes "-Transceiver/".
 */
const WIITEK_CATEGORY_SLUGS: ReadonlyArray<readonly [string, string]> = [
  ["SFP", "SFP"],
  ["SFP-Plus", "SFP+"],
  ["SFP28", "SFP28"],
  ["SFP56", "SFP56"],
  ["SFP-DD", "SFP-DD"],
  ["CSFP", "CSFP"],
  ["QSFP", "QSFP+"],
  ["QSFP28", "QSFP28"],
  ["QSFP56", "QSFP56"],
  ["QSFP-DD", "QSFP-DD"],
  ["QSFP-DD800", "QSFP-DD800"],
  ["QSFP112", "QSFP112"],
  ["OSFP", "OSFP"],
  ["OSFP112", "OSFP112"],
  ["OSFP224", "OSFP224"],
  ["CFP", "CFP"],
  ["CFP2", "CFP2"],
  ["XFP", "XFP"],
  ["GBIC", "GBIC"],
  ["XENPAK", "XENPAK"],
  ["CXP", "CXP"],
];

/**
 * Category listing pages to crawl, in crawl order. `path` is relative to the
 * site origin; `form_factor` is the form factor recorded for every product
 * found on that page.
 */
const CATEGORIES: Array<{ path: string; form_factor: string }> = WIITEK_CATEGORY_SLUGS.map(
  ([slug, formFactor]) => ({ path: `/${slug}-Transceiver/`, form_factor: formFactor }),
);
|
|
|
|
/**
 * Part number embedded in a product title: 2–8 upper-case alphanumerics, a `-`
 * or `/`, then one alphanumeric followed by 3–35 characters drawn from
 * alphanumerics, `-`, `.` and `/`. Hoisted so it is not rebuilt per product.
 */
const WIITEK_PART_NUMBER_RE = /([A-Z0-9]{2,8}[-\/][A-Z0-9][A-Z0-9\-\.\/]{3,35})/;

/**
 * Turns a product link taken from a listing page into an absolute URL.
 *
 * Absolute http(s) links are returned unchanged. Anything else is resolved
 * against the listing page, so root-relative, page-relative and
 * protocol-relative links all yield a well-formed URL. Links that are empty,
 * unparsable, or use a non-HTTP scheme (e.g. `javascript:`) fall back to the
 * listing page URL.
 *
 * @param href    Raw `href` attribute value (may be empty).
 * @param pageUrl Absolute URL of the listing page the link was found on.
 * @returns An absolute http(s) URL.
 */
function resolveWiitekUrl(href: string, pageUrl: string): string {
  if (/^https?:\/\//i.test(href)) return href;
  if (!href) return pageUrl;
  try {
    const resolved = new URL(href, pageUrl);
    const isHttp = resolved.protocol === "http:" || resolved.protocol === "https:";
    return isHttp ? resolved.href : pageUrl;
  } catch {
    return pageUrl;
  }
}

/**
 * Scrapes every Wiitek category listing and stores one price observation per
 * product that has both a title and a positive price.
 *
 * Failures are best-effort at two levels: a category whose page cannot be
 * fetched or parsed is logged and skipped, and a product whose database write
 * fails is skipped and reported in a per-category warning. The function itself
 * only rejects if the initial vendor lookup fails.
 *
 * @returns Resolves once all categories have been attempted.
 */
export async function scrapeWiitek(): Promise<void> {
  logger.info("Wiitek scraper starting");
  const vendorId = await ensureVendor("Wiitek", BASE);
  let total = 0;
  let newItems = 0;

  for (const cat of CATEGORIES) {
    const pageUrl = `${BASE}${cat.path}`;
    try {
      const resp = await fetch(pageUrl, {
        headers: { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" },
        signal: AbortSignal.timeout(20_000),
      });
      if (!resp.ok) {
        // Previously skipped silently; surface the status so dead or blocked
        // category pages show up in the logs.
        logger.warn(`Wiitek ${cat.form_factor} failed`, {
          err: new Error(`HTTP ${resp.status} for ${pageUrl}`),
        });
        continue;
      }
      const $ = cheerio.load(await resp.text());

      // Several selector variants are tried at once; cards lacking a title or
      // a price are filtered out below.
      const items = $(".product-item, .goods-item, .pro-item, [class*=product]");
      let failedItems = 0;
      let lastItemError: unknown;

      for (const node of items.toArray()) {
        const $el = $(node);
        const name = $el.find("h2,h3,h4,.title,.name").first().text().trim();
        const priceText = $el.find(".price,.cost,[class*=price]").first().text().trim();
        const href = $el.find("a[href]").first().attr("href") || "";
        if (!name || !priceText) continue;

        // Prefer a part number found in the title; otherwise fall back to the
        // first 50 characters of the title itself.
        const partMatch = name.match(WIITEK_PART_NUMBER_RE);
        const partNumber = (partMatch ? partMatch[1] : name.substring(0, 50)).toUpperCase();
        const { price, currency } = parsePrice(priceText);
        if (price <= 0) continue;

        try {
          const transceiverId = await findOrCreateScrapedTransceiver({
            partNumber, vendorId, formFactor: cat.form_factor,
          });
          const isNew = await upsertPriceObservation({
            transceiverId, sourceVendorId: vendorId,
            price, currency: currency || "USD",
            stockLevel: "unknown",
            url: resolveWiitekUrl(href, pageUrl),
            contentHash: contentHash({ partNumber, price, currency: currency || "USD" }),
          });
          if (isNew) newItems++;
          total++;
        } catch (e) {
          // Keep going on a single bad product, but remember the failure so it
          // is reported once per category instead of being swallowed.
          failedItems++;
          lastItemError = e;
        }
      }

      if (failedItems > 0) {
        logger.warn(`Wiitek ${cat.form_factor}: ${failedItems} item(s) could not be stored`, {
          err: lastItemError,
        });
      }
    } catch (e) {
      logger.warn(`Wiitek ${cat.form_factor} failed`, { err: e });
    }
  }
  logger.info(`Wiitek done — ${total} total, ${newItems} new`);
}
|