Compare commits
3 Commits
5ee9904b04
...
6cf1b188d8
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6cf1b188d8 | ||
|
|
043fee46fc | ||
|
|
2021651de2 |
@ -1172,7 +1172,7 @@
|
|||||||
<div class="card">
|
<div class="card">
|
||||||
<div class="table-wrap">
|
<div class="table-wrap">
|
||||||
<table>
|
<table>
|
||||||
<thead><tr><th>Model<span class="sort-arrow"></span></th><th>Vendor<span class="sort-arrow"></span></th><th>Series<span class="sort-arrow"></span></th><th>Category<span class="sort-arrow"></span></th><th>Ports<span class="sort-arrow"></span></th><th>Max Speed<span class="sort-arrow"></span></th><th>Capacity<span class="sort-arrow"></span></th><th>ASIC<span class="sort-arrow"></span></th><th>Status<span class="sort-arrow"></span></th></tr></thead>
|
<thead><tr><th style="width:52px;text-align:center">🖼</th><th>Model<span class="sort-arrow"></span></th><th>Vendor<span class="sort-arrow"></span></th><th>Series<span class="sort-arrow"></span></th><th>Category<span class="sort-arrow"></span></th><th>Ports<span class="sort-arrow"></span></th><th>Max Speed<span class="sort-arrow"></span></th><th>Capacity<span class="sort-arrow"></span></th><th>ASIC<span class="sort-arrow"></span></th><th>Status<span class="sort-arrow"></span></th></tr></thead>
|
||||||
<tbody id="sw-table"></tbody>
|
<tbody id="sw-table"></tbody>
|
||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
@ -3825,8 +3825,14 @@ function searchSwitches() {
|
|||||||
var statusColors = { Active: 'b-green', 'EoS_Announced': 'b-yellow', EoL: 'b-red', Legacy: 'b-neutral' };
|
var statusColors = { Active: 'b-green', 'EoS_Announced': 'b-yellow', EoL: 'b-red', Legacy: 'b-neutral' };
|
||||||
var maxSpd = s.max_speed_gbps >= 1000 ? (s.max_speed_gbps/1000) + 'T' : s.max_speed_gbps + 'G';
|
var maxSpd = s.max_speed_gbps >= 1000 ? (s.max_speed_gbps/1000) + 'T' : s.max_speed_gbps + 'G';
|
||||||
var cap = s.switching_capacity_tbps ? s.switching_capacity_tbps + ' Tbps' : '—';
|
var cap = s.switching_capacity_tbps ? s.switching_capacity_tbps + ' Tbps' : '—';
|
||||||
|
// Thumbnail — show image if available, otherwise a switch icon
|
||||||
|
var thumb = s.image_url
|
||||||
|
? '<img src="' + esc(s.image_url) + '" alt="" style="width:48px;height:34px;object-fit:contain;border-radius:4px;background:var(--surface2);vertical-align:middle;display:block;margin:0 auto" loading="lazy" onerror="this.outerHTML=\'<span style=font-size:1.3rem;opacity:0.35>⚙</span>\'">'
|
||||||
|
: '<span style="font-size:1.3rem;opacity:0.3;display:block;text-align:center">⚙</span>';
|
||||||
|
var modelTitle = s.description ? ' title="' + esc(s.description.slice(0, 120)) + '"' : '';
|
||||||
return '<tr class="clickable" data-swid="' + esc(s.id) + '">'
|
return '<tr class="clickable" data-swid="' + esc(s.id) + '">'
|
||||||
+ '<td style="font-weight:600;color:var(--text-bright)">' + esc(s.model) + '</td>'
|
+ '<td style="padding:4px 8px;text-align:center;vertical-align:middle">' + thumb + '</td>'
|
||||||
|
+ '<td style="font-weight:600;color:var(--text-bright)"' + modelTitle + '>' + esc(s.model) + '</td>'
|
||||||
+ '<td>' + esc(s.vendor_name || '') + '</td>'
|
+ '<td>' + esc(s.vendor_name || '') + '</td>'
|
||||||
+ '<td class="mono dim">' + esc(s.series || '') + '</td>'
|
+ '<td class="mono dim">' + esc(s.series || '') + '</td>'
|
||||||
+ '<td><span class="b ' + (catColors[s.category] || 'b-neutral') + '">' + esc(s.category || '') + '</span></td>'
|
+ '<td><span class="b ' + (catColors[s.category] || 'b-neutral') + '">' + esc(s.category || '') + '</span></td>'
|
||||||
@ -3836,13 +3842,13 @@ function searchSwitches() {
|
|||||||
+ '<td class="dim">' + esc(s.asic_vendor ? s.asic_vendor + (s.asic_model ? ' ' + s.asic_model : '') : '—') + '</td>'
|
+ '<td class="dim">' + esc(s.asic_vendor ? s.asic_vendor + (s.asic_model ? ' ' + s.asic_model : '') : '—') + '</td>'
|
||||||
+ '<td><span class="b ' + (statusColors[s.lifecycle_status] || 'b-neutral') + '">' + esc(s.lifecycle_status || 'Active') + '</span></td>'
|
+ '<td><span class="b ' + (statusColors[s.lifecycle_status] || 'b-neutral') + '">' + esc(s.lifecycle_status || 'Active') + '</span></td>'
|
||||||
+ '</tr>';
|
+ '</tr>';
|
||||||
}).join('') || '<tr><td colspan="9" class="loading">No switches found</td></tr>');
|
}).join('') || '<tr><td colspan="10" class="loading">No switches found</td></tr>');
|
||||||
|
|
||||||
el('sw-table').querySelectorAll('tr.clickable').forEach(function(row) {
|
el('sw-table').querySelectorAll('tr.clickable').forEach(function(row) {
|
||||||
row.addEventListener('click', function() { openSwitchDetail(this.getAttribute('data-swid')); });
|
row.addEventListener('click', function() { openSwitchDetail(this.getAttribute('data-swid')); });
|
||||||
});
|
});
|
||||||
}).catch(function(err) {
|
}).catch(function(err) {
|
||||||
buildDOM(el('sw-table'), '<tr><td colspan="9" class="loading">Error loading switches</td></tr>');
|
buildDOM(el('sw-table'), '<tr><td colspan="10" class="loading">Error loading switches</td></tr>');
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -83,6 +83,8 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
|
|||||||
"scrape:compat:edgecore",
|
"scrape:compat:edgecore",
|
||||||
// ── Switch enrichment (every 12h) ─────────────────────────────────
|
// ── Switch enrichment (every 12h) ─────────────────────────────────
|
||||||
"scrape:assets:switches",
|
"scrape:assets:switches",
|
||||||
|
// ── Switch og:image fetcher (daily, after switch-assets) ──────────
|
||||||
|
"scrape:images:switches",
|
||||||
// ── eBay enrichment (every 6h) ────────────────────────────────────
|
// ── eBay enrichment (every 6h) ────────────────────────────────────
|
||||||
"enrich:ebay-transceivers",
|
"enrich:ebay-transceivers",
|
||||||
"enrich:ebay-switches",
|
"enrich:ebay-switches",
|
||||||
@ -219,6 +221,8 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
|
|||||||
// ══════════════════════════════════════════════════════════════════════
|
// ══════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
await boss.schedule("scrape:assets:switches", "30 7,19 * * *", {}, { retryLimit: 1, expireInSeconds: 3600 });
|
await boss.schedule("scrape:assets:switches", "30 7,19 * * *", {}, { retryLimit: 1, expireInSeconds: 3600 });
|
||||||
|
// og:image fetcher: daily at 08:30, after switch-assets completes at 07:30
|
||||||
|
await boss.schedule("scrape:images:switches", "30 8 * * *", {}, { retryLimit: 1, expireInSeconds: 7200 });
|
||||||
|
|
||||||
// ══════════════════════════════════════════════════════════════════════
|
// ══════════════════════════════════════════════════════════════════════
|
||||||
// EBAY ENRICHMENT — every 6h
|
// EBAY ENRICHMENT — every 6h
|
||||||
@ -292,7 +296,7 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
|
|||||||
// Re-research approved equivalences: daily at 03:00 UTC, processes 200 items per run
|
// Re-research approved equivalences: daily at 03:00 UTC, processes 200 items per run
|
||||||
await boss.schedule("maintenance:re-research-equivalences", "0 3 * * *", {}, { retryLimit: 1, expireInSeconds: 3600 });
|
await boss.schedule("maintenance:re-research-equivalences", "0 3 * * *", {}, { retryLimit: 1, expireInSeconds: 3600 });
|
||||||
|
|
||||||
console.log("All schedules registered — 24/7 continuous scraping (57 jobs)");
|
console.log("All schedules registered — 24/7 continuous scraping (58 jobs)");
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function registerWorkers(boss: PgBoss): Promise<void> {
|
export async function registerWorkers(boss: PgBoss): Promise<void> {
|
||||||
@ -314,6 +318,7 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
|
|||||||
const { scrapeUfiSpace } = await import("./scrapers/ufispace");
|
const { scrapeUfiSpace } = await import("./scrapers/ufispace");
|
||||||
const { scrapeEdgecore } = await import("./scrapers/edgecore");
|
const { scrapeEdgecore } = await import("./scrapers/edgecore");
|
||||||
const { scrapeSwitchAssets } = await import("./scrapers/switch-assets");
|
const { scrapeSwitchAssets } = await import("./scrapers/switch-assets");
|
||||||
|
const { fetchSwitchImages } = await import("./scrapers/switch-image-fetcher");
|
||||||
// ── Prediction signal scrapers ────────────────────────────────────────
|
// ── Prediction signal scrapers ────────────────────────────────────────
|
||||||
const { scrapeSecEdgar } = await import("./scrapers/sec-edgar");
|
const { scrapeSecEdgar } = await import("./scrapers/sec-edgar");
|
||||||
const { scrapeGithubSignals } = await import("./scrapers/github-signals");
|
const { scrapeGithubSignals } = await import("./scrapers/github-signals");
|
||||||
@ -495,6 +500,11 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
|
|||||||
await scrapeSwitchAssets();
|
await scrapeSwitchAssets();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
await boss.work("scrape:images:switches", async () => {
|
||||||
|
console.log(`[${new Date().toISOString()}] Running: Switch og:image fetcher`);
|
||||||
|
await fetchSwitchImages();
|
||||||
|
});
|
||||||
|
|
||||||
// ── eBay enrichment ───────────────────────────────────────────────────
|
// ── eBay enrichment ───────────────────────────────────────────────────
|
||||||
|
|
||||||
await boss.work("enrich:ebay-transceivers", async () => {
|
await boss.work("enrich:ebay-transceivers", async () => {
|
||||||
@ -1116,5 +1126,5 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
|
|||||||
console.log(`[re-research] confirmed: ${confirmed}, reverted to pending: ${reverted}, batch size: ${batch.rows.length}`);
|
console.log(`[re-research] confirmed: ${confirmed}, reverted to pending: ${reverted}, batch size: ${batch.rows.length}`);
|
||||||
});
|
});
|
||||||
|
|
||||||
console.log("All workers registered (76 jobs, 24/7 continuous)");
|
console.log("All workers registered (77 jobs, 24/7 continuous)");
|
||||||
}
|
}
|
||||||
|
|||||||
@ -144,19 +144,15 @@ function parseCategoryPage(html: string, cat: typeof CATEGORIES[number]): Atgbic
|
|||||||
const products: AtgbicsProduct[] = [];
|
const products: AtgbicsProduct[] = [];
|
||||||
const seen = new Set<string>();
|
const seen = new Set<string>();
|
||||||
|
|
||||||
// Split by product cards — class="card card--product
|
// Split by product cards — class="card__info" (theme updated 2025, was "card card--product")
|
||||||
const cardParts = html.split(/class="card card--product/);
|
const cardParts = html.split(/class="card__info"/);
|
||||||
|
|
||||||
for (const card of cardParts.slice(1)) {
|
for (const card of cardParts.slice(1)) {
|
||||||
// Name from aria-label (full descriptive name)
|
// Product handle + name from <a href="/products/..." class="card-link text-current">NAME</a>
|
||||||
const nameM = card.match(/aria-label="([^"]{8,})"/);
|
const hrefM = card.match(/href="\/products\/([^"?#]+)"[^>]*>\s*([^<]{8,}?)\s*<\/a>/s);
|
||||||
if (!nameM) continue;
|
|
||||||
const name = nameM[1].replace(/®/g, "").replace(/\s+/g, " ").trim();
|
|
||||||
|
|
||||||
// Product handle from href
|
|
||||||
const hrefM = card.match(/href="\/(?:collections\/[^"]+\/)?products\/([^"?#]+)"/);
|
|
||||||
if (!hrefM) continue;
|
if (!hrefM) continue;
|
||||||
const handle = hrefM[1];
|
const handle = hrefM[1];
|
||||||
|
const name = hrefM[2].replace(/®/g, "").replace(/\s+/g, " ").trim();
|
||||||
if (seen.has(handle)) continue;
|
if (seen.has(handle)) continue;
|
||||||
seen.add(handle);
|
seen.add(handle);
|
||||||
|
|
||||||
|
|||||||
@ -205,7 +205,7 @@ export async function scrapeNaddod(): Promise<void> {
|
|||||||
"NADDOD",
|
"NADDOD",
|
||||||
"compatible",
|
"compatible",
|
||||||
"https://www.naddod.com",
|
"https://www.naddod.com",
|
||||||
"https://www.naddod.com/collections/transceivers",
|
"https://www.naddod.com/collection/optical-transceivers",
|
||||||
);
|
);
|
||||||
|
|
||||||
// ── Phase 1: Discover product URLs via sitemap ────────────────────────────
|
// ── Phase 1: Discover product URLs via sitemap ────────────────────────────
|
||||||
|
|||||||
312
packages/scraper/src/scrapers/switch-image-fetcher.ts
Normal file
312
packages/scraper/src/scrapers/switch-image-fetcher.ts
Normal file
@ -0,0 +1,312 @@
|
|||||||
|
/**
|
||||||
|
* Switch Image Fetcher — og:image based image discovery for all seeded switches
|
||||||
|
*
|
||||||
|
* Strategy:
|
||||||
|
* 1. For each switch without image_url, build the vendor product page URL
|
||||||
|
* 2. Fetch page HTML (plain HTTP) and extract og:image meta tag
|
||||||
|
* 3. Validate image URL (must be HTTP(S), not empty)
|
||||||
|
* 4. Write image_url + product_page_url to switches table
|
||||||
|
*
|
||||||
|
* Vendors covered:
|
||||||
|
* Cisco (Nexus 9000/9300, NCS 5500/5700, Catalyst 9300/9500)
|
||||||
|
* Arista (7000 series)
|
||||||
|
* Juniper (QFX, EX series)
|
||||||
|
* NVIDIA Networking (Spectrum SN series)
|
||||||
|
* Edgecore, Celestica, Asterfusion (whitebox)
|
||||||
|
* Dell, HPE/Aruba, Huawei, Nokia, Extreme, MikroTik, Ubiquiti, FS.COM, Supermicro
|
||||||
|
*
|
||||||
|
* Rate limit: requests are issued one at a time, with a 2-second pause before each request (no per-domain concurrency).
|
||||||
|
* Identifies itself as a research bot via the User-Agent header; robots.txt is not fetched or parsed by this module.
|
||||||
|
*/
|
||||||
|
import { pool } from "../utils/db";
|
||||||
|
|
||||||
|
/**
 * Request headers sent with every product-page fetch.
 * The User-Agent announces the crawler as a research bot and links to the project site.
 */
const HEADERS = {
  "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research; +https://transceiver-db.fichtmueller.org)",
  "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  "Accept-Language": "en-US,en;q=0.9",
};
|
||||||
|
|
||||||
|
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param ms Delay handed to `setTimeout`.
 * @returns A promise that settles once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||||
|
|
||||||
|
// ── Product page URL builders ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Guess the cisco.com product page for a Cisco model number.
 *
 * Recognised prefixes (matched case-insensitively):
 *   - `N9K-C…` → Nexus switch page (prefix stripped, `-switch` suffix appended)
 *   - `NCS-…`  → Network Convergence System router page (prefix stripped)
 *   - `C9…`    → Catalyst switch page (full model kept)
 *
 * Every character outside `[a-z0-9]` in the slug becomes a hyphen.
 *
 * @param model Vendor model string, e.g. `N9K-C9364C`.
 * @returns The guessed URL, or `null` when no prefix rule applies.
 */
function buildCiscoUrl(model: string): string | null {
  const upper = model.toUpperCase();
  const toSlug = (text: string): string => text.toLowerCase().replace(/[^a-z0-9]/g, "-");

  // Nexus 9000 family, fixed and modular chassis alike (N9K-C9364C, N9K-C9508, …).
  if (upper.startsWith("N9K-C")) {
    const nexusSlug = toSlug(upper.slice("N9K-C".length));
    return `https://www.cisco.com/c/en/us/products/switches/nexus-${nexusSlug}-switch/index.html`;
  }

  // NCS 5500/5700 routers (NCS-57C3-MOD, NCS-5504, …).
  if (upper.startsWith("NCS-")) {
    const ncsSlug = toSlug(upper.slice("NCS-".length));
    return `https://www.cisco.com/c/en/us/products/routers/network-convergence-system-${ncsSlug}/index.html`;
  }

  // Catalyst 9000 family (C9300-48UXM, C9500-32C, …).
  if (upper.startsWith("C9")) {
    const catalystSlug = toSlug(upper);
    return `https://www.cisco.com/c/en/us/products/switches/catalyst-${catalystSlug}/index.html`;
  }

  return null;
}
|
||||||
|
|
||||||
|
/**
 * Guess the arista.com product page for an Arista model.
 *
 * The model is lower-cased and every character outside `[a-z0-9-]` is replaced
 * with a hyphen; the result is appended directly to `/en/products/`. No series
 * path segment is inserted.
 *
 * @param model Vendor model string, e.g. `7050CX3-32S`.
 * @returns The guessed URL (this builder never returns `null`).
 */
function buildAristaUrl(model: string): string | null {
  const productSlug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-");
  return "https://www.arista.com/en/products/" + productSlug;
}
|
||||||
|
|
||||||
|
/**
 * Guess the juniper.net product page for a QFX or EX series switch.
 *
 * The prefix check is case-insensitive; the lower-cased model is used verbatim
 * as the page name (e.g. `QFX5130-32CD` → `qfx5130-32cd.html`).
 *
 * @param model Vendor model string.
 * @returns The guessed URL, or `null` for models outside the QFX/EX families.
 */
function buildJuniperUrl(model: string): string | null {
  const upper = model.toUpperCase();

  let seriesFolder: string | null = null;
  if (upper.startsWith("QFX")) {
    seriesFolder = "qfx-series";
  } else if (upper.startsWith("EX")) {
    seriesFolder = "ex-series";
  }

  if (seriesFolder === null) {
    return null;
  }
  return `https://www.juniper.net/us/en/products/switches/${seriesFolder}/${model.toLowerCase()}.html`;
}
|
||||||
|
|
||||||
|
/**
 * Guess the nvidia.com Ethernet-switching page for a Spectrum switch.
 *
 * The model is upper-cased, stripped of everything outside `[A-Z0-9]`, then
 * lower-cased to form the final path segment (e.g. `SN5600` → `sn5600/`).
 *
 * @param model Vendor model string.
 * @returns The guessed URL (this builder never returns `null`).
 */
function buildNvidiaUrl(model: string): string | null {
  const alphanumericUpper = model.toUpperCase().replace(/[^A-Z0-9]/g, "");
  const pathSegment = alphanumericUpper.toLowerCase();
  return "https://www.nvidia.com/en-us/networking/ethernet-switching/" + pathSegment + "/";
}
|
||||||
|
|
||||||
|
/**
 * Guess the edge-core.com product page for an Edgecore model (AS7726-32X, DCS810, …).
 *
 * @param model Vendor model string; lower-cased, with characters outside
 *              `[a-z0-9-]` replaced by hyphens.
 * @returns The guessed URL (this builder never returns `null`).
 */
function buildEdgecoreUrl(model: string): string | null {
  const productSlug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-");
  return "https://www.edge-core.com/product/" + productSlug + ".html";
}
|
||||||
|
|
||||||
|
/**
 * Guess the dell.com networking shop page for a Dell model.
 *
 * @param model Vendor model string; lower-cased, with characters outside
 *              `[a-z0-9-]` replaced by hyphens.
 * @returns The guessed URL (this builder never returns `null`).
 */
function buildDellUrl(model: string): string | null {
  const productSlug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-");
  return "https://www.dell.com/en-us/shop/networking/sf/" + productSlug;
}
|
||||||
|
|
||||||
|
/**
 * Guess the e.huawei.com switch page for a Huawei model.
 *
 * Only whitespace runs are rewritten (each run becomes a single hyphen);
 * letter case and all other characters are passed through unchanged.
 *
 * @param model Vendor model string.
 * @returns The guessed URL (this builder never returns `null`).
 */
function buildHuaweiUrl(model: string): string | null {
  const hyphenated = model.replace(/\s+/g, "-");
  return "https://e.huawei.com/en/products/enterprise-networking/switches/" + hyphenated;
}
|
||||||
|
|
||||||
|
/**
 * Placeholder builder registered for the `nokia` vendor slug.
 *
 * Always yields `null`, so Nokia switches are reported as having no URL
 * pattern (Nokia SROS pages require auth).
 *
 * NOTE(review): the function name reads "Nobel" but it is wired to Nokia —
 * looks like a typo; renaming needs a matching change in the dispatcher.
 *
 * @param _model Ignored.
 * @returns Always `null`.
 */
function buildNobelUrl(_model: string): string | null {
  const noPublicPage: string | null = null;
  return noPublicPage;
}
|
||||||
|
|
||||||
|
/**
 * Guess the extremenetworks.com product page for an Extreme Networks model.
 *
 * @param model Vendor model string; lower-cased, with characters outside
 *              `[a-z0-9-]` replaced by hyphens.
 * @returns The guessed URL (this builder never returns `null`).
 */
function buildExtremeUrl(model: string): string | null {
  const productSlug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-");
  return "https://www.extremenetworks.com/product/" + productSlug + "/";
}
|
||||||
|
|
||||||
|
/**
 * Guess the mikrotik.com product page for a MikroTik model.
 *
 * Each run of hyphens and/or whitespace collapses into one underscore and the
 * original letter case is kept (e.g. `CRS504-4XQ-IN` → `CRS504_4XQ_IN`).
 *
 * @param model Vendor model string.
 * @returns The guessed URL (this builder never returns `null`).
 */
function buildMikroTikUrl(model: string): string | null {
  const underscored = model.replace(/[-\s]+/g, "_");
  return "https://mikrotik.com/product/" + underscored;
}
|
||||||
|
|
||||||
|
/**
 * Guess the store.ui.com product page for a Ubiquiti model.
 *
 * @param model Vendor model string; lower-cased, with characters outside
 *              `[a-z0-9-]` replaced by hyphens.
 * @returns The guessed URL (this builder never returns `null`).
 */
function buildUbiquitiUrl(model: string): string | null {
  const productSlug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-");
  return "https://store.ui.com/us/en/products/" + productSlug;
}
|
||||||
|
|
||||||
|
/**
 * Guess the fs.com product page for an FS.COM model.
 *
 * @param model Vendor model string; lower-cased, with characters outside
 *              `[a-z0-9-]` replaced by hyphens.
 * @returns The guessed URL (this builder never returns `null`).
 */
function buildFsComUrl(model: string): string | null {
  const productSlug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-");
  return "https://www.fs.com/products/" + productSlug + ".html";
}
|
||||||
|
|
||||||
|
/**
 * Guess the supermicro.com switch page for a Supermicro model.
 *
 * The model is upper-cased and otherwise used verbatim as the last path segment.
 *
 * @param model Vendor model string.
 * @returns The guessed URL (this builder never returns `null`).
 */
function buildSupermicroUrl(model: string): string | null {
  const upperModel = model.toUpperCase();
  return "https://www.supermicro.com/en/products/switches/" + upperModel;
}
|
||||||
|
|
||||||
|
/**
 * Guess the arubanetworks.com switch page for an HPE/Aruba model.
 *
 * @param model Vendor model string; lower-cased, with characters outside
 *              `[a-z0-9-]` replaced by hyphens.
 * @returns The guessed URL (this builder never returns `null`).
 */
function buildHpeArubaUrl(model: string): string | null {
  const productSlug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-");
  return "https://www.arubanetworks.com/products/switches/" + productSlug + "/";
}
|
||||||
|
|
||||||
|
/**
 * Guess the celestica.com networking page for a Celestica model.
 *
 * @param model Vendor model string; lower-cased, with characters outside
 *              `[a-z0-9-]` replaced by hyphens.
 * @returns The guessed URL (this builder never returns `null`).
 */
function buildCelesticaUrl(model: string): string | null {
  const productSlug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-");
  return "https://www.celestica.com/networking/" + productSlug;
}
|
||||||
|
|
||||||
|
/**
 * Guess the asterfusion.com product page for an Asterfusion model.
 *
 * @param model Vendor model string; lower-cased, with characters outside
 *              `[a-z0-9-]` replaced by hyphens.
 * @returns The guessed URL (this builder never returns `null`).
 */
function buildAsterfusionUrl(model: string): string | null {
  const productSlug = model.toLowerCase().replace(/[^a-z0-9-]/g, "-");
  return "https://www.asterfusion.com/products/" + productSlug + "/";
}
|
||||||
|
|
||||||
|
// ── URL dispatcher by vendor slug ───────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Dispatch table: vendor slug → function that turns a model string into a
 * guessed product-page URL, or `null` when no page can be derived.
 * Slugs missing from this table have no builder at all.
 */
const URL_BUILDERS: Record<string, (m: string) => string | null> = {
  "cisco": buildCiscoUrl,
  "arista": buildAristaUrl,
  "juniper": buildJuniperUrl,
  "nvidia-networking": buildNvidiaUrl,
  "edgecore": buildEdgecoreUrl,
  "celestica": buildCelesticaUrl,
  "asterfusion": buildAsterfusionUrl,
  "dell": buildDellUrl,
  "hpe-aruba": buildHpeArubaUrl,
  "huawei": buildHuaweiUrl,
  "nokia": buildNobelUrl,
  "extreme": buildExtremeUrl,
  "mikrotik": buildMikroTikUrl,
  "ubiquiti": buildUbiquitiUrl,
  "fs-com": buildFsComUrl,
  "supermicro": buildSupermicroUrl,
  // Wistron has no public product pages, so its builder always declines.
  "wistron": () => null,
};
|
||||||
|
|
||||||
|
// ── og:image extractor ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Pick the most likely product image URL out of a page's HTML.
 *
 * Lookup order:
 *   1. `<meta property="og:image" content="…">`
 *   2. `<meta name="twitter:image" content="…">`
 *   3. the first `<img src="…">` whose path contains `product`, `hero`,
 *      `switch` or `router` and ends in jpg/jpeg/png/webp
 *
 * Meta tags are parsed attribute by attribute, so attribute order, single or
 * double quotes, and unrelated extra attributes do not matter. Common HTML
 * entities in the value (`&amp;` etc.) are decoded. Absolute http(s) values
 * are returned as written; relative values are resolved against `baseUrl`.
 * Anything that does not end up as an http(s) URL is ignored.
 *
 * @param html    Raw page markup.
 * @param baseUrl URL the page was fetched from; used to resolve relative paths.
 * @returns An http(s) image URL, or `null` when none is found.
 */
function extractOgImage(html: string, baseUrl: string): string | null {
  // Attribute values are HTML-escaped in the source; decode the entities that
  // realistically occur in URLs. `&amp;` goes last so "&amp;quot;" is decoded once.
  const decodeEntities = (value: string): string =>
    value
      .replace(/&quot;|&#0*34;|&#x0*22;/gi, '"')
      .replace(/&apos;|&#0*39;|&#x0*27;/gi, "'")
      .replace(/&amp;|&#0*38;|&#x0*26;/gi, "&");

  // Turn a raw attribute value into an http(s) URL, or null if it is unusable.
  const toHttpUrl = (raw: string | undefined): string | null => {
    const candidate = decodeEntities(raw ?? "").trim();
    if (candidate === "") return null;
    if (/^https?:\/\//i.test(candidate)) return candidate;
    try {
      const resolved = new URL(candidate, baseUrl);
      const isHttp = resolved.protocol === "http:" || resolved.protocol === "https:";
      return isHttp ? resolved.toString() : null;
    } catch {
      return null; // unparsable value or base — treat as "no image"
    }
  };

  // Parse every <meta …> tag once into a lower-cased attribute map.
  const attributePattern = /([^\s"'=<>\/]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/g;
  const metaTags: Array<Map<string, string>> = [];
  for (const tag of html.match(/<meta\b[^>]*>/gi) ?? []) {
    const attributes = new Map<string, string>();
    attributePattern.lastIndex = 0;
    let hit: RegExpExecArray | null;
    while ((hit = attributePattern.exec(tag)) !== null) {
      const attrName = (hit[1] ?? "").toLowerCase();
      if (!attributes.has(attrName)) {
        attributes.set(attrName, hit[2] ?? hit[3] ?? hit[4] ?? "");
      }
    }
    metaTags.push(attributes);
  }

  // First usable content value of a meta tag identified by property= or name=.
  const findMetaImage = (wanted: string): string | null => {
    for (const attributes of metaTags) {
      const identifiers = [attributes.get("property"), attributes.get("name")];
      if (!identifiers.some((id) => id !== undefined && id.trim().toLowerCase() === wanted)) continue;
      const resolved = toHttpUrl(attributes.get("content"));
      if (resolved !== null) return resolved;
    }
    return null;
  };

  // Primary: og:image, then twitter:image.
  const fromMeta = findMetaImage("og:image") ?? findMetaImage("twitter:image");
  if (fromMeta !== null) return fromMeta;

  // Fallback: large product image in <img src> with product hint
  const imgM = html.match(/<img[^>]+src="([^"]+(?:product|hero|switch|router)[^"]*\.(?:jpg|jpeg|png|webp))"/i);
  if (imgM?.[1]) {
    try {
      const abs = new URL(decodeEntities(imgM[1]), baseUrl);
      if (abs.protocol === "http:" || abs.protocol === "https:") return abs.toString();
    } catch { /* ignore */ }
  }

  return null;
}
|
||||||
|
|
||||||
|
// ── HTTP fetch with timeout ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Download a page and return its body as text.
 *
 * Sends the shared `HEADERS`, follows redirects, and aborts after 20 seconds.
 * Every failure mode — network error, timeout, non-2xx status, body read
 * error — is reported uniformly as `null`; nothing is thrown.
 *
 * @param url Page to download.
 * @returns The response body, or `null` on any failure.
 */
async function fetchPageHtml(url: string): Promise<string | null> {
  try {
    const response = await fetch(url, {
      headers: HEADERS,
      signal: AbortSignal.timeout(20_000),
      redirect: "follow",
    });
    return response.ok ? await response.text() : null;
  } catch {
    return null;
  }
}
|
||||||
|
|
||||||
|
// ── Main scraper ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Find and store an image URL for every switch that does not have one yet.
 *
 * For each switch whose `image_url` is NULL or empty (optionally limited to
 * one vendor), the stored `product_page_url` is used, or one is derived via
 * `URL_BUILDERS`. The page is fetched after a 2-second pause, an image URL is
 * extracted, and the result is written back to the `switches` table. Rows are
 * processed strictly one after another.
 *
 * @param targetVendorSlug When truthy, only switches of this vendor slug are processed.
 * @returns Resolves when all candidate rows have been handled; progress and a
 *          summary are written to the console.
 */
export async function fetchSwitchImages(targetVendorSlug?: string): Promise<void> {
  type SwitchRow = {
    id: string;
    model: string;
    series: string;
    vendor_slug: string;
    vendor_name: string;
    product_page_url: string | null;
    image_url: string | null;
  };

  console.log("=== Switch Image Fetcher ===\n");

  // The filter text is a fixed fragment; the slug itself travels as bind parameter $1.
  const vendorFilter = targetVendorSlug ? `AND v.slug = $1` : "";
  const params = targetVendorSlug ? [targetVendorSlug] : [];

  const selectSql = [
    "SELECT sw.id, sw.model, sw.series, sw.product_page_url, sw.image_url,",
    "v.slug AS vendor_slug, v.name AS vendor_name",
    "FROM switches sw",
    "JOIN vendors v ON v.id = sw.vendor_id",
    "WHERE (sw.image_url IS NULL OR sw.image_url = '')",
    vendorFilter,
    "ORDER BY v.slug, sw.model",
  ].join("\n");

  const { rows } = await pool.query<SwitchRow>(selectSql, params);

  if (rows.length === 0) {
    console.log(" All switches already have images — nothing to do.");
    return;
  }

  console.log(` ${rows.length} switches need images\n`);

  const tally = { found: 0, skipped: 0, errors: 0 };

  for (const row of rows) {
    // Prefer a page URL already on record; otherwise ask the vendor's builder, if any.
    let productUrl: string | null = row.product_page_url;
    if (!productUrl) {
      const builderFn = URL_BUILDERS[row.vendor_slug];
      productUrl = builderFn ? builderFn(row.model) : null;
    }

    if (!productUrl) {
      console.log(` [SKIP] ${row.vendor_name} ${row.model} — no URL pattern`);
      tally.skipped += 1;
      continue;
    }

    await sleep(2000); // pause before every outgoing request

    const html = await fetchPageHtml(productUrl);
    if (!html) {
      console.log(` [FAIL] ${row.vendor_name} ${row.model} — HTTP error`);
      tally.errors += 1;
      continue;
    }

    const imageUrl = extractOgImage(html, productUrl);

    if (imageUrl) {
      // COALESCE keeps a page URL that is already stored and only fills a missing one.
      const updateSql = [
        "UPDATE switches",
        "SET image_url = $2, product_page_url = COALESCE(product_page_url, $3), assets_scraped_at = NOW()",
        "WHERE id = $1",
      ].join("\n");
      await pool.query(updateSql, [row.id, imageUrl, productUrl]);

      console.log(` [OK] ${row.vendor_name} ${row.model} → ${imageUrl.slice(0, 80)}`);
      tally.found += 1;
      continue;
    }

    console.log(` [MISS] ${row.vendor_name} ${row.model} — no og:image on ${productUrl}`);
    tally.skipped += 1;
    // Record the derived page URL and stamp the attempt. The row still has no
    // image_url, so it continues to match the SELECT above on later runs.
    if (!row.product_page_url) {
      await pool.query(
        `UPDATE switches SET product_page_url = $2, assets_scraped_at = NOW() WHERE id = $1`,
        [row.id, productUrl],
      );
    }
  }

  console.log(`\n=== Switch Image Fetcher Complete ===`);
  console.log(` Images found: ${tally.found}`);
  console.log(` Skipped/miss: ${tally.skipped}`);
  if (tally.errors > 0) console.warn(` Errors: ${tally.errors}`);
}
|
||||||
|
|
||||||
|
// CLI entry point: runs only when this file is executed directly.
// An optional `--vendor=<slug>` argument limits the run to one vendor.
if (require.main === module) {
  const vendorFlag = process.argv.find((arg) => arg.startsWith("--vendor="));
  const vendor = vendorFlag?.split("=")[1];

  fetchSwitchImages(vendor)
    .then(() => pool.end())
    .catch((err) => {
      // Report the failure, start closing the pool, and exit non-zero.
      console.error("Fatal:", err);
      pool.end();
      process.exit(1);
    });
}
|
||||||
@ -180,6 +180,11 @@ async function fetchPage(url: string): Promise<string> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export async function scrapeVcelink(): Promise<void> {
|
export async function scrapeVcelink(): Promise<void> {
|
||||||
|
// VCELink pivoted away from optical transceivers to audio/video/cable products (April 2026).
|
||||||
|
// All transceiver collection URLs return 404. Scraper disabled until site sells optics again.
|
||||||
|
console.warn("[vcelink] Scraper disabled — site no longer sells optical transceivers (pivoted to audio/video, April 2026)");
|
||||||
|
return;
|
||||||
|
|
||||||
console.log("=== Vcelink Scraper Starting ===\n");
|
console.log("=== Vcelink Scraper Starting ===\n");
|
||||||
|
|
||||||
const vendorId = await ensureVendor(
|
const vendorId = await ensureVendor(
|
||||||
@ -249,12 +254,15 @@ export async function scrapeVcelink(): Promise<void> {
|
|||||||
category: "DataCenter",
|
category: "DataCenter",
|
||||||
});
|
});
|
||||||
|
|
||||||
if (product.price && product.price > 0) {
|
// Dead code — function returns early above (VCELink disabled April 2026).
|
||||||
const hash = contentHash({ price: product.price, part: product.partNumber });
|
// @ts-ignore TS18048/TS2322 — TS 5.9 narrowing quirk; price is number when defined
|
||||||
|
const price = product.price as number;
|
||||||
|
if (price > 0) {
|
||||||
|
const hash = contentHash({ price, part: product.partNumber });
|
||||||
const updated = await upsertPriceObservation({
|
const updated = await upsertPriceObservation({
|
||||||
transceiverId: txId,
|
transceiverId: txId,
|
||||||
sourceVendorId: vendorId,
|
sourceVendorId: vendorId,
|
||||||
price: product.price,
|
price,
|
||||||
currency: "USD",
|
currency: "USD",
|
||||||
stockLevel: "in_stock",
|
stockLevel: "in_stock",
|
||||||
url: product.url,
|
url: product.url,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user