Compare commits
No commits in common. "eb4db9ca76090a137aa2c02cba81f21516e04834" and "39ee5e86fa95fb816fa4098d067677ebe7c6a856" have entirely different histories.
eb4db9ca76
...
39ee5e86fa
@ -747,7 +747,7 @@
|
||||
<div class="tab" data-tab="news">News</div>
|
||||
<div class="tab" data-tab="finder">Finder</div>
|
||||
<div class="tab" data-tab="blog">Blog Engine</div>
|
||||
<div class="tab" data-tab="procurement">Procurement Intelligence</div>
|
||||
<div class="tab" data-tab="procurement">Procurement Intel</div>
|
||||
</div>
|
||||
|
||||
<div class="main">
|
||||
@ -992,7 +992,7 @@
|
||||
<div style="display:flex;gap:0.5rem;margin-bottom:1.25rem;flex-wrap:wrap;align-items:center">
|
||||
<button onclick="showProcSection('signals')" id="proc-btn-signals" class="proc-btn proc-btn-active">Reorder Signals</button>
|
||||
<button onclick="showProcSection('abc')" id="proc-btn-abc" class="proc-btn">ABC Classes</button>
|
||||
<button onclick="showProcSection('market')" id="proc-btn-market" class="proc-btn">Market Intelligence</button>
|
||||
<button onclick="showProcSection('market')" id="proc-btn-market" class="proc-btn">Market Intel</button>
|
||||
<button onclick="showProcSection('lifecycle')" id="proc-btn-lifecycle" class="proc-btn">Lifecycle Events</button>
|
||||
<div style="flex:1"></div>
|
||||
<button onclick="loadProcurement()" style="background:var(--surface2);border:1px solid var(--border);padding:4px 12px;border-radius:6px;cursor:pointer;font-size:0.75rem;color:var(--text)">↻ Refresh</button>
|
||||
@ -1023,14 +1023,14 @@
|
||||
<div class="card" style="overflow-x:auto">
|
||||
<table style="width:100%;border-collapse:collapse;font-size:0.8rem" id="abc-table">
|
||||
<thead><tr style="border-bottom:2px solid var(--border);color:var(--text-dim);font-size:0.7rem;font-weight:700;text-transform:uppercase">
|
||||
<th class="tip" data-tip="ABC inventory classification: A = high turnover / high value (top 20% products, ~80% of revenue). B = medium. C = low turnover / low value." style="text-align:left;padding:8px 6px">Class</th>
|
||||
<th class="tip" data-tip="Transceiver product name, part number and vendor." style="text-align:left;padding:8px 6px">Product</th>
|
||||
<th class="tip" data-tip="Physical form factor: SFP, SFP+, QSFP28, QSFP-DD, OSFP, CFP, etc. Determines physical slot compatibility in switches." style="text-align:left;padding:8px 6px">Form Factor</th>
|
||||
<th class="tip" data-tip="Composite demand score (0–100). Combines: price observation frequency, compatibility entry count, vendor count, hype cycle phase, and recent pricing activity." style="text-align:right;padding:8px 6px">Demand Score</th>
|
||||
<th class="tip" data-tip="Number of compatibility entries — how many switch models support this transceiver. Higher = broader market reach and easier to sell." style="text-align:right;padding:8px 6px">Compat.</th>
|
||||
<th class="tip" data-tip="Number of vendors offering this transceiver. More vendors = stronger competition = typically lower prices and better availability." style="text-align:right;padding:8px 6px">Vendors</th>
|
||||
<th class="tip" data-tip="Supply chain risk level. High = single-source or constrained supply. Medium = some alternatives exist. Low = widely available from multiple sources." style="text-align:left;padding:8px 6px">Supply Risk</th>
|
||||
<th class="tip" data-tip="Procurement recommendation: 🔴 Buy Now = stock up, prices rising or supply tightening. 🟡 Wait = prices expected to drop. 🟢 Hold = stable, no action needed. 🔵 Monitor = watch for changes." style="text-align:left;padding:8px 6px">Signal</th>
|
||||
<th style="text-align:left;padding:8px 6px">Class</th>
|
||||
<th style="text-align:left;padding:8px 6px">Product</th>
|
||||
<th style="text-align:left;padding:8px 6px">Form Factor</th>
|
||||
<th style="text-align:right;padding:8px 6px">Demand Score</th>
|
||||
<th style="text-align:right;padding:8px 6px">Compat.</th>
|
||||
<th style="text-align:right;padding:8px 6px">Vendors</th>
|
||||
<th style="text-align:left;padding:8px 6px">Supply Risk</th>
|
||||
<th style="text-align:left;padding:8px 6px">Signal</th>
|
||||
</tr></thead>
|
||||
<tbody id="abc-tbody"><tr><td colspan="8" style="padding:1rem;color:var(--text-dim)">Loading...</td></tr></tbody>
|
||||
</table>
|
||||
@ -3239,14 +3239,12 @@ function renderSignals(filterSig) {
|
||||
}
|
||||
var signalIcon = { buy_now:'🔴', wait:'🟡', hold:'🟢', monitor:'🔵' };
|
||||
var signalLabel = { buy_now:'Buy Now', wait:'Wait', hold:'Hold', monitor:'Monitor' };
|
||||
var demoBadgeHtml = '<span title="Demo data — inserted as sample data, not real market intelligence." style="font-size:0.6rem;padding:1px 5px;border-radius:3px;background:#f0e4ff;color:#7c3aed;font-weight:700;margin-left:4px;vertical-align:middle">Demo Data</span>';
|
||||
container.innerHTML = data.map(function(r) {
|
||||
var reasons = [];
|
||||
try { reasons = JSON.parse(r.reasons || '[]'); } catch(e) {}
|
||||
var sigClass = 'signal-' + (r.signal || 'monitor').replace('_','-');
|
||||
var badgeClass = 'sig-badge-' + (r.signal || 'monitor').replace('_now','').replace('_','');
|
||||
var abcTitles = { A:'Class A — high turnover product, top 20% by value. Prioritize stock availability.', B:'Class B — medium turnover. Standard replenishment cycle.', C:'Class C — low turnover. Order on demand only.' };
|
||||
var abcBadge = r.abc_class ? '<span class="abc-' + r.abc_class.toLowerCase() + '" title="' + (abcTitles[r.abc_class] || '') + '">' + r.abc_class + '</span>' : '';
|
||||
var abcBadge = r.abc_class ? '<span class="abc-' + r.abc_class.toLowerCase() + '">' + r.abc_class + '</span>' : '';
|
||||
var strengthPct = Math.round((r.signal_strength || 0) * 100);
|
||||
var productName = r.standard_name || r.part_number || r.slug || '—';
|
||||
var imgHtml = '';
|
||||
@ -3257,24 +3255,24 @@ function renderSignals(filterSig) {
|
||||
+ '<div style="display:flex;align-items:flex-start;gap:0.25rem;margin-bottom:0.5rem">'
|
||||
+ imgHtml
|
||||
+ '<div style="flex:1;min-width:0">'
|
||||
+ '<div style="font-weight:700;font-size:0.82rem;white-space:nowrap;overflow:hidden;text-overflow:ellipsis">' + esc(productName) + (r.is_demo ? demoBadgeHtml : '') + '</div>'
|
||||
+ '<div style="font-weight:700;font-size:0.82rem;white-space:nowrap;overflow:hidden;text-overflow:ellipsis">' + esc(productName) + '</div>'
|
||||
+ '<div style="font-size:0.7rem;color:var(--text-dim)">' + esc(r.form_factor || '') + (r.speed_gbps ? ' · ' + r.speed_gbps + 'G' : '') + (r.vendor_name ? ' · ' + esc(r.vendor_name) : '') + '</div>'
|
||||
+ '</div>'
|
||||
+ '</div>'
|
||||
+ '<div style="display:flex;gap:0.4rem;align-items:center;margin-bottom:0.6rem;flex-wrap:wrap">'
|
||||
+ '<span class="intel-badge ' + badgeClass + '" title="Procurement signal: Buy Now = act immediately (supply tightening or price rising). Wait = better prices expected. Hold = no action needed. Monitor = track closely.">' + (signalIcon[r.signal] || '') + ' ' + (signalLabel[r.signal] || r.signal) + '</span>'
|
||||
+ '<span class="intel-badge ' + badgeClass + '">' + (signalIcon[r.signal] || '') + ' ' + (signalLabel[r.signal] || r.signal) + '</span>'
|
||||
+ abcBadge
|
||||
+ (r.supply_risk ? '<span title="Supply chain risk: low = widely available, medium = some constraints, high = single-source or shortage risk" style="font-size:0.65rem;padding:2px 6px;border-radius:3px;background:var(--surface2);color:var(--text-dim)">' + esc(r.supply_risk) + ' risk</span>' : '')
|
||||
+ (r.supply_risk ? '<span style="font-size:0.65rem;padding:2px 6px;border-radius:3px;background:var(--surface2);color:var(--text-dim)">' + esc(r.supply_risk) + ' risk</span>' : '')
|
||||
+ '</div>'
|
||||
+ '<div style="font-size:0.7rem;color:var(--text-dim);margin-bottom:0.5rem">'
|
||||
+ (reasons.length ? reasons.map(function(r2) { return '→ ' + esc(r2); }).join('<br>') : 'Insufficient data')
|
||||
+ '</div>'
|
||||
+ '<div style="display:flex;gap:1rem;font-size:0.7rem;color:var(--text-dim)">'
|
||||
+ (r.stock_trend ? '<span title="Stock trend based on price observation frequency and vendor listing changes">Stock: <b style="color:var(--text)">' + r.stock_trend + '</b></span>' : '')
|
||||
+ (r.price_trend ? '<span title="Price trend over last 30 days: rising/falling/stable">Price: <b style="color:var(--text)">' + r.price_trend + '</b></span>' : '')
|
||||
+ (r.lead_time_weeks ? '<span title="Estimated supplier lead time in weeks until delivery">Lead: <b style="color:var(--text)">' + r.lead_time_weeks + 'w</b></span>' : '')
|
||||
+ (r.stock_trend ? '<span>Stock: <b style="color:var(--text)">' + r.stock_trend + '</b></span>' : '')
|
||||
+ (r.price_trend ? '<span>Price: <b style="color:var(--text)">' + r.price_trend + '</b></span>' : '')
|
||||
+ (r.lead_time_weeks ? '<span>Lead: <b style="color:var(--text)">' + r.lead_time_weeks + 'w</b></span>' : '')
|
||||
+ '</div>'
|
||||
+ '<div title="Signal strength (0–100%): confidence in the procurement recommendation, based on data volume, price history consistency, and compatibility coverage." style="margin-top:0.6rem;background:var(--surface2);border-radius:3px;height:4px">'
|
||||
+ '<div style="margin-top:0.6rem;background:var(--surface2);border-radius:3px;height:4px">'
|
||||
+ '<div style="height:4px;border-radius:3px;width:' + strengthPct + '%;background:var(--accent)"></div>'
|
||||
+ '</div>'
|
||||
+ '<div style="font-size:0.65rem;color:var(--text-dim);text-align:right;margin-top:2px">Signal strength: ' + strengthPct + '%</div>'
|
||||
@ -3304,7 +3302,7 @@ function renderAbcTable(filterCls) {
|
||||
var abcEl = '<span class="abc-' + (r.abc_class || 'c').toLowerCase() + '">' + (r.abc_class || '—') + '</span>';
|
||||
return '<tr style="border-bottom:1px solid var(--border)">'
|
||||
+ '<td style="padding:7px 6px">' + abcEl + '</td>'
|
||||
+ '<td style="padding:7px 6px"><div style="font-weight:600">' + esc(r.standard_name || r.part_number || '—') + (r.is_demo ? '<span title="Demo data — sample entry, not real market data." style="font-size:0.6rem;padding:1px 5px;border-radius:3px;background:#f0e4ff;color:#7c3aed;font-weight:700;margin-left:4px;vertical-align:middle">Demo</span>' : '') + '</div><div style="font-size:0.68rem;color:var(--text-dim)">' + esc(r.vendor_name || '') + '</div></td>'
|
||||
+ '<td style="padding:7px 6px"><div style="font-weight:600">' + esc(r.standard_name || r.part_number || '—') + '</div><div style="font-size:0.68rem;color:var(--text-dim)">' + esc(r.vendor_name || '') + '</div></td>'
|
||||
+ '<td style="padding:7px 6px;font-family:var(--mono);font-size:0.75rem">' + esc(r.form_factor || '—') + '</td>'
|
||||
+ '<td style="padding:7px 6px;text-align:right;font-family:var(--mono)">' + (r.demand_score ? parseFloat(r.demand_score).toFixed(0) : '—') + '</td>'
|
||||
+ '<td style="padding:7px 6px;text-align:right;font-family:var(--mono)">' + (r.compat_count || 0) + '</td>'
|
||||
@ -3328,35 +3326,25 @@ async function loadProcMarketIntel() {
|
||||
capex_cycle:'💰', trade_show:'🎪', standard_ratified:'📋',
|
||||
standard_draft:'📝', distributor_lead_time:'🚚', supply_chain:'🏭', tender:'📑'
|
||||
};
|
||||
var typeDesc = {
|
||||
capex_cycle:'Capital expenditure cycle event — customer budget release, fiscal year start, major infrastructure spend',
|
||||
trade_show:'Trade show or conference (OFC, ECOC, MWC, IEEE) — often signals new product launches and technology shifts',
|
||||
standard_ratified:'IEEE/MSA standard officially ratified — technology is production-ready, adoption typically accelerates',
|
||||
standard_draft:'Standard in draft phase — technology is emerging, early adopters phase',
|
||||
distributor_lead_time:'Distributor lead time change — indicates supply chain pressure or inventory build-up',
|
||||
supply_chain:'Supply chain event — factory capacity, shortage, logistics disruption',
|
||||
tender:'Public or enterprise tender/RFP published — indicates near-term procurement demand'
|
||||
};
|
||||
container.innerHTML = items.map(function(item) {
|
||||
var sig = item.buy_signal_implication || 'none';
|
||||
var badgeClass = 'intel-' + sig.replace('_now','').replace('_','');
|
||||
var sigLabel = { buy_now:'🔴 Buy Now', wait:'🟡 Wait', hold:'🟢 Hold', monitor:'🔵 Monitor', none:'—' };
|
||||
var sigDesc = { buy_now:'Buy Now: this market event suggests immediate procurement — prices or availability will worsen', wait:'Wait: conditions suggest holding off — better pricing or availability expected soon', hold:'Hold: market stable, no urgency to act', monitor:'Monitor: track this development, not yet actionable', none:'No specific procurement implication' };
|
||||
var techs = (item.technologies || []).map(function(t) {
|
||||
return '<span title="Technology segment this intelligence applies to" style="font-size:0.65rem;padding:1px 6px;border-radius:3px;background:var(--surface2);color:var(--text-dim)">' + esc(t) + '</span>';
|
||||
return '<span style="font-size:0.65rem;padding:1px 6px;border-radius:3px;background:var(--surface2);color:var(--text-dim)">' + esc(t) + '</span>';
|
||||
}).join(' ');
|
||||
return '<div class="intel-card">'
|
||||
+ '<div style="display:flex;gap:0.5rem;align-items:flex-start;margin-bottom:0.4rem">'
|
||||
+ '<span title="' + esc(typeDesc[item.intel_type] || item.intel_type || '') + '" style="font-size:1.2rem;cursor:default">' + (typeIcon[item.intel_type] || '📊') + '</span>'
|
||||
+ '<span style="font-size:1.2rem">' + (typeIcon[item.intel_type] || '📊') + '</span>'
|
||||
+ '<div style="flex:1">'
|
||||
+ '<span class="intel-badge ' + badgeClass + '" title="' + esc(sigDesc[sig] || sig) + '">' + (sigLabel[sig] || sig) + '</span>'
|
||||
+ '<div style="font-weight:700;font-size:0.82rem;line-height:1.3;margin-top:0.2rem">' + esc(item.title) + (item.is_demo ? '<span title="Demo data — sample entry, not real market intelligence." style="font-size:0.6rem;padding:1px 5px;border-radius:3px;background:#f0e4ff;color:#7c3aed;font-weight:700;margin-left:4px;vertical-align:middle">Demo Data</span>' : '') + '</div>'
|
||||
+ '<span class="intel-badge ' + badgeClass + '">' + (sigLabel[sig] || sig) + '</span>'
|
||||
+ '<div style="font-weight:700;font-size:0.82rem;line-height:1.3;margin-top:0.2rem">' + esc(item.title) + '</div>'
|
||||
+ '</div></div>'
|
||||
+ '<div style="font-size:0.75rem;color:var(--text-dim);margin-bottom:0.6rem;line-height:1.5">' + esc(item.summary || '') + '</div>'
|
||||
+ (techs ? '<div style="display:flex;gap:0.3rem;flex-wrap:wrap;margin-bottom:0.5rem">' + techs + '</div>' : '')
|
||||
+ '<div style="display:flex;justify-content:space-between;font-size:0.68rem;color:var(--text-dim)">'
|
||||
+ '<span title="Intelligence source">' + esc(item.source_name) + '</span>'
|
||||
+ (item.impact_horizon_months ? '<span title="Estimated months until this event has measurable market impact on pricing or availability">Impact: ~' + item.impact_horizon_months + ' months</span>' : '')
|
||||
+ '<span>' + esc(item.source_name) + '</span>'
|
||||
+ (item.impact_horizon_months ? '<span>Impact: ~' + item.impact_horizon_months + ' months</span>' : '')
|
||||
+ '</div>'
|
||||
+ '</div>';
|
||||
}).join('');
|
||||
@ -3379,36 +3367,23 @@ async function loadProcLifecycle() {
|
||||
standard_draft:'📝', capex_peak:'💰', trade_show:'🎪',
|
||||
supply_risk:'⚠️', tender:'📑', price_floor:'📉'
|
||||
};
|
||||
var typeDesc = {
|
||||
eol_announced:'End-of-Life announced — vendor has confirmed a product or standard will be discontinued. Start planning migration.',
|
||||
eol_effective:'End-of-Life effective — product is no longer manufactured or supported. Immediately find replacements.',
|
||||
standard_ratified:'Standard officially ratified by IEEE or MSA — technology is mature, safe to deploy at scale.',
|
||||
standard_draft:'Standard in draft — technology is emerging. Early adopters phase, compatibility not yet guaranteed.',
|
||||
capex_peak:'Capital expenditure peak — major procurement wave expected. May affect availability and pricing.',
|
||||
trade_show:'Trade show event (OFC, ECOC, MWC) — often triggers new product launches and price adjustments.',
|
||||
supply_risk:'Supply chain risk identified — potential shortage, capacity constraint, or geopolitical factor.',
|
||||
tender:'Public or enterprise tender published — indicates confirmed near-term demand from large buyer.',
|
||||
price_floor:'Price floor reached — technology has hit bottom pricing. Unlikely to drop further; good time to stock up.'
|
||||
};
|
||||
var impactColor = { critical:'#c1121f', high:'#c1121f', medium:'var(--yellow)', low:'var(--green)' };
|
||||
var impactDesc = { critical:'Critical impact — immediate action required', high:'High impact — plan response within weeks', medium:'Medium impact — monitor and prepare response', low:'Low impact — informational' };
|
||||
var sigLabel = { buy_now:'🔴 Buy Now', wait:'🟡 Wait', hold:'🟢 Hold', monitor:'🔵 Monitor' };
|
||||
var sigDesc = { buy_now:'Buy Now: this event signals immediate procurement urgency', wait:'Wait: better conditions expected after this event resolves', hold:'Hold: no change to current procurement strategy', monitor:'Monitor: track how this event develops before acting' };
|
||||
container.innerHTML = items.map(function(item) {
|
||||
var ic = impactColor[item.impact_level] || 'var(--text-dim)';
|
||||
var productInfo = item.part_number ? esc(item.part_number) + (item.form_factor ? ' · ' + esc(item.form_factor) : '') : '';
|
||||
var dateStr = item.effective_date ? new Date(item.effective_date).toLocaleDateString('de-DE') : '';
|
||||
return '<div class="intel-card" style="border-left:3px solid ' + ic + '" title="' + esc(impactDesc[item.impact_level] || '') + '">'
|
||||
return '<div class="intel-card" style="border-left:3px solid ' + ic + '">'
|
||||
+ '<div style="display:flex;gap:0.5rem;align-items:flex-start;margin-bottom:0.4rem">'
|
||||
+ '<span title="' + esc(typeDesc[item.event_type] || item.event_type || '') + '" style="font-size:1.2rem;cursor:default">' + (typeIcon[item.event_type] || '📌') + '</span>'
|
||||
+ '<span style="font-size:1.2rem">' + (typeIcon[item.event_type] || '📌') + '</span>'
|
||||
+ '<div style="flex:1">'
|
||||
+ (item.buy_signal ? '<span class="intel-badge intel-' + item.buy_signal.replace('_now','').replace('_','') + '" title="' + esc(sigDesc[item.buy_signal] || item.buy_signal) + '">' + (sigLabel[item.buy_signal] || item.buy_signal) + '</span>' : '')
|
||||
+ '<div style="font-weight:700;font-size:0.82rem;line-height:1.3;margin-top:0.2rem">' + esc(item.title) + (item.is_demo ? '<span title="Demo data — sample entry, not a real lifecycle event." style="font-size:0.6rem;padding:1px 5px;border-radius:3px;background:#f0e4ff;color:#7c3aed;font-weight:700;margin-left:4px;vertical-align:middle">Demo Data</span>' : '') + '</div>'
|
||||
+ (item.buy_signal ? '<span class="intel-badge intel-' + item.buy_signal.replace('_now','').replace('_','') + '">' + (sigLabel[item.buy_signal] || item.buy_signal) + '</span>' : '')
|
||||
+ '<div style="font-weight:700;font-size:0.82rem;line-height:1.3;margin-top:0.2rem">' + esc(item.title) + '</div>'
|
||||
+ '</div></div>'
|
||||
+ (item.description ? '<div style="font-size:0.75rem;color:var(--text-dim);margin-bottom:0.5rem;line-height:1.5">' + esc(item.description.substring(0, 200)) + (item.description.length > 200 ? '…' : '') + '</div>' : '')
|
||||
+ '<div style="display:flex;justify-content:space-between;font-size:0.68rem;color:var(--text-dim)">'
|
||||
+ '<span>' + esc(item.source_name || '') + (productInfo ? ' · ' + productInfo : '') + '</span>'
|
||||
+ (dateStr ? '<span title="Effective date of this lifecycle event" style="color:' + ic + ';font-weight:600">' + dateStr + '</span>' : '')
|
||||
+ (dateStr ? '<span style="color:' + ic + ';font-weight:600">' + dateStr + '</span>' : '')
|
||||
+ '</div>'
|
||||
+ '</div>';
|
||||
}).join('');
|
||||
|
||||
@ -1,22 +1,28 @@
|
||||
/**
|
||||
* pg-boss Job Scheduler — 24/7 Continuous Scraping
|
||||
* pg-boss Job Scheduler
|
||||
*
|
||||
* ARCHITECTURE:
|
||||
* - Erik (VPS, .82) : Playwright-heavy scrapers (FS.com, 10Gtek, ATGBICS, ProLabs)
|
||||
* + all compatibility + eBay + compute + NAS sync
|
||||
* - Raspberry Pi Fleet : Lightweight fetch/cheerio scrapers run continuously all day
|
||||
* (BlueOptics, Fiber24, T&S Com, Fluxlight, GBICs, Optcore,
|
||||
* Champion ONE, SFPCables, SmartOptics, HUBER+SUHNER, etc.)
|
||||
* NIGHTLY WINDOW 00:00–08:00 — all scrapers run every night
|
||||
* Staggered to avoid parallel overload and respect rate limits.
|
||||
*
|
||||
* SCHEDULE PHILOSOPHY:
|
||||
* - Playwright scrapers: every 8h (resource-heavy, VPS only)
|
||||
* - Fetch/Cheerio scrapers: every 4h (lightweight, Pi-friendly)
|
||||
* - Catalog scrapers (Flexoptix): every 2h (fast GraphQL, primary price source)
|
||||
* - Compatibility matrices: every 12h (rarely change)
|
||||
* - eBay enrichment: every 6h
|
||||
* - Intelligence/community: every 6h
|
||||
* - Compute jobs: after each pricing wave
|
||||
* - NAS sync: nightly at 07:55
|
||||
* 00:00 eBay transceiver pricing (new/refurb condition prices)
|
||||
* 00:30 eBay switch enrichment (features, descriptions, images)
|
||||
* 01:00 FS.com pricing (JS-rendered, needs Playwright)
|
||||
* 01:45 Optcore pricing
|
||||
* 02:15 10Gtek pricing (Playwright)
|
||||
* 02:45 ATGBICS pricing (Shopify/Playwright, GBP)
|
||||
* 03:15 ProLabs pricing (Playwright)
|
||||
* 03:45 Flexoptix catalog (fast fetch — primary source)
|
||||
* 04:15 Flexoptix vendor list
|
||||
* 04:30 Market intelligence (OFC/ECOC/IEEE/Farnell/TED)
|
||||
* 05:00 Community issues (Reddit/forums/vendor KB)
|
||||
* 05:30 Datasheet + manual link discovery
|
||||
* 06:00 Cisco compatibility matrices
|
||||
* 06:15 News aggregation (trade press)
|
||||
* 06:30 FAQ / knowledge base
|
||||
* 07:00 Docs check (weekly: full doc scrape)
|
||||
* 07:15 ABC classification recompute
|
||||
* 07:30 Reorder signals recompute
|
||||
* 07:45 NAS sync (export JSON data + weekly pg_dump to Fearghas)
|
||||
*/
|
||||
import PgBoss from "pg-boss";
|
||||
import { config } from "dotenv";
|
||||
@ -26,16 +32,14 @@ import { rmSync, mkdirSync } from "fs";
|
||||
/**
 * Run a scraper with an isolated Crawlee storage directory to prevent queue collisions.
 *
 * Points the CRAWLEE_STORAGE_DIR environment variable at a per-scraper directory
 * (`storage-<name>`, resolved three levels above __dirname) for the duration of `fn`,
 * then restores the previous value and removes the directory.
 *
 * @param name Suffix used to build the per-scraper storage directory name.
 * @param fn   Async scraper body executed while the isolated directory is active.
 * @returns A promise that settles once `fn` has finished and cleanup has run;
 *          a rejection from `fn` propagates to the caller after cleanup.
 */
|
||||
async function withIsolatedStorage(name: string, fn: () => Promise<void>): Promise<void> {
|
||||
const dir = join(__dirname, "..", "..", "..", `storage-${name}`);
|
||||
// Pre-create Crawlee's internal subdirectory tree to avoid ENOENT races
|
||||
mkdirSync(join(dir, "request_queues", "default"), { recursive: true });
|
||||
mkdirSync(join(dir, "datasets", "default"), { recursive: true });
|
||||
mkdirSync(join(dir, "key_value_stores", "default"), { recursive: true });
|
||||
mkdirSync(dir, { recursive: true });
|
||||
// Remember the current value so it can be put back once the scraper is done.
|
||||
const prev = process.env.CRAWLEE_STORAGE_DIR;
|
||||
process.env.CRAWLEE_STORAGE_DIR = dir;
|
||||
try {
|
||||
await fn();
|
||||
} finally {
|
||||
// NOTE(review): when the variable was previously unset it is restored as an
|
||||
// empty string rather than being deleted — confirm this is intended.
|
||||
process.env.CRAWLEE_STORAGE_DIR = prev ?? "";
|
||||
// Clean up the isolated storage directory; this sits in `finally`, so it runs
|
||||
// whether `fn` succeeded or threw. Removal errors are deliberately ignored.
|
||||
|
||||
try { rmSync(dir, { recursive: true, force: true }); } catch { /* ignore */ }
|
||||
}
|
||||
|
||||
}
|
||||
@ -50,7 +54,7 @@ export async function createScheduler(): Promise<PgBoss> {
|
||||
retryLimit: 3,
|
||||
retryDelay: 30,
|
||||
retryBackoff: true,
|
||||
expireInSeconds: 300,
|
||||
expireInSeconds: 300, // 5 min timeout per job
|
||||
monitorStateIntervalSeconds: 30,
|
||||
});
|
||||
|
||||
@ -63,424 +67,247 @@ export async function createScheduler(): Promise<PgBoss> {
|
||||
}
|
||||
|
||||
export async function registerSchedules(boss: PgBoss): Promise<void> {
|
||||
// pg-boss v10: create queues before scheduling
|
||||
const queues = [
|
||||
// ── Playwright scrapers (Erik, every 8h) ───────────────────────────
|
||||
"scrape:pricing:fs",
|
||||
"scrape:pricing:optcore",
|
||||
"scrape:pricing:10gtek",
|
||||
"scrape:pricing:atgbics",
|
||||
"scrape:pricing:prolabs",
|
||||
// ── Fetch/Cheerio scrapers (Pi-friendly, every 4h) ─────────────────
|
||||
"scrape:pricing:fluxlight",
|
||||
"scrape:pricing:gbics",
|
||||
"scrape:pricing:optcore",
|
||||
"scrape:pricing:champion-one",
|
||||
"scrape:pricing:sfpcables",
|
||||
"scrape:pricing:blueoptics",
|
||||
"scrape:pricing:fiber24",
|
||||
"scrape:pricing:tscom",
|
||||
"scrape:pricing:skylane",
|
||||
"scrape:pricing:ascentoptics",
|
||||
"scrape:pricing:gaotek",
|
||||
// ── Catalog scrapers (every 2h) ────────────────────────────────────
|
||||
"scrape:pricing:flexoptix",
|
||||
// ── Manufacturer catalogs (every 8h, no prices) ────────────────────
|
||||
"scrape:catalog:smartoptics",
|
||||
"scrape:catalog:hubersuhner",
|
||||
// ── Vendor lists ───────────────────────────────────────────────────
|
||||
"scrape:vendors:flexoptix",
|
||||
"scrape:vendors:flexoptix-supported",
|
||||
// ── Compatibility (every 12h) ──────────────────────────────────────
|
||||
"scrape:compat:cisco",
|
||||
"scrape:compat:juniper",
|
||||
"scrape:compat:sonic",
|
||||
"scrape:compat:ufispace",
|
||||
"scrape:compat:edgecore",
|
||||
// ── Switch enrichment (every 12h) ─────────────────────────────────
|
||||
"scrape:assets:switches",
|
||||
// ── eBay enrichment (every 6h) ────────────────────────────────────
|
||||
"enrich:ebay-transceivers",
|
||||
"enrich:ebay-switches",
|
||||
// ── Intelligence & community (every 6h) ───────────────────────────
|
||||
"scrape:market-intel",
|
||||
"scrape:community-issues",
|
||||
"scrape:datasheet-links",
|
||||
"scrape:pricing:flexoptix",
|
||||
"scrape:vendors:flexoptix",
|
||||
"scrape:news",
|
||||
"scrape:faq",
|
||||
"scrape:docs",
|
||||
// ── Compute (every 4h, after pricing waves) ───────────────────────
|
||||
"scrape:market-intel",
|
||||
"compute:abc",
|
||||
"compute:reorder-signals",
|
||||
// ── Sync ──────────────────────────────────────────────────────────
|
||||
"enrich:ebay-switches",
|
||||
"enrich:ebay-transceivers",
|
||||
"scrape:community-issues",
|
||||
"scrape:datasheet-links",
|
||||
"sync:nas",
|
||||
];
|
||||
|
||||
for (const q of queues) {
|
||||
await boss.createQueue(q).catch(() => { /* already exists */ });
|
||||
}
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// PLAYWRIGHT SCRAPERS — every 8h (resource-heavy, runs on Erik VPS)
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// ════════════════════════════════════════════════════════════════
|
||||
// NIGHTLY WINDOW 00:00–08:00 (all scrapers run every night)
|
||||
// Staggered to avoid parallel overload, respect vendor rate limits
|
||||
// ════════════════════════════════════════════════════════════════
|
||||
|
||||
// FS.com: 01:00, 09:00, 17:00
|
||||
await boss.schedule("scrape:pricing:fs", "0 1,9,17 * * *", {}, { retryLimit: 3, expireInSeconds: 5400 });
|
||||
// 00:00 — eBay transceiver pricing (new/refurb, all 5000+ products)
|
||||
await boss.schedule("enrich:ebay-transceivers", "0 0 * * *", {}, {
|
||||
retryLimit: 2, expireInSeconds: 7200,
|
||||
});
|
||||
|
||||
// 10Gtek: 01:20, 09:20, 17:20
|
||||
await boss.schedule("scrape:pricing:10gtek", "20 1,9,17 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
// 00:30 — eBay switch enrichment (features, images, refurb prices)
|
||||
await boss.schedule("enrich:ebay-switches", "30 0 * * *", {}, {
|
||||
retryLimit: 2, expireInSeconds: 7200,
|
||||
});
|
||||
|
||||
// ATGBICS: 01:50, 09:50, 17:50
|
||||
await boss.schedule("scrape:pricing:atgbics", "50 1,9,17 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
// 01:00 — FS.com pricing (Playwright JS-rendered, slowest scraper)
|
||||
await boss.schedule("scrape:pricing:fs", "0 1 * * *", {}, {
|
||||
retryLimit: 3, expireInSeconds: 5400,
|
||||
});
|
||||
|
||||
// ProLabs: 02:20, 10:20, 18:20
|
||||
await boss.schedule("scrape:pricing:prolabs", "20 2,10,18 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
// 01:45 — Optcore pricing
|
||||
await boss.schedule("scrape:pricing:optcore", "45 1 * * *", {}, {
|
||||
retryLimit: 2, expireInSeconds: 3600,
|
||||
});
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// FETCH/CHEERIO SCRAPERS — every 4h (lightweight, Pi-friendly)
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// 02:15 — 10Gtek pricing (Playwright)
|
||||
await boss.schedule("scrape:pricing:10gtek", "15 2 * * *", {}, {
|
||||
retryLimit: 2, expireInSeconds: 3600,
|
||||
});
|
||||
|
||||
// Fluxlight: 00:05, 04:05, 08:05, 12:05, 16:05, 20:05
|
||||
await boss.schedule("scrape:pricing:fluxlight", "5 0,4,8,12,16,20 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
// 02:45 — ATGBICS pricing (Shopify/Playwright, GBP)
|
||||
await boss.schedule("scrape:pricing:atgbics", "45 2 * * *", {}, {
|
||||
retryLimit: 2, expireInSeconds: 3600,
|
||||
});
|
||||
|
||||
// GBICs: 00:15, 04:15, 08:15, 12:15, 16:15, 20:15
|
||||
await boss.schedule("scrape:pricing:gbics", "15 0,4,8,12,16,20 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
// 03:15 — ProLabs pricing (Playwright/CloudFront)
|
||||
await boss.schedule("scrape:pricing:prolabs", "15 3 * * *", {}, {
|
||||
retryLimit: 2, expireInSeconds: 3600,
|
||||
});
|
||||
|
||||
// Optcore: 00:30, 04:30, 08:30, 12:30, 16:30, 20:30
|
||||
await boss.schedule("scrape:pricing:optcore", "30 0,4,8,12,16,20 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
// 03:45 — Flexoptix catalog (fast fetch — primary source, highest priority)
|
||||
await boss.schedule("scrape:pricing:flexoptix", "45 3 * * *", {}, {
|
||||
retryLimit: 3, expireInSeconds: 3600,
|
||||
});
|
||||
|
||||
// Champion ONE: 00:45, 04:45, 08:45, 12:45, 16:45, 20:45
|
||||
await boss.schedule("scrape:pricing:champion-one", "45 0,4,8,12,16,20 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
// 04:15 — Flexoptix vendor list (full vendor catalog sync)
|
||||
await boss.schedule("scrape:vendors:flexoptix", "15 4 * * *", {}, {
|
||||
retryLimit: 2, expireInSeconds: 1800,
|
||||
});
|
||||
|
||||
// SFPCables: 01:00, 05:00, 09:00, 13:00, 17:00, 21:00
|
||||
await boss.schedule("scrape:pricing:sfpcables", "0 1,5,9,13,17,21 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
// 04:30 — Market intelligence (OFC/ECOC, IEEE 802.3, EU TED, Farnell/Mouser lead times)
|
||||
await boss.schedule("scrape:market-intel", "30 4 * * *", {}, {
|
||||
retryLimit: 2, expireInSeconds: 3600,
|
||||
});
|
||||
|
||||
// BlueOptics: 01:15, 05:15, 09:15, 13:15, 17:15, 21:15
|
||||
await boss.schedule("scrape:pricing:blueoptics", "15 1,5,9,13,17,21 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
// 05:00 — Community issues (Reddit/ServeTheHome/Arista/Cisco forums)
|
||||
await boss.schedule("scrape:community-issues", "0 5 * * *", {}, {
|
||||
retryLimit: 1, expireInSeconds: 3600,
|
||||
});
|
||||
|
||||
// ShopFiber24: 01:30, 05:30, 09:30, 13:30, 17:30, 21:30
|
||||
await boss.schedule("scrape:pricing:fiber24", "30 1,5,9,13,17,21 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
// 05:30 — Datasheet + manual link discovery
|
||||
await boss.schedule("scrape:datasheet-links", "30 5 * * *", {}, {
|
||||
retryLimit: 1, expireInSeconds: 3600,
|
||||
});
|
||||
|
||||
// T&S Communication: 01:45, 05:45, 09:45, 13:45, 17:45, 21:45
|
||||
await boss.schedule("scrape:pricing:tscom", "45 1,5,9,13,17,21 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
// 06:00 — Cisco/Juniper/Arista compatibility matrices (nightly — was weekly)
|
||||
await boss.schedule("scrape:compat:cisco", "0 6 * * *", {}, {
|
||||
retryLimit: 2, expireInSeconds: 3600,
|
||||
});
|
||||
|
||||
// Skylane: 02:00, 06:00, 10:00, 14:00, 18:00, 22:00
|
||||
await boss.schedule("scrape:pricing:skylane", "0 2,6,10,14,18,22 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
// 06:15 — News aggregation (LightReading, FierceTelecom, trade press)
|
||||
await boss.schedule("scrape:news", "15 6 * * *", {}, {
|
||||
retryLimit: 2, expireInSeconds: 1800,
|
||||
});
|
||||
|
||||
// AscentOptics: 02:15, 06:15, 10:15, 14:15, 18:15, 22:15
|
||||
await boss.schedule("scrape:pricing:ascentoptics", "15 2,6,10,14,18,22 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
// 06:30 — FAQ / knowledge base scraping
|
||||
await boss.schedule("scrape:faq", "30 6 * * *", {}, {
|
||||
retryLimit: 2, expireInSeconds: 3600,
|
||||
});
|
||||
|
||||
// GAO Tek: 02:30, 06:30, 10:30, 14:30, 18:30, 22:30
|
||||
await boss.schedule("scrape:pricing:gaotek", "30 2,6,10,14,18,22 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
// 07:00 — Docs check (full document/datasheet download)
|
||||
await boss.schedule("scrape:docs", "0 7 * * *", {}, {
|
||||
retryLimit: 2, expireInSeconds: 3600,
|
||||
});
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// CATALOG SCRAPERS — Flexoptix every 2h (primary price source)
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// 07:15 — ABC classification recompute (after all pricing runs)
|
||||
await boss.schedule("compute:abc", "15 7 * * *", {}, {
|
||||
retryLimit: 2, expireInSeconds: 600,
|
||||
});
|
||||
|
||||
await boss.schedule("scrape:pricing:flexoptix", "0 */2 * * *", {}, { retryLimit: 3, expireInSeconds: 3600 });
|
||||
// 07:30 — Reorder signals recompute (after ABC)
|
||||
await boss.schedule("compute:reorder-signals", "30 7 * * *", {}, {
|
||||
retryLimit: 2, expireInSeconds: 600,
|
||||
});
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// MANUFACTURER CATALOGS — every 8h (product data, no prices)
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// 07:45 — NAS sync: export all data as JSON + weekly pg_dump to Fearghas
|
||||
await boss.schedule("sync:nas", "45 7 * * *", {}, {
|
||||
retryLimit: 1, expireInSeconds: 1800,
|
||||
});
|
||||
|
||||
await boss.schedule("scrape:catalog:smartoptics", "10 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
await boss.schedule("scrape:catalog:hubersuhner", "25 3,11,19 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// VENDOR LISTS — every 12h
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
|
||||
await boss.schedule("scrape:vendors:flexoptix", "0 5,17 * * *", {}, { retryLimit: 2, expireInSeconds: 1800 });
|
||||
await boss.schedule("scrape:vendors:flexoptix-supported", "15 5,17 * * *", {}, { retryLimit: 2, expireInSeconds: 1800 });
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// COMPATIBILITY MATRICES — every 12h
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
|
||||
await boss.schedule("scrape:compat:cisco", "0 6,18 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
await boss.schedule("scrape:compat:juniper", "15 6,18 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
await boss.schedule("scrape:compat:sonic", "30 6,18 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
await boss.schedule("scrape:compat:ufispace", "45 6,18 * * *", {}, { retryLimit: 2, expireInSeconds: 1800 });
|
||||
await boss.schedule("scrape:compat:edgecore", "55 6,18 * * *", {}, { retryLimit: 2, expireInSeconds: 1800 });
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// SWITCH ASSETS — every 12h
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
|
||||
await boss.schedule("scrape:assets:switches", "30 7,19 * * *", {}, { retryLimit: 1, expireInSeconds: 3600 });
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// EBAY ENRICHMENT — every 6h
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
|
||||
await boss.schedule("enrich:ebay-transceivers", "0 0,6,12,18 * * *", {}, { retryLimit: 2, expireInSeconds: 7200 });
|
||||
await boss.schedule("enrich:ebay-switches", "30 0,6,12,18 * * *", {}, { retryLimit: 2, expireInSeconds: 7200 });
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// INTELLIGENCE & COMMUNITY — every 6h
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
|
||||
await boss.schedule("scrape:market-intel", "0 2,8,14,20 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
await boss.schedule("scrape:community-issues", "30 2,8,14,20 * * *", {}, { retryLimit: 1, expireInSeconds: 3600 });
|
||||
await boss.schedule("scrape:datasheet-links", "0 3,9,15,21 * * *", {}, { retryLimit: 1, expireInSeconds: 3600 });
|
||||
await boss.schedule("scrape:news", "20 3,9,15,21 * * *", {}, { retryLimit: 2, expireInSeconds: 1800 });
|
||||
await boss.schedule("scrape:faq", "40 3,9,15,21 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
await boss.schedule("scrape:docs", "50 4,16 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// COMPUTE JOBS — every 4h (after pricing waves settle)
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
|
||||
await boss.schedule("compute:abc", "50 3,7,11,15,19,23 * * *", {}, { retryLimit: 2, expireInSeconds: 600 });
|
||||
await boss.schedule("compute:reorder-signals", "55 3,7,11,15,19,23 * * *", {}, { retryLimit: 2, expireInSeconds: 600 });
|
||||
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
// NAS SYNC — nightly
|
||||
// ══════════════════════════════════════════════════════════════════════
|
||||
|
||||
await boss.schedule("sync:nas", "55 7 * * *", {}, { retryLimit: 1, expireInSeconds: 1800 });
|
||||
|
||||
console.log("All schedules registered — 24/7 continuous scraping (42 jobs)");
|
||||
console.log("All schedules registered — nightly window 00:00–08:00");
|
||||
}
|
||||
|
||||
export async function registerWorkers(boss: PgBoss): Promise<void> {
|
||||
// Lazy-load all scrapers
|
||||
// Lazy-load scrapers to avoid circular deps
|
||||
const { scrapeFs } = await import("./scrapers/fs-com");
|
||||
const { scrapeCiscoTmg } = await import("./scrapers/cisco-tmg");
|
||||
const { scrapeOptcore } = await import("./scrapers/optcore");
|
||||
const { scrape10Gtek } = await import("./scrapers/tenGtek");
|
||||
const { scrapeFlexoptixCatalog } = await import("./scrapers/flexoptix-catalog");
|
||||
const { scrapeFlexoptixVendors } = await import("./scrapers/flexoptix-vendors");
|
||||
const { seedFlexoptixVendors } = await import("./scrapers/flexoptix-supported-vendors");
|
||||
const { scrapeNews } = await import("./scrapers/news");
|
||||
const { scrapeAtgbics } = await import("./scrapers/atgbics");
|
||||
const { scrapeProLabs } = await import("./scrapers/prolabs");
|
||||
const { scrapeChampionOne } = await import("./scrapers/champion-one");
|
||||
const { scrapeFluxlight } = await import("./scrapers/fluxlight");
|
||||
const { scrapeGbics } = await import("./scrapers/gbics");
|
||||
const { scrapeSfpCables } = await import("./scrapers/sfpcables");
|
||||
const { scrapeJuniperHct } = await import("./scrapers/juniper-hct");
|
||||
const { scrapeSonicHcl } = await import("./scrapers/sonic-hcl");
|
||||
const { scrapeUfiSpace } = await import("./scrapers/ufispace");
|
||||
const { scrapeEdgecore } = await import("./scrapers/edgecore");
|
||||
const { scrapeSwitchAssets } = await import("./scrapers/switch-assets");
|
||||
const { scrapeBlueOptics } = await import("./scrapers/blueoptics");
|
||||
const { scrapeFiber24 } = await import("./scrapers/fiber24");
|
||||
const { scrapeTsCom } = await import("./scrapers/tscom");
|
||||
const { scrapeSmartOptics } = await import("./scrapers/smartoptics");
|
||||
const { scrapeHuberSuhner } = await import("./scrapers/hubersuhner");
|
||||
const { scrapeSkylane } = await import("./scrapers/skylane");
|
||||
const { scrapeAscentOptics } = await import("./scrapers/ascentoptics");
|
||||
const { scrapeGaoTek } = await import("./scrapers/gaotek");
|
||||
|
||||
// ── Playwright scrapers ───────────────────────────────────────────────
|
||||
|
||||
await boss.work("scrape:pricing:fs", async () => {
|
||||
await boss.work("scrape:pricing:fs", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: FS.com pricing`);
|
||||
await withIsolatedStorage("fs", scrapeFs);
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:10gtek", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: 10Gtek pricing`);
|
||||
await withIsolatedStorage("10gtek", scrape10Gtek);
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:atgbics", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: ATGBICS pricing`);
|
||||
await withIsolatedStorage("atgbics", scrapeAtgbics);
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:prolabs", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: ProLabs pricing`);
|
||||
await withIsolatedStorage("prolabs", scrapeProLabs);
|
||||
});
|
||||
|
||||
// ── Fetch/Cheerio scrapers ────────────────────────────────────────────
|
||||
|
||||
await boss.work("scrape:pricing:fluxlight", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Fluxlight pricing`);
|
||||
await withIsolatedStorage("fluxlight", scrapeFluxlight);
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:gbics", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: GBICs pricing`);
|
||||
await withIsolatedStorage("gbics", scrapeGbics);
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:optcore", async () => {
|
||||
await boss.work("scrape:pricing:optcore", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Optcore pricing`);
|
||||
await withIsolatedStorage("optcore", scrapeOptcore);
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:champion-one", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Champion ONE pricing`);
|
||||
await withIsolatedStorage("champion-one", scrapeChampionOne);
|
||||
await boss.work("scrape:compat:cisco", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Cisco TMG`);
|
||||
await withIsolatedStorage("cisco", scrapeCiscoTmg);
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:sfpcables", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: SFPCables pricing`);
|
||||
await withIsolatedStorage("sfpcables", scrapeSfpCables);
|
||||
await boss.work("scrape:pricing:10gtek", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: 10Gtek pricing`);
|
||||
await withIsolatedStorage("10gtek", scrape10Gtek);
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:blueoptics", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: BlueOptics pricing`);
|
||||
await withIsolatedStorage("blueoptics", scrapeBlueOptics);
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:fiber24", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: ShopFiber24 pricing`);
|
||||
await withIsolatedStorage("fiber24", scrapeFiber24);
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:tscom", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: T&S Communication pricing`);
|
||||
await withIsolatedStorage("tscom", scrapeTsCom);
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:skylane", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Skylane Optics pricing`);
|
||||
await withIsolatedStorage("skylane", scrapeSkylane);
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:ascentoptics", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: AscentOptics pricing`);
|
||||
await withIsolatedStorage("ascentoptics", scrapeAscentOptics);
|
||||
});
|
||||
|
||||
await boss.work("scrape:pricing:gaotek", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: GAO Tek pricing`);
|
||||
await withIsolatedStorage("gaotek", scrapeGaoTek);
|
||||
});
|
||||
|
||||
// ── Catalog scrapers ──────────────────────────────────────────────────
|
||||
|
||||
await boss.work("scrape:pricing:flexoptix", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Flexoptix catalog`);
|
||||
await boss.work("scrape:pricing:flexoptix", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Flexoptix catalog pricing`);
|
||||
await scrapeFlexoptixCatalog();
|
||||
});
|
||||
|
||||
await boss.work("scrape:catalog:smartoptics", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: SmartOptics catalog`);
|
||||
await withIsolatedStorage("smartoptics", scrapeSmartOptics);
|
||||
});
|
||||
|
||||
await boss.work("scrape:catalog:hubersuhner", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: HUBER+SUHNER catalog`);
|
||||
await withIsolatedStorage("hubersuhner", scrapeHuberSuhner);
|
||||
});
|
||||
|
||||
// ── Vendor lists ──────────────────────────────────────────────────────
|
||||
|
||||
await boss.work("scrape:vendors:flexoptix", async () => {
|
||||
await boss.work("scrape:vendors:flexoptix", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Flexoptix vendor list`);
|
||||
await scrapeFlexoptixVendors();
|
||||
});
|
||||
|
||||
await boss.work("scrape:vendors:flexoptix-supported", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Flexoptix supported vendors`);
|
||||
await seedFlexoptixVendors();
|
||||
await boss.work("scrape:news", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: News aggregation`);
|
||||
await scrapeNews();
|
||||
});
|
||||
|
||||
// ── Compatibility scrapers ────────────────────────────────────────────
|
||||
|
||||
await boss.work("scrape:compat:cisco", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Cisco TMG compatibility`);
|
||||
await withIsolatedStorage("cisco", scrapeCiscoTmg);
|
||||
await boss.work("scrape:pricing:atgbics", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: ATGBICS pricing`);
|
||||
await withIsolatedStorage("atgbics", scrapeAtgbics);
|
||||
});
|
||||
|
||||
await boss.work("scrape:compat:juniper", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Juniper HCT compatibility`);
|
||||
await withIsolatedStorage("juniper", scrapeJuniperHct);
|
||||
await boss.work("scrape:pricing:prolabs", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: ProLabs pricing`);
|
||||
await withIsolatedStorage("prolabs", scrapeProLabs);
|
||||
});
|
||||
|
||||
await boss.work("scrape:compat:sonic", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: SONiC HCL compatibility`);
|
||||
await withIsolatedStorage("sonic", scrapeSonicHcl);
|
||||
await boss.work("scrape:faq", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] FAQ scraper — not yet implemented`);
|
||||
});
|
||||
|
||||
await boss.work("scrape:compat:ufispace", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Ufispace switch data`);
|
||||
await withIsolatedStorage("ufispace", scrapeUfiSpace);
|
||||
await boss.work("scrape:docs", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Docs scraper — not yet implemented`);
|
||||
});
|
||||
|
||||
await boss.work("scrape:compat:edgecore", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Edgecore switch data`);
|
||||
await withIsolatedStorage("edgecore", scrapeEdgecore);
|
||||
});
|
||||
|
||||
// ── Switch assets ─────────────────────────────────────────────────────
|
||||
|
||||
await boss.work("scrape:assets:switches", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Switch assets enrichment`);
|
||||
await withIsolatedStorage("switch-assets", () => scrapeSwitchAssets());
|
||||
});
|
||||
|
||||
// ── eBay enrichment ───────────────────────────────────────────────────
|
||||
|
||||
await boss.work("enrich:ebay-transceivers", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: eBay transceiver pricing`);
|
||||
const { enrichTransceiversFromEbay } = await import("./scrapers/ebay-enricher");
|
||||
await withIsolatedStorage("ebay-transceivers", () => enrichTransceiversFromEbay(100));
|
||||
});
|
||||
|
||||
await boss.work("enrich:ebay-switches", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: eBay switch enrichment`);
|
||||
const { enrichSwitchesFromEbay } = await import("./scrapers/ebay-enricher");
|
||||
await withIsolatedStorage("ebay-switches", () => enrichSwitchesFromEbay(30));
|
||||
});
|
||||
|
||||
// ── Intelligence & community ──────────────────────────────────────────
|
||||
|
||||
await boss.work("scrape:market-intel", async () => {
|
||||
await boss.work("scrape:market-intel", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Market intelligence`);
|
||||
const { scrapeMarketIntelligence } = await import("./scrapers/market-intelligence");
|
||||
await withIsolatedStorage("market-intel", scrapeMarketIntelligence);
|
||||
});
|
||||
|
||||
await boss.work("scrape:community-issues", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Community issues`);
|
||||
const { scrapeAllSwitchIssues } = await import("./scrapers/community-issues");
|
||||
await withIsolatedStorage("community-issues", () => scrapeAllSwitchIssues(30));
|
||||
});
|
||||
|
||||
await boss.work("scrape:datasheet-links", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Datasheet links`);
|
||||
const { findAndSeedDatasheetLinks } = await import("./scrapers/community-issues");
|
||||
await findAndSeedDatasheetLinks(50);
|
||||
});
|
||||
|
||||
await boss.work("scrape:news", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Running: News aggregation`);
|
||||
await scrapeNews();
|
||||
});
|
||||
|
||||
await boss.work("scrape:faq", async () => {
|
||||
console.log(`[${new Date().toISOString()}] FAQ scraper — not yet implemented`);
|
||||
});
|
||||
|
||||
await boss.work("scrape:docs", async () => {
|
||||
console.log(`[${new Date().toISOString()}] Docs scraper — not yet implemented`);
|
||||
});
|
||||
|
||||
// ── Compute jobs ──────────────────────────────────────────────────────
|
||||
|
||||
await boss.work("compute:abc", async () => {
|
||||
await boss.work("compute:abc", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Computing: ABC classification`);
|
||||
const { computeAbcClassification } = await import("./scrapers/market-intelligence");
|
||||
await computeAbcClassification();
|
||||
});
|
||||
|
||||
await boss.work("compute:reorder-signals", async () => {
|
||||
await boss.work("compute:reorder-signals", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Computing: Reorder signals`);
|
||||
const { computeReorderSignals } = await import("./scrapers/market-intelligence");
|
||||
await computeReorderSignals();
|
||||
});
|
||||
|
||||
// ── NAS sync ──────────────────────────────────────────────────────────
|
||||
await boss.work("enrich:ebay-switches", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: eBay switch enrichment`);
|
||||
const { enrichSwitchesFromEbay } = await import("./scrapers/ebay-enricher");
|
||||
await withIsolatedStorage("ebay-switches", () => enrichSwitchesFromEbay(30));
|
||||
});
|
||||
|
||||
await boss.work("sync:nas", async () => {
|
||||
await boss.work("enrich:ebay-transceivers", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: eBay transceiver pricing`);
|
||||
const { enrichTransceiversFromEbay } = await import("./scrapers/ebay-enricher");
|
||||
await withIsolatedStorage("ebay-transceivers", () => enrichTransceiversFromEbay(100));
|
||||
});
|
||||
|
||||
await boss.work("scrape:community-issues", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Community issues scraping`);
|
||||
const { scrapeAllSwitchIssues } = await import("./scrapers/community-issues");
|
||||
await withIsolatedStorage("community-issues", () => scrapeAllSwitchIssues(30));
|
||||
});
|
||||
|
||||
await boss.work("scrape:datasheet-links", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: Datasheet link discovery`);
|
||||
const { findAndSeedDatasheetLinks } = await import("./scrapers/community-issues");
|
||||
await findAndSeedDatasheetLinks(50);
|
||||
});
|
||||
|
||||
await boss.work("sync:nas", async (_job) => {
|
||||
console.log(`[${new Date().toISOString()}] Running: NAS sync to Fearghas`);
|
||||
const { runNightlyNasSync } = await import("./utils/nas-sync");
|
||||
await runNightlyNasSync();
|
||||
});
|
||||
|
||||
console.log("All workers registered (42 jobs, 24/7 continuous)");
|
||||
console.log("All workers registered");
|
||||
}
|
||||
|
||||
@ -1,280 +0,0 @@
|
||||
/**
|
||||
* Ascent Optics Scraper — US-based compatible transceiver vendor
|
||||
*
|
||||
* ascentoptics.com — product catalog with USD prices.
|
||||
* Tries /catalog/ and /products/ as entry points.
|
||||
*
|
||||
* Rate limited: 1 req/2sec.
|
||||
*/
|
||||
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
|
||||
import { contentHash } from "../utils/hash";
|
||||
import * as cheerio from "cheerio";
|
||||
|
||||
/** Site origin; root-relative product links are prefixed with it. */
const BASE = "https://ascentoptics.com";

/** Catalog entry points, tried one after another in this order. */
const CATALOG_URLS = ["/catalog/", "/products/", "/products/transceivers/", "/catalog/transceivers/"];

/** Highest `?page=N` index requested for any single catalog entry point. */
const MAX_PAGES = 15;

/** HTTP request headers attached to every page fetch. */
const HEADERS = {
  "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
  "Accept": "text/html,application/xhtml+xml",
  "Accept-Language": "en-US,en;q=0.9",
};
|
||||
|
||||
/** One catalog entry as extracted from a listing page. */
interface Product {
  /** Part number matched in the listing, or a slug built from the name when none is recognised. */
  partNumber: string;
  /** Display name taken from the product card or anchor text. */
  name: string;
  /** Product page URL; also the key used to de-duplicate entries. */
  url: string;
  /** USD price parsed from the listing; absent when no plausible price was found. */
  price?: number;
  /** Form factor label such as "SFP+" or "QSFP28". */
  formFactor: string;
  /** Nominal speed label such as "10G". */
  speed: string;
  /** Nominal speed in Gbit/s. */
  speedGbps: number;
  /** Reach label such as "10km"; absent when no reach hint was detected. */
  reachLabel?: string;
  /** Reach in metres; absent when no reach hint was detected. */
  reachMeters?: number;
  /** Medium guess: "SMF", "MMF" or "Copper". */
  fiberType?: string;
  /** Wavelength digits (nm) found in the text, or "" when none was found. */
  wavelength?: string;
}
|
||||
|
||||
/**
 * Returns a promise that settles after the given delay.
 *
 * @param ms Delay in milliseconds.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Guesses form factor and nominal speed from free text.
 *
 * Matching is case-insensitive substring search. Rules are evaluated in a
 * fixed order and the first hit wins; text that matches nothing is reported
 * as SFP+ / 10G.
 *
 * @param text Product name and/or surrounding listing text.
 * @returns Form factor label, speed label ("<n>G") and speed in Gbit/s.
 */
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
  type Rule = [matched: boolean, formFactor: string, speedGbps: number];

  const haystack = text.toLowerCase();
  const has = (needle: string): boolean => haystack.includes(needle);
  const noQuad = !has("qsfp");

  // Order matters: more specific families are listed before generic ones.
  const rules: Rule[] = [
    [has("osfp") && noQuad, "OSFP", 400],
    [has("qsfp-dd"), "QSFP-DD", 400],
    [has("qsfp28"), "QSFP28", 100],
    [has("qsfp+") || has("qsfp plus"), "QSFP+", 40],
    [has("sfp56"), "SFP56", 50],
    [has("sfp28") || has("25g"), "SFP28", 25],
    [has("sfp+") || has("10gbase") || has("10g"), "SFP+", 10],
    [has("xfp"), "XFP", 10],
    [has("1000base") || has("1g"), "SFP", 1],
    [has("sfp") && noQuad, "SFP", 1],
  ];

  const fallback: Rule = [true, "SFP+", 10];
  const [, formFactor, speedGbps] = rules.find(([matched]) => matched) ?? fallback;
  return { formFactor, speed: `${speedGbps}G`, speedGbps };
}
|
||||
|
||||
/**
 * Derives a reach estimate from free text.
 *
 * Explicit distances ("10 km", "300m") are checked first, then optical
 * reach codes (LR, ER, ZR, SR, DR, FR and their "4" variants). The first
 * rule that matches decides the result.
 *
 * @param text Product name and/or surrounding listing text.
 * @returns Reach label and metres, or `undefined` when nothing matches.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  const rules: { re: RegExp; label: string; meters: number }[] = [
    { re: /\b80\s*km\b/i, label: "80km", meters: 80000 },
    { re: /\b40\s*km\b/i, label: "40km", meters: 40000 },
    { re: /\b20\s*km\b/i, label: "20km", meters: 20000 },
    { re: /\b10\s*km\b/i, label: "10km", meters: 10000 },
    { re: /\b2\s*km\b/i, label: "2km", meters: 2000 },
    { re: /\b550\s*m\b/i, label: "550m", meters: 550 },
    { re: /\b300\s*m\b/i, label: "300m", meters: 300 },
    { re: /\b100\s*m\b/i, label: "100m", meters: 100 },
    { re: /\bLR4\b/, label: "10km", meters: 10000 },
    { re: /\bLR\b/, label: "10km", meters: 10000 },
    { re: /\bER4?\b/, label: "40km", meters: 40000 },
    { re: /\bZR4?\b/, label: "80km", meters: 80000 },
    { re: /\bSR4?\b/, label: "300m", meters: 300 },
    { re: /\bDR4?\b/, label: "500m", meters: 500 },
    { re: /\bFR4?\b/, label: "2km", meters: 2000 },
  ];

  const hit = rules.find(({ re }) => re.test(text));
  if (hit === undefined) return undefined;
  return { label: hit.label, meters: hit.meters };
}
|
||||
|
||||
/**
 * Guesses the transmission medium from free text.
 *
 * Checks single-mode hints first, then multi-mode, then copper; text with no
 * hint at all is reported as "SMF".
 *
 * The short reach codes (LX/LR/ER/ZR, SX/SR) must not be embedded in a longer
 * word, but they may sit at the very start or end of the text, so a part
 * number ending in "-SR" is classified as MMF instead of falling through to
 * the default.
 *
 * @param text Product name and/or surrounding listing text.
 * @returns "SMF", "MMF" or "Copper".
 */
function detectFiber(text: string): string {
  // Lookarounds (rather than consuming [^a-z] on both sides) let a code match
  // at a string boundary while still rejecting it inside a longer word.
  const singleMode = /single.?mode|smf|(?<![a-z])(?:lx|lr|er|zr)(?![a-z])|bidi|cwdm|dwdm/i;
  const multiMode = /multi.?mode|mmf|(?<![a-z])(?:sx|sr)(?![a-z])/i;
  const copper = /copper|dac|twinax|rj45|base-t/i;

  if (singleMode.test(text)) return "SMF";
  if (multiMode.test(text)) return "MMF";
  if (copper.test(text)) return "Copper";
  return "SMF";
}
|
||||
|
||||
/**
 * Pulls the first "<3-4 digits> nm" wavelength out of free text.
 *
 * @param text Product name and/or surrounding listing text.
 * @returns The digits only (no unit), or "" when no wavelength is present.
 */
function detectWavelength(text: string): string {
  const found = /(\d{3,4})\s*nm/i.exec(text);
  if (found === null) return "";
  return found[1];
}
|
||||
|
||||
/**
 * Extracts transceiver products from one catalog page.
 *
 * Strategy:
 *  1. Try a list of product-card selectors in order. The first selector that
 *     matches at least two elements AND yields at least one product wins.
 *  2. If no selector produced anything, fall back to scanning every anchor
 *     whose text looks like a transceiver name.
 *  3. De-duplicate the result by URL, keeping the first occurrence.
 *
 * @param html      Raw HTML of the page.
 * @param sourceUrl URL the HTML was fetched from; used when a card has no
 *                  link and as the base for resolving relative hrefs.
 * @returns Products found on the page (possibly empty).
 */
function parseProductList(html: string, sourceUrl: string): Product[] {
  const $ = cheerio.load(html);
  const products: Product[] = [];

  // Turns an href into an absolute URL. Absolute http(s) links pass through
  // untouched. Everything else is resolved against the page URL so that
  // document-relative ("item.html") and protocol-relative ("//host/x") links
  // do not get glued onto the host name. Plain concatenation is kept only as
  // a last resort when resolution is impossible.
  const toAbsoluteUrl = (href: string): string => {
    if (href.startsWith("http")) return href;
    try {
      return new URL(href, sourceUrl).href;
    } catch {
      return BASE + href;
    }
  };

  const cardSelectors = [
    ".product-item", ".product", ".item", "li.product",
    ".product-card", "tr", "article", ".catalog-item",
    ".product-list-item", ".result",
  ];

  for (const sel of cardSelectors) {
    if ($(sel).length >= 2) {
      $(sel).each((_i, el) => {
        const text = $(el).text().trim();
        // Skip cards that are clearly not optics (navigation rows, banners, ...).
        if (!/sfp|qsfp|xfp|transceiver|optic/i.test(text)) return;

        const nameEl = $(el).find("h2, h3, h4, .name, .product-name, .title, td, a").first();
        const name = nameEl.text().trim() || text.slice(0, 120);
        if (!name || name.length < 5) return;

        const linkEl = $(el).find("a[href]").first();
        const url = toAbsoluteUrl(linkEl.attr("href") || sourceUrl);

        // Ascent Optics part numbers: e.g. AS-SFP-10G-SR, SFP-10G-LR-AS
        const partNumMatch = name.match(/\b(AS[-_][A-Z0-9-]+)\b/i) ||
          name.match(/\b([A-Z]{2,}[-][A-Z0-9]+[-][A-Z0-9]+[-][A-Z0-9]+)\b/) ||
          text.match(/Part\s*(?:No\.?|Number|#)?\s*:?\s*([A-Z0-9-]{6,})/i);
        // Last resort: any long dash-separated token, then a slug of the name.
        const partNumber = partNumMatch?.[1] ||
          name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] ||
          name.replace(/\s+/g, "-").slice(0, 60);

        // USD price
        const priceText = $(el).find(".price, .product-price, .amount, [data-price]").text();
        const priceMatch = priceText.match(/\$\s*([\d,]+\.?\d{0,2})/);
        let price: number | undefined;
        if (priceMatch) {
          // Strip EVERY thousands separator; removing only the first one would
          // make parseFloat stop at the next comma and return a truncated
          // value that can still pass the sanity range below.
          const parsed = parseFloat(priceMatch[1].replace(/,/g, ""));
          if (parsed > 0 && parsed < 50000) price = parsed;
        }

        const combined = name + " " + text;
        const ff = detectFormFactor(combined);
        const reach = detectReach(combined);

        products.push({
          partNumber,
          name,
          url,
          price,
          ...ff,
          reachLabel: reach?.label,
          reachMeters: reach?.meters,
          fiberType: detectFiber(combined),
          wavelength: detectWavelength(combined),
        });
      });
      if (products.length > 0) break;
    }
  }

  // Fallback: transceiver-relevant anchors
  if (products.length === 0) {
    $("a[href]").each((_i, el) => {
      const name = $(el).text().trim();
      const href = $(el).attr("href") || "";
      if (name.length < 8 || name.length > 200 || !/sfp|qsfp|transceiver/i.test(name)) return;
      const url = toAbsoluteUrl(href);
      const ff = detectFormFactor(name);
      const reach = detectReach(name);
      products.push({
        partNumber: name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60),
        name, url, ...ff,
        reachLabel: reach?.label, reachMeters: reach?.meters,
        fiberType: detectFiber(name), wavelength: detectWavelength(name),
      });
    });
  }

  // Keep only the first product seen for each URL.
  const seen = new Set<string>();
  return products.filter((p) => {
    if (!p.url || seen.has(p.url)) return false;
    seen.add(p.url);
    return true;
  });
}
|
||||
|
||||
/**
 * Downloads a page and returns its body as text.
 *
 * The request carries the module's HEADERS and is aborted after 30 seconds.
 *
 * @param url Absolute URL to fetch.
 * @returns The response body.
 * @throws Error when the response status is not OK (message contains the
 *         status code and URL); network and timeout failures propagate from
 *         `fetch`.
 */
async function fetchPage(url: string): Promise<string> {
  const timeout = AbortSignal.timeout(30000);
  const response = await fetch(url, { headers: HEADERS, signal: timeout });
  if (response.ok) {
    return response.text();
  }
  throw new Error(`HTTP ${response.status} for ${url}`);
}
|
||||
|
||||
/**
 * Scrapes the Ascent Optics catalog and stores products and prices.
 *
 * Flow:
 *  1. Ensure the vendor row exists.
 *  2. For every entry in CATALOG_URLS, fetch the landing page and then
 *     `?page=2..MAX_PAGES`, collecting products that have not been seen yet
 *     (keyed by URL across all entry points).
 *  3. Upsert each product, and a USD price observation when a price was
 *     parsed.
 *
 * Failures are best-effort: a failing entry point is logged and skipped, a
 * failing page ends pagination for that entry point, and a product that
 * cannot be saved is logged and skipped.
 *
 * Every HTTP request, successful or not, is followed by a 2 second pause so
 * that the 1 req / 2 s budget also holds when pagination stops early.
 */
export async function scrapeAscentOptics(): Promise<void> {
  console.log("=== Ascent Optics Scraper Starting ===\n");

  const vendorId = await ensureVendor(
    "Ascent Optics",
    "compatible",
    "https://ascentoptics.com",
    BASE + CATALOG_URLS[0],
  );

  const allProducts: Product[] = [];
  const seenUrls = new Set<string>();
  const triedUrls = new Set<string>();

  // Fetch, then always pause - even when the request throws - so the next
  // request can never follow immediately.
  const fetchPaced = async (url: string): Promise<string> => {
    try {
      return await fetchPage(url);
    } finally {
      await sleep(2000);
    }
  };

  // Adds products whose URL has not been collected from any entry point yet.
  const collect = (found: Product[]): void => {
    for (const p of found) {
      if (!seenUrls.has(p.url)) { seenUrls.add(p.url); allProducts.push(p); }
    }
  };

  for (const catalogPath of CATALOG_URLS) {
    const catalogUrl = BASE + catalogPath;
    if (triedUrls.has(catalogUrl)) continue;
    triedUrls.add(catalogUrl);

    console.log(` Fetching catalog: ${catalogUrl}`);
    try {
      const html = await fetchPaced(catalogUrl);
      const pageProducts = parseProductList(html, catalogUrl);
      // URLs seen under THIS entry point only; used to notice when the site
      // ignores ?page= and keeps serving the same listing.
      const seenInCatalog = new Set<string>(pageProducts.map((p) => p.url));
      collect(pageProducts);
      console.log(` Found ${pageProducts.length} products`);

      // Paginate from each working catalog URL
      for (let page = 2; page <= MAX_PAGES; page++) {
        const pageUrl = `${catalogUrl}?page=${page}`;
        try {
          const pageHtml = await fetchPaced(pageUrl);
          const paginated = parseProductList(pageHtml, pageUrl);
          if (paginated.length === 0) break;

          const fresh = paginated.filter((p) => !seenInCatalog.has(p.url));
          if (fresh.length === 0) {
            // Same listing as an earlier page of this entry point: further
            // page numbers would only repeat it.
            console.log(` Page ${page}: no new products, stopping pagination`);
            break;
          }
          for (const p of fresh) seenInCatalog.add(p.url);

          collect(paginated);
          console.log(` Page ${page}: ${paginated.length} products`);
        } catch (err) {
          // Usually just the end of the catalog (e.g. 404); record it and move on.
          console.log(` Pagination stopped at page ${page}: ${(err as Error).message}`);
          break;
        }
      }
    } catch (err) {
      console.warn(` Failed: ${(err as Error).message}`);
    }
  }

  console.log(`\nTotal unique products: ${allProducts.length}`);

  let totalProducts = 0;
  let priceUpdates = 0;

  for (const product of allProducts) {
    try {
      const txId = await findOrCreateScrapedTransceiver({
        partNumber: product.partNumber,
        vendorId,
        formFactor: product.formFactor,
        speedGbps: product.speedGbps,
        speed: product.speed,
        reachMeters: product.reachMeters,
        reachLabel: product.reachLabel,
        fiberType: product.fiberType,
        wavelengths: product.wavelength,
        category: "DataCenter",
      });

      if (product.price && product.price > 0) {
        // Hash of price + part number, passed along with the observation.
        const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber }));
        const updated = await upsertPriceObservation({
          transceiverId: txId,
          sourceVendorId: vendorId,
          price: product.price,
          currency: "USD",
          stockLevel: "in_stock",
          url: product.url,
          contentHash: hash,
        });
        if (updated) priceUpdates++;
      }
      totalProducts++;
    } catch (err) {
      console.warn(` Error saving ${product.partNumber}: ${(err as Error).message.slice(0, 80)}`);
    }
  }

  console.log(`\n=== Ascent Optics Complete: ${totalProducts} products, ${priceUpdates} prices ===`);
}
|
||||
|
||||
// CLI entry point: run the scraper when this file is executed directly,
// closing the database pool afterwards and exiting with status 1 on failure.
if (require.main === module) {
  const closePool = () => pool.end();
  const onFatal = (err: unknown) => {
    console.error("Fatal:", err);
    pool.end();
    process.exit(1);
  };

  scrapeAscentOptics().then(closePool).catch(onFatal);
}
|
||||
@ -1,265 +0,0 @@
|
||||
/**
|
||||
* BlueOptics Scraper — German compatible transceiver vendor
|
||||
*
|
||||
* www.blueoptics.de — WooCommerce/Magento shop with EUR prices.
|
||||
* Paginated catalog: /transceivers/?page=N
|
||||
*
|
||||
* Rate limited: 1 req/2sec.
|
||||
*/
|
||||
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
|
||||
import { contentHash } from "../utils/hash";
|
||||
import * as cheerio from "cheerio";
|
||||
|
||||
/** Shop origin; relative product links are prefixed with it. */
const BASE = "https://www.blueoptics.de";

/** Path of the paginated transceiver catalog, relative to BASE. */
const CATALOG_PATH = "/transceivers/";

/** Upper bound on the number of catalog pages requested in one run. */
const MAX_PAGES = 20;

/** Request headers sent with every page fetch (German content preferred). */
const HEADERS = {
  "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
  "Accept": "text/html,application/xhtml+xml",
  "Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
};
|
||||
|
||||
/** One catalog entry as extracted from a BlueOptics listing page. */
interface Product {
  /** SKU from the listing when present, otherwise a token or slug taken from the name. */
  partNumber: string;
  /** Product title text as shown in the listing. */
  name: string;
  /** Product page URL; also serves as the de-duplication key. */
  url: string;
  /** Listed price, set only when an amount could be parsed (stored as EUR by the scraper). */
  price?: number;

  /** Form factor guessed from the name, e.g. "SFP+" or "QSFP28". */
  formFactor: string;
  /** Nominal speed label matching the form factor, e.g. "10G". */
  speed: string;
  /** Nominal speed in Gbit/s. */
  speedGbps: number;

  /** Reach label such as "10km", when one was detected. */
  reachLabel?: string;
  /** Reach in meters, when one was detected. */
  reachMeters?: number;
  /** Medium guessed from the name: "SMF", "MMF" or "Copper". */
  fiberType?: string;
  /** Wavelength digits preceding "nm" in the name, or "" when none was found. */
  wavelength?: string;
}
|
||||
|
||||
/**
 * Returns a promise that resolves after roughly `ms` milliseconds.
 * Used to space out requests to the shop.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Guesses a module's form factor and nominal speed from its product name.
 *
 * Matching is a case-insensitive substring search; rules are evaluated from
 * most to least specific and the first hit wins. When nothing matches the
 * result defaults to SFP+ / 10G.
 *
 * @param text Product name or description.
 * @returns Form factor label, speed label and speed in Gbit/s.
 */
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
  const lower = text.toLowerCase();
  const has = (needle: string): boolean => lower.includes(needle);

  if (has("osfp") && !has("qsfp")) return { formFactor: "OSFP", speed: "400G", speedGbps: 400 };
  if (has("qsfp-dd")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
  if (has("qsfp28")) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
  if (has("qsfp+") || has("qsfp plus")) return { formFactor: "QSFP+", speed: "40G", speedGbps: 40 };
  if (has("sfp56")) return { formFactor: "SFP56", speed: "50G", speedGbps: 50 };
  if (has("sfp28") || has("25g")) return { formFactor: "SFP28", speed: "25G", speedGbps: 25 };
  if (has("sfp+")) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };

  // XFP has to be tested before the generic 10G speed hints: XFP names nearly
  // always contain "10G"/"10GBASE" and would otherwise be reported as SFP+.
  if (has("xfp")) return { formFactor: "XFP", speed: "10G", speedGbps: 10 };
  if (has("10gbase") || has("10g")) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };

  if (has("1000base") || has("1g")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
  if (has("sfp") && !has("qsfp")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };

  return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
}
|
||||
|
||||
/**
 * Infers the nominal reach of a module from its product name.
 *
 * Explicit distances ("10 km", "300m", case-insensitive) are tried first,
 * followed by optic-type codes (LR, ER, ZR, SR, DR, FR, optionally with a
 * trailing "4"; case-sensitive). The first rule that matches wins.
 *
 * @param text Product name or description.
 * @returns Reach label and distance in meters, or undefined when no rule matches.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  const rules: Array<{ re: RegExp; label: string; meters: number }> = [
    { re: /\b80\s*km\b/i, label: "80km", meters: 80000 },
    { re: /\b40\s*km\b/i, label: "40km", meters: 40000 },
    { re: /\b20\s*km\b/i, label: "20km", meters: 20000 },
    { re: /\b10\s*km\b/i, label: "10km", meters: 10000 },
    { re: /\b2\s*km\b/i, label: "2km", meters: 2000 },
    { re: /\b550\s*m\b/i, label: "550m", meters: 550 },
    { re: /\b300\s*m\b/i, label: "300m", meters: 300 },
    { re: /\b100\s*m\b/i, label: "100m", meters: 100 },
    { re: /\bLR4\b/, label: "10km", meters: 10000 },
    { re: /\bLR\b/, label: "10km", meters: 10000 },
    { re: /\bER4?\b/, label: "40km", meters: 40000 },
    { re: /\bZR4?\b/, label: "80km", meters: 80000 },
    { re: /\bSR4?\b/, label: "300m", meters: 300 },
    { re: /\bDR4?\b/, label: "500m", meters: 500 },
    { re: /\bFR4?\b/, label: "2km", meters: 2000 },
  ];

  const hit = rules.find((rule) => rule.re.test(text));
  return hit ? { label: hit.label, meters: hit.meters } : undefined;
}
|
||||
|
||||
/**
 * Guesses the cabling medium from a product name.
 *
 * Checks run in this order: single-mode hints, multi-mode hints, copper
 * hints. Anything unrecognised is reported as "SMF".
 *
 * Optic-type codes (LX/LR/ER/ZR and SX/SR) are matched as standalone tokens
 * that may sit at the very start or end of the text. The earlier
 * `[^a-z]xx[^a-z]` form demanded a character on both sides, so a name ending
 * in "-SR" or "-SX" fell through to the SMF default.
 *
 * @param text Product name or description.
 * @returns "SMF", "MMF" or "Copper".
 */
function detectFiber(text: string): string {
  const singleMode = /single.?mode|smf|(?:^|[^a-z])(?:lx|lr|er|zr)(?:[^a-z]|$)|bidi|cwdm|dwdm/i;
  const multiMode = /multi.?mode|mmf|(?:^|[^a-z])(?:sx|sr)(?:[^a-z]|$)/i;
  const copper = /copper|dac|twinax|rj45|base-t/i;

  if (singleMode.test(text)) return "SMF";
  if (multiMode.test(text)) return "MMF";
  if (copper.test(text)) return "Copper";
  return "SMF";
}
|
||||
|
||||
/**
 * Extracts the first 3-4 digit number that is followed by "nm"
 * (case-insensitive, optional whitespace in between).
 *
 * @param text Product name or description.
 * @returns The digits as a string, or "" when no wavelength is present.
 */
function detectWavelength(text: string): string {
  const found = /(\d{3,4})\s*nm/i.exec(text);
  if (found === null) return "";
  return found[1];
}
|
||||
|
||||
/**
 * Pulls a German-formatted EUR amount ("1.234,56 €" or "€ 1.234,56") out of
 * free text. Dots are dropped as thousands separators and the first comma
 * becomes the decimal point.
 *
 * @returns The amount, or undefined when none is found or it lies outside
 *          the open range (0, 50000).
 */
function parseEuroPrice(text: string): number | undefined {
  const match = text.match(/([\d.,]+)\s*€|€\s*([\d.,]+)/);
  if (!match) return undefined;
  const normalized = (match[1] || match[2]).replace(/\./g, "").replace(",", ".");
  const value = parseFloat(normalized);
  return value > 0 && value < 50000 ? value : undefined;
}

/**
 * Turns an href from the page into an absolute URL.
 * Absolute http(s) links are returned untouched; everything else is resolved
 * against BASE, so "product/x", "/product/x" and "../x" all yield valid URLs.
 *
 * @returns The absolute URL, or undefined for an empty or unparsable href.
 */
function resolveProductUrl(href: string): string | undefined {
  const trimmed = href.trim();
  if (!trimmed) return undefined;
  if (/^https?:\/\//i.test(trimmed)) return trimmed;
  try {
    return new URL(trimmed, BASE).href;
  } catch {
    return undefined;
  }
}

/** Derives a part number from the name: first SKU-like token, else a 60-char slug. */
function partNumberFromName(name: string): string {
  return name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60);
}

/** Assembles a Product, filling the optical attributes from the name heuristics. */
function toProduct(name: string, url: string, price: number | undefined, partNumber: string): Product {
  const reach = detectReach(name);
  return {
    partNumber,
    name,
    url,
    price,
    ...detectFormFactor(name),
    reachLabel: reach?.label,
    reachMeters: reach?.meters,
    fiberType: detectFiber(name),
    wavelength: detectWavelength(name),
  };
}

/**
 * Extracts products from one catalog page.
 *
 * Product cards are tried first (WooCommerce-style selectors). If none yields
 * a product, every link whose text looks like a transceiver name is used
 * instead, with the price searched in the link's grandparent text.
 *
 * Entries without a usable link are skipped: the URL is the de-duplication
 * key, and the previous `BASE + href` concatenation produced either the bare
 * shop origin (missing href) or a malformed URL (href without leading slash).
 *
 * @param html Raw HTML of a catalog page.
 * @returns Products in page order, at most one per URL.
 */
function parseProductList(html: string): Product[] {
  const $ = cheerio.load(html);
  const products: Product[] = [];

  // Primary path: WooCommerce/generic shop product cards.
  $("li.product, .product-item, article.product, .product-card, .woocommerce-loop-product").each((_i, el) => {
    const card = $(el);
    const name = card
      .find("h2, h3, .product-title, .woocommerce-loop-product__title, .product-name")
      .first()
      .text()
      .trim();
    if (name.length < 5) return;

    const url = resolveProductUrl(card.find("a[href]").first().attr("href") || "");
    if (!url) return;

    const price = parseEuroPrice(
      card.find(".price, .woocommerce-Price-amount, .amount, .product-price").first().text(),
    );

    // Prefer an explicit SKU; otherwise derive something stable from the name.
    const skuEl = card.find("[data-sku], .sku");
    const partNumber = skuEl.attr("data-sku") || skuEl.text().trim() || partNumberFromName(name);

    products.push(toProduct(name, url, price, partNumber));
  });

  // Fallback: link-based extraction for layouts without recognisable cards.
  if (products.length === 0) {
    $("a[href]").each((_i, el) => {
      const link = $(el);
      const name = link.text().trim();
      if (name.length < 10 || name.length > 200 || !/sfp|qsfp|xfp|transceiver/i.test(name)) return;

      const url = resolveProductUrl(link.attr("href") || "");
      if (!url) return;

      const price = parseEuroPrice(link.parent().parent().text());
      products.push(toProduct(name, url, price, partNumberFromName(name)));
    });
  }

  const seen = new Set<string>();
  return products.filter((p) => {
    if (seen.has(p.url)) return false;
    seen.add(p.url);
    return true;
  });
}
|
||||
|
||||
/**
 * Downloads a page as text using the shared request headers.
 * The request is aborted after 30 seconds.
 *
 * @param url Absolute URL to fetch.
 * @returns The response body.
 * @throws Error with the HTTP status when the response is not OK.
 */
async function fetchPage(url: string): Promise<string> {
  const response = await fetch(url, {
    headers: HEADERS,
    signal: AbortSignal.timeout(30000),
  });
  if (response.ok) {
    return response.text();
  }
  throw new Error(`HTTP ${response.status} for ${url}`);
}
|
||||
|
||||
/**
 * Scrapes the BlueOptics transceiver catalog and stores the results.
 *
 * Steps: register the vendor, walk up to MAX_PAGES catalog pages (2 s pause
 * between requests, stopping at the first empty or failed page), de-duplicate
 * by URL, then create/lookup a transceiver row per product and record an EUR
 * price observation when a price was parsed. A failure on one product is
 * logged and does not stop the run.
 */
export async function scrapeBlueOptics(): Promise<void> {
  console.log("=== BlueOptics Scraper Starting ===\n");

  const catalogUrl = BASE + CATALOG_PATH;
  const vendorId = await ensureVendor("BlueOptics", "compatible", "https://www.blueoptics.de", catalogUrl);

  // Phase 1: collect listings page by page.
  const collected: Product[] = [];
  for (let pageNo = 1; pageNo <= MAX_PAGES; pageNo += 1) {
    try {
      const pageUrl = pageNo > 1 ? `${catalogUrl}?page=${pageNo}` : catalogUrl;
      const found = parseProductList(await fetchPage(pageUrl));
      for (const item of found) collected.push(item);
      console.log(` Page ${pageNo}: ${found.length} products`);

      if (found.length === 0) {
        console.log(` Empty page ${pageNo}, stopping pagination.`);
        break;
      }
      // No pause needed after the final allowed page.
      if (pageNo !== MAX_PAGES) await sleep(2000);
    } catch (err) {
      console.warn(` Page ${pageNo} failed: ${(err as Error).message}`);
      break;
    }
  }

  // Phase 2: keep the first occurrence of every URL.
  const firstByUrl = new Map<string, Product>();
  for (const item of collected) {
    if (!firstByUrl.has(item.url)) firstByUrl.set(item.url, item);
  }
  const uniqueProducts = Array.from(firstByUrl.values());

  console.log(`\nTotal unique products: ${uniqueProducts.length}`);

  // Phase 3: persist products and prices.
  let savedCount = 0;
  let priceCount = 0;
  for (const item of uniqueProducts) {
    const { partNumber, price } = item;
    try {
      const transceiverId = await findOrCreateScrapedTransceiver({
        partNumber,
        vendorId,
        formFactor: item.formFactor,
        speedGbps: item.speedGbps,
        speed: item.speed,
        reachMeters: item.reachMeters,
        reachLabel: item.reachLabel,
        fiberType: item.fiberType,
        wavelengths: item.wavelength,
        category: "DataCenter",
      });

      if (price && price > 0) {
        const digest = contentHash(JSON.stringify({ price, part: partNumber }));
        const wasUpdated = await upsertPriceObservation({
          transceiverId,
          sourceVendorId: vendorId,
          price,
          currency: "EUR",
          stockLevel: "in_stock",
          url: item.url,
          contentHash: digest,
        });
        if (wasUpdated) priceCount += 1;
      }
      savedCount += 1;
    } catch (err) {
      console.warn(` Error saving ${partNumber}: ${(err as Error).message.slice(0, 80)}`);
    }
  }

  console.log(`\n=== BlueOptics Complete: ${savedCount} products, ${priceCount} prices ===`);
}
|
||||
|
||||
// CLI entry point: only when this file is executed directly (not imported),
// run the BlueOptics scrape, then close `pool`. On failure the error is
// logged, the pool is asked to close, and the process exits with status 1.
if (require.main === module) {
  const releasePool = () => pool.end();
  const abort = (err: unknown) => {
    console.error("Fatal:", err);
    pool.end();
    process.exit(1);
  };

  scrapeBlueOptics().then(releasePool).catch(abort);
}
|
||||
@ -1,260 +0,0 @@
|
||||
/**
|
||||
* ShopFiber24 Scraper — German compatible transceiver shop
|
||||
*
|
||||
* shop.fiber24.net — EUR prices, FO transceiver category.
|
||||
* Paginated catalog: /FO-TRANSCEIVER/de?p=N
|
||||
*
|
||||
* Rate limited: 1 req/2sec.
|
||||
*/
|
||||
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
|
||||
import { contentHash } from "../utils/hash";
|
||||
import * as cheerio from "cheerio";
|
||||
|
||||
/** Shop origin; relative product links are prefixed with it. */
const BASE = "https://shop.fiber24.net";

/** Path of the FO transceiver category (German storefront), relative to BASE. */
const CATALOG_PATH = "/FO-TRANSCEIVER/de";

/** Upper bound on the number of catalog pages requested in one run. */
const MAX_PAGES = 20;

/** Request headers sent with every page fetch (German content preferred). */
const HEADERS = {
  "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
  "Accept": "text/html,application/xhtml+xml",
  "Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
};
|
||||
|
||||
/** One catalog entry as extracted from a ShopFiber24 listing page. */
interface Product {
  /** SKU text from the listing when present, otherwise a token or slug taken from the name. */
  partNumber: string;
  /** Product title text as shown in the listing. */
  name: string;
  /** Product page URL; also serves as the de-duplication key. */
  url: string;
  /** Listed price, set only when an amount could be parsed (stored as EUR by the scraper). */
  price?: number;

  /** Form factor guessed from the name, e.g. "SFP+" or "QSFP28". */
  formFactor: string;
  /** Nominal speed label matching the form factor, e.g. "10G". */
  speed: string;
  /** Nominal speed in Gbit/s. */
  speedGbps: number;

  /** Reach label such as "10km", when one was detected. */
  reachLabel?: string;
  /** Reach in meters, when one was detected. */
  reachMeters?: number;
  /** Medium guessed from the name: "SMF", "MMF" or "Copper". */
  fiberType?: string;
  /** Wavelength digits preceding "nm" in the name, or "" when none was found. */
  wavelength?: string;
}
|
||||
|
||||
/**
 * Promise-based delay: resolves (with no value) once about `ms`
 * milliseconds have elapsed. Used for request pacing.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||
|
||||
/**
 * Guesses a module's form factor and nominal speed from its product name.
 *
 * Matching is a case-insensitive substring search; rules are evaluated from
 * most to least specific and the first hit wins. When nothing matches the
 * result defaults to SFP+ / 10G.
 *
 * @param text Product name or description.
 * @returns Form factor label, speed label and speed in Gbit/s.
 */
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
  const lower = text.toLowerCase();
  const has = (needle: string): boolean => lower.includes(needle);

  if (has("osfp") && !has("qsfp")) return { formFactor: "OSFP", speed: "400G", speedGbps: 400 };
  if (has("qsfp-dd")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
  if (has("qsfp28")) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
  if (has("qsfp+") || has("qsfp plus")) return { formFactor: "QSFP+", speed: "40G", speedGbps: 40 };
  if (has("sfp56")) return { formFactor: "SFP56", speed: "50G", speedGbps: 50 };
  if (has("sfp28") || has("25g")) return { formFactor: "SFP28", speed: "25G", speedGbps: 25 };
  if (has("sfp+")) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };

  // XFP has to be tested before the generic 10G speed hints: XFP names nearly
  // always contain "10G"/"10GBASE" and would otherwise be reported as SFP+.
  if (has("xfp")) return { formFactor: "XFP", speed: "10G", speedGbps: 10 };
  if (has("10gbase") || has("10g")) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };

  if (has("1000base") || has("1g")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
  if (has("sfp") && !has("qsfp")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };

  return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
}
|
||||
|
||||
/**
 * Infers the nominal reach of a module from its product name.
 *
 * Explicit distances ("10 km", "300m", case-insensitive) are tried first,
 * followed by optic-type codes (LR, ER, ZR, SR, DR, FR, optionally with a
 * trailing "4"; case-sensitive). The first rule that matches wins.
 *
 * @param text Product name or description.
 * @returns Reach label and distance in meters, or undefined when no rule matches.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  const rules: Array<{ re: RegExp; label: string; meters: number }> = [
    { re: /\b80\s*km\b/i, label: "80km", meters: 80000 },
    { re: /\b40\s*km\b/i, label: "40km", meters: 40000 },
    { re: /\b20\s*km\b/i, label: "20km", meters: 20000 },
    { re: /\b10\s*km\b/i, label: "10km", meters: 10000 },
    { re: /\b2\s*km\b/i, label: "2km", meters: 2000 },
    { re: /\b550\s*m\b/i, label: "550m", meters: 550 },
    { re: /\b300\s*m\b/i, label: "300m", meters: 300 },
    { re: /\b100\s*m\b/i, label: "100m", meters: 100 },
    { re: /\bLR4\b/, label: "10km", meters: 10000 },
    { re: /\bLR\b/, label: "10km", meters: 10000 },
    { re: /\bER4?\b/, label: "40km", meters: 40000 },
    { re: /\bZR4?\b/, label: "80km", meters: 80000 },
    { re: /\bSR4?\b/, label: "300m", meters: 300 },
    { re: /\bDR4?\b/, label: "500m", meters: 500 },
    { re: /\bFR4?\b/, label: "2km", meters: 2000 },
  ];

  const hit = rules.find((rule) => rule.re.test(text));
  return hit ? { label: hit.label, meters: hit.meters } : undefined;
}
|
||||
|
||||
/**
 * Guesses the cabling medium from a product name.
 *
 * Checks run in this order: single-mode hints, multi-mode hints, copper
 * hints. Anything unrecognised is reported as "SMF".
 *
 * Optic-type codes (LX/LR/ER/ZR and SX/SR) are matched as standalone tokens
 * that may sit at the very start or end of the text. The earlier
 * `[^a-z]xx[^a-z]` form demanded a character on both sides, so a name ending
 * in "-SR" or "-SX" fell through to the SMF default.
 *
 * @param text Product name or description.
 * @returns "SMF", "MMF" or "Copper".
 */
function detectFiber(text: string): string {
  const singleMode = /single.?mode|smf|(?:^|[^a-z])(?:lx|lr|er|zr)(?:[^a-z]|$)|bidi|cwdm|dwdm/i;
  const multiMode = /multi.?mode|mmf|(?:^|[^a-z])(?:sx|sr)(?:[^a-z]|$)/i;
  const copper = /copper|dac|twinax|rj45|base-t/i;

  if (singleMode.test(text)) return "SMF";
  if (multiMode.test(text)) return "MMF";
  if (copper.test(text)) return "Copper";
  return "SMF";
}
|
||||
|
||||
/**
 * Extracts the first 3-4 digit number that is followed by "nm"
 * (case-insensitive, optional whitespace in between).
 *
 * @param text Product name or description.
 * @returns The digits as a string, or "" when no wavelength is present.
 */
function detectWavelength(text: string): string {
  const found = /(\d{3,4})\s*nm/i.exec(text);
  if (found === null) return "";
  return found[1];
}
|
||||
|
||||
/**
 * Pulls a German-formatted EUR amount ("1.234,56 €" or "€ 1.234,56") out of
 * free text. Dots are dropped as thousands separators and the first comma
 * becomes the decimal point.
 *
 * @returns The amount, or undefined when none is found or it lies outside
 *          the open range (0, 50000).
 */
function parseEuroPrice(text: string): number | undefined {
  const match = text.match(/([\d.,]+)\s*€|€\s*([\d.,]+)/);
  if (!match) return undefined;
  const normalized = (match[1] || match[2]).replace(/\./g, "").replace(",", ".");
  const value = parseFloat(normalized);
  return value > 0 && value < 50000 ? value : undefined;
}

/**
 * Turns an href from the page into an absolute URL.
 * Absolute http(s) links are returned untouched; everything else is resolved
 * against BASE, so "product/x", "/product/x" and "../x" all yield valid URLs.
 *
 * @returns The absolute URL, or undefined for an empty or unparsable href.
 */
function resolveProductUrl(href: string): string | undefined {
  const trimmed = href.trim();
  if (!trimmed) return undefined;
  if (/^https?:\/\//i.test(trimmed)) return trimmed;
  try {
    return new URL(trimmed, BASE).href;
  } catch {
    return undefined;
  }
}

/** Derives a part number from the name: first SKU-like token, else a 60-char slug. */
function partNumberFromName(name: string): string {
  return name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60);
}

/** Assembles a Product, filling the optical attributes from the name heuristics. */
function toProduct(name: string, url: string, price: number | undefined, partNumber: string): Product {
  const reach = detectReach(name);
  return {
    partNumber,
    name,
    url,
    price,
    ...detectFormFactor(name),
    reachLabel: reach?.label,
    reachMeters: reach?.meters,
    fiberType: detectFiber(name),
    wavelength: detectWavelength(name),
  };
}

/**
 * Extracts products from one catalog page.
 *
 * A list of common product-card selectors is tried in order; the first
 * selector that yields at least one transceiver-looking product is used. If
 * none does, every link whose text looks like a transceiver name is taken
 * instead (no price is extracted on that path).
 *
 * Entries without a usable link are skipped: the URL is the de-duplication
 * key, and the previous `BASE + href` concatenation produced either the bare
 * shop origin (missing href) or a malformed URL (href without leading slash).
 *
 * @param html Raw HTML of a catalog page.
 * @returns Products in page order, at most one per URL.
 */
function parseProductList(html: string): Product[] {
  const $ = cheerio.load(html);
  const products: Product[] = [];

  // Common e-commerce product listing selectors, most specific layouts first.
  const cardSelectors = [
    ".product-item", ".item", "li.product", ".product-card",
    ".woocommerce-loop-product", "article.product", ".grid-item",
  ];

  for (const sel of cardSelectors) {
    $(sel).each((_i, el) => {
      const card = $(el);
      const name = card
        .find("h2, h3, .product-name, .product-title, .item-title, a")
        .first()
        .text()
        .trim();
      // Generic selectors such as ".item" also hit non-product nodes; keep
      // only entries whose title mentions a transceiver-related term.
      if (name.length < 5 || !/sfp|qsfp|xfp|transceiver|optic/i.test(name)) return;

      const url = resolveProductUrl(card.find("a[href]").first().attr("href") || "");
      if (!url) return;

      const price = parseEuroPrice(
        card.find(".price, .product-price, .price-box, .amount, [data-price]").text(),
      );
      const partNumber =
        card.find(".sku, [data-sku], .product-sku").text().trim() || partNumberFromName(name);

      products.push(toProduct(name, url, price, partNumber));
    });
    if (products.length > 0) break;
  }

  // Fallback: extract all transceiver-relevant links.
  if (products.length === 0) {
    $("a[href]").each((_i, el) => {
      const link = $(el);
      const name = link.text().trim();
      if (name.length < 8 || name.length > 200 || !/sfp|qsfp|transceiver/i.test(name)) return;

      const url = resolveProductUrl(link.attr("href") || "");
      if (!url) return;

      products.push(toProduct(name, url, undefined, partNumberFromName(name)));
    });
  }

  const seen = new Set<string>();
  return products.filter((p) => {
    if (seen.has(p.url)) return false;
    seen.add(p.url);
    return true;
  });
}
|
||||
|
||||
/**
 * Downloads a page as text using the shared request headers.
 * The request is aborted after 30 seconds.
 *
 * @param url Absolute URL to fetch.
 * @returns The response body.
 * @throws Error with the HTTP status when the response is not OK.
 */
async function fetchPage(url: string): Promise<string> {
  const response = await fetch(url, {
    headers: HEADERS,
    signal: AbortSignal.timeout(30000),
  });
  if (response.ok) {
    return response.text();
  }
  throw new Error(`HTTP ${response.status} for ${url}`);
}
|
||||
|
||||
/**
 * Scrapes the ShopFiber24 transceiver category and stores the results.
 *
 * Steps: register the vendor, walk up to MAX_PAGES catalog pages using the
 * `?p=N` query parameter (2 s pause between requests, stopping at the first
 * empty or failed page), de-duplicate by URL, then create/lookup a
 * transceiver row per product and record an EUR price observation when a
 * price was parsed. A failure on one product is logged and does not stop the
 * run.
 */
export async function scrapeFiber24(): Promise<void> {
  console.log("=== ShopFiber24 Scraper Starting ===\n");

  const catalogUrl = BASE + CATALOG_PATH;
  const vendorId = await ensureVendor("ShopFiber24", "compatible", "https://shop.fiber24.net", catalogUrl);

  // Phase 1: collect listings page by page.
  const collected: Product[] = [];
  for (let pageNo = 1; pageNo <= MAX_PAGES; pageNo += 1) {
    try {
      // Page 1 is the bare category URL; later pages append ?p=N.
      const pageUrl = pageNo > 1 ? `${catalogUrl}?p=${pageNo}` : catalogUrl;
      const found = parseProductList(await fetchPage(pageUrl));
      for (const item of found) collected.push(item);
      console.log(` Page ${pageNo}: ${found.length} products`);

      if (found.length === 0) {
        console.log(` Empty page ${pageNo}, stopping pagination.`);
        break;
      }
      // No pause needed after the final allowed page.
      if (pageNo !== MAX_PAGES) await sleep(2000);
    } catch (err) {
      console.warn(` Page ${pageNo} failed: ${(err as Error).message}`);
      break;
    }
  }

  // Phase 2: keep the first occurrence of every URL.
  const firstByUrl = new Map<string, Product>();
  for (const item of collected) {
    if (!firstByUrl.has(item.url)) firstByUrl.set(item.url, item);
  }
  const uniqueProducts = Array.from(firstByUrl.values());

  console.log(`\nTotal unique products: ${uniqueProducts.length}`);

  // Phase 3: persist products and prices.
  let savedCount = 0;
  let priceCount = 0;
  for (const item of uniqueProducts) {
    const { partNumber, price } = item;
    try {
      const transceiverId = await findOrCreateScrapedTransceiver({
        partNumber,
        vendorId,
        formFactor: item.formFactor,
        speedGbps: item.speedGbps,
        speed: item.speed,
        reachMeters: item.reachMeters,
        reachLabel: item.reachLabel,
        fiberType: item.fiberType,
        wavelengths: item.wavelength,
        category: "DataCenter",
      });

      if (price && price > 0) {
        const digest = contentHash(JSON.stringify({ price, part: partNumber }));
        const wasUpdated = await upsertPriceObservation({
          transceiverId,
          sourceVendorId: vendorId,
          price,
          currency: "EUR",
          stockLevel: "in_stock",
          url: item.url,
          contentHash: digest,
        });
        if (wasUpdated) priceCount += 1;
      }
      savedCount += 1;
    } catch (err) {
      console.warn(` Error saving ${partNumber}: ${(err as Error).message.slice(0, 80)}`);
    }
  }

  console.log(`\n=== ShopFiber24 Complete: ${savedCount} products, ${priceCount} prices ===`);
}
|
||||
|
||||
// CLI entry point: only when this file is executed directly (not imported),
// run the ShopFiber24 scrape, then close `pool`. On failure the error is
// logged, the pool is asked to close, and the process exits with status 1.
if (require.main === module) {
  const releasePool = () => pool.end();
  const abort = (err: unknown) => {
    console.error("Fatal:", err);
    pool.end();
    process.exit(1);
  };

  scrapeFiber24().then(releasePool).catch(abort);
}
|
||||
@ -1,263 +0,0 @@
|
||||
/**
|
||||
* GAO Tek Scraper — Canadian compatible transceiver distributor
|
||||
*
|
||||
* gaotek.com — large catalog with USD prices, WooCommerce-based.
|
||||
* Category: /category/fiber-optics/transceivers/
|
||||
*
|
||||
* Rate limited: 1 req/2sec.
|
||||
*/
|
||||
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
|
||||
import { contentHash } from "../utils/hash";
|
||||
import * as cheerio from "cheerio";
|
||||
|
||||
/** Site origin; relative product links are prefixed with it. */
const BASE = "https://gaotek.com";

/** Path of the transceiver category, relative to BASE (note the trailing slash). */
const CATALOG_PATH = "/category/fiber-optics/transceivers/";

/** Upper bound on the number of catalog pages requested in one run. */
const MAX_PAGES = 20;

/** Request headers sent with every page fetch (English content preferred). */
const HEADERS = {
  "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
  "Accept": "text/html,application/xhtml+xml",
  "Accept-Language": "en-US,en;q=0.9",
};
|
||||
|
||||
/** One catalog entry as extracted from a GAO Tek listing page. */
interface Product {
  /** SKU text from the listing when present, otherwise derived from the URL slug or the name. */
  partNumber: string;
  /** Product title text as shown in the listing. */
  name: string;
  /** Product page URL; also serves as the de-duplication key. */
  url: string;
  /** Listed price, set only when an amount could be parsed (stored as USD by the scraper). */
  price?: number;

  /** Form factor guessed from the name, e.g. "SFP+" or "QSFP28". */
  formFactor: string;
  /** Nominal speed label matching the form factor, e.g. "10G". */
  speed: string;
  /** Nominal speed in Gbit/s. */
  speedGbps: number;

  /** Reach label such as "10km", when one was detected. */
  reachLabel?: string;
  /** Reach in meters, when one was detected. */
  reachMeters?: number;
  /** Medium guessed from the name: "SMF", "MMF" or "Copper". */
  fiberType?: string;
  /** Wavelength digits preceding "nm" in the name, or "" when none was found. */
  wavelength?: string;
}
|
||||
|
||||
/**
 * Waits for about `ms` milliseconds.
 * The returned promise resolves with no value; it never rejects.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((finish) => {
    setTimeout(finish, ms);
  });
}
|
||||
|
||||
/**
 * Guesses a module's form factor and nominal speed from its product name.
 *
 * Matching is a case-insensitive substring search; rules are evaluated from
 * most to least specific and the first hit wins. When nothing matches the
 * result defaults to SFP+ / 10G.
 *
 * @param text Product name or description.
 * @returns Form factor label, speed label and speed in Gbit/s.
 */
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
  const lower = text.toLowerCase();
  const has = (needle: string): boolean => lower.includes(needle);

  if (has("osfp") && !has("qsfp")) return { formFactor: "OSFP", speed: "400G", speedGbps: 400 };
  if (has("qsfp-dd")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
  if (has("qsfp28")) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
  if (has("qsfp+") || has("qsfp plus")) return { formFactor: "QSFP+", speed: "40G", speedGbps: 40 };
  if (has("sfp56")) return { formFactor: "SFP56", speed: "50G", speedGbps: 50 };
  if (has("sfp28") || has("25g")) return { formFactor: "SFP28", speed: "25G", speedGbps: 25 };
  if (has("sfp+")) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };

  // XFP has to be tested before the generic 10G speed hints: XFP names nearly
  // always contain "10G"/"10GBASE" and would otherwise be reported as SFP+.
  if (has("xfp")) return { formFactor: "XFP", speed: "10G", speedGbps: 10 };
  if (has("10gbase") || has("10g")) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };

  if (has("1000base") || has("1g")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
  if (has("sfp") && !has("qsfp")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };

  return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
}
|
||||
|
||||
/**
 * Infers the nominal reach of a module from its product name.
 *
 * Explicit distances ("10 km", "300m", case-insensitive) are tried first,
 * followed by optic-type codes (LR, ER, ZR, SR, DR, FR, optionally with a
 * trailing "4"; case-sensitive). The first rule that matches wins.
 *
 * @param text Product name or description.
 * @returns Reach label and distance in meters, or undefined when no rule matches.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  const rules: Array<{ re: RegExp; label: string; meters: number }> = [
    { re: /\b80\s*km\b/i, label: "80km", meters: 80000 },
    { re: /\b40\s*km\b/i, label: "40km", meters: 40000 },
    { re: /\b20\s*km\b/i, label: "20km", meters: 20000 },
    { re: /\b10\s*km\b/i, label: "10km", meters: 10000 },
    { re: /\b2\s*km\b/i, label: "2km", meters: 2000 },
    { re: /\b550\s*m\b/i, label: "550m", meters: 550 },
    { re: /\b300\s*m\b/i, label: "300m", meters: 300 },
    { re: /\b100\s*m\b/i, label: "100m", meters: 100 },
    { re: /\bLR4\b/, label: "10km", meters: 10000 },
    { re: /\bLR\b/, label: "10km", meters: 10000 },
    { re: /\bER4?\b/, label: "40km", meters: 40000 },
    { re: /\bZR4?\b/, label: "80km", meters: 80000 },
    { re: /\bSR4?\b/, label: "300m", meters: 300 },
    { re: /\bDR4?\b/, label: "500m", meters: 500 },
    { re: /\bFR4?\b/, label: "2km", meters: 2000 },
  ];

  const hit = rules.find((rule) => rule.re.test(text));
  return hit ? { label: hit.label, meters: hit.meters } : undefined;
}
|
||||
|
||||
/**
 * Guesses the cabling medium from a product name.
 *
 * Checks run in this order: single-mode hints, multi-mode hints, copper
 * hints. Anything unrecognised is reported as "SMF".
 *
 * Optic-type codes (LX/LR/ER/ZR and SX/SR) are matched as standalone tokens
 * that may sit at the very start or end of the text. The earlier
 * `[^a-z]xx[^a-z]` form demanded a character on both sides, so a name ending
 * in "-SR" or "-SX" fell through to the SMF default.
 *
 * @param text Product name or description.
 * @returns "SMF", "MMF" or "Copper".
 */
function detectFiber(text: string): string {
  const singleMode = /single.?mode|smf|(?:^|[^a-z])(?:lx|lr|er|zr)(?:[^a-z]|$)|bidi|cwdm|dwdm/i;
  const multiMode = /multi.?mode|mmf|(?:^|[^a-z])(?:sx|sr)(?:[^a-z]|$)/i;
  const copper = /copper|dac|twinax|rj45|base-t/i;

  if (singleMode.test(text)) return "SMF";
  if (multiMode.test(text)) return "MMF";
  if (copper.test(text)) return "Copper";
  return "SMF";
}
|
||||
|
||||
/**
 * Extracts the first 3-4 digit number that is followed by "nm"
 * (case-insensitive, optional whitespace in between).
 *
 * @param text Product name or description.
 * @returns The digits as a string, or "" when no wavelength is present.
 */
function detectWavelength(text: string): string {
  const found = /(\d{3,4})\s*nm/i.exec(text);
  if (found === null) return "";
  return found[1];
}
|
||||
|
||||
/**
 * Pulls a USD amount ("$1,234.56") out of free text.
 * Every thousands comma is removed before parsing; the earlier single
 * `.replace(",", "")` stripped only the first one, so "$1,234,567.00" was
 * read as 1234 and slipped past the sanity cap.
 *
 * @returns The amount, or undefined when none is found or it lies outside
 *          the open range (0, 50000).
 */
function parseUsdPrice(text: string): number | undefined {
  const match = text.match(/\$\s*([\d,]+\.?\d{0,2})/);
  if (!match) return undefined;
  const value = parseFloat(match[1].replace(/,/g, ""));
  return value > 0 && value < 50000 ? value : undefined;
}

/**
 * Turns an href from the page into an absolute URL.
 * Absolute http(s) links are returned untouched; everything else is resolved
 * against BASE, so "product/x", "/product/x" and "../x" all yield valid URLs.
 *
 * @returns The absolute URL, or undefined for an empty or unparsable href.
 */
function resolveProductUrl(href: string): string | undefined {
  const trimmed = href.trim();
  if (!trimmed) return undefined;
  if (/^https?:\/\//i.test(trimmed)) return trimmed;
  try {
    return new URL(trimmed, BASE).href;
  } catch {
    return undefined;
  }
}

/** Derives a part number from the name: first SKU-like token, else a 60-char slug. */
function partNumberFromName(name: string): string {
  return name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60);
}

/** Assembles a Product, filling the optical attributes from the name heuristics. */
function toProduct(name: string, url: string, price: number | undefined, partNumber: string): Product {
  const reach = detectReach(name);
  return {
    partNumber,
    name,
    url,
    price,
    ...detectFormFactor(name),
    reachLabel: reach?.label,
    reachMeters: reach?.meters,
    fiberType: detectFiber(name),
    wavelength: detectWavelength(name),
  };
}

/**
 * Extracts products from one catalog page.
 *
 * WooCommerce product-grid cards are tried first. If none yields a product,
 * every link whose text looks like a transceiver name is used instead, with
 * the price searched in the link's grandparent text.
 *
 * Entries without a usable link are skipped: the URL is the de-duplication
 * key, and the previous `BASE + href` concatenation produced either the bare
 * site origin (missing href) or a malformed URL (href without leading slash).
 *
 * @param html Raw HTML of a catalog page.
 * @returns Products in page order, at most one per URL.
 */
function parseProductList(html: string): Product[] {
  const $ = cheerio.load(html);
  const products: Product[] = [];

  // Primary path: WooCommerce product grid.
  $("li.product, .product-item, .woocommerce-loop-product, article.product").each((_i, el) => {
    const card = $(el);
    const name = card
      .find(".woocommerce-loop-product__title, h2, h3, .product-title, .product-name")
      .first()
      .text()
      .trim();
    if (name.length < 5) return;

    const url = resolveProductUrl(card.find("a[href]").first().attr("href") || "");
    if (!url) return;

    const price = parseUsdPrice(card.find(".price, .woocommerce-Price-amount, .amount").text());

    // SKU first; otherwise the last URL path segment with dashes turned into
    // spaces; name-derived values are the last resort.
    const partNumber =
      card.find(".sku, [data-sku]").text().trim() ||
      url.split("/").filter(Boolean).pop()?.replace(/-/g, " ").trim() ||
      partNumberFromName(name);

    products.push(toProduct(name, url, price, partNumber));
  });

  // Fallback for layouts without recognisable product cards.
  if (products.length === 0) {
    $("a[href]").each((_i, el) => {
      const link = $(el);
      const name = link.text().trim();
      if (name.length < 8 || name.length > 200 || !/sfp|qsfp|xfp|transceiver|optic/i.test(name)) return;

      const url = resolveProductUrl(link.attr("href") || "");
      if (!url) return;

      const price = parseUsdPrice(link.parent().parent().text());
      products.push(toProduct(name, url, price, partNumberFromName(name)));
    });
  }

  const seen = new Set<string>();
  return products.filter((p) => {
    if (seen.has(p.url)) return false;
    seen.add(p.url);
    return true;
  });
}
|
||||
|
||||
/**
 * Downloads a page as text using the shared request headers.
 * The request is aborted after 30 seconds.
 *
 * @param url Absolute URL to fetch.
 * @returns The response body.
 * @throws Error with the HTTP status when the response is not OK.
 */
async function fetchPage(url: string): Promise<string> {
  const response = await fetch(url, {
    headers: HEADERS,
    signal: AbortSignal.timeout(30000),
  });
  if (response.ok) {
    return response.text();
  }
  throw new Error(`HTTP ${response.status} for ${url}`);
}
|
||||
|
||||
/**
 * Scrapes the GAO Tek transceiver category and stores the results.
 *
 * Steps: register the vendor, walk up to MAX_PAGES catalog pages using the
 * WooCommerce `page/N/` path suffix (2 s pause between requests, stopping at
 * the first empty or failed page), de-duplicate by URL, then create/lookup a
 * transceiver row per product and record a USD price observation when a
 * price was parsed. A failure on one product is logged and does not stop the
 * run.
 */
export async function scrapeGaoTek(): Promise<void> {
  console.log("=== GAO Tek Scraper Starting ===\n");

  const catalogUrl = BASE + CATALOG_PATH;
  const vendorId = await ensureVendor("GAO Tek", "compatible", "https://gaotek.com", catalogUrl);

  // Phase 1: collect listings page by page.
  const collected: Product[] = [];
  for (let pageNo = 1; pageNo <= MAX_PAGES; pageNo += 1) {
    try {
      // Page 1 is the bare category URL; later pages append page/N/.
      const pageUrl = pageNo > 1 ? `${catalogUrl}page/${pageNo}/` : catalogUrl;
      const found = parseProductList(await fetchPage(pageUrl));
      for (const item of found) collected.push(item);
      console.log(` Page ${pageNo}: ${found.length} products`);

      if (found.length === 0) {
        console.log(` Empty page ${pageNo}, stopping pagination.`);
        break;
      }
      // No pause needed after the final allowed page.
      if (pageNo !== MAX_PAGES) await sleep(2000);
    } catch (err) {
      console.warn(` Page ${pageNo} failed: ${(err as Error).message}`);
      break;
    }
  }

  // Phase 2: keep the first occurrence of every URL.
  const firstByUrl = new Map<string, Product>();
  for (const item of collected) {
    if (!firstByUrl.has(item.url)) firstByUrl.set(item.url, item);
  }
  const uniqueProducts = Array.from(firstByUrl.values());

  console.log(`\nTotal unique products: ${uniqueProducts.length}`);

  // Phase 3: persist products and prices.
  let savedCount = 0;
  let priceCount = 0;
  for (const item of uniqueProducts) {
    const { partNumber, price } = item;
    try {
      const transceiverId = await findOrCreateScrapedTransceiver({
        partNumber,
        vendorId,
        formFactor: item.formFactor,
        speedGbps: item.speedGbps,
        speed: item.speed,
        reachMeters: item.reachMeters,
        reachLabel: item.reachLabel,
        fiberType: item.fiberType,
        wavelengths: item.wavelength,
        category: "DataCenter",
      });

      if (price && price > 0) {
        const digest = contentHash(JSON.stringify({ price, part: partNumber }));
        const wasUpdated = await upsertPriceObservation({
          transceiverId,
          sourceVendorId: vendorId,
          price,
          currency: "USD",
          stockLevel: "in_stock",
          url: item.url,
          contentHash: digest,
        });
        if (wasUpdated) priceCount += 1;
      }
      savedCount += 1;
    } catch (err) {
      console.warn(` Error saving ${partNumber}: ${(err as Error).message.slice(0, 80)}`);
    }
  }

  console.log(`\n=== GAO Tek Complete: ${savedCount} products, ${priceCount} prices ===`);
}
|
||||
|
||||
// CLI entry point: only when this file is executed directly (not imported),
// run the GAO Tek scrape, then close `pool`. On failure the error is logged,
// the pool is asked to close, and the process exits with status 1.
if (require.main === module) {
  const releasePool = () => pool.end();
  const abort = (err: unknown) => {
    console.error("Fatal:", err);
    pool.end();
    process.exit(1);
  };

  scrapeGaoTek().then(releasePool).catch(abort);
}
|
||||
@ -1,295 +0,0 @@
|
||||
/**
|
||||
* HUBER+SUHNER Scraper — Swiss transceiver manufacturer
|
||||
*
|
||||
* www.hubersuhner.com — no direct prices, catalog-only scrape.
|
||||
* Tries DE and EN product listing pages; may expose product API.
|
||||
*
|
||||
* Rate limited: 1 req/2sec.
|
||||
*/
|
||||
import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db";
|
||||
import * as cheerio from "cheerio";
|
||||
|
||||
/** Site origin for HUBER+SUHNER. */
const BASE = "https://www.hubersuhner.com";

/**
 * Candidate catalog entry points (EN and DE variants), as paths relative to
 * BASE. NOTE(review): the code that consumes this list lies outside this
 * block — presumably they are tried in order; confirm against the scraper.
 */
const CATALOG_URLS = [
  "/en/transceiver",
  "/de/transceiver",
  "/en/products/fiber-optics/fiber-optic-transceivers",
  "/de/products/fiber-optics/fiber-optic-transceivers",
  "/en/products/fiber-optics",
];

/**
 * Candidate JSON product-API paths, relative to BASE.
 * NOTE(review): whether these endpoints actually exist is not verifiable
 * from this file; treat them as guesses to probe.
 */
const API_URLS = [
  "/api/products?category=transceiver&format=json",
  "/api/catalog/transceiver",
];

/** Request headers; the Accept value also allows JSON responses. */
const HEADERS = {
  "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
  "Accept": "text/html,application/xhtml+xml,application/json",
  "Accept-Language": "en-US,en;q=0.9,de;q=0.8",
};
|
||||
|
||||
/** One catalog entry extracted from a HUBER+SUHNER page or API response. */
interface Product {
  /** Part number, or a slug derived from the name when none is recognised. */
  partNumber: string;
  /** Display name as scraped. */
  name: string;
  /** Absolute product URL; also the de-duplication key. */
  url: string;
  /** Form factor label such as "SFP+" or "QSFP28". */
  formFactor: string;
  /** Speed label such as "10G". */
  speed: string;
  /** Speed in Gbit/s matching `speed`. */
  speedGbps: number;
  /** Reach label such as "10km", when one was detected. */
  reachLabel?: string;
  /** Reach in meters matching `reachLabel`. */
  reachMeters?: number;
  /** "SMF", "MMF" or "Copper". */
  fiberType?: string;
  /** Digits of the first "<n> nm" occurrence, or "" when none was found. */
  wavelength?: string;
}
|
||||
|
||||
/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Guesses form factor and nominal speed from free text.
 *
 * Matching is case-insensitive substring search; rules are evaluated in the
 * order listed and the first hit wins. When nothing matches, SFP+ / 10G is
 * returned.
 *
 * @param text Product name and/or surrounding card text.
 * @returns Form factor label, speed label and speed in Gbit/s.
 */
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
  const haystack = text.toLowerCase();
  const has = (needle: string): boolean => haystack.includes(needle);
  const mentionsQsfp = has("qsfp");

  // [matched, formFactor, speed, speedGbps] — order is significant.
  const rules: [boolean, string, string, number][] = [
    [has("osfp") && !mentionsQsfp, "OSFP", "400G", 400],
    [has("qsfp-dd"), "QSFP-DD", "400G", 400],
    [has("qsfp28"), "QSFP28", "100G", 100],
    [has("qsfp+") || has("qsfp plus"), "QSFP+", "40G", 40],
    [has("sfp56"), "SFP56", "50G", 50],
    [has("sfp28") || has("25g"), "SFP28", "25G", 25],
    [has("sfp+") || has("10gbase") || has("10g"), "SFP+", "10G", 10],
    [has("xfp"), "XFP", "10G", 10],
    [has("1000base") || has("1g"), "SFP", "1G", 1],
    [has("sfp") && !mentionsQsfp, "SFP", "1G", 1],
  ];

  for (const [matched, formFactor, speed, speedGbps] of rules) {
    if (matched) return { formFactor, speed, speedGbps };
  }
  return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
}
|
||||
|
||||
/**
 * Derives a reach label and distance in meters from free text.
 *
 * Explicit distances ("10 km", "300m") are checked before optic-type codes
 * (LR, ER, ZR, SR, DR, FR); the first rule that matches wins. The type codes
 * are matched case-sensitively.
 *
 * @param text Product name and/or surrounding card text.
 * @returns `{ label, meters }`, or `undefined` when nothing matches.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  const rules: { pattern: RegExp; label: string; meters: number }[] = [
    { pattern: /\b80\s*km\b/i, label: "80km", meters: 80000 },
    { pattern: /\b40\s*km\b/i, label: "40km", meters: 40000 },
    { pattern: /\b20\s*km\b/i, label: "20km", meters: 20000 },
    { pattern: /\b10\s*km\b/i, label: "10km", meters: 10000 },
    { pattern: /\b2\s*km\b/i, label: "2km", meters: 2000 },
    { pattern: /\b550\s*m\b/i, label: "550m", meters: 550 },
    { pattern: /\b300\s*m\b/i, label: "300m", meters: 300 },
    { pattern: /\b100\s*m\b/i, label: "100m", meters: 100 },
    { pattern: /\bLR4\b/, label: "10km", meters: 10000 },
    { pattern: /\bLR\b/, label: "10km", meters: 10000 },
    { pattern: /\bER4?\b/, label: "40km", meters: 40000 },
    { pattern: /\bZR4?\b/, label: "80km", meters: 80000 },
    { pattern: /\bSR4?\b/, label: "300m", meters: 300 },
    { pattern: /\bDR4?\b/, label: "500m", meters: 500 },
    { pattern: /\bFR4?\b/, label: "2km", meters: 2000 },
  ];

  const hit = rules.find(({ pattern }) => pattern.test(text));
  if (!hit) return undefined;
  return { label: hit.label, meters: hit.meters };
}
|
||||
|
||||
/**
 * Classifies the medium as "SMF", "MMF" or "Copper" from free text.
 *
 * Single-mode hints are checked first, then multi-mode, then copper; text
 * without any hint defaults to "SMF". Two-letter optic codes (LX, LR, ER, ZR,
 * SX, SR) must stand alone, i.e. not be flanked by letters. They are also
 * recognised at the very start or end of the text, so a name that ends in
 * "-SR" is classified as MMF instead of falling through to the default.
 *
 * @param text Product name and/or surrounding card text.
 * @returns "SMF", "MMF" or "Copper".
 */
function detectFiber(text: string): string {
  if (/single.?mode|smf|(?:^|[^a-z])(?:lx|lr|er|zr)(?:[^a-z]|$)|bidi|cwdm|dwdm/i.test(text)) return "SMF";
  if (/multi.?mode|mmf|(?:^|[^a-z])(?:sx|sr)(?:[^a-z]|$)/i.test(text)) return "MMF";
  if (/copper|dac|twinax|rj45|base-t/i.test(text)) return "Copper";
  return "SMF";
}
|
||||
|
||||
/**
 * Returns the digits of the first "<3-4 digits> nm" occurrence in `text`
 * (case-insensitive, optional whitespace before "nm"), or "" when absent.
 */
function detectWavelength(text: string): string {
  const hit = /(\d{3,4})\s*nm/i.exec(text);
  if (hit === null) return "";
  return hit[1];
}
|
||||
|
||||
/**
 * Extracts transceiver products from a HUBER+SUHNER catalog page.
 *
 * Card-like selectors are tried in order; the first selector that matches at
 * least two elements and yields at least one product wins. If no selector
 * yields anything, every anchor whose text looks transceiver-related is used.
 * Results are de-duplicated by URL.
 *
 * @param html      Raw HTML of the catalog page.
 * @param sourceUrl URL the HTML was fetched from; base for relative links and
 *                  the fallback URL for cards without a usable link.
 * @returns Products found on the page, at most one per distinct URL.
 */
function parseHtmlProducts(html: string, sourceUrl: string): Product[] {
  const $ = cheerio.load(html);
  const products: Product[] = [];

  // Resolve hrefs with the URL parser rather than `BASE + href`, which
  // produced malformed URLs for relative ("foo/bar") and protocol-relative
  // ("//host/x") links. Non-http(s) targets (mailto:, javascript:) and
  // unparsable hrefs yield undefined.
  const resolveUrl = (href: string): string | undefined => {
    try {
      const resolved = new URL(href, sourceUrl);
      return /^https?:$/.test(resolved.protocol) ? resolved.href : undefined;
    } catch {
      return undefined;
    }
  };

  // HS uses a custom product portal — try multiple selectors
  const cardSelectors = [
    ".product-item", ".product-tile", ".product-card", ".product",
    "tr[data-part]", "tr", ".item", ".result-item", ".catalog-item",
    ".product-list-item", "li[data-id]",
  ];

  for (const sel of cardSelectors) {
    const cards = $(sel);
    if (cards.length < 2) continue;

    cards.each((_i, el) => {
      const text = $(el).text().trim();
      if (!/sfp|qsfp|xfp|transceiver|optic|fiber/i.test(text)) return;

      const nameEl = $(el).find("h2, h3, h4, .name, .title, .product-name, a").first();
      const name = nameEl.text().trim() || text.slice(0, 100);
      if (!name || name.length < 5) return;

      const href = $(el).find("a[href]").first().attr("href");
      const url = (href && resolveUrl(href)) || sourceUrl;

      // HUBER+SUHNER part numbers often follow: 85XXXXXXX or similar numeric patterns
      const partNumMatch = text.match(/\b(85\d{7,})\b/) ||
        text.match(/\b([A-Z]{2,}[-_][A-Z0-9]+[-_][A-Z0-9]+)\b/) ||
        name.match(/[A-Z0-9][-A-Z0-9]{5,}/);
      const partNumber = partNumMatch?.[0] || name.replace(/\s+/g, "-").slice(0, 60);

      const combined = name + " " + text;
      const reach = detectReach(combined);

      products.push({
        partNumber,
        name,
        url,
        ...detectFormFactor(combined),
        reachLabel: reach?.label,
        reachMeters: reach?.meters,
        fiberType: detectFiber(combined),
        wavelength: detectWavelength(combined),
      });
    });

    // Stop at the first selector that produced anything.
    if (products.length > 0) break;
  }

  // Fallback: all product-relevant links
  if (products.length === 0) {
    $("a[href]").each((_i, el) => {
      const name = $(el).text().trim();
      if (name.length < 5 || name.length > 200 || !/sfp|qsfp|transceiver|optic/i.test(name)) return;

      const url = resolveUrl($(el).attr("href") || "");
      if (!url) return;

      const reach = detectReach(name);
      products.push({
        partNumber: name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60),
        name,
        url,
        ...detectFormFactor(name),
        reachLabel: reach?.label,
        reachMeters: reach?.meters,
        fiberType: detectFiber(name),
        wavelength: detectWavelength(name),
      });
    });
  }

  // Keep the first product seen for each URL.
  const seen = new Set<string>();
  return products.filter((p) => {
    if (!p.url || seen.has(p.url)) return false;
    seen.add(p.url);
    return true;
  });
}
|
||||
|
||||
/**
 * Converts a JSON catalog response into products.
 *
 * Accepts either a top-level array or an object with a `products` or `items`
 * array. Entries that are not objects, or whose name/title does not mention
 * sfp/qsfp/transceiver, are skipped.
 *
 * @param json Parsed response body of unknown shape.
 * @returns Products found; empty when the shape is not recognised.
 */
function parseJsonProducts(json: unknown): Product[] {
  const products: Product[] = [];
  const root = json as Record<string, unknown> | null | undefined;
  const items = Array.isArray(json) ? json : root?.products || root?.items || [];
  if (!Array.isArray(items)) return products;

  for (const item of items) {
    // A null or primitive entry must not abort the whole response.
    if (item === null || typeof item !== "object") continue;
    const record = item as Record<string, unknown>;

    const name = String(record.name || record.title || "");
    if (!name || !/sfp|qsfp|transceiver/i.test(name)) continue;

    const partNumber = String(
      record.partNumber ||
      record.sku ||
      record.id ||
      name.replace(/\s+/g, "-").slice(0, 60)
    );

    // Resolve against the site root; entries without a usable URL keep BASE.
    let url = BASE;
    if (record.url) {
      try {
        url = new URL(String(record.url), BASE).href;
      } catch {
        url = BASE;
      }
    }

    const reach = detectReach(name);
    products.push({
      partNumber,
      name,
      url,
      ...detectFormFactor(name),
      reachLabel: reach?.label,
      reachMeters: reach?.meters,
      fiberType: detectFiber(name),
      wavelength: detectWavelength(name),
    });
  }
  return products;
}
|
||||
|
||||
/**
 * Downloads `url` with the shared HEADERS and a 30 s timeout.
 *
 * @returns The response body as text.
 * @throws Error("HTTP <status> for <url>") on a non-2xx response; fetch and
 *         timeout errors propagate unchanged.
 */
async function fetchPage(url: string): Promise<string> {
  const timeoutSignal = AbortSignal.timeout(30000);
  const response = await fetch(url, { headers: HEADERS, signal: timeoutSignal });
  if (response.ok) return response.text();
  throw new Error(`HTTP ${response.status} for ${url}`);
}
|
||||
|
||||
/**
 * Scrapes the HUBER+SUHNER catalog (no prices) and stores each product.
 *
 * Probes the JSON endpoints in API_URLS first, then the HTML pages in
 * CATALOG_URLS. Products are de-duplicated by URL across all sources and
 * saved one by one; a failure to save one product is logged and does not
 * stop the run. A 2 s pause follows every request, including failed ones.
 *
 * @returns Resolves when all sources have been processed.
 * @throws Whatever `ensureVendor` rejects with; it is not caught here.
 */
export async function scrapeHuberSuhner(): Promise<void> {
  console.log("=== HUBER+SUHNER Scraper Starting (catalog-only, no prices) ===\n");

  const vendorId = await ensureVendor(
    "HUBER+SUHNER",
    "manufacturer",
    "https://www.hubersuhner.com",
    BASE + CATALOG_URLS[0],
  );

  const allProducts: Product[] = [];
  const seenUrls = new Set<string>();

  // Appends products whose URL has not been collected yet.
  const collect = (found: Product[]): void => {
    for (const p of found) {
      if (seenUrls.has(p.url)) continue;
      seenUrls.add(p.url);
      allProducts.push(p);
    }
  };

  // Try JSON API endpoints first
  for (const apiPath of API_URLS) {
    const apiUrl = BASE + apiPath;
    console.log(` Trying API: ${apiUrl}`);
    try {
      const resp = await fetch(apiUrl, {
        headers: { ...HEADERS, Accept: "application/json" },
        signal: AbortSignal.timeout(15000),
      });
      if (resp.ok) {
        const json: unknown = await resp.json();
        const parsed = parseJsonProducts(json);
        console.log(` API returned ${parsed.length} products`);
        collect(parsed);
      }
    } catch (err) {
      // Best-effort probe: log and continue to HTML scraping.
      console.warn(` API unavailable (${apiPath}): ${err instanceof Error ? err.message : String(err)}`);
    } finally {
      // Pause after failures too, so errors cannot bypass the rate limit.
      await sleep(2000);
    }
  }

  // HTML catalog fallback
  for (const catalogPath of CATALOG_URLS) {
    const catalogUrl = BASE + catalogPath;
    console.log(` Fetching catalog: ${catalogUrl}`);
    try {
      const html = await fetchPage(catalogUrl);
      const pageProducts = parseHtmlProducts(html, catalogUrl);
      collect(pageProducts);
      console.log(` Found ${pageProducts.length} products`);
    } catch (err) {
      console.warn(` Failed ${catalogPath}: ${(err as Error).message}`);
    } finally {
      await sleep(2000);
    }
  }

  console.log(`\nTotal unique products: ${allProducts.length}`);

  let totalProducts = 0;

  for (const product of allProducts) {
    try {
      await findOrCreateScrapedTransceiver({
        partNumber: product.partNumber,
        vendorId,
        formFactor: product.formFactor,
        speedGbps: product.speedGbps,
        speed: product.speed,
        reachMeters: product.reachMeters,
        reachLabel: product.reachLabel,
        fiberType: product.fiberType,
        wavelengths: product.wavelength,
        category: "DataCenter",
      });
      totalProducts++;
    } catch (err) {
      console.warn(` Error saving ${product.partNumber}: ${(err as Error).message.slice(0, 80)}`);
    }
  }

  console.log(`\n=== HUBER+SUHNER Complete: ${totalProducts} products catalogued (no prices — contact sales) ===`);
}
|
||||
|
||||
// CLI entry point: run the scraper only when this file is executed directly,
// then close the shared DB pool. Any failure is logged and exits with code 1.
if (require.main === module) {
  void (async () => {
    try {
      await scrapeHuberSuhner();
      await pool.end();
    } catch (err) {
      console.error("Fatal:", err);
      pool.end();
      process.exit(1);
    }
  })();
}
|
||||
@ -1,307 +0,0 @@
|
||||
/**
|
||||
* Skylane Optics Scraper — Belgian compatible transceiver vendor
|
||||
*
|
||||
* www.skylaneoptics.com — product catalog with form factor listings.
|
||||
* Listings usually carry no webshop prices; when a price is shown it is recorded.
|
||||
*
|
||||
* Rate limited: 1 req/2sec.
|
||||
*/
|
||||
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
|
||||
import { contentHash } from "../utils/hash";
|
||||
import * as cheerio from "cheerio";
|
||||
|
||||
// Site root; catalog paths and relative links are appended to it.
const BASE = "https://www.skylaneoptics.com";

// Seed catalog pages, visited in order.
const CATALOG_URLS = [
  "/en/products/",
  "/en/",
  "/en/products/sfp/",
  "/en/products/sfp28/",
  "/en/products/qsfp28/",
  "/en/products/qsfp-dd/",
];

// Highest "?page=N" value requested per catalog URL.
const MAX_PAGES = 10;

// Request headers sent with every fetch.
const HEADERS = {
  "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
  "Accept": "text/html,application/xhtml+xml",
  "Accept-Language": "en-US,en;q=0.9",
};
|
||||
|
||||
/** One catalog entry extracted from a Skylane Optics listing page. */
interface Product {
  /** Part number, or a slug derived from the name when none is recognised. */
  partNumber: string;
  /** Display name as scraped. */
  name: string;
  /** Absolute product URL; also the de-duplication key. */
  url: string;
  /** Listed price when one was found on the card; currency is not tracked. */
  price?: number;
  /** Form factor label such as "SFP+" or "QSFP28". */
  formFactor: string;
  /** Speed label such as "10G". */
  speed: string;
  /** Speed in Gbit/s matching `speed`. */
  speedGbps: number;
  /** Reach label such as "10km", when one was detected. */
  reachLabel?: string;
  /** Reach in meters matching `reachLabel`. */
  reachMeters?: number;
  /** "SMF", "MMF" or "Copper". */
  fiberType?: string;
  /** Digits of the first "<n> nm" occurrence, or "" when none was found. */
  wavelength?: string;
}
|
||||
|
||||
/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Guesses form factor and nominal speed from free text.
 *
 * Matching is case-insensitive substring search; rules are evaluated in the
 * order listed and the first hit wins. When nothing matches, SFP+ / 10G is
 * returned.
 *
 * @param text Product name and/or surrounding card text.
 * @returns Form factor label, speed label and speed in Gbit/s.
 */
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
  const haystack = text.toLowerCase();
  const has = (needle: string): boolean => haystack.includes(needle);
  const mentionsQsfp = has("qsfp");

  // [matched, formFactor, speed, speedGbps] — order is significant.
  const rules: [boolean, string, string, number][] = [
    [has("osfp") && !mentionsQsfp, "OSFP", "400G", 400],
    [has("qsfp-dd"), "QSFP-DD", "400G", 400],
    [has("qsfp28"), "QSFP28", "100G", 100],
    [has("qsfp+") || has("qsfp plus"), "QSFP+", "40G", 40],
    [has("sfp56"), "SFP56", "50G", 50],
    [has("sfp28") || has("25g"), "SFP28", "25G", 25],
    [has("sfp+") || has("10gbase") || has("10g"), "SFP+", "10G", 10],
    [has("xfp"), "XFP", "10G", 10],
    [has("1000base") || has("1g"), "SFP", "1G", 1],
    [has("sfp") && !mentionsQsfp, "SFP", "1G", 1],
  ];

  for (const [matched, formFactor, speed, speedGbps] of rules) {
    if (matched) return { formFactor, speed, speedGbps };
  }
  return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
}
|
||||
|
||||
/**
 * Derives a reach label and distance in meters from free text.
 *
 * Explicit distances ("10 km", "300m") are checked before optic-type codes
 * (LR, ER, ZR, SR, DR, FR); the first rule that matches wins. The type codes
 * are matched case-sensitively.
 *
 * @param text Product name and/or surrounding card text.
 * @returns `{ label, meters }`, or `undefined` when nothing matches.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  const rules: { pattern: RegExp; label: string; meters: number }[] = [
    { pattern: /\b80\s*km\b/i, label: "80km", meters: 80000 },
    { pattern: /\b40\s*km\b/i, label: "40km", meters: 40000 },
    { pattern: /\b20\s*km\b/i, label: "20km", meters: 20000 },
    { pattern: /\b10\s*km\b/i, label: "10km", meters: 10000 },
    { pattern: /\b2\s*km\b/i, label: "2km", meters: 2000 },
    { pattern: /\b550\s*m\b/i, label: "550m", meters: 550 },
    { pattern: /\b300\s*m\b/i, label: "300m", meters: 300 },
    { pattern: /\b100\s*m\b/i, label: "100m", meters: 100 },
    { pattern: /\bLR4\b/, label: "10km", meters: 10000 },
    { pattern: /\bLR\b/, label: "10km", meters: 10000 },
    { pattern: /\bER4?\b/, label: "40km", meters: 40000 },
    { pattern: /\bZR4?\b/, label: "80km", meters: 80000 },
    { pattern: /\bSR4?\b/, label: "300m", meters: 300 },
    { pattern: /\bDR4?\b/, label: "500m", meters: 500 },
    { pattern: /\bFR4?\b/, label: "2km", meters: 2000 },
  ];

  const hit = rules.find(({ pattern }) => pattern.test(text));
  if (!hit) return undefined;
  return { label: hit.label, meters: hit.meters };
}
|
||||
|
||||
/**
 * Classifies the medium as "SMF", "MMF" or "Copper" from free text.
 *
 * Single-mode hints are checked first, then multi-mode, then copper; text
 * without any hint defaults to "SMF". Two-letter optic codes (LX, LR, ER, ZR,
 * SX, SR) must stand alone, i.e. not be flanked by letters. They are also
 * recognised at the very start or end of the text, so a name that ends in
 * "-SR" is classified as MMF instead of falling through to the default.
 *
 * @param text Product name and/or surrounding card text.
 * @returns "SMF", "MMF" or "Copper".
 */
function detectFiber(text: string): string {
  if (/single.?mode|smf|(?:^|[^a-z])(?:lx|lr|er|zr)(?:[^a-z]|$)|bidi|cwdm|dwdm/i.test(text)) return "SMF";
  if (/multi.?mode|mmf|(?:^|[^a-z])(?:sx|sr)(?:[^a-z]|$)/i.test(text)) return "MMF";
  if (/copper|dac|twinax|rj45|base-t/i.test(text)) return "Copper";
  return "SMF";
}
|
||||
|
||||
/**
 * Returns the digits of the first "<3-4 digits> nm" occurrence in `text`
 * (case-insensitive, optional whitespace before "nm"), or "" when absent.
 */
function detectWavelength(text: string): string {
  const hit = /(\d{3,4})\s*nm/i.exec(text);
  if (hit === null) return "";
  return hit[1];
}
|
||||
|
||||
/**
 * Converts a captured price string to a number.
 *
 * A trailing separator followed by one or two digits is taken as the decimal
 * part; every other "." or "," is a thousands separator ("1,234.56" and
 * "1.234,56" both give 1234.56).
 *
 * @returns The price, or undefined when it is not within (0, 50000).
 */
function parseListedPrice(raw: string): number | undefined {
  const decimals = raw.match(/[.,](\d{1,2})$/);
  const wholePart = decimals ? raw.slice(0, decimals.index) : raw;
  const digits = wholePart.replace(/[.,]/g, "");
  const value = parseFloat(decimals ? `${digits}.${decimals[1]}` : digits);
  return value > 0 && value < 50000 ? value : undefined;
}

/**
 * Extracts transceiver products from a Skylane Optics listing page.
 *
 * Card-like selectors are tried in order; the first selector that matches at
 * least two elements and yields at least one product wins. If no selector
 * yields anything, anchors whose text looks transceiver-related are used
 * (without prices). Results are de-duplicated by URL.
 *
 * @param html      Raw HTML of the listing page.
 * @param sourceUrl URL the HTML was fetched from; base for relative links and
 *                  the fallback URL for cards without a usable link.
 * @returns Products found on the page, at most one per distinct URL.
 */
function parseProductList(html: string, sourceUrl: string): Product[] {
  const $ = cheerio.load(html);
  const products: Product[] = [];

  // Resolve hrefs with the URL parser rather than `BASE + href`, which
  // produced malformed URLs for relative and protocol-relative links.
  // Non-http(s) targets and unparsable hrefs yield undefined.
  const resolveUrl = (href: string): string | undefined => {
    try {
      const resolved = new URL(href, sourceUrl);
      return /^https?:$/.test(resolved.protocol) ? resolved.href : undefined;
    } catch {
      return undefined;
    }
  };

  const cardSelectors = [
    ".product-item", ".product", ".item", "li.product",
    ".product-card", "article", ".product-list-item", ".grid-item",
  ];

  for (const sel of cardSelectors) {
    const cards = $(sel);
    if (cards.length < 2) continue;

    cards.each((_i, el) => {
      const text = $(el).text().trim();
      if (!/sfp|qsfp|xfp|transceiver|optic/i.test(text)) return;

      const nameEl = $(el).find("h2, h3, h4, .name, .product-name, .title, a").first();
      const name = nameEl.text().trim() || text.slice(0, 100);
      if (!name || name.length < 5) return;

      const href = $(el).find("a[href]").first().attr("href");
      const url = (href && resolveUrl(href)) || sourceUrl;

      // Skylane part numbers: e.g. SO-SFP+-LRM-CI, SFP-10G-SR-SO.
      // "+" is allowed inside the SO-… pattern so SFP+/QSFP+ parts are not
      // truncated to "SO-SFP" / "SO-QSFP".
      const partNumMatch = name.match(/\b(SO[-_][A-Z0-9+-]*[A-Z0-9+])(?![A-Z0-9_])/i) ||
        name.match(/\b([A-Z]{2,}[-][A-Z0-9]+[-][A-Z0-9]+)\b/) ||
        text.match(/Part\s*[#:]?\s*([A-Z0-9-]{5,})/i);
      const partNumber = partNumMatch?.[1] ||
        name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] ||
        name.replace(/\s+/g, "-").slice(0, 60);

      // Check for price ("$ 12.50" or "12,50 €").
      const priceText = $(el).find(".price, .product-price, .amount").text();
      const priceMatch = priceText.match(/\$\s*([\d,]+\.?\d{0,2})/) ||
        priceText.match(/([\d.,]+)\s*€/);
      const price = priceMatch ? parseListedPrice(priceMatch[1]) : undefined;

      const combined = name + " " + text;
      const reach = detectReach(combined);

      products.push({
        partNumber,
        name,
        url,
        price,
        ...detectFormFactor(combined),
        reachLabel: reach?.label,
        reachMeters: reach?.meters,
        fiberType: detectFiber(combined),
        wavelength: detectWavelength(combined),
      });
    });

    // Stop at the first selector that produced anything.
    if (products.length > 0) break;
  }

  // Fallback: transceiver-relevant anchors
  if (products.length === 0) {
    $("a[href]").each((_i, el) => {
      const name = $(el).text().trim();
      if (name.length < 8 || name.length > 200 || !/sfp|qsfp|transceiver/i.test(name)) return;

      const url = resolveUrl($(el).attr("href") || "");
      if (!url) return;

      const reach = detectReach(name);
      products.push({
        partNumber: name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60),
        name,
        url,
        ...detectFormFactor(name),
        reachLabel: reach?.label,
        reachMeters: reach?.meters,
        fiberType: detectFiber(name),
        wavelength: detectWavelength(name),
      });
    });
  }

  // Keep the first product seen for each URL.
  const seen = new Set<string>();
  return products.filter((p) => {
    if (!p.url || seen.has(p.url)) return false;
    seen.add(p.url);
    return true;
  });
}
|
||||
|
||||
/**
 * Collects sub-category links from a catalog page.
 *
 * A link qualifies when its href contains "/product/" or "/products/" and
 * mentions sfp, qsfp, transceiver or optic. Hrefs are resolved against BASE
 * with the URL parser, so "../products/…" and "//host/…" forms come out
 * well-formed; unparsable hrefs are skipped.
 *
 * @param html Raw HTML of a catalog page.
 * @returns Distinct absolute URLs in document order.
 */
function extractCategoryLinks(html: string): string[] {
  const $ = cheerio.load(html);
  const links = new Set<string>();

  $("a[href]").each((_i, el) => {
    const href = $(el).attr("href") || "";
    if (!/\/products?\//i.test(href)) return;
    if (!/sfp|qsfp|transceiver|optic/i.test(href)) return;
    try {
      links.add(new URL(href, BASE).href);
    } catch {
      // Not a usable URL — nothing to crawl.
    }
  });

  return [...links];
}
|
||||
|
||||
/**
 * Downloads `url` with the shared HEADERS and a 30 s timeout.
 *
 * @returns The response body as text.
 * @throws Error("HTTP <status> for <url>") on a non-2xx response; fetch and
 *         timeout errors propagate unchanged.
 */
async function fetchPage(url: string): Promise<string> {
  const timeoutSignal = AbortSignal.timeout(30000);
  const response = await fetch(url, { headers: HEADERS, signal: timeoutSignal });
  if (response.ok) return response.text();
  throw new Error(`HTTP ${response.status} for ${url}`);
}
|
||||
|
||||
/**
 * Crawls the Skylane Optics catalog and stores products and any listed prices.
 *
 * Starts from CATALOG_URLS, queues sub-category links discovered on the seed
 * pages, and follows "?page=N" pagination up to MAX_PAGES per URL. Products
 * are de-duplicated by URL. A 2 s pause separates all requests, including
 * the one before page 2 and those following a failed request.
 *
 * @returns Resolves when the queue is exhausted and products are saved.
 * @throws Whatever `ensureVendor` rejects with; it is not caught here.
 */
export async function scrapeSkylane(): Promise<void> {
  console.log("=== Skylane Optics Scraper Starting ===\n");

  const vendorId = await ensureVendor(
    "Skylane Optics",
    "compatible",
    "https://www.skylaneoptics.com",
    BASE + CATALOG_URLS[0],
  );

  const allProducts: Product[] = [];
  const seenUrls = new Set<string>();
  const scrapedCatalogUrls = new Set<string>();

  // Appends products with unseen URLs and reports how many were new.
  const collect = (found: Product[]): number => {
    let added = 0;
    for (const p of found) {
      if (seenUrls.has(p.url)) continue;
      seenUrls.add(p.url);
      allProducts.push(p);
      added++;
    }
    return added;
  };

  // Start with known catalog URLs and discover sub-categories.
  // The queue grows while it is being iterated.
  const urlQueue = CATALOG_URLS.map((p) => BASE + p);

  for (const catalogUrl of urlQueue) {
    if (scrapedCatalogUrls.has(catalogUrl)) continue;
    scrapedCatalogUrls.add(catalogUrl);

    console.log(` Fetching: ${catalogUrl}`);
    try {
      const html = await fetchPage(catalogUrl);
      const pageProducts = parseProductList(html, catalogUrl);
      collect(pageProducts);
      console.log(` Found ${pageProducts.length} products`);

      // Discover sub-category links only while the number of visited URLs
      // does not exceed the number of seed URLs; at most 10 links per page.
      if (scrapedCatalogUrls.size <= CATALOG_URLS.length) {
        for (const link of extractCategoryLinks(html).slice(0, 10)) {
          if (!scrapedCatalogUrls.has(link)) urlQueue.push(link);
        }
      }

      // Try pagination for this URL
      for (let page = 2; page <= MAX_PAGES; page++) {
        // Pause before every page request, including the first one.
        await sleep(2000);
        try {
          const paged = new URL(catalogUrl);
          paged.searchParams.set("page", String(page));
          const pageUrl = paged.href;

          const pageHtml = await fetchPage(pageUrl);
          const paginated = parseProductList(pageHtml, pageUrl);
          if (paginated.length === 0) break;

          const added = collect(paginated);
          console.log(` Page ${page}: ${paginated.length} products`);

          // A page with nothing new means the site ignores "?page=" or we
          // are past the last page; stop rather than re-fetch the same list.
          if (added === 0) break;
        } catch {
          // A failing page (e.g. 404 past the end) ends pagination here.
          break;
        }
      }
    } catch (err) {
      console.warn(` Failed: ${(err as Error).message}`);
    } finally {
      // Pause after failures too, so errors cannot bypass the rate limit.
      await sleep(2000);
    }
  }

  console.log(`\nTotal unique products: ${allProducts.length}`);

  let totalProducts = 0;
  let priceUpdates = 0;

  for (const product of allProducts) {
    try {
      const txId = await findOrCreateScrapedTransceiver({
        partNumber: product.partNumber,
        vendorId,
        formFactor: product.formFactor,
        speedGbps: product.speedGbps,
        speed: product.speed,
        reachMeters: product.reachMeters,
        reachLabel: product.reachLabel,
        fiberType: product.fiberType,
        wavelengths: product.wavelength,
        category: "DataCenter",
      });

      if (product.price && product.price > 0) {
        const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber }));
        // NOTE(review): parseProductList also accepts "€" prices, yet every
        // observation is stored as USD — confirm whether currency must be tracked.
        const updated = await upsertPriceObservation({
          transceiverId: txId,
          sourceVendorId: vendorId,
          price: product.price,
          currency: "USD",
          stockLevel: "in_stock",
          url: product.url,
          contentHash: hash,
        });
        if (updated) priceUpdates++;
      }
      totalProducts++;
    } catch (err) {
      console.warn(` Error saving ${product.partNumber}: ${(err as Error).message.slice(0, 80)}`);
    }
  }

  console.log(`\n=== Skylane Optics Complete: ${totalProducts} products, ${priceUpdates} prices ===`);
}
|
||||
|
||||
// CLI entry point: run the scraper only when this file is executed directly,
// then close the shared DB pool. Any failure is logged and exits with code 1.
if (require.main === module) {
  void (async () => {
    try {
      await scrapeSkylane();
      await pool.end();
    } catch (err) {
      console.error("Fatal:", err);
      pool.end();
      process.exit(1);
    }
  })();
}
|
||||
@ -1,232 +0,0 @@
|
||||
/**
|
||||
* SmartOptics Scraper — Manufacturer, quote-based pricing
|
||||
*
|
||||
* www.smartoptics.com — no direct prices, catalog-only scrape.
|
||||
* Extracts product names, part numbers, form factors and specs.
|
||||
*
|
||||
* Rate limited: 1 req/2sec.
|
||||
*/
|
||||
import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db";
|
||||
import * as cheerio from "cheerio";
|
||||
|
||||
// Site root; catalog paths and relative links are appended to it.
const BASE = "https://www.smartoptics.com";

// Catalog pages, fetched in order.
const CATALOG_URLS = [
  "/products/transceivers/",
  "/products/",
  "/products/sfp-transceivers/",
  "/products/qsfp-transceivers/",
];

// Request headers sent with every fetch.
const HEADERS = {
  "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
  "Accept": "text/html,application/xhtml+xml",
  "Accept-Language": "en-US,en;q=0.9",
};
|
||||
|
||||
/** One catalog entry extracted from a SmartOptics listing page. */
interface Product {
  /** Part number, or a slug derived from the name when none is recognised. */
  partNumber: string;
  /** Display name as scraped. */
  name: string;
  /** Absolute product URL; also the de-duplication key. */
  url: string;
  /** Form factor label such as "SFP+" or "QSFP28". */
  formFactor: string;
  /** Speed label such as "10G". */
  speed: string;
  /** Speed in Gbit/s matching `speed`. */
  speedGbps: number;
  /** Reach label such as "10km", when one was detected. */
  reachLabel?: string;
  /** Reach in meters matching `reachLabel`. */
  reachMeters?: number;
  /** "SMF", "MMF" or "Copper". */
  fiberType?: string;
  /** Digits of the first "<n> nm" occurrence, or "" when none was found. */
  wavelength?: string;
}
|
||||
|
||||
/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Guesses form factor and nominal speed from free text.
 *
 * Matching is case-insensitive substring search; rules are evaluated in the
 * order listed and the first hit wins. When nothing matches, SFP+ / 10G is
 * returned.
 *
 * @param text Product name and/or surrounding card text.
 * @returns Form factor label, speed label and speed in Gbit/s.
 */
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
  const haystack = text.toLowerCase();
  const has = (needle: string): boolean => haystack.includes(needle);
  const mentionsQsfp = has("qsfp");

  // [matched, formFactor, speed, speedGbps] — order is significant.
  const rules: [boolean, string, string, number][] = [
    [has("osfp") && !mentionsQsfp, "OSFP", "400G", 400],
    [has("qsfp-dd"), "QSFP-DD", "400G", 400],
    [has("qsfp28"), "QSFP28", "100G", 100],
    [has("qsfp+") || has("qsfp plus"), "QSFP+", "40G", 40],
    [has("sfp56"), "SFP56", "50G", 50],
    [has("sfp28") || has("25g"), "SFP28", "25G", 25],
    [has("sfp+") || has("10gbase") || has("10g"), "SFP+", "10G", 10],
    [has("xfp"), "XFP", "10G", 10],
    [has("1000base") || has("1g"), "SFP", "1G", 1],
    [has("sfp") && !mentionsQsfp, "SFP", "1G", 1],
  ];

  for (const [matched, formFactor, speed, speedGbps] of rules) {
    if (matched) return { formFactor, speed, speedGbps };
  }
  return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
}
|
||||
|
||||
/**
 * Derives a reach label and distance in meters from free text.
 *
 * Explicit distances ("10 km", "300m") are checked before optic-type codes
 * (LR, ER, ZR, SR, DR, FR); the first rule that matches wins. The type codes
 * are matched case-sensitively.
 *
 * @param text Product name and/or surrounding card text.
 * @returns `{ label, meters }`, or `undefined` when nothing matches.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  const rules: { pattern: RegExp; label: string; meters: number }[] = [
    { pattern: /\b80\s*km\b/i, label: "80km", meters: 80000 },
    { pattern: /\b40\s*km\b/i, label: "40km", meters: 40000 },
    { pattern: /\b20\s*km\b/i, label: "20km", meters: 20000 },
    { pattern: /\b10\s*km\b/i, label: "10km", meters: 10000 },
    { pattern: /\b2\s*km\b/i, label: "2km", meters: 2000 },
    { pattern: /\b550\s*m\b/i, label: "550m", meters: 550 },
    { pattern: /\b300\s*m\b/i, label: "300m", meters: 300 },
    { pattern: /\b100\s*m\b/i, label: "100m", meters: 100 },
    { pattern: /\bLR4\b/, label: "10km", meters: 10000 },
    { pattern: /\bLR\b/, label: "10km", meters: 10000 },
    { pattern: /\bER4?\b/, label: "40km", meters: 40000 },
    { pattern: /\bZR4?\b/, label: "80km", meters: 80000 },
    { pattern: /\bSR4?\b/, label: "300m", meters: 300 },
    { pattern: /\bDR4?\b/, label: "500m", meters: 500 },
    { pattern: /\bFR4?\b/, label: "2km", meters: 2000 },
  ];

  const hit = rules.find(({ pattern }) => pattern.test(text));
  if (!hit) return undefined;
  return { label: hit.label, meters: hit.meters };
}
|
||||
|
||||
/**
 * Classifies the medium as "SMF", "MMF" or "Copper" from free text.
 *
 * Single-mode hints are checked first, then multi-mode, then copper; text
 * without any hint defaults to "SMF". Two-letter optic codes (LX, LR, ER, ZR,
 * SX, SR) must stand alone, i.e. not be flanked by letters. They are also
 * recognised at the very start or end of the text, so a name that ends in
 * "-SR" is classified as MMF instead of falling through to the default.
 *
 * @param text Product name and/or surrounding card text.
 * @returns "SMF", "MMF" or "Copper".
 */
function detectFiber(text: string): string {
  if (/single.?mode|smf|(?:^|[^a-z])(?:lx|lr|er|zr)(?:[^a-z]|$)|bidi|cwdm|dwdm/i.test(text)) return "SMF";
  if (/multi.?mode|mmf|(?:^|[^a-z])(?:sx|sr)(?:[^a-z]|$)/i.test(text)) return "MMF";
  if (/copper|dac|twinax|rj45|base-t/i.test(text)) return "Copper";
  return "SMF";
}
|
||||
|
||||
/**
 * Returns the digits of the first "<3-4 digits> nm" occurrence in `text`
 * (case-insensitive, optional whitespace before "nm"), or "" when absent.
 */
function detectWavelength(text: string): string {
  const hit = /(\d{3,4})\s*nm/i.exec(text);
  if (hit === null) return "";
  return hit[1];
}
|
||||
|
||||
/**
 * Extracts transceiver products from a SmartOptics catalog page.
 *
 * Card or table-row selectors are tried in order; the first selector that
 * matches at least two elements and yields at least one product wins. If no
 * selector yields anything, every anchor whose text looks transceiver-related
 * is used. Results are de-duplicated by URL.
 *
 * @param html      Raw HTML of the catalog page.
 * @param sourceUrl URL the HTML was fetched from; base for relative links and
 *                  the fallback URL for cards without a usable link.
 * @returns Products found on the page, at most one per distinct URL.
 */
function parseProductList(html: string, sourceUrl: string): Product[] {
  const $ = cheerio.load(html);
  const products: Product[] = [];

  // Resolve hrefs with the URL parser rather than `BASE + href`, which
  // produced malformed URLs for relative and protocol-relative links.
  // Non-http(s) targets and unparsable hrefs yield undefined.
  const resolveUrl = (href: string): string | undefined => {
    try {
      const resolved = new URL(href, sourceUrl);
      return /^https?:$/.test(resolved.protocol) ? resolved.href : undefined;
    } catch {
      return undefined;
    }
  };

  // Manufacturer site: product cards or table rows
  const cardSelectors = [
    ".product-item", ".product-card", ".product", "tr",
    "article", ".item", ".col-xs-12.col-sm-6", ".entry",
  ];

  for (const sel of cardSelectors) {
    const cards = $(sel);
    if (cards.length < 2) continue;

    cards.each((_i, el) => {
      const text = $(el).text().trim();
      if (!/sfp|qsfp|xfp|transceiver|optic/i.test(text)) return;

      const nameEl = $(el).find("h2, h3, h4, td, .title, .name, a").first();
      const name = nameEl.text().trim() || text.slice(0, 100);
      if (!name || name.length < 5) return;

      const href = $(el).find("a[href]").first().attr("href");
      const url = (href && resolveUrl(href)) || sourceUrl;

      // Part number: look for alphanumeric patterns typical of transceiver SKUs
      const partNumMatch = text.match(/\b([A-Z]{2,}[-_][A-Z0-9]{2,}[-_][A-Z0-9]+)\b/) ||
        text.match(/\b(SO[-_][A-Z0-9]+)\b/i) ||
        name.match(/[A-Z0-9][-A-Z0-9]{5,}/);
      const partNumber = partNumMatch?.[0] || name.replace(/\s+/g, "-").slice(0, 60);

      const combined = name + " " + text;
      const reach = detectReach(combined);

      products.push({
        partNumber,
        name,
        url,
        ...detectFormFactor(combined),
        reachLabel: reach?.label,
        reachMeters: reach?.meters,
        fiberType: detectFiber(combined),
        wavelength: detectWavelength(combined),
      });
    });

    // Stop at the first selector that produced anything.
    if (products.length > 0) break;
  }

  // Fallback: all transceiver-relevant anchors
  if (products.length === 0) {
    $("a[href]").each((_i, el) => {
      const name = $(el).text().trim();
      if (name.length < 5 || name.length > 200 || !/sfp|qsfp|transceiver|optic/i.test(name)) return;

      const url = resolveUrl($(el).attr("href") || "");
      if (!url) return;

      const reach = detectReach(name);
      products.push({
        partNumber: name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60),
        name,
        url,
        ...detectFormFactor(name),
        reachLabel: reach?.label,
        reachMeters: reach?.meters,
        fiberType: detectFiber(name),
        wavelength: detectWavelength(name),
      });
    });
  }

  // Keep the first product seen for each URL.
  const seen = new Set<string>();
  return products.filter((p) => {
    if (!p.url || seen.has(p.url)) return false;
    seen.add(p.url);
    return true;
  });
}
|
||||
|
||||
/**
 * Downloads `url` with the shared HEADERS and a 30 s timeout.
 *
 * @returns The response body as text.
 * @throws Error("HTTP <status> for <url>") on a non-2xx response; fetch and
 *         timeout errors propagate unchanged.
 */
async function fetchPage(url: string): Promise<string> {
  const timeoutSignal = AbortSignal.timeout(30000);
  const response = await fetch(url, { headers: HEADERS, signal: timeoutSignal });
  if (response.ok) return response.text();
  throw new Error(`HTTP ${response.status} for ${url}`);
}
|
||||
|
||||
/**
 * Scrapes the SmartOptics catalog (no prices) and stores each product.
 *
 * Fetches every page in CATALOG_URLS, de-duplicates products by URL and
 * saves them one by one; a failure to save one product is logged and does
 * not stop the run. A 2 s pause follows every request, including failed ones.
 *
 * @returns Resolves when all catalog pages have been processed.
 * @throws Whatever `ensureVendor` rejects with; it is not caught here.
 */
export async function scrapeSmartOptics(): Promise<void> {
  console.log("=== SmartOptics Scraper Starting (catalog-only, no prices) ===\n");

  const vendorId = await ensureVendor(
    "SmartOptics",
    "manufacturer",
    "https://www.smartoptics.com",
    BASE + CATALOG_URLS[0],
  );

  const allProducts: Product[] = [];
  const seenUrls = new Set<string>();

  for (const catalogPath of CATALOG_URLS) {
    const catalogUrl = BASE + catalogPath;
    console.log(` Fetching catalog: ${catalogUrl}`);
    try {
      const html = await fetchPage(catalogUrl);
      const pageProducts = parseProductList(html, catalogUrl);
      for (const p of pageProducts) {
        if (seenUrls.has(p.url)) continue;
        seenUrls.add(p.url);
        allProducts.push(p);
      }
      console.log(` Found ${pageProducts.length} products`);
    } catch (err) {
      console.warn(` Failed ${catalogPath}: ${(err as Error).message}`);
    } finally {
      // Pause after failures too, so errors cannot bypass the rate limit.
      await sleep(2000);
    }
  }

  console.log(`\nTotal unique products: ${allProducts.length}`);

  let totalProducts = 0;

  for (const product of allProducts) {
    try {
      await findOrCreateScrapedTransceiver({
        partNumber: product.partNumber,
        vendorId,
        formFactor: product.formFactor,
        speedGbps: product.speedGbps,
        speed: product.speed,
        reachMeters: product.reachMeters,
        reachLabel: product.reachLabel,
        fiberType: product.fiberType,
        wavelengths: product.wavelength,
        category: "DataCenter",
      });
      totalProducts++;
    } catch (err) {
      console.warn(` Error saving ${product.partNumber}: ${(err as Error).message.slice(0, 80)}`);
    }
  }

  console.log(`\n=== SmartOptics Complete: ${totalProducts} products catalogued (no prices — quote-based) ===`);
}
|
||||
|
||||
// CLI entry point: run the scraper only when this file is executed directly,
// then close the shared DB pool. Any failure is logged and exits with code 1.
if (require.main === module) {
  void (async () => {
    try {
      await scrapeSmartOptics();
      await pool.end();
    } catch (err) {
      console.error("Fatal:", err);
      pool.end();
      process.exit(1);
    }
  })();
}
|
||||
@ -1,260 +0,0 @@
|
||||
/**
|
||||
* T&S Communication Scraper — Chinese compatible transceiver manufacturer
|
||||
*
|
||||
* www.china-tscom.com — product catalog with USD prices.
|
||||
* Paginated: /products/fiber-optic-transceivers/?page=N
|
||||
*
|
||||
* Rate limited: 1 req/2sec.
|
||||
*/
|
||||
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
|
||||
import { contentHash } from "../utils/hash";
|
||||
import * as cheerio from "cheerio";
|
||||
|
||||
/** Site origin; product links that are not absolute are prefixed with it. */
const BASE = "https://www.china-tscom.com";

/** Path of the transceiver catalog listing (further pages append `?page=N`). */
const CATALOG_PATH = "/products/fiber-optic-transceivers/";

/** Upper bound on the number of catalog pages requested per run. */
const MAX_PAGES = 15;

/** Headers sent with every catalog request. */
const HEADERS = {
  "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
  "Accept": "text/html,application/xhtml+xml",
  "Accept-Language": "en-US,en;q=0.9",
};
|
||||
|
||||
/** One catalog entry as extracted from a listing page. */
interface Product {
  // ── identity ──
  /** SKU text from the listing, else a part-number-like token from the name, else the dashed name. */
  partNumber: string;
  /** Product title as shown on the page. */
  name: string;
  /** Link to the product page. */
  url: string;

  // ── classification derived from the name ──
  /** Form factor label, e.g. "SFP+" or "QSFP28". */
  formFactor: string;
  /** Speed label, e.g. "10G". */
  speed: string;
  /** Speed in Gbit/s matching `speed`. */
  speedGbps: number;

  // ── optional details ──
  /** USD price parsed from the listing; absent when none was found or it failed the sanity range. */
  price?: number;
  /** Reach label such as "10km" or "300m". */
  reachLabel?: string;
  /** Reach in meters matching `reachLabel`. */
  reachMeters?: number;
  /** "SMF", "MMF" or "Copper". */
  fiberType?: string;
  /** Digits of the first "<n>nm" token in the name, or an empty string. */
  wavelength?: string;
}
|
||||
|
||||
/**
 * Returns a promise that resolves (with no value) once `ms` milliseconds
 * have elapsed. Used to pace requests against the vendor site.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Guesses form factor and nominal speed from free-form product text.
 *
 * Rules are checked from most to least specific, because several names are
 * substrings of others ("sfp56" is inside "qsfp56", "sfp+" inside "qsfp+").
 * QSFP56 therefore has its own rule ahead of SFP56; without it a 200G QSFP56
 * module is reported as a 50G SFP56.
 *
 * @param text Product name or description (matched case-insensitively).
 * @returns The detected form factor with its speed label and Gbit/s value;
 *          falls back to SFP+ / 10G when nothing matches.
 */
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
  const lower = text.toLowerCase();
  const has = (needle: string): boolean => lower.includes(needle);

  if (has("osfp") && !has("qsfp")) return { formFactor: "OSFP", speed: "400G", speedGbps: 400 };
  // "qsfp56-dd" is the double-density 400G variant and must not fall into the QSFP56 rule below.
  if (has("qsfp-dd") || has("qsfp56-dd")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
  // NOTE(review): assumes downstream storage accepts "QSFP56" as a form factor value — confirm.
  if (has("qsfp56")) return { formFactor: "QSFP56", speed: "200G", speedGbps: 200 };
  if (has("qsfp28")) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
  if (has("qsfp+") || has("qsfp plus")) return { formFactor: "QSFP+", speed: "40G", speedGbps: 40 };
  if (has("sfp56")) return { formFactor: "SFP56", speed: "50G", speedGbps: 50 };
  if (has("sfp28") || has("25g")) return { formFactor: "SFP28", speed: "25G", speedGbps: 25 };
  if (has("sfp+") || has("10gbase") || has("10g")) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
  if (has("xfp")) return { formFactor: "XFP", speed: "10G", speedGbps: 10 };
  if (has("1000base") || has("1g")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
  if (has("sfp") && !has("qsfp")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };

  return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
}
|
||||
|
||||
/**
 * Derives a reach from product text.
 *
 * Explicit distances ("80km", "300 m") are tried first, then optical
 * standard suffixes (LR/ER/ZR/SR/DR/FR, matched case-sensitively).
 * The first rule that matches wins.
 *
 * @returns `{ label, meters }` for the first matching rule, or `undefined`.
 */
function detectReach(text: string): { label: string; meters: number } | undefined {
  const rules: { re: RegExp; label: string; meters: number }[] = [
    // explicit distances
    { re: /\b80\s*km\b/i, label: "80km", meters: 80000 },
    { re: /\b40\s*km\b/i, label: "40km", meters: 40000 },
    { re: /\b20\s*km\b/i, label: "20km", meters: 20000 },
    { re: /\b10\s*km\b/i, label: "10km", meters: 10000 },
    { re: /\b2\s*km\b/i, label: "2km", meters: 2000 },
    { re: /\b550\s*m\b/i, label: "550m", meters: 550 },
    { re: /\b300\s*m\b/i, label: "300m", meters: 300 },
    { re: /\b100\s*m\b/i, label: "100m", meters: 100 },
    // standard suffixes
    { re: /\bLR4\b/, label: "10km", meters: 10000 },
    { re: /\bLR\b/, label: "10km", meters: 10000 },
    { re: /\bER4?\b/, label: "40km", meters: 40000 },
    { re: /\bZR4?\b/, label: "80km", meters: 80000 },
    { re: /\bSR4?\b/, label: "300m", meters: 300 },
    { re: /\bDR4?\b/, label: "500m", meters: 500 },
    { re: /\bFR4?\b/, label: "2km", meters: 2000 },
  ];

  const hit = rules.find(({ re }) => re.test(text));
  if (hit === undefined) {
    return undefined;
  }
  return { label: hit.label, meters: hit.meters };
}
|
||||
|
||||
/**
 * Classifies the medium of a product from its text.
 *
 * The short standard codes (LX/LR/ER/ZR for single-mode, SX/SR for
 * multi-mode) are matched as stand-alone tokens using lookarounds, so they
 * are also recognised at the very start or end of the text. Requiring a
 * non-letter character on both sides would miss a name ending in "SR" and
 * report it as "SMF" via the default.
 *
 * @param text Product name or description (matched case-insensitively).
 * @returns "SMF", "MMF" or "Copper"; "SMF" when nothing matches.
 */
function detectFiber(text: string): string {
  if (/single.?mode|smf|(?<![a-z])(?:lx|lr|er|zr)(?![a-z])|bidi|cwdm|dwdm/i.test(text)) return "SMF";
  if (/multi.?mode|mmf|(?<![a-z])(?:sx|sr)(?![a-z])/i.test(text)) return "MMF";
  if (/copper|dac|twinax|rj45|base-t/i.test(text)) return "Copper";
  return "SMF";
}
|
||||
|
||||
/**
 * Pulls the first wavelength out of the text: a 3–4 digit number followed
 * (optionally after whitespace) by "nm", case-insensitive.
 *
 * @returns The digits only (e.g. "1310"), or "" when no wavelength is present.
 */
function detectWavelength(text: string): string {
  const hit = /(\d{3,4})\s*nm/i.exec(text);
  if (hit === null) {
    return "";
  }
  return hit[1];
}
|
||||
|
||||
/**
 * Extracts transceiver products from the HTML of one catalog page.
 *
 * Strategy:
 *  1. Try a list of common product-card selectors; the first selector that
 *     matches at least two elements AND yields at least one product wins.
 *  2. If no selector produced anything, fall back to scanning every anchor
 *     whose text looks like a transceiver name.
 *
 * Changes compared with the earlier version:
 *  - every thousands separator is stripped from prices (previously only the
 *    first one, so "1,234,567.00" parsed as 1234);
 *  - links are resolved with `URL` against the catalog page, so relative
 *    hrefs without a leading slash no longer produce "…comfoo" URLs;
 *  - entries without any href get an empty URL and are dropped by the final
 *    filter instead of all collapsing onto the bare site origin.
 *
 * @param html Raw HTML of a catalog page.
 * @returns Products found on the page, de-duplicated by URL (first one wins).
 */
function parseProductList(html: string): Product[] {
  const $ = cheerio.load(html);
  const products: Product[] = [];

  // Resolve an href against the catalog page; "" means "no usable link".
  const toAbsoluteUrl = (href: string): string => {
    if (!href.trim()) return "";
    try {
      return new URL(href, BASE + CATALOG_PATH).href;
    } catch {
      // Unparseable href: keep the historical concatenation rather than losing the entry.
      return href.startsWith("http") ? href : BASE + href;
    }
  };

  // Build a Product from the pieces both extraction strategies share.
  const buildProduct = (name: string, href: string, price?: number, sku = ""): Product => {
    const reach = detectReach(name);
    return {
      // Prefer the page's SKU, then a part-number-like token, then the dashed name.
      partNumber:
        sku ||
        name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] ||
        name.replace(/\s+/g, "-").slice(0, 60),
      name,
      url: toAbsoluteUrl(href),
      price,
      ...detectFormFactor(name),
      reachLabel: reach?.label,
      reachMeters: reach?.meters,
      fiberType: detectFiber(name),
      wavelength: detectWavelength(name),
    };
  };

  // Try common product grid/list selectors
  const cardSelectors = [
    ".product-item", ".product-card", ".item", "li.product",
    "article", ".product", ".col-product", ".entry",
  ];

  let found = false;
  for (const sel of cardSelectors) {
    const cards = $(sel);
    if (cards.length < 2) continue;

    cards.each((_i, el) => {
      const card = $(el);
      const name = card.find("h2, h3, h4, .product-name, .entry-title, a").first().text().trim();
      if (!name || name.length < 5 || !/sfp|qsfp|xfp|transceiver|optic/i.test(name)) return;

      const href = card.find("a[href]").first().attr("href") || "";

      // USD price patterns: "$ 12.34" or "12.34 USD"
      const priceText = card.find(".price, .product-price, .amount, [data-price]").text();
      const priceMatch =
        priceText.match(/\$\s*([\d,]+\.?\d{0,2})/) ||
        priceText.match(/([\d,]+\.?\d{0,2})\s*USD/i);
      let price: number | undefined;
      if (priceMatch) {
        const parsed = parseFloat(priceMatch[1].replace(/,/g, ""));
        // Sanity range: ignore zero and implausibly large figures.
        if (parsed > 0 && parsed < 50000) price = parsed;
      }

      const sku = card.find(".sku, [data-sku], .model, .part-number").text().trim();
      products.push(buildProduct(name, href, price, sku));
    });

    if (products.length > 0) {
      found = true;
      break;
    }
  }

  // Fallback: anchor-based extraction (no price or SKU available here)
  if (!found) {
    $("a[href]").each((_i, el) => {
      const name = $(el).text().trim();
      if (name.length < 8 || name.length > 200 || !/sfp|qsfp|transceiver/i.test(name)) return;
      products.push(buildProduct(name, $(el).attr("href") || ""));
    });
  }

  const seen = new Set<string>();
  return products.filter((p) => {
    if (!p.url || seen.has(p.url)) return false;
    seen.add(p.url);
    return true;
  });
}
|
||||
|
||||
/**
 * Downloads one page with the scraper's request headers and a 30 s timeout.
 *
 * @param url Absolute URL to fetch.
 * @returns The response body as text.
 * @throws Error with "HTTP <status> for <url>" when the response is not OK;
 *         fetch/abort errors propagate unchanged.
 */
async function fetchPage(url: string): Promise<string> {
  const response = await fetch(url, {
    headers: HEADERS,
    signal: AbortSignal.timeout(30000),
  });

  if (response.ok) {
    return response.text();
  }
  throw new Error(`HTTP ${response.status} for ${url}`);
}
|
||||
|
||||
/**
 * Scrapes the T&S Communication catalog and stores products and prices.
 *
 * Flow: register the vendor, walk up to MAX_PAGES listing pages (2 s pause
 * between requests, stopping at the first empty or failed page), drop
 * duplicate URLs, then save each product and, when it carries a positive
 * price, a USD price observation. Failures on single products are logged
 * and do not stop the run.
 */
export async function scrapeTsCom(): Promise<void> {
  console.log("=== T&S Communication Scraper Starting ===\n");

  const catalogUrl = BASE + CATALOG_PATH;
  const vendorId = await ensureVendor(
    "T&S Communication",
    "compatible",
    "https://www.china-tscom.com",
    catalogUrl,
  );

  // ── collect listing pages ──
  const collected: Product[] = [];
  let page = 1;
  while (page <= MAX_PAGES) {
    try {
      const pageUrl = page === 1 ? catalogUrl : `${BASE}${CATALOG_PATH}?page=${page}`;
      const pageProducts = parseProductList(await fetchPage(pageUrl));
      collected.push(...pageProducts);
      console.log(` Page ${page}: ${pageProducts.length} products`);

      if (pageProducts.length === 0) {
        console.log(` Empty page ${page}, stopping pagination.`);
        break;
      }
      if (page < MAX_PAGES) {
        await sleep(2000);
      }
    } catch (err) {
      console.warn(` Page ${page} failed: ${(err as Error).message}`);
      break;
    }
    page += 1;
  }

  // ── de-duplicate by URL, keeping the first occurrence ──
  const knownUrls = new Set<string>();
  const uniqueProducts = collected.filter((item) => {
    if (knownUrls.has(item.url)) {
      return false;
    }
    knownUrls.add(item.url);
    return true;
  });

  console.log(`\nTotal unique products: ${uniqueProducts.length}`);

  // ── persist ──
  let totalProducts = 0;
  let priceUpdates = 0;

  for (const product of uniqueProducts) {
    try {
      const txId = await findOrCreateScrapedTransceiver({
        partNumber: product.partNumber,
        vendorId,
        formFactor: product.formFactor,
        speedGbps: product.speedGbps,
        speed: product.speed,
        reachMeters: product.reachMeters,
        reachLabel: product.reachLabel,
        fiberType: product.fiberType,
        wavelengths: product.wavelength,
        category: "DataCenter",
      });

      const hasPrice = (product.price ?? 0) > 0;
      if (hasPrice) {
        const observation = {
          transceiverId: txId,
          sourceVendorId: vendorId,
          price: product.price,
          currency: "USD",
          stockLevel: "in_stock",
          url: product.url,
          contentHash: contentHash(JSON.stringify({ price: product.price, part: product.partNumber })),
        };
        if (await upsertPriceObservation(observation)) {
          priceUpdates += 1;
        }
      }
      totalProducts += 1;
    } catch (err) {
      console.warn(` Error saving ${product.partNumber}: ${(err as Error).message.slice(0, 80)}`);
    }
  }

  console.log(`\n=== T&S Communication Complete: ${totalProducts} products, ${priceUpdates} prices ===`);
}
|
||||
|
||||
// Direct execution only (not when imported): run the scraper, then release the pool.
if (require.main === module) {
  const releasePool = () => pool.end();
  const abort = (err: unknown) => {
    console.error("Fatal:", err);
    pool.end();
    process.exit(1);
  };

  scrapeTsCom().then(releasePool).catch(abort);
}
|
||||
@ -1,22 +0,0 @@
|
||||
/**
|
||||
* Simple logger utility — wraps console with consistent formatting
|
||||
*/
|
||||
|
||||
/** Current time as an ISO-8601 string, used as the log timestamp. */
const ts = () => new Date().toISOString();

/**
 * Writes one formatted line through the given console method.
 * The context object is passed on only when one was supplied; appending an
 * empty-string placeholder would make console print a trailing space.
 */
function writeLog(
  method: "debug" | "log" | "warn" | "error",
  level: string,
  msg: string,
  ctx?: Record<string, unknown>,
): void {
  const line = `[${level}] ${ts()} ${msg}`;
  if (ctx == null) {
    console[method](line);
  } else {
    console[method](line, ctx);
  }
}

/**
 * Minimal console logger with "[LEVEL] <ISO timestamp> <message>" formatting.
 * Each method takes a message and an optional context object that is printed
 * after the message. `debug` is silent unless LOG_LEVEL is exactly "debug".
 */
export const logger = {
  debug: (msg: string, ctx?: Record<string, unknown>) => {
    if (process.env.LOG_LEVEL === "debug") {
      writeLog("debug", "DEBUG", msg, ctx);
    }
  },
  info: (msg: string, ctx?: Record<string, unknown>) => {
    writeLog("log", "INFO", msg, ctx);
  },
  warn: (msg: string, ctx?: Record<string, unknown>) => {
    writeLog("warn", "WARN", msg, ctx);
  },
  error: (msg: string, ctx?: Record<string, unknown>) => {
    writeLog("error", "ERROR", msg, ctx);
  },
};
|
||||
@ -1,242 +0,0 @@
|
||||
#!/bin/bash
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# TIP Pi Scraper Setup — run this ONCE on each Raspberry Pi
|
||||
#
|
||||
# Usage (from the Pi itself or via SSH once you have access):
|
||||
# curl -sL https://gitea.context-x.org/rene/transceiver-db/raw/branch/main/scripts/pi-scraper-setup.sh | bash
|
||||
#
|
||||
# Or copy & run manually:
|
||||
# bash pi-scraper-setup.sh
|
||||
#
|
||||
# What this does:
|
||||
# 1. Installs Node.js 22 + tsx + pm2
|
||||
# 2. Clones the TIP scraper package
|
||||
# 3. Installs dependencies (no Playwright — Pi runs fetch-only scrapers)
|
||||
# 4. Creates .env pointing to Erik's PostgreSQL via WireGuard
|
||||
# 5. Starts pm2 with the Pi-specific scheduler (lightweight scrapers only)
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
set -e

# ── Configuration ────────────────────────────────────────────────────────────
# Every value below can be overridden from the environment, e.g.
#   PI_NAME=pi2 GITEA=https://example.org/rene/transceiver-db.git bash setup.sh
PI_NAME="${PI_NAME:-pi-scraper}"
DB_HOST="${DB_HOST:-10.10.0.1}"   # Erik WireGuard IP
DB_PORT="${DB_PORT:-5433}"
DB_USER="${DB_USER:-tip}"
# NOTE(review): a real-looking password is baked in as the default and ends up
# in version control; prefer passing DB_PASS explicitly and rotating this value.
DB_PASS="${DB_PASS:-tip_prod_2026}"
DB_NAME="${DB_NAME:-transceiver_db}"
# The default clone URL is a LAN address; override GITEA when the Pi is elsewhere.
GITEA="${GITEA:-http://192.168.178.196:3000/rene/transceiver-db.git}"
INSTALL_DIR="${INSTALL_DIR:-/opt/tip-scraper}"

echo "=== TIP Pi Scraper Setup: $PI_NAME ==="
|
||||
|
||||
# ── 1. Node.js 22 ────────────────────────────────────────────────────────────
# Compare the MAJOR version numerically. A string comparison against "v20"
# sorts "v8"/"v9" after "v20" and would wrongly accept those old releases.
NODE_MAJOR=0
if command -v node &>/dev/null; then
  NODE_VERSION="$(node --version)"   # e.g. v18.19.0
  NODE_VERSION="${NODE_VERSION#v}"
  NODE_MAJOR="${NODE_VERSION%%.*}"
fi
if ! [[ "$NODE_MAJOR" =~ ^[0-9]+$ ]] || [ "$NODE_MAJOR" -lt 20 ]; then
  echo "Installing Node.js 22..."
  curl -fsSL https://deb.nodesource.com/setup_22.x | sudo bash -
  sudo apt-get install -y nodejs
fi
echo "Node: $(node --version)"

# ── 2. Global tools ───────────────────────────────────────────────────────────
# Try a system-wide install first; fall back to the user's npm prefix.
sudo npm install -g tsx pm2 2>/dev/null || npm install -g tsx pm2
# pm2 prints the command that registers the boot service as its last line; run it.
pm2 startup systemd -u "$USER" --hp "$HOME" | tail -1 | sudo bash || true
|
||||
|
||||
# ── 3. Clone / update repo ───────────────────────────────────────────────────
# Test for an actual checkout (.git), not merely for the directory.
if [ -d "$INSTALL_DIR/.git" ]; then
  echo "Updating existing repo..."
  git -C "$INSTALL_DIR" pull
else
  echo "Cloning from Gitea..."
  # The install dir normally lives under root-owned /opt: create it with sudo
  # and hand it to the invoking user so the unprivileged clone can write to it.
  sudo mkdir -p "$INSTALL_DIR"
  sudo chown "$(id -un):$(id -gn)" "$INSTALL_DIR"
  git clone "$GITEA" "$INSTALL_DIR"
fi
cd "$INSTALL_DIR"

# ── 4. Install deps (scraper package only, skip Playwright) ──────────────────
cd packages/scraper
npm install --ignore-scripts  # --ignore-scripts skips playwright browser download
echo "Dependencies installed"
|
||||
|
||||
# ── 5. .env file ─────────────────────────────────────────────────────────────
# The file contains the database password: create it owner-only (umask in a
# subshell so the rest of the script is unaffected) ...
(
  umask 077
  cat > "$INSTALL_DIR/.env" <<EOF
POSTGRES_HOST=$DB_HOST
POSTGRES_PORT=$DB_PORT
POSTGRES_USER=$DB_USER
POSTGRES_PASSWORD=$DB_PASS
POSTGRES_DB=$DB_NAME
CRAWLEE_STORAGE_DIR=/tmp/tip-crawlee
NODE_ENV=production
PI_NODE=true
EOF
)
# ... and tighten a file left behind with wider permissions by an earlier run.
chmod 600 "$INSTALL_DIR/.env"
echo ".env written"
|
||||
|
||||
# ── 6. Pi-specific scheduler index ───────────────────────────────────────────
# The Pi runs only fetch/cheerio scrapers — no Playwright.
# The heredoc delimiter is quoted, so the TypeScript below is written verbatim
# (no shell expansion of ${...} or backticks).
cat > "$INSTALL_DIR/packages/scraper/src/index-pi.ts" <<'PIEOF'
/**
 * Pi Scraper Index — lightweight fetch/cheerio only
 * No Playwright, no eBay enricher, no heavy compute
 * Runs 24/7 on Raspberry Pi nodes
 */
import { config } from "dotenv";
import { join } from "path";
config({ path: join(__dirname, "..", "..", "..", ".env") });

import PgBoss from "pg-boss";

// Percent-encode the credentials so characters such as "@", ":" or "/" in the
// password cannot corrupt the connection URI.
const enc = (value: string | undefined): string => encodeURIComponent(value ?? "");
const connectionString = `postgres://${enc(process.env.POSTGRES_USER)}:${enc(process.env.POSTGRES_PASSWORD)}@${process.env.POSTGRES_HOST}:${process.env.POSTGRES_PORT || "5433"}/${process.env.POSTGRES_DB}`;

/**
 * Single source of truth for the Pi-safe scrapers: queue name, module to
 * load, and the exported function to call. Keeping one table prevents the
 * queue list, module map and function map from drifting apart.
 */
const SCRAPERS: { queue: string; modPath: string; fnName: string }[] = [
  { queue: "scrape:pricing:fluxlight", modPath: "./scrapers/fluxlight", fnName: "scrapeFluxlight" },
  { queue: "scrape:pricing:gbics", modPath: "./scrapers/gbics", fnName: "scrapeGbics" },
  { queue: "scrape:pricing:optcore", modPath: "./scrapers/optcore", fnName: "scrapeOptcore" },
  { queue: "scrape:pricing:champion-one", modPath: "./scrapers/champion-one", fnName: "scrapeChampionOne" },
  { queue: "scrape:pricing:sfpcables", modPath: "./scrapers/sfpcables", fnName: "scrapeSfpCables" },
  { queue: "scrape:pricing:blueoptics", modPath: "./scrapers/blueoptics", fnName: "scrapeBlueOptics" },
  { queue: "scrape:pricing:fiber24", modPath: "./scrapers/fiber24", fnName: "scrapeFiber24" },
  { queue: "scrape:pricing:tscom", modPath: "./scrapers/tscom", fnName: "scrapeTsCom" },
  { queue: "scrape:pricing:skylane", modPath: "./scrapers/skylane", fnName: "scrapeSkylane" },
  { queue: "scrape:pricing:ascentoptics", modPath: "./scrapers/ascentoptics", fnName: "scrapeAscentOptics" },
  { queue: "scrape:pricing:gaotek", modPath: "./scrapers/gaotek", fnName: "scrapeGaoTek" },
  { queue: "scrape:catalog:smartoptics", modPath: "./scrapers/smartoptics", fnName: "scrapeSmartOptics" },
  { queue: "scrape:catalog:hubersuhner", modPath: "./scrapers/hubersuhner", fnName: "scrapeHuberSuhner" },
  { queue: "scrape:news", modPath: "./scrapers/news", fnName: "scrapeNews" },
];

// Market intelligence is registered separately below (module loaded lazily per job).
const MARKET_INTEL_QUEUE = "scrape:market-intel";

const PI_QUEUES = [...SCRAPERS.map((s) => s.queue), MARKET_INTEL_QUEUE];

async function main() {
  const piName = process.env.PI_NAME || "pi";
  console.log(`\n=== TIP Pi Scraper (${piName}) ===\n`);

  const boss = new PgBoss({
    connectionString,
    retryLimit: 2,
    retryDelay: 60,
    expireInSeconds: 3600,
    monitorStateIntervalSeconds: 60,
  });

  boss.on("error", (e) => console.error("pg-boss error:", e));
  await boss.start();

  // Best effort: a failure here (e.g. the queue already exists) is ignored.
  for (const q of PI_QUEUES) {
    await boss.createQueue(q).catch(() => {});
  }

  // Register workers for all Pi-safe scrapers
  for (const { queue, modPath, fnName } of SCRAPERS) {
    const mod = await import(modPath);
    const fn = mod[fnName];
    if (!fn) { console.warn(`No function ${fnName} in ${modPath}`); continue; }
    await boss.work(queue, async () => {
      console.log(`[${new Date().toISOString()}] [${piName}] Running: ${queue}`);
      // NOTE(review): errors are logged and not rethrown, so the handler always
      // resolves — presumably pg-boss then never applies retryLimit; confirm intended.
      try { await fn(); }
      catch (e) { console.error(`[${queue}] failed:`, String(e).slice(0, 200)); }
    });
  }

  // Market intel worker
  await boss.work(MARKET_INTEL_QUEUE, async () => {
    console.log(`[${new Date().toISOString()}] Running: Market intelligence`);
    const { scrapeMarketIntelligence } = await import("./scrapers/market-intelligence");
    try { await scrapeMarketIntelligence(); }
    catch (e) { console.error("market-intel failed:", String(e).slice(0, 200)); }
  });

  console.log(`Pi worker registered for ${PI_QUEUES.length} queues\nWaiting for jobs...\n`);

  process.on("SIGTERM", async () => { await boss.stop(); process.exit(0); });
  process.on("SIGINT", async () => { await boss.stop(); process.exit(0); });
}

main().catch((e) => { console.error("Fatal:", e); process.exit(1); });
PIEOF
|
||||
|
||||
# ── 7. WireGuard (connects to Erik 10.10.0.1 for DB access) ─────────────────
# Runs only when WG_PRIVKEY is provided; otherwise the tunnel setup is skipped.
WG_PRIVKEY="${WG_PRIVKEY:-}"
ERIK_PUBKEY="nrh8xiPzUWwLDK4y6+Cu0V3ne56zobIHKtxMGb7BKQo="
ERIK_ENDPOINT="217.154.82.179:51820"
WG_ADDR="${WG_ADDR:-10.10.0.9}"  # override per Pi: WG_ADDR=10.10.0.6

if [ -n "$WG_PRIVKEY" ]; then
  sudo apt-get install -y wireguard-tools 2>/dev/null | tail -1 || true

  # Detect primary outgoing interface
  OUTIF=$(ip route get 8.8.8.8 2>/dev/null | awk '{for(i=1;i<=NF;i++) if($i=="dev") print $(i+1)}' | head -1)
  POSTUPCMD=""
  if [ -n "$OUTIF" ] && ! ping -c1 -W2 8.8.8.8 &>/dev/null; then
    # Fallback route for WG traffic if default interface has no internet
    GW=$(ip route | awk '/default/{print $3; exit}')
    # `ip route add` expects a bare address, so drop the ":port" part of the endpoint.
    ERIK_HOST="${ERIK_ENDPOINT%:*}"
    if [ -n "$GW" ]; then
      POSTUPCMD="PostUp = ip route add $ERIK_HOST via $GW dev $OUTIF 2>/dev/null || true"
    fi
  fi

  # Write the config straight to its final location with a restrictive umask;
  # staging it in /tmp would expose the private key in a world-readable file.
  sudo mkdir -p /etc/wireguard
  sudo sh -c 'umask 077; cat > /etc/wireguard/wg0.conf' <<WGEOF
[Interface]
PrivateKey = $WG_PRIVKEY
Address = $WG_ADDR/24
$POSTUPCMD

[Peer]
PublicKey = $ERIK_PUBKEY
Endpoint = $ERIK_ENDPOINT
AllowedIPs = 10.10.0.1/32
PersistentKeepalive = 25
WGEOF
  # Also tighten a pre-existing file (umask only applies to newly created files).
  sudo chmod 600 /etc/wireguard/wg0.conf

  sudo wg-quick down wg0 2>/dev/null || true
  sudo wg-quick up wg0
  sudo systemctl enable wg-quick@wg0
  echo "WireGuard: $(sudo wg show wg0 | grep 'latest handshake' || echo 'starting...')"
else
  echo "WireGuard: skipped (set WG_PRIVKEY and WG_ADDR to enable)"
fi
|
||||
|
||||
# ── 8. PM2 process ───────────────────────────────────────────────────────────
# Start the worker; if a process with this name already exists, restart it.
# --update-env makes the restart pick up the current PI_NAME, which a plain
# `pm2 restart` would not refresh.
cd "$INSTALL_DIR"
PI_NAME="$PI_NAME" pm2 start \
  --name "tip-pi-scraper" \
  --interpreter "$(which tsx)" \
  --cwd "$INSTALL_DIR" \
  packages/scraper/src/index-pi.ts \
  || PI_NAME="$PI_NAME" pm2 restart tip-pi-scraper --update-env

pm2 save
echo ""
echo "✅ TIP Pi Scraper ($PI_NAME) is running"
echo " pm2 logs tip-pi-scraper — view logs"
echo " pm2 status — check status"
echo ""
echo "DB target: $DB_HOST:$DB_PORT/$DB_NAME"
# The queue list lives in the generated TypeScript file, not in this shell, so
# point at it instead of printing a count from an array bash never defined.
echo "Jobs: lightweight fetch-only scrapers (queues listed in packages/scraper/src/index-pi.ts), all day every day"
|
||||
Loading…
x
Reference in New Issue
Block a user