transceiver-db/packages/scraper/src/scrapers/flexoptix-vendors.ts
Rene Fichtmueller bd3a02ae4b feat: add Flexoptix vendor scraper, 10Gtek pricing scraper, expand news feeds
- Flexoptix vendor scraper: 285 supported switch vendors ingested from
  flexoptix.net/en/supported-vendors/ (our own data, no restrictions)
- 10Gtek Playwright scraper: Chinese OEM competitor pricing (SFP+, SFP28,
  QSFP+, QSFP28, QSFP-DD categories)
- News feeds expanded: added Lightwave, Fierce Telecom, Data Center Knowledge,
  SDxCentral, Cisco Blogs, Arista Blog (11 total sources)
- Scheduler updated: 8 job queues with appropriate intervals
- DB now: 297 vendors, 89 transceivers, 33 news articles (13 relevant)
2026-03-27 23:17:42 +13:00

132 lines
3.8 KiB
TypeScript

/**
* Flexoptix Supported Vendors Scraper
*
* Scrapes flexoptix.net/en/supported-vendors/ for the full list of
* switch vendors Flexoptix supports. This is our own data — no restrictions.
*
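* Data goes into: the vendors table (one row per supported vendor name).
* NOTE: as written, this module does not write to the switches table and does
* not yet scrape the per-vendor pages for individual switch models; only the
* links to those pages are collected.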
*/
import { pool } from "../utils/db";
/** One vendor discovered on the supported-vendors listing page. */
type VendorEntry = {
  /** Vendor name as derived from the link (URL segment or link text). */
  name: string;
  /** The href the name was found with. */
  url: string;
};
/** Named HTML entities understood by decodeHtmlEntities; any other name is left untouched. */
const NAMED_HTML_ENTITIES = new Map<string, string>([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
]);

/**
 * Decodes numeric (`&#x3A;`, `&#58;`) and a few common named (`&amp;`) HTML
 * entities in a single pass, so `&amp;#x3A;` becomes `&#x3A;` and not `:`.
 */
function decodeHtmlEntities(text: string): string {
  return text.replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z]+);/gi, (entity: string, body: string) => {
    if (body.startsWith("#")) {
      const isHex = body[1] === "x" || body[1] === "X";
      const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // References beyond the Unicode range cannot be represented; keep the source text.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    }
    return NAMED_HTML_ENTITIES.get(body) ?? entity;
  });
}

/**
 * Converts the `VENDOR` path segment of a vendor link into a display name:
 * `+` means space and `%XX` escapes are percent-decoded.
 */
function decodeVendorSlug(slug: string): string {
  const spaced = slug.replace(/\+/g, " ");
  try {
    return decodeURIComponent(spaced).trim();
  } catch {
    // decodeURIComponent throws on malformed escapes (e.g. a bare "%"); fall
    // back to the handful of escapes that were always handled here.
    return spaced
      .replace(/%20/g, " ")
      .replace(/%28/g, "(")
      .replace(/%29/g, ")")
      .trim();
  }
}

/**
 * Downloads the Flexoptix supported-vendors page and extracts the vendors it links to.
 *
 * Two link shapes are recognised:
 *  1. hrefs of the form `.../supported-vendors/index/name/VENDOR-compatible`
 *     (the name is taken from the `VENDOR` URL segment), and
 *  2. an `<a>` that directly follows an element whose class contains "vendor"
 *     (the name is taken from the link text).
 *
 * HTML entities are decoded before the href is inspected, so entity-encoded
 * attribute values (`https&#x3A;&#x2F;&#x2F;...`) are matched as well.
 *
 * @returns Vendors in page order (shape 1 first, then shape 2), de-duplicated
 *          case-insensitively by name; the first occurrence wins.
 * @throws Error when the response status is not OK; network failures and the
 *         30 s timeout surface as the rejection produced by `fetch`.
 */
async function fetchVendorList(): Promise<VendorEntry[]> {
  const resp = await fetch("https://www.flexoptix.net/en/supported-vendors/", {
    headers: {
      "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; internal)",
      Accept: "text/html",
    },
    signal: AbortSignal.timeout(30000),
  });
  if (!resp.ok) throw new Error(`Flexoptix returned ${resp.status}`);
  const html = await resp.text();

  // Keyed by lower-cased name; Map iteration order preserves first-seen order.
  const byName = new Map<string, VendorEntry>();
  const remember = (name: string, url: string): void => {
    const key = name.toLowerCase();
    if (name !== "" && !byName.has(key)) byName.set(key, { name, url });
  };

  // Shape 1: href="...supported-vendors/index/name/VENDOR-compatible"
  const vendorHref =
    /^https?:\/\/www\.flexoptix\.net\/en\/supported-vendors\/index\/name\/([\s\S]+)-compatible$/;
  for (const [, rawHref = ""] of html.matchAll(/href="([^"]+)"/g)) {
    const href = decodeHtmlEntities(rawHref);
    const slug = vendorHref.exec(href)?.[1];
    if (slug !== undefined) remember(decodeVendorSlug(slug), href);
  }

  // Shape 2: plain link text inside an element carrying a "vendor" class
  const altRegex = /class="[^"]*vendor[^"]*"[^>]*>\s*<a[^>]*href="([^"]+)"[^>]*>([^<]+)<\/a>/gi;
  for (const [, rawHref = "", rawText = ""] of html.matchAll(altRegex)) {
    remember(decodeHtmlEntities(rawText).trim(), decodeHtmlEntities(rawHref));
  }

  return Array.from(byName.values());
}
/**
 * Builds a URL-safe slug: the lower-cased runs of `a-z0-9` in `name`,
 * joined by single hyphens, with no leading or trailing hyphen.
 *
 * @param name - Arbitrary display name.
 * @returns The slug; an empty string when `name` has no ASCII letters or digits.
 */
function slugify(name: string): string {
  const words = name.toLowerCase().split(/[^a-z0-9]+/);
  // A separator at either end of the input leaves an empty piece there; drop those.
  const kept = words.filter((word) => word.length > 0);
  return kept.join("-");
}
/**
 * Inserts a vendor row of type 'manufacturer', or — when a row with the same
 * `name` already exists — keeps it and only fills in `website` if that column
 * is currently NULL.
 *
 * @param name - Vendor name; its slug is derived with `slugify`.
 * @returns The `id` column of the inserted or pre-existing row.
 */
async function upsertVendor(name: string): Promise<string> {
  const statement = [
    "INSERT INTO vendors (name, slug, type, website)",
    "VALUES ($1, $2, 'manufacturer', $3)",
    "ON CONFLICT (name) DO UPDATE SET website = COALESCE(vendors.website, EXCLUDED.website)",
    "RETURNING id",
  ].join("\n");
  const params = [name, slugify(name), "https://www.flexoptix.net/en/supported-vendors/"];

  const { rows } = await pool.query(statement, params);
  return rows[0].id;
}
/**
 * Runs the full scrape: fetches the supported-vendor list, upserts every
 * vendor, and logs how many were new versus already present.
 *
 * A failure while saving a single vendor is logged and skipped so the rest of
 * the list is still processed; such vendors are counted in neither tally.
 *
 * @throws Whatever `fetchVendorList` rejects with (bad HTTP status, network
 *         error, timeout) — nothing is saved in that case.
 */
export async function scrapeFlexoptixVendors(): Promise<void> {
  console.log("=== Flexoptix Vendor Scraper Starting ===\n");
  const vendors = await fetchVendorList();
  console.log(`Found ${vendors.length} supported vendors\n`);

  let newVendors = 0;
  let updatedVendors = 0;
  for (const vendor of vendors) {
    try {
      // Case-insensitive equality rather than ILIKE: with ILIKE the vendor name
      // is a *pattern*, so `%`, `_` or `\` inside a name would match unrelated
      // rows and skew the new/existing tally.
      const existing = await pool.query(
        `SELECT id FROM vendors WHERE LOWER(name) = LOWER($1)`,
        [vendor.name]
      );
      await upsertVendor(vendor.name);
      if (existing.rows.length === 0) {
        newVendors++;
        console.log(` + NEW: ${vendor.name}`);
      } else {
        updatedVendors++;
      }
    } catch (err) {
      // Anything can be thrown; only Error instances are guaranteed a message.
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(` Error saving vendor ${vendor.name}:`, reason);
    }
  }

  console.log(`\nVendors: ${vendors.length} total, ${newVendors} new, ${updatedVendors} existing`);
  console.log("=== Flexoptix Vendor Scraper Complete ===\n");
}
// CLI entry point: only runs when this file is executed directly, not when imported.
if (require.main === module) {
  void (async () => {
    let failed = false;
    try {
      await scrapeFlexoptixVendors();
    } catch (err) {
      console.error("Fatal:", err);
      failed = true;
    }
    try {
      // Close the pool exactly once and wait for it, so the process is not
      // terminated while the shutdown is still in flight.
      await pool.end();
    } catch (err) {
      console.error("Fatal:", err);
      failed = true;
    }
    if (failed) process.exit(1);
  })();
}