- Flexoptix vendor scraper: 285 supported switch vendors ingested from flexoptix.net/en/supported-vendors/ (our own data, no restrictions) - 10Gtek Playwright scraper: Chinese OEM competitor pricing (SFP+, SFP28, QSFP+, QSFP28, QSFP-DD categories) - News feeds expanded: added Lightwave, Fierce Telecom, Data Center Knowledge, SDxCentral, Cisco Blogs, Arista Blog (11 total sources) - Scheduler updated: 8 job queues with appropriate intervals - DB now: 297 vendors, 89 transceivers, 33 news articles (13 relevant)
132 lines
3.8 KiB
TypeScript
132 lines
3.8 KiB
TypeScript
/**
 * Flexoptix Supported Vendors Scraper
 *
 * Scrapes flexoptix.net/en/supported-vendors/ for the full list of
 * switch vendors Flexoptix supports. This is our own data — no restrictions.
 *
 * Data goes into: the vendors table (one row per supported vendor name).
 * NOTE: scraping per-vendor pages for individual switch models (the
 * switches table) is not implemented in this file; only the vendor list
 * is ingested here.
 */
|
|
import { pool } from "../utils/db";
|
|
|
|
/** One supported vendor as discovered on the Flexoptix supported-vendors page. */
interface VendorEntry {
  /** Human-readable vendor name extracted from the link slug or link text. */
  name: string;

  /** The href the vendor was found under, as it appeared in the page markup. */
  url: string;
}
|
|
|
|
/** Named HTML entities that can plausibly appear inside an href or link text. */
const NAMED_HTML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["amp", "&"],
  ["quot", '"'],
  ["apos", "'"],
  ["lt", "<"],
  ["gt", ">"],
]);

/**
 * Decodes numeric (`&#x2F;`, `&#47;`) and a small set of named (`&amp;`)
 * HTML character references. Unknown or out-of-range references are
 * returned unchanged so that no input text is ever lost.
 */
function decodeHtmlEntities(text: string): string {
  return text.replace(
    /&(?:#x([0-9a-f]+)|#(\d+)|([a-z]+));/gi,
    (whole: string, hex?: string, dec?: string, named?: string): string => {
      if (named !== undefined) {
        return NAMED_HTML_ENTITIES.get(named.toLowerCase()) ?? whole;
      }
      const codePoint = hex !== undefined ? parseInt(hex, 16) : parseInt(dec ?? "", 10);
      const isValid = Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff;
      return isValid ? String.fromCodePoint(codePoint) : whole;
    },
  );
}

/**
 * Extracts vendor entries from the supported-vendors page markup.
 *
 * Two link shapes are recognised:
 *  1. href="...supported-vendors/index/name/VENDOR-compatible" — the vendor
 *     name is taken from the URL slug.
 *  2. An anchor directly inside an element whose class contains "vendor" —
 *     the vendor name is taken from the link text.
 *
 * Entries are de-duplicated by case-insensitive name; the first occurrence
 * wins, and shape 1 is always scanned before shape 2.
 */
function parseVendorLinks(html: string): VendorEntry[] {
  const byLowerName = new Map<string, VendorEntry>();
  const addIfNew = (entry: VendorEntry): void => {
    const key = entry.name.toLowerCase();
    if (!byLowerName.has(key)) byLowerName.set(key, entry);
  };

  const compatibleLink =
    /href="(https?:\/\/www\.flexoptix\.net\/en\/supported-vendors\/index\/name\/([^"]+)-compatible)"/g;
  for (const [, href = "", slug = ""] of html.matchAll(compatibleLink)) {
    // The slug is URL-encoded: "+" and %20 stand for spaces, %28/%29 for parentheses.
    const name = decodeHtmlEntities(slug)
      .replace(/\+/g, " ")
      .replace(/%20/g, " ")
      .replace(/%28/g, "(")
      .replace(/%29/g, ")");
    addIfNew({ name, url: decodeHtmlEntities(href) });
  }

  // Also catch plain link text pattern
  const vendorClassLink =
    /class="[^"]*vendor[^"]*"[^>]*>\s*<a[^>]*href="([^"]+)"[^>]*>([^<]+)<\/a>/gi;
  for (const [, href = "", text = ""] of html.matchAll(vendorClassLink)) {
    const name = decodeHtmlEntities(text).trim();
    if (name) addIfNew({ name, url: decodeHtmlEntities(href) });
  }

  return [...byLowerName.values()];
}

/**
 * Downloads the Flexoptix supported-vendors page and returns the vendors
 * found on it, de-duplicated by case-insensitive name.
 *
 * @returns The vendor entries in the order they first appear in the page.
 * @throws Error if the server answers with a non-2xx status. Network
 *   failures and the 30 s timeout surface as whatever `fetch` rejects with.
 */
async function fetchVendorList(): Promise<VendorEntry[]> {
  const resp = await fetch("https://www.flexoptix.net/en/supported-vendors/", {
    headers: {
      "User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; internal)",
      Accept: "text/html",
    },
    signal: AbortSignal.timeout(30000),
  });

  if (!resp.ok) throw new Error(`Flexoptix returned ${resp.status}`);

  return parseVendorLinks(await resp.text());
}
|
|
|
|
/**
 * Builds a URL-safe slug from a vendor name: lower-cases it, collapses every
 * run of characters outside a-z / 0-9 into a single hyphen, and strips one
 * leading and one trailing hyphen.
 */
function slugify(name: string): string {
  const hyphenated = name.toLowerCase().replace(/[^a-z0-9]+/g, "-");
  const trimmed = hyphenated.replace(/^-|-$/g, "");
  return trimmed;
}
|
|
|
|
/**
 * Inserts a vendor row of type 'manufacturer', or — when a row with the same
 * name already exists — fills in its website only if that column is still
 * NULL (COALESCE keeps any existing value).
 *
 * The website stored is always the Flexoptix supported-vendors landing page.
 * NOTE(review): `ON CONFLICT (name)` presumes a unique constraint on
 * vendors.name — confirm against the schema.
 *
 * @param name Vendor name; also the source of the generated slug.
 * @returns The `id` column of the inserted or updated row.
 */
async function upsertVendor(name: string): Promise<string> {
  const upsertSql = `INSERT INTO vendors (name, slug, type, website)
     VALUES ($1, $2, 'manufacturer', $3)
     ON CONFLICT (name) DO UPDATE SET website = COALESCE(vendors.website, EXCLUDED.website)
     RETURNING id`;
  const params = [name, slugify(name), `https://www.flexoptix.net/en/supported-vendors/`];

  const { rows } = await pool.query(upsertSql, params);
  return rows[0].id;
}
|
|
|
|
/**
 * Entry point of the scraper: fetches the supported-vendor list, upserts
 * every vendor into the vendors table, and logs how many were new versus
 * already present.
 *
 * A failure while saving one vendor is logged and skipped so that the
 * remaining vendors are still processed.
 *
 * @throws Whatever `fetchVendorList` rejects with (HTTP error, timeout,
 *   network failure); per-vendor database errors are not rethrown.
 */
export async function scrapeFlexoptixVendors(): Promise<void> {
  console.log("=== Flexoptix Vendor Scraper Starting ===\n");

  const vendors = await fetchVendorList();
  console.log(`Found ${vendors.length} supported vendors\n`);

  let newVendors = 0;
  let updatedVendors = 0;

  for (const vendor of vendors) {
    try {
      // Case-insensitive equality. ILIKE is deliberately avoided: it would
      // treat "%", "_" and "\" inside a scraped name as pattern syntax and
      // could report a brand-new vendor as already existing.
      const existing = await pool.query(
        `SELECT id FROM vendors WHERE LOWER(name) = LOWER($1)`,
        [vendor.name]
      );

      await upsertVendor(vendor.name);

      if (existing.rows.length === 0) {
        newVendors++;
        console.log(` + NEW: ${vendor.name}`);
      } else {
        updatedVendors++;
      }
    } catch (err: unknown) {
      // Anything can be thrown, so narrow before reading `.message`.
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(` Error saving vendor ${vendor.name}:`, reason);
    }
  }

  console.log(`\nVendors: ${vendors.length} total, ${newVendors} new, ${updatedVendors} existing`);
  console.log("=== Flexoptix Vendor Scraper Complete ===\n");
}
|
|
|
|
// CLI entry point: run the scraper when this file is executed directly
// (not when it is imported by another module), then close the DB pool.
if (require.main === module) {
  scrapeFlexoptixVendors()
    .then(() => pool.end())
    .catch(async (err: unknown) => {
      console.error("Fatal:", err);
      try {
        // Wait for the pool to close before exiting; process.exit() would
        // otherwise cut the shutdown short.
        await pool.end();
      } catch {
        // Best effort: the pool may already be closed or closing.
      }
      process.exit(1);
    });
}
|