Rene Fichtmueller aa977abc97 feat(v0.2.0): Sales Intelligence Engine — Phase 0+A
New API routes:
- GET /api/finder — Switch→Flexoptix transceiver finder with FlexBox coding
- GET /api/competitor-alerts — Competitor intelligence (price changes, new products, stock)
- GET /api/forecast/:technology — Sales forecast 3/9/12/18 months + buy/wait/hold signal
- POST /api/transport/plan — Transport system planner (city→city BOM with fiber providers)

New MCP tools:
- find_flexoptix_for_switch — Customer switch → Flexoptix products
- get_competitor_alerts — Competitor monitoring
- plan_transport — Network transport planning
- forecast_sales — Volume/revenue prediction
- generate_blog — Enhanced blog generation

New DB tables (migration 013):
- competitor_alerts, price_changes, flexoptix_product_map
- sales_forecasts, fiber_providers, fiber_routes, cities
- generated_datasheets, blog_series
- Views: v_price_coverage, v_image_coverage, v_switch_flexoptix_finder

Seed data (migration 014):
- 25 European cities with IX/DC locations + coordinates
- 15 fiber providers (euNetworks, Telia, DTAG, Colt, Zayo, etc.)
- 16 fiber routes with pricing (Germany focus)

Infrastructure:
- Scraper scheduler: 2h Flexoptix, 4h FS.com/Optcore (was 6-8h)
- Change detector for competitor price/stock monitoring
- Image downloader utility with coverage tracking
2026-03-31 08:51:22 +02:00

285 lines
9.2 KiB
TypeScript

/**
* FS.com Scraper — Prices, Stock, Product Catalog
*
* FS.com renders products client-side (JS), so we use PlaywrightCrawler.
* Categories: /c/optical-transceivers-9
*
* Respects: robots.txt, rate limiting (at most 15 requests per minute, one page at a time)
*/
import { PlaywrightCrawler } from "crawlee";
import { ensureVendor, upsertPriceObservation, findOrCreateScrapedTransceiver, pool } from "../utils/db";
import { contentHash, parsePrice, parseStockLevel, parseQuantity } from "../utils/hash";
/** Origin of the FS.com storefront; relative category and product paths are appended to it. */
const BASE_URL = "https://www.fs.com";

/**
 * Category listing paths (relative to BASE_URL) that seed the crawl.
 * Each entry is turned into one start URL.
 */
const CATEGORY_URLS: string[] = [
  // Grey optics, ordered by line rate
  "/c/1g-sfp-81",
  "/c/10g-sfp-63",
  "/c/25g-sfp28-3215",
  "/c/40g-qsfp-1360",
  "/c/100g-qsfp28-sfp-dd-1159",
  "/c/200g-qsfp-dd-qsfp56-3542",
  "/c/400g-osfp-qsfp112-qsfp-dd-3652",
  "/c/800g-osfp-qsfp-dd-4089",
  "/c/1.6t-osfp-5597",
  // Coherent and WDM optics
  "/c/400g-coherent-qsfp-dd-4103",
  "/c/10g-cwdm-dwdm-sfp-65",
  "/c/100g-dwdm-qsfp28-3863",
];
/** One product listing extracted from an FS.com category page, normalised for storage. */
interface FsProduct {
  /** Identifier used for deduplication and passed to the database as the part number. */
  partNumber: string;
  /** Product name as shown on the listing card. */
  name: string;
  /** Numeric price produced by `parsePrice`; only positive prices are collected. */
  price: number;
  /** Currency reported by `parsePrice` for the price text. */
  currency: string;
  /** Result of `parseStockLevel`, or "on_request" when the card shows no stock text. */
  stockLevel: string;
  /** Result of `parseQuantity` on the stock text; absent when no stock text was found. */
  quantity?: number;
  /** Absolute product URL. */
  url: string;
  /** Form factor inferred from the product name, when recognisable. */
  formFactor?: string;
  /** Line rate in Gbit/s inferred from the product name, when recognisable. */
  speedGbps?: number;
  /** Line-rate label (for example "100G") matching `speedGbps`. */
  speed?: string;
  /** Reach label (for example "10km") inferred from the product name. */
  reachLabel?: string;
}
/**
 * Infers the transceiver form factor from free text such as a product name.
 *
 * Matching is case-insensitive. Checks run from the most specific family to
 * the least specific one because the short names are substrings of the long
 * ones ("sfp" is contained in "qsfp28", "sfp-dd" in "qsfp-dd"), so the order
 * of the checks is significant.
 *
 * @param text Product name or description to inspect.
 * @returns The form-factor label, or `undefined` when no known family is mentioned.
 */
function detectFormFactor(text: string): string | undefined {
  const lower = text.toLowerCase();
  const has = (needle: string): boolean => lower.includes(needle);

  // OSFP only when no QSFP variant is named alongside it.
  if (has("osfp") && !has("qsfp")) return "OSFP";

  // QSFP family, most specific first.
  if (has("qsfp-dd800") || has("qsfp-dd 800")) return "QSFP-DD800";
  if (has("qsfp-dd")) return "QSFP-DD";
  if (has("qsfp112")) return "QSFP112";
  if (has("qsfp56")) return "QSFP56";
  if (has("qsfp28")) return "QSFP28";
  if (has("qsfp+") || has("qsfp plus")) return "QSFP+";

  // SFP family. "qsfp-dd" was handled above, so a remaining "sfp-dd" is the
  // double-density SFP rather than a fragment of the QSFP name.
  if (has("sfp-dd")) return "SFP-DD";
  if (has("sfp56")) return "SFP56";
  if (has("sfp28")) return "SFP28";
  if (has("sfp+") || has("sfp plus")) return "SFP+";
  if (has("sfp") && !has("qsfp")) return "SFP";

  if (has("cfp2")) return "CFP2";
  if (has("xfp")) return "XFP";
  return undefined;
}
/**
 * Line-rate patterns, highest rate first so that a name listing several rates
 * (for example "100G ... 4x25G") resolves to the aggregate rate.
 *
 * Each pattern requires that the number is not preceded by another digit or a
 * dot (so "1.25G" is not read as 25G), and the "G" patterns reject a following
 * "Hz" (so DWDM grid spacing such as "100GHz" is not read as a data rate).
 */
const SPEED_PATTERNS: ReadonlyArray<readonly [RegExp, string, number]> = [
  [/(?:^|[^\d.])1\.6\s*t\b/i, "1.6T", 1600],
  [/(?:^|[^\d.])800\s*g(?!hz)/i, "800G", 800],
  [/(?:^|[^\d.])400\s*g(?!hz)/i, "400G", 400],
  [/(?:^|[^\d.])200\s*g(?!hz)/i, "200G", 200],
  [/(?:^|[^\d.])100\s*g(?!hz)/i, "100G", 100],
  [/(?:^|[^\d.])50\s*g(?!hz)/i, "50G", 50],
  [/(?:^|[^\d.])40\s*g(?!hz)/i, "40G", 40],
  [/(?:^|[^\d.])25\s*g(?!hz)/i, "25G", 25],
  [/(?:^|[^\d.])10\s*g(?!hz)/i, "10G", 10],
  [/(?:^|[^\d.])1\s*g\b/i, "1G", 1],
];

/**
 * Infers the line rate from free text such as a product name.
 *
 * @param text Product name or description to inspect.
 * @returns The rate label and its value in Gbit/s, or `undefined` when no
 *          known rate is mentioned.
 */
function detectSpeed(text: string): { speed: string; speedGbps: number } | undefined {
  for (const [pattern, speed, speedGbps] of SPEED_PATTERNS) {
    if (pattern.test(text)) return { speed, speedGbps };
  }
  return undefined;
}
/**
 * Extracts the first distance mentioned in the text as a compact reach label.
 *
 * Accepts whole and decimal distances in metres or kilometres, with optional
 * whitespace before the unit and any letter case ("10 KM" yields "10km").
 * Units that merely start with "m" (such as "nm" wavelengths) are not matched
 * because a digit must directly precede the unit.
 *
 * @param text Product name or description to inspect.
 * @returns A label such as "300m" or "1.4km", or `undefined` when no distance is found.
 */
function detectReach(text: string): string | undefined {
  const match = /(\d+(?:\.\d+)?)\s*(km|m)\b/i.exec(text);
  if (!match) return undefined;

  const distance = match[1];
  const unit = match[2];
  if (distance === undefined || unit === undefined) return undefined;

  return `${distance}${unit.toLowerCase()}`;
}
/**
 * Scrapes the FS.com transceiver category pages and stores one price
 * observation per unique product.
 *
 * Steps:
 *  1. Ensure the "FS.COM" vendor row exists and obtain its id.
 *  2. Visit every entry of CATEGORY_URLS with a single-concurrency Playwright
 *     crawler and extract the product cards from the rendered page.
 *  3. Deduplicate the collected products by part number (first occurrence wins).
 *  4. Find or create each transceiver and upsert its price observation.
 *
 * A failure while writing a single product is logged and counted; it does not
 * abort the run. Errors from vendor setup or from the crawler itself propagate
 * to the caller.
 */
export async function scrapeFs(): Promise<void> {
  console.log("=== FS.com Scraper Starting ===\n");

  const vendorId = await ensureVendor(
    "FS.COM",
    "compatible",
    "https://www.fs.com",
    "https://www.fs.com/c/optical-transceivers-9"
  );
  console.log(`Vendor ID: ${vendorId}`);

  const products: FsProduct[] = [];
  let pagesScraped = 0;

  const crawler = new PlaywrightCrawler({
    maxConcurrency: 1,
    maxRequestsPerMinute: 15,
    requestHandlerTimeoutSecs: 60,
    headless: true,
    launchContext: {
      launchOptions: {
        args: ["--disable-blink-features=AutomationControlled", "--lang=en-US"],
      },
    },
    preNavigationHooks: [
      // Request the English storefront with USD prices before each navigation.
      async ({ page }) => {
        await page.setExtraHTTPHeaders({
          "Accept-Language": "en-US,en;q=0.9",
        });
        await page.context().addCookies([
          { name: "currency", value: "USD", domain: ".fs.com", path: "/" },
          { name: "lang", value: "en", domain: ".fs.com", path: "/" },
          { name: "country", value: "US", domain: ".fs.com", path: "/" },
        ]);
      },
    ],
    async requestHandler({ page, request, log }) {
      const url = request.url;
      log.info(`Scraping: ${url}`);

      // Wait for Vue.js product grid to render
      await page.waitForTimeout(4000);

      // The callback below executes inside the browser page, so it must not
      // reference anything from this module's scope.
      const productData = await page.evaluate(() => {
        const results: Array<{
          name: string;
          href: string;
          price: string;
          stock: string;
          partNumber: string;
        }> = [];

        // Strategy 1: Parse .category__grid__item cards (2026 Vue.js DOM)
        const gridItems = document.querySelectorAll(".category__grid__item");
        for (const item of gridItems) {
          const link = item.querySelector('a[href*="/products/"]') as HTMLAnchorElement | null;
          const img = item.querySelector("img");
          const priceEl = item.querySelector(".grid__price");
          const allText = item.textContent || "";
          if (!link) continue;

          const name = img?.getAttribute("alt")?.trim() || link.textContent?.trim() || "";
          const href = link.getAttribute("href") || "";
          const price = priceEl?.textContent?.trim() || "";

          // Extract stock from text like "1914 in Global Warehouse"
          const stockMatch = allText.match(/(\d+)\s+in\s+(?:Global\s+)?Warehouse/i);
          const stock = stockMatch ? stockMatch[1] + " in stock" : "";

          // Extract FS product ID from URL
          const pnMatch = href.match(/products\/(\d+)\.html/);
          const partNumber = pnMatch ? `FS-${pnMatch[1]}` : "";

          if (name && href) {
            results.push({ name, href, price, stock, partNumber });
          }
        }

        // Strategy 2: Fallback — look for product links with prices nearby
        if (results.length === 0) {
          const productLinks = document.querySelectorAll(
            'a[href*="/products/"], a[href*="/product/"]'
          );
          for (const link of productLinks) {
            const el = link as HTMLAnchorElement;
            const name = el.textContent?.trim() || "";
            const href = el.getAttribute("href") || "";
            if (!name || name.length < 5 || !href) continue;

            const container =
              el.closest('[class*="product"]') ||
              el.closest('[class*="item"]') ||
              el.closest("li") ||
              el.parentElement?.parentElement;

            let price = "";
            let stock = "";
            if (container) {
              const priceEl = container.querySelector('[class*="price"]');
              price = priceEl?.textContent?.trim() || "";
              const stockEl = container.querySelector('[class*="stock"], [class*="avail"]');
              stock = stockEl?.textContent?.trim() || "";
            }

            // Use the same "FS-<id>" scheme as Strategy 1 whenever the URL
            // carries a numeric product id, so a product keeps one identity
            // regardless of which strategy found it. Otherwise fall back to
            // the last path segment without extension and query string.
            const idMatch = href.match(/products?\/(\d+)\.html/);
            const partNumber = idMatch
              ? `FS-${idMatch[1]}`
              : href.split("/").pop()?.replace(".html", "")?.replace(/\?.*/, "") || "";

            results.push({ name, href, price, stock, partNumber });
          }
        }

        return results;
      });

      for (const item of productData) {
        if (!item.name || !item.price) continue;

        const { price, currency } = parsePrice(item.price);
        // Written as a negated "> 0" so that a NaN price is rejected as well.
        if (!(price > 0)) continue;

        const speedInfo = detectSpeed(item.name);
        products.push({
          // Cards without a recognisable id are keyed by a prefix of their name.
          partNumber: item.partNumber || item.name.slice(0, 50),
          name: item.name,
          price,
          currency,
          stockLevel: item.stock ? parseStockLevel(item.stock) : "on_request",
          quantity: item.stock ? parseQuantity(item.stock) : undefined,
          url: item.href.startsWith("http") ? item.href : `${BASE_URL}${item.href}`,
          formFactor: detectFormFactor(item.name),
          speedGbps: speedInfo?.speedGbps,
          speed: speedInfo?.speed,
          reachLabel: detectReach(item.name),
        });
      }

      pagesScraped++;
      log.info(` Found ${productData.length} items on page`);
    },
  });

  const startUrls = CATEGORY_URLS.map((path) => `${BASE_URL}${path}`);
  await crawler.run(startUrls);

  console.log(`\nPages scraped: ${pagesScraped}`);
  console.log(`Products found: ${products.length}`);

  // Deduplicate by partNumber
  const uniqueProducts = new Map<string, FsProduct>();
  for (const p of products) {
    const key = p.partNumber || p.name;
    if (!uniqueProducts.has(key)) {
      uniqueProducts.set(key, p);
    }
  }

  // Write to database
  let written = 0;
  let skipped = 0;
  let failed = 0;
  for (const p of uniqueProducts.values()) {
    try {
      const transceiverId = await findOrCreateScrapedTransceiver({
        partNumber: p.partNumber,
        vendorId,
        formFactor: p.formFactor,
        speedGbps: p.speedGbps,
        speed: p.speed,
        reachLabel: p.reachLabel,
        category: "DataCenter",
      });

      // The hash covers only the fields whose change makes an observation new.
      const hash = contentHash({ price: p.price, stock: p.stockLevel, qty: p.quantity });
      const isNew = await upsertPriceObservation({
        transceiverId,
        sourceVendorId: vendorId,
        price: p.price,
        currency: p.currency,
        stockLevel: p.stockLevel,
        quantityAvailable: p.quantity,
        url: p.url,
        contentHash: hash,
      });

      if (isNew) written++;
      else skipped++;
    } catch (err) {
      // Keep going: one bad row must not discard the rest of the run.
      failed++;
      const message = err instanceof Error ? err.message : String(err);
      console.error(` Error: ${p.partNumber}:`, message);
    }
  }

  console.log(
    `\nDatabase: ${written} new, ${skipped} unchanged, ${failed} failed (${uniqueProducts.size} unique)`
  );
  console.log("=== FS.com Scraper Complete ===\n");
}
// Command-line entry point: run the scraper when this file is executed
// directly, then close the database pool. On failure the pool shutdown is
// awaited before the process exits with a non-zero status, so the exit does
// not cut the shutdown short.
if (require.main === module) {
  scrapeFs()
    .then(() => pool.end())
    .catch(async (err: unknown) => {
      console.error("Fatal:", err);
      try {
        await pool.end();
      } catch (closeErr) {
        // The pool may already be closed (or closing) if the failure came from
        // the shutdown itself; report it and still exit with an error status.
        console.error("Failed to close database pool:", closeErr);
      }
      process.exit(1);
    });
}