feat: NOG conference talks scraper + hot topics integration

NOG Talks Scraper (packages/scraper/src/scrapers/nog-talks.ts):
- Crawls DENOG (15-17), NANOG (91-93), RIPE (87-89), ENOG, NLNOG, Euro-IX
- Relevance scoring: optical keywords (+3pts each), network keywords (+1pt)
  Only talks scoring ≥2 stored, high-relevance (≥6) also to market_intelligence
- CtxEvent cross-DB bridge: when ctxmeet DB has ConferenceTalk rows,
  pulls directly via dblink (same Postgres instance, no network hop)
- Runs weekly Monday 06:00 UTC (pg-boss schedule)
- Output: news_articles (source='NOG Talks: EVENT') + market_intelligence

Hot Topics (packages/api/src/routes/hot-topics.ts):
- SOURCE 3c: NOG talk clusters displayed as conference topics in hot list
  Grouped by event (DENOG15, NANOG93...) with speaker + abstract preview
  Filtered: source LIKE 'NOG Talks:%' AND relevance > 0.4 AND < 6 months
- Limit raised to 20 topics (was 15)
- Added nog_talks to sources metadata

Scheduler & Pi fleet:
- scrape:nog-talks queue registered in scheduler.ts + index-pi.ts
- Weekly cron: Monday 06:00 UTC (every Pi can handle it independently)
- First job triggered immediately
This commit is contained in:
Rene Fichtmueller 2026-04-02 22:38:00 +02:00
parent 48cb41b27e
commit 3226117733
4 changed files with 464 additions and 3 deletions

View File

@ -160,12 +160,48 @@ hotTopicsRouter.get("/", async (_req, res) => {
}); });
} }
// ═══ SOURCE 3c: NOG Conference Talks — scraped from NOG agendas ═══
const nogTalks = await pool.query(`
SELECT title, source, source_url, published_at, relevance_score
FROM news_articles
WHERE source LIKE 'NOG Talks:%'
AND relevance_score > 0.4
AND published_at > NOW() - INTERVAL '6 months'
ORDER BY relevance_score DESC, published_at DESC NULLS LAST
LIMIT 8
`).catch(() => ({ rows: [] }));
// Cluster NOG talks by NOG name
type NogRow = (typeof nogTalks.rows)[number];
const nogByEvent: Record<string, NogRow[]> = {};
for (const n of nogTalks.rows) {
const event = (n.source as string).replace("NOG Talks: ", "");
if (!nogByEvent[event]) nogByEvent[event] = [];
nogByEvent[event].push(n);
}
for (const [event, talks] of Object.entries(nogByEvent)) {
const topTalk = (talks as NogRow[])[0];
topics.push({
title: talks.length === 1
? `[${event}] ${topTalk.title}`
: `${event}: ${talks.length} optics-relevant talks`,
description: (talks as NogRow[]).map(t => t.title).slice(0, 3).join(" | "),
blog_type: "technology_deep_dive",
urgency: "hot",
source: event,
source_type: "conference",
data_context: { talks: (talks as NogRow[]).slice(0, 3) },
suggested_angle: `What ${event} presenters are actually deploying — lessons for your network refresh`,
});
}
// ═══ SOURCE 4: News Articles — Recent Industry News ═══ // ═══ SOURCE 4: News Articles — Recent Industry News ═══
const recentNews = await pool.query(` const recentNews = await pool.query(`
SELECT title, source, source_url, category, published_at, SELECT title, source, source_url, category, published_at,
COALESCE(relevance_score, 5) AS relevance COALESCE(relevance_score, 5) AS relevance
FROM news_articles FROM news_articles
WHERE published_at > NOW() - INTERVAL '14 days' WHERE source NOT LIKE 'NOG Talks:%'
AND published_at > NOW() - INTERVAL '14 days'
ORDER BY relevance_score DESC NULLS LAST, published_at DESC ORDER BY relevance_score DESC NULLS LAST, published_at DESC
LIMIT 12 LIMIT 12
`).catch(() => ({ rows: [] })); `).catch(() => ({ rows: [] }));
@ -214,12 +250,12 @@ hotTopicsRouter.get("/", async (_req, res) => {
tomorrow.setUTCHours(0, 0, 0, 0); tomorrow.setUTCHours(0, 0, 0, 0);
res.json({ res.json({
topics: topics.slice(0, 15), topics: topics.slice(0, 20),
total: topics.length, total: topics.length,
generated_at: new Date().toISOString(), generated_at: new Date().toISOString(),
refreshes_at: tomorrow.toISOString(), refreshes_at: tomorrow.toISOString(),
day_seed: getDaySeed(), day_seed: getDaySeed(),
sources: ["market_intelligence", "internal_price_data", "competitor_alerts", "hype_cycle_model", "news_articles", "conference_calendar", "research_papers"], sources: ["market_intelligence", "nog_talks", "internal_price_data", "competitor_alerts", "hype_cycle_model", "news_articles", "conference_calendar", "research_papers"],
}); });
} catch (err) { } catch (err) {
console.error("Hot topics error:", err); console.error("Hot topics error:", err);

View File

@ -71,6 +71,7 @@ const QUEUES = [
// Intelligence // Intelligence
"scrape:news", "scrape:news",
"scrape:market-intel", "scrape:market-intel",
"scrape:nog-talks",
"scrape:community-issues", "scrape:community-issues",
"scrape:datasheet-links", "scrape:datasheet-links",
// Switch assets // Switch assets
@ -209,6 +210,7 @@ async function main() {
await boss.work("scrape:news", async () => { log("news"); await scrapeNews(); }); await boss.work("scrape:news", async () => { log("news"); await scrapeNews(); });
await boss.work("scrape:market-intel", async () => { log("market-intel"); await withIsolatedStorage("market-intel", scrapeMarketIntelligence); }); await boss.work("scrape:market-intel", async () => { log("market-intel"); await withIsolatedStorage("market-intel", scrapeMarketIntelligence); });
await boss.work("scrape:nog-talks", async () => { log("nog-talks"); const { scrapeNogTalks } = await import("./scrapers/nog-talks"); await scrapeNogTalks(); });
await boss.work("scrape:community-issues", async () => { log("community"); await withIsolatedStorage("community", () => scrapeAllSwitchIssues(30)); }); await boss.work("scrape:community-issues", async () => { log("community"); await withIsolatedStorage("community", () => scrapeAllSwitchIssues(30)); });
await boss.work("scrape:datasheet-links", async () => { log("datasheets"); await findAndSeedDatasheetLinks(50); }); await boss.work("scrape:datasheet-links", async () => { log("datasheets"); await findAndSeedDatasheetLinks(50); });
await boss.work("scrape:assets:switches", async () => { log("switch-assets"); await withIsolatedStorage("switch-assets", () => scrapeSwitchAssets()); }); await boss.work("scrape:assets:switches", async () => { log("switch-assets"); await withIsolatedStorage("switch-assets", () => scrapeSwitchAssets()); });

View File

@ -102,6 +102,7 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
"enrich:ebay-switches", "enrich:ebay-switches",
// ── Intelligence & community (every 6h) ─────────────────────────── // ── Intelligence & community (every 6h) ───────────────────────────
"scrape:market-intel", "scrape:market-intel",
"scrape:nog-talks",
"scrape:community-issues", "scrape:community-issues",
"scrape:datasheet-links", "scrape:datasheet-links",
"scrape:news", "scrape:news",
@ -244,6 +245,8 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
// ══════════════════════════════════════════════════════════════════════ // ══════════════════════════════════════════════════════════════════════
await boss.schedule("scrape:market-intel", "0 2,8,14,20 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 }); await boss.schedule("scrape:market-intel", "0 2,8,14,20 * * *", {}, { retryLimit: 2, expireInSeconds: 3600 });
// NOG conference talks — weekly on Mondays 06:00 UTC
await boss.schedule("scrape:nog-talks", "0 6 * * 1", {}, { retryLimit: 2, expireInSeconds: 7200 });
await boss.schedule("scrape:community-issues", "30 2,8,14,20 * * *", {}, { retryLimit: 1, expireInSeconds: 3600 }); await boss.schedule("scrape:community-issues", "30 2,8,14,20 * * *", {}, { retryLimit: 1, expireInSeconds: 3600 });
await boss.schedule("scrape:datasheet-links", "0 3,9,15,21 * * *", {}, { retryLimit: 1, expireInSeconds: 3600 }); await boss.schedule("scrape:datasheet-links", "0 3,9,15,21 * * *", {}, { retryLimit: 1, expireInSeconds: 3600 });
await boss.schedule("scrape:news", "20 3,9,15,21 * * *", {}, { retryLimit: 2, expireInSeconds: 1800 }); await boss.schedule("scrape:news", "20 3,9,15,21 * * *", {}, { retryLimit: 2, expireInSeconds: 1800 });
@ -491,6 +494,12 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
await withIsolatedStorage("market-intel", scrapeMarketIntelligence); await withIsolatedStorage("market-intel", scrapeMarketIntelligence);
}); });
await boss.work("scrape:nog-talks", async () => {
console.log(`[${new Date().toISOString()}] Running: NOG conference talks`);
const { scrapeNogTalks } = await import("./scrapers/nog-talks");
await scrapeNogTalks();
});
await boss.work("scrape:community-issues", async () => { await boss.work("scrape:community-issues", async () => {
console.log(`[${new Date().toISOString()}] Running: Community issues`); console.log(`[${new Date().toISOString()}] Running: Community issues`);
const { scrapeAllSwitchIssues } = await import("./scrapers/community-issues"); const { scrapeAllSwitchIssues } = await import("./scrapers/community-issues");

View File

@ -0,0 +1,414 @@
/**
 * NOG Conference Talks Scraper for TIP
 *
 * Crawls agenda pages of major Network Operators Group meetings:
 * DENOG, NANOG, RIPE, ENOG, NLNOG and Euro-IX.
 * (GRNOG, NZNOG and APRICOT are listed as targets but have no scraper in this file yet.)
 *
 * Extracts talk titles (plus speakers and abstracts where available), then
 * filters for optical-networking / transceiver-relevant content.
 *
 * Two outputs:
 * 1. news_articles: relevant talks as news items (source="NOG Talks: <event>")
 * 2. market_intelligence: high-signal talks (deployment/procurement topics)
 *
 * CtxEvent integration: if the ctxmeet DB has ConferenceTalk rows, they are
 * pulled via a cross-DB query and merged with the live-scraped talks.
 * Skipping the live scrape in that case is planned but not implemented yet.
 *
 * Runs weekly via pg-boss: scrape:nog-talks
 */
import { pool } from "../utils/db";
import { contentHash } from "../utils/hash";
/**
 * One conference talk as collected by the scrapers in this file.
 *
 * Only `title`, `speaker`, `event` and `eventUrl` are mandatory; the remaining
 * fields are filled in when the source provides them.
 */
interface NogTalk {
  // ── Always present ──────────────────────────────────────────────────────────
  /** Talk title as extracted from the agenda page. */
  title: string;
  /** Speaker name; the HTML scrapers fill in a per-event placeholder. */
  speaker: string;
  /** Event label (e.g. "DENOG17"); becomes part of the stored `source` value. */
  event: string;
  /** Agenda / listing page the talk was found on. */
  eventUrl: string;

  // ── Optional ────────────────────────────────────────────────────────────────
  /** Speaker's organisation, when known. */
  speakerOrg?: string;
  /** Talk abstract, when known. */
  abstract?: string;
  /** Direct link to the talk; preferred over `eventUrl` when storing. */
  talkUrl?: string;
  /** Date string that `new Date(...)` can parse, e.g. "2024-01-01". */
  date?: string;
  /** Not populated by any scraper visible in this file. */
  track?: string;
}
// ── Optical / transceiver relevance keywords ──────────────────────────────────
const OPTICS_KEYWORDS = [
  "transceiver", "optical", "optics", "fiber", "fibre", "wavelength",
  "sfp", "qsfp", "osfp", "400g", "800g", "100g", "25g", "dwdm", "cwdm",
  "coherent", "pluggable", "dac", "aoc", "silicon photonics", "cpo",
  "data center", "datacenter", "dc fabric", "spine", "leaf", "cabling",
  "400zr", "800zr", "zr+", "bidi", "mpo", "lc", "dom", "ddm",
  "innolight", "coherent corp", "lumentum", "ii-vi", "finisar",
  "ciena", "infinera", "acacia", "broadcom", "marvell",
  "interconnect", "co-packaged", "lpo", "dsp",
];

// Networking keywords that often co-occur with optics topics
const NETWORK_KEYWORDS = [
  "peering", "ix", "ixp", "bgp", "routing", "infrastructure",
  "network upgrade", "capacity", "bandwidth", "latency",
  "data center interconnect", "dci", "wan", "mpls", "sr-mpls",
  "hyperscaler", "cloud", "colocation", "colo",
];

/**
 * Builds a predicate that tests a lower-cased haystack for `keyword`.
 *
 * Purely alphabetic keywords of up to four letters ("lc", "dom", "ix", "mpo", …)
 * must appear as a standalone token: not preceded by a letter or digit and not
 * followed by a letter, with an optional plural "s" ("IXPs", "DACs"). A
 * trailing digit or symbol is allowed, so "SFP28" and "SFP+" still match
 * "sfp". Without this, plain substring matching fires on ordinary words:
 * "welcome" contains "lc", "domain" contains "dom", "fix" contains "ix",
 * "important" contains "mpo". It also stops nested acronyms from being
 * counted twice ("sfp" inside "qsfp", "colo" inside "colocation").
 *
 * Longer or non-alphabetic keywords keep plain substring matching so that
 * plurals and compounds ("transceivers", "400GbE") still count.
 */
function keywordMatcher(keyword: string): (haystack: string) => boolean {
  if (/^[a-z]{1,4}$/.test(keyword)) {
    // Keyword is letters only, so it needs no regex escaping.
    const token = new RegExp(`(?<![a-z0-9])${keyword}s?(?![a-z])`);
    return (haystack) => token.test(haystack);
  }
  return (haystack) => haystack.includes(keyword);
}

// Compiled once at module load instead of on every call.
const OPTICS_MATCHERS = OPTICS_KEYWORDS.map(keywordMatcher);
const NETWORK_MATCHERS = NETWORK_KEYWORDS.map(keywordMatcher);

/**
 * Scores how relevant a piece of text is to optical networking.
 *
 * @param text Free text (title, abstract, speaker …); matching is case-insensitive.
 * @returns 3 points per distinct optics keyword found plus 1 point per distinct
 *          networking keyword found; 0 when nothing matches.
 */
function scoreRelevance(text: string): number {
  const tl = text.toLowerCase();
  let score = 0;
  for (const matches of OPTICS_MATCHERS) {
    if (matches(tl)) score += 3;
  }
  for (const matches of NETWORK_MATCHERS) {
    if (matches(tl)) score += 1;
  }
  return score;
}
/**
 * Request headers sent with every agenda fetch: a User-Agent identifying the
 * crawler and an Accept header asking for HTML.
 */
function headers(): Record<string, string> {
  return {
    "User-Agent": "TIP-NOG-Crawler/1.0 (Transceiver Intelligence; research)",
    "Accept": "text/html,application/xhtml+xml",
  };
}

/**
 * Downloads `url` and resolves with the response body as text.
 *
 * Best-effort by design: this function never rejects. It resolves with an
 * empty string when the response status is not OK, when the request fails or
 * times out (20 s), or when reading the body fails.
 *
 * @param url Absolute URL to fetch.
 * @returns The response body, or "" on any failure.
 */
async function fetchText(url: string): Promise<string> {
  try {
    const res = await fetch(url, { headers: headers(), signal: AbortSignal.timeout(20000) });
    if (!res.ok) return "";
    // `await` is required: returning the bare promise would leave the try
    // block before the body is read, so a stream error or a timeout during
    // the download would reject the caller instead of hitting the catch.
    return await res.text();
  } catch {
    return "";
  }
}
/**
 * Turns an HTML fragment into single-spaced plain text: every `<...>` tag is
 * replaced by a space, whitespace runs collapse to one space, and leading and
 * trailing whitespace is dropped.
 */
function cleanText(s: string): string {
  const withoutTags = s.replace(/<[^>]+>/g, " ");
  const words = withoutTags.split(/\s+/).filter((word) => word.length > 0);
  return words.join(" ");
}
// ── DENOG ────────────────────────────────────────────────────────────────────
/**
 * Scrapes the agenda pages of DENOG 17, 16 and 15 (in that order) from
 * denog.de/DENOG{N}/agenda/.
 *
 * Candidate titles are the text of <h2>, <h3> and <td> elements. At most 30
 * candidates per edition are kept. Speakers are not parsed; a placeholder is
 * stored instead. Editions whose page cannot be fetched are skipped.
 */
async function scrapeDENOG(): Promise<NogTalk[]> {
  const editions = [17, 16, 15];
  const collected: NogTalk[] = [];

  for (const edition of editions) {
    const agendaUrl = `https://www.denog.de/DENOG${edition}/agenda/`;
    const page = await fetchText(agendaUrl);
    if (page) {
      // Extract talk titles from agenda (h2/h3/td text content)
      const heading = /<(?:h[23]|td)[^>]*>\s*([^<]{10,200})<\/(?:h[23]|td)>/gi;
      const candidates: string[] = [];
      for (let hit = heading.exec(page); hit !== null; hit = heading.exec(page)) {
        const text = cleanText(hit[1]);
        const lengthOk = text.length > 10 && text.length < 200;
        const looksLikeUrl = text.startsWith("http");
        const digitsOnly = /^\d+$/.test(text);
        if (lengthOk && !looksLikeUrl && !digitsOnly) {
          candidates.push(text);
        }
      }

      // Year is derived from the edition number (DENOG15 maps to 2023);
      // month and day are a fixed 01-01 placeholder.
      const year = 2023 + (edition - 15);
      collected.push(
        ...candidates.slice(0, 30).map((title): NogTalk => ({
          title,
          speaker: "DENOG Speaker",
          event: `DENOG${edition}`,
          eventUrl: agendaUrl,
          date: `${year}-01-01`,
        })),
      );
    }
  }

  return collected;
}
// ── NANOG ────────────────────────────────────────────────────────────────────
/**
 * Scrapes the HTML agenda pages of NANOG 93, 92 and 91 (in that order).
 *
 * Candidate titles are taken from `<div class="…title…">`, `<h3>` and
 * `<td class="…session…">` elements, de-duplicated, and capped at 25 per
 * meeting. Speakers are not parsed; a placeholder is stored instead.
 * Meetings whose page cannot be fetched are skipped.
 *
 * @returns All collected talks; an empty array if no page could be fetched.
 */
async function scrapeNANOG(): Promise<NogTalk[]> {
  const talks: NogTalk[] = [];
  for (const meeting of [93, 92, 91]) {
    const url = `https://www.nanog.org/meetings/nanog${meeting}/agenda/`;
    const html = await fetchText(url);
    if (!html) continue;

    // The agenda is fetched as HTML (no JSON endpoint is used here); titles
    // may sit in any of the following elements.
    const patterns = [
      /<div[^>]*class="[^"]*title[^"]*"[^>]*>\s*([^<]{10,200})<\/div>/gi,
      /<h3[^>]*>\s*([^<]{10,200})<\/h3>/gi,
      /<td[^>]*class="[^"]*session[^"]*"[^>]*>\s*([^<]{10,200})<\/td>/gi,
    ];
    const titles: string[] = [];
    for (const pat of patterns) {
      let m: RegExpExecArray | null;
      while ((m = pat.exec(html)) !== null) {
        const t = cleanText(m[1]);
        if (t.length > 10 && t.length < 200) titles.push(t);
      }
    }

    // NANOG meets three times per calendar year and NANOG 90 opened 2024, so
    // 90-92 map to 2024 and 93-95 to 2025. The previous base year of 2023
    // dated every meeting one year too early. Month and day remain a fixed
    // 01-01 placeholder.
    const year = 2024 + Math.floor((meeting - 90) / 3);

    for (const title of [...new Set(titles)].slice(0, 25)) {
      talks.push({
        title,
        speaker: "NANOG Speaker",
        event: `NANOG${meeting}`,
        eventUrl: url,
        date: `${year}-01-01`,
      });
    }
  }
  return talks;
}
// ── RIPE ─────────────────────────────────────────────────────────────────────
/**
 * Scrapes the HTML programme pages of RIPE 89, 88 and 87 (in that order) from
 * ripe{N}.ripe.net/programme/agenda/.
 *
 * Candidate titles are taken from `<h2>`/`<h3>`, `<div class="…slot-title…">`
 * and `<a class="…session…">` elements, de-duplicated, and capped at 30 per
 * meeting. Text containing "©" is dropped. Speakers are not parsed; a
 * placeholder is stored instead. Meetings whose page cannot be fetched are
 * skipped.
 *
 * @returns All collected talks; an empty array if no page could be fetched.
 */
async function scrapeRIPE(): Promise<NogTalk[]> {
  const talks: NogTalk[] = [];
  for (const meeting of [89, 88, 87]) {
    // The programme is fetched as an HTML page, not through an API.
    const apiUrl = `https://ripe${meeting}.ripe.net/programme/agenda/`;
    const html = await fetchText(apiUrl);
    if (!html) continue;

    // Look for session titles
    const patterns = [
      /<h[23][^>]*>\s*([^<]{10,200})<\/h[23]>/gi,
      /<div[^>]*class="[^"]*slot-title[^"]*"[^>]*>\s*([^<]{10,200})<\/div>/gi,
      /<a[^>]*class="[^"]*session[^"]*"[^>]*>\s*([^<]{10,200})<\/a>/gi,
    ];
    const titles: string[] = [];
    for (const pat of patterns) {
      let m: RegExpExecArray | null;
      while ((m = pat.exec(html)) !== null) {
        const t = cleanText(m[1]);
        if (t.length > 10 && t.length < 200 && !t.includes("©")) titles.push(t);
      }
    }

    // RIPE meets twice per calendar year, with even-numbered meetings in
    // spring and odd-numbered ones in autumn: 86/87 were 2023, 88/89 were
    // 2024. Anchoring the pair at 86 keeps RIPE 88 from being dated 2023.
    // Month and day remain a fixed 01-01 placeholder.
    const year = 2023 + Math.floor((meeting - 86) / 2);

    for (const title of [...new Set(titles)].slice(0, 30)) {
      talks.push({
        title,
        speaker: "RIPE Speaker",
        event: `RIPE ${meeting}`,
        eventUrl: apiUrl,
        date: `${year}-01-01`,
      });
    }
  }
  return talks;
}
// ── ENOG ─────────────────────────────────────────────────────────────────────
/**
 * Scrapes the ENOG presentations listing.
 *
 * Every `<a>` whose href contains "presentation" and whose link text is
 * longer than 10 characters becomes a talk. At most 30 talks are returned.
 * Speakers are not parsed; a placeholder is stored instead.
 *
 * @returns Collected talks; an empty array if the page could not be fetched.
 */
async function scrapeENOG(): Promise<NogTalk[]> {
  const listingUrl = "https://www.enog.org/presentations/";
  const talks: NogTalk[] = [];
  const html = await fetchText(listingUrl);
  if (!html) return talks;

  // Absolute http(s) links are kept verbatim. Everything else is resolved
  // against the listing page, so relative ("slides/x.pdf") and
  // protocol-relative ("//host/x") hrefs become valid URLs instead of being
  // glued onto the host name. Root-relative hrefs resolve as before.
  const resolveHref = (href: string): string | undefined => {
    if (href.startsWith("http")) return href;
    try {
      return new URL(href, listingUrl).href;
    } catch {
      // Unparseable href: leave talkUrl unset rather than storing garbage.
      return undefined;
    }
  };

  const linkPattern = /<a[^>]*href="([^"]*presentation[^"]*)"[^>]*>\s*([^<]{10,200})<\/a>/gi;
  let m: RegExpExecArray | null;
  while ((m = linkPattern.exec(html)) !== null) {
    const title = cleanText(m[2]);
    if (title.length > 10) {
      talks.push({
        title,
        speaker: "ENOG Speaker",
        event: "ENOG",
        eventUrl: listingUrl,
        talkUrl: resolveHref(m[1]),
      });
    }
  }
  return talks.slice(0, 30);
}
// ── NLNOG ────────────────────────────────────────────────────────────────────
/**
 * Scrapes the NLNOG Day page. The text of every <h2>, <h3> and <h4> is a
 * candidate title; headings of 10 characters or fewer, or mentioning
 * "NLNOG Day" itself, are ignored. Returns at most 20 talks, with a
 * placeholder speaker.
 */
async function scrapeNLNOG(): Promise<NogTalk[]> {
  const pageUrl = "https://nlnog.net/nlnog-day/";
  const page = await fetchText(pageUrl);
  if (!page) {
    return [];
  }

  const headingRe = /<h[234][^>]*>\s*([^<]{10,200})<\/h[234]>/gi;
  const found: NogTalk[] = [];
  for (let hit = headingRe.exec(page); hit !== null; hit = headingRe.exec(page)) {
    const title = cleanText(hit[1]);
    const tooShort = title.length <= 10;
    const isPageHeading = title.includes("NLNOG Day");
    if (tooShort || isPageHeading) {
      continue;
    }
    found.push({
      title,
      speaker: "NLNOG Speaker",
      event: "NLNOG Day",
      eventUrl: pageUrl,
    });
  }
  return found.slice(0, 20);
}
// ── NOG-PG / EURO-IX ─────────────────────────────────────────────────────────
/**
 * Scrapes the Euro-IX "IXP tools and services" page. The page is only loosely
 * structured, so the text of every <h2>/<h3> longer than 15 characters is
 * taken as a title. Returns at most 15 talks, with a placeholder speaker.
 */
async function scrapeEuroIX(): Promise<NogTalk[]> {
  const page = await fetchText("https://www.euro-ix.net/en/forixps/ixp-tools-and-services/");
  if (!page) {
    return [];
  }

  const headingRe = /<h[23][^>]*>\s*([^<]{15,200})<\/h[23]>/gi;
  const found: NogTalk[] = [];
  for (let hit = headingRe.exec(page); hit !== null; hit = headingRe.exec(page)) {
    const title = cleanText(hit[1]);
    if (title.length <= 15) {
      continue;
    }
    found.push({
      title,
      speaker: "Euro-IX Speaker",
      event: "Euro-IX Forum",
      eventUrl: "https://www.euro-ix.net",
    });
  }
  return found.slice(0, 15);
}
// ── CtxEvent cross-DB query (when ctxmeet has data) ──────────────────────────
/**
 * Pulls up to 100 conference talks from the ctxmeet database through dblink,
 * limited to events that started within the last 18 months.
 *
 * Best-effort: any failure (dblink unavailable, connection or auth error,
 * missing tables) is logged as a warning and yields an empty array, so the
 * live scrapers still run.
 *
 * @returns Talks mapped to `NogTalk`; an empty array on failure or when the
 *          query returns no rows.
 */
async function pullFromCtxEvent(): Promise<NogTalk[]> {
  // pg may hand back timestamptz as a Date or as a string; accept both and
  // drop values that do not parse, so one bad row cannot discard all rows.
  const toIsoDay = (value: unknown): string | undefined => {
    if (!value) return undefined;
    const parsed = value instanceof Date ? value : new Date(String(value));
    return Number.isNaN(parsed.getTime()) ? undefined : parsed.toISOString().split("T")[0];
  };

  try {
    // Direct cross-DB query via dblink (same Postgres instance) or separate pool
    // ctxmeet DB is on same server, port 5432
    // NOTE(review): the connection string carries an empty password; this
    // presumably relies on trust/peer auth for user "tip" — confirm.
    const result = await pool.query(`
SELECT * FROM dblink(
'host=localhost port=5432 dbname=ctxmeet user=tip password=',
$$ SELECT ct.title, ct.speaker, ct."speakerOrg", ct.abstract,
e.name as event, e."eventUrl", ct."startTime",
ct."talkType"
FROM "ConferenceTalk" ct
JOIN "Event" e ON e.id = ct."eventId"
WHERE e."startDate" > NOW() - INTERVAL '18 months'
ORDER BY e."startDate" DESC LIMIT 100 $$
) AS t(title text, speaker text, "speakerOrg" text, abstract text,
event text, "eventUrl" text, "startTime" timestamptz, "talkType" text)
`);
    return result.rows.map((r: Record<string, unknown>) => ({
      title: String(r.title || ""),
      speaker: String(r.speaker || ""),
      speakerOrg: String(r.speakerOrg || ""),
      abstract: r.abstract ? String(r.abstract) : undefined,
      event: String(r.event || ""),
      eventUrl: String(r["eventUrl"] || ""),
      date: toIsoDay(r.startTime),
    }));
  } catch (err) {
    // Previously swallowed silently, which made a misconfigured bridge
    // indistinguishable from "ctxmeet has no talks yet".
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`[NOG Talks] CtxEvent pull skipped: ${reason}`);
    return [];
  }
}
// ── Store relevant talks in TIP DB ───────────────────────────────────────────
/**
 * Scores each talk and persists the relevant ones.
 *
 * - Talks scoring below 2 are skipped.
 * - All others are upserted into `news_articles`, keyed on `source_url`.
 *   Talks without their own URL get `eventUrl#talk-<hash prefix>` so each
 *   talk has a distinct key.
 * - Talks scoring 6 or more are additionally inserted into
 *   `market_intelligence`.
 *
 * Database errors are logged and never thrown. A talk counts as `stored`
 * only when its `news_articles` upsert succeeded; otherwise it counts as
 * `failed`.
 *
 * @param talks Talks from all sources.
 * @returns Counts of stored, skipped (not relevant) and failed talks.
 */
async function storeTalks(
  talks: NogTalk[],
): Promise<{ stored: number; skipped: number; failed: number }> {
  let stored = 0;
  let skipped = 0;
  let failed = 0;

  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  for (const talk of talks) {
    const fullText = `${talk.title} ${talk.abstract || ""} ${talk.speaker} ${talk.speakerOrg || ""}`;
    const score = scoreRelevance(fullText);
    if (score < 2) { skipped++; continue; }

    // Map the open-ended keyword score onto 0..1 (15 points or more → 1).
    const relevanceScore = Math.min(1, score / 15);
    const hash = contentHash({ source: "nog-talk", event: talk.event, title: talk.title });
    // Make source_url unique per talk using hash suffix
    const uniqueUrl = talk.talkUrl || `${talk.eventUrl}#talk-${hash.substring(0, 8)}`;
    const byline = `${talk.speaker}${talk.speakerOrg ? ` (${talk.speakerOrg})` : ""}`;
    const publishedAt = talk.date ? new Date(talk.date) : new Date();

    // Store in news_articles
    const newsResult = await pool.query(`
INSERT INTO news_articles (
title, source, source_url, summary, published_at,
category, relevance_score, content_hash, tags
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
ON CONFLICT (source_url) DO UPDATE SET
relevance_score = GREATEST(news_articles.relevance_score, EXCLUDED.relevance_score),
content_hash = EXCLUDED.content_hash
`, [
      talk.title,
      `NOG Talks: ${talk.event}`,
      uniqueUrl,
      talk.abstract
        ? `${byline}: ${talk.abstract.substring(0, 400)}`
        // Separator added: speaker and event used to run together
        // ("DENOG SpeakerDENOG17").
        : `${byline} — ${talk.event}`,
      publishedAt,
      "event",
      relevanceScore,
      hash,
      JSON.stringify(["nog", talk.event.toLowerCase().replace(/\s+/g, "-"), "conference"]),
    ]).catch((err: unknown) => {
      console.error(`[NOG Talks] news_articles insert failed for "${talk.title}": ${describe(err)}`);
      return null;
    });

    // High-relevance talks also go into market_intelligence
    if (score >= 6) {
      const intelText = talk.title + " " + (talk.abstract || "");
      const intelType = detectIntelType(intelText);
      const buySignal = detectBuySignal(intelText);
      // NOTE(review): ON CONFLICT DO NOTHING only de-duplicates if the table
      // has a matching unique constraint, which is not visible from this
      // file. Without one, every weekly run adds the same rows again — confirm.
      await pool.query(`
INSERT INTO market_intelligence (
intel_type, title, summary, relevance_score,
technologies, buy_signal_implication, source_url,
source_name, published_at, is_demo
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, false)
ON CONFLICT DO NOTHING
`, [
        intelType,
        `[${talk.event}] ${talk.title}`,
        `${byline}: ${(talk.abstract || talk.title).substring(0, 500)}`,
        relevanceScore,
        extractTechnologies(fullText),
        buySignal,
        talk.talkUrl || talk.eventUrl,
        `NOG Talks: ${talk.event}`,
        publishedAt,
      ]).catch((err: unknown) => {
        console.error(`[NOG Talks] market_intelligence insert failed for "${talk.title}": ${describe(err)}`);
        return null;
      });
    }

    if (newsResult === null) {
      failed++;
    } else {
      stored++;
    }
  }
  return { stored, skipped, failed };
}
/**
 * Classifies text into a market-intelligence type by case-insensitive
 * substring lookup. Rules are checked top to bottom and the first rule with
 * any matching keyword wins; text matching no rule is "supply_chain".
 */
function detectIntelType(text: string): string {
  const haystack = text.toLowerCase();
  const rules: Array<[string[], string]> = [
    [["deploy", "migration", "upgrade"], "supply_chain"],
    [["400g", "800g", "100g"], "technology_launch"],
    [["price", "cost", "budget"], "price_movement"],
    [["standard", "spec", "rfc"], "standard_draft"],
    [["capex", "investment", "market"], "capex_cycle"],
    [["lead time", "availability", "shortage"], "distributor_lead_time"],
  ];
  for (const [needles, intelType] of rules) {
    if (needles.some((needle) => haystack.includes(needle))) {
      return intelType;
    }
  }
  return "supply_chain";
}
/**
 * Derives a buy-signal label from text by case-insensitive substring lookup.
 * Rules are checked top to bottom and the first rule with any matching
 * keyword wins; text matching no rule is "neutral".
 */
function detectBuySignal(text: string): string {
  const haystack = text.toLowerCase();
  const rules: Array<[string[], string]> = [
    [["deploy", "migration", "upgrade", "scale"], "bullish"],
    [["shortage", "lead time", "limited"], "opportunity"],
    [["wait", "next gen", "coming soon"], "bearish"],
  ];
  for (const [needles, signal] of rules) {
    if (needles.some((needle) => haystack.includes(needle))) {
      return signal;
    }
  }
  return "neutral";
}
/**
 * Ordered (pattern, label) pairs used by `extractTechnologies`. Patterns are
 * applied to lower-cased text.
 *
 * Boundaries prevent the false positives that plain substring matching
 * produced:
 * - speeds must not be preceded by a digit ("125g" is not 25G), but may be
 *   preceded by a letter ("4x100G") and followed by anything ("400GbE");
 * - form factors must not be preceded by a letter, so "QSFP28" is not also
 *   reported as SFP28;
 * - bare acronyms must be standalone tokens (optional plural "s"), so
 *   "important" and "component" are not reported as MPO.
 */
const TECH_PATTERNS: ReadonlyArray<readonly [RegExp, string]> = [
  [/(?<!\d)400g/, "400G"],
  [/(?<!\d)800g/, "800G"],
  [/(?<!\d)100g/, "100G"],
  [/(?<!\d)25g/, "25G"],
  [/(?<!\d)10g/, "10G"],
  [/(?<![a-z])sfp28/, "SFP28"],
  [/(?<![a-z])qsfp28/, "QSFP28"],
  [/(?<![a-z])qsfp-dd/, "QSFP-DD"],
  [/(?<![a-z0-9])osfps?(?![a-z])/, "OSFP"],
  // "zr+" is normally written with a prefix ("400ZR+", "OpenZR+"): no boundary.
  [/zr\+/, "400ZR+"],
  [/silicon photonics/, "Silicon Photonics"],
  [/(?<![a-z0-9])cpos?(?![a-z])/, "CPO"],
  [/(?<![a-z0-9])dwdms?(?![a-z])/, "DWDM"],
  [/(?<![a-z0-9])cwdms?(?![a-z])/, "CWDM"],
  [/(?<![a-z0-9])lpos?(?![a-z])/, "LPO"],
  [/(?<![a-z0-9])dacs?(?![a-z])/, "DAC"],
  [/(?<![a-z0-9])aocs?(?![a-z])/, "AOC"],
  [/(?<![a-z0-9])mpos?(?![a-z])/, "MPO"],
  [/(?<![a-z0-9])bidis?(?![a-z])/, "BiDi"],
];

/**
 * Lists the transceiver technologies mentioned in `text`.
 *
 * @param text Free text; matching is case-insensitive.
 * @returns Display labels (e.g. "400G", "QSFP-DD") in a fixed order, each at
 *          most once; an empty array when nothing matches.
 */
function extractTechnologies(text: string): string[] {
  const tl = text.toLowerCase();
  return TECH_PATTERNS
    .filter(([pattern]) => pattern.test(tl))
    .map(([, label]) => label);
}
// ── Main export ──────────────────────────────────────────────────────────────
/**
 * Entry point for the `scrape:nog-talks` job.
 *
 * Pulls talks from the ctxmeet bridge, then runs all live scrapers
 * concurrently, merges the results (ctxmeet talks first) and hands them to
 * `storeTalks`. Progress and final counts are written to the console.
 */
export async function scrapeNogTalks(): Promise<void> {
  console.log("[NOG Talks] Starting NOG conference talks scraper...");

  // CtxEvent DB first (yields talks only once it is populated)
  const ctxTalks = await pullFromCtxEvent();
  console.log(`[NOG Talks] CtxEvent DB: ${ctxTalks.length} talks`);

  // The live NOG sources are scraped regardless of the CtxEvent result
  const scraped = await Promise.all([
    scrapeDENOG(),
    scrapeNANOG(),
    scrapeRIPE(),
    scrapeENOG(),
    scrapeNLNOG(),
    scrapeEuroIX(),
  ]);
  const [denog, nanog, ripe, enog, nlnog, euroix] = scraped;

  const allTalks = ctxTalks.concat(...scraped);
  console.log(`[NOG Talks] Collected ${allTalks.length} talks total (DENOG:${denog.length} NANOG:${nanog.length} RIPE:${ripe.length} ENOG:${enog.length} NLNOG:${nlnog.length} EuroIX:${euroix.length} CtxEvent:${ctxTalks.length})`);

  const outcome = await storeTalks(allTalks);
  console.log(`[NOG Talks] Done — stored:${outcome.stored} (optics-relevant) skipped:${outcome.skipped} (not relevant)`);
}