feat: wire finder.ts + switch-docs + Ollama LLM tools to MCP server

MCP Server (packages/mcp-server/src/index.ts):
- Register registerSwitchDocTools (switch-docs.ts) — switch documentation lookup
- Register finderTools dynamically (finder.ts) — find_flexoptix_for_switch, get_competitor_alerts
- Add analyze_market_with_llm tool: qwen2.5:14b via Ollama, enriched with live hype cycle + pricing + news
- Add generate_blog_post tool: fo-blog-v5 (fine-tuned) with qwen2.5:14b fallback, enriched with live pricing data
- OLLAMA_BASE_URL env var (default: https://ollama.fichtmueller.org)

Also includes scraper improvements (ascentoptics, atgbics, gbics, skylane, ebay-enricher),
API route updates (blog, blog-sll, health, hot-topics, transceivers, queries),
and dashboard hot-topics refresh.
This commit is contained in:
Rene Fichtmueller 2026-04-18 00:21:58 +02:00
parent 23bdfc1585
commit 9db0335229
15 changed files with 734 additions and 227 deletions

View File

@ -13,6 +13,7 @@ export interface SearchParams {
coherent?: boolean; coherent?: boolean;
market_status?: string; market_status?: string;
vendor?: string; vendor?: string;
verified?: "price" | "image" | "details" | "full";
limit?: number; limit?: number;
offset?: number; offset?: number;
} }
@ -82,6 +83,10 @@ export async function searchTransceivers(params: SearchParams) {
values.push(`%${params.vendor}%`); values.push(`%${params.vendor}%`);
idx++; idx++;
} }
if (params.verified) {
const col = params.verified === "full" ? "fully_verified" : params.verified + "_verified";
conditions.push(`t.${col} = true`);
}
const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : ""; const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
const limit = params.limit || 50; const limit = params.limit || 50;

View File

@ -304,7 +304,47 @@ Given the topic below, expand it into:
Topic: {{TOPIC}} Topic: {{TOPIC}}
Keep it practical, not theoretical. Think about what actually goes wrong in production.`; Keep it practical, not theoretical. Think about what actually goes wrong in production.{{ADDITIONAL_CONTEXT}}`;
// ═══════════════════════════════════════════════════════
// STEP HEADLINE: Generate a compelling article headline
// (runs AFTER full article is written — never copies user input)
// ═══════════════════════════════════════════════════════
/**
 * Prompt template for the headline step.
 *
 * Contains a single `{{ARTICLE}}` placeholder that must be replaced with the
 * article text before the prompt is sent. The prompt instructs the model to
 * produce exactly one headline of at most 10 words and to return only the
 * headline text (no quotes, no commentary).
 */
export const STEP_HEADLINE_GENERATION = `You are writing a headline for a Flexoptix technical blog article.
Read the article below and generate ONE headline.
RULES NON-NEGOTIABLE:
- Maximum 10 words
- Active voice, no adverbs
- No buzzwords: no "game-changer", "revolutionize", "deep dive", "exploring", "essential"
- No generic openers: "The Real X", "Why X Matters", "A Guide to X", "Everything You Need to Know"
- No question headlines (they underperform on technical audiences)
- Do NOT copy any phrase from the article verbatim
- Do NOT echo back the topic input or any context that was provided
- Must signal a specific insight, counterintuitive finding, or operational reality
- Must make an engineer stop scrolling specific, surprising, or blunt
- Colon allowed ONLY if both halves are strong standalone phrases
GOOD examples:
"400G Does Not Fail in Design. It Fails in Production."
"Your OTDR Is Lying to You"
"Third-Party Optics: The Risk Is Real, but It's Not What You Think"
"RPKI Fixed Origin Validation. Path Security Is Still Broken."
"InfiniBand Scales to 400,000 GPUs. Ethernet Does Not."
BAD examples (never produce these):
"Why 400G Migration Matters for Your Network" generic
"Deep Dive: Understanding Optical Transceivers" buzzword + filler
"The Complete Guide to OTDR Testing" listicle-style
"Exploring the Challenges of Coherent Optics in 2026" journalist fluff
"Key Takeaways from Our Latest Blog Post" never
Return ONLY the headline text. No quotes. No commentary. No "Here is your headline:".
Article:
{{ARTICLE}}`;
// ═══════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════
// STEP 2: ANGLE SELECTION // STEP 2: ANGLE SELECTION

View File

@ -6,6 +6,8 @@
* GET /api/blog/sll/insights current learning state * GET /api/blog/sll/insights current learning state
* POST /api/blog/sll/analyze trigger LLM pattern extraction * POST /api/blog/sll/analyze trigger LLM pattern extraction
* GET /api/blog/sll/patterns all learned patterns * GET /api/blog/sll/patterns all learned patterns
* GET /api/blog/sll/posting-time best posting time (Umami + SLL combined)
* POST /api/blog/sll/sync-umami refresh Umami analytics cache
*/ */
import { Router, Request, Response } from "express"; import { Router, Request, Response } from "express";
@ -173,6 +175,202 @@ blogSllRouter.get("/sll/insights", async (_req: Request, res: Response) => {
} }
}); });
// ─────────────────────────────────────────────────────────────────
// In-memory Umami cache (TTL 1h — single PM2 process)
// ─────────────────────────────────────────────────────────────────
// One traffic bucket: number of sessions that started in a given (weekday, hour).
// weekday is Monday-based (0 = Monday … 6 = Sunday), matching DAY_NAMES below.
interface UmamiSlot { weekday: number; hour: number; sessions: number }
// Last successfully built slot list plus the Date.now() timestamp (ms) it was built at.
// null means "nothing cached yet" or "explicitly invalidated".
let umamiCache: { slots: UmamiSlot[]; fetchedAt: number } | null = null;
const UMAMI_TTL_MS = 60 * 60 * 1000; // 1h
// Umami connection settings; each can be overridden through the environment.
const UMAMI_URL = process.env["UMAMI_URL"] ?? "https://analytics.fichtmueller.org";
const UMAMI_USER = process.env["UMAMI_USER"] ?? "admin";
// Defaults to an empty password when UMAMI_PASS is not set.
const UMAMI_PASS = process.env["UMAMI_PASS"] ?? "";
const UMAMI_WEBSITE = process.env["UMAMI_WEBSITE_ID"] ?? "c737bf75-ccc4-463b-992a-13bed31d7f43";
// German two-letter weekday abbreviations, indexed Monday = 0 … Sunday = 6.
const DAY_NAMES = ["Mo","Di","Mi","Do","Fr","Sa","So"];
/**
 * Logs in to the Umami API and returns the bearer token from the response.
 *
 * Best-effort: a network error, the 8 s timeout, an unparsable response body,
 * or a response without a `token` field all result in `null`.
 *
 * @returns The token string, or `null` when no token could be obtained.
 */
async function fetchUmamiToken(): Promise<string | null> {
  const loginUrl = `${UMAMI_URL}/api/auth/login`;
  const credentials = JSON.stringify({ username: UMAMI_USER, password: UMAMI_PASS });
  try {
    const response = await fetch(loginUrl, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: credentials,
      signal: AbortSignal.timeout(8000),
    });
    const payload = (await response.json()) as { token?: string };
    if (payload.token === undefined || payload.token === null) {
      return null;
    }
    return payload.token;
  } catch {
    // Deliberately swallowed: callers treat "no token" as "Umami unavailable".
    return null;
  }
}
// Renders an instant as short weekday + 24h hour in Europe/Berlin. Using the
// IANA zone keeps the conversion correct across CET/CEST instead of assuming
// a fixed UTC+2 offset.
const BERLIN_SLOT_FORMAT = new Intl.DateTimeFormat("en-US", {
  timeZone: "Europe/Berlin",
  weekday: "short",
  hour: "2-digit",
  hourCycle: "h23",
});
// en-US short weekday names in Monday-first order; the array index is the slot weekday.
const BERLIN_WEEKDAY_ORDER = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"];

/**
 * Maps an instant to its Berlin-local (weekday, hour) bucket.
 *
 * @param instant Point in time to classify.
 * @returns `{ weekday, hour }` with weekday 0 = Monday … 6 = Sunday and hour 0–23,
 *          or `null` when the date is invalid or the parts cannot be read.
 */
function toBerlinSlot(instant: Date): { weekday: number; hour: number } | null {
  if (Number.isNaN(instant.getTime())) return null;
  let weekday = -1;
  let hour = Number.NaN;
  for (const part of BERLIN_SLOT_FORMAT.formatToParts(instant)) {
    if (part.type === "weekday") {
      weekday = BERLIN_WEEKDAY_ORDER.indexOf(part.value);
    } else if (part.type === "hour") {
      hour = Number(part.value);
    }
  }
  if (weekday < 0 || !Number.isInteger(hour)) return null;
  // "% 24" guards against runtimes that render midnight as "24".
  return { weekday, hour: hour % 24 };
}

/**
 * Returns Umami session counts bucketed by Berlin-local (weekday, hour).
 *
 * Serves the in-memory cache while it is younger than UMAMI_TTL_MS; otherwise
 * logs in, fetches sessions of the last 90 days and rebuilds the cache.
 * Only the first page (up to 500 sessions) of the sessions endpoint is read.
 *
 * Best-effort: returns `[]` (and leaves the cache untouched) when no token can
 * be obtained, the request fails or times out, or the server answers with a
 * non-OK status. Sessions without a parsable `firstAt` are skipped.
 *
 * @returns One entry per (weekday, hour) bucket that has at least one session.
 */
async function fetchUmamiSlots(): Promise<UmamiSlot[]> {
  // Return cache if fresh
  if (umamiCache && Date.now() - umamiCache.fetchedAt < UMAMI_TTL_MS) {
    return umamiCache.slots;
  }
  const token = await fetchUmamiToken();
  if (!token) return [];
  const endAt = Date.now();
  const startAt = endAt - 90 * 24 * 60 * 60 * 1000;
  const url = `${UMAMI_URL}/api/websites/${UMAMI_WEBSITE}/sessions?startAt=${startAt}&endAt=${endAt}&pageSize=500&page=1`;
  try {
    const r = await fetch(url, {
      headers: { Authorization: `Bearer ${token}` },
      signal: AbortSignal.timeout(15000),
    });
    // An error response must not be cached as "zero traffic" for a whole TTL.
    if (!r.ok) return [];
    const d = await r.json() as { data?: Array<{ firstAt?: string }> };
    const bySlot = new Map<string, UmamiSlot>();
    for (const s of d.data ?? []) {
      if (!s.firstAt) continue;
      const slot = toBerlinSlot(new Date(s.firstAt));
      if (!slot) continue;
      const key = `${slot.weekday}:${slot.hour}`;
      const existing = bySlot.get(key);
      if (existing) {
        existing.sessions += 1;
      } else {
        bySlot.set(key, { weekday: slot.weekday, hour: slot.hour, sessions: 1 });
      }
    }
    const slots = Array.from(bySlot.values());
    umamiCache = { slots, fetchedAt: Date.now() };
    return slots;
  } catch {
    // Deliberately swallowed: callers treat an empty list as "Umami unavailable".
    return [];
  }
}
// ─────────────────────────────────────────────────────────────────
// GET /api/blog/sll/posting-time — best posting time
// Combines Umami traffic data + SLL historical engagement by slot
// ─────────────────────────────────────────────────────────────────
// Ranks (weekday, hour) slots by a 0–100 score built from normalised Umami
// session counts and normalised average SLL engagement, and responds with the
// top 10 slots, the best slot per weekday, and the single best slot overall.
blogSllRouter.get("/sll/posting-time", async (_req: Request, res: Response) => {
try {
// 1. Umami: traffic per (weekday, hour)
const umamiSlots = await fetchUmamiSlots();
// Floored at 1 so the normalisation below never divides by zero.
const umamiMax = Math.max(1, ...umamiSlots.map((s) => s.sessions));
// 2. SLL: avg engagement per (weekday, hour) from historical posts
// NOTE(review): the chained AT TIME ZONE 'UTC' AT TIME ZONE 'Europe/Berlin' presumably
// expects posted_at to be a timestamp without time zone holding UTC — confirm the column type.
const sllRes = await pool.query(`
SELECT
EXTRACT(DOW FROM posted_at AT TIME ZONE 'UTC' AT TIME ZONE 'Europe/Berlin')::int AS wd_raw,
EXTRACT(HOUR FROM posted_at AT TIME ZONE 'UTC' AT TIME ZONE 'Europe/Berlin')::int AS hour,
AVG(engagement_score) AS avg_eng,
MAX(engagement_score) AS best_eng,
COUNT(*) AS post_count
FROM blog_performance
WHERE posted_at IS NOT NULL AND engagement_score IS NOT NULL
GROUP BY wd_raw, hour
`);
// Convert Sunday=0 (PostgreSQL DOW) to Monday=0 index
const sllMap: Record<string, { avgEng: number; bestEng: number; count: number }> = {};
for (const row of sllRes.rows) {
const wd = row.wd_raw === 0 ? 6 : Number(row.wd_raw) - 1; // Mon=0
const key = `${wd}:${row.hour}`;
sllMap[key] = {
// Average engagement rounded to one decimal place.
avgEng: Math.round(Number(row.avg_eng) * 10) / 10,
bestEng: Number(row.best_eng),
count: Number(row.post_count),
};
}
// Floored at 1 for the same divide-by-zero reason as umamiMax.
const sllMax = Math.max(1, ...Object.values(sllMap).map((v) => v.avgEng));
// 3. Build candidate slots (union of Umami + SLL slots)
const allKeys = new Set([
...umamiSlots.map((s) => `${s.weekday}:${s.hour}`),
...Object.keys(sllMap),
]);
const scored = Array.from(allKeys).map((key) => {
const [wd, h] = key.split(":").map(Number);
const umami = umamiSlots.find((s) => s.weekday === wd && s.hour === h);
const sll = sllMap[key];
// Each source is normalised to 0..1 against its own maximum.
const umamiScore = umami ? umami.sessions / umamiMax : 0;
const sllScore = sll ? sll.avgEng / sllMax : 0;
// Weighting:
//   both sources present → 50% Umami traffic + 50% SLL engagement
//   Umami only           → Umami score scaled by 0.7
//   SLL only             → SLL score scaled by 0.6
const hasUmami = !!umami;
const hasSll = !!sll;
let combined: number;
if (hasUmami && hasSll) {
combined = umamiScore * 0.5 + sllScore * 0.5;
} else if (hasUmami) {
combined = umamiScore * 0.7; // penalise slots with no SLL validation
} else {
combined = sllScore * 0.6; // SLL-only slots are damped more than Umami-only ones (0.6 vs 0.7)
}
return {
weekday: wd,
hour: h,
label: `${DAY_NAMES[wd]} ${String(h).padStart(2, "0")}:00h`,
// Combined 0..1 value expressed as a rounded percentage.
score: Math.round(combined * 100),
umami_sessions: umami?.sessions ?? 0,
sll_avg_engagement: sll?.avgEng ?? null,
sll_best_engagement: sll?.bestEng ?? null,
sll_post_count: sll?.count ?? 0,
// Names of the sources that contributed to this slot ("umami", "sll").
data_sources: [hasUmami ? "umami" : null, hasSll ? "sll" : null].filter(Boolean),
};
});
// Sort by score descending
scored.sort((a, b) => b.score - a.score);
const top = scored.slice(0, 10);
// Build weekday summary (best hour per weekday)
// scored is already sorted, and the strict ">" keeps the first slot seen on ties.
const byWeekday: Record<number, typeof top[0]> = {};
for (const slot of scored) {
if (!byWeekday[slot.weekday] || slot.score > byWeekday[slot.weekday].score) {
byWeekday[slot.weekday] = slot;
}
}
// Always seven entries (one per DAY_NAMES item); best_slot is null for weekdays without data.
const weekdaySummary = DAY_NAMES.map((name, wd) => ({
weekday: wd,
name,
best_slot: byWeekday[wd] ?? null,
}));
res.json({
success: true,
top_slots: top,
weekday_summary: weekdaySummary,
recommended: top[0] ?? null,
data_sources: {
umami_sessions_analyzed: umamiSlots.reduce((s, x) => s + x.sessions, 0),
umami_cache_age_min: umamiCache ? Math.round((Date.now() - umamiCache.fetchedAt) / 60000) : null,
// Number of (weekday, hour) groups returned by the SLL query, not the number of posts.
sll_posts_with_time: sllRes.rows.length,
},
note: sllRes.rows.length === 0
? "SLL has no timed posts yet — using Umami traffic data only"
: `Combined Umami + ${sllRes.rows.length} SLL engagement data point(s)`,
});
} catch (err) {
// Any failure (including the SLL query) is logged and reported as HTTP 500.
console.error("posting-time error:", err);
res.status(500).json({ success: false, error: String(err) });
}
});
// ─────────────────────────────────────────────────────────────────
// POST /api/blog/sll/sync-umami — force-refresh Umami cache
// ─────────────────────────────────────────────────────────────────
// Drops the in-memory Umami cache and reloads it right away, then reports how
// many (weekday, hour) slots and sessions were loaded. An empty reload is
// reported as success: false with a hint to check the credentials.
blogSllRouter.post("/sll/sync-umami", async (_req: Request, res: Response) => {
  umamiCache = null; // invalidate
  const refreshed = await fetchUmamiSlots();
  let sessionTotal = 0;
  for (const slot of refreshed) {
    sessionTotal += slot.sessions;
  }
  const loaded = refreshed.length > 0;
  const message = loaded ? "Umami cache refreshed" : "Umami unreachable — check credentials";
  res.json({
    success: loaded,
    slots_loaded: refreshed.length,
    total_sessions: sessionTotal,
    message,
  });
});
// ───────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────
// GET /api/blog/sll/patterns — all learned patterns // GET /api/blog/sll/patterns — all learned patterns
// ───────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────

View File

@ -956,10 +956,11 @@ async function enqueueLlmPipeline(
selectedTopic: string, selectedTopic: string,
targetAudience: string, targetAudience: string,
data: Awaited<ReturnType<typeof gatherBlogData>>, data: Awaited<ReturnType<typeof gatherBlogData>>,
additionalContext?: string,
): Promise<void> { ): Promise<void> {
return new Promise<void>((resolve) => { return new Promise<void>((resolve) => {
llmQueue.push(async () => { llmQueue.push(async () => {
await runLlmPipeline(draftId, title, selectedTopic, targetAudience, data); await runLlmPipeline(draftId, title, selectedTopic, targetAudience, data, additionalContext);
resolve(); resolve();
}); });
processLlmQueue(); processLlmQueue();
@ -989,6 +990,7 @@ async function runLlmPipeline(
selectedTopic: string, selectedTopic: string,
targetAudience: string, targetAudience: string,
data: Awaited<ReturnType<typeof gatherBlogData>>, data: Awaited<ReturnType<typeof gatherBlogData>>,
additionalContext?: string,
): Promise<void> { ): Promise<void> {
// Lazy-load the new FO pipeline // Lazy-load the new FO pipeline
const { const {
@ -1009,6 +1011,7 @@ async function runLlmPipeline(
STEP9_QA_CHECK, STEP9_QA_CHECK,
STEP10_QUALITY_SCORE, STEP10_QUALITY_SCORE,
STEP_LINKEDIN_POST, STEP_LINKEDIN_POST,
STEP_HEADLINE_GENERATION,
BLOG_TYPES, BLOG_TYPES,
buildFeedbackContext, buildFeedbackContext,
withCalibration, withCalibration,
@ -1091,7 +1094,11 @@ async function runLlmPipeline(
console.log(" Step 1/10: Topic Expansion..."); console.log(" Step 1/10: Topic Expansion...");
setProgress(draftId, 1, "Step 1/10: Topic Expansion"); setProgress(draftId, 1, "Step 1/10: Topic Expansion");
const step1 = await generate(systemPrompt, const step1 = await generate(systemPrompt,
STEP1_TOPIC_EXPANSION.replace("{{TOPIC}}", title), STEP1_TOPIC_EXPANSION
.replace("{{TOPIC}}", title)
.replace("{{ADDITIONAL_CONTEXT}}", additionalContext
? `\n\n---\nBACKGROUND REFERENCE (editorial context — use as factual direction ONLY):\n${additionalContext}\n\nCRITICAL: Do NOT copy any phrase, sentence, or wording from the above into the article or any step output. It is context for your understanding, not source material.`
: ""),
LLM_OPTS LLM_OPTS
); );
stepsCompleted = 1; stepsCompleted = 1;
@ -1281,6 +1288,26 @@ async function runLlmPipeline(
} }
stepsCompleted = 16; stepsCompleted = 16;
// ═══ Headline Generation (post-article — never copies user input) ═══
console.log(" Step 17: Headline Generation...");
let generatedHeadline: string | null = null;
try {
const headlineResult = await generate(systemPrompt,
STEP_HEADLINE_GENERATION.replace("{{ARTICLE}}", step9.text),
{ temperature: 0.5, maxTokens: 64, timeoutMs: 60000 }
);
const rawHeadline = headlineResult.text.trim().replace(/^["']|["']$/g, "");
// Sanity check: must be non-empty, ≤120 chars, and not start with "Here is"
if (rawHeadline.length > 5 && rawHeadline.length <= 120 && !rawHeadline.toLowerCase().startsWith("here is")) {
generatedHeadline = rawHeadline;
console.log(` Generated headline: "${generatedHeadline}"`);
}
} catch {
console.log(" Headline generation skipped");
}
// Use generated headline if valid, fall back to original title
const finalTitle = generatedHeadline || title;
// Extract only the article from STEP9 output (QA returns review + fixed article) // Extract only the article from STEP9 output (QA returns review + fixed article)
// Look for "COMPLETE FIXED ARTICLE" marker and take everything after it // Look for "COMPLETE FIXED ARTICLE" marker and take everything after it
let finalArticleText = step9.text; let finalArticleText = step9.text;
@ -1308,14 +1335,14 @@ async function runLlmPipeline(
.join("\n") .join("\n")
.trim(); .trim();
const draftContent = `# ${title}\n\n${finalArticleText}`; const draftContent = `# ${finalTitle}\n\n${finalArticleText}`;
const wordCount = draftContent.split(/\s+/).length; const wordCount = draftContent.split(/\s+/).length;
const finalIssues = validateArticle(draftContent); const finalIssues = validateArticle(draftContent);
// Update the draft in DB // Update the draft in DB (title updated to generated headline if available)
await pool.query( await pool.query(
`UPDATE blog_drafts `UPDATE blog_drafts
SET draft_content = $1, word_count = $2, SET title = $9, draft_content = $1, word_count = $2,
generated_by = 'fo-blog-engine-v5-autokill', generated_by = 'fo-blog-engine-v5-autokill',
pipeline_version = 'v5-auto-kill-layer', pipeline_version = 'v5-auto-kill-layer',
pipeline_steps_completed = $3, pipeline_steps_completed = $3,
@ -1342,6 +1369,7 @@ async function runLlmPipeline(
linkedinPost, linkedinPost,
linkedinCharCount, linkedinCharCount,
draftId, draftId,
finalTitle,
], ],
); );
@ -1377,12 +1405,16 @@ async function runLlmPipeline(
// POST /api/blog/generate — Generate a new blog draft (returns immediately, LLM runs async) // POST /api/blog/generate — Generate a new blog draft (returns immediately, LLM runs async)
blogRouter.post("/generate", async (req: Request, res: Response) => { blogRouter.post("/generate", async (req: Request, res: Response) => {
const { topic, speed, form_factor, use_case, use_llm } = req.body as { const { topic, speed, form_factor, use_case, use_llm, custom_title, additional_context } = req.body as {
topic?: string; topic?: string;
speed?: string; speed?: string;
form_factor?: string; form_factor?: string;
use_case?: string; use_case?: string;
use_llm?: boolean; use_llm?: boolean;
/** Override the auto-selected template title with a specific topic/title */
custom_title?: string;
/** Background context for the LLM — used as factual direction ONLY, never copied verbatim into the article */
additional_context?: string;
}; };
const selectedTopic = topic || "tutorial"; const selectedTopic = topic || "tutorial";
@ -1400,7 +1432,8 @@ blogRouter.post("/generate", async (req: Request, res: Response) => {
const year = new Date().getFullYear(); const year = new Date().getFullYear();
const template = templates[Math.floor(Math.random() * templates.length)]; const template = templates[Math.floor(Math.random() * templates.length)];
const title = template.title // custom_title overrides the template title — LLM will still generate a better headline at the end
const title = custom_title || template.title
.replace("{YEAR}", String(year)) .replace("{YEAR}", String(year))
.replace("{SPEED}", speed || "400G/800G") .replace("{SPEED}", speed || "400G/800G")
.replace("{FORM_FACTOR}", form_factor || "QSFP-DD/OSFP") .replace("{FORM_FACTOR}", form_factor || "QSFP-DD/OSFP")
@ -1453,7 +1486,7 @@ blogRouter.post("/generate", async (req: Request, res: Response) => {
console.log(`Blog LLM: Using ${health.model} — enhancing draft ${draftId} in background`); console.log(`Blog LLM: Using ${health.model} — enhancing draft ${draftId} in background`);
llmStarted = true; llmStarted = true;
// Fire-and-forget: LLM pipeline queued, updates draft when done // Fire-and-forget: LLM pipeline queued, updates draft when done
enqueueLlmPipeline(draftId, title, selectedTopic, template.target_audience, data).catch((err) => { enqueueLlmPipeline(draftId, title, selectedTopic, template.target_audience, data, additional_context).catch((err) => {
console.error(`Blog LLM background pipeline error: ${(err as Error).message}`); console.error(`Blog LLM background pipeline error: ${(err as Error).message}`);
}); });
} }
@ -1492,7 +1525,8 @@ blogRouter.post("/generate", async (req: Request, res: Response) => {
blogRouter.get("/", async (_req: Request, res: Response) => { blogRouter.get("/", async (_req: Request, res: Response) => {
try { try {
const result = await pool.query( const result = await pool.query(
`SELECT id, title, topic, target_audience, status, word_count, seo_keywords, generated_by, created_at `SELECT id, title, topic, target_audience, status, word_count, seo_keywords, generated_by,
pipeline_steps_completed, linkedin_post, linkedin_char_count, review_tag, created_at
FROM blog_drafts FROM blog_drafts
ORDER BY created_at DESC ORDER BY created_at DESC
LIMIT 50`, LIMIT 50`,
@ -1575,6 +1609,28 @@ blogRouter.put("/:id/status", async (req: Request, res: Response) => {
} }
}); });
// PUT /api/blog/:id/review — Toggle reviewed/unreviewed tag
// Flips a draft's review_tag between "reviewed" and NULL.
//
// The toggle runs as one UPDATE ... RETURNING statement, so two concurrent
// requests cannot both read the same old value and write the same new one,
// and only a single round trip to the database is needed.
//
// Responses:
//   200 { success: true, review_tag: "reviewed" | null }  — new tag value
//   404 { success: false, error: "Draft not found" }      — no row with that id
//   500 { success: false, error: <message> }              — query failed (e.g. malformed uuid)
blogRouter.put("/:id/review", async (req: Request, res: Response) => {
  try {
    const toggled = await pool.query(
      `UPDATE blog_drafts
          SET review_tag = CASE WHEN review_tag = 'reviewed' THEN NULL ELSE 'reviewed' END,
              updated_at = NOW()
        WHERE id = $1::uuid
        RETURNING review_tag`,
      [req.params.id]
    );
    if (toggled.rows.length === 0) {
      res.status(404).json({ success: false, error: "Draft not found" });
      return;
    }
    // RETURNING yields the value after the update: "reviewed" or null.
    res.json({ success: true, review_tag: toggled.rows[0].review_tag ?? null });
  } catch (err) {
    res.status(500).json({ success: false, error: (err as Error).message });
  }
});
// ═══════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════
// FEEDBACK SYSTEM (v0.2.0 — FO_Blog_LLM Training Loop) // FEEDBACK SYSTEM (v0.2.0 — FO_Blog_LLM Training Loop)
// ═══════════════════════════════════════════════════════ // ═══════════════════════════════════════════════════════

View File

@ -23,6 +23,19 @@ healthRouter.get("/", async (_req: Request, res: Response) => {
`).catch(() => ({ rows: [{}] })); `).catch(() => ({ rows: [{}] }));
const v = verStats.rows[0] || {}; const v = verStats.rows[0] || {};
// Stock observations stats
const stockStats = await pool.query(`
SELECT
COUNT(*) AS total_observations,
COUNT(DISTINCT transceiver_id) AS transceivers_with_stock,
COUNT(DISTINCT source_vendor_id) AS vendors_with_stock,
SUM(warehouse_de_qty) FILTER (WHERE warehouse_de_qty > 0) AS total_de_qty,
SUM(warehouse_global_qty) FILTER (WHERE warehouse_global_qty > 0) AS total_global_qty,
MAX(time) AS last_observation_at
FROM stock_observations
`).catch(() => ({ rows: [{}] }));
const s = stockStats.rows[0] || {};
res.json({ res.json({
success: true, success: true,
status: "healthy", status: "healthy",
@ -42,6 +55,14 @@ healthRouter.get("/", async (_req: Request, res: Response) => {
price_coverage_pct: v.total ? Math.round(Number(v.price_verified) / Number(v.total) * 100) : 0, price_coverage_pct: v.total ? Math.round(Number(v.price_verified) / Number(v.total) * 100) : 0,
fully_verified_pct: v.total ? Math.round(Number(v.fully_verified) / Number(v.total) * 100) : 0, fully_verified_pct: v.total ? Math.round(Number(v.fully_verified) / Number(v.total) * 100) : 0,
}, },
stock: {
total_observations: Number(s.total_observations || 0),
transceivers_with_stock: Number(s.transceivers_with_stock || 0),
vendors_with_stock: Number(s.vendors_with_stock || 0),
total_de_qty: Number(s.total_de_qty || 0),
total_global_qty: Number(s.total_global_qty || 0),
last_observation_at: s.last_observation_at ?? null,
},
}); });
} catch (err) { } catch (err) {
res.status(503).json({ res.status(503).json({

View File

@ -24,6 +24,7 @@ interface HotTopic {
source_type: "conference" | "manufacturer" | "trade_press" | "research" | "internal_data" | "competitor"; source_type: "conference" | "manufacturer" | "trade_press" | "research" | "internal_data" | "competitor";
data_context?: Record<string, unknown>; data_context?: Record<string, unknown>;
suggested_angle?: string; suggested_angle?: string;
date?: string;
} }
/** /**
@ -57,6 +58,7 @@ hotTopicsRouter.get("/", async (_req, res) => {
source_type: "competitor", source_type: "competitor",
data_context: drop, data_context: drop,
suggested_angle: `Price war analysis: Why ${drop.vendor} is cutting ${drop.speed_gbps}G pricing and what it means for procurement`, suggested_angle: `Price war analysis: Why ${drop.vendor} is cutting ${drop.speed_gbps}G pricing and what it means for procurement`,
date: drop.detected_at ? new Date(drop.detected_at).toISOString() : undefined,
}); });
} }
@ -82,6 +84,7 @@ hotTopicsRouter.get("/", async (_req, res) => {
source_type: "internal_data", source_type: "internal_data",
data_context: { products: newProducts.rows }, data_context: { products: newProducts.rows },
suggested_angle: `Competitor roundup: What ${vendors[0]} and others just launched — and what it means for your next PO`, suggested_angle: `Competitor roundup: What ${vendors[0]} and others just launched — and what it means for your next PO`,
date: newProducts.rows[0]?.created_at ? new Date(newProducts.rows[0].created_at).toISOString() : undefined,
}); });
} }
@ -157,6 +160,7 @@ hotTopicsRouter.get("/", async (_req, res) => {
impact_months: m.impact_horizon_months, impact_months: m.impact_horizon_months,
}, },
suggested_angle: `${m.title}: ${angle}`, suggested_angle: `${m.title}: ${angle}`,
date: m.published_at ? new Date(m.published_at).toISOString() : undefined,
}); });
} }
@ -192,6 +196,7 @@ hotTopicsRouter.get("/", async (_req, res) => {
source_type: "conference", source_type: "conference",
data_context: { talks: (talks as NogRow[]).slice(0, 3) }, data_context: { talks: (talks as NogRow[]).slice(0, 3) },
suggested_angle: `What ${event} presenters are actually deploying — lessons for your network refresh`, suggested_angle: `What ${event} presenters are actually deploying — lessons for your network refresh`,
date: topTalk.published_at ? new Date(topTalk.published_at).toISOString() : undefined,
}); });
} }
@ -226,6 +231,7 @@ hotTopicsRouter.get("/", async (_req, res) => {
source_type: "trade_press", source_type: "trade_press",
data_context: { articles: articles.slice(0, 3) }, data_context: { articles: articles.slice(0, 3) },
suggested_angle: `${theme}: What the latest announcements actually mean for network operators`, suggested_angle: `${theme}: What the latest announcements actually mean for network operators`,
date: articles[0]?.published_at ? new Date(articles[0].published_at).toISOString() : undefined,
}); });
} }
} }

View File

@ -21,6 +21,7 @@ transceiverRouter.get("/", async (req: Request, res: Response) => {
coherent: q("coherent") === "true" ? true : q("coherent") === "false" ? false : undefined, coherent: q("coherent") === "true" ? true : q("coherent") === "false" ? false : undefined,
market_status: q("market_status"), market_status: q("market_status"),
vendor: q("vendor"), vendor: q("vendor"),
verified: q("verified") as "price" | "image" | "details" | "full" | undefined,
limit: q("limit") ? parseInt(q("limit")!) : 50, limit: q("limit") ? parseInt(q("limit")!) : 50,
offset: q("offset") ? parseInt(q("offset")!) : 0, offset: q("offset") ? parseInt(q("offset")!) : 0,
}); });
@ -141,9 +142,31 @@ transceiverRouter.get("/:id", async (req: Request, res: Response) => {
} }
} }
// Last time ANY competitor scraper looked at this transceiver (regardless of result)
const lastScanResult = await pool.query(
`SELECT MAX(po.time) AS last_scan
FROM price_observations po
JOIN vendors v ON po.source_vendor_id = v.id
WHERE po.transceiver_id = $1
AND v.is_competitor = true`,
[transceiver.id]
);
const lastCompetitorScan = lastScanResult.rows[0]?.last_scan ?? null;
// Has any competitor ever listed a price for this exact product?
const competitorHasProduct = prices.some(
(p) => p.vendor_type !== "flexoptix" && p.price > 0
);
res.json({ res.json({
success: true, success: true,
data: { ...transceiver, competitor_prices: allPrices, price_anomaly: priceAnomaly }, data: {
...transceiver,
competitor_prices: allPrices,
price_anomaly: priceAnomaly,
last_competitor_scan: lastCompetitorScan,
competitor_has_product: competitorHasProduct,
},
}); });
} catch (err) { } catch (err) {
console.error("Get transceiver error:", err); console.error("Get transceiver error:", err);

View File

@ -207,10 +207,11 @@
return '<div class="gen-card" style="cursor:pointer;border-left:3px solid ' + c + '" ' + return '<div class="gen-card" style="cursor:pointer;border-left:3px solid ' + c + '" ' +
'onclick="window._generateFromHotTopic(\'' + cardId + '\')">' + 'onclick="window._generateFromHotTopic(\'' + cardId + '\')">' +
'<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px">' + '<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px">' +
'<span style="font-size:0.65rem;text-transform:uppercase;font-weight:600;color:' + c + '">' + (t.urgency || '') + '</span>' + '<span style="font-size:0.65rem;text-transform:uppercase;font-weight:600;color:' + c + '">' + (t.urgency || '') + (t.blog_title_created ? ' · <span style="background:#1b4332;color:#6ee7b7;font-size:0.6rem;padding:1px 5px;border-radius:3px;font-weight:700;text-transform:none">✓ Blog erstellt</span>' : '') + '</span>' +
'<span style="font-size:0.6rem;color:var(--text-dim)">' + (t.source_type || '') + ' · ' + (t.source || '') + '</span></div>' + '<span style="font-size:0.6rem;color:var(--text-dim)">' + (t.source_type || '') + ' · ' + (t.source || '') + '</span></div>' +
'<div class="gen-card-title" style="font-size:0.85rem;line-height:1.3">' + (t.title || '') + '</div>' + '<div class="gen-card-title" style="font-size:0.85rem;line-height:1.3">' + (t.title || '') + '</div>' +
'<div class="gen-card-sub" style="font-size:0.7rem;margin-top:4px;line-height:1.4">' + (t.suggested_angle || t.description || '').slice(0, 100) + '</div>' + '<div class="gen-card-sub" style="font-size:0.7rem;margin-top:4px;line-height:1.4">' + (t.suggested_angle || t.description || '').slice(0, 100) + '</div>' +
(t.date ? '<div style="font-size:0.62rem;color:var(--text-dim);margin-top:5px">' + new Date(t.date).toLocaleDateString('de-DE', {day:'2-digit',month:'short',year:'numeric'}) + '</div>' : '') +
'</div>'; '</div>';
}).join(''); }).join('');
}).catch(function(err) { }).catch(function(err) {

View File

@ -23,6 +23,8 @@ import { registerCompatibilityTools } from "./tools/compatibility.js";
import { registerKnowledgeTools } from "./tools/knowledge.js"; import { registerKnowledgeTools } from "./tools/knowledge.js";
import { registerContentTools } from "./tools/content.js"; import { registerContentTools } from "./tools/content.js";
import { registerMarketTools } from "./tools/market.js"; import { registerMarketTools } from "./tools/market.js";
import { registerSwitchDocTools } from "./tools/switch-docs.js";
import { finderTools, handleFinderTool } from "./tools/finder.js";
async function main() { async function main() {
const server = new McpServer({ const server = new McpServer({
@ -347,6 +349,184 @@ async function main() {
await registerKnowledgeTools(server); await registerKnowledgeTools(server);
await registerContentTools(server); await registerContentTools(server);
await registerMarketTools(server); await registerMarketTools(server);
await registerSwitchDocTools(server);
// --- Register finder.ts tools (find_flexoptix_for_switch, get_competitor_alerts) ---
for (const [toolName, toolDef] of Object.entries(finderTools)) {
const schema: Record<string, z.ZodTypeAny> = {};
for (const [propName, propDef] of Object.entries(toolDef.inputSchema.properties ?? {})) {
const p = propDef as { type: string; description?: string };
schema[propName] = p.type === "number"
? z.number().optional().describe(p.description ?? "")
: z.string().optional().describe(p.description ?? "");
}
server.tool(
toolName,
toolDef.description,
schema,
async (args) => {
const result = await handleFinderTool(toolName, args as Record<string, unknown>);
return { content: [{ type: "text" as const, text: result }] };
}
);
}
// --- Ollama LLM tools: market analysis (qwen2.5:14b) + blog generation (fo-blog-v5) ---
const OLLAMA_BASE = process.env["OLLAMA_BASE_URL"] ?? "https://ollama.fichtmueller.org";
// Tool: analyze_market_with_llm
// Collects hype-cycle, recent pricing and news rows for the requested
// technology, folds them into a prompt, and asks qwen2.5:14b (via Ollama's
// /api/generate) for a narrative analysis. If the LLM call fails, the tool
// returns an explanatory text result instead of throwing.
server.tool(
  "analyze_market_with_llm",
  "Deep market analysis for a transceiver technology using local LLM (qwen2.5:14b). Provides expert narrative on adoption trends, pricing trajectory, competitive dynamics, and buy/wait/hold recommendation.",
  {
    technology: z.string().describe("Technology to analyze, e.g. '400G QSFP-DD', '800G OSFP', '100G ZR'"),
    context: z.string().optional().describe("Additional context or specific questions to address"),
    horizon: z.enum(["3m", "6m", "12m", "18m"]).default("12m").describe("Forecast horizon"),
  },
  async ({ technology, context, horizon }) => {
    // The pricing query filters on transceivers.speed, so only the data-rate
    // token of the technology string is useful there. Splitting on "-" turned
    // "400G QSFP-DD" into "400G QSFP"; extract the rate ("400G", "1.6T")
    // instead and fall back to the first word when no rate is present.
    // NOTE(review): assumes speed values are bare rates such as "400G" — confirm against the schema.
    const rateMatch = /(\d+(?:\.\d+)?)\s*([GT])/i.exec(technology);
    const speedToken = rateMatch
      ? `${rateMatch[1] ?? ""}${(rateMatch[2] ?? "").toUpperCase()}`
      : (technology.trim().split(/[\s-]+/)[0] || technology);

    // Gather DB data to enrich the prompt
    const [hype, prices, news] = await Promise.all([
      pool.query(
        `SELECT hype_phase, hype_score, ROUND(current_share*100,1) AS share_pct,
                asp_current_usd, asp_decline_pct_3y, years_to_next_phase
         FROM hype_cycle_analysis WHERE technology ILIKE $1
         ORDER BY computed_at DESC LIMIT 1`,
        [`%${technology}%`]
      ),
      pool.query(
        `SELECT v.name AS vendor, ROUND(MIN(po.price)::NUMERIC,2) AS min_price,
                ROUND(MAX(po.price)::NUMERIC,2) AS max_price, po.currency
         FROM price_observations po JOIN vendors v ON v.id = po.source_vendor_id
         JOIN transceivers t ON t.id = po.transceiver_id
         WHERE t.speed ILIKE $1 AND po.time > NOW() - INTERVAL '7 days'
         GROUP BY v.name, po.currency ORDER BY min_price ASC LIMIT 10`,
        [`%${speedToken}%`]
      ),
      // News lookup is best-effort: a failing query yields no rows.
      pool.query(
        `SELECT title, summary, published_at FROM news_articles
         WHERE content_vector @@ plainto_tsquery('english', $1)
         ORDER BY published_at DESC LIMIT 5`,
        [technology]
      ).catch(() => ({ rows: [] })),
    ]);

    // Each section is included only when its query returned data.
    const dataContext = [
      hype.rows[0] ? `Hype Cycle: phase=${hype.rows[0].hype_phase}, score=${hype.rows[0].hype_score}/100, market_share=${hype.rows[0].share_pct}%, OEM_ASP=$${hype.rows[0].asp_current_usd}, ASP_decline_3y=${hype.rows[0].asp_decline_pct_3y}%, years_to_next_phase=${hype.rows[0].years_to_next_phase}` : "",
      // min and max are separated explicitly; previously they ran together ("12.5099.00").
      prices.rows.length > 0 ? `Current pricing: ${prices.rows.map((r) => `${r.vendor} ${r.currency}${r.min_price}-${r.max_price}`).join(", ")}` : "",
      news.rows.length > 0 ? `Recent news: ${news.rows.map((r: {title:string}) => r.title).join(" | ")}` : "",
    ].filter(Boolean).join("\n");

    const prompt = `You are a senior optical networking market analyst at a transceiver intelligence platform.
Technology: ${technology}
Forecast horizon: ${horizon}
${dataContext ? `\nLive data:\n${dataContext}` : ""}
${context ? `\nSpecific questions: ${context}` : ""}
Provide a concise expert market analysis covering:
1. Current market phase and what it means for buyers/sellers
2. Price trajectory over the next ${horizon} — will prices rise, fall, or stabilize?
3. Key demand drivers and risks
4. Competitive dynamics (OEM vs compatible vendors)
5. Buy / Wait / Hold recommendation with reasoning
Keep the analysis actionable and data-driven. Under 400 words.`;

    try {
      const resp = await fetch(`${OLLAMA_BASE}/api/generate`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ model: "qwen2.5:14b", prompt, stream: false }),
        signal: AbortSignal.timeout(120_000),
      });
      if (!resp.ok) throw new Error(`Ollama HTTP ${resp.status}`);
      const data = await resp.json() as { response?: string };
      return { content: [{ type: "text" as const, text: data.response ?? "No response from model." }] };
    } catch (err: unknown) {
      // Narrow instead of asserting: a non-Error throw would otherwise print "undefined".
      const reason = err instanceof Error ? err.message : String(err);
      return { content: [{ type: "text" as const, text: `LLM unavailable: ${reason}. Use /api/hype-cycle/analysis for raw data.` }] };
    }
  }
);
// Tool: generate_blog_post
// Builds a system + user prompt for a Flexoptix blog article, optionally
// enriched with 7-day price observations for speed keywords found in the
// topic, and sends it to Ollama's /api/chat. The fine-tuned fo-blog-v5 model
// is tried first; on a non-OK HTTP response the same request is retried with
// qwen2.5:14b. Transport errors and timeouts are reported as a text result.
server.tool(
  "generate_blog_post",
  "Generate a professional Flexoptix blog post using the fine-tuned fo-blog-v5 model (Ollama), with qwen2.5:14b as fallback. Automatically enriched with live pricing data.",
  {
    topic: z.string().describe("Blog topic, e.g. '400G QSFP-DD vs 400G ZR — which for your DC?'"),
    target_audience: z.enum(["network_engineer", "procurement", "executive", "general"]).default("network_engineer").describe("Target reader"),
    tone: z.enum(["technical", "consultative", "educational"]).default("consultative").describe("Writing tone"),
    word_count: z.number().default(600).describe("Target word count (300-1000)"),
  },
  async ({ topic, target_audience, tone, word_count }) => {
    const PRIMARY_MODEL = "fo-blog-v5";
    const FALLBACK_MODEL = "qwen2.5:14b";

    // The schema documents 300-1000 but does not enforce it; clamp here so an
    // out-of-range or fractional value is corrected rather than rejected.
    const targetWords = Math.min(1000, Math.max(300, Math.round(word_count)));

    // Gather enrichment data. Keywords are de-duplicated so the ILIKE ANY
    // array does not carry repeats such as "400G" twice.
    const keywords = [...new Set(
      (topic.match(/\b(\d+G|QSFP|SFP|OSFP|ZR|SR|LR|ER)\b/gi) ?? []).map((k) => k.toUpperCase())
    )];
    // Pricing lookup is best-effort: a failing query yields no rows.
    const priceData = keywords.length > 0 ? await pool.query(
      `SELECT v.name AS vendor, t.form_factor, t.speed,
              ROUND(MIN(po.price)::NUMERIC,2) AS min_price, po.currency
       FROM price_observations po JOIN vendors v ON v.id = po.source_vendor_id
       JOIN transceivers t ON t.id = po.transceiver_id
       WHERE t.speed ILIKE ANY($1) AND po.time > NOW() - INTERVAL '7 days'
       GROUP BY v.name, t.form_factor, t.speed, po.currency ORDER BY min_price ASC LIMIT 8`,
      [keywords.map((k: string) => `%${k}%`)]
    ).catch(() => ({ rows: [] })) : { rows: [] };

    const enrichment = priceData.rows.length > 0
      ? `\nCurrent market prices (use naturally in article):\n${priceData.rows.map((r: {vendor:string;form_factor:string;speed:string;min_price:string;currency:string}) => `- ${r.form_factor} ${r.speed}: from ${r.currency}${r.min_price} at ${r.vendor}`).join("\n")}`
      : "";

    const systemPrompt = `You are a professional technical writer for Flexoptix, Europe's leading transceiver specialist. Write in a ${tone} tone for a ${target_audience.replace(/_/g," ")} audience. Articles should highlight Flexoptix expertise and the value of our FlexBox universal coding solution.`;
    const userPrompt = `Write a ${targetWords}-word blog post on: "${topic}"
${enrichment}
Include:
- Compelling introduction
- Technical explanation appropriate for audience
- Real pricing context where available
- Call-to-action mentioning Flexoptix or FlexBox
- SEO-friendly subheadings
Do not include a title (added separately). Start directly with the article body.`;

    // One chat request; both models receive the identical payload.
    const requestArticle = (model: string) =>
      fetch(`${OLLAMA_BASE}/api/chat`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          model,
          messages: [
            { role: "system", content: systemPrompt },
            { role: "user", content: userPrompt },
          ],
          stream: false,
        }),
        signal: AbortSignal.timeout(180_000),
      });

    try {
      const primary = await requestArticle(PRIMARY_MODEL);
      if (primary.ok) {
        const data = await primary.json() as { message?: { content?: string } };
        return { content: [{ type: "text" as const, text: data.message?.content ?? "No content generated." }] };
      }

      // Fallback to qwen2.5:14b if fo-blog-v5 not available
      const fallback = await requestArticle(FALLBACK_MODEL);
      if (!fallback.ok) {
        throw new Error(`Both fo-blog-v5 and qwen2.5:14b unavailable (HTTP ${primary.status} / HTTP ${fallback.status})`);
      }
      const fallbackData = await fallback.json() as { message?: { content?: string } };
      return { content: [{ type: "text" as const, text: `[Generated with qwen2.5:14b — fo-blog-v5 unavailable]\n\n${fallbackData.message?.content ?? "No content"}` }] };
    } catch (err: unknown) {
      // Narrow instead of asserting: a non-Error throw would otherwise print "undefined".
      const reason = err instanceof Error ? err.message : String(err);
      return { content: [{ type: "text" as const, text: `LLM unavailable: ${reason}. Check OLLAMA_BASE_URL env var.` }] };
    }
  }
);
// Start server // Start server
const transport = new StdioServerTransport(); const transport = new StdioServerTransport();

View File

@ -1,34 +1,50 @@
/** /**
* Ascent Optics Scraper US-based compatible transceiver vendor * Ascent Optics Scraper Chinese OEM transceiver manufacturer
* *
* ascentoptics.com product catalog with USD prices. * ascentoptics.com product catalog loaded via JSON API endpoint.
* Tries /catalog/ and /products/ as entry points. * Products are served via /product-list?is_render=1&category_id=CID
* (HTML table in JSON response). No retail pricing "Get Quote" model.
* Category IDs are discovered from data-cid attributes on sub-category pages.
* *
* Rate limited: 1 req/2sec. * Rate limited: 1 req/2sec.
*/ */
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db"; import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db";
import { contentHash } from "../utils/hash";
import * as cheerio from "cheerio"; import * as cheerio from "cheerio";
const BASE = "https://ascentoptics.com"; const BASE = "https://ascentoptics.com";
const CATALOG_URLS = [
"/catalog/",
"/products/",
"/products/transceivers/",
"/catalog/transceivers/",
];
const MAX_PAGES = 15;
const HEADERS = { const HEADERS = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
Accept: "text/html,application/xhtml+xml", Accept: "text/html,application/xhtml+xml,application/json,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9", "Accept-Language": "en-US,en;q=0.9",
"X-Requested-With": "XMLHttpRequest",
Referer: "https://ascentoptics.com/optical-transceivers/",
}; };
// Known transceiver categories, one entry per site sub-category page:
//   slug        path of the sub-category page (appended to BASE)
//   categoryId  value sent as `category_id` to the /product-list endpoint;
//               0 means the ID is not known here and is looked up at run time
//               from the sub-category page's data-cid attribute
//   formFactor / speed / speedGbps
//               attributes assigned to every product parsed from that category
// DAC/AOC/DCO/LPO excluded — transceivers only
const CATEGORIES = [
{ slug: "/10g-sfp/", categoryId: 33, formFactor: "SFP+", speed: "10G", speedGbps: 10 },
{ slug: "/10g-xfp/", categoryId: 34, formFactor: "XFP", speed: "10G", speedGbps: 10 },
{ slug: "/25g-sfp28/", categoryId: 22, formFactor: "SFP28", speed: "25G", speedGbps: 25 },
{ slug: "/40g-qsfp/", categoryId: 20, formFactor: "QSFP+", speed: "40G", speedGbps: 40 },
{ slug: "/100g-qsfp28/", categoryId: 15, formFactor: "QSFP28", speed: "100G", speedGbps: 100 },
{ slug: "/100g-sfp112/", categoryId: 0, formFactor: "SFP112", speed: "100G", speedGbps: 100 },
{ slug: "/200g-qsfp56/", categoryId: 3, formFactor: "QSFP56", speed: "200G", speedGbps: 200 },
{ slug: "/200g-qsfp28-dd/", categoryId: 4, formFactor: "QSFP-DD", speed: "200G", speedGbps: 200 },
{ slug: "/400g-qsfp56-dd/", categoryId: 5, formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 },
{ slug: "/400g-osfp/", categoryId: 6, formFactor: "OSFP", speed: "400G", speedGbps: 400 },
{ slug: "/400g-qsfp112/", categoryId: 7, formFactor: "QSFP112", speed: "400G", speedGbps: 400 },
{ slug: "/800g-osfp/", categoryId: 9, formFactor: "OSFP", speed: "800G", speedGbps: 800 },
{ slug: "/800g-qsfp-dd800-200g-per-line/", categoryId: 121, formFactor: "QSFP-DD", speed: "800G", speedGbps: 800 },
{ slug: "/800g-qsfp112-dd/", categoryId: 0, formFactor: "QSFP112", speed: "800G", speedGbps: 800 },
{ slug: "/50g-sfp56/", categoryId: 0, formFactor: "SFP56", speed: "50G", speedGbps: 50 },
{ slug: "/16g-sfp/", categoryId: 0, formFactor: "SFP", speed: "16G", speedGbps: 16 },
];
interface Product { interface Product {
partNumber: string; partNumber: string;
name: string; name: string;
url: string; url: string;
price?: number;
formFactor: string; formFactor: string;
speed: string; speed: string;
speedGbps: number; speedGbps: number;
@ -42,37 +58,29 @@ function sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms)); return new Promise((resolve) => setTimeout(resolve, ms));
} }
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
const lower = text.toLowerCase();
if (lower.includes("osfp") && !lower.includes("qsfp")) return { formFactor: "OSFP", speed: "400G", speedGbps: 400 };
if (lower.includes("qsfp-dd")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
if (lower.includes("qsfp28")) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
if (lower.includes("qsfp+") || lower.includes("qsfp plus")) return { formFactor: "QSFP+", speed: "40G", speedGbps: 40 };
if (lower.includes("sfp56")) return { formFactor: "SFP56", speed: "50G", speedGbps: 50 };
if (lower.includes("sfp28") || lower.includes("25g")) return { formFactor: "SFP28", speed: "25G", speedGbps: 25 };
if (lower.includes("sfp+") || lower.includes("10gbase") || lower.includes("10g")) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
if (lower.includes("xfp")) return { formFactor: "XFP", speed: "10G", speedGbps: 10 };
if (lower.includes("1000base") || lower.includes("1g")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
if (lower.includes("sfp") && !lower.includes("qsfp")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
}
function detectReach(text: string): { label: string; meters: number } | undefined { function detectReach(text: string): { label: string; meters: number } | undefined {
const t = text.toUpperCase();
const patterns: [RegExp, string, number][] = [ const patterns: [RegExp, string, number][] = [
[/\b80\s*km\b/i, "80km", 80000], [/\b120\s*KM\b/, "120km", 120000],
[/\b40\s*km\b/i, "40km", 40000], [/\b80\s*KM\b/, "80km", 80000],
[/\b20\s*km\b/i, "20km", 20000], [/\b70\s*KM\b/, "70km", 70000],
[/\b10\s*km\b/i, "10km", 10000], [/\b60\s*KM\b/, "60km", 60000],
[/\b2\s*km\b/i, "2km", 2000], [/\b40\s*KM\b/, "40km", 40000],
[/\b550\s*m\b/i, "550m", 550], [/\b20\s*KM\b/, "20km", 20000],
[/\b300\s*m\b/i, "300m", 300], [/\b10\s*KM\b/, "10km", 10000],
[/\b100\s*m\b/i, "100m", 100], [/\b5\s*KM\b/, "5km", 5000],
[/\b2\s*KM\b/, "2km", 2000],
[/\b550\s*M\b/, "550m", 550],
[/\b500\s*M\b/, "500m", 500],
[/\b300\s*M\b/, "300m", 300],
[/\b220\s*M\b/, "220m", 220],
[/\b100\s*M\b/, "100m", 100],
[/\bLR4\b/, "10km", 10000], [/\bLR\b/, "10km", 10000], [/\bLR4\b/, "10km", 10000], [/\bLR\b/, "10km", 10000],
[/\bER4?\b/, "40km", 40000], [/\bZR4?\b/, "80km", 80000], [/\bER\b/, "40km", 40000], [/\bZR\b/, "80km", 80000],
[/\bSR4?\b/, "300m", 300], [/\bDR4?\b/, "500m", 500], [/\bFR4?\b/, "2km", 2000], [/\bSR\b/, "300m", 300], [/\bDR\b/, "500m", 500], [/\bFR\b/, "2km", 2000],
]; ];
for (const [regex, label, meters] of patterns) { for (const [regex, label, meters] of patterns) {
if (regex.test(text)) return { label, meters }; if (regex.test(t)) return { label, meters };
} }
return undefined; return undefined;
} }
@ -89,96 +97,81 @@ function detectWavelength(text: string): string {
return match ? match[1] : ""; return match ? match[1] : "";
} }
function parseProductList(html: string, sourceUrl: string): Product[] { /** Fetch category ID from a sub-page's data-cid attribute (fallback for categoryId=0 entries) */
const $ = cheerio.load(html); async function fetchCategoryId(slug: string): Promise<number> {
try {
const resp = await fetch(BASE + slug, {
headers: { ...HEADERS, "X-Requested-With": "" },
signal: AbortSignal.timeout(15000),
});
if (!resp.ok) return 0;
const html = await resp.text();
const m = html.match(/data-cid="(\d+)"/);
return m ? parseInt(m[1]) : 0;
} catch {
return 0;
}
}
/**
 * Fetch the product table HTML for one category via the /product-list JSON API.
 *
 * @param categoryId - value sent as the `category_id` query parameter
 * @param slug - sub-category path, sent as the Referer for the request
 * @returns the `product_table_list` HTML string from the JSON response, or ""
 *          when the field is missing or is not a string
 * @throws Error when the endpoint answers with a non-2xx status; errors from
 *         fetch itself (network failure, 30 s timeout) and from JSON parsing
 *         propagate unchanged
 */
async function fetchProductTable(categoryId: number, slug: string): Promise<string> {
  const url = `${BASE}/product-list?is_render=1&category_id=${categoryId}`;
  const resp = await fetch(url, {
    headers: { ...HEADERS, Referer: BASE + slug },
    signal: AbortSignal.timeout(30000),
  });
  if (!resp.ok) throw new Error(`HTTP ${resp.status} for /product-list?category_id=${categoryId}`);

  // The payload is external input: narrow it instead of trusting a cast, so a
  // non-string product_table_list is never passed on as if it were HTML.
  const data: unknown = await resp.json();
  const table = typeof data === "object" && data !== null
    ? (data as { product_table_list?: unknown }).product_table_list
    : undefined;
  return typeof table === "string" ? table : "";
}
/** Parse HTML table returned by /product-list API */
function parseProductTable(
tableHtml: string,
cat: typeof CATEGORIES[number],
): Product[] {
const $ = cheerio.load(tableHtml);
const products: Product[] = []; const products: Product[] = [];
const cardSelectors = [ $("tr").each((_i, row) => {
".product-item", ".product", ".item", "li.product", const cells = $(row).find("td");
".product-card", "tr", "article", ".catalog-item", if (cells.length < 3) return;
".product-list-item", ".result",
];
for (const sel of cardSelectors) { // Column layout: [image] [part_number] [description] [data_rate] [distance] [wavelength] [connector] [datasheet] [quote]
if ($(sel).length >= 2) { const partNumberCell = $(cells[1]);
$(sel).each((_i, el) => { const descCell = $(cells[2]);
const text = $(el).text().trim();
if (!/sfp|qsfp|xfp|transceiver|optic/i.test(text)) return;
const nameEl = $(el).find("h2, h3, h4, .name, .product-name, .title, td, a").first(); const rawPart = partNumberCell.text().trim();
const name = nameEl.text().trim() || text.slice(0, 120); const desc = descCell.text().trim();
if (!name || name.length < 5) return;
const linkEl = $(el).find("a[href]").first(); // Skip header rows and non-product rows
const href = linkEl.attr("href") || sourceUrl; if (!rawPart || rawPart.length < 3 || /part\s*no|description/i.test(rawPart)) return;
const url = href.startsWith("http") ? href : BASE + href; if (rawPart.length > 80) return;
// Ascent Optics part numbers: e.g. AS-SFP-10G-SR, SFP-10G-LR-AS const url = (() => {
const partNumMatch = name.match(/\b(AS[-_][A-Z0-9-]+)\b/i) || const a = partNumberCell.find("a[href]").first().attr("href") ?? descCell.find("a[href]").first().attr("href");
name.match(/\b([A-Z]{2,}[-][A-Z0-9]+[-][A-Z0-9]+[-][A-Z0-9]+)\b/) || if (!a) return BASE + cat.slug;
text.match(/Part\s*(?:No\.?|Number|#)?\s*:?\s*([A-Z0-9-]{6,})/i); return a.startsWith("http") ? a : BASE + a;
const partNumber = partNumMatch?.[1] || })();
name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] ||
name.replace(/\s+/g, "-").slice(0, 60);
// USD price const combined = `${rawPart} ${desc}`;
const priceText = $(el).find(".price, .product-price, .amount, [data-price]").text(); const reach = detectReach(combined);
const priceMatch = priceText.match(/\$\s*([\d,]+\.?\d{0,2})/);
let price: number | undefined;
if (priceMatch) {
const parsed = parseFloat(priceMatch[1].replace(",", ""));
if (parsed > 0 && parsed < 50000) price = parsed;
}
const ff = detectFormFactor(name + " " + text);
const reach = detectReach(name + " " + text);
products.push({ products.push({
partNumber, partNumber: rawPart,
name, name: desc || rawPart,
url, url,
price, formFactor: cat.formFactor,
...ff, speed: cat.speed,
speedGbps: cat.speedGbps,
reachLabel: reach?.label, reachLabel: reach?.label,
reachMeters: reach?.meters, reachMeters: reach?.meters,
fiberType: detectFiber(name + " " + text), fiberType: detectFiber(combined),
wavelength: detectWavelength(name + " " + text), wavelength: detectWavelength(combined),
}); });
}); });
if (products.length > 0) break;
}
}
// Fallback: transceiver-relevant anchors return products;
if (products.length === 0) {
$("a[href]").each((_i, el) => {
const name = $(el).text().trim();
const href = $(el).attr("href") || "";
if (name.length < 8 || name.length > 200 || !/sfp|qsfp|transceiver/i.test(name)) return;
const url = href.startsWith("http") ? href : BASE + href;
const ff = detectFormFactor(name);
const reach = detectReach(name);
products.push({
partNumber: name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60),
name, url, ...ff,
reachLabel: reach?.label, reachMeters: reach?.meters,
fiberType: detectFiber(name), wavelength: detectWavelength(name),
});
});
}
const seen = new Set<string>();
return products.filter((p) => {
if (!p.url || seen.has(p.url)) return false;
seen.add(p.url);
return true;
});
}
async function fetchPage(url: string): Promise<string> {
const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(30000) });
if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
return resp.text();
} }
export async function scrapeAscentOptics(): Promise<void> { export async function scrapeAscentOptics(): Promise<void> {
@ -188,57 +181,34 @@ export async function scrapeAscentOptics(): Promise<void> {
"Ascent Optics", "Ascent Optics",
"compatible", "compatible",
"https://ascentoptics.com", "https://ascentoptics.com",
BASE + CATALOG_URLS[0], BASE + "/optical-transceivers/",
); );
const allProducts: Product[] = [];
const seenUrls = new Set<string>();
const triedUrls = new Set<string>();
for (const catalogPath of CATALOG_URLS) {
const catalogUrl = BASE + catalogPath;
if (triedUrls.has(catalogUrl)) continue;
triedUrls.add(catalogUrl);
console.log(` Fetching catalog: ${catalogUrl}`);
try {
const html = await fetchPage(catalogUrl);
const pageProducts = parseProductList(html, catalogUrl);
for (const p of pageProducts) {
if (!seenUrls.has(p.url)) { seenUrls.add(p.url); allProducts.push(p); }
}
console.log(` Found ${pageProducts.length} products`);
await sleep(2000);
// Paginate from each working catalog URL
for (let page = 2; page <= MAX_PAGES; page++) {
const pageUrl = `${catalogUrl}?page=${page}`;
try {
const pageHtml = await fetchPage(pageUrl);
const paginated = parseProductList(pageHtml, pageUrl);
if (paginated.length === 0) break;
for (const p of paginated) {
if (!seenUrls.has(p.url)) { seenUrls.add(p.url); allProducts.push(p); }
}
console.log(` Page ${page}: ${paginated.length} products`);
await sleep(2000);
} catch {
break;
}
}
} catch (err) {
console.warn(` Failed: ${(err as Error).message}`);
}
}
console.log(`\nTotal unique products: ${allProducts.length}`);
let totalProducts = 0; let totalProducts = 0;
let priceUpdates = 0;
for (const product of allProducts) { for (const cat of CATEGORIES) {
let cid = cat.categoryId;
// Resolve unknown category IDs dynamically
if (cid === 0) {
cid = await fetchCategoryId(cat.slug);
if (cid === 0) {
console.log(` Skipping ${cat.slug} — category ID not found`);
await sleep(1000);
continue;
}
}
console.log(`\n--- ${cat.formFactor} (${cat.speed}) [cid=${cid}] ---`);
try { try {
const txId = await findOrCreateScrapedTransceiver({ const tableHtml = await fetchProductTable(cid, cat.slug);
const products = parseProductTable(tableHtml, cat);
console.log(` Found ${products.length} products`);
for (const product of products) {
try {
await findOrCreateScrapedTransceiver({
partNumber: product.partNumber, partNumber: product.partNumber,
vendorId, vendorId,
formFactor: product.formFactor, formFactor: product.formFactor,
@ -250,27 +220,19 @@ export async function scrapeAscentOptics(): Promise<void> {
wavelengths: product.wavelength, wavelengths: product.wavelength,
category: "DataCenter", category: "DataCenter",
}); });
if (product.price && product.price > 0) {
const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber }));
const updated = await upsertPriceObservation({
transceiverId: txId,
sourceVendorId: vendorId,
price: product.price,
currency: "USD",
stockLevel: "in_stock",
url: product.url,
contentHash: hash,
});
if (updated) priceUpdates++;
}
totalProducts++; totalProducts++;
} catch (err) { } catch (err) {
console.warn(` Error saving ${product.partNumber}: ${(err as Error).message.slice(0, 80)}`); console.warn(` Error: ${(err as Error).message.slice(0, 80)}`);
} }
} }
} catch (err) {
console.error(` Category failed: ${(err as Error).message}`);
}
console.log(`\n=== Ascent Optics Complete: ${totalProducts} products, ${priceUpdates} prices ===`); await sleep(2000);
}
console.log(`\n=== Ascent Optics Complete: ${totalProducts} products (catalog only — no pricing) ===`);
} }
if (require.main === module) { if (require.main === module) {

View File

@ -168,6 +168,7 @@ export async function scrapeAtgbics(): Promise<void> {
maxRequestsPerCrawl: MAX_PAGES, maxRequestsPerCrawl: MAX_PAGES,
requestHandlerTimeoutSecs: 60, requestHandlerTimeoutSecs: 60,
headless: true, headless: true,
useSessionPool: false, // Disable session pool to avoid SDK_SESSION_POOL_STATE.json crash
...(proxyConfiguration ? { proxyConfiguration } : {}), ...(proxyConfiguration ? { proxyConfiguration } : {}),
launchContext: { launchContext: {
launchOptions: { launchOptions: {

View File

@ -285,7 +285,7 @@ async function saveEnrichment(switchId: string, result: EnrichResult): Promise<v
// Find eBay vendor ID (create if needed) // Find eBay vendor ID (create if needed)
const ebayVendorResult = await pool.query( const ebayVendorResult = await pool.query(
`INSERT INTO vendors (name, slug, type, website_url) `INSERT INTO vendors (name, slug, type, website)
VALUES ('eBay Marketplace', 'ebay', 'marketplace', 'https://www.ebay.de') VALUES ('eBay Marketplace', 'ebay', 'marketplace', 'https://www.ebay.de')
ON CONFLICT (slug) DO UPDATE SET name = EXCLUDED.name ON CONFLICT (slug) DO UPDATE SET name = EXCLUDED.name
RETURNING id` RETURNING id`
@ -353,7 +353,7 @@ export async function enrichTransceiversFromEbay(limit = 50): Promise<void> {
// Find eBay vendor // Find eBay vendor
const ebayVendor = await pool.query( const ebayVendor = await pool.query(
`INSERT INTO vendors (name, slug, type, website_url) `INSERT INTO vendors (name, slug, type, website)
VALUES ('eBay Marketplace', 'ebay', 'marketplace', 'https://www.ebay.de') VALUES ('eBay Marketplace', 'ebay', 'marketplace', 'https://www.ebay.de')
ON CONFLICT (slug) DO UPDATE SET updated_at = NOW() ON CONFLICT (slug) DO UPDATE SET updated_at = NOW()
RETURNING id` RETURNING id`

View File

@ -114,13 +114,24 @@ function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product
// Collapse whitespace for easier regex matching // Collapse whitespace for easier regex matching
const collapsed = html.replace(/\s+/g, " "); const collapsed = html.replace(/\s+/g, " ");
// BigCommerce card-title pattern: // BigCommerce card pattern (attribute order varies by theme version):
// <a aria-label="Product Name, £XX.XX" href="URL" data-event-type="product-click"> // Old: <a aria-label="Name, £XX.XX" href="URL" data-event-type="product-click">
const productRegex = /aria-label="([^"]+)"\s+href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"[^>]*data-event-type="product-click"/gi; // New: <a href="URL" class="card-figure__link..." aria-label="Name, £XX.XX">
// Two-pass approach: find all product <a> tags regardless of attribute order
const productRegex = /href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"[^>]*aria-label="([^"]+)"/gi;
const productRegex2 = /aria-label="([^"]+)"[^>]*href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"/gi;
let match; let match;
const rawMatches: { url: string; label: string; index: number }[] = [];
while ((match = productRegex.exec(collapsed)) !== null) { while ((match = productRegex.exec(collapsed)) !== null) {
const label = match[1].trim(); rawMatches.push({ url: match[1].trim(), label: match[2].trim(), index: match.index });
const url = match[2]; }
if (rawMatches.length === 0) {
while ((match = productRegex2.exec(collapsed)) !== null) {
rawMatches.push({ url: match[2].trim(), label: match[1].trim(), index: match.index });
}
}
for (const { url, label: rawLabel, index: matchIndex } of rawMatches) {
const label = rawLabel.replace(/\s+/g, " ").trim();
// aria-label contains "Product Name, £XX.XX" // aria-label contains "Product Name, £XX.XX"
// Split on last comma to separate name and price // Split on last comma to separate name and price
@ -130,7 +141,7 @@ function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product
// Fallback: extract price from data-price-asc attribute on parent <li> // Fallback: extract price from data-price-asc attribute on parent <li>
if (!price) { if (!price) {
const priceContext = collapsed.slice(Math.max(0, match.index - 500), match.index); const priceContext = collapsed.slice(Math.max(0, matchIndex - 500), matchIndex);
const dataPriceMatch = priceContext.match(/data-price-asc="(\d+)"/); const dataPriceMatch = priceContext.match(/data-price-asc="(\d+)"/);
if (dataPriceMatch) price = parseFloat(dataPriceMatch[1]); if (dataPriceMatch) price = parseFloat(dataPriceMatch[1]);
} }

View File

@ -235,17 +235,20 @@ export async function scrapeSkylane(): Promise<void> {
} }
} }
// Try pagination for this URL // Try pagination — break as soon as no NEW unique product URLs appear
// (Skylane uses Algolia/InstantSearch: ?page=N returns same content, so
// content-based termination is required instead of empty-page detection)
for (let page = 2; page <= MAX_PAGES; page++) { for (let page = 2; page <= MAX_PAGES; page++) {
const pageUrl = `${catalogUrl}?page=${page}`; const pageUrl = `${catalogUrl}?page=${page}`;
try { try {
const pageHtml = await fetchPage(pageUrl); const pageHtml = await fetchPage(pageUrl);
const paginated = parseProductList(pageHtml, pageUrl); const paginated = parseProductList(pageHtml, pageUrl);
if (paginated.length === 0) break; const newCount = paginated.filter((p) => !seenUrls.has(p.url)).length;
if (newCount === 0) break; // No genuinely new products → end of real pagination
for (const p of paginated) { for (const p of paginated) {
if (!seenUrls.has(p.url)) { seenUrls.add(p.url); allProducts.push(p); } if (!seenUrls.has(p.url)) { seenUrls.add(p.url); allProducts.push(p); }
} }
console.log(` Page ${page}: ${paginated.length} products`); console.log(` Page ${page}: ${newCount} new products`);
await sleep(2000); await sleep(2000);
} catch { } catch {
break; break;

View File

@ -4,7 +4,7 @@
set -e set -e
ERIK="root@217.154.82.179" ERIK="root@82.165.222.127"
REMOTE_PATH="/opt/tip" REMOTE_PATH="/opt/tip"
LOCAL_PATH="$(cd "$(dirname "$0")/.." && pwd)" LOCAL_PATH="$(cd "$(dirname "$0")/.." && pwd)"