diff --git a/packages/api/src/db/queries.ts b/packages/api/src/db/queries.ts index 07f243a..9f5fe89 100644 --- a/packages/api/src/db/queries.ts +++ b/packages/api/src/db/queries.ts @@ -13,6 +13,7 @@ export interface SearchParams { coherent?: boolean; market_status?: string; vendor?: string; + verified?: "price" | "image" | "details" | "full"; limit?: number; offset?: number; } @@ -82,6 +83,10 @@ export async function searchTransceivers(params: SearchParams) { values.push(`%${params.vendor}%`); idx++; } + if (params.verified) { + const col = params.verified === "full" ? "fully_verified" : params.verified + "_verified"; + conditions.push(`t.${col} = true`); + } const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : ""; const limit = params.limit || 50; diff --git a/packages/api/src/llm/fo-blog-pipeline.ts b/packages/api/src/llm/fo-blog-pipeline.ts index e4eda4e..16c2572 100644 --- a/packages/api/src/llm/fo-blog-pipeline.ts +++ b/packages/api/src/llm/fo-blog-pipeline.ts @@ -304,7 +304,47 @@ Given the topic below, expand it into: Topic: {{TOPIC}} -Keep it practical, not theoretical. Think about what actually goes wrong in production.`; +Keep it practical, not theoretical. Think about what actually goes wrong in production.{{ADDITIONAL_CONTEXT}}`; + +// ═══════════════════════════════════════════════════════ +// STEP HEADLINE: Generate a compelling article headline +// (runs AFTER full article is written — never copies user input) +// ═══════════════════════════════════════════════════════ + +export const STEP_HEADLINE_GENERATION = `You are writing a headline for a Flexoptix technical blog article. + +Read the article below and generate ONE headline. + +RULES — NON-NEGOTIABLE: +- Maximum 10 words +- Active voice, no adverbs +- No buzzwords: no "game-changer", "revolutionize", "deep dive", "exploring", "essential" +- No generic openers: "The Real X", "Why X Matters", "A Guide to X", "Everything You Need to Know" +- No question headlines (they underperform on technical audiences) +- Do NOT copy any phrase from the article verbatim +- Do NOT echo back the topic input or any context that was provided +- Must signal a specific insight, counterintuitive finding, or operational reality +- Must make an engineer stop scrolling — specific, surprising, or blunt +- Colon allowed ONLY if both halves are strong standalone phrases + +GOOD examples: + "400G Does Not Fail in Design. It Fails in Production." + "Your OTDR Is Lying to You" + "Third-Party Optics: The Risk Is Real, but It's Not What You Think" + "RPKI Fixed Origin Validation. Path Security Is Still Broken." + "InfiniBand Scales to 400,000 GPUs. Ethernet Does Not." + +BAD examples (never produce these): + "Why 400G Migration Matters for Your Network" — generic + "Deep Dive: Understanding Optical Transceivers" — buzzword + filler + "The Complete Guide to OTDR Testing" — listicle-style + "Exploring the Challenges of Coherent Optics in 2026" — journalist fluff + "Key Takeaways from Our Latest Blog Post" — never + +Return ONLY the headline text. No quotes. No commentary. No "Here is your headline:". + +Article: +{{ARTICLE}}`; // ═══════════════════════════════════════════════════════ // STEP 2: ANGLE SELECTION diff --git a/packages/api/src/routes/blog-sll.ts b/packages/api/src/routes/blog-sll.ts index 1a9b660..dc82d78 100644 --- a/packages/api/src/routes/blog-sll.ts +++ b/packages/api/src/routes/blog-sll.ts @@ -6,6 +6,8 @@ * GET /api/blog/sll/insights — current learning state * POST /api/blog/sll/analyze — trigger LLM pattern extraction * GET /api/blog/sll/patterns — all learned patterns + * GET /api/blog/sll/posting-time — best posting time (Umami + SLL combined) + * POST /api/blog/sll/sync-umami — refresh Umami analytics cache */ import { Router, Request, Response } from "express"; @@ -173,6 +175,202 @@ blogSllRouter.get("/sll/insights", async (_req: Request, res: Response) => { } }); +// ───────────────────────────────────────────────────────────────── +// In-memory Umami cache (TTL 1h — single PM2 process) +// ───────────────────────────────────────────────────────────────── +interface UmamiSlot { weekday: number; hour: number; sessions: number } +let umamiCache: { slots: UmamiSlot[]; fetchedAt: number } | null = null; +const UMAMI_TTL_MS = 60 * 60 * 1000; // 1h + +const UMAMI_URL = process.env["UMAMI_URL"] ?? "https://analytics.fichtmueller.org"; +const UMAMI_USER = process.env["UMAMI_USER"] ?? "admin"; +const UMAMI_PASS = process.env["UMAMI_PASS"] ?? ""; +const UMAMI_WEBSITE = process.env["UMAMI_WEBSITE_ID"] ?? "c737bf75-ccc4-463b-992a-13bed31d7f43"; + +const DAY_NAMES = ["Mo","Di","Mi","Do","Fr","Sa","So"]; + +async function fetchUmamiToken(): Promise { + try { + const r = await fetch(`${UMAMI_URL}/api/auth/login`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ username: UMAMI_USER, password: UMAMI_PASS }), + signal: AbortSignal.timeout(8000), + }); + const d = await r.json() as { token?: string }; + return d.token ?? null; + } catch { return null; } +} + +async function fetchUmamiSlots(): Promise { + // Return cache if fresh + if (umamiCache && Date.now() - umamiCache.fetchedAt < UMAMI_TTL_MS) { + return umamiCache.slots; + } + + const token = await fetchUmamiToken(); + if (!token) return []; + + const startAt = Date.now() - 90 * 24 * 60 * 60 * 1000; + const endAt = Date.now(); + const url = `${UMAMI_URL}/api/websites/${UMAMI_WEBSITE}/sessions?startAt=${startAt}&endAt=${endAt}&pageSize=500&page=1`; + + try { + const r = await fetch(url, { + headers: { Authorization: `Bearer ${token}` }, + signal: AbortSignal.timeout(15000), + }); + const d = await r.json() as { data?: Array<{ firstAt?: string }> }; + const sessions = d.data ?? []; + + // Build (weekday, hour) counts — Berlin = UTC+2 in April/summer + const counts: Record = {}; + for (const s of sessions) { + if (!s.firstAt) continue; + const dt = new Date(s.firstAt); + const berlinH = (dt.getUTCHours() + 2) % 24; + const berlinWd = dt.getUTCDay() === 0 ? 6 : dt.getUTCDay() - 1; // 0=Mon + const key = `${berlinWd}:${berlinH}`; + counts[key] = (counts[key] ?? 0) + 1; + } + + const slots: UmamiSlot[] = Object.entries(counts).map(([key, n]) => { + const [wd, h] = key.split(":").map(Number); + return { weekday: wd, hour: h, sessions: n }; + }); + + umamiCache = { slots, fetchedAt: Date.now() }; + return slots; + } catch { return []; } +} + +// ───────────────────────────────────────────────────────────────── +// GET /api/blog/sll/posting-time — best posting time +// Combines Umami traffic data + SLL historical engagement by slot +// ───────────────────────────────────────────────────────────────── +blogSllRouter.get("/sll/posting-time", async (_req: Request, res: Response) => { + try { + // 1. Umami: traffic per (weekday, hour) + const umamiSlots = await fetchUmamiSlots(); + const umamiMax = Math.max(1, ...umamiSlots.map((s) => s.sessions)); + + // 2. SLL: avg engagement per (weekday, hour) from historical posts + const sllRes = await pool.query(` + SELECT + EXTRACT(DOW FROM posted_at AT TIME ZONE 'UTC' AT TIME ZONE 'Europe/Berlin')::int AS wd_raw, + EXTRACT(HOUR FROM posted_at AT TIME ZONE 'UTC' AT TIME ZONE 'Europe/Berlin')::int AS hour, + AVG(engagement_score) AS avg_eng, + MAX(engagement_score) AS best_eng, + COUNT(*) AS post_count + FROM blog_performance + WHERE posted_at IS NOT NULL AND engagement_score IS NOT NULL + GROUP BY wd_raw, hour + `); + + // Convert Sunday=0 (PostgreSQL DOW) to Monday=0 index + const sllMap: Record = {}; + for (const row of sllRes.rows) { + const wd = row.wd_raw === 0 ? 6 : Number(row.wd_raw) - 1; // Mon=0 + const key = `${wd}:${row.hour}`; + sllMap[key] = { + avgEng: Math.round(Number(row.avg_eng) * 10) / 10, + bestEng: Number(row.best_eng), + count: Number(row.post_count), + }; + } + const sllMax = Math.max(1, ...Object.values(sllMap).map((v) => v.avgEng)); + + // 3. Build candidate slots (union of Umami + SLL slots) + const allKeys = new Set([ + ...umamiSlots.map((s) => `${s.weekday}:${s.hour}`), + ...Object.keys(sllMap), + ]); + + const scored = Array.from(allKeys).map((key) => { + const [wd, h] = key.split(":").map(Number); + const umami = umamiSlots.find((s) => s.weekday === wd && s.hour === h); + const sll = sllMap[key]; + + const umamiScore = umami ? umami.sessions / umamiMax : 0; + const sllScore = sll ? sll.avgEng / sllMax : 0; + + // Weight: 50% Umami traffic + 50% SLL engagement + // If no SLL data → 100% Umami; if no Umami → 100% SLL + const hasUmami = !!umami; + const hasSll = !!sll; + let combined: number; + if (hasUmami && hasSll) { + combined = umamiScore * 0.5 + sllScore * 0.5; + } else if (hasUmami) { + combined = umamiScore * 0.7; // penalise slots with no SLL validation + } else { + combined = sllScore * 0.6; // SLL-only slots get a slight boost + } + + return { + weekday: wd, + hour: h, + label: `${DAY_NAMES[wd]} ${String(h).padStart(2, "0")}:00h`, + score: Math.round(combined * 100), + umami_sessions: umami?.sessions ?? 0, + sll_avg_engagement: sll?.avgEng ?? null, + sll_best_engagement: sll?.bestEng ?? null, + sll_post_count: sll?.count ?? 0, + data_sources: [hasUmami ? "umami" : null, hasSll ? "sll" : null].filter(Boolean), + }; + }); + + // Sort by score descending + scored.sort((a, b) => b.score - a.score); + const top = scored.slice(0, 10); + + // Build weekday summary (best hour per weekday) + const byWeekday: Record = {}; + for (const slot of scored) { + if (!byWeekday[slot.weekday] || slot.score > byWeekday[slot.weekday].score) { + byWeekday[slot.weekday] = slot; + } + } + const weekdaySummary = DAY_NAMES.map((name, wd) => ({ + weekday: wd, + name, + best_slot: byWeekday[wd] ?? null, + })); + + res.json({ + success: true, + top_slots: top, + weekday_summary: weekdaySummary, + recommended: top[0] ?? null, + data_sources: { + umami_sessions_analyzed: umamiSlots.reduce((s, x) => s + x.sessions, 0), + umami_cache_age_min: umamiCache ? Math.round((Date.now() - umamiCache.fetchedAt) / 60000) : null, + sll_posts_with_time: sllRes.rows.length, + }, + note: sllRes.rows.length === 0 + ? "SLL has no timed posts yet — using Umami traffic data only" + : `Combined Umami + ${sllRes.rows.length} SLL engagement data point(s)`, + }); + } catch (err) { + console.error("posting-time error:", err); + res.status(500).json({ success: false, error: String(err) }); + } +}); + +// ───────────────────────────────────────────────────────────────── +// POST /api/blog/sll/sync-umami — force-refresh Umami cache +// ───────────────────────────────────────────────────────────────── +blogSllRouter.post("/sll/sync-umami", async (_req: Request, res: Response) => { + umamiCache = null; // invalidate + const slots = await fetchUmamiSlots(); + res.json({ + success: slots.length > 0, + slots_loaded: slots.length, + total_sessions: slots.reduce((s, x) => s + x.sessions, 0), + message: slots.length > 0 ? "Umami cache refreshed" : "Umami unreachable — check credentials", + }); +}); + // ───────────────────────────────────────────────────────────────── // GET /api/blog/sll/patterns — all learned patterns // ───────────────────────────────────────────────────────────────── diff --git a/packages/api/src/routes/blog.ts b/packages/api/src/routes/blog.ts index e764cc2..be409e7 100644 --- a/packages/api/src/routes/blog.ts +++ b/packages/api/src/routes/blog.ts @@ -956,10 +956,11 @@ async function enqueueLlmPipeline( selectedTopic: string, targetAudience: string, data: Awaited>, + additionalContext?: string, ): Promise { return new Promise((resolve) => { llmQueue.push(async () => { - await runLlmPipeline(draftId, title, selectedTopic, targetAudience, data); + await runLlmPipeline(draftId, title, selectedTopic, targetAudience, data, additionalContext); resolve(); }); processLlmQueue(); @@ -989,6 +990,7 @@ async function runLlmPipeline( selectedTopic: string, targetAudience: string, data: Awaited>, + additionalContext?: string, ): Promise { // Lazy-load the new FO pipeline const { @@ -1009,6 +1011,7 @@ async function runLlmPipeline( STEP9_QA_CHECK, STEP10_QUALITY_SCORE, STEP_LINKEDIN_POST, + STEP_HEADLINE_GENERATION, BLOG_TYPES, buildFeedbackContext, withCalibration, @@ -1091,7 +1094,11 @@ async function runLlmPipeline( console.log(" Step 1/10: Topic Expansion..."); setProgress(draftId, 1, "Step 1/10: Topic Expansion"); const step1 = await generate(systemPrompt, - STEP1_TOPIC_EXPANSION.replace("{{TOPIC}}", title), + STEP1_TOPIC_EXPANSION + .replace("{{TOPIC}}", title) + .replace("{{ADDITIONAL_CONTEXT}}", additionalContext + ? `\n\n---\nBACKGROUND REFERENCE (editorial context — use as factual direction ONLY):\n${additionalContext}\n\nCRITICAL: Do NOT copy any phrase, sentence, or wording from the above into the article or any step output. It is context for your understanding, not source material.` + : ""), LLM_OPTS ); stepsCompleted = 1; @@ -1281,6 +1288,26 @@ async function runLlmPipeline( } stepsCompleted = 16; + // ═══ Headline Generation (post-article — never copies user input) ═══ + console.log(" Step 17: Headline Generation..."); + let generatedHeadline: string | null = null; + try { + const headlineResult = await generate(systemPrompt, + STEP_HEADLINE_GENERATION.replace("{{ARTICLE}}", step9.text), + { temperature: 0.5, maxTokens: 64, timeoutMs: 60000 } + ); + const rawHeadline = headlineResult.text.trim().replace(/^["']|["']$/g, ""); + // Sanity check: must be non-empty, ≤120 chars, and not start with "Here is" + if (rawHeadline.length > 5 && rawHeadline.length <= 120 && !rawHeadline.toLowerCase().startsWith("here is")) { + generatedHeadline = rawHeadline; + console.log(` Generated headline: "${generatedHeadline}"`); + } + } catch { + console.log(" Headline generation skipped"); + } + // Use generated headline if valid, fall back to original title + const finalTitle = generatedHeadline || title; + // Extract only the article from STEP9 output (QA returns review + fixed article) // Look for "COMPLETE FIXED ARTICLE" marker and take everything after it let finalArticleText = step9.text; @@ -1308,14 +1335,14 @@ async function runLlmPipeline( .join("\n") .trim(); - const draftContent = `# ${title}\n\n${finalArticleText}`; + const draftContent = `# ${finalTitle}\n\n${finalArticleText}`; const wordCount = draftContent.split(/\s+/).length; const finalIssues = validateArticle(draftContent); - // Update the draft in DB + // Update the draft in DB (title updated to generated headline if available) await pool.query( `UPDATE blog_drafts - SET draft_content = $1, word_count = $2, + SET title = $9, draft_content = $1, word_count = $2, generated_by = 'fo-blog-engine-v5-autokill', pipeline_version = 'v5-auto-kill-layer', pipeline_steps_completed = $3, @@ -1342,6 +1369,7 @@ async function runLlmPipeline( linkedinPost, linkedinCharCount, draftId, + finalTitle, ], ); @@ -1377,12 +1405,16 @@ async function runLlmPipeline( // POST /api/blog/generate — Generate a new blog draft (returns immediately, LLM runs async) blogRouter.post("/generate", async (req: Request, res: Response) => { - const { topic, speed, form_factor, use_case, use_llm } = req.body as { + const { topic, speed, form_factor, use_case, use_llm, custom_title, additional_context } = req.body as { topic?: string; speed?: string; form_factor?: string; use_case?: string; use_llm?: boolean; + /** Override the auto-selected template title with a specific topic/title */ + custom_title?: string; + /** Background context for the LLM — used as factual direction ONLY, never copied verbatim into the article */ + additional_context?: string; }; const selectedTopic = topic || "tutorial"; @@ -1400,7 +1432,8 @@ blogRouter.post("/generate", async (req: Request, res: Response) => { const year = new Date().getFullYear(); const template = templates[Math.floor(Math.random() * templates.length)]; - const title = template.title + // custom_title overrides the template title — LLM will still generate a better headline at the end + const title = custom_title || template.title .replace("{YEAR}", String(year)) .replace("{SPEED}", speed || "400G/800G") .replace("{FORM_FACTOR}", form_factor || "QSFP-DD/OSFP") @@ -1453,7 +1486,7 @@ blogRouter.post("/generate", async (req: Request, res: Response) => { console.log(`Blog LLM: Using ${health.model} — enhancing draft ${draftId} in background`); llmStarted = true; // Fire-and-forget: LLM pipeline queued, updates draft when done - enqueueLlmPipeline(draftId, title, selectedTopic, template.target_audience, data).catch((err) => { + enqueueLlmPipeline(draftId, title, selectedTopic, template.target_audience, data, additional_context).catch((err) => { console.error(`Blog LLM background pipeline error: ${(err as Error).message}`); }); } @@ -1492,7 +1525,8 @@ blogRouter.post("/generate", async (req: Request, res: Response) => { blogRouter.get("/", async (_req: Request, res: Response) => { try { const result = await pool.query( - `SELECT id, title, topic, target_audience, status, word_count, seo_keywords, generated_by, created_at + `SELECT id, title, topic, target_audience, status, word_count, seo_keywords, generated_by, + pipeline_steps_completed, linkedin_post, linkedin_char_count, review_tag, created_at FROM blog_drafts ORDER BY created_at DESC LIMIT 50`, @@ -1575,6 +1609,28 @@ blogRouter.put("/:id/status", async (req: Request, res: Response) => { } }); +// PUT /api/blog/:id/review — Toggle reviewed/unreviewed tag +blogRouter.put("/:id/review", async (req: Request, res: Response) => { + try { + const current = await pool.query( + `SELECT review_tag FROM blog_drafts WHERE id = $1::uuid`, + [req.params.id] + ); + if (current.rows.length === 0) { + res.status(404).json({ success: false, error: "Draft not found" }); + return; + } + const newTag = current.rows[0].review_tag === "reviewed" ? null : "reviewed"; + await pool.query( + `UPDATE blog_drafts SET review_tag = $1, updated_at = NOW() WHERE id = $2::uuid`, + [newTag, req.params.id] + ); + res.json({ success: true, review_tag: newTag }); + } catch (err) { + res.status(500).json({ success: false, error: (err as Error).message }); + } +}); + // ═══════════════════════════════════════════════════════ // FEEDBACK SYSTEM (v0.2.0 — FO_Blog_LLM Training Loop) // ═══════════════════════════════════════════════════════ diff --git a/packages/api/src/routes/health.ts b/packages/api/src/routes/health.ts index 3a3fc98..6d7cee9 100644 --- a/packages/api/src/routes/health.ts +++ b/packages/api/src/routes/health.ts @@ -23,6 +23,19 @@ healthRouter.get("/", async (_req: Request, res: Response) => { `).catch(() => ({ rows: [{}] })); const v = verStats.rows[0] || {}; + // Stock observations stats + const stockStats = await pool.query(` + SELECT + COUNT(*) AS total_observations, + COUNT(DISTINCT transceiver_id) AS transceivers_with_stock, + COUNT(DISTINCT source_vendor_id) AS vendors_with_stock, + SUM(warehouse_de_qty) FILTER (WHERE warehouse_de_qty > 0) AS total_de_qty, + SUM(warehouse_global_qty) FILTER (WHERE warehouse_global_qty > 0) AS total_global_qty, + MAX(time) AS last_observation_at + FROM stock_observations + `).catch(() => ({ rows: [{}] })); + const s = stockStats.rows[0] || {}; + res.json({ success: true, status: "healthy", @@ -34,13 +47,21 @@ healthRouter.get("/", async (_req: Request, res: Response) => { stats, }, verification: { - price_verified: Number(v.price_verified || 0), - image_verified: Number(v.image_verified || 0), - details_verified: Number(v.details_verified || 0), - fully_verified: Number(v.fully_verified || 0), - total: Number(v.total || 0), - price_coverage_pct: v.total ? Math.round(Number(v.price_verified) / Number(v.total) * 100) : 0, - fully_verified_pct: v.total ? Math.round(Number(v.fully_verified) / Number(v.total) * 100) : 0, + price_verified: Number(v.price_verified || 0), + image_verified: Number(v.image_verified || 0), + details_verified: Number(v.details_verified || 0), + fully_verified: Number(v.fully_verified || 0), + total: Number(v.total || 0), + price_coverage_pct: v.total ? Math.round(Number(v.price_verified) / Number(v.total) * 100) : 0, + fully_verified_pct: v.total ? Math.round(Number(v.fully_verified) / Number(v.total) * 100) : 0, + }, + stock: { + total_observations: Number(s.total_observations || 0), + transceivers_with_stock: Number(s.transceivers_with_stock || 0), + vendors_with_stock: Number(s.vendors_with_stock || 0), + total_de_qty: Number(s.total_de_qty || 0), + total_global_qty: Number(s.total_global_qty || 0), + last_observation_at: s.last_observation_at ?? null, }, }); } catch (err) { diff --git a/packages/api/src/routes/hot-topics.ts b/packages/api/src/routes/hot-topics.ts index 3452544..35ff876 100644 --- a/packages/api/src/routes/hot-topics.ts +++ b/packages/api/src/routes/hot-topics.ts @@ -24,6 +24,7 @@ interface HotTopic { source_type: "conference" | "manufacturer" | "trade_press" | "research" | "internal_data" | "competitor"; data_context?: Record; suggested_angle?: string; + date?: string; } /** @@ -57,6 +58,7 @@ hotTopicsRouter.get("/", async (_req, res) => { source_type: "competitor", data_context: drop, suggested_angle: `Price war analysis: Why ${drop.vendor} is cutting ${drop.speed_gbps}G pricing and what it means for procurement`, + date: drop.detected_at ? new Date(drop.detected_at).toISOString() : undefined, }); } @@ -82,6 +84,7 @@ hotTopicsRouter.get("/", async (_req, res) => { source_type: "internal_data", data_context: { products: newProducts.rows }, suggested_angle: `Competitor roundup: What ${vendors[0]} and others just launched — and what it means for your next PO`, + date: newProducts.rows[0]?.created_at ? new Date(newProducts.rows[0].created_at).toISOString() : undefined, }); } @@ -157,6 +160,7 @@ hotTopicsRouter.get("/", async (_req, res) => { impact_months: m.impact_horizon_months, }, suggested_angle: `${m.title}: ${angle}`, + date: m.published_at ? new Date(m.published_at).toISOString() : undefined, }); } @@ -192,6 +196,7 @@ hotTopicsRouter.get("/", async (_req, res) => { source_type: "conference", data_context: { talks: (talks as NogRow[]).slice(0, 3) }, suggested_angle: `What ${event} presenters are actually deploying — lessons for your network refresh`, + date: topTalk.published_at ? new Date(topTalk.published_at).toISOString() : undefined, }); } @@ -226,6 +231,7 @@ hotTopicsRouter.get("/", async (_req, res) => { source_type: "trade_press", data_context: { articles: articles.slice(0, 3) }, suggested_angle: `${theme}: What the latest announcements actually mean for network operators`, + date: articles[0]?.published_at ? new Date(articles[0].published_at).toISOString() : undefined, }); } } diff --git a/packages/api/src/routes/transceivers.ts b/packages/api/src/routes/transceivers.ts index fd40107..1a5526e 100644 --- a/packages/api/src/routes/transceivers.ts +++ b/packages/api/src/routes/transceivers.ts @@ -21,6 +21,7 @@ transceiverRouter.get("/", async (req: Request, res: Response) => { coherent: q("coherent") === "true" ? true : q("coherent") === "false" ? false : undefined, market_status: q("market_status"), vendor: q("vendor"), + verified: q("verified") as "price" | "image" | "details" | "full" | undefined, limit: q("limit") ? parseInt(q("limit")!) : 50, offset: q("offset") ? parseInt(q("offset")!) : 0, }); @@ -141,9 +142,31 @@ transceiverRouter.get("/:id", async (req: Request, res: Response) => { } } + // Last time ANY competitor scraper looked at this transceiver (regardless of result) + const lastScanResult = await pool.query( + `SELECT MAX(po.time) AS last_scan + FROM price_observations po + JOIN vendors v ON po.source_vendor_id = v.id + WHERE po.transceiver_id = $1 + AND v.is_competitor = true`, + [transceiver.id] + ); + const lastCompetitorScan = lastScanResult.rows[0]?.last_scan ?? null; + + // Has any competitor ever listed a price for this exact product? + const competitorHasProduct = prices.some( + (p) => p.vendor_type !== "flexoptix" && p.price > 0 + ); + res.json({ success: true, - data: { ...transceiver, competitor_prices: allPrices, price_anomaly: priceAnomaly }, + data: { + ...transceiver, + competitor_prices: allPrices, + price_anomaly: priceAnomaly, + last_competitor_scan: lastCompetitorScan, + competitor_has_product: competitorHasProduct, + }, }); } catch (err) { console.error("Get transceiver error:", err); diff --git a/packages/dashboard/hot-topics.js b/packages/dashboard/hot-topics.js index 22a23f9..d8ee2fe 100644 --- a/packages/dashboard/hot-topics.js +++ b/packages/dashboard/hot-topics.js @@ -207,10 +207,11 @@ return '
' + '
' + - '' + (t.urgency || '') + '' + + '' + (t.urgency || '') + (t.blog_title_created ? ' · ✓ Blog erstellt' : '') + '' + '' + (t.source_type || '') + ' · ' + (t.source || '') + '
' + '
' + (t.title || '') + '
' + '
' + (t.suggested_angle || t.description || '').slice(0, 100) + '
' + + (t.date ? '
' + new Date(t.date).toLocaleDateString('de-DE', {day:'2-digit',month:'short',year:'numeric'}) + '
' : '') + '
'; }).join(''); }).catch(function(err) { diff --git a/packages/mcp-server/src/index.ts b/packages/mcp-server/src/index.ts index a99c9b0..bcd7619 100644 --- a/packages/mcp-server/src/index.ts +++ b/packages/mcp-server/src/index.ts @@ -23,6 +23,8 @@ import { registerCompatibilityTools } from "./tools/compatibility.js"; import { registerKnowledgeTools } from "./tools/knowledge.js"; import { registerContentTools } from "./tools/content.js"; import { registerMarketTools } from "./tools/market.js"; +import { registerSwitchDocTools } from "./tools/switch-docs.js"; +import { finderTools, handleFinderTool } from "./tools/finder.js"; async function main() { const server = new McpServer({ @@ -347,6 +349,184 @@ async function main() { await registerKnowledgeTools(server); await registerContentTools(server); await registerMarketTools(server); + await registerSwitchDocTools(server); + + // --- Register finder.ts tools (find_flexoptix_for_switch, get_competitor_alerts) --- + for (const [toolName, toolDef] of Object.entries(finderTools)) { + const schema: Record = {}; + for (const [propName, propDef] of Object.entries(toolDef.inputSchema.properties ?? {})) { + const p = propDef as { type: string; description?: string }; + schema[propName] = p.type === "number" + ? z.number().optional().describe(p.description ?? "") + : z.string().optional().describe(p.description ?? ""); + } + server.tool( + toolName, + toolDef.description, + schema, + async (args) => { + const result = await handleFinderTool(toolName, args as Record); + return { content: [{ type: "text" as const, text: result }] }; + } + ); + } + + // --- Ollama LLM tools: market analysis (qwen2.5:14b) + blog generation (fo-blog-v5) --- + const OLLAMA_BASE = process.env["OLLAMA_BASE_URL"] ?? "https://ollama.fichtmueller.org"; + + server.tool( + "analyze_market_with_llm", + "Deep market analysis for a transceiver technology using local LLM (qwen2.5:14b). Provides expert narrative on adoption trends, pricing trajectory, competitive dynamics, and buy/wait/hold recommendation.", + { + technology: z.string().describe("Technology to analyze, e.g. '400G QSFP-DD', '800G OSFP', '100G ZR'"), + context: z.string().optional().describe("Additional context or specific questions to address"), + horizon: z.enum(["3m", "6m", "12m", "18m"]).default("12m").describe("Forecast horizon"), + }, + async ({ technology, context, horizon }) => { + // Gather DB data to enrich the prompt + const [hype, prices, news] = await Promise.all([ + pool.query( + `SELECT hype_phase, hype_score, ROUND(current_share*100,1) AS share_pct, + asp_current_usd, asp_decline_pct_3y, years_to_next_phase + FROM hype_cycle_analysis WHERE technology ILIKE $1 + ORDER BY computed_at DESC LIMIT 1`, + [`%${technology}%`] + ), + pool.query( + `SELECT v.name AS vendor, ROUND(MIN(po.price)::NUMERIC,2) AS min_price, + ROUND(MAX(po.price)::NUMERIC,2) AS max_price, po.currency + FROM price_observations po JOIN vendors v ON v.id = po.source_vendor_id + JOIN transceivers t ON t.id = po.transceiver_id + WHERE t.speed ILIKE $1 AND po.time > NOW() - INTERVAL '7 days' + GROUP BY v.name, po.currency ORDER BY min_price ASC LIMIT 10`, + [`%${technology.split("-")[0]}%`] + ), + pool.query( + `SELECT title, summary, published_at FROM news_articles + WHERE content_vector @@ plainto_tsquery('english', $1) + ORDER BY published_at DESC LIMIT 5`, + [technology] + ).catch(() => ({ rows: [] })), + ]); + + const dataContext = [ + hype.rows[0] ? `Hype Cycle: phase=${hype.rows[0].hype_phase}, score=${hype.rows[0].hype_score}/100, market_share=${hype.rows[0].share_pct}%, OEM_ASP=$${hype.rows[0].asp_current_usd}, ASP_decline_3y=${hype.rows[0].asp_decline_pct_3y}%, years_to_next_phase=${hype.rows[0].years_to_next_phase}` : "", + prices.rows.length > 0 ? `Current pricing: ${prices.rows.map((r) => `${r.vendor} ${r.currency}${r.min_price}–${r.max_price}`).join(", ")}` : "", + news.rows.length > 0 ? `Recent news: ${news.rows.map((r: {title:string}) => r.title).join(" | ")}` : "", + ].filter(Boolean).join("\n"); + + const prompt = `You are a senior optical networking market analyst at a transceiver intelligence platform. + +Technology: ${technology} +Forecast horizon: ${horizon} +${dataContext ? `\nLive data:\n${dataContext}` : ""} +${context ? `\nSpecific questions: ${context}` : ""} + +Provide a concise expert market analysis covering: +1. Current market phase and what it means for buyers/sellers +2. Price trajectory over the next ${horizon} — will prices rise, fall, or stabilize? +3. Key demand drivers and risks +4. Competitive dynamics (OEM vs compatible vendors) +5. Buy / Wait / Hold recommendation with reasoning + +Keep the analysis actionable and data-driven. Under 400 words.`; + + try { + const resp = await fetch(`${OLLAMA_BASE}/api/generate`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "qwen2.5:14b", prompt, stream: false }), + signal: AbortSignal.timeout(120_000), + }); + if (!resp.ok) throw new Error(`Ollama HTTP ${resp.status}`); + const data = await resp.json() as { response?: string }; + return { content: [{ type: "text" as const, text: data.response ?? "No response from model." }] }; + } catch (err: unknown) { + return { content: [{ type: "text" as const, text: `LLM unavailable: ${(err as Error).message}. Use /api/hype-cycle/analysis for raw data.` }] }; + } + } + ); + + server.tool( + "generate_blog_post", + "Generate a professional Flexoptix blog post using the fine-tuned fo-blog-v5 model (Ollama). Automatically enriched with live pricing, hype cycle data, and competitor analysis.", + { + topic: z.string().describe("Blog topic, e.g. '400G QSFP-DD vs 400G ZR — which for your DC?'"), + target_audience: z.enum(["network_engineer", "procurement", "executive", "general"]).default("network_engineer").describe("Target reader"), + tone: z.enum(["technical", "consultative", "educational"]).default("consultative").describe("Writing tone"), + word_count: z.number().default(600).describe("Target word count (300–1000)"), + }, + async ({ topic, target_audience, tone, word_count }) => { + // Gather enrichment data + const keywords = topic.match(/\b(\d+G|QSFP|SFP|OSFP|ZR|SR|LR|ER)\b/gi) ?? []; + const priceData = keywords.length > 0 ? await pool.query( + `SELECT v.name AS vendor, t.form_factor, t.speed, + ROUND(MIN(po.price)::NUMERIC,2) AS min_price, po.currency + FROM price_observations po JOIN vendors v ON v.id = po.source_vendor_id + JOIN transceivers t ON t.id = po.transceiver_id + WHERE t.speed ILIKE ANY($1) AND po.time > NOW() - INTERVAL '7 days' + GROUP BY v.name, t.form_factor, t.speed, po.currency ORDER BY min_price ASC LIMIT 8`, + [keywords.map((k: string) => `%${k}%`)] + ).catch(() => ({ rows: [] })) : { rows: [] }; + + const enrichment = priceData.rows.length > 0 + ? `\nCurrent market prices (use naturally in article):\n${priceData.rows.map((r: {vendor:string;form_factor:string;speed:string;min_price:string;currency:string}) => `- ${r.form_factor} ${r.speed}: from ${r.currency}${r.min_price} at ${r.vendor}`).join("\n")}` + : ""; + + const systemPrompt = `You are a professional technical writer for Flexoptix, Europe's leading transceiver specialist. Write in a ${tone} tone for a ${target_audience.replace(/_/g," ")} audience. Articles should highlight Flexoptix expertise and the value of our FlexBox universal coding solution.`; + + const userPrompt = `Write a ${word_count}-word blog post on: "${topic}" +${enrichment} + +Include: +- Compelling introduction +- Technical explanation appropriate for audience +- Real pricing context where available +- Call-to-action mentioning Flexoptix or FlexBox +- SEO-friendly subheadings + +Do not include a title (added separately). Start directly with the article body.`; + + try { + const resp = await fetch(`${OLLAMA_BASE}/api/chat`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "fo-blog-v5", + messages: [ + { role: "system", content: systemPrompt }, + { role: "user", content: userPrompt }, + ], + stream: false, + }), + signal: AbortSignal.timeout(180_000), + }); + if (!resp.ok) { + // Fallback to qwen2.5:14b if fo-blog-v5 not available + const fallbackResp = await fetch(`${OLLAMA_BASE}/api/chat`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: "qwen2.5:14b", + messages: [ + { role: "system", content: systemPrompt }, + { role: "user", content: userPrompt }, + ], + stream: false, + }), + signal: AbortSignal.timeout(180_000), + }); + if (!fallbackResp.ok) throw new Error(`Both fo-blog-v5 and qwen2.5:14b unavailable`); + const fallbackData = await fallbackResp.json() as { message?: { content?: string } }; + return { content: [{ type: "text" as const, text: `[Generated with qwen2.5:14b — fo-blog-v5 unavailable]\n\n${fallbackData.message?.content ?? "No content"}` }] }; + } + const data = await resp.json() as { message?: { content?: string } }; + return { content: [{ type: "text" as const, text: data.message?.content ?? "No content generated." }] }; + } catch (err: unknown) { + return { content: [{ type: "text" as const, text: `LLM unavailable: ${(err as Error).message}. Check OLLAMA_BASE_URL env var.` }] }; + } + } + ); // Start server const transport = new StdioServerTransport(); diff --git a/packages/scraper/src/scrapers/ascentoptics.ts b/packages/scraper/src/scrapers/ascentoptics.ts index 3fe4e90..4543633 100644 --- a/packages/scraper/src/scrapers/ascentoptics.ts +++ b/packages/scraper/src/scrapers/ascentoptics.ts @@ -1,34 +1,50 @@ /** - * Ascent Optics Scraper — US-based compatible transceiver vendor + * Ascent Optics Scraper — Chinese OEM transceiver manufacturer * - * ascentoptics.com — product catalog with USD prices. - * Tries /catalog/ and /products/ as entry points. + * ascentoptics.com — product catalog loaded via JSON API endpoint. + * Products are served via /product-list?is_render=1&category_id=CID + * (HTML table in JSON response). No retail pricing — "Get Quote" model. + * Category IDs are discovered from data-cid attributes on sub-category pages. * * Rate limited: 1 req/2sec. */ -import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db"; -import { contentHash } from "../utils/hash"; +import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db"; import * as cheerio from "cheerio"; const BASE = "https://ascentoptics.com"; -const CATALOG_URLS = [ - "/catalog/", - "/products/", - "/products/transceivers/", - "/catalog/transceivers/", -]; -const MAX_PAGES = 15; const HEADERS = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", - Accept: "text/html,application/xhtml+xml", + Accept: "text/html,application/xhtml+xml,application/json,*/*;q=0.8", "Accept-Language": "en-US,en;q=0.9", + "X-Requested-With": "XMLHttpRequest", + Referer: "https://ascentoptics.com/optical-transceivers/", }; +// Known transceiver categories: slug → { category_id, formFactor, speedGbps, speed } +// DAC/AOC/DCO/LPO excluded — transceivers only +const CATEGORIES = [ + { slug: "/10g-sfp/", categoryId: 33, formFactor: "SFP+", speed: "10G", speedGbps: 10 }, + { slug: "/10g-xfp/", categoryId: 34, formFactor: "XFP", speed: "10G", speedGbps: 10 }, + { slug: "/25g-sfp28/", categoryId: 22, formFactor: "SFP28", speed: "25G", speedGbps: 25 }, + { slug: "/40g-qsfp/", categoryId: 20, formFactor: "QSFP+", speed: "40G", speedGbps: 40 }, + { slug: "/100g-qsfp28/", categoryId: 15, formFactor: "QSFP28", speed: "100G", speedGbps: 100 }, + { slug: "/100g-sfp112/", categoryId: 0, formFactor: "SFP112", speed: "100G", speedGbps: 100 }, + { slug: "/200g-qsfp56/", categoryId: 3, formFactor: "QSFP56", speed: "200G", speedGbps: 200 }, + { slug: "/200g-qsfp28-dd/", categoryId: 4, formFactor: "QSFP-DD", speed: "200G", speedGbps: 200 }, + { slug: "/400g-qsfp56-dd/", categoryId: 5, formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 }, + { slug: "/400g-osfp/", categoryId: 6, formFactor: "OSFP", speed: "400G", speedGbps: 400 }, + { slug: "/400g-qsfp112/", categoryId: 7, formFactor: "QSFP112", speed: "400G", speedGbps: 400 }, + { slug: "/800g-osfp/", categoryId: 9, formFactor: "OSFP", speed: "800G", speedGbps: 800 }, + { slug: "/800g-qsfp-dd800-200g-per-line/", categoryId: 121, formFactor: "QSFP-DD", speed: "800G", speedGbps: 800 }, + { slug: "/800g-qsfp112-dd/", categoryId: 0, formFactor: "QSFP112", speed: "800G", speedGbps: 800 }, + { slug: "/50g-sfp56/", categoryId: 0, formFactor: "SFP56", speed: "50G", speedGbps: 50 }, + { slug: "/16g-sfp/", categoryId: 0, formFactor: "SFP", speed: "16G", speedGbps: 16 }, +]; + interface Product { partNumber: string; name: string; url: string; - price?: number; formFactor: string; speed: string; speedGbps: number; @@ -42,37 +58,29 @@ function sleep(ms: number): Promise { return new Promise((resolve) => setTimeout(resolve, ms)); } -function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } { - const lower = text.toLowerCase(); - if (lower.includes("osfp") && !lower.includes("qsfp")) return { formFactor: "OSFP", speed: "400G", speedGbps: 400 }; - if (lower.includes("qsfp-dd")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 }; - if (lower.includes("qsfp28")) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 }; - if (lower.includes("qsfp+") || lower.includes("qsfp plus")) return { formFactor: "QSFP+", speed: "40G", speedGbps: 40 }; - if (lower.includes("sfp56")) return { formFactor: "SFP56", speed: "50G", speedGbps: 50 }; - if (lower.includes("sfp28") || lower.includes("25g")) return { formFactor: "SFP28", speed: "25G", speedGbps: 25 }; - if (lower.includes("sfp+") || lower.includes("10gbase") || lower.includes("10g")) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 }; - if (lower.includes("xfp")) return { formFactor: "XFP", speed: "10G", speedGbps: 10 }; - if (lower.includes("1000base") || lower.includes("1g")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 }; - if (lower.includes("sfp") && !lower.includes("qsfp")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 }; - return { formFactor: "SFP+", speed: "10G", speedGbps: 10 }; -} - function detectReach(text: string): { label: string; meters: number } | undefined { + const t = text.toUpperCase(); const patterns: [RegExp, string, number][] = [ - [/\b80\s*km\b/i, "80km", 80000], - [/\b40\s*km\b/i, "40km", 40000], - [/\b20\s*km\b/i, "20km", 20000], - [/\b10\s*km\b/i, "10km", 10000], - [/\b2\s*km\b/i, "2km", 2000], - [/\b550\s*m\b/i, "550m", 550], - [/\b300\s*m\b/i, "300m", 300], - [/\b100\s*m\b/i, "100m", 100], + [/\b120\s*KM\b/, "120km", 120000], + [/\b80\s*KM\b/, "80km", 80000], + [/\b70\s*KM\b/, "70km", 70000], + [/\b60\s*KM\b/, "60km", 60000], + [/\b40\s*KM\b/, "40km", 40000], + [/\b20\s*KM\b/, "20km", 20000], + [/\b10\s*KM\b/, "10km", 10000], + [/\b5\s*KM\b/, "5km", 5000], + [/\b2\s*KM\b/, "2km", 2000], + [/\b550\s*M\b/, "550m", 550], + [/\b500\s*M\b/, "500m", 500], + [/\b300\s*M\b/, "300m", 300], + [/\b220\s*M\b/, "220m", 220], + [/\b100\s*M\b/, "100m", 100], [/\bLR4\b/, "10km", 10000], [/\bLR\b/, "10km", 10000], - [/\bER4?\b/, "40km", 40000], [/\bZR4?\b/, "80km", 80000], - [/\bSR4?\b/, "300m", 300], [/\bDR4?\b/, "500m", 500], [/\bFR4?\b/, "2km", 2000], + [/\bER\b/, "40km", 40000], [/\bZR\b/, "80km", 80000], + [/\bSR\b/, "300m", 300], [/\bDR\b/, "500m", 500], [/\bFR\b/, "2km", 2000], ]; for (const [regex, label, meters] of patterns) { - if (regex.test(text)) return { label, meters }; + if (regex.test(t)) return { label, meters }; } return undefined; } @@ -89,96 +97,81 @@ function detectWavelength(text: string): string { return match ? match[1] : ""; } -function parseProductList(html: string, sourceUrl: string): Product[] { - const $ = cheerio.load(html); - const products: Product[] = []; - - const cardSelectors = [ - ".product-item", ".product", ".item", "li.product", - ".product-card", "tr", "article", ".catalog-item", - ".product-list-item", ".result", - ]; - - for (const sel of cardSelectors) { - if ($(sel).length >= 2) { - $(sel).each((_i, el) => { - const text = $(el).text().trim(); - if (!/sfp|qsfp|xfp|transceiver|optic/i.test(text)) return; - - const nameEl = $(el).find("h2, h3, h4, .name, .product-name, .title, td, a").first(); - const name = nameEl.text().trim() || text.slice(0, 120); - if (!name || name.length < 5) return; - - const linkEl = $(el).find("a[href]").first(); - const href = linkEl.attr("href") || sourceUrl; - const url = href.startsWith("http") ? href : BASE + href; - - // Ascent Optics part numbers: e.g. AS-SFP-10G-SR, SFP-10G-LR-AS - const partNumMatch = name.match(/\b(AS[-_][A-Z0-9-]+)\b/i) || - name.match(/\b([A-Z]{2,}[-][A-Z0-9]+[-][A-Z0-9]+[-][A-Z0-9]+)\b/) || - text.match(/Part\s*(?:No\.?|Number|#)?\s*:?\s*([A-Z0-9-]{6,})/i); - const partNumber = partNumMatch?.[1] || - name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || - name.replace(/\s+/g, "-").slice(0, 60); - - // USD price - const priceText = $(el).find(".price, .product-price, .amount, [data-price]").text(); - const priceMatch = priceText.match(/\$\s*([\d,]+\.?\d{0,2})/); - let price: number | undefined; - if (priceMatch) { - const parsed = parseFloat(priceMatch[1].replace(",", "")); - if (parsed > 0 && parsed < 50000) price = parsed; - } - - const ff = detectFormFactor(name + " " + text); - const reach = detectReach(name + " " + text); - - products.push({ - partNumber, - name, - url, - price, - ...ff, - reachLabel: reach?.label, - reachMeters: reach?.meters, - fiberType: detectFiber(name + " " + text), - wavelength: detectWavelength(name + " " + text), - }); - }); - if (products.length > 0) break; - } - } - - // Fallback: transceiver-relevant anchors - if (products.length === 0) { - $("a[href]").each((_i, el) => { - const name = $(el).text().trim(); - const href = $(el).attr("href") || ""; - if (name.length < 8 || name.length > 200 || !/sfp|qsfp|transceiver/i.test(name)) return; - const url = href.startsWith("http") ? href : BASE + href; - const ff = detectFormFactor(name); - const reach = detectReach(name); - products.push({ - partNumber: name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60), - name, url, ...ff, - reachLabel: reach?.label, reachMeters: reach?.meters, - fiberType: detectFiber(name), wavelength: detectWavelength(name), - }); +/** Fetch category ID from a sub-page's data-cid attribute (fallback for categoryId=0 entries) */ +async function fetchCategoryId(slug: string): Promise { + try { + const resp = await fetch(BASE + slug, { + headers: { ...HEADERS, "X-Requested-With": "" }, + signal: AbortSignal.timeout(15000), }); + if (!resp.ok) return 0; + const html = await resp.text(); + const m = html.match(/data-cid="(\d+)"/); + return m ? parseInt(m[1]) : 0; + } catch { + return 0; } - - const seen = new Set(); - return products.filter((p) => { - if (!p.url || seen.has(p.url)) return false; - seen.add(p.url); - return true; - }); } -async function fetchPage(url: string): Promise { - const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(30000) }); - if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`); - return resp.text(); +/** Fetch product table HTML for a category via the JSON API */ +async function fetchProductTable(categoryId: number, slug: string): Promise { + const url = `${BASE}/product-list?is_render=1&category_id=${categoryId}`; + const resp = await fetch(url, { + headers: { ...HEADERS, Referer: BASE + slug }, + signal: AbortSignal.timeout(30000), + }); + if (!resp.ok) throw new Error(`HTTP ${resp.status} for /product-list?category_id=${categoryId}`); + const data = await resp.json() as { product_table_list?: string }; + return data.product_table_list ?? ""; +} + +/** Parse HTML table returned by /product-list API */ +function parseProductTable( + tableHtml: string, + cat: typeof CATEGORIES[number], +): Product[] { + const $ = cheerio.load(tableHtml); + const products: Product[] = []; + + $("tr").each((_i, row) => { + const cells = $(row).find("td"); + if (cells.length < 3) return; + + // Column layout: [image] [part_number] [description] [data_rate] [distance] [wavelength] [connector] [datasheet] [quote] + const partNumberCell = $(cells[1]); + const descCell = $(cells[2]); + + const rawPart = partNumberCell.text().trim(); + const desc = descCell.text().trim(); + + // Skip header rows and non-product rows + if (!rawPart || rawPart.length < 3 || /part\s*no|description/i.test(rawPart)) return; + if (rawPart.length > 80) return; + + const url = (() => { + const a = partNumberCell.find("a[href]").first().attr("href") ?? descCell.find("a[href]").first().attr("href"); + if (!a) return BASE + cat.slug; + return a.startsWith("http") ? a : BASE + a; + })(); + + const combined = `${rawPart} ${desc}`; + const reach = detectReach(combined); + + products.push({ + partNumber: rawPart, + name: desc || rawPart, + url, + formFactor: cat.formFactor, + speed: cat.speed, + speedGbps: cat.speedGbps, + reachLabel: reach?.label, + reachMeters: reach?.meters, + fiberType: detectFiber(combined), + wavelength: detectWavelength(combined), + }); + }); + + return products; } export async function scrapeAscentOptics(): Promise { @@ -188,89 +181,58 @@ export async function scrapeAscentOptics(): Promise { "Ascent Optics", "compatible", "https://ascentoptics.com", - BASE + CATALOG_URLS[0], + BASE + "/optical-transceivers/", ); - const allProducts: Product[] = []; - const seenUrls = new Set(); - const triedUrls = new Set(); + let totalProducts = 0; - for (const catalogPath of CATALOG_URLS) { - const catalogUrl = BASE + catalogPath; - if (triedUrls.has(catalogUrl)) continue; - triedUrls.add(catalogUrl); + for (const cat of CATEGORIES) { + let cid = cat.categoryId; - console.log(` Fetching catalog: ${catalogUrl}`); - try { - const html = await fetchPage(catalogUrl); - const pageProducts = parseProductList(html, catalogUrl); - for (const p of pageProducts) { - if (!seenUrls.has(p.url)) { seenUrls.add(p.url); allProducts.push(p); } + // Resolve unknown category IDs dynamically + if (cid === 0) { + cid = await fetchCategoryId(cat.slug); + if (cid === 0) { + console.log(` Skipping ${cat.slug} — category ID not found`); + await sleep(1000); + continue; } - console.log(` Found ${pageProducts.length} products`); - await sleep(2000); + } - // Paginate from each working catalog URL - for (let page = 2; page <= MAX_PAGES; page++) { - const pageUrl = `${catalogUrl}?page=${page}`; + console.log(`\n--- ${cat.formFactor} (${cat.speed}) [cid=${cid}] ---`); + + try { + const tableHtml = await fetchProductTable(cid, cat.slug); + const products = parseProductTable(tableHtml, cat); + console.log(` Found ${products.length} products`); + + for (const product of products) { try { - const pageHtml = await fetchPage(pageUrl); - const paginated = parseProductList(pageHtml, pageUrl); - if (paginated.length === 0) break; - for (const p of paginated) { - if (!seenUrls.has(p.url)) { seenUrls.add(p.url); allProducts.push(p); } - } - console.log(` Page ${page}: ${paginated.length} products`); - await sleep(2000); - } catch { - break; + await findOrCreateScrapedTransceiver({ + partNumber: product.partNumber, + vendorId, + formFactor: product.formFactor, + speedGbps: product.speedGbps, + speed: product.speed, + reachMeters: product.reachMeters, + reachLabel: product.reachLabel, + fiberType: product.fiberType, + wavelengths: product.wavelength, + category: "DataCenter", + }); + totalProducts++; + } catch (err) { + console.warn(` Error: ${(err as Error).message.slice(0, 80)}`); } } } catch (err) { - console.warn(` Failed: ${(err as Error).message}`); + console.error(` Category failed: ${(err as Error).message}`); } + + await sleep(2000); } - console.log(`\nTotal unique products: ${allProducts.length}`); - - let totalProducts = 0; - let priceUpdates = 0; - - for (const product of allProducts) { - try { - const txId = await findOrCreateScrapedTransceiver({ - partNumber: product.partNumber, - vendorId, - formFactor: product.formFactor, - speedGbps: product.speedGbps, - speed: product.speed, - reachMeters: product.reachMeters, - reachLabel: product.reachLabel, - fiberType: product.fiberType, - wavelengths: product.wavelength, - category: "DataCenter", - }); - - if (product.price && product.price > 0) { - const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber })); - const updated = await upsertPriceObservation({ - transceiverId: txId, - sourceVendorId: vendorId, - price: product.price, - currency: "USD", - stockLevel: "in_stock", - url: product.url, - contentHash: hash, - }); - if (updated) priceUpdates++; - } - totalProducts++; - } catch (err) { - console.warn(` Error saving ${product.partNumber}: ${(err as Error).message.slice(0, 80)}`); - } - } - - console.log(`\n=== Ascent Optics Complete: ${totalProducts} products, ${priceUpdates} prices ===`); + console.log(`\n=== Ascent Optics Complete: ${totalProducts} products (catalog only — no pricing) ===`); } if (require.main === module) { diff --git a/packages/scraper/src/scrapers/atgbics.ts b/packages/scraper/src/scrapers/atgbics.ts index c0d357d..8d21623 100644 --- a/packages/scraper/src/scrapers/atgbics.ts +++ b/packages/scraper/src/scrapers/atgbics.ts @@ -168,6 +168,7 @@ export async function scrapeAtgbics(): Promise { maxRequestsPerCrawl: MAX_PAGES, requestHandlerTimeoutSecs: 60, headless: true, + useSessionPool: false, // Disable session pool to avoid SDK_SESSION_POOL_STATE.json crash ...(proxyConfiguration ? { proxyConfiguration } : {}), launchContext: { launchOptions: { diff --git a/packages/scraper/src/scrapers/ebay-enricher.ts b/packages/scraper/src/scrapers/ebay-enricher.ts index 7d1dd4e..baeeb6e 100644 --- a/packages/scraper/src/scrapers/ebay-enricher.ts +++ b/packages/scraper/src/scrapers/ebay-enricher.ts @@ -285,7 +285,7 @@ async function saveEnrichment(switchId: string, result: EnrichResult): Promise { // Find eBay vendor const ebayVendor = await pool.query( - `INSERT INTO vendors (name, slug, type, website_url) + `INSERT INTO vendors (name, slug, type, website) VALUES ('eBay Marketplace', 'ebay', 'marketplace', 'https://www.ebay.de') ON CONFLICT (slug) DO UPDATE SET updated_at = NOW() RETURNING id` diff --git a/packages/scraper/src/scrapers/gbics.ts b/packages/scraper/src/scrapers/gbics.ts index 93edb69..ce6c9a7 100644 --- a/packages/scraper/src/scrapers/gbics.ts +++ b/packages/scraper/src/scrapers/gbics.ts @@ -114,13 +114,24 @@ function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product // Collapse whitespace for easier regex matching const collapsed = html.replace(/\s+/g, " "); - // BigCommerce card-title pattern: - // - const productRegex = /aria-label="([^"]+)"\s+href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"[^>]*data-event-type="product-click"/gi; + // BigCommerce card pattern (attribute order varies by theme version): + // Old: + // New: + // Two-pass approach: find all product tags regardless of attribute order + const productRegex = /href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"[^>]*aria-label="([^"]+)"/gi; + const productRegex2 = /aria-label="([^"]+)"[^>]*href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"/gi; let match; + const rawMatches: { url: string; label: string; index: number }[] = []; while ((match = productRegex.exec(collapsed)) !== null) { - const label = match[1].trim(); - const url = match[2]; + rawMatches.push({ url: match[1].trim(), label: match[2].trim(), index: match.index }); + } + if (rawMatches.length === 0) { + while ((match = productRegex2.exec(collapsed)) !== null) { + rawMatches.push({ url: match[2].trim(), label: match[1].trim(), index: match.index }); + } + } + for (const { url, label: rawLabel, index: matchIndex } of rawMatches) { + const label = rawLabel.replace(/\s+/g, " ").trim(); // aria-label contains "Product Name, £XX.XX" // Split on last comma to separate name and price @@ -130,7 +141,7 @@ function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product // Fallback: extract price from data-price-asc attribute on parent
  • if (!price) { - const priceContext = collapsed.slice(Math.max(0, match.index - 500), match.index); + const priceContext = collapsed.slice(Math.max(0, matchIndex - 500), matchIndex); const dataPriceMatch = priceContext.match(/data-price-asc="(\d+)"/); if (dataPriceMatch) price = parseFloat(dataPriceMatch[1]); } diff --git a/packages/scraper/src/scrapers/skylane.ts b/packages/scraper/src/scrapers/skylane.ts index c8efdde..685f61b 100644 --- a/packages/scraper/src/scrapers/skylane.ts +++ b/packages/scraper/src/scrapers/skylane.ts @@ -235,17 +235,20 @@ export async function scrapeSkylane(): Promise { } } - // Try pagination for this URL + // Try pagination — break as soon as no NEW unique product URLs appear + // (Skylane uses Algolia/InstantSearch: ?page=N returns same content, so + // content-based termination is required instead of empty-page detection) for (let page = 2; page <= MAX_PAGES; page++) { const pageUrl = `${catalogUrl}?page=${page}`; try { const pageHtml = await fetchPage(pageUrl); const paginated = parseProductList(pageHtml, pageUrl); - if (paginated.length === 0) break; + const newCount = paginated.filter((p) => !seenUrls.has(p.url)).length; + if (newCount === 0) break; // No genuinely new products → end of real pagination for (const p of paginated) { if (!seenUrls.has(p.url)) { seenUrls.add(p.url); allProducts.push(p); } } - console.log(` Page ${page}: ${paginated.length} products`); + console.log(` Page ${page}: ${newCount} new products`); await sleep(2000); } catch { break; diff --git a/scripts/sync-to-erik.sh b/scripts/sync-to-erik.sh index b433375..805dc4d 100755 --- a/scripts/sync-to-erik.sh +++ b/scripts/sync-to-erik.sh @@ -4,7 +4,7 @@ set -e -ERIK="root@217.154.82.179" +ERIK="root@82.165.222.127" REMOTE_PATH="/opt/tip" LOCAL_PATH="$(cd "$(dirname "$0")/.." && pwd)"