feat: wire finder.ts + switch-docs + Ollama LLM tools to MCP server
MCP Server (packages/mcp-server/src/index.ts): - Register registerSwitchDocTools (switch-docs.ts) — switch documentation lookup - Register finderTools dynamically (finder.ts) — find_flexoptix_for_switch, get_competitor_alerts - Add analyze_market_with_llm tool: qwen2.5:14b via Ollama, enriched with live hype cycle + pricing + news - Add generate_blog_post tool: fo-blog-v5 (fine-tuned) with qwen2.5:14b fallback, enriched with live pricing data - OLLAMA_BASE_URL env var (default: https://ollama.fichtmueller.org) Also includes scraper improvements (ascentoptics, atgbics, gbics, skylane, ebay-enricher), API route updates (blog, blog-sll, health, hot-topics, transceivers, queries), and dashboard hot-topics refresh.
This commit is contained in:
parent
b88a6e28cf
commit
e9fcda2811
@ -13,6 +13,7 @@ export interface SearchParams {
|
|||||||
coherent?: boolean;
|
coherent?: boolean;
|
||||||
market_status?: string;
|
market_status?: string;
|
||||||
vendor?: string;
|
vendor?: string;
|
||||||
|
verified?: "price" | "image" | "details" | "full";
|
||||||
limit?: number;
|
limit?: number;
|
||||||
offset?: number;
|
offset?: number;
|
||||||
}
|
}
|
||||||
@ -82,6 +83,10 @@ export async function searchTransceivers(params: SearchParams) {
|
|||||||
values.push(`%${params.vendor}%`);
|
values.push(`%${params.vendor}%`);
|
||||||
idx++;
|
idx++;
|
||||||
}
|
}
|
||||||
|
if (params.verified) {
  // The column name is spliced into the SQL text, so it must never be derived
  // from the raw request value: the static type on `params.verified` is erased
  // at runtime and callers may pass any string. Resolve it through a fixed
  // whitelist instead; unknown values simply add no filter.
  const verifiedColumns = new Map<string, string>([
    ["price", "price_verified"],
    ["image", "image_verified"],
    ["details", "details_verified"],
    ["full", "fully_verified"],
  ]);
  const col = verifiedColumns.get(String(params.verified));
  if (col !== undefined) {
    conditions.push(`t.${col} = true`);
  }
}
|
||||||
|
|
||||||
const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
|
const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
|
||||||
const limit = params.limit || 50;
|
const limit = params.limit || 50;
|
||||||
|
|||||||
@ -304,7 +304,47 @@ Given the topic below, expand it into:
|
|||||||
|
|
||||||
Topic: {{TOPIC}}
|
Topic: {{TOPIC}}
|
||||||
|
|
||||||
Keep it practical, not theoretical. Think about what actually goes wrong in production.`;
|
Keep it practical, not theoretical. Think about what actually goes wrong in production.{{ADDITIONAL_CONTEXT}}`;
|
||||||
|
|
||||||
|
// ═══════════════════════════════════════════════════════
|
||||||
|
// STEP HEADLINE: Generate a compelling article headline
|
||||||
|
// (runs AFTER full article is written — never copies user input)
|
||||||
|
// ═══════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
/**
 * Prompt template for the headline step that runs after the article is written.
 *
 * `{{ARTICLE}}` is the only placeholder; the caller replaces it with the
 * finished article text. Every GOOD example respects the 10-word maximum stated
 * in the rules, so the examples never contradict the constraints the model is
 * told are non-negotiable.
 */
export const STEP_HEADLINE_GENERATION = `You are writing a headline for a Flexoptix technical blog article.

Read the article below and generate ONE headline.

RULES — NON-NEGOTIABLE:
- Maximum 10 words
- Active voice, no adverbs
- No buzzwords: no "game-changer", "revolutionize", "deep dive", "exploring", "essential"
- No generic openers: "The Real X", "Why X Matters", "A Guide to X", "Everything You Need to Know"
- No question headlines (they underperform on technical audiences)
- Do NOT copy any phrase from the article verbatim
- Do NOT echo back the topic input or any context that was provided
- Must signal a specific insight, counterintuitive finding, or operational reality
- Must make an engineer stop scrolling — specific, surprising, or blunt
- Colon allowed ONLY if both halves are strong standalone phrases

GOOD examples:
"400G Does Not Fail in Design. It Fails in Production."
"Your OTDR Is Lying to You"
"Third-Party Optics: The Risk Is Not What You Think"
"RPKI Fixed Origin Validation. Path Security Is Still Broken."
"InfiniBand Scales to 400,000 GPUs. Ethernet Does Not."

BAD examples (never produce these):
"Why 400G Migration Matters for Your Network" — generic
"Deep Dive: Understanding Optical Transceivers" — buzzword + filler
"The Complete Guide to OTDR Testing" — listicle-style
"Exploring the Challenges of Coherent Optics in 2026" — journalist fluff
"Key Takeaways from Our Latest Blog Post" — never

Return ONLY the headline text. No quotes. No commentary. No "Here is your headline:".

Article:
{{ARTICLE}}`;
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════
|
||||||
// STEP 2: ANGLE SELECTION
|
// STEP 2: ANGLE SELECTION
|
||||||
|
|||||||
@ -6,6 +6,8 @@
|
|||||||
* GET /api/blog/sll/insights — current learning state
|
* GET /api/blog/sll/insights — current learning state
|
||||||
* POST /api/blog/sll/analyze — trigger LLM pattern extraction
|
* POST /api/blog/sll/analyze — trigger LLM pattern extraction
|
||||||
* GET /api/blog/sll/patterns — all learned patterns
|
* GET /api/blog/sll/patterns — all learned patterns
|
||||||
|
* GET /api/blog/sll/posting-time — best posting time (Umami + SLL combined)
|
||||||
|
* POST /api/blog/sll/sync-umami — refresh Umami analytics cache
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { Router, Request, Response } from "express";
|
import { Router, Request, Response } from "express";
|
||||||
@ -173,6 +175,202 @@ blogSllRouter.get("/sll/insights", async (_req: Request, res: Response) => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
// In-memory Umami cache (TTL 1h — single PM2 process)
|
||||||
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
/** Session count for one (weekday, hour) bucket; weekday is Monday-first (0 = Monday). */
interface UmamiSlot {
  weekday: number;
  hour: number;
  sessions: number;
}

/** Most recently fetched slot list with the epoch-ms time it was stored; `null` until a fetch succeeds. */
let umamiCache: { slots: UmamiSlot[]; fetchedAt: number } | null = null;

/** How long a cached slot list stays fresh, in milliseconds (one hour). */
const UMAMI_TTL_MS = 1000 * 60 * 60;

// Connection settings. Each value comes from the environment; the literal after
// `??` is used only when the variable is not set at all (an empty string is kept).
const UMAMI_URL = process.env["UMAMI_URL"] ?? "https://analytics.fichtmueller.org";
const UMAMI_USER = process.env["UMAMI_USER"] ?? "admin";
const UMAMI_PASS = process.env["UMAMI_PASS"] ?? "";
const UMAMI_WEBSITE = process.env["UMAMI_WEBSITE_ID"] ?? "c737bf75-ccc4-463b-992a-13bed31d7f43";

/** German weekday abbreviations, Monday first; indexed by `UmamiSlot.weekday`. */
const DAY_NAMES = ["Mo", "Di", "Mi", "Do", "Fr", "Sa", "So"];
|
||||||
|
|
||||||
|
/**
 * Logs in to Umami with the configured credentials and returns the bearer token.
 *
 * Best-effort by design: a network error, timeout, non-2xx status, malformed
 * body, or missing token is logged and reported as `null` rather than thrown,
 * so callers can treat analytics as optional.
 *
 * @returns the token string, or `null` when the login did not yield one.
 */
async function fetchUmamiToken(): Promise<string | null> {
  try {
    const response = await fetch(`${UMAMI_URL}/api/auth/login`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ username: UMAMI_USER, password: UMAMI_PASS }),
      signal: AbortSignal.timeout(8000), // give up on the login after 8 s
    });
    if (!response.ok) {
      // Error bodies are not guaranteed to be JSON; report the status instead of parsing.
      console.error(`Umami login failed: HTTP ${response.status}`);
      return null;
    }
    const payload = (await response.json()) as { token?: unknown };
    return typeof payload.token === "string" ? payload.token : null;
  } catch (err) {
    console.error("Umami login failed:", err instanceof Error ? err.message : err);
    return null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Renders an instant as Europe/Berlin weekday + 24h hour. Using the IANA zone
 * keeps the result correct across daylight-saving changes, unlike a fixed
 * UTC offset. Created once because building Intl formatters is costly.
 */
const BERLIN_SLOT_FORMAT = new Intl.DateTimeFormat("en-US", {
  timeZone: "Europe/Berlin",
  weekday: "short",
  hour: "2-digit",
  hour12: false,
});

/** Monday-first index for the short English weekday names BERLIN_SLOT_FORMAT emits. */
const BERLIN_WEEKDAY_INDEX: Record<string, number> = {
  Mon: 0,
  Tue: 1,
  Wed: 2,
  Thu: 3,
  Fri: 4,
  Sat: 5,
  Sun: 6,
};

/** Maps an instant to its Berlin-local (weekday, hour) bucket; `null` for an unparseable date. */
function toBerlinSlot(instant: Date): { weekday: number; hour: number } | null {
  if (Number.isNaN(instant.getTime())) return null;
  const parts = BERLIN_SLOT_FORMAT.formatToParts(instant);
  const weekdayName = parts.find((p) => p.type === "weekday")?.value;
  const hourText = parts.find((p) => p.type === "hour")?.value;
  const weekday = weekdayName === undefined ? undefined : BERLIN_WEEKDAY_INDEX[weekdayName];
  // Some runtimes print midnight as "24" when hour12 is false; fold it back to 0.
  const hour = Number(hourText) % 24;
  if (weekday === undefined || Number.isNaN(hour)) return null;
  return { weekday, hour };
}

/**
 * Returns Umami session counts bucketed by Berlin-local (weekday, hour).
 *
 * Serves the in-memory cache while it is younger than UMAMI_TTL_MS. Otherwise
 * it logs in, requests the first page (up to 500 entries) of sessions from the
 * last 90 days, and aggregates them. Only a successful response is cached, so
 * a transient outage is retried on the next call instead of pinning an empty
 * result for the whole TTL.
 *
 * @returns the aggregated slots, or an empty array when Umami could not be queried.
 */
async function fetchUmamiSlots(): Promise<UmamiSlot[]> {
  if (umamiCache && Date.now() - umamiCache.fetchedAt < UMAMI_TTL_MS) {
    return umamiCache.slots;
  }

  const token = await fetchUmamiToken();
  if (!token) return [];

  const endAt = Date.now();
  const startAt = endAt - 90 * 24 * 60 * 60 * 1000;
  // NOTE(review): only page 1 is requested, so at most 500 sessions are analysed.
  const url = `${UMAMI_URL}/api/websites/${UMAMI_WEBSITE}/sessions?startAt=${startAt}&endAt=${endAt}&pageSize=500&page=1`;

  try {
    const response = await fetch(url, {
      headers: { Authorization: `Bearer ${token}` },
      signal: AbortSignal.timeout(15000),
    });
    if (!response.ok) {
      console.error(`Umami sessions request failed: HTTP ${response.status}`);
      return [];
    }
    const payload = (await response.json()) as { data?: Array<{ firstAt?: string }> };
    const sessions = Array.isArray(payload.data) ? payload.data : [];

    // Keyed by "weekday:hour"; a Map keeps first-seen order like the result it replaces.
    const buckets = new Map<string, UmamiSlot>();
    for (const session of sessions) {
      if (!session?.firstAt) continue;
      const slot = toBerlinSlot(new Date(session.firstAt));
      if (!slot) continue; // skip unparseable timestamps instead of counting a NaN bucket
      const key = `${slot.weekday}:${slot.hour}`;
      const existing = buckets.get(key);
      if (existing) {
        existing.sessions += 1;
      } else {
        buckets.set(key, { weekday: slot.weekday, hour: slot.hour, sessions: 1 });
      }
    }

    const slots = Array.from(buckets.values());
    umamiCache = { slots, fetchedAt: Date.now() };
    return slots;
  } catch (err) {
    console.error("Umami sessions request failed:", err instanceof Error ? err.message : err);
    return [];
  }
}
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
// GET /api/blog/sll/posting-time — best posting time
|
||||||
|
// Combines Umami traffic data + SLL historical engagement by slot
|
||||||
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
/**
 * GET /sll/posting-time — ranks (weekday, hour) slots for publishing.
 *
 * Each slot gets a 0-100 score built from two normalised signals: Umami session
 * volume and the average engagement score of past posts in `blog_performance`.
 * Responds with the ten best slots, the best slot per weekday, and counters
 * describing how much data backed the ranking. Any failure yields HTTP 500.
 */
blogSllRouter.get("/sll/posting-time", async (_req: Request, res: Response) => {
  try {
    // 1. Umami: traffic per (weekday, hour)
    const umamiSlots = await fetchUmamiSlots();
    const umamiMax = Math.max(1, ...umamiSlots.map((s) => s.sessions));
    // Index once so the scoring pass does not scan the slot list per key.
    const umamiByKey = new Map<string, (typeof umamiSlots)[number]>();
    for (const slot of umamiSlots) {
      const slotKey = `${slot.weekday}:${slot.hour}`;
      if (!umamiByKey.has(slotKey)) umamiByKey.set(slotKey, slot);
    }

    // 2. SLL: avg engagement per (weekday, hour) from historical posts
    const sllRes = await pool.query(`
      SELECT
        EXTRACT(DOW FROM posted_at AT TIME ZONE 'UTC' AT TIME ZONE 'Europe/Berlin')::int AS wd_raw,
        EXTRACT(HOUR FROM posted_at AT TIME ZONE 'UTC' AT TIME ZONE 'Europe/Berlin')::int AS hour,
        AVG(engagement_score) AS avg_eng,
        MAX(engagement_score) AS best_eng,
        COUNT(*) AS post_count
      FROM blog_performance
      WHERE posted_at IS NOT NULL AND engagement_score IS NOT NULL
      GROUP BY wd_raw, hour
    `);

    // Convert Sunday=0 (PostgreSQL DOW) to a Monday=0 index.
    const sllMap: Record<string, { avgEng: number; bestEng: number; count: number }> = {};
    let sllPostTotal = 0;
    for (const row of sllRes.rows) {
      // Coerce before comparing so a string-typed value cannot slip past the Sunday check.
      const rawDow = Number(row.wd_raw);
      const wd = rawDow === 0 ? 6 : rawDow - 1;
      const key = `${wd}:${row.hour}`;
      const count = Number(row.post_count);
      sllMap[key] = {
        avgEng: Math.round(Number(row.avg_eng) * 10) / 10,
        bestEng: Number(row.best_eng),
        count,
      };
      sllPostTotal += count;
    }
    const sllMax = Math.max(1, ...Object.values(sllMap).map((v) => v.avgEng));

    // 3. Build candidate slots (union of Umami + SLL slots)
    const allKeys = new Set([...umamiByKey.keys(), ...Object.keys(sllMap)]);

    const scored = Array.from(allKeys).map((key) => {
      const [wd, h] = key.split(":").map(Number);
      const umami = umamiByKey.get(key);
      const sll = sllMap[key];

      const umamiScore = umami ? umami.sessions / umamiMax : 0;
      const sllScore = sll ? sll.avgEng / sllMax : 0;

      // Weighting:
      //   both sources -> 50% Umami traffic + 50% SLL engagement
      //   Umami only   -> traffic score x 0.7 (no engagement data to confirm it)
      //   SLL only     -> engagement score x 0.6 (no traffic data to confirm it)
      const hasUmami = !!umami;
      const hasSll = !!sll;
      let combined: number;
      if (hasUmami && hasSll) {
        combined = umamiScore * 0.5 + sllScore * 0.5;
      } else if (hasUmami) {
        combined = umamiScore * 0.7;
      } else {
        combined = sllScore * 0.6;
      }

      return {
        weekday: wd,
        hour: h,
        label: `${DAY_NAMES[wd]} ${String(h).padStart(2, "0")}:00h`,
        score: Math.round(combined * 100),
        umami_sessions: umami?.sessions ?? 0,
        sll_avg_engagement: sll?.avgEng ?? null,
        sll_best_engagement: sll?.bestEng ?? null,
        sll_post_count: sll?.count ?? 0,
        data_sources: [hasUmami ? "umami" : null, hasSll ? "sll" : null].filter(Boolean),
      };
    });

    // Sort by score descending
    scored.sort((a, b) => b.score - a.score);
    const top = scored.slice(0, 10);

    // Build weekday summary (best hour per weekday)
    const byWeekday: Record<number, (typeof scored)[number]> = {};
    for (const slot of scored) {
      if (!byWeekday[slot.weekday] || slot.score > byWeekday[slot.weekday].score) {
        byWeekday[slot.weekday] = slot;
      }
    }
    const weekdaySummary = DAY_NAMES.map((name, wd) => ({
      weekday: wd,
      name,
      best_slot: byWeekday[wd] ?? null,
    }));

    res.json({
      success: true,
      top_slots: top,
      weekday_summary: weekdaySummary,
      recommended: top[0] ?? null,
      data_sources: {
        umami_sessions_analyzed: umamiSlots.reduce((s, x) => s + x.sessions, 0),
        umami_cache_age_min: umamiCache ? Math.round((Date.now() - umamiCache.fetchedAt) / 60000) : null,
        // Sum of per-slot post counts: the query returns one row per slot, not per post.
        sll_posts_with_time: sllPostTotal,
      },
      note: sllRes.rows.length === 0
        ? "SLL has no timed posts yet — using Umami traffic data only"
        : `Combined Umami + ${sllRes.rows.length} SLL engagement data point(s)`,
    });
  } catch (err) {
    console.error("posting-time error:", err);
    res.status(500).json({ success: false, error: String(err) });
  }
});
|
||||||
|
|
||||||
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
// POST /api/blog/sll/sync-umami — force-refresh Umami cache
|
||||||
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
/**
 * POST /sll/sync-umami — discards the cached Umami slots and fetches them again.
 * Reports how many slots and sessions were loaded; an empty result is reported
 * as `success: false` with a hint to check the credentials.
 */
blogSllRouter.post("/sll/sync-umami", async (_req: Request, res: Response) => {
  // Clearing the cache first forces the fetch below to go to Umami.
  umamiCache = null;
  const refreshed = await fetchUmamiSlots();

  const loaded = refreshed.length > 0;
  let sessionTotal = 0;
  for (const slot of refreshed) {
    sessionTotal += slot.sessions;
  }

  res.json({
    success: loaded,
    slots_loaded: refreshed.length,
    total_sessions: sessionTotal,
    message: loaded ? "Umami cache refreshed" : "Umami unreachable — check credentials",
  });
});
|
||||||
|
|
||||||
// ─────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────
|
||||||
// GET /api/blog/sll/patterns — all learned patterns
|
// GET /api/blog/sll/patterns — all learned patterns
|
||||||
// ─────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────
|
||||||
|
|||||||
@ -956,10 +956,11 @@ async function enqueueLlmPipeline(
|
|||||||
selectedTopic: string,
|
selectedTopic: string,
|
||||||
targetAudience: string,
|
targetAudience: string,
|
||||||
data: Awaited<ReturnType<typeof gatherBlogData>>,
|
data: Awaited<ReturnType<typeof gatherBlogData>>,
|
||||||
|
additionalContext?: string,
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
return new Promise<void>((resolve) => {
|
return new Promise<void>((resolve) => {
|
||||||
llmQueue.push(async () => {
|
llmQueue.push(async () => {
|
||||||
await runLlmPipeline(draftId, title, selectedTopic, targetAudience, data);
|
await runLlmPipeline(draftId, title, selectedTopic, targetAudience, data, additionalContext);
|
||||||
resolve();
|
resolve();
|
||||||
});
|
});
|
||||||
processLlmQueue();
|
processLlmQueue();
|
||||||
@ -989,6 +990,7 @@ async function runLlmPipeline(
|
|||||||
selectedTopic: string,
|
selectedTopic: string,
|
||||||
targetAudience: string,
|
targetAudience: string,
|
||||||
data: Awaited<ReturnType<typeof gatherBlogData>>,
|
data: Awaited<ReturnType<typeof gatherBlogData>>,
|
||||||
|
additionalContext?: string,
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
// Lazy-load the new FO pipeline
|
// Lazy-load the new FO pipeline
|
||||||
const {
|
const {
|
||||||
@ -1009,6 +1011,7 @@ async function runLlmPipeline(
|
|||||||
STEP9_QA_CHECK,
|
STEP9_QA_CHECK,
|
||||||
STEP10_QUALITY_SCORE,
|
STEP10_QUALITY_SCORE,
|
||||||
STEP_LINKEDIN_POST,
|
STEP_LINKEDIN_POST,
|
||||||
|
STEP_HEADLINE_GENERATION,
|
||||||
BLOG_TYPES,
|
BLOG_TYPES,
|
||||||
buildFeedbackContext,
|
buildFeedbackContext,
|
||||||
withCalibration,
|
withCalibration,
|
||||||
@ -1091,7 +1094,11 @@ async function runLlmPipeline(
|
|||||||
console.log(" Step 1/10: Topic Expansion...");
|
console.log(" Step 1/10: Topic Expansion...");
|
||||||
setProgress(draftId, 1, "Step 1/10: Topic Expansion");
|
setProgress(draftId, 1, "Step 1/10: Topic Expansion");
|
||||||
const step1 = await generate(systemPrompt,
|
const step1 = await generate(systemPrompt,
|
||||||
STEP1_TOPIC_EXPANSION.replace("{{TOPIC}}", title),
|
STEP1_TOPIC_EXPANSION
|
||||||
|
.replace("{{TOPIC}}", title)
|
||||||
|
.replace("{{ADDITIONAL_CONTEXT}}", additionalContext
|
||||||
|
? `\n\n---\nBACKGROUND REFERENCE (editorial context — use as factual direction ONLY):\n${additionalContext}\n\nCRITICAL: Do NOT copy any phrase, sentence, or wording from the above into the article or any step output. It is context for your understanding, not source material.`
|
||||||
|
: ""),
|
||||||
LLM_OPTS
|
LLM_OPTS
|
||||||
);
|
);
|
||||||
stepsCompleted = 1;
|
stepsCompleted = 1;
|
||||||
@ -1281,6 +1288,26 @@ async function runLlmPipeline(
|
|||||||
}
|
}
|
||||||
stepsCompleted = 16;
|
stepsCompleted = 16;
|
||||||
|
|
||||||
|
// ═══ Headline Generation (post-article — never copies user input) ═══
|
||||||
|
console.log(" Step 17: Headline Generation...");
|
||||||
|
let generatedHeadline: string | null = null;
|
||||||
|
try {
|
||||||
|
const headlineResult = await generate(systemPrompt,
|
||||||
|
STEP_HEADLINE_GENERATION.replace("{{ARTICLE}}", step9.text),
|
||||||
|
{ temperature: 0.5, maxTokens: 64, timeoutMs: 60000 }
|
||||||
|
);
|
||||||
|
const rawHeadline = headlineResult.text.trim().replace(/^["']|["']$/g, "");
|
||||||
|
// Sanity check: must be non-empty, ≤120 chars, and not start with "Here is"
|
||||||
|
if (rawHeadline.length > 5 && rawHeadline.length <= 120 && !rawHeadline.toLowerCase().startsWith("here is")) {
|
||||||
|
generatedHeadline = rawHeadline;
|
||||||
|
console.log(` Generated headline: "${generatedHeadline}"`);
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
console.log(" Headline generation skipped");
|
||||||
|
}
|
||||||
|
// Use generated headline if valid, fall back to original title
|
||||||
|
const finalTitle = generatedHeadline || title;
|
||||||
|
|
||||||
// Extract only the article from STEP9 output (QA returns review + fixed article)
|
// Extract only the article from STEP9 output (QA returns review + fixed article)
|
||||||
// Look for "COMPLETE FIXED ARTICLE" marker and take everything after it
|
// Look for "COMPLETE FIXED ARTICLE" marker and take everything after it
|
||||||
let finalArticleText = step9.text;
|
let finalArticleText = step9.text;
|
||||||
@ -1308,14 +1335,14 @@ async function runLlmPipeline(
|
|||||||
.join("\n")
|
.join("\n")
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
const draftContent = `# ${title}\n\n${finalArticleText}`;
|
const draftContent = `# ${finalTitle}\n\n${finalArticleText}`;
|
||||||
const wordCount = draftContent.split(/\s+/).length;
|
const wordCount = draftContent.split(/\s+/).length;
|
||||||
const finalIssues = validateArticle(draftContent);
|
const finalIssues = validateArticle(draftContent);
|
||||||
|
|
||||||
// Update the draft in DB
|
// Update the draft in DB (title updated to generated headline if available)
|
||||||
await pool.query(
|
await pool.query(
|
||||||
`UPDATE blog_drafts
|
`UPDATE blog_drafts
|
||||||
SET draft_content = $1, word_count = $2,
|
SET title = $9, draft_content = $1, word_count = $2,
|
||||||
generated_by = 'fo-blog-engine-v5-autokill',
|
generated_by = 'fo-blog-engine-v5-autokill',
|
||||||
pipeline_version = 'v5-auto-kill-layer',
|
pipeline_version = 'v5-auto-kill-layer',
|
||||||
pipeline_steps_completed = $3,
|
pipeline_steps_completed = $3,
|
||||||
@ -1342,6 +1369,7 @@ async function runLlmPipeline(
|
|||||||
linkedinPost,
|
linkedinPost,
|
||||||
linkedinCharCount,
|
linkedinCharCount,
|
||||||
draftId,
|
draftId,
|
||||||
|
finalTitle,
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -1377,12 +1405,16 @@ async function runLlmPipeline(
|
|||||||
|
|
||||||
// POST /api/blog/generate — Generate a new blog draft (returns immediately, LLM runs async)
|
// POST /api/blog/generate — Generate a new blog draft (returns immediately, LLM runs async)
|
||||||
blogRouter.post("/generate", async (req: Request, res: Response) => {
|
blogRouter.post("/generate", async (req: Request, res: Response) => {
|
||||||
const { topic, speed, form_factor, use_case, use_llm } = req.body as {
|
const { topic, speed, form_factor, use_case, use_llm, custom_title, additional_context } = req.body as {
|
||||||
topic?: string;
|
topic?: string;
|
||||||
speed?: string;
|
speed?: string;
|
||||||
form_factor?: string;
|
form_factor?: string;
|
||||||
use_case?: string;
|
use_case?: string;
|
||||||
use_llm?: boolean;
|
use_llm?: boolean;
|
||||||
|
/** Override the auto-selected template title with a specific topic/title */
|
||||||
|
custom_title?: string;
|
||||||
|
/** Background context for the LLM — used as factual direction ONLY, never copied verbatim into the article */
|
||||||
|
additional_context?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
const selectedTopic = topic || "tutorial";
|
const selectedTopic = topic || "tutorial";
|
||||||
@ -1400,7 +1432,8 @@ blogRouter.post("/generate", async (req: Request, res: Response) => {
|
|||||||
const year = new Date().getFullYear();
|
const year = new Date().getFullYear();
|
||||||
const template = templates[Math.floor(Math.random() * templates.length)];
|
const template = templates[Math.floor(Math.random() * templates.length)];
|
||||||
|
|
||||||
const title = template.title
|
// custom_title overrides the template title — LLM will still generate a better headline at the end
|
||||||
|
const title = custom_title || template.title
|
||||||
.replace("{YEAR}", String(year))
|
.replace("{YEAR}", String(year))
|
||||||
.replace("{SPEED}", speed || "400G/800G")
|
.replace("{SPEED}", speed || "400G/800G")
|
||||||
.replace("{FORM_FACTOR}", form_factor || "QSFP-DD/OSFP")
|
.replace("{FORM_FACTOR}", form_factor || "QSFP-DD/OSFP")
|
||||||
@ -1453,7 +1486,7 @@ blogRouter.post("/generate", async (req: Request, res: Response) => {
|
|||||||
console.log(`Blog LLM: Using ${health.model} — enhancing draft ${draftId} in background`);
|
console.log(`Blog LLM: Using ${health.model} — enhancing draft ${draftId} in background`);
|
||||||
llmStarted = true;
|
llmStarted = true;
|
||||||
// Fire-and-forget: LLM pipeline queued, updates draft when done
|
// Fire-and-forget: LLM pipeline queued, updates draft when done
|
||||||
enqueueLlmPipeline(draftId, title, selectedTopic, template.target_audience, data).catch((err) => {
|
enqueueLlmPipeline(draftId, title, selectedTopic, template.target_audience, data, additional_context).catch((err) => {
|
||||||
console.error(`Blog LLM background pipeline error: ${(err as Error).message}`);
|
console.error(`Blog LLM background pipeline error: ${(err as Error).message}`);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -1492,7 +1525,8 @@ blogRouter.post("/generate", async (req: Request, res: Response) => {
|
|||||||
blogRouter.get("/", async (_req: Request, res: Response) => {
|
blogRouter.get("/", async (_req: Request, res: Response) => {
|
||||||
try {
|
try {
|
||||||
const result = await pool.query(
|
const result = await pool.query(
|
||||||
`SELECT id, title, topic, target_audience, status, word_count, seo_keywords, generated_by, created_at
|
`SELECT id, title, topic, target_audience, status, word_count, seo_keywords, generated_by,
|
||||||
|
pipeline_steps_completed, linkedin_post, linkedin_char_count, review_tag, created_at
|
||||||
FROM blog_drafts
|
FROM blog_drafts
|
||||||
ORDER BY created_at DESC
|
ORDER BY created_at DESC
|
||||||
LIMIT 50`,
|
LIMIT 50`,
|
||||||
@ -1575,6 +1609,28 @@ blogRouter.put("/:id/status", async (req: Request, res: Response) => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// PUT /api/blog/:id/review — Toggle reviewed/unreviewed tag
|
||||||
|
/**
 * PUT /:id/review — toggles a draft's review tag between "reviewed" and NULL.
 *
 * The flip happens in a single UPDATE so two concurrent requests cannot both
 * read the same old value and write the same new one. Responds 404 when no
 * draft has the given id, and 500 with the error message on a database failure.
 */
blogRouter.put("/:id/review", async (req: Request, res: Response) => {
  try {
    const toggled = await pool.query(
      `UPDATE blog_drafts
          SET review_tag = CASE WHEN review_tag = 'reviewed' THEN NULL ELSE 'reviewed' END,
              updated_at = NOW()
        WHERE id = $1::uuid
        RETURNING review_tag`,
      [req.params.id]
    );
    // No returned row means the WHERE clause matched nothing.
    if (toggled.rows.length === 0) {
      res.status(404).json({ success: false, error: "Draft not found" });
      return;
    }
    res.json({ success: true, review_tag: toggled.rows[0].review_tag ?? null });
  } catch (err) {
    res.status(500).json({ success: false, error: (err as Error).message });
  }
});
|
||||||
|
|
||||||
// ═══════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════
|
||||||
// FEEDBACK SYSTEM (v0.2.0 — FO_Blog_LLM Training Loop)
|
// FEEDBACK SYSTEM (v0.2.0 — FO_Blog_LLM Training Loop)
|
||||||
// ═══════════════════════════════════════════════════════
|
// ═══════════════════════════════════════════════════════
|
||||||
|
|||||||
@ -23,6 +23,19 @@ healthRouter.get("/", async (_req: Request, res: Response) => {
|
|||||||
`).catch(() => ({ rows: [{}] }));
|
`).catch(() => ({ rows: [{}] }));
|
||||||
const v = verStats.rows[0] || {};
|
const v = verStats.rows[0] || {};
|
||||||
|
|
||||||
|
// Stock observations stats
|
||||||
|
const stockStats = await pool.query(`
|
||||||
|
SELECT
|
||||||
|
COUNT(*) AS total_observations,
|
||||||
|
COUNT(DISTINCT transceiver_id) AS transceivers_with_stock,
|
||||||
|
COUNT(DISTINCT source_vendor_id) AS vendors_with_stock,
|
||||||
|
SUM(warehouse_de_qty) FILTER (WHERE warehouse_de_qty > 0) AS total_de_qty,
|
||||||
|
SUM(warehouse_global_qty) FILTER (WHERE warehouse_global_qty > 0) AS total_global_qty,
|
||||||
|
MAX(time) AS last_observation_at
|
||||||
|
FROM stock_observations
|
||||||
|
`).catch(() => ({ rows: [{}] }));
|
||||||
|
const s = stockStats.rows[0] || {};
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
success: true,
|
success: true,
|
||||||
status: "healthy",
|
status: "healthy",
|
||||||
@ -34,13 +47,21 @@ healthRouter.get("/", async (_req: Request, res: Response) => {
|
|||||||
stats,
|
stats,
|
||||||
},
|
},
|
||||||
verification: {
|
verification: {
|
||||||
price_verified: Number(v.price_verified || 0),
|
price_verified: Number(v.price_verified || 0),
|
||||||
image_verified: Number(v.image_verified || 0),
|
image_verified: Number(v.image_verified || 0),
|
||||||
details_verified: Number(v.details_verified || 0),
|
details_verified: Number(v.details_verified || 0),
|
||||||
fully_verified: Number(v.fully_verified || 0),
|
fully_verified: Number(v.fully_verified || 0),
|
||||||
total: Number(v.total || 0),
|
total: Number(v.total || 0),
|
||||||
price_coverage_pct: v.total ? Math.round(Number(v.price_verified) / Number(v.total) * 100) : 0,
|
price_coverage_pct: v.total ? Math.round(Number(v.price_verified) / Number(v.total) * 100) : 0,
|
||||||
fully_verified_pct: v.total ? Math.round(Number(v.fully_verified) / Number(v.total) * 100) : 0,
|
fully_verified_pct: v.total ? Math.round(Number(v.fully_verified) / Number(v.total) * 100) : 0,
|
||||||
|
},
|
||||||
|
stock: {
|
||||||
|
total_observations: Number(s.total_observations || 0),
|
||||||
|
transceivers_with_stock: Number(s.transceivers_with_stock || 0),
|
||||||
|
vendors_with_stock: Number(s.vendors_with_stock || 0),
|
||||||
|
total_de_qty: Number(s.total_de_qty || 0),
|
||||||
|
total_global_qty: Number(s.total_global_qty || 0),
|
||||||
|
last_observation_at: s.last_observation_at ?? null,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
|||||||
@ -24,6 +24,7 @@ interface HotTopic {
|
|||||||
source_type: "conference" | "manufacturer" | "trade_press" | "research" | "internal_data" | "competitor";
|
source_type: "conference" | "manufacturer" | "trade_press" | "research" | "internal_data" | "competitor";
|
||||||
data_context?: Record<string, unknown>;
|
data_context?: Record<string, unknown>;
|
||||||
suggested_angle?: string;
|
suggested_angle?: string;
|
||||||
|
date?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -57,6 +58,7 @@ hotTopicsRouter.get("/", async (_req, res) => {
|
|||||||
source_type: "competitor",
|
source_type: "competitor",
|
||||||
data_context: drop,
|
data_context: drop,
|
||||||
suggested_angle: `Price war analysis: Why ${drop.vendor} is cutting ${drop.speed_gbps}G pricing and what it means for procurement`,
|
suggested_angle: `Price war analysis: Why ${drop.vendor} is cutting ${drop.speed_gbps}G pricing and what it means for procurement`,
|
||||||
|
date: drop.detected_at ? new Date(drop.detected_at).toISOString() : undefined,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -82,6 +84,7 @@ hotTopicsRouter.get("/", async (_req, res) => {
|
|||||||
source_type: "internal_data",
|
source_type: "internal_data",
|
||||||
data_context: { products: newProducts.rows },
|
data_context: { products: newProducts.rows },
|
||||||
suggested_angle: `Competitor roundup: What ${vendors[0]} and others just launched — and what it means for your next PO`,
|
suggested_angle: `Competitor roundup: What ${vendors[0]} and others just launched — and what it means for your next PO`,
|
||||||
|
date: newProducts.rows[0]?.created_at ? new Date(newProducts.rows[0].created_at).toISOString() : undefined,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -157,6 +160,7 @@ hotTopicsRouter.get("/", async (_req, res) => {
|
|||||||
impact_months: m.impact_horizon_months,
|
impact_months: m.impact_horizon_months,
|
||||||
},
|
},
|
||||||
suggested_angle: `${m.title}: ${angle}`,
|
suggested_angle: `${m.title}: ${angle}`,
|
||||||
|
date: m.published_at ? new Date(m.published_at).toISOString() : undefined,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -192,6 +196,7 @@ hotTopicsRouter.get("/", async (_req, res) => {
|
|||||||
source_type: "conference",
|
source_type: "conference",
|
||||||
data_context: { talks: (talks as NogRow[]).slice(0, 3) },
|
data_context: { talks: (talks as NogRow[]).slice(0, 3) },
|
||||||
suggested_angle: `What ${event} presenters are actually deploying — lessons for your network refresh`,
|
suggested_angle: `What ${event} presenters are actually deploying — lessons for your network refresh`,
|
||||||
|
date: topTalk.published_at ? new Date(topTalk.published_at).toISOString() : undefined,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -226,6 +231,7 @@ hotTopicsRouter.get("/", async (_req, res) => {
|
|||||||
source_type: "trade_press",
|
source_type: "trade_press",
|
||||||
data_context: { articles: articles.slice(0, 3) },
|
data_context: { articles: articles.slice(0, 3) },
|
||||||
suggested_angle: `${theme}: What the latest announcements actually mean for network operators`,
|
suggested_angle: `${theme}: What the latest announcements actually mean for network operators`,
|
||||||
|
date: articles[0]?.published_at ? new Date(articles[0].published_at).toISOString() : undefined,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -21,6 +21,7 @@ transceiverRouter.get("/", async (req: Request, res: Response) => {
|
|||||||
coherent: q("coherent") === "true" ? true : q("coherent") === "false" ? false : undefined,
|
coherent: q("coherent") === "true" ? true : q("coherent") === "false" ? false : undefined,
|
||||||
market_status: q("market_status"),
|
market_status: q("market_status"),
|
||||||
vendor: q("vendor"),
|
vendor: q("vendor"),
|
||||||
|
verified: q("verified") as "price" | "image" | "details" | "full" | undefined,
|
||||||
limit: q("limit") ? parseInt(q("limit")!) : 50,
|
limit: q("limit") ? parseInt(q("limit")!) : 50,
|
||||||
offset: q("offset") ? parseInt(q("offset")!) : 0,
|
offset: q("offset") ? parseInt(q("offset")!) : 0,
|
||||||
});
|
});
|
||||||
@ -141,9 +142,31 @@ transceiverRouter.get("/:id", async (req: Request, res: Response) => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Last time ANY competitor scraper looked at this transceiver (regardless of result)
|
||||||
|
const lastScanResult = await pool.query(
|
||||||
|
`SELECT MAX(po.time) AS last_scan
|
||||||
|
FROM price_observations po
|
||||||
|
JOIN vendors v ON po.source_vendor_id = v.id
|
||||||
|
WHERE po.transceiver_id = $1
|
||||||
|
AND v.is_competitor = true`,
|
||||||
|
[transceiver.id]
|
||||||
|
);
|
||||||
|
const lastCompetitorScan = lastScanResult.rows[0]?.last_scan ?? null;
|
||||||
|
|
||||||
|
// Has any competitor ever listed a price for this exact product?
|
||||||
|
const competitorHasProduct = prices.some(
|
||||||
|
(p) => p.vendor_type !== "flexoptix" && p.price > 0
|
||||||
|
);
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
success: true,
|
success: true,
|
||||||
data: { ...transceiver, competitor_prices: allPrices, price_anomaly: priceAnomaly },
|
data: {
|
||||||
|
...transceiver,
|
||||||
|
competitor_prices: allPrices,
|
||||||
|
price_anomaly: priceAnomaly,
|
||||||
|
last_competitor_scan: lastCompetitorScan,
|
||||||
|
competitor_has_product: competitorHasProduct,
|
||||||
|
},
|
||||||
});
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error("Get transceiver error:", err);
|
console.error("Get transceiver error:", err);
|
||||||
|
|||||||
@ -207,10 +207,11 @@
|
|||||||
return '<div class="gen-card" style="cursor:pointer;border-left:3px solid ' + c + '" ' +
|
return '<div class="gen-card" style="cursor:pointer;border-left:3px solid ' + c + '" ' +
|
||||||
'onclick="window._generateFromHotTopic(\'' + cardId + '\')">' +
|
'onclick="window._generateFromHotTopic(\'' + cardId + '\')">' +
|
||||||
'<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px">' +
|
'<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:4px">' +
|
||||||
'<span style="font-size:0.65rem;text-transform:uppercase;font-weight:600;color:' + c + '">' + (t.urgency || '') + '</span>' +
|
'<span style="font-size:0.65rem;text-transform:uppercase;font-weight:600;color:' + c + '">' + (t.urgency || '') + (t.blog_title_created ? ' · <span style="background:#1b4332;color:#6ee7b7;font-size:0.6rem;padding:1px 5px;border-radius:3px;font-weight:700;text-transform:none">✓ Blog erstellt</span>' : '') + '</span>' +
|
||||||
'<span style="font-size:0.6rem;color:var(--text-dim)">' + (t.source_type || '') + ' · ' + (t.source || '') + '</span></div>' +
|
'<span style="font-size:0.6rem;color:var(--text-dim)">' + (t.source_type || '') + ' · ' + (t.source || '') + '</span></div>' +
|
||||||
'<div class="gen-card-title" style="font-size:0.85rem;line-height:1.3">' + (t.title || '') + '</div>' +
|
'<div class="gen-card-title" style="font-size:0.85rem;line-height:1.3">' + (t.title || '') + '</div>' +
|
||||||
'<div class="gen-card-sub" style="font-size:0.7rem;margin-top:4px;line-height:1.4">' + (t.suggested_angle || t.description || '').slice(0, 100) + '</div>' +
|
'<div class="gen-card-sub" style="font-size:0.7rem;margin-top:4px;line-height:1.4">' + (t.suggested_angle || t.description || '').slice(0, 100) + '</div>' +
|
||||||
|
(t.date ? '<div style="font-size:0.62rem;color:var(--text-dim);margin-top:5px">' + new Date(t.date).toLocaleDateString('de-DE', {day:'2-digit',month:'short',year:'numeric'}) + '</div>' : '') +
|
||||||
'</div>';
|
'</div>';
|
||||||
}).join('');
|
}).join('');
|
||||||
}).catch(function(err) {
|
}).catch(function(err) {
|
||||||
|
|||||||
@ -23,6 +23,8 @@ import { registerCompatibilityTools } from "./tools/compatibility.js";
|
|||||||
import { registerKnowledgeTools } from "./tools/knowledge.js";
|
import { registerKnowledgeTools } from "./tools/knowledge.js";
|
||||||
import { registerContentTools } from "./tools/content.js";
|
import { registerContentTools } from "./tools/content.js";
|
||||||
import { registerMarketTools } from "./tools/market.js";
|
import { registerMarketTools } from "./tools/market.js";
|
||||||
|
import { registerSwitchDocTools } from "./tools/switch-docs.js";
|
||||||
|
import { finderTools, handleFinderTool } from "./tools/finder.js";
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
const server = new McpServer({
|
const server = new McpServer({
|
||||||
@ -347,6 +349,184 @@ async function main() {
|
|||||||
await registerKnowledgeTools(server);
|
await registerKnowledgeTools(server);
|
||||||
await registerContentTools(server);
|
await registerContentTools(server);
|
||||||
await registerMarketTools(server);
|
await registerMarketTools(server);
|
||||||
|
await registerSwitchDocTools(server);
|
||||||
|
|
||||||
|
// --- Register finder.ts tools (find_flexoptix_for_switch, get_competitor_alerts) ---
|
||||||
|
for (const [toolName, toolDef] of Object.entries(finderTools)) {
|
||||||
|
const schema: Record<string, z.ZodTypeAny> = {};
|
||||||
|
for (const [propName, propDef] of Object.entries(toolDef.inputSchema.properties ?? {})) {
|
||||||
|
const p = propDef as { type: string; description?: string };
|
||||||
|
schema[propName] = p.type === "number"
|
||||||
|
? z.number().optional().describe(p.description ?? "")
|
||||||
|
: z.string().optional().describe(p.description ?? "");
|
||||||
|
}
|
||||||
|
server.tool(
|
||||||
|
toolName,
|
||||||
|
toolDef.description,
|
||||||
|
schema,
|
||||||
|
async (args) => {
|
||||||
|
const result = await handleFinderTool(toolName, args as Record<string, unknown>);
|
||||||
|
return { content: [{ type: "text" as const, text: result }] };
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Ollama LLM tools: market analysis (qwen2.5:14b) + blog generation (fo-blog-v5) ---
|
||||||
|
const OLLAMA_BASE = process.env["OLLAMA_BASE_URL"] ?? "https://ollama.fichtmueller.org";
|
||||||
|
|
||||||
|
server.tool(
|
||||||
|
"analyze_market_with_llm",
|
||||||
|
"Deep market analysis for a transceiver technology using local LLM (qwen2.5:14b). Provides expert narrative on adoption trends, pricing trajectory, competitive dynamics, and buy/wait/hold recommendation.",
|
||||||
|
{
|
||||||
|
technology: z.string().describe("Technology to analyze, e.g. '400G QSFP-DD', '800G OSFP', '100G ZR'"),
|
||||||
|
context: z.string().optional().describe("Additional context or specific questions to address"),
|
||||||
|
horizon: z.enum(["3m", "6m", "12m", "18m"]).default("12m").describe("Forecast horizon"),
|
||||||
|
},
|
||||||
|
async ({ technology, context, horizon }) => {
|
||||||
|
// Gather DB data to enrich the prompt
|
||||||
|
const [hype, prices, news] = await Promise.all([
|
||||||
|
pool.query(
|
||||||
|
`SELECT hype_phase, hype_score, ROUND(current_share*100,1) AS share_pct,
|
||||||
|
asp_current_usd, asp_decline_pct_3y, years_to_next_phase
|
||||||
|
FROM hype_cycle_analysis WHERE technology ILIKE $1
|
||||||
|
ORDER BY computed_at DESC LIMIT 1`,
|
||||||
|
[`%${technology}%`]
|
||||||
|
),
|
||||||
|
pool.query(
|
||||||
|
`SELECT v.name AS vendor, ROUND(MIN(po.price)::NUMERIC,2) AS min_price,
|
||||||
|
ROUND(MAX(po.price)::NUMERIC,2) AS max_price, po.currency
|
||||||
|
FROM price_observations po JOIN vendors v ON v.id = po.source_vendor_id
|
||||||
|
JOIN transceivers t ON t.id = po.transceiver_id
|
||||||
|
WHERE t.speed ILIKE $1 AND po.time > NOW() - INTERVAL '7 days'
|
||||||
|
GROUP BY v.name, po.currency ORDER BY min_price ASC LIMIT 10`,
|
||||||
|
[`%${technology.split("-")[0]}%`]
|
||||||
|
),
|
||||||
|
pool.query(
|
||||||
|
`SELECT title, summary, published_at FROM news_articles
|
||||||
|
WHERE content_vector @@ plainto_tsquery('english', $1)
|
||||||
|
ORDER BY published_at DESC LIMIT 5`,
|
||||||
|
[technology]
|
||||||
|
).catch(() => ({ rows: [] })),
|
||||||
|
]);
|
||||||
|
|
||||||
|
const dataContext = [
|
||||||
|
hype.rows[0] ? `Hype Cycle: phase=${hype.rows[0].hype_phase}, score=${hype.rows[0].hype_score}/100, market_share=${hype.rows[0].share_pct}%, OEM_ASP=$${hype.rows[0].asp_current_usd}, ASP_decline_3y=${hype.rows[0].asp_decline_pct_3y}%, years_to_next_phase=${hype.rows[0].years_to_next_phase}` : "",
|
||||||
|
prices.rows.length > 0 ? `Current pricing: ${prices.rows.map((r) => `${r.vendor} ${r.currency}${r.min_price}–${r.max_price}`).join(", ")}` : "",
|
||||||
|
news.rows.length > 0 ? `Recent news: ${news.rows.map((r: {title:string}) => r.title).join(" | ")}` : "",
|
||||||
|
].filter(Boolean).join("\n");
|
||||||
|
|
||||||
|
const prompt = `You are a senior optical networking market analyst at a transceiver intelligence platform.
|
||||||
|
|
||||||
|
Technology: ${technology}
|
||||||
|
Forecast horizon: ${horizon}
|
||||||
|
${dataContext ? `\nLive data:\n${dataContext}` : ""}
|
||||||
|
${context ? `\nSpecific questions: ${context}` : ""}
|
||||||
|
|
||||||
|
Provide a concise expert market analysis covering:
|
||||||
|
1. Current market phase and what it means for buyers/sellers
|
||||||
|
2. Price trajectory over the next ${horizon} — will prices rise, fall, or stabilize?
|
||||||
|
3. Key demand drivers and risks
|
||||||
|
4. Competitive dynamics (OEM vs compatible vendors)
|
||||||
|
5. Buy / Wait / Hold recommendation with reasoning
|
||||||
|
|
||||||
|
Keep the analysis actionable and data-driven. Under 400 words.`;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const resp = await fetch(`${OLLAMA_BASE}/api/generate`, {
|
||||||
|
method: "POST",
|
||||||
|
headers: { "Content-Type": "application/json" },
|
||||||
|
body: JSON.stringify({ model: "qwen2.5:14b", prompt, stream: false }),
|
||||||
|
signal: AbortSignal.timeout(120_000),
|
||||||
|
});
|
||||||
|
if (!resp.ok) throw new Error(`Ollama HTTP ${resp.status}`);
|
||||||
|
const data = await resp.json() as { response?: string };
|
||||||
|
return { content: [{ type: "text" as const, text: data.response ?? "No response from model." }] };
|
||||||
|
} catch (err: unknown) {
|
||||||
|
return { content: [{ type: "text" as const, text: `LLM unavailable: ${(err as Error).message}. Use /api/hype-cycle/analysis for raw data.` }] };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
server.tool(
|
||||||
|
"generate_blog_post",
|
||||||
|
"Generate a professional Flexoptix blog post using the fine-tuned fo-blog-v5 model (Ollama). Automatically enriched with live pricing, hype cycle data, and competitor analysis.",
|
||||||
|
{
|
||||||
|
topic: z.string().describe("Blog topic, e.g. '400G QSFP-DD vs 400G ZR — which for your DC?'"),
|
||||||
|
target_audience: z.enum(["network_engineer", "procurement", "executive", "general"]).default("network_engineer").describe("Target reader"),
|
||||||
|
tone: z.enum(["technical", "consultative", "educational"]).default("consultative").describe("Writing tone"),
|
||||||
|
word_count: z.number().default(600).describe("Target word count (300–1000)"),
|
||||||
|
},
|
||||||
|
async ({ topic, target_audience, tone, word_count }) => {
|
||||||
|
// Gather enrichment data
|
||||||
|
const keywords = topic.match(/\b(\d+G|QSFP|SFP|OSFP|ZR|SR|LR|ER)\b/gi) ?? [];
|
||||||
|
const priceData = keywords.length > 0 ? await pool.query(
|
||||||
|
`SELECT v.name AS vendor, t.form_factor, t.speed,
|
||||||
|
ROUND(MIN(po.price)::NUMERIC,2) AS min_price, po.currency
|
||||||
|
FROM price_observations po JOIN vendors v ON v.id = po.source_vendor_id
|
||||||
|
JOIN transceivers t ON t.id = po.transceiver_id
|
||||||
|
WHERE t.speed ILIKE ANY($1) AND po.time > NOW() - INTERVAL '7 days'
|
||||||
|
GROUP BY v.name, t.form_factor, t.speed, po.currency ORDER BY min_price ASC LIMIT 8`,
|
||||||
|
[keywords.map((k: string) => `%${k}%`)]
|
||||||
|
).catch(() => ({ rows: [] })) : { rows: [] };
|
||||||
|
|
||||||
|
const enrichment = priceData.rows.length > 0
|
||||||
|
? `\nCurrent market prices (use naturally in article):\n${priceData.rows.map((r: {vendor:string;form_factor:string;speed:string;min_price:string;currency:string}) => `- ${r.form_factor} ${r.speed}: from ${r.currency}${r.min_price} at ${r.vendor}`).join("\n")}`
|
||||||
|
: "";
|
||||||
|
|
||||||
|
const systemPrompt = `You are a professional technical writer for Flexoptix, Europe's leading transceiver specialist. Write in a ${tone} tone for a ${target_audience.replace(/_/g," ")} audience. Articles should highlight Flexoptix expertise and the value of our FlexBox universal coding solution.`;
|
||||||
|
|
||||||
|
const userPrompt = `Write a ${word_count}-word blog post on: "${topic}"
|
||||||
|
${enrichment}
|
||||||
|
|
||||||
|
Include:
|
||||||
|
- Compelling introduction
|
||||||
|
- Technical explanation appropriate for audience
|
||||||
|
- Real pricing context where available
|
||||||
|
- Call-to-action mentioning Flexoptix or FlexBox
|
||||||
|
- SEO-friendly subheadings
|
||||||
|
|
||||||
|
Do not include a title (added separately). Start directly with the article body.`;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const resp = await fetch(`${OLLAMA_BASE}/api/chat`, {
|
||||||
|
method: "POST",
|
||||||
|
headers: { "Content-Type": "application/json" },
|
||||||
|
body: JSON.stringify({
|
||||||
|
model: "fo-blog-v5",
|
||||||
|
messages: [
|
||||||
|
{ role: "system", content: systemPrompt },
|
||||||
|
{ role: "user", content: userPrompt },
|
||||||
|
],
|
||||||
|
stream: false,
|
||||||
|
}),
|
||||||
|
signal: AbortSignal.timeout(180_000),
|
||||||
|
});
|
||||||
|
if (!resp.ok) {
|
||||||
|
// Fallback to qwen2.5:14b if fo-blog-v5 not available
|
||||||
|
const fallbackResp = await fetch(`${OLLAMA_BASE}/api/chat`, {
|
||||||
|
method: "POST",
|
||||||
|
headers: { "Content-Type": "application/json" },
|
||||||
|
body: JSON.stringify({
|
||||||
|
model: "qwen2.5:14b",
|
||||||
|
messages: [
|
||||||
|
{ role: "system", content: systemPrompt },
|
||||||
|
{ role: "user", content: userPrompt },
|
||||||
|
],
|
||||||
|
stream: false,
|
||||||
|
}),
|
||||||
|
signal: AbortSignal.timeout(180_000),
|
||||||
|
});
|
||||||
|
if (!fallbackResp.ok) throw new Error(`Both fo-blog-v5 and qwen2.5:14b unavailable`);
|
||||||
|
const fallbackData = await fallbackResp.json() as { message?: { content?: string } };
|
||||||
|
return { content: [{ type: "text" as const, text: `[Generated with qwen2.5:14b — fo-blog-v5 unavailable]\n\n${fallbackData.message?.content ?? "No content"}` }] };
|
||||||
|
}
|
||||||
|
const data = await resp.json() as { message?: { content?: string } };
|
||||||
|
return { content: [{ type: "text" as const, text: data.message?.content ?? "No content generated." }] };
|
||||||
|
} catch (err: unknown) {
|
||||||
|
return { content: [{ type: "text" as const, text: `LLM unavailable: ${(err as Error).message}. Check OLLAMA_BASE_URL env var.` }] };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
// Start server
|
// Start server
|
||||||
const transport = new StdioServerTransport();
|
const transport = new StdioServerTransport();
|
||||||
|
|||||||
@ -1,34 +1,50 @@
|
|||||||
/**
|
/**
|
||||||
* Ascent Optics Scraper — US-based compatible transceiver vendor
|
* Ascent Optics Scraper — Chinese OEM transceiver manufacturer
|
||||||
*
|
*
|
||||||
* ascentoptics.com — product catalog with USD prices.
|
* ascentoptics.com — product catalog loaded via JSON API endpoint.
|
||||||
* Tries /catalog/ and /products/ as entry points.
|
* Products are served via /product-list?is_render=1&category_id=CID
|
||||||
|
* (HTML table in JSON response). No retail pricing — "Get Quote" model.
|
||||||
|
* Category IDs are discovered from data-cid attributes on sub-category pages.
|
||||||
*
|
*
|
||||||
* Rate limited: 1 req/2sec.
|
* Rate limited: 1 req/2sec.
|
||||||
*/
|
*/
|
||||||
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
|
import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db";
|
||||||
import { contentHash } from "../utils/hash";
|
|
||||||
import * as cheerio from "cheerio";
|
import * as cheerio from "cheerio";
|
||||||
|
|
||||||
const BASE = "https://ascentoptics.com";
|
const BASE = "https://ascentoptics.com";
|
||||||
const CATALOG_URLS = [
|
|
||||||
"/catalog/",
|
|
||||||
"/products/",
|
|
||||||
"/products/transceivers/",
|
|
||||||
"/catalog/transceivers/",
|
|
||||||
];
|
|
||||||
const MAX_PAGES = 15;
|
|
||||||
const HEADERS = {
|
const HEADERS = {
|
||||||
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||||
Accept: "text/html,application/xhtml+xml",
|
Accept: "text/html,application/xhtml+xml,application/json,*/*;q=0.8",
|
||||||
"Accept-Language": "en-US,en;q=0.9",
|
"Accept-Language": "en-US,en;q=0.9",
|
||||||
|
"X-Requested-With": "XMLHttpRequest",
|
||||||
|
Referer: "https://ascentoptics.com/optical-transceivers/",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Known transceiver categories: slug → { category_id, formFactor, speedGbps, speed }
|
||||||
|
// DAC/AOC/DCO/LPO excluded — transceivers only
|
||||||
|
const CATEGORIES = [
|
||||||
|
{ slug: "/10g-sfp/", categoryId: 33, formFactor: "SFP+", speed: "10G", speedGbps: 10 },
|
||||||
|
{ slug: "/10g-xfp/", categoryId: 34, formFactor: "XFP", speed: "10G", speedGbps: 10 },
|
||||||
|
{ slug: "/25g-sfp28/", categoryId: 22, formFactor: "SFP28", speed: "25G", speedGbps: 25 },
|
||||||
|
{ slug: "/40g-qsfp/", categoryId: 20, formFactor: "QSFP+", speed: "40G", speedGbps: 40 },
|
||||||
|
{ slug: "/100g-qsfp28/", categoryId: 15, formFactor: "QSFP28", speed: "100G", speedGbps: 100 },
|
||||||
|
{ slug: "/100g-sfp112/", categoryId: 0, formFactor: "SFP112", speed: "100G", speedGbps: 100 },
|
||||||
|
{ slug: "/200g-qsfp56/", categoryId: 3, formFactor: "QSFP56", speed: "200G", speedGbps: 200 },
|
||||||
|
{ slug: "/200g-qsfp28-dd/", categoryId: 4, formFactor: "QSFP-DD", speed: "200G", speedGbps: 200 },
|
||||||
|
{ slug: "/400g-qsfp56-dd/", categoryId: 5, formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 },
|
||||||
|
{ slug: "/400g-osfp/", categoryId: 6, formFactor: "OSFP", speed: "400G", speedGbps: 400 },
|
||||||
|
{ slug: "/400g-qsfp112/", categoryId: 7, formFactor: "QSFP112", speed: "400G", speedGbps: 400 },
|
||||||
|
{ slug: "/800g-osfp/", categoryId: 9, formFactor: "OSFP", speed: "800G", speedGbps: 800 },
|
||||||
|
{ slug: "/800g-qsfp-dd800-200g-per-line/", categoryId: 121, formFactor: "QSFP-DD", speed: "800G", speedGbps: 800 },
|
||||||
|
{ slug: "/800g-qsfp112-dd/", categoryId: 0, formFactor: "QSFP112", speed: "800G", speedGbps: 800 },
|
||||||
|
{ slug: "/50g-sfp56/", categoryId: 0, formFactor: "SFP56", speed: "50G", speedGbps: 50 },
|
||||||
|
{ slug: "/16g-sfp/", categoryId: 0, formFactor: "SFP", speed: "16G", speedGbps: 16 },
|
||||||
|
];
|
||||||
|
|
||||||
interface Product {
|
interface Product {
|
||||||
partNumber: string;
|
partNumber: string;
|
||||||
name: string;
|
name: string;
|
||||||
url: string;
|
url: string;
|
||||||
price?: number;
|
|
||||||
formFactor: string;
|
formFactor: string;
|
||||||
speed: string;
|
speed: string;
|
||||||
speedGbps: number;
|
speedGbps: number;
|
||||||
@ -42,37 +58,29 @@ function sleep(ms: number): Promise<void> {
|
|||||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||||
}
|
}
|
||||||
|
|
||||||
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
|
|
||||||
const lower = text.toLowerCase();
|
|
||||||
if (lower.includes("osfp") && !lower.includes("qsfp")) return { formFactor: "OSFP", speed: "400G", speedGbps: 400 };
|
|
||||||
if (lower.includes("qsfp-dd")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
|
|
||||||
if (lower.includes("qsfp28")) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
|
|
||||||
if (lower.includes("qsfp+") || lower.includes("qsfp plus")) return { formFactor: "QSFP+", speed: "40G", speedGbps: 40 };
|
|
||||||
if (lower.includes("sfp56")) return { formFactor: "SFP56", speed: "50G", speedGbps: 50 };
|
|
||||||
if (lower.includes("sfp28") || lower.includes("25g")) return { formFactor: "SFP28", speed: "25G", speedGbps: 25 };
|
|
||||||
if (lower.includes("sfp+") || lower.includes("10gbase") || lower.includes("10g")) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
|
|
||||||
if (lower.includes("xfp")) return { formFactor: "XFP", speed: "10G", speedGbps: 10 };
|
|
||||||
if (lower.includes("1000base") || lower.includes("1g")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
|
|
||||||
if (lower.includes("sfp") && !lower.includes("qsfp")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
|
|
||||||
return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
|
|
||||||
}
|
|
||||||
|
|
||||||
function detectReach(text: string): { label: string; meters: number } | undefined {
|
function detectReach(text: string): { label: string; meters: number } | undefined {
|
||||||
|
const t = text.toUpperCase();
|
||||||
const patterns: [RegExp, string, number][] = [
|
const patterns: [RegExp, string, number][] = [
|
||||||
[/\b80\s*km\b/i, "80km", 80000],
|
[/\b120\s*KM\b/, "120km", 120000],
|
||||||
[/\b40\s*km\b/i, "40km", 40000],
|
[/\b80\s*KM\b/, "80km", 80000],
|
||||||
[/\b20\s*km\b/i, "20km", 20000],
|
[/\b70\s*KM\b/, "70km", 70000],
|
||||||
[/\b10\s*km\b/i, "10km", 10000],
|
[/\b60\s*KM\b/, "60km", 60000],
|
||||||
[/\b2\s*km\b/i, "2km", 2000],
|
[/\b40\s*KM\b/, "40km", 40000],
|
||||||
[/\b550\s*m\b/i, "550m", 550],
|
[/\b20\s*KM\b/, "20km", 20000],
|
||||||
[/\b300\s*m\b/i, "300m", 300],
|
[/\b10\s*KM\b/, "10km", 10000],
|
||||||
[/\b100\s*m\b/i, "100m", 100],
|
[/\b5\s*KM\b/, "5km", 5000],
|
||||||
|
[/\b2\s*KM\b/, "2km", 2000],
|
||||||
|
[/\b550\s*M\b/, "550m", 550],
|
||||||
|
[/\b500\s*M\b/, "500m", 500],
|
||||||
|
[/\b300\s*M\b/, "300m", 300],
|
||||||
|
[/\b220\s*M\b/, "220m", 220],
|
||||||
|
[/\b100\s*M\b/, "100m", 100],
|
||||||
[/\bLR4\b/, "10km", 10000], [/\bLR\b/, "10km", 10000],
|
[/\bLR4\b/, "10km", 10000], [/\bLR\b/, "10km", 10000],
|
||||||
[/\bER4?\b/, "40km", 40000], [/\bZR4?\b/, "80km", 80000],
|
[/\bER\b/, "40km", 40000], [/\bZR\b/, "80km", 80000],
|
||||||
[/\bSR4?\b/, "300m", 300], [/\bDR4?\b/, "500m", 500], [/\bFR4?\b/, "2km", 2000],
|
[/\bSR\b/, "300m", 300], [/\bDR\b/, "500m", 500], [/\bFR\b/, "2km", 2000],
|
||||||
];
|
];
|
||||||
for (const [regex, label, meters] of patterns) {
|
for (const [regex, label, meters] of patterns) {
|
||||||
if (regex.test(text)) return { label, meters };
|
if (regex.test(t)) return { label, meters };
|
||||||
}
|
}
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
@ -89,96 +97,81 @@ function detectWavelength(text: string): string {
|
|||||||
return match ? match[1] : "";
|
return match ? match[1] : "";
|
||||||
}
|
}
|
||||||
|
|
||||||
function parseProductList(html: string, sourceUrl: string): Product[] {
|
/** Fetch category ID from a sub-page's data-cid attribute (fallback for categoryId=0 entries) */
|
||||||
const $ = cheerio.load(html);
|
async function fetchCategoryId(slug: string): Promise<number> {
|
||||||
const products: Product[] = [];
|
try {
|
||||||
|
const resp = await fetch(BASE + slug, {
|
||||||
const cardSelectors = [
|
headers: { ...HEADERS, "X-Requested-With": "" },
|
||||||
".product-item", ".product", ".item", "li.product",
|
signal: AbortSignal.timeout(15000),
|
||||||
".product-card", "tr", "article", ".catalog-item",
|
|
||||||
".product-list-item", ".result",
|
|
||||||
];
|
|
||||||
|
|
||||||
for (const sel of cardSelectors) {
|
|
||||||
if ($(sel).length >= 2) {
|
|
||||||
$(sel).each((_i, el) => {
|
|
||||||
const text = $(el).text().trim();
|
|
||||||
if (!/sfp|qsfp|xfp|transceiver|optic/i.test(text)) return;
|
|
||||||
|
|
||||||
const nameEl = $(el).find("h2, h3, h4, .name, .product-name, .title, td, a").first();
|
|
||||||
const name = nameEl.text().trim() || text.slice(0, 120);
|
|
||||||
if (!name || name.length < 5) return;
|
|
||||||
|
|
||||||
const linkEl = $(el).find("a[href]").first();
|
|
||||||
const href = linkEl.attr("href") || sourceUrl;
|
|
||||||
const url = href.startsWith("http") ? href : BASE + href;
|
|
||||||
|
|
||||||
// Ascent Optics part numbers: e.g. AS-SFP-10G-SR, SFP-10G-LR-AS
|
|
||||||
const partNumMatch = name.match(/\b(AS[-_][A-Z0-9-]+)\b/i) ||
|
|
||||||
name.match(/\b([A-Z]{2,}[-][A-Z0-9]+[-][A-Z0-9]+[-][A-Z0-9]+)\b/) ||
|
|
||||||
text.match(/Part\s*(?:No\.?|Number|#)?\s*:?\s*([A-Z0-9-]{6,})/i);
|
|
||||||
const partNumber = partNumMatch?.[1] ||
|
|
||||||
name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] ||
|
|
||||||
name.replace(/\s+/g, "-").slice(0, 60);
|
|
||||||
|
|
||||||
// USD price
|
|
||||||
const priceText = $(el).find(".price, .product-price, .amount, [data-price]").text();
|
|
||||||
const priceMatch = priceText.match(/\$\s*([\d,]+\.?\d{0,2})/);
|
|
||||||
let price: number | undefined;
|
|
||||||
if (priceMatch) {
|
|
||||||
const parsed = parseFloat(priceMatch[1].replace(",", ""));
|
|
||||||
if (parsed > 0 && parsed < 50000) price = parsed;
|
|
||||||
}
|
|
||||||
|
|
||||||
const ff = detectFormFactor(name + " " + text);
|
|
||||||
const reach = detectReach(name + " " + text);
|
|
||||||
|
|
||||||
products.push({
|
|
||||||
partNumber,
|
|
||||||
name,
|
|
||||||
url,
|
|
||||||
price,
|
|
||||||
...ff,
|
|
||||||
reachLabel: reach?.label,
|
|
||||||
reachMeters: reach?.meters,
|
|
||||||
fiberType: detectFiber(name + " " + text),
|
|
||||||
wavelength: detectWavelength(name + " " + text),
|
|
||||||
});
|
|
||||||
});
|
|
||||||
if (products.length > 0) break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback: transceiver-relevant anchors
|
|
||||||
if (products.length === 0) {
|
|
||||||
$("a[href]").each((_i, el) => {
|
|
||||||
const name = $(el).text().trim();
|
|
||||||
const href = $(el).attr("href") || "";
|
|
||||||
if (name.length < 8 || name.length > 200 || !/sfp|qsfp|transceiver/i.test(name)) return;
|
|
||||||
const url = href.startsWith("http") ? href : BASE + href;
|
|
||||||
const ff = detectFormFactor(name);
|
|
||||||
const reach = detectReach(name);
|
|
||||||
products.push({
|
|
||||||
partNumber: name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60),
|
|
||||||
name, url, ...ff,
|
|
||||||
reachLabel: reach?.label, reachMeters: reach?.meters,
|
|
||||||
fiberType: detectFiber(name), wavelength: detectWavelength(name),
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
if (!resp.ok) return 0;
|
||||||
|
const html = await resp.text();
|
||||||
|
const m = html.match(/data-cid="(\d+)"/);
|
||||||
|
return m ? parseInt(m[1]) : 0;
|
||||||
|
} catch {
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
const seen = new Set<string>();
|
|
||||||
return products.filter((p) => {
|
|
||||||
if (!p.url || seen.has(p.url)) return false;
|
|
||||||
seen.add(p.url);
|
|
||||||
return true;
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchPage(url: string): Promise<string> {
|
/** Fetch product table HTML for a category via the JSON API */
|
||||||
const resp = await fetch(url, { headers: HEADERS, signal: AbortSignal.timeout(30000) });
|
async function fetchProductTable(categoryId: number, slug: string): Promise<string> {
|
||||||
if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
|
const url = `${BASE}/product-list?is_render=1&category_id=${categoryId}`;
|
||||||
return resp.text();
|
const resp = await fetch(url, {
|
||||||
|
headers: { ...HEADERS, Referer: BASE + slug },
|
||||||
|
signal: AbortSignal.timeout(30000),
|
||||||
|
});
|
||||||
|
if (!resp.ok) throw new Error(`HTTP ${resp.status} for /product-list?category_id=${categoryId}`);
|
||||||
|
const data = await resp.json() as { product_table_list?: string };
|
||||||
|
return data.product_table_list ?? "";
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Parse HTML table returned by /product-list API */
|
||||||
|
function parseProductTable(
|
||||||
|
tableHtml: string,
|
||||||
|
cat: typeof CATEGORIES[number],
|
||||||
|
): Product[] {
|
||||||
|
const $ = cheerio.load(tableHtml);
|
||||||
|
const products: Product[] = [];
|
||||||
|
|
||||||
|
$("tr").each((_i, row) => {
|
||||||
|
const cells = $(row).find("td");
|
||||||
|
if (cells.length < 3) return;
|
||||||
|
|
||||||
|
// Column layout: [image] [part_number] [description] [data_rate] [distance] [wavelength] [connector] [datasheet] [quote]
|
||||||
|
const partNumberCell = $(cells[1]);
|
||||||
|
const descCell = $(cells[2]);
|
||||||
|
|
||||||
|
const rawPart = partNumberCell.text().trim();
|
||||||
|
const desc = descCell.text().trim();
|
||||||
|
|
||||||
|
// Skip header rows and non-product rows
|
||||||
|
if (!rawPart || rawPart.length < 3 || /part\s*no|description/i.test(rawPart)) return;
|
||||||
|
if (rawPart.length > 80) return;
|
||||||
|
|
||||||
|
const url = (() => {
|
||||||
|
const a = partNumberCell.find("a[href]").first().attr("href") ?? descCell.find("a[href]").first().attr("href");
|
||||||
|
if (!a) return BASE + cat.slug;
|
||||||
|
return a.startsWith("http") ? a : BASE + a;
|
||||||
|
})();
|
||||||
|
|
||||||
|
const combined = `${rawPart} ${desc}`;
|
||||||
|
const reach = detectReach(combined);
|
||||||
|
|
||||||
|
products.push({
|
||||||
|
partNumber: rawPart,
|
||||||
|
name: desc || rawPart,
|
||||||
|
url,
|
||||||
|
formFactor: cat.formFactor,
|
||||||
|
speed: cat.speed,
|
||||||
|
speedGbps: cat.speedGbps,
|
||||||
|
reachLabel: reach?.label,
|
||||||
|
reachMeters: reach?.meters,
|
||||||
|
fiberType: detectFiber(combined),
|
||||||
|
wavelength: detectWavelength(combined),
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
return products;
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function scrapeAscentOptics(): Promise<void> {
|
export async function scrapeAscentOptics(): Promise<void> {
|
||||||
@ -188,89 +181,58 @@ export async function scrapeAscentOptics(): Promise<void> {
|
|||||||
"Ascent Optics",
|
"Ascent Optics",
|
||||||
"compatible",
|
"compatible",
|
||||||
"https://ascentoptics.com",
|
"https://ascentoptics.com",
|
||||||
BASE + CATALOG_URLS[0],
|
BASE + "/optical-transceivers/",
|
||||||
);
|
);
|
||||||
|
|
||||||
const allProducts: Product[] = [];
|
let totalProducts = 0;
|
||||||
const seenUrls = new Set<string>();
|
|
||||||
const triedUrls = new Set<string>();
|
|
||||||
|
|
||||||
for (const catalogPath of CATALOG_URLS) {
|
for (const cat of CATEGORIES) {
|
||||||
const catalogUrl = BASE + catalogPath;
|
let cid = cat.categoryId;
|
||||||
if (triedUrls.has(catalogUrl)) continue;
|
|
||||||
triedUrls.add(catalogUrl);
|
|
||||||
|
|
||||||
console.log(` Fetching catalog: ${catalogUrl}`);
|
// Resolve unknown category IDs dynamically
|
||||||
try {
|
if (cid === 0) {
|
||||||
const html = await fetchPage(catalogUrl);
|
cid = await fetchCategoryId(cat.slug);
|
||||||
const pageProducts = parseProductList(html, catalogUrl);
|
if (cid === 0) {
|
||||||
for (const p of pageProducts) {
|
console.log(` Skipping ${cat.slug} — category ID not found`);
|
||||||
if (!seenUrls.has(p.url)) { seenUrls.add(p.url); allProducts.push(p); }
|
await sleep(1000);
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
console.log(` Found ${pageProducts.length} products`);
|
}
|
||||||
await sleep(2000);
|
|
||||||
|
|
||||||
// Paginate from each working catalog URL
|
console.log(`\n--- ${cat.formFactor} (${cat.speed}) [cid=${cid}] ---`);
|
||||||
for (let page = 2; page <= MAX_PAGES; page++) {
|
|
||||||
const pageUrl = `${catalogUrl}?page=${page}`;
|
try {
|
||||||
|
const tableHtml = await fetchProductTable(cid, cat.slug);
|
||||||
|
const products = parseProductTable(tableHtml, cat);
|
||||||
|
console.log(` Found ${products.length} products`);
|
||||||
|
|
||||||
|
for (const product of products) {
|
||||||
try {
|
try {
|
||||||
const pageHtml = await fetchPage(pageUrl);
|
await findOrCreateScrapedTransceiver({
|
||||||
const paginated = parseProductList(pageHtml, pageUrl);
|
partNumber: product.partNumber,
|
||||||
if (paginated.length === 0) break;
|
vendorId,
|
||||||
for (const p of paginated) {
|
formFactor: product.formFactor,
|
||||||
if (!seenUrls.has(p.url)) { seenUrls.add(p.url); allProducts.push(p); }
|
speedGbps: product.speedGbps,
|
||||||
}
|
speed: product.speed,
|
||||||
console.log(` Page ${page}: ${paginated.length} products`);
|
reachMeters: product.reachMeters,
|
||||||
await sleep(2000);
|
reachLabel: product.reachLabel,
|
||||||
} catch {
|
fiberType: product.fiberType,
|
||||||
break;
|
wavelengths: product.wavelength,
|
||||||
|
category: "DataCenter",
|
||||||
|
});
|
||||||
|
totalProducts++;
|
||||||
|
} catch (err) {
|
||||||
|
console.warn(` Error: ${(err as Error).message.slice(0, 80)}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.warn(` Failed: ${(err as Error).message}`);
|
console.error(` Category failed: ${(err as Error).message}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
await sleep(2000);
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`\nTotal unique products: ${allProducts.length}`);
|
console.log(`\n=== Ascent Optics Complete: ${totalProducts} products (catalog only — no pricing) ===`);
|
||||||
|
|
||||||
let totalProducts = 0;
|
|
||||||
let priceUpdates = 0;
|
|
||||||
|
|
||||||
for (const product of allProducts) {
|
|
||||||
try {
|
|
||||||
const txId = await findOrCreateScrapedTransceiver({
|
|
||||||
partNumber: product.partNumber,
|
|
||||||
vendorId,
|
|
||||||
formFactor: product.formFactor,
|
|
||||||
speedGbps: product.speedGbps,
|
|
||||||
speed: product.speed,
|
|
||||||
reachMeters: product.reachMeters,
|
|
||||||
reachLabel: product.reachLabel,
|
|
||||||
fiberType: product.fiberType,
|
|
||||||
wavelengths: product.wavelength,
|
|
||||||
category: "DataCenter",
|
|
||||||
});
|
|
||||||
|
|
||||||
if (product.price && product.price > 0) {
|
|
||||||
const hash = contentHash(JSON.stringify({ price: product.price, part: product.partNumber }));
|
|
||||||
const updated = await upsertPriceObservation({
|
|
||||||
transceiverId: txId,
|
|
||||||
sourceVendorId: vendorId,
|
|
||||||
price: product.price,
|
|
||||||
currency: "USD",
|
|
||||||
stockLevel: "in_stock",
|
|
||||||
url: product.url,
|
|
||||||
contentHash: hash,
|
|
||||||
});
|
|
||||||
if (updated) priceUpdates++;
|
|
||||||
}
|
|
||||||
totalProducts++;
|
|
||||||
} catch (err) {
|
|
||||||
console.warn(` Error saving ${product.partNumber}: ${(err as Error).message.slice(0, 80)}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(`\n=== Ascent Optics Complete: ${totalProducts} products, ${priceUpdates} prices ===`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (require.main === module) {
|
if (require.main === module) {
|
||||||
|
|||||||
@ -168,6 +168,7 @@ export async function scrapeAtgbics(): Promise<void> {
|
|||||||
maxRequestsPerCrawl: MAX_PAGES,
|
maxRequestsPerCrawl: MAX_PAGES,
|
||||||
requestHandlerTimeoutSecs: 60,
|
requestHandlerTimeoutSecs: 60,
|
||||||
headless: true,
|
headless: true,
|
||||||
|
useSessionPool: false, // Disable session pool to avoid SDK_SESSION_POOL_STATE.json crash
|
||||||
...(proxyConfiguration ? { proxyConfiguration } : {}),
|
...(proxyConfiguration ? { proxyConfiguration } : {}),
|
||||||
launchContext: {
|
launchContext: {
|
||||||
launchOptions: {
|
launchOptions: {
|
||||||
|
|||||||
@ -285,7 +285,7 @@ async function saveEnrichment(switchId: string, result: EnrichResult): Promise<v
|
|||||||
|
|
||||||
// Find eBay vendor ID (create if needed)
|
// Find eBay vendor ID (create if needed)
|
||||||
const ebayVendorResult = await pool.query(
|
const ebayVendorResult = await pool.query(
|
||||||
`INSERT INTO vendors (name, slug, type, website_url)
|
`INSERT INTO vendors (name, slug, type, website)
|
||||||
VALUES ('eBay Marketplace', 'ebay', 'marketplace', 'https://www.ebay.de')
|
VALUES ('eBay Marketplace', 'ebay', 'marketplace', 'https://www.ebay.de')
|
||||||
ON CONFLICT (slug) DO UPDATE SET name = EXCLUDED.name
|
ON CONFLICT (slug) DO UPDATE SET name = EXCLUDED.name
|
||||||
RETURNING id`
|
RETURNING id`
|
||||||
@ -353,7 +353,7 @@ export async function enrichTransceiversFromEbay(limit = 50): Promise<void> {
|
|||||||
|
|
||||||
// Find eBay vendor
|
// Find eBay vendor
|
||||||
const ebayVendor = await pool.query(
|
const ebayVendor = await pool.query(
|
||||||
`INSERT INTO vendors (name, slug, type, website_url)
|
`INSERT INTO vendors (name, slug, type, website)
|
||||||
VALUES ('eBay Marketplace', 'ebay', 'marketplace', 'https://www.ebay.de')
|
VALUES ('eBay Marketplace', 'ebay', 'marketplace', 'https://www.ebay.de')
|
||||||
ON CONFLICT (slug) DO UPDATE SET updated_at = NOW()
|
ON CONFLICT (slug) DO UPDATE SET updated_at = NOW()
|
||||||
RETURNING id`
|
RETURNING id`
|
||||||
|
|||||||
@ -114,13 +114,24 @@ function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product
|
|||||||
// Collapse whitespace for easier regex matching
|
// Collapse whitespace for easier regex matching
|
||||||
const collapsed = html.replace(/\s+/g, " ");
|
const collapsed = html.replace(/\s+/g, " ");
|
||||||
|
|
||||||
// BigCommerce card-title pattern:
|
// BigCommerce card pattern (attribute order varies by theme version):
|
||||||
// <a aria-label="Product Name, £XX.XX" href="URL" data-event-type="product-click">
|
// Old: <a aria-label="Name, £XX.XX" href="URL" data-event-type="product-click">
|
||||||
const productRegex = /aria-label="([^"]+)"\s+href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"[^>]*data-event-type="product-click"/gi;
|
// New: <a href="URL" class="card-figure__link..." aria-label="Name, £XX.XX">
|
||||||
|
// Two-pass approach: find all product <a> tags regardless of attribute order
|
||||||
|
const productRegex = /href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"[^>]*aria-label="([^"]+)"/gi;
|
||||||
|
const productRegex2 = /aria-label="([^"]+)"[^>]*href="(https?:\/\/(?:www\.)?gbics\.com\/[^"]+)"/gi;
|
||||||
let match;
|
let match;
|
||||||
|
const rawMatches: { url: string; label: string; index: number }[] = [];
|
||||||
while ((match = productRegex.exec(collapsed)) !== null) {
|
while ((match = productRegex.exec(collapsed)) !== null) {
|
||||||
const label = match[1].trim();
|
rawMatches.push({ url: match[1].trim(), label: match[2].trim(), index: match.index });
|
||||||
const url = match[2];
|
}
|
||||||
|
if (rawMatches.length === 0) {
|
||||||
|
while ((match = productRegex2.exec(collapsed)) !== null) {
|
||||||
|
rawMatches.push({ url: match[2].trim(), label: match[1].trim(), index: match.index });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (const { url, label: rawLabel, index: matchIndex } of rawMatches) {
|
||||||
|
const label = rawLabel.replace(/\s+/g, " ").trim();
|
||||||
|
|
||||||
// aria-label contains "Product Name, £XX.XX"
|
// aria-label contains "Product Name, £XX.XX"
|
||||||
// Split on last comma to separate name and price
|
// Split on last comma to separate name and price
|
||||||
@ -130,7 +141,7 @@ function parseProductList(html: string, cat: typeof CATEGORIES[number]): Product
|
|||||||
|
|
||||||
// Fallback: extract price from data-price-asc attribute on parent <li>
|
// Fallback: extract price from data-price-asc attribute on parent <li>
|
||||||
if (!price) {
|
if (!price) {
|
||||||
const priceContext = collapsed.slice(Math.max(0, match.index - 500), match.index);
|
const priceContext = collapsed.slice(Math.max(0, matchIndex - 500), matchIndex);
|
||||||
const dataPriceMatch = priceContext.match(/data-price-asc="(\d+)"/);
|
const dataPriceMatch = priceContext.match(/data-price-asc="(\d+)"/);
|
||||||
if (dataPriceMatch) price = parseFloat(dataPriceMatch[1]);
|
if (dataPriceMatch) price = parseFloat(dataPriceMatch[1]);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -235,17 +235,20 @@ export async function scrapeSkylane(): Promise<void> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try pagination for this URL
|
// Try pagination — break as soon as no NEW unique product URLs appear
|
||||||
|
// (Skylane uses Algolia/InstantSearch: ?page=N returns same content, so
|
||||||
|
// content-based termination is required instead of empty-page detection)
|
||||||
for (let page = 2; page <= MAX_PAGES; page++) {
|
for (let page = 2; page <= MAX_PAGES; page++) {
|
||||||
const pageUrl = `${catalogUrl}?page=${page}`;
|
const pageUrl = `${catalogUrl}?page=${page}`;
|
||||||
try {
|
try {
|
||||||
const pageHtml = await fetchPage(pageUrl);
|
const pageHtml = await fetchPage(pageUrl);
|
||||||
const paginated = parseProductList(pageHtml, pageUrl);
|
const paginated = parseProductList(pageHtml, pageUrl);
|
||||||
if (paginated.length === 0) break;
|
const newCount = paginated.filter((p) => !seenUrls.has(p.url)).length;
|
||||||
|
if (newCount === 0) break; // No genuinely new products → end of real pagination
|
||||||
for (const p of paginated) {
|
for (const p of paginated) {
|
||||||
if (!seenUrls.has(p.url)) { seenUrls.add(p.url); allProducts.push(p); }
|
if (!seenUrls.has(p.url)) { seenUrls.add(p.url); allProducts.push(p); }
|
||||||
}
|
}
|
||||||
console.log(` Page ${page}: ${paginated.length} products`);
|
console.log(` Page ${page}: ${newCount} new products`);
|
||||||
await sleep(2000);
|
await sleep(2000);
|
||||||
} catch {
|
} catch {
|
||||||
break;
|
break;
|
||||||
|
|||||||
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
ERIK="root@217.154.82.179"
|
ERIK="root@82.165.222.127"
|
||||||
REMOTE_PATH="/opt/tip"
|
REMOTE_PATH="/opt/tip"
|
||||||
LOCAL_PATH="$(cd "$(dirname "$0")/.." && pwd)"
|
LOCAL_PATH="$(cd "$(dirname "$0")/.." && pwd)"
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user