feat: Phase 7 — Blog generator + scraper scheduler activation

Blog draft engine generates structured markdown from all Qdrant
collections (products, news, FAQ, troubleshooting). Supports 4
topic types: hype_cycle, comparison, new_product, tutorial.

- routes/blog.ts: POST /api/blog/generate, GET/PUT endpoints
- ecosystem.config.js: Added tip-scraper PM2 process
- Scraper scheduler (pg-boss) now running on Erik with 8 job queues
- News scraper running every 6 hours on Erik
This commit is contained in:
Rene Fichtmueller 2026-03-28 00:32:08 +13:00
parent 0a63307505
commit f48a809e40
3 changed files with 418 additions and 6 deletions

View File

@ -2,15 +2,46 @@ module.exports = {
apps: [ apps: [
{ {
name: "tip-api", name: "tip-api",
script: "packages/api/dist/index.js", script: "./node_modules/.bin/tsx",
instances: 1, args: "packages/api/src/index.ts",
autorestart: true, cwd: "/opt/tip",
watch: false, interpreter: "none",
max_memory_restart: "512M", exec_mode: "fork",
env: { env: {
NODE_ENV: "production", NODE_ENV: "production",
API_PORT: 3200, API_PORT: "3201",
POSTGRES_HOST: "localhost",
POSTGRES_PORT: "5433",
POSTGRES_DB: "transceiver_db",
POSTGRES_USER: "tip",
POSTGRES_PASSWORD: "tip_prod_2026",
OLLAMA_URL: "http://localhost:11434",
QDRANT_URL: "http://localhost:6333",
DOCLING_URL: "http://localhost:8100",
}, },
max_memory_restart: "500M",
instances: 1,
autorestart: true,
},
{
name: "tip-scraper",
script: "./node_modules/.bin/tsx",
args: "packages/scraper/src/index.ts",
cwd: "/opt/tip",
interpreter: "none",
exec_mode: "fork",
env: {
NODE_ENV: "production",
POSTGRES_HOST: "localhost",
POSTGRES_PORT: "5433",
POSTGRES_DB: "transceiver_db",
POSTGRES_USER: "tip",
POSTGRES_PASSWORD: "tip_prod_2026",
},
max_memory_restart: "1G",
instances: 1,
autorestart: true,
cron_restart: "0 0 * * *",
}, },
], ],
}; };

View File

@ -11,6 +11,7 @@ import { healthRouter } from "./routes/health";
import { hypeCycleRouter } from "./routes/hype-cycle"; import { hypeCycleRouter } from "./routes/hype-cycle";
import { searchRouter } from "./routes/search"; import { searchRouter } from "./routes/search";
import { documentRouter } from "./routes/documents"; import { documentRouter } from "./routes/documents";
import { blogRouter } from "./routes/blog";
const app = express(); const app = express();
@ -36,6 +37,7 @@ app.use("/api/health", healthRouter);
app.use("/api/hype-cycle", hypeCycleRouter); app.use("/api/hype-cycle", hypeCycleRouter);
app.use("/api/search", searchRouter); app.use("/api/search", searchRouter);
app.use("/api/documents", documentRouter); app.use("/api/documents", documentRouter);
app.use("/api/blog", blogRouter);
// Root // Root
app.get("/", (_req, res) => { app.get("/", (_req, res) => {
@ -61,6 +63,10 @@ app.get("/", (_req, res) => {
"POST /api/documents/process {url, title?, doc_type?, vendor?, collection?}", "POST /api/documents/process {url, title?, doc_type?, vendor?, collection?}",
"GET /api/documents", "GET /api/documents",
"GET /api/documents/:id", "GET /api/documents/:id",
"POST /api/blog/generate {topic, speed?, form_factor?, use_case?}",
"GET /api/blog",
"GET /api/blog/:id",
"PUT /api/blog/:id/status {status: draft|review|approved|published}",
], ],
}); });
}); });

View File

@ -0,0 +1,375 @@
/**
* Blog Draft Generator API
*
* POST /api/blog/generate Generate a blog draft from data
* GET /api/blog List all drafts
* GET /api/blog/:id Get a specific draft
* PUT /api/blog/:id/status Update draft status
*/
import { Router, Request, Response } from "express";
import { pool } from "../db/client";
import { semanticSearch } from "../embeddings/client";
/** Express router that carries the blog draft endpoints registered further down in this file. */
export const blogRouter = Router();
/** One blog title template together with the metadata stored alongside a generated draft. */
interface BlogTopic {
  /** Topic identifier; the templates in this file repeat the key they are grouped under. */
  topic: string;
  /** Title pattern; may contain the {YEAR}, {SPEED}, {FORM_FACTOR} and {USE_CASE} placeholders. */
  title: string;
  /** Readership the draft is written for. */
  target_audience: "sales" | "technical" | "customer" | "seo";
  /** Keywords saved with the draft and used to build the search query. */
  seo_keywords: Array<string>;
}
/**
 * Title templates offered by the blog generator, grouped by topic key.
 *
 * The keys double as the list of valid `topic` values for POST /generate.
 * Titles may contain {YEAR}, {SPEED}, {FORM_FACTOR} and {USE_CASE} placeholders
 * that the generate handler fills in.
 */
const BLOG_TEMPLATES: Record<string, BlogTopic[]> = (() => {
  // Small factory so each template reads as one line of data; the property order
  // of the produced objects matches the field order of BlogTopic.
  const entry = (
    topic: string,
    title: string,
    target_audience: BlogTopic["target_audience"],
    seo_keywords: string[],
  ): BlogTopic => ({ topic, title, target_audience, seo_keywords });

  return {
    hype_cycle: [
      entry(
        "hype_cycle",
        "The State of {SPEED} Transceivers in {YEAR}: Where Are We on the Hype Cycle?",
        "technical",
        ["transceiver", "hype cycle", "optical networking"],
      ),
      entry(
        "hype_cycle",
        "Investment Guide: Which Transceiver Speeds to Bet On in {YEAR}",
        "sales",
        ["transceiver investment", "data center optics", "ROI"],
      ),
    ],
    comparison: [
      entry(
        "comparison",
        "{FORM_FACTOR} Transceiver Comparison: Top 5 Options for {USE_CASE}",
        "customer",
        ["transceiver comparison", "best transceiver"],
      ),
      entry(
        "comparison",
        "Original vs. Compatible Transceivers: The Real Cost Difference in {YEAR}",
        "seo",
        ["compatible transceiver", "original vs compatible", "cost savings"],
      ),
    ],
    new_product: [
      entry(
        "new_product",
        "{SPEED} Transceivers: What's New and What It Means for Your Network",
        "technical",
        ["new transceiver", "latest optics"],
      ),
    ],
    tutorial: [
      entry(
        "tutorial",
        "How to Choose the Right Transceiver: A Complete {YEAR} Buying Guide",
        "customer",
        ["transceiver buying guide", "how to choose transceiver"],
      ),
      entry(
        "tutorial",
        "Troubleshooting Transceiver Issues: The Definitive Guide",
        "technical",
        ["transceiver troubleshooting", "optical module problems"],
      ),
    ],
  };
})();
/**
 * Gather supporting material for a blog draft from the four embedding collections.
 *
 * The keywords are joined into a single query string and searched in parallel
 * against the product (10 hits), news (5), FAQ (5) and troubleshooting (3)
 * collections. Each search is best-effort: if one fails, the failure is logged
 * and that collection contributes an empty list, so one unavailable collection
 * never aborts draft generation.
 *
 * @param topic    Topic the draft is generated for; used only to give log messages context.
 * @param keywords Search terms, joined with spaces to form the query.
 * @returns Per-collection hit lists; every hit is its payload flattened together with its `score`.
 */
async function gatherBlogData(topic: string, keywords: string[]): Promise<{
  products: Array<Record<string, unknown>>;
  news: Array<Record<string, unknown>>;
  faq: Array<Record<string, unknown>>;
  troubleshooting: Array<Record<string, unknown>>;
}> {
  const query = keywords.join(" ");

  // Run one search; on failure log it (instead of hiding it) and fall back to no hits.
  const searchOrEmpty = (
    collection: Parameters<typeof semanticSearch>[0],
    limit: Parameters<typeof semanticSearch>[2],
  ) =>
    semanticSearch(collection, query, limit).catch((err: unknown) => {
      console.warn(
        `[blog] semantic search failed for ${String(collection)} (topic=${topic}):`,
        err instanceof Error ? err.message : err,
      );
      return [];
    });

  const [products, news, faq, troubleshooting] = await Promise.all([
    searchOrEmpty("product_embeddings", 10),
    searchOrEmpty("news_embeddings", 5),
    searchOrEmpty("faq_embeddings", 5),
    searchOrEmpty("troubleshooting_embeddings", 3),
  ]);

  return {
    products: products.map((r) => ({ score: r.score, ...r.payload })),
    news: news.map((r) => ({ score: r.score, ...r.payload })),
    faq: faq.map((r) => ({ score: r.score, ...r.payload })),
    troubleshooting: troubleshooting.map((r) => ({ score: r.score, ...r.payload })),
  };
}
/**
 * Build the section outline for a draft from the gathered search data.
 *
 * Every outline starts with "Introduction" and ends with
 * "Conclusion & Recommendations". The sections in between depend on `topic`:
 * "hype_cycle", "comparison" and "tutorial" each get three dedicated sections;
 * any other topic (for example "new_product") gets the generic
 * "What's New" / "Technical Details" pair.
 *
 * Values missing from the search payloads are left out of the notes instead of
 * showing up as the text "undefined" or as empty slots between separators.
 *
 * @param title Draft title. Currently not used for the outline; kept so the call signature stays stable.
 * @param topic Topic key that selects the middle sections.
 * @param data  Search results as returned by gatherBlogData.
 * @returns The ordered sections, each with a heading and editorial notes.
 */
function generateOutline(
  title: string,
  topic: string,
  data: Awaited<ReturnType<typeof gatherBlogData>>,
): { sections: Array<{ heading: string; notes: string }> } {
  // Joins only the values that are actually present.
  const joinPresent = (values: unknown[], separator: string): string =>
    values
      .filter((value) => value !== undefined && value !== null && value !== "")
      .map((value) => String(value))
      .join(separator);

  const sections: Array<{ heading: string; notes: string }> = [];

  sections.push({
    heading: "Introduction",
    notes: `Hook the reader with the key question this post answers. Reference ${data.news.length} recent news items for timeliness.`,
  });

  if (topic === "hype_cycle") {
    sections.push({
      heading: "Understanding the Hype Cycle for Optical Transceivers",
      notes: "Explain the Norton-Bass model phases: Innovation Trigger → Peak of Inflated Expectations → Trough of Disillusionment → Slope of Enlightenment → Plateau of Productivity",
    });
    sections.push({
      heading: "Current Position of Key Technologies",
      notes: `Cover products found: ${joinPresent(
        data.products.slice(0, 5).map((p) => p.standard_name || p.text),
        ", ",
      )}`,
    });
    sections.push({
      heading: "Market Signals and Recent Developments",
      notes: `Reference: ${joinPresent(data.news.map((n) => n.title), "; ")}`,
    });
  } else if (topic === "comparison") {
    // Drop missing values BEFORE stringifying: String(undefined) is the truthy text
    // "undefined", which a later filter(Boolean) would let through.
    const formFactors = [
      ...new Set(
        data.products
          .map((p) => p.form_factor)
          .filter((ff) => ff !== undefined && ff !== null && ff !== "")
          .map((ff) => String(ff)),
      ),
    ];
    sections.push({
      heading: "Products Compared",
      notes: `Form factors covered: ${formFactors.join(", ")}. ${data.products.length} products analyzed.`,
    });
    sections.push({
      heading: "Key Specifications Breakdown",
      notes: "Compare speed, reach, power consumption, fiber type, and pricing across products.",
    });
    sections.push({
      heading: "Compatibility Considerations",
      notes: `Reference FAQ: ${joinPresent(data.faq.slice(0, 3).map((f) => f.question), "; ")}`,
    });
  } else if (topic === "tutorial") {
    sections.push({
      heading: "Step 1: Determine Your Requirements",
      notes: "Speed, distance, fiber type, switch compatibility.",
    });
    sections.push({
      heading: "Step 2: Understanding Form Factors",
      notes: `Cover: ${joinPresent(
        data.faq.filter((f) => String(f.category) === "form_factor").map((f) => f.question),
        "; ",
      )}`,
    });
    sections.push({
      heading: "Common Issues and Troubleshooting",
      notes: `Reference: ${joinPresent(data.troubleshooting.map((t) => t.symptom), "; ")}`,
    });
  } else {
    sections.push({
      heading: "What's New",
      notes: `${data.products.length} relevant products, ${data.news.length} recent news items.`,
    });
    sections.push({
      heading: "Technical Details",
      notes: "Deep-dive into specifications and use cases.",
    });
  }

  sections.push({
    heading: "Conclusion & Recommendations",
    notes: "Summarize key takeaways. Include CTA for Flexoptix product finder.",
  });

  return { sections };
}
/**
 * Render the markdown draft for an outline.
 *
 * The draft starts with the title as an H1 and a "generated on" line carrying
 * today's date. Each outline section becomes an H2 followed by its notes inside
 * an HTML comment. Body text is then chosen by heading:
 *  - "Introduction": an opening paragraph, prefixed by a sentence quoting the top
 *    news headline when one is available;
 *  - headings containing "Products" or "Technologies": a spec list for up to five products;
 *  - headings containing "Troubleshooting" or "Issues": symptom / cause / solution entries;
 *  - headings containing "Conclusion": closing paragraph plus catalog link.
 * Other sections get only the heading and notes.
 *
 * Missing payload fields fall back to placeholder text instead of rendering as
 * the literal "undefined".
 *
 * @param title   Draft title, used as the H1.
 * @param outline Sections as produced by generateOutline.
 * @param data    Search results as returned by gatherBlogData.
 * @returns The complete draft as one markdown string.
 */
function generateDraft(
  title: string,
  outline: ReturnType<typeof generateOutline>,
  data: Awaited<ReturnType<typeof gatherBlogData>>,
): string {
  const parts: string[] = [];
  parts.push(`# ${title}\n`);
  parts.push(`*Generated by TIP Blog Engine on ${new Date().toISOString().split("T")[0]}*\n`);

  for (const section of outline.sections) {
    parts.push(`\n## ${section.heading}\n`);
    // Notes embed searched text (news titles, FAQ questions); neutralise "-->" so
    // such text cannot terminate the HTML comment early and spill into the visible draft.
    parts.push(`<!-- NOTES: ${section.notes.replace(/-->/g, "--&gt;")} -->\n`);

    if (section.heading === "Introduction") {
      // Only quote a headline when there is one; otherwise the sentence would have no subject.
      const headline = String(data.news[0]?.title || "").trim();
      if (headline) {
        parts.push(`The optical transceiver market continues to evolve rapidly. ${headline} highlights the pace of change in our industry.\n`);
      }
      parts.push(`In this article, we'll explore the key trends, products, and considerations that matter most for network professionals and procurement teams.\n`);
    } else if (section.heading.includes("Products") || section.heading.includes("Technologies")) {
      for (const product of data.products.slice(0, 5)) {
        parts.push(`### ${product.standard_name || product.slug || "Product"}\n`);
        parts.push(`- **Form Factor**: ${product.form_factor || "N/A"}`);
        parts.push(`- **Speed**: ${product.speed || "N/A"}`);
        parts.push(`- **Reach**: ${product.reach_label || "N/A"}`);
        parts.push(`- **Fiber Type**: ${product.fiber_type || "N/A"}`);
        parts.push(`- **Vendor**: ${product.vendor || "N/A"}\n`);
      }
    } else if (section.heading.includes("Troubleshooting") || section.heading.includes("Issues")) {
      for (const ts of data.troubleshooting) {
        // Same placeholder convention as the product list above.
        parts.push(`### ${ts.symptom || "Issue"}\n`);
        parts.push(`**Cause**: ${ts.cause || "N/A"}\n`);
        parts.push(`**Solution**: ${ts.solution || "N/A"}\n`);
      }
    } else if (section.heading.includes("Conclusion")) {
      parts.push(`The transceiver landscape offers more options than ever. Whether you're planning a data center upgrade, evaluating 400G/800G migration, or troubleshooting existing deployments, the right transceiver choice depends on your specific requirements.\n`);
      parts.push(`**[Browse our full transceiver catalog →](https://www.flexoptix.net/en/)**\n`);
    }
  }

  return parts.join("\n");
}
// POST /api/blog/generate — Generate a new blog draft
/**
 * Generates a draft and stores it in `blog_drafts`.
 *
 * Request body (all fields optional): `topic` (defaults to "comparison"; must be
 * a key of BLOG_TEMPLATES), `speed`, `form_factor`, `use_case`. One template of
 * the chosen topic is picked at random, its placeholders are filled, matching
 * data is gathered, and the outline plus markdown draft are generated and saved
 * with status "draft".
 *
 * Responses: 200 with a summary of the stored draft, 400 for an unknown topic,
 * 500 when generation or the insert fails.
 */
blogRouter.post("/generate", async (req: Request, res: Response) => {
  // req.body is undefined when the request carried no parsed body; treat that as "no options"
  // instead of throwing outside the try block below.
  const body = (req.body ?? {}) as Record<string, unknown>;

  // The body is untyped at runtime: keep non-empty strings, stringify numbers, and treat
  // anything else (or any falsy value) as "not provided".
  const optionalText = (value: unknown): string | undefined => {
    if (!value) return undefined;
    if (typeof value === "string") return value;
    if (typeof value === "number" && Number.isFinite(value)) return String(value);
    return undefined;
  };
  const speed = optionalText(body.speed);
  const formFactor = optionalText(body.form_factor);
  const useCase = optionalText(body.use_case);

  const requestedTopic: unknown = body.topic || "comparison";
  // Own-property check: a plain lookup would also "find" inherited keys such as
  // "constructor" or "toString" and later fail with a 500 instead of a 400.
  const templates =
    typeof requestedTopic === "string" &&
    Object.prototype.hasOwnProperty.call(BLOG_TEMPLATES, requestedTopic)
      ? BLOG_TEMPLATES[requestedTopic]
      : undefined;

  if (typeof requestedTopic !== "string" || !templates || templates.length === 0) {
    res.status(400).json({
      success: false,
      error: `Invalid topic. Valid: ${Object.keys(BLOG_TEMPLATES).join(", ")}`,
    });
    return;
  }
  const selectedTopic = requestedTopic;

  try {
    const year = new Date().getFullYear();
    const template = templates[Math.floor(Math.random() * templates.length)];

    // Fill template placeholders. Replacer functions are used so that "$&", "$1" etc.
    // inside user-supplied values are inserted literally rather than interpreted.
    const title = template.title
      .replace("{YEAR}", () => String(year))
      .replace("{SPEED}", () => speed || "400G/800G")
      .replace("{FORM_FACTOR}", () => formFactor || "QSFP-DD/OSFP")
      .replace("{USE_CASE}", () => useCase || "Data Center Interconnect");

    // Build search keywords
    const keywords = [
      ...template.seo_keywords,
      speed || "400G",
      formFactor || "",
      useCase || "data center",
    ].filter(Boolean);

    // Gather data from all collections
    const data = await gatherBlogData(selectedTopic, keywords);

    // Generate outline and draft
    const outline = generateOutline(title, selectedTopic, data);
    const draftContent = generateDraft(title, outline, data);
    // filter(Boolean) drops the empty token produced by leading/trailing whitespace,
    // which would otherwise inflate the count.
    const wordCount = draftContent.split(/\s+/).filter(Boolean).length;

    const dataSources = {
      products: data.products.length,
      news: data.news.length,
      faq: data.faq.length,
      troubleshooting: data.troubleshooting.length,
    };

    // Save to database
    const insertSql = [
      "INSERT INTO blog_drafts (title, topic, target_audience, outline, draft_content, data_sources, status, generated_by, word_count, seo_keywords)",
      "VALUES ($1, $2, $3, $4, $5, $6, 'draft', 'tip-blog-engine', $7, $8)",
      "RETURNING id, created_at",
    ].join("\n");
    const result = await pool.query(insertSql, [
      title,
      selectedTopic,
      template.target_audience,
      JSON.stringify(outline),
      draftContent,
      JSON.stringify(dataSources),
      wordCount,
      template.seo_keywords,
    ]);

    res.json({
      success: true,
      draft: {
        id: result.rows[0].id,
        title,
        topic: selectedTopic,
        target_audience: template.target_audience,
        word_count: wordCount,
        sections: outline.sections.length,
        data_sources: dataSources,
        created_at: result.rows[0].created_at,
      },
    });
  } catch (err) {
    res.status(500).json({
      success: false,
      error: "Blog generation failed",
      detail: err instanceof Error ? err.message : String(err),
    });
  }
});
// GET /api/blog — List all drafts
/**
 * Returns summary columns of the 50 most recently created drafts, newest first,
 * together with the number of rows returned. A failing query yields a 500
 * carrying the error message.
 */
blogRouter.get("/", async (_req: Request, res: Response) => {
  const listDraftsSql = [
    "SELECT id, title, topic, target_audience, status, word_count, seo_keywords, created_at",
    "FROM blog_drafts",
    "ORDER BY created_at DESC",
    "LIMIT 50",
  ].join("\n");

  try {
    const { rows } = await pool.query(listDraftsSql);
    res.json({ success: true, drafts: rows, count: rows.length });
  } catch (failure) {
    const message = (failure as Error).message;
    res.status(500).json({ success: false, error: message });
  }
});
// GET /api/blog/:id — Get a specific draft with full content
/**
 * Returns every column of one draft.
 *
 * Responses: 200 with the draft, 400 when `:id` cannot be a UUID, 404 when no
 * draft has that id, 500 when the query fails.
 */
blogRouter.get("/:id", async (req: Request, res: Response) => {
  const draftId = req.params.id;
  // The query casts the id with ::uuid, so a malformed id would surface as a database
  // error (500). Reject it up front. The check is deliberately lenient about braces and
  // hyphen placement so that every spelling the database accepts still passes.
  const looksLikeUuid = /^[0-9a-f]{32}$/i.test(draftId.replace(/[{}-]/g, ""));
  if (!looksLikeUuid) {
    res.status(400).json({ success: false, error: "Invalid draft id" });
    return;
  }

  try {
    const result = await pool.query(
      `SELECT * FROM blog_drafts WHERE id = $1::uuid`,
      [draftId],
    );
    if (result.rows.length === 0) {
      res.status(404).json({ success: false, error: "Draft not found" });
      return;
    }
    res.json({ success: true, draft: result.rows[0] });
  } catch (err) {
    res.status(500).json({
      success: false,
      error: err instanceof Error ? err.message : String(err),
    });
  }
});
// PUT /api/blog/:id/status — Update draft status
/**
 * Sets the status of one draft and refreshes its `updated_at` timestamp.
 *
 * Request body: `{ status }` where status is one of draft, review, approved, published.
 *
 * Responses: 200 with the draft's id, title and new status; 400 for an invalid
 * status or an id that cannot be a UUID; 404 when no draft has that id; 500 when
 * the update fails.
 */
blogRouter.put("/:id/status", async (req: Request, res: Response) => {
  // req.body is undefined when the request carried no parsed body; destructuring it
  // directly would throw before any response is sent.
  const { status } = (req.body ?? {}) as { status?: unknown };
  const validStatuses = ["draft", "review", "approved", "published"];
  if (typeof status !== "string" || !validStatuses.includes(status)) {
    res.status(400).json({
      success: false,
      error: `Invalid status. Valid: ${validStatuses.join(", ")}`,
    });
    return;
  }

  const draftId = req.params.id;
  // The query casts the id with ::uuid, so a malformed id would surface as a database
  // error (500). Reject it up front; braces and hyphen placement are tolerated so that
  // every spelling the database accepts still passes.
  const looksLikeUuid = /^[0-9a-f]{32}$/i.test(draftId.replace(/[{}-]/g, ""));
  if (!looksLikeUuid) {
    res.status(400).json({ success: false, error: "Invalid draft id" });
    return;
  }

  try {
    const result = await pool.query(
      `UPDATE blog_drafts SET status = $1, updated_at = NOW() WHERE id = $2::uuid RETURNING id, title, status`,
      [status, draftId],
    );
    if (result.rows.length === 0) {
      res.status(404).json({ success: false, error: "Draft not found" });
      return;
    }
    res.json({ success: true, draft: result.rows[0] });
  } catch (err) {
    res.status(500).json({
      success: false,
      error: err instanceof Error ? err.message : String(err),
    });
  }
});