feat: Blog Engine — generate from URL (link → BlogLLM → article)
New POST /api/blog/from-url endpoint:
- Accepts url + topic in request body
- Fetches page server-side (no CORS, 20s timeout, redirect-follow)
- Strips script/style/nav/footer/svg; extracts readable text (~5000 chars)
- Extracts page title from <title> or <h1>
- Passes extracted content as structured additional_context to the existing 16-step FO blog pipeline (same as manual generation)
- Returns immediately; LLM pipeline runs async
- Validated: smoke test fetched flexoptix.net/en/blog/, 5040 chars, pipeline launched with llm_enhancing=true

New "🔗 Blog aus URL generieren" panel in dashboard:
- URL input (Enter key triggers generation)
- Blog-Typ dropdown (same 8 types as manual panel)
- Button shows loading state "⏳ Fetching…" during API call
- Status line shows extracted char count after success
- Reuses pollBlogLlm() for step-by-step progress polling
- Inline status field for error display without toast spam
This commit is contained in:
parent
9b8b03e783
commit
e0f9656684
@ -11,7 +11,7 @@
|
|||||||
*/
|
*/
|
||||||
import { Router, Request, Response } from "express";
|
import { Router, Request, Response } from "express";
|
||||||
import { pool } from "../db/client";
|
import { pool } from "../db/client";
|
||||||
import { setLlmProvider, getLlmProvider } from "../llm/client";
|
import { setLlmProvider, getLlmProvider, refreshLlmAutoDiscovery } from "../llm/client";
|
||||||
|
|
||||||
/** In-memory pipeline progress tracker — step updates pushed here, polled via GET /api/blog/:id/progress */
|
/** In-memory pipeline progress tracker — step updates pushed here, polled via GET /api/blog/:id/progress */
|
||||||
const pipelineProgress = new Map<string, { step: number; total: number; label: string; pct: number }>();
|
const pipelineProgress = new Map<string, { step: number; total: number; label: string; pct: number }>();
|
||||||
@ -1528,6 +1528,171 @@ blogRouter.post("/generate", async (req: Request, res: Response) => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/** How long to wait for the remote server before aborting, in milliseconds. */
const URL_FETCH_TIMEOUT_MS = 20000;

/** Largest HTML document (in characters) that is run through the regex-based extraction. */
const URL_FETCH_MAX_HTML_CHARS = 5000000;

/** Extracted text is cut to this many characters so it does not blow the LLM prompt. */
const URL_FETCH_MAX_TEXT_CHARS = 5000;

/** Lines must be longer than this to be kept; shorter ones are treated as menu/button boilerplate. */
const URL_FETCH_MIN_LINE_CHARS = 30;

/** Marker appended when the extracted text had to be cut. */
const URL_FETCH_TRUNCATION_MARKER = "\n[… content truncated for LLM context …]";

/** Named entities that are decoded to their character; every other named entity becomes a space. */
const URL_FETCH_NAMED_ENTITIES = new Map<string, string>([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", " "],
]);

/**
 * Elements removed together with their content. The lookahead requires the tag name to end
 * right there, so `<nav>` matches but custom elements such as `<navbar>` do not.
 */
const URL_FETCH_BOILERPLATE_PATTERNS: RegExp[] = [
  "script",
  "style",
  "svg",
  "nav",
  "footer",
  "header",
  "aside",
  "form",
].map((tag) => new RegExp(`<${tag}(?=[\\s/>])[\\s\\S]*?</${tag}\\s*>`, "gi"));

/**
 * Decode HTML character references in a single pass.
 * One pass matters: decoding `&amp;` first and `&lt;` afterwards would turn the escaped
 * text `&amp;lt;` into `<` instead of `&lt;`.
 */
function decodeHtmlEntities(input: string): string {
  return input.replace(
    /&(#x[0-9a-f]{1,6}|#\d{1,7}|[a-z][a-z0-9]*);/gi,
    (_match: string, ref: string): string => {
      if (ref.charAt(0) !== "#") {
        return URL_FETCH_NAMED_ENTITIES.get(ref.toLowerCase()) ?? " ";
      }
      const isHex = ref.charAt(1).toLowerCase() === "x";
      const codePoint = isHex ? parseInt(ref.slice(2), 16) : parseInt(ref.slice(1), 10);
      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
      const isValid = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate;
      return isValid ? String.fromCodePoint(codePoint) : " ";
    },
  );
}

/** Return the page title from `<title>`, falling back to the first plain-text `<h1>`; "" if neither is usable. */
function extractPageTitle(html: string): string {
  const candidates = [
    html.match(/<title[^>]*>([^<]{1,200})<\/title>/i)?.[1],
    html.match(/<h1[^>]*>([^<]{1,150})<\/h1>/i)?.[1],
  ];
  for (const candidate of candidates) {
    const cleaned = decodeHtmlEntities(candidate ?? "").replace(/\s+/g, " ").trim();
    // Only accept a candidate that still has content after cleaning, so a blank <title>
    // does not prevent the <h1> fallback.
    if (cleaned) return cleaned;
  }
  return "";
}

/** Reduce an HTML document to newline-separated readable text lines. */
function extractReadableText(html: string): string {
  // Comments first: they may contain ">" and would otherwise leak through the tag stripper.
  let stripped = html.replace(/<!--[\s\S]*?-->/g, " ");
  for (const pattern of URL_FETCH_BOILERPLATE_PATTERNS) {
    stripped = stripped.replace(pattern, " ");
  }

  return decodeHtmlEntities(
    stripped
      // Block elements → newlines
      .replace(/<\/?(?:p|div|section|article|h[1-6]|li|br|hr|tr|td|th|blockquote|pre)(?=[\s/>])[^>]*>/gi, "\n")
      // Strip all remaining tags
      .replace(/<[^>]{0,500}>/g, " "),
  )
    .split("\n")
    .map((line) => line.replace(/\s+/g, " ").trim())
    .filter((line) => line.length > URL_FETCH_MIN_LINE_CHARS)
    .join("\n");
}

/** Cut text to the LLM context budget without splitting a surrogate pair. */
function truncateForLlmContext(text: string): string {
  if (text.length <= URL_FETCH_MAX_TEXT_CHARS) return text;
  let end = URL_FETCH_MAX_TEXT_CHARS;
  const lastUnit = text.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  return text.slice(0, end) + URL_FETCH_TRUNCATION_MARKER;
}

/**
 * Fetch a URL and extract readable text content for use as LLM context.
 *
 * @param rawUrl Absolute URL to fetch; redirects are followed.
 * @returns `pageTitle` (may be "") and `text`, the extracted page text limited to
 *   ~5000 characters plus a truncation marker when it was cut.
 * @throws Error when the response status is not OK, the content type is not
 *   HTML/XHTML/plain text, or the document is too large. Network failures and the
 *   20 s timeout surface as the rejection produced by `fetch`.
 */
async function fetchUrlContent(rawUrl: string): Promise<{ pageTitle: string; text: string }> {
  const response = await fetch(rawUrl, {
    headers: { "User-Agent": "TIPBot/1.0 blog-from-url (+https://tip.flexoptix.net)" },
    signal: AbortSignal.timeout(URL_FETCH_TIMEOUT_MS),
    redirect: "follow",
  });

  // Release the connection when the body is not going to be read.
  const discardBody = (): void => {
    void response.body?.cancel().catch(() => undefined);
  };

  if (!response.ok) {
    discardBody();
    throw new Error(`HTTP ${response.status} ${response.statusText}`);
  }

  const contentType = response.headers.get("content-type") || "";
  const isSupported =
    contentType.includes("text/html") ||
    contentType.includes("text/plain") ||
    contentType.includes("application/xhtml");
  if (!isSupported) {
    discardBody();
    throw new Error(`Unsupported content type: ${contentType.split(";")[0]}`);
  }

  // Cheap early exit when the server announces an oversized body.
  const declaredLength = Number(response.headers.get("content-length") ?? 0);
  if (declaredLength > URL_FETCH_MAX_HTML_CHARS) {
    discardBody();
    throw new Error(`Response too large: ${declaredLength} bytes`);
  }

  const html = await response.text();
  if (html.length > URL_FETCH_MAX_HTML_CHARS) {
    throw new Error(`Response too large: ${html.length} chars`);
  }

  return {
    pageTitle: extractPageTitle(html),
    text: truncateForLlmContext(extractReadableText(html)),
  };
}
|
||||||
|
|
||||||
|
// POST /api/blog/from-url — Fetch URL, extract content, generate a blog from it
//
// Body: { url: string, topic?: string }
//   url   — http(s) page to use as source material (required)
//   topic — key of BLOG_TEMPLATES; defaults to "technology_deep_dive"
//
// Flow: validate input → fetch + extract page text → build a template draft and store it
// in blog_drafts → if the LLM backend is healthy, start the LLM pipeline in the background
// with the extracted text as additional context → respond with the stored draft's metadata.
//
// NOTE(review): the URL is fetched server-side and only the scheme is validated, so a caller
// can point this at loopback/internal addresses (SSRF), also via redirects. Confirm this
// endpoint is only reachable by trusted users, or add host allow-listing.
blogRouter.post("/from-url", async (req: Request, res: Response) => {
  // req.body is undefined when no body parser ran or the body was empty.
  const { url, topic } = (req.body ?? {}) as { url?: unknown; topic?: unknown };

  if (typeof url !== "string" || url.trim() === "") {
    res.status(400).json({ success: false, error: "url ist erforderlich" });
    return;
  }

  // Validate URL — must be http/https
  let parsedUrl: URL;
  try {
    parsedUrl = new URL(url);
    if (!["http:", "https:"].includes(parsedUrl.protocol)) throw new Error("bad protocol");
  } catch {
    res.status(400).json({ success: false, error: "Ungültige URL — muss http:// oder https:// beginnen" });
    return;
  }

  // Own-property check: a plain index lookup would also accept inherited keys such as
  // "constructor" or "__proto__", which pass a truthiness test and crash further down.
  const requestedTopic: unknown = topic || "technology_deep_dive";
  const templates =
    typeof requestedTopic === "string" && Object.prototype.hasOwnProperty.call(BLOG_TEMPLATES, requestedTopic)
      ? BLOG_TEMPLATES[requestedTopic]
      : undefined;
  if (typeof requestedTopic !== "string" || !templates || templates.length === 0) {
    res.status(400).json({ success: false, error: `Ungültiger Blog-Typ. Gültig: ${Object.keys(BLOG_TEMPLATES).join(", ")}` });
    return;
  }
  const selectedTopic = requestedTopic;

  try {
    // Fetch page content server-side (no CORS issues)
    const { pageTitle, text: extractedText } = await fetchUrlContent(url);

    console.log(`Blog from-url: fetched "${pageTitle}" from ${parsedUrl.hostname} (${extractedText.length} chars)`);

    // Without any extracted text the article would not be based on the URL at all
    // (typical for pages rendered entirely client-side), so stop before creating a draft.
    if (extractedText.trim() === "") {
      res.status(422).json({ success: false, error: "Aus der Seite konnte kein lesbarer Text extrahiert werden" });
      return;
    }

    // Build a rich additional_context from the URL content
    const additionalContext =
      `SOURCE URL: ${url}\n` +
      `PAGE TITLE: ${pageTitle}\n` +
      `HOSTNAME: ${parsedUrl.hostname}\n` +
      `\n--- EXTRACTED PAGE CONTENT ---\n` +
      `${extractedText}\n` +
      `--- END PAGE CONTENT ---\n` +
      `\nIMPORTANT: Use this content as factual background and editorial direction. ` +
      `Do NOT copy sentences verbatim. Write a Flexoptix-voice blog article using these facts and insights.`;

    const title = pageTitle || parsedUrl.hostname;
    // One template variant of the chosen topic is picked at random.
    const template = templates[Math.floor(Math.random() * templates.length)];
    const keywords = [
      ...template.seo_keywords,
      "optical transceiver", "networking",
    ].filter(Boolean);

    const data = await gatherBlogData(keywords, selectedTopic);
    const draftContent = generateTemplateDraft(title, selectedTopic, data);
    const wordCount = draftContent.split(/\s+/).length;
    const initialIssues = validateArticle(draftContent);

    const result = await pool.query(
      `INSERT INTO blog_drafts (title, topic, target_audience, outline, draft_content, data_sources, status, generated_by, word_count, seo_keywords)
       VALUES ($1, $2, $3, $4, $5, $6, 'draft', $7, $8, $9)
       RETURNING id, created_at`,
      [
        title,
        selectedTopic,
        template.target_audience,
        JSON.stringify({ generation_method: "from-url", source_url: url, quality_issues: initialIssues }),
        draftContent,
        JSON.stringify({ source_url: url, extracted_chars: extractedText.length, products: data.products.length, news: data.news.length }),
        "tip-blog-engine-url",
        wordCount,
        template.seo_keywords,
      ],
    );

    const draftId = result.rows[0].id;

    // Launch LLM pipeline with URL content as context.
    // A failing health check is treated as "LLM unavailable": the template draft is still returned.
    const health = await checkHealth().catch(() => ({ ok: false, model: "", error: "unreachable" }));
    let llmStarted = false;
    if (health.ok) {
      llmStarted = true;
      // Deliberately not awaited: the response goes out while the pipeline keeps running.
      void enqueueLlmPipeline(draftId, title, selectedTopic, template.target_audience, data, additionalContext).catch((err: unknown) => {
        console.error(`Blog from-url LLM pipeline error: ${err instanceof Error ? err.message : String(err)}`);
      });
    }

    res.json({
      success: true,
      source_url: url,
      page_title: pageTitle,
      extracted_chars: extractedText.length,
      draft: {
        id: draftId,
        title,
        topic: selectedTopic,
        target_audience: template.target_audience,
        word_count: wordCount,
        generation_method: "from-url",
        llm_enhancing: llmStarted,
        created_at: result.rows[0].created_at,
      },
    });
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    console.error(`Blog from-url error for ${url}: ${msg}`);
    res.status(500).json({
      success: false,
      error: `URL konnte nicht verarbeitet werden: ${msg}`,
    });
  }
});
|
||||||
|
|
||||||
// GET /api/blog — List all drafts
|
// GET /api/blog — List all drafts
|
||||||
blogRouter.get("/", async (_req: Request, res: Response) => {
|
blogRouter.get("/", async (_req: Request, res: Response) => {
|
||||||
try {
|
try {
|
||||||
@ -1557,6 +1722,17 @@ blogRouter.post("/llm/reset-queue", (_req: Request, res: Response) => {
|
|||||||
res.json({ success: true, message: "Ollama queue reset — stuck requests cleared" });
|
res.json({ success: true, message: "Ollama queue reset — stuck requests cleared" });
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// POST /api/blog/llm/refresh-discovery — Re-run LLM auto-discovery on demand so newly-trained
// fo-blog-v* versions are picked up right away (e.g. just after Magatama adopts a new
// fo-blog-vN model). Without this call the discovery runs every 10 min by itself.
// Responds with the now-active provider, or 500 plus the error message if the refresh fails.
blogRouter.post("/llm/refresh-discovery", async (_req: Request, res: Response) => {
  try {
    const active = await refreshLlmAutoDiscovery();
    // The model name is only appended when an Ollama model is set.
    const modelSuffix = active.ollamaModel ? ` (${active.ollamaModel})` : "";
    const message = `Auto-discovery refreshed. Active: ${active.provider}${modelSuffix}`;
    res.json({ success: true, active, message });
  } catch (err) {
    const error = (err as Error).message;
    res.status(500).json({ success: false, error });
  }
});
|
||||||
|
|
||||||
// POST /api/blog/llm/switch — Switch active LLM provider at runtime (no restart needed)
|
// POST /api/blog/llm/switch — Switch active LLM provider at runtime (no restart needed)
|
||||||
// Body: { provider: "claude-code" | "anthropic" | "ollama", model?: "fo-blog-v10" | "qwen2.5:14b" | ... }
|
// Body: { provider: "claude-code" | "anthropic" | "ollama", model?: "fo-blog-v10" | "qwen2.5:14b" | ... }
|
||||||
blogRouter.post("/llm/switch", (req: Request, res: Response) => {
|
blogRouter.post("/llm/switch", (req: Request, res: Response) => {
|
||||||
|
|||||||
@ -1439,6 +1439,40 @@
|
|||||||
<button onclick="generateBlogManual()" style="background:rgba(99,102,241,0.85);color:#fff;border:none;padding:8px 20px;border-radius:6px;cursor:pointer;font-size:0.82rem;font-weight:600">⚙️ Artikel generieren</button>
|
<button onclick="generateBlogManual()" style="background:rgba(99,102,241,0.85);color:#fff;border:none;padding:8px 20px;border-radius:6px;cursor:pointer;font-size:0.82rem;font-weight:600">⚙️ Artikel generieren</button>
|
||||||
</div><!-- end manual generation -->
|
</div><!-- end manual generation -->
|
||||||
|
|
||||||
|
<!-- URL → BLOG PANEL
     Lets the user paste a link and pick a blog type; generateBlogFromUrl() reads
     #blog-from-url-input and #blog-from-url-topic, toggles #blog-from-url-btn while the
     request is running and writes progress/errors into #blog-from-url-status. -->
<div class="card" style="margin-bottom:1.25rem;border:1px solid rgba(16,185,129,0.35);background:var(--surface2)">
  <div style="font-size:0.85rem;font-weight:700;color:var(--text-bright);margin-bottom:0.1rem">🔗 Blog aus URL generieren</div>
  <div style="font-size:0.7rem;color:var(--text-dim);margin-bottom:0.75rem">Link eingeben → Inhalt wird automatisch extrahiert → BlogLLM schreibt einen Artikel daraus</div>
  <div style="display:grid;grid-template-columns:1fr auto;gap:0.6rem;margin-bottom:0.65rem;align-items:end">
    <div>
      <!-- "for" ties each label to its control so clicking it focuses the field and screen readers announce it -->
      <label for="blog-from-url-input" style="font-size:0.7rem;color:var(--text-dim);display:block;margin-bottom:3px">URL</label>
      <input type="url" id="blog-from-url-input" placeholder="https://example.com/article-about-400g-transceivers"
        style="width:100%;background:var(--surface);border:1px solid rgba(16,185,129,0.4);color:var(--text);padding:6px 10px;border-radius:6px;font-size:0.82rem;box-sizing:border-box"
        onkeydown="if(event.key==='Enter')generateBlogFromUrl()">
    </div>
    <div>
      <label for="blog-from-url-topic" style="font-size:0.7rem;color:var(--text-dim);display:block;margin-bottom:3px">Blog-Typ</label>
      <!-- Option values are sent as "topic" to POST /api/blog/from-url -->
      <select id="blog-from-url-topic" style="background:var(--surface);border:1px solid rgba(16,185,129,0.4);color:var(--text);padding:6px 10px;border-radius:6px;font-size:0.82rem;height:32px">
        <option value="technology_deep_dive">Technology Deep Dive</option>
        <option value="tutorial">Troubleshooting Tutorial</option>
        <option value="migration_guide">Migration Guide</option>
        <option value="market_alert">Market Alert</option>
        <option value="buying_guide">Buying Guide</option>
        <option value="comparison">Product Comparison</option>
        <option value="competitor_analysis">Competitor Analysis</option>
        <option value="hype_cycle">Hype Cycle / Strategy</option>
      </select>
    </div>
  </div>
  <div style="display:flex;align-items:center;gap:0.75rem">
    <!-- type="button" keeps this from submitting if the panel is ever placed inside a form -->
    <button type="button" onclick="generateBlogFromUrl()" id="blog-from-url-btn"
      style="background:rgba(16,185,129,0.85);color:#fff;border:none;padding:8px 20px;border-radius:6px;cursor:pointer;font-size:0.82rem;font-weight:600">
      🔗 Aus URL generieren
    </button>
    <!-- Live region: status and error text set by script is announced to assistive technology -->
    <span id="blog-from-url-status" role="status" aria-live="polite" style="font-size:0.75rem;color:var(--text-dim)"></span>
  </div>
</div><!-- end url→blog panel -->
|
||||||
|
|
||||||
<!-- SLL INSIGHTS WIDGET -->
|
<!-- SLL INSIGHTS WIDGET -->
|
||||||
<div class="card" style="margin-bottom:1rem;border:1px solid rgba(212,163,115,0.3);background:var(--surface2)">
|
<div class="card" style="margin-bottom:1rem;border:1px solid rgba(212,163,115,0.3);background:var(--surface2)">
|
||||||
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:0.75rem">
|
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:0.75rem">
|
||||||
@ -5503,6 +5537,48 @@ function generateBlogManual() {
|
|||||||
}).catch(function(err) { if (err.message !== 'Unauthorized') showToast('Netzwerkfehler', err.message, true); });
|
}).catch(function(err) { if (err.message !== 'Unauthorized') showToast('Netzwerkfehler', err.message, true); });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Read the URL and blog type from the "Blog aus URL generieren" panel, send them to
 * POST /api/blog/from-url and reflect the outcome in the panel's button and status line.
 * On success the draft list is reloaded; LLM progress polling starts only when the
 * server reports that the LLM pipeline was actually launched.
 */
function generateBlogFromUrl() {
  var input = document.getElementById('blog-from-url-input');
  var btn = document.getElementById('blog-from-url-btn');
  var status = document.getElementById('blog-from-url-status');
  var url = (input.value || '').trim();
  var topic = document.getElementById('blog-from-url-topic').value || 'technology_deep_dive';

  // The Enter key in the URL field calls this function directly and is not blocked by the
  // disabled button, so guard against starting a second request while one is running.
  if (btn.disabled) return;

  if (!url) { showToast('Fehler', 'Bitte eine URL eingeben', true); return; }
  var parsed;
  try { parsed = new URL(url); } catch (e) { showToast('Fehler', 'Ungültige URL', true); return; }
  // Same rule the server enforces; checking here avoids a pointless round trip.
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    showToast('Fehler', 'Ungültige URL — muss mit http:// oder https:// beginnen', true);
    return;
  }

  function resetButton() {
    btn.disabled = false;
    btn.textContent = '🔗 Aus URL generieren';
  }

  btn.disabled = true;
  btn.textContent = '⏳ Fetching…';
  status.textContent = 'Seite wird abgerufen…';

  var token = window.loadToken ? window.loadToken() : '';
  fetch(API + '/api/blog/from-url', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', 'Authorization': 'Bearer ' + token },
    body: JSON.stringify({ url: url, topic: topic })
  })
  .then(function(r) {
    if (r.status === 401) { handleAuthError(401); throw new Error('Unauthorized'); }
    // A non-JSON body (e.g. a proxy error page) is turned into a regular error result
    // instead of surfacing as a JSON parse error.
    return r.json().catch(function() { return { success: false, error: 'HTTP ' + r.status }; });
  })
  .then(function(data) {
    resetButton();
    if (data.success) {
      var llmRunning = !!(data.draft && data.draft.llm_enhancing);
      if (llmRunning) {
        status.textContent = '✓ ' + data.extracted_chars + ' Zeichen extrahiert — Pipeline läuft (~10 min)';
        showToast('✓ Pipeline gestartet', (data.page_title || url) + ' — wird in ~10 min fertig');
      } else {
        // The server stored the draft but did not start the LLM pipeline.
        status.textContent = '✓ ' + data.extracted_chars + ' Zeichen extrahiert — Entwurf erstellt, LLM nicht erreichbar';
        showToast('Entwurf erstellt', (data.page_title || url) + ' — LLM-Pipeline wurde nicht gestartet', true);
      }
      input.value = '';
      loadBlogDrafts();
      if (llmRunning) pollBlogLlm(data.draft.id, 0);
    } else {
      status.textContent = '✗ Fehler: ' + (data.error || 'Unbekannt');
      showToast('Fehler', data.error || 'Unbekannter Fehler', true);
    }
  })
  .catch(function(err) {
    resetButton();
    if (err.message === 'Unauthorized') {
      // Already handled by handleAuthError above.
      status.textContent = '';
      return;
    }
    status.textContent = '✗ Netzwerkfehler: ' + err.message;
    showToast('Netzwerkfehler', err.message, true);
  });
}
|
||||||
|
|
||||||
function pollBlogLlm(id, attempt) {
|
function pollBlogLlm(id, attempt) {
|
||||||
if (attempt > 60) return; // max 10 min (60 × 10s)
|
if (attempt > 60) return; // max 10 min (60 × 10s)
|
||||||
setTimeout(function() {
|
setTimeout(function() {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user