diff --git a/packages/api/src/routes/blog.ts b/packages/api/src/routes/blog.ts
index 973921f..db5843e 100644
--- a/packages/api/src/routes/blog.ts
+++ b/packages/api/src/routes/blog.ts
@@ -11,7 +11,9 @@
  */
 import { Router, Request, Response } from "express";
 import { pool } from "../db/client";
-import { setLlmProvider, getLlmProvider } from "../llm/client";
+import { lookup } from "node:dns/promises";
+import { isIP } from "node:net";
+import { setLlmProvider, getLlmProvider, refreshLlmAutoDiscovery } from "../llm/client";
 
 /** In-memory pipeline progress tracker — step updates pushed here, polled via GET /api/blog/:id/progress */
 const pipelineProgress = new Map();
@@ -1528,6 +1530,281 @@ blogRouter.post("/generate", async (req: Request, res: Response) => {
   }
 });
 
+/** Result of the global `fetch`; the bare name `Response` is Express's type in this file. */
+type FetchResponse = Awaited<ReturnType<typeof fetch>>;
+
+/** Upper bound on redirect hops followed while fetching a source URL. */
+const FROM_URL_MAX_REDIRECTS = 5;
+/** Upper bound on bytes read from a source page; only ~5000 chars are used downstream. */
+const FROM_URL_MAX_BODY_BYTES = 1024 * 1024;
+/** Named entities decoded by decodeHtmlEntities; any other named entity becomes a space. */
+const HTML_ENTITIES = new Map<string, string>([
+  ["amp", "&"], ["lt", "<"], ["gt", ">"], ["quot", '"'], ["apos", "'"], ["nbsp", " "],
+]);
+
+/** Returns true for loopback, private, link-local, CGNAT, multicast and reserved IP addresses. */
+function isNonPublicAddress(address: string): boolean {
+  const family = isIP(address);
+  if (family === 4) {
+    const octets = address.split(".").map(Number);
+    const a = octets[0] ?? 0;
+    const b = octets[1] ?? 0;
+    return (
+      a === 0 || a === 10 || a === 127 || a >= 224 ||
+      (a === 100 && b >= 64 && b <= 127) || // 100.64.0.0/10 carrier-grade NAT
+      (a === 169 && b === 254) || // link-local, incl. cloud metadata endpoints
+      (a === 172 && b >= 16 && b <= 31) ||
+      (a === 192 && b === 168) ||
+      (a === 198 && (b === 18 || b === 19)) // 198.18.0.0/15 benchmarking
+    );
+  }
+  if (family === 6) {
+    const lower = address.toLowerCase();
+    const mappedV4 = /^::ffff:(\d+\.\d+\.\d+\.\d+)$/.exec(lower)?.[1];
+    if (mappedV4) return isNonPublicAddress(mappedV4);
+    return (
+      lower === "::" || lower === "::1" ||
+      lower.startsWith("::ffff:") || // hex-form IPv4-mapped: reject rather than parse
+      /^f[cd][0-9a-f]{2}:/.test(lower) || // fc00::/7 unique-local
+      /^fe[89ab][0-9a-f]:/.test(lower) || // fe80::/10 link-local
+      /^ff[0-9a-f]{2}:/.test(lower) // ff00::/8 multicast
+    );
+  }
+  return true; // not an IP literal — never treat as safe
+}
+
+/** Throws unless target is http(s) and every address its host resolves to is public. */
+async function assertPublicHttpUrl(target: URL): Promise<void> {
+  if (target.protocol !== "http:" && target.protocol !== "https:") {
+    throw new Error(`Unsupported protocol: ${target.protocol}`);
+  }
+  const host = target.hostname.replace(/^\[|\]$/g, ""); // URL keeps brackets around IPv6 literals
+  const addresses = isIP(host) ? [host] : (await lookup(host, { all: true })).map((entry) => entry.address);
+  if (addresses.length === 0 || addresses.some(isNonPublicAddress)) {
+    throw new Error(`Refusing to fetch non-public host: ${host}`);
+  }
+}
+
+/**
+ * GET rawUrl, following redirects by hand so that every hop is re-validated.
+ * NOTE(review): DNS is resolved here and again inside fetch, so a rebinding host can
+ * still race the check; pinning the resolved address would close that gap.
+ */
+async function fetchPublicUrl(rawUrl: string): Promise<FetchResponse> {
+  const signal = AbortSignal.timeout(20000); // one deadline shared by all hops
+  let current = new URL(rawUrl);
+  for (let hop = 0; hop <= FROM_URL_MAX_REDIRECTS; hop++) {
+    await assertPublicHttpUrl(current);
+    const response = await fetch(current, {
+      headers: { "User-Agent": "TIPBot/1.0 blog-from-url (+https://tip.flexoptix.net)" },
+      signal,
+      redirect: "manual",
+    });
+    const location = response.headers.get("location");
+    if (response.status < 300 || response.status >= 400 || !location) return response;
+    await response.body?.cancel(); // discard the redirect body before the next hop
+    current = new URL(location, current);
+  }
+  throw new Error(`Too many redirects (more than ${FROM_URL_MAX_REDIRECTS})`);
+}
+
+/** Read at most ~maxBytes of the body as UTF-8 text, then stop the download. */
+async function readTextCapped(response: FetchResponse, maxBytes: number): Promise<string> {
+  if (!response.body) return "";
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+  let received = 0;
+  let text = "";
+  try {
+    while (received < maxBytes) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      received += value.byteLength;
+      text += decoder.decode(value, { stream: true });
+    }
+  } finally {
+    await reader.cancel().catch(() => undefined); // no-op once the stream has ended
+  }
+  return text + decoder.decode();
+}
+
+/** Decode character references in one pass, so "&amp;lt;" yields "&lt;" and not "<". */
+function decodeHtmlEntities(input: string): string {
+  return input.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (_match, ref: string) => {
+    if (!ref.startsWith("#")) return HTML_ENTITIES.get(ref.toLowerCase()) ?? " ";
+    const isHex = ref[1] === "x" || ref[1] === "X";
+    const code = parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
+    const valid = code > 0 && code <= 0x10ffff && (code < 0xd800 || code > 0xdfff);
+    return valid ? String.fromCodePoint(code) : "";
+  });
+}
+
+/** Reduce an HTML document to its title plus at most ~5000 chars of readable text. */
+function extractReadableText(html: string): { pageTitle: string; text: string } {
+  // Extract page title
+  const titleMatch = html.match(/<title[^>]*>([^<]{1,200})<\/title>/i);
+  const h1Match = html.match(/<h1[^>]*>([^<]{1,150})<\/h1>/i);
+  const pageTitle = decodeHtmlEntities(titleMatch?.[1] || h1Match?.[1] || "").replace(/\s+/g, " ").trim();
+
+  // Strip scripts, styles, SVG, navigation boilerplate (element and contents)
+  const withoutTags = html
+    .replace(/<(script|style|svg|noscript|nav|header|footer|aside)\b[\s\S]*?<\/\1\s*>/gi, " ")
+    // Block elements → newlines
+    .replace(/<\/?(p|div|section|article|h[1-6]|li|br|hr|tr|td|th|blockquote|pre)[^>]*>/gi, "\n")
+    // Strip all remaining tags
+    .replace(/<[^>]{0,500}>/g, " ");
+
+  // Decode entities only after the tags are gone, then keep lines long enough to be prose
+  let text = decodeHtmlEntities(withoutTags)
+    .split("\n").map((line) => line.trim()).filter((line) => line.length > 30).join("\n")
+    .trim();
+
+  // Limit to ~5000 chars — enough for LLM context, not so much it blows the prompt
+  if (text.length > 5000) {
+    text = text.slice(0, 5000) + "\n[… content truncated for LLM context …]";
+  }
+
+  return { pageTitle, text };
+}
+
+/**
+ * Fetch a URL and extract readable text content for use as LLM context.
+ *
+ * @param rawUrl Absolute http(s) URL supplied by the caller (untrusted).
+ * @returns The page title (may be empty) and the extracted, truncated text.
+ * @throws Error for non-public hosts, too many redirects, non-2xx responses,
+ *   unsupported content types, timeouts and network failures.
+ */
+async function fetchUrlContent(rawUrl: string): Promise<{ pageTitle: string; text: string }> {
+  const response = await fetchPublicUrl(rawUrl);
+  const contentType = response.headers.get("content-type") || "";
+  const supported = ["text/html", "text/plain", "application/xhtml"].some((type) => contentType.includes(type));
+
+  if (!response.ok || !supported) {
+    await response.body?.cancel(); // release the connection without downloading the body
+    if (!response.ok) throw new Error(`HTTP ${response.status} ${response.statusText}`);
+    throw new Error(`Unsupported content type: ${contentType.split(";")[0]}`);
+  }
+
+  return extractReadableText(await readTextCapped(response, FROM_URL_MAX_BODY_BYTES));
+}
+
+// POST /api/blog/from-url — Fetch URL, extract content, generate a blog from it
+blogRouter.post("/from-url", async (req: Request, res: Response) => {
+  // The JSON body is untrusted: do not assume the fields are strings
+  const body = (req.body ?? {}) as { url?: unknown; topic?: unknown };
+  const url = typeof body.url === "string" ? body.url.trim() : "";
+
+  if (!url) {
+    res.status(400).json({ success: false, error: "url ist erforderlich" });
+    return;
+  }
+
+  // Validate URL — must be http/https
+  let parsedUrl: URL;
+  try {
+    parsedUrl = new URL(url);
+    if (!["http:", "https:"].includes(parsedUrl.protocol)) throw new Error("bad protocol");
+  } catch {
+    res.status(400).json({ success: false, error: "Ungültige URL — muss http:// oder https:// beginnen" });
+    return;
+  }
+
+  // Own-property lookup: "constructor" or "__proto__" must not resolve to Object.prototype members
+  const selectedTopic = body.topic || "technology_deep_dive";
+  const templates =
+    typeof selectedTopic === "string" && Object.prototype.hasOwnProperty.call(BLOG_TEMPLATES, selectedTopic)
+      ? BLOG_TEMPLATES[selectedTopic]
+      : undefined;
+  if (typeof selectedTopic !== "string" || !templates || templates.length === 0) {
+    res.status(400).json({ success: false, error: `Ungültiger Blog-Typ. Gültig: ${Object.keys(BLOG_TEMPLATES).join(", ")}` });
+    return;
+  }
+
+  try {
+    // Fetch page content server-side (no CORS issues)
+    const { pageTitle, text: extractedText } = await fetchUrlContent(parsedUrl.href);
+
+    console.log(`Blog from-url: fetched "${pageTitle}" from ${parsedUrl.hostname} (${extractedText.length} chars)`);
+
+    // Build a rich additional_context from the URL content
+    const additionalContext =
+      `SOURCE URL: ${url}\n` +
+      `PAGE TITLE: ${pageTitle}\n` +
+      `HOSTNAME: ${parsedUrl.hostname}\n` +
+      `\n--- EXTRACTED PAGE CONTENT ---\n` +
+      `${extractedText}\n` +
+      `--- END PAGE CONTENT ---\n` +
+      `\nIMPORTANT: Use this content as factual background and editorial direction. ` +
+      `Do NOT copy sentences verbatim. Write a Flexoptix-voice blog article using these facts and insights.`;
+
+    const title = pageTitle || parsedUrl.hostname;
+    const template = templates[Math.floor(Math.random() * templates.length)];
+    const keywords = [
+      ...template.seo_keywords,
+      "optical transceiver", "networking",
+    ].filter(Boolean);
+
+    const data = await gatherBlogData(keywords, selectedTopic);
+    const draftContent = generateTemplateDraft(title, selectedTopic, data);
+    const wordCount = draftContent.split(/\s+/).length;
+    const initialIssues = validateArticle(draftContent);
+
+    const result = await pool.query(
+      `INSERT INTO blog_drafts (title, topic, target_audience, outline, draft_content, data_sources, status, generated_by, word_count, seo_keywords)
+       VALUES ($1, $2, $3, $4, $5, $6, 'draft', $7, $8, $9)
+       RETURNING id, created_at`,
+      [
+        title,
+        selectedTopic,
+        template.target_audience,
+        JSON.stringify({ generation_method: "from-url", source_url: url, quality_issues: initialIssues }),
+        draftContent,
+        JSON.stringify({ source_url: url, extracted_chars: extractedText.length, products: data.products.length, news: data.news.length }),
+        "tip-blog-engine-url",
+        wordCount,
+        template.seo_keywords,
+      ],
+    );
+
+    const draftId = result.rows[0].id;
+
+    // Launch LLM pipeline with URL content as context
+    const health = await checkHealth().catch(() => ({ ok: false, model: "", error: "unreachable" }));
+    let llmStarted = false;
+    if (health.ok) {
+      llmStarted = true;
+      enqueueLlmPipeline(draftId, title, selectedTopic, template.target_audience, data, additionalContext).catch((err) => {
+        console.error(`Blog from-url LLM pipeline error: ${(err as Error).message}`);
+      });
+    }
+
+    res.json({
+      success: true,
+      source_url: url,
+      page_title: pageTitle,
+      extracted_chars: extractedText.length,
+      draft: {
+        id: draftId,
+        title,
+        topic: selectedTopic,
+        target_audience: template.target_audience,
+        word_count: wordCount,
+        generation_method: "from-url",
+        llm_enhancing: llmStarted,
+        created_at: result.rows[0].created_at,
+      },
+    });
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    console.error(`Blog from-url error for ${url}: ${msg}`);
+    res.status(500).json({
+      success: false,
+      error: `URL konnte nicht verarbeitet werden: ${msg}`,
+    });
+  }
+});
+
 // GET /api/blog — List all drafts
 blogRouter.get("/", async (_req: Request, res: Response) => {
   try {
@@ -1557,6 +1834,17 @@ blogRouter.post("/llm/reset-queue", (_req: Request, res: Response) => {
   res.json({ success: true, message: "Ollama queue reset — stuck requests cleared" });
 });
 
+// POST /api/blog/llm/refresh-discovery — Force auto-discovery to pick up newly-trained fo-blog-v* versions
+// Useful right after Magatama adopts a new fo-blog-vN model. Otherwise runs every 10 min by itself.
+blogRouter.post("/llm/refresh-discovery", async (_req: Request, res: Response) => {
+  try {
+    const active = await refreshLlmAutoDiscovery();
+    res.json({ success: true, active, message: `Auto-discovery refreshed. Active: ${active.provider}${active.ollamaModel ? ` (${active.ollamaModel})` : ""}` });
+  } catch (err) {
+    res.status(500).json({ success: false, error: (err as Error).message });
+  }
+});
+
 // POST /api/blog/llm/switch — Switch active LLM provider at runtime (no restart needed)
 // Body: { provider: "claude-code" | "anthropic" | "ollama", model?: "fo-blog-v10" | "qwen2.5:14b" | ... }
 blogRouter.post("/llm/switch", (req: Request, res: Response) => {
diff --git a/packages/dashboard/index.html b/packages/dashboard/index.html
index 3bb04b1..2a4763b 100644
--- a/packages/dashboard/index.html
+++ b/packages/dashboard/index.html
@@ -1439,6 +1439,40 @@
+
+
+
🔗 Blog aus URL generieren
+
Link eingeben → Inhalt wird automatisch extrahiert → BlogLLM schreibt einen Artikel daraus
+
+
+ + +
+
+ + +
+
+
+ + +
+
+
@@ -5503,6 +5537,48 @@ function generateBlogManual() { }).catch(function(err) { if (err.message !== 'Unauthorized') showToast('Netzwerkfehler', err.message, true); }); } +function generateBlogFromUrl() { + var url = (document.getElementById('blog-from-url-input').value || '').trim(); + var topic = document.getElementById('blog-from-url-topic').value || 'technology_deep_dive'; + var btn = document.getElementById('blog-from-url-btn'); + var status = document.getElementById('blog-from-url-status'); + + if (!url) { showToast('Fehler', 'Bitte eine URL eingeben', true); return; } + try { new URL(url); } catch (e) { showToast('Fehler', 'Ungültige URL', true); return; } + + btn.disabled = true; + btn.textContent = '⏳ Fetching…'; + status.textContent = 'Seite wird abgerufen…'; + + var token = window.loadToken ? window.loadToken() : ''; + fetch(API + '/api/blog/from-url', { + method: 'POST', + headers: { 'Content-Type': 'application/json', 'Authorization': 'Bearer ' + token }, + body: JSON.stringify({ url: url, topic: topic }) + }) + .then(function(r) { if (r.status === 401) { handleAuthError(401); throw new Error('Unauthorized'); } return r.json(); }) + .then(function(data) { + btn.disabled = false; + btn.textContent = '🔗 Aus URL generieren'; + if (data.success) { + status.textContent = '✓ ' + data.extracted_chars + ' Zeichen extrahiert — Pipeline läuft (~10 min)'; + showToast('✓ Pipeline gestartet', (data.page_title || url) + ' — wird in ~10 min fertig'); + document.getElementById('blog-from-url-input').value = ''; + loadBlogDrafts(); + pollBlogLlm(data.draft.id, 0); + } else { + status.textContent = '✗ Fehler: ' + (data.error || 'Unbekannt'); + showToast('Fehler', data.error || 'Unbekannter Fehler', true); + } + }) + .catch(function(err) { + btn.disabled = false; + btn.textContent = '🔗 Aus URL generieren'; + status.textContent = ''; + if (err.message !== 'Unauthorized') showToast('Netzwerkfehler', err.message, true); + }); +} + function pollBlogLlm(id, attempt) { if 
(attempt > 60) return; // max 10 min (60 × 10s) setTimeout(function() {