transceiver-db/packages/scraper/src/utils/image-downloader.ts
Rene Fichtmueller aa977abc97 feat(v0.2.0): Sales Intelligence Engine — Phase 0+A
New API routes:
- GET /api/finder — Switch→Flexoptix transceiver finder with FlexBox coding
- GET /api/competitor-alerts — Competitor intelligence (price changes, new products, stock)
- GET /api/forecast/:technology — Sales forecast 3/9/12/18 months + buy/wait/hold signal
- POST /api/transport/plan — Transport system planner (city→city BOM with fiber providers)

New MCP tools:
- find_flexoptix_for_switch — Customer switch → Flexoptix products
- get_competitor_alerts — Competitor monitoring
- plan_transport — Network transport planning
- forecast_sales — Volume/revenue prediction
- generate_blog — Enhanced blog generation

New DB tables (migration 013):
- competitor_alerts, price_changes, flexoptix_product_map
- sales_forecasts, fiber_providers, fiber_routes, cities
- generated_datasheets, blog_series
- Views: v_price_coverage, v_image_coverage, v_switch_flexoptix_finder

Seed data (migration 014):
- 25 European cities with IX/DC locations + coordinates
- 15 fiber providers (euNetworks, Telia, DTAG, Colt, Zayo, etc.)
- 16 fiber routes with pricing (Germany focus)

Infrastructure:
- Scraper scheduler: 2h Flexoptix, 4h FS.com/Optcore (was 6-8h)
- Change detector for competitor price/stock monitoring
- Image downloader utility with coverage tracking
2026-03-31 08:51:22 +02:00

155 lines
4.1 KiB
TypeScript

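/**
* WS0: Image Downloader
*
* Tracks product image metadata for transceivers and switches, and reports
* image and price coverage. Downloading from the various sources, resizing,
* and R2 upload are optional and are not performed by the code in this file —
* for now it stores image URLs and marks has_image.
*/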
import { Pool } from "pg";
import { createHash } from "crypto";
// Module-wide PostgreSQL connection pool used by every query in this file.
// Each setting is read from the environment and falls back to a literal
// default when the variable is unset or empty (hence `||`, not `??`).
// NOTE(review): the fallback password looks like a development credential —
// confirm it is never relied on outside local setups.
const env = process.env;
const pool = new Pool({
  host: env.POSTGRES_HOST || "localhost",
  port: parseInt(env.POSTGRES_PORT || "5433"),
  database: env.POSTGRES_DB || "transceiver_db",
  user: env.POSTGRES_USER || "tip",
  password: env.POSTGRES_PASSWORD || "tip_dev_2026",
  // Pool size is capped at three connections.
  max: 3,
});
/**
 * Attach an image URL to a transceiver and flag it as having an image.
 *
 * The update sets image_url, has_image = true and image_scraped_at = NOW(),
 * but only when the row's current image_url is NULL or an empty string; an
 * existing URL is left untouched and the call resolves without error.
 *
 * @param transceiverId - Value matched against transceivers.id.
 * @param imageUrl - URL written to transceivers.image_url.
 * @param source - Accepted for callers' convenience but not used by the
 *   current implementation (it is not written to the database).
 */
export async function setTransceiverImage(
  transceiverId: string,
  imageUrl: string,
  source?: string
): Promise<void> {
  const sql = `UPDATE transceivers SET image_url = $2, has_image = true, image_scraped_at = NOW()
WHERE id = $1 AND (image_url IS NULL OR image_url = '')`;
  const params = [transceiverId, imageUrl];
  await pool.query(sql, params);
}
/**
 * Attach an image URL to a switch and flag it as having an image.
 *
 * Sets image_url and has_image = true, but only when the row's current
 * image_url is NULL or an empty string; an existing URL is never overwritten.
 *
 * @param switchId - Value matched against switches.id.
 * @param imageUrl - URL written to switches.image_url.
 */
export async function setSwitchImage(
  switchId: string,
  imageUrl: string
): Promise<void> {
  const sql = `UPDATE switches SET image_url = $2, has_image = true
WHERE id = $1 AND (image_url IS NULL OR image_url = '')`;
  const params = [switchId, imageUrl];
  await pool.query(sql, params);
}
/**
 * Fetch transceivers that still need an image, for backfill.
 *
 * A row qualifies when has_image is false or NULL and image_url is NULL or an
 * empty string. The empty-string case matches the guard used by
 * setTransceiverImage, which also treats '' as "no image"; without it such
 * rows could never be selected for backfill.
 *
 * Results are ordered by speed_gbps descending with NULL speeds last, so
 * rows with an unknown speed do not crowd out the fastest products.
 *
 * @param limit - Maximum number of rows to return (default 100).
 * @returns The matching rows as returned by the driver. vendor_name comes
 *   from a LEFT JOIN, so it may be NULL at runtime for a transceiver without
 *   a matching vendor row even though the declared type is `string`.
 */
export async function getProductsWithoutImages(limit = 100): Promise<Array<{
  id: string;
  slug: string;
  form_factor: string;
  speed_gbps: number;
  reach_label: string;
  vendor_name: string;
  part_number: string;
}>> {
  const result = await pool.query(
    `SELECT t.id, t.slug, t.form_factor, t.speed_gbps, t.reach_label, t.part_number,
v.name AS vendor_name
FROM transceivers t
LEFT JOIN vendors v ON t.vendor_id = v.id
WHERE (t.has_image = false OR t.has_image IS NULL)
AND (t.image_url IS NULL OR t.image_url = '')
ORDER BY t.speed_gbps DESC NULLS LAST
LIMIT $1`,
    [limit]
  );
  return result.rows;
}
/**
 * Build candidate search-page URLs where an image for a product might be found.
 *
 * The search phrase is "<form_factor> <speed_gbps>G <reach_label> transceiver",
 * URL-encoded with encodeURIComponent. The result always contains a Flexoptix
 * catalog search and an FS.com search for that phrase, in that order. When
 * part_number is truthy, a third FS.com search for the part number is appended.
 *
 * @param product - Product attributes; vendor_name is accepted but not used.
 * @returns Two or three URLs, in the order described above.
 */
export function buildImageSearchUrls(product: {
  form_factor: string;
  speed_gbps: number;
  reach_label: string;
  part_number?: string;
  vendor_name?: string;
}): string[] {
  const { form_factor, speed_gbps, reach_label, part_number } = product;
  const encodedQuery = encodeURIComponent(
    `${form_factor} ${speed_gbps}G ${reach_label} transceiver`
  );
  // FS.com embeds the (already encoded) search term in the URL path.
  const fsSearch = (term: string): string => `https://www.fs.com/search/${term}.html`;

  return [
    `https://www.flexoptix.net/en/catalogsearch/result/?q=${encodedQuery}`,
    fsSearch(encodedQuery),
    // An empty or missing part number contributes no extra URL.
    ...(part_number ? [fsSearch(encodeURIComponent(part_number))] : []),
  ];
}
/**
 * Report how many transceivers have an image.
 *
 * Runs a single aggregate query over the transceivers table and converts the
 * three counts with parseInt.
 * NOTE(review): presumably the driver returns COUNT(*) as text — confirm.
 *
 * @returns total, with_image (has_image = true), without_image (has_image
 *   false or NULL), and coverage_pct: with_image / total as a percentage
 *   rounded to two decimals, or 0 when the table is empty.
 */
export async function getImageCoverageStats(): Promise<{
  total: number;
  with_image: number;
  without_image: number;
  coverage_pct: number;
}> {
  const { rows } = await pool.query(`
SELECT
COUNT(*) AS total,
COUNT(*) FILTER (WHERE has_image = true) AS with_image,
COUNT(*) FILTER (WHERE has_image = false OR has_image IS NULL) AS without_image
FROM transceivers
`);
  const [counts] = rows;
  const total = parseInt(counts.total);
  const withImage = parseInt(counts.with_image);
  const withoutImage = parseInt(counts.without_image);

  // Guard the division so an empty table reports 0% rather than NaN.
  let coveragePct = 0;
  if (total > 0) {
    coveragePct = Math.round((withImage / total) * 10000) / 100;
  }

  return {
    total,
    with_image: withImage,
    without_image: withoutImage,
    coverage_pct: coveragePct,
  };
}
/**
 * Report how many transceivers have a recent price observation.
 *
 * A transceiver counts as covered when at least one price_observations row
 * references it with a time newer than NOW() - 7 days.
 *
 * @returns total, with_recent_price, without_recent_price (total minus
 *   covered), and coverage_pct: covered / total as a percentage rounded to
 *   two decimals, or 0 when the table is empty.
 */
export async function getPriceCoverageStats(): Promise<{
  total: number;
  with_recent_price: number;
  without_recent_price: number;
  coverage_pct: number;
}> {
  const { rows } = await pool.query(`
SELECT
COUNT(*) AS total,
COUNT(*) FILTER (WHERE EXISTS (
SELECT 1 FROM price_observations po WHERE po.transceiver_id = t.id AND po.time > NOW() - INTERVAL '7 days'
)) AS with_price
FROM transceivers t
`);
  const [counts] = rows;
  const total = parseInt(counts.total);
  const covered = parseInt(counts.with_price);

  // Guard the division so an empty table reports 0% rather than NaN.
  let coveragePct = 0;
  if (total > 0) {
    coveragePct = Math.round((covered / total) * 10000) / 100;
  }

  return {
    total,
    with_recent_price: covered,
    without_recent_price: total - covered,
    coverage_pct: coveragePct,
  };
}