Phase 0 - Foundation:
- Restructure into npm workspace monorepo (packages/core, api, scraper)
- PostgreSQL 17 + TimescaleDB schema (15 tables incl. hypertables)
- Docker Compose for local dev (PostgreSQL on 5433 + Qdrant)
- Express 5 API on port 3200 with 6 routes
- Seed script to migrate 159 transceivers + 42 standards from npm package
- Erik server setup script + PM2 ecosystem config

Phase 1 - Scraper Engine:
- Crawlee + Playwright framework with pg-boss scheduler
- FS.com scraper (PlaywrightCrawler, anti-bot workaround)
- Optcore.net scraper (WP REST API enumeration + PlaywrightCrawler)
  - Uses /wp-json/wp/v2/product to get 2000+ product URLs
  - Playwright renders individual product pages for price extraction
- Cisco TMG Matrix scraper (compatibility data)
- News RSS aggregator (optics.org, SPIE, Network World, Nature Photonics)
  - Keyword relevance scoring for transceiver/fiber topics
  - xml2js with malformed XML sanitization
- SHA-256 content hashing for change detection (skip unchanged records)
- pg-boss v10 with explicit queue creation before scheduling
281 lines
9.0 KiB
TypeScript
281 lines
9.0 KiB
TypeScript
/**
 * Seed PostgreSQL from the existing @tip/core npm package data.
 * Imports: 159 transceivers, 42 standards, 12 competitors, 11 breakouts.
 */
|
|
import { Pool } from "pg";
|
|
import { config } from "dotenv";
|
|
import { join } from "path";
|
|
|
|
// Run dotenv's config() first so the POSTGRES_* values read from process.env
// further down (when the pool is created) can come from a local .env file.
config();
|
|
|
|
/**
 * Load the seed datasets directly from the `packages/core/src` sources.
 *
 * The four modules are pulled in with dynamic `import()` calls on paths
 * resolved relative to this script's directory. Because those paths point at
 * TypeScript sources, the script must be started through a runner that can
 * import `.ts` files directly (the original note names `tsx`).
 *
 * @returns The `transceivers`, `standards`, `competitors` and `breakouts`
 *   exports of the core package.
 * @throws Error if one of the four expected exports is missing or is not an
 *   array, so a renamed export fails here instead of deep inside a seed step.
 */
async function loadCoreData() {
  const corePath = join(__dirname, "..", "packages", "core", "src");

  // Loaded one after another, in the same order as before.
  const { transceivers } = await import(join(corePath, "database"));
  const { standards } = await import(join(corePath, "standards"));
  const { competitors } = await import(join(corePath, "market"));
  const { breakouts } = await import(join(corePath, "breakouts"));

  const datasets = { transceivers, standards, competitors, breakouts };
  for (const [exportName, value] of Object.entries(datasets)) {
    if (!Array.isArray(value)) {
      throw new Error(`Core data export "${exportName}" is missing or is not an array`);
    }
  }

  return datasets;
}
|
|
|
|
/**
 * Connection pool used by the whole seed run.
 *
 * Every setting comes from a POSTGRES_* environment variable; the literal on
 * the right of each `||` is the fallback used when the variable is unset or
 * an empty string.
 */
const pool = new Pool({
  host: process.env.POSTGRES_HOST || "localhost",
  // Explicit radix so the port is always parsed as a decimal number.
  // NOTE(review): the fallback is 5432 — confirm it matches the port the local
  // development database actually listens on; otherwise POSTGRES_PORT must be set.
  port: parseInt(process.env.POSTGRES_PORT || "5432", 10),
  database: process.env.POSTGRES_DB || "transceiver_db",
  user: process.env.POSTGRES_USER || "tip",
  // NOTE(review): hard-coded fallback credential — presumably meant for local
  // development only; confirm it is never relied on in a shared environment.
  password: process.env.POSTGRES_PASSWORD || "tip_dev_2026",
});
|
|
|
|
/**
 * Turn arbitrary text into a lowercase, dash-separated slug.
 *
 * Accented letters are first folded to their ASCII base letter (NFKD
 * decomposition, then the combining marks U+0300–U+036F are dropped), so
 * "Société" becomes "societe" rather than losing its accented letters.
 * After that, every run of characters outside `a-z0-9` collapses into a
 * single dash and a leading/trailing dash is removed. Pure-ASCII input
 * produces exactly the same slug as before this folding step was added.
 *
 * @param text Source text, e.g. a vendor name.
 * @returns The slug; an empty string when `text` has no alphanumeric characters.
 */
function slugify(text: string): string {
  return text
    .normalize("NFKD")
    .replace(/[\u0300-\u036f]/g, "")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-|-$/g, "");
}
|
|
|
|
/**
 * Seed the `vendors` table.
 *
 * FLEXOPTIX is written first as the primary vendor (`is_competitor = false`),
 * then one row is written per entry in `competitors` (`is_competitor = true`).
 * When a row with the same `name` already exists, only its `updated_at` is
 * touched; the other columns are left as they are.
 *
 * @param client Database client; only `client.query(text, values)` is used.
 * @param competitors Competitor records from the core package. An entry named
 *   "FLEXOPTIX" is skipped because that vendor is inserted separately.
 * @returns Map from vendor name to the `id` returned by the database.
 */
async function seedVendors(client: any, competitors: readonly any[]): Promise<Map<string, string>> {
  console.log("\nSeeding vendors...");
  const vendorIdMap = new Map<string, string>();

  // Built once, outside the loop. A Map (rather than a plain object) means a
  // `type` such as "constructor" cannot match an inherited prototype key.
  const typeMap = new Map<string, string>([
    ["OEM", "oem"],
    ["Whitebox", "compatible"],
    ["Manufacturer", "manufacturer"],
    ["Distributor", "distributor"],
    ["Compatible", "compatible"],
  ]);

  // Insert Flexoptix first as primary vendor
  const flexResult = await client.query(
    `INSERT INTO vendors (name, slug, type, headquarters, country, website, shop_url, is_competitor, market_position, specialties, strengths, weaknesses)
     VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
     ON CONFLICT (name) DO UPDATE SET updated_at = NOW()
     RETURNING id`,
    [
      "FLEXOPTIX",
      "flexoptix",
      "compatible",
      "Mainz, Germany",
      "Germany",
      "https://www.flexoptix.net",
      "https://www.flexoptix.net/en/",
      false,
      "Premium compatible optics with FlexBox programmer, 300+ vendor support",
      ["compatible optics", "FlexBox", "all speeds", "premium quality"],
      ["FlexBox programmer", "300+ vendor support", "lifetime warranty", "German engineering"],
      [],
    ]
  );
  vendorIdMap.set("FLEXOPTIX", flexResult.rows[0].id);
  console.log("  Inserted: FLEXOPTIX (primary)");

  for (const comp of competitors) {
    if (comp.name === "FLEXOPTIX") continue; // Already inserted

    const result = await client.query(
      `INSERT INTO vendors (name, slug, type, headquarters, is_competitor, market_position, specialties, strengths, weaknesses)
       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
       ON CONFLICT (name) DO UPDATE SET updated_at = NOW()
       RETURNING id`,
      [
        comp.name,
        slugify(comp.name),
        // Unknown or missing type labels fall back to "oem".
        typeMap.get(comp.type) ?? "oem",
        comp.headquarters,
        true,
        comp.marketPosition,
        // `specialties` is the form factors followed by the speed tiers; a
        // missing list counts as empty instead of throwing on the spread.
        [...(comp.formFactorsOffered ?? []), ...(comp.speedTiersOffered ?? [])],
        comp.strengths,
        comp.weaknesses,
      ]
    );
    vendorIdMap.set(comp.name, result.rows[0].id);
    console.log(`  Inserted: ${comp.name}`);
  }

  console.log(`  Total vendors: ${vendorIdMap.size}`);
  return vendorIdMap;
}
|
|
|
|
/**
 * Convert a speed label such as "10G", "1.6T" or "100M" to a number of Gbps.
 *
 * The leading number is read with `parseFloat`; a unit letter directly after
 * it scales the value (`M` → ÷1000, `T` → ×1000, `G` or no unit → unchanged).
 * Labels without a leading number, and a value of 0, yield `null`.
 */
function parseSpeedGbps(speed: unknown): number | null {
  const text = String(speed ?? "");
  const value = parseFloat(text);
  if (Number.isNaN(value) || value === 0) return null;

  const unit = /^\s*[+-]?[\d.]+\s*([MGT])/i.exec(text)?.[1]?.toUpperCase();
  if (unit === "M") return value / 1000;
  if (unit === "T") return value * 1000;
  return value;
}

/**
 * Pick the value stored in `standards.body` for one standard record.
 *
 * Order of the checks matters: a reference starting with "IEEE" wins, then a
 * standard name containing "ZR" maps to "OIF", then any other non-empty
 * reference maps to "IEEE", and everything else is "de_facto".
 *
 * NOTE(review): a non-empty reference that does not start with "IEEE" is still
 * classified as "IEEE", and every name containing "ZR" without an "IEEE…"
 * reference is classified as "OIF" — confirm both against the dataset.
 */
function classifyStandardBody(std: any): string {
  if (std.ieeeReference?.startsWith("IEEE")) return "IEEE";
  if (std.standard.includes("ZR")) return "OIF";
  return std.ieeeReference ? "IEEE" : "de_facto";
}

/**
 * Seed the `standards` table, one row per record in `standards`.
 *
 * When a row with the same `name` already exists, only its `notes` column is
 * replaced with the incoming value.
 *
 * @param client Database client; only `client.query(text, values)` is used.
 * @param standards Standard records from the core package.
 * @returns Map from standard name (`std.standard`) to the `id` returned by the
 *   database.
 */
async function seedStandards(client: any, standards: readonly any[]): Promise<Map<string, string>> {
  console.log("\nSeeding standards...");
  const standardIdMap = new Map<string, string>();

  for (const std of standards) {
    const result = await client.query(
      `INSERT INTO standards (name, ieee_reference, body, speed, speed_gbps, lanes, lane_rate, modulation, fiber_type, wavelength, max_reach_meters, max_reach_label, connector, fec_required, form_factors, year_ratified, notes)
       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17)
       ON CONFLICT (name) DO UPDATE SET notes = EXCLUDED.notes
       RETURNING id`,
      [
        std.standard,
        std.ieeeReference || null,
        classifyStandardBody(std),
        std.speed,
        // Unit-aware: "1.6T" is stored as 1600 Gbps, not 1.6.
        parseSpeedGbps(std.speed),
        std.lanes,
        std.laneRate,
        std.modulation,
        std.fiberType,
        std.wavelength,
        std.maxReachMeters,
        std.maxReachLabel,
        std.connector,
        std.fecRequired,
        std.formFactors,
        std.yearRatified,
        std.notes,
      ]
    );
    standardIdMap.set(std.standard, result.rows[0].id);
  }

  console.log(`  Total standards: ${standardIdMap.size}`);
  return standardIdMap;
}
|
|
|
|
/**
 * Seed the `transceivers` table, one row per record in `transceivers`.
 *
 * Two columns are derived here rather than copied from the record:
 * `wdm_type` ("CWDM", "DWDM" or null) and `coherent` (boolean). When a row
 * with the same `slug` already exists, only its `updated_at` is touched.
 *
 * @param client Database client; only `client.query(text, values)` is used.
 * @param transceivers Transceiver records from the core package; `t.id` is
 *   stored as the row's `slug`.
 * @param standardIdMap Map from standard name to standards-table id, used to
 *   fill `standard_id` (null when the transceiver's standard is not in the map).
 */
async function seedTransceivers(
  client: any,
  transceivers: readonly any[],
  standardIdMap: Map<string, string>
): Promise<void> {
  console.log("\nSeeding transceivers...");
  let count = 0;

  for (const t of transceivers) {
    const standardId = standardIdMap.get(t.standard) || null;

    // Detect WDM type. The DWDM check runs second on purpose: when a record
    // matches both, DWDM wins.
    let wdmType: string | null = null;
    if (t.category === "CWDM" || t.wavelengths?.includes("CWDM")) {
      wdmType = "CWDM";
    }
    if (t.category === "DWDM" || t.wavelengths?.includes("DWDM") || t.standard?.includes("DWDM")) {
      wdmType = "DWDM";
    }

    // Detect coherent optics.
    // NOTE(review): any standard name containing "ZR" is flagged as coherent —
    // confirm that no direct-detect "ZR" part in the dataset is mislabelled.
    const coherent =
      t.category === "Coherent" ||
      t.standard?.includes("ZR") ||
      t.modulation?.includes("DP-") ||
      false;

    await client.query(
      `INSERT INTO transceivers (
         slug, standard_name, standard_id, ieee_reference, form_factor,
         speed, speed_gbps, lanes, lane_rate, modulation,
         reach_meters, reach_label, fiber_type, wavelengths, connector,
         power_consumption_w, temp_range, category, price_tier, use_case,
         vendor_compat, tags, generation, market_status, year_introduced,
         breakout_capable, breakout_to, wdm_type, coherent
       ) VALUES (
         $1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
         $11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
         $21, $22, $23, $24, $25, $26, $27, $28, $29
       ) ON CONFLICT (slug) DO UPDATE SET updated_at = NOW()`,
      [
        t.id,
        t.standard,
        standardId,
        t.ieeeReference || null,
        t.formFactor,
        t.speed,
        t.speedGbps,
        t.lanes || null,
        t.laneRate || null,
        t.modulation || null,
        t.reachMeters,
        t.reachLabel,
        t.fiberType,
        t.wavelengths,
        t.connector,
        t.powerConsumptionW,
        t.tempRange,
        t.category,
        t.priceTier,
        t.useCase,
        // vendor_compat is sent as a JSON string.
        JSON.stringify(t.vendors),
        t.tags,
        t.generation || null,
        t.marketStatus || "Mainstream",
        t.yearIntroduced || null,
        t.breakoutCapable || false,
        t.breakoutTo || null,
        wdmType,
        coherent,
      ]
    );
    count++;
  }

  console.log(`  Total transceivers: ${count}`);
}
|
|
|
|
/**
 * Seed the `breakouts` table, one row per record in `breakouts`.
 *
 * `b.id` is stored as the row's `slug`. When a row with the same `slug`
 * already exists, only its `from_standard` column is overwritten with the
 * incoming value.
 * NOTE(review): the other columns are not refreshed on conflict — confirm
 * that is intended.
 *
 * @param client Database client; only `client.query(text, values)` is used.
 * @param breakouts Breakout records from the core package.
 */
async function seedBreakouts(client: any, breakouts: readonly any[]): Promise<void> {
  console.log("\nSeeding breakouts...");

  for (const b of breakouts) {
    await client.query(
      `INSERT INTO breakouts (slug, from_standard, to_standard, form_factor, description, cable_type, max_length, speed_per_lane)
       VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
       ON CONFLICT (slug) DO UPDATE SET from_standard = $2`,
      [b.id, b.from, b.to, b.formFactor, b.description, b.cableType, b.maxLength, b.speedPerLane]
    );
  }

  console.log(`  Total breakouts: ${breakouts.length}`);
}
|
|
|
|
/**
 * Entry point of the seed script.
 *
 * Loads the core datasets, then seeds vendors, standards, transceivers and
 * breakouts inside a single transaction on one pooled client, and finally
 * prints the row counts of the four tables.
 *
 * Failure handling:
 * - A failure before COMMIT is logged and the transaction is rolled back; a
 *   failing ROLLBACK is logged separately so it cannot hide the original error.
 * - A failure of the summary query after COMMIT is reported as such, because
 *   the seeded data is already committed at that point.
 * - In both cases the exit code is set to 1 through `process.exitCode` rather
 *   than `process.exit(1)`, so the `finally` block still releases the client
 *   and closes the pool before the process ends.
 */
async function main(): Promise<void> {
  console.log("=== TIP Seed: Importing from @tip/core ===\n");

  const { transceivers, standards, competitors, breakouts } = await loadCoreData();

  console.log(`Source data: ${transceivers.length} transceivers, ${standards.length} standards, ${competitors.length} competitors, ${breakouts.length} breakouts`);

  const client = await pool.connect();
  let committed = false;
  try {
    await client.query("BEGIN");

    // The vendor id map returned by seedVendors is not needed by later steps.
    await seedVendors(client, competitors);
    const standardIdMap = await seedStandards(client, standards);
    await seedTransceivers(client, transceivers, standardIdMap);
    await seedBreakouts(client, breakouts);

    await client.query("COMMIT");
    committed = true;
    console.log("\n=== Seed completed successfully ===");

    // Print summary
    const counts = await client.query(`
      SELECT
        (SELECT COUNT(*) FROM vendors) as vendors,
        (SELECT COUNT(*) FROM standards) as standards,
        (SELECT COUNT(*) FROM transceivers) as transceivers,
        (SELECT COUNT(*) FROM breakouts) as breakouts
    `);
    console.log("\nDatabase summary:");
    console.log(`  Vendors:      ${counts.rows[0].vendors}`);
    console.log(`  Standards:    ${counts.rows[0].standards}`);
    console.log(`  Transceivers: ${counts.rows[0].transceivers}`);
    console.log(`  Breakouts:    ${counts.rows[0].breakouts}`);
  } catch (err) {
    if (committed) {
      console.error("\nSeed committed, but the summary query failed:", err);
    } else {
      // Log first so the root cause is visible even if the rollback fails too.
      console.error("\nSeed failed:", err);
      try {
        await client.query("ROLLBACK");
      } catch (rollbackErr) {
        console.error("Rollback failed:", rollbackErr);
      }
    }
    process.exitCode = 1;
  } finally {
    client.release();
    await pool.end();
  }
}

// Failures that happen before the transaction starts (loading the core data,
// connecting to the database) reject main(); report them instead of leaving
// an unhandled promise rejection.
main().catch((err: unknown) => {
  console.error("\nSeed failed:", err);
  process.exitCode = 1;
});
|