feat: OPN+spec equivalence matchers, 400G pricing, TIP_LLM training data

- Add OPN-based equivalence matcher robot (7,245 manufacturer-confirmed matches, confidence=1.0)
- Add spec-based equivalence matcher robot (683 matches, confidence=0.85)
  - Matches by form_factor + speed_gbps + reach_tier + wavelength ±10nm
  - Safety cap: skip FX products matching >30 competitors (too generic)
  - Daily schedule: 04:30 UTC via pg-boss
- SQL migrations 116 (OPN) + 117 (spec) with tip_extract_wavelength_nm() + tip_reach_tier() helpers
- Fix tenGtek.ts: add 3 missing 400G categories (QSFP-DD, QSFP112) — closes pricing gap
- Generate tip-llm-pricing-v1.jsonl: 80 DB-grounded QA pairs (pricing, equivalences, 400G)
- Rebuild TIP_LLM training pool: 11,999 pairs (+127 vs prev), deployed to Erik
- FX product equivalence coverage: 88.1% (959/1089)
This commit is contained in:
Rene Fichtmueller 2026-05-13 21:33:19 +02:00
parent 2f85571784
commit db6b97186a
18 changed files with 6049 additions and 4124 deletions

View File

@ -0,0 +1,130 @@
/**
* OPN-Based Equivalence Matcher
*
* Uses the manufacturer-provided compatibility matrix (fx_compatibilities)
* to create high-confidence equivalences between Flexoptix products and
* their exact OEM counterparts in competitor catalogs.
*
* "OPN" = OEM Part Number — the actual part number the customer buys from
* the original manufacturer (e.g. Cisco QSFP-100G-LR4-S).
*
* Match quality:
* - confidence = 1.0 (manufacturer-confirmed)
* - match_basis = '{opn}'
* - status = 'auto_approved' (same as deterministic spec match)
*
* Strategy:
* - Scans every FX product with non-NULL fx_compatibilities on each run; the
* queries below do not filter incrementally (e.g. by
* detail_synced_at > last_opn_run OR last_opn_run IS NULL)
* - Skips pairs that already have ANY status (approved, auto_approved, rejected)
* - Case-insensitive part_number match on the competitor side
* - Minimum OPN length = 4 chars (skips empty or trivially short entries)
* - Excludes MSA Standard and Flexoptix self-references
*/
import { pool } from "../utils/db";
/** Summary counters returned by runOPNMatcher(). */
export interface OPNMatcherResult {
/** Row count reported by the INSERT_OPN_MATCHES statement. */
inserted: number;
/** Number of Flexoptix products that have non-NULL fx_compatibilities. */
fxProductsScanned: number;
/** Distinct (FX product, competitor product) pairs matched by OPN, including already-stored ones. */
candidatePairs: number;
/** candidatePairs minus inserted, i.e. pairs that did not produce a new row. */
skippedExisting: number;
}
// ── Queries ────────────────────────────────────────────────────────────────
/**
 * Inserts one transceiver_equivalences row per (Flexoptix product, competitor
 * product) pair whose competitor part_number equals, case-insensitively, an
 * original_part_number listed in the FX product's fx_compatibilities JSON array.
 *
 * A "Flexoptix product" is a transceiver whose vendor name contains FLEXOPTIX;
 * a "competitor product" belongs to a vendor with is_competitor = true.
 * Inserted rows get confidence 1.0, status 'auto_approved' and match_basis {opn}.
 * Entries whose OPN is shorter than 4 characters after trimming, or whose
 * compatible_to_vendor is 'MSA Standard (Default)' or 'Flexoptix', are ignored.
 * Pairs that already have any equivalence row are skipped.
 *
 * NOTE(review): `NOT IN` evaluates to NULL when compatible_to_vendor is missing,
 * so such entries are filtered out even though match_notes COALESCEs the vendor
 * to '?' — confirm this is intended.
 * NOTE(review): DISTINCT also covers match_notes, so one pair listed under two
 * vendors yields two candidate rows; deduplication then depends on ON CONFLICT,
 * i.e. on a unique constraint that is not visible here — TODO confirm.
 * NOTE(review): the length check trims the OPN but the join compares it
 * untrimmed — verify whether padded values occur in the data.
 */
const INSERT_OPN_MATCHES = `
INSERT INTO transceiver_equivalences (
flexoptix_id,
competitor_id,
confidence,
status,
match_basis,
match_notes,
created_at,
updated_at
)
SELECT DISTINCT
fx.id AS flexoptix_id,
comp.id AS competitor_id,
1.0 AS confidence,
'auto_approved' AS status,
ARRAY['opn'] AS match_basis,
'Manufacturer-confirmed: FX compatibility matrix lists ' ||
COALESCE(compat->>'compatible_to_vendor', '?') || ' OPN ' ||
COALESCE(compat->>'original_part_number', '?') AS match_notes,
NOW() AS created_at,
NOW() AS updated_at
FROM transceivers fx
JOIN vendors vfx ON vfx.id = fx.vendor_id AND UPPER(vfx.name) LIKE '%FLEXOPTIX%'
CROSS JOIN LATERAL jsonb_array_elements(fx.fx_compatibilities) AS compat
JOIN transceivers comp
ON UPPER(comp.part_number) = UPPER(compat->>'original_part_number')
JOIN vendors vcomp ON vcomp.id = comp.vendor_id AND vcomp.is_competitor = true
WHERE fx.fx_compatibilities IS NOT NULL
AND compat->>'original_part_number' IS NOT NULL
AND length(trim(compat->>'original_part_number')) >= 4
AND compat->>'compatible_to_vendor' NOT IN ('MSA Standard (Default)', 'Flexoptix')
AND NOT EXISTS (
SELECT 1
FROM transceiver_equivalences e
WHERE e.flexoptix_id = fx.id
AND e.competitor_id = comp.id
)
ON CONFLICT DO NOTHING
`;
/**
 * Counts transceivers of vendors whose name contains FLEXOPTIX (case-insensitive)
 * that have a non-NULL fx_compatibilities value. Returns a single row with `cnt`.
 */
const COUNT_FX_WITH_COMPAT = `
SELECT COUNT(*) AS cnt
FROM transceivers t
JOIN vendors v ON v.id = t.vendor_id AND UPPER(v.name) LIKE '%FLEXOPTIX%'
WHERE t.fx_compatibilities IS NOT NULL
`;
/**
 * Counts distinct (FX product, competitor product) pairs that satisfy the same
 * join and filter conditions as INSERT_OPN_MATCHES, but without the
 * "equivalence already exists" exclusion. The result therefore includes pairs
 * that are already stored. Returns a single row with `cnt`.
 */
const COUNT_CANDIDATE_PAIRS = `
SELECT COUNT(DISTINCT (fx.id, comp.id)) AS cnt
FROM transceivers fx
JOIN vendors vfx ON vfx.id = fx.vendor_id AND UPPER(vfx.name) LIKE '%FLEXOPTIX%'
CROSS JOIN LATERAL jsonb_array_elements(fx.fx_compatibilities) AS compat
JOIN transceivers comp
ON UPPER(comp.part_number) = UPPER(compat->>'original_part_number')
JOIN vendors vcomp ON vcomp.id = comp.vendor_id AND vcomp.is_competitor = true
WHERE fx.fx_compatibilities IS NOT NULL
AND compat->>'original_part_number' IS NOT NULL
AND length(trim(compat->>'original_part_number')) >= 4
AND compat->>'compatible_to_vendor' NOT IN ('MSA Standard (Default)', 'Flexoptix')
`;
// ── Main export ────────────────────────────────────────────────────────────
/**
 * Runs the OPN-based equivalence matcher once.
 *
 * Executes two informational count queries (FX products with compatibility
 * data, candidate pairs) and then the INSERT that creates the missing
 * equivalences. Progress is logged to the console with ISO timestamps.
 *
 * @returns Counters describing the run. `skippedExisting` is derived as
 *   `candidatePairs - inserted` and is clamped at 0: the count and the insert
 *   are separate statements, so the difference can otherwise turn negative
 *   when data changes in between or when one pair produces several rows.
 * @throws Whatever `pool.query` rejects with; nothing is caught here.
 */
export async function runOPNMatcher(): Promise<OPNMatcherResult> {
  const ts = () => new Date().toISOString();
  console.log(`[${ts()}] OPN Matcher starting`);

  // Count FX products with compatibility data
  const fxRes = await pool.query<{ cnt: string }>(COUNT_FX_WITH_COMPAT);
  const fxProductsScanned = parseInt(fxRes.rows[0]?.cnt ?? "0", 10);

  // Count candidate pairs (informational)
  const candRes = await pool.query<{ cnt: string }>(COUNT_CANDIDATE_PAIRS);
  const candidatePairs = parseInt(candRes.rows[0]?.cnt ?? "0", 10);
  console.log(`[${ts()}] OPN Matcher: ${fxProductsScanned} FX products, ${candidatePairs} candidate pairs`);

  // Insert new OPN-based equivalences
  const insertRes = await pool.query(INSERT_OPN_MATCHES);
  const inserted = insertRes.rowCount ?? 0;

  // Never report a negative "skipped" figure (see function docs).
  const skippedExisting = Math.max(0, candidatePairs - inserted);
  console.log(
    `[${ts()}] OPN Matcher done: ${inserted} new equivalences inserted ` +
      `(${skippedExisting} pairs already existed)`,
  );

  return {
    inserted,
    fxProductsScanned,
    candidatePairs,
    skippedExisting,
  };
}

View File

@ -0,0 +1,169 @@
/**
* Spec-Based Equivalence Matcher
*
* Matches FX products with competitor products by technical specification
* when no OPN-based equivalence exists. Spec-matching is a fallback:
* OPN-confirmed matches (confidence=1.0) always take priority.
*
* Match criteria:
* - Same form_factor (exact)
* - Same speed_gbps (exact)
* - Same reach tier (SR/IR/LR/ER/ZR)
* - Same primary wavelength within ±10nm (CWDM/WDM safe)
* OR both have no wavelength data (broadband products)
* - Safety cap: FX products with more than 30 matching competitor products
*   are skipped entirely (too generic)
*
* Match quality:
* confidence = 0.85
* match_basis = '{spec}'
* status = 'auto_approved'
*/
import { pool } from "../utils/db";
/** Summary counters returned by runSpecMatcher(). */
export interface SpecMatcherResult {
/** Row count reported by the INSERT_SPEC_MATCHES statement. */
inserted: number;
/** Number of eligible Flexoptix products that have no OPN-based equivalence. */
fxProductsScanned: number;
/** Distinct (FX product, competitor product) pairs matched by spec, before the existing-pair and safety-cap filters. */
candidatePairs: number;
/** candidatePairs minus inserted, i.e. pairs that already existed or were dropped by the safety cap. */
skippedExisting: number;
}
// ── Queries ──────────────────────────────────────────────────────────────────
/**
 * Inserts spec-based equivalences (confidence 0.85, status 'auto_approved',
 * match_basis {spec}) between Flexoptix products and competitor products.
 *
 * A pair matches when form_factor and speed_gbps are equal, both sides have
 * reach_meters >= 10 and the same tip_reach_tier(), and the values returned by
 * tip_extract_wavelength_nm() are either both NULL or within 10 of each other.
 *
 * Excluded:
 *  - FX products that already have any equivalence with 'opn' in match_basis,
 *  - pairs that already have an equivalence row,
 *  - FX products for which more than 30 distinct competitor products satisfy
 *    the match criteria (the whole product is skipped, not truncated to 30).
 *
 * NOTE(review): the SR/IR/LR/ER/ZR labels in match_notes are computed by an
 * inline CASE, not by tip_reach_tier(); that function is defined elsewhere, so
 * verify both use the same thresholds.
 * NOTE(review): the safety-cap subquery repeats the join conditions verbatim;
 * keep the two in sync when changing the match criteria.
 */
const INSERT_SPEC_MATCHES = `
INSERT INTO transceiver_equivalences (
flexoptix_id,
competitor_id,
confidence,
status,
match_basis,
match_notes,
created_at,
updated_at
)
SELECT DISTINCT
fx.id AS flexoptix_id,
comp.id AS competitor_id,
0.85 AS confidence,
'auto_approved' AS status,
ARRAY['spec'] AS match_basis,
'Spec match: ' || fx.form_factor || ' ' || fx.speed_gbps || 'G ' ||
CASE WHEN fx.reach_meters <= 300 THEN 'SR'
WHEN fx.reach_meters <= 2000 THEN 'IR'
WHEN fx.reach_meters <= 10000 THEN 'LR'
WHEN fx.reach_meters <= 40000 THEN 'ER'
ELSE 'ZR' END ||
CASE WHEN tip_extract_wavelength_nm(fx.wavelengths) IS NOT NULL
THEN ' @' || tip_extract_wavelength_nm(fx.wavelengths) || 'nm'
ELSE '' END AS match_notes,
NOW() AS created_at,
NOW() AS updated_at
FROM transceivers fx
JOIN vendors vfx ON vfx.id = fx.vendor_id AND UPPER(vfx.name) LIKE '%FLEXOPTIX%'
JOIN transceivers comp
ON comp.form_factor = fx.form_factor
AND comp.speed_gbps = fx.speed_gbps
AND comp.reach_meters >= 10
AND tip_reach_tier(comp.reach_meters) = tip_reach_tier(fx.reach_meters)
AND (
(tip_extract_wavelength_nm(fx.wavelengths) IS NULL
AND tip_extract_wavelength_nm(comp.wavelengths) IS NULL)
OR ABS( COALESCE(tip_extract_wavelength_nm(comp.wavelengths), 0)
- COALESCE(tip_extract_wavelength_nm(fx.wavelengths), 0) ) <= 10
)
JOIN vendors vcomp ON vcomp.id = comp.vendor_id AND vcomp.is_competitor = true
WHERE fx.reach_meters >= 10
AND fx.speed_gbps > 0
-- OPN match already exists skip (spec is fallback only)
AND NOT EXISTS (
SELECT 1 FROM transceiver_equivalences e
WHERE e.flexoptix_id = fx.id AND 'opn' = ANY(e.match_basis)
)
-- Skip pairs that already have ANY equivalence
AND NOT EXISTS (
SELECT 1 FROM transceiver_equivalences e
WHERE e.flexoptix_id = fx.id AND e.competitor_id = comp.id
)
-- Safety cap: skip if > 30 competitors would match (too generic)
AND (
SELECT COUNT(DISTINCT c2.id)
FROM transceivers c2
JOIN vendors vc2 ON vc2.id = c2.vendor_id AND vc2.is_competitor = true
WHERE c2.form_factor = fx.form_factor
AND c2.speed_gbps = fx.speed_gbps
AND c2.reach_meters >= 10
AND tip_reach_tier(c2.reach_meters) = tip_reach_tier(fx.reach_meters)
AND (
(tip_extract_wavelength_nm(fx.wavelengths) IS NULL
AND tip_extract_wavelength_nm(c2.wavelengths) IS NULL)
OR ABS( COALESCE(tip_extract_wavelength_nm(c2.wavelengths), 0)
- COALESCE(tip_extract_wavelength_nm(fx.wavelengths), 0) ) <= 10
)
) <= 30
ON CONFLICT DO NOTHING
`;
/**
 * Counts Flexoptix transceivers with reach_meters >= 10 and speed_gbps > 0
 * that have no equivalence row containing 'opn' in match_basis.
 * Returns a single row with `cnt`.
 */
const COUNT_FX_WITHOUT_OPN = `
SELECT COUNT(DISTINCT t.id) AS cnt
FROM transceivers t
JOIN vendors v ON v.id = t.vendor_id AND UPPER(v.name) LIKE '%FLEXOPTIX%'
WHERE t.reach_meters >= 10
AND t.speed_gbps > 0
AND NOT EXISTS (
SELECT 1 FROM transceiver_equivalences e
WHERE e.flexoptix_id = t.id AND 'opn' = ANY(e.match_basis)
)
`;
/**
 * Counts distinct (FX product, competitor product) pairs that satisfy the
 * spec-match join conditions for FX products without an OPN equivalence.
 * Unlike INSERT_SPEC_MATCHES it applies neither the "pair already exists"
 * exclusion nor the 30-competitor safety cap, so it is an upper bound on what
 * the insert can add. Returns a single row with `cnt`.
 */
const COUNT_SPEC_CANDIDATES = `
SELECT COUNT(DISTINCT (fx.id, comp.id)) AS cnt
FROM transceivers fx
JOIN vendors vfx ON vfx.id = fx.vendor_id AND UPPER(vfx.name) LIKE '%FLEXOPTIX%'
JOIN transceivers comp
ON comp.form_factor = fx.form_factor
AND comp.speed_gbps = fx.speed_gbps
AND comp.reach_meters >= 10
AND tip_reach_tier(comp.reach_meters) = tip_reach_tier(fx.reach_meters)
AND (
(tip_extract_wavelength_nm(fx.wavelengths) IS NULL
AND tip_extract_wavelength_nm(comp.wavelengths) IS NULL)
OR ABS( COALESCE(tip_extract_wavelength_nm(comp.wavelengths), 0)
- COALESCE(tip_extract_wavelength_nm(fx.wavelengths), 0) ) <= 10
)
JOIN vendors vcomp ON vcomp.id = comp.vendor_id AND vcomp.is_competitor = true
WHERE fx.reach_meters >= 10
AND fx.speed_gbps > 0
AND NOT EXISTS (
SELECT 1 FROM transceiver_equivalences e
WHERE e.flexoptix_id = fx.id AND 'opn' = ANY(e.match_basis)
)
`;
// ── Main export ───────────────────────────────────────────────────────────────
/**
 * Runs the spec-based equivalence matcher once.
 *
 * Executes two informational count queries (FX products without an OPN
 * equivalence, spec candidate pairs) and then the INSERT that creates the
 * missing spec equivalences. Progress is logged with ISO timestamps.
 *
 * @returns Counters describing the run. `skippedExisting` is derived as
 *   `candidatePairs - inserted` and clamped at 0, because the count and the
 *   insert are separate statements and the raw difference can turn negative
 *   if data changes in between.
 * @throws Whatever `pool.query` rejects with; nothing is caught here.
 */
export async function runSpecMatcher(): Promise<SpecMatcherResult> {
  const ts = () => new Date().toISOString();
  console.log(`[${ts()}] Spec Matcher starting`);

  const fxRes = await pool.query<{ cnt: string }>(COUNT_FX_WITHOUT_OPN);
  const fxProductsScanned = parseInt(fxRes.rows[0]?.cnt ?? "0", 10);

  const candRes = await pool.query<{ cnt: string }>(COUNT_SPEC_CANDIDATES);
  const candidatePairs = parseInt(candRes.rows[0]?.cnt ?? "0", 10);
  console.log(
    `[${ts()}] Spec Matcher: ${fxProductsScanned} FX products without OPN, ` +
      `${candidatePairs} spec candidate pairs`,
  );

  const insertRes = await pool.query(INSERT_SPEC_MATCHES);
  const inserted = insertRes.rowCount ?? 0;

  // Never report a negative "skipped" figure (see function docs).
  const skippedExisting = Math.max(0, candidatePairs - inserted);
  console.log(
    `[${ts()}] Spec Matcher done: ${inserted} new spec equivalences inserted ` +
      `(${skippedExisting} pairs already existed or capped)`,
  );

  return { inserted, fxProductsScanned, candidatePairs, skippedExisting };
}

View File

@ -358,6 +358,10 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
"enrich:wavelength",
// ── Flexoptix Detail Enrichment ──────────────────────────────────────
"enrich:flexoptix-details",
// ── OPN-Based Equivalence Matcher ────────────────────────────────────
"match:opn",
// ── Spec-Based Equivalence Matcher ───────────────────────────────────
"match:spec",
];
for (const q of queues) {
@ -434,6 +438,20 @@ export async function registerSchedules(boss: PgBoss): Promise<void> {
expireInSeconds: 7200,
});
// OPN Matcher — daily at 04:00 UTC (after the Detail Enricher)
// Uses fx_compatibilities for manufacturer-confirmed equivalences (confidence=1.0)
await boss.schedule("match:opn", "0 4 * * *", {}, {
retryLimit: 2,
expireInSeconds: 1800,
});
// Spec Matcher — daily at 04:30 UTC (after the OPN Matcher)
// Fallback: form_factor + speed + reach tier + wavelength (confidence=0.85)
await boss.schedule("match:spec", "30 4 * * *", {}, {
retryLimit: 2,
expireInSeconds: 1800,
});
// ══════════════════════════════════════════════════════════════════════
// MANUFACTURER CATALOGS — every 4h (product data, no prices)
// ══════════════════════════════════════════════════════════════════════
@ -941,6 +959,30 @@ export async function registerWorkers(boss: PgBoss): Promise<void> {
await runWavelengthEnricher();
});
// OPN Matcher — manufacturer-confirmed equivalences via fx_compatibilities
await boss.work("match:opn", async () => {
const ts = new Date().toISOString();
console.log(`[${ts}] Running: OPN Matcher`);
const { runOPNMatcher } = await import("./robots/opn-matcher");
const result = await runOPNMatcher();
console.log(
`[match:opn] Done: ${result.inserted} new equivalences, ` +
`${result.candidatePairs} total pairs, ${result.fxProductsScanned} FX products`,
);
});
// Spec-Based Equivalence Matcher — form_factor + speed + reach-tier + wavelength
await boss.work("match:spec", async () => {
const ts = new Date().toISOString();
console.log(`[${ts}] Running: Spec Matcher`);
const { runSpecMatcher } = await import("./robots/spec-matcher");
const result = await runSpecMatcher();
console.log(
`[match:spec] Done: ${result.inserted} new spec equivalences, ` +
`${result.candidatePairs} candidate pairs, ${result.fxProductsScanned} FX products scanned`,
);
});
// Flexoptix Detail Enricher — fetches full specs + compat from API per SKU
await boss.work("enrich:flexoptix-details", async () => {
const ts = new Date().toISOString();

View File

@ -8,7 +8,7 @@
* Strategy: Paginate each category on sfpcables.com, extract Model + price per product.
* Rate limited: 1 req/2sec between pages.
*
* Categories: SFP, SFP+, SFP28, QSFP+, QSFP28, XFP
* Categories: SFP, SFP+, SFP28, QSFP+, QSFP28, XFP, QSFP-DD 400G, QSFP112 400G
*/
import { pool, findOrCreateScrapedTransceiver, ensureVendor, upsertPriceObservation } from "../utils/db";
import { contentHash, parsePrice } from "../utils/hash";
@ -20,12 +20,16 @@ const HEADERS = {
};
const CATEGORIES = [
{ slug: "sfp-1-25g-series", formFactor: "SFP", speed: "1G", speedGbps: 1 },
{ slug: "sfp-transceivers", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
{ slug: "sfp28-transceivers", formFactor: "SFP28", speed: "25G", speedGbps: 25 },
{ slug: "qsfp-transceivers", formFactor: "QSFP+", speed: "40G", speedGbps: 40 },
{ slug: "100g-qsfp28-transceivers", formFactor: "QSFP28", speed: "100G", speedGbps: 100 },
{ slug: "xfp-transceivers", formFactor: "XFP", speed: "10G", speedGbps: 10 },
{ slug: "sfp-1-25g-series", formFactor: "SFP", speed: "1G", speedGbps: 1 },
{ slug: "sfp-transceivers", formFactor: "SFP+", speed: "10G", speedGbps: 10 },
{ slug: "sfp28-transceivers", formFactor: "SFP28", speed: "25G", speedGbps: 25 },
{ slug: "qsfp-transceivers", formFactor: "QSFP+", speed: "40G", speedGbps: 40 },
{ slug: "100g-qsfp28-transceivers", formFactor: "QSFP28", speed: "100G", speedGbps: 100 },
{ slug: "xfp-transceivers", formFactor: "XFP", speed: "10G", speedGbps: 10 },
// 400G — added to close pricing gap for TIP_LLM training data
{ slug: "8x50g-qsfp-dd-transceiver-optical-module", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 },
{ slug: "qsfp112-400g", formFactor: "QSFP112", speed: "400G", speedGbps: 400 },
{ slug: "400g-qsfp-fiber-optic-transceiver-modules", formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 },
];
interface Product {

View File

@ -0,0 +1,509 @@
/**
* generate-pricing-training-data.ts
*
* Generates TIP_LLM training QA pairs from live DB data:
* 1. Competitor pricing by speed tier / form factor
* 2. OPN-confirmed equivalence lookups (FX → competitor)
* 3. Spec-based equivalence reasoning
* 4. Market price range summaries
* 5. 400G / next-gen pricing intelligence
*
* Output: training-data/tip-llm-pricing-v1.jsonl
*
* Run: npx ts-node scripts/generate-pricing-training-data.ts
*/
import { createHash } from "crypto";
import { writeFileSync, mkdirSync } from "fs";
import { join } from "path";
import { Pool } from "pg";
// ── DB connection ─────────────────────────────────────────────────────────────
/**
 * Postgres connection pool used by every query helper in this script.
 *
 * Connection settings come from DB_HOST / DB_PORT / DB_NAME / DB_USER /
 * DB_PASSWORD, with local-development defaults for everything except the
 * password.
 *
 * SECURITY: the password has no hard-coded fallback. A production credential
 * must never live in source control; supply it via DB_PASSWORD (node-postgres
 * also honours PGPASSWORD when `password` is undefined).
 */
const pool = new Pool({
  host: process.env.DB_HOST || "localhost",
  // Explicit radix so the port is always parsed as decimal.
  port: parseInt(process.env.DB_PORT || "5433", 10),
  database: process.env.DB_NAME || "transceiver_db",
  user: process.env.DB_USER || "tip",
  password: process.env.DB_PASSWORD,
  ssl: false,
});
/**
 * System message attached to every generated training example (see pair()).
 * It is emitted verbatim into the JSONL output, so any edit changes the
 * training data.
 *
 * NOTE(review): a few spans read as if a dash or arrow character was lost
 * ("Never invent specs flag unknowns", "transceivers switches",
 * "0100 position score", "1224 month forecast") — verify against the
 * canonical TIP_LLM system prompt before changing anything here.
 */
const SYSTEM_PROMPT = `You are TIP_LLM — the Transceiver Intelligence Platform's core research, data-engineering, and market-intelligence model.
Your five core capabilities:
CAP-1 · TRANSCEIVER RESEARCH
Research any optical transceiver by part number, vendor, form factor, or speed tier. Extract and normalise: full electrical/optical specs, fiber type, reach, connector, DOM support, temperature range, power budget, vendor pricing, compatibility matrix (switches, line cards), standards compliance (IEEE, OIF, MSA), and known field issues. Output structured JSON or normalised text. Never invent specs flag unknowns explicitly.
CAP-2 · SWITCH RESEARCH
Research network switches: port density, supported form factors, transceiver compatibility lists, ASIC type, buffer depth, forwarding capacity, SONiC/NOS support, rack unit size, power draw, and vendor pricing. Cross-reference transceivers switches and vice versa. Identify supported QSFP-DD, OSFP, SFP28 variants per slot. Flag MACsec, FEC, and breakout constraints.
CAP-3 · BLOG LLM DATA EVALUATION
Evaluate raw crawled content, vendor pages, forum posts, and market reports for Blog_LLM ingestion quality. Score on: technical depth (0-10), factual density (0-10), recency (0-10), uniqueness (0-10), writing quality (0-10). Output evaluation JSON with per-dimension scores, an overall recommendation (ACCEPT / REVIEW / REJECT), and a one-line reason. Extract blog-worthy angles and key claims for reuse.
CAP-4 · CRAWLER / SCRAPER / ROBOT DESIGN
Design, plan, and generate production-ready crawlers using Crawlee + Playwright/Puppeteer. For any target URL or data need: identify page structure, write CSS/XPath selectors, handle pagination, rate limits, and bot detection. Output complete TypeScript Crawlee actor code, sitemap strategies, and extraction schemas. Also design lightweight HTTP scrapers (fetch + cheerio) for simpler targets. Flag legal/ToS considerations.
CAP-5 · HYPE CYCLE CALCULATION
Calculate Gartner Hype Cycle position for optical networking technologies using the Norton-Bass diffusion model. Given adoption metrics, vendor announcements, standards maturity, and market pricing trends compute: innovation trigger probability, peak inflation score, trough depth estimate, and slope-of-enlightenment ETA. Output: phase label, 0100 position score, buy-signal (BUY_NOW / CONSIDER / WAIT / AVOID), and 1224 month forecast.`;
/** One chat turn of a training example. */
type Message = { role: "system" | "user" | "assistant"; content: string };
/** One training record: a content-derived id, the source tag, a kind label and the chat messages. */
type Row = { id: string; source: string; kind: string; messages: Message[] };
/**
 * Derives a stable record id from a question/answer pair.
 *
 * @param user - The user message text.
 * @param assistant - The assistant message text.
 * @returns The first 24 hex characters of the SHA-256 digest of the
 *   "tip_llm"-prefixed, "---"-separated concatenation of both texts.
 */
function makeId(user: string, assistant: string): string {
  const material = `tip_llm\n---\n${user}\n---\n${assistant}`;
  const hasher = createHash("sha256");
  hasher.update(material);
  const hexDigest = hasher.digest("hex");
  return hexDigest.slice(0, 24);
}
/**
 * Builds one training record from a question and its answer.
 *
 * Both texts are trimmed before use; the id is computed from the trimmed
 * texts, and the shared system prompt is prepended as the first message.
 *
 * @param user - Question text.
 * @param assistant - Answer text.
 * @param kind - Category label stored on the record (default "db-pricing").
 * @returns The assembled record.
 */
function pair(user: string, assistant: string, kind = "db-pricing"): Row {
  const question = user.trim();
  const answer = assistant.trim();
  const messages: Message[] = [
    { role: "system", content: SYSTEM_PROMPT },
    { role: "user", content: question },
    { role: "assistant", content: answer },
  ];
  return {
    id: makeId(question, answer),
    source: "tip-llm-pricing-v1",
    kind,
    messages,
  };
}
// ── Query helpers ─────────────────────────────────────────────────────────────
/**
 * Loads min/avg/max competitor prices per form factor, speed, vendor and
 * currency.
 *
 * Only the newest price observation of the last 30 days is used per
 * transceiver, only the listed speed tiers are considered, and groups with
 * fewer than 3 distinct products are dropped.
 *
 * @returns The result rows, ordered by speed, form factor and average price.
 */
async function getPriceSummaryByTier() {
  const sql = `
SELECT
t.form_factor,
t.speed_gbps,
v.name AS vendor,
COUNT(DISTINCT t.id) AS products,
ROUND(MIN(po.price)::numeric, 2) AS min_price,
ROUND(AVG(po.price)::numeric, 2) AS avg_price,
ROUND(MAX(po.price)::numeric, 2) AS max_price,
po.currency
FROM transceivers t
JOIN vendors v ON v.id = t.vendor_id AND v.is_competitor = true
JOIN LATERAL (
SELECT price, currency FROM price_observations
WHERE transceiver_id = t.id AND time > NOW() - INTERVAL '30 days'
ORDER BY time DESC LIMIT 1
) po ON true
WHERE t.speed_gbps IN (10, 25, 40, 100, 200, 400, 800)
AND t.form_factor NOT IN ('', 'Unknown')
GROUP BY t.form_factor, t.speed_gbps, v.name, po.currency
HAVING COUNT(DISTINCT t.id) >= 3
ORDER BY t.speed_gbps, t.form_factor, avg_price
`;
  const result = await pool.query(sql);
  return result.rows;
}
/**
 * Picks a random sample of OPN-based equivalences whose competitor product has
 * a price observation from the last 30 days.
 *
 * @param limit - Maximum number of rows to return (default 50).
 * @returns Rows with FX and competitor part/vendor, form factor, speed,
 *   match notes and the newest recent price with its currency.
 */
async function getOPNEquivalenceExamples(limit = 50) {
  const sql = `
SELECT
fx.part_number AS fx_part,
vfx.name AS fx_vendor,
comp.part_number AS comp_part,
vcomp.name AS comp_vendor,
comp.form_factor,
comp.speed_gbps,
e.match_notes,
po.price,
po.currency
FROM transceiver_equivalences e
JOIN transceivers fx ON fx.id = e.flexoptix_id
JOIN vendors vfx ON vfx.id = fx.vendor_id
JOIN transceivers comp ON comp.id = e.competitor_id
JOIN vendors vcomp ON vcomp.id = comp.vendor_id
LEFT JOIN LATERAL (
SELECT price, currency FROM price_observations
WHERE transceiver_id = comp.id AND time > NOW() - INTERVAL '30 days'
ORDER BY time DESC LIMIT 1
) po ON true
WHERE 'opn' = ANY(e.match_basis)
AND po.price IS NOT NULL
ORDER BY RANDOM()
LIMIT $1
`;
  const result = await pool.query(sql, [limit]);
  return result.rows;
}
/**
 * Picks a random sample of spec-based equivalences whose competitor product
 * has a price observation from the last 30 days.
 *
 * @param limit - Maximum number of rows to return (default 30).
 * @returns Rows with FX part, competitor part/vendor, form factor, speed,
 *   match notes and the newest recent price with its currency.
 */
async function getSpecEquivalenceExamples(limit = 30) {
  const sql = `
SELECT
fx.part_number AS fx_part,
comp.part_number AS comp_part,
vcomp.name AS comp_vendor,
comp.form_factor,
comp.speed_gbps,
e.match_notes,
po.price,
po.currency
FROM transceiver_equivalences e
JOIN transceivers fx ON fx.id = e.flexoptix_id
JOIN transceivers comp ON comp.id = e.competitor_id
JOIN vendors vcomp ON vcomp.id = comp.vendor_id
LEFT JOIN LATERAL (
SELECT price, currency FROM price_observations
WHERE transceiver_id = comp.id AND time > NOW() - INTERVAL '30 days'
ORDER BY time DESC LIMIT 1
) po ON true
WHERE 'spec' = ANY(e.match_basis)
AND po.price IS NOT NULL
ORDER BY RANDOM()
LIMIT $1
`;
  const result = await pool.query(sql, [limit]);
  return result.rows;
}
/**
 * Loads a per-vendor pricing profile for competitor vendors.
 *
 * Uses the newest price observation of the last 7 days per transceiver and
 * keeps vendors with at least 10 priced products, returning at most the 20
 * vendors with the most priced products.
 *
 * NOTE(review): the output columns are named *_usd, but the query does not
 * filter or convert by currency — confirm all observations are in USD.
 *
 * @returns Rows with vendor name, product count and rounded avg/min/max price.
 */
async function getVendorPricingOverview() {
  const sql = `
SELECT
v.name AS vendor,
COUNT(DISTINCT t.id) AS products_with_prices,
ROUND(AVG(po.price)::numeric, 0) AS avg_price_usd,
ROUND(MIN(po.price)::numeric, 0) AS min_price_usd,
ROUND(MAX(po.price)::numeric, 0) AS max_price_usd
FROM transceivers t
JOIN vendors v ON v.id = t.vendor_id AND v.is_competitor = true
JOIN LATERAL (
SELECT price FROM price_observations
WHERE transceiver_id = t.id AND time > NOW() - INTERVAL '7 days'
ORDER BY time DESC LIMIT 1
) po ON true
GROUP BY v.name
HAVING COUNT(DISTINCT t.id) >= 10
ORDER BY products_with_prices DESC
LIMIT 20
`;
  const result = await pool.query(sql);
  return result.rows;
}
/**
 * Loads the highest-priced equivalences.
 *
 * "High-value" here means: the competitor product's newest price observation
 * of the last 30 days is above 50; rows are ordered by that price, highest
 * first. No comparison against an average price is performed.
 *
 * @param limit - Maximum number of rows to return (default 30).
 * @returns Rows with FX part, competitor part/vendor, form factor, speed,
 *   reach, price, currency, confidence and match basis.
 */
async function getHighValueEquivalences(limit = 30) {
  const sql = `
SELECT
fx.part_number AS fx_part,
comp.part_number AS comp_part,
vcomp.name AS comp_vendor,
comp.form_factor,
comp.speed_gbps,
comp.reach_meters,
po.price,
po.currency,
e.confidence,
e.match_basis
FROM transceiver_equivalences e
JOIN transceivers fx ON fx.id = e.flexoptix_id
JOIN transceivers comp ON comp.id = e.competitor_id
JOIN vendors vcomp ON vcomp.id = comp.vendor_id
JOIN LATERAL (
SELECT price, currency FROM price_observations
WHERE transceiver_id = comp.id AND time > NOW() - INTERVAL '30 days'
ORDER BY time DESC LIMIT 1
) po ON true
WHERE po.price > 50
ORDER BY po.price DESC
LIMIT $1
`;
  const result = await pool.query(sql, [limit]);
  return result.rows;
}
/**
 * Loads priced competitor products in the high-speed tiers.
 *
 * Despite the name, the filter is speed_gbps >= 200, so 200G and 800G
 * products are included too. The newest price observation is used with no
 * age limit.
 *
 * @returns Rows with part number, vendor, form factor, speed, reach,
 *   wavelengths, price and currency, ordered by speed, form factor and price.
 */
async function get400GPricingData() {
  const sql = `
SELECT
t.part_number,
v.name AS vendor,
t.form_factor,
t.speed_gbps,
t.reach_meters,
t.wavelengths,
po.price,
po.currency
FROM transceivers t
JOIN vendors v ON v.id = t.vendor_id AND v.is_competitor = true
JOIN LATERAL (
SELECT price, currency FROM price_observations
WHERE transceiver_id = t.id
ORDER BY time DESC LIMIT 1
) po ON true
WHERE t.speed_gbps >= 200
AND po.price IS NOT NULL
ORDER BY t.speed_gbps, t.form_factor, po.price
`;
  const result = await pool.query(sql);
  return result.rows;
}
/**
 * Loads database-wide coverage counters in a single query.
 *
 * @returns One row with total_transceivers, fx_products, opn_equivalences,
 *   spec_equivalences and fresh_prices_7d (competitor products with at least
 *   one price observation in the last 7 days).
 */
async function getCoverageStats() {
  const sql = `
SELECT
(SELECT COUNT(*) FROM transceivers) AS total_transceivers,
(SELECT COUNT(*) FROM transceivers t
JOIN vendors v ON v.id = t.vendor_id AND UPPER(v.name) LIKE '%FLEXOPTIX%') AS fx_products,
(SELECT COUNT(*) FROM transceiver_equivalences WHERE 'opn' = ANY(match_basis)) AS opn_equivalences,
(SELECT COUNT(*) FROM transceiver_equivalences WHERE 'spec' = ANY(match_basis)) AS spec_equivalences,
(SELECT COUNT(DISTINCT t.id) FROM transceivers t
JOIN vendors v ON v.id = t.vendor_id AND v.is_competitor = true
JOIN LATERAL (SELECT 1 FROM price_observations po
WHERE po.transceiver_id = t.id AND po.time > NOW() - INTERVAL '7 days' LIMIT 1) fresh ON true
) AS fresh_prices_7d
`;
  const result = await pool.query(sql);
  const [stats] = result.rows;
  return stats;
}
// ── Training pair generators ──────────────────────────────────────────────────
/**
 * Builds one "market pricing overview" QA pair per speed tier.
 *
 * Rows are grouped by speed_gbps; tiers with fewer than 2 rows are skipped and
 * at most 8 vendor lines are listed per tier.
 *
 * Each vendor line now reports the row's own currency instead of a hard-coded
 * "USD": the summary query groups by currency, so non-USD rows were
 * previously mislabelled. "USD" is kept as the fallback when a row has no
 * currency.
 *
 * @param priceSummary - Rows as returned by getPriceSummaryByTier().
 * @returns The generated training records.
 */
function generatePriceSummaryPairs(priceSummary: any[]): Row[] {
  const rows: Row[] = [];
  const today = new Date().toISOString().split("T")[0];

  // Group by speed tier
  const bySpeed: Record<number, any[]> = {};
  for (const r of priceSummary) {
    if (!bySpeed[r.speed_gbps]) bySpeed[r.speed_gbps] = [];
    bySpeed[r.speed_gbps].push(r);
  }

  for (const [speed, vendors] of Object.entries(bySpeed)) {
    if (vendors.length < 2) continue;

    const formFactors = [...new Set(vendors.map((v: any) => v.form_factor))].join(", ");
    const vendorList = vendors.slice(0, 8).map((v: any) =>
      ` - ${v.vendor} (${v.form_factor}): min $${v.min_price}, avg $${v.avg_price}, max $${v.max_price} ${v.currency ?? "USD"} across ${v.products} products`
    ).join("\n");

    // Object keys are strings; parse once, with an explicit radix.
    const speedGbps = parseInt(speed, 10);
    let tierObservation: string;
    if (speedGbps >= 400) {
      tierObservation = "400G+ products show significant price compression as the ecosystem matures";
    } else if (speedGbps >= 100) {
      tierObservation = "100G is the most competitive tier with the largest number of vendors";
    } else {
      tierObservation = "Lower speeds have stable pricing with established supply chains";
    }

    rows.push(pair(
      `What are current market prices for ${speed}G optical transceivers? Give me a competitive pricing overview.`,
      `Based on TIP's live pricing database (data current as of ${today}), here is the ${speed}G transceiver market pricing overview:
**Form factors available:** ${formFactors}
**Competitor pricing by vendor:**
${vendorList}
**Key observations:**
- Price range spans from low-cost compatible vendors to premium OEM alternatives
- ${tierObservation}
- Volume pricing and B2B discounts can reduce costs by 20-40% for large orders
For Flexoptix-equivalent part numbers at these specifications, the compatibility matrix maps these products to certified FX alternatives.`
    ));
  }
  return rows;
}
/**
 * Builds one QA pair per Flexoptix part number, listing its OPN-confirmed
 * competitor equivalents with their prices.
 *
 * Form factor, speed and the optional notes line are taken from the first
 * match of each group.
 *
 * @param equivalences - Rows as returned by getOPNEquivalenceExamples().
 * @returns The generated training records, one per distinct fx_part.
 */
function generateOPNEquivalencePairs(equivalences: any[]): Row[] {
  // Bucket the matches by FX part number.
  const byFX: Record<string, any[]> = {};
  for (const entry of equivalences) {
    const bucket = byFX[entry.fx_part] ?? (byFX[entry.fx_part] = []);
    bucket.push(entry);
  }

  return Object.entries(byFX).map(([fxPart, matches]) => {
    const first = matches[0];
    const matchList = matches
      .map((match: any) => ` - ${match.comp_vendor} ${match.comp_part}: $${match.price} ${match.currency}`)
      .join("\n");
    const notesLine = first.match_notes ? `\n**Notes:** ${first.match_notes}` : "";

    const question = `What competitor products are OPN-confirmed equivalents to Flexoptix ${fxPart}?`;
    const answer = [
      `Based on the TIP manufacturer compatibility matrix, the following are OPN-confirmed (confidence: 1.0) equivalences for Flexoptix **${fxPart}** (${first.form_factor}, ${first.speed_gbps}G):`,
      `**Manufacturer-confirmed equivalences:**`,
      matchList,
      `These matches are derived from the Flexoptix compatibility matrix which lists the original OEM part numbers that each FX product replaces. Confidence = 1.0 means this is manufacturer-confirmed, not spec-estimated.`,
      notesLine,
      `For procurement decisions, these prices reflect current market rates. Contact Flexoptix for volume pricing on the FX equivalent.`,
    ].join("\n");

    return pair(question, answer);
  });
}
/**
 * Builds one QA pair per "<form factor>-<speed>G" group of spec-based
 * equivalences.
 *
 * Groups with fewer than 2 matches are skipped and at most 6 products are
 * listed per group. The matching-criteria line uses the first match's notes,
 * or a generic description when it has none.
 *
 * @param equivalences - Rows as returned by getSpecEquivalenceExamples().
 * @returns The generated training records.
 */
function generateSpecEquivalencePairs(equivalences: any[]): Row[] {
  // Bucket the matches by form factor + speed.
  const groups: Record<string, any[]> = {};
  for (const entry of equivalences) {
    const groupKey = `${entry.form_factor}-${entry.speed_gbps}G`;
    const bucket = groups[groupKey] ?? (groups[groupKey] = []);
    bucket.push(entry);
  }

  return Object.entries(groups)
    .filter(([, matches]) => matches.length >= 2)
    .map(([key, matches]) => {
      const first = matches[0];
      const matchList = matches
        .slice(0, 6)
        .map((match: any) => ` - ${match.comp_vendor} ${match.comp_part}: $${match.price} ${match.currency}`)
        .join("\n");
      const criteria =
        first.match_notes ||
        `Form factor: ${first.form_factor}, Speed: ${first.speed_gbps}G, Reach tier, Wavelength ±10nm`;

      const question = `I'm looking for ${key} compatible transceivers. What are the spec-based equivalent options with pricing?`;
      const answer = [
        `Based on TIP's spec-matching engine for **${key}** transceivers (confidence: 0.85, spec-matched):`,
        `**Available compatible products (current market prices):**`,
        matchList,
        `**Matching criteria applied:**`,
        criteria,
        `**Important notes:**`,
        `- Spec matches have 0.85 confidence (vs 1.0 for OPN-confirmed matches)`,
        `- Verify specific reach and wavelength requirements before ordering`,
        `- For OPN-confirmed alternatives with the highest confidence, check if an FX part number maps to this spec`,
        `Flexoptix offers fully programmable transceivers that can often address multiple spec variants from a single SKU, reducing inventory complexity.`,
      ].join("\n");

      return pair(question, answer);
    });
}
/**
 * Builds one pricing QA pair per form factor from the high-speed price list.
 *
 * The speed label (e.g. "200/400/800") is now sorted numerically. The default
 * Array.prototype.sort compares elements as strings, which would order 1600
 * before 200.
 *
 * NOTE(review): the closing sentence always mentions "QSFP-DD 400G" even for
 * other form factors; it is left unchanged here — confirm whether it should
 * vary.
 *
 * @param products400g - Rows as returned by get400GPricingData().
 * @returns The generated training records (empty when there is no input).
 */
function generate400GPairs(products400g: any[]): Row[] {
  const rows: Row[] = [];
  if (products400g.length === 0) return rows;

  const today = new Date().toISOString().split("T")[0];

  const byFormFactor: Record<string, any[]> = {};
  for (const p of products400g) {
    if (!byFormFactor[p.form_factor]) byFormFactor[p.form_factor] = [];
    byFormFactor[p.form_factor].push(p);
  }

  for (const [ff, products] of Object.entries(byFormFactor)) {
    const priceList = products.map((p: any) =>
      ` - ${p.vendor} ${p.part_number} (${p.reach_meters}m${p.wavelengths ? " @ " + p.wavelengths + "nm" : ""}): $${p.price} ${p.currency}`
    ).join("\n");

    // Numeric comparator: speeds may arrive as numbers or numeric strings.
    const speeds = [...new Set(products.map((p: any) => p.speed_gbps))]
      .sort((a: any, b: any) => Number(a) - Number(b))
      .join("/");

    let marketContext: string;
    switch (ff) {
      case "QSFP-DD":
        marketContext = "QSFP-DD 400G is the dominant 400G form factor for data center deployments, with 8x50G PAM4 electrical interface";
        break;
      case "QSFP112":
        marketContext = "QSFP112 uses 4x100G PAM4 lanes, preferred for high-density 400G where thermal budget is critical";
        break;
      case "OSFP":
        marketContext = "OSFP supports up to 800G and is preferred for AI/ML cluster spine deployments";
        break;
      default:
        marketContext = `${ff} is a key form factor in next-gen networking deployments`;
    }

    rows.push(pair(
      `What is current market pricing for ${ff} ${speeds}G transceivers? I'm planning a data center upgrade.`,
      `Here is the current TIP pricing intelligence for **${ff} ${speeds}G** transceivers (data: ${today}):
**Market pricing:**
${priceList}
**Market context:**
- ${marketContext}
- Price points vary significantly by reach: DR4/FR4 (2km) is lowest cost; LR4/ER4/ZR (10km+) commands premium
- 400G pricing has compressed 30-40% over the past 18 months as manufacturing volumes increased
For Flexoptix QSFP-DD 400G equivalents, the D.xxx product family covers SR4, DR4, FR4, and LR4 variants with full compatibility guarantees.`
    ));
  }
  return rows;
}
/**
 * Builds the single "which vendors does TIP track" QA pair.
 *
 * Lists up to 12 vendors with product count, average price and price range,
 * followed by a static vendor-tier summary.
 *
 * The price range is rendered as "$min–$max"; without the separator the two
 * figures ran together (e.g. "$10$500"). The tier lines likewise separate the
 * vendor list from its description with a dash.
 *
 * @param vendorData - Rows as returned by getVendorPricingOverview().
 * @returns The generated training record.
 */
function generateVendorOverviewPair(vendorData: any[]): Row {
  const today = new Date().toISOString().split("T")[0];
  const vendorList = vendorData.slice(0, 12).map((v: any) =>
    ` - **${v.vendor}**: ${v.products_with_prices} products, avg $${v.avg_price_usd} (range: $${v.min_price_usd}–$${v.max_price_usd})`
  ).join("\n");

  return pair(
    `Which compatible optical transceiver vendors does TIP track, and what are their pricing profiles?`,
    `TIP tracks real-time pricing across all major compatible transceiver vendors. Here is the current competitive landscape (data: ${today}):
**Vendors with live pricing data:**
${vendorList}
**Vendor tier summary:**
- **Tier 1 (Broad catalog, competitive pricing):** fs.com, 10Gtek, Optcore, Fibertrade – large assortment, aggressive retail pricing, good for 10G/25G/100G commodity items
- **Tier 2 (Specialized/niche):** IntelliPhy, ATGBICS, QSFPTEK – focused on specific form factors or regions
- **B2B Quote-Only:** Eoptolink, Ascent Optics, GAO Tek – no public pricing, volume/contract based
- **OEM/Premium:** Cisco, Juniper, Arista – original vendor pricing, highest cost, lock-in dependent
TIP updates prices continuously via automated scrapers. The compatibility matrix maps these competitor products to Flexoptix FX equivalents with confidence scores.`
  );
}
/**
 * Builds the single "database scope and coverage" training pair.
 *
 * All counts are rendered through one formatter so they look the same
 * regardless of whether the value arrives as a number or a numeric string,
 * and regardless of the locale of the machine that generates the data set.
 *
 * @param stats Object read for `total_transceivers`, `fx_products`,
 *   `opn_equivalences`, `spec_equivalences` and `fresh_prices_7d`.
 * @returns The question/answer row produced by `pair`, tagged "db-coverage".
 * @throws Error if any of the counts is missing or not numeric, so that
 *   "NaN" never ends up in the training text.
 */
function generateCoverageStatsPair(stats: any): Row {
  // Parses with an explicit radix and pins the locale: the answer text is
  // English, so thousands separators must not depend on the host locale.
  const formatCount = (label: string, value: unknown): string => {
    const n = typeof value === "number" ? value : Number.parseInt(String(value), 10);
    if (!Number.isFinite(n)) {
      throw new Error(`Coverage stat "${label}" is not numeric: ${String(value)}`);
    }
    return n.toLocaleString("en-US");
  };

  const today = new Date().toISOString().split("T")[0];
  const totalTransceivers = formatCount("total_transceivers", stats.total_transceivers);
  const fxProducts = formatCount("fx_products", stats.fx_products);
  const opnEquivalences = formatCount("opn_equivalences", stats.opn_equivalences);
  const specEquivalences = formatCount("spec_equivalences", stats.spec_equivalences);
  const freshPrices = formatCount("fresh_prices_7d", stats.fresh_prices_7d);

  // NOTE(review): the "~88%" coverage figure below is hard-coded text, not
  // derived from `stats`; it will drift as the database changes.
  return pair(
    `What is the current scope and coverage of the Transceiver Intelligence Platform database?`,
    `The TIP database as of ${today} contains:
**Catalog coverage:**
- **${totalTransceivers} transceivers** total (all vendors)
- **${fxProducts} Flexoptix products** — the reference catalog
- Multiple competitor vendors tracked continuously
**Equivalence matching:**
- **${opnEquivalences} OPN-confirmed equivalences** (confidence: 1.0) — manufacturer-verified
- **${specEquivalences} spec-based equivalences** (confidence: 0.85) — algorithmically matched by form factor + speed + reach + wavelength
- Coverage: ~88% of Flexoptix products have at least one confirmed competitor equivalent
**Pricing intelligence:**
- **${freshPrices} competitor products with fresh pricing** (updated within 7 days)
- Automated scrapers cover: fs.com, sfpcables.com (10Gtek), Optcore, Fibertrade, ATGBICS, IntelliPhy, and more
- Prices updated continuously via pg-boss job scheduler (24/7 operation)
**Data quality:**
- OPN matches use the official Flexoptix compatibility matrix — same source used by network engineers
- Spec matches use: form_factor + speed_gbps + reach tier (SR/IR/LR/ER/ZR) + wavelength ±10nm
- Safety cap: FX products matching >30 competitors are excluded (too generic, unreliable)`,
    "db-coverage"
  );
}
// ── Main ──────────────────────────────────────────────────────────────────────
/**
 * Script entry point: loads all inputs from the database, turns them into
 * training pairs, drops rows with a repeated `id` (first occurrence wins),
 * writes the result as JSONL to `training-data/tip-llm-pricing-v1.jsonl`
 * under the current working directory, and closes the pool.
 */
async function main() {
  console.log("Generating TIP_LLM pricing training data from DB...\n");

  // The six loaders are awaited together; none depends on another's result.
  const [tierPrices, opnExamples, specExamples, vendors, highSpeedProducts, coverage] =
    await Promise.all([
      getPriceSummaryByTier(),
      getOPNEquivalenceExamples(60),
      getSpecEquivalenceExamples(40),
      getVendorPricingOverview(),
      get400GPricingData(),
      getCoverageStats(),
    ]);

  console.log(`Price summary rows: ${tierPrices.length}`);
  console.log(`OPN equivalence examples: ${opnExamples.length}`);
  console.log(`Spec equivalence examples: ${specExamples.length}`);
  console.log(`Vendor overview rows: ${vendors.length}`);
  console.log(`400G+ products: ${highSpeedProducts.length}`);

  const candidates: Row[] = [
    ...generatePriceSummaryPairs(tierPrices),
    ...generateOPNEquivalencePairs(opnExamples),
    ...generateSpecEquivalencePairs(specExamples),
    ...generate400GPairs(highSpeedProducts),
    generateVendorOverviewPair(vendors),
    generateCoverageStatsPair(coverage),
  ];

  // Deduplicate by id. A Map keeps insertion order, so the first row seen
  // for each id is the one that survives, in its original position.
  const firstById = new Map<string, Row>();
  for (const row of candidates) {
    if (!firstById.has(row.id)) {
      firstById.set(row.id, row);
    }
  }
  const unique = Array.from(firstById.values());
  console.log(`\nGenerated ${unique.length} unique training pairs`);

  // One JSON document per line, with a trailing newline after the last row.
  const outDir = join(process.cwd(), "training-data");
  const outPath = join(outDir, "tip-llm-pricing-v1.jsonl");
  mkdirSync(outDir, { recursive: true });
  const jsonLines = unique.map((row) => JSON.stringify(row));
  writeFileSync(outPath, `${jsonLines.join("\n")}\n`);

  console.log(`\nOutput: ${outPath}`);
  console.log(`Training pairs: ${unique.length}`);
  await pool.end();
}
// Run the generator. On any failure: log it, start closing the pool
// (not awaited), and exit with a non-zero status.
main().catch((fatalError) => {
  console.error("Fatal:", fatalError);
  void pool.end();
  process.exit(1);
});

View File

@ -34,6 +34,7 @@ const files: Record<Lane, string[]> = {
"market-business-analysis-part5.jsonl",
"market-business-analysis-part6.jsonl",
"training-data/tip-llm-capabilities-v1.jsonl",
"training-data/tip-llm-pricing-v1.jsonl",
],
blog_llm: [
"master-training-dataset.jsonl",

View File

@ -0,0 +1,85 @@
-- Migration 116: OPN-Based Equivalence Matcher
-- Uses the manufacturer-provided compatibility matrix (fx_compatibilities) to
-- create high-confidence equivalences between Flexoptix products and their
-- exact OEM counterparts in competitor catalogs.
--
-- Source of truth: FX API `fx_compatibilities` field — the vendor explicitly
-- states "this FX product replaces [vendor] [part_number]".
--
-- Match quality: confidence=1.0, match_basis='{opn}' (OEM Part Number)
-- These are better than spec-based matches because they are manufacturer-confirmed.
--
-- Rules:
-- - Only inserts NEW pairs (skips existing approved, auto_approved, rejected)
-- - Skips MSA Standard and Flexoptix entries (not real competitors)
-- - Case-insensitive part_number match
-- - Target must be a competitor vendor (is_competitor = true)
-- ── Insert new OPN-based equivalences ────────────────────────────────────────
-- Inserts one auto-approved, confidence-1.0 row per (FX product, competitor
-- product) pair whose competitor part_number equals an OPN listed in the FX
-- product's fx_compatibilities array (case-insensitive).
--
-- Notes:
--   * DISTINCT ON (fx.id, comp.id) guarantees at most one row per pair even
--     when the same OPN is listed under several vendor names (which would
--     otherwise yield rows that differ only in match_notes). The ORDER BY
--     makes the surviving match_notes deterministic.
--   * fx_compatibilities that is SQL NULL, JSON null, or any non-array JSON
--     value is treated as an empty list instead of aborting the statement
--     ("cannot extract elements from a scalar/object").
--   * The OPN is trimmed before comparison, consistent with the trimmed
--     length check below.
--   * Entries whose compatible_to_vendor is NULL are excluded: NOT IN
--     evaluates to NULL for them, so the WHERE clause rejects the row.
INSERT INTO transceiver_equivalences (
  flexoptix_id,
  competitor_id,
  confidence,
  status,
  match_basis,
  match_notes,
  created_at,
  updated_at
)
SELECT DISTINCT ON (fx.id, comp.id)
  fx.id AS flexoptix_id,
  comp.id AS competitor_id,
  1.0 AS confidence,
  'auto_approved' AS status,
  ARRAY['opn'] AS match_basis,
  'Manufacturer-confirmed: FX compatibility matrix lists ' ||
    COALESCE(compat->>'compatible_to_vendor', '?') || ' OPN ' ||
    COALESCE(compat->>'original_part_number', '?') AS match_notes,
  NOW() AS created_at,
  NOW() AS updated_at
FROM transceivers fx
JOIN vendors vfx ON vfx.id = fx.vendor_id AND UPPER(vfx.name) LIKE '%FLEXOPTIX%'
CROSS JOIN LATERAL jsonb_array_elements(
  CASE WHEN jsonb_typeof(fx.fx_compatibilities) = 'array'
       THEN fx.fx_compatibilities
       ELSE '[]'::jsonb
  END
) AS compat
JOIN transceivers comp
  ON UPPER(comp.part_number) = UPPER(trim(compat->>'original_part_number'))
JOIN vendors vcomp ON vcomp.id = comp.vendor_id AND vcomp.is_competitor = true
WHERE compat->>'original_part_number' IS NOT NULL
  AND length(trim(compat->>'original_part_number')) >= 4 -- ignore very short/empty OPNs
  AND compat->>'compatible_to_vendor' NOT IN ('MSA Standard (Default)', 'Flexoptix')
  -- Skip pairs that already have ANY equivalence (approved, auto_approved, rejected)
  AND NOT EXISTS (
    SELECT 1
    FROM transceiver_equivalences e
    WHERE e.flexoptix_id = fx.id
      AND e.competitor_id = comp.id
  )
ORDER BY fx.id, comp.id, match_notes
ON CONFLICT DO NOTHING;
-- ── Statistics ────────────────────────────────────────────────────────────────
-- Reports table-wide totals after the insert above. These are totals over
-- transceiver_equivalences, not the number of rows this run inserted, so the
-- first notice is labelled "total" rather than "new".
DO $$
DECLARE
  opn_total INTEGER;       -- all rows whose match_basis contains 'opn'
  fx_covered INTEGER;      -- distinct FX products among those rows
  comp_covered INTEGER;    -- distinct competitor products among those rows
  total_approved INTEGER;  -- all rows with status 'auto_approved', any basis
BEGIN
  SELECT COUNT(*) INTO opn_total
  FROM transceiver_equivalences WHERE 'opn' = ANY(match_basis);
  SELECT COUNT(DISTINCT flexoptix_id) INTO fx_covered
  FROM transceiver_equivalences WHERE 'opn' = ANY(match_basis);
  SELECT COUNT(DISTINCT competitor_id) INTO comp_covered
  FROM transceiver_equivalences WHERE 'opn' = ANY(match_basis);
  SELECT COUNT(*) INTO total_approved
  FROM transceiver_equivalences WHERE status = 'auto_approved';
  RAISE NOTICE 'Migration 116 complete: OPN-Based Equivalence Matcher';
  RAISE NOTICE ' OPN equivalences total: %', opn_total;
  RAISE NOTICE ' FX products covered: %', fx_covered;
  RAISE NOTICE ' Competitor products matched: %', comp_covered;
  RAISE NOTICE ' Total auto_approved: %', total_approved;
END $$;

View File

@ -0,0 +1,139 @@
-- Migration 117: Spec-Based Equivalence Matcher
-- Matches FX products with competitor products by technical specification
-- when no OPN-based equivalence already exists.
--
-- Match criteria (ALL must apply):
-- 1. Same form_factor (exact)
-- 2. Same speed_gbps (exact)
-- 3. Same reach tier (SR/IR/LR/ER/ZR — based on reach_meters)
-- 4. Same primary wavelength (within ±10nm, extracted from wavelengths field)
-- OR both have no wavelength data (broadband / non-WDM products)
-- 5. Target must be a competitor vendor (is_competitor = true)
-- 6. Max 30 competitor matches per FX product (too many = too generic)
--
-- Match quality:
-- confidence = 0.85 (high but below OPN-confirmed 1.0)
-- match_basis = '{spec}'
-- status = 'auto_approved'
--
-- Rules:
-- - Skips pairs that already have ANY equivalence (approved, auto_approved, rejected)
-- - Skips FX products that already have an OPN-based equivalence
-- (OPN match is preferred; spec is only a fallback)
-- - Minimum reach_meters = 10 on both sides (avoids reach=0 garbage data)
-- - Reach tier comparison handles DAC/AOC (SR ≤ 300m)
-- ── Helper: extract primary wavelength in nm from text field ─────────────────
-- Returns the FIRST wavelength of the first "<number>[sep <number>...] nm"
-- group in the text, or NULL when there is none (or the input is NULL).
--
--   "1310nm"         -> 1310
--   "850 NM"         -> 850    (unit is matched case-insensitively)
--   "1310/1550nm"    -> 1310   (shared unit: first number of the list wins)
--   "1310nm/1550nm"  -> 1310
--   "1270nm-1610nm"  -> 1270
--   "1310/1550"      -> NULL   (no unit)
--
-- A list that shares one trailing unit ("1310/1550nm") used to yield the
-- number adjacent to "nm" (1550), while the same product written as
-- "1310nm/1550nm" yielded 1310, so the two spellings never matched each
-- other within the ±10nm window. Both now resolve to the first wavelength.
CREATE OR REPLACE FUNCTION tip_extract_wavelength_nm(wl text)
RETURNS integer LANGUAGE sql IMMUTABLE PARALLEL SAFE AS $$
  SELECT (regexp_match(wl, '(\d{3,4})(?:\s*[/,-]\s*\d{3,4})*\s*nm', 'i'))[1]::integer
$$;
-- ── Helper: reach tier label ─────────────────────────────────────────────────
-- Maps a reach in meters to a coarse tier label (upper bounds inclusive).
-- Returns NULL for a NULL reach: previously NULL fell through every
-- comparison into the ELSE branch and was labelled 'ZR', so two products
-- with unknown reach would have compared as the same (longest) tier.
-- The NULL case is handled in the CASE rather than by declaring the function
-- STRICT so that the planner can still inline this SQL function.
CREATE OR REPLACE FUNCTION tip_reach_tier(reach integer)
RETURNS text LANGUAGE sql IMMUTABLE PARALLEL SAFE AS $$
  SELECT CASE
    WHEN reach IS NULL THEN NULL  -- unknown reach has no tier
    WHEN reach <= 300 THEN 'SR'   -- ≤300m (SR, VSR, DAC, AOC)
    WHEN reach <= 2000 THEN 'IR'  -- ≤2km (LX, LH intermediate)
    WHEN reach <= 10000 THEN 'LR' -- ≤10km (LR, LX, standard LH)
    WHEN reach <= 40000 THEN 'ER' -- ≤40km (ER, extended reach)
    ELSE 'ZR'                     -- >40km (ZR, ZR+, coherent)
  END
$$;
-- ── Insert spec-based equivalences ──────────────────────────────────────────
-- Inserts auto-approved, confidence-0.85 equivalences for FX/competitor pairs
-- that agree on form factor, speed, reach tier and wavelength.
--
-- The match predicate is defined exactly once (in `candidates`) and is used
-- both for the rows to insert and for the 30-match safety cap; previously it
-- was written twice and had to be kept in sync by hand. The reach tier and
-- wavelength of each product are likewise computed once per product.
--
--   fx_specs    Flexoptix products with usable reach (>= 10 m) and speed (> 0)
--   comp_specs  competitor products with usable reach (>= 10 m)
--   candidates  every distinct (FX, competitor) pair that satisfies the spec match
--   counted     candidates plus the number of candidates per FX product
WITH fx_specs AS (
  SELECT
    fx.id,
    fx.form_factor,
    fx.speed_gbps,
    tip_reach_tier(fx.reach_meters) AS reach_tier,
    tip_extract_wavelength_nm(fx.wavelengths) AS wavelength_nm
  FROM transceivers fx
  JOIN vendors vfx ON vfx.id = fx.vendor_id AND UPPER(vfx.name) LIKE '%FLEXOPTIX%'
  WHERE fx.reach_meters >= 10 -- no garbage reach=0 on FX side
    AND fx.speed_gbps > 0
),
comp_specs AS (
  SELECT
    comp.id,
    comp.form_factor,
    comp.speed_gbps,
    tip_reach_tier(comp.reach_meters) AS reach_tier,
    tip_extract_wavelength_nm(comp.wavelengths) AS wavelength_nm
  FROM transceivers comp
  JOIN vendors vcomp ON vcomp.id = comp.vendor_id AND vcomp.is_competitor = true
  WHERE comp.reach_meters >= 10 -- no garbage reach=0
),
candidates AS (
  SELECT DISTINCT
    fx.id AS flexoptix_id,
    comp.id AS competitor_id,
    'Spec match: ' || fx.form_factor || ' ' || fx.speed_gbps || 'G ' ||
      fx.reach_tier ||
      CASE WHEN fx.wavelength_nm IS NOT NULL
           THEN ' @' || fx.wavelength_nm || 'nm'
           ELSE '' END AS match_notes
  FROM fx_specs fx
  JOIN comp_specs comp
    ON comp.form_factor = fx.form_factor
   AND comp.speed_gbps = fx.speed_gbps
   AND comp.reach_tier = fx.reach_tier
   -- Wavelength: both must match within ±10nm, OR both have no wavelength.
   -- If only one side has a wavelength, COALESCE(.., 0) makes the difference
   -- equal to that wavelength (3-4 digits), which always exceeds 10.
   AND (
     (fx.wavelength_nm IS NULL AND comp.wavelength_nm IS NULL)
     OR
     ABS( COALESCE(comp.wavelength_nm, 0) - COALESCE(fx.wavelength_nm, 0) ) <= 10
   )
),
counted AS (
  SELECT
    c.flexoptix_id,
    c.competitor_id,
    c.match_notes,
    -- Counted over ALL candidates of the FX product, before the
    -- "already exists" filters below are applied.
    COUNT(*) OVER (PARTITION BY c.flexoptix_id) AS candidate_cnt
  FROM candidates c
)
INSERT INTO transceiver_equivalences (
  flexoptix_id,
  competitor_id,
  confidence,
  status,
  match_basis,
  match_notes,
  created_at,
  updated_at
)
SELECT
  c.flexoptix_id,
  c.competitor_id,
  0.85 AS confidence,
  'auto_approved' AS status,
  ARRAY['spec'] AS match_basis,
  c.match_notes,
  NOW() AS created_at,
  NOW() AS updated_at
FROM counted c
-- Safety cap: skip FX product if it would match > 30 competitors
-- (indicates too-generic spec — needs stricter criteria)
WHERE c.candidate_cnt <= 30
  -- FX product has no OPN-based equivalence at all (spec is fallback only)
  AND NOT EXISTS (
    SELECT 1 FROM transceiver_equivalences e
    WHERE e.flexoptix_id = c.flexoptix_id
      AND 'opn' = ANY(e.match_basis)
  )
  -- Skip pairs that already have ANY equivalence
  AND NOT EXISTS (
    SELECT 1 FROM transceiver_equivalences e
    WHERE e.flexoptix_id = c.flexoptix_id
      AND e.competitor_id = c.competitor_id
  )
ON CONFLICT DO NOTHING;
-- ── Statistics ───────────────────────────────────────────────────────────────
-- Reports table-wide totals for spec-based rows after the insert above.
-- All three figures are totals over transceiver_equivalences, not deltas for
-- this run, so the FX notice says "covered" rather than "newly covered".
DO $$
DECLARE
  spec_total INTEGER;    -- all rows whose match_basis contains 'spec'
  fx_covered INTEGER;    -- distinct FX products among those rows
  comp_covered INTEGER;  -- distinct competitor products among those rows
BEGIN
  SELECT COUNT(*) INTO spec_total
  FROM transceiver_equivalences WHERE 'spec' = ANY(match_basis);
  SELECT COUNT(DISTINCT flexoptix_id) INTO fx_covered
  FROM transceiver_equivalences WHERE 'spec' = ANY(match_basis);
  SELECT COUNT(DISTINCT competitor_id) INTO comp_covered
  FROM transceiver_equivalences WHERE 'spec' = ANY(match_basis);
  RAISE NOTICE 'Migration 117 complete: Spec-Based Equivalence Matcher';
  RAISE NOTICE ' Spec equivalences total: %', spec_total;
  RAISE NOTICE ' FX products covered: %', fx_covered;
  RAISE NOTICE ' Competitor products matched: %', comp_covered;
END $$;

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,20 +1,29 @@
{
"raw_pairs": 11508,
"raw_pairs": 11635,
"duplicates_removed": 100,
"training_pairs": 11408,
"train_pairs": 10267,
"eval_pairs": 1141,
"training_pairs": 11535,
"train_pairs": 10381,
"eval_pairs": 1154,
"sources": {
"external:vendor-deep-dives.jsonl": 11200,
"blog-training-data/blog-164-network-research-innovation-emerging-technologies.md": 1,
"external:technical-deep-dives.jsonl": 84,
"blog-training-data/blog-174-network-performance-testing-rfc2544-y1564.md": 1,
"blog-training-data/blog-179-data-center-physical-infrastructure-design.md": 1,
"blog-training-data/blog-025-sfp28-lab-vs-rack.md": 1,
"blog-training-data/blog-091-wavelength-selective-switch-wss-explainer.md": 1,
"blog-training-data/blog-008-oem-vs-compatible-real-numbers.md": 1,
"blog-training-data/blog-150-comprehensive-optical-network-program-management.md": 1,
"blog-training-data/blog-014-800g-new-products-what-ships.md": 1,
"blog-training-data/blog-045-osnr-link-budget-practical-guide.md": 1,
"blog-training-data/blog-178-outside-plant-construction-cable-installation.md": 1,
"blog-training-data/blog-024-rx-power-budgets-400g.md": 1,
"blog-training-data/blog-187-ab-testing-conversion-optimization-b2b-content.md": 1,
"blog-training-data/blog-151-optical-network-troubleshooting-advanced-scenarios.md": 1,
"blog-training-data/blog-107-dwdm-when-you-need-it.md": 1,
"blog-training-data/blog-017-dom-readings-lie.md": 1,
"blog-training-data/blog-010-qsfp-dd-vs-osfp-form-factor-reality.md": 1,
"blog-training-data/blog-153-optical-deployment-best-practices-comprehensive.md": 1,
"blog-training-data/blog-072-optical-amplifier-edfa-raman-basics.md": 1,
"blog-training-data/blog-028-400g-dac-3m-vs-5m.md": 1,
"blog-training-data/blog-011-transceiver-procurement-checklist.md": 1,
@ -22,87 +31,205 @@
"blog-training-data/blog-083-fiber-optic-testing-otdr-basics.md": 1,
"blog-training-data/blog-038-cpo-pluggable-future.md": 1,
"blog-training-data/blog-054-multimode-fiber-om3-om4-om5-guide.md": 1,
"blog-training-data/blog-127-streaming-cdn-content-delivery.md": 1,
"blog-training-data/blog-015-compatible-vendor-comparison-who-to-trust.md": 1,
"blog-training-data/blog-063-100g-zr-coherent-pluggable-timing.md": 1,
"blog-training-data/blog-195-case-study-craft-stories-drive-decisions.md": 1,
"blog-training-data/blog-221-content-attribution-multi-touch-modeling.md": 1,
"blog-training-data/blog-192-ai-prompt-engineering-technical-content.md": 1,
"blog-training-data/blog-135-network-security-optical-physical-layer.md": 1,
"blog-training-data/blog-144-network-virtualization-overlays-optical.md": 1,
"blog-training-data/blog-125-optical-network-troubleshooting-mastery.md": 1,
"blog-training-data/blog-197-content-analytics-roi-measurement.md": 1,
"blog-training-data/blog-219-content-governance-compliance-regulated-industries.md": 1,
"blog-training-data/blog-171-fiber-types-specifications-complete-reference.md": 1,
"blog-training-data/blog-069-optical-budget-calculator-guide.md": 1,
"blog-training-data/blog-169-optical-networking-competitive-landscape-analysis.md": 1,
"blog-training-data/blog-070-mtp-mpo-cassette-fiber-management.md": 1,
"blog-training-data/blog-134-cloud-networking-optical-transceiver-strategy.md": 1,
"blog-training-data/blog-138-network-observability-telemetry-optical.md": 1,
"blog-training-data/blog-159-optical-network-incident-management-emergency.md": 1,
"blog-training-data/blog-092-sfp-sfp-plus-backward-compatibility.md": 1,
"blog-training-data/blog-086-hyperscale-optics-purchasing-strategy.md": 1,
"blog-training-data/blog-055-transceiver-lifecycle-management-enterprise.md": 1,
"blog-training-data/blog-161-optical-network-mergers-acquisitions-integration.md": 1,
"blog-training-data/blog-066-400g-zr-interoperability-matrix.md": 1,
"blog-training-data/blog-228-economics-content-marketing-business-model.md": 1,
"blog-training-data/blog-193-advanced-seo-b2b-technical-content.md": 1,
"blog-training-data/blog-166-osi-model-optical-networking-complete-layer-analysis.md": 1,
"blog-training-data/blog-093-google-meta-microsoft-optics-strategy.md": 1,
"blog-training-data/blog-019-cleaning-fiber-400g-tolerance.md": 1,
"blog-training-data/blog-102-compliance-checklist-imported-transceivers.md": 1,
"blog-training-data/blog-175-cloud-networking-deep-dive-vpc-containers-mesh.md": 1,
"blog-training-data/blog-026-400g-zr-vs-zrplus.md": 1,
"blog-training-data/blog-035-esd-damage-transceivers.md": 1,
"blog-training-data/blog-199-industry-analyst-relations-gartner-forrester.md": 1,
"blog-training-data/blog-124-network-automation-optical-infrastructure.md": 1,
"blog-training-data/blog-123-silicon-photonics-co-packaged-optics.md": 1,
"blog-training-data/blog-087-rj45-vs-sfp-copper-1g-switches.md": 1,
"blog-training-data/blog-132-quantum-networking-optical-infrastructure.md": 1,
"blog-training-data/blog-120-telco-5g-6g-fronthaul-midhaul-backhaul.md": 1,
"blog-training-data/blog-009-100g-to-400g-migration-what-breaks.md": 1,
"blog-training-data/blog-104-ai-chip-shortage-optics-supply.md": 1,
"blog-training-data/blog-034-grey-optics-vs-dwdm-metro-aggregation.md": 1,
"blog-training-data/blog-167-security-layers-defense-depth-optical-networks.md": 1,
"blog-training-data/blog-154-optical-network-roi-business-value-analysis.md": 1,
"blog-training-data/blog-082-coherent-dsp-power-consumption.md": 1,
"blog-training-data/blog-062-transceiver-inventory-management-excel-vs-cmdb.md": 1,
"blog-training-data/blog-088-transceiver-sff-committee-history.md": 1,
"blog-training-data/blog-098-carrier-ethernet-timing-syncE-ptp-optics.md": 1,
"blog-training-data/blog-122-pam4-pam8-modulation-data-center.md": 1,
"blog-training-data/blog-003-silicon-photonics.md": 1,
"blog-training-data/blog-130-edge-computing-network-optics-future.md": 1,
"blog-training-data/blog-037-fec-deep-dive.md": 1,
"blog-training-data/blog-099-transceiver-market-2026-pricing-forecast.md": 1,
"blog-training-data/blog-155-optical-networking-knowledge-management.md": 1,
"blog-training-data/blog-021-validating-compatible-optics.md": 1,
"blog-training-data/blog-176-greenfield-network-infrastructure-complete-build.md": 1,
"blog-training-data/blog-023-pam4-800g-fec-errors.md": 1,
"blog-training-data/blog-204-customer-marketing-advocacy-programs.md": 1,
"blog-training-data/blog-030-when-to-upgrade-from-10g.md": 1,
"blog-training-data/blog-131-telco-carrier-grade-optical-operations.md": 1,
"blog-training-data/blog-002-vendor-lock-in-optics.md": 1,
"blog-training-data/blog-198-complete-content-engine-operating-system.md": 1,
"blog-training-data/blog-173-internet-architecture-deep-dive-bgp-ixps-peering.md": 1,
"blog-training-data/blog-220-investor-relations-content-tech-companies.md": 1,
"blog-training-data/blog-225-privacy-data-protection-content-practices.md": 1,
"blog-training-data/blog-180-network-project-management-permitting-execution.md": 1,
"blog-training-data/blog-224-generative-ai-future-content-marketing.md": 1,
"blog-training-data/blog-081-transceiver-rma-process-best-practices.md": 1,
"blog-training-data/blog-013-price-drop-timing-when-to-buy.md": 1,
"blog-training-data/blog-160-future-of-optical-networking-comprehensive.md": 1,
"blog-training-data/blog-095-optical-lan-versus-fiber-ethernet.md": 1,
"blog-training-data/blog-117-submarine-cable-coherent-long-haul.md": 1,
"blog-training-data/blog-067-single-mode-fiber-types-g652-g657.md": 1,
"blog-training-data/blog-177-site-survey-capacity-planning-methodology.md": 1,
"blog-training-data/blog-039-cmis-400g-management.md": 1,
"blog-training-data/blog-213-original-research-proprietary-data.md": 1,
"blog-training-data/blog-226-accessibility-inclusive-content-design.md": 1,
"blog-training-data/blog-142-network-design-patterns-optical-architecture.md": 1,
"blog-training-data/blog-113-rma-warranty-optimization.md": 1,
"blog-training-data/blog-071-sff-8024-transceiver-id-codes.md": 1,
"blog-training-data/blog-097-liquid-cooling-impact-optical-transceivers.md": 1,
"blog-training-data/blog-007-800g-readiness.md": 1,
"blog-training-data/blog-058-arista-eos-optic-compatibility.md": 1,
"blog-training-data/blog-136-emerging-protocols-cxl-roce-rdma.md": 1,
"blog-training-data/blog-068-25g-vs-10g-upgrade-path-decision.md": 1,
"blog-training-data/blog-170-network-management-protocols-comprehensive-snmp-netconf.md": 1,
"blog-training-data/blog-061-cfp2-cfp4-qsfp28-form-factor-migration.md": 1,
"blog-training-data/blog-147-optical-network-testing-validation-procedures.md": 1,
"blog-training-data/blog-079-ip-optical-integration-disaggregation.md": 1,
"blog-training-data/blog-129-manufacturing-iot-industrial-network.md": 1,
"blog-training-data/blog-046-transceiver-counterfeit-detection.md": 1,
"blog-training-data/blog-183-perfect-hooks-teasers-curiosity-gap.md": 1,
"blog-training-data/blog-056-cisco-qsfp28-compatibility-list.md": 1,
"blog-training-data/blog-005-coherent-400zr-reality.md": 1,
"blog-training-data/blog-203-executive-personal-brand-technical-leaders.md": 1,
"blog-training-data/blog-109-third-party-optics-validation-lab-testing.md": 1,
"blog-training-data/blog-065-dwdm-channel-plan-100ghz-vs-50ghz.md": 1,
"blog-training-data/blog-227-emerging-platforms-content-innovation.md": 1,
"blog-training-data/blog-078-pon-gpon-xgspon-optics-explainer.md": 1,
"blog-training-data/blog-051-spine-leaf-transceiver-strategy.md": 1,
"blog-training-data/blog-032-msa-compliance-vs-interoperability.md": 1,
"blog-training-data/blog-064-optic-burn-in-testing.md": 1,
"blog-training-data/blog-114-counterfeit-detection-supply-chain.md": 1,
"blog-training-data/blog-133-disaggregated-networking-future-architecture.md": 1,
"blog-training-data/blog-105-why-it-teams-care-optics.md": 1,
"blog-training-data/blog-001-400g-dr4-price-war.md": 1,
"blog-training-data/blog-040-evaluating-compatible-vendor.md": 1,
"blog-training-data/blog-211-employee-advocacy-internal-content.md": 1,
"blog-training-data/blog-202-video-podcast-content-b2b-tech.md": 1,
"blog-training-data/blog-042-800g-osfp-vs-qsfp-dd-port-density.md": 1,
"blog-training-data/blog-140-future-optical-networking-2030.md": 1,
"blog-training-data/blog-139-disaster-recovery-business-continuity-optical.md": 1,
"blog-training-data/blog-148-vendor-relationship-strategic-partnerships.md": 1,
"blog-training-data/blog-100-flexoptix-programming-service-technical.md": 1,
"blog-training-data/blog-118-ai-ml-workload-network-optics.md": 1,
"blog-training-data/blog-076-cisco-nexus-vs-catalyst-optic-behavior.md": 1,
"blog-training-data/blog-053-cisco-juniper-arista-optic-lock-in.md": 1,
"blog-training-data/blog-044-laser-safety-class-1m-transceivers.md": 1,
"blog-training-data/blog-152-optical-network-architecture-evolution-2025-2030.md": 1,
"blog-training-data/blog-094-transceiver-programming-eeprom-guide.md": 1,
"blog-training-data/blog-222-content-marketing-team-development.md": 1,
"blog-training-data/blog-085-ai-inference-cluster-optics-requirements.md": 1,
"blog-training-data/blog-206-crisis-communications-reputation-management.md": 1,
"blog-training-data/blog-188-email-marketing-b2b-technical-content.md": 1,
"blog-training-data/blog-149-network-security-zero-trust-optical-implementation.md": 1,
"blog-training-data/blog-216-ai-ethics-responsible-content-creation.md": 1,
"blog-training-data/blog-182-science-of-perfect-blog-writing.md": 1,
"blog-training-data/blog-194-brand-voice-architecture-technical-companies.md": 1,
"blog-training-data/blog-163-network-skills-careers-optical-engineering-future.md": 1,
"blog-training-data/blog-052-roa-replacing-optics-proactively.md": 1,
"blog-training-data/blog-162-network-as-a-service-cloud-native-optical.md": 1,
"blog-training-data/blog-090-optics-for-5g-fronthaul-midhaul.md": 1,
"blog-training-data/blog-126-fintech-financial-services-network-optics.md": 1,
"blog-training-data/blog-201-sales-enablement-content-strategy.md": 1,
"blog-training-data/blog-186-perfect-blog-engine-architecture-synthesis.md": 1,
"blog-training-data/blog-041-silicon-photonics-co-packaging-2026.md": 1,
"blog-training-data/blog-156-network-protocols-l1-encryption-deep-dive.md": 1,
"blog-training-data/blog-096-dark-fiber-leasing-optics-considerations.md": 1,
"blog-training-data/blog-108-advanced-fiber-contamination-diagnostics.md": 1,
"blog-training-data/blog-215-recruiting-employer-branding-content.md": 1,
"blog-training-data/blog-112-open-networking-optics-ecosystem.md": 1,
"blog-training-data/blog-121-400g-800g-coherent-optics-deep-dive.md": 1,
"blog-training-data/blog-084-ieee-802.3-standards-transceiver-reference.md": 1,
"blog-training-data/blog-012-coherent-vs-direct-detect-decision.md": 1,
"blog-training-data/blog-165-optical-networking-comprehensive-reference-guide.md": 1,
"blog-training-data/blog-004-400g-migration-fiber-plant.md": 1,
"blog-training-data/blog-115-healthcare-network-optics-compliance.md": 1,
"blog-training-data/blog-119-sustainability-carbon-footprint-optical.md": 1,
"blog-training-data/blog-060-fiber-connector-cleaning-protocol.md": 1,
"blog-training-data/blog-143-network-protocols-modern-optical-infrastructure.md": 1,
"blog-training-data/blog-172-transceiver-form-factors-complete-reference.md": 1,
"blog-training-data/blog-207-localization-international-content-strategy.md": 1,
"blog-training-data/blog-106-fiber-diagnostics-eye-diagrams.md": 1,
"blog-training-data/blog-158-network-time-synchronization-precision-timing.md": 1,
"blog-training-data/blog-217-strategic-partnerships-co-marketing.md": 1,
"blog-training-data/blog-027-fiber-plant-audit-100g-upgrade.md": 1,
"blog-training-data/blog-016-400g-qsfp-dd-after-fiber-moves.md": 1,
"blog-training-data/blog-145-data-center-interconnect-dci-optical-design.md": 1,
"blog-training-data/blog-205-product-launch-content-strategy.md": 1,
"blog-training-data/blog-074-fiber-optic-patch-cord-standards.md": 1,
"blog-training-data/blog-057-juniper-optic-unlock-ex-qfx.md": 1,
"blog-training-data/blog-196-newsletter-strategy-technical-audiences.md": 1,
"blog-training-data/blog-214-press-relations-media-strategy.md": 1,
"blog-training-data/blog-022-oem-vs-compatible-lab-tests.md": 1,
"blog-training-data/blog-218-sustainable-content-marketing-practice.md": 1,
"blog-training-data/blog-020-100g-link-drops-temperature.md": 1,
"blog-training-data/blog-191-editorial-operations-content-engine-management.md": 1,
"blog-training-data/blog-146-optical-network-capacity-planning-bandwidth.md": 1,
"blog-training-data/blog-050-optical-transceiver-temperature-grades.md": 1,
"blog-training-data/blog-208-community-building-technical-content.md": 1,
"blog-training-data/blog-111-cisco-arista-juniper-optics-strategies.md": 1,
"blog-training-data/blog-141-optical-network-cost-engineering-tco.md": 1,
"blog-training-data/blog-036-coherent-tunable-vs-fixed-wavelength.md": 1,
"blog-training-data/blog-181-neurolinguistic-persuasion-blog-writing.md": 1,
"blog-training-data/blog-209-account-based-marketing-abm-content.md": 1,
"blog-training-data/blog-200-webinar-virtual-event-content-strategy.md": 1,
"blog-training-data/blog-077-pam4-vs-nrz-modulation-transceivers.md": 1,
"blog-training-data/blog-212-interactive-content-calculators-tools.md": 1,
"blog-training-data/blog-080-fcoe-fibre-channel-sfp-differences.md": 1,
"blog-training-data/blog-168-optical-transceiver-manufacturers-comprehensive-landscape.md": 1,
"blog-training-data/blog-043-zr-zr-plus-coherent-pluggables-comparison.md": 1,
"blog-training-data/blog-049-wavelength-division-multiplexing-primer.md": 1,
"blog-training-data/blog-089-metro-dwdm-open-vs-proprietary.md": 1,
"blog-training-data/blog-128-government-federal-network-optics.md": 1,
"blog-training-data/blog-116-carrier-isp-optics-operations.md": 1,
"blog-training-data/blog-073-qsfp-dd-800g-ecosystem-2026.md": 1,
"blog-training-data/blog-210-marketing-automation-lead-nurturing.md": 1,
"blog-training-data/blog-189-linkedin-social-distribution-b2b-tech.md": 1,
"blog-training-data/blog-018-800g-sr8-dr8-fr8-comparison.md": 1,
"blog-training-data/blog-029-800g-osfp-spineleaf-checklist.md": 1,
"blog-training-data/blog-110-wavelength-tuning-dwdm.md": 1,
"blog-training-data/blog-103-carbon-footprint-oem-compatible-tco.md": 1,
"blog-training-data/blog-137-regional-optical-network-considerations-global.md": 1,
"blog-training-data/blog-006-dom-diagnostics.md": 1,
"blog-training-data/blog-157-multicast-video-broadcast-optical-networks.md": 1,
"blog-training-data/blog-185-b2b-decision-psychology-trust-signals.md": 1,
"blog-training-data/blog-223-final-capstone-sustainable-excellence.md": 1,
"blog-training-data/blog-075-transceiver-failure-root-cause-analysis.md": 1,
"blog-training-data/blog-190-content-repurposing-multi-format-strategy.md": 1,
"blog-training-data/blog-184-perfect-visuals-infographics-header-design.md": 1,
"blog-training-data/blog-048-400g-dr4-fr4-lr4-comparison.md": 1,
"blog-training-data/blog-031-cwdm4-vs-psm4-100g-datacenter.md": 1,
"blog-training-data/blog-059-100g-sr4-multimode-distance-limits.md": 1,

View File

@ -1,13 +1,13 @@
{
"generated_at": "2026-04-25T21:56:31.560Z",
"generated_at": "2026-05-13T19:32:40.656Z",
"version": "TIP-LearningPool-v1",
"lanes": {
"tip_llm": {
"raw_pairs": 12141,
"raw_pairs": 12268,
"duplicates_removed": 269,
"training_pairs": 11872,
"train_pairs": 10684,
"eval_pairs": 1188,
"training_pairs": 11999,
"train_pairs": 10799,
"eval_pairs": 1200,
"sources": {
"external:vendor-deep-dives.jsonl": 11200,
"external:technical-deep-dives.jsonl": 84,
@ -16,8 +16,10 @@
"external:synthesized-training-samples.jsonl": 219,
"external:nanog-ripe-labs-content.jsonl": 34,
"external:academic-research-synthesis.jsonl": 109,
"training-data/tip-llm-capabilities-v1.jsonl": 34,
"training-data/tip-llm-pricing-v1.jsonl": 80,
"training-data/tip-llm-capabilities-v1.jsonl": 69,
"external:market-business-analysis-part6.jsonl": 5,
"robot-control-high.jsonl": 12,
"external:market-business-analysis-part5.jsonl": 7,
"external:market-business-analysis-part4.jsonl": 5,
"external:market-business-analysis-part2.jsonl": 8,
@ -31,22 +33,31 @@
}
},
"blog_llm": {
"raw_pairs": 11508,
"raw_pairs": 11635,
"duplicates_removed": 100,
"training_pairs": 11408,
"train_pairs": 10267,
"eval_pairs": 1141,
"training_pairs": 11535,
"train_pairs": 10381,
"eval_pairs": 1154,
"sources": {
"external:vendor-deep-dives.jsonl": 11200,
"blog-training-data/blog-164-network-research-innovation-emerging-technologies.md": 1,
"external:technical-deep-dives.jsonl": 84,
"blog-training-data/blog-174-network-performance-testing-rfc2544-y1564.md": 1,
"blog-training-data/blog-179-data-center-physical-infrastructure-design.md": 1,
"blog-training-data/blog-025-sfp28-lab-vs-rack.md": 1,
"blog-training-data/blog-091-wavelength-selective-switch-wss-explainer.md": 1,
"blog-training-data/blog-008-oem-vs-compatible-real-numbers.md": 1,
"blog-training-data/blog-150-comprehensive-optical-network-program-management.md": 1,
"blog-training-data/blog-014-800g-new-products-what-ships.md": 1,
"blog-training-data/blog-045-osnr-link-budget-practical-guide.md": 1,
"blog-training-data/blog-178-outside-plant-construction-cable-installation.md": 1,
"blog-training-data/blog-024-rx-power-budgets-400g.md": 1,
"blog-training-data/blog-187-ab-testing-conversion-optimization-b2b-content.md": 1,
"blog-training-data/blog-151-optical-network-troubleshooting-advanced-scenarios.md": 1,
"blog-training-data/blog-107-dwdm-when-you-need-it.md": 1,
"blog-training-data/blog-017-dom-readings-lie.md": 1,
"blog-training-data/blog-010-qsfp-dd-vs-osfp-form-factor-reality.md": 1,
"blog-training-data/blog-153-optical-deployment-best-practices-comprehensive.md": 1,
"blog-training-data/blog-072-optical-amplifier-edfa-raman-basics.md": 1,
"blog-training-data/blog-028-400g-dac-3m-vs-5m.md": 1,
"blog-training-data/blog-011-transceiver-procurement-checklist.md": 1,
@ -54,87 +65,205 @@
"blog-training-data/blog-083-fiber-optic-testing-otdr-basics.md": 1,
"blog-training-data/blog-038-cpo-pluggable-future.md": 1,
"blog-training-data/blog-054-multimode-fiber-om3-om4-om5-guide.md": 1,
"blog-training-data/blog-127-streaming-cdn-content-delivery.md": 1,
"blog-training-data/blog-015-compatible-vendor-comparison-who-to-trust.md": 1,
"blog-training-data/blog-063-100g-zr-coherent-pluggable-timing.md": 1,
"blog-training-data/blog-195-case-study-craft-stories-drive-decisions.md": 1,
"blog-training-data/blog-221-content-attribution-multi-touch-modeling.md": 1,
"blog-training-data/blog-192-ai-prompt-engineering-technical-content.md": 1,
"blog-training-data/blog-135-network-security-optical-physical-layer.md": 1,
"blog-training-data/blog-144-network-virtualization-overlays-optical.md": 1,
"blog-training-data/blog-125-optical-network-troubleshooting-mastery.md": 1,
"blog-training-data/blog-197-content-analytics-roi-measurement.md": 1,
"blog-training-data/blog-219-content-governance-compliance-regulated-industries.md": 1,
"blog-training-data/blog-171-fiber-types-specifications-complete-reference.md": 1,
"blog-training-data/blog-069-optical-budget-calculator-guide.md": 1,
"blog-training-data/blog-169-optical-networking-competitive-landscape-analysis.md": 1,
"blog-training-data/blog-070-mtp-mpo-cassette-fiber-management.md": 1,
"blog-training-data/blog-134-cloud-networking-optical-transceiver-strategy.md": 1,
"blog-training-data/blog-138-network-observability-telemetry-optical.md": 1,
"blog-training-data/blog-159-optical-network-incident-management-emergency.md": 1,
"blog-training-data/blog-092-sfp-sfp-plus-backward-compatibility.md": 1,
"blog-training-data/blog-086-hyperscale-optics-purchasing-strategy.md": 1,
"blog-training-data/blog-055-transceiver-lifecycle-management-enterprise.md": 1,
"blog-training-data/blog-161-optical-network-mergers-acquisitions-integration.md": 1,
"blog-training-data/blog-066-400g-zr-interoperability-matrix.md": 1,
"blog-training-data/blog-228-economics-content-marketing-business-model.md": 1,
"blog-training-data/blog-193-advanced-seo-b2b-technical-content.md": 1,
"blog-training-data/blog-166-osi-model-optical-networking-complete-layer-analysis.md": 1,
"blog-training-data/blog-093-google-meta-microsoft-optics-strategy.md": 1,
"blog-training-data/blog-019-cleaning-fiber-400g-tolerance.md": 1,
"blog-training-data/blog-102-compliance-checklist-imported-transceivers.md": 1,
"blog-training-data/blog-175-cloud-networking-deep-dive-vpc-containers-mesh.md": 1,
"blog-training-data/blog-026-400g-zr-vs-zrplus.md": 1,
"blog-training-data/blog-035-esd-damage-transceivers.md": 1,
"blog-training-data/blog-199-industry-analyst-relations-gartner-forrester.md": 1,
"blog-training-data/blog-124-network-automation-optical-infrastructure.md": 1,
"blog-training-data/blog-123-silicon-photonics-co-packaged-optics.md": 1,
"blog-training-data/blog-087-rj45-vs-sfp-copper-1g-switches.md": 1,
"blog-training-data/blog-132-quantum-networking-optical-infrastructure.md": 1,
"blog-training-data/blog-120-telco-5g-6g-fronthaul-midhaul-backhaul.md": 1,
"blog-training-data/blog-009-100g-to-400g-migration-what-breaks.md": 1,
"blog-training-data/blog-104-ai-chip-shortage-optics-supply.md": 1,
"blog-training-data/blog-034-grey-optics-vs-dwdm-metro-aggregation.md": 1,
"blog-training-data/blog-167-security-layers-defense-depth-optical-networks.md": 1,
"blog-training-data/blog-154-optical-network-roi-business-value-analysis.md": 1,
"blog-training-data/blog-082-coherent-dsp-power-consumption.md": 1,
"blog-training-data/blog-062-transceiver-inventory-management-excel-vs-cmdb.md": 1,
"blog-training-data/blog-088-transceiver-sff-committee-history.md": 1,
"blog-training-data/blog-098-carrier-ethernet-timing-syncE-ptp-optics.md": 1,
"blog-training-data/blog-122-pam4-pam8-modulation-data-center.md": 1,
"blog-training-data/blog-003-silicon-photonics.md": 1,
"blog-training-data/blog-130-edge-computing-network-optics-future.md": 1,
"blog-training-data/blog-037-fec-deep-dive.md": 1,
"blog-training-data/blog-099-transceiver-market-2026-pricing-forecast.md": 1,
"blog-training-data/blog-155-optical-networking-knowledge-management.md": 1,
"blog-training-data/blog-021-validating-compatible-optics.md": 1,
"blog-training-data/blog-176-greenfield-network-infrastructure-complete-build.md": 1,
"blog-training-data/blog-023-pam4-800g-fec-errors.md": 1,
"blog-training-data/blog-204-customer-marketing-advocacy-programs.md": 1,
"blog-training-data/blog-030-when-to-upgrade-from-10g.md": 1,
"blog-training-data/blog-131-telco-carrier-grade-optical-operations.md": 1,
"blog-training-data/blog-002-vendor-lock-in-optics.md": 1,
"blog-training-data/blog-198-complete-content-engine-operating-system.md": 1,
"blog-training-data/blog-173-internet-architecture-deep-dive-bgp-ixps-peering.md": 1,
"blog-training-data/blog-220-investor-relations-content-tech-companies.md": 1,
"blog-training-data/blog-225-privacy-data-protection-content-practices.md": 1,
"blog-training-data/blog-180-network-project-management-permitting-execution.md": 1,
"blog-training-data/blog-224-generative-ai-future-content-marketing.md": 1,
"blog-training-data/blog-081-transceiver-rma-process-best-practices.md": 1,
"blog-training-data/blog-013-price-drop-timing-when-to-buy.md": 1,
"blog-training-data/blog-160-future-of-optical-networking-comprehensive.md": 1,
"blog-training-data/blog-095-optical-lan-versus-fiber-ethernet.md": 1,
"blog-training-data/blog-117-submarine-cable-coherent-long-haul.md": 1,
"blog-training-data/blog-067-single-mode-fiber-types-g652-g657.md": 1,
"blog-training-data/blog-177-site-survey-capacity-planning-methodology.md": 1,
"blog-training-data/blog-039-cmis-400g-management.md": 1,
"blog-training-data/blog-213-original-research-proprietary-data.md": 1,
"blog-training-data/blog-226-accessibility-inclusive-content-design.md": 1,
"blog-training-data/blog-142-network-design-patterns-optical-architecture.md": 1,
"blog-training-data/blog-113-rma-warranty-optimization.md": 1,
"blog-training-data/blog-071-sff-8024-transceiver-id-codes.md": 1,
"blog-training-data/blog-097-liquid-cooling-impact-optical-transceivers.md": 1,
"blog-training-data/blog-007-800g-readiness.md": 1,
"blog-training-data/blog-058-arista-eos-optic-compatibility.md": 1,
"blog-training-data/blog-136-emerging-protocols-cxl-roce-rdma.md": 1,
"blog-training-data/blog-068-25g-vs-10g-upgrade-path-decision.md": 1,
"blog-training-data/blog-170-network-management-protocols-comprehensive-snmp-netconf.md": 1,
"blog-training-data/blog-061-cfp2-cfp4-qsfp28-form-factor-migration.md": 1,
"blog-training-data/blog-147-optical-network-testing-validation-procedures.md": 1,
"blog-training-data/blog-079-ip-optical-integration-disaggregation.md": 1,
"blog-training-data/blog-129-manufacturing-iot-industrial-network.md": 1,
"blog-training-data/blog-046-transceiver-counterfeit-detection.md": 1,
"blog-training-data/blog-183-perfect-hooks-teasers-curiosity-gap.md": 1,
"blog-training-data/blog-056-cisco-qsfp28-compatibility-list.md": 1,
"blog-training-data/blog-005-coherent-400zr-reality.md": 1,
"blog-training-data/blog-203-executive-personal-brand-technical-leaders.md": 1,
"blog-training-data/blog-109-third-party-optics-validation-lab-testing.md": 1,
"blog-training-data/blog-065-dwdm-channel-plan-100ghz-vs-50ghz.md": 1,
"blog-training-data/blog-227-emerging-platforms-content-innovation.md": 1,
"blog-training-data/blog-078-pon-gpon-xgspon-optics-explainer.md": 1,
"blog-training-data/blog-051-spine-leaf-transceiver-strategy.md": 1,
"blog-training-data/blog-032-msa-compliance-vs-interoperability.md": 1,
"blog-training-data/blog-064-optic-burn-in-testing.md": 1,
"blog-training-data/blog-114-counterfeit-detection-supply-chain.md": 1,
"blog-training-data/blog-133-disaggregated-networking-future-architecture.md": 1,
"blog-training-data/blog-105-why-it-teams-care-optics.md": 1,
"blog-training-data/blog-001-400g-dr4-price-war.md": 1,
"blog-training-data/blog-040-evaluating-compatible-vendor.md": 1,
"blog-training-data/blog-211-employee-advocacy-internal-content.md": 1,
"blog-training-data/blog-202-video-podcast-content-b2b-tech.md": 1,
"blog-training-data/blog-042-800g-osfp-vs-qsfp-dd-port-density.md": 1,
"blog-training-data/blog-140-future-optical-networking-2030.md": 1,
"blog-training-data/blog-139-disaster-recovery-business-continuity-optical.md": 1,
"blog-training-data/blog-148-vendor-relationship-strategic-partnerships.md": 1,
"blog-training-data/blog-100-flexoptix-programming-service-technical.md": 1,
"blog-training-data/blog-118-ai-ml-workload-network-optics.md": 1,
"blog-training-data/blog-076-cisco-nexus-vs-catalyst-optic-behavior.md": 1,
"blog-training-data/blog-053-cisco-juniper-arista-optic-lock-in.md": 1,
"blog-training-data/blog-044-laser-safety-class-1m-transceivers.md": 1,
"blog-training-data/blog-152-optical-network-architecture-evolution-2025-2030.md": 1,
"blog-training-data/blog-094-transceiver-programming-eeprom-guide.md": 1,
"blog-training-data/blog-222-content-marketing-team-development.md": 1,
"blog-training-data/blog-085-ai-inference-cluster-optics-requirements.md": 1,
"blog-training-data/blog-206-crisis-communications-reputation-management.md": 1,
"blog-training-data/blog-188-email-marketing-b2b-technical-content.md": 1,
"blog-training-data/blog-149-network-security-zero-trust-optical-implementation.md": 1,
"blog-training-data/blog-216-ai-ethics-responsible-content-creation.md": 1,
"blog-training-data/blog-182-science-of-perfect-blog-writing.md": 1,
"blog-training-data/blog-194-brand-voice-architecture-technical-companies.md": 1,
"blog-training-data/blog-163-network-skills-careers-optical-engineering-future.md": 1,
"blog-training-data/blog-052-roa-replacing-optics-proactively.md": 1,
"blog-training-data/blog-162-network-as-a-service-cloud-native-optical.md": 1,
"blog-training-data/blog-090-optics-for-5g-fronthaul-midhaul.md": 1,
"blog-training-data/blog-126-fintech-financial-services-network-optics.md": 1,
"blog-training-data/blog-201-sales-enablement-content-strategy.md": 1,
"blog-training-data/blog-186-perfect-blog-engine-architecture-synthesis.md": 1,
"blog-training-data/blog-041-silicon-photonics-co-packaging-2026.md": 1,
"blog-training-data/blog-156-network-protocols-l1-encryption-deep-dive.md": 1,
"blog-training-data/blog-096-dark-fiber-leasing-optics-considerations.md": 1,
"blog-training-data/blog-108-advanced-fiber-contamination-diagnostics.md": 1,
"blog-training-data/blog-215-recruiting-employer-branding-content.md": 1,
"blog-training-data/blog-112-open-networking-optics-ecosystem.md": 1,
"blog-training-data/blog-121-400g-800g-coherent-optics-deep-dive.md": 1,
"blog-training-data/blog-084-ieee-802.3-standards-transceiver-reference.md": 1,
"blog-training-data/blog-012-coherent-vs-direct-detect-decision.md": 1,
"blog-training-data/blog-165-optical-networking-comprehensive-reference-guide.md": 1,
"blog-training-data/blog-004-400g-migration-fiber-plant.md": 1,
"blog-training-data/blog-115-healthcare-network-optics-compliance.md": 1,
"blog-training-data/blog-119-sustainability-carbon-footprint-optical.md": 1,
"blog-training-data/blog-060-fiber-connector-cleaning-protocol.md": 1,
"blog-training-data/blog-143-network-protocols-modern-optical-infrastructure.md": 1,
"blog-training-data/blog-172-transceiver-form-factors-complete-reference.md": 1,
"blog-training-data/blog-207-localization-international-content-strategy.md": 1,
"blog-training-data/blog-106-fiber-diagnostics-eye-diagrams.md": 1,
"blog-training-data/blog-158-network-time-synchronization-precision-timing.md": 1,
"blog-training-data/blog-217-strategic-partnerships-co-marketing.md": 1,
"blog-training-data/blog-027-fiber-plant-audit-100g-upgrade.md": 1,
"blog-training-data/blog-016-400g-qsfp-dd-after-fiber-moves.md": 1,
"blog-training-data/blog-145-data-center-interconnect-dci-optical-design.md": 1,
"blog-training-data/blog-205-product-launch-content-strategy.md": 1,
"blog-training-data/blog-074-fiber-optic-patch-cord-standards.md": 1,
"blog-training-data/blog-057-juniper-optic-unlock-ex-qfx.md": 1,
"blog-training-data/blog-196-newsletter-strategy-technical-audiences.md": 1,
"blog-training-data/blog-214-press-relations-media-strategy.md": 1,
"blog-training-data/blog-022-oem-vs-compatible-lab-tests.md": 1,
"blog-training-data/blog-218-sustainable-content-marketing-practice.md": 1,
"blog-training-data/blog-020-100g-link-drops-temperature.md": 1,
"blog-training-data/blog-191-editorial-operations-content-engine-management.md": 1,
"blog-training-data/blog-146-optical-network-capacity-planning-bandwidth.md": 1,
"blog-training-data/blog-050-optical-transceiver-temperature-grades.md": 1,
"blog-training-data/blog-208-community-building-technical-content.md": 1,
"blog-training-data/blog-111-cisco-arista-juniper-optics-strategies.md": 1,
"blog-training-data/blog-141-optical-network-cost-engineering-tco.md": 1,
"blog-training-data/blog-036-coherent-tunable-vs-fixed-wavelength.md": 1,
"blog-training-data/blog-181-neurolinguistic-persuasion-blog-writing.md": 1,
"blog-training-data/blog-209-account-based-marketing-abm-content.md": 1,
"blog-training-data/blog-200-webinar-virtual-event-content-strategy.md": 1,
"blog-training-data/blog-077-pam4-vs-nrz-modulation-transceivers.md": 1,
"blog-training-data/blog-212-interactive-content-calculators-tools.md": 1,
"blog-training-data/blog-080-fcoe-fibre-channel-sfp-differences.md": 1,
"blog-training-data/blog-168-optical-transceiver-manufacturers-comprehensive-landscape.md": 1,
"blog-training-data/blog-043-zr-zr-plus-coherent-pluggables-comparison.md": 1,
"blog-training-data/blog-049-wavelength-division-multiplexing-primer.md": 1,
"blog-training-data/blog-089-metro-dwdm-open-vs-proprietary.md": 1,
"blog-training-data/blog-128-government-federal-network-optics.md": 1,
"blog-training-data/blog-116-carrier-isp-optics-operations.md": 1,
"blog-training-data/blog-073-qsfp-dd-800g-ecosystem-2026.md": 1,
"blog-training-data/blog-210-marketing-automation-lead-nurturing.md": 1,
"blog-training-data/blog-189-linkedin-social-distribution-b2b-tech.md": 1,
"blog-training-data/blog-018-800g-sr8-dr8-fr8-comparison.md": 1,
"blog-training-data/blog-029-800g-osfp-spineleaf-checklist.md": 1,
"blog-training-data/blog-110-wavelength-tuning-dwdm.md": 1,
"blog-training-data/blog-103-carbon-footprint-oem-compatible-tco.md": 1,
"blog-training-data/blog-137-regional-optical-network-considerations-global.md": 1,
"blog-training-data/blog-006-dom-diagnostics.md": 1,
"blog-training-data/blog-157-multicast-video-broadcast-optical-networks.md": 1,
"blog-training-data/blog-185-b2b-decision-psychology-trust-signals.md": 1,
"blog-training-data/blog-223-final-capstone-sustainable-excellence.md": 1,
"blog-training-data/blog-075-transceiver-failure-root-cause-analysis.md": 1,
"blog-training-data/blog-190-content-repurposing-multi-format-strategy.md": 1,
"blog-training-data/blog-184-perfect-visuals-infographics-header-design.md": 1,
"blog-training-data/blog-048-400g-dr4-fr4-lr4-comparison.md": 1,
"blog-training-data/blog-031-cwdm4-vs-psm4-100g-datacenter.md": 1,
"blog-training-data/blog-059-100g-sr4-multimode-distance-limits.md": 1,

View File

@ -1,9 +1,9 @@
{
"raw_pairs": 12141,
"raw_pairs": 12268,
"duplicates_removed": 269,
"training_pairs": 11872,
"train_pairs": 10684,
"eval_pairs": 1188,
"training_pairs": 11999,
"train_pairs": 10799,
"eval_pairs": 1200,
"sources": {
"external:vendor-deep-dives.jsonl": 11200,
"external:technical-deep-dives.jsonl": 84,
@ -12,8 +12,10 @@
"external:synthesized-training-samples.jsonl": 219,
"external:nanog-ripe-labs-content.jsonl": 34,
"external:academic-research-synthesis.jsonl": 109,
"training-data/tip-llm-capabilities-v1.jsonl": 34,
"training-data/tip-llm-pricing-v1.jsonl": 80,
"training-data/tip-llm-capabilities-v1.jsonl": 69,
"external:market-business-analysis-part6.jsonl": 5,
"robot-control-high.jsonl": 12,
"external:market-business-analysis-part5.jsonl": 7,
"external:market-business-analysis-part4.jsonl": 5,
"external:market-business-analysis-part2.jsonl": 8,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long