Rene Fichtmueller 8e367b3c33 feat: TIP_LLM 5-capability training data + updated system prompt
- Add scripts/seed-tip-llm-capabilities.ts: generator for 34 SFT pairs
  covering all 5 TIP_LLM capabilities (transceiver research, switch
  research, Blog_LLM data evaluation, crawler/scraper design, Hype Cycle)
- Add training-data/tip-llm-capabilities-v1.jsonl: generated output (34 pairs)
- Update tip-learning-pool-build.ts: expanded 5-capability system prompt
  replaces single-line prompt; register capabilities file in files.tip_llm
- Regenerate tip_llm runpod outputs: 12141 raw pairs → 11872 training pairs
  (up from 10654 before capabilities addition)
- Published tip_llm (11872 pairs) + blog_llm (11408 pairs) to HuggingFace
2026-04-26 00:01:21 +02:00

153 lines
9.5 KiB
JSON

{
"generated_at": "2026-04-25T21:56:31.560Z",
"version": "TIP-LearningPool-v1",
"lanes": {
"tip_llm": {
"raw_pairs": 12141,
"duplicates_removed": 269,
"training_pairs": 11872,
"train_pairs": 10684,
"eval_pairs": 1188,
"sources": {
"external:vendor-deep-dives.jsonl": 11200,
"external:technical-deep-dives.jsonl": 84,
"external:rir-infrastructure-data.jsonl": 150,
"external:market-business-analysis-part1.jsonl": 10,
"external:synthesized-training-samples.jsonl": 219,
"external:nanog-ripe-labs-content.jsonl": 34,
"external:academic-research-synthesis.jsonl": 109,
"training-data/tip-llm-capabilities-v1.jsonl": 34,
"external:market-business-analysis-part6.jsonl": 5,
"external:market-business-analysis-part5.jsonl": 7,
"external:market-business-analysis-part4.jsonl": 5,
"external:market-business-analysis-part2.jsonl": 8,
"external:market-business-analysis-part3.jsonl": 7
},
"files": {
"train": "training-data/runpod/tip_llm/tip_llm-sft-train.jsonl",
"eval": "training-data/runpod/tip_llm/tip_llm-sft-eval.jsonl",
"all": "training-data/runpod/tip_llm/tip_llm-sft-all.jsonl",
"manifest": "training-data/runpod/tip_llm/manifest.json"
}
},
"blog_llm": {
"raw_pairs": 11508,
"duplicates_removed": 100,
"training_pairs": 11408,
"train_pairs": 10267,
"eval_pairs": 1141,
"sources": {
"external:vendor-deep-dives.jsonl": 11200,
"external:technical-deep-dives.jsonl": 84,
"blog-training-data/blog-025-sfp28-lab-vs-rack.md": 1,
"blog-training-data/blog-091-wavelength-selective-switch-wss-explainer.md": 1,
"blog-training-data/blog-008-oem-vs-compatible-real-numbers.md": 1,
"blog-training-data/blog-014-800g-new-products-what-ships.md": 1,
"blog-training-data/blog-045-osnr-link-budget-practical-guide.md": 1,
"blog-training-data/blog-024-rx-power-budgets-400g.md": 1,
"blog-training-data/blog-017-dom-readings-lie.md": 1,
"blog-training-data/blog-010-qsfp-dd-vs-osfp-form-factor-reality.md": 1,
"blog-training-data/blog-072-optical-amplifier-edfa-raman-basics.md": 1,
"blog-training-data/blog-028-400g-dac-3m-vs-5m.md": 1,
"blog-training-data/blog-011-transceiver-procurement-checklist.md": 1,
"external:blog-fichtmueller-posts.jsonl": 24,
"blog-training-data/blog-083-fiber-optic-testing-otdr-basics.md": 1,
"blog-training-data/blog-038-cpo-pluggable-future.md": 1,
"blog-training-data/blog-054-multimode-fiber-om3-om4-om5-guide.md": 1,
"blog-training-data/blog-015-compatible-vendor-comparison-who-to-trust.md": 1,
"blog-training-data/blog-063-100g-zr-coherent-pluggable-timing.md": 1,
"blog-training-data/blog-069-optical-budget-calculator-guide.md": 1,
"blog-training-data/blog-070-mtp-mpo-cassette-fiber-management.md": 1,
"blog-training-data/blog-092-sfp-sfp-plus-backward-compatibility.md": 1,
"blog-training-data/blog-086-hyperscale-optics-purchasing-strategy.md": 1,
"blog-training-data/blog-055-transceiver-lifecycle-management-enterprise.md": 1,
"blog-training-data/blog-066-400g-zr-interoperability-matrix.md": 1,
"blog-training-data/blog-093-google-meta-microsoft-optics-strategy.md": 1,
"blog-training-data/blog-019-cleaning-fiber-400g-tolerance.md": 1,
"blog-training-data/blog-026-400g-zr-vs-zrplus.md": 1,
"blog-training-data/blog-035-esd-damage-transceivers.md": 1,
"blog-training-data/blog-087-rj45-vs-sfp-copper-1g-switches.md": 1,
"blog-training-data/blog-009-100g-to-400g-migration-what-breaks.md": 1,
"blog-training-data/blog-034-grey-optics-vs-dwdm-metro-aggregation.md": 1,
"blog-training-data/blog-082-coherent-dsp-power-consumption.md": 1,
"blog-training-data/blog-062-transceiver-inventory-management-excel-vs-cmdb.md": 1,
"blog-training-data/blog-088-transceiver-sff-committee-history.md": 1,
"blog-training-data/blog-098-carrier-ethernet-timing-syncE-ptp-optics.md": 1,
"blog-training-data/blog-003-silicon-photonics.md": 1,
"blog-training-data/blog-037-fec-deep-dive.md": 1,
"blog-training-data/blog-099-transceiver-market-2026-pricing-forecast.md": 1,
"blog-training-data/blog-021-validating-compatible-optics.md": 1,
"blog-training-data/blog-023-pam4-800g-fec-errors.md": 1,
"blog-training-data/blog-030-when-to-upgrade-from-10g.md": 1,
"blog-training-data/blog-002-vendor-lock-in-optics.md": 1,
"blog-training-data/blog-081-transceiver-rma-process-best-practices.md": 1,
"blog-training-data/blog-013-price-drop-timing-when-to-buy.md": 1,
"blog-training-data/blog-095-optical-lan-versus-fiber-ethernet.md": 1,
"blog-training-data/blog-067-single-mode-fiber-types-g652-g657.md": 1,
"blog-training-data/blog-039-cmis-400g-management.md": 1,
"blog-training-data/blog-071-sff-8024-transceiver-id-codes.md": 1,
"blog-training-data/blog-097-liquid-cooling-impact-optical-transceivers.md": 1,
"blog-training-data/blog-007-800g-readiness.md": 1,
"blog-training-data/blog-058-arista-eos-optic-compatibility.md": 1,
"blog-training-data/blog-068-25g-vs-10g-upgrade-path-decision.md": 1,
"blog-training-data/blog-061-cfp2-cfp4-qsfp28-form-factor-migration.md": 1,
"blog-training-data/blog-079-ip-optical-integration-disaggregation.md": 1,
"blog-training-data/blog-046-transceiver-counterfeit-detection.md": 1,
"blog-training-data/blog-056-cisco-qsfp28-compatibility-list.md": 1,
"blog-training-data/blog-005-coherent-400zr-reality.md": 1,
"blog-training-data/blog-065-dwdm-channel-plan-100ghz-vs-50ghz.md": 1,
"blog-training-data/blog-078-pon-gpon-xgspon-optics-explainer.md": 1,
"blog-training-data/blog-051-spine-leaf-transceiver-strategy.md": 1,
"blog-training-data/blog-032-msa-compliance-vs-interoperability.md": 1,
"blog-training-data/blog-064-optic-burn-in-testing.md": 1,
"blog-training-data/blog-001-400g-dr4-price-war.md": 1,
"blog-training-data/blog-040-evaluating-compatible-vendor.md": 1,
"blog-training-data/blog-042-800g-osfp-vs-qsfp-dd-port-density.md": 1,
"blog-training-data/blog-100-flexoptix-programming-service-technical.md": 1,
"blog-training-data/blog-076-cisco-nexus-vs-catalyst-optic-behavior.md": 1,
"blog-training-data/blog-053-cisco-juniper-arista-optic-lock-in.md": 1,
"blog-training-data/blog-044-laser-safety-class-1m-transceivers.md": 1,
"blog-training-data/blog-094-transceiver-programming-eeprom-guide.md": 1,
"blog-training-data/blog-085-ai-inference-cluster-optics-requirements.md": 1,
"blog-training-data/blog-052-roa-replacing-optics-proactively.md": 1,
"blog-training-data/blog-090-optics-for-5g-fronthaul-midhaul.md": 1,
"blog-training-data/blog-041-silicon-photonics-co-packaging-2026.md": 1,
"blog-training-data/blog-096-dark-fiber-leasing-optics-considerations.md": 1,
"blog-training-data/blog-084-ieee-802.3-standards-transceiver-reference.md": 1,
"blog-training-data/blog-012-coherent-vs-direct-detect-decision.md": 1,
"blog-training-data/blog-004-400g-migration-fiber-plant.md": 1,
"blog-training-data/blog-060-fiber-connector-cleaning-protocol.md": 1,
"blog-training-data/blog-027-fiber-plant-audit-100g-upgrade.md": 1,
"blog-training-data/blog-016-400g-qsfp-dd-after-fiber-moves.md": 1,
"blog-training-data/blog-074-fiber-optic-patch-cord-standards.md": 1,
"blog-training-data/blog-057-juniper-optic-unlock-ex-qfx.md": 1,
"blog-training-data/blog-022-oem-vs-compatible-lab-tests.md": 1,
"blog-training-data/blog-020-100g-link-drops-temperature.md": 1,
"blog-training-data/blog-050-optical-transceiver-temperature-grades.md": 1,
"blog-training-data/blog-036-coherent-tunable-vs-fixed-wavelength.md": 1,
"blog-training-data/blog-077-pam4-vs-nrz-modulation-transceivers.md": 1,
"blog-training-data/blog-080-fcoe-fibre-channel-sfp-differences.md": 1,
"blog-training-data/blog-043-zr-zr-plus-coherent-pluggables-comparison.md": 1,
"blog-training-data/blog-049-wavelength-division-multiplexing-primer.md": 1,
"blog-training-data/blog-089-metro-dwdm-open-vs-proprietary.md": 1,
"blog-training-data/blog-073-qsfp-dd-800g-ecosystem-2026.md": 1,
"blog-training-data/blog-018-800g-sr8-dr8-fr8-comparison.md": 1,
"blog-training-data/blog-029-800g-osfp-spineleaf-checklist.md": 1,
"blog-training-data/blog-006-dom-diagnostics.md": 1,
"blog-training-data/blog-075-transceiver-failure-root-cause-analysis.md": 1,
"blog-training-data/blog-048-400g-dr4-fr4-lr4-comparison.md": 1,
"blog-training-data/blog-031-cwdm4-vs-psm4-100g-datacenter.md": 1,
"blog-training-data/blog-059-100g-sr4-multimode-distance-limits.md": 1,
"blog-training-data/blog-047-dom-digital-optical-monitoring-guide.md": 1,
"blog-training-data/blog-033-25g-dac-aoc-optical-tco.md": 1
},
"files": {
"train": "training-data/runpod/blog_llm/blog_llm-sft-train.jsonl",
"eval": "training-data/runpod/blog_llm/blog_llm-sft-eval.jsonl",
"all": "training-data/runpod/blog_llm/blog_llm-sft-all.jsonl",
"manifest": "training-data/runpod/blog_llm/manifest.json"
}
}
}
}