transceiver-db/sql/006-seed-knowledge-base.sql
Rene Fichtmueller 4b452ab49e feat(scrapers+mcp): ATGBICS + ProLabs scrapers, MCP HTTP/SSE server
Scrapers:
- atgbics.ts: PlaywrightCrawler for UK vendor ATGBICS (Shopify store),
  scrapes SFP/SFP+/SFP28/QSFP+/QSFP28/QSFP-DD in GBP, max 50 pages/run
- prolabs.ts: HttpCrawler for ProLabs (Legrand subsidiary), USD pricing,
  category-driven crawl with reach/fiber/speed detection
- Both registered in scheduler (every 8h, staggered) and index.ts CLI

MCP HTTP Server:
- packages/mcp-server/src/http-server.ts: Express + SSEServerTransport
- Exposes all 12 TIP tools via GET /sse + POST /message
- Bearer token auth (MCP_SECRET env), CORS-configurable
- GET /health → { status: "ok", tools: 12 }
- Port: MCP_HTTP_PORT (default 3201)

SQL + tools:
- sql/006-009: seed scripts for whitebox switches, vendors, assets
- switch-docs.ts: MCP tool for switch documentation queries
2026-03-29 02:26:45 +08:00

122 lines
15 KiB
SQL

-- Knowledge Base seed data: Troubleshooting, FAQ, Best Practices, Known Issues
-- Run: docker exec -i tip-postgres psql -U tip -d transceiver_db < sql/006-seed-knowledge-base.sql
-- Remove every row whose subcategory is 'test' before the seed rows are loaded.
DELETE
FROM knowledge_base
WHERE subcategory = 'test';
-- === TROUBLESHOOTING ===
-- Seeds the 'troubleshooting' category. Each entry carries a resolution_steps
-- value: a JSON array of {"step": <n>, "action": "<text>"} objects.
--
-- Every statement is guarded with WHERE NOT EXISTS so the script can be re-run
-- without inserting the same entry twice (the only cleanup in this file targets
-- subcategory = 'test', so an unguarded re-run would duplicate every row).
-- The guard treats (category, question) as the natural key; no unique
-- constraint is visible from this file -- TODO confirm against the schema.
--
-- The literals are deliberately left untyped: inside INSERT ... SELECT,
-- PostgreSQL resolves unknown-type literals from the destination column, so the
-- array/JSON strings are coerced exactly as they were by INSERT ... VALUES.
INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps
)
SELECT
    'troubleshooting',
    'tx_power',
    'Low Tx power alarm on SFP+ SR module',
    'Tx power dropped below -11.0 dBm on a module rated for -8.2 to +0.5 dBm. This indicates laser degradation. The laser is approaching end-of-life — you have approximately 2-4 weeks before complete failure. Replace during the next maintenance window, do not wait for an unplanned outage.',
    '{SFP+}',
    '{10G}',
    'high',
    '{tx_power,laser,degradation,alarm}',
    '[{"step": 1, "action": "Check DOM readings: show interface transceiver details"}, {"step": 2, "action": "Compare Tx power to module spec (-8.2 to +0.5 dBm for SR)"}, {"step": 3, "action": "If Tx < -11.0 dBm, schedule replacement"}, {"step": 4, "action": "Order spare and replace in next maintenance window"}]'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'troubleshooting'
      AND kb.question = 'Low Tx power alarm on SFP+ SR module'
);

INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps
)
SELECT
    'troubleshooting',
    'tx_power',
    'Low Tx power on QSFP-DD DR4 — per-lane diagnostics',
    'QSFP-DD DR4 modules have 4 independent lanes, each rated for -2.9 to +3.0 dBm Tx power. If only one lane shows low power, the module has a failing lane laser. If all lanes drop, check the module temperature first — overheating causes power rollback. Use per-lane DOM: show interface transceiver details.',
    '{QSFP-DD}',
    '{400G}',
    'high',
    '{tx_power,qsfp-dd,per_lane,dom}',
    '[{"step": 1, "action": "show interface transceiver details — check per-lane Tx power"}, {"step": 2, "action": "Check module temperature (alarm above 75C)"}, {"step": 3, "action": "If single lane low: failing laser, replace module"}, {"step": 4, "action": "If all lanes low + high temp: improve airflow first"}]'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'troubleshooting'
      AND kb.question = 'Low Tx power on QSFP-DD DR4 — per-lane diagnostics'
);

INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps
)
SELECT
    'troubleshooting',
    'ber_errors',
    'High pre-FEC BER on 100G QSFP28 link',
    'Pre-FEC BER above 2.4e-4 (KP4 FEC threshold) means the Forward Error Correction is struggling. Common causes: dirty fiber end-faces (40% of cases), fiber type mismatch (SMF cable on MMF optic), or exceeded power budget. Post-FEC errors (uncorrected) mean the FEC has lost the fight — the link will drop packets.',
    '{QSFP28}',
    '{100G}',
    'high',
    '{ber,fec,kp4,errors,pre-fec}',
    '[{"step": 1, "action": "show interface counters errors — check CRC and FEC counters"}, {"step": 2, "action": "If CRC > 100/min: inspect and clean fiber end-faces"}, {"step": 3, "action": "If CRC > 10000/min: check fiber type match (SMF vs MMF)"}, {"step": 4, "action": "Calculate power budget: Tx - losses >= Rx sensitivity + 3dB margin"}]'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'troubleshooting'
      AND kb.question = 'High pre-FEC BER on 100G QSFP28 link'
);

INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps
)
SELECT
    'troubleshooting',
    'temperature',
    'Transceiver temperature alarm in top-of-rack switch',
    'Top-of-rack switches run hotter because heat rises. A transceiver rated for 0-70C (COM) will alarm above 75C. Common in high-density spine switches with poor airflow. Before replacing the optic, fix the thermal environment. An overheating laser degrades 10x faster than a properly cooled one.',
    '{SFP+,QSFP28,QSFP-DD,OSFP}',
    '{10G,100G,400G}',
    'medium',
    '{temperature,thermal,overheating,airflow}',
    '[{"step": 1, "action": "show interface transceiver details — check temperature"}, {"step": 2, "action": "Verify fan tray status and speed"}, {"step": 3, "action": "Install blanking panels in empty slots"}, {"step": 4, "action": "Consider IND-rated (-40 to +85C) modules if environment is harsh"}]'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'troubleshooting'
      AND kb.question = 'Transceiver temperature alarm in top-of-rack switch'
);

INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps
)
SELECT
    'troubleshooting',
    'fiber_mismatch',
    'Link down: SMF optic with MMF patch cable',
    'A common deployment mistake: using a multimode fiber patch cable with a single-mode optic (LR/ER/ZR modules). The core diameter mismatch (9um SMF vs 50um MMF) causes massive signal loss. Symptoms: link stays down, Rx power extremely low despite good Tx. Always verify fiber type: SR = MMF (orange cable), LR/ER/ZR/DR/FR = SMF (yellow cable).',
    '{SFP+,QSFP28,QSFP-DD,OSFP}',
    '{10G,25G,100G,400G}',
    'critical',
    '{fiber,mismatch,smf,mmf,link_down}',
    '[{"step": 1, "action": "Check optic type: SR = MMF, LR/ER/ZR/DR/FR = SMF"}, {"step": 2, "action": "Verify patch cable color: orange = MMF, yellow = SMF"}, {"step": 3, "action": "Replace patch cable with correct type"}, {"step": 4, "action": "Verify link comes up and check Rx power"}]'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'troubleshooting'
      AND kb.question = 'Link down: SMF optic with MMF patch cable'
);

INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps
)
SELECT
    'troubleshooting',
    'coherent',
    '400ZR link not establishing — OSNR too low',
    'Coherent 400ZR optics require OSNR > 20 dB for reliable operation with 16QAM modulation. Unlike direct-detect modules, coherent links fail silently when OSNR drops. Check Tx power (-10.0 to +2.0 dBm), OSNR at receiver (> 20 dB), and chromatic dispersion within module compensation range. For DWDM, verify channel plan alignment.',
    '{QSFP-DD,OSFP}',
    '{400G}',
    'high',
    '{coherent,400zr,osnr,dwdm}',
    '[{"step": 1, "action": "show interfaces diagnostics optics — check OSNR and CD"}, {"step": 2, "action": "Verify OSNR > 20 dB"}, {"step": 3, "action": "Check Tx power range: -10.0 to +2.0 dBm"}, {"step": 4, "action": "Verify DWDM channel plan alignment"}]'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'troubleshooting'
      AND kb.question = '400ZR link not establishing — OSNR too low'
);

INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps
)
SELECT
    'troubleshooting',
    'power_budget',
    'Link flapping on long-distance 10G LR link',
    'A 10G LR (1310nm SMF) rated for 10km flaps at 8km. Power budget: Tx -6.0 dBm, fiber 8km x 0.35 dB/km = 2.8 dB, 4 connectors x 0.3 dB = 1.2 dB, 2 splices x 0.1 dB = 0.2 dB. Total loss: 4.2 dB. Margin: 4.2 dB looks fine. But: two dirty connectors at +1.5 dB each = 3.0 dB extra. New margin: 1.2 dB — below 3 dB safety threshold. Fix: clean all connectors.',
    '{SFP+}',
    '{10G}',
    'medium',
    '{power_budget,flapping,distance,connector,cleaning}',
    '[{"step": 1, "action": "Calculate complete power budget"}, {"step": 2, "action": "Include ALL connectors and patch panels"}, {"step": 3, "action": "Add 0.5-1.5 dB per dirty connector"}, {"step": 4, "action": "Clean all fiber end-faces"}, {"step": 5, "action": "Verify with optical power meter"}]'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'troubleshooting'
      AND kb.question = 'Link flapping on long-distance 10G LR link'
);

INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags, resolution_steps
)
SELECT
    'troubleshooting',
    'intermittent',
    'Transceiver intermittently drops — CRC bursts',
    'CRC error bursts (100-500 per minute, then clean for hours) usually indicate a micro-bend in the fiber or a loose connector. The fiber moves slightly with vibration or temperature changes. Check the physical fiber path: tight bends below minimum radius, cables pinched under trays, or connectors not fully seated.',
    '{SFP+,QSFP28,QSFP-DD}',
    '{10G,100G,400G}',
    'medium',
    '{crc,intermittent,microbend,connector}',
    '[{"step": 1, "action": "show interface counters errors — correlate bursts with timestamps"}, {"step": 2, "action": "Check if errors correlate with HVAC cycles"}, {"step": 3, "action": "Inspect fiber path for tight bends (min radius: 30mm)"}, {"step": 4, "action": "Reseat all connectors"}, {"step": 5, "action": "Use OTDR to find fault point"}]'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'troubleshooting'
      AND kb.question = 'Transceiver intermittently drops — CRC bursts'
);
-- === FAQ ===
-- Seeds the 'faq' category. These entries do not set resolution_steps, so that
-- column receives whatever default the table defines.
--
-- Every statement is guarded with WHERE NOT EXISTS so the script can be re-run
-- without inserting the same entry twice (the only cleanup in this file targets
-- subcategory = 'test', so an unguarded re-run would duplicate every row).
-- The guard treats (category, question) as the natural key; no unique
-- constraint is visible from this file -- TODO confirm against the schema.
--
-- The literals are deliberately left untyped: inside INSERT ... SELECT,
-- PostgreSQL resolves unknown-type literals from the destination column, so the
-- array strings are coerced exactly as they were by INSERT ... VALUES.
INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags
)
SELECT
    'faq',
    'compatibility',
    'Can I use compatible transceivers in Cisco/Arista/Juniper switches?',
    'Yes, in most cases. Cisco Nexus uses "service unsupported-transceiver", Arista allows them by default, Juniper may need "set chassis fpc pic port allow-unsupported-sfp". The EEPROM coding must match the switch vendor. Compatible vendors like Flexoptix code modules to match the target platform. Always test 10 units before bulk ordering.',
    '{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}',
    '{10G,25G,100G,400G,800G}',
    'info',
    '{compatibility,third-party,vendor-lock,coding}'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'faq'
      AND kb.question = 'Can I use compatible transceivers in Cisco/Arista/Juniper switches?'
);

INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags
)
SELECT
    'faq',
    'compatibility',
    'What is transceiver EEPROM coding?',
    'Every transceiver has an EEPROM storing its identity: vendor name, part number, serial, speeds, and calibration data. Switches read this to identify the module. Some vendors check for their vendor ID and may reject third-party modules. Compatible vendors program the EEPROM to match the target platform — same hardware, different EEPROM programming.',
    '{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}',
    '{10G,25G,100G,400G,800G}',
    'info',
    '{eeprom,coding,compatibility}'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'faq'
      AND kb.question = 'What is transceiver EEPROM coding?'
);

INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags
)
SELECT
    'faq',
    'technology',
    'What is the difference between QSFP-DD and OSFP?',
    'Both are 400G+ form factors. QSFP-DD is backward-compatible with QSFP28 cages — same width, just longer. OSFP is wider, allowing better thermal dissipation for high-power coherent modules (20W+). QSFP-DD dominates hyperscale data centers (more ports per linecard), OSFP is preferred for telecom/coherent where thermal headroom matters more than density.',
    '{QSFP-DD,OSFP}',
    '{400G,800G}',
    'info',
    '{qsfp-dd,osfp,form-factor,comparison}'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'faq'
      AND kb.question = 'What is the difference between QSFP-DD and OSFP?'
);

INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags
)
SELECT
    'faq',
    'technology',
    'What is 400ZR and when should I use it?',
    '400ZR is a coherent pluggable standard (OIF) that packs DSP, laser, and modulator into QSFP-DD or OSFP. Enables 400G over 80+ km on single wavelength without external line equipment. Use for DCI between campuses. Do not use for intra-DC links under 2km (DR4/FR4 cheaper) or ultra-long-haul >120km (needs ZR+ or traditional line systems). Power: 15-20W.',
    '{QSFP-DD,OSFP}',
    '{400G}',
    'info',
    '{400zr,coherent,dci,pluggable}'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'faq'
      AND kb.question = 'What is 400ZR and when should I use it?'
);

INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags
)
SELECT
    'faq',
    'purchasing',
    'How much can I save with compatible vs OEM transceivers?',
    'Typical savings: SFP+ 10G: 5-10x ($15 vs $80-150). SFP28 25G: 3-5x ($20-35 vs $100-180). QSFP28 100G: 4-8x ($45-120 vs $300-900). QSFP-DD 400G: 2-4x ($250-500 vs $900-3200). Gap narrows at higher speeds because silicon cost dominates. For 400ZR coherent: ~50% savings only because the DSP is the main cost.',
    '{SFP+,SFP28,QSFP28,QSFP-DD}',
    '{10G,25G,100G,400G}',
    'info',
    '{pricing,cost,savings,compatible,oem}'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'faq'
      AND kb.question = 'How much can I save with compatible vs OEM transceivers?'
);

INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags
)
SELECT
    'faq',
    'technology',
    'What does DOM (Digital Optical Monitoring) show?',
    'DOM gives real-time telemetry: Tx Power (dBm), Rx Power (dBm), Temperature (C), Supply Voltage (V), Laser Bias Current (mA). Each has 4 alarm thresholds. Monitor Tx power for laser health (trending down = dying), Rx power for link quality, temperature for environment. CLI: show interface transceiver details.',
    '{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}',
    '{10G,25G,100G,400G,800G}',
    'info',
    '{dom,monitoring,diagnostics,telemetry}'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'faq'
      AND kb.question = 'What does DOM (Digital Optical Monitoring) show?'
);

-- NOTE(review): the answer below labels "Tx Power - Total Loss" as the power
-- budget, while its own worked example treats that same quantity as Rx power
-- (Tx -6.0, loss 4.2, Rx -10.2). Wording left untouched; confirm with a domain
-- owner before editing the seed text.
INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags
)
SELECT
    'faq',
    'deployment',
    'How do I calculate a fiber link power budget?',
    'Power Budget = Tx Power - Total Loss. Total Loss = Fiber Loss + Connector Loss + Splice Loss. Fiber: 0.35 dB/km at 1310nm, 0.22 dB/km at 1550nm. Connector: 0.3 dB each. Splice: 0.1 dB each. Always reserve 3 dB margin. Example: 10G LR 8km, 4 connectors, 2 splices: Tx -6.0, loss 4.2 dB, Rx -10.2, sensitivity -14.4, margin 4.2 dB.',
    '{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}',
    '{10G,25G,100G,400G}',
    'info',
    '{power_budget,calculation,fiber_loss}'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'faq'
      AND kb.question = 'How do I calculate a fiber link power budget?'
);

INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags
)
SELECT
    'faq',
    'deployment',
    'SR vs LR vs ER vs ZR vs DR vs FR — what do reach codes mean?',
    'SR (Short Reach): MMF, 100-300m, within-rack. LR (Long Reach): SMF, 10km, inter-building. ER (Extended Reach): SMF, 40km, metro. ZR (Very Long Reach): SMF, 80km, DCI. DR (Data center Reach): SMF, 500m, inter-pod parallel. FR (2km Reach): SMF, 2km, campus. Each uses different wavelengths — both ends must match.',
    '{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}',
    '{10G,25G,100G,400G,800G}',
    'info',
    '{reach,sr,lr,er,zr,dr,fr}'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'faq'
      AND kb.question = 'SR vs LR vs ER vs ZR vs DR vs FR — what do reach codes mean?'
);
-- === BEST PRACTICES ===
-- Seeds the 'best_practice' category. These entries do not set
-- resolution_steps, so that column receives whatever default the table defines.
--
-- Every statement is guarded with WHERE NOT EXISTS so the script can be re-run
-- without inserting the same entry twice (the only cleanup in this file targets
-- subcategory = 'test', so an unguarded re-run would duplicate every row).
-- The guard treats (category, question) as the natural key; no unique
-- constraint is visible from this file -- TODO confirm against the schema.
--
-- The literals are deliberately left untyped: inside INSERT ... SELECT,
-- PostgreSQL resolves unknown-type literals from the destination column, so the
-- array strings are coerced exactly as they were by INSERT ... VALUES.
INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags
)
SELECT
    'best_practice',
    'operations',
    'Always clean fiber connectors before inserting transceivers',
    '40% of transceiver RMAs test fine at the vendor — the problem was dirty connectors. One fingerprint adds 1-2 dB loss. Use IPA-based cleaning pen or cassette, verify with 200x inspection scope. Never blow on connectors. Cost of cleaning: $0.50. Cost of unnecessary RMA: $50-200 plus weeks of lead time.',
    '{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}',
    '{10G,25G,100G,400G,800G}',
    'info',
    '{cleaning,connector,best_practice,rma}'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'best_practice'
      AND kb.question = 'Always clean fiber connectors before inserting transceivers'
);

INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags
)
SELECT
    'best_practice',
    'procurement',
    'Test compatible transceivers in small batches before bulk ordering',
    'Never order 200 compatible optics based on datasheet alone. Buy 10, install in production switches, run for 2 weeks monitoring DOM, BER, temperature. Check: vendor authentication passes? DOM accurate? Any CRC errors? Survives switch reboot? Only after successful pilot, order the full batch.',
    '{SFP+,SFP28,QSFP28,QSFP-DD,OSFP}',
    '{10G,25G,100G,400G,800G}',
    'info',
    '{testing,procurement,pilot}'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'best_practice'
      AND kb.question = 'Test compatible transceivers in small batches before bulk ordering'
);
-- === KNOWN ISSUES ===
-- Seeds the 'known_issue' category. These entries do not set resolution_steps,
-- so that column receives whatever default the table defines.
--
-- Every statement is guarded with WHERE NOT EXISTS so the script can be re-run
-- without inserting the same entry twice (the only cleanup in this file targets
-- subcategory = 'test', so an unguarded re-run would duplicate every row).
-- The guard treats (category, question) as the natural key; no unique
-- constraint is visible from this file -- TODO confirm against the schema.
--
-- The literals are deliberately left untyped: inside INSERT ... SELECT,
-- PostgreSQL resolves unknown-type literals from the destination column, so the
-- array strings are coerced exactly as they were by INSERT ... VALUES.
INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags
)
SELECT
    'known_issue',
    'interop',
    'Cisco Nexus 9000 vendor-locking on QSFP-DD modules',
    'Cisco Nexus 9000 with NX-OS 10.2+ enforces stricter vendor checks on QSFP-DD. Compatible modules may log persistent "unsupported transceiver" warnings. Fix: "service unsupported-transceiver" in global config, then "no shut". Some firmware versions need "hardware profile transceiver-frequency default" for DWDM. Test with exact NX-OS version.',
    '{QSFP-DD}',
    '{400G}',
    'medium',
    '{cisco,nexus,vendor-lock,nx-os}'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'known_issue'
      AND kb.question = 'Cisco Nexus 9000 vendor-locking on QSFP-DD modules'
);

INSERT INTO knowledge_base (
    category, subcategory, question, answer,
    applies_to_form_factors, applies_to_speeds, severity, tags
)
SELECT
    'known_issue',
    'interop',
    'Arista 400G FEC negotiation mismatch with compatible optics',
    'Arista 7060X5/7260X defaults to specific FEC mode (RS-FEC CL119) for 400G. If compatible QSFP-DD advertises different FEC, link stays "notconnect" despite good power. Fix: manually set "fec rs-fec" or "fec cl119" under interface config. Negotiation issue, not hardware.',
    '{QSFP-DD,OSFP}',
    '{400G}',
    'medium',
    '{arista,fec,negotiation,400g}'
WHERE NOT EXISTS (
    SELECT 1
    FROM knowledge_base AS kb
    WHERE kb.category = 'known_issue'
      AND kb.question = 'Arista 400G FEC negotiation mismatch with compatible optics'
);