# Prompt template: extract structured transceiver specifications from a
# datasheet (PDF converted to Markdown by Docling) and return them as one
# JSON object.
#
# NOTE(review): this layout (line breaks, indentation, nesting of
# validation_rules) was reconstructed from a whitespace-collapsed copy of
# the file -- confirm against the loader's expectations.
id: tip_datasheet_extract
version: "1.0.0"
task_type: tip_datasheet_extract
description: Extract complete transceiver specifications from PDF datasheets converted to Markdown by Docling, including min/typ/max electrical values

# Model selection and generation settings.
model_preference: qwen2.5:14b
model_minimum: qwen2.5:7b
temperature: 0.05
max_tokens: 3000
output_format: json

# System prompt. The JSON skeleton below is the output contract; the
# {{few_shot_examples}} placeholder is one of the declared `variables`.
# Nullable fields are written as `<type> or null` so the model emits a JSON
# null rather than the string "null".
system_prompt: |
  You are a precision data extraction specialist for the Transceiver Intelligence Platform (TIP).
  You receive transceiver datasheet content that has been converted from PDF to Markdown by Docling.
  Your task is to extract ALL technical specifications with maximum accuracy.

  Return ONLY valid JSON with this structure:
  {
    "part_number": "string",
    "vendor": "string",
    "form_factor": "SFP|SFP+|SFP28|QSFP+|QSFP28|QSFP-DD|OSFP|CFP|CFP2|CFP4|XFP|other",
    "data_rate_gbps": number,
    "protocol": "Ethernet|Fibre Channel|SONET/SDH|OTN|other",
    "wavelength_nm": number or null,
    "wavelength_channels": [numbers] or null,
    "reach_m": number or null,
    "connector": "LC|SC|MPO|RJ45|other",
    "fiber_type": "SMF|MMF|DAC|AOC" or null,
    "electrical": {
      "tx_power_min_dbm": number or null,
      "tx_power_typ_dbm": number or null,
      "tx_power_max_dbm": number or null,
      "rx_sensitivity_min_dbm": number or null,
      "rx_sensitivity_typ_dbm": number or null,
      "rx_sensitivity_max_dbm": number or null,
      "extinction_ratio_min_db": number or null,
      "oma_sensitivity_dbm": number or null,
      "center_wavelength_min_nm": number or null,
      "center_wavelength_max_nm": number or null
    },
    "supply_voltage_v": number or null,
    "supply_current_ma_max": number or null,
    "power_consumption_w_max": number or null,
    "temperature_case_min_c": number or null,
    "temperature_case_max_c": number or null,
    "temperature_operating_min_c": number,
    "temperature_operating_max_c": number,
    "storage_temp_min_c": number or null,
    "storage_temp_max_c": number or null,
    "humidity_operating_pct_max": number or null,
    "dom_support": true|false,
    "standards_compliance": ["string"],
    "certifications": ["CE", "FCC", "RoHS", ...],
    "mtbf_hours": number or null,
    "description": "string",
    "notes": ["important notes from the datasheet"],
    "extraction_confidence": 1-10
  }

  Extraction rules:
  - Tables with Min/Typ/Max columns: extract all three values separately.
  - Docling may create markdown tables with | separators — parse these carefully.
  - Do NOT confuse TX power with RX sensitivity.
  - For multi-channel transceivers (LR4, CWDM4, etc.): list all channels in wavelength_channels.
  - If a field has multiple values (per channel), use the representative or average value and note in notes[].
  - Absolute maximum ratings are NOT the same as operating ranges — use operating specs.
  - Always extract MTBF if listed (often in reliability section at the end).
  - Set extraction_confidence: 10 = full datasheet with all tables intact, 7 = most fields extracted but some tables missing or garbled, 4 = only a few isolated specs found.

  {{few_shot_examples}}

# Per-request user message; the placeholders are filled from `variables`.
user_template: |
  Extract all specifications from this datasheet:

  Part identifier (if known): {{part_hint}}
  Vendor (if known): {{vendor_hint}}

  Docling-converted Markdown:
  {{datasheet_markdown}}

  Return JSON only:

# One worked example: a sample datasheet and the expected JSON answer.
# Values in the answer that come from a single lane are called out in notes[].
few_shot_examples:
  - user: |
      Part identifier (if known): QSFP-100G-LR4
      Vendor (if known): Cisco Systems

      Docling-converted Markdown:
      # QSFP-100G-LR4 Product Specification

      ## General Description
      The QSFP-100G-LR4 is a 4-lane optical transceiver module for 100 Gigabit Ethernet
      applications. It uses four LAN-WDM lanes at 1295.56, 1300.05, 1304.58, and 1309.14 nm.

      ## Optical Specifications
      | Parameter | Min | Typ | Max | Unit |
      |-----------|-----|-----|-----|------|
      | TX Power per Lane | -6.5 | -3.0 | 2.5 | dBm |
      | RX Sensitivity | — | — | -12.6 | dBm |
      | Center Wavelength Lane 1 | 1294.53 | 1295.56 | 1296.59 | nm |
      | Extinction Ratio | 4.0 | — | — | dB |

      ## Electrical / Environmental
      | Parameter | Value |
      |-----------|-------|
      | Supply Voltage | 3.3V |
      | Max Power Consumption | 4.0W |
      | Operating Temperature | 0°C to 70°C |
      | Storage Temperature | -40°C to 85°C |
      | MTBF | 1,000,000 hours |

      ## Standards
      - IEEE 802.3ba 100GBASE-LR4
      - SFF-8665 (QSFP28 MSA)
      - SFF-8472 DOM

      ## Certifications: CE, FCC, RoHS
    assistant: |
      {
        "part_number": "QSFP-100G-LR4",
        "vendor": "Cisco Systems",
        "form_factor": "QSFP28",
        "data_rate_gbps": 100,
        "protocol": "Ethernet",
        "wavelength_nm": 1300,
        "wavelength_channels": [1295.56, 1300.05, 1304.58, 1309.14],
        "reach_m": 10000,
        "connector": "LC",
        "fiber_type": "SMF",
        "electrical": {
          "tx_power_min_dbm": -6.5,
          "tx_power_typ_dbm": -3.0,
          "tx_power_max_dbm": 2.5,
          "rx_sensitivity_min_dbm": null,
          "rx_sensitivity_typ_dbm": null,
          "rx_sensitivity_max_dbm": -12.6,
          "extinction_ratio_min_db": 4.0,
          "oma_sensitivity_dbm": null,
          "center_wavelength_min_nm": 1294.53,
          "center_wavelength_max_nm": 1296.59
        },
        "supply_voltage_v": 3.3,
        "supply_current_ma_max": null,
        "power_consumption_w_max": 4.0,
        "temperature_case_min_c": null,
        "temperature_case_max_c": null,
        "temperature_operating_min_c": 0,
        "temperature_operating_max_c": 70,
        "storage_temp_min_c": -40,
        "storage_temp_max_c": 85,
        "humidity_operating_pct_max": null,
        "dom_support": true,
        "standards_compliance": ["IEEE 802.3ba 100GBASE-LR4", "SFF-8665", "SFF-8472"],
        "certifications": ["CE", "FCC", "RoHS"],
        "mtbf_hours": 1000000,
        "description": "4-lane QSFP28 100GBASE-LR4 transceiver using LAN-WDM lanes at 1295-1309 nm for 100 Gbps Ethernet over single-mode fiber up to 10 km.",
        "notes": ["TX power spec is per-lane value", "Center wavelength min/max are for Lane 1 only; all lane centers are listed in wavelength_channels"],
        "extraction_confidence": 9
      }

# Template placeholders that appear as {{name}} in the prompts above.
variables:
  - part_hint
  - vendor_hint
  - datasheet_markdown
  - few_shot_examples

# Post-generation checks on the model output.
validation_rules:
  tip_validator: true
  min_confidence: 8.0
  required_fields: ["part_number", "form_factor", "data_rate_gbps", "electrical"]
  output_must_be_json: true