transceiver-db/scripts/enrich-v3.sh

126 lines
5.2 KiB
Bash

#!/bin/bash
# V3: Flexoptix enrichment - simple, direct, works
#
# Setup stage:
#   * starts a fresh log file ($LOG) and a fresh SQL file ($SQL),
#   * exports the database password used by every psql call in this script,
#   * dumps all FLEXOPTIX transceivers that have a product page URL into
#     /tmp/fo-list.txt, one "id|url|part_number|standard_name" row per line,
#   * records the number of rows in TOTAL.
LOG="/tmp/enrich-v3.log"
SQL="/tmp/enrichment-v3.sql"
printf '%s\n' "$(date): V3 start" > "$LOG"

# Direct psql (no eval)
# A PGPASSWORD that is already set in the environment takes precedence, so the
# credential can be supplied by the caller instead of this file.
# TODO(review): the literal fallback is kept only for backward compatibility;
# move it out of the source (e.g. ~/.pgpass) and rotate it.
export PGPASSWORD="${PGPASSWORD:-tip_prod_2026}"

# -t -A -F'|' => tuples only, unaligned, pipe-separated: the format the
# enrichment loop splits with IFS='|'.
if ! psql -h localhost -p 5433 -U tip -d transceiver_db -t -A -F'|' -c \
  "SELECT t.id, t.product_page_url, t.part_number, t.standard_name FROM transceivers t JOIN vendors v ON t.vendor_id = v.id WHERE v.name = 'FLEXOPTIX' AND t.product_page_url IS NOT NULL" \
  > /tmp/fo-list.txt 2>> "$LOG"; then
  # Best effort, as before: keep going, but make the reason for an empty or
  # partial product list visible in the log.
  printf '%s\n' "WARN: product list query failed; see psql error above" >> "$LOG"
fi

TOTAL=$(wc -l < /tmp/fo-list.txt | tr -d ' ')
printf '%s\n' "Products: $TOTAL" >> "$LOG"

# Start the SQL file with a timestamped header comment and a blank line.
{
  printf '%s\n' "-- V3 enrichment $(date '+%Y-%m-%d %H:%M')"
  printf '\n'
} > "$SQL"
# Python program that turns one product page (HTML on stdin) into the body of
# an SQL SET clause (stdout). It is kept in a variable and run with
# `python3 -c` so that stdin stays free for the HTML: feeding the program
# through a heredoc would replace the pipe as stdin and the parser would
# never see the page.
# `read -d ''` returns non-zero when it reaches end of input; that is expected.
IFS= read -r -d '' FO_PARSER_PY <<'PYEOF' || true
import re
import sys
from html import unescape

# Decode explicitly so the result does not depend on the locale.
page = sys.stdin.buffer.read().decode('utf-8', 'replace')

# First product image URL of the form .../cache/..._A_....jpg, if any.
img_match = re.search(r'https://[^"\s]+/cache/[^"\s]+_A_[^"\s]+\.jpg', page)
img = img_match.group(0) if img_match else None


def clean(fragment):
    """Strip tags, decode HTML entities and trim surrounding whitespace."""
    return unescape(re.sub(r'<[^>]+>', '', fragment)).strip()


# Collect <th>label</th><td>value</td> pairs; labels are upper-cased.
# \b keeps <thead> from being mistaken for a header cell.
specs = {}
for m in re.finditer(r'<th\b[^>]*>(.*?)</th>\s*<td\b[^>]*>(.*?)</td>',
                     page, re.S | re.I):
    label = clean(m.group(1)).upper()
    value = clean(m.group(2))
    if label and value and value.lower() not in ('n/a', '-'):
        specs[label] = value

if not specs and not img:
    sys.exit(0)

# A real number: digits with an optional fraction, or a bare fraction.
# (A plain [\d.]+ would also accept a lone "." as in "max. 3.5 W".)
NUMBER = r'(?:\d+(?:\.\d+)?|\.\d+)'


def unsigned_number(text):
    m = re.search(NUMBER, text)
    return m.group() if m else None


def signed_number(text):
    m = re.search('-?' + NUMBER, text)
    return m.group() if m else None


def first_int(text):
    m = re.search(r'\d+', text)
    return m.group() if m else None


def temp_class(text):
    # \u00b0 is the degree sign; written as an escape so the program stays
    # pure ASCII when passed on the command line.
    if any(tok in text for tok in ('ommercial', '0\u00b0C to 70')):
        return 'COM'
    if any(tok in text for tok in ('ndustrial', '-40')):
        return 'IND'
    if 'xtended' in text:
        return 'EXT'
    return 'COM'


# (spec label, target column, converter or None). Order matters: the first
# label that yields a value for a column wins.
FIELD_MAP = [
    ('POWER CONSUMPTION', 'power_consumption_w', unsigned_number),
    ('CONNECTOR / POLISH', 'connector', None),
    ('CONNECTOR', 'connector', None),
    ('MODULATION', 'modulation', None),
    ('WAVELENGTH TX (TYPICAL)', 'wavelengths', None),
    ('WAVELENGTH', 'wavelengths', None),
    ('DISTANCE', 'reach_label', None),
    ('TEMPERATURE RANGE', 'temp_range', temp_class),
    ('OPERATING TEMPERATURE', 'temp_range', temp_class),
    ('LANE COUNT', 'lanes', first_int),
    ('BANDWIDTH PER LANE', 'lane_rate', None),
    ('BANDWIDTH', 'lane_rate', None),
    ('INBUILT FEC', 'fec_type',
     lambda v: v if v.lower() not in ('no', 'none') else None),
    ('POWERBUDGET (DB)', 'optical_budget_db', unsigned_number),
    ('TRANSMIT MIN/MAX PER LANE', 'tx_power_min_dbm', signed_number),
    ('RECEIVER MIN/MAX PER LANE', 'rx_sensitivity_dbm', signed_number),
    ('INTERFACE', 'fiber_type', None),
    ('COMPLIANCE CODE', 'ieee_reference', None),
    ('DIGITAL DIAGNOSTIC MONITORING (DDM)', 'dom_support',
     lambda v: 'true' if 'yes' in v.lower() else 'false'),
]

cols = {}
mapped = set()
for label, col, convert in FIELD_MAP:
    if label not in specs or col in cols:
        continue
    try:
        val = convert(specs[label]) if convert else specs[label]
    except Exception:
        # Best effort: a value that cannot be converted is simply not mapped.
        continue
    if val is not None:
        cols[col] = val
        mapped.add(label)

# Every spec that did not end up in a column is preserved in "notes".
extra = [f'{k}: {v}' for k, v in specs.items()
         if k not in mapped and len(v) < 200]
if extra:
    cols['notes'] = '; '.join(extra)[:1000]
if img:
    cols['image_url'] = img
if not cols:
    sys.exit(0)


def sql_quote(value):
    """Double single quotes for use inside an SQL string literal."""
    return str(value).replace("'", "''")


parts = []
for col, val in cols.items():
    if col == 'dom_support':
        parts.append(f'{col} = {val}')  # boolean literal, unquoted
    else:
        parts.append(f"{col} = '{sql_quote(val)}'")
sys.stdout.buffer.write((', '.join(parts) + '\n').encode('utf-8'))
PYEOF

#######################################
# Build the SET clause for one product page.
# Globals:   FO_PARSER_PY (read)
# Arguments: none
# Inputs:    product page HTML on stdin
# Outputs:   "col = 'value', ..." on stdout; nothing when no data was found
#######################################
fo_build_set_clause() {
  python3 -c "$FO_PARSER_PY"
}

# Walk the product list, fetch each product page and append one UPDATE
# statement per enriched product to $SQL. OK counts the generated statements.
OK=0
SQ="'"
while IFS='|' read -r ID URL PN SN; do
  [[ -z "$URL" ]] && continue
  NAME="${SN:-$PN}"
  HTML=$(curl -s -L --max-time 15 -H "User-Agent: Mozilla/5.0 TIP-Bot/1.0" "$URL" 2>/dev/null)
  # Responses under 500 characters are treated as failed fetches.
  if (( ${#HTML} < 500 )); then
    printf '%s\n' "skip $ID: empty or short response from $URL" >> "$LOG"
    continue
  fi
  SQLLINE=$(printf '%s\n' "$HTML" | fo_build_set_clause 2>> "$LOG")
  if [[ -n "$SQLLINE" ]]; then
    {
      printf '%s\n' "-- $NAME"
      # Single quotes in the id are doubled so the literal cannot be broken.
      printf '%s\n' "UPDATE transceivers SET $SQLLINE WHERE id = '${ID//$SQ/$SQ$SQ}';"
      printf '\n'
    } >> "$SQL"
    OK=$((OK + 1))
  fi
  # Brief pause before the next request.
  sleep 0.3
done < /tmp/fo-list.txt
# Finish the SQL file with a summary comment and note the tally in the log.
printf '%s\n' "-- Total: $OK enriched" >> "$SQL"
{
  printf '\n'
  printf '%s\n' "Generated: $OK/$TOTAL"
  # Apply
  printf '%s\n' "Applying SQL..."
} >> "$LOG"
psql -h localhost -p 5433 -U tip -d transceiver_db -f "$SQL" >> "$LOG" 2>&1

# Restart API (only attempted when /opt/tip can be entered)
if cd /opt/tip; then
  pm2 restart tip-api >> "$LOG" 2>&1
fi

# Counts: one "<tag>=<n>" line per entry, where n is the number of
# transceivers rows matching the condition after the "|".
printf '%s\n' "=== COUNTS ===" >> "$LOG"
count_specs=(
  "img|image_url IS NOT NULL"
  "conn|connector IS NOT NULL"
  "notes|notes IS NOT NULL AND notes != ''"
  "mod|modulation IS NOT NULL"
  "power|power_consumption_w IS NOT NULL"
  "lane_rate|lane_rate IS NOT NULL"
)
for count_spec in "${count_specs[@]}"; do
  count_tag=${count_spec%%|*}
  count_cond=${count_spec#*|}
  psql -h localhost -p 5433 -U tip -d transceiver_db -t -A -c "SELECT '${count_tag}=' || count(*) FROM transceivers WHERE ${count_cond}" >> "$LOG"
done

printf '%s: V3 DONE\n' "$(date)" >> "$LOG"