126 lines
5.2 KiB
Bash
126 lines
5.2 KiB
Bash
#!/bin/bash
# V3: Flexoptix enrichment - simple, direct, works
#
# Stage 1 of the script: reset the log, export every FLEXOPTIX transceiver
# that has a product page URL into a '|'-separated work list
# (id|product_page_url|part_number|standard_name), and start a fresh SQL file
# that the later stages append UPDATE statements to.

LOG="/tmp/enrich-v3.log"
SQL="/tmp/enrichment-v3.sql"

# Truncate the log on every run; everything after this appends.
printf '%s: V3 start\n' "$(date)" > "$LOG"

# Direct psql (no eval)
# NOTE(review): the password is hard-coded in the script; consider moving it
# to ~/.pgpass or the caller's environment.
export PGPASSWORD="***REDACTED***"

# -t -A -F'|' gives bare, unaligned rows that `read` can split on '|'.
# Abort when the export fails: continuing with an empty or partial list would
# only produce an empty SQL file downstream.
if ! psql -h localhost -p 5433 -U tip -d transceiver_db -t -A -F'|' -c \
  "SELECT t.id, t.product_page_url, t.part_number, t.standard_name FROM transceivers t JOIN vendors v ON t.vendor_id = v.id WHERE v.name = 'FLEXOPTIX' AND t.product_page_url IS NOT NULL" \
  > /tmp/fo-list.txt 2>> "$LOG"; then
  printf '%s: product export failed, aborting\n' "$(date)" >> "$LOG"
  exit 1
fi

# wc pads its output on some platforms, hence the tr.
TOTAL=$(wc -l < /tmp/fo-list.txt | tr -d ' ')
echo "Products: $TOTAL" >> "$LOG"

# Start the generated SQL file with a timestamped header and a blank line.
printf '%s\n\n' "-- V3 enrichment $(date '+%Y-%m-%d %H:%M')" > "$SQL"

# Number of products for which an UPDATE statement was generated.
OK=0

# Python program used by extract_set_clause. It is kept in a variable and run
# with `python3 -c` so that the interpreter's stdin stays free for the HTML;
# feeding the program through a here-document would replace the piped page as
# stdin and the extractor would always see empty input.
# (`read -d ''` returns non-zero at end of input, hence the `|| true`.)
IFS= read -r -d '' ENRICH_PY <<'PYEOF' || true
import html
import re
import sys

# Decode the bytes ourselves: vendor pages are not guaranteed to be valid
# UTF-8 and the interpreter's default stdin encoding depends on the locale.
page = sys.stdin.buffer.read().decode('utf-8', 'replace')

# A decimal number with at least one digit, so that a lone '.' (as in
# "approx. 1.5 W") is never picked up as the value.
NUM = r'(?:\d+(?:\.\d+)?|\.\d+)'


def clean(fragment):
    """Drop tags, decode HTML entities and collapse runs of whitespace."""
    return ' '.join(html.unescape(re.sub(r'<[^>]+>', '', fragment)).split())


def first(pattern):
    """Build a converter returning the first regex match in a value, or None."""
    def pick(value):
        found = re.search(pattern, value)
        return found.group() if found else None
    return pick


def temp_class(value):
    """Map a temperature description to COM / IND / EXT (COM when unknown)."""
    if any(x in value for x in ('ommercial', '0\u00b0C to 70')):
        return 'COM'
    if any(x in value for x in ('ndustrial', '-40')):
        return 'IND'
    return 'EXT' if 'xtended' in value else 'COM'


# First product image: a .jpg URL under /cache/ whose name contains "_A_".
found = re.search(r'https://[^"\s]+/cache/[^"\s]+_A_[^"\s]+\.jpg', page)
img = found.group(0) if found else None

# Spec table: every <th>label</th><td>value</td> pair, label upper-cased.
specs = {}
for row in re.finditer(r'<th[^>]*>(.*?)</th>\s*<td[^>]*>(.*?)</td>', page, re.S | re.I):
    label = clean(row.group(1)).upper()
    value = clean(row.group(2))
    if label and value and value.lower() not in ('n/a', '-'):
        specs[label] = value

if not specs and not img:
    sys.exit(0)

# (spec label, column, converter). Order matters: the first label that yields
# a value for a column wins, so specific labels precede their generic forms.
MAP = [
    ('POWER CONSUMPTION', 'power_consumption_w', first(NUM)),
    ('CONNECTOR / POLISH', 'connector', None),
    ('CONNECTOR', 'connector', None),
    ('MODULATION', 'modulation', None),
    ('WAVELENGTH TX (TYPICAL)', 'wavelengths', None),
    ('WAVELENGTH', 'wavelengths', None),
    ('DISTANCE', 'reach_label', None),
    ('TEMPERATURE RANGE', 'temp_range', temp_class),
    ('OPERATING TEMPERATURE', 'temp_range', temp_class),
    ('LANE COUNT', 'lanes', first(r'\d+')),
    ('BANDWIDTH PER LANE', 'lane_rate', None),
    ('BANDWIDTH', 'lane_rate', None),
    ('INBUILT FEC', 'fec_type', lambda v: v if v.lower() not in ('no', 'none') else None),
    ('POWERBUDGET (DB)', 'optical_budget_db', first(NUM)),
    ('TRANSMIT MIN/MAX PER LANE', 'tx_power_min_dbm', first('-?' + NUM)),
    ('RECEIVER MIN/MAX PER LANE', 'rx_sensitivity_dbm', first('-?' + NUM)),
    ('INTERFACE', 'fiber_type', None),
    ('COMPLIANCE CODE', 'ieee_reference', None),
    ('DIGITAL DIAGNOSTIC MONITORING (DDM)', 'dom_support', lambda v: 'true' if 'yes' in v.lower() else 'false'),
]

cols = {}
mapped = set()
for label, col, convert in MAP:
    if label not in specs or col in cols:
        continue
    val = convert(specs[label]) if convert else specs[label]
    if val is not None:
        cols[col] = val
        mapped.add(label)

# Specs that did not land in a column are preserved in the notes field.
extra = [f'{k}: {v}' for k, v in specs.items() if k not in mapped and len(v) < 200]
if extra:
    cols['notes'] = '; '.join(extra)[:1000]
if img:
    cols['image_url'] = img
if not cols:
    sys.exit(0)


def quote(value):
    """Escape a value for use inside a single-quoted SQL literal."""
    return str(value).replace("'", "''")


parts = []
for col, val in cols.items():
    if col == 'dom_support':
        parts.append(f'{col} = {val}')  # boolean literal, emitted unquoted
    else:
        parts.append(f"{col} = '{quote(val)}'")

# Write bytes so the output does not depend on the locale's stdout encoding.
sys.stdout.buffer.write((', '.join(parts) + '\n').encode('utf-8'))
PYEOF

#######################################
# Turn one product page into the assignment list of an UPDATE statement.
# Globals:   ENRICH_PY (read)
# Arguments: none
# Inputs:    the page HTML on stdin
# Outputs:   "col = 'value', ..." on stdout; nothing when the page has no
#            usable spec rows or product image
#######################################
extract_set_clause() {
  python3 -c "$ENRICH_PY"
}

# One work-list row per product: id|product_page_url|part_number|standard_name
while IFS='|' read -r ID URL PN SN; do
  [[ -n "$URL" ]] || continue
  # Prefer the standard name for the SQL comment, fall back to the part number.
  NAME="${SN:-$PN}"

  # --fail: do not hand HTTP error pages to the extractor.
  HTML=$(curl -s -f -L --max-time 15 -H "User-Agent: Mozilla/5.0 TIP-Bot/1.0" "$URL" 2>/dev/null)
  # Anything under 500 characters is treated as a failed or empty fetch.
  (( ${#HTML} >= 500 )) || continue

  SQLLINE=$(printf '%s' "$HTML" | extract_set_clause 2>> "$LOG")

  if [[ -n "$SQLLINE" ]]; then
    {
      printf '%s\n' "-- $NAME"
      printf '%s\n' "UPDATE transceivers SET $SQLLINE WHERE id = '$ID';"
      printf '\n'
    } >> "$SQL"
    OK=$((OK + 1))
  fi

  # Be polite to the vendor site between requests.
  sleep 0.3
done < /tmp/fo-list.txt

# Close the generated SQL file with a summary comment.
printf '%s\n' "-- Total: $OK enriched" >> "$SQL"

{
  printf '\n'
  printf '%s\n' "Generated: $OK/$TOTAL"
  # Apply
  printf '%s\n' "Applying SQL..."
} >> "$LOG"

psql -h localhost -p 5433 -U tip -d transceiver_db -f "$SQL" >> "$LOG" 2>&1

# Restart API (only attempted when the directory change succeeds)
if cd /opt/tip; then
  pm2 restart tip-api >> "$LOG" 2>&1
fi

# Counts: one "<tag>=<n>" line per enriched column, written to the log.
printf '%s\n' "=== COUNTS ===" >> "$LOG"

# Each entry is "<tag>:<WHERE condition>".
count_specs=(
  "img:image_url IS NOT NULL"
  "conn:connector IS NOT NULL"
  "notes:notes IS NOT NULL AND notes != ''"
  "mod:modulation IS NOT NULL"
  "power:power_consumption_w IS NOT NULL"
  "lane_rate:lane_rate IS NOT NULL"
)
for spec in "${count_specs[@]}"; do
  tag=${spec%%:*}
  cond=${spec#*:}
  psql -h localhost -p 5433 -U tip -d transceiver_db -t -A \
    -c "SELECT '${tag}=' || count(*) FROM transceivers WHERE ${cond}" >> "$LOG"
done

printf '%s: V3 DONE\n' "$(date)" >> "$LOG"