transceiver-db/scripts/enrich-on-erik.sh

171 lines
5.4 KiB
Bash

#!/bin/bash
# Self-contained Flexoptix enrichment script to run ON Erik
# Does: DB query → scrape flexoptix.net → generate SQL → apply to DB
DB_PASS="tip_prod_2026"
DB_USER="tip"
DB_NAME="transceiver_db"
DB_PORT="5433"
OUTPUT="/tmp/011-flexoptix-enrichment.sql"
LOG="/tmp/enrich-flexoptix.log"
echo "$(date): Starting enrichment" > "$LOG"
# Step 1: Get Flexoptix product list from DB
echo "Step 1: Querying DB..." >> "$LOG"
PGPASSWORD=$DB_PASS psql -h localhost -p $DB_PORT -U $DB_USER -d $DB_NAME -t -A -F'|' -c \
"SELECT t.id, t.product_page_url, t.part_number, t.standard_name FROM transceivers t JOIN vendors v ON t.vendor_id = v.id WHERE v.name = 'FLEXOPTIX' AND t.product_page_url IS NOT NULL ORDER BY t.part_number" \
> /tmp/flexoptix-products.txt 2>> "$LOG"
TOTAL=$(wc -l < /tmp/flexoptix-products.txt | tr -d ' ')
echo " Found $TOTAL products" >> "$LOG"
if [ "$TOTAL" -lt 1 ]; then
echo "ERROR: No products found" >> "$LOG"
exit 1
fi
# Step 2: Start SQL file
cat > "$OUTPUT" << SQLEOF
-- 011: Flexoptix product enrichment
-- Generated: $(date '+%Y-%m-%d %H:%M')
-- Products: $TOTAL
BEGIN;
SQLEOF
COUNT=0
IMAGES=0
ENRICHED=0
# Step 3: Scrape each product page
while IFS='|' read -r ID URL PARTNUM STDNAME; do
[ -z "$URL" ] && continue
COUNT=$((COUNT + 1))
NAME="${STDNAME:-$PARTNUM}"
echo "[$COUNT/$TOTAL] $NAME" >> "$LOG"
# Fetch page
HTML=$(curl -s -L --max-time 15 -H "User-Agent: Mozilla/5.0 TIP-Bot/1.0" "$URL" 2>/dev/null)
if [ ${#HTML} -lt 500 ]; then
echo " SKIP (empty/small)" >> "$LOG"
continue
fi
SETS=""
# Extract image URL
IMG=$(echo "$HTML" | grep -oE 'https://[^"]+/cache/[^"]+_A_[^"]+\.jpg' | head -1)
if [ -n "$IMG" ]; then
IMG_ESC=$(echo "$IMG" | sed "s/'/''/g")
SETS="image_url = '$IMG_ESC'"
IMAGES=$((IMAGES + 1))
fi
# Extract specs using python3 if available
if command -v python3 > /dev/null 2>&1; then
SPEC_DATA=$(echo "$HTML" | python3 -c "
import sys, re
html = sys.stdin.read()
for m in re.finditer(r'<th[^>]*>(.*?)</th>\s*<td[^>]*>(.*?)</td>', html, re.S|re.I):
label = re.sub(r'<[^>]+>', '', m.group(1)).strip().upper()
value = re.sub(r'<[^>]+>', '', m.group(2)).strip()
if label and value and value.lower() not in ('n/a', '-', ''):
# Use tab separator to avoid issues with = in values
print(label + '\t' + value)
" 2>/dev/null)
else
SPEC_DATA=""
fi
NOTES=""
while IFS=$'\t' read -r KEY VAL; do
[ -z "$KEY" ] && continue
VAL_ESC=$(echo "$VAL" | sed "s/'/''/g")
case "$KEY" in
"POWER CONSUMPTION")
W=$(echo "$VAL" | grep -oE '[0-9]+\.?[0-9]*' | head -1)
[ -n "$W" ] && SETS="${SETS:+$SETS, }power_consumption_w = '$W'"
;;
"CONNECTOR / POLISH"|"CONNECTOR")
SETS="${SETS:+$SETS, }connector = '$VAL_ESC'"
;;
"MODULATION")
SETS="${SETS:+$SETS, }modulation = '$VAL_ESC'"
;;
"WAVELENGTH TX (TYPICAL)"|"WAVELENGTH")
SETS="${SETS:+$SETS, }wavelengths = '$VAL_ESC'"
;;
"DISTANCE")
SETS="${SETS:+$SETS, }reach_label = '$VAL_ESC'"
;;
"TEMPERATURE RANGE"|"OPERATING TEMPERATURE")
SETS="${SETS:+$SETS, }temp_range = '$VAL_ESC'"
;;
"LANE COUNT")
LC=$(echo "$VAL" | grep -oE '[0-9]+' | head -1)
[ -n "$LC" ] && SETS="${SETS:+$SETS, }lanes = '$LC'"
;;
"BANDWIDTH PER LANE"|"BANDWIDTH")
SETS="${SETS:+$SETS, }lane_rate = '$VAL_ESC'"
;;
"INBUILT FEC")
echo "$VAL" | grep -qiE '^(no|none)$' || SETS="${SETS:+$SETS, }fec_type = '$VAL_ESC'"
;;
"POWERBUDGET (DB)")
PB=$(echo "$VAL" | grep -oE '[0-9]+\.?[0-9]*' | head -1)
[ -n "$PB" ] && SETS="${SETS:+$SETS, }optical_budget_db = '$PB'"
;;
"TRANSMIT MIN/MAX PER LANE")
TX=$(echo "$VAL" | grep -oE '\-?[0-9]+\.?[0-9]*' | head -1)
[ -n "$TX" ] && SETS="${SETS:+$SETS, }tx_power_min_dbm = '$TX'"
;;
"RECEIVER MIN/MAX PER LANE")
RX=$(echo "$VAL" | grep -oE '\-?[0-9]+\.?[0-9]*' | head -1)
[ -n "$RX" ] && SETS="${SETS:+$SETS, }rx_sensitivity_dbm = '$RX'"
;;
"INTERFACE")
SETS="${SETS:+$SETS, }fiber_type = '$VAL_ESC'"
;;
"COMPLIANCE CODE")
SETS="${SETS:+$SETS, }ieee_reference = '$VAL_ESC'"
;;
"DIGITAL DIAGNOSTIC MONITORING (DDM)")
echo "$VAL" | grep -qi 'yes' && SETS="${SETS:+$SETS, }dom_support = true" || SETS="${SETS:+$SETS, }dom_support = false"
;;
*)
[ ${#VAL} -lt 200 ] && NOTES="${NOTES:+$NOTES; }$KEY: $VAL"
;;
esac
done <<< "$SPEC_DATA"
# Add notes
if [ -n "$NOTES" ]; then
NOTES_CUT="${NOTES:0:1000}"
NOTES_ESC=$(echo "$NOTES_CUT" | sed "s/'/''/g")
SETS="${SETS:+$SETS, }notes = '$NOTES_ESC'"
fi
if [ -n "$SETS" ]; then
echo "-- $NAME" >> "$OUTPUT"
echo "UPDATE transceivers SET $SETS WHERE id = '$ID';" >> "$OUTPUT"
echo "" >> "$OUTPUT"
ENRICHED=$((ENRICHED + 1))
echo " -> OK ($ENRICHED enriched, $IMAGES imgs)" >> "$LOG"
fi
sleep 0.3
done < /tmp/flexoptix-products.txt
echo "COMMIT;" >> "$OUTPUT"
echo "-- Summary: $ENRICHED enriched, $IMAGES images" >> "$OUTPUT"
echo "" >> "$LOG"
echo "Step 3 done: $ENRICHED/$TOTAL enriched, $IMAGES images" >> "$LOG"
# Step 4: Apply SQL
echo "Step 4: Applying SQL..." >> "$LOG"
PGPASSWORD=$DB_PASS psql -h localhost -p $DB_PORT -U $DB_USER -d $DB_NAME -f "$OUTPUT" >> "$LOG" 2>&1
echo "$(date): ALL DONE" >> "$LOG"