171 lines
5.4 KiB
Bash
171 lines
5.4 KiB
Bash
#!/bin/bash
|
|
# Self-contained Flexoptix enrichment script to run ON Erik
|
|
# Does: DB query → scrape flexoptix.net → generate SQL → apply to DB
|
|
|
|
DB_PASS="tip_prod_2026"
|
|
DB_USER="tip"
|
|
DB_NAME="transceiver_db"
|
|
DB_PORT="5433"
|
|
OUTPUT="/tmp/011-flexoptix-enrichment.sql"
|
|
LOG="/tmp/enrich-flexoptix.log"
|
|
|
|
echo "$(date): Starting enrichment" > "$LOG"
|
|
|
|
# Step 1: Get Flexoptix product list from DB
|
|
echo "Step 1: Querying DB..." >> "$LOG"
|
|
PGPASSWORD=$DB_PASS psql -h localhost -p $DB_PORT -U $DB_USER -d $DB_NAME -t -A -F'|' -c \
|
|
"SELECT t.id, t.product_page_url, t.part_number, t.standard_name FROM transceivers t JOIN vendors v ON t.vendor_id = v.id WHERE v.name = 'FLEXOPTIX' AND t.product_page_url IS NOT NULL ORDER BY t.part_number" \
|
|
> /tmp/flexoptix-products.txt 2>> "$LOG"
|
|
|
|
TOTAL=$(wc -l < /tmp/flexoptix-products.txt | tr -d ' ')
|
|
echo " Found $TOTAL products" >> "$LOG"
|
|
|
|
if [ "$TOTAL" -lt 1 ]; then
|
|
echo "ERROR: No products found" >> "$LOG"
|
|
exit 1
|
|
fi
|
|
|
|
# Step 2: Start SQL file
|
|
cat > "$OUTPUT" << SQLEOF
|
|
-- 011: Flexoptix product enrichment
|
|
-- Generated: $(date '+%Y-%m-%d %H:%M')
|
|
-- Products: $TOTAL
|
|
BEGIN;
|
|
|
|
SQLEOF
|
|
|
|
COUNT=0
|
|
IMAGES=0
|
|
ENRICHED=0
|
|
|
|
# Step 3: Scrape each product page
|
|
while IFS='|' read -r ID URL PARTNUM STDNAME; do
|
|
[ -z "$URL" ] && continue
|
|
COUNT=$((COUNT + 1))
|
|
NAME="${STDNAME:-$PARTNUM}"
|
|
echo "[$COUNT/$TOTAL] $NAME" >> "$LOG"
|
|
|
|
# Fetch page
|
|
HTML=$(curl -s -L --max-time 15 -H "User-Agent: Mozilla/5.0 TIP-Bot/1.0" "$URL" 2>/dev/null)
|
|
if [ ${#HTML} -lt 500 ]; then
|
|
echo " SKIP (empty/small)" >> "$LOG"
|
|
continue
|
|
fi
|
|
|
|
SETS=""
|
|
|
|
# Extract image URL
|
|
IMG=$(echo "$HTML" | grep -oE 'https://[^"]+/cache/[^"]+_A_[^"]+\.jpg' | head -1)
|
|
if [ -n "$IMG" ]; then
|
|
IMG_ESC=$(echo "$IMG" | sed "s/'/''/g")
|
|
SETS="image_url = '$IMG_ESC'"
|
|
IMAGES=$((IMAGES + 1))
|
|
fi
|
|
|
|
# Extract specs using python3 if available
|
|
if command -v python3 > /dev/null 2>&1; then
|
|
SPEC_DATA=$(echo "$HTML" | python3 -c "
|
|
import sys, re
|
|
html = sys.stdin.read()
|
|
for m in re.finditer(r'<th[^>]*>(.*?)</th>\s*<td[^>]*>(.*?)</td>', html, re.S|re.I):
|
|
label = re.sub(r'<[^>]+>', '', m.group(1)).strip().upper()
|
|
value = re.sub(r'<[^>]+>', '', m.group(2)).strip()
|
|
if label and value and value.lower() not in ('n/a', '-', ''):
|
|
# Use tab separator to avoid issues with = in values
|
|
print(label + '\t' + value)
|
|
" 2>/dev/null)
|
|
else
|
|
SPEC_DATA=""
|
|
fi
|
|
|
|
NOTES=""
|
|
while IFS=$'\t' read -r KEY VAL; do
|
|
[ -z "$KEY" ] && continue
|
|
VAL_ESC=$(echo "$VAL" | sed "s/'/''/g")
|
|
case "$KEY" in
|
|
"POWER CONSUMPTION")
|
|
W=$(echo "$VAL" | grep -oE '[0-9]+\.?[0-9]*' | head -1)
|
|
[ -n "$W" ] && SETS="${SETS:+$SETS, }power_consumption_w = '$W'"
|
|
;;
|
|
"CONNECTOR / POLISH"|"CONNECTOR")
|
|
SETS="${SETS:+$SETS, }connector = '$VAL_ESC'"
|
|
;;
|
|
"MODULATION")
|
|
SETS="${SETS:+$SETS, }modulation = '$VAL_ESC'"
|
|
;;
|
|
"WAVELENGTH TX (TYPICAL)"|"WAVELENGTH")
|
|
SETS="${SETS:+$SETS, }wavelengths = '$VAL_ESC'"
|
|
;;
|
|
"DISTANCE")
|
|
SETS="${SETS:+$SETS, }reach_label = '$VAL_ESC'"
|
|
;;
|
|
"TEMPERATURE RANGE"|"OPERATING TEMPERATURE")
|
|
SETS="${SETS:+$SETS, }temp_range = '$VAL_ESC'"
|
|
;;
|
|
"LANE COUNT")
|
|
LC=$(echo "$VAL" | grep -oE '[0-9]+' | head -1)
|
|
[ -n "$LC" ] && SETS="${SETS:+$SETS, }lanes = '$LC'"
|
|
;;
|
|
"BANDWIDTH PER LANE"|"BANDWIDTH")
|
|
SETS="${SETS:+$SETS, }lane_rate = '$VAL_ESC'"
|
|
;;
|
|
"INBUILT FEC")
|
|
echo "$VAL" | grep -qiE '^(no|none)$' || SETS="${SETS:+$SETS, }fec_type = '$VAL_ESC'"
|
|
;;
|
|
"POWERBUDGET (DB)")
|
|
PB=$(echo "$VAL" | grep -oE '[0-9]+\.?[0-9]*' | head -1)
|
|
[ -n "$PB" ] && SETS="${SETS:+$SETS, }optical_budget_db = '$PB'"
|
|
;;
|
|
"TRANSMIT MIN/MAX PER LANE")
|
|
TX=$(echo "$VAL" | grep -oE '\-?[0-9]+\.?[0-9]*' | head -1)
|
|
[ -n "$TX" ] && SETS="${SETS:+$SETS, }tx_power_min_dbm = '$TX'"
|
|
;;
|
|
"RECEIVER MIN/MAX PER LANE")
|
|
RX=$(echo "$VAL" | grep -oE '\-?[0-9]+\.?[0-9]*' | head -1)
|
|
[ -n "$RX" ] && SETS="${SETS:+$SETS, }rx_sensitivity_dbm = '$RX'"
|
|
;;
|
|
"INTERFACE")
|
|
SETS="${SETS:+$SETS, }fiber_type = '$VAL_ESC'"
|
|
;;
|
|
"COMPLIANCE CODE")
|
|
SETS="${SETS:+$SETS, }ieee_reference = '$VAL_ESC'"
|
|
;;
|
|
"DIGITAL DIAGNOSTIC MONITORING (DDM)")
|
|
echo "$VAL" | grep -qi 'yes' && SETS="${SETS:+$SETS, }dom_support = true" || SETS="${SETS:+$SETS, }dom_support = false"
|
|
;;
|
|
*)
|
|
[ ${#VAL} -lt 200 ] && NOTES="${NOTES:+$NOTES; }$KEY: $VAL"
|
|
;;
|
|
esac
|
|
done <<< "$SPEC_DATA"
|
|
|
|
# Add notes
|
|
if [ -n "$NOTES" ]; then
|
|
NOTES_CUT="${NOTES:0:1000}"
|
|
NOTES_ESC=$(echo "$NOTES_CUT" | sed "s/'/''/g")
|
|
SETS="${SETS:+$SETS, }notes = '$NOTES_ESC'"
|
|
fi
|
|
|
|
if [ -n "$SETS" ]; then
|
|
echo "-- $NAME" >> "$OUTPUT"
|
|
echo "UPDATE transceivers SET $SETS WHERE id = '$ID';" >> "$OUTPUT"
|
|
echo "" >> "$OUTPUT"
|
|
ENRICHED=$((ENRICHED + 1))
|
|
echo " -> OK ($ENRICHED enriched, $IMAGES imgs)" >> "$LOG"
|
|
fi
|
|
|
|
sleep 0.3
|
|
done < /tmp/flexoptix-products.txt
|
|
|
|
echo "COMMIT;" >> "$OUTPUT"
|
|
echo "-- Summary: $ENRICHED enriched, $IMAGES images" >> "$OUTPUT"
|
|
|
|
echo "" >> "$LOG"
|
|
echo "Step 3 done: $ENRICHED/$TOTAL enriched, $IMAGES images" >> "$LOG"
|
|
|
|
# Step 4: Apply SQL
|
|
echo "Step 4: Applying SQL..." >> "$LOG"
|
|
PGPASSWORD=$DB_PASS psql -h localhost -p $DB_PORT -U $DB_USER -d $DB_NAME -f "$OUTPUT" >> "$LOG" 2>&1
|
|
|
|
echo "$(date): ALL DONE" >> "$LOG"
|