#!/bin/bash
# Self-contained Flexoptix enrichment script to run ON Erik
# Does: DB query → scrape flexoptix.net → generate SQL → apply to DB
#
# Usage: run with no arguments.
#
# Steps:
#   1. Query the local PostgreSQL DB for FLEXOPTIX transceivers that have a
#      product_page_url.
#   2. Start the SQL file at $OUTPUT (one transaction: BEGIN ... COMMIT).
#   3. Fetch each product page, extract an image URL and label/value spec
#      pairs, and append one UPDATE statement per product.
#   4. Apply the generated SQL file with psql.
#
# Environment:
#   FLEXOPTIX_DB_PASS  optional; overrides the built-in DB password.
#
# Files written:
#   $OUTPUT  generated SQL
#   $LOG     progress/error log (truncated at start of every run)
#
# Exit status: 0 on success; non-zero when the product query fails, no
# products are found, or applying the SQL fails.

set -u

# Prefer supplying the password via the environment (or ~/.pgpass) instead of
# keeping it in the script; the literal default is retained for compatibility.
DB_PASS="${FLEXOPTIX_DB_PASS:-***REDACTED***}"
readonly DB_USER="tip"
readonly DB_NAME="transceiver_db"
readonly DB_PORT="5433"
readonly OUTPUT="/tmp/011-flexoptix-enrichment.sql"
readonly LOG="/tmp/enrich-flexoptix.log"

# Python program that reads a product page on stdin and prints one
# "LABEL<TAB>value" line per spec row found.
IFS= read -r -d '' SPEC_EXTRACTOR <<'PYEOF' || true
import html as htmllib
import re
import sys

# Decode explicitly so a non-UTF-8 locale cannot make the read fail.
page = sys.stdin.buffer.read().decode('utf-8', errors='replace')

# NOTE(review): the tag names in this pattern were lost in the previous
# revision (only "]*>(.*?)\s*]*>(.*?)" survived, which can never capture a
# value). Reconstructed as "label cell followed by value cell"; confirm the
# tag names against the live product-page markup.
ROW = re.compile(
    r'<(th|td|dt)\b[^>]*>(.*?)</\1\s*>\s*<(td|dd)\b[^>]*>(.*?)</\3\s*>',
    re.S | re.I,
)


def clean(fragment):
    """Strip tags, decode entities, collapse all whitespace to single spaces."""
    text = re.sub(r'<[^>]+>', '', fragment)
    # Collapsing whitespace keeps tabs/newlines out of the TSV line protocol.
    return ' '.join(htmllib.unescape(text).split())


for m in ROW.finditer(page):
    label = clean(m.group(2)).upper()
    value = clean(m.group(4))
    if label and value and value.lower() not in ('n/a', '-'):
        # Use tab separator to avoid issues with = in values
        sys.stdout.buffer.write((label + '\t' + value + '\n').encode('utf-8'))
PYEOF

# Append one line to the log file.
log() {
  printf '%s\n' "$*" >> "$LOG"
}

# Run psql against the enrichment DB; extra arguments are passed through.
# The password travels via the environment, not argv.
run_psql() {
  PGPASSWORD="$DB_PASS" psql -h localhost -p "$DB_PORT" -U "$DB_USER" \
    -d "$DB_NAME" "$@"
}

# Print $1 as a single-quoted SQL string literal (embedded ' doubled).
sql_literal() {
  local q="'"
  printf "'%s'" "${1//$q/$q$q}"
}

# Print the leftmost match of extended regex $2 in $1 (nothing if no match).
first_match() {
  local re=$2
  if [[ "$1" =~ $re ]]; then
    printf '%s' "${BASH_REMATCH[0]}"
  fi
}

# Add "column = expr" to the SET list of the current product.
# Globals: SETS, SEEN_COLS (both read and written).
# Only the first assignment per column is kept: PostgreSQL rejects an UPDATE
# that assigns the same column twice, which would abort the whole transaction.
add_set() {
  local column=$1 expr=$2
  case ",$SEEN_COLS," in
    *",$column,"*) return 0 ;;
  esac
  SEEN_COLS="${SEEN_COLS:+$SEEN_COLS,}$column"
  SETS="${SETS:+$SETS, }$column = $expr"
}

printf '%s\n' "$(date): Starting enrichment" > "$LOG"

# Intermediate product list: unpredictable name, removed on any exit path.
PRODUCTS=$(mktemp "${TMPDIR:-/tmp}/flexoptix-products.XXXXXX") || {
  log "ERROR: mktemp failed"
  exit 1
}
trap 'rm -f -- "$PRODUCTS"' EXIT

# Step 1: Get Flexoptix product list from DB
log "Step 1: Querying DB..."
if ! run_psql -t -A -F'|' -c \
  "SELECT t.id, t.product_page_url, t.part_number, t.standard_name
   FROM transceivers t
   JOIN vendors v ON t.vendor_id = v.id
   WHERE v.name = 'FLEXOPTIX' AND t.product_page_url IS NOT NULL
   ORDER BY t.part_number" \
  > "$PRODUCTS" 2>> "$LOG"; then
  log "ERROR: product query failed"
  exit 1
fi

TOTAL=$(wc -l < "$PRODUCTS" | tr -d ' ')
log " Found $TOTAL products"

if [[ "$TOTAL" -lt 1 ]]; then
  log "ERROR: No products found"
  exit 1
fi

# Step 2: Start SQL file
cat > "$OUTPUT" << SQLEOF
-- 011: Flexoptix product enrichment
-- Generated: $(date '+%Y-%m-%d %H:%M')
-- Products: $TOTAL

BEGIN;

SQLEOF

COUNT=0
IMAGES=0
ENRICHED=0

# Spec extraction needs python3; without it only image URLs are collected.
HAVE_PYTHON=0
if command -v python3 > /dev/null 2>&1; then
  HAVE_PYTHON=1
else
  log "WARNING: python3 not found; specs will not be extracted"
fi

# Step 3: Scrape each product page
# The list is read on FD 3 so nothing inside the loop can consume it via stdin.
while IFS='|' read -r -u 3 ID URL PARTNUM STDNAME; do
  [[ -z "$URL" ]] && continue
  COUNT=$((COUNT + 1))
  NAME="${STDNAME:-$PARTNUM}"
  log "[$COUNT/$TOTAL] $NAME"

  # Fetch page; --fail keeps HTTP error pages from being parsed as products.
  HTML=$(curl -fsS -L --max-time 15 \
    -H "User-Agent: Mozilla/5.0 TIP-Bot/1.0" "$URL" 2>> "$LOG")
  if [[ ${#HTML} -lt 500 ]]; then
    log " SKIP (empty/small)"
    continue
  fi

  SETS=""
  SEEN_COLS=""

  # Extract image URL
  IMG=$(grep -oE 'https://[^"]+/cache/[^"]+_A_[^"]+\.jpg' <<< "$HTML" \
    | head -n 1)
  if [[ -n "$IMG" ]]; then
    add_set image_url "$(sql_literal "$IMG")"
    IMAGES=$((IMAGES + 1))
  fi

  # Extract specs using python3 if available
  SPEC_DATA=""
  if [[ "$HAVE_PYTHON" -eq 1 ]]; then
    SPEC_DATA=$(python3 -c "$SPEC_EXTRACTOR" <<< "$HTML" 2>> "$LOG")
  fi

  NOTES=""
  while IFS=$'\t' read -r KEY VAL; do
    [[ -z "$KEY" ]] && continue

    case "$KEY" in
      "POWER CONSUMPTION")
        NUM=$(first_match "$VAL" '[0-9]+(\.[0-9]+)?')
        [[ -n "$NUM" ]] && add_set power_consumption_w "'$NUM'"
        ;;
      "CONNECTOR / POLISH" | "CONNECTOR")
        add_set connector "$(sql_literal "$VAL")"
        ;;
      "MODULATION")
        add_set modulation "$(sql_literal "$VAL")"
        ;;
      "WAVELENGTH TX (TYPICAL)" | "WAVELENGTH")
        add_set wavelengths "$(sql_literal "$VAL")"
        ;;
      "DISTANCE")
        add_set reach_label "$(sql_literal "$VAL")"
        ;;
      "TEMPERATURE RANGE" | "OPERATING TEMPERATURE")
        add_set temp_range "$(sql_literal "$VAL")"
        ;;
      "LANE COUNT")
        NUM=$(first_match "$VAL" '[0-9]+')
        [[ -n "$NUM" ]] && add_set lanes "'$NUM'"
        ;;
      "BANDWIDTH PER LANE" | "BANDWIDTH")
        add_set lane_rate "$(sql_literal "$VAL")"
        ;;
      "INBUILT FEC")
        # A bare "no"/"none" means no FEC: leave the column untouched.
        if ! grep -qiE '^(no|none)$' <<< "$VAL"; then
          add_set fec_type "$(sql_literal "$VAL")"
        fi
        ;;
      "POWERBUDGET (DB)")
        NUM=$(first_match "$VAL" '[0-9]+(\.[0-9]+)?')
        [[ -n "$NUM" ]] && add_set optical_budget_db "'$NUM'"
        ;;
      "TRANSMIT MIN/MAX PER LANE")
        # First (optionally signed) number in the value is stored.
        NUM=$(first_match "$VAL" '-?[0-9]+(\.[0-9]+)?')
        [[ -n "$NUM" ]] && add_set tx_power_min_dbm "'$NUM'"
        ;;
      "RECEIVER MIN/MAX PER LANE")
        NUM=$(first_match "$VAL" '-?[0-9]+(\.[0-9]+)?')
        [[ -n "$NUM" ]] && add_set rx_sensitivity_dbm "'$NUM'"
        ;;
      "INTERFACE")
        add_set fiber_type "$(sql_literal "$VAL")"
        ;;
      "COMPLIANCE CODE")
        add_set ieee_reference "$(sql_literal "$VAL")"
        ;;
      "DIGITAL DIAGNOSTIC MONITORING (DDM)")
        if grep -qi 'yes' <<< "$VAL"; then
          add_set dom_support "true"
        else
          add_set dom_support "false"
        fi
        ;;
      *)
        # Unmapped specs are collected into the notes column (short ones only).
        if [[ ${#VAL} -lt 200 ]]; then
          NOTES="${NOTES:+$NOTES; }$KEY: $VAL"
        fi
        ;;
    esac
  done <<< "$SPEC_DATA"

  # Add notes (truncated before escaping so a doubled quote is never split)
  if [[ -n "$NOTES" ]]; then
    add_set notes "$(sql_literal "${NOTES:0:1000}")"
  fi

  if [[ -n "$SETS" ]]; then
    {
      printf '%s\n' "-- $NAME"
      printf '%s\n' "UPDATE transceivers SET $SETS WHERE id = $(sql_literal "$ID");"
      printf '\n'
    } >> "$OUTPUT"
    ENRICHED=$((ENRICHED + 1))
    log " -> OK ($ENRICHED enriched, $IMAGES imgs)"
  fi

  # Small pause between page fetches.
  sleep 0.3
done 3< "$PRODUCTS"

{
  printf '%s\n' "COMMIT;"
  printf '%s\n' "-- Summary: $ENRICHED enriched, $IMAGES images"
} >> "$OUTPUT"

log ""
log "Step 3 done: $ENRICHED/$TOTAL enriched, $IMAGES images"

# Step 4: Apply SQL
# ON_ERROR_STOP makes psql stop at the first failing statement and exit
# non-zero, so a failed run is not reported as "ALL DONE".
log "Step 4: Applying SQL..."
if run_psql -v ON_ERROR_STOP=1 -f "$OUTPUT" >> "$LOG" 2>&1; then
  log "$(date): ALL DONE"
else
  RC=$?
  log "$(date): ERROR: applying $OUTPUT failed (psql exit $RC)"
  exit "$RC"
fi