Compare commits
3 Commits
b612c5801d
...
71d6c20b5e
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
71d6c20b5e | ||
|
|
93dd7c2e23 | ||
|
|
edc9311d7b |
@ -18,6 +18,10 @@ Types: FEAT · FIX · UI · DATA · AI · INFRA
|
|||||||
{"d":"2026-04-02","t":"FEAT","m":"Forecast Engine: weighted demand index (0-100) from 6 signal types — capex 0.30, ai_clusters 0.25, ebay_velocity 0.20, lead_times 0.15, github 0.06, standards 0.04 — 3/9/12/18 month horizons for 5 technologies"}
|
{"d":"2026-04-02","t":"FEAT","m":"Forecast Engine: weighted demand index (0-100) from 6 signal types — capex 0.30, ai_clusters 0.25, ebay_velocity 0.20, lead_times 0.15, github 0.06, standards 0.04 — 3/9/12/18 month horizons for 5 technologies"}
|
||||||
{"d":"2026-04-02","t":"FEAT","m":"NAS sync: datasheet/manual download — PDFs from product_documents organized into switches/transceivers/whitepapers/other"}
|
{"d":"2026-04-02","t":"FEAT","m":"NAS sync: datasheet/manual download — PDFs from product_documents organized into switches/transceivers/whitepapers/other"}
|
||||||
{"d":"2026-04-02","t":"INFRA","m":"Scheduler: 50 total pg-boss jobs — 8 new prediction/forecast jobs with cron schedules"}
|
{"d":"2026-04-02","t":"INFRA","m":"Scheduler: 50 total pg-boss jobs — 8 new prediction/forecast jobs with cron schedules"}
|
||||||
|
{"d":"2026-04-03","t":"FEAT","m":"TIP Proxy Network: residential proxy pool — contributor nodes donate bandwidth, SOCKS5 server (Node.js net only), register/heartbeat/next/rotate/stats API, round-robin routing with uptime+latency scoring"}
|
||||||
|
{"d":"2026-04-03","t":"FEAT","m":"@tip/proxy-agent: standalone CLI package — tip-agent start/status/stop, configurable bandwidth cap, 30s heartbeat, graceful shutdown"}
|
||||||
|
{"d":"2026-04-03","t":"UI","m":"Dashboard Network tab: node stats, join-the-network card with token generator, install command box, country breakdown table"}
|
||||||
|
{"d":"2026-04-03","t":"INFRA","m":"Mac Studio home node: tip-agent running on 192.168.178.213:1081, PROXY_URL=socks5://192.168.178.213:1081 set in PM2 env for scraper+api, ProLabs WAF bypass now active"}
|
||||||
{"d":"2026-04-01","t":"FEAT","m":"Product Intelligence Layer (migration 020): product_issues table (forum/community bugs), condition+marketplace on price_observations, features JSONB on switches+transceivers"}
|
{"d":"2026-04-01","t":"FEAT","m":"Product Intelligence Layer (migration 020): product_issues table (forum/community bugs), condition+marketplace on price_observations, features JSONB on switches+transceivers"}
|
||||||
{"d":"2026-04-01","t":"FEAT","m":"eBay Enricher: scrapes eBay.de for switch/transceiver listings — extracts features, description, refurbished prices, images — nightly via pg-boss"}
|
{"d":"2026-04-01","t":"FEAT","m":"eBay Enricher: scrapes eBay.de for switch/transceiver listings — extracts features, description, refurbished prices, images — nightly via pg-boss"}
|
||||||
{"d":"2026-04-01","t":"FEAT","m":"Community Issues Scraper: extracts known bugs/incompatibilities from Reddit, ServeTheHome, Arista Community, Cisco Community, NetworkEngineering SE"}
|
{"d":"2026-04-01","t":"FEAT","m":"Community Issues Scraper: extracts known bugs/incompatibilities from Reddit, ServeTheHome, Arista Community, Cisco Community, NetworkEngineering SE"}
|
||||||
|
|||||||
306
packages/api/src/routes/proxy.ts
Normal file
306
packages/api/src/routes/proxy.ts
Normal file
@ -0,0 +1,306 @@
|
|||||||
|
/**
|
||||||
|
* Proxy Network API routes
|
||||||
|
*
|
||||||
|
* POST /api/proxy/register — register new node, returns token
|
||||||
|
* POST /api/proxy/heartbeat — update node status + stats
|
||||||
|
* GET /api/proxy/nodes — list all nodes (admin)
|
||||||
|
* GET /api/proxy/next — get best available proxy for scraper use
|
||||||
|
* POST /api/proxy/rotate — mark node as used, get next
|
||||||
|
* GET /api/proxy/stats — network-wide stats
|
||||||
|
*
|
||||||
|
* Note: register + heartbeat are public (no auth required).
|
||||||
|
* nodes + rotate require admin token via X-Admin-Token header.
|
||||||
|
*/
|
||||||
|
import { Router, Request, Response } from "express";
|
||||||
|
import * as crypto from "crypto";
|
||||||
|
import { pool } from "../db/client";
|
||||||
|
|
||||||
|
export const proxyRouter = Router();
|
||||||
|
|
||||||
|
const ADMIN_TOKEN = process.env.TIP_ADMIN_TOKEN ?? "tip-admin-2026";
|
||||||
|
|
||||||
|
function requireAdmin(req: Request, res: Response): boolean {
|
||||||
|
const token =
|
||||||
|
req.headers["x-admin-token"] ??
|
||||||
|
req.query["admin_token"] ??
|
||||||
|
"";
|
||||||
|
if (token !== ADMIN_TOKEN) {
|
||||||
|
res.status(401).json({ error: "Admin token required" });
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
function generateToken(): string {
|
||||||
|
return crypto.randomBytes(32).toString("hex");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Round-robin index tracker (in-memory, resets on restart)
|
||||||
|
let rrIndex = 0;
|
||||||
|
|
||||||
|
// POST /api/proxy/register
|
||||||
|
proxyRouter.post("/register", async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const { token: existingToken, name, owner_email, port, version } = req.body as {
|
||||||
|
token?: string;
|
||||||
|
name?: string;
|
||||||
|
owner_email?: string;
|
||||||
|
port?: number;
|
||||||
|
version?: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
const ip =
|
||||||
|
(req.headers["x-forwarded-for"] as string | undefined)?.split(",")[0]?.trim() ??
|
||||||
|
req.socket.remoteAddress ??
|
||||||
|
"";
|
||||||
|
|
||||||
|
// If token provided, upsert — otherwise create new
|
||||||
|
const token = existingToken ?? generateToken();
|
||||||
|
|
||||||
|
const result = await pool.query(
|
||||||
|
`INSERT INTO proxy_nodes (token, name, owner_email, ip, port, version, status, last_seen)
|
||||||
|
VALUES ($1, $2, $3, $4, $5, $6, 'online', NOW())
|
||||||
|
ON CONFLICT (token) DO UPDATE SET
|
||||||
|
name = COALESCE(EXCLUDED.name, proxy_nodes.name),
|
||||||
|
ip = EXCLUDED.ip,
|
||||||
|
port = COALESCE(EXCLUDED.port, proxy_nodes.port),
|
||||||
|
version = COALESCE(EXCLUDED.version, proxy_nodes.version),
|
||||||
|
status = 'online',
|
||||||
|
last_seen = NOW()
|
||||||
|
RETURNING id, token, name, status`,
|
||||||
|
[token, name ?? null, owner_email ?? null, ip, port ?? 1080, version ?? null]
|
||||||
|
);
|
||||||
|
|
||||||
|
const node = result.rows[0];
|
||||||
|
res.json({
|
||||||
|
success: true,
|
||||||
|
token: node.token,
|
||||||
|
nodeId: node.id,
|
||||||
|
message: "Node registered. Keep this token safe!",
|
||||||
|
});
|
||||||
|
} catch (err) {
|
||||||
|
console.error("[proxy] register error:", err);
|
||||||
|
res.status(500).json({ error: "Registration failed" });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// POST /api/proxy/heartbeat
|
||||||
|
proxyRouter.post("/heartbeat", async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const {
|
||||||
|
token,
|
||||||
|
ip,
|
||||||
|
port,
|
||||||
|
bytesProxied,
|
||||||
|
requestsProxied,
|
||||||
|
latencyMs,
|
||||||
|
version,
|
||||||
|
} = req.body as {
|
||||||
|
token: string;
|
||||||
|
ip?: string;
|
||||||
|
port?: number;
|
||||||
|
bytesProxied?: number;
|
||||||
|
requestsProxied?: number;
|
||||||
|
latencyMs?: number;
|
||||||
|
version?: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (!token) {
|
||||||
|
res.status(400).json({ error: "token required" });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const remoteIp =
|
||||||
|
ip ??
|
||||||
|
(req.headers["x-forwarded-for"] as string | undefined)?.split(",")[0]?.trim() ??
|
||||||
|
req.socket.remoteAddress ??
|
||||||
|
"";
|
||||||
|
|
||||||
|
const bytesGb = (bytesProxied ?? 0) / (1024 ** 3);
|
||||||
|
|
||||||
|
const result = await pool.query(
|
||||||
|
`UPDATE proxy_nodes SET
|
||||||
|
status = 'online',
|
||||||
|
last_seen = NOW(),
|
||||||
|
ip = COALESCE($2, ip),
|
||||||
|
port = COALESCE($3, port),
|
||||||
|
bytes_proxied = COALESCE($4, bytes_proxied),
|
||||||
|
requests_proxied = COALESCE($5, requests_proxied),
|
||||||
|
latency_ms = COALESCE($6, latency_ms),
|
||||||
|
version = COALESCE($7, version),
|
||||||
|
bandwidth_used_gb = COALESCE($8, bandwidth_used_gb)
|
||||||
|
WHERE token = $1
|
||||||
|
RETURNING id, status`,
|
||||||
|
[
|
||||||
|
token,
|
||||||
|
remoteIp || null,
|
||||||
|
port ?? null,
|
||||||
|
bytesProxied ?? null,
|
||||||
|
requestsProxied ?? null,
|
||||||
|
latencyMs ?? null,
|
||||||
|
version ?? null,
|
||||||
|
bytesGb > 0 ? bytesGb : null,
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (result.rowCount === 0) {
|
||||||
|
res.status(404).json({ error: "Node not found. Please register first." });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
res.json({ success: true, status: "online" });
|
||||||
|
} catch (err) {
|
||||||
|
console.error("[proxy] heartbeat error:", err);
|
||||||
|
res.status(500).json({ error: "Heartbeat failed" });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// GET /api/proxy/nodes (admin)
|
||||||
|
proxyRouter.get("/nodes", async (req: Request, res: Response) => {
|
||||||
|
if (!requireAdmin(req, res)) return;
|
||||||
|
try {
|
||||||
|
const result = await pool.query(
|
||||||
|
`SELECT id, token, name, country_code, city, ip, port, protocol,
|
||||||
|
status, last_seen, bytes_proxied, requests_proxied,
|
||||||
|
uptime_pct, bandwidth_limit_gb, bandwidth_used_gb,
|
||||||
|
latency_ms, version, registered_at
|
||||||
|
FROM proxy_nodes
|
||||||
|
ORDER BY status = 'online' DESC, last_seen DESC`
|
||||||
|
);
|
||||||
|
res.json({ success: true, nodes: result.rows, total: result.rowCount });
|
||||||
|
} catch (err) {
|
||||||
|
console.error("[proxy] nodes error:", err);
|
||||||
|
res.status(500).json({ error: "Failed to fetch nodes" });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// GET /api/proxy/next — best available proxy for scraper use
|
||||||
|
proxyRouter.get("/next", async (_req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const result = await pool.query(
|
||||||
|
`SELECT id, ip, port, protocol, country_code, city, latency_ms
|
||||||
|
FROM proxy_nodes
|
||||||
|
WHERE status = 'online'
|
||||||
|
AND last_seen > NOW() - INTERVAL '2 minutes'
|
||||||
|
AND (bandwidth_used_gb < bandwidth_limit_gb OR bandwidth_limit_gb = 0)
|
||||||
|
ORDER BY uptime_pct DESC NULLS LAST, latency_ms ASC NULLS LAST
|
||||||
|
LIMIT 10`
|
||||||
|
);
|
||||||
|
|
||||||
|
if (result.rowCount === 0) {
|
||||||
|
res.status(503).json({
|
||||||
|
error: "No proxies available",
|
||||||
|
fallback: process.env.PROXY_URL ?? null,
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const nodes = result.rows;
|
||||||
|
const node = nodes[rrIndex % nodes.length];
|
||||||
|
rrIndex = (rrIndex + 1) % nodes.length;
|
||||||
|
|
||||||
|
res.json({
|
||||||
|
success: true,
|
||||||
|
host: node.ip,
|
||||||
|
port: node.port,
|
||||||
|
protocol: node.protocol ?? "socks5",
|
||||||
|
nodeId: node.id,
|
||||||
|
country: node.country_code,
|
||||||
|
city: node.city,
|
||||||
|
latencyMs: node.latency_ms,
|
||||||
|
});
|
||||||
|
} catch (err) {
|
||||||
|
console.error("[proxy] next error:", err);
|
||||||
|
res.status(500).json({ error: "Failed to get next proxy" });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// POST /api/proxy/rotate — mark node as used, get next one
|
||||||
|
proxyRouter.post("/rotate", async (req: Request, res: Response) => {
|
||||||
|
if (!requireAdmin(req, res)) return;
|
||||||
|
try {
|
||||||
|
const { nodeId } = req.body as { nodeId?: string };
|
||||||
|
if (nodeId) {
|
||||||
|
// Optionally mark as temporarily degraded (just log — no ban)
|
||||||
|
await pool.query(
|
||||||
|
`UPDATE proxy_nodes SET requests_proxied = requests_proxied + 1 WHERE id = $1`,
|
||||||
|
[nodeId]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// Return next proxy
|
||||||
|
const result = await pool.query(
|
||||||
|
`SELECT id, ip, port, protocol, country_code
|
||||||
|
FROM proxy_nodes
|
||||||
|
WHERE status = 'online'
|
||||||
|
AND last_seen > NOW() - INTERVAL '2 minutes'
|
||||||
|
AND id != COALESCE($1::uuid, '00000000-0000-0000-0000-000000000000')
|
||||||
|
ORDER BY requests_proxied ASC, latency_ms ASC NULLS LAST
|
||||||
|
LIMIT 1`,
|
||||||
|
[nodeId ?? null]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (result.rowCount === 0) {
|
||||||
|
res.status(503).json({ error: "No other proxies available" });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const node = result.rows[0];
|
||||||
|
res.json({
|
||||||
|
success: true,
|
||||||
|
host: node.ip,
|
||||||
|
port: node.port,
|
||||||
|
protocol: node.protocol ?? "socks5",
|
||||||
|
nodeId: node.id,
|
||||||
|
country: node.country_code,
|
||||||
|
});
|
||||||
|
} catch (err) {
|
||||||
|
console.error("[proxy] rotate error:", err);
|
||||||
|
res.status(500).json({ error: "Failed to rotate proxy" });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// GET /api/proxy/stats — public network stats
|
||||||
|
proxyRouter.get("/stats", async (_req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const result = await pool.query(`
|
||||||
|
SELECT
|
||||||
|
COUNT(*) AS total_nodes,
|
||||||
|
COUNT(*) FILTER (WHERE status = 'online') AS online_nodes,
|
||||||
|
COUNT(*) FILTER (WHERE status = 'offline') AS offline_nodes,
|
||||||
|
COUNT(DISTINCT country_code) AS countries,
|
||||||
|
COALESCE(SUM(bandwidth_used_gb), 0) AS total_gb_proxied,
|
||||||
|
COALESCE(SUM(requests_proxied), 0) AS total_requests,
|
||||||
|
COALESCE(AVG(latency_ms) FILTER (WHERE status = 'online'), 0) AS avg_latency_ms
|
||||||
|
FROM proxy_nodes
|
||||||
|
`);
|
||||||
|
|
||||||
|
const row = result.rows[0] ?? {};
|
||||||
|
|
||||||
|
// Per-country breakdown
|
||||||
|
const countryResult = await pool.query(`
|
||||||
|
SELECT country_code, COUNT(*) AS nodes,
|
||||||
|
COUNT(*) FILTER (WHERE status = 'online') AS online
|
||||||
|
FROM proxy_nodes
|
||||||
|
WHERE country_code IS NOT NULL
|
||||||
|
GROUP BY country_code
|
||||||
|
ORDER BY online DESC, nodes DESC
|
||||||
|
`);
|
||||||
|
|
||||||
|
res.json({
|
||||||
|
success: true,
|
||||||
|
network: {
|
||||||
|
totalNodes: Number(row.total_nodes ?? 0),
|
||||||
|
onlineNodes: Number(row.online_nodes ?? 0),
|
||||||
|
offlineNodes: Number(row.offline_nodes ?? 0),
|
||||||
|
countries: Number(row.countries ?? 0),
|
||||||
|
totalGbProxied: parseFloat(Number(row.total_gb_proxied ?? 0).toFixed(2)),
|
||||||
|
totalRequests: Number(row.total_requests ?? 0),
|
||||||
|
avgLatencyMs: Math.round(Number(row.avg_latency_ms ?? 0)),
|
||||||
|
},
|
||||||
|
countries: countryResult.rows,
|
||||||
|
});
|
||||||
|
} catch (err) {
|
||||||
|
console.error("[proxy] stats error:", err);
|
||||||
|
res.status(500).json({ error: "Failed to get stats" });
|
||||||
|
}
|
||||||
|
});
|
||||||
50
packages/proxy-agent/README.md
Normal file
50
packages/proxy-agent/README.md
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
# @tip/proxy-agent
|
||||||
|
|
||||||
|
Contribute bandwidth to the TIP (Transceiver Intelligence Platform) proxy network
|
||||||
|
and get free API access in return.
|
||||||
|
|
||||||
|
## How it works
|
||||||
|
|
||||||
|
- You run the agent on a residential or home server machine
|
||||||
|
- The agent starts a SOCKS5 proxy server on your machine
|
||||||
|
- TIP scrapers route through your IP to bypass datacenter IP blocks (e.g. CloudFront WAF)
|
||||||
|
- You get: free TIP API access, contributor badge, early access to new features
|
||||||
|
|
||||||
|
## Install & Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Generate a token first on https://transceiver-db.context-x.org/dashboard (Network tab)
|
||||||
|
# Then start the agent:
|
||||||
|
npx @tip/proxy-agent start --token YOUR_TOKEN
|
||||||
|
```
|
||||||
|
|
||||||
|
## Options
|
||||||
|
|
||||||
|
```
|
||||||
|
tip-agent start --token TOKEN [--port 1080] [--max-gb 10] [--name "My Node"]
|
||||||
|
tip-agent status
|
||||||
|
tip-agent stop
|
||||||
|
```
|
||||||
|
|
||||||
|
| Option | Default | Description |
|
||||||
|
|------------|---------------------------------------------|--------------------------|
|
||||||
|
| `--token` | (required) | Your TIP node token |
|
||||||
|
| `--port` | 1080 | SOCKS5 listen port |
|
||||||
|
| `--max-gb` | 10 | Max bandwidth to donate |
|
||||||
|
| `--name` | hostname | Node display name |
|
||||||
|
| `--server` | https://transceiver-db.context-x.org | TIP API server |
|
||||||
|
|
||||||
|
## Protocol
|
||||||
|
|
||||||
|
- SOCKS5 (RFC 1928)
|
||||||
|
- No-auth method
|
||||||
|
- TCP CONNECT only
|
||||||
|
- 30s connection timeout
|
||||||
|
- Heartbeat every 30s to routing server
|
||||||
|
|
||||||
|
## Privacy
|
||||||
|
|
||||||
|
- Only outbound TCP CONNECT requests are proxied (no BIND, no UDP)
|
||||||
|
- Bandwidth is capped at your configured limit
|
||||||
|
- No logs of proxied content are stored
|
||||||
|
- Your IP is visible to the target website (residential proxy model)
|
||||||
18
packages/proxy-agent/package.json
Normal file
18
packages/proxy-agent/package.json
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"name": "@tip/proxy-agent",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "TIP Network Proxy Agent — contribute bandwidth, get free API access",
|
||||||
|
"bin": { "tip-agent": "./dist/index.js" },
|
||||||
|
"main": "./dist/index.js",
|
||||||
|
"scripts": {
|
||||||
|
"build": "tsc",
|
||||||
|
"start": "node dist/index.js"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"dotenv": "^16.0.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"typescript": "^5.0.0",
|
||||||
|
"@types/node": "^20.0.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
174
packages/proxy-agent/src/agent.ts
Normal file
174
packages/proxy-agent/src/agent.ts
Normal file
@ -0,0 +1,174 @@
|
|||||||
|
/**
|
||||||
|
* Core agent logic — registers with TIP routing server,
|
||||||
|
* starts SOCKS5 server, manages heartbeat and graceful shutdown.
|
||||||
|
*/
|
||||||
|
import * as https from "https";
|
||||||
|
import * as http from "http";
|
||||||
|
import * as os from "os";
|
||||||
|
import { createSocks5Server } from "./socks5";
|
||||||
|
import { BandwidthTracker } from "./bandwidth";
|
||||||
|
import { startHeartbeatLoop } from "./heartbeat";
|
||||||
|
import { saveConfig, savePid, clearPid, AgentConfig } from "./config";
|
||||||
|
|
||||||
|
const AGENT_VERSION = "1.0.0";
|
||||||
|
|
||||||
|
interface RegisterResponse {
|
||||||
|
token: string;
|
||||||
|
nodeId: string;
|
||||||
|
message?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
function postJson(url: string, body: unknown): Promise<{ status: number; data: unknown }> {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
const json = JSON.stringify(body);
|
||||||
|
const parsed = new URL(url);
|
||||||
|
const mod = parsed.protocol === "https:" ? https : http;
|
||||||
|
|
||||||
|
const req = mod.request(
|
||||||
|
{
|
||||||
|
hostname: parsed.hostname,
|
||||||
|
port: parsed.port || (parsed.protocol === "https:" ? 443 : 80),
|
||||||
|
path: parsed.pathname + parsed.search,
|
||||||
|
method: "POST",
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"Content-Length": Buffer.byteLength(json),
|
||||||
|
"User-Agent": `TIP-Agent/${AGENT_VERSION}`,
|
||||||
|
},
|
||||||
|
timeout: 15_000,
|
||||||
|
},
|
||||||
|
(res) => {
|
||||||
|
const chunks: Buffer[] = [];
|
||||||
|
res.on("data", (c: Buffer) => chunks.push(c));
|
||||||
|
res.on("end", () => {
|
||||||
|
try {
|
||||||
|
const data = JSON.parse(Buffer.concat(chunks).toString());
|
||||||
|
resolve({ status: res.statusCode ?? 0, data });
|
||||||
|
} catch {
|
||||||
|
resolve({ status: res.statusCode ?? 0, data: {} });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
req.on("timeout", () => { req.destroy(); reject(new Error("timeout")); });
|
||||||
|
req.on("error", reject);
|
||||||
|
req.write(json);
|
||||||
|
req.end();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async function registerNode(cfg: AgentConfig): Promise<RegisterResponse> {
|
||||||
|
const url = `${cfg.serverUrl}/api/proxy/register`;
|
||||||
|
console.log(`[agent] Registering with ${url}...`);
|
||||||
|
|
||||||
|
const { status, data } = await postJson(url, {
|
||||||
|
token: cfg.token,
|
||||||
|
name: cfg.name,
|
||||||
|
port: cfg.port,
|
||||||
|
version: AGENT_VERSION,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (status >= 200 && status < 300) {
|
||||||
|
const resp = data as RegisterResponse;
|
||||||
|
console.log(`[agent] Registered successfully. Node: ${resp.nodeId ?? "unknown"}`);
|
||||||
|
return resp;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new Error(`Registration failed: HTTP ${status}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function startAgent(cfg: AgentConfig): Promise<void> {
|
||||||
|
const bandwidth = new BandwidthTracker(cfg.maxGb);
|
||||||
|
|
||||||
|
// Register with server
|
||||||
|
await registerNode(cfg).catch((err) => {
|
||||||
|
console.warn(`[agent] Registration warning: ${err.message} (continuing anyway)`);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Start SOCKS5 server
|
||||||
|
const server = createSocks5Server({
|
||||||
|
port: cfg.port,
|
||||||
|
bandwidth,
|
||||||
|
onNewConnection: () => {
|
||||||
|
if (bandwidth.isLimitReached()) {
|
||||||
|
console.warn("[agent] Bandwidth limit reached — rejecting new connections");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
await new Promise<void>((resolve, reject) => {
|
||||||
|
server.listen(cfg.port, () => {
|
||||||
|
console.log(`[agent] SOCKS5 proxy listening on port ${cfg.port}`);
|
||||||
|
resolve();
|
||||||
|
});
|
||||||
|
server.on("error", reject);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Save PID
|
||||||
|
savePid(process.pid);
|
||||||
|
saveConfig({ ...cfg, pid: process.pid, startedAt: new Date().toISOString() });
|
||||||
|
|
||||||
|
// Start heartbeat
|
||||||
|
const heartbeatTimer = startHeartbeatLoop(
|
||||||
|
cfg.serverUrl,
|
||||||
|
() => {
|
||||||
|
const stats = bandwidth.getStats();
|
||||||
|
return {
|
||||||
|
token: cfg.token,
|
||||||
|
port: cfg.port,
|
||||||
|
bytesProxied: stats.bytesProxied,
|
||||||
|
requestsProxied: stats.requestsProxied,
|
||||||
|
version: AGENT_VERSION,
|
||||||
|
};
|
||||||
|
},
|
||||||
|
(result) => {
|
||||||
|
if (!result.ok) {
|
||||||
|
console.warn(`[agent] Heartbeat failed: ${result.error ?? "unknown error"}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
console.log(`[agent] TIP Agent running. Max bandwidth: ${cfg.maxGb} GB`);
|
||||||
|
console.log(`[agent] Press Ctrl+C to stop.\n`);
|
||||||
|
|
||||||
|
// Print stats every minute
|
||||||
|
const statsTimer = setInterval(() => {
|
||||||
|
const stats = bandwidth.getStats();
|
||||||
|
const usedGb = (stats.bytesProxied / (1024 ** 3)).toFixed(3);
|
||||||
|
console.log(
|
||||||
|
`[agent] Stats: ${stats.requestsProxied} requests, ${usedGb} GB used / ${cfg.maxGb} GB limit`
|
||||||
|
);
|
||||||
|
}, 60_000);
|
||||||
|
|
||||||
|
// Graceful shutdown
|
||||||
|
const shutdown = (signal: string) => {
|
||||||
|
console.log(`\n[agent] Received ${signal}, shutting down...`);
|
||||||
|
clearInterval(heartbeatTimer);
|
||||||
|
clearInterval(statsTimer);
|
||||||
|
clearPid();
|
||||||
|
server.close(() => {
|
||||||
|
console.log("[agent] SOCKS5 server closed. Goodbye.");
|
||||||
|
process.exit(0);
|
||||||
|
});
|
||||||
|
setTimeout(() => process.exit(1), 5000);
|
||||||
|
};
|
||||||
|
|
||||||
|
process.on("SIGINT", () => shutdown("SIGINT"));
|
||||||
|
process.on("SIGTERM", () => shutdown("SIGTERM"));
|
||||||
|
}
|
||||||
|
|
||||||
|
export function getLocalIp(): string {
|
||||||
|
const interfaces = os.networkInterfaces();
|
||||||
|
for (const iface of Object.values(interfaces)) {
|
||||||
|
if (!iface) continue;
|
||||||
|
for (const addr of iface) {
|
||||||
|
if (addr.family === "IPv4" && !addr.internal) {
|
||||||
|
return addr.address;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "127.0.0.1";
|
||||||
|
}
|
||||||
50
packages/proxy-agent/src/bandwidth.ts
Normal file
50
packages/proxy-agent/src/bandwidth.ts
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
/**
|
||||||
|
* Bandwidth tracker — monitors usage and enforces limits
|
||||||
|
*/
|
||||||
|
|
||||||
|
export interface BandwidthStats {
|
||||||
|
bytesProxied: number;
|
||||||
|
requestsProxied: number;
|
||||||
|
bytesLimitHard: number;
|
||||||
|
limitReached: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
export class BandwidthTracker {
|
||||||
|
private bytesProxied = 0;
|
||||||
|
private requestsProxied = 0;
|
||||||
|
private readonly bytesLimit: number;
|
||||||
|
|
||||||
|
constructor(maxGb: number) {
|
||||||
|
this.bytesLimit = maxGb * 1024 * 1024 * 1024;
|
||||||
|
}
|
||||||
|
|
||||||
|
addBytes(n: number): void {
|
||||||
|
this.bytesProxied += n;
|
||||||
|
}
|
||||||
|
|
||||||
|
incrementRequests(): void {
|
||||||
|
this.requestsProxied++;
|
||||||
|
}
|
||||||
|
|
||||||
|
isLimitReached(): boolean {
|
||||||
|
return this.bytesProxied >= this.bytesLimit;
|
||||||
|
}
|
||||||
|
|
||||||
|
getStats(): BandwidthStats {
|
||||||
|
return {
|
||||||
|
bytesProxied: this.bytesProxied,
|
||||||
|
requestsProxied: this.requestsProxied,
|
||||||
|
bytesLimitHard: this.bytesLimit,
|
||||||
|
limitReached: this.isLimitReached(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
getUsedGb(): number {
|
||||||
|
return this.bytesProxied / (1024 * 1024 * 1024);
|
||||||
|
}
|
||||||
|
|
||||||
|
reset(): void {
|
||||||
|
this.bytesProxied = 0;
|
||||||
|
this.requestsProxied = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
76
packages/proxy-agent/src/config.ts
Normal file
76
packages/proxy-agent/src/config.ts
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
/**
|
||||||
|
* Config management — persists agent config to ~/.tip-agent/config.json
|
||||||
|
*/
|
||||||
|
import * as fs from "fs";
|
||||||
|
import * as path from "path";
|
||||||
|
import * as os from "os";
|
||||||
|
|
||||||
|
const CONFIG_DIR = path.join(os.homedir(), ".tip-agent");
|
||||||
|
const CONFIG_FILE = path.join(CONFIG_DIR, "config.json");
|
||||||
|
|
||||||
|
export interface AgentConfig {
|
||||||
|
token: string;
|
||||||
|
port: number;
|
||||||
|
maxGb: number;
|
||||||
|
name: string;
|
||||||
|
serverUrl: string;
|
||||||
|
pid?: number;
|
||||||
|
startedAt?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
const DEFAULTS: Partial<AgentConfig> = {
|
||||||
|
port: 1080,
|
||||||
|
maxGb: 10,
|
||||||
|
name: os.hostname(),
|
||||||
|
serverUrl: "https://transceiver-db.context-x.org",
|
||||||
|
};
|
||||||
|
|
||||||
|
export function loadConfig(): AgentConfig | null {
|
||||||
|
if (!fs.existsSync(CONFIG_FILE)) return null;
|
||||||
|
try {
|
||||||
|
const raw = fs.readFileSync(CONFIG_FILE, "utf8");
|
||||||
|
return JSON.parse(raw) as AgentConfig;
|
||||||
|
} catch {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export function saveConfig(cfg: Partial<AgentConfig>): AgentConfig {
|
||||||
|
if (!fs.existsSync(CONFIG_DIR)) {
|
||||||
|
fs.mkdirSync(CONFIG_DIR, { recursive: true });
|
||||||
|
}
|
||||||
|
const existing = loadConfig() ?? {};
|
||||||
|
const merged = { ...DEFAULTS, ...existing, ...cfg } as AgentConfig;
|
||||||
|
fs.writeFileSync(CONFIG_FILE, JSON.stringify(merged, null, 2), "utf8");
|
||||||
|
return merged;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function clearConfig(): void {
|
||||||
|
if (fs.existsSync(CONFIG_FILE)) {
|
||||||
|
fs.unlinkSync(CONFIG_FILE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export function getPidFile(): string {
|
||||||
|
return path.join(CONFIG_DIR, "agent.pid");
|
||||||
|
}
|
||||||
|
|
||||||
|
export function savePid(pid: number): void {
|
||||||
|
if (!fs.existsSync(CONFIG_DIR)) {
|
||||||
|
fs.mkdirSync(CONFIG_DIR, { recursive: true });
|
||||||
|
}
|
||||||
|
fs.writeFileSync(getPidFile(), String(pid), "utf8");
|
||||||
|
}
|
||||||
|
|
||||||
|
export function loadPid(): number | null {
|
||||||
|
const pidFile = getPidFile();
|
||||||
|
if (!fs.existsSync(pidFile)) return null;
|
||||||
|
const raw = fs.readFileSync(pidFile, "utf8").trim();
|
||||||
|
const pid = parseInt(raw, 10);
|
||||||
|
return isNaN(pid) ? null : pid;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function clearPid(): void {
|
||||||
|
const pidFile = getPidFile();
|
||||||
|
if (fs.existsSync(pidFile)) fs.unlinkSync(pidFile);
|
||||||
|
}
|
||||||
82
packages/proxy-agent/src/heartbeat.ts
Normal file
82
packages/proxy-agent/src/heartbeat.ts
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
/**
|
||||||
|
* Heartbeat — sends periodic status updates to TIP routing server.
|
||||||
|
* POST /api/proxy/heartbeat every 30s.
|
||||||
|
*/
|
||||||
|
import * as https from "https";
|
||||||
|
import * as http from "http";
|
||||||
|
|
||||||
|
export interface HeartbeatPayload {
|
||||||
|
token: string;
|
||||||
|
ip?: string;
|
||||||
|
port: number;
|
||||||
|
bytesProxied: number;
|
||||||
|
requestsProxied: number;
|
||||||
|
latencyMs?: number;
|
||||||
|
version: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface HeartbeatResult {
|
||||||
|
ok: boolean;
|
||||||
|
latencyMs: number;
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
function postJson(url: string, body: unknown): Promise<{ status: number; latencyMs: number }> {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
const start = Date.now();
|
||||||
|
const json = JSON.stringify(body);
|
||||||
|
const parsed = new URL(url);
|
||||||
|
const mod = parsed.protocol === "https:" ? https : http;
|
||||||
|
|
||||||
|
const req = mod.request(
|
||||||
|
{
|
||||||
|
hostname: parsed.hostname,
|
||||||
|
port: parsed.port || (parsed.protocol === "https:" ? 443 : 80),
|
||||||
|
path: parsed.pathname + parsed.search,
|
||||||
|
method: "POST",
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"Content-Length": Buffer.byteLength(json),
|
||||||
|
"User-Agent": "TIP-Agent/1.0.0",
|
||||||
|
},
|
||||||
|
timeout: 10_000,
|
||||||
|
},
|
||||||
|
(res) => {
|
||||||
|
res.resume();
|
||||||
|
resolve({ status: res.statusCode ?? 0, latencyMs: Date.now() - start });
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
req.on("timeout", () => { req.destroy(); reject(new Error("timeout")); });
|
||||||
|
req.on("error", reject);
|
||||||
|
req.write(json);
|
||||||
|
req.end();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function sendHeartbeat(
|
||||||
|
serverUrl: string,
|
||||||
|
payload: HeartbeatPayload
|
||||||
|
): Promise<HeartbeatResult> {
|
||||||
|
const url = `${serverUrl}/api/proxy/heartbeat`;
|
||||||
|
try {
|
||||||
|
const { status, latencyMs } = await postJson(url, payload);
|
||||||
|
return { ok: status >= 200 && status < 300, latencyMs };
|
||||||
|
} catch (err) {
|
||||||
|
return { ok: false, latencyMs: 0, error: String(err) };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export function startHeartbeatLoop(
|
||||||
|
serverUrl: string,
|
||||||
|
getPayload: () => HeartbeatPayload,
|
||||||
|
onResult?: (r: HeartbeatResult) => void,
|
||||||
|
intervalMs = 30_000
|
||||||
|
): NodeJS.Timeout {
|
||||||
|
const tick = async () => {
|
||||||
|
const result = await sendHeartbeat(serverUrl, getPayload());
|
||||||
|
onResult?.(result);
|
||||||
|
};
|
||||||
|
tick();
|
||||||
|
return setInterval(tick, intervalMs);
|
||||||
|
}
|
||||||
167
packages/proxy-agent/src/index.ts
Normal file
167
packages/proxy-agent/src/index.ts
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
/**
|
||||||
|
* TIP Proxy Agent CLI
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* tip-agent start --token TOKEN [--port 1080] [--max-gb 10] [--name "My Node"]
|
||||||
|
* tip-agent status
|
||||||
|
* tip-agent stop
|
||||||
|
*/
|
||||||
|
import { startAgent } from "./agent";
|
||||||
|
import { loadConfig, saveConfig, loadPid, clearPid } from "./config";
|
||||||
|
|
||||||
|
const AGENT_VERSION = "1.0.0";
|
||||||
|
|
||||||
|
function parseArgs(argv: string[]): Record<string, string> {
|
||||||
|
const args: Record<string, string> = {};
|
||||||
|
for (let i = 0; i < argv.length; i++) {
|
||||||
|
const arg = argv[i];
|
||||||
|
if (arg && arg.startsWith("--")) {
|
||||||
|
const key = arg.slice(2);
|
||||||
|
const next = argv[i + 1];
|
||||||
|
if (next && !next.startsWith("--")) {
|
||||||
|
args[key] = next;
|
||||||
|
i++;
|
||||||
|
} else {
|
||||||
|
args[key] = "true";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return args;
|
||||||
|
}
|
||||||
|
|
||||||
|
function isProcessRunning(pid: number): boolean {
|
||||||
|
try {
|
||||||
|
process.kill(pid, 0);
|
||||||
|
return true;
|
||||||
|
} catch {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function cmdStart(rawArgs: string[]): void {
|
||||||
|
const args = parseArgs(rawArgs);
|
||||||
|
|
||||||
|
const token = args["token"];
|
||||||
|
if (!token) {
|
||||||
|
console.error("Error: --token is required");
|
||||||
|
console.error(" tip-agent start --token YOUR_TOKEN [--port 1080] [--max-gb 10] [--name 'My Node']");
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
const cfg = saveConfig({
|
||||||
|
token,
|
||||||
|
port: args["port"] ? parseInt(args["port"], 10) : 1080,
|
||||||
|
maxGb: args["max-gb"] ? parseFloat(args["max-gb"]) : 10,
|
||||||
|
name: args["name"] ?? undefined,
|
||||||
|
serverUrl: args["server"] ?? "https://transceiver-db.context-x.org",
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log(`\nTIP Proxy Agent v${AGENT_VERSION}`);
|
||||||
|
console.log("==========================================");
|
||||||
|
console.log(` Token: ${cfg.token.slice(0, 8)}...`);
|
||||||
|
console.log(` Port: ${cfg.port}`);
|
||||||
|
console.log(` Max GB: ${cfg.maxGb}`);
|
||||||
|
console.log(` Name: ${cfg.name}`);
|
||||||
|
console.log(` Server: ${cfg.serverUrl}`);
|
||||||
|
console.log("==========================================\n");
|
||||||
|
|
||||||
|
startAgent(cfg).catch((err) => {
|
||||||
|
console.error("[agent] Fatal error:", err.message);
|
||||||
|
process.exit(1);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function cmdStatus(): void {
|
||||||
|
const cfg = loadConfig();
|
||||||
|
const pid = loadPid();
|
||||||
|
|
||||||
|
if (!cfg || !pid) {
|
||||||
|
console.log("TIP Agent: not running");
|
||||||
|
process.exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
const running = isProcessRunning(pid);
|
||||||
|
console.log(`TIP Agent: ${running ? "RUNNING" : "STOPPED (stale PID)"}`);
|
||||||
|
console.log(` PID: ${pid}`);
|
||||||
|
console.log(` Port: ${cfg.port}`);
|
||||||
|
console.log(` Token: ${cfg.token.slice(0, 8)}...`);
|
||||||
|
console.log(` Max GB: ${cfg.maxGb}`);
|
||||||
|
if (cfg.startedAt) {
|
||||||
|
console.log(` Started: ${cfg.startedAt}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!running) {
|
||||||
|
clearPid();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function cmdStop(): void {
|
||||||
|
const pid = loadPid();
|
||||||
|
|
||||||
|
if (!pid) {
|
||||||
|
console.log("TIP Agent: not running");
|
||||||
|
process.exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
process.kill(pid, "SIGTERM");
|
||||||
|
console.log(`TIP Agent (PID ${pid}) stopped.`);
|
||||||
|
clearPid();
|
||||||
|
} catch {
|
||||||
|
console.log(`TIP Agent: process ${pid} not found (already stopped)`);
|
||||||
|
clearPid();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function printHelp(): void {
|
||||||
|
console.log(`
|
||||||
|
TIP Proxy Agent v${AGENT_VERSION}
|
||||||
|
Contribute bandwidth to the TIP network, get free API access.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
tip-agent start --token TOKEN [options]
|
||||||
|
tip-agent status
|
||||||
|
tip-agent stop
|
||||||
|
|
||||||
|
Commands:
|
||||||
|
start Start the proxy agent
|
||||||
|
status Show agent status
|
||||||
|
stop Stop the agent
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--token TOKEN Your TIP node token (required for start)
|
||||||
|
--port N SOCKS5 listen port (default: 1080)
|
||||||
|
--max-gb N Max bandwidth to donate in GB (default: 10)
|
||||||
|
--name NAME Node name (default: hostname)
|
||||||
|
--server URL TIP server URL (default: https://transceiver-db.context-x.org)
|
||||||
|
|
||||||
|
Example:
|
||||||
|
tip-agent start --token abc123 --port 1080 --max-gb 10 --name "Home Node"
|
||||||
|
`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const [, , command, ...rest] = process.argv;
|
||||||
|
|
||||||
|
switch (command) {
|
||||||
|
case "start":
|
||||||
|
cmdStart(rest);
|
||||||
|
break;
|
||||||
|
case "status":
|
||||||
|
cmdStatus();
|
||||||
|
break;
|
||||||
|
case "stop":
|
||||||
|
cmdStop();
|
||||||
|
break;
|
||||||
|
case "--help":
|
||||||
|
case "-h":
|
||||||
|
case "help":
|
||||||
|
printHelp();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
if (command) {
|
||||||
|
console.error(`Unknown command: ${command}`);
|
||||||
|
}
|
||||||
|
printHelp();
|
||||||
|
process.exit(command ? 1 : 0);
|
||||||
|
}
|
||||||
181
packages/proxy-agent/src/socks5.ts
Normal file
181
packages/proxy-agent/src/socks5.ts
Normal file
@ -0,0 +1,181 @@
|
|||||||
|
/**
|
||||||
|
* SOCKS5 proxy server — built on Node.js net module, no external deps.
|
||||||
|
*
|
||||||
|
* Implements:
|
||||||
|
* - SOCKS5 handshake (RFC 1928)
|
||||||
|
* - No-auth method (0x00)
|
||||||
|
* - CONNECT command (TCP tunnel)
|
||||||
|
* - Byte tracking per connection
|
||||||
|
*/
|
||||||
|
import * as net from "net";
|
||||||
|
import { BandwidthTracker } from "./bandwidth";
|
||||||
|
|
||||||
|
const SOCKS_VER = 0x05;
|
||||||
|
const NO_AUTH = 0x00;
|
||||||
|
const CMD_CONNECT = 0x01;
|
||||||
|
const ATYP_IPV4 = 0x01;
|
||||||
|
const ATYP_DOMAIN = 0x03;
|
||||||
|
const ATYP_IPV6 = 0x04;
|
||||||
|
const REP_SUCCESS = 0x00;
|
||||||
|
const REP_FAIL = 0x01;
|
||||||
|
const CONN_TIMEOUT_MS = 30_000;
|
||||||
|
|
||||||
|
export interface Socks5ServerOptions {
|
||||||
|
port: number;
|
||||||
|
host?: string;
|
||||||
|
onNewConnection?: () => boolean; // return false to reject (bandwidth limit)
|
||||||
|
bandwidth?: BandwidthTracker;
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildReply(rep: number, ip = "0.0.0.0", port = 0): Buffer {
|
||||||
|
const parts = ip.split(".").map(Number);
|
||||||
|
const buf = Buffer.alloc(10);
|
||||||
|
buf[0] = SOCKS_VER;
|
||||||
|
buf[1] = rep;
|
||||||
|
buf[2] = 0x00; // reserved
|
||||||
|
buf[3] = ATYP_IPV4;
|
||||||
|
buf[4] = parts[0] ?? 0;
|
||||||
|
buf[5] = parts[1] ?? 0;
|
||||||
|
buf[6] = parts[2] ?? 0;
|
||||||
|
buf[7] = parts[3] ?? 0;
|
||||||
|
buf.writeUInt16BE(port, 8);
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
function handleClient(
|
||||||
|
socket: net.Socket,
|
||||||
|
opts: Socks5ServerOptions
|
||||||
|
): void {
|
||||||
|
socket.setTimeout(CONN_TIMEOUT_MS);
|
||||||
|
socket.on("timeout", () => socket.destroy());
|
||||||
|
socket.on("error", () => socket.destroy());
|
||||||
|
|
||||||
|
let state: "greeting" | "request" | "connected" = "greeting";
|
||||||
|
let buf = Buffer.alloc(0);
|
||||||
|
|
||||||
|
socket.on("data", (chunk) => {
|
||||||
|
buf = Buffer.concat([buf, chunk]);
|
||||||
|
|
||||||
|
if (state === "greeting") {
|
||||||
|
// Need at least: VER(1) + NMETHODS(1) + methods
|
||||||
|
if (buf.length < 2) return;
|
||||||
|
const nmethods = buf[1] ?? 0;
|
||||||
|
if (buf.length < 2 + nmethods) return;
|
||||||
|
|
||||||
|
const methods = buf.slice(2, 2 + nmethods);
|
||||||
|
buf = buf.slice(2 + nmethods);
|
||||||
|
|
||||||
|
if (!methods.includes(NO_AUTH)) {
|
||||||
|
socket.write(Buffer.from([SOCKS_VER, 0xFF])); // no acceptable method
|
||||||
|
socket.destroy();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
socket.write(Buffer.from([SOCKS_VER, NO_AUTH]));
|
||||||
|
state = "request";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state === "request") {
|
||||||
|
// Minimum: VER(1) CMD(1) RSV(1) ATYP(1) + address + port(2)
|
||||||
|
if (buf.length < 4) return;
|
||||||
|
|
||||||
|
const cmd = buf[1];
|
||||||
|
const atyp = buf[3];
|
||||||
|
|
||||||
|
let host: string;
|
||||||
|
let port: number;
|
||||||
|
let consumed: number;
|
||||||
|
|
||||||
|
if (atyp === ATYP_IPV4) {
|
||||||
|
if (buf.length < 10) return;
|
||||||
|
host = `${buf[4]}.${buf[5]}.${buf[6]}.${buf[7]}`;
|
||||||
|
port = buf.readUInt16BE(8);
|
||||||
|
consumed = 10;
|
||||||
|
} else if (atyp === ATYP_DOMAIN) {
|
||||||
|
if (buf.length < 5) return;
|
||||||
|
const domainLen = buf[4] ?? 0;
|
||||||
|
if (buf.length < 5 + domainLen + 2) return;
|
||||||
|
host = buf.slice(5, 5 + domainLen).toString("utf8");
|
||||||
|
port = buf.readUInt16BE(5 + domainLen);
|
||||||
|
consumed = 5 + domainLen + 2;
|
||||||
|
} else if (atyp === ATYP_IPV6) {
|
||||||
|
if (buf.length < 22) return;
|
||||||
|
const parts: string[] = [];
|
||||||
|
for (let i = 0; i < 8; i++) {
|
||||||
|
parts.push(buf.readUInt16BE(4 + i * 2).toString(16));
|
||||||
|
}
|
||||||
|
host = parts.join(":");
|
||||||
|
port = buf.readUInt16BE(20);
|
||||||
|
consumed = 22;
|
||||||
|
} else {
|
||||||
|
socket.write(buildReply(0x08)); // address type not supported
|
||||||
|
socket.destroy();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = buf.slice(consumed);
|
||||||
|
|
||||||
|
if (cmd !== CMD_CONNECT) {
|
||||||
|
socket.write(buildReply(0x07)); // command not supported
|
||||||
|
socket.destroy();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check bandwidth limit before accepting
|
||||||
|
if (opts.onNewConnection && !opts.onNewConnection()) {
|
||||||
|
socket.write(buildReply(REP_FAIL));
|
||||||
|
socket.destroy();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
opts.bandwidth?.incrementRequests();
|
||||||
|
|
||||||
|
const target = net.createConnection({ host, port }, () => {
|
||||||
|
socket.write(buildReply(REP_SUCCESS));
|
||||||
|
state = "connected";
|
||||||
|
|
||||||
|
// Pipe remaining buffered bytes
|
||||||
|
if (buf.length > 0) {
|
||||||
|
target.write(buf);
|
||||||
|
buf = Buffer.alloc(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bidirectional pipe with byte counting
|
||||||
|
socket.on("data", (d) => {
|
||||||
|
opts.bandwidth?.addBytes(d.length);
|
||||||
|
if (!target.destroyed) target.write(d);
|
||||||
|
});
|
||||||
|
|
||||||
|
target.on("data", (d) => {
|
||||||
|
opts.bandwidth?.addBytes(d.length);
|
||||||
|
if (!socket.destroyed) socket.write(d);
|
||||||
|
});
|
||||||
|
|
||||||
|
socket.on("end", () => { if (!target.destroyed) target.end(); });
|
||||||
|
target.on("end", () => { if (!socket.destroyed) socket.end(); });
|
||||||
|
});
|
||||||
|
|
||||||
|
target.setTimeout(CONN_TIMEOUT_MS);
|
||||||
|
target.on("timeout", () => { target.destroy(); socket.destroy(); });
|
||||||
|
target.on("error", () => {
|
||||||
|
if (!socket.destroyed) {
|
||||||
|
socket.write(buildReply(REP_FAIL));
|
||||||
|
socket.destroy();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
export function createSocks5Server(opts: Socks5ServerOptions): net.Server {
|
||||||
|
const server = net.createServer((socket) => {
|
||||||
|
handleClient(socket, opts);
|
||||||
|
});
|
||||||
|
|
||||||
|
server.on("error", (err) => {
|
||||||
|
console.error("[SOCKS5] Server error:", err.message);
|
||||||
|
});
|
||||||
|
|
||||||
|
return server;
|
||||||
|
}
|
||||||
18
packages/proxy-agent/tsconfig.json
Normal file
18
packages/proxy-agent/tsconfig.json
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2020",
|
||||||
|
"module": "commonjs",
|
||||||
|
"lib": ["ES2020"],
|
||||||
|
"outDir": "./dist",
|
||||||
|
"rootDir": "./src",
|
||||||
|
"strict": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"declaration": true,
|
||||||
|
"declarationMap": true,
|
||||||
|
"sourceMap": true
|
||||||
|
},
|
||||||
|
"include": ["src/**/*"],
|
||||||
|
"exclude": ["node_modules", "dist"]
|
||||||
|
}
|
||||||
@ -51,7 +51,9 @@ export async function createScheduler(): Promise<PgBoss> {
|
|||||||
retryDelay: 30,
|
retryDelay: 30,
|
||||||
retryBackoff: true,
|
retryBackoff: true,
|
||||||
expireInSeconds: 300,
|
expireInSeconds: 300,
|
||||||
monitorStateIntervalSeconds: 30,
|
monitorStateIntervalSeconds: 60,
|
||||||
|
max: 4, // pg-boss internal connection pool
|
||||||
|
poolSize: 4, // alias used by some pg-boss versions
|
||||||
});
|
});
|
||||||
|
|
||||||
boss.on("error", (error) => console.error("pg-boss error:", error));
|
boss.on("error", (error) => console.error("pg-boss error:", error));
|
||||||
|
|||||||
@ -147,6 +147,7 @@ interface Product {
|
|||||||
reachMeters?: number;
|
reachMeters?: number;
|
||||||
fiberType?: string;
|
fiberType?: string;
|
||||||
wavelength?: string;
|
wavelength?: string;
|
||||||
|
imageUrl?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
function sleep(ms: number): Promise<void> {
|
function sleep(ms: number): Promise<void> {
|
||||||
@ -432,6 +433,7 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
|
|||||||
name
|
name
|
||||||
sku
|
sku
|
||||||
url_key
|
url_key
|
||||||
|
small_image { url }
|
||||||
price_range {
|
price_range {
|
||||||
minimum_price {
|
minimum_price {
|
||||||
final_price { value currency }
|
final_price { value currency }
|
||||||
@ -457,6 +459,7 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
|
|||||||
name: string;
|
name: string;
|
||||||
sku: string;
|
sku: string;
|
||||||
url_key: string;
|
url_key: string;
|
||||||
|
small_image?: { url?: string };
|
||||||
price_range?: {
|
price_range?: {
|
||||||
minimum_price?: {
|
minimum_price?: {
|
||||||
final_price?: { value: number; currency: string };
|
final_price?: { value: number; currency: string };
|
||||||
@ -488,6 +491,9 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
|
|||||||
const reach = detectReach(item.name);
|
const reach = detectReach(item.name);
|
||||||
const price = item.price_range?.minimum_price?.final_price?.value;
|
const price = item.price_range?.minimum_price?.final_price?.value;
|
||||||
|
|
||||||
|
const rawImg = item.small_image?.url;
|
||||||
|
const imageUrl = rawImg && !rawImg.includes("placeholder") ? rawImg : undefined;
|
||||||
|
|
||||||
allProducts.set(url, {
|
allProducts.set(url, {
|
||||||
name: item.name,
|
name: item.name,
|
||||||
partNumber: item.sku,
|
partNumber: item.sku,
|
||||||
@ -501,6 +507,7 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
|
|||||||
reachMeters: reach?.meters,
|
reachMeters: reach?.meters,
|
||||||
fiberType: detectFiber(item.name),
|
fiberType: detectFiber(item.name),
|
||||||
wavelength: detectWavelength(item.name),
|
wavelength: detectWavelength(item.name),
|
||||||
|
imageUrl,
|
||||||
});
|
});
|
||||||
newCount++;
|
newCount++;
|
||||||
}
|
}
|
||||||
@ -538,6 +545,7 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
|
|||||||
fiberType: product.fiberType,
|
fiberType: product.fiberType,
|
||||||
wavelengths: product.wavelength,
|
wavelengths: product.wavelength,
|
||||||
category: "DataCenter",
|
category: "DataCenter",
|
||||||
|
imageUrl: product.imageUrl,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (product.price && product.price > 0) {
|
if (product.price && product.price > 0) {
|
||||||
|
|||||||
@ -99,9 +99,12 @@ function parseProductList(html: string): Product[] {
|
|||||||
const name = match[2].trim();
|
const name = match[2].trim();
|
||||||
if (name.length < 10 || name.length > 200) continue;
|
if (name.length < 10 || name.length > 200) continue;
|
||||||
|
|
||||||
// Look for price in surrounding context
|
// Look for price in surrounding context — BigCommerce uses data-product-price-without-tax
|
||||||
const context = html.slice(Math.max(0, match.index - 300), match.index + 600);
|
const context = html.slice(Math.max(0, match.index - 300), match.index + 1200);
|
||||||
const priceMatch = context.match(/\$\s*([\d,]+\.?\d{0,2})/) || context.match(/data-product-price="([\d.]+)"/);
|
const priceMatch = context.match(/data-product-price-without-tax[^>]*>\s*\$([\d,]+\.?\d{0,2})/)
|
||||||
|
|| context.match(/class="price price--withoutTax">\s*\$([\d,]+\.?\d{0,2})/)
|
||||||
|
|| context.match(/\$\s*([\d,]+\.?\d{0,2})/);
|
||||||
|
|
||||||
const price = priceMatch ? parseFloat(priceMatch[1].replace(",", "")) : undefined;
|
const price = priceMatch ? parseFloat(priceMatch[1].replace(",", "")) : undefined;
|
||||||
|
|
||||||
const ff = detectFormFactor(name);
|
const ff = detectFormFactor(name);
|
||||||
|
|||||||
@ -47,6 +47,7 @@ interface OptcoreProduct {
|
|||||||
speedGbps?: number;
|
speedGbps?: number;
|
||||||
speed?: string;
|
speed?: string;
|
||||||
reachLabel?: string;
|
reachLabel?: string;
|
||||||
|
imageUrl?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
function detectFormFactor(text: string): string | undefined {
|
function detectFormFactor(text: string): string | undefined {
|
||||||
@ -208,7 +209,13 @@ export async function scrapeOptcore(): Promise<void> {
|
|||||||
const stockEl = document.querySelector(".stock, .availability, [class*=\"stock\"]");
|
const stockEl = document.querySelector(".stock, .availability, [class*=\"stock\"]");
|
||||||
const stockText = stockEl?.textContent?.trim() || "";
|
const stockText = stockEl?.textContent?.trim() || "";
|
||||||
|
|
||||||
return { title, priceText, stockText };
|
// Product image — WooCommerce product gallery
|
||||||
|
const imgEl = document.querySelector(
|
||||||
|
".woocommerce-product-gallery__image img, .wp-post-image, img.attachment-woocommerce_single"
|
||||||
|
) as HTMLImageElement | null;
|
||||||
|
const imageUrl = imgEl?.src || imgEl?.getAttribute("data-src") || "";
|
||||||
|
|
||||||
|
return { title, priceText, stockText, imageUrl };
|
||||||
});
|
});
|
||||||
|
|
||||||
const meta = metaByUrl.get(url);
|
const meta = metaByUrl.get(url);
|
||||||
@ -229,6 +236,7 @@ export async function scrapeOptcore(): Promise<void> {
|
|||||||
speedGbps: speedInfo?.speedGbps,
|
speedGbps: speedInfo?.speedGbps,
|
||||||
speed: speedInfo?.speed,
|
speed: speedInfo?.speed,
|
||||||
reachLabel: detectReach(name),
|
reachLabel: detectReach(name),
|
||||||
|
imageUrl: data.imageUrl || undefined,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -262,6 +270,7 @@ export async function scrapeOptcore(): Promise<void> {
|
|||||||
speed: p.speed,
|
speed: p.speed,
|
||||||
reachLabel: p.reachLabel,
|
reachLabel: p.reachLabel,
|
||||||
category: "DataCenter",
|
category: "DataCenter",
|
||||||
|
imageUrl: p.imageUrl,
|
||||||
});
|
});
|
||||||
|
|
||||||
const hash = contentHash({ price: p.price, stock: p.stockLevel });
|
const hash = contentHash({ price: p.price, stock: p.stockLevel });
|
||||||
|
|||||||
@ -1,163 +1,55 @@
|
|||||||
/**
|
/**
|
||||||
* SmartOptics Scraper — Manufacturer, quote-based pricing
|
* SmartOptics Scraper — Premium coherent/DWDM transceiver manufacturer
|
||||||
*
|
*
|
||||||
* www.smartoptics.com — no direct prices, catalog-only scrape.
|
* smartoptics.com — WordPress site, no prices (B2B, RFQ model).
|
||||||
* Extracts product names, part numbers, form factors and specs.
|
* Scrapes product catalog for specs, images, datasheets.
|
||||||
*
|
* Products listed at /products/optical-transceivers/ → individual /product/SKU/ pages.
|
||||||
* Rate limited: 1 req/2sec.
|
|
||||||
*/
|
*/
|
||||||
import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db";
|
import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db";
|
||||||
import * as cheerio from "cheerio";
|
|
||||||
|
|
||||||
const BASE = "https://www.smartoptics.com";
|
const BASE = "https://smartoptics.com";
|
||||||
const CATALOG_URLS = [
|
const CATALOG_URL = `${BASE}/products/optical-transceivers/`;
|
||||||
"/products/transceivers/",
|
|
||||||
"/products/",
|
|
||||||
"/products/sfp-transceivers/",
|
|
||||||
"/products/qsfp-transceivers/",
|
|
||||||
];
|
|
||||||
const HEADERS = {
|
const HEADERS = {
|
||||||
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||||
Accept: "text/html,application/xhtml+xml",
|
Accept: "text/html,application/xhtml+xml",
|
||||||
"Accept-Language": "en-US,en;q=0.9",
|
|
||||||
};
|
};
|
||||||
|
|
||||||
interface Product {
|
|
||||||
partNumber: string;
|
|
||||||
name: string;
|
|
||||||
url: string;
|
|
||||||
formFactor: string;
|
|
||||||
speed: string;
|
|
||||||
speedGbps: number;
|
|
||||||
reachLabel?: string;
|
|
||||||
reachMeters?: number;
|
|
||||||
fiberType?: string;
|
|
||||||
wavelength?: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
function sleep(ms: number): Promise<void> {
|
function sleep(ms: number): Promise<void> {
|
||||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
return new Promise((r) => setTimeout(r, ms));
|
||||||
}
|
}
|
||||||
|
|
||||||
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
|
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
|
||||||
const lower = text.toLowerCase();
|
const t = text.toLowerCase();
|
||||||
if (lower.includes("osfp") && !lower.includes("qsfp")) return { formFactor: "OSFP", speed: "400G", speedGbps: 400 };
|
if (t.includes("qsfp-dd800") || t.includes("sfp-dd800") || t.includes("800ge")) return { formFactor: "QSFP-DD", speed: "800G", speedGbps: 800 };
|
||||||
if (lower.includes("qsfp-dd")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
|
if (t.includes("qsfp-dd") || (t.includes("400g") && t.includes("qsfp"))) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
|
||||||
if (lower.includes("qsfp28")) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
|
if (t.includes("qsfp112")) return { formFactor: "QSFP112", speed: "400G", speedGbps: 400 };
|
||||||
if (lower.includes("qsfp+") || lower.includes("qsfp plus")) return { formFactor: "QSFP+", speed: "40G", speedGbps: 40 };
|
if (t.includes("qsfp56")) return { formFactor: "QSFP56", speed: "200G", speedGbps: 200 };
|
||||||
if (lower.includes("sfp56")) return { formFactor: "SFP56", speed: "50G", speedGbps: 50 };
|
if (t.includes("qsfp28") || t.includes("100ge") || t.includes("100g")) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
|
||||||
if (lower.includes("sfp28") || lower.includes("25g")) return { formFactor: "SFP28", speed: "25G", speedGbps: 25 };
|
if (t.includes("sfp28") || t.includes("25g")) return { formFactor: "SFP28", speed: "25G", speedGbps: 25 };
|
||||||
if (lower.includes("sfp+") || lower.includes("10gbase") || lower.includes("10g")) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
|
if (t.includes("qsfp+") || t.includes("40g")) return { formFactor: "QSFP+", speed: "40G", speedGbps: 40 };
|
||||||
if (lower.includes("xfp")) return { formFactor: "XFP", speed: "10G", speedGbps: 10 };
|
if (t.includes("sfp+") || t.includes("10g")) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
|
||||||
if (lower.includes("1000base") || lower.includes("1g")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
|
if (t.includes("sfp")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
|
||||||
if (lower.includes("sfp") && !lower.includes("qsfp")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
|
return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
|
||||||
return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function detectReach(text: string): { label: string; meters: number } | undefined {
|
function detectReach(text: string): { label: string; meters: number } | undefined {
|
||||||
const patterns: [RegExp, string, number][] = [
|
const kmMatch = text.match(/(\d+)\s*km/i);
|
||||||
[/\b80\s*km\b/i, "80km", 80000],
|
if (kmMatch) {
|
||||||
[/\b40\s*km\b/i, "40km", 40000],
|
const km = parseInt(kmMatch[1]);
|
||||||
[/\b20\s*km\b/i, "20km", 20000],
|
return { label: `${km}km`, meters: km * 1000 };
|
||||||
[/\b10\s*km\b/i, "10km", 10000],
|
}
|
||||||
[/\b2\s*km\b/i, "2km", 2000],
|
const mMatch = text.match(/(\d+)\s*m\b/i);
|
||||||
[/\b550\s*m\b/i, "550m", 550],
|
if (mMatch) {
|
||||||
[/\b300\s*m\b/i, "300m", 300],
|
const m = parseInt(mMatch[1]);
|
||||||
[/\b100\s*m\b/i, "100m", 100],
|
return { label: `${m}m`, meters: m };
|
||||||
[/\bLR4\b/, "10km", 10000], [/\bLR\b/, "10km", 10000],
|
|
||||||
[/\bER4?\b/, "40km", 40000], [/\bZR4?\b/, "80km", 80000],
|
|
||||||
[/\bSR4?\b/, "300m", 300], [/\bDR4?\b/, "500m", 500], [/\bFR4?\b/, "2km", 2000],
|
|
||||||
];
|
|
||||||
for (const [regex, label, meters] of patterns) {
|
|
||||||
if (regex.test(text)) return { label, meters };
|
|
||||||
}
|
}
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
function detectFiber(text: string): string {
|
function detectFiber(text: string): string {
|
||||||
if (/single.?mode|smf|[^a-z]lx[^a-z]|[^a-z]lr[^a-z]|[^a-z]er[^a-z]|[^a-z]zr[^a-z]|bidi|cwdm|dwdm/i.test(text)) return "SMF";
|
if (/dwdm|cwdm|coherent|coh|single.?mode|smf/i.test(text)) return "SMF";
|
||||||
if (/multi.?mode|mmf|[^a-z]sx[^a-z]|[^a-z]sr[^a-z]/i.test(text)) return "MMF";
|
if (/multi.?mode|mmf|sr/i.test(text)) return "MMF";
|
||||||
if (/copper|dac|twinax|rj45|base-t/i.test(text)) return "Copper";
|
return "SMF"; // SmartOptics is almost exclusively SMF/coherent
|
||||||
return "SMF";
|
|
||||||
}
|
|
||||||
|
|
||||||
function detectWavelength(text: string): string {
|
|
||||||
const match = text.match(/(\d{3,4})\s*nm/i);
|
|
||||||
return match ? match[1] : "";
|
|
||||||
}
|
|
||||||
|
|
||||||
function parseProductList(html: string, sourceUrl: string): Product[] {
|
|
||||||
const $ = cheerio.load(html);
|
|
||||||
const products: Product[] = [];
|
|
||||||
|
|
||||||
// Manufacturer site: product cards or table rows
|
|
||||||
const cardSelectors = [
|
|
||||||
".product-item", ".product-card", ".product", "tr",
|
|
||||||
"article", ".item", ".col-xs-12.col-sm-6", ".entry",
|
|
||||||
];
|
|
||||||
|
|
||||||
for (const sel of cardSelectors) {
|
|
||||||
if ($(sel).length >= 2) {
|
|
||||||
$(sel).each((_i, el) => {
|
|
||||||
const text = $(el).text().trim();
|
|
||||||
if (!/sfp|qsfp|xfp|transceiver|optic/i.test(text)) return;
|
|
||||||
|
|
||||||
const nameEl = $(el).find("h2, h3, h4, td, .title, .name, a").first();
|
|
||||||
const name = nameEl.text().trim() || text.slice(0, 100);
|
|
||||||
if (!name || name.length < 5) return;
|
|
||||||
|
|
||||||
const linkEl = $(el).find("a[href]").first();
|
|
||||||
const href = linkEl.attr("href") || sourceUrl;
|
|
||||||
const url = href.startsWith("http") ? href : BASE + href;
|
|
||||||
|
|
||||||
// Part number: look for alphanumeric patterns typical of transceiver SKUs
|
|
||||||
const partNumMatch = text.match(/\b([A-Z]{2,}[-_][A-Z0-9]{2,}[-_][A-Z0-9]+)\b/) ||
|
|
||||||
text.match(/\b(SO[-_][A-Z0-9]+)\b/i) ||
|
|
||||||
name.match(/[A-Z0-9][-A-Z0-9]{5,}/);
|
|
||||||
const partNumber = partNumMatch?.[0] || name.replace(/\s+/g, "-").slice(0, 60);
|
|
||||||
|
|
||||||
const ff = detectFormFactor(name + " " + text);
|
|
||||||
const reach = detectReach(name + " " + text);
|
|
||||||
|
|
||||||
products.push({
|
|
||||||
partNumber,
|
|
||||||
name,
|
|
||||||
url,
|
|
||||||
...ff,
|
|
||||||
reachLabel: reach?.label,
|
|
||||||
reachMeters: reach?.meters,
|
|
||||||
fiberType: detectFiber(name + " " + text),
|
|
||||||
wavelength: detectWavelength(name + " " + text),
|
|
||||||
});
|
|
||||||
});
|
|
||||||
if (products.length > 0) break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback: all transceiver-relevant anchors
|
|
||||||
if (products.length === 0) {
|
|
||||||
$("a[href]").each((_i, el) => {
|
|
||||||
const name = $(el).text().trim();
|
|
||||||
const href = $(el).attr("href") || "";
|
|
||||||
if (name.length < 5 || name.length > 200 || !/sfp|qsfp|transceiver|optic/i.test(name)) return;
|
|
||||||
const url = href.startsWith("http") ? href : BASE + href;
|
|
||||||
const ff = detectFormFactor(name);
|
|
||||||
const reach = detectReach(name);
|
|
||||||
products.push({
|
|
||||||
partNumber: name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60),
|
|
||||||
name, url, ...ff,
|
|
||||||
reachLabel: reach?.label, reachMeters: reach?.meters,
|
|
||||||
fiberType: detectFiber(name), wavelength: detectWavelength(name),
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
const seen = new Set<string>();
|
|
||||||
return products.filter((p) => {
|
|
||||||
if (!p.url || seen.has(p.url)) return false;
|
|
||||||
seen.add(p.url);
|
|
||||||
return true;
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async function fetchPage(url: string): Promise<string> {
|
async function fetchPage(url: string): Promise<string> {
|
||||||
@ -166,46 +58,108 @@ async function fetchPage(url: string): Promise<string> {
|
|||||||
return resp.text();
|
return resp.text();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function extractProductUrls(html: string): string[] {
|
||||||
|
const urls = new Set<string>();
|
||||||
|
const regex = /href="(https?:\/\/smartoptics\.com\/product\/[^"]+)"/gi;
|
||||||
|
let m: RegExpExecArray | null;
|
||||||
|
while ((m = regex.exec(html)) !== null) {
|
||||||
|
const u = m[1].replace(/\/$/, "") + "/";
|
||||||
|
urls.add(u);
|
||||||
|
}
|
||||||
|
return Array.from(urls);
|
||||||
|
}
|
||||||
|
|
||||||
|
interface ProductData {
|
||||||
|
sku: string;
|
||||||
|
name: string;
|
||||||
|
url: string;
|
||||||
|
imageUrl?: string;
|
||||||
|
formFactor: string;
|
||||||
|
speed: string;
|
||||||
|
speedGbps: number;
|
||||||
|
reachLabel?: string;
|
||||||
|
reachMeters?: number;
|
||||||
|
fiberType: string;
|
||||||
|
coherent: boolean;
|
||||||
|
wdmType?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function scrapeProductPage(url: string): Promise<ProductData | null> {
|
||||||
|
try {
|
||||||
|
const html = await fetchPage(url);
|
||||||
|
|
||||||
|
const nameMatch = html.match(/<h1[^>]*>([^<]+)<\/h1>/) || html.match(/og:title" content="([^"]+)"/);
|
||||||
|
const name = nameMatch ? nameMatch[1].trim().replace(/ \| Smartoptics$/, "") : "";
|
||||||
|
if (!name) return null;
|
||||||
|
|
||||||
|
const sku = url.split("/").filter(Boolean).pop()?.toUpperCase() || name.replace(/\s+/g, "-");
|
||||||
|
|
||||||
|
const imgMatch = html.match(/property="og:image" content="([^"]+)"/)
|
||||||
|
|| html.match(/<img[^>]+src="([^"]*wp-content\/uploads[^"]*\.(?:png|jpg|webp))"[^>]* class="[^"]*product/i);
|
||||||
|
const imageUrl = imgMatch ? imgMatch[1] : undefined;
|
||||||
|
|
||||||
|
const ff = detectFormFactor(name);
|
||||||
|
const reach = detectReach(name);
|
||||||
|
const coherent = /coherent|coh-t|coh\.|dwdm|dp-qpsk|qpsk|cfp2/i.test(name + html.slice(0, 3000));
|
||||||
|
const wdmType = /dwdm/i.test(name) ? "DWDM" : /cwdm/i.test(name) ? "CWDM" : undefined;
|
||||||
|
|
||||||
|
return {
|
||||||
|
sku, name, url, imageUrl,
|
||||||
|
...ff,
|
||||||
|
reachLabel: reach?.label,
|
||||||
|
reachMeters: reach?.meters,
|
||||||
|
fiberType: detectFiber(name),
|
||||||
|
coherent,
|
||||||
|
wdmType,
|
||||||
|
};
|
||||||
|
} catch (err) {
|
||||||
|
console.warn(` Failed ${url}: ${(err as Error).message}`);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
export async function scrapeSmartOptics(): Promise<void> {
|
export async function scrapeSmartOptics(): Promise<void> {
|
||||||
console.log("=== SmartOptics Scraper Starting (catalog-only, no prices) ===\n");
|
console.log("=== SmartOptics Scraper Starting ===\n");
|
||||||
|
console.log("Note: SmartOptics is B2B — no public prices. Scraping specs + images only.\n");
|
||||||
|
|
||||||
const vendorId = await ensureVendor(
|
const vendorId = await ensureVendor(
|
||||||
"SmartOptics",
|
"SmartOptics",
|
||||||
"manufacturer",
|
"manufacturer",
|
||||||
"https://www.smartoptics.com",
|
"https://www.smartoptics.com",
|
||||||
BASE + CATALOG_URLS[0],
|
"https://smartoptics.com/products/optical-transceivers/"
|
||||||
);
|
);
|
||||||
|
|
||||||
const allProducts: Product[] = [];
|
const productUrls = new Set<string>();
|
||||||
const seenUrls = new Set<string>();
|
for (let page = 1; page <= 10; page++) {
|
||||||
|
|
||||||
for (const catalogPath of CATALOG_URLS) {
|
|
||||||
const catalogUrl = BASE + catalogPath;
|
|
||||||
console.log(` Fetching catalog: ${catalogUrl}`);
|
|
||||||
try {
|
try {
|
||||||
const html = await fetchPage(catalogUrl);
|
const url = page === 1 ? CATALOG_URL : `${CATALOG_URL}page/${page}/`;
|
||||||
const pageProducts = parseProductList(html, catalogUrl);
|
const html = await fetchPage(url);
|
||||||
for (const p of pageProducts) {
|
const urls = extractProductUrls(html);
|
||||||
if (!seenUrls.has(p.url)) {
|
if (urls.length === 0) break;
|
||||||
seenUrls.add(p.url);
|
urls.forEach((u) => productUrls.add(u));
|
||||||
allProducts.push(p);
|
console.log(` Catalog page ${page}: ${urls.length} products`);
|
||||||
}
|
await sleep(1500);
|
||||||
}
|
} catch {
|
||||||
console.log(` Found ${pageProducts.length} products`);
|
break;
|
||||||
await sleep(2000);
|
|
||||||
} catch (err) {
|
|
||||||
console.warn(` Failed ${catalogPath}: ${(err as Error).message}`);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`\nTotal unique products: ${allProducts.length}`);
|
console.log(`\nTotal product URLs: ${productUrls.size}`);
|
||||||
|
if (productUrls.size === 0) {
|
||||||
|
console.log("No products found — site may have changed structure");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
let totalProducts = 0;
|
let saved = 0;
|
||||||
|
let withImages = 0;
|
||||||
|
|
||||||
|
for (const url of productUrls) {
|
||||||
|
const product = await scrapeProductPage(url);
|
||||||
|
if (!product) continue;
|
||||||
|
|
||||||
for (const product of allProducts) {
|
|
||||||
try {
|
try {
|
||||||
await findOrCreateScrapedTransceiver({
|
await findOrCreateScrapedTransceiver({
|
||||||
partNumber: product.partNumber,
|
partNumber: product.sku,
|
||||||
vendorId,
|
vendorId,
|
||||||
formFactor: product.formFactor,
|
formFactor: product.formFactor,
|
||||||
speedGbps: product.speedGbps,
|
speedGbps: product.speedGbps,
|
||||||
@ -213,16 +167,20 @@ export async function scrapeSmartOptics(): Promise<void> {
|
|||||||
reachMeters: product.reachMeters,
|
reachMeters: product.reachMeters,
|
||||||
reachLabel: product.reachLabel,
|
reachLabel: product.reachLabel,
|
||||||
fiberType: product.fiberType,
|
fiberType: product.fiberType,
|
||||||
wavelengths: product.wavelength,
|
wavelengths: product.wdmType ? "DWDM-tunable" : undefined,
|
||||||
category: "DataCenter",
|
category: product.coherent ? "Coherent" : "DataCenter",
|
||||||
|
imageUrl: product.imageUrl,
|
||||||
});
|
});
|
||||||
totalProducts++;
|
saved++;
|
||||||
|
if (product.imageUrl) withImages++;
|
||||||
|
console.log(` ✓ ${product.sku} — ${product.name.slice(0, 60)}`);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.warn(` Error saving ${product.partNumber}: ${(err as Error).message.slice(0, 80)}`);
|
console.warn(` Error saving ${product.sku}: ${(err as Error).message.slice(0, 80)}`);
|
||||||
}
|
}
|
||||||
|
await sleep(1500);
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`\n=== SmartOptics Complete: ${totalProducts} products catalogued (no prices — quote-based) ===`);
|
console.log(`\n=== SmartOptics Complete: ${saved} products, ${withImages} with images ===`);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (require.main === module) {
|
if (require.main === module) {
|
||||||
|
|||||||
540
packages/scraper/src/utils/backfill-images.ts
Normal file
540
packages/scraper/src/utils/backfill-images.ts
Normal file
@ -0,0 +1,540 @@
|
|||||||
|
/**
|
||||||
|
* Image Backfill Script
|
||||||
|
*
|
||||||
|
* Fills `image_url` for transceivers that currently have none.
|
||||||
|
* Priority order:
|
||||||
|
* 1. Optcore (2,580 products) — WP REST API featured_media per product
|
||||||
|
* 2. Flexoptix (344 products) — Magento GraphQL image fields
|
||||||
|
* 3. GAO Tek (413 products) — product page og:image scrape
|
||||||
|
* 4. Other vendors — og:image from stored price_observations URLs
|
||||||
|
*
|
||||||
|
* Run on Erik:
|
||||||
|
* node packages/scraper/dist/utils/backfill-images.js
|
||||||
|
*
|
||||||
|
* Or locally with SSH port-forward:
|
||||||
|
* ssh -L 5433:127.0.0.1:5433 erik -N &
|
||||||
|
* node dist/utils/backfill-images.js
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { pool } from "./db";
|
||||||
|
import { logger } from "./logger";
|
||||||
|
|
||||||
|
function sleep(ms: number): Promise<void> {
|
||||||
|
return new Promise((r) => setTimeout(r, ms));
|
||||||
|
}
|
||||||
|
|
||||||
|
async function updateImageUrl(id: string, imageUrl: string): Promise<void> {
|
||||||
|
await pool.query(
|
||||||
|
`UPDATE transceivers SET image_url = $1, image_scraped_at = NOW(), has_image = TRUE WHERE id = $2`,
|
||||||
|
[imageUrl, id]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function fetchJson(url: string, init?: RequestInit): Promise<unknown> {
|
||||||
|
const resp = await fetch(url, {
|
||||||
|
...init,
|
||||||
|
headers: {
|
||||||
|
"User-Agent": "Mozilla/5.0 (compatible; TIP-ImageBackfill/1.0)",
|
||||||
|
Accept: "application/json",
|
||||||
|
...(init?.headers ?? {}),
|
||||||
|
},
|
||||||
|
signal: AbortSignal.timeout(20000),
|
||||||
|
});
|
||||||
|
if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
|
||||||
|
return resp.json();
|
||||||
|
}
|
||||||
|
|
||||||
|
async function fetchHtml(url: string): Promise<string> {
|
||||||
|
const resp = await fetch(url, {
|
||||||
|
headers: {
|
||||||
|
"User-Agent": "Mozilla/5.0 (compatible; TIP-ImageBackfill/1.0)",
|
||||||
|
Accept: "text/html",
|
||||||
|
},
|
||||||
|
signal: AbortSignal.timeout(20000),
|
||||||
|
});
|
||||||
|
if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
|
||||||
|
return resp.text();
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Optcore
|
||||||
|
// Strategy:
|
||||||
|
// Step 1 — enumerate all products per category, collect slug -> featured_media id
|
||||||
|
// Step 2 — resolve unique media IDs individually via /wp-json/wp/v2/media/<id>
|
||||||
|
// Step 3 — update DB for matched slugs
|
||||||
|
//
|
||||||
|
// Note: _embed does NOT work for the "product" custom post type on Optcore.
|
||||||
|
// We must resolve media IDs separately.
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
const OPTCORE_BASE = "https://www.optcore.net";
|
||||||
|
const OPTCORE_CATEGORY_IDS = [
|
||||||
|
309, 173, 76, 79, 73, 311, 313, 312, 333, 1088,
|
||||||
|
59, 1102, 4097, 77, 4101, 4092, 6441,
|
||||||
|
];
|
||||||
|
|
||||||
|
interface OptcoreWpProductRaw {
|
||||||
|
slug: string;
|
||||||
|
featured_media: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface OptcoreMediaRaw {
|
||||||
|
id: number;
|
||||||
|
source_url: string;
|
||||||
|
media_details?: {
|
||||||
|
sizes?: {
|
||||||
|
medium?: { source_url: string };
|
||||||
|
large?: { source_url: string };
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async function fetchOptcoreMediaUrl(mediaId: number): Promise<string | null> {
|
||||||
|
if (mediaId === 0) return null;
|
||||||
|
try {
|
||||||
|
const data = await fetchJson(
|
||||||
|
`${OPTCORE_BASE}/wp-json/wp/v2/media/${mediaId}?_fields=id,source_url,media_details`
|
||||||
|
) as OptcoreMediaRaw;
|
||||||
|
return (
|
||||||
|
data.media_details?.sizes?.medium?.source_url ||
|
||||||
|
data.media_details?.sizes?.large?.source_url ||
|
||||||
|
data.source_url ||
|
||||||
|
null
|
||||||
|
);
|
||||||
|
} catch {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function backfillOptcore(): Promise<{ updated: number; skipped: number; notFound: number }> {
|
||||||
|
logger.info("=== Optcore image backfill starting ===");
|
||||||
|
|
||||||
|
const rows = await pool.query(`
|
||||||
|
SELECT t.id, t.part_number
|
||||||
|
FROM transceivers t
|
||||||
|
JOIN vendors v ON t.vendor_id = v.id
|
||||||
|
WHERE v.name = 'Optcore' AND (t.image_url IS NULL OR t.image_url = '')
|
||||||
|
`);
|
||||||
|
|
||||||
|
// Map: part_number (lowercase) -> DB id
|
||||||
|
const byPartNumber = new Map<string, string>();
|
||||||
|
for (const row of rows.rows) {
|
||||||
|
byPartNumber.set((row.part_number as string).toLowerCase(), row.id as string);
|
||||||
|
}
|
||||||
|
logger.info(`Optcore: ${byPartNumber.size} products need images`);
|
||||||
|
|
||||||
|
if (byPartNumber.size === 0) {
|
||||||
|
return { updated: 0, skipped: 0, notFound: 0 };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 1: enumerate all products across all transceiver categories
|
||||||
|
const slugToMediaId = new Map<string, number>();
|
||||||
|
const seenSlugs = new Set<string>();
|
||||||
|
|
||||||
|
for (const catId of OPTCORE_CATEGORY_IDS) {
|
||||||
|
let page = 1;
|
||||||
|
while (true) {
|
||||||
|
const apiUrl = `${OPTCORE_BASE}/wp-json/wp/v2/product?product_cat=${catId}&per_page=100&page=${page}&_fields=slug,featured_media`;
|
||||||
|
try {
|
||||||
|
const resp = await fetch(apiUrl, {
|
||||||
|
headers: { "User-Agent": "Mozilla/5.0 (compatible; TIP-ImageBackfill/1.0)" },
|
||||||
|
signal: AbortSignal.timeout(20000),
|
||||||
|
});
|
||||||
|
if (!resp.ok) break;
|
||||||
|
const totalPages = parseInt(resp.headers.get("X-WP-TotalPages") || "1", 10);
|
||||||
|
const products = (await resp.json()) as OptcoreWpProductRaw[];
|
||||||
|
|
||||||
|
for (const p of products) {
|
||||||
|
if (seenSlugs.has(p.slug)) continue;
|
||||||
|
seenSlugs.add(p.slug);
|
||||||
|
if (p.featured_media) {
|
||||||
|
slugToMediaId.set(p.slug.toLowerCase(), p.featured_media);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (page >= totalPages) break;
|
||||||
|
page++;
|
||||||
|
} catch (err) {
|
||||||
|
logger.warn(`Optcore cat=${catId} page=${page} fetch failed`, { error: (err as Error).message });
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
await sleep(100);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(`Optcore: enumerated ${seenSlugs.size} WP products, ${slugToMediaId.size} have featured media`);
|
||||||
|
|
||||||
|
// Step 2: resolve unique media IDs
|
||||||
|
const uniqueMediaIds = new Set(slugToMediaId.values());
|
||||||
|
logger.info(`Optcore: resolving ${uniqueMediaIds.size} unique media IDs`);
|
||||||
|
|
||||||
|
const mediaIdToUrl = new Map<number, string>();
|
||||||
|
let resolved = 0;
|
||||||
|
for (const mediaId of uniqueMediaIds) {
|
||||||
|
const url = await fetchOptcoreMediaUrl(mediaId);
|
||||||
|
if (url) mediaIdToUrl.set(mediaId, url);
|
||||||
|
resolved++;
|
||||||
|
if (resolved % 20 === 0) {
|
||||||
|
logger.info(`Optcore media resolution: ${resolved}/${uniqueMediaIds.size}`);
|
||||||
|
}
|
||||||
|
await sleep(100);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 3: match slugs to DB records and update
|
||||||
|
let updated = 0;
|
||||||
|
let skipped = 0;
|
||||||
|
let notFound = 0;
|
||||||
|
|
||||||
|
for (const [slug, mediaId] of slugToMediaId) {
|
||||||
|
const dbId = byPartNumber.get(slug);
|
||||||
|
if (!dbId) {
|
||||||
|
notFound++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const imgUrl = mediaIdToUrl.get(mediaId);
|
||||||
|
if (!imgUrl) {
|
||||||
|
skipped++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
await updateImageUrl(dbId, imgUrl);
|
||||||
|
byPartNumber.delete(slug);
|
||||||
|
updated++;
|
||||||
|
if (updated % 50 === 0) {
|
||||||
|
logger.info(`Optcore DB updates: ${updated} so far`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(`Optcore done: ${updated} updated, ${skipped} no-media-url, ${notFound} slug-not-in-db`);
|
||||||
|
return { updated, skipped, notFound };
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Flexoptix
|
||||||
|
// Magento GraphQL: /graphql — query by SKU, returns small_image.url
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
const FLEXOPTIX_BASE = "https://www.flexoptix.net";
|
||||||
|
const FLEXOPTIX_GRAPHQL = `${FLEXOPTIX_BASE}/graphql`;
|
||||||
|
|
||||||
|
interface FlexoptixGqlProduct {
|
||||||
|
name: string;
|
||||||
|
sku: string;
|
||||||
|
url_key: string;
|
||||||
|
small_image: { url: string } | null;
|
||||||
|
image: { url: string } | null;
|
||||||
|
thumbnail: { url: string } | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function fetchFlexoptixImage(sku: string): Promise<string | null> {
|
||||||
|
// Strip ":Sx" coding suffix to get canonical SKU for search
|
||||||
|
const canonicalSku = sku.replace(/:.*$/, "").trim();
|
||||||
|
|
||||||
|
const query = `{
|
||||||
|
products(search: ${JSON.stringify(canonicalSku)}, pageSize: 5) {
|
||||||
|
items {
|
||||||
|
name
|
||||||
|
sku
|
||||||
|
url_key
|
||||||
|
small_image { url }
|
||||||
|
image { url }
|
||||||
|
thumbnail { url }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}`;
|
||||||
|
|
||||||
|
const data = await fetchJson(FLEXOPTIX_GRAPHQL, {
|
||||||
|
method: "POST",
|
||||||
|
headers: { "Content-Type": "application/json" },
|
||||||
|
body: JSON.stringify({ query }),
|
||||||
|
}) as {
|
||||||
|
data?: { products?: { items: FlexoptixGqlProduct[] } };
|
||||||
|
};
|
||||||
|
|
||||||
|
const items = data.data?.products?.items ?? [];
|
||||||
|
|
||||||
|
// Prefer exact SKU match (canonical, case-insensitive)
|
||||||
|
for (const item of items) {
|
||||||
|
const itemSku = item.sku.replace(/:.*$/, "").trim().toLowerCase();
|
||||||
|
if (itemSku === canonicalSku.toLowerCase()) {
|
||||||
|
return (
|
||||||
|
item.small_image?.url ||
|
||||||
|
item.image?.url ||
|
||||||
|
item.thumbnail?.url ||
|
||||||
|
null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: single result
|
||||||
|
if (items.length === 1) {
|
||||||
|
return (
|
||||||
|
items[0].small_image?.url ||
|
||||||
|
items[0].image?.url ||
|
||||||
|
items[0].thumbnail?.url ||
|
||||||
|
null
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function backfillFlexoptix(): Promise<{ updated: number; skipped: number; errors: number }> {
|
||||||
|
logger.info("=== Flexoptix image backfill starting ===");
|
||||||
|
|
||||||
|
const rows = await pool.query(`
|
||||||
|
SELECT t.id, t.part_number
|
||||||
|
FROM transceivers t
|
||||||
|
JOIN vendors v ON t.vendor_id = v.id
|
||||||
|
WHERE v.name = 'FLEXOPTIX' AND (t.image_url IS NULL OR t.image_url = '')
|
||||||
|
ORDER BY t.part_number
|
||||||
|
`);
|
||||||
|
|
||||||
|
logger.info(`Flexoptix: ${rows.rows.length} products need images`);
|
||||||
|
|
||||||
|
let updated = 0;
|
||||||
|
let skipped = 0;
|
||||||
|
let errors = 0;
|
||||||
|
|
||||||
|
for (const row of rows.rows) {
|
||||||
|
const sku = row.part_number as string;
|
||||||
|
try {
|
||||||
|
const imgUrl = await fetchFlexoptixImage(sku);
|
||||||
|
if (imgUrl) {
|
||||||
|
await updateImageUrl(row.id as string, imgUrl);
|
||||||
|
updated++;
|
||||||
|
if (updated % 25 === 0) {
|
||||||
|
logger.info(`Flexoptix progress: ${updated} updated so far`);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
skipped++;
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
logger.error(`Flexoptix error for ${sku}`, { error: (err as Error).message });
|
||||||
|
errors++;
|
||||||
|
}
|
||||||
|
await sleep(100);
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(`Flexoptix done: ${updated} updated, ${skipped} no-image, ${errors} errors`);
|
||||||
|
return { updated, skipped, errors };
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// GAO Tek
|
||||||
|
// Fetch product page and extract og:image meta tag
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
const GAOTEK_BASE = "https://gaotek.com";
|
||||||
|
|
||||||
|
function extractOgImage(html: string): string | null {
|
||||||
|
const match =
|
||||||
|
html.match(/<meta[^>]+property=["']og:image["'][^>]+content=["']([^"']+)["']/i) ||
|
||||||
|
html.match(/<meta[^>]+content=["']([^"']+)["'][^>]+property=["']og:image["']/i);
|
||||||
|
if (!match) return null;
|
||||||
|
const url = match[1];
|
||||||
|
// Skip placeholder and logo images
|
||||||
|
if (url.includes("placeholder") || url.includes("logo") || url.includes("mobilelogo")) return null;
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function fetchGaoTekImage(productUrl: string): Promise<string | null> {
|
||||||
|
const html = await fetchHtml(productUrl);
|
||||||
|
return extractOgImage(html);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function backfillGaoTek(): Promise<{ updated: number; skipped: number; errors: number }> {
|
||||||
|
logger.info("=== GAO Tek image backfill starting ===");
|
||||||
|
|
||||||
|
// Prefer records that have stored product URLs in price_observations
|
||||||
|
const withUrlRows = await pool.query(`
|
||||||
|
SELECT DISTINCT ON (t.id)
|
||||||
|
t.id, t.part_number, t.slug, po.url as product_url
|
||||||
|
FROM transceivers t
|
||||||
|
JOIN vendors v ON t.vendor_id = v.id
|
||||||
|
LEFT JOIN price_observations po ON po.transceiver_id = t.id
|
||||||
|
WHERE v.name = 'GAO Tek'
|
||||||
|
AND (t.image_url IS NULL OR t.image_url = '')
|
||||||
|
AND po.url IS NOT NULL
|
||||||
|
AND po.url LIKE '%gaotek%'
|
||||||
|
ORDER BY t.id, po.time DESC
|
||||||
|
`);
|
||||||
|
|
||||||
|
// All records without images
|
||||||
|
const allRows = await pool.query(`
|
||||||
|
SELECT t.id, t.part_number, t.slug
|
||||||
|
FROM transceivers t
|
||||||
|
JOIN vendors v ON t.vendor_id = v.id
|
||||||
|
WHERE v.name = 'GAO Tek'
|
||||||
|
AND (t.image_url IS NULL OR t.image_url = '')
|
||||||
|
`);
|
||||||
|
|
||||||
|
const withUrlIds = new Set(withUrlRows.rows.map((r) => r.id as string));
|
||||||
|
|
||||||
|
// Combine: known URLs first, then slug-derived URLs
|
||||||
|
const toProcess: Array<{ id: string; partNumber: string; productUrl: string }> = [];
|
||||||
|
|
||||||
|
for (const row of withUrlRows.rows) {
|
||||||
|
toProcess.push({
|
||||||
|
id: row.id as string,
|
||||||
|
partNumber: row.part_number as string,
|
||||||
|
productUrl: row.product_url as string,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const row of allRows.rows) {
|
||||||
|
if (withUrlIds.has(row.id as string)) continue;
|
||||||
|
const rawSlug = (row.slug as string).replace(/^scraped-/, "");
|
||||||
|
toProcess.push({
|
||||||
|
id: row.id as string,
|
||||||
|
partNumber: row.part_number as string,
|
||||||
|
productUrl: `${GAOTEK_BASE}/product/${rawSlug}/`,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(`GAO Tek: ${toProcess.length} products to attempt`);
|
||||||
|
|
||||||
|
let updated = 0;
|
||||||
|
let skipped = 0;
|
||||||
|
let errors = 0;
|
||||||
|
|
||||||
|
for (const item of toProcess) {
|
||||||
|
try {
|
||||||
|
const imgUrl = await fetchGaoTekImage(item.productUrl);
|
||||||
|
if (imgUrl) {
|
||||||
|
await updateImageUrl(item.id, imgUrl);
|
||||||
|
updated++;
|
||||||
|
if (updated % 25 === 0) {
|
||||||
|
logger.info(`GAO Tek progress: ${updated} updated so far`);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
skipped++;
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
const msg = (err as Error).message;
|
||||||
|
if (!msg.includes("HTTP 404") && !msg.includes("HTTP 403")) {
|
||||||
|
logger.warn(`GAO Tek error for ${item.productUrl}`, { error: msg.slice(0, 80) });
|
||||||
|
}
|
||||||
|
errors++;
|
||||||
|
}
|
||||||
|
await sleep(100);
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(`GAO Tek done: ${updated} updated, ${skipped} no-image, ${errors} errors/404s`);
|
||||||
|
return { updated, skipped, errors };
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Other vendors — og:image from stored price_observations URLs
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
const OTHER_VENDOR_NAMES = [
|
||||||
|
"T&S Communication",
|
||||||
|
"Ascent Optics",
|
||||||
|
"ATGBICS",
|
||||||
|
"Skylane Optics",
|
||||||
|
"SmartOptics",
|
||||||
|
"ProLabs",
|
||||||
|
"FS.COM",
|
||||||
|
"GBICS",
|
||||||
|
"Fluxlight",
|
||||||
|
];
|
||||||
|
|
||||||
|
async function backfillOtherVendors(): Promise<{ total: number; updated: number }> {
|
||||||
|
logger.info("=== Other vendors og:image backfill starting ===");
|
||||||
|
|
||||||
|
const rows = await pool.query(`
|
||||||
|
SELECT DISTINCT ON (t.id)
|
||||||
|
t.id, t.part_number, v.name as vendor_name, po.url as product_url
|
||||||
|
FROM transceivers t
|
||||||
|
JOIN vendors v ON t.vendor_id = v.id
|
||||||
|
JOIN price_observations po ON po.transceiver_id = t.id
|
||||||
|
WHERE v.name = ANY($1)
|
||||||
|
AND (t.image_url IS NULL OR t.image_url = '')
|
||||||
|
AND po.url IS NOT NULL
|
||||||
|
AND po.url ~ '^https?://'
|
||||||
|
ORDER BY t.id, po.time DESC
|
||||||
|
`, [OTHER_VENDOR_NAMES]);
|
||||||
|
|
||||||
|
logger.info(`Other vendors: ${rows.rows.length} products with URLs to process`);
|
||||||
|
|
||||||
|
let updated = 0;
|
||||||
|
|
||||||
|
for (const row of rows.rows) {
|
||||||
|
const productUrl = row.product_url as string;
|
||||||
|
try {
|
||||||
|
const html = await fetchHtml(productUrl);
|
||||||
|
const imgUrl = extractOgImage(html);
|
||||||
|
if (imgUrl) {
|
||||||
|
await updateImageUrl(row.id as string, imgUrl);
|
||||||
|
updated++;
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Skip 404s, timeouts, etc. silently
|
||||||
|
}
|
||||||
|
await sleep(100);
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(`Other vendors done: ${updated} / ${rows.rows.length} updated`);
|
||||||
|
return { total: rows.rows.length, updated };
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Main
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
async function main(): Promise<void> {
|
||||||
|
logger.info("=== TIP Image Backfill Script ===");
|
||||||
|
logger.info(`DB: ${process.env.POSTGRES_HOST ?? "localhost"}:${process.env.POSTGRES_PORT ?? "5433"}`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
await pool.query("SELECT 1");
|
||||||
|
logger.info("DB connection OK");
|
||||||
|
} catch (err) {
|
||||||
|
logger.error("DB connection failed", { error: (err as Error).message });
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
const startTime = Date.now();
|
||||||
|
const results: Record<string, unknown> = {};
|
||||||
|
|
||||||
|
try {
|
||||||
|
results.optcore = await backfillOptcore();
|
||||||
|
} catch (err) {
|
||||||
|
logger.error("Optcore backfill failed", { error: (err as Error).message });
|
||||||
|
results.optcore = { error: (err as Error).message };
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
results.flexoptix = await backfillFlexoptix();
|
||||||
|
} catch (err) {
|
||||||
|
logger.error("Flexoptix backfill failed", { error: (err as Error).message });
|
||||||
|
results.flexoptix = { error: (err as Error).message };
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
results.gaotek = await backfillGaoTek();
|
||||||
|
} catch (err) {
|
||||||
|
logger.error("GAO Tek backfill failed", { error: (err as Error).message });
|
||||||
|
results.gaotek = { error: (err as Error).message };
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
results.others = await backfillOtherVendors();
|
||||||
|
} catch (err) {
|
||||||
|
logger.error("Other vendors backfill failed", { error: (err as Error).message });
|
||||||
|
results.others = { error: (err as Error).message };
|
||||||
|
}
|
||||||
|
|
||||||
|
const elapsedSec = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||||
|
logger.info("=== Backfill complete ===", { results, elapsedSec });
|
||||||
|
}
|
||||||
|
|
||||||
|
main()
|
||||||
|
.then(() => pool.end())
|
||||||
|
.catch((err) => {
|
||||||
|
logger.error("Fatal error", { error: (err as Error).message });
|
||||||
|
pool.end();
|
||||||
|
process.exit(1);
|
||||||
|
});
|
||||||
@ -10,7 +10,9 @@ export const pool = new Pool({
|
|||||||
database: process.env.POSTGRES_DB || "transceiver_db",
|
database: process.env.POSTGRES_DB || "transceiver_db",
|
||||||
user: process.env.POSTGRES_USER || "tip",
|
user: process.env.POSTGRES_USER || "tip",
|
||||||
password: process.env.POSTGRES_PASSWORD || "tip_dev_2026",
|
password: process.env.POSTGRES_PASSWORD || "tip_dev_2026",
|
||||||
max: 10,
|
max: 5,
|
||||||
|
idleTimeoutMillis: 10000,
|
||||||
|
connectionTimeoutMillis: 5000,
|
||||||
});
|
});
|
||||||
|
|
||||||
// Alias — some scrapers import { db } instead of { pool }
|
// Alias — some scrapers import { db } instead of { pool }
|
||||||
@ -54,6 +56,11 @@ export async function upsertPriceObservation(params: {
|
|||||||
params.contentHash,
|
params.contentHash,
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
// Mark the transceiver as price-verified whenever we successfully record a price
|
||||||
|
await pool.query(
|
||||||
|
`UPDATE transceivers SET price_verified = true WHERE id = $1 AND (price_verified IS NULL OR price_verified = false)`,
|
||||||
|
[params.transceiverId]
|
||||||
|
);
|
||||||
return true; // New observation written
|
return true; // New observation written
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -68,23 +75,31 @@ export async function findOrCreateScrapedTransceiver(params: {
|
|||||||
fiberType?: string;
|
fiberType?: string;
|
||||||
wavelengths?: string;
|
wavelengths?: string;
|
||||||
category?: string;
|
category?: string;
|
||||||
|
imageUrl?: string;
|
||||||
}): Promise<string> {
|
}): Promise<string> {
|
||||||
// Try to match existing transceiver by part number + vendor
|
// Try to match existing transceiver by part number + vendor
|
||||||
const existing = await pool.query(
|
const existing = await pool.query(
|
||||||
`SELECT id FROM transceivers WHERE part_number = $1 AND vendor_id = $2`,
|
`SELECT id, image_url FROM transceivers WHERE part_number = $1 AND vendor_id = $2`,
|
||||||
[params.partNumber, params.vendorId]
|
[params.partNumber, params.vendorId]
|
||||||
);
|
);
|
||||||
|
|
||||||
if (existing.rows.length > 0) {
|
if (existing.rows.length > 0) {
|
||||||
|
// Update image_url and image_verified if we have a new image for a record without one
|
||||||
|
if (params.imageUrl && !existing.rows[0].image_url) {
|
||||||
|
await pool.query(
|
||||||
|
`UPDATE transceivers SET image_url = $1, image_verified = true, updated_at = NOW() WHERE id = $2`,
|
||||||
|
[params.imageUrl, existing.rows[0].id]
|
||||||
|
);
|
||||||
|
}
|
||||||
return existing.rows[0].id;
|
return existing.rows[0].id;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create new transceiver entry
|
// Create new transceiver entry
|
||||||
const slug = `scraped-${params.partNumber.toLowerCase().replace(/[^a-z0-9]+/g, "-")}`;
|
const slug = `scraped-${params.partNumber.toLowerCase().replace(/[^a-z0-9]+/g, "-")}`;
|
||||||
const result = await pool.query(
|
const result = await pool.query(
|
||||||
`INSERT INTO transceivers (slug, part_number, vendor_id, form_factor, speed_gbps, speed, reach_meters, reach_label, fiber_type, wavelengths, category, market_status)
|
`INSERT INTO transceivers (slug, part_number, vendor_id, form_factor, speed_gbps, speed, reach_meters, reach_label, fiber_type, wavelengths, category, market_status, image_url, image_verified)
|
||||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, 'Mainstream')
|
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, 'Mainstream', $12, $13)
|
||||||
ON CONFLICT (slug) DO UPDATE SET updated_at = NOW()
|
ON CONFLICT (slug) DO UPDATE SET image_url = COALESCE(transceivers.image_url, EXCLUDED.image_url), image_verified = COALESCE(transceivers.image_verified, EXCLUDED.image_verified), updated_at = NOW()
|
||||||
RETURNING id`,
|
RETURNING id`,
|
||||||
[
|
[
|
||||||
slug,
|
slug,
|
||||||
@ -98,6 +113,8 @@ export async function findOrCreateScrapedTransceiver(params: {
|
|||||||
params.fiberType || "",
|
params.fiberType || "",
|
||||||
params.wavelengths || "",
|
params.wavelengths || "",
|
||||||
params.category || "DataCenter",
|
params.category || "DataCenter",
|
||||||
|
params.imageUrl || null,
|
||||||
|
params.imageUrl ? true : null,
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
return result.rows[0].id;
|
return result.rows[0].id;
|
||||||
|
|||||||
77
packages/scraper/src/utils/proxy.ts
Normal file
77
packages/scraper/src/utils/proxy.ts
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
/**
|
||||||
|
* Proxy utility — fetches next available proxy from TIP proxy pool.
|
||||||
|
* Falls back to process.env.PROXY_URL if pool is empty or unreachable.
|
||||||
|
*/
|
||||||
|
import * as https from "https";
|
||||||
|
import * as http from "http";
|
||||||
|
|
||||||
|
const TIP_API = process.env.TIP_API_URL ?? "https://transceiver-db.context-x.org";
|
||||||
|
const FALLBACK = process.env.PROXY_URL ?? "";
|
||||||
|
|
||||||
|
interface ProxyResponse {
|
||||||
|
success: boolean;
|
||||||
|
host: string;
|
||||||
|
port: number;
|
||||||
|
protocol: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
function getJson(url: string): Promise<unknown> {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
const parsed = new URL(url);
|
||||||
|
const mod = parsed.protocol === "https:" ? https : http;
|
||||||
|
|
||||||
|
const req = mod.request(
|
||||||
|
{
|
||||||
|
hostname: parsed.hostname,
|
||||||
|
port: parsed.port || (parsed.protocol === "https:" ? 443 : 80),
|
||||||
|
path: parsed.pathname + parsed.search,
|
||||||
|
method: "GET",
|
||||||
|
headers: { "User-Agent": "TIP-Scraper/1.0.0" },
|
||||||
|
timeout: 5_000,
|
||||||
|
},
|
||||||
|
(res) => {
|
||||||
|
const chunks: Buffer[] = [];
|
||||||
|
res.on("data", (c: Buffer) => chunks.push(c));
|
||||||
|
res.on("end", () => {
|
||||||
|
try {
|
||||||
|
resolve(JSON.parse(Buffer.concat(chunks).toString()));
|
||||||
|
} catch {
|
||||||
|
reject(new Error("Invalid JSON response"));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
req.on("timeout", () => { req.destroy(); reject(new Error("timeout")); });
|
||||||
|
req.on("error", reject);
|
||||||
|
req.end();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the next available proxy URL from the TIP proxy pool.
|
||||||
|
* Format: "socks5://host:port" or "http://host:port"
|
||||||
|
* Returns undefined if no proxy is available (direct connection).
|
||||||
|
*/
|
||||||
|
export async function getProxyUrl(): Promise<string | undefined> {
|
||||||
|
// Try proxy pool first
|
||||||
|
try {
|
||||||
|
const data = await getJson(`${TIP_API}/api/proxy/next`) as ProxyResponse;
|
||||||
|
if (data?.success && data.host && data.port) {
|
||||||
|
const protocol = data.protocol ?? "socks5";
|
||||||
|
const proxyUrl = `${protocol}://${data.host}:${data.port}`;
|
||||||
|
console.log(`[proxy] Using TIP pool node: ${data.host}:${data.port} (${protocol})`);
|
||||||
|
return proxyUrl;
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Pool unavailable — fall through to env fallback
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback to env var
|
||||||
|
if (FALLBACK) {
|
||||||
|
console.log(`[proxy] Using env PROXY_URL fallback`);
|
||||||
|
return FALLBACK;
|
||||||
|
}
|
||||||
|
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user