Compare commits
3 Commits
b612c5801d
...
71d6c20b5e
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
71d6c20b5e | ||
|
|
93dd7c2e23 | ||
|
|
edc9311d7b |
@ -18,6 +18,10 @@ Types: FEAT · FIX · UI · DATA · AI · INFRA
|
||||
{"d":"2026-04-02","t":"FEAT","m":"Forecast Engine: weighted demand index (0-100) from 6 signal types — capex 0.30, ai_clusters 0.25, ebay_velocity 0.20, lead_times 0.15, github 0.06, standards 0.04 — 3/9/12/18 month horizons for 5 technologies"}
|
||||
{"d":"2026-04-02","t":"FEAT","m":"NAS sync: datasheet/manual download — PDFs from product_documents organized into switches/transceivers/whitepapers/other"}
|
||||
{"d":"2026-04-02","t":"INFRA","m":"Scheduler: 50 total pg-boss jobs — 8 new prediction/forecast jobs with cron schedules"}
|
||||
{"d":"2026-04-03","t":"FEAT","m":"TIP Proxy Network: residential proxy pool — contributor nodes donate bandwidth, SOCKS5 server (Node.js net only), register/heartbeat/next/rotate/stats API, round-robin routing with uptime+latency scoring"}
|
||||
{"d":"2026-04-03","t":"FEAT","m":"@tip/proxy-agent: standalone CLI package — tip-agent start/status/stop, configurable bandwidth cap, 30s heartbeat, graceful shutdown"}
|
||||
{"d":"2026-04-03","t":"UI","m":"Dashboard Network tab: node stats, join-the-network card with token generator, install command box, country breakdown table"}
|
||||
{"d":"2026-04-03","t":"INFRA","m":"Mac Studio home node: tip-agent running on 192.168.178.213:1081, PROXY_URL=socks5://192.168.178.213:1081 set in PM2 env for scraper+api, ProLabs WAF bypass now active"}
|
||||
{"d":"2026-04-01","t":"FEAT","m":"Product Intelligence Layer (migration 020): product_issues table (forum/community bugs), condition+marketplace on price_observations, features JSONB on switches+transceivers"}
|
||||
{"d":"2026-04-01","t":"FEAT","m":"eBay Enricher: scrapes eBay.de for switch/transceiver listings — extracts features, description, refurbished prices, images — nightly via pg-boss"}
|
||||
{"d":"2026-04-01","t":"FEAT","m":"Community Issues Scraper: extracts known bugs/incompatibilities from Reddit, ServeTheHome, Arista Community, Cisco Community, NetworkEngineering SE"}
|
||||
|
||||
306
packages/api/src/routes/proxy.ts
Normal file
306
packages/api/src/routes/proxy.ts
Normal file
@ -0,0 +1,306 @@
|
||||
/**
|
||||
* Proxy Network API routes
|
||||
*
|
||||
* POST /api/proxy/register — register new node, returns token
|
||||
* POST /api/proxy/heartbeat — update node status + stats
|
||||
* GET /api/proxy/nodes — list all nodes (admin)
|
||||
* GET /api/proxy/next — get best available proxy for scraper use
|
||||
* POST /api/proxy/rotate — mark node as used, get next
|
||||
* GET /api/proxy/stats — network-wide stats
|
||||
*
|
||||
* Note: register + heartbeat are public (no auth required).
|
||||
* nodes + rotate require admin token via X-Admin-Token header.
|
||||
*/
|
||||
import { Router, Request, Response } from "express";
|
||||
import * as crypto from "crypto";
|
||||
import { pool } from "../db/client";
|
||||
|
||||
export const proxyRouter = Router();
|
||||
|
||||
const ADMIN_TOKEN = process.env.TIP_ADMIN_TOKEN ?? "tip-admin-2026";
|
||||
|
||||
function requireAdmin(req: Request, res: Response): boolean {
|
||||
const token =
|
||||
req.headers["x-admin-token"] ??
|
||||
req.query["admin_token"] ??
|
||||
"";
|
||||
if (token !== ADMIN_TOKEN) {
|
||||
res.status(401).json({ error: "Admin token required" });
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
function generateToken(): string {
|
||||
return crypto.randomBytes(32).toString("hex");
|
||||
}
|
||||
|
||||
// Round-robin index tracker (in-memory, resets on restart)
|
||||
let rrIndex = 0;
|
||||
|
||||
// POST /api/proxy/register
|
||||
proxyRouter.post("/register", async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { token: existingToken, name, owner_email, port, version } = req.body as {
|
||||
token?: string;
|
||||
name?: string;
|
||||
owner_email?: string;
|
||||
port?: number;
|
||||
version?: string;
|
||||
};
|
||||
|
||||
const ip =
|
||||
(req.headers["x-forwarded-for"] as string | undefined)?.split(",")[0]?.trim() ??
|
||||
req.socket.remoteAddress ??
|
||||
"";
|
||||
|
||||
// If token provided, upsert — otherwise create new
|
||||
const token = existingToken ?? generateToken();
|
||||
|
||||
const result = await pool.query(
|
||||
`INSERT INTO proxy_nodes (token, name, owner_email, ip, port, version, status, last_seen)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, 'online', NOW())
|
||||
ON CONFLICT (token) DO UPDATE SET
|
||||
name = COALESCE(EXCLUDED.name, proxy_nodes.name),
|
||||
ip = EXCLUDED.ip,
|
||||
port = COALESCE(EXCLUDED.port, proxy_nodes.port),
|
||||
version = COALESCE(EXCLUDED.version, proxy_nodes.version),
|
||||
status = 'online',
|
||||
last_seen = NOW()
|
||||
RETURNING id, token, name, status`,
|
||||
[token, name ?? null, owner_email ?? null, ip, port ?? 1080, version ?? null]
|
||||
);
|
||||
|
||||
const node = result.rows[0];
|
||||
res.json({
|
||||
success: true,
|
||||
token: node.token,
|
||||
nodeId: node.id,
|
||||
message: "Node registered. Keep this token safe!",
|
||||
});
|
||||
} catch (err) {
|
||||
console.error("[proxy] register error:", err);
|
||||
res.status(500).json({ error: "Registration failed" });
|
||||
}
|
||||
});
|
||||
|
||||
// POST /api/proxy/heartbeat
|
||||
proxyRouter.post("/heartbeat", async (req: Request, res: Response) => {
|
||||
try {
|
||||
const {
|
||||
token,
|
||||
ip,
|
||||
port,
|
||||
bytesProxied,
|
||||
requestsProxied,
|
||||
latencyMs,
|
||||
version,
|
||||
} = req.body as {
|
||||
token: string;
|
||||
ip?: string;
|
||||
port?: number;
|
||||
bytesProxied?: number;
|
||||
requestsProxied?: number;
|
||||
latencyMs?: number;
|
||||
version?: string;
|
||||
};
|
||||
|
||||
if (!token) {
|
||||
res.status(400).json({ error: "token required" });
|
||||
return;
|
||||
}
|
||||
|
||||
const remoteIp =
|
||||
ip ??
|
||||
(req.headers["x-forwarded-for"] as string | undefined)?.split(",")[0]?.trim() ??
|
||||
req.socket.remoteAddress ??
|
||||
"";
|
||||
|
||||
const bytesGb = (bytesProxied ?? 0) / (1024 ** 3);
|
||||
|
||||
const result = await pool.query(
|
||||
`UPDATE proxy_nodes SET
|
||||
status = 'online',
|
||||
last_seen = NOW(),
|
||||
ip = COALESCE($2, ip),
|
||||
port = COALESCE($3, port),
|
||||
bytes_proxied = COALESCE($4, bytes_proxied),
|
||||
requests_proxied = COALESCE($5, requests_proxied),
|
||||
latency_ms = COALESCE($6, latency_ms),
|
||||
version = COALESCE($7, version),
|
||||
bandwidth_used_gb = COALESCE($8, bandwidth_used_gb)
|
||||
WHERE token = $1
|
||||
RETURNING id, status`,
|
||||
[
|
||||
token,
|
||||
remoteIp || null,
|
||||
port ?? null,
|
||||
bytesProxied ?? null,
|
||||
requestsProxied ?? null,
|
||||
latencyMs ?? null,
|
||||
version ?? null,
|
||||
bytesGb > 0 ? bytesGb : null,
|
||||
]
|
||||
);
|
||||
|
||||
if (result.rowCount === 0) {
|
||||
res.status(404).json({ error: "Node not found. Please register first." });
|
||||
return;
|
||||
}
|
||||
|
||||
res.json({ success: true, status: "online" });
|
||||
} catch (err) {
|
||||
console.error("[proxy] heartbeat error:", err);
|
||||
res.status(500).json({ error: "Heartbeat failed" });
|
||||
}
|
||||
});
|
||||
|
||||
// GET /api/proxy/nodes (admin)
|
||||
proxyRouter.get("/nodes", async (req: Request, res: Response) => {
|
||||
if (!requireAdmin(req, res)) return;
|
||||
try {
|
||||
const result = await pool.query(
|
||||
`SELECT id, token, name, country_code, city, ip, port, protocol,
|
||||
status, last_seen, bytes_proxied, requests_proxied,
|
||||
uptime_pct, bandwidth_limit_gb, bandwidth_used_gb,
|
||||
latency_ms, version, registered_at
|
||||
FROM proxy_nodes
|
||||
ORDER BY status = 'online' DESC, last_seen DESC`
|
||||
);
|
||||
res.json({ success: true, nodes: result.rows, total: result.rowCount });
|
||||
} catch (err) {
|
||||
console.error("[proxy] nodes error:", err);
|
||||
res.status(500).json({ error: "Failed to fetch nodes" });
|
||||
}
|
||||
});
|
||||
|
||||
// GET /api/proxy/next — best available proxy for scraper use
|
||||
proxyRouter.get("/next", async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const result = await pool.query(
|
||||
`SELECT id, ip, port, protocol, country_code, city, latency_ms
|
||||
FROM proxy_nodes
|
||||
WHERE status = 'online'
|
||||
AND last_seen > NOW() - INTERVAL '2 minutes'
|
||||
AND (bandwidth_used_gb < bandwidth_limit_gb OR bandwidth_limit_gb = 0)
|
||||
ORDER BY uptime_pct DESC NULLS LAST, latency_ms ASC NULLS LAST
|
||||
LIMIT 10`
|
||||
);
|
||||
|
||||
if (result.rowCount === 0) {
|
||||
res.status(503).json({
|
||||
error: "No proxies available",
|
||||
fallback: process.env.PROXY_URL ?? null,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
const nodes = result.rows;
|
||||
const node = nodes[rrIndex % nodes.length];
|
||||
rrIndex = (rrIndex + 1) % nodes.length;
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
host: node.ip,
|
||||
port: node.port,
|
||||
protocol: node.protocol ?? "socks5",
|
||||
nodeId: node.id,
|
||||
country: node.country_code,
|
||||
city: node.city,
|
||||
latencyMs: node.latency_ms,
|
||||
});
|
||||
} catch (err) {
|
||||
console.error("[proxy] next error:", err);
|
||||
res.status(500).json({ error: "Failed to get next proxy" });
|
||||
}
|
||||
});
|
||||
|
||||
// POST /api/proxy/rotate — mark node as used, get next one
|
||||
proxyRouter.post("/rotate", async (req: Request, res: Response) => {
|
||||
if (!requireAdmin(req, res)) return;
|
||||
try {
|
||||
const { nodeId } = req.body as { nodeId?: string };
|
||||
if (nodeId) {
|
||||
// Optionally mark as temporarily degraded (just log — no ban)
|
||||
await pool.query(
|
||||
`UPDATE proxy_nodes SET requests_proxied = requests_proxied + 1 WHERE id = $1`,
|
||||
[nodeId]
|
||||
);
|
||||
}
|
||||
// Return next proxy
|
||||
const result = await pool.query(
|
||||
`SELECT id, ip, port, protocol, country_code
|
||||
FROM proxy_nodes
|
||||
WHERE status = 'online'
|
||||
AND last_seen > NOW() - INTERVAL '2 minutes'
|
||||
AND id != COALESCE($1::uuid, '00000000-0000-0000-0000-000000000000')
|
||||
ORDER BY requests_proxied ASC, latency_ms ASC NULLS LAST
|
||||
LIMIT 1`,
|
||||
[nodeId ?? null]
|
||||
);
|
||||
|
||||
if (result.rowCount === 0) {
|
||||
res.status(503).json({ error: "No other proxies available" });
|
||||
return;
|
||||
}
|
||||
|
||||
const node = result.rows[0];
|
||||
res.json({
|
||||
success: true,
|
||||
host: node.ip,
|
||||
port: node.port,
|
||||
protocol: node.protocol ?? "socks5",
|
||||
nodeId: node.id,
|
||||
country: node.country_code,
|
||||
});
|
||||
} catch (err) {
|
||||
console.error("[proxy] rotate error:", err);
|
||||
res.status(500).json({ error: "Failed to rotate proxy" });
|
||||
}
|
||||
});
|
||||
|
||||
// GET /api/proxy/stats — public network stats
|
||||
proxyRouter.get("/stats", async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const result = await pool.query(`
|
||||
SELECT
|
||||
COUNT(*) AS total_nodes,
|
||||
COUNT(*) FILTER (WHERE status = 'online') AS online_nodes,
|
||||
COUNT(*) FILTER (WHERE status = 'offline') AS offline_nodes,
|
||||
COUNT(DISTINCT country_code) AS countries,
|
||||
COALESCE(SUM(bandwidth_used_gb), 0) AS total_gb_proxied,
|
||||
COALESCE(SUM(requests_proxied), 0) AS total_requests,
|
||||
COALESCE(AVG(latency_ms) FILTER (WHERE status = 'online'), 0) AS avg_latency_ms
|
||||
FROM proxy_nodes
|
||||
`);
|
||||
|
||||
const row = result.rows[0] ?? {};
|
||||
|
||||
// Per-country breakdown
|
||||
const countryResult = await pool.query(`
|
||||
SELECT country_code, COUNT(*) AS nodes,
|
||||
COUNT(*) FILTER (WHERE status = 'online') AS online
|
||||
FROM proxy_nodes
|
||||
WHERE country_code IS NOT NULL
|
||||
GROUP BY country_code
|
||||
ORDER BY online DESC, nodes DESC
|
||||
`);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
network: {
|
||||
totalNodes: Number(row.total_nodes ?? 0),
|
||||
onlineNodes: Number(row.online_nodes ?? 0),
|
||||
offlineNodes: Number(row.offline_nodes ?? 0),
|
||||
countries: Number(row.countries ?? 0),
|
||||
totalGbProxied: parseFloat(Number(row.total_gb_proxied ?? 0).toFixed(2)),
|
||||
totalRequests: Number(row.total_requests ?? 0),
|
||||
avgLatencyMs: Math.round(Number(row.avg_latency_ms ?? 0)),
|
||||
},
|
||||
countries: countryResult.rows,
|
||||
});
|
||||
} catch (err) {
|
||||
console.error("[proxy] stats error:", err);
|
||||
res.status(500).json({ error: "Failed to get stats" });
|
||||
}
|
||||
});
|
||||
50
packages/proxy-agent/README.md
Normal file
50
packages/proxy-agent/README.md
Normal file
@ -0,0 +1,50 @@
|
||||
# @tip/proxy-agent
|
||||
|
||||
Contribute bandwidth to the TIP (Transceiver Intelligence Platform) proxy network
|
||||
and get free API access in return.
|
||||
|
||||
## How it works
|
||||
|
||||
- You run the agent on a residential or home server machine
|
||||
- The agent starts a SOCKS5 proxy server on your machine
|
||||
- TIP scrapers route through your IP to bypass datacenter IP blocks (e.g. CloudFront WAF)
|
||||
- You get: free TIP API access, contributor badge, early access to new features
|
||||
|
||||
## Install & Start
|
||||
|
||||
```bash
|
||||
# Generate a token first on https://transceiver-db.context-x.org/dashboard (Network tab)
|
||||
# Then start the agent:
|
||||
npx @tip/proxy-agent start --token YOUR_TOKEN
|
||||
```
|
||||
|
||||
## Options
|
||||
|
||||
```
|
||||
tip-agent start --token TOKEN [--port 1080] [--max-gb 10] [--name "My Node"]
|
||||
tip-agent status
|
||||
tip-agent stop
|
||||
```
|
||||
|
||||
| Option | Default | Description |
|
||||
|------------|---------------------------------------------|--------------------------|
|
||||
| `--token` | (required) | Your TIP node token |
|
||||
| `--port` | 1080 | SOCKS5 listen port |
|
||||
| `--max-gb` | 10 | Max bandwidth to donate |
|
||||
| `--name` | hostname | Node display name |
|
||||
| `--server` | https://transceiver-db.context-x.org | TIP API server |
|
||||
|
||||
## Protocol
|
||||
|
||||
- SOCKS5 (RFC 1928)
|
||||
- No-auth method
|
||||
- TCP CONNECT only
|
||||
- 30s connection timeout
|
||||
- Heartbeat every 30s to routing server
|
||||
|
||||
## Privacy
|
||||
|
||||
- Only outbound TCP CONNECT requests are proxied (no BIND, no UDP)
|
||||
- Bandwidth is capped at your configured limit
|
||||
- No logs of proxied content are stored
|
||||
- Your IP is visible to the target website (residential proxy model)
|
||||
18
packages/proxy-agent/package.json
Normal file
18
packages/proxy-agent/package.json
Normal file
@ -0,0 +1,18 @@
|
||||
{
|
||||
"name": "@tip/proxy-agent",
|
||||
"version": "1.0.0",
|
||||
"description": "TIP Network Proxy Agent — contribute bandwidth, get free API access",
|
||||
"bin": { "tip-agent": "./dist/index.js" },
|
||||
"main": "./dist/index.js",
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"start": "node dist/index.js"
|
||||
},
|
||||
"dependencies": {
|
||||
"dotenv": "^16.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"typescript": "^5.0.0",
|
||||
"@types/node": "^20.0.0"
|
||||
}
|
||||
}
|
||||
174
packages/proxy-agent/src/agent.ts
Normal file
174
packages/proxy-agent/src/agent.ts
Normal file
@ -0,0 +1,174 @@
|
||||
/**
|
||||
* Core agent logic — registers with TIP routing server,
|
||||
* starts SOCKS5 server, manages heartbeat and graceful shutdown.
|
||||
*/
|
||||
import * as https from "https";
|
||||
import * as http from "http";
|
||||
import * as os from "os";
|
||||
import { createSocks5Server } from "./socks5";
|
||||
import { BandwidthTracker } from "./bandwidth";
|
||||
import { startHeartbeatLoop } from "./heartbeat";
|
||||
import { saveConfig, savePid, clearPid, AgentConfig } from "./config";
|
||||
|
||||
const AGENT_VERSION = "1.0.0";
|
||||
|
||||
interface RegisterResponse {
|
||||
token: string;
|
||||
nodeId: string;
|
||||
message?: string;
|
||||
}
|
||||
|
||||
function postJson(url: string, body: unknown): Promise<{ status: number; data: unknown }> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const json = JSON.stringify(body);
|
||||
const parsed = new URL(url);
|
||||
const mod = parsed.protocol === "https:" ? https : http;
|
||||
|
||||
const req = mod.request(
|
||||
{
|
||||
hostname: parsed.hostname,
|
||||
port: parsed.port || (parsed.protocol === "https:" ? 443 : 80),
|
||||
path: parsed.pathname + parsed.search,
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"Content-Length": Buffer.byteLength(json),
|
||||
"User-Agent": `TIP-Agent/${AGENT_VERSION}`,
|
||||
},
|
||||
timeout: 15_000,
|
||||
},
|
||||
(res) => {
|
||||
const chunks: Buffer[] = [];
|
||||
res.on("data", (c: Buffer) => chunks.push(c));
|
||||
res.on("end", () => {
|
||||
try {
|
||||
const data = JSON.parse(Buffer.concat(chunks).toString());
|
||||
resolve({ status: res.statusCode ?? 0, data });
|
||||
} catch {
|
||||
resolve({ status: res.statusCode ?? 0, data: {} });
|
||||
}
|
||||
});
|
||||
}
|
||||
);
|
||||
|
||||
req.on("timeout", () => { req.destroy(); reject(new Error("timeout")); });
|
||||
req.on("error", reject);
|
||||
req.write(json);
|
||||
req.end();
|
||||
});
|
||||
}
|
||||
|
||||
async function registerNode(cfg: AgentConfig): Promise<RegisterResponse> {
|
||||
const url = `${cfg.serverUrl}/api/proxy/register`;
|
||||
console.log(`[agent] Registering with ${url}...`);
|
||||
|
||||
const { status, data } = await postJson(url, {
|
||||
token: cfg.token,
|
||||
name: cfg.name,
|
||||
port: cfg.port,
|
||||
version: AGENT_VERSION,
|
||||
});
|
||||
|
||||
if (status >= 200 && status < 300) {
|
||||
const resp = data as RegisterResponse;
|
||||
console.log(`[agent] Registered successfully. Node: ${resp.nodeId ?? "unknown"}`);
|
||||
return resp;
|
||||
}
|
||||
|
||||
throw new Error(`Registration failed: HTTP ${status}`);
|
||||
}
|
||||
|
||||
export async function startAgent(cfg: AgentConfig): Promise<void> {
|
||||
const bandwidth = new BandwidthTracker(cfg.maxGb);
|
||||
|
||||
// Register with server
|
||||
await registerNode(cfg).catch((err) => {
|
||||
console.warn(`[agent] Registration warning: ${err.message} (continuing anyway)`);
|
||||
});
|
||||
|
||||
// Start SOCKS5 server
|
||||
const server = createSocks5Server({
|
||||
port: cfg.port,
|
||||
bandwidth,
|
||||
onNewConnection: () => {
|
||||
if (bandwidth.isLimitReached()) {
|
||||
console.warn("[agent] Bandwidth limit reached — rejecting new connections");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
},
|
||||
});
|
||||
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
server.listen(cfg.port, () => {
|
||||
console.log(`[agent] SOCKS5 proxy listening on port ${cfg.port}`);
|
||||
resolve();
|
||||
});
|
||||
server.on("error", reject);
|
||||
});
|
||||
|
||||
// Save PID
|
||||
savePid(process.pid);
|
||||
saveConfig({ ...cfg, pid: process.pid, startedAt: new Date().toISOString() });
|
||||
|
||||
// Start heartbeat
|
||||
const heartbeatTimer = startHeartbeatLoop(
|
||||
cfg.serverUrl,
|
||||
() => {
|
||||
const stats = bandwidth.getStats();
|
||||
return {
|
||||
token: cfg.token,
|
||||
port: cfg.port,
|
||||
bytesProxied: stats.bytesProxied,
|
||||
requestsProxied: stats.requestsProxied,
|
||||
version: AGENT_VERSION,
|
||||
};
|
||||
},
|
||||
(result) => {
|
||||
if (!result.ok) {
|
||||
console.warn(`[agent] Heartbeat failed: ${result.error ?? "unknown error"}`);
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
console.log(`[agent] TIP Agent running. Max bandwidth: ${cfg.maxGb} GB`);
|
||||
console.log(`[agent] Press Ctrl+C to stop.\n`);
|
||||
|
||||
// Print stats every minute
|
||||
const statsTimer = setInterval(() => {
|
||||
const stats = bandwidth.getStats();
|
||||
const usedGb = (stats.bytesProxied / (1024 ** 3)).toFixed(3);
|
||||
console.log(
|
||||
`[agent] Stats: ${stats.requestsProxied} requests, ${usedGb} GB used / ${cfg.maxGb} GB limit`
|
||||
);
|
||||
}, 60_000);
|
||||
|
||||
// Graceful shutdown
|
||||
const shutdown = (signal: string) => {
|
||||
console.log(`\n[agent] Received ${signal}, shutting down...`);
|
||||
clearInterval(heartbeatTimer);
|
||||
clearInterval(statsTimer);
|
||||
clearPid();
|
||||
server.close(() => {
|
||||
console.log("[agent] SOCKS5 server closed. Goodbye.");
|
||||
process.exit(0);
|
||||
});
|
||||
setTimeout(() => process.exit(1), 5000);
|
||||
};
|
||||
|
||||
process.on("SIGINT", () => shutdown("SIGINT"));
|
||||
process.on("SIGTERM", () => shutdown("SIGTERM"));
|
||||
}
|
||||
|
||||
export function getLocalIp(): string {
|
||||
const interfaces = os.networkInterfaces();
|
||||
for (const iface of Object.values(interfaces)) {
|
||||
if (!iface) continue;
|
||||
for (const addr of iface) {
|
||||
if (addr.family === "IPv4" && !addr.internal) {
|
||||
return addr.address;
|
||||
}
|
||||
}
|
||||
}
|
||||
return "127.0.0.1";
|
||||
}
|
||||
50
packages/proxy-agent/src/bandwidth.ts
Normal file
50
packages/proxy-agent/src/bandwidth.ts
Normal file
@ -0,0 +1,50 @@
|
||||
/**
|
||||
* Bandwidth tracker — monitors usage and enforces limits
|
||||
*/
|
||||
|
||||
export interface BandwidthStats {
|
||||
bytesProxied: number;
|
||||
requestsProxied: number;
|
||||
bytesLimitHard: number;
|
||||
limitReached: boolean;
|
||||
}
|
||||
|
||||
export class BandwidthTracker {
|
||||
private bytesProxied = 0;
|
||||
private requestsProxied = 0;
|
||||
private readonly bytesLimit: number;
|
||||
|
||||
constructor(maxGb: number) {
|
||||
this.bytesLimit = maxGb * 1024 * 1024 * 1024;
|
||||
}
|
||||
|
||||
addBytes(n: number): void {
|
||||
this.bytesProxied += n;
|
||||
}
|
||||
|
||||
incrementRequests(): void {
|
||||
this.requestsProxied++;
|
||||
}
|
||||
|
||||
isLimitReached(): boolean {
|
||||
return this.bytesProxied >= this.bytesLimit;
|
||||
}
|
||||
|
||||
getStats(): BandwidthStats {
|
||||
return {
|
||||
bytesProxied: this.bytesProxied,
|
||||
requestsProxied: this.requestsProxied,
|
||||
bytesLimitHard: this.bytesLimit,
|
||||
limitReached: this.isLimitReached(),
|
||||
};
|
||||
}
|
||||
|
||||
getUsedGb(): number {
|
||||
return this.bytesProxied / (1024 * 1024 * 1024);
|
||||
}
|
||||
|
||||
reset(): void {
|
||||
this.bytesProxied = 0;
|
||||
this.requestsProxied = 0;
|
||||
}
|
||||
}
|
||||
76
packages/proxy-agent/src/config.ts
Normal file
76
packages/proxy-agent/src/config.ts
Normal file
@ -0,0 +1,76 @@
|
||||
/**
|
||||
* Config management — persists agent config to ~/.tip-agent/config.json
|
||||
*/
|
||||
import * as fs from "fs";
|
||||
import * as path from "path";
|
||||
import * as os from "os";
|
||||
|
||||
const CONFIG_DIR = path.join(os.homedir(), ".tip-agent");
|
||||
const CONFIG_FILE = path.join(CONFIG_DIR, "config.json");
|
||||
|
||||
export interface AgentConfig {
|
||||
token: string;
|
||||
port: number;
|
||||
maxGb: number;
|
||||
name: string;
|
||||
serverUrl: string;
|
||||
pid?: number;
|
||||
startedAt?: string;
|
||||
}
|
||||
|
||||
const DEFAULTS: Partial<AgentConfig> = {
|
||||
port: 1080,
|
||||
maxGb: 10,
|
||||
name: os.hostname(),
|
||||
serverUrl: "https://transceiver-db.context-x.org",
|
||||
};
|
||||
|
||||
export function loadConfig(): AgentConfig | null {
|
||||
if (!fs.existsSync(CONFIG_FILE)) return null;
|
||||
try {
|
||||
const raw = fs.readFileSync(CONFIG_FILE, "utf8");
|
||||
return JSON.parse(raw) as AgentConfig;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export function saveConfig(cfg: Partial<AgentConfig>): AgentConfig {
|
||||
if (!fs.existsSync(CONFIG_DIR)) {
|
||||
fs.mkdirSync(CONFIG_DIR, { recursive: true });
|
||||
}
|
||||
const existing = loadConfig() ?? {};
|
||||
const merged = { ...DEFAULTS, ...existing, ...cfg } as AgentConfig;
|
||||
fs.writeFileSync(CONFIG_FILE, JSON.stringify(merged, null, 2), "utf8");
|
||||
return merged;
|
||||
}
|
||||
|
||||
export function clearConfig(): void {
|
||||
if (fs.existsSync(CONFIG_FILE)) {
|
||||
fs.unlinkSync(CONFIG_FILE);
|
||||
}
|
||||
}
|
||||
|
||||
export function getPidFile(): string {
|
||||
return path.join(CONFIG_DIR, "agent.pid");
|
||||
}
|
||||
|
||||
export function savePid(pid: number): void {
|
||||
if (!fs.existsSync(CONFIG_DIR)) {
|
||||
fs.mkdirSync(CONFIG_DIR, { recursive: true });
|
||||
}
|
||||
fs.writeFileSync(getPidFile(), String(pid), "utf8");
|
||||
}
|
||||
|
||||
export function loadPid(): number | null {
|
||||
const pidFile = getPidFile();
|
||||
if (!fs.existsSync(pidFile)) return null;
|
||||
const raw = fs.readFileSync(pidFile, "utf8").trim();
|
||||
const pid = parseInt(raw, 10);
|
||||
return isNaN(pid) ? null : pid;
|
||||
}
|
||||
|
||||
export function clearPid(): void {
|
||||
const pidFile = getPidFile();
|
||||
if (fs.existsSync(pidFile)) fs.unlinkSync(pidFile);
|
||||
}
|
||||
82
packages/proxy-agent/src/heartbeat.ts
Normal file
82
packages/proxy-agent/src/heartbeat.ts
Normal file
@ -0,0 +1,82 @@
|
||||
/**
|
||||
* Heartbeat — sends periodic status updates to TIP routing server.
|
||||
* POST /api/proxy/heartbeat every 30s.
|
||||
*/
|
||||
import * as https from "https";
|
||||
import * as http from "http";
|
||||
|
||||
export interface HeartbeatPayload {
|
||||
token: string;
|
||||
ip?: string;
|
||||
port: number;
|
||||
bytesProxied: number;
|
||||
requestsProxied: number;
|
||||
latencyMs?: number;
|
||||
version: string;
|
||||
}
|
||||
|
||||
export interface HeartbeatResult {
|
||||
ok: boolean;
|
||||
latencyMs: number;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
function postJson(url: string, body: unknown): Promise<{ status: number; latencyMs: number }> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const start = Date.now();
|
||||
const json = JSON.stringify(body);
|
||||
const parsed = new URL(url);
|
||||
const mod = parsed.protocol === "https:" ? https : http;
|
||||
|
||||
const req = mod.request(
|
||||
{
|
||||
hostname: parsed.hostname,
|
||||
port: parsed.port || (parsed.protocol === "https:" ? 443 : 80),
|
||||
path: parsed.pathname + parsed.search,
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"Content-Length": Buffer.byteLength(json),
|
||||
"User-Agent": "TIP-Agent/1.0.0",
|
||||
},
|
||||
timeout: 10_000,
|
||||
},
|
||||
(res) => {
|
||||
res.resume();
|
||||
resolve({ status: res.statusCode ?? 0, latencyMs: Date.now() - start });
|
||||
}
|
||||
);
|
||||
|
||||
req.on("timeout", () => { req.destroy(); reject(new Error("timeout")); });
|
||||
req.on("error", reject);
|
||||
req.write(json);
|
||||
req.end();
|
||||
});
|
||||
}
|
||||
|
||||
export async function sendHeartbeat(
|
||||
serverUrl: string,
|
||||
payload: HeartbeatPayload
|
||||
): Promise<HeartbeatResult> {
|
||||
const url = `${serverUrl}/api/proxy/heartbeat`;
|
||||
try {
|
||||
const { status, latencyMs } = await postJson(url, payload);
|
||||
return { ok: status >= 200 && status < 300, latencyMs };
|
||||
} catch (err) {
|
||||
return { ok: false, latencyMs: 0, error: String(err) };
|
||||
}
|
||||
}
|
||||
|
||||
export function startHeartbeatLoop(
|
||||
serverUrl: string,
|
||||
getPayload: () => HeartbeatPayload,
|
||||
onResult?: (r: HeartbeatResult) => void,
|
||||
intervalMs = 30_000
|
||||
): NodeJS.Timeout {
|
||||
const tick = async () => {
|
||||
const result = await sendHeartbeat(serverUrl, getPayload());
|
||||
onResult?.(result);
|
||||
};
|
||||
tick();
|
||||
return setInterval(tick, intervalMs);
|
||||
}
|
||||
167
packages/proxy-agent/src/index.ts
Normal file
167
packages/proxy-agent/src/index.ts
Normal file
@ -0,0 +1,167 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* TIP Proxy Agent CLI
|
||||
*
|
||||
* Usage:
|
||||
* tip-agent start --token TOKEN [--port 1080] [--max-gb 10] [--name "My Node"]
|
||||
* tip-agent status
|
||||
* tip-agent stop
|
||||
*/
|
||||
import { startAgent } from "./agent";
|
||||
import { loadConfig, saveConfig, loadPid, clearPid } from "./config";
|
||||
|
||||
const AGENT_VERSION = "1.0.0";
|
||||
|
||||
function parseArgs(argv: string[]): Record<string, string> {
|
||||
const args: Record<string, string> = {};
|
||||
for (let i = 0; i < argv.length; i++) {
|
||||
const arg = argv[i];
|
||||
if (arg && arg.startsWith("--")) {
|
||||
const key = arg.slice(2);
|
||||
const next = argv[i + 1];
|
||||
if (next && !next.startsWith("--")) {
|
||||
args[key] = next;
|
||||
i++;
|
||||
} else {
|
||||
args[key] = "true";
|
||||
}
|
||||
}
|
||||
}
|
||||
return args;
|
||||
}
|
||||
|
||||
function isProcessRunning(pid: number): boolean {
|
||||
try {
|
||||
process.kill(pid, 0);
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function cmdStart(rawArgs: string[]): void {
|
||||
const args = parseArgs(rawArgs);
|
||||
|
||||
const token = args["token"];
|
||||
if (!token) {
|
||||
console.error("Error: --token is required");
|
||||
console.error(" tip-agent start --token YOUR_TOKEN [--port 1080] [--max-gb 10] [--name 'My Node']");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const cfg = saveConfig({
|
||||
token,
|
||||
port: args["port"] ? parseInt(args["port"], 10) : 1080,
|
||||
maxGb: args["max-gb"] ? parseFloat(args["max-gb"]) : 10,
|
||||
name: args["name"] ?? undefined,
|
||||
serverUrl: args["server"] ?? "https://transceiver-db.context-x.org",
|
||||
});
|
||||
|
||||
console.log(`\nTIP Proxy Agent v${AGENT_VERSION}`);
|
||||
console.log("==========================================");
|
||||
console.log(` Token: ${cfg.token.slice(0, 8)}...`);
|
||||
console.log(` Port: ${cfg.port}`);
|
||||
console.log(` Max GB: ${cfg.maxGb}`);
|
||||
console.log(` Name: ${cfg.name}`);
|
||||
console.log(` Server: ${cfg.serverUrl}`);
|
||||
console.log("==========================================\n");
|
||||
|
||||
startAgent(cfg).catch((err) => {
|
||||
console.error("[agent] Fatal error:", err.message);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
|
||||
function cmdStatus(): void {
|
||||
const cfg = loadConfig();
|
||||
const pid = loadPid();
|
||||
|
||||
if (!cfg || !pid) {
|
||||
console.log("TIP Agent: not running");
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
const running = isProcessRunning(pid);
|
||||
console.log(`TIP Agent: ${running ? "RUNNING" : "STOPPED (stale PID)"}`);
|
||||
console.log(` PID: ${pid}`);
|
||||
console.log(` Port: ${cfg.port}`);
|
||||
console.log(` Token: ${cfg.token.slice(0, 8)}...`);
|
||||
console.log(` Max GB: ${cfg.maxGb}`);
|
||||
if (cfg.startedAt) {
|
||||
console.log(` Started: ${cfg.startedAt}`);
|
||||
}
|
||||
|
||||
if (!running) {
|
||||
clearPid();
|
||||
}
|
||||
}
|
||||
|
||||
function cmdStop(): void {
|
||||
const pid = loadPid();
|
||||
|
||||
if (!pid) {
|
||||
console.log("TIP Agent: not running");
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
try {
|
||||
process.kill(pid, "SIGTERM");
|
||||
console.log(`TIP Agent (PID ${pid}) stopped.`);
|
||||
clearPid();
|
||||
} catch {
|
||||
console.log(`TIP Agent: process ${pid} not found (already stopped)`);
|
||||
clearPid();
|
||||
}
|
||||
}
|
||||
|
||||
function printHelp(): void {
|
||||
console.log(`
|
||||
TIP Proxy Agent v${AGENT_VERSION}
|
||||
Contribute bandwidth to the TIP network, get free API access.
|
||||
|
||||
Usage:
|
||||
tip-agent start --token TOKEN [options]
|
||||
tip-agent status
|
||||
tip-agent stop
|
||||
|
||||
Commands:
|
||||
start Start the proxy agent
|
||||
status Show agent status
|
||||
stop Stop the agent
|
||||
|
||||
Options:
|
||||
--token TOKEN Your TIP node token (required for start)
|
||||
--port N SOCKS5 listen port (default: 1080)
|
||||
--max-gb N Max bandwidth to donate in GB (default: 10)
|
||||
--name NAME Node name (default: hostname)
|
||||
--server URL TIP server URL (default: https://transceiver-db.context-x.org)
|
||||
|
||||
Example:
|
||||
tip-agent start --token abc123 --port 1080 --max-gb 10 --name "Home Node"
|
||||
`);
|
||||
}
|
||||
|
||||
const [, , command, ...rest] = process.argv;
|
||||
|
||||
switch (command) {
|
||||
case "start":
|
||||
cmdStart(rest);
|
||||
break;
|
||||
case "status":
|
||||
cmdStatus();
|
||||
break;
|
||||
case "stop":
|
||||
cmdStop();
|
||||
break;
|
||||
case "--help":
|
||||
case "-h":
|
||||
case "help":
|
||||
printHelp();
|
||||
break;
|
||||
default:
|
||||
if (command) {
|
||||
console.error(`Unknown command: ${command}`);
|
||||
}
|
||||
printHelp();
|
||||
process.exit(command ? 1 : 0);
|
||||
}
|
||||
181
packages/proxy-agent/src/socks5.ts
Normal file
181
packages/proxy-agent/src/socks5.ts
Normal file
@ -0,0 +1,181 @@
|
||||
/**
|
||||
* SOCKS5 proxy server — built on Node.js net module, no external deps.
|
||||
*
|
||||
* Implements:
|
||||
* - SOCKS5 handshake (RFC 1928)
|
||||
* - No-auth method (0x00)
|
||||
* - CONNECT command (TCP tunnel)
|
||||
* - Byte tracking per connection
|
||||
*/
|
||||
import * as net from "net";
|
||||
import { BandwidthTracker } from "./bandwidth";
|
||||
|
||||
const SOCKS_VER = 0x05;
|
||||
const NO_AUTH = 0x00;
|
||||
const CMD_CONNECT = 0x01;
|
||||
const ATYP_IPV4 = 0x01;
|
||||
const ATYP_DOMAIN = 0x03;
|
||||
const ATYP_IPV6 = 0x04;
|
||||
const REP_SUCCESS = 0x00;
|
||||
const REP_FAIL = 0x01;
|
||||
const CONN_TIMEOUT_MS = 30_000;
|
||||
|
||||
export interface Socks5ServerOptions {
|
||||
port: number;
|
||||
host?: string;
|
||||
onNewConnection?: () => boolean; // return false to reject (bandwidth limit)
|
||||
bandwidth?: BandwidthTracker;
|
||||
}
|
||||
|
||||
function buildReply(rep: number, ip = "0.0.0.0", port = 0): Buffer {
|
||||
const parts = ip.split(".").map(Number);
|
||||
const buf = Buffer.alloc(10);
|
||||
buf[0] = SOCKS_VER;
|
||||
buf[1] = rep;
|
||||
buf[2] = 0x00; // reserved
|
||||
buf[3] = ATYP_IPV4;
|
||||
buf[4] = parts[0] ?? 0;
|
||||
buf[5] = parts[1] ?? 0;
|
||||
buf[6] = parts[2] ?? 0;
|
||||
buf[7] = parts[3] ?? 0;
|
||||
buf.writeUInt16BE(port, 8);
|
||||
return buf;
|
||||
}
|
||||
|
||||
function handleClient(
|
||||
socket: net.Socket,
|
||||
opts: Socks5ServerOptions
|
||||
): void {
|
||||
socket.setTimeout(CONN_TIMEOUT_MS);
|
||||
socket.on("timeout", () => socket.destroy());
|
||||
socket.on("error", () => socket.destroy());
|
||||
|
||||
let state: "greeting" | "request" | "connected" = "greeting";
|
||||
let buf = Buffer.alloc(0);
|
||||
|
||||
socket.on("data", (chunk) => {
|
||||
buf = Buffer.concat([buf, chunk]);
|
||||
|
||||
if (state === "greeting") {
|
||||
// Need at least: VER(1) + NMETHODS(1) + methods
|
||||
if (buf.length < 2) return;
|
||||
const nmethods = buf[1] ?? 0;
|
||||
if (buf.length < 2 + nmethods) return;
|
||||
|
||||
const methods = buf.slice(2, 2 + nmethods);
|
||||
buf = buf.slice(2 + nmethods);
|
||||
|
||||
if (!methods.includes(NO_AUTH)) {
|
||||
socket.write(Buffer.from([SOCKS_VER, 0xFF])); // no acceptable method
|
||||
socket.destroy();
|
||||
return;
|
||||
}
|
||||
|
||||
socket.write(Buffer.from([SOCKS_VER, NO_AUTH]));
|
||||
state = "request";
|
||||
return;
|
||||
}
|
||||
|
||||
if (state === "request") {
|
||||
// Minimum: VER(1) CMD(1) RSV(1) ATYP(1) + address + port(2)
|
||||
if (buf.length < 4) return;
|
||||
|
||||
const cmd = buf[1];
|
||||
const atyp = buf[3];
|
||||
|
||||
let host: string;
|
||||
let port: number;
|
||||
let consumed: number;
|
||||
|
||||
if (atyp === ATYP_IPV4) {
|
||||
if (buf.length < 10) return;
|
||||
host = `${buf[4]}.${buf[5]}.${buf[6]}.${buf[7]}`;
|
||||
port = buf.readUInt16BE(8);
|
||||
consumed = 10;
|
||||
} else if (atyp === ATYP_DOMAIN) {
|
||||
if (buf.length < 5) return;
|
||||
const domainLen = buf[4] ?? 0;
|
||||
if (buf.length < 5 + domainLen + 2) return;
|
||||
host = buf.slice(5, 5 + domainLen).toString("utf8");
|
||||
port = buf.readUInt16BE(5 + domainLen);
|
||||
consumed = 5 + domainLen + 2;
|
||||
} else if (atyp === ATYP_IPV6) {
|
||||
if (buf.length < 22) return;
|
||||
const parts: string[] = [];
|
||||
for (let i = 0; i < 8; i++) {
|
||||
parts.push(buf.readUInt16BE(4 + i * 2).toString(16));
|
||||
}
|
||||
host = parts.join(":");
|
||||
port = buf.readUInt16BE(20);
|
||||
consumed = 22;
|
||||
} else {
|
||||
socket.write(buildReply(0x08)); // address type not supported
|
||||
socket.destroy();
|
||||
return;
|
||||
}
|
||||
|
||||
buf = buf.slice(consumed);
|
||||
|
||||
if (cmd !== CMD_CONNECT) {
|
||||
socket.write(buildReply(0x07)); // command not supported
|
||||
socket.destroy();
|
||||
return;
|
||||
}
|
||||
|
||||
// Check bandwidth limit before accepting
|
||||
if (opts.onNewConnection && !opts.onNewConnection()) {
|
||||
socket.write(buildReply(REP_FAIL));
|
||||
socket.destroy();
|
||||
return;
|
||||
}
|
||||
|
||||
opts.bandwidth?.incrementRequests();
|
||||
|
||||
const target = net.createConnection({ host, port }, () => {
|
||||
socket.write(buildReply(REP_SUCCESS));
|
||||
state = "connected";
|
||||
|
||||
// Pipe remaining buffered bytes
|
||||
if (buf.length > 0) {
|
||||
target.write(buf);
|
||||
buf = Buffer.alloc(0);
|
||||
}
|
||||
|
||||
// Bidirectional pipe with byte counting
|
||||
socket.on("data", (d) => {
|
||||
opts.bandwidth?.addBytes(d.length);
|
||||
if (!target.destroyed) target.write(d);
|
||||
});
|
||||
|
||||
target.on("data", (d) => {
|
||||
opts.bandwidth?.addBytes(d.length);
|
||||
if (!socket.destroyed) socket.write(d);
|
||||
});
|
||||
|
||||
socket.on("end", () => { if (!target.destroyed) target.end(); });
|
||||
target.on("end", () => { if (!socket.destroyed) socket.end(); });
|
||||
});
|
||||
|
||||
target.setTimeout(CONN_TIMEOUT_MS);
|
||||
target.on("timeout", () => { target.destroy(); socket.destroy(); });
|
||||
target.on("error", () => {
|
||||
if (!socket.destroyed) {
|
||||
socket.write(buildReply(REP_FAIL));
|
||||
socket.destroy();
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
export function createSocks5Server(opts: Socks5ServerOptions): net.Server {
|
||||
const server = net.createServer((socket) => {
|
||||
handleClient(socket, opts);
|
||||
});
|
||||
|
||||
server.on("error", (err) => {
|
||||
console.error("[SOCKS5] Server error:", err.message);
|
||||
});
|
||||
|
||||
return server;
|
||||
}
|
||||
18
packages/proxy-agent/tsconfig.json
Normal file
18
packages/proxy-agent/tsconfig.json
Normal file
@ -0,0 +1,18 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2020",
|
||||
"module": "commonjs",
|
||||
"lib": ["ES2020"],
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"resolveJsonModule": true,
|
||||
"declaration": true,
|
||||
"declarationMap": true,
|
||||
"sourceMap": true
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
@ -51,7 +51,9 @@ export async function createScheduler(): Promise<PgBoss> {
|
||||
retryDelay: 30,
|
||||
retryBackoff: true,
|
||||
expireInSeconds: 300,
|
||||
monitorStateIntervalSeconds: 30,
|
||||
monitorStateIntervalSeconds: 60,
|
||||
max: 4, // pg-boss internal connection pool
|
||||
poolSize: 4, // alias used by some pg-boss versions
|
||||
});
|
||||
|
||||
boss.on("error", (error) => console.error("pg-boss error:", error));
|
||||
|
||||
@ -147,6 +147,7 @@ interface Product {
|
||||
reachMeters?: number;
|
||||
fiberType?: string;
|
||||
wavelength?: string;
|
||||
imageUrl?: string;
|
||||
}
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
@ -432,6 +433,7 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
|
||||
name
|
||||
sku
|
||||
url_key
|
||||
small_image { url }
|
||||
price_range {
|
||||
minimum_price {
|
||||
final_price { value currency }
|
||||
@ -457,6 +459,7 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
|
||||
name: string;
|
||||
sku: string;
|
||||
url_key: string;
|
||||
small_image?: { url?: string };
|
||||
price_range?: {
|
||||
minimum_price?: {
|
||||
final_price?: { value: number; currency: string };
|
||||
@ -488,6 +491,9 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
|
||||
const reach = detectReach(item.name);
|
||||
const price = item.price_range?.minimum_price?.final_price?.value;
|
||||
|
||||
const rawImg = item.small_image?.url;
|
||||
const imageUrl = rawImg && !rawImg.includes("placeholder") ? rawImg : undefined;
|
||||
|
||||
allProducts.set(url, {
|
||||
name: item.name,
|
||||
partNumber: item.sku,
|
||||
@ -501,6 +507,7 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
|
||||
reachMeters: reach?.meters,
|
||||
fiberType: detectFiber(item.name),
|
||||
wavelength: detectWavelength(item.name),
|
||||
imageUrl,
|
||||
});
|
||||
newCount++;
|
||||
}
|
||||
@ -538,6 +545,7 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
|
||||
fiberType: product.fiberType,
|
||||
wavelengths: product.wavelength,
|
||||
category: "DataCenter",
|
||||
imageUrl: product.imageUrl,
|
||||
});
|
||||
|
||||
if (product.price && product.price > 0) {
|
||||
|
||||
@ -99,9 +99,12 @@ function parseProductList(html: string): Product[] {
|
||||
const name = match[2].trim();
|
||||
if (name.length < 10 || name.length > 200) continue;
|
||||
|
||||
// Look for price in surrounding context
|
||||
const context = html.slice(Math.max(0, match.index - 300), match.index + 600);
|
||||
const priceMatch = context.match(/\$\s*([\d,]+\.?\d{0,2})/) || context.match(/data-product-price="([\d.]+)"/);
|
||||
// Look for price in surrounding context — BigCommerce uses data-product-price-without-tax
|
||||
const context = html.slice(Math.max(0, match.index - 300), match.index + 1200);
|
||||
const priceMatch = context.match(/data-product-price-without-tax[^>]*>\s*\$([\d,]+\.?\d{0,2})/)
|
||||
|| context.match(/class="price price--withoutTax">\s*\$([\d,]+\.?\d{0,2})/)
|
||||
|| context.match(/\$\s*([\d,]+\.?\d{0,2})/);
|
||||
|
||||
const price = priceMatch ? parseFloat(priceMatch[1].replace(",", "")) : undefined;
|
||||
|
||||
const ff = detectFormFactor(name);
|
||||
|
||||
@ -47,6 +47,7 @@ interface OptcoreProduct {
|
||||
speedGbps?: number;
|
||||
speed?: string;
|
||||
reachLabel?: string;
|
||||
imageUrl?: string;
|
||||
}
|
||||
|
||||
function detectFormFactor(text: string): string | undefined {
|
||||
@ -208,7 +209,13 @@ export async function scrapeOptcore(): Promise<void> {
|
||||
const stockEl = document.querySelector(".stock, .availability, [class*=\"stock\"]");
|
||||
const stockText = stockEl?.textContent?.trim() || "";
|
||||
|
||||
return { title, priceText, stockText };
|
||||
// Product image — WooCommerce product gallery
|
||||
const imgEl = document.querySelector(
|
||||
".woocommerce-product-gallery__image img, .wp-post-image, img.attachment-woocommerce_single"
|
||||
) as HTMLImageElement | null;
|
||||
const imageUrl = imgEl?.src || imgEl?.getAttribute("data-src") || "";
|
||||
|
||||
return { title, priceText, stockText, imageUrl };
|
||||
});
|
||||
|
||||
const meta = metaByUrl.get(url);
|
||||
@ -229,6 +236,7 @@ export async function scrapeOptcore(): Promise<void> {
|
||||
speedGbps: speedInfo?.speedGbps,
|
||||
speed: speedInfo?.speed,
|
||||
reachLabel: detectReach(name),
|
||||
imageUrl: data.imageUrl || undefined,
|
||||
});
|
||||
}
|
||||
|
||||
@ -262,6 +270,7 @@ export async function scrapeOptcore(): Promise<void> {
|
||||
speed: p.speed,
|
||||
reachLabel: p.reachLabel,
|
||||
category: "DataCenter",
|
||||
imageUrl: p.imageUrl,
|
||||
});
|
||||
|
||||
const hash = contentHash({ price: p.price, stock: p.stockLevel });
|
||||
|
||||
@ -1,163 +1,55 @@
|
||||
/**
|
||||
* SmartOptics Scraper — Manufacturer, quote-based pricing
|
||||
* SmartOptics Scraper — Premium coherent/DWDM transceiver manufacturer
|
||||
*
|
||||
* www.smartoptics.com — no direct prices, catalog-only scrape.
|
||||
* Extracts product names, part numbers, form factors and specs.
|
||||
*
|
||||
* Rate limited: 1 req/2sec.
|
||||
* smartoptics.com — WordPress site, no prices (B2B, RFQ model).
|
||||
* Scrapes product catalog for specs, images, datasheets.
|
||||
* Products listed at /products/optical-transceivers/ → individual /product/SKU/ pages.
|
||||
*/
|
||||
import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db";
|
||||
import * as cheerio from "cheerio";
|
||||
|
||||
const BASE = "https://www.smartoptics.com";
|
||||
const CATALOG_URLS = [
|
||||
"/products/transceivers/",
|
||||
"/products/",
|
||||
"/products/sfp-transceivers/",
|
||||
"/products/qsfp-transceivers/",
|
||||
];
|
||||
const BASE = "https://smartoptics.com";
|
||||
const CATALOG_URL = `${BASE}/products/optical-transceivers/`;
|
||||
const HEADERS = {
|
||||
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
Accept: "text/html,application/xhtml+xml",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
};
|
||||
|
||||
interface Product {
|
||||
partNumber: string;
|
||||
name: string;
|
||||
url: string;
|
||||
formFactor: string;
|
||||
speed: string;
|
||||
speedGbps: number;
|
||||
reachLabel?: string;
|
||||
reachMeters?: number;
|
||||
fiberType?: string;
|
||||
wavelength?: string;
|
||||
}
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
return new Promise((r) => setTimeout(r, ms));
|
||||
}
|
||||
|
||||
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
|
||||
const lower = text.toLowerCase();
|
||||
if (lower.includes("osfp") && !lower.includes("qsfp")) return { formFactor: "OSFP", speed: "400G", speedGbps: 400 };
|
||||
if (lower.includes("qsfp-dd")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
|
||||
if (lower.includes("qsfp28")) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
|
||||
if (lower.includes("qsfp+") || lower.includes("qsfp plus")) return { formFactor: "QSFP+", speed: "40G", speedGbps: 40 };
|
||||
if (lower.includes("sfp56")) return { formFactor: "SFP56", speed: "50G", speedGbps: 50 };
|
||||
if (lower.includes("sfp28") || lower.includes("25g")) return { formFactor: "SFP28", speed: "25G", speedGbps: 25 };
|
||||
if (lower.includes("sfp+") || lower.includes("10gbase") || lower.includes("10g")) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
|
||||
if (lower.includes("xfp")) return { formFactor: "XFP", speed: "10G", speedGbps: 10 };
|
||||
if (lower.includes("1000base") || lower.includes("1g")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
|
||||
if (lower.includes("sfp") && !lower.includes("qsfp")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
|
||||
return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
|
||||
const t = text.toLowerCase();
|
||||
if (t.includes("qsfp-dd800") || t.includes("sfp-dd800") || t.includes("800ge")) return { formFactor: "QSFP-DD", speed: "800G", speedGbps: 800 };
|
||||
if (t.includes("qsfp-dd") || (t.includes("400g") && t.includes("qsfp"))) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
|
||||
if (t.includes("qsfp112")) return { formFactor: "QSFP112", speed: "400G", speedGbps: 400 };
|
||||
if (t.includes("qsfp56")) return { formFactor: "QSFP56", speed: "200G", speedGbps: 200 };
|
||||
if (t.includes("qsfp28") || t.includes("100ge") || t.includes("100g")) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
|
||||
if (t.includes("sfp28") || t.includes("25g")) return { formFactor: "SFP28", speed: "25G", speedGbps: 25 };
|
||||
if (t.includes("qsfp+") || t.includes("40g")) return { formFactor: "QSFP+", speed: "40G", speedGbps: 40 };
|
||||
if (t.includes("sfp+") || t.includes("10g")) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
|
||||
if (t.includes("sfp")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
|
||||
return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
|
||||
}
|
||||
|
||||
function detectReach(text: string): { label: string; meters: number } | undefined {
|
||||
const patterns: [RegExp, string, number][] = [
|
||||
[/\b80\s*km\b/i, "80km", 80000],
|
||||
[/\b40\s*km\b/i, "40km", 40000],
|
||||
[/\b20\s*km\b/i, "20km", 20000],
|
||||
[/\b10\s*km\b/i, "10km", 10000],
|
||||
[/\b2\s*km\b/i, "2km", 2000],
|
||||
[/\b550\s*m\b/i, "550m", 550],
|
||||
[/\b300\s*m\b/i, "300m", 300],
|
||||
[/\b100\s*m\b/i, "100m", 100],
|
||||
[/\bLR4\b/, "10km", 10000], [/\bLR\b/, "10km", 10000],
|
||||
[/\bER4?\b/, "40km", 40000], [/\bZR4?\b/, "80km", 80000],
|
||||
[/\bSR4?\b/, "300m", 300], [/\bDR4?\b/, "500m", 500], [/\bFR4?\b/, "2km", 2000],
|
||||
];
|
||||
for (const [regex, label, meters] of patterns) {
|
||||
if (regex.test(text)) return { label, meters };
|
||||
const kmMatch = text.match(/(\d+)\s*km/i);
|
||||
if (kmMatch) {
|
||||
const km = parseInt(kmMatch[1]);
|
||||
return { label: `${km}km`, meters: km * 1000 };
|
||||
}
|
||||
const mMatch = text.match(/(\d+)\s*m\b/i);
|
||||
if (mMatch) {
|
||||
const m = parseInt(mMatch[1]);
|
||||
return { label: `${m}m`, meters: m };
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function detectFiber(text: string): string {
|
||||
if (/single.?mode|smf|[^a-z]lx[^a-z]|[^a-z]lr[^a-z]|[^a-z]er[^a-z]|[^a-z]zr[^a-z]|bidi|cwdm|dwdm/i.test(text)) return "SMF";
|
||||
if (/multi.?mode|mmf|[^a-z]sx[^a-z]|[^a-z]sr[^a-z]/i.test(text)) return "MMF";
|
||||
if (/copper|dac|twinax|rj45|base-t/i.test(text)) return "Copper";
|
||||
return "SMF";
|
||||
}
|
||||
|
||||
function detectWavelength(text: string): string {
|
||||
const match = text.match(/(\d{3,4})\s*nm/i);
|
||||
return match ? match[1] : "";
|
||||
}
|
||||
|
||||
function parseProductList(html: string, sourceUrl: string): Product[] {
|
||||
const $ = cheerio.load(html);
|
||||
const products: Product[] = [];
|
||||
|
||||
// Manufacturer site: product cards or table rows
|
||||
const cardSelectors = [
|
||||
".product-item", ".product-card", ".product", "tr",
|
||||
"article", ".item", ".col-xs-12.col-sm-6", ".entry",
|
||||
];
|
||||
|
||||
for (const sel of cardSelectors) {
|
||||
if ($(sel).length >= 2) {
|
||||
$(sel).each((_i, el) => {
|
||||
const text = $(el).text().trim();
|
||||
if (!/sfp|qsfp|xfp|transceiver|optic/i.test(text)) return;
|
||||
|
||||
const nameEl = $(el).find("h2, h3, h4, td, .title, .name, a").first();
|
||||
const name = nameEl.text().trim() || text.slice(0, 100);
|
||||
if (!name || name.length < 5) return;
|
||||
|
||||
const linkEl = $(el).find("a[href]").first();
|
||||
const href = linkEl.attr("href") || sourceUrl;
|
||||
const url = href.startsWith("http") ? href : BASE + href;
|
||||
|
||||
// Part number: look for alphanumeric patterns typical of transceiver SKUs
|
||||
const partNumMatch = text.match(/\b([A-Z]{2,}[-_][A-Z0-9]{2,}[-_][A-Z0-9]+)\b/) ||
|
||||
text.match(/\b(SO[-_][A-Z0-9]+)\b/i) ||
|
||||
name.match(/[A-Z0-9][-A-Z0-9]{5,}/);
|
||||
const partNumber = partNumMatch?.[0] || name.replace(/\s+/g, "-").slice(0, 60);
|
||||
|
||||
const ff = detectFormFactor(name + " " + text);
|
||||
const reach = detectReach(name + " " + text);
|
||||
|
||||
products.push({
|
||||
partNumber,
|
||||
name,
|
||||
url,
|
||||
...ff,
|
||||
reachLabel: reach?.label,
|
||||
reachMeters: reach?.meters,
|
||||
fiberType: detectFiber(name + " " + text),
|
||||
wavelength: detectWavelength(name + " " + text),
|
||||
});
|
||||
});
|
||||
if (products.length > 0) break;
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: all transceiver-relevant anchors
|
||||
if (products.length === 0) {
|
||||
$("a[href]").each((_i, el) => {
|
||||
const name = $(el).text().trim();
|
||||
const href = $(el).attr("href") || "";
|
||||
if (name.length < 5 || name.length > 200 || !/sfp|qsfp|transceiver|optic/i.test(name)) return;
|
||||
const url = href.startsWith("http") ? href : BASE + href;
|
||||
const ff = detectFormFactor(name);
|
||||
const reach = detectReach(name);
|
||||
products.push({
|
||||
partNumber: name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60),
|
||||
name, url, ...ff,
|
||||
reachLabel: reach?.label, reachMeters: reach?.meters,
|
||||
fiberType: detectFiber(name), wavelength: detectWavelength(name),
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
const seen = new Set<string>();
|
||||
return products.filter((p) => {
|
||||
if (!p.url || seen.has(p.url)) return false;
|
||||
seen.add(p.url);
|
||||
return true;
|
||||
});
|
||||
if (/dwdm|cwdm|coherent|coh|single.?mode|smf/i.test(text)) return "SMF";
|
||||
if (/multi.?mode|mmf|sr/i.test(text)) return "MMF";
|
||||
return "SMF"; // SmartOptics is almost exclusively SMF/coherent
|
||||
}
|
||||
|
||||
async function fetchPage(url: string): Promise<string> {
|
||||
@ -166,46 +58,108 @@ async function fetchPage(url: string): Promise<string> {
|
||||
return resp.text();
|
||||
}
|
||||
|
||||
function extractProductUrls(html: string): string[] {
|
||||
const urls = new Set<string>();
|
||||
const regex = /href="(https?:\/\/smartoptics\.com\/product\/[^"]+)"/gi;
|
||||
let m: RegExpExecArray | null;
|
||||
while ((m = regex.exec(html)) !== null) {
|
||||
const u = m[1].replace(/\/$/, "") + "/";
|
||||
urls.add(u);
|
||||
}
|
||||
return Array.from(urls);
|
||||
}
|
||||
|
||||
interface ProductData {
|
||||
sku: string;
|
||||
name: string;
|
||||
url: string;
|
||||
imageUrl?: string;
|
||||
formFactor: string;
|
||||
speed: string;
|
||||
speedGbps: number;
|
||||
reachLabel?: string;
|
||||
reachMeters?: number;
|
||||
fiberType: string;
|
||||
coherent: boolean;
|
||||
wdmType?: string;
|
||||
}
|
||||
|
||||
async function scrapeProductPage(url: string): Promise<ProductData | null> {
|
||||
try {
|
||||
const html = await fetchPage(url);
|
||||
|
||||
const nameMatch = html.match(/<h1[^>]*>([^<]+)<\/h1>/) || html.match(/og:title" content="([^"]+)"/);
|
||||
const name = nameMatch ? nameMatch[1].trim().replace(/ \| Smartoptics$/, "") : "";
|
||||
if (!name) return null;
|
||||
|
||||
const sku = url.split("/").filter(Boolean).pop()?.toUpperCase() || name.replace(/\s+/g, "-");
|
||||
|
||||
const imgMatch = html.match(/property="og:image" content="([^"]+)"/)
|
||||
|| html.match(/<img[^>]+src="([^"]*wp-content\/uploads[^"]*\.(?:png|jpg|webp))"[^>]* class="[^"]*product/i);
|
||||
const imageUrl = imgMatch ? imgMatch[1] : undefined;
|
||||
|
||||
const ff = detectFormFactor(name);
|
||||
const reach = detectReach(name);
|
||||
const coherent = /coherent|coh-t|coh\.|dwdm|dp-qpsk|qpsk|cfp2/i.test(name + html.slice(0, 3000));
|
||||
const wdmType = /dwdm/i.test(name) ? "DWDM" : /cwdm/i.test(name) ? "CWDM" : undefined;
|
||||
|
||||
return {
|
||||
sku, name, url, imageUrl,
|
||||
...ff,
|
||||
reachLabel: reach?.label,
|
||||
reachMeters: reach?.meters,
|
||||
fiberType: detectFiber(name),
|
||||
coherent,
|
||||
wdmType,
|
||||
};
|
||||
} catch (err) {
|
||||
console.warn(` Failed ${url}: ${(err as Error).message}`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export async function scrapeSmartOptics(): Promise<void> {
|
||||
console.log("=== SmartOptics Scraper Starting (catalog-only, no prices) ===\n");
|
||||
console.log("=== SmartOptics Scraper Starting ===\n");
|
||||
console.log("Note: SmartOptics is B2B — no public prices. Scraping specs + images only.\n");
|
||||
|
||||
const vendorId = await ensureVendor(
|
||||
"SmartOptics",
|
||||
"manufacturer",
|
||||
"https://www.smartoptics.com",
|
||||
BASE + CATALOG_URLS[0],
|
||||
"https://smartoptics.com/products/optical-transceivers/"
|
||||
);
|
||||
|
||||
const allProducts: Product[] = [];
|
||||
const seenUrls = new Set<string>();
|
||||
|
||||
for (const catalogPath of CATALOG_URLS) {
|
||||
const catalogUrl = BASE + catalogPath;
|
||||
console.log(` Fetching catalog: ${catalogUrl}`);
|
||||
const productUrls = new Set<string>();
|
||||
for (let page = 1; page <= 10; page++) {
|
||||
try {
|
||||
const html = await fetchPage(catalogUrl);
|
||||
const pageProducts = parseProductList(html, catalogUrl);
|
||||
for (const p of pageProducts) {
|
||||
if (!seenUrls.has(p.url)) {
|
||||
seenUrls.add(p.url);
|
||||
allProducts.push(p);
|
||||
}
|
||||
}
|
||||
console.log(` Found ${pageProducts.length} products`);
|
||||
await sleep(2000);
|
||||
} catch (err) {
|
||||
console.warn(` Failed ${catalogPath}: ${(err as Error).message}`);
|
||||
const url = page === 1 ? CATALOG_URL : `${CATALOG_URL}page/${page}/`;
|
||||
const html = await fetchPage(url);
|
||||
const urls = extractProductUrls(html);
|
||||
if (urls.length === 0) break;
|
||||
urls.forEach((u) => productUrls.add(u));
|
||||
console.log(` Catalog page ${page}: ${urls.length} products`);
|
||||
await sleep(1500);
|
||||
} catch {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`\nTotal unique products: ${allProducts.length}`);
|
||||
console.log(`\nTotal product URLs: ${productUrls.size}`);
|
||||
if (productUrls.size === 0) {
|
||||
console.log("No products found — site may have changed structure");
|
||||
return;
|
||||
}
|
||||
|
||||
let totalProducts = 0;
|
||||
let saved = 0;
|
||||
let withImages = 0;
|
||||
|
||||
for (const url of productUrls) {
|
||||
const product = await scrapeProductPage(url);
|
||||
if (!product) continue;
|
||||
|
||||
for (const product of allProducts) {
|
||||
try {
|
||||
await findOrCreateScrapedTransceiver({
|
||||
partNumber: product.partNumber,
|
||||
partNumber: product.sku,
|
||||
vendorId,
|
||||
formFactor: product.formFactor,
|
||||
speedGbps: product.speedGbps,
|
||||
@ -213,16 +167,20 @@ export async function scrapeSmartOptics(): Promise<void> {
|
||||
reachMeters: product.reachMeters,
|
||||
reachLabel: product.reachLabel,
|
||||
fiberType: product.fiberType,
|
||||
wavelengths: product.wavelength,
|
||||
category: "DataCenter",
|
||||
wavelengths: product.wdmType ? "DWDM-tunable" : undefined,
|
||||
category: product.coherent ? "Coherent" : "DataCenter",
|
||||
imageUrl: product.imageUrl,
|
||||
});
|
||||
totalProducts++;
|
||||
saved++;
|
||||
if (product.imageUrl) withImages++;
|
||||
console.log(` ✓ ${product.sku} — ${product.name.slice(0, 60)}`);
|
||||
} catch (err) {
|
||||
console.warn(` Error saving ${product.partNumber}: ${(err as Error).message.slice(0, 80)}`);
|
||||
console.warn(` Error saving ${product.sku}: ${(err as Error).message.slice(0, 80)}`);
|
||||
}
|
||||
await sleep(1500);
|
||||
}
|
||||
|
||||
console.log(`\n=== SmartOptics Complete: ${totalProducts} products catalogued (no prices — quote-based) ===`);
|
||||
console.log(`\n=== SmartOptics Complete: ${saved} products, ${withImages} with images ===`);
|
||||
}
|
||||
|
||||
if (require.main === module) {
|
||||
|
||||
540
packages/scraper/src/utils/backfill-images.ts
Normal file
540
packages/scraper/src/utils/backfill-images.ts
Normal file
@ -0,0 +1,540 @@
|
||||
/**
|
||||
* Image Backfill Script
|
||||
*
|
||||
* Fills `image_url` for transceivers that currently have none.
|
||||
* Priority order:
|
||||
* 1. Optcore (2,580 products) — WP REST API featured_media per product
|
||||
* 2. Flexoptix (344 products) — Magento GraphQL image fields
|
||||
* 3. GAO Tek (413 products) — product page og:image scrape
|
||||
* 4. Other vendors — og:image from stored price_observations URLs
|
||||
*
|
||||
* Run on Erik:
|
||||
* node packages/scraper/dist/utils/backfill-images.js
|
||||
*
|
||||
* Or locally with SSH port-forward:
|
||||
* ssh -L 5433:127.0.0.1:5433 erik -N &
|
||||
* node dist/utils/backfill-images.js
|
||||
*/
|
||||
|
||||
import { pool } from "./db";
|
||||
import { logger } from "./logger";
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
return new Promise((r) => setTimeout(r, ms));
|
||||
}
|
||||
|
||||
async function updateImageUrl(id: string, imageUrl: string): Promise<void> {
|
||||
await pool.query(
|
||||
`UPDATE transceivers SET image_url = $1, image_scraped_at = NOW(), has_image = TRUE WHERE id = $2`,
|
||||
[imageUrl, id]
|
||||
);
|
||||
}
|
||||
|
||||
async function fetchJson(url: string, init?: RequestInit): Promise<unknown> {
|
||||
const resp = await fetch(url, {
|
||||
...init,
|
||||
headers: {
|
||||
"User-Agent": "Mozilla/5.0 (compatible; TIP-ImageBackfill/1.0)",
|
||||
Accept: "application/json",
|
||||
...(init?.headers ?? {}),
|
||||
},
|
||||
signal: AbortSignal.timeout(20000),
|
||||
});
|
||||
if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
|
||||
return resp.json();
|
||||
}
|
||||
|
||||
async function fetchHtml(url: string): Promise<string> {
|
||||
const resp = await fetch(url, {
|
||||
headers: {
|
||||
"User-Agent": "Mozilla/5.0 (compatible; TIP-ImageBackfill/1.0)",
|
||||
Accept: "text/html",
|
||||
},
|
||||
signal: AbortSignal.timeout(20000),
|
||||
});
|
||||
if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
|
||||
return resp.text();
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Optcore
|
||||
// Strategy:
|
||||
// Step 1 — enumerate all products per category, collect slug -> featured_media id
|
||||
// Step 2 — resolve unique media IDs individually via /wp-json/wp/v2/media/<id>
|
||||
// Step 3 — update DB for matched slugs
|
||||
//
|
||||
// Note: _embed does NOT work for the "product" custom post type on Optcore.
|
||||
// We must resolve media IDs separately.
|
||||
// =============================================================================
|
||||
|
||||
const OPTCORE_BASE = "https://www.optcore.net";
|
||||
const OPTCORE_CATEGORY_IDS = [
|
||||
309, 173, 76, 79, 73, 311, 313, 312, 333, 1088,
|
||||
59, 1102, 4097, 77, 4101, 4092, 6441,
|
||||
];
|
||||
|
||||
interface OptcoreWpProductRaw {
|
||||
slug: string;
|
||||
featured_media: number;
|
||||
}
|
||||
|
||||
interface OptcoreMediaRaw {
|
||||
id: number;
|
||||
source_url: string;
|
||||
media_details?: {
|
||||
sizes?: {
|
||||
medium?: { source_url: string };
|
||||
large?: { source_url: string };
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
async function fetchOptcoreMediaUrl(mediaId: number): Promise<string | null> {
|
||||
if (mediaId === 0) return null;
|
||||
try {
|
||||
const data = await fetchJson(
|
||||
`${OPTCORE_BASE}/wp-json/wp/v2/media/${mediaId}?_fields=id,source_url,media_details`
|
||||
) as OptcoreMediaRaw;
|
||||
return (
|
||||
data.media_details?.sizes?.medium?.source_url ||
|
||||
data.media_details?.sizes?.large?.source_url ||
|
||||
data.source_url ||
|
||||
null
|
||||
);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
async function backfillOptcore(): Promise<{ updated: number; skipped: number; notFound: number }> {
|
||||
logger.info("=== Optcore image backfill starting ===");
|
||||
|
||||
const rows = await pool.query(`
|
||||
SELECT t.id, t.part_number
|
||||
FROM transceivers t
|
||||
JOIN vendors v ON t.vendor_id = v.id
|
||||
WHERE v.name = 'Optcore' AND (t.image_url IS NULL OR t.image_url = '')
|
||||
`);
|
||||
|
||||
// Map: part_number (lowercase) -> DB id
|
||||
const byPartNumber = new Map<string, string>();
|
||||
for (const row of rows.rows) {
|
||||
byPartNumber.set((row.part_number as string).toLowerCase(), row.id as string);
|
||||
}
|
||||
logger.info(`Optcore: ${byPartNumber.size} products need images`);
|
||||
|
||||
if (byPartNumber.size === 0) {
|
||||
return { updated: 0, skipped: 0, notFound: 0 };
|
||||
}
|
||||
|
||||
// Step 1: enumerate all products across all transceiver categories
|
||||
const slugToMediaId = new Map<string, number>();
|
||||
const seenSlugs = new Set<string>();
|
||||
|
||||
for (const catId of OPTCORE_CATEGORY_IDS) {
|
||||
let page = 1;
|
||||
while (true) {
|
||||
const apiUrl = `${OPTCORE_BASE}/wp-json/wp/v2/product?product_cat=${catId}&per_page=100&page=${page}&_fields=slug,featured_media`;
|
||||
try {
|
||||
const resp = await fetch(apiUrl, {
|
||||
headers: { "User-Agent": "Mozilla/5.0 (compatible; TIP-ImageBackfill/1.0)" },
|
||||
signal: AbortSignal.timeout(20000),
|
||||
});
|
||||
if (!resp.ok) break;
|
||||
const totalPages = parseInt(resp.headers.get("X-WP-TotalPages") || "1", 10);
|
||||
const products = (await resp.json()) as OptcoreWpProductRaw[];
|
||||
|
||||
for (const p of products) {
|
||||
if (seenSlugs.has(p.slug)) continue;
|
||||
seenSlugs.add(p.slug);
|
||||
if (p.featured_media) {
|
||||
slugToMediaId.set(p.slug.toLowerCase(), p.featured_media);
|
||||
}
|
||||
}
|
||||
|
||||
if (page >= totalPages) break;
|
||||
page++;
|
||||
} catch (err) {
|
||||
logger.warn(`Optcore cat=${catId} page=${page} fetch failed`, { error: (err as Error).message });
|
||||
break;
|
||||
}
|
||||
await sleep(100);
|
||||
}
|
||||
}
|
||||
|
||||
logger.info(`Optcore: enumerated ${seenSlugs.size} WP products, ${slugToMediaId.size} have featured media`);
|
||||
|
||||
// Step 2: resolve unique media IDs
|
||||
const uniqueMediaIds = new Set(slugToMediaId.values());
|
||||
logger.info(`Optcore: resolving ${uniqueMediaIds.size} unique media IDs`);
|
||||
|
||||
const mediaIdToUrl = new Map<number, string>();
|
||||
let resolved = 0;
|
||||
for (const mediaId of uniqueMediaIds) {
|
||||
const url = await fetchOptcoreMediaUrl(mediaId);
|
||||
if (url) mediaIdToUrl.set(mediaId, url);
|
||||
resolved++;
|
||||
if (resolved % 20 === 0) {
|
||||
logger.info(`Optcore media resolution: ${resolved}/${uniqueMediaIds.size}`);
|
||||
}
|
||||
await sleep(100);
|
||||
}
|
||||
|
||||
// Step 3: match slugs to DB records and update
|
||||
let updated = 0;
|
||||
let skipped = 0;
|
||||
let notFound = 0;
|
||||
|
||||
for (const [slug, mediaId] of slugToMediaId) {
|
||||
const dbId = byPartNumber.get(slug);
|
||||
if (!dbId) {
|
||||
notFound++;
|
||||
continue;
|
||||
}
|
||||
|
||||
const imgUrl = mediaIdToUrl.get(mediaId);
|
||||
if (!imgUrl) {
|
||||
skipped++;
|
||||
continue;
|
||||
}
|
||||
|
||||
await updateImageUrl(dbId, imgUrl);
|
||||
byPartNumber.delete(slug);
|
||||
updated++;
|
||||
if (updated % 50 === 0) {
|
||||
logger.info(`Optcore DB updates: ${updated} so far`);
|
||||
}
|
||||
}
|
||||
|
||||
logger.info(`Optcore done: ${updated} updated, ${skipped} no-media-url, ${notFound} slug-not-in-db`);
|
||||
return { updated, skipped, notFound };
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Flexoptix
|
||||
// Magento GraphQL: /graphql — query by SKU, returns small_image.url
|
||||
// =============================================================================
|
||||
|
||||
const FLEXOPTIX_BASE = "https://www.flexoptix.net";
|
||||
const FLEXOPTIX_GRAPHQL = `${FLEXOPTIX_BASE}/graphql`;
|
||||
|
||||
interface FlexoptixGqlProduct {
|
||||
name: string;
|
||||
sku: string;
|
||||
url_key: string;
|
||||
small_image: { url: string } | null;
|
||||
image: { url: string } | null;
|
||||
thumbnail: { url: string } | null;
|
||||
}
|
||||
|
||||
async function fetchFlexoptixImage(sku: string): Promise<string | null> {
|
||||
// Strip ":Sx" coding suffix to get canonical SKU for search
|
||||
const canonicalSku = sku.replace(/:.*$/, "").trim();
|
||||
|
||||
const query = `{
|
||||
products(search: ${JSON.stringify(canonicalSku)}, pageSize: 5) {
|
||||
items {
|
||||
name
|
||||
sku
|
||||
url_key
|
||||
small_image { url }
|
||||
image { url }
|
||||
thumbnail { url }
|
||||
}
|
||||
}
|
||||
}`;
|
||||
|
||||
const data = await fetchJson(FLEXOPTIX_GRAPHQL, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ query }),
|
||||
}) as {
|
||||
data?: { products?: { items: FlexoptixGqlProduct[] } };
|
||||
};
|
||||
|
||||
const items = data.data?.products?.items ?? [];
|
||||
|
||||
// Prefer exact SKU match (canonical, case-insensitive)
|
||||
for (const item of items) {
|
||||
const itemSku = item.sku.replace(/:.*$/, "").trim().toLowerCase();
|
||||
if (itemSku === canonicalSku.toLowerCase()) {
|
||||
return (
|
||||
item.small_image?.url ||
|
||||
item.image?.url ||
|
||||
item.thumbnail?.url ||
|
||||
null
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: single result
|
||||
if (items.length === 1) {
|
||||
return (
|
||||
items[0].small_image?.url ||
|
||||
items[0].image?.url ||
|
||||
items[0].thumbnail?.url ||
|
||||
null
|
||||
);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
async function backfillFlexoptix(): Promise<{ updated: number; skipped: number; errors: number }> {
|
||||
logger.info("=== Flexoptix image backfill starting ===");
|
||||
|
||||
const rows = await pool.query(`
|
||||
SELECT t.id, t.part_number
|
||||
FROM transceivers t
|
||||
JOIN vendors v ON t.vendor_id = v.id
|
||||
WHERE v.name = 'FLEXOPTIX' AND (t.image_url IS NULL OR t.image_url = '')
|
||||
ORDER BY t.part_number
|
||||
`);
|
||||
|
||||
logger.info(`Flexoptix: ${rows.rows.length} products need images`);
|
||||
|
||||
let updated = 0;
|
||||
let skipped = 0;
|
||||
let errors = 0;
|
||||
|
||||
for (const row of rows.rows) {
|
||||
const sku = row.part_number as string;
|
||||
try {
|
||||
const imgUrl = await fetchFlexoptixImage(sku);
|
||||
if (imgUrl) {
|
||||
await updateImageUrl(row.id as string, imgUrl);
|
||||
updated++;
|
||||
if (updated % 25 === 0) {
|
||||
logger.info(`Flexoptix progress: ${updated} updated so far`);
|
||||
}
|
||||
} else {
|
||||
skipped++;
|
||||
}
|
||||
} catch (err) {
|
||||
logger.error(`Flexoptix error for ${sku}`, { error: (err as Error).message });
|
||||
errors++;
|
||||
}
|
||||
await sleep(100);
|
||||
}
|
||||
|
||||
logger.info(`Flexoptix done: ${updated} updated, ${skipped} no-image, ${errors} errors`);
|
||||
return { updated, skipped, errors };
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// GAO Tek
|
||||
// Fetch product page and extract og:image meta tag
|
||||
// =============================================================================
|
||||
|
||||
const GAOTEK_BASE = "https://gaotek.com";
|
||||
|
||||
function extractOgImage(html: string): string | null {
|
||||
const match =
|
||||
html.match(/<meta[^>]+property=["']og:image["'][^>]+content=["']([^"']+)["']/i) ||
|
||||
html.match(/<meta[^>]+content=["']([^"']+)["'][^>]+property=["']og:image["']/i);
|
||||
if (!match) return null;
|
||||
const url = match[1];
|
||||
// Skip placeholder and logo images
|
||||
if (url.includes("placeholder") || url.includes("logo") || url.includes("mobilelogo")) return null;
|
||||
return url;
|
||||
}
|
||||
|
||||
async function fetchGaoTekImage(productUrl: string): Promise<string | null> {
|
||||
const html = await fetchHtml(productUrl);
|
||||
return extractOgImage(html);
|
||||
}
|
||||
|
||||
async function backfillGaoTek(): Promise<{ updated: number; skipped: number; errors: number }> {
|
||||
logger.info("=== GAO Tek image backfill starting ===");
|
||||
|
||||
// Prefer records that have stored product URLs in price_observations
|
||||
const withUrlRows = await pool.query(`
|
||||
SELECT DISTINCT ON (t.id)
|
||||
t.id, t.part_number, t.slug, po.url as product_url
|
||||
FROM transceivers t
|
||||
JOIN vendors v ON t.vendor_id = v.id
|
||||
LEFT JOIN price_observations po ON po.transceiver_id = t.id
|
||||
WHERE v.name = 'GAO Tek'
|
||||
AND (t.image_url IS NULL OR t.image_url = '')
|
||||
AND po.url IS NOT NULL
|
||||
AND po.url LIKE '%gaotek%'
|
||||
ORDER BY t.id, po.time DESC
|
||||
`);
|
||||
|
||||
// All records without images
|
||||
const allRows = await pool.query(`
|
||||
SELECT t.id, t.part_number, t.slug
|
||||
FROM transceivers t
|
||||
JOIN vendors v ON t.vendor_id = v.id
|
||||
WHERE v.name = 'GAO Tek'
|
||||
AND (t.image_url IS NULL OR t.image_url = '')
|
||||
`);
|
||||
|
||||
const withUrlIds = new Set(withUrlRows.rows.map((r) => r.id as string));
|
||||
|
||||
// Combine: known URLs first, then slug-derived URLs
|
||||
const toProcess: Array<{ id: string; partNumber: string; productUrl: string }> = [];
|
||||
|
||||
for (const row of withUrlRows.rows) {
|
||||
toProcess.push({
|
||||
id: row.id as string,
|
||||
partNumber: row.part_number as string,
|
||||
productUrl: row.product_url as string,
|
||||
});
|
||||
}
|
||||
|
||||
for (const row of allRows.rows) {
|
||||
if (withUrlIds.has(row.id as string)) continue;
|
||||
const rawSlug = (row.slug as string).replace(/^scraped-/, "");
|
||||
toProcess.push({
|
||||
id: row.id as string,
|
||||
partNumber: row.part_number as string,
|
||||
productUrl: `${GAOTEK_BASE}/product/${rawSlug}/`,
|
||||
});
|
||||
}
|
||||
|
||||
logger.info(`GAO Tek: ${toProcess.length} products to attempt`);
|
||||
|
||||
let updated = 0;
|
||||
let skipped = 0;
|
||||
let errors = 0;
|
||||
|
||||
for (const item of toProcess) {
|
||||
try {
|
||||
const imgUrl = await fetchGaoTekImage(item.productUrl);
|
||||
if (imgUrl) {
|
||||
await updateImageUrl(item.id, imgUrl);
|
||||
updated++;
|
||||
if (updated % 25 === 0) {
|
||||
logger.info(`GAO Tek progress: ${updated} updated so far`);
|
||||
}
|
||||
} else {
|
||||
skipped++;
|
||||
}
|
||||
} catch (err) {
|
||||
const msg = (err as Error).message;
|
||||
if (!msg.includes("HTTP 404") && !msg.includes("HTTP 403")) {
|
||||
logger.warn(`GAO Tek error for ${item.productUrl}`, { error: msg.slice(0, 80) });
|
||||
}
|
||||
errors++;
|
||||
}
|
||||
await sleep(100);
|
||||
}
|
||||
|
||||
logger.info(`GAO Tek done: ${updated} updated, ${skipped} no-image, ${errors} errors/404s`);
|
||||
return { updated, skipped, errors };
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Other vendors — og:image from stored price_observations URLs
|
||||
// =============================================================================
|
||||
|
||||
const OTHER_VENDOR_NAMES = [
|
||||
"T&S Communication",
|
||||
"Ascent Optics",
|
||||
"ATGBICS",
|
||||
"Skylane Optics",
|
||||
"SmartOptics",
|
||||
"ProLabs",
|
||||
"FS.COM",
|
||||
"GBICS",
|
||||
"Fluxlight",
|
||||
];
|
||||
|
||||
async function backfillOtherVendors(): Promise<{ total: number; updated: number }> {
|
||||
logger.info("=== Other vendors og:image backfill starting ===");
|
||||
|
||||
const rows = await pool.query(`
|
||||
SELECT DISTINCT ON (t.id)
|
||||
t.id, t.part_number, v.name as vendor_name, po.url as product_url
|
||||
FROM transceivers t
|
||||
JOIN vendors v ON t.vendor_id = v.id
|
||||
JOIN price_observations po ON po.transceiver_id = t.id
|
||||
WHERE v.name = ANY($1)
|
||||
AND (t.image_url IS NULL OR t.image_url = '')
|
||||
AND po.url IS NOT NULL
|
||||
AND po.url ~ '^https?://'
|
||||
ORDER BY t.id, po.time DESC
|
||||
`, [OTHER_VENDOR_NAMES]);
|
||||
|
||||
logger.info(`Other vendors: ${rows.rows.length} products with URLs to process`);
|
||||
|
||||
let updated = 0;
|
||||
|
||||
for (const row of rows.rows) {
|
||||
const productUrl = row.product_url as string;
|
||||
try {
|
||||
const html = await fetchHtml(productUrl);
|
||||
const imgUrl = extractOgImage(html);
|
||||
if (imgUrl) {
|
||||
await updateImageUrl(row.id as string, imgUrl);
|
||||
updated++;
|
||||
}
|
||||
} catch {
|
||||
// Skip 404s, timeouts, etc. silently
|
||||
}
|
||||
await sleep(100);
|
||||
}
|
||||
|
||||
logger.info(`Other vendors done: ${updated} / ${rows.rows.length} updated`);
|
||||
return { total: rows.rows.length, updated };
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Main
|
||||
// =============================================================================
|
||||
|
||||
async function main(): Promise<void> {
|
||||
logger.info("=== TIP Image Backfill Script ===");
|
||||
logger.info(`DB: ${process.env.POSTGRES_HOST ?? "localhost"}:${process.env.POSTGRES_PORT ?? "5433"}`);
|
||||
|
||||
try {
|
||||
await pool.query("SELECT 1");
|
||||
logger.info("DB connection OK");
|
||||
} catch (err) {
|
||||
logger.error("DB connection failed", { error: (err as Error).message });
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const startTime = Date.now();
|
||||
const results: Record<string, unknown> = {};
|
||||
|
||||
try {
|
||||
results.optcore = await backfillOptcore();
|
||||
} catch (err) {
|
||||
logger.error("Optcore backfill failed", { error: (err as Error).message });
|
||||
results.optcore = { error: (err as Error).message };
|
||||
}
|
||||
|
||||
try {
|
||||
results.flexoptix = await backfillFlexoptix();
|
||||
} catch (err) {
|
||||
logger.error("Flexoptix backfill failed", { error: (err as Error).message });
|
||||
results.flexoptix = { error: (err as Error).message };
|
||||
}
|
||||
|
||||
try {
|
||||
results.gaotek = await backfillGaoTek();
|
||||
} catch (err) {
|
||||
logger.error("GAO Tek backfill failed", { error: (err as Error).message });
|
||||
results.gaotek = { error: (err as Error).message };
|
||||
}
|
||||
|
||||
try {
|
||||
results.others = await backfillOtherVendors();
|
||||
} catch (err) {
|
||||
logger.error("Other vendors backfill failed", { error: (err as Error).message });
|
||||
results.others = { error: (err as Error).message };
|
||||
}
|
||||
|
||||
const elapsedSec = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||
logger.info("=== Backfill complete ===", { results, elapsedSec });
|
||||
}
|
||||
|
||||
main()
|
||||
.then(() => pool.end())
|
||||
.catch((err) => {
|
||||
logger.error("Fatal error", { error: (err as Error).message });
|
||||
pool.end();
|
||||
process.exit(1);
|
||||
});
|
||||
@ -10,7 +10,9 @@ export const pool = new Pool({
|
||||
database: process.env.POSTGRES_DB || "transceiver_db",
|
||||
user: process.env.POSTGRES_USER || "tip",
|
||||
password: process.env.POSTGRES_PASSWORD || "tip_dev_2026",
|
||||
max: 10,
|
||||
max: 5,
|
||||
idleTimeoutMillis: 10000,
|
||||
connectionTimeoutMillis: 5000,
|
||||
});
|
||||
|
||||
// Alias — some scrapers import { db } instead of { pool }
|
||||
@ -54,6 +56,11 @@ export async function upsertPriceObservation(params: {
|
||||
params.contentHash,
|
||||
]
|
||||
);
|
||||
// Mark the transceiver as price-verified whenever we successfully record a price
|
||||
await pool.query(
|
||||
`UPDATE transceivers SET price_verified = true WHERE id = $1 AND (price_verified IS NULL OR price_verified = false)`,
|
||||
[params.transceiverId]
|
||||
);
|
||||
return true; // New observation written
|
||||
}
|
||||
|
||||
@ -68,23 +75,31 @@ export async function findOrCreateScrapedTransceiver(params: {
|
||||
fiberType?: string;
|
||||
wavelengths?: string;
|
||||
category?: string;
|
||||
imageUrl?: string;
|
||||
}): Promise<string> {
|
||||
// Try to match existing transceiver by part number + vendor
|
||||
const existing = await pool.query(
|
||||
`SELECT id FROM transceivers WHERE part_number = $1 AND vendor_id = $2`,
|
||||
`SELECT id, image_url FROM transceivers WHERE part_number = $1 AND vendor_id = $2`,
|
||||
[params.partNumber, params.vendorId]
|
||||
);
|
||||
|
||||
if (existing.rows.length > 0) {
|
||||
// Update image_url and image_verified if we have a new image for a record without one
|
||||
if (params.imageUrl && !existing.rows[0].image_url) {
|
||||
await pool.query(
|
||||
`UPDATE transceivers SET image_url = $1, image_verified = true, updated_at = NOW() WHERE id = $2`,
|
||||
[params.imageUrl, existing.rows[0].id]
|
||||
);
|
||||
}
|
||||
return existing.rows[0].id;
|
||||
}
|
||||
|
||||
// Create new transceiver entry
|
||||
const slug = `scraped-${params.partNumber.toLowerCase().replace(/[^a-z0-9]+/g, "-")}`;
|
||||
const result = await pool.query(
|
||||
`INSERT INTO transceivers (slug, part_number, vendor_id, form_factor, speed_gbps, speed, reach_meters, reach_label, fiber_type, wavelengths, category, market_status)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, 'Mainstream')
|
||||
ON CONFLICT (slug) DO UPDATE SET updated_at = NOW()
|
||||
`INSERT INTO transceivers (slug, part_number, vendor_id, form_factor, speed_gbps, speed, reach_meters, reach_label, fiber_type, wavelengths, category, market_status, image_url, image_verified)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, 'Mainstream', $12, $13)
|
||||
ON CONFLICT (slug) DO UPDATE SET image_url = COALESCE(transceivers.image_url, EXCLUDED.image_url), image_verified = COALESCE(transceivers.image_verified, EXCLUDED.image_verified), updated_at = NOW()
|
||||
RETURNING id`,
|
||||
[
|
||||
slug,
|
||||
@ -98,6 +113,8 @@ export async function findOrCreateScrapedTransceiver(params: {
|
||||
params.fiberType || "",
|
||||
params.wavelengths || "",
|
||||
params.category || "DataCenter",
|
||||
params.imageUrl || null,
|
||||
params.imageUrl ? true : null,
|
||||
]
|
||||
);
|
||||
return result.rows[0].id;
|
||||
|
||||
77
packages/scraper/src/utils/proxy.ts
Normal file
77
packages/scraper/src/utils/proxy.ts
Normal file
@ -0,0 +1,77 @@
|
||||
/**
|
||||
* Proxy utility — fetches next available proxy from TIP proxy pool.
|
||||
* Falls back to process.env.PROXY_URL if pool is empty or unreachable.
|
||||
*/
|
||||
import * as https from "https";
|
||||
import * as http from "http";
|
||||
|
||||
const TIP_API = process.env.TIP_API_URL ?? "https://transceiver-db.context-x.org";
|
||||
const FALLBACK = process.env.PROXY_URL ?? "";
|
||||
|
||||
interface ProxyResponse {
|
||||
success: boolean;
|
||||
host: string;
|
||||
port: number;
|
||||
protocol: string;
|
||||
}
|
||||
|
||||
function getJson(url: string): Promise<unknown> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const parsed = new URL(url);
|
||||
const mod = parsed.protocol === "https:" ? https : http;
|
||||
|
||||
const req = mod.request(
|
||||
{
|
||||
hostname: parsed.hostname,
|
||||
port: parsed.port || (parsed.protocol === "https:" ? 443 : 80),
|
||||
path: parsed.pathname + parsed.search,
|
||||
method: "GET",
|
||||
headers: { "User-Agent": "TIP-Scraper/1.0.0" },
|
||||
timeout: 5_000,
|
||||
},
|
||||
(res) => {
|
||||
const chunks: Buffer[] = [];
|
||||
res.on("data", (c: Buffer) => chunks.push(c));
|
||||
res.on("end", () => {
|
||||
try {
|
||||
resolve(JSON.parse(Buffer.concat(chunks).toString()));
|
||||
} catch {
|
||||
reject(new Error("Invalid JSON response"));
|
||||
}
|
||||
});
|
||||
}
|
||||
);
|
||||
|
||||
req.on("timeout", () => { req.destroy(); reject(new Error("timeout")); });
|
||||
req.on("error", reject);
|
||||
req.end();
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next available proxy URL from the TIP proxy pool.
|
||||
* Format: "socks5://host:port" or "http://host:port"
|
||||
* Returns undefined if no proxy is available (direct connection).
|
||||
*/
|
||||
export async function getProxyUrl(): Promise<string | undefined> {
|
||||
// Try proxy pool first
|
||||
try {
|
||||
const data = await getJson(`${TIP_API}/api/proxy/next`) as ProxyResponse;
|
||||
if (data?.success && data.host && data.port) {
|
||||
const protocol = data.protocol ?? "socks5";
|
||||
const proxyUrl = `${protocol}://${data.host}:${data.port}`;
|
||||
console.log(`[proxy] Using TIP pool node: ${data.host}:${data.port} (${protocol})`);
|
||||
return proxyUrl;
|
||||
}
|
||||
} catch {
|
||||
// Pool unavailable — fall through to env fallback
|
||||
}
|
||||
|
||||
// Fallback to env var
|
||||
if (FALLBACK) {
|
||||
console.log(`[proxy] Using env PROXY_URL fallback`);
|
||||
return FALLBACK;
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user