Compare commits

..

No commits in common. "71d6c20b5e1f96071f5a114936e8e93b26a3ea50" and "b612c5801db210b18a27da75e3e4bf3b27f2dbd7" have entirely different histories.

19 changed files with 178 additions and 1918 deletions

View File

@ -18,10 +18,6 @@ Types: FEAT · FIX · UI · DATA · AI · INFRA
{"d":"2026-04-02","t":"FEAT","m":"Forecast Engine: weighted demand index (0-100) from 6 signal types — capex 0.30, ai_clusters 0.25, ebay_velocity 0.20, lead_times 0.15, github 0.06, standards 0.04 — 3/9/12/18 month horizons for 5 technologies"}
{"d":"2026-04-02","t":"FEAT","m":"NAS sync: datasheet/manual download — PDFs from product_documents organized into switches/transceivers/whitepapers/other"}
{"d":"2026-04-02","t":"INFRA","m":"Scheduler: 50 total pg-boss jobs — 8 new prediction/forecast jobs with cron schedules"}
{"d":"2026-04-03","t":"FEAT","m":"TIP Proxy Network: residential proxy pool — contributor nodes donate bandwidth, SOCKS5 server (Node.js net only), register/heartbeat/next/rotate/stats API, round-robin routing with uptime+latency scoring"}
{"d":"2026-04-03","t":"FEAT","m":"@tip/proxy-agent: standalone CLI package — tip-agent start/status/stop, configurable bandwidth cap, 30s heartbeat, graceful shutdown"}
{"d":"2026-04-03","t":"UI","m":"Dashboard Network tab: node stats, join-the-network card with token generator, install command box, country breakdown table"}
{"d":"2026-04-03","t":"INFRA","m":"Mac Studio home node: tip-agent running on 192.168.178.213:1081, PROXY_URL=socks5://192.168.178.213:1081 set in PM2 env for scraper+api, ProLabs WAF bypass now active"}
{"d":"2026-04-01","t":"FEAT","m":"Product Intelligence Layer (migration 020): product_issues table (forum/community bugs), condition+marketplace on price_observations, features JSONB on switches+transceivers"}
{"d":"2026-04-01","t":"FEAT","m":"eBay Enricher: scrapes eBay.de for switch/transceiver listings — extracts features, description, refurbished prices, images — nightly via pg-boss"}
{"d":"2026-04-01","t":"FEAT","m":"Community Issues Scraper: extracts known bugs/incompatibilities from Reddit, ServeTheHome, Arista Community, Cisco Community, NetworkEngineering SE"}

View File

@ -1,306 +0,0 @@
/**
* Proxy Network API routes
*
* POST /api/proxy/register register new node, returns token
* POST /api/proxy/heartbeat update node status + stats
* GET /api/proxy/nodes list all nodes (admin)
* GET /api/proxy/next get best available proxy for scraper use
* POST /api/proxy/rotate mark node as used, get next
* GET /api/proxy/stats network-wide stats
*
* Note: register + heartbeat are public (no auth required).
* nodes + rotate require admin token via X-Admin-Token header.
*/
import { Router, Request, Response } from "express";
import * as crypto from "crypto";
import { pool } from "../db/client";
export const proxyRouter = Router();
const ADMIN_TOKEN = process.env.TIP_ADMIN_TOKEN ?? "tip-admin-2026";
function requireAdmin(req: Request, res: Response): boolean {
const token =
req.headers["x-admin-token"] ??
req.query["admin_token"] ??
"";
if (token !== ADMIN_TOKEN) {
res.status(401).json({ error: "Admin token required" });
return false;
}
return true;
}
function generateToken(): string {
return crypto.randomBytes(32).toString("hex");
}
// Round-robin index tracker (in-memory, resets on restart)
let rrIndex = 0;
// POST /api/proxy/register
proxyRouter.post("/register", async (req: Request, res: Response) => {
try {
const { token: existingToken, name, owner_email, port, version } = req.body as {
token?: string;
name?: string;
owner_email?: string;
port?: number;
version?: string;
};
const ip =
(req.headers["x-forwarded-for"] as string | undefined)?.split(",")[0]?.trim() ??
req.socket.remoteAddress ??
"";
// If token provided, upsert — otherwise create new
const token = existingToken ?? generateToken();
const result = await pool.query(
`INSERT INTO proxy_nodes (token, name, owner_email, ip, port, version, status, last_seen)
VALUES ($1, $2, $3, $4, $5, $6, 'online', NOW())
ON CONFLICT (token) DO UPDATE SET
name = COALESCE(EXCLUDED.name, proxy_nodes.name),
ip = EXCLUDED.ip,
port = COALESCE(EXCLUDED.port, proxy_nodes.port),
version = COALESCE(EXCLUDED.version, proxy_nodes.version),
status = 'online',
last_seen = NOW()
RETURNING id, token, name, status`,
[token, name ?? null, owner_email ?? null, ip, port ?? 1080, version ?? null]
);
const node = result.rows[0];
res.json({
success: true,
token: node.token,
nodeId: node.id,
message: "Node registered. Keep this token safe!",
});
} catch (err) {
console.error("[proxy] register error:", err);
res.status(500).json({ error: "Registration failed" });
}
});
// POST /api/proxy/heartbeat
proxyRouter.post("/heartbeat", async (req: Request, res: Response) => {
try {
const {
token,
ip,
port,
bytesProxied,
requestsProxied,
latencyMs,
version,
} = req.body as {
token: string;
ip?: string;
port?: number;
bytesProxied?: number;
requestsProxied?: number;
latencyMs?: number;
version?: string;
};
if (!token) {
res.status(400).json({ error: "token required" });
return;
}
const remoteIp =
ip ??
(req.headers["x-forwarded-for"] as string | undefined)?.split(",")[0]?.trim() ??
req.socket.remoteAddress ??
"";
const bytesGb = (bytesProxied ?? 0) / (1024 ** 3);
const result = await pool.query(
`UPDATE proxy_nodes SET
status = 'online',
last_seen = NOW(),
ip = COALESCE($2, ip),
port = COALESCE($3, port),
bytes_proxied = COALESCE($4, bytes_proxied),
requests_proxied = COALESCE($5, requests_proxied),
latency_ms = COALESCE($6, latency_ms),
version = COALESCE($7, version),
bandwidth_used_gb = COALESCE($8, bandwidth_used_gb)
WHERE token = $1
RETURNING id, status`,
[
token,
remoteIp || null,
port ?? null,
bytesProxied ?? null,
requestsProxied ?? null,
latencyMs ?? null,
version ?? null,
bytesGb > 0 ? bytesGb : null,
]
);
if (result.rowCount === 0) {
res.status(404).json({ error: "Node not found. Please register first." });
return;
}
res.json({ success: true, status: "online" });
} catch (err) {
console.error("[proxy] heartbeat error:", err);
res.status(500).json({ error: "Heartbeat failed" });
}
});
// GET /api/proxy/nodes (admin)
proxyRouter.get("/nodes", async (req: Request, res: Response) => {
if (!requireAdmin(req, res)) return;
try {
const result = await pool.query(
`SELECT id, token, name, country_code, city, ip, port, protocol,
status, last_seen, bytes_proxied, requests_proxied,
uptime_pct, bandwidth_limit_gb, bandwidth_used_gb,
latency_ms, version, registered_at
FROM proxy_nodes
ORDER BY status = 'online' DESC, last_seen DESC`
);
res.json({ success: true, nodes: result.rows, total: result.rowCount });
} catch (err) {
console.error("[proxy] nodes error:", err);
res.status(500).json({ error: "Failed to fetch nodes" });
}
});
// GET /api/proxy/next — best available proxy for scraper use
proxyRouter.get("/next", async (_req: Request, res: Response) => {
try {
const result = await pool.query(
`SELECT id, ip, port, protocol, country_code, city, latency_ms
FROM proxy_nodes
WHERE status = 'online'
AND last_seen > NOW() - INTERVAL '2 minutes'
AND (bandwidth_used_gb < bandwidth_limit_gb OR bandwidth_limit_gb = 0)
ORDER BY uptime_pct DESC NULLS LAST, latency_ms ASC NULLS LAST
LIMIT 10`
);
if (result.rowCount === 0) {
res.status(503).json({
error: "No proxies available",
fallback: process.env.PROXY_URL ?? null,
});
return;
}
const nodes = result.rows;
const node = nodes[rrIndex % nodes.length];
rrIndex = (rrIndex + 1) % nodes.length;
res.json({
success: true,
host: node.ip,
port: node.port,
protocol: node.protocol ?? "socks5",
nodeId: node.id,
country: node.country_code,
city: node.city,
latencyMs: node.latency_ms,
});
} catch (err) {
console.error("[proxy] next error:", err);
res.status(500).json({ error: "Failed to get next proxy" });
}
});
// POST /api/proxy/rotate — mark node as used, get next one
proxyRouter.post("/rotate", async (req: Request, res: Response) => {
if (!requireAdmin(req, res)) return;
try {
const { nodeId } = req.body as { nodeId?: string };
if (nodeId) {
// Optionally mark as temporarily degraded (just log — no ban)
await pool.query(
`UPDATE proxy_nodes SET requests_proxied = requests_proxied + 1 WHERE id = $1`,
[nodeId]
);
}
// Return next proxy
const result = await pool.query(
`SELECT id, ip, port, protocol, country_code
FROM proxy_nodes
WHERE status = 'online'
AND last_seen > NOW() - INTERVAL '2 minutes'
AND id != COALESCE($1::uuid, '00000000-0000-0000-0000-000000000000')
ORDER BY requests_proxied ASC, latency_ms ASC NULLS LAST
LIMIT 1`,
[nodeId ?? null]
);
if (result.rowCount === 0) {
res.status(503).json({ error: "No other proxies available" });
return;
}
const node = result.rows[0];
res.json({
success: true,
host: node.ip,
port: node.port,
protocol: node.protocol ?? "socks5",
nodeId: node.id,
country: node.country_code,
});
} catch (err) {
console.error("[proxy] rotate error:", err);
res.status(500).json({ error: "Failed to rotate proxy" });
}
});
// GET /api/proxy/stats — public network stats
proxyRouter.get("/stats", async (_req: Request, res: Response) => {
try {
const result = await pool.query(`
SELECT
COUNT(*) AS total_nodes,
COUNT(*) FILTER (WHERE status = 'online') AS online_nodes,
COUNT(*) FILTER (WHERE status = 'offline') AS offline_nodes,
COUNT(DISTINCT country_code) AS countries,
COALESCE(SUM(bandwidth_used_gb), 0) AS total_gb_proxied,
COALESCE(SUM(requests_proxied), 0) AS total_requests,
COALESCE(AVG(latency_ms) FILTER (WHERE status = 'online'), 0) AS avg_latency_ms
FROM proxy_nodes
`);
const row = result.rows[0] ?? {};
// Per-country breakdown
const countryResult = await pool.query(`
SELECT country_code, COUNT(*) AS nodes,
COUNT(*) FILTER (WHERE status = 'online') AS online
FROM proxy_nodes
WHERE country_code IS NOT NULL
GROUP BY country_code
ORDER BY online DESC, nodes DESC
`);
res.json({
success: true,
network: {
totalNodes: Number(row.total_nodes ?? 0),
onlineNodes: Number(row.online_nodes ?? 0),
offlineNodes: Number(row.offline_nodes ?? 0),
countries: Number(row.countries ?? 0),
totalGbProxied: parseFloat(Number(row.total_gb_proxied ?? 0).toFixed(2)),
totalRequests: Number(row.total_requests ?? 0),
avgLatencyMs: Math.round(Number(row.avg_latency_ms ?? 0)),
},
countries: countryResult.rows,
});
} catch (err) {
console.error("[proxy] stats error:", err);
res.status(500).json({ error: "Failed to get stats" });
}
});

View File

@ -1,50 +0,0 @@
# @tip/proxy-agent
Contribute bandwidth to the TIP (Transceiver Intelligence Platform) proxy network
and get free API access in return.
## How it works
- You run the agent on a residential or home server machine
- The agent starts a SOCKS5 proxy server on your machine
- TIP scrapers route through your IP to bypass datacenter IP blocks (e.g. CloudFront WAF)
- You get: free TIP API access, contributor badge, early access to new features
## Install & Start
```bash
# Generate a token first on https://transceiver-db.context-x.org/dashboard (Network tab)
# Then start the agent:
npx @tip/proxy-agent start --token YOUR_TOKEN
```
## Options
```
tip-agent start --token TOKEN [--port 1080] [--max-gb 10] [--name "My Node"]
tip-agent status
tip-agent stop
```
| Option | Default | Description |
|------------|---------------------------------------------|--------------------------|
| `--token` | (required) | Your TIP node token |
| `--port` | 1080 | SOCKS5 listen port |
| `--max-gb` | 10 | Max bandwidth to donate |
| `--name` | hostname | Node display name |
| `--server` | https://transceiver-db.context-x.org | TIP API server |
## Protocol
- SOCKS5 (RFC 1928)
- No-auth method
- TCP CONNECT only
- 30s connection timeout
- Heartbeat every 30s to routing server
## Privacy
- Only outbound TCP CONNECT requests are proxied (no BIND, no UDP)
- Bandwidth is capped at your configured limit
- No logs of proxied content are stored
- Your IP is visible to the target website (residential proxy model)

View File

@ -1,18 +0,0 @@
{
"name": "@tip/proxy-agent",
"version": "1.0.0",
"description": "TIP Network Proxy Agent — contribute bandwidth, get free API access",
"bin": { "tip-agent": "./dist/index.js" },
"main": "./dist/index.js",
"scripts": {
"build": "tsc",
"start": "node dist/index.js"
},
"dependencies": {
"dotenv": "^16.0.0"
},
"devDependencies": {
"typescript": "^5.0.0",
"@types/node": "^20.0.0"
}
}

View File

@ -1,174 +0,0 @@
/**
* Core agent logic registers with TIP routing server,
* starts SOCKS5 server, manages heartbeat and graceful shutdown.
*/
import * as https from "https";
import * as http from "http";
import * as os from "os";
import { createSocks5Server } from "./socks5";
import { BandwidthTracker } from "./bandwidth";
import { startHeartbeatLoop } from "./heartbeat";
import { saveConfig, savePid, clearPid, AgentConfig } from "./config";
const AGENT_VERSION = "1.0.0";
interface RegisterResponse {
token: string;
nodeId: string;
message?: string;
}
function postJson(url: string, body: unknown): Promise<{ status: number; data: unknown }> {
return new Promise((resolve, reject) => {
const json = JSON.stringify(body);
const parsed = new URL(url);
const mod = parsed.protocol === "https:" ? https : http;
const req = mod.request(
{
hostname: parsed.hostname,
port: parsed.port || (parsed.protocol === "https:" ? 443 : 80),
path: parsed.pathname + parsed.search,
method: "POST",
headers: {
"Content-Type": "application/json",
"Content-Length": Buffer.byteLength(json),
"User-Agent": `TIP-Agent/${AGENT_VERSION}`,
},
timeout: 15_000,
},
(res) => {
const chunks: Buffer[] = [];
res.on("data", (c: Buffer) => chunks.push(c));
res.on("end", () => {
try {
const data = JSON.parse(Buffer.concat(chunks).toString());
resolve({ status: res.statusCode ?? 0, data });
} catch {
resolve({ status: res.statusCode ?? 0, data: {} });
}
});
}
);
req.on("timeout", () => { req.destroy(); reject(new Error("timeout")); });
req.on("error", reject);
req.write(json);
req.end();
});
}
async function registerNode(cfg: AgentConfig): Promise<RegisterResponse> {
const url = `${cfg.serverUrl}/api/proxy/register`;
console.log(`[agent] Registering with ${url}...`);
const { status, data } = await postJson(url, {
token: cfg.token,
name: cfg.name,
port: cfg.port,
version: AGENT_VERSION,
});
if (status >= 200 && status < 300) {
const resp = data as RegisterResponse;
console.log(`[agent] Registered successfully. Node: ${resp.nodeId ?? "unknown"}`);
return resp;
}
throw new Error(`Registration failed: HTTP ${status}`);
}
export async function startAgent(cfg: AgentConfig): Promise<void> {
const bandwidth = new BandwidthTracker(cfg.maxGb);
// Register with server
await registerNode(cfg).catch((err) => {
console.warn(`[agent] Registration warning: ${err.message} (continuing anyway)`);
});
// Start SOCKS5 server
const server = createSocks5Server({
port: cfg.port,
bandwidth,
onNewConnection: () => {
if (bandwidth.isLimitReached()) {
console.warn("[agent] Bandwidth limit reached — rejecting new connections");
return false;
}
return true;
},
});
await new Promise<void>((resolve, reject) => {
server.listen(cfg.port, () => {
console.log(`[agent] SOCKS5 proxy listening on port ${cfg.port}`);
resolve();
});
server.on("error", reject);
});
// Save PID
savePid(process.pid);
saveConfig({ ...cfg, pid: process.pid, startedAt: new Date().toISOString() });
// Start heartbeat
const heartbeatTimer = startHeartbeatLoop(
cfg.serverUrl,
() => {
const stats = bandwidth.getStats();
return {
token: cfg.token,
port: cfg.port,
bytesProxied: stats.bytesProxied,
requestsProxied: stats.requestsProxied,
version: AGENT_VERSION,
};
},
(result) => {
if (!result.ok) {
console.warn(`[agent] Heartbeat failed: ${result.error ?? "unknown error"}`);
}
}
);
console.log(`[agent] TIP Agent running. Max bandwidth: ${cfg.maxGb} GB`);
console.log(`[agent] Press Ctrl+C to stop.\n`);
// Print stats every minute
const statsTimer = setInterval(() => {
const stats = bandwidth.getStats();
const usedGb = (stats.bytesProxied / (1024 ** 3)).toFixed(3);
console.log(
`[agent] Stats: ${stats.requestsProxied} requests, ${usedGb} GB used / ${cfg.maxGb} GB limit`
);
}, 60_000);
// Graceful shutdown
const shutdown = (signal: string) => {
console.log(`\n[agent] Received ${signal}, shutting down...`);
clearInterval(heartbeatTimer);
clearInterval(statsTimer);
clearPid();
server.close(() => {
console.log("[agent] SOCKS5 server closed. Goodbye.");
process.exit(0);
});
setTimeout(() => process.exit(1), 5000);
};
process.on("SIGINT", () => shutdown("SIGINT"));
process.on("SIGTERM", () => shutdown("SIGTERM"));
}
export function getLocalIp(): string {
const interfaces = os.networkInterfaces();
for (const iface of Object.values(interfaces)) {
if (!iface) continue;
for (const addr of iface) {
if (addr.family === "IPv4" && !addr.internal) {
return addr.address;
}
}
}
return "127.0.0.1";
}

View File

@ -1,50 +0,0 @@
/**
* Bandwidth tracker monitors usage and enforces limits
*/
export interface BandwidthStats {
bytesProxied: number;
requestsProxied: number;
bytesLimitHard: number;
limitReached: boolean;
}
export class BandwidthTracker {
private bytesProxied = 0;
private requestsProxied = 0;
private readonly bytesLimit: number;
constructor(maxGb: number) {
this.bytesLimit = maxGb * 1024 * 1024 * 1024;
}
addBytes(n: number): void {
this.bytesProxied += n;
}
incrementRequests(): void {
this.requestsProxied++;
}
isLimitReached(): boolean {
return this.bytesProxied >= this.bytesLimit;
}
getStats(): BandwidthStats {
return {
bytesProxied: this.bytesProxied,
requestsProxied: this.requestsProxied,
bytesLimitHard: this.bytesLimit,
limitReached: this.isLimitReached(),
};
}
getUsedGb(): number {
return this.bytesProxied / (1024 * 1024 * 1024);
}
reset(): void {
this.bytesProxied = 0;
this.requestsProxied = 0;
}
}

View File

@ -1,76 +0,0 @@
/**
* Config management persists agent config to ~/.tip-agent/config.json
*/
import * as fs from "fs";
import * as path from "path";
import * as os from "os";
const CONFIG_DIR = path.join(os.homedir(), ".tip-agent");
const CONFIG_FILE = path.join(CONFIG_DIR, "config.json");
export interface AgentConfig {
token: string;
port: number;
maxGb: number;
name: string;
serverUrl: string;
pid?: number;
startedAt?: string;
}
const DEFAULTS: Partial<AgentConfig> = {
port: 1080,
maxGb: 10,
name: os.hostname(),
serverUrl: "https://transceiver-db.context-x.org",
};
export function loadConfig(): AgentConfig | null {
if (!fs.existsSync(CONFIG_FILE)) return null;
try {
const raw = fs.readFileSync(CONFIG_FILE, "utf8");
return JSON.parse(raw) as AgentConfig;
} catch {
return null;
}
}
export function saveConfig(cfg: Partial<AgentConfig>): AgentConfig {
if (!fs.existsSync(CONFIG_DIR)) {
fs.mkdirSync(CONFIG_DIR, { recursive: true });
}
const existing = loadConfig() ?? {};
const merged = { ...DEFAULTS, ...existing, ...cfg } as AgentConfig;
fs.writeFileSync(CONFIG_FILE, JSON.stringify(merged, null, 2), "utf8");
return merged;
}
export function clearConfig(): void {
if (fs.existsSync(CONFIG_FILE)) {
fs.unlinkSync(CONFIG_FILE);
}
}
export function getPidFile(): string {
return path.join(CONFIG_DIR, "agent.pid");
}
export function savePid(pid: number): void {
if (!fs.existsSync(CONFIG_DIR)) {
fs.mkdirSync(CONFIG_DIR, { recursive: true });
}
fs.writeFileSync(getPidFile(), String(pid), "utf8");
}
export function loadPid(): number | null {
const pidFile = getPidFile();
if (!fs.existsSync(pidFile)) return null;
const raw = fs.readFileSync(pidFile, "utf8").trim();
const pid = parseInt(raw, 10);
return isNaN(pid) ? null : pid;
}
export function clearPid(): void {
const pidFile = getPidFile();
if (fs.existsSync(pidFile)) fs.unlinkSync(pidFile);
}

View File

@ -1,82 +0,0 @@
/**
* Heartbeat sends periodic status updates to TIP routing server.
* POST /api/proxy/heartbeat every 30s.
*/
import * as https from "https";
import * as http from "http";
export interface HeartbeatPayload {
token: string;
ip?: string;
port: number;
bytesProxied: number;
requestsProxied: number;
latencyMs?: number;
version: string;
}
export interface HeartbeatResult {
ok: boolean;
latencyMs: number;
error?: string;
}
function postJson(url: string, body: unknown): Promise<{ status: number; latencyMs: number }> {
return new Promise((resolve, reject) => {
const start = Date.now();
const json = JSON.stringify(body);
const parsed = new URL(url);
const mod = parsed.protocol === "https:" ? https : http;
const req = mod.request(
{
hostname: parsed.hostname,
port: parsed.port || (parsed.protocol === "https:" ? 443 : 80),
path: parsed.pathname + parsed.search,
method: "POST",
headers: {
"Content-Type": "application/json",
"Content-Length": Buffer.byteLength(json),
"User-Agent": "TIP-Agent/1.0.0",
},
timeout: 10_000,
},
(res) => {
res.resume();
resolve({ status: res.statusCode ?? 0, latencyMs: Date.now() - start });
}
);
req.on("timeout", () => { req.destroy(); reject(new Error("timeout")); });
req.on("error", reject);
req.write(json);
req.end();
});
}
export async function sendHeartbeat(
serverUrl: string,
payload: HeartbeatPayload
): Promise<HeartbeatResult> {
const url = `${serverUrl}/api/proxy/heartbeat`;
try {
const { status, latencyMs } = await postJson(url, payload);
return { ok: status >= 200 && status < 300, latencyMs };
} catch (err) {
return { ok: false, latencyMs: 0, error: String(err) };
}
}
export function startHeartbeatLoop(
serverUrl: string,
getPayload: () => HeartbeatPayload,
onResult?: (r: HeartbeatResult) => void,
intervalMs = 30_000
): NodeJS.Timeout {
const tick = async () => {
const result = await sendHeartbeat(serverUrl, getPayload());
onResult?.(result);
};
tick();
return setInterval(tick, intervalMs);
}

View File

@ -1,167 +0,0 @@
#!/usr/bin/env node
/**
* TIP Proxy Agent CLI
*
* Usage:
* tip-agent start --token TOKEN [--port 1080] [--max-gb 10] [--name "My Node"]
* tip-agent status
* tip-agent stop
*/
import { startAgent } from "./agent";
import { loadConfig, saveConfig, loadPid, clearPid } from "./config";
const AGENT_VERSION = "1.0.0";
function parseArgs(argv: string[]): Record<string, string> {
const args: Record<string, string> = {};
for (let i = 0; i < argv.length; i++) {
const arg = argv[i];
if (arg && arg.startsWith("--")) {
const key = arg.slice(2);
const next = argv[i + 1];
if (next && !next.startsWith("--")) {
args[key] = next;
i++;
} else {
args[key] = "true";
}
}
}
return args;
}
function isProcessRunning(pid: number): boolean {
try {
process.kill(pid, 0);
return true;
} catch {
return false;
}
}
function cmdStart(rawArgs: string[]): void {
const args = parseArgs(rawArgs);
const token = args["token"];
if (!token) {
console.error("Error: --token is required");
console.error(" tip-agent start --token YOUR_TOKEN [--port 1080] [--max-gb 10] [--name 'My Node']");
process.exit(1);
}
const cfg = saveConfig({
token,
port: args["port"] ? parseInt(args["port"], 10) : 1080,
maxGb: args["max-gb"] ? parseFloat(args["max-gb"]) : 10,
name: args["name"] ?? undefined,
serverUrl: args["server"] ?? "https://transceiver-db.context-x.org",
});
console.log(`\nTIP Proxy Agent v${AGENT_VERSION}`);
console.log("==========================================");
console.log(` Token: ${cfg.token.slice(0, 8)}...`);
console.log(` Port: ${cfg.port}`);
console.log(` Max GB: ${cfg.maxGb}`);
console.log(` Name: ${cfg.name}`);
console.log(` Server: ${cfg.serverUrl}`);
console.log("==========================================\n");
startAgent(cfg).catch((err) => {
console.error("[agent] Fatal error:", err.message);
process.exit(1);
});
}
function cmdStatus(): void {
const cfg = loadConfig();
const pid = loadPid();
if (!cfg || !pid) {
console.log("TIP Agent: not running");
process.exit(0);
}
const running = isProcessRunning(pid);
console.log(`TIP Agent: ${running ? "RUNNING" : "STOPPED (stale PID)"}`);
console.log(` PID: ${pid}`);
console.log(` Port: ${cfg.port}`);
console.log(` Token: ${cfg.token.slice(0, 8)}...`);
console.log(` Max GB: ${cfg.maxGb}`);
if (cfg.startedAt) {
console.log(` Started: ${cfg.startedAt}`);
}
if (!running) {
clearPid();
}
}
function cmdStop(): void {
const pid = loadPid();
if (!pid) {
console.log("TIP Agent: not running");
process.exit(0);
}
try {
process.kill(pid, "SIGTERM");
console.log(`TIP Agent (PID ${pid}) stopped.`);
clearPid();
} catch {
console.log(`TIP Agent: process ${pid} not found (already stopped)`);
clearPid();
}
}
function printHelp(): void {
console.log(`
TIP Proxy Agent v${AGENT_VERSION}
Contribute bandwidth to the TIP network, get free API access.
Usage:
tip-agent start --token TOKEN [options]
tip-agent status
tip-agent stop
Commands:
start Start the proxy agent
status Show agent status
stop Stop the agent
Options:
--token TOKEN Your TIP node token (required for start)
--port N SOCKS5 listen port (default: 1080)
--max-gb N Max bandwidth to donate in GB (default: 10)
--name NAME Node name (default: hostname)
--server URL TIP server URL (default: https://transceiver-db.context-x.org)
Example:
tip-agent start --token abc123 --port 1080 --max-gb 10 --name "Home Node"
`);
}
const [, , command, ...rest] = process.argv;
switch (command) {
case "start":
cmdStart(rest);
break;
case "status":
cmdStatus();
break;
case "stop":
cmdStop();
break;
case "--help":
case "-h":
case "help":
printHelp();
break;
default:
if (command) {
console.error(`Unknown command: ${command}`);
}
printHelp();
process.exit(command ? 1 : 0);
}

View File

@ -1,181 +0,0 @@
/**
* SOCKS5 proxy server built on Node.js net module, no external deps.
*
* Implements:
* - SOCKS5 handshake (RFC 1928)
* - No-auth method (0x00)
* - CONNECT command (TCP tunnel)
* - Byte tracking per connection
*/
import * as net from "net";
import { BandwidthTracker } from "./bandwidth";
const SOCKS_VER = 0x05;
const NO_AUTH = 0x00;
const CMD_CONNECT = 0x01;
const ATYP_IPV4 = 0x01;
const ATYP_DOMAIN = 0x03;
const ATYP_IPV6 = 0x04;
const REP_SUCCESS = 0x00;
const REP_FAIL = 0x01;
const CONN_TIMEOUT_MS = 30_000;
export interface Socks5ServerOptions {
port: number;
host?: string;
onNewConnection?: () => boolean; // return false to reject (bandwidth limit)
bandwidth?: BandwidthTracker;
}
function buildReply(rep: number, ip = "0.0.0.0", port = 0): Buffer {
const parts = ip.split(".").map(Number);
const buf = Buffer.alloc(10);
buf[0] = SOCKS_VER;
buf[1] = rep;
buf[2] = 0x00; // reserved
buf[3] = ATYP_IPV4;
buf[4] = parts[0] ?? 0;
buf[5] = parts[1] ?? 0;
buf[6] = parts[2] ?? 0;
buf[7] = parts[3] ?? 0;
buf.writeUInt16BE(port, 8);
return buf;
}
function handleClient(
socket: net.Socket,
opts: Socks5ServerOptions
): void {
socket.setTimeout(CONN_TIMEOUT_MS);
socket.on("timeout", () => socket.destroy());
socket.on("error", () => socket.destroy());
let state: "greeting" | "request" | "connected" = "greeting";
let buf = Buffer.alloc(0);
socket.on("data", (chunk) => {
buf = Buffer.concat([buf, chunk]);
if (state === "greeting") {
// Need at least: VER(1) + NMETHODS(1) + methods
if (buf.length < 2) return;
const nmethods = buf[1] ?? 0;
if (buf.length < 2 + nmethods) return;
const methods = buf.slice(2, 2 + nmethods);
buf = buf.slice(2 + nmethods);
if (!methods.includes(NO_AUTH)) {
socket.write(Buffer.from([SOCKS_VER, 0xFF])); // no acceptable method
socket.destroy();
return;
}
socket.write(Buffer.from([SOCKS_VER, NO_AUTH]));
state = "request";
return;
}
if (state === "request") {
// Minimum: VER(1) CMD(1) RSV(1) ATYP(1) + address + port(2)
if (buf.length < 4) return;
const cmd = buf[1];
const atyp = buf[3];
let host: string;
let port: number;
let consumed: number;
if (atyp === ATYP_IPV4) {
if (buf.length < 10) return;
host = `${buf[4]}.${buf[5]}.${buf[6]}.${buf[7]}`;
port = buf.readUInt16BE(8);
consumed = 10;
} else if (atyp === ATYP_DOMAIN) {
if (buf.length < 5) return;
const domainLen = buf[4] ?? 0;
if (buf.length < 5 + domainLen + 2) return;
host = buf.slice(5, 5 + domainLen).toString("utf8");
port = buf.readUInt16BE(5 + domainLen);
consumed = 5 + domainLen + 2;
} else if (atyp === ATYP_IPV6) {
if (buf.length < 22) return;
const parts: string[] = [];
for (let i = 0; i < 8; i++) {
parts.push(buf.readUInt16BE(4 + i * 2).toString(16));
}
host = parts.join(":");
port = buf.readUInt16BE(20);
consumed = 22;
} else {
socket.write(buildReply(0x08)); // address type not supported
socket.destroy();
return;
}
buf = buf.slice(consumed);
if (cmd !== CMD_CONNECT) {
socket.write(buildReply(0x07)); // command not supported
socket.destroy();
return;
}
// Check bandwidth limit before accepting
if (opts.onNewConnection && !opts.onNewConnection()) {
socket.write(buildReply(REP_FAIL));
socket.destroy();
return;
}
opts.bandwidth?.incrementRequests();
const target = net.createConnection({ host, port }, () => {
socket.write(buildReply(REP_SUCCESS));
state = "connected";
// Pipe remaining buffered bytes
if (buf.length > 0) {
target.write(buf);
buf = Buffer.alloc(0);
}
// Bidirectional pipe with byte counting
socket.on("data", (d) => {
opts.bandwidth?.addBytes(d.length);
if (!target.destroyed) target.write(d);
});
target.on("data", (d) => {
opts.bandwidth?.addBytes(d.length);
if (!socket.destroyed) socket.write(d);
});
socket.on("end", () => { if (!target.destroyed) target.end(); });
target.on("end", () => { if (!socket.destroyed) socket.end(); });
});
target.setTimeout(CONN_TIMEOUT_MS);
target.on("timeout", () => { target.destroy(); socket.destroy(); });
target.on("error", () => {
if (!socket.destroyed) {
socket.write(buildReply(REP_FAIL));
socket.destroy();
}
});
}
});
}
export function createSocks5Server(opts: Socks5ServerOptions): net.Server {
const server = net.createServer((socket) => {
handleClient(socket, opts);
});
server.on("error", (err) => {
console.error("[SOCKS5] Server error:", err.message);
});
return server;
}

View File

@ -1,18 +0,0 @@
{
"compilerOptions": {
"target": "ES2020",
"module": "commonjs",
"lib": ["ES2020"],
"outDir": "./dist",
"rootDir": "./src",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"resolveJsonModule": true,
"declaration": true,
"declarationMap": true,
"sourceMap": true
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist"]
}

View File

@ -51,9 +51,7 @@ export async function createScheduler(): Promise<PgBoss> {
retryDelay: 30,
retryBackoff: true,
expireInSeconds: 300,
monitorStateIntervalSeconds: 60,
max: 4, // pg-boss internal connection pool
poolSize: 4, // alias used by some pg-boss versions
monitorStateIntervalSeconds: 30,
});
boss.on("error", (error) => console.error("pg-boss error:", error));

View File

@ -147,7 +147,6 @@ interface Product {
reachMeters?: number;
fiberType?: string;
wavelength?: string;
imageUrl?: string;
}
function sleep(ms: number): Promise<void> {
@ -433,7 +432,6 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
name
sku
url_key
small_image { url }
price_range {
minimum_price {
final_price { value currency }
@ -459,7 +457,6 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
name: string;
sku: string;
url_key: string;
small_image?: { url?: string };
price_range?: {
minimum_price?: {
final_price?: { value: number; currency: string };
@ -491,9 +488,6 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
const reach = detectReach(item.name);
const price = item.price_range?.minimum_price?.final_price?.value;
const rawImg = item.small_image?.url;
const imageUrl = rawImg && !rawImg.includes("placeholder") ? rawImg : undefined;
allProducts.set(url, {
name: item.name,
partNumber: item.sku,
@ -507,7 +501,6 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
reachMeters: reach?.meters,
fiberType: detectFiber(item.name),
wavelength: detectWavelength(item.name),
imageUrl,
});
newCount++;
}
@ -545,7 +538,6 @@ export async function scrapeFlexoptixCatalog(): Promise<void> {
fiberType: product.fiberType,
wavelengths: product.wavelength,
category: "DataCenter",
imageUrl: product.imageUrl,
});
if (product.price && product.price > 0) {

View File

@ -99,12 +99,9 @@ function parseProductList(html: string): Product[] {
const name = match[2].trim();
if (name.length < 10 || name.length > 200) continue;
// Look for price in surrounding context — BigCommerce uses data-product-price-without-tax
const context = html.slice(Math.max(0, match.index - 300), match.index + 1200);
const priceMatch = context.match(/data-product-price-without-tax[^>]*>\s*\$([\d,]+\.?\d{0,2})/)
|| context.match(/class="price price--withoutTax">\s*\$([\d,]+\.?\d{0,2})/)
|| context.match(/\$\s*([\d,]+\.?\d{0,2})/);
// Look for price in surrounding context
const context = html.slice(Math.max(0, match.index - 300), match.index + 600);
const priceMatch = context.match(/\$\s*([\d,]+\.?\d{0,2})/) || context.match(/data-product-price="([\d.]+)"/);
const price = priceMatch ? parseFloat(priceMatch[1].replace(",", "")) : undefined;
const ff = detectFormFactor(name);

View File

@ -47,7 +47,6 @@ interface OptcoreProduct {
speedGbps?: number;
speed?: string;
reachLabel?: string;
imageUrl?: string;
}
function detectFormFactor(text: string): string | undefined {
@ -209,13 +208,7 @@ export async function scrapeOptcore(): Promise<void> {
const stockEl = document.querySelector(".stock, .availability, [class*=\"stock\"]");
const stockText = stockEl?.textContent?.trim() || "";
// Product image — WooCommerce product gallery
const imgEl = document.querySelector(
".woocommerce-product-gallery__image img, .wp-post-image, img.attachment-woocommerce_single"
) as HTMLImageElement | null;
const imageUrl = imgEl?.src || imgEl?.getAttribute("data-src") || "";
return { title, priceText, stockText, imageUrl };
return { title, priceText, stockText };
});
const meta = metaByUrl.get(url);
@ -236,7 +229,6 @@ export async function scrapeOptcore(): Promise<void> {
speedGbps: speedInfo?.speedGbps,
speed: speedInfo?.speed,
reachLabel: detectReach(name),
imageUrl: data.imageUrl || undefined,
});
}
@ -270,7 +262,6 @@ export async function scrapeOptcore(): Promise<void> {
speed: p.speed,
reachLabel: p.reachLabel,
category: "DataCenter",
imageUrl: p.imageUrl,
});
const hash = contentHash({ price: p.price, stock: p.stockLevel });

View File

@ -1,55 +1,163 @@
/**
* SmartOptics Scraper Premium coherent/DWDM transceiver manufacturer
* SmartOptics Scraper Manufacturer, quote-based pricing
*
* smartoptics.com WordPress site, no prices (B2B, RFQ model).
* Scrapes product catalog for specs, images, datasheets.
* Products listed at /products/optical-transceivers/ individual /product/SKU/ pages.
* www.smartoptics.com no direct prices, catalog-only scrape.
* Extracts product names, part numbers, form factors and specs.
*
* Rate limited: 1 req/2sec.
*/
import { pool, findOrCreateScrapedTransceiver, ensureVendor } from "../utils/db";
import * as cheerio from "cheerio";
const BASE = "https://smartoptics.com";
const CATALOG_URL = `${BASE}/products/optical-transceivers/`;
const BASE = "https://www.smartoptics.com";
const CATALOG_URLS = [
"/products/transceivers/",
"/products/",
"/products/sfp-transceivers/",
"/products/qsfp-transceivers/",
];
const HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
Accept: "text/html,application/xhtml+xml",
"Accept-Language": "en-US,en;q=0.9",
};
interface Product {
partNumber: string;
name: string;
url: string;
formFactor: string;
speed: string;
speedGbps: number;
reachLabel?: string;
reachMeters?: number;
fiberType?: string;
wavelength?: string;
}
function sleep(ms: number): Promise<void> {
return new Promise((r) => setTimeout(r, ms));
return new Promise((resolve) => setTimeout(resolve, ms));
}
function detectFormFactor(text: string): { formFactor: string; speed: string; speedGbps: number } {
const t = text.toLowerCase();
if (t.includes("qsfp-dd800") || t.includes("sfp-dd800") || t.includes("800ge")) return { formFactor: "QSFP-DD", speed: "800G", speedGbps: 800 };
if (t.includes("qsfp-dd") || (t.includes("400g") && t.includes("qsfp"))) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
if (t.includes("qsfp112")) return { formFactor: "QSFP112", speed: "400G", speedGbps: 400 };
if (t.includes("qsfp56")) return { formFactor: "QSFP56", speed: "200G", speedGbps: 200 };
if (t.includes("qsfp28") || t.includes("100ge") || t.includes("100g")) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
if (t.includes("sfp28") || t.includes("25g")) return { formFactor: "SFP28", speed: "25G", speedGbps: 25 };
if (t.includes("qsfp+") || t.includes("40g")) return { formFactor: "QSFP+", speed: "40G", speedGbps: 40 };
if (t.includes("sfp+") || t.includes("10g")) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
if (t.includes("sfp")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
const lower = text.toLowerCase();
if (lower.includes("osfp") && !lower.includes("qsfp")) return { formFactor: "OSFP", speed: "400G", speedGbps: 400 };
if (lower.includes("qsfp-dd")) return { formFactor: "QSFP-DD", speed: "400G", speedGbps: 400 };
if (lower.includes("qsfp28")) return { formFactor: "QSFP28", speed: "100G", speedGbps: 100 };
if (lower.includes("qsfp+") || lower.includes("qsfp plus")) return { formFactor: "QSFP+", speed: "40G", speedGbps: 40 };
if (lower.includes("sfp56")) return { formFactor: "SFP56", speed: "50G", speedGbps: 50 };
if (lower.includes("sfp28") || lower.includes("25g")) return { formFactor: "SFP28", speed: "25G", speedGbps: 25 };
if (lower.includes("sfp+") || lower.includes("10gbase") || lower.includes("10g")) return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
if (lower.includes("xfp")) return { formFactor: "XFP", speed: "10G", speedGbps: 10 };
if (lower.includes("1000base") || lower.includes("1g")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
if (lower.includes("sfp") && !lower.includes("qsfp")) return { formFactor: "SFP", speed: "1G", speedGbps: 1 };
return { formFactor: "SFP+", speed: "10G", speedGbps: 10 };
}
function detectReach(text: string): { label: string; meters: number } | undefined {
const kmMatch = text.match(/(\d+)\s*km/i);
if (kmMatch) {
const km = parseInt(kmMatch[1]);
return { label: `${km}km`, meters: km * 1000 };
}
const mMatch = text.match(/(\d+)\s*m\b/i);
if (mMatch) {
const m = parseInt(mMatch[1]);
return { label: `${m}m`, meters: m };
const patterns: [RegExp, string, number][] = [
[/\b80\s*km\b/i, "80km", 80000],
[/\b40\s*km\b/i, "40km", 40000],
[/\b20\s*km\b/i, "20km", 20000],
[/\b10\s*km\b/i, "10km", 10000],
[/\b2\s*km\b/i, "2km", 2000],
[/\b550\s*m\b/i, "550m", 550],
[/\b300\s*m\b/i, "300m", 300],
[/\b100\s*m\b/i, "100m", 100],
[/\bLR4\b/, "10km", 10000], [/\bLR\b/, "10km", 10000],
[/\bER4?\b/, "40km", 40000], [/\bZR4?\b/, "80km", 80000],
[/\bSR4?\b/, "300m", 300], [/\bDR4?\b/, "500m", 500], [/\bFR4?\b/, "2km", 2000],
];
for (const [regex, label, meters] of patterns) {
if (regex.test(text)) return { label, meters };
}
return undefined;
}
function detectFiber(text: string): string {
if (/dwdm|cwdm|coherent|coh|single.?mode|smf/i.test(text)) return "SMF";
if (/multi.?mode|mmf|sr/i.test(text)) return "MMF";
return "SMF"; // SmartOptics is almost exclusively SMF/coherent
if (/single.?mode|smf|[^a-z]lx[^a-z]|[^a-z]lr[^a-z]|[^a-z]er[^a-z]|[^a-z]zr[^a-z]|bidi|cwdm|dwdm/i.test(text)) return "SMF";
if (/multi.?mode|mmf|[^a-z]sx[^a-z]|[^a-z]sr[^a-z]/i.test(text)) return "MMF";
if (/copper|dac|twinax|rj45|base-t/i.test(text)) return "Copper";
return "SMF";
}
function detectWavelength(text: string): string {
const match = text.match(/(\d{3,4})\s*nm/i);
return match ? match[1] : "";
}
function parseProductList(html: string, sourceUrl: string): Product[] {
const $ = cheerio.load(html);
const products: Product[] = [];
// Manufacturer site: product cards or table rows
const cardSelectors = [
".product-item", ".product-card", ".product", "tr",
"article", ".item", ".col-xs-12.col-sm-6", ".entry",
];
for (const sel of cardSelectors) {
if ($(sel).length >= 2) {
$(sel).each((_i, el) => {
const text = $(el).text().trim();
if (!/sfp|qsfp|xfp|transceiver|optic/i.test(text)) return;
const nameEl = $(el).find("h2, h3, h4, td, .title, .name, a").first();
const name = nameEl.text().trim() || text.slice(0, 100);
if (!name || name.length < 5) return;
const linkEl = $(el).find("a[href]").first();
const href = linkEl.attr("href") || sourceUrl;
const url = href.startsWith("http") ? href : BASE + href;
// Part number: look for alphanumeric patterns typical of transceiver SKUs
const partNumMatch = text.match(/\b([A-Z]{2,}[-_][A-Z0-9]{2,}[-_][A-Z0-9]+)\b/) ||
text.match(/\b(SO[-_][A-Z0-9]+)\b/i) ||
name.match(/[A-Z0-9][-A-Z0-9]{5,}/);
const partNumber = partNumMatch?.[0] || name.replace(/\s+/g, "-").slice(0, 60);
const ff = detectFormFactor(name + " " + text);
const reach = detectReach(name + " " + text);
products.push({
partNumber,
name,
url,
...ff,
reachLabel: reach?.label,
reachMeters: reach?.meters,
fiberType: detectFiber(name + " " + text),
wavelength: detectWavelength(name + " " + text),
});
});
if (products.length > 0) break;
}
}
// Fallback: all transceiver-relevant anchors
if (products.length === 0) {
$("a[href]").each((_i, el) => {
const name = $(el).text().trim();
const href = $(el).attr("href") || "";
if (name.length < 5 || name.length > 200 || !/sfp|qsfp|transceiver|optic/i.test(name)) return;
const url = href.startsWith("http") ? href : BASE + href;
const ff = detectFormFactor(name);
const reach = detectReach(name);
products.push({
partNumber: name.match(/[A-Z0-9][-A-Z0-9]{5,}/)?.[0] || name.replace(/\s+/g, "-").slice(0, 60),
name, url, ...ff,
reachLabel: reach?.label, reachMeters: reach?.meters,
fiberType: detectFiber(name), wavelength: detectWavelength(name),
});
});
}
const seen = new Set<string>();
return products.filter((p) => {
if (!p.url || seen.has(p.url)) return false;
seen.add(p.url);
return true;
});
}
async function fetchPage(url: string): Promise<string> {
@ -58,108 +166,46 @@ async function fetchPage(url: string): Promise<string> {
return resp.text();
}
function extractProductUrls(html: string): string[] {
const urls = new Set<string>();
const regex = /href="(https?:\/\/smartoptics\.com\/product\/[^"]+)"/gi;
let m: RegExpExecArray | null;
while ((m = regex.exec(html)) !== null) {
const u = m[1].replace(/\/$/, "") + "/";
urls.add(u);
}
return Array.from(urls);
}
interface ProductData {
sku: string;
name: string;
url: string;
imageUrl?: string;
formFactor: string;
speed: string;
speedGbps: number;
reachLabel?: string;
reachMeters?: number;
fiberType: string;
coherent: boolean;
wdmType?: string;
}
async function scrapeProductPage(url: string): Promise<ProductData | null> {
try {
const html = await fetchPage(url);
const nameMatch = html.match(/<h1[^>]*>([^<]+)<\/h1>/) || html.match(/og:title" content="([^"]+)"/);
const name = nameMatch ? nameMatch[1].trim().replace(/ \| Smartoptics$/, "") : "";
if (!name) return null;
const sku = url.split("/").filter(Boolean).pop()?.toUpperCase() || name.replace(/\s+/g, "-");
const imgMatch = html.match(/property="og:image" content="([^"]+)"/)
|| html.match(/<img[^>]+src="([^"]*wp-content\/uploads[^"]*\.(?:png|jpg|webp))"[^>]* class="[^"]*product/i);
const imageUrl = imgMatch ? imgMatch[1] : undefined;
const ff = detectFormFactor(name);
const reach = detectReach(name);
const coherent = /coherent|coh-t|coh\.|dwdm|dp-qpsk|qpsk|cfp2/i.test(name + html.slice(0, 3000));
const wdmType = /dwdm/i.test(name) ? "DWDM" : /cwdm/i.test(name) ? "CWDM" : undefined;
return {
sku, name, url, imageUrl,
...ff,
reachLabel: reach?.label,
reachMeters: reach?.meters,
fiberType: detectFiber(name),
coherent,
wdmType,
};
} catch (err) {
console.warn(` Failed ${url}: ${(err as Error).message}`);
return null;
}
}
export async function scrapeSmartOptics(): Promise<void> {
console.log("=== SmartOptics Scraper Starting ===\n");
console.log("Note: SmartOptics is B2B — no public prices. Scraping specs + images only.\n");
console.log("=== SmartOptics Scraper Starting (catalog-only, no prices) ===\n");
const vendorId = await ensureVendor(
"SmartOptics",
"manufacturer",
"https://www.smartoptics.com",
"https://smartoptics.com/products/optical-transceivers/"
BASE + CATALOG_URLS[0],
);
const productUrls = new Set<string>();
for (let page = 1; page <= 10; page++) {
const allProducts: Product[] = [];
const seenUrls = new Set<string>();
for (const catalogPath of CATALOG_URLS) {
const catalogUrl = BASE + catalogPath;
console.log(` Fetching catalog: ${catalogUrl}`);
try {
const url = page === 1 ? CATALOG_URL : `${CATALOG_URL}page/${page}/`;
const html = await fetchPage(url);
const urls = extractProductUrls(html);
if (urls.length === 0) break;
urls.forEach((u) => productUrls.add(u));
console.log(` Catalog page ${page}: ${urls.length} products`);
await sleep(1500);
} catch {
break;
const html = await fetchPage(catalogUrl);
const pageProducts = parseProductList(html, catalogUrl);
for (const p of pageProducts) {
if (!seenUrls.has(p.url)) {
seenUrls.add(p.url);
allProducts.push(p);
}
}
console.log(` Found ${pageProducts.length} products`);
await sleep(2000);
} catch (err) {
console.warn(` Failed ${catalogPath}: ${(err as Error).message}`);
}
}
console.log(`\nTotal product URLs: ${productUrls.size}`);
if (productUrls.size === 0) {
console.log("No products found — site may have changed structure");
return;
}
console.log(`\nTotal unique products: ${allProducts.length}`);
let saved = 0;
let withImages = 0;
for (const url of productUrls) {
const product = await scrapeProductPage(url);
if (!product) continue;
let totalProducts = 0;
for (const product of allProducts) {
try {
await findOrCreateScrapedTransceiver({
partNumber: product.sku,
partNumber: product.partNumber,
vendorId,
formFactor: product.formFactor,
speedGbps: product.speedGbps,
@ -167,20 +213,16 @@ export async function scrapeSmartOptics(): Promise<void> {
reachMeters: product.reachMeters,
reachLabel: product.reachLabel,
fiberType: product.fiberType,
wavelengths: product.wdmType ? "DWDM-tunable" : undefined,
category: product.coherent ? "Coherent" : "DataCenter",
imageUrl: product.imageUrl,
wavelengths: product.wavelength,
category: "DataCenter",
});
saved++;
if (product.imageUrl) withImages++;
console.log(`${product.sku}${product.name.slice(0, 60)}`);
totalProducts++;
} catch (err) {
console.warn(` Error saving ${product.sku}: ${(err as Error).message.slice(0, 80)}`);
console.warn(` Error saving ${product.partNumber}: ${(err as Error).message.slice(0, 80)}`);
}
await sleep(1500);
}
console.log(`\n=== SmartOptics Complete: ${saved} products, ${withImages} with images ===`);
console.log(`\n=== SmartOptics Complete: ${totalProducts} products catalogued (no prices — quote-based) ===`);
}
if (require.main === module) {

View File

@ -1,540 +0,0 @@
/**
* Image Backfill Script
*
* Fills `image_url` for transceivers that currently have none.
* Priority order:
* 1. Optcore (2,580 products) WP REST API featured_media per product
* 2. Flexoptix (344 products) Magento GraphQL image fields
* 3. GAO Tek (413 products) product page og:image scrape
* 4. Other vendors og:image from stored price_observations URLs
*
* Run on Erik:
* node packages/scraper/dist/utils/backfill-images.js
*
* Or locally with SSH port-forward:
* ssh -L 5433:127.0.0.1:5433 erik -N &
* node dist/utils/backfill-images.js
*/
import { pool } from "./db";
import { logger } from "./logger";
function sleep(ms: number): Promise<void> {
return new Promise((r) => setTimeout(r, ms));
}
async function updateImageUrl(id: string, imageUrl: string): Promise<void> {
await pool.query(
`UPDATE transceivers SET image_url = $1, image_scraped_at = NOW(), has_image = TRUE WHERE id = $2`,
[imageUrl, id]
);
}
async function fetchJson(url: string, init?: RequestInit): Promise<unknown> {
const resp = await fetch(url, {
...init,
headers: {
"User-Agent": "Mozilla/5.0 (compatible; TIP-ImageBackfill/1.0)",
Accept: "application/json",
...(init?.headers ?? {}),
},
signal: AbortSignal.timeout(20000),
});
if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
return resp.json();
}
async function fetchHtml(url: string): Promise<string> {
const resp = await fetch(url, {
headers: {
"User-Agent": "Mozilla/5.0 (compatible; TIP-ImageBackfill/1.0)",
Accept: "text/html",
},
signal: AbortSignal.timeout(20000),
});
if (!resp.ok) throw new Error(`HTTP ${resp.status} for ${url}`);
return resp.text();
}
// =============================================================================
// Optcore
// Strategy:
// Step 1 — enumerate all products per category, collect slug -> featured_media id
// Step 2 — resolve unique media IDs individually via /wp-json/wp/v2/media/<id>
// Step 3 — update DB for matched slugs
//
// Note: _embed does NOT work for the "product" custom post type on Optcore.
// We must resolve media IDs separately.
// =============================================================================
const OPTCORE_BASE = "https://www.optcore.net";
const OPTCORE_CATEGORY_IDS = [
309, 173, 76, 79, 73, 311, 313, 312, 333, 1088,
59, 1102, 4097, 77, 4101, 4092, 6441,
];
interface OptcoreWpProductRaw {
slug: string;
featured_media: number;
}
interface OptcoreMediaRaw {
id: number;
source_url: string;
media_details?: {
sizes?: {
medium?: { source_url: string };
large?: { source_url: string };
};
};
}
async function fetchOptcoreMediaUrl(mediaId: number): Promise<string | null> {
if (mediaId === 0) return null;
try {
const data = await fetchJson(
`${OPTCORE_BASE}/wp-json/wp/v2/media/${mediaId}?_fields=id,source_url,media_details`
) as OptcoreMediaRaw;
return (
data.media_details?.sizes?.medium?.source_url ||
data.media_details?.sizes?.large?.source_url ||
data.source_url ||
null
);
} catch {
return null;
}
}
async function backfillOptcore(): Promise<{ updated: number; skipped: number; notFound: number }> {
logger.info("=== Optcore image backfill starting ===");
const rows = await pool.query(`
SELECT t.id, t.part_number
FROM transceivers t
JOIN vendors v ON t.vendor_id = v.id
WHERE v.name = 'Optcore' AND (t.image_url IS NULL OR t.image_url = '')
`);
// Map: part_number (lowercase) -> DB id
const byPartNumber = new Map<string, string>();
for (const row of rows.rows) {
byPartNumber.set((row.part_number as string).toLowerCase(), row.id as string);
}
logger.info(`Optcore: ${byPartNumber.size} products need images`);
if (byPartNumber.size === 0) {
return { updated: 0, skipped: 0, notFound: 0 };
}
// Step 1: enumerate all products across all transceiver categories
const slugToMediaId = new Map<string, number>();
const seenSlugs = new Set<string>();
for (const catId of OPTCORE_CATEGORY_IDS) {
let page = 1;
while (true) {
const apiUrl = `${OPTCORE_BASE}/wp-json/wp/v2/product?product_cat=${catId}&per_page=100&page=${page}&_fields=slug,featured_media`;
try {
const resp = await fetch(apiUrl, {
headers: { "User-Agent": "Mozilla/5.0 (compatible; TIP-ImageBackfill/1.0)" },
signal: AbortSignal.timeout(20000),
});
if (!resp.ok) break;
const totalPages = parseInt(resp.headers.get("X-WP-TotalPages") || "1", 10);
const products = (await resp.json()) as OptcoreWpProductRaw[];
for (const p of products) {
if (seenSlugs.has(p.slug)) continue;
seenSlugs.add(p.slug);
if (p.featured_media) {
slugToMediaId.set(p.slug.toLowerCase(), p.featured_media);
}
}
if (page >= totalPages) break;
page++;
} catch (err) {
logger.warn(`Optcore cat=${catId} page=${page} fetch failed`, { error: (err as Error).message });
break;
}
await sleep(100);
}
}
logger.info(`Optcore: enumerated ${seenSlugs.size} WP products, ${slugToMediaId.size} have featured media`);
// Step 2: resolve unique media IDs
const uniqueMediaIds = new Set(slugToMediaId.values());
logger.info(`Optcore: resolving ${uniqueMediaIds.size} unique media IDs`);
const mediaIdToUrl = new Map<number, string>();
let resolved = 0;
for (const mediaId of uniqueMediaIds) {
const url = await fetchOptcoreMediaUrl(mediaId);
if (url) mediaIdToUrl.set(mediaId, url);
resolved++;
if (resolved % 20 === 0) {
logger.info(`Optcore media resolution: ${resolved}/${uniqueMediaIds.size}`);
}
await sleep(100);
}
// Step 3: match slugs to DB records and update
let updated = 0;
let skipped = 0;
let notFound = 0;
for (const [slug, mediaId] of slugToMediaId) {
const dbId = byPartNumber.get(slug);
if (!dbId) {
notFound++;
continue;
}
const imgUrl = mediaIdToUrl.get(mediaId);
if (!imgUrl) {
skipped++;
continue;
}
await updateImageUrl(dbId, imgUrl);
byPartNumber.delete(slug);
updated++;
if (updated % 50 === 0) {
logger.info(`Optcore DB updates: ${updated} so far`);
}
}
logger.info(`Optcore done: ${updated} updated, ${skipped} no-media-url, ${notFound} slug-not-in-db`);
return { updated, skipped, notFound };
}
// =============================================================================
// Flexoptix
// Magento GraphQL: /graphql — query by SKU, returns small_image.url
// =============================================================================
const FLEXOPTIX_BASE = "https://www.flexoptix.net";
const FLEXOPTIX_GRAPHQL = `${FLEXOPTIX_BASE}/graphql`;
interface FlexoptixGqlProduct {
name: string;
sku: string;
url_key: string;
small_image: { url: string } | null;
image: { url: string } | null;
thumbnail: { url: string } | null;
}
async function fetchFlexoptixImage(sku: string): Promise<string | null> {
// Strip ":Sx" coding suffix to get canonical SKU for search
const canonicalSku = sku.replace(/:.*$/, "").trim();
const query = `{
products(search: ${JSON.stringify(canonicalSku)}, pageSize: 5) {
items {
name
sku
url_key
small_image { url }
image { url }
thumbnail { url }
}
}
}`;
const data = await fetchJson(FLEXOPTIX_GRAPHQL, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ query }),
}) as {
data?: { products?: { items: FlexoptixGqlProduct[] } };
};
const items = data.data?.products?.items ?? [];
// Prefer exact SKU match (canonical, case-insensitive)
for (const item of items) {
const itemSku = item.sku.replace(/:.*$/, "").trim().toLowerCase();
if (itemSku === canonicalSku.toLowerCase()) {
return (
item.small_image?.url ||
item.image?.url ||
item.thumbnail?.url ||
null
);
}
}
// Fallback: single result
if (items.length === 1) {
return (
items[0].small_image?.url ||
items[0].image?.url ||
items[0].thumbnail?.url ||
null
);
}
return null;
}
async function backfillFlexoptix(): Promise<{ updated: number; skipped: number; errors: number }> {
logger.info("=== Flexoptix image backfill starting ===");
const rows = await pool.query(`
SELECT t.id, t.part_number
FROM transceivers t
JOIN vendors v ON t.vendor_id = v.id
WHERE v.name = 'FLEXOPTIX' AND (t.image_url IS NULL OR t.image_url = '')
ORDER BY t.part_number
`);
logger.info(`Flexoptix: ${rows.rows.length} products need images`);
let updated = 0;
let skipped = 0;
let errors = 0;
for (const row of rows.rows) {
const sku = row.part_number as string;
try {
const imgUrl = await fetchFlexoptixImage(sku);
if (imgUrl) {
await updateImageUrl(row.id as string, imgUrl);
updated++;
if (updated % 25 === 0) {
logger.info(`Flexoptix progress: ${updated} updated so far`);
}
} else {
skipped++;
}
} catch (err) {
logger.error(`Flexoptix error for ${sku}`, { error: (err as Error).message });
errors++;
}
await sleep(100);
}
logger.info(`Flexoptix done: ${updated} updated, ${skipped} no-image, ${errors} errors`);
return { updated, skipped, errors };
}
// =============================================================================
// GAO Tek
// Fetch product page and extract og:image meta tag
// =============================================================================
const GAOTEK_BASE = "https://gaotek.com";
function extractOgImage(html: string): string | null {
const match =
html.match(/<meta[^>]+property=["']og:image["'][^>]+content=["']([^"']+)["']/i) ||
html.match(/<meta[^>]+content=["']([^"']+)["'][^>]+property=["']og:image["']/i);
if (!match) return null;
const url = match[1];
// Skip placeholder and logo images
if (url.includes("placeholder") || url.includes("logo") || url.includes("mobilelogo")) return null;
return url;
}
async function fetchGaoTekImage(productUrl: string): Promise<string | null> {
const html = await fetchHtml(productUrl);
return extractOgImage(html);
}
async function backfillGaoTek(): Promise<{ updated: number; skipped: number; errors: number }> {
logger.info("=== GAO Tek image backfill starting ===");
// Prefer records that have stored product URLs in price_observations
const withUrlRows = await pool.query(`
SELECT DISTINCT ON (t.id)
t.id, t.part_number, t.slug, po.url as product_url
FROM transceivers t
JOIN vendors v ON t.vendor_id = v.id
LEFT JOIN price_observations po ON po.transceiver_id = t.id
WHERE v.name = 'GAO Tek'
AND (t.image_url IS NULL OR t.image_url = '')
AND po.url IS NOT NULL
AND po.url LIKE '%gaotek%'
ORDER BY t.id, po.time DESC
`);
// All records without images
const allRows = await pool.query(`
SELECT t.id, t.part_number, t.slug
FROM transceivers t
JOIN vendors v ON t.vendor_id = v.id
WHERE v.name = 'GAO Tek'
AND (t.image_url IS NULL OR t.image_url = '')
`);
const withUrlIds = new Set(withUrlRows.rows.map((r) => r.id as string));
// Combine: known URLs first, then slug-derived URLs
const toProcess: Array<{ id: string; partNumber: string; productUrl: string }> = [];
for (const row of withUrlRows.rows) {
toProcess.push({
id: row.id as string,
partNumber: row.part_number as string,
productUrl: row.product_url as string,
});
}
for (const row of allRows.rows) {
if (withUrlIds.has(row.id as string)) continue;
const rawSlug = (row.slug as string).replace(/^scraped-/, "");
toProcess.push({
id: row.id as string,
partNumber: row.part_number as string,
productUrl: `${GAOTEK_BASE}/product/${rawSlug}/`,
});
}
logger.info(`GAO Tek: ${toProcess.length} products to attempt`);
let updated = 0;
let skipped = 0;
let errors = 0;
for (const item of toProcess) {
try {
const imgUrl = await fetchGaoTekImage(item.productUrl);
if (imgUrl) {
await updateImageUrl(item.id, imgUrl);
updated++;
if (updated % 25 === 0) {
logger.info(`GAO Tek progress: ${updated} updated so far`);
}
} else {
skipped++;
}
} catch (err) {
const msg = (err as Error).message;
if (!msg.includes("HTTP 404") && !msg.includes("HTTP 403")) {
logger.warn(`GAO Tek error for ${item.productUrl}`, { error: msg.slice(0, 80) });
}
errors++;
}
await sleep(100);
}
logger.info(`GAO Tek done: ${updated} updated, ${skipped} no-image, ${errors} errors/404s`);
return { updated, skipped, errors };
}
// =============================================================================
// Other vendors — og:image from stored price_observations URLs
// =============================================================================
const OTHER_VENDOR_NAMES = [
"T&S Communication",
"Ascent Optics",
"ATGBICS",
"Skylane Optics",
"SmartOptics",
"ProLabs",
"FS.COM",
"GBICS",
"Fluxlight",
];
async function backfillOtherVendors(): Promise<{ total: number; updated: number }> {
logger.info("=== Other vendors og:image backfill starting ===");
const rows = await pool.query(`
SELECT DISTINCT ON (t.id)
t.id, t.part_number, v.name as vendor_name, po.url as product_url
FROM transceivers t
JOIN vendors v ON t.vendor_id = v.id
JOIN price_observations po ON po.transceiver_id = t.id
WHERE v.name = ANY($1)
AND (t.image_url IS NULL OR t.image_url = '')
AND po.url IS NOT NULL
AND po.url ~ '^https?://'
ORDER BY t.id, po.time DESC
`, [OTHER_VENDOR_NAMES]);
logger.info(`Other vendors: ${rows.rows.length} products with URLs to process`);
let updated = 0;
for (const row of rows.rows) {
const productUrl = row.product_url as string;
try {
const html = await fetchHtml(productUrl);
const imgUrl = extractOgImage(html);
if (imgUrl) {
await updateImageUrl(row.id as string, imgUrl);
updated++;
}
} catch {
// Skip 404s, timeouts, etc. silently
}
await sleep(100);
}
logger.info(`Other vendors done: ${updated} / ${rows.rows.length} updated`);
return { total: rows.rows.length, updated };
}
// =============================================================================
// Main
// =============================================================================
async function main(): Promise<void> {
logger.info("=== TIP Image Backfill Script ===");
logger.info(`DB: ${process.env.POSTGRES_HOST ?? "localhost"}:${process.env.POSTGRES_PORT ?? "5433"}`);
try {
await pool.query("SELECT 1");
logger.info("DB connection OK");
} catch (err) {
logger.error("DB connection failed", { error: (err as Error).message });
process.exit(1);
}
const startTime = Date.now();
const results: Record<string, unknown> = {};
try {
results.optcore = await backfillOptcore();
} catch (err) {
logger.error("Optcore backfill failed", { error: (err as Error).message });
results.optcore = { error: (err as Error).message };
}
try {
results.flexoptix = await backfillFlexoptix();
} catch (err) {
logger.error("Flexoptix backfill failed", { error: (err as Error).message });
results.flexoptix = { error: (err as Error).message };
}
try {
results.gaotek = await backfillGaoTek();
} catch (err) {
logger.error("GAO Tek backfill failed", { error: (err as Error).message });
results.gaotek = { error: (err as Error).message };
}
try {
results.others = await backfillOtherVendors();
} catch (err) {
logger.error("Other vendors backfill failed", { error: (err as Error).message });
results.others = { error: (err as Error).message };
}
const elapsedSec = ((Date.now() - startTime) / 1000).toFixed(1);
logger.info("=== Backfill complete ===", { results, elapsedSec });
}
main()
.then(() => pool.end())
.catch((err) => {
logger.error("Fatal error", { error: (err as Error).message });
pool.end();
process.exit(1);
});

View File

@ -10,9 +10,7 @@ export const pool = new Pool({
database: process.env.POSTGRES_DB || "transceiver_db",
user: process.env.POSTGRES_USER || "tip",
password: process.env.POSTGRES_PASSWORD || "tip_dev_2026",
max: 5,
idleTimeoutMillis: 10000,
connectionTimeoutMillis: 5000,
max: 10,
});
// Alias — some scrapers import { db } instead of { pool }
@ -56,11 +54,6 @@ export async function upsertPriceObservation(params: {
params.contentHash,
]
);
// Mark the transceiver as price-verified whenever we successfully record a price
await pool.query(
`UPDATE transceivers SET price_verified = true WHERE id = $1 AND (price_verified IS NULL OR price_verified = false)`,
[params.transceiverId]
);
return true; // New observation written
}
@ -75,31 +68,23 @@ export async function findOrCreateScrapedTransceiver(params: {
fiberType?: string;
wavelengths?: string;
category?: string;
imageUrl?: string;
}): Promise<string> {
// Try to match existing transceiver by part number + vendor
const existing = await pool.query(
`SELECT id, image_url FROM transceivers WHERE part_number = $1 AND vendor_id = $2`,
`SELECT id FROM transceivers WHERE part_number = $1 AND vendor_id = $2`,
[params.partNumber, params.vendorId]
);
if (existing.rows.length > 0) {
// Update image_url and image_verified if we have a new image for a record without one
if (params.imageUrl && !existing.rows[0].image_url) {
await pool.query(
`UPDATE transceivers SET image_url = $1, image_verified = true, updated_at = NOW() WHERE id = $2`,
[params.imageUrl, existing.rows[0].id]
);
}
return existing.rows[0].id;
}
// Create new transceiver entry
const slug = `scraped-${params.partNumber.toLowerCase().replace(/[^a-z0-9]+/g, "-")}`;
const result = await pool.query(
`INSERT INTO transceivers (slug, part_number, vendor_id, form_factor, speed_gbps, speed, reach_meters, reach_label, fiber_type, wavelengths, category, market_status, image_url, image_verified)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, 'Mainstream', $12, $13)
ON CONFLICT (slug) DO UPDATE SET image_url = COALESCE(transceivers.image_url, EXCLUDED.image_url), image_verified = COALESCE(transceivers.image_verified, EXCLUDED.image_verified), updated_at = NOW()
`INSERT INTO transceivers (slug, part_number, vendor_id, form_factor, speed_gbps, speed, reach_meters, reach_label, fiber_type, wavelengths, category, market_status)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, 'Mainstream')
ON CONFLICT (slug) DO UPDATE SET updated_at = NOW()
RETURNING id`,
[
slug,
@ -113,8 +98,6 @@ export async function findOrCreateScrapedTransceiver(params: {
params.fiberType || "",
params.wavelengths || "",
params.category || "DataCenter",
params.imageUrl || null,
params.imageUrl ? true : null,
]
);
return result.rows[0].id;

View File

@ -1,77 +0,0 @@
/**
* Proxy utility fetches next available proxy from TIP proxy pool.
* Falls back to process.env.PROXY_URL if pool is empty or unreachable.
*/
import * as https from "https";
import * as http from "http";
const TIP_API = process.env.TIP_API_URL ?? "https://transceiver-db.context-x.org";
const FALLBACK = process.env.PROXY_URL ?? "";
interface ProxyResponse {
success: boolean;
host: string;
port: number;
protocol: string;
}
function getJson(url: string): Promise<unknown> {
return new Promise((resolve, reject) => {
const parsed = new URL(url);
const mod = parsed.protocol === "https:" ? https : http;
const req = mod.request(
{
hostname: parsed.hostname,
port: parsed.port || (parsed.protocol === "https:" ? 443 : 80),
path: parsed.pathname + parsed.search,
method: "GET",
headers: { "User-Agent": "TIP-Scraper/1.0.0" },
timeout: 5_000,
},
(res) => {
const chunks: Buffer[] = [];
res.on("data", (c: Buffer) => chunks.push(c));
res.on("end", () => {
try {
resolve(JSON.parse(Buffer.concat(chunks).toString()));
} catch {
reject(new Error("Invalid JSON response"));
}
});
}
);
req.on("timeout", () => { req.destroy(); reject(new Error("timeout")); });
req.on("error", reject);
req.end();
});
}
/**
* Returns the next available proxy URL from the TIP proxy pool.
* Format: "socks5://host:port" or "http://host:port"
* Returns undefined if no proxy is available (direct connection).
*/
export async function getProxyUrl(): Promise<string | undefined> {
// Try proxy pool first
try {
const data = await getJson(`${TIP_API}/api/proxy/next`) as ProxyResponse;
if (data?.success && data.host && data.port) {
const protocol = data.protocol ?? "socks5";
const proxyUrl = `${protocol}://${data.host}:${data.port}`;
console.log(`[proxy] Using TIP pool node: ${data.host}:${data.port} (${protocol})`);
return proxyUrl;
}
} catch {
// Pool unavailable — fall through to env fallback
}
// Fallback to env var
if (FALLBACK) {
console.log(`[proxy] Using env PROXY_URL fallback`);
return FALLBACK;
}
return undefined;
}