Rene Fichtmueller edc9311d7b feat: add proxy network, image backfill, and scraper improvements
- Add TIP Proxy Network (packages/proxy-agent): SOCKS5 proxy agent
  for residential IP bypass of CloudFront WAF blocks
- Add /api/proxy/* routes: node registration, heartbeat, load balancing
- Add image extraction to Flexoptix catalog scraper (GraphQL small_image)
- Add image extraction to Optcore scraper (Playwright gallery img)
- Fix Fluxlight price scraping (BigCommerce HTML structure: data-product-price-without-tax)
- Add SmartOptics scraper (8 DWDM/coherent products, og:image extraction)
- Fix findOrCreateScrapedTransceiver to update image_url for existing records
- Add image backfill script (backfill-images.ts): 178 Flexoptix images added
- Fix DB connection pool: max 5, idleTimeoutMillis 10s (was unlimited, caused >100 connections)
- Add proxy.ts utility for scraper proxy rotation
2026-04-03 21:13:03 +02:00

175 lines
5.0 KiB
TypeScript

/**
* Core agent logic — registers with TIP routing server,
* starts SOCKS5 server, manages heartbeat and graceful shutdown.
*/
import * as https from "https";
import * as http from "http";
import * as os from "os";
import { createSocks5Server } from "./socks5";
import { BandwidthTracker } from "./bandwidth";
import { startHeartbeatLoop } from "./heartbeat";
import { saveConfig, savePid, clearPid, AgentConfig } from "./config";
// Agent version string; sent in the User-Agent header of outgoing POSTs and
// included in the registration and heartbeat payloads.
const AGENT_VERSION = "1.0.0";
/**
 * Shape the agent expects in the JSON body returned by the
 * `/api/proxy/register` endpoint on a 2xx response.
 */
interface RegisterResponse {
  /** Token field of the response. NOTE(review): its relation to the configured agent token is not visible here — confirm. */
  token: string;
  /** Identifier of the registered node; printed in the registration success log. */
  nodeId: string;
  /** Optional free-text message field of the response. */
  message?: string;
}
/**
 * POSTs `body` as JSON to `url` and resolves with the HTTP status and the
 * parsed response body.
 *
 * The transport module (`http` or `https`) is picked from the URL's protocol.
 * A response body that is not valid JSON resolves with `data: {}` instead of
 * rejecting, so callers can still branch on `status`.
 *
 * @param url  Absolute URL to post to.
 * @param body Value serialized with `JSON.stringify` and sent as the payload.
 * @returns The response status code (0 when none is reported) and the parsed body.
 * @throws Rejects on request errors, on response-stream errors (e.g. the
 *         connection dropping mid-body), or with `Error("timeout")` when the
 *         15 s request timeout fires.
 */
function postJson(url: string, body: unknown): Promise<{ status: number; data: unknown }> {
  return new Promise((resolve, reject) => {
    const json = JSON.stringify(body);
    const parsed = new URL(url);
    const isHttps = parsed.protocol === "https:";
    const mod = isHttps ? https : http;

    const req = mod.request(
      {
        hostname: parsed.hostname,
        // URL.port is "" when the URL uses the protocol's default port.
        port: parsed.port || (isHttps ? 443 : 80),
        path: parsed.pathname + parsed.search,
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Content-Length": Buffer.byteLength(json),
          "User-Agent": `TIP-Agent/${AGENT_VERSION}`,
        },
        timeout: 15_000,
      },
      (res) => {
        const chunks: Buffer[] = [];
        res.on("data", (c: Buffer) => chunks.push(c));
        // Without this listener an aborted response never emits "end" and the
        // promise would stay pending forever.
        res.on("error", reject);
        res.on("end", () => {
          const status = res.statusCode ?? 0;
          try {
            resolve({ status, data: JSON.parse(Buffer.concat(chunks).toString()) });
          } catch {
            // Non-JSON (or empty) body: still report the status to the caller.
            resolve({ status, data: {} });
          }
        });
      }
    );

    // The "timeout" event only signals inactivity; the request must be torn
    // down explicitly.
    req.on("timeout", () => {
      req.destroy();
      reject(new Error("timeout"));
    });
    req.on("error", reject);
    req.write(json);
    req.end();
  });
}
/**
 * Registers this node with the routing server by POSTing the agent's token,
 * name, port and version to `<serverUrl>/api/proxy/register`.
 *
 * @param cfg Agent configuration; `serverUrl`, `token`, `name` and `port` are read.
 * @returns The server's response body on any 2xx status. A body that is not a
 *          JSON object is treated as an empty object.
 * @throws Error with `Registration failed: HTTP <status>` (followed by the
 *         server's `message` when one is present) on a non-2xx status; also
 *         propagates transport errors from the underlying POST.
 */
async function registerNode(cfg: AgentConfig): Promise<RegisterResponse> {
  const url = `${cfg.serverUrl}/api/proxy/register`;
  console.log(`[agent] Registering with ${url}...`);

  const { status, data } = await postJson(url, {
    token: cfg.token,
    name: cfg.name,
    port: cfg.port,
    version: AGENT_VERSION,
  });

  // The body is untrusted JSON: it may be null or a primitive, in which case
  // property access below would throw or be meaningless.
  const payload: Partial<RegisterResponse> =
    typeof data === "object" && data !== null ? (data as Partial<RegisterResponse>) : {};

  if (status >= 200 && status < 300) {
    console.log(`[agent] Registered successfully. Node: ${payload.nodeId ?? "unknown"}`);
    return payload as RegisterResponse;
  }

  // Surface the server's explanation, if it sent one, alongside the status.
  const detail =
    typeof payload.message === "string" && payload.message.length > 0
      ? `: ${payload.message}`
      : "";
  throw new Error(`Registration failed: HTTP ${status}${detail}`);
}
/**
 * Boots the agent: attempts registration, starts the SOCKS5 server, persists
 * the PID and config, starts the heartbeat loop and a once-a-minute stats log,
 * and installs SIGINT/SIGTERM handlers for shutdown.
 *
 * Registration failure is logged and ignored. On a signal the timers are
 * cleared, the PID is cleared and the server is closed; the process exits
 * with code 0 once the server has closed, or with code 1 if that takes longer
 * than 5 seconds.
 *
 * @param cfg Agent configuration (`serverUrl`, `token`, `name`, `port`, `maxGb` are read).
 * @returns A promise that resolves once setup is complete and the handlers
 *          are installed.
 * @throws Rejects if the server emits an error before it starts listening.
 */
export async function startAgent(cfg: AgentConfig): Promise<void> {
  const bandwidth = new BandwidthTracker(cfg.maxGb);

  // Register with server (best-effort: a failure is only a warning)
  try {
    await registerNode(cfg);
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`[agent] Registration warning: ${reason} (continuing anyway)`);
  }

  // Start SOCKS5 server
  const server = createSocks5Server({
    port: cfg.port,
    bandwidth,
    onNewConnection: () => {
      if (bandwidth.isLimitReached()) {
        console.warn("[agent] Bandwidth limit reached — rejecting new connections");
        return false;
      }
      return true;
    },
  });

  await new Promise<void>((resolve, reject) => {
    let listening = false;
    // One handler for the server's lifetime: before listening an error fails
    // startup; afterwards it is logged instead of being silently dropped.
    server.on("error", (err: Error) => {
      if (!listening) {
        reject(err);
        return;
      }
      console.error(`[agent] SOCKS5 server error: ${err.message}`);
    });
    server.listen(cfg.port, () => {
      listening = true;
      console.log(`[agent] SOCKS5 proxy listening on port ${cfg.port}`);
      resolve();
    });
  });

  // Save PID
  savePid(process.pid);
  saveConfig({ ...cfg, pid: process.pid, startedAt: new Date().toISOString() });

  // Start heartbeat
  const heartbeatTimer = startHeartbeatLoop(
    cfg.serverUrl,
    () => {
      const stats = bandwidth.getStats();
      return {
        token: cfg.token,
        port: cfg.port,
        bytesProxied: stats.bytesProxied,
        requestsProxied: stats.requestsProxied,
        version: AGENT_VERSION,
      };
    },
    (result) => {
      if (!result.ok) {
        console.warn(`[agent] Heartbeat failed: ${result.error ?? "unknown error"}`);
      }
    }
  );

  console.log(`[agent] TIP Agent running. Max bandwidth: ${cfg.maxGb} GB`);
  console.log(`[agent] Press Ctrl+C to stop.\n`);

  // Print stats every minute
  const statsTimer = setInterval(() => {
    const stats = bandwidth.getStats();
    const usedGb = (stats.bytesProxied / (1024 ** 3)).toFixed(3);
    console.log(
      `[agent] Stats: ${stats.requestsProxied} requests, ${usedGb} GB used / ${cfg.maxGb} GB limit`
    );
  }, 60_000);

  // Graceful shutdown
  let shuttingDown = false;
  const shutdown = (signal: string) => {
    // A second signal during the grace period must not repeat the teardown
    // or stack another force-exit timer.
    if (shuttingDown) return;
    shuttingDown = true;

    console.log(`\n[agent] Received ${signal}, shutting down...`);
    clearInterval(heartbeatTimer);
    clearInterval(statsTimer);
    clearPid();
    server.close(() => {
      console.log("[agent] SOCKS5 server closed. Goodbye.");
      process.exit(0);
    });
    // Force exit if the server has not finished closing within 5 s.
    setTimeout(() => process.exit(1), 5000);
  };
  process.on("SIGINT", () => shutdown("SIGINT"));
  process.on("SIGTERM", () => shutdown("SIGTERM"));
}
/**
 * Returns the first non-internal IPv4 address found among the machine's
 * network interfaces, in the order `os.networkInterfaces()` reports them.
 *
 * @returns The address string, or `"127.0.0.1"` when no such address exists.
 */
export function getLocalIp(): string {
  const firstExternalV4 = Object.values(os.networkInterfaces())
    // Interface entries may be undefined; treat those as having no addresses.
    .flatMap((entries) => entries ?? [])
    .find((entry) => !entry.internal && entry.family === "IPv4");

  return firstExternalV4 === undefined ? "127.0.0.1" : firstExternalV4.address;
}