fix(scrapers): replace bot User-Agents with Chrome UA + disable dead domain

- 16 commercial scrapers: replace bot User-Agents (TIP-Bot/1.0, TIPBot/1.0,
  TIP-Scraper/1.0) with a Chrome/120 UA
  (GBICS confirmed returning 0 bytes for bot UA, Chrome UA returns 200KB)
- gbics.ts: fix User-Agent, widen Accept and add Accept-Language
  (was returning empty HTML, now returns products)
- optictransceiver.ts: disable — domain repurposed as plant shop (2026-04-06);
  the scraper now logs a warning and returns early.
  Alocasia Regal Shield is not a transceiver.
This commit is contained in:
Rene Fichtmueller 2026-04-06 02:17:50 +02:00
parent 80aa85961b
commit 2e852e0a2f
18 changed files with 22 additions and 19 deletions

View File

@ -19,7 +19,7 @@ const CATALOG_URLS = [
];
const MAX_PAGES = 15;
const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
Accept: "text/html,application/xhtml+xml",
"Accept-Language": "en-US,en;q=0.9",
};

View File

@ -14,7 +14,7 @@ const BASE = "https://www.blueoptics.de";
const CATALOG_PATH = "/transceivers/";
const MAX_PAGES = 20;
const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
Accept: "text/html,application/xhtml+xml",
"Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
};

View File

@ -11,7 +11,7 @@ import { contentHash } from "../utils/hash";
const BASE = "https://www.championone.com";
const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
Accept: "text/html,application/xhtml+xml",
};

View File

@ -53,7 +53,7 @@ interface Product {
async function fetchCategory(cat: typeof CATEGORIES[0], vendorId: string, page = 1): Promise<Product[]> {
const url = `${BASE}${cat.url}?page=${page}`;
const resp = await fetch(url, {
headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0; +https://tip.context-x.org)" },
headers: { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" },
signal: AbortSignal.timeout(30_000),
});
if (!resp.ok) return [];

View File

@ -14,7 +14,7 @@ const BASE = "https://shop.fiber24.net";
const CATALOG_PATH = "/FO-TRANSCEIVER/de";
const MAX_PAGES = 20;
const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
Accept: "text/html,application/xhtml+xml",
"Accept-Language": "de-DE,de;q=0.9,en;q=0.8",
};

View File

@ -12,7 +12,7 @@ import { contentHash } from "../utils/hash";
const BASE = "https://fluxlight.com";
const CATALOG_PATH = "/transceivers/";
const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
Accept: "text/html,application/xhtml+xml",
};

View File

@ -14,7 +14,7 @@ const BASE = "https://gaotek.com";
const CATALOG_PATH = "/category/fiber-optics/transceivers/";
const MAX_PAGES = 20;
const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
Accept: "text/html,application/xhtml+xml",
"Accept-Language": "en-US,en;q=0.9",
};

View File

@ -10,8 +10,9 @@ import { contentHash } from "../utils/hash";
const BASE = "https://www.gbics.com";
const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
Accept: "text/html,application/xhtml+xml",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9",
};
const CATEGORIES = [

View File

@ -27,7 +27,7 @@ const API_URLS = [
];
const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
Accept: "text/html,application/xhtml+xml,application/json",
"Accept-Language": "en-US,en;q=0.9,de;q=0.8",
};

View File

@ -38,7 +38,7 @@ export async function scrapeMultimodeInc(): Promise<void> {
for (const cat of CATEGORIES) {
try {
const resp = await fetch(`${BASE}${cat.path}`, {
headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0)" },
headers: { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" },
signal: AbortSignal.timeout(25_000),
});
if (!resp.ok) continue;

View File

@ -110,7 +110,7 @@ async function fetchTransceiverUrls(): Promise<Array<{ url: string; title: strin
const apiUrl = `${BASE_URL}/wp-json/wp/v2/product?product_cat=${catId}&per_page=100&page=${page}&_fields=slug,link,title`;
try {
const resp = await fetch(apiUrl, {
headers: { "User-Agent": "Mozilla/5.0 (compatible; TIP-Scraper/1.0)" },
headers: { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" },
signal: AbortSignal.timeout(15000),
});

View File

@ -86,7 +86,9 @@ async function scrapeCategory(path: string, form_factor: string, vendorId: strin
}
export async function scrapeOpticTransceiver(): Promise<void> {
logger.info("OpticTransceiver.com scraper starting");
// Domain repurposed as plant shop (2026-04-06) — skip entirely
logger.warn("OpticTransceiver.com is no longer an optics vendor (domain repurposed). Scraper disabled.");
return;
const vendorId = await ensureVendor("OpticTransceiver", BASE);
let total = 0;

View File

@ -35,7 +35,7 @@ async function fetchPage(catUrl: string, form_factor: string, vendorId: string,
const sep = catUrl.includes("?") ? "&" : "?";
const url = `${BASE}${catUrl}${page > 1 ? `${sep}p=${page}` : ""}`;
const resp = await fetch(url, {
headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0)" },
headers: { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" },
signal: AbortSignal.timeout(30_000),
});
if (!resp.ok) return 0;

View File

@ -10,7 +10,7 @@ import { contentHash } from "../utils/hash";
const BASE = "https://www.sfpcables.com";
const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
Accept: "text/html,application/xhtml+xml",
};

View File

@ -21,7 +21,7 @@ const CATALOG_URLS = [
];
const MAX_PAGES = 10;
const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
Accept: "text/html,application/xhtml+xml",
"Accept-Language": "en-US,en;q=0.9",
};

View File

@ -12,7 +12,7 @@ import { contentHash, parsePrice } from "../utils/hash";
const BASE = "https://www.10gtek.com";
const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
Accept: "text/html,application/xhtml+xml",
};

View File

@ -14,7 +14,7 @@ const BASE = "https://www.china-tscom.com";
const CATALOG_PATH = "/products/fiber-optic-transceivers/";
const MAX_PAGES = 15;
const HEADERS = {
"User-Agent": "Mozilla/5.0 (compatible; TIP-Bot/1.0; research)",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
Accept: "text/html,application/xhtml+xml",
"Accept-Language": "en-US,en;q=0.9",
};

View File

@ -47,7 +47,7 @@ export async function scrapeWiitek(): Promise<void> {
for (const cat of CATEGORIES) {
try {
const resp = await fetch(`${BASE}${cat.path}`, {
headers: { "User-Agent": "Mozilla/5.0 (compatible; TIPBot/1.0)" },
headers: { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" },
signal: AbortSignal.timeout(20_000),
});
if (!resp.ok) continue;