- Rename scrapers/digikey.ts → scrapers/mouser.ts: export scrapeMouser() (file was Mouser API implementation mislabeled from task origin) - Fix scheduler.ts mouser-oem worker: import scrapeMouser from ./scrapers/mouser - Delete switch-seed-smb.ts (unreferenced, no CLI flag, no scheduler job) - Add storage/, storage-fs/, .crawlee/ to .gitignore (Crawlee runtime artifacts)
292 lines
9.9 KiB
TypeScript
292 lines
9.9 KiB
TypeScript
/**
 * OEM Reference Price Scraper — Mouser Electronics API
 *
 * Source:     api.mouser.com (free REST API, no bot-detection)
 * Target:     Juniper, Cisco, Arista OEM transceiver PIDs already in our DB
 *             (the query in scrapeMouser() also includes FS.COM and SmartOptics)
 * Stores:     price_observations (marketplace='mouser', condition='new')
 *
 * API key:    Free registration at mouser.com/api — set the MOUSER_API_KEY env var
 * Endpoint:   POST https://api.mouser.com/api/v1.0/search/keyword
 *
 * Rate limit: 30 req/min on free tier → ~2.1 s delay between requests (see DELAY_MS)
 *
 * Note: DigiKey + Arrow both require Playwright to bypass Cloudflare/Akamai.
 * Mouser offers a free REST API with the same data — hence this implementation.
 */
|
|
|
|
import { pool, ensureVendor, upsertPriceObservation } from "../utils/db";
|
|
import { contentHash } from "../utils/hash";
|
|
|
|
/** Root URL shared by the Mouser REST API v1.0 endpoints used in this file. */
const MOUSER_API_BASE = "https://api.mouser.com/api/v1.0";

/** API key taken from the MOUSER_API_KEY environment variable; empty string when unset. */
const MOUSER_API_KEY = process.env["MOUSER_API_KEY"] ?? "";

/** Pause between consecutive requests, in milliseconds (≤ 30 req/min on free tier). */
const DELAY_MS = 2100;
|
|
|
|
// ── Types ─────────────────────────────────────────────────────────────────────
|
|
|
|
/** One entry of a part's `PriceBreaks` list in a Mouser search response. */
interface MouserPriceBreak {
  /** Quantity this price tier is associated with. */
  Quantity: number;

  /** Price as a formatted string, e.g. "1,234.56" or "1234.56". */
  Price: string;

  /** Currency code, e.g. "EUR". */
  Currency: string;
}
|
|
|
|
/** A single part record from the `Parts` array of a Mouser search response. */
interface MouserPart {
  /** Manufacturer part number; compared against the searched PID to find an exact match. */
  ManufacturerPartNumber: string;

  /** Mouser's own part number for the item. */
  MouserPartNumber: string;

  /** Free-text availability, e.g. "117 auf Lager". */
  Availability: string;

  /** Datasheet link. */
  DataSheetUrl: string;

  /** Product description text. */
  Description: string;

  /** Lead time as text, e.g. "10 Weeks". */
  LeadTime: string;

  /** Minimum order quantity (delivered as a string). */
  Min: string;

  /** Link to the product detail page. */
  ProductDetailUrl: string;

  /** Price tiers for the part. */
  PriceBreaks: MouserPriceBreak[];

  /** NOTE(review): not read anywhere in the visible code — presumably an in-stock count; confirm. */
  AvailabilityInStock: string;
}
|
|
|
|
/** Top-level shape of the JSON body returned by the Mouser keyword search endpoint. */
interface MouserSearchResponse {
  /** API-level errors; a non-empty list is treated as a failed search by the caller. */
  Errors: { Code: string; Message: string }[];

  /** Search payload, or `null` when the API returned no result object. */
  SearchResults: null | {
    NumberOfResult: number;
    Parts: MouserPart[];
  };
}
|
|
|
|
// ── Helpers ───────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Return a promise that resolves (with no value) once `ms` milliseconds have
 * elapsed on the timer. Used to pace outgoing requests.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
|
|
/**
 * Parse a Mouser price string into a positive number.
 *
 * Accepts both German-style ("1.234,56") and US-style ("1,234.56" / "1234.56")
 * formatting, with or without a currency symbol or code around the digits
 * ("$1.23", "1,23 €").
 *
 * The decimal separator is determined from the string itself:
 *  - when both "." and "," occur, the one that appears last is the decimal mark;
 *  - when a single kind occurs more than once, it is a thousands separator;
 *  - when a single separator occurs exactly once, it is treated as a thousands
 *    separator only if it is followed by exactly three digits AND it is the
 *    grouping character expected for `currency` ("." for EUR, "," otherwise);
 *    in every other case it is the decimal mark.
 *
 * @param raw      Price text as delivered by the API.
 * @param currency Currency code; only used as a tie-breaker for ambiguous
 *                 inputs such as "1.234" (1234 for EUR, 1.234 otherwise).
 * @returns The parsed price, or `null` when the input is empty, negative,
 *          zero, or not a number.
 */
function parseMouserPrice(raw: string, currency: string): number | null {
  if (!raw) return null;

  // A minus sign ahead of the first digit means a negative amount — never a valid price.
  if (/^[^\d]*-/.test(raw)) return null;

  // Drop currency symbols/codes and any whitespace; keep digits and separators only.
  const numeric = raw.replace(/[^\d.,]/g, "");
  if (!/\d/.test(numeric)) return null;

  const lastDot = numeric.lastIndexOf(".");
  const lastComma = numeric.lastIndexOf(",");

  let decimalSep: "." | "," | null;
  if (lastDot !== -1 && lastComma !== -1) {
    // Both present: whichever comes last separates the fraction.
    decimalSep = lastDot > lastComma ? "." : ",";
  } else if (lastDot === -1 && lastComma === -1) {
    decimalSep = null;
  } else {
    const sep: "." | "," = lastDot !== -1 ? "." : ",";
    const lastIdx = Math.max(lastDot, lastComma);
    const occursOnce = numeric.indexOf(sep) === lastIdx;
    const digitsAfter = numeric.length - lastIdx - 1;
    const expectedGroupingSep = (currency ?? "").toUpperCase() === "EUR" ? "." : ",";
    const isGrouping = !occursOnce || (digitsAfter === 3 && sep === expectedGroupingSep);
    decimalSep = isGrouping ? null : sep;
  }

  const normalized =
    decimalSep === null
      ? numeric.replace(/[.,]/g, "")
      : numeric
          .split(decimalSep === "." ? "," : ".") // remove grouping characters
          .join("")
          .replace(",", "."); // decimal comma → decimal point

  const n = Number.parseFloat(normalized);
  return Number.isFinite(n) && n > 0 ? n : null;
}
|
|
|
|
/**
 * Derive a stock quantity and a coarse stock level from a Mouser availability
 * string such as "117 auf Lager" or "1,234 In Stock".
 *
 * @param avail Availability text from the API (may be empty).
 * @returns `qty` — the first number found in the text with all thousands
 *          separators removed (0 when none is found); `stockLevel` — one of
 *          "discontinued", "out_of_stock" (qty 0), "low_stock" (qty < 10) or
 *          "in_stock".
 */
function parseAvailability(avail: string): { qty: number; stockLevel: string } {
  if (!avail) return { qty: 0, stockLevel: "out_of_stock" };

  const lower = avail.toLowerCase();

  // Discontinued / not available markers (German and English).
  const discontinuedMarkers = ["nicht verfügbar", "not available", "obsolete"];
  if (discontinuedMarkers.some((marker) => lower.includes(marker))) {
    return { qty: 0, stockLevel: "discontinued" };
  }

  // First run of digits (optionally grouped with "," or ".") followed by whitespace or end of text.
  const match = avail.match(/(\d[\d,.]*)(?:\s|$)/);
  // Strip EVERY grouping character — without the global flag "1,234,567" would parse as 1234.
  const digits = match?.[1]?.replace(/[,.]/g, "") ?? "";
  const parsed = digits ? Number.parseInt(digits, 10) : 0;
  const qty = Number.isFinite(parsed) ? parsed : 0;

  if (qty === 0) return { qty: 0, stockLevel: "out_of_stock" };
  if (qty < 10) return { qty, stockLevel: "low_stock" };
  return { qty, stockLevel: "in_stock" };
}
|
|
|
|
/**
 * Pick the price of the smallest-quantity tier of a part (the 1-unit price
 * when such a tier exists).
 *
 * The currency reported is the tier's own currency, upper-cased; "EUR" is
 * assumed only when the tier carries no currency at all.
 *
 * @returns The parsed price with its currency, or `null` when the part has no
 *          price tiers or the tier's price cannot be parsed.
 */
function extractPrice(part: MouserPart): { price: number; currency: string } | null {
  const tiers = part.PriceBreaks;
  if (!tiers || tiers.length === 0) return null;

  // Work on a copy so the caller's array keeps its order.
  const [smallestTier] = tiers.slice().sort((lhs, rhs) => lhs.Quantity - rhs.Quantity);
  if (!smallestTier) return null;

  const currency = (smallestTier.Currency ?? "EUR").toUpperCase();
  const price = parseMouserPrice(smallestTier.Price, currency);

  return price === null ? null : { price, currency };
}
|
|
|
|
// ── API call ──────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Look up one part number through the Mouser keyword-search endpoint.
 *
 * Behaviour:
 *  - returns `null` immediately when no API key is configured;
 *  - on HTTP 429 waits 30 s and retries once (a second 429 waits again and
 *    then gives up), so a rate-limited part is not silently dropped;
 *  - network errors, other non-2xx statuses, unparsable bodies and API-level
 *    errors are logged as warnings and yield `null` — this function never throws
 *    for those cases;
 *  - among the returned parts, one whose manufacturer part number equals
 *    `partNumber` (case-insensitive, trimmed) is preferred; otherwise the first
 *    result is returned.
 *
 * @param partNumber Manufacturer part number to search for.
 * @returns The selected part, or `null` when nothing usable came back.
 */
async function searchMouser(partNumber: string): Promise<MouserPart | null> {
  if (!MOUSER_API_KEY) return null;

  const MAX_ATTEMPTS = 2;
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  // The key is URL-encoded so that unexpected characters cannot corrupt the query string.
  // NOTE(review): the "searchWithSapnningRows" spelling is kept exactly as it was — confirm against the API docs.
  const url = `${MOUSER_API_BASE}/search/keyword?apiKey=${encodeURIComponent(MOUSER_API_KEY)}&langId=1&searchWithSapnningRows=false`;

  const requestBody = JSON.stringify({
    SearchByKeywordRequest: {
      keyword: partNumber,
      records: 5,
      startingRecord: 0,
      searchOptions: "1", // original author's note: exact match preferred — TODO confirm
      searchWithSapnningRows: false,
    },
  });

  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
    let resp: Response;
    try {
      resp = await fetch(url, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Accept: "application/json",
        },
        body: requestBody,
        signal: AbortSignal.timeout(15_000),
      });
    } catch (err: unknown) {
      console.warn(` [Mouser] Fetch error for ${partNumber}: ${describeError(err).slice(0, 60)}`);
      return null;
    }

    if (resp.status === 429) {
      console.warn(` [Mouser] Rate limited — backing off 30s`);
      await sleep(30_000);
      continue; // retry after the back-off (or fall out of the loop on the last attempt)
    }

    if (!resp.ok) {
      console.warn(` [Mouser] HTTP ${resp.status} for ${partNumber}`);
      return null;
    }

    let data: MouserSearchResponse;
    try {
      // The cast is unchecked: the body shape is trusted as-is.
      data = (await resp.json()) as MouserSearchResponse;
    } catch (err: unknown) {
      // A malformed body must not abort the whole scrape run.
      console.warn(` [Mouser] Invalid JSON for ${partNumber}: ${describeError(err).slice(0, 60)}`);
      return null;
    }

    if (data.Errors && data.Errors.length > 0) {
      const errMsg = data.Errors.map((e) => e.Message).join("; ");
      console.warn(` [Mouser] API error for ${partNumber}: ${errMsg.slice(0, 80)}`);
      return null;
    }

    const parts = data.SearchResults?.Parts ?? [];
    if (parts.length === 0) return null;

    // Prefer an exact manufacturer-part-number match; fall back to the first hit.
    const wanted = partNumber.toUpperCase().trim();
    const exact = parts.find(
      (p) => (p.ManufacturerPartNumber ?? "").toUpperCase().trim() === wanted
    );
    return exact ?? parts[0] ?? null;
  }

  // Still rate-limited after the final attempt.
  return null;
}
|
|
|
|
// ── Main ──────────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Run the Mouser reference-price scrape end to end.
 *
 * Steps:
 *  1. abort with an explanatory message when MOUSER_API_KEY is not set;
 *  2. make sure a "Mouser Electronics" vendor row exists (via `ensureVendor`);
 *  3. load the part numbers of transceivers belonging to the target vendors;
 *  4. for each part: search Mouser, extract the lowest-quantity price and the
 *     availability, and store the result through `upsertPriceObservation`;
 *  5. wait `DELAY_MS` after every part and print a summary at the end.
 *
 * Parts without a search hit or without a parsable price are counted as
 * "not found". Errors thrown while storing an observation are counted and
 * logged but do not stop the run.
 */
export async function scrapeMouser(): Promise<void> {
  console.log("=== OEM Reference Price Scraper (Mouser Electronics API) ===\n");

  if (!MOUSER_API_KEY) {
    console.error(
      " ERROR: MOUSER_API_KEY not set.\n" +
        " Register free at https://www.mouser.com/api-hub/ → get API key → set env var.\n" +
        " Free tier: 1000 queries/month — enough for 475 Juniper PIDs."
    );
    return;
  }

  // Register Mouser as a vendor (distributor = reseller type)
  const vendorId = await ensureVendor(
    "Mouser Electronics",
    "reseller",
    "https://www.mouser.de",
    "https://www.mouser.de/Search/Refine?Keyword="
  );
  console.log(` Vendor ID: ${vendorId}`);

  // Load transceiver PIDs of the target vendors; the filters skip rows whose
  // part_number looks like a category label rather than a real PID.
  const TARGET_VENDORS = ["Juniper Networks", "Cisco Systems", "Arista Networks", "FS.COM", "SmartOptics"];
  const { rows: transceivers } = await pool.query<{
    id: string;
    part_number: string;
    form_factor: string;
    speed: string;
    vendor_name: string;
  }>(
    `SELECT t.id, t.part_number, t.form_factor, t.speed, v.name AS vendor_name
     FROM transceivers t
     JOIN vendors v ON v.id = t.vendor_id
     WHERE v.name = ANY($1)
     AND t.part_number IS NOT NULL
     AND t.part_number NOT ILIKE '%Transceiver%'
     AND t.part_number NOT ILIKE '%-Transceivers'
     AND LENGTH(t.part_number) BETWEEN 4 AND 35
     ORDER BY v.name, t.part_number`,
    [TARGET_VENDORS]
  );

  console.log(` Found ${transceivers.length} OEM PIDs to price-check\n`);

  let found = 0;
  let notFound = 0;
  let errors = 0;

  for (const [i, tx] of transceivers.entries()) {
    // Progress line every 20 parts.
    if (i > 0 && i % 20 === 0) {
      console.log(
        ` [${i}/${transceivers.length}] found=${found} not_found=${notFound} errors=${errors}`
      );
    }

    const part = await searchMouser(tx.part_number);
    const priceData = part ? extractPrice(part) : null;

    if (!part || !priceData) {
      notFound++;
      await sleep(DELAY_MS);
      continue;
    }

    const { qty, stockLevel } = parseAvailability(part.Availability);

    // ProductDetailUrl may already be an absolute URL; only prefix the site
    // origin when it is a relative path, and fall back to a keyword search
    // link when it is missing.
    const detailUrl = part.ProductDetailUrl;
    let productUrl: string;
    if (!detailUrl) {
      productUrl = `https://www.mouser.de/Search/Refine?Keyword=${encodeURIComponent(tx.part_number)}`;
    } else if (/^https?:\/\//i.test(detailUrl)) {
      productUrl = detailUrl;
    } else {
      productUrl = `https://www.mouser.de${detailUrl}`;
    }

    // The hash covers price, currency and stock level for this transceiver.
    const hash = contentHash(
      `mouser:${tx.id}:${priceData.price}:${priceData.currency}:${stockLevel}`
    );

    try {
      await upsertPriceObservation({
        transceiverId: tx.id,
        sourceVendorId: vendorId,
        price: priceData.price,
        currency: priceData.currency,
        stockLevel,
        quantityAvailable: qty,
        url: productUrl,
        contentHash: hash,
      });
      found++;
      console.log(
        ` ✓ ${tx.part_number.padEnd(32)} ${priceData.currency} ${priceData.price.toFixed(2).padStart(9)} ${stockLevel.padEnd(13)} qty=${qty}`
      );
    } catch (err: unknown) {
      errors++;
      const message = err instanceof Error ? err.message : String(err);
      console.warn(` ✗ DB error ${tx.part_number}: ${message.slice(0, 60)}`);
    }

    await sleep(DELAY_MS);
  }

  console.log(`\n=== Mouser OEM Scraper Complete ===`);
  console.log(` Processed: ${transceivers.length}`);
  console.log(` Found: ${found}`);
  console.log(` Not found: ${notFound}`);
  console.log(` DB errors: ${errors}\n`);
}
|
|
|
|
// ── CLI ───────────────────────────────────────────────────────────────────────
|
|
|
|
// Run the scraper when this file is executed directly (not when imported).
// The pool is always closed — and awaited — before the process exits, so a
// fatal error no longer races `process.exit` against `pool.end()`, and
// `pool.end()` is never invoked twice.
if (require.main === module) {
  void (async (): Promise<void> => {
    let exitCode = 0;

    try {
      await scrapeMouser();
    } catch (err: unknown) {
      console.error("Fatal:", err);
      exitCode = 1;
    }

    try {
      await pool.end();
    } catch (err: unknown) {
      console.error("Fatal:", err);
      exitCode = 1;
    }

    if (exitCode !== 0) process.exit(exitCode);
  })();
}
|