fix: enrich - skip disambiguation pages, try first-word fallback for compound names

This commit is contained in:
Rene Fichtmueller 2026-03-30 06:04:34 +02:00
parent 0cebb1973f
commit 35e0b69442

View File

@@ -3480,15 +3480,20 @@ const server = http.createServer(async (req, res) => {
return titleMatch && (hasNetContext || titleMatch); return titleMatch && (hasNetContext || titleMatch);
}; };
// Direct title lookup // Direct title lookup — try full name first, then first word as fallback
const wikiDirect = await fetchJSON( const cleanName = companyName.replace(/\s+(GmbH|AG|Ltd|Inc|LLC|BV|SA|SAS|Oy|AB)$/i, "").trim();
"https://en.wikipedia.org/api/rest_v1/page/summary/" + encodeURIComponent(companyName.replace(/\s+(GmbH|AG|Ltd|Inc|LLC|BV|SA|SAS|Oy|AB)$/i, "").trim()), const firstName = cleanName.split(/\s+/)[0];
{ timeout: 5000 } const namesToTry = cleanName === firstName ? [cleanName] : [cleanName, firstName];
); for (const tryName of namesToTry) {
if (wikiDirect && wikiDirect.extract && wikiDirect.extract.length > 30) { const wikiDirect = await fetchJSON(
if (isRelevant(wikiDirect.title, wikiDirect.extract)) { "https://en.wikipedia.org/api/rest_v1/page/summary/" + encodeURIComponent(tryName),
{ timeout: 5000 }
);
if (wikiDirect && wikiDirect.type === "disambiguation") continue; // skip disambiguation pages
if (wikiDirect && wikiDirect.extract && wikiDirect.extract.length > 30 && isRelevant(wikiDirect.title, wikiDirect.extract)) {
description = wikiDirect.extract.replace(/\s+/g, " ").trim().slice(0, 300); description = wikiDirect.extract.replace(/\s+/g, " ").trim().slice(0, 300);
wikiUrl = wikiDirect.content_urls && wikiDirect.content_urls.desktop && wikiDirect.content_urls.desktop.page; wikiUrl = wikiDirect.content_urls && wikiDirect.content_urls.desktop && wikiDirect.content_urls.desktop.page;
break;
} }
} }