fix: enrich - skip Wikipedia disambiguation pages and fall back to the first word of a compound company name
This commit is contained in:
parent
0cebb1973f
commit
35e0b69442
@ -3480,15 +3480,20 @@ const server = http.createServer(async (req, res) => {
|
|||||||
return titleMatch && (hasNetContext || titleMatch);
|
return titleMatch && (hasNetContext || titleMatch);
|
||||||
};
|
};
|
||||||
|
|
||||||
// Direct title lookup
|
// Direct title lookup — try full name first, then first word as fallback
|
||||||
|
const cleanName = companyName.replace(/\s+(GmbH|AG|Ltd|Inc|LLC|BV|SA|SAS|Oy|AB)$/i, "").trim();
|
||||||
|
const firstName = cleanName.split(/\s+/)[0];
|
||||||
|
const namesToTry = cleanName === firstName ? [cleanName] : [cleanName, firstName];
|
||||||
|
for (const tryName of namesToTry) {
|
||||||
const wikiDirect = await fetchJSON(
|
const wikiDirect = await fetchJSON(
|
||||||
"https://en.wikipedia.org/api/rest_v1/page/summary/" + encodeURIComponent(companyName.replace(/\s+(GmbH|AG|Ltd|Inc|LLC|BV|SA|SAS|Oy|AB)$/i, "").trim()),
|
"https://en.wikipedia.org/api/rest_v1/page/summary/" + encodeURIComponent(tryName),
|
||||||
{ timeout: 5000 }
|
{ timeout: 5000 }
|
||||||
);
|
);
|
||||||
if (wikiDirect && wikiDirect.extract && wikiDirect.extract.length > 30) {
|
if (wikiDirect && wikiDirect.type === "disambiguation") continue; // skip disambiguation pages
|
||||||
if (isRelevant(wikiDirect.title, wikiDirect.extract)) {
|
if (wikiDirect && wikiDirect.extract && wikiDirect.extract.length > 30 && isRelevant(wikiDirect.title, wikiDirect.extract)) {
|
||||||
description = wikiDirect.extract.replace(/\s+/g, " ").trim().slice(0, 300);
|
description = wikiDirect.extract.replace(/\s+/g, " ").trim().slice(0, 300);
|
||||||
wikiUrl = wikiDirect.content_urls && wikiDirect.content_urls.desktop && wikiDirect.content_urls.desktop.page;
|
wikiUrl = wikiDirect.content_urls && wikiDirect.content_urls.desktop && wikiDirect.content_urls.desktop.page;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user