1432 lines
54 KiB
JavaScript
1432 lines
54 KiB
JavaScript
// js/processing/reference-doi-resolver.js
|
||
// 多源DOI解析器 - CrossRef + OpenAlex + PubMed + arXiv + Semantic Scholar
|
||
|
||
(function(global) {
|
||
'use strict';
|
||
|
||
/**
|
||
* 代理配置 - 从 localStorage 读取
|
||
*/
|
||
function getProxyConfig() {
|
||
try {
|
||
const config = JSON.parse(localStorage.getItem('academicSearchProxyConfig') || 'null');
|
||
if (!config) {
|
||
return {
|
||
enabled: false,
|
||
baseUrl: '',
|
||
authKey: null,
|
||
semanticScholarApiKey: null,
|
||
pubmedApiKey: null,
|
||
rateLimit: null
|
||
};
|
||
}
|
||
return {
|
||
enabled: config.enabled !== false,
|
||
baseUrl: config.baseUrl || '',
|
||
authKey: config.authKey || null,
|
||
semanticScholarApiKey: config.semanticScholarApiKey || null,
|
||
pubmedApiKey: config.pubmedApiKey || null,
|
||
rateLimit: config.rateLimit || null // 从health检测获取的速率限制信息
|
||
};
|
||
} catch (error) {
|
||
console.warn('[DOIResolver] Failed to load proxy config:', error);
|
||
return {
|
||
enabled: false,
|
||
baseUrl: '',
|
||
authKey: null,
|
||
semanticScholarApiKey: null,
|
||
pubmedApiKey: null,
|
||
rateLimit: null
|
||
};
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 标题规范化 - 去除特殊符号和格式
|
||
* @param {string} title - 原始标题
|
||
* @returns {string} 清理后的标题
|
||
*/
|
||
function normalizeTitle(title) {
|
||
if (!title) return '';
|
||
|
||
let normalized = title;
|
||
|
||
// 1. 处理同位素标记:(18)F → 18F, [(11)C] → 11C, (99m)Tc → 99mTc
|
||
normalized = normalized.replace(/[\(\[](\d+m?)\)?\]?([A-Z][a-z]?)/g, '$1$2');
|
||
|
||
// 2. 处理化学式和数学符号:去除多余括号
|
||
normalized = normalized.replace(/\[([^\]]+)\]/g, '$1');
|
||
normalized = normalized.replace(/\(([^)]{1,3})\)/g, '$1'); // 只处理短括号内容(避免误删作者名等)
|
||
|
||
// 3. 去除多余的标点符号
|
||
normalized = normalized.replace(/\s+([,;:.!?])/g, '$1'); // 标点前的空格
|
||
normalized = normalized.replace(/([,;:.!?])\s*([,;:.!?])/g, '$1'); // 连续标点
|
||
|
||
// 4. 统一空格
|
||
normalized = normalized.replace(/\s+/g, ' ').trim();
|
||
|
||
// 5. 去除末尾的句号(如果存在)
|
||
normalized = normalized.replace(/\.$/, '');
|
||
|
||
console.log(`[TitleNormalize] "${title.substring(0, 60)}..." → "${normalized.substring(0, 60)}..."`);
|
||
|
||
return normalized;
|
||
}
|
||
|
||
/**
|
||
* 获取学术搜索源配置
|
||
*/
|
||
function getSourcesConfig() {
|
||
try {
|
||
const config = JSON.parse(localStorage.getItem('academicSearchSourcesConfig') || 'null');
|
||
if (!config || !config.sources) {
|
||
// 默认配置
|
||
return {
|
||
sources: [
|
||
{ key: 'crossref', name: 'CrossRef', enabled: true, order: 0 },
|
||
{ key: 'openalex', name: 'OpenAlex', enabled: true, order: 1 },
|
||
{ key: 'arxiv', name: 'arXiv', enabled: true, order: 2 },
|
||
{ key: 'pubmed', name: 'PubMed', enabled: true, order: 3 },
|
||
{ key: 'semanticscholar', name: 'Semantic Scholar', enabled: true, order: 4 }
|
||
]
|
||
};
|
||
}
|
||
return config;
|
||
} catch (error) {
|
||
console.warn('[DOIResolver] Failed to load sources config:', error);
|
||
return {
|
||
sources: [
|
||
{ key: 'crossref', name: 'CrossRef', enabled: true, order: 0 },
|
||
{ key: 'openalex', name: 'OpenAlex', enabled: true, order: 1 },
|
||
{ key: 'arxiv', name: 'arXiv', enabled: true, order: 2 },
|
||
{ key: 'pubmed', name: 'PubMed', enabled: true, order: 3 },
|
||
{ key: 'semanticscholar', name: 'Semantic Scholar', enabled: true, order: 4 }
|
||
]
|
||
};
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 构建代理 URL(只对需要的服务使用代理)
|
||
*/
|
||
function buildProxyUrl(service, path) {
|
||
const config = getProxyConfig();
|
||
|
||
// PubMed、Semantic Scholar 和 arXiv 需要代理
|
||
const needsProxy = ['pubmed', 'semanticscholar', 'arxiv'];
|
||
|
||
if (!config.enabled || !needsProxy.includes(service)) {
|
||
return null;
|
||
}
|
||
|
||
return `${config.baseUrl}/api/${service}/${path}`;
|
||
}
|
||
|
||
/**
|
||
* 添加代理认证头
|
||
* @param {string} service - 服务名称(用于选择正确的 API Key)
|
||
*/
|
||
function getProxyHeaders(service) {
|
||
const config = getProxyConfig();
|
||
const headers = {};
|
||
|
||
// Auth Key(共享模式)
|
||
if (config.authKey) {
|
||
headers['X-Auth-Key'] = config.authKey;
|
||
}
|
||
|
||
// API Key 透传(透传模式)
|
||
if (service === 'semanticscholar' && config.semanticScholarApiKey) {
|
||
headers['X-Api-Key'] = config.semanticScholarApiKey;
|
||
} else if (service === 'pubmed' && config.pubmedApiKey) {
|
||
headers['X-Api-Key'] = config.pubmedApiKey;
|
||
}
|
||
|
||
return headers;
|
||
}
|
||
|
||
/**
|
||
* CrossRef API查询
|
||
* 免费,无需API key,覆盖140M+ DOI
|
||
*/
|
||
class CrossRefResolver {
|
||
constructor() {
|
||
this.baseUrl = 'https://api.crossref.org/works';
|
||
// CrossRef建议提供邮箱可获得更高速率限制(polite pool)
|
||
const config = getProxyConfig();
|
||
this.mailto = config.contactEmail || null;
|
||
}
|
||
|
||
/**
|
||
* 通过标题查询DOI
|
||
* @param {string} title - 论文标题
|
||
* @param {Object} metadata - 可选的额外元数据(author, year等)
|
||
* @returns {Promise<Object|null>} DOI信息
|
||
*/
|
||
async queryByTitle(title, metadata = {}) {
|
||
if (!title || title.length < 10) {
|
||
return null;
|
||
}
|
||
|
||
try {
|
||
// 标题规范化
|
||
const normalizedTitle = normalizeTitle(title);
|
||
|
||
// 构建查询URL
|
||
const params = new URLSearchParams({
|
||
'query.title': normalizedTitle,
|
||
rows: 5 // 返回前5个结果
|
||
});
|
||
|
||
// 只在有邮箱时才添加mailto参数
|
||
if (this.mailto) {
|
||
params.append('mailto', this.mailto);
|
||
}
|
||
|
||
// 如果有作者信息,添加到查询
|
||
if (metadata.authors && metadata.authors.length > 0) {
|
||
params.append('query.author', metadata.authors[0]);
|
||
}
|
||
|
||
// CrossRef 支持 CORS,不需要代理
|
||
const url = `${this.baseUrl}?${params.toString()}`;
|
||
console.log('[CrossRef] Querying:', title.substring(0, 50));
|
||
|
||
const headers = {};
|
||
// 只在有邮箱时才设置User-Agent
|
||
if (this.mailto) {
|
||
headers['User-Agent'] = `PaperBurner/1.0 (mailto:${this.mailto})`;
|
||
}
|
||
|
||
const response = await fetch(url, { headers });
|
||
|
||
if (!response.ok) {
|
||
console.warn('[CrossRef] API error:', response.status);
|
||
return null;
|
||
}
|
||
|
||
const data = await response.json();
|
||
|
||
// 只记录摘要信息,不打印完整的响应对象(数据量大)
|
||
console.log('[CrossRef] API 响应摘要:', {
|
||
totalResults: data.message?.['total-results'] || 0,
|
||
itemsReturned: data.message?.items?.length || 0
|
||
});
|
||
|
||
if (!data.message || !data.message.items || data.message.items.length === 0) {
|
||
return null;
|
||
}
|
||
|
||
// 选择最佳匹配
|
||
const bestMatch = this._findBestMatch(data.message.items, title, metadata);
|
||
|
||
if (bestMatch) {
|
||
return this._formatResult(bestMatch);
|
||
}
|
||
|
||
return null;
|
||
|
||
} catch (error) {
|
||
console.error('[CrossRef] Query failed:', error);
|
||
return null;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 批量查询
|
||
* @param {Array} references - 文献列表
|
||
* @returns {Promise<Array>} 查询结果
|
||
*/
|
||
async batchQuery(references) {
|
||
console.log(`[CrossRef] Batch querying ${references.length} references`);
|
||
|
||
const results = [];
|
||
|
||
for (let i = 0; i < references.length; i++) {
|
||
const ref = references[i];
|
||
|
||
const result = await this.queryByTitle(ref.title, {
|
||
authors: ref.authors,
|
||
year: ref.year
|
||
});
|
||
|
||
results.push({
|
||
original: ref,
|
||
resolved: result,
|
||
success: !!result
|
||
});
|
||
|
||
// CrossRef 不限制但保持礼貌,较短延迟
|
||
if (i < references.length - 1) {
|
||
await new Promise(resolve => setTimeout(resolve, 400));
|
||
}
|
||
}
|
||
|
||
return results;
|
||
}
|
||
|
||
/**
|
||
* 查找最佳匹配结果
|
||
*/
|
||
_findBestMatch(items, queryTitle, metadata) {
|
||
const queryTitleLower = queryTitle.toLowerCase().trim();
|
||
|
||
for (const item of items) {
|
||
const itemTitle = (item.title && item.title[0]) || '';
|
||
const itemTitleLower = itemTitle.toLowerCase().trim();
|
||
|
||
// 计算相似度
|
||
const similarity = this._calculateSimilarity(queryTitleLower, itemTitleLower);
|
||
|
||
// 相似度阈值:0.8
|
||
if (similarity >= 0.8) {
|
||
// 如果有年份信息,验证年份
|
||
if (metadata.year && item.published) {
|
||
const itemYear = item.published['date-parts']?.[0]?.[0];
|
||
if (itemYear && Math.abs(itemYear - metadata.year) > 1) {
|
||
continue; // 年份不匹配,跳过
|
||
}
|
||
}
|
||
|
||
return item;
|
||
}
|
||
}
|
||
|
||
return null;
|
||
}
|
||
|
||
/**
|
||
* 简单的字符串相似度计算(Levenshtein距离)
|
||
*/
|
||
_calculateSimilarity(str1, str2) {
|
||
const longer = str1.length > str2.length ? str1 : str2;
|
||
const shorter = str1.length > str2.length ? str2 : str1;
|
||
|
||
if (longer.length === 0) {
|
||
return 1.0;
|
||
}
|
||
|
||
// 如果短字符串是长字符串的子串,认为高度相似
|
||
if (longer.includes(shorter)) {
|
||
return 0.95;
|
||
}
|
||
|
||
const editDistance = this._levenshteinDistance(str1, str2);
|
||
return (longer.length - editDistance) / longer.length;
|
||
}
|
||
|
||
/**
|
||
* Levenshtein距离算法
|
||
*/
|
||
_levenshteinDistance(str1, str2) {
|
||
const matrix = [];
|
||
|
||
for (let i = 0; i <= str2.length; i++) {
|
||
matrix[i] = [i];
|
||
}
|
||
|
||
for (let j = 0; j <= str1.length; j++) {
|
||
matrix[0][j] = j;
|
||
}
|
||
|
||
for (let i = 1; i <= str2.length; i++) {
|
||
for (let j = 1; j <= str1.length; j++) {
|
||
if (str2.charAt(i - 1) === str1.charAt(j - 1)) {
|
||
matrix[i][j] = matrix[i - 1][j - 1];
|
||
} else {
|
||
matrix[i][j] = Math.min(
|
||
matrix[i - 1][j - 1] + 1,
|
||
matrix[i][j - 1] + 1,
|
||
matrix[i - 1][j] + 1
|
||
);
|
||
}
|
||
}
|
||
}
|
||
|
||
return matrix[str2.length][str1.length];
|
||
}
|
||
|
||
/**
|
||
* 格式化结果
|
||
*/
|
||
_formatResult(item) {
|
||
// 记录原始数据的关键字段,帮助调试
|
||
console.log('[CrossRef] 原始数据字段:', {
|
||
hasAbstract: !!item.abstract,
|
||
hasTitle: !!item.title,
|
||
hasAuthors: !!item.author,
|
||
hasDOI: !!item.DOI,
|
||
hasContainerTitle: !!item['container-title'],
|
||
abstractLength: item.abstract ? item.abstract.length : 0,
|
||
// 列出所有可用的字段
|
||
availableFields: Object.keys(item).filter(key => item[key] !== null && item[key] !== undefined)
|
||
});
|
||
|
||
const authors = (item.author || []).map(a => {
|
||
return `${a.given || ''} ${a.family || ''}`.trim();
|
||
});
|
||
|
||
// 处理 abstract:CrossRef 的 abstract 可能包含 HTML 标签或 JATS XML
|
||
let abstract = null;
|
||
if (item.abstract) {
|
||
// 移除 JATS XML 标签 <jats:p> 等
|
||
abstract = item.abstract
|
||
.replace(/<jats:[^>]+>/g, '')
|
||
.replace(/<\/jats:[^>]+>/g, '')
|
||
.replace(/<[^>]+>/g, '') // 移除所有 HTML 标签
|
||
.trim();
|
||
|
||
// 如果处理后为空,设为 null
|
||
if (!abstract) {
|
||
abstract = null;
|
||
} else {
|
||
console.log('[CrossRef] 提取到摘要,长度:', abstract.length);
|
||
}
|
||
}
|
||
|
||
return {
|
||
doi: item.DOI,
|
||
title: item.title?.[0] || null,
|
||
authors: authors.length > 0 ? authors : null,
|
||
year: item.published?.['date-parts']?.[0]?.[0] || null,
|
||
journal: item['container-title']?.[0] || null,
|
||
volume: item.volume || null,
|
||
issue: item.issue || null,
|
||
pages: item.page || null,
|
||
url: item.URL || `https://doi.org/${item.DOI}`,
|
||
publisher: item.publisher || null,
|
||
type: item.type || null,
|
||
abstract: abstract,
|
||
source: 'crossref',
|
||
confidence: 0.9
|
||
};
|
||
}
|
||
}
|
||
|
||
/**
|
||
* OpenAlex API查询
|
||
* 免费开放学术图谱,覆盖更广(包括预印本)
|
||
*/
|
||
class OpenAlexResolver {
|
||
constructor() {
|
||
this.baseUrl = 'https://api.openalex.org/works';
|
||
// OpenAlex建议提供邮箱可获得更高速率限制,但不强制
|
||
// 从配置读取,如果没有则不发送mailto参数
|
||
const config = getProxyConfig();
|
||
this.email = config.contactEmail || null;
|
||
}
|
||
|
||
async queryByTitle(title, metadata = {}) {
|
||
if (!title || title.length < 10) {
|
||
return null;
|
||
}
|
||
|
||
try {
|
||
// 标题规范化
|
||
const normalizedTitle = normalizeTitle(title);
|
||
|
||
const params = new URLSearchParams({
|
||
search: normalizedTitle
|
||
});
|
||
|
||
// 只在有邮箱时才添加mailto参数
|
||
if (this.email) {
|
||
params.append('mailto', this.email);
|
||
}
|
||
|
||
// OpenAlex 支持 CORS,不需要代理
|
||
const url = `${this.baseUrl}?${params.toString()}`;
|
||
console.log('[OpenAlex] Querying:', title.substring(0, 50));
|
||
|
||
const response = await fetch(url);
|
||
|
||
if (!response.ok) {
|
||
console.warn('[OpenAlex] API error:', response.status);
|
||
return null;
|
||
}
|
||
|
||
const data = await response.json();
|
||
|
||
if (!data.results || data.results.length === 0) {
|
||
return null;
|
||
}
|
||
|
||
// 选择最佳匹配
|
||
const bestMatch = this._findBestMatch(data.results, title, metadata);
|
||
|
||
if (bestMatch) {
|
||
return this._formatResult(bestMatch);
|
||
}
|
||
|
||
return null;
|
||
|
||
} catch (error) {
|
||
console.error('[OpenAlex] Query failed:', error);
|
||
return null;
|
||
}
|
||
}
|
||
|
||
_findBestMatch(results, queryTitle, metadata) {
|
||
const queryTitleLower = queryTitle.toLowerCase().trim();
|
||
|
||
for (const item of results) {
|
||
const itemTitle = (item.title || '').toLowerCase().trim();
|
||
|
||
// 简单的包含判断
|
||
if (itemTitle.includes(queryTitleLower) || queryTitleLower.includes(itemTitle)) {
|
||
// 验证年份
|
||
if (metadata.year && item.publication_year) {
|
||
if (Math.abs(item.publication_year - metadata.year) > 1) {
|
||
continue;
|
||
}
|
||
}
|
||
|
||
return item;
|
||
}
|
||
}
|
||
|
||
return null;
|
||
}
|
||
|
||
_formatResult(item) {
|
||
const authors = (item.authorships || []).map(a => a.author?.display_name).filter(Boolean);
|
||
|
||
// 重建摘要
|
||
let abstract = null;
|
||
if (item.abstract_inverted_index) {
|
||
abstract = this._reconstructAbstract(item.abstract_inverted_index);
|
||
console.log('[OpenAlex] 从倒排索引重建摘要,长度:', abstract ? abstract.length : 0);
|
||
}
|
||
|
||
// 记录可用字段
|
||
console.log('[OpenAlex] 原始数据字段:', {
|
||
hasAbstract: !!abstract,
|
||
hasTitle: !!item.title,
|
||
hasAuthors: authors.length > 0,
|
||
hasDOI: !!item.doi,
|
||
hasJournal: !!item.primary_location?.source?.display_name,
|
||
abstractLength: abstract ? abstract.length : 0
|
||
});
|
||
|
||
return {
|
||
doi: item.doi?.replace('https://doi.org/', '') || null,
|
||
title: item.title || null,
|
||
authors: authors.length > 0 ? authors : null,
|
||
year: item.publication_year || null,
|
||
journal: item.primary_location?.source?.display_name || null,
|
||
url: item.doi || item.id || null,
|
||
openAccessUrl: item.open_access?.oa_url || null,
|
||
citationCount: item.cited_by_count || 0,
|
||
abstract: abstract,
|
||
source: 'openalex',
|
||
confidence: 0.85
|
||
};
|
||
}
|
||
|
||
/**
|
||
* 重建摘要(OpenAlex使用倒排索引存储摘要)
|
||
*/
|
||
_reconstructAbstract(invertedIndex) {
|
||
try {
|
||
if (!invertedIndex || typeof invertedIndex !== 'object') {
|
||
return null;
|
||
}
|
||
|
||
// 找出最大位置,确定数组大小
|
||
let maxPos = 0;
|
||
for (const positions of Object.values(invertedIndex)) {
|
||
if (Array.isArray(positions)) {
|
||
for (const pos of positions) {
|
||
if (pos > maxPos) maxPos = pos;
|
||
}
|
||
}
|
||
}
|
||
|
||
// 创建数组并填充单词
|
||
const words = new Array(maxPos + 1);
|
||
for (const [word, positions] of Object.entries(invertedIndex)) {
|
||
if (Array.isArray(positions)) {
|
||
positions.forEach(pos => {
|
||
words[pos] = word;
|
||
});
|
||
}
|
||
}
|
||
|
||
// 过滤 undefined 并连接
|
||
const abstract = words.filter(w => w !== undefined).join(' ');
|
||
|
||
// 如果重建的摘要太短(可能有问题),返回 null
|
||
if (abstract.length < 50) {
|
||
console.warn('[OpenAlex] 重建的摘要太短,可能有问题:', abstract.length);
|
||
return null;
|
||
}
|
||
|
||
return abstract;
|
||
} catch (error) {
|
||
console.error('[OpenAlex] Abstract reconstruction failed:', error);
|
||
return null;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 批量查询
|
||
* @param {Array} references - 文献列表
|
||
* @returns {Promise<Array>} 查询结果
|
||
*/
|
||
async batchQuery(references) {
|
||
console.log(`[OpenAlex] Batch querying ${references.length} references`);
|
||
|
||
const results = [];
|
||
|
||
for (let i = 0; i < references.length; i++) {
|
||
const ref = references[i];
|
||
|
||
const result = await this.queryByTitle(ref.title, {
|
||
authors: ref.authors,
|
||
year: ref.year
|
||
});
|
||
|
||
results.push({
|
||
original: ref,
|
||
resolved: result,
|
||
success: !!result
|
||
});
|
||
|
||
// OpenAlex Polite Pool: 10 req/s,留余量使用较短延迟
|
||
if (i < references.length - 1) {
|
||
await new Promise(resolve => setTimeout(resolve, 300));
|
||
}
|
||
}
|
||
|
||
return results;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Semantic Scholar API查询
|
||
* 学术搜索引擎,支持批量查询
|
||
*/
|
||
class SemanticScholarResolver {
|
||
constructor() {
|
||
this.baseUrl = 'https://api.semanticscholar.org/graph/v1';
|
||
this.batchUrl = 'https://api.semanticscholar.org/graph/v1/paper/batch';
|
||
}
|
||
|
||
async queryByTitle(title, metadata = {}) {
|
||
if (!title || title.length < 10) {
|
||
return null;
|
||
}
|
||
|
||
try {
|
||
// 标题规范化
|
||
const normalizedTitle = normalizeTitle(title);
|
||
|
||
const params = new URLSearchParams({
|
||
query: normalizedTitle,
|
||
limit: 5,
|
||
fields: 'title,authors,year,venue,externalIds,url,citationCount,abstract'
|
||
});
|
||
|
||
// Semantic Scholar 需要通过代理
|
||
const proxyUrl = buildProxyUrl('semanticscholar', `graph/v1/paper/search?${params.toString()}`);
|
||
const url = proxyUrl || `${this.baseUrl}/paper/search?${params.toString()}`;
|
||
const headers = proxyUrl ? getProxyHeaders('semanticscholar') : {};
|
||
|
||
console.log('[SemanticScholar] Querying:', title.substring(0, 50), proxyUrl ? '(via proxy)' : '(direct - may fail due to CORS)');
|
||
|
||
const response = await fetch(url, { headers });
|
||
|
||
if (!response.ok) {
|
||
console.warn('[SemanticScholar] API error:', response.status);
|
||
return null;
|
||
}
|
||
|
||
const data = await response.json();
|
||
|
||
if (!data.data || data.data.length === 0) {
|
||
return null;
|
||
}
|
||
|
||
const bestMatch = this._findBestMatch(data.data, title, metadata);
|
||
|
||
if (bestMatch) {
|
||
return this._formatResult(bestMatch);
|
||
}
|
||
|
||
return null;
|
||
|
||
} catch (error) {
|
||
console.error('[SemanticScholar] Query failed:', error);
|
||
return null;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 批量查询(一次性查询多个文献)
|
||
*/
|
||
async batchQuery(references) {
|
||
if (!references || references.length === 0) {
|
||
return [];
|
||
}
|
||
|
||
try {
|
||
console.log(`[SemanticScholar] Batch querying ${references.length} references`);
|
||
|
||
// 从配置读取速率限制
|
||
const proxyConfig = getProxyConfig();
|
||
let delay = 1200; // 默认保守值
|
||
|
||
if (proxyConfig.rateLimit?.services?.semanticscholar?.tps) {
|
||
const tps = proxyConfig.rateLimit.services.semanticscholar.tps;
|
||
// 计算延迟 = (1000 / TPS) * 1.5 (留50%余量)
|
||
delay = Math.ceil((1000 / tps) * 1.5);
|
||
console.log(`[SemanticScholar] Using rate limit: ${tps} TPS, delay: ${delay}ms`);
|
||
}
|
||
|
||
const results = [];
|
||
|
||
// 为了遵守速率限制,串行处理每个文献
|
||
for (let i = 0; i < references.length; i++) {
|
||
const ref = references[i];
|
||
|
||
// 在每个请求前延迟(除了第一个),确保严格遵守速率限制
|
||
if (i > 0) {
|
||
await new Promise(resolve => setTimeout(resolve, delay));
|
||
}
|
||
|
||
const result = await this.queryByTitle(ref.title, {
|
||
authors: ref.authors,
|
||
year: ref.year
|
||
});
|
||
|
||
results.push({
|
||
original: ref,
|
||
resolved: result,
|
||
success: !!result
|
||
});
|
||
}
|
||
|
||
return results;
|
||
|
||
} catch (error) {
|
||
console.error('[SemanticScholar] Batch query failed:', error);
|
||
return [];
|
||
}
|
||
}
|
||
|
||
_findBestMatch(papers, queryTitle, metadata) {
|
||
const queryTitleLower = queryTitle.toLowerCase().trim();
|
||
|
||
for (const paper of papers) {
|
||
const paperTitle = (paper.title || '').toLowerCase().trim();
|
||
|
||
if (paperTitle.includes(queryTitleLower) || queryTitleLower.includes(paperTitle)) {
|
||
if (metadata.year && paper.year) {
|
||
if (Math.abs(paper.year - metadata.year) > 1) {
|
||
continue;
|
||
}
|
||
}
|
||
|
||
return paper;
|
||
}
|
||
}
|
||
|
||
return null;
|
||
}
|
||
|
||
_formatResult(paper) {
|
||
const authors = (paper.authors || []).map(a => a.name).filter(Boolean);
|
||
|
||
// 记录可用字段
|
||
console.log('[SemanticScholar] 原始数据字段:', {
|
||
hasAbstract: !!paper.abstract,
|
||
hasTitle: !!paper.title,
|
||
hasAuthors: authors.length > 0,
|
||
hasDOI: !!paper.externalIds?.DOI,
|
||
hasVenue: !!paper.venue,
|
||
abstractLength: paper.abstract ? paper.abstract.length : 0
|
||
});
|
||
|
||
return {
|
||
doi: paper.externalIds?.DOI || null,
|
||
title: paper.title || null,
|
||
authors: authors.length > 0 ? authors : null,
|
||
year: paper.year || null,
|
||
journal: paper.venue || null,
|
||
url: paper.url || (paper.externalIds?.DOI ? `https://doi.org/${paper.externalIds.DOI}` : null),
|
||
citationCount: paper.citationCount || 0,
|
||
paperId: paper.paperId || null,
|
||
abstract: paper.abstract || null,
|
||
source: 'semanticscholar',
|
||
confidence: 0.8
|
||
};
|
||
}
|
||
}
|
||
|
||
/**
|
||
* PubMed API查询
|
||
* 医学/生物学领域最全,返回PMID(可转换为DOI)
|
||
*/
|
||
class PubMedResolver {
|
||
constructor() {
|
||
this.searchUrl = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi';
|
||
this.fetchUrl = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi';
|
||
}
|
||
|
||
async queryByTitle(title, metadata = {}) {
|
||
if (!title || title.length < 10) {
|
||
return null;
|
||
}
|
||
|
||
try {
|
||
// 标题规范化
|
||
const normalizedTitle = normalizeTitle(title);
|
||
|
||
// 截断过长的标题,避免URL过长或查询超时
|
||
// PubMed 搜索对长标题支持不好,取前200字符通常足够匹配
|
||
let searchTitle = normalizedTitle.length > 200 ? normalizedTitle.substring(0, 200) : normalizedTitle;
|
||
|
||
// Step 1: 搜索获取PMID
|
||
const searchParams = new URLSearchParams({
|
||
db: 'pubmed',
|
||
term: searchTitle,
|
||
retmode: 'json',
|
||
retmax: 5
|
||
});
|
||
|
||
// PubMed 需要通过代理
|
||
const searchProxyUrl = buildProxyUrl('pubmed', `esearch.fcgi?${searchParams.toString()}`);
|
||
const searchUrl = searchProxyUrl || `${this.searchUrl}?${searchParams.toString()}`;
|
||
const headers = searchProxyUrl ? getProxyHeaders('pubmed') : {};
|
||
|
||
console.log('[PubMed] Searching:', searchTitle.substring(0, 50), searchProxyUrl ? '(via proxy)' : '(direct)');
|
||
|
||
const searchResponse = await fetch(searchUrl, { headers });
|
||
if (!searchResponse.ok) {
|
||
console.warn('[PubMed] Search failed:', searchResponse.status);
|
||
return null;
|
||
}
|
||
|
||
const searchData = await searchResponse.json();
|
||
const pmids = searchData.esearchresult?.idlist || [];
|
||
|
||
if (pmids.length === 0) {
|
||
return null;
|
||
}
|
||
|
||
// Step 2: 获取详细信息
|
||
const fetchParams = new URLSearchParams({
|
||
db: 'pubmed',
|
||
id: pmids.join(','),
|
||
retmode: 'xml'
|
||
});
|
||
|
||
const fetchProxyUrl = buildProxyUrl('pubmed', `efetch.fcgi?${fetchParams.toString()}`);
|
||
const fetchUrl = fetchProxyUrl || `${this.fetchUrl}?${fetchParams.toString()}`;
|
||
|
||
const fetchResponse = await fetch(fetchUrl, { headers });
|
||
if (!fetchResponse.ok) {
|
||
return null;
|
||
}
|
||
|
||
const xmlText = await fetchResponse.text();
|
||
const result = this._parseXML(xmlText, title, metadata);
|
||
|
||
return result;
|
||
|
||
} catch (error) {
|
||
console.error('[PubMed] Query failed:', error);
|
||
return null;
|
||
}
|
||
}
|
||
|
||
_parseXML(xmlText, queryTitle, metadata) {
|
||
try {
|
||
const parser = new DOMParser();
|
||
const xmlDoc = parser.parseFromString(xmlText, 'text/xml');
|
||
|
||
const articles = xmlDoc.getElementsByTagName('PubmedArticle');
|
||
|
||
for (let i = 0; i < articles.length; i++) {
|
||
const article = articles[i];
|
||
|
||
// 提取标题
|
||
const titleElement = article.querySelector('ArticleTitle');
|
||
const articleTitle = titleElement ? titleElement.textContent : '';
|
||
|
||
// 标题匹配
|
||
if (!this._isTitleMatch(articleTitle, queryTitle)) {
|
||
continue;
|
||
}
|
||
|
||
// 提取PMID
|
||
const pmidElement = article.querySelector('PMID');
|
||
const pmid = pmidElement ? pmidElement.textContent : null;
|
||
|
||
// 提取DOI
|
||
let doi = null;
|
||
const articleIds = article.querySelectorAll('ArticleId');
|
||
for (let id of articleIds) {
|
||
if (id.getAttribute('IdType') === 'doi') {
|
||
doi = id.textContent;
|
||
break;
|
||
}
|
||
}
|
||
|
||
// 提取作者
|
||
const authorElements = article.querySelectorAll('Author');
|
||
const authors = Array.from(authorElements).map(author => {
|
||
const lastName = author.querySelector('LastName')?.textContent || '';
|
||
const foreName = author.querySelector('ForeName')?.textContent || '';
|
||
return `${foreName} ${lastName}`.trim();
|
||
}).filter(Boolean);
|
||
|
||
// 提取年份
|
||
const yearElement = article.querySelector('PubDate Year');
|
||
const year = yearElement ? parseInt(yearElement.textContent) : null;
|
||
|
||
// 提取期刊
|
||
const journalElement = article.querySelector('Journal Title');
|
||
const journal = journalElement ? journalElement.textContent : null;
|
||
|
||
// 提取摘要
|
||
const abstractElements = article.querySelectorAll('AbstractText');
|
||
let abstract = null;
|
||
if (abstractElements.length > 0) {
|
||
abstract = Array.from(abstractElements)
|
||
.map(el => el.textContent)
|
||
.join(' ');
|
||
}
|
||
|
||
return {
|
||
doi: doi,
|
||
pmid: pmid,
|
||
title: articleTitle,
|
||
authors: authors.length > 0 ? authors : null,
|
||
year: year,
|
||
journal: journal,
|
||
abstract: abstract,
|
||
url: doi ? `https://doi.org/${doi}` : `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`,
|
||
source: 'pubmed',
|
||
confidence: 0.9
|
||
};
|
||
}
|
||
|
||
return null;
|
||
|
||
} catch (error) {
|
||
console.error('[PubMed] XML parsing failed:', error);
|
||
return null;
|
||
}
|
||
}
|
||
|
||
_isTitleMatch(title1, title2) {
|
||
const t1 = title1.toLowerCase().trim();
|
||
const t2 = title2.toLowerCase().trim();
|
||
return t1.includes(t2) || t2.includes(t1);
|
||
}
|
||
|
||
/**
|
||
* 批量查询
|
||
* @param {Array} references - 文献列表
|
||
* @returns {Promise<Array>} 查询结果
|
||
*/
|
||
async batchQuery(references) {
|
||
console.log(`[PubMed] Batch querying ${references.length} references`);
|
||
|
||
// 从配置读取速率限制
|
||
const proxyConfig = getProxyConfig();
|
||
let delay = 1200; // 默认保守值
|
||
|
||
if (proxyConfig.rateLimit?.services?.pubmed?.tps) {
|
||
const tps = proxyConfig.rateLimit.services.pubmed.tps;
|
||
// 计算延迟 = (1000 / TPS) * 1.5 (留50%余量)
|
||
delay = Math.ceil((1000 / tps) * 1.5);
|
||
console.log(`[PubMed] Using rate limit: ${tps} TPS, delay: ${delay}ms`);
|
||
}
|
||
|
||
const results = [];
|
||
|
||
for (let i = 0; i < references.length; i++) {
|
||
const ref = references[i];
|
||
|
||
// 在每个请求前延迟(除了第一个)
|
||
if (i > 0) {
|
||
await new Promise(resolve => setTimeout(resolve, delay));
|
||
}
|
||
|
||
const result = await this.queryByTitle(ref.title, {
|
||
authors: ref.authors,
|
||
year: ref.year
|
||
});
|
||
|
||
results.push({
|
||
original: ref,
|
||
resolved: result,
|
||
success: !!result
|
||
});
|
||
}
|
||
|
||
return results;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* arXiv API查询
|
||
* 免费预印本库,主要覆盖CS/物理/数学领域
|
||
*/
|
||
class ArXivResolver {
|
||
constructor() {
|
||
this.baseUrl = 'http://export.arxiv.org/api/query';
|
||
}
|
||
|
||
async queryByTitle(title, metadata = {}) {
|
||
if (!title || title.length < 10) {
|
||
return null;
|
||
}
|
||
|
||
try {
|
||
// 标题规范化
|
||
const normalizedTitle = normalizeTitle(title);
|
||
|
||
// 构建查询
|
||
const params = new URLSearchParams({
|
||
search_query: `ti:"${normalizedTitle}"`,
|
||
start: 0,
|
||
max_results: 5,
|
||
sortBy: 'relevance',
|
||
sortOrder: 'descending'
|
||
});
|
||
|
||
// arXiv 需要通过代理
|
||
const proxyUrl = buildProxyUrl('arxiv', `query?${params.toString()}`);
|
||
const url = proxyUrl || `${this.baseUrl}?${params.toString()}`;
|
||
const headers = proxyUrl ? getProxyHeaders('arxiv') : {};
|
||
|
||
console.log('[arXiv] Querying:', title.substring(0, 50), proxyUrl ? '(via proxy)' : '(direct - may fail due to CORS)');
|
||
|
||
const response = await fetch(url, { headers });
|
||
|
||
if (!response.ok) {
|
||
console.warn('[arXiv] API error:', response.status);
|
||
return null;
|
||
}
|
||
|
||
const xmlText = await response.text();
|
||
const result = this._parseXML(xmlText, title, metadata);
|
||
|
||
return result;
|
||
|
||
} catch (error) {
|
||
console.error('[arXiv] Query failed:', error);
|
||
return null;
|
||
}
|
||
}
|
||
|
||
_parseXML(xmlText, queryTitle, metadata) {
|
||
try {
|
||
const parser = new DOMParser();
|
||
const xmlDoc = parser.parseFromString(xmlText, 'application/xml');
|
||
|
||
// arXiv 使用 Atom 格式
|
||
const entries = xmlDoc.getElementsByTagName('entry');
|
||
|
||
for (let i = 0; i < entries.length; i++) {
|
||
const entry = entries[i];
|
||
|
||
// 提取标题
|
||
const titleElement = entry.querySelector('title');
|
||
const entryTitle = titleElement ? titleElement.textContent.trim() : '';
|
||
|
||
// 标题匹配
|
||
if (!this._isTitleMatch(entryTitle, queryTitle)) {
|
||
continue;
|
||
}
|
||
|
||
// 提取arXiv ID
|
||
const idElement = entry.querySelector('id');
|
||
const arxivId = idElement ? idElement.textContent.split('/').pop() : null;
|
||
|
||
// 提取DOI(如果有)
|
||
let doi = null;
|
||
const doiElement = entry.querySelector('arxiv\\:doi, doi');
|
||
if (doiElement) {
|
||
doi = doiElement.textContent.trim();
|
||
}
|
||
|
||
// 提取作者
|
||
const authorElements = entry.querySelectorAll('author name');
|
||
const authors = Array.from(authorElements).map(el => el.textContent.trim()).filter(Boolean);
|
||
|
||
// 提取年份
|
||
const publishedElement = entry.querySelector('published');
|
||
let year = null;
|
||
if (publishedElement) {
|
||
const dateStr = publishedElement.textContent;
|
||
year = parseInt(dateStr.substring(0, 4));
|
||
}
|
||
|
||
// 提取摘要
|
||
const summaryElement = entry.querySelector('summary');
|
||
const abstract = summaryElement ? summaryElement.textContent.trim() : null;
|
||
|
||
// 提取分类
|
||
const categoryElements = entry.querySelectorAll('category');
|
||
const categories = Array.from(categoryElements).map(el => el.getAttribute('term')).filter(Boolean);
|
||
|
||
// 验证年份
|
||
if (metadata.year && year) {
|
||
if (Math.abs(year - metadata.year) > 1) {
|
||
continue;
|
||
}
|
||
}
|
||
|
||
return {
|
||
doi: doi,
|
||
arxivId: arxivId,
|
||
title: entryTitle,
|
||
authors: authors.length > 0 ? authors : null,
|
||
year: year,
|
||
journal: 'arXiv',
|
||
categories: categories,
|
||
abstract: abstract,
|
||
url: doi ? `https://doi.org/${doi}` : `https://arxiv.org/abs/${arxivId}`,
|
||
pdfUrl: `https://arxiv.org/pdf/${arxivId}.pdf`,
|
||
source: 'arxiv',
|
||
confidence: 0.85
|
||
};
|
||
}
|
||
|
||
return null;
|
||
|
||
} catch (error) {
|
||
console.error('[arXiv] XML parsing failed:', error);
|
||
return null;
|
||
}
|
||
}
|
||
|
||
_isTitleMatch(title1, title2) {
|
||
const t1 = title1.toLowerCase().trim();
|
||
const t2 = title2.toLowerCase().trim();
|
||
return t1.includes(t2) || t2.includes(t1);
|
||
}
|
||
|
||
/**
|
||
* 批量查询
|
||
* @param {Array} references - 文献列表
|
||
* @returns {Promise<Array>} 查询结果
|
||
*/
|
||
async batchQuery(references) {
|
||
console.log(`[arXiv] Batch querying ${references.length} references`);
|
||
|
||
// arXiv 使用全局速率限制(没有单独的服务级限制)
|
||
const proxyConfig = getProxyConfig();
|
||
let delay = 1000; // 默认值
|
||
|
||
if (proxyConfig.rateLimit?.perIpTps) {
|
||
const tps = proxyConfig.rateLimit.perIpTps;
|
||
// 计算延迟 = (1000 / TPS) * 1.5 (留50%余量)
|
||
delay = Math.ceil((1000 / tps) * 1.5);
|
||
console.log(`[arXiv] Using rate limit: ${tps} TPS (global), delay: ${delay}ms`);
|
||
}
|
||
|
||
const results = [];
|
||
|
||
for (let i = 0; i < references.length; i++) {
|
||
const ref = references[i];
|
||
|
||
// 在每个请求前延迟(除了第一个)
|
||
if (i > 0) {
|
||
await new Promise(resolve => setTimeout(resolve, delay));
|
||
}
|
||
|
||
const result = await this.queryByTitle(ref.title, {
|
||
authors: ref.authors,
|
||
year: ref.year
|
||
});
|
||
|
||
results.push({
|
||
original: ref,
|
||
resolved: result,
|
||
success: !!result
|
||
});
|
||
}
|
||
|
||
return results;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 多源DOI解析器 - 统一接口
|
||
*/
|
||
class MultiSourceDOIResolver {
|
||
constructor(options = {}) {
|
||
this.crossref = new CrossRefResolver();
|
||
this.openalex = new OpenAlexResolver();
|
||
this.pubmed = new PubMedResolver();
|
||
this.arxiv = new ArXivResolver();
|
||
this.semanticscholar = new SemanticScholarResolver();
|
||
|
||
// 从 localStorage 读取源配置
|
||
const sourcesConfig = getSourcesConfig();
|
||
const enabledSources = sourcesConfig.sources
|
||
.filter(s => s.enabled && s.key !== 'semanticscholar') // semanticscholar 单独处理
|
||
.sort((a, b) => a.order - b.order)
|
||
.map(s => s.key);
|
||
|
||
// 可配置查询顺序(不包括semanticscholar,它用于托底)
|
||
this.queryOrder = options.queryOrder || enabledSources;
|
||
|
||
// 超时设置(毫秒)
|
||
this.timeout = options.timeout || 8000;
|
||
|
||
// 是否启用Semantic Scholar托底(从配置读取)
|
||
const s2Source = sourcesConfig.sources.find(s => s.key === 'semanticscholar');
|
||
this.enableSemanticScholarFallback = options.enableSemanticScholarFallback !== undefined
|
||
? options.enableSemanticScholarFallback
|
||
: (s2Source ? s2Source.enabled : true);
|
||
|
||
console.log('[DOIResolver] Initialized with funnel strategy - source order:', this.queryOrder,
|
||
'S2 fallback:', this.enableSemanticScholarFallback);
|
||
}
|
||
|
||
/**
|
||
* 统一查询接口(多源回退)
|
||
* @param {Object} reference - 文献信息 {title, authors, year, journal}
|
||
* @returns {Promise<Object|null>} 包含DOI的完整元数据
|
||
*/
|
||
async resolve(reference) {
|
||
if (!reference || !reference.title) {
|
||
return null;
|
||
}
|
||
|
||
const title = reference.title;
|
||
const metadata = {
|
||
authors: reference.authors,
|
||
year: reference.year,
|
||
journal: reference.journal
|
||
};
|
||
|
||
console.log(`[DOIResolver] Resolving: "${title.substring(0, 60)}..."`);
|
||
|
||
// 按顺序尝试各个数据源
|
||
for (const source of this.queryOrder) {
|
||
try {
|
||
const resolver = this._getResolver(source);
|
||
if (!resolver) continue;
|
||
|
||
// 添加超时保护
|
||
const result = await Promise.race([
|
||
resolver.queryByTitle(title, metadata),
|
||
new Promise((_, reject) =>
|
||
setTimeout(() => reject(new Error('Timeout')), this.timeout)
|
||
)
|
||
]);
|
||
|
||
if (result && result.doi) {
|
||
console.log(`[DOIResolver] ✓ Found via ${source}: ${result.doi}`);
|
||
return result;
|
||
}
|
||
|
||
} catch (error) {
|
||
console.warn(`[DOIResolver] ${source} failed:`, error.message);
|
||
continue;
|
||
}
|
||
}
|
||
|
||
console.log(`[DOIResolver] ✗ No DOI found for: "${title.substring(0, 60)}..."`);
|
||
return null;
|
||
}
|
||
|
||
/**
|
||
* 批量解析(漏斗形)
|
||
* @param {Array} references - 文献列表
|
||
* @param {Function} progressCallback - 进度回调
|
||
* @returns {Promise<Array>} 解析结果
|
||
*/
|
||
async batchResolve(references, progressCallback = null) {
|
||
if (!references || references.length === 0) {
|
||
return [];
|
||
}
|
||
|
||
console.log(`[DOIResolver] Batch resolving ${references.length} references using funnel strategy`);
|
||
|
||
// 初始化所有文献为待解析状态
|
||
const results = references.map(ref => ({
|
||
original: ref,
|
||
resolved: null,
|
||
success: false
|
||
}));
|
||
|
||
let completed = 0;
|
||
|
||
// 漏斗形查询:按源顺序逐个尝试,只查询失败的文献
|
||
for (let sourceIndex = 0; sourceIndex < this.queryOrder.length; sourceIndex++) {
|
||
const source = this.queryOrder[sourceIndex];
|
||
|
||
// 收集还未成功的文献
|
||
const pending = results.filter(r => !r.success);
|
||
|
||
if (pending.length === 0) {
|
||
console.log(`[DOIResolver] All references resolved, skipping remaining sources`);
|
||
break;
|
||
}
|
||
|
||
console.log(`[DOIResolver] Round ${sourceIndex + 1}/${this.queryOrder.length}: ${source} - querying ${pending.length} pending references`);
|
||
|
||
try {
|
||
const resolver = this._getResolver(source);
|
||
if (!resolver || !resolver.batchQuery) {
|
||
console.warn(`[DOIResolver] ${source} resolver not available or missing batchQuery`);
|
||
continue;
|
||
}
|
||
|
||
// 批量查询当前源
|
||
const sourceResults = await resolver.batchQuery(pending.map(r => r.original));
|
||
|
||
// 更新成功的结果
|
||
sourceResults.forEach(sourceResult => {
|
||
if (sourceResult.success) {
|
||
const index = results.findIndex(r => r.original === sourceResult.original);
|
||
if (index !== -1) {
|
||
results[index] = sourceResult;
|
||
completed++;
|
||
|
||
if (progressCallback) {
|
||
progressCallback({
|
||
completed,
|
||
total: references.length,
|
||
current: sourceResult.original.title,
|
||
phase: 'primary'
|
||
});
|
||
}
|
||
|
||
console.log(`[DOIResolver] ✓ Resolved via ${source}: "${sourceResult.original.title.substring(0, 60)}..."`);
|
||
}
|
||
}
|
||
});
|
||
|
||
const successCount = results.filter(r => r.success).length;
|
||
console.log(`[DOIResolver] ${source} round complete: ${successCount}/${references.length} total resolved`);
|
||
|
||
// 源之间延迟,避免快速切换
|
||
if (sourceIndex < this.queryOrder.length - 1 && pending.length > 0) {
|
||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||
}
|
||
|
||
} catch (error) {
|
||
console.warn(`[DOIResolver] ${source} batch query failed:`, error.message);
|
||
continue;
|
||
}
|
||
}
|
||
|
||
const successCount = results.filter(r => r.success).length;
|
||
console.log(`[DOIResolver] Primary phase complete: ${successCount}/${references.length}`);
|
||
|
||
// 托底查询:使用 Semantic Scholar 处理失败的文献
|
||
if (this.enableSemanticScholarFallback) {
|
||
const failed = results.filter(r => !r.success);
|
||
|
||
if (failed.length > 0) {
|
||
console.log(`[DOIResolver] Fallback phase: using Semantic Scholar for ${failed.length} failed references`);
|
||
|
||
if (progressCallback) {
|
||
progressCallback({
|
||
completed: completed,
|
||
total: references.length,
|
||
current: 'Semantic Scholar托底查询',
|
||
phase: 'fallback'
|
||
});
|
||
}
|
||
|
||
const fallbackResults = await this.semanticscholar.batchQuery(
|
||
failed.map(r => r.original)
|
||
);
|
||
|
||
// 更新失败的结果
|
||
fallbackResults.forEach(fallbackResult => {
|
||
if (fallbackResult.success) {
|
||
const index = results.findIndex(r =>
|
||
r.original === fallbackResult.original
|
||
);
|
||
if (index !== -1) {
|
||
results[index] = fallbackResult;
|
||
console.log(`[DOIResolver] ✓ Resolved via S2 fallback: "${fallbackResult.original.title.substring(0, 60)}..."`);
|
||
}
|
||
}
|
||
});
|
||
|
||
successCount = results.filter(r => r.success).length;
|
||
console.log(`[DOIResolver] Fallback complete: ${successCount}/${references.length} total resolved`);
|
||
}
|
||
}
|
||
|
||
// 输出失败的文献,并为它们生成 Google 搜索链接
|
||
const finalFailed = results.filter(r => !r.success);
|
||
if (finalFailed.length > 0) {
|
||
console.log(`[DOIResolver] Failed to resolve ${finalFailed.length} references, generating Google search links:`);
|
||
finalFailed.forEach(f => {
|
||
console.log(` ✗ "${f.original.title.substring(0, 60)}..."`);
|
||
|
||
// 为失败的文献生成 Google 搜索链接作为兜底
|
||
const normalizedTitle = normalizeTitle(f.original.title);
|
||
const searchQuery = encodeURIComponent(normalizedTitle);
|
||
f.resolved = {
|
||
doi: null,
|
||
title: f.original.title,
|
||
url: `https://www.google.com/search?q=${searchQuery}`,
|
||
fallback: true,
|
||
source: 'google-search',
|
||
message: '未找到DOI,请手动搜索'
|
||
};
|
||
f.success = true; // 标记为"成功"以便返回结果
|
||
});
|
||
}
|
||
|
||
console.log(`[DOIResolver] Batch complete: ${results.filter(r => r.resolved && !r.resolved.fallback).length}/${references.length} resolved, ${finalFailed.length} with fallback search`);
|
||
|
||
return results;
|
||
}
|
||
|
||
/**
|
||
* 获取解析器实例
|
||
*/
|
||
_getResolver(source) {
|
||
const resolvers = {
|
||
'crossref': this.crossref,
|
||
'openalex': this.openalex,
|
||
'pubmed': this.pubmed,
|
||
'arxiv': this.arxiv,
|
||
'semanticscholar': this.semanticscholar
|
||
};
|
||
return resolvers[source] || null;
|
||
}
|
||
|
||
/**
|
||
* 设置邮箱(用于API礼貌池)
|
||
*/
|
||
setEmail(email) {
|
||
this.crossref.mailto = email;
|
||
this.openalex.email = email;
|
||
}
|
||
}
|
||
|
||
// 导出API
|
||
global.DOIResolver = {
|
||
MultiSourceDOIResolver,
|
||
CrossRefResolver,
|
||
OpenAlexResolver,
|
||
PubMedResolver,
|
||
ArXivResolver,
|
||
SemanticScholarResolver,
|
||
|
||
// 便捷方法
|
||
create: (options) => new MultiSourceDOIResolver(options),
|
||
|
||
version: '1.2.0'
|
||
};
|
||
|
||
console.log('[DOIResolver] Multi-source DOI resolver loaded (CrossRef + OpenAlex + arXiv + PubMed + Semantic Scholar).');
|
||
|
||
})(window);
|