Greasy Fork

Google Scholar to free PDFs

Adds Sci-Hub, LibGen, Anna's Archive, Sci-net, LibSTC Nexus, Spacefrontiers to Google Scholar results

// ==UserScript==
// @name         Google Scholar to free PDFs
// @namespace    ScholarToSciHub
// @version      1.20
// @description  Adds Sci-Hub, LibGen, Anna's Archive, Sci-net, LibSTC Nexus, Spacefrontiers to Google Scholar results
// @author       Bui Quoc Dung
// @match        https://scholar.google.*/*
// @license      AGPL-3.0-or-later
// @grant        GM.xmlHttpRequest
// @connect      *
// ==/UserScript==

// Endpoints of the mirrors and lookup services queried for every search result.
const SCIHUB_URL = 'https://tesble.com/';

// LibGen: site root plus the search endpoint built on top of it.
const LIBGEN_URL = 'https://libgen.li/';
const LIBGEN_SEARCH_URL = `${LIBGEN_URL}index.php?req=`;

// Anna's Archive: site root (no trailing slash), direct SciDB prefix, journal search prefix.
const ANNA_URL = 'https://annas-archive.org';
const ANNA_SCIDB_URL = `${ANNA_URL}/scidb/`;
const ANNA_CHECK_URL = `${ANNA_URL}/search?index=journals&q=`;

const LIBSTC_URL = 'https://hub.libstc.cc/';
const SCINET_URL = 'https://sci-net.xyz/';
const SPACEFRONTIERS_URL = 'https://spacefrontiers.org/';

// Crossref works endpoint; the URL-encoded title is appended as the `query.title` value.
const CROSSREF_URL = 'https://api.crossref.org/works?query.title=';

// Matches "10.<4+ digits>[.<digits>...]/<suffix>", where the suffix is any run of
// non-whitespace characters other than " & ' < >. Global and case-insensitive.
const DOI_REGEX = /\b(10\.\d{4,}(?:\.\d+)*\/(?:(?!["&'<>])\S)+)\b/gi;

/**
 * Promise wrapper around the userscript manager's GM.xmlHttpRequest.
 *
 * @param {object} details - Request options forwarded to GM.xmlHttpRequest
 *   (e.g. method, url). Any onload/onerror/ontimeout/onabort supplied here
 *   is overridden by the wrapper's own handlers.
 * @returns {Promise<object>} Resolves with the response object handed to
 *   onload; rejects with whatever onerror, ontimeout or onabort receives.
 */
function httpRequest(details) {
    return new Promise((resolve, reject) => {
        GM.xmlHttpRequest({
            ...details,
            onload: resolve,
            onerror: reject,
            // Without these two handlers a timed-out or aborted request would
            // leave the promise pending forever and stall every awaiting caller.
            ontimeout: reject,
            onabort: reject
        });
    });
}

/**
 * Swaps a placeholder node for a link that opens in a new tab.
 *
 * @param {Element} span - Placeholder node; replaced via replaceWith().
 * @param {string} text - Link label. Inserted as HTML, with the first
 *   occurrence of "[PDF]", "[Chat]" and "[Maybe]" wrapped in <b>.
 * @param {string} href - Link target.
 * @param {boolean} [isNo=false] - When true the link is rendered gray.
 */
function updateLink(span, text, href, isNo = false) {
    const anchor = Object.assign(document.createElement('a'), {
        href,
        target: '_blank',
        rel: 'noopener noreferrer'
    });
    anchor.style.fontSize = '15px';
    if (isNo) {
        anchor.style.color = 'gray';
    }

    // Bold each known status tag, in the same order as a chained replace.
    let markup = text;
    for (const tag of ['[PDF]', '[Chat]', '[Maybe]']) {
        markup = markup.replace(tag, `<b>${tag}</b>`);
    }
    anchor.innerHTML = markup;

    span.replaceWith(anchor);
}

/**
 * Appends a gray "Loading..." placeholder to a container.
 *
 * @param {Element} container - Node that receives the placeholder.
 * @returns {Element} The newly created placeholder <div>.
 */
function addLoadingIndicator(container) {
    const placeholder = document.createElement('div');
    placeholder.textContent = 'Loading...';
    Object.assign(placeholder.style, {
        marginBottom: '4px',
        color: 'gray',
        fontSize: '15px'
    });
    container.appendChild(placeholder);
    return placeholder;
}

/**
 * Resolves a DOI for a search result.
 *
 * First downloads the page behind the title link and takes the first DOI-like
 * string found in it (dropping a trailing /full, /pdf, /abs, ... segment).
 * If that yields nothing, or the page cannot be fetched at all, the title
 * text is looked up on Crossref and the DOI of the first hit is used.
 *
 * @param {HTMLAnchorElement} titleLink - Title link of the result; its href
 *   and textContent are read.
 * @returns {Promise<string|null>} The DOI, or null when neither source gives one.
 */
async function fetchDOI(titleLink) {
    // Step 1: scrape the article page. A failure here must not prevent the
    // Crossref fallback below, so it has its own try block.
    try {
        const res = await httpRequest({ method: 'GET', url: titleLink.href });
        const match = res.responseText.match(DOI_REGEX);
        if (match) return match[0].replace(/\/(full\/html|full|pdf|epdf|abs|abstract)$/i, '');
    } catch (err) {
        console.warn('DOI scrape failed, falling back to Crossref:', titleLink.href, err);
    }

    // Step 2: ask Crossref for the best title match.
    try {
        const title = encodeURIComponent(titleLink.textContent.trim());
        const crRes = await httpRequest({ method: 'GET', url: `${CROSSREF_URL}${title}&rows=1` });
        const data = JSON.parse(crRes.responseText);
        return data.message.items?.[0]?.DOI || null;
    } catch (err) {
        console.warn('Crossref DOI lookup failed:', err);
        return null;
    }
}

/**
 * Looks the paper up on LibGen, first by title and then (if available) by DOI,
 * and replaces the placeholder with exactly one result link.
 *
 * @param {string} title - Paper title used for the first search.
 * @param {string|null} doi - DOI used for the second search; may be null.
 * @param {Element} span - Placeholder node handed to updateLink().
 * @returns {Promise<void>}
 */
async function checkLibgen(title, doi, span) {
    // Runs one search; on a hit it updates the link itself and returns true.
    const trySearch = async (query) => {
        try {
            const res = await httpRequest({ method: 'GET', url: LIBGEN_SEARCH_URL + query });
            const doc = new DOMParser().parseFromString(res.responseText, 'text/html');
            // Only the first result row is inspected: its second cell must link to an edition page.
            const table = doc.querySelector('.table.table-striped');
            const firstRow = table?.querySelector('tbody > tr');
            const secondTd = firstRow?.querySelectorAll('td')?.[1];
            const linkEl = secondTd?.querySelector('a[href^="edition.php?id="]');
            if (linkEl) {
                const href = linkEl.getAttribute('href');
                const detailUrl = LIBGEN_URL + href;

                const detailRes = await httpRequest({ method: 'GET', url: detailUrl });
                const detailDoc = new DOMParser().parseFromString(detailRes.responseText, 'text/html');
                // Any <table> on the edition page is treated as "file available".
                const hasPDF = !!detailDoc.querySelector('table');

                if (hasPDF) {
                    updateLink(span, '[PDF] LibGen', LIBGEN_SEARCH_URL + query);
                    return true;
                }
            }
        } catch (e) {
            console.error('LibGen check failed for query:', query, e);
        }
        return false;
    };

    const encTitle = encodeURIComponent(title);
    if (await trySearch(encTitle)) return;

    // The "[No]" link points at the last search attempted: DOI if there is one, title otherwise.
    let fallbackQuery = encTitle;
    if (doi) {
        const encDOI = encodeURIComponent(doi);
        if (await trySearch(encDOI)) return;
        fallbackQuery = encDOI;
    }
    updateLink(span, '[No] LibGen', LIBGEN_SEARCH_URL + fallbackQuery, true);
}

/**
 * Probes the Sci-Hub mirror, first with the article URL and then (if
 * available) with the DOI, and replaces the placeholder with exactly one link.
 *
 * @param {string} href - URL of the article page; appended to the mirror root.
 * @param {string|null} doi - DOI to try second; may be null.
 * @param {Element} span - Placeholder node handed to updateLink().
 * @returns {Promise<void>}
 */
async function checkSciHub(href, doi, span) {
    // Fetches one mirror URL; on a hit it updates the link itself and returns true.
    const tryURL = async (url) => {
        try {
            const res = await httpRequest({ method: 'GET', url });
            // A page mentioning an iframe/embed is taken as "viewer present".
            if (/iframe|embed/.test(res.responseText)) {
                updateLink(span, '[PDF] Sci-Hub', url);
                return true;
            }
        } catch (e) {
            console.error('Sci-Hub check failed for URL:', url, e);
        }
        return false;
    };

    if (await tryURL(SCIHUB_URL + href)) return;

    // The "[No]" link points at the last URL attempted: DOI if there is one, article URL otherwise.
    let fallbackUrl = SCIHUB_URL + href;
    if (doi) {
        if (await tryURL(SCIHUB_URL + doi)) return;
        fallbackUrl = SCIHUB_URL + doi;
    }
    updateLink(span, '[No] Sci-Hub', fallbackUrl, true);
}

/**
 * Checks Anna's Archive for a DOI and replaces the placeholder with a link.
 *
 * The journal search page is fetched first. If it says "Rate limited" and
 * fewer than 10 retries have been used, another attempt is scheduled 5 s
 * later and this call returns without touching the placeholder. Otherwise,
 * when the search page lists the DOI, the SciDB page is fetched and the link
 * is labelled "[PDF]" if a viewer element is present, "[Maybe]" if not.
 *
 * @param {string} doi - DOI to look up.
 * @param {Element} span - Placeholder node handed to updateLink().
 * @param {number} [retry=0] - Number of rate-limit retries already used.
 * @returns {Promise<void>}
 */
async function checkAnna(doi, span, retry = 0) {
    const searchUrl = ANNA_CHECK_URL + encodeURIComponent(doi);
    const scidbUrl = ANNA_SCIDB_URL + doi;
    const parseHtml = (html) => new DOMParser().parseFromString(html, 'text/html');

    try {
        const searchRes = await httpRequest({ method: 'GET', url: searchUrl });
        const searchDoc = parseHtml(searchRes.responseText);
        const pageText = searchDoc.body.textContent;

        const rateLimited = pageText.includes("Rate limited");
        if (rateLimited && retry < 10) {
            setTimeout(() => checkAnna(doi, span, retry + 1), 5000);
            return;
        }

        // Either the result-metadata element exists, or some gray div mentions the DOI.
        let listed = searchDoc.querySelector('.mt-4.uppercase.text-xs.text-gray-500');
        if (!listed) {
            listed = Array.from(searchDoc.querySelectorAll('div.text-gray-500'))
                .some(div => div.textContent.includes(doi));
        }
        if (!listed) {
            updateLink(span, '[No] Anna', searchUrl, true);
            return;
        }

        const scidbRes = await httpRequest({ method: 'GET', url: scidbUrl });
        const viewer = parseHtml(scidbRes.responseText)
            .querySelector('.pdfViewer, #viewerContainer, iframe[src*="viewer.html?file="]');
        const label = viewer ? '[PDF] Anna' : '[Maybe] Anna';
        updateLink(span, label, scidbUrl);
    } catch {
        updateLink(span, '[No] Anna', searchUrl, true);
    }
}

/**
 * Checks whether LibSTC serves a PDF for the DOI and replaces the placeholder.
 *
 * Sends a HEAD request for "<root><doi>.pdf"; the file counts as available
 * only when the status is 200 and the response headers mention
 * "application/pdf" (case-insensitive).
 *
 * @param {string} doi - DOI to look up.
 * @param {Element} span - Placeholder node handed to updateLink().
 * @returns {Promise<void>}
 */
async function checkLibSTC(doi, span) {
    const pdfUrl = LIBSTC_URL + doi + '.pdf';
    try {
        const { status, responseHeaders } = await httpRequest({ method: 'HEAD', url: pdfUrl });
        const servesPdf = status === 200 && responseHeaders.toLowerCase().includes('application/pdf');
        if (servesPdf) {
            updateLink(span, '[PDF] LibSTC', pdfUrl, false);
        } else {
            updateLink(span, '[No] LibSTC', pdfUrl, true);
        }
    } catch {
        updateLink(span, '[No] LibSTC', pdfUrl, true);
    }
}

/**
 * Checks Sci-net for the DOI and replaces the placeholder with a link.
 *
 * The page counts as a hit when its text mentions "iframe", "pdf" or "embed".
 * The label and the gray "not found" styling are derived from that single
 * test, so a hit is never rendered gray.
 *
 * @param {string} doi - DOI to look up; appended to the Sci-net root URL.
 * @param {Element} span - Placeholder node handed to updateLink().
 * @returns {Promise<void>}
 */
async function checkSciNet(doi, span) {
    const url = SCINET_URL + doi;
    try {
        const res = await httpRequest({ method: 'GET', url });
        const hasPDF = /iframe|pdf|embed/.test(res.responseText);
        updateLink(span, hasPDF ? '[PDF] Sci-net' : '[No] Sci-net', url, !hasPDF);
    } catch {
        // Request failed: fall back to a visible "[No]" link.
        updateLink(span, '[No] Sci-net', url, true);
    }
}

/**
 * Checks Spacefrontiers for the DOI and replaces the placeholder with a link.
 *
 * The record page "r/<doi>" is fetched; when its first `span.relative.flex`
 * element contains "Chat with the Research", the link goes to the chat URL
 * (context = JSON with a single "doi://<doi>" URI). Otherwise a gray "[No]"
 * link to the record page is shown.
 *
 * @param {string} doi - DOI to look up.
 * @param {Element} span - Placeholder node handed to updateLink().
 * @returns {Promise<void>}
 */
async function checkSpaceFrontiers(doi, span) {
    const recordUrl = SPACEFRONTIERS_URL + 'r/' + doi;
    const context = JSON.stringify({ uris: [`doi://${doi}`] });
    const chatUrl = SPACEFRONTIERS_URL + 'c?context=' + encodeURIComponent(context) + '&no-auto-search=1';

    try {
        const res = await httpRequest({ method: 'GET', url: recordUrl });
        const page = new DOMParser().parseFromString(res.responseText, 'text/html');
        const badge = page.querySelector('span.relative.flex');
        const hasChat = badge?.textContent.includes('Chat with the Research');
        if (hasChat) {
            updateLink(span, '[Chat] Spacefrontiers', chatUrl, false);
        } else {
            updateLink(span, '[No] Spacefrontiers', recordUrl, true);
        }
    } catch {
        updateLink(span, '[No] Spacefrontiers', recordUrl, true);
    }
}

/**
 * Decorates one search result with availability links.
 *
 * Finds (or creates) the result's `.gs_or_ggsm` container, marks it with the
 * `scihub-processed` class so it is handled only once, adds four rows of
 * "Loading..." placeholders, resolves the DOI and then starts the individual
 * source checks without awaiting them.
 *
 * @param {Element} result - Root element of a single search result.
 * @returns {Promise<void>} Settles once the DOI lookup is done and the checks
 *   have been started.
 */
async function processEntry(result) {
    const titleLink = result.querySelector('.gs_rt a');
    if (!titleLink) return;

    let buttonContainer = result.querySelector('.gs_or_ggsm');
    if (!buttonContainer) {
        // Results without a side box get one with the same class structure.
        const wrapper = document.createElement('div');
        wrapper.className = 'gs_ggs gs_fl';
        wrapper.innerHTML = '<div class="gs_ggsd"><div class="gs_or_ggsm"></div></div>';
        result.insertBefore(wrapper, result.firstChild);
        buttonContainer = wrapper.querySelector('.gs_or_ggsm');
    }

    const { classList } = buttonContainer;
    if (classList.contains('scihub-processed')) return;
    classList.add('scihub-processed');

    // Row layout: CSS display value plus the sources shown in that row, in order.
    const layout = [
        ['inline-flex', ['scihub', 'libgen']],
        ['inline-flex', ['anna', 'scinet']],
        ['flex', ['libstc']],
        ['flex', ['spacefrontiers']]
    ];
    const placeholders = {};
    const rows = layout.map(([display, sources]) => {
        const row = document.createElement('span');
        row.style.display = display;
        row.style.gap = '6px';
        for (const source of sources) {
            placeholders[source] = addLoadingIndicator(row);
        }
        return row;
    });
    for (const row of rows) {
        buttonContainer.appendChild(row);
    }

    const doi = await fetchDOI(titleLink);

    // These two can work from the title / article URL alone, so they always run.
    checkLibgen(titleLink.textContent, doi, placeholders.libgen);
    checkSciHub(titleLink.href, doi, placeholders.scihub);

    if (!doi) {
        // The remaining sources are DOI-only.
        const doiOnly = [
            placeholders.anna,
            placeholders.scinet,
            placeholders.libstc,
            placeholders.spacefrontiers
        ];
        for (const span of doiOnly) {
            updateLink(span, '[No] Source', '#', true);
        }
        return;
    }

    checkAnna(doi, placeholders.anna);
    checkSciNet(doi, placeholders.scinet);
    checkLibSTC(doi, placeholders.libstc);
    checkSpaceFrontiers(doi, placeholders.spacefrontiers);
}

/**
 * Runs processEntry() over every search result currently in the results list,
 * one after another (each entry is awaited before the next one starts).
 *
 * @returns {Promise<void>}
 */
async function addButtons() {
    const entries = document.querySelectorAll('#gs_res_ccl_mid .gs_r.gs_or.gs_scl');
    for (let i = 0; i < entries.length; i += 1) {
        await processEntry(entries[i]);
    }
}

// Decorate the results already on the page, then re-scan whenever child nodes
// are added or removed anywhere under <body>.
addButtons();
const resultsObserver = new MutationObserver(() => addButtons());
resultsObserver.observe(document.body, { childList: true, subtree: true });