Greasy Fork

Ximalaya Opus Data Collector

自动采集喜马拉雅“个人中心”专辑“播放”和“订阅”数据。

// ==UserScript==
// @name         Ximalaya Opus Data Collector
// @namespace    http://tampermonkey.net/
// @version      2.9
// @description  自动采集喜马拉雅“个人中心”专辑“播放”和“订阅”数据。
// @author       You
// @match        https://studio.ximalaya.com/opus
// @grant        none
// @license      MIT
// ==/UserScript==
(function() {
    'use strict';

    let isCollecting = false;
    let allData = [];
    let timeoutId = null;

    // 创建按钮和输入框
    function createControls() {
        // 停止采集按钮
        const stopButton = createButton('停止采集', '210px', '#dc3545', () => {
            if (isCollecting) {
                isCollecting = false;
                if (timeoutId) {
                    clearTimeout(timeoutId);
                    timeoutId = null;
                }
                stopButton.disabled = true;
                stopButton.textContent = '已停止';
                saveAndDownloadCurrentData();
                alert('采集已停止,当前数据已保存为文件!');
                returnToFirstPage();
                setTimeout(() => {
                    stopButton.disabled = false;
                    stopButton.textContent = '停止采集';
                    allData = [];
                }, 2000);
            } else {
                alert('当前未在采集数据!');
            }
        });
        document.body.appendChild(stopButton);

        // 指定页码范围采集区域
        const rangeContainer = document.createElement('div');
        rangeContainer.style.position = 'fixed';
        rangeContainer.style.top = '260px';  // 原230px + 30px
        rangeContainer.style.right = '0px';
        rangeContainer.style.zIndex = '9999';

        const startLabel = document.createElement('label');
        startLabel.textContent = '起始页: ';
        startLabel.style.marginRight = '5px';
        startLabel.style.backgroundColor = '#666';
        startLabel.style.color = 'white';
        startLabel.style.padding = '2px 5px';
        const startInput = document.createElement('input');
        startInput.type = 'number';
        startInput.value = '1';
        startInput.min = '1';
        startInput.id = 'startPage';
        startInput.style.width = '60px';
        startInput.style.marginRight = '10px';
        startInput.style.backgroundColor = '#666';
        startInput.style.color = 'white';

        const endLabel = document.createElement('label');
        endLabel.textContent = '结束页: ';
        endLabel.style.marginRight = '5px';
        endLabel.style.backgroundColor = '#666';
        endLabel.style.color = 'white';
        endLabel.style.padding = '2px 5px';
        const endInput = document.createElement('input');
        endInput.type = 'number';
        endInput.value = '57';
        endInput.min = '1';
        endInput.max = '100';
        endInput.id = 'endPage';
        endInput.style.width = '60px';
        endInput.style.marginRight = '0px';
        endInput.style.backgroundColor = '#666';
        endInput.style.color = 'white';

        const rangeButton = createButton('开始采集', '150px', '#17a2b8', () => {
            // [保持原有逻辑不变]
            const startInputEl = document.getElementById('startPage');
            const endInputEl = document.getElementById('endPage');
            if (!startInputEl || !endInputEl) {
                alert('输入框未找到,请刷新页面重试!');
                return;
            }
            const start = parseInt(startInputEl.value, 10);
            const end = parseInt(endInputEl.value, 10);
            if (isNaN(start) || isNaN(end) || start < 1 || end > 100 || start > end) {
                alert('输入的页码无效!请确保起始页大于等于 1,结束页小于等于 100,且起始页小于等于结束页。');
                return;
            }
            if (!isCollecting) {
                isCollecting = true;
                allData = [];
                rangeButton.textContent = '正在采集';
                rangeButton.disabled = true;
                stopButton.disabled = false;
                collectRangePages(start, end).then(() => {
                    isCollecting = false;
                    rangeButton.textContent = '开始采集';
                    rangeButton.disabled = false;
                    stopButton.disabled = false;
                    checkAndTriggerNextCollection();
                }).catch(err => {
                    isCollecting = false;
                    rangeButton.textContent = '开始采集';
                    rangeButton.disabled = false;
                    console.error(err);
                    alert('采集失败: ' + err.message);
                    stopButton.disabled = false;
                });
            } else {
                alert('正在采集中,请先停止当前任务!');
            }
        });
        rangeContainer.appendChild(startLabel);
        rangeContainer.appendChild(startInput);
        rangeContainer.appendChild(endLabel);
        rangeContainer.appendChild(endInput);
        rangeContainer.appendChild(rangeButton);
        document.body.appendChild(rangeContainer);

        // 循环控制和时间间隔区域
        const loopContainer = document.createElement('div');
        loopContainer.style.position = 'fixed';
        loopContainer.style.top = '300px';  // 原280px + 30px
        loopContainer.style.right = '0px';
        loopContainer.style.zIndex = '9999';

        const loopCheckbox = document.createElement('input');
        loopCheckbox.type = 'checkbox';
        loopCheckbox.id = 'loopEnabled';
        loopCheckbox.checked = false;
        const loopLabel = document.createElement('label');
        loopLabel.textContent = ' 启用循环采集';
        loopLabel.style.marginRight = '10px';
        loopLabel.style.backgroundColor = '#666';
        loopLabel.style.color = 'white';
        loopLabel.style.padding = '2px 5px';
        loopLabel.htmlFor = 'loopEnabled';

        const intervalLabel = document.createElement('label');
        intervalLabel.textContent = '循环间隔 (分钟): ';
        intervalLabel.style.marginRight = '5px';
        intervalLabel.style.backgroundColor = '#666';
        intervalLabel.style.color = 'white';
        intervalLabel.style.padding = '2px 5px';
        const intervalInput = document.createElement('input');
        intervalInput.type = 'number';
        intervalInput.value = '30';
        intervalInput.min = '1';
        intervalInput.id = 'intervalMinutes';
        intervalInput.style.width = '60px';
        intervalInput.style.backgroundColor = '#666';
        intervalInput.style.color = 'white';
        loopContainer.appendChild(loopCheckbox);
        loopContainer.appendChild(loopLabel);
        loopContainer.appendChild(intervalLabel);
        loopContainer.appendChild(intervalInput);
        document.body.appendChild(loopContainer);
    }

    // 创建按钮的辅助函数
    function createButton(text, top, color, onClick) {
        const button = document.createElement('button');
        button.textContent = text;
        button.style.position = 'fixed';
        button.style.top = top;
        button.style.right = '0px';
        button.style.zIndex = '9999';
        button.style.padding = '8px 16px';
        button.style.backgroundColor = color;
        button.style.color = 'white';
        button.style.border = 'none';
        button.style.borderRadius = '4px';
        button.style.cursor = 'pointer';
        button.addEventListener('click', onClick);
        return button;
    }


    // 等待 iframe 加载(3秒超时)
    function waitForIframe(timeout = 3000) {
        return new Promise((resolve, reject) => {
            const iframe = document.getElementById('contentWrapper');
            if (!iframe) {
                reject(new Error('未找到 iframe 元素'));
                return;
            }

            const checkIframe = () => {
                if (iframe.contentDocument && iframe.contentDocument.body) {
                    clearInterval(interval);
                    clearTimeout(timeoutId);
                    resolve(iframe.contentDocument);
                }
            };

            const interval = setInterval(checkIframe, 100);
            const timeoutId = setTimeout(() => {
                clearInterval(interval);
                reject(new Error('等待 iframe 加载超时(3秒)'));
            }, timeout);
        });
    }

    // 采集指定页码范围的数据
    async function collectRangePages(startPage, endPage) {
        console.log(`开始采集从第 ${startPage} 页到第 ${endPage} 页的数据...`);
        allData = []; // 清空之前的数据
        let currentPage = startPage;

        try {
            // 刷新 iframe,确保内容重置但脚本状态不变
            await refreshIframe();
            const iframeDoc = await waitForIframe(3000); // 3 秒等待,确保加载
            let hasNextPage = true;

            while (hasNextPage && currentPage <= endPage && isCollecting) {
                console.log(`正在采集第 ${currentPage} 页...`);
                try {
                    const pageData = await collectPageData(iframeDoc, currentPage);
                    allData = allData.concat(pageData.filter(item => item.plays !== '0')); // 过滤播放数为 0 的数据

                    // 等待 3 秒,确保数据加载完成
                    await new Promise(resolve => setTimeout(resolve, 3000));

                    if (currentPage === endPage) {
                        hasNextPage = false;
                        saveToCsv(allData, 'all');
                        returnToFirstPage();
                    } else {
                        // 检查是否有下一页
                        const nextButton = iframeDoc.querySelector('.ant-pagination-next:not(.ant-pagination-disabled)');
                        const paginationItems = iframeDoc.querySelectorAll('.ant-pagination-item');
                        const lastPageItem = Array.from(paginationItems).pop();
                        const isLastPage = !nextButton || nextButton.getAttribute('aria-disabled') === 'true' ||
                                         (lastPageItem && lastPageItem.textContent.trim() === currentPage.toString());

                        if (isLastPage) {
                            hasNextPage = false;
                            saveToCsv(allData, 'all');
                            returnToFirstPage();
                        } else {
                            // 模拟点击下一页按钮
                            nextButton.click();
                            currentPage++;

                            // 等待 3 秒,确保页面翻转并加载完成
                            await new Promise(resolve => setTimeout(resolve, 3000));
                        }
                    }
                } catch (err) {
                    console.warn(`第 ${currentPage} 页采集失败: ${err.message},继续下一页`);
                    currentPage++;
                    continue;
                }

                if (!isCollecting) break; // 检查是否需要停止
            }

            if (allData.length === 0) {
                throw new Error('未采集到任何数据,请确保页面已加载专辑列表!');
            }
        } catch (err) {
            throw err;
        }
    }

    // 采集单页数据
    async function collectPageData(iframeDoc, pageNumber) {
        console.log(`采集第 ${pageNumber} 页数据...`);
        const albumItems = iframeDoc.querySelectorAll('[class*="AlbumItem_listItem"]');
        console.log(`找到 ${albumItems.length} 个专辑项`);

        if (albumItems.length === 0) {
            throw new Error(`第 ${pageNumber} 页未找到专辑数据!`);
        }

        const pageData = [];
        const currentTime = new Date().toLocaleString('zh-CN', {
            year: 'numeric',
            month: '2-digit',
            day: '2-digit',
            hour: '2-digit',
            minute: '2-digit'
        }).replace(/\//g, '-').replace(' ', '-'); // 格式化为 "YYYY-MM-DD-HH:MM"

        albumItems.forEach((item, index) => {
            let title = '未知专辑';
            let episodes = '0';
            let plays = '0';
            let subscribes = '0';

            // 提取标题,增加容错
            const titleEl = item.querySelector('.AlbumItem_title__1q92X, [class*="title"]');
            if (titleEl) {
                title = titleEl.textContent.trim().replace('公开', '') || '未知专辑';
            }

            // 提取集数,增加容错
            const episodeEl = item.querySelector('.AlbumItem_number__2lJS9 span, [class*="number"] span');
            if (episodeEl) {
                episodes = episodeEl.textContent.replace(/[^\d]/g, '').trim() || '0';
            }

            // 提取播放和订阅,增加容错并处理“万”、“亿”格式
            const dataEls = item.querySelectorAll('.AlbumItem_data__upkkX span, [class*="data"] span');
            dataEls.forEach(data => {
                const text = data.textContent.trim();
                if (text.includes('播放')) {
                    plays = convertNumericFormat(text) || '0';
                } else if (text.includes('订阅')) {
                    subscribes = convertNumericFormat(text) || '0';
                }
            });

            pageData.push({ title, episodes, plays, subscribes, time: currentTime });
            console.log(`采集第 ${pageNumber} 页的第 ${index + 1} 个专辑: ${title}, 集数: ${episodes}, 播放: ${plays}, 订阅: ${subscribes}, 时间: ${currentTime}`);
        });

        return pageData;
    }

    // 转换数值格式(如“2.64万”转换为 26400)
    function convertNumericFormat(text) {
        if (!text) return '0';
        const match = text.match(/([\d.]+)(万|亿)?/);
        if (!match) return '0';
        const value = parseFloat(match[1]);
        const unit = match[2];
        if (unit === '亿') {
            return Math.round(value * 100000000).toString(); // 亿 -> 整数
        } else if (unit === '万') {
            return Math.round(value * 10000).toString(); // 万 -> 整数
        } else {
            return Math.round(value).toString(); // 纯数字
        }
    }

    // 保存并下载当前数据
    function saveAndDownloadCurrentData() {
        if (allData.length === 0) return;

        let csvContent = '\ufeff' + '专辑名,集数,播放,订阅,时间\n'; // 确保 UTF-8 BOM 避免中文乱码
        allData.forEach(item => {
            csvContent += `"${item.title}",${item.episodes},${item.plays},${item.subscribes},"${item.time}"\n`;
        });

        const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
        const link = document.createElement('a');
        const timestamp = new Date().toLocaleString('zh-CN', {
            year: 'numeric',
            month: '2-digit',
            day: '2-digit',
            hour: '2-digit',
            minute: '2-digit'
        }).replace(/\//g, '-').replace(' ', '-'); // 格式化为 "YYYY-MM-DD-HH:MM"
        link.setAttribute('href', URL.createObjectURL(blob));
        link.setAttribute('download', `中心数据${timestamp}.csv`);
        document.body.appendChild(link);
        link.click();
        document.body.removeChild(link);
    }

    // 保存数据到 CSV 文件
    function saveToCsv(data, pageNumber) {
        let csvContent = '\ufeff' + '专辑名,集数,播放,订阅,时间\n'; // 确保 UTF-8 BOM 避免中文乱码
        data.forEach(item => {
            csvContent += `"${item.title}",${item.episodes},${item.plays},${item.subscribes},"${item.time}"\n`;
        });

        const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
        const link = document.createElement('a');
        const timestamp = new Date().toLocaleString('zh-CN', {
            year: 'numeric',
            month: '2-digit',
            day: '2-digit',
            hour: '2-digit',
            minute: '2-digit'
        }).replace(/\//g, '-').replace(' ', '-'); // 格式化为 "YYYY-MM-DD-HH:MM"
        link.setAttribute('href', URL.createObjectURL(blob));
        link.setAttribute('download', `中心数据${timestamp}.csv`);
        document.body.appendChild(link);
        link.click();
        document.body.removeChild(link);
    }

    // 返回到第 1 页
    function returnToFirstPage() {
        const iframe = document.getElementById('contentWrapper');
        if (iframe && iframe.contentDocument) {
            const firstPageButton = iframe.contentDocument.querySelector('.ant-pagination-item-1:not(.ant-pagination-item-active)');
            if (firstPageButton) {
                firstPageButton.click();
                console.log('已返回到第 1 页');
            }
        }
    }

    // 刷新 iframe
    function refreshIframe() {
        return new Promise((resolve) => {
            const iframe = document.getElementById('contentWrapper');
            if (iframe) {
                iframe.contentWindow.location.reload(); // 仅刷新 iframe
                setTimeout(() => {
                    waitForIframe(3000).then(iframeDoc => {
                        resolve(iframeDoc);
                    }).catch(err => {
                        console.error('刷新 iframe 后等待超时:', err);
                        resolve(null); // 即使超时也继续
                    });
                }, 1000); // 等待 1 秒后检查,总计 3 秒
            } else {
                resolve(waitForIframe(3000)); // 如果 iframe 还未加载,直接等待
            }
        });
    }

    // 检查是否启用循环并触发下一次采集
    function checkAndTriggerNextCollection() {
        const loopCheckbox = document.getElementById('loopEnabled');
        if (loopCheckbox && loopCheckbox.checked) {
            const intervalInput = document.getElementById('intervalMinutes');
            if (!intervalInput) {
                console.error('循环间隔输入框未找到');
                return;
            }
            const intervalMinutes = parseInt(intervalInput.value, 10) || 30; // 默认30分钟
            timeoutId = setTimeout(() => {
                refreshPageAndTriggerCollection();
            }, intervalMinutes * 60 * 1000); // 转换为毫秒
        }
    }

    // 刷新 iframe 并触发下一次采集,增加 5 秒间隔
    function refreshPageAndTriggerCollection() {
        return new Promise((resolve) => {
            const iframe = document.getElementById('contentWrapper');
            if (iframe) {
                iframe.contentWindow.location.reload(); // 仅刷新 iframe
                setTimeout(() => {
                    // 增加 5 秒间隔后模拟按下“开始采集”按钮
                    setTimeout(() => {
                        const rangeButton = document.querySelector('button[style*="top: 0px"][style*="right: 0px"]');
                        if (rangeButton) {
                            rangeButton.click(); // 模拟按下“开始采集”按钮
                            console.log('已模拟按下“开始采集”按钮');
                        } else {
                            console.error('未找到“开始采集”按钮');
                        }
                        resolve();
                    }, 5000); // 5 秒间隔
                }, 3000); // 等待 3 秒确保 iframe 加载
            } else {
                console.error('未找到 iframe 元素');
                resolve();
            }
        });
    }

    // 页面加载完成后添加按钮和输入框
    window.addEventListener('load', () => {
        createControls();
    });
})();