Jump to content

User:Polygnotus/Scripts/WebArchives.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// Wikipedia Archive Checker - Add to your common.js
// Adds an archive checking form to the top of Special:Watchlist

(function() {
    'use strict';
    
    // Only run on watchlist page
    if (mw.config.get('wgCanonicalSpecialPageName') !== 'Watchlist') {
        return;
    }
    
    // Flag to prevent double initialization
    let initialized = false;
    
    // Create the archive checker section
    // Returns a new, not-yet-attached <div id="archive-checker-section"> that
    // holds the heading, the URL text box (#url-input), the Check button
    // (#check-btn) and the empty #results-container element.
    function createArchiveChecker() {
        // Inline styling for the outer box
        const boxStyles = `
            margin: 20px 0;
            padding: 20px;
            border: 1px solid #a2a9b1;
            border-radius: 4px;
            background-color: #f8f9fa;
        `;

        // Static markup of the form; it contains no interpolated values
        const formMarkup = `
            <h3 style="margin-top: 0; color: #0645ad;">Archive Checker</h3>
            <p>Check if a URL exists in various web archives:</p>
            <div style="margin-bottom: 15px;">
                <input type="text" id="url-input" placeholder="Enter URL (e.g., example.com)" 
                       style="width: 300px; padding: 8px; border: 1px solid #a2a9b1; border-radius: 2px;">
                <button id="check-btn" style="margin-left: 10px; padding: 8px 15px; background: #0645ad; color: white; border: none; border-radius: 2px; cursor: pointer;">Check</button>
            </div>
            <div id="results-container" style="margin-top: 15px;"></div>
        `;

        const panel = document.createElement('div');
        panel.id = 'archive-checker-section';
        panel.style.cssText = boxStyles;
        panel.innerHTML = formMarkup;

        return panel;
    }
    
    // Normalize URL (trim whitespace, strip the protocol and a leading "www.")
    // Returns `url` without surrounding whitespace, without a leading
    // http:// or https:// scheme and without a leading "www." prefix.
    // Returns '' for empty, whitespace-only or non-string input.
    function normalizeUrl(url) {
        // Guard against null/undefined so callers get '' instead of a TypeError
        if (typeof url !== 'string') return '';

        // Scheme and host names are case-insensitive, so "HTTPS://WWW.x.org"
        // must be stripped the same way as "https://www.x.org".
        return url
            .trim()
            .replace(/^https?:\/\//i, '')
            .replace(/^www\./i, '');
    }
    
    // Archive services configuration
    // One entry per archive. Fields used by the rest of the script:
    //   name            - label shown in the results and in log messages
    //   searchUrl(url)  - link presented to the user
    //   checkUrl(url)   - page fetched and scanned when apiCheck is true
    //   apiUrl(url)     - optional JSON endpoint (only the Wayback Machine has one)
    //   apiCheck        - false means "no automatic check, link only"
    //   notFoundStrings - page texts that indicate the URL is not archived
    const archiveServices = [
        {
            name: 'Archive.today',
            apiCheck: true,
            notFoundStrings: ['No results'],
            checkUrl(target) {
                return `https://archive.ph/${encodeURIComponent(target)}`;
            },
            searchUrl(target) {
                return `https://archive.ph/${encodeURIComponent(target)}`;
            }
        },
        {
            name: 'Ghost Archive',
            apiCheck: false,
            checkUrl(target) {
                return `https://ghostarchive.org/search?term=${encodeURIComponent(target)}`;
            },
            searchUrl(target) {
                return `https://ghostarchive.org/search?term=${encodeURIComponent(target)}`;
            }
        },
        {
            name: 'Wayback Machine',
            apiCheck: true,
            notFoundStrings: ['Wayback Machine has not archived that URL.'],
            checkUrl(target) {
                return `https://web.archive.org/web/20250000000000*/${encodeURIComponent(target)}`;
            },
            searchUrl(target) {
                return `https://web.archive.org/web/*/${encodeURIComponent(target)}`;
            },
            apiUrl(target) {
                return `https://archive.org/wayback/available?url=${encodeURIComponent(target)}`;
            }
        }
    ];
    
    // Check archives
    // Entry point for one lookup: normalizes `url`, checks every configured
    // archive service and hands the collected results to displayResults().
    //
    // url: raw text typed by the user.
    // Returns a promise that resolves once the results were rendered (or the
    // "Please enter a valid URL." message was shown). Per-service failures are
    // recorded on that service's result and never reject the promise.
    async function checkArchives(url) {
        const resultsContainer = document.getElementById('results-container');
        resultsContainer.innerHTML = '<p>Checking archives...</p>';

        const normalizedUrl = normalizeUrl(url);
        if (!normalizedUrl) {
            resultsContainer.innerHTML = '<p style="color: red;">Please enter a valid URL.</p>';
            return;
        }

        console.log(`[Archive Checker] Starting check for URL: ${normalizedUrl}`);

        // The services do not depend on each other, so they are queried
        // concurrently; Promise.all keeps the results in configuration order.
        const results = await Promise.all(
            archiveServices.map(service => checkService(service, normalizedUrl))
        );

        console.log(`[Archive Checker] Check completed. Results:`, results);
        displayResults(results, normalizedUrl);
    }

    // Checks a single service and returns its result object
    // { name, url, available, error }. `available` is true/false when the
    // check gave an answer and stays null when it was skipped or failed.
    async function checkService(service, normalizedUrl) {
        console.log(`[Archive Checker] Checking ${service.name}...`);

        const result = {
            name: service.name,
            url: service.searchUrl(normalizedUrl),
            available: null,
            error: null
        };

        if (!service.apiCheck) {
            console.log(`[Archive Checker] ${service.name} - API check disabled, will show manual link`);
            return result;
        }

        try {
            // A service that exposes apiUrl() has a JSON endpoint; all others
            // are checked by scanning the fetched page for "not found" texts.
            if (typeof service.apiUrl === 'function') {
                await checkViaAvailabilityApi(service, normalizedUrl, result);
            } else {
                await checkViaPageContent(service, normalizedUrl, result);
            }
        } catch (error) {
            // Network/CORS failures and malformed responses end up here
            console.error(`[Archive Checker] ${service.name} - Error:`, error);
            result.error = error.message;
            result.available = null;
        }

        return result;
    }

    // Queries the service's JSON availability endpoint and records the outcome
    // on `result`. A snapshot counts as found when the response contains
    // archived_snapshots.closest.
    async function checkViaAvailabilityApi(service, normalizedUrl, result) {
        const apiUrl = service.apiUrl(normalizedUrl);
        console.log(`[Archive Checker] ${service.name} API URL: ${apiUrl}`);

        const response = await fetch(apiUrl);
        console.log(`[Archive Checker] ${service.name} API Response status: ${response.status}`);

        // Report HTTP failures as such instead of trying to parse an error
        // page as JSON (which would surface as an opaque parse error).
        if (!response.ok) {
            result.error = `HTTP ${response.status}`;
            console.log(`[Archive Checker] ${service.name} - HTTP error: ${response.status}`);
            return;
        }

        const data = await response.json();
        console.log(`[Archive Checker] ${service.name} API Response data:`, data);

        const snapshots = data && data.archived_snapshots;
        const closest = snapshots && snapshots.closest;

        result.available = Boolean(closest);
        if (!closest) {
            console.log(`[Archive Checker] ${service.name} - No archive found (empty archived_snapshots)`);
            return;
        }

        // Only replace the search link when the API actually returned a URL
        if (typeof closest.url === 'string' && closest.url) {
            result.url = closest.url;
        }
        console.log(`[Archive Checker] ${service.name} - Archive found: ${result.url}`);
    }

    // Fetches the service's check page and records on `result` whether any of
    // the service's notFoundStrings occurs in it.
    async function checkViaPageContent(service, normalizedUrl, result) {
        const checkUrl = service.checkUrl(normalizedUrl);
        console.log(`[Archive Checker] ${service.name} Check URL: ${checkUrl}`);

        const response = await fetch(checkUrl, {
            mode: 'cors',
            credentials: 'omit'
        });
        console.log(`[Archive Checker] ${service.name} Response status: ${response.status}`);

        if (!response.ok) {
            result.error = `HTTP ${response.status}`;
            console.log(`[Archive Checker] ${service.name} - HTTP error: ${response.status}`);
            return;
        }

        const text = await response.text();
        console.log(`[Archive Checker] ${service.name} Response length: ${text.length} characters`);

        // Single scan: the matching marker (if any) is also used for the log line
        const marker = (service.notFoundStrings || []).find(str => text.includes(str));

        if (marker !== undefined) {
            result.available = false;
            console.log(`[Archive Checker] ${service.name} - Not found (detected: "${marker}")`);
        } else {
            result.available = true;
            console.log(`[Archive Checker] ${service.name} - Archive appears to be available`);
        }
    }
    
    // Display results
    // Renders one status card per archive service into #results-container.
    //
    // results:     array of { name, url, available, error } objects, where
    //              available is true (found), false (not found) or null (unknown)
    // originalUrl: the URL the results refer to; shown in the heading
    //
    // The markup is assigned through innerHTML, so every interpolated value is
    // HTML-escaped first: originalUrl is typed by the user, and result.url /
    // result.error can carry text taken from remote responses.
    function displayResults(results, originalUrl) {
        const resultsContainer = document.getElementById('results-container');

        console.log(`[Archive Checker] Displaying results for ${originalUrl}:`, results);

        let html = `<h4>Results for: ${escapeHtml(originalUrl)}</h4>`;
        html += '<div style="display: grid; gap: 10px;">';

        results.forEach(result => {
            console.log(`[Archive Checker] Processing result for ${result.name}:`, {
                available: result.available,
                availableType: typeof result.available,
                error: result.error
            });

            // Precedence: a definite answer first, then an error, then "unknown"
            const statusText = result.available === true ? '✓ Available' :
                              result.available === false ? '✗ Not found' :
                              result.error ? `⚠ ${result.error}` : '? Check manually';

            const statusColor = result.available === true ? '#006400' :
                               result.available === false ? '#8b0000' : '#666';

            console.log(`[Archive Checker] ${result.name} - Status: ${statusText}, Color: ${statusColor}`);

            const safeName = escapeHtml(result.name);

            html += `
                <div style="padding: 10px; border: 1px solid #ddd; border-radius: 4px; background: white;">
                    <div style="display: flex; justify-content: space-between; align-items: center;">
                        <span><strong>${safeName}</strong></span>
                        <span style="color: ${statusColor};">${escapeHtml(statusText)}</span>
                    </div>
                    <div style="margin-top: 8px;">
                        <a href="${escapeHtml(result.url)}" target="_blank" rel="noopener"
                           style="color: #0645ad; text-decoration: none;">
                            Visit ${safeName}
                        </a>
                    </div>
                </div>
            `;
        });

        html += '</div>';
        html += `<p style="margin-top: 15px; font-size: 0.9em; color: #666;">
            <strong>Note:</strong> Some archives may require manual verification.
            Click the links above to check each archive service.
        </p>`;

        resultsContainer.innerHTML = html;
    }

    // Converts `value` to a string and escapes the characters that are
    // significant in HTML text and in double- or single-quoted attributes.
    function escapeHtml(value) {
        return String(value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }
    
    // Add the archive checker section and set up event listeners
    // Inserts the archive checker section in front of the first element
    // matching '.mw-changeslist, #mw-content-text' and wires up its controls.
    // Does nothing when the section was already added or when no such element
    // exists yet; sets `initialized` once the section is in place.
    function addArchiveChecker() {
        // Prevent multiple initialization
        if (initialized || document.getElementById('archive-checker-section')) {
            return;
        }

        const watchlistContent = document.querySelector('.mw-changeslist, #mw-content-text');
        if (!watchlistContent) {
            return;
        }

        initialized = true;

        // Add our section to the top of the watchlist page
        const archiveChecker = createArchiveChecker();
        watchlistContent.parentNode.insertBefore(archiveChecker, watchlistContent);

        // Look the controls up inside our own section: the ids are generic, so
        // a document-wide lookup could return an unrelated element of the page.
        const checkBtn = archiveChecker.querySelector('#check-btn');
        const urlInput = archiveChecker.querySelector('#url-input');

        // Starts a check for the current input value. The returned promise is
        // observed so that an unexpected failure is logged instead of becoming
        // an unhandled rejection.
        const runCheck = () => {
            Promise.resolve(checkArchives(urlInput.value)).catch((error) => {
                console.error('[Archive Checker] Unexpected error:', error);
            });
        };

        checkBtn.addEventListener('click', runCheck);

        // 'keydown' replaces the deprecated 'keypress' event. Enter presses
        // that belong to an IME composition confirm the composition and must
        // not trigger a check.
        urlInput.addEventListener('keydown', (e) => {
            if (e.key === 'Enter' && !e.isComposing) {
                runCheck();
            }
        });
    }
    
    // Initialize the checker
    // Tries to add the checker right away. When that does not succeed, every
    // DOM mutation below document.body triggers another attempt until one
    // succeeds; the observer is disconnected after 5 seconds in any case.
    function init() {
        addArchiveChecker();

        // First attempt worked - nothing to watch for
        if (initialized) {
            return;
        }

        const retryOnMutation = (records, self) => {
            if (initialized) {
                return;
            }
            addArchiveChecker();
            if (initialized) {
                self.disconnect();
            }
        };

        const domWatcher = new MutationObserver(retryOnMutation);
        domWatcher.observe(document.body, { childList: true, subtree: true });

        // Stop watching after a reasonable time
        setTimeout(() => {
            domWatcher.disconnect();
        }, 5000);
    }
    
    // Start when DOM is ready
    // Run init() immediately when the document is past the 'loading' state;
    // otherwise defer it until DOMContentLoaded fires.
    if (document.readyState !== 'loading') {
        init();
    } else {
        document.addEventListener('DOMContentLoaded', init);
    }
    
})();