Jump to content

User:Qwerfjkl/scripts/linkrot.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// WARNING - this is not 100% accurate! Use editor discretion.

// This is a Bandersnatch ([[:w:de:Benutzer:Schnark/js/bandersnatch]]) edit function 
// It is a fork of [[User:BrownHairedGirl/linkrot.js]], with only minor changes to fit the Bandersnatch format
// The search query I use is `-insource:/\<ref[^>]*?\>\s*https?:[^>< \|\[\]]+\s*\<\s*\/\s*ref/ -hastemplate:"Bare URL inline" hastemplate:"Cleanup bare URLs"`
// based on BHG's own regex

// Linkrot.js v2.0 -- with untagging, and ref counting. 9 March 2022
// 
// Install with:
// <code><nowiki>		{{subst:Iusc|User:BrownHairedGirl/linkrot.js}}																</nowiki></code>
// or with
// <code><nowiki>		importScript( 'User:BrownHairedGirl/linkrot.js' ); // Backlink: [[User:BrownHairedGirl/linkrot.js]]			</nowiki></code> 

// This script is hacked from [[User:DannyS712/Draft no cat.js]]
// If forking this script, please note our contributions / give us credit


// These variables are shared by the helper functions below to avoid passing them as parameters.

// Matches the {{Cleanup bare URLs}} banner template, or any of the redirect names listed in the
// alternation, with an optional "Template:" prefix and optional parameters, plus trailing whitespace.
// The regex is global (/g), so exec()/test() on it are stateful via lastIndex.
let CleanupBareURLsTagMatcher = /\{\{ *([tT]emplate *: *)?([Cc]leanup[_ ]+bare[_ ]+URLs|[Bb]are[_ ]+|[Bb]are|[Bb]are[_ ]+link|[Bb]are[_ ]+linkname|[Bb]are[_ ]+links|[Bb]are[_ ]+references|[Bb]are[_ ]+refs|[Bb]are[_ ]+URL|[Bb]are[_ ]+[uU][rR][lL]s|[Bb]are-URLs|[Bb]arelinks|[Bb]areURL|[Bb]areURLs|[Cc]leanup[_ ]+bare-URLs|[Cc]leanup[_ ]+link[_ ]+rot|[Cc]leanup[_ ]+link-rot|[Cc]leanup-Bare[_ ]+URLs|[Cc]leanup-barelinks|[Cc]leanup-link[_ ]+rot|[Cc]leanup-link-rot|[Cc]leanup-linkrot|[Cc]UBURL|[Ll]ink[_ ]*rot|[Ll]INKROT|[Ll]R) *(\|[^\}]*)?\}\}\s*/g;
// Wikitext of the page being processed; assigned by the main code before any helper runs.
var lkrArticleOriginalText = null;
// Title of the page being processed; used in the alert/confirm messages.
var lkrWgPageName = null;


// Edit summary used when the banner is added. It is built once, when this code runs, and
// links to the dated maintenance category for the month returned by monthyear_datestamp().
let add_linkrot_edit_summary = "Added {{[[Template:Cleanup bare URLs|Cleanup bare URLs]]}}, " +
    "using [[User:BrownHairedGirl/linkrot.js|a script]]. " +
    "For other recently-tagged pages with [[WP:Bare URLs|bare URLs]], " +
    "see [[:Category:Articles with bare URLs for citations from " + monthyear_datestamp() + "]]";
// Edit summary used when the banner is removed.
let remove_linkrot_edit_summary = "Removed {{[[Template:Cleanup bare URLs|Cleanup bare URLs]]}}, " +
    "using [[User:BrownHairedGirl/linkrot.js|a script]]. " +
    "This page currently has no [[WP:Bare URLs|Bare URLs]]";


// nested functions
/**
 * Build a "Month YYYY" stamp (e.g. "March 2022") for the current date.
 * Month and year are read with getMonth()/getFullYear(), i.e. in the local time zone.
 *
 * @returns {string} English month name, a space, and the four-digit year.
 */
function monthyear_datestamp() {
    const monthNames = [
        "January", "February", "March", "April",
        "May", "June", "July", "August",
        "September", "October", "November", "December"
    ];
    const now = new Date();
    const monthName = monthNames[now.getMonth()];
    return `${monthName} ${now.getFullYear()}`;
}


/**
 * Inspect the current article text and ask the user what to do about the
 * {{Cleanup Bare URLs}} banner.
 *
 * Counts the existing banner tags, the untagged bare-URL refs, and the inline
 * {{Bare URL inline}} / {{Bare URL PDF}} tags, then shows an alert or a confirm dialog.
 *
 * @returns {?string} "untag" if the user confirmed removing the banner, "addtag" if the
 *     user confirmed adding it, otherwise null (nothing to do, or the user declined).
 */
function lkrPreCheck() {
    // Existing {{Cleanup Bare URLs}} banner tags (or any of its redirects)
    var bannerCount = lkrCountMatches(CleanupBareURLsTagMatcher);

    // Bare URLs inside <ref> tags, and the inline tags that mark bare URLs
    var untaggedCount = lkrCountMatches(/<ref[^>]*?\>\s*\[?\s*https?:[^>< \|\[\]]+\s*\]?\s*<\s*\/\s*ref/gi);
    var inlineTagCount = lkrCountMatches(/\{\{ *([tT]emplate *: *)?([Bb]are[_ ]+URL[\- ]inline|[Ll]inkrot-inline|[Bb]are-inline|[Bb]are[_ ]+inline|[Bb]are[_ ]+url[_ ]+inline|[Bb]are-url[_ ]+inline|[Bb]are[_ ]+link[_ ]+inline|[Bb]are-link-inline|[Bb]are-url-inline|[Bb]are[_ ]+url) *(\|[^\}]*)?\}\}/g);
    var pdfTagCount = lkrCountMatches(/\{\{ *([tT]emplate *: *)?([Bb]are[_ ]+URL[\- ]PDF) *(\|[^\}]*)?\}\}/g);

    var taggedTotal = inlineTagCount + pdfTagCount;
    var bareTotal = untaggedCount + taggedTotal;
    var pageLink = "[[" + lkrWgPageName + "]]";

    if (bannerCount > 0) {
        // Already tagged with {{Cleanup Bare URLs}}
        if (bareTotal != 0) {
            // Banner present and still justified: nothing to change
            alert(pageLink + " is already tagged with {{Cleanup Bare URLs}}");
            return null;
        }
        // Banner present but no bare URLs remain: offer to remove it
        var removalPrompt = pageLink + " has no bare URL refs.\n\n" +
            toUnicodeVariant("Remove", 'bold') + " {{Cleanup Bare URLs}}?";
        return confirm(removalPrompt) ? "untag" : null;
    }

    // No existing {{Cleanup Bare URLs}} tag
    if (bareTotal == 0) {
        alert(pageLink + " has no bare URLs");
        return null;
    }

    // We have bare URLs: offer to add the banner
    var additionPrompt = pageLink + " has " + bareTotal +
        " bare URLs:\n* " + taggedTotal + " inline-tagged\n* " +
        untaggedCount + " untagged." +
        "\n\n" + toUnicodeVariant("Add", 'bold') + " the banner tag {{Cleanup Bare URLs}}?";
    return confirm(additionPrompt) ? "addtag" : null;
}


/**
 * Count the non-overlapping matches of a regular expression in the current article
 * text (the shared variable lkrArticleOriginalText).
 *
 * The count is taken with a fresh global copy of the pattern, so the result does not
 * depend on the caller's regex state: a stale lastIndex no longer causes undercounting,
 * and a non-global or zero-width-matching pattern no longer loops forever. The regex
 * passed in is not modified.
 *
 * @param {RegExp} myREgEx - pattern to count; its source and flags are reused.
 * @returns {number} number of matches; 0 when the article text is null or undefined.
 */
function lkrCountMatches(myREgEx) {
    // A missing article text counts as empty rather than being coerced to the string "null".
    var text = lkrArticleOriginalText == null ? "" : String(lkrArticleOriginalText);
    // matchAll() requires the global flag; add it if the caller's pattern lacks it.
    var flags = myREgEx.flags.indexOf("g") === -1 ? myREgEx.flags + "g" : myREgEx.flags;
    // A newly constructed RegExp always starts scanning at lastIndex 0.
    var scanner = new RegExp(myREgEx.source, flags);
    return Array.from(text.matchAll(scanner)).length;
}

/**
 * (c) David Konrad 2018
 * MIT License
 *
 * Javascript function to convert plain text to unicode variants
 *
 * Loosely based on the nodejs monotext CLI utility https://github.com/cpsdqs/monotext 
 * (c) cpsdqs 2016
 *
 * For more inspiration see  http://unicode.org/charts/
 *
 */

/*
 * supported unicode variants
 *
 * m: monospace
 * b: bold
 * i: italic
 * c: script
 * g: gothic / fraktur
 * d: double-struck
 * s: sans-serif
 * o: circled text 
 * p: parenthesized latin letters
 * w: fullwidth
 */

/**
 * Convert the ASCII letters and digits of a string to a Unicode "styled" variant
 * (bold, italic, circled, ...). Characters with no mapping are copied unchanged.
 *
 * @param {string} str - text to convert; it is iterated code point by code point.
 * @param {string} variant - long name ('bold', 'bold italic', ...) or short key
 *     ('b', 'bi', ...). An unrecognised value falls back to monospace.
 * @param {string} [flags] - comma-separated list of 'underline' and/or 'strike';
 *     whitespace around each name is ignored.
 * @returns {string} the converted text, with a combining underline/strike mark
 *     appended after every character when the matching flag is set.
 */
function toUnicodeVariant(str, variant, flags) {

    // Per short key: [code point that 'A' maps to, code point that '0' maps to].
    // A digit base of 0x00030 ('0') leaves digits as plain ASCII.
    const offsets = {
        m: [0x1d670, 0x1d7f6],
        b: [0x1d400, 0x1d7ce],
        i: [0x1d434, 0x00030],
        bi: [0x1d468, 0x00030],
        c: [0x1d49c, 0x00030],
        bc: [0x1d4d0, 0x00030],
        g: [0x1d504, 0x00030],
        d: [0x1d538, 0x1d7d8],
        bg: [0x1d56c, 0x00030],
        s: [0x1d5a0, 0x1d7e2],
        bs: [0x1d5d4, 0x1d7ec],
        is: [0x1d608, 0x00030],
        bis: [0x1d63c, 0x00030],
        o: [0x24B6, 0x2460],
        p: [0x249C, 0x2474],
        w: [0xff21, 0xff10],
        // NOTE(review): 'u' has no long name below; its intended variant is not evident here.
        u: [0x2090, 0xff10]
    };

    // Long variant names accepted in addition to the short keys above.
    const variantOffsets = {
        'monospace': 'm',
        'bold': 'b',
        'italic': 'i',
        'bold italic': 'bi',
        'script': 'c',
        'bold script': 'bc',
        'gothic': 'g',
        'gothic bold': 'bg',
        'doublestruck': 'd',
        'sans': 's',
        'bold sans': 'bs',
        'italic sans': 'is',
        'bold italic sans': 'bis',
        'parenthesis': 'p',
        'circled': 'o',
        'fullwidth': 'w'
    };

    // Special characters (absolute code points) that override the offset arithmetic.
    const special = {
        m: {
            ' ': 0x2000,
            '-': 0x2013
        },
        i: {
            'h': 0x210e
        },
        // Script letters whose styled form lives in the Letterlike Symbols block;
        // the offset arithmetic would land on reserved code points for these.
        c: {
            'B': 0x212c,
            'E': 0x2130,
            'F': 0x2131,
            'H': 0x210b,
            'I': 0x2110,
            'L': 0x2112,
            'M': 0x2133,
            'R': 0x211b,
            'e': 0x212f,
            'g': 0x210a,
            'o': 0x2134
        },
        g: {
            'C': 0x212d,
            'H': 0x210c,
            'I': 0x2111,
            'R': 0x211c,
            'Z': 0x2128
        },
        // Double-struck capitals that live in the Letterlike Symbols block.
        d: {
            'C': 0x2102,
            'H': 0x210d,
            'N': 0x2115,
            'P': 0x2119,
            'Q': 0x211a,
            'R': 0x211d,
            'Z': 0x2124
        },
        o: {
            '0': 0x24EA,
            '1': 0x2460,
            '2': 0x2461,
            '3': 0x2462,
            '4': 0x2463,
            '5': 0x2464,
            '6': 0x2465,
            '7': 0x2466,
            '8': 0x2467,
            '9': 0x2468
        },
        p: {},
        w: {}
    };
    // Support for parenthesized lower-case latin letters
    for (let code = 97; code <= 122; code++) {
        special.p[String.fromCharCode(code)] = 0x249C + (code - 97);
    }
    // Parenthesized digits start at ONE (U+2474), so '1'..'9' need an explicit table;
    // plain offset arithmetic would render '0' as (1), '1' as (2), and so on.
    for (let digit = 1; digit <= 9; digit++) {
        special.p[String(digit)] = 0x2474 + (digit - 1);
    }
    // There is no parenthesized zero: map '0' to itself so it passes through unchanged.
    special.p['0'] = 0x30;
    // Support for fullwidth lower-case latin letters
    for (let code = 97; code <= 122; code++) {
        special.w[String.fromCharCode(code)] = 0xff41 + (code - 97);
    }

    const chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
    const numbers = '0123456789';

    // Resolve a long name or short key to a short key; monospace is the default.
    const getType = function(variant) {
        if (variantOffsets[variant]) return variantOffsets[variant];
        if (offsets[variant]) return variant;
        return 'm';
    };
    // True when `flag` appears in the comma-separated `flags` list.
    const getFlag = function(flag, flags) {
        if (!flags) return false;
        return flags.split(',').map(function(name) {
            return name.trim();
        }).indexOf(flag) > -1;
    };

    const type = getType(variant);
    const underline = getFlag('underline', flags);
    const strike = getFlag('strike', flags);
    let result = '';

    for (const ch of str) {
        const override = special[type] && special[type][ch];
        let index;
        if (override) {
            // Explicit table entry wins over offset arithmetic
            result += String.fromCodePoint(override);
        } else if ((index = chars.indexOf(ch)) > -1) {
            result += String.fromCodePoint(index + offsets[type][0]);
        } else if ((index = numbers.indexOf(ch)) > -1) {
            result += String.fromCodePoint(index + offsets[type][1]);
        } else {
            result += ch;
        }
        if (underline) result += '\u0332'; // add combining underline
        if (strike) result += '\u0336'; // add combining strike
    }
    return result;
}

// main code

lkrWgPageName = title;

lkrArticleOriginalText = oldText;
var lkrArticleNewText = null;
var lkr_edit_summary = null;
var myActionString = lkrPreCheck();
if ((myActionString) == null || (myActionString == "")) {
    return; // no action
} else if (myActionString == "addtag") {
    lkrArticleNewText = "{{Cleanup bare URLs|date=" + monthyear_datestamp() + "}}\n" + lkrArticleOriginalText;
    lkr_edit_summary = add_linkrot_edit_summary;
} else if (myActionString == "untag") {
    lkrArticleNewText = lkrArticleOriginalText.replaceAll(CleanupBareURLsTagMatcher, '');
    // check that removal worked
    if (lkrArticleNewText == lkrArticleOriginalText) {
        //alert("ERROR\n\nTag removal failed");
        return;
    }
    lkr_edit_summary = remove_linkrot_edit_summary;
}
// sanity check
if (!(lkr_edit_summary && lkrArticleNewText)) {
    //alert("ERROR!\n\naction failed: " + myActionString);
    return;
}
return {
    text: lkrArticleNewText,
    summary: lkr_edit_summary
};