Jump to content

User:AnomieBOT/source/tasks/LanguageCategoryCreator.pm

From Wikipedia, the free encyclopedia
package tasks::LanguageCategoryCreator;

=pod

=begin metadata

Bot:      AnomieBOT
Task:     LanguageCategoryCreator
BRFA:     Wikipedia:Bots/Requests for approval/AnomieBOT 84
Status:   Approved 2025-05-31
Created:  2025-04-10

Create needed categories for language templates such as {{tl|lang}}.

=end metadata

=cut

use utf8;
use strict;

use POSIX;
use Data::Dumper;
use AnomieBOT::API;
use AnomieBOT::Task qw/:time bunchlist/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

my $screwup;

# Constructor.
#
# Delegates object creation to the parent class (via SUPER::new, called with
# no arguments), initialises the 'iter' slot to undef, and (re)blesses the
# object into the invoking class before returning it.
#
# NOTE(review): nothing else in this file reads $self->{'iter'}; run() keeps
# its progress in $self->{'dbcontinue'} instead.
sub new {
    my ($class) = @_;

    my $self = $class->SUPER::new();
    $self->{'iter'} = undef;

    return bless $self, $class;
}

=pod

=for info
BRFA approved 2025-05-31.<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 84]]

=cut

# Approval status of this task; always returns the constant 2.
#
# NOTE(review): how the value 2 is interpreted is decided by the AnomieBOT
# framework and is not visible in this file -- confirm there before changing.
sub approved {
    my $approval = 2;
    return $approval;
}

# Main entry point of the task.
#
# Asks the on-wiki Lua module for a newline-separated list of SQL LIKE
# patterns, then repeatedly queries the enwiki replica for rows of `category`
# that match one of the patterns and have no page in namespace 14.  Each such
# title is handed to make_cat_if_needed().
#
# Progress is kept in $self->{'dbcontinue'} as a SQL fragment of the form
# " AND (cat_title ...)" that is interpolated into the next query.  When a
# pass that started from a saved continuation runs out of rows, the
# continuation is cleared and one more pass is made from the beginning.
#
# Parameters:
#   $self - the task object
#   $api  - the API object (used here for task, user, connectToReplica,
#           query, warn, log and halting)
#
# Returns a number: 0 when halting or when the 5-minute budget is used up,
# 60 or 300 after an error, 7200 when there is nothing left to do, or
# whatever non-zero value make_cat_if_needed() returned.
# NOTE(review): presumably this is the number of seconds before the framework
# calls run() again -- confirm against AnomieBOT::Task.
sub run {
    my ($self, $api)=@_;

    # Time::HiRes::time() is called below by its fully-qualified name, which
    # only works if the module is loaded; do not rely on another module
    # having loaded it for us.
    require Time::HiRes;

    $api->task('LanguageCategoryCreator',0,0,qw/d::Talk d::Redirects d::Templates/);
    $screwup='Report errors at [[User:'.$api->user.'/shutoff/LanguageCategoryCreator]]';

    my $cont = $self->{'dbcontinue'} // '';

    my ($dbh);
    eval {
        ($dbh) = $api->connectToReplica( 'enwiki' );
    };
    if ( $@ ) {
        $api->warn( "Error connecting to replica: $@\n" );
        return 300;
    }
    $dbh->do( q{SET NAMES 'utf8'} );

    # Load data from module
    my $res = $api->query(
        action => 'parse',
        title => 'User:AnomieBOT',
        text => '{{subst:#invoke:User:AnomieBOT/LanguageCategoryCreator|list_cat_likes}}',
        onlypst => 1,
        formatversion => 2,
    );
    if($res->{'code'} ne 'success'){
        # Failed results elsewhere in this file carry their message in
        # 'error'; fall back to 'content' in case only that is populated.
        my $err = $res->{'error'} // $res->{'content'} // 'unknown error';
        $api->warn( "Failed to fetch data from User:AnomieBOT/LanguageCategoryCreator list_cat_likes: $err\n" );
        return 60;
    }

    # One LIKE pattern per line of module output, OR-ed together.
    my $catsql = join( ' OR ', map { "cat_title LIKE " . $dbh->quote( $_ ) } split( "\n", $res->{'parse'}{'text'} ) );

    # Spend a max of 5 minutes on this task before restarting
    my $endtime=time()+300;

    # True once the current pass is known to have started from the beginning
    # of the list (i.e. without a saved continuation).
    my $didstart = ( $cont eq '' );
    while ( 1 ) {
        return 0 if $api->halting;

        # Load the list of categories needing creation
        my @rows;
        my $t0 = Time::HiRes::time();
        eval {
            @rows = @{ $dbh->selectall_arrayref( qq{
                SET STATEMENT max_statement_time=300 FOR
                SELECT cat_title
                FROM category
                  LEFT JOIN page ON (page_namespace=14 AND page_title=cat_title)
                WHERE
                  ($catsql)
                  AND page_id IS NULL
                  $cont
                ORDER BY cat_title
                LIMIT 50
            }, { Slice => {} } ) };
        };
        if ( $@ ) {
            $api->warn( "Error fetching page list from replica: $@\n" );
            return 300;
        }
        my $t1 = Time::HiRes::time();
        $api->log( 'DB query took ' . ($t1-$t0) . ' seconds' );
        unless ( @rows ) {
            # Ran off the end.  If this pass began part-way through, wrap
            # around once to cover the titles before the saved continuation.
            last if $didstart;
            $didstart = 1;
            $cont = '';
            $self->{'dbcontinue'} = $cont;
            next;
        }

        for my $row (@rows) {
            utf8::decode( $row->{'cat_title'} ); # Data from database is binary

            # Keep the database key (underscores) separate from the display
            # title (spaces): only the former compares correctly against
            # cat_title in the continuation clause.
            my $dbkey = $row->{'cat_title'};
            my $title = $dbkey;
            $title =~ s/_/ /g;

            my $ret = $self->make_cat_if_needed( $api, $title );
            if ( $ret ) {
                # Resume at the title that failed (>=) so it is retried on
                # the next run, without re-selecting the titles before it.
                $cont = ' AND (cat_title >= ' . $dbh->quote( $dbkey ) . ')';
                $self->{'dbcontinue'} = $cont;
                return $ret;
            }
        }

        # On the next time around, skip any we've already processed this run
        my $lastkey = $dbh->quote( $rows[-1]{'cat_title'} );
        $cont = " AND (cat_title > $lastkey)";
        $self->{'dbcontinue'} = $cont;

        # If we've been at it long enough, let another task have a go.
        return 0 if time()>=$endtime;
    }

    $self->{'dbcontinue'} = '';

    # No more pages to check for now
    return 7200;
}

# Create the category, if it seems to be sane.
#
# Steps, in order:
#   1. Ask the API whether Category:$title has at least one member; skip the
#      title if it has none.
#   2. Ask the on-wiki Lua module (generate_cat_wikitext) for the page text;
#      skip the title if the module's output starts with "ERROR: ".
#   3. Fetch an edit token; skip protected, bot-excluded or already existing
#      pages.
#   4. Create the page.
#
# Parameters:
#   $self  - the task object (not otherwise used here)
#   $api   - the API object (used for query, edittoken, edit, warn and log)
#   $title - category title without the "Category:" prefix; run() passes it
#            with spaces rather than underscores
#
# Returns 0 when the title was created or deliberately skipped, so the caller
# can move on to the next one.  Returns a non-zero value (60 after an API,
# token or write failure, 300 when the task is shut off), which run() hands
# back to its own caller after saving its position.
sub make_cat_if_needed {
    my ($self,$api,$title)=@_;

    # Check if the category actually has members.
    my $res = $api->query(
        list => 'categorymembers',
        cmtitle => "Category:$title",
        cmlimit => 1,
        formatversion => 2,
    );
    if ( $res->{'code'} ne 'success' ) {
        $api->warn( "Failed to get category members for Category:$title: " . $res->{'error'} . "\n" );
        return 60;
    }
    unless ( @{$res->{'query'}{'categorymembers'}} ) {
        $api->log( "Skipping Category:$title, has no members" );
        return 0;
    }

    # Get wikitext for category page from the module.
    $res = $api->query(
        action => 'parse',
        title => 'User:AnomieBOT',
        text => "{{subst:#invoke:User:AnomieBOT/LanguageCategoryCreator|generate_cat_wikitext|cat=$title}}",
        onlypst => 1,
        formatversion => 2,
    );
    if($res->{'code'} ne 'success'){
        # The other failure paths in this sub report $res->{'error'}; prefer
        # it here too and fall back to 'content' in case only that is set.
        my $err = $res->{'error'} // $res->{'content'} // 'unknown error';
        $api->warn( "For $title: Failed to check: $err\n" );
        return 60;
    }

    # The module signals "cannot build this page" in-band with an ERROR: line.
    my $txt = $res->{'parse'}{'text'};
    if ( $txt =~ /^ERROR: (.*)/ ) {
        $api->log( "Failed to determine content for Category:$title: $1\n" );
        return 0;
    }

    # Do it!
    my $tok=$api->edittoken( "Category:$title", EditRedir => 1 );
    if ( $tok->{'code'} eq 'shutoff' ) {
        $api->warn( "Task disabled: " . $tok->{'content'} . "\n" );
        return 300;
    }
    if ( $tok->{'code'} eq 'pageprotected' || $tok->{'code'} eq 'botexcluded' ) {
        # Skip protected and excluded pages
        $api->log( "Skipping $title, $tok->{code}" );
        return 0;
    }
    if ( $tok->{'code'} ne 'success' ) {
        $api->warn( "Failed to get edit token for $title: " . $tok->{'error'} . "\n" );
        return 60;
    }

    # Only ever create; never overwrite a page that appeared in the meantime.
    unless ( exists($tok->{'missing'}) ) {
        $api->log( "Skipping $title, already exists" );
        return 0;
    }

    $api->log( "Creating language category in Category:$title" );
    my $r = $api->edit( $tok, $txt, "Creating non-empty language category. $screwup", 1, 1 );
    if ( $r->{'code'} ne 'success' ) {
        $api->warn( "Write failed on Category:$title: " . $r->{'error'} . "\n" );
        return 60;
    }

    return 0;
}

1;