Skip to content

Commit

Permalink
Merge pull request #45 from metacpan/mickey/purge
Browse files Browse the repository at this point in the history
Added purge script
  • Loading branch information
mickeyn authored Oct 26, 2024
2 parents 3fc7e6b + 4fe2368 commit 7910662
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 10 deletions.
85 changes: 85 additions & 0 deletions bin/purge.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
use strict;
use warnings;
use v5.36;

use Getopt::Long;
use MetaCPAN::Logger qw< :log :dlog >;

use MetaCPAN::ES;
use MetaCPAN::Ingest qw<
are_you_sure
author_dir
>;

# args
my ( $author, $release, $force );
GetOptions(
"author=s" => \$author,
"release=s" => \$release,
"force" => \$force,
);

# setup
my $type2index = {
release => 'cpan',
file => 'cpan',
author => 'cpan',
favorite => 'cpan',
permission => 'cpan',
contributor => 'contributor',
};


purge_author() if $author;

log_info {'Done'};

sub purge_author () {
# confirm
$release
? are_you_sure( sprintf("%s's %s release is about to be purged!", $author, $release), $force )
: are_you_sure( sprintf("All of %s's releases are about to be purged!", $author), $force );

my $query = {
bool => {
must => [
{ term => { author => $author } },
( $release
? { term => { release => $release } }
: ()
)
]
}
};

purge_ids( type => 'favorite', query => $query);
purge_ids( type => 'file', query => $query);
purge_ids( type => 'release', query => $query);
if ( !$release ) {
purge_ids( type => 'author', id => $author );
purge_ids( type => 'contributor', id => $author );
}
}

sub purge_ids ( %args ) {
my $type = $args{type};
my $es = MetaCPAN::ES->new(
index => $type2index->{$type},
type => $type
);

my $bulk = $es->bulk;

my $id = $args{id};
my $ids = $id
? [ $id ]
: $es->get_ids( query => $args{query} );

$bulk->delete_ids(@$ids);

$bulk->flush;
}

1;

__END__
1 change: 1 addition & 0 deletions cpanfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ requires 'Search::Elasticsearch';
requires 'Search::Elasticsearch::Client::2_0';
requires 'Sub::Exporter';
requires 'Text::CSV_XS';
requires 'Term::ANSIColor';
requires 'URI';
requires 'XML::Simple';

Expand Down
30 changes: 21 additions & 9 deletions lib/MetaCPAN/ES.pm
Original file line number Diff line number Diff line change
Expand Up @@ -109,25 +109,37 @@ sub count ( $self, %args ) {
);
}

sub clear_type ( $self ) {
my $bulk = $self->bulk;
sub get_ids ( $self, %args ) {
my $query = $args{query};

my $scroll = $self->scroll(
query => { match_all => {} },
query => $query // { match_all => {} },
sort => '_doc',
);

my @ids;

while ( my $search = $scroll->next ) {
push @ids => $search->{_id};
log_debug { "deleting id=" . $search->{_id} };
if ( @ids == 500 ) {
$bulk->delete_ids(@ids);
@ids = ();
}
}
$bulk->delete_ids(@ids);

return \@ids;
}

sub delete_ids ( $self, $ids ) {
my $bulk = $self->bulk;

while ( my @batch = splice(@$ids, 0, 500) ) {
$bulk->delete_ids(@batch);
}

$bulk->flush;
}

sub clear_type ( $self ) {
my $ids = $self->get_ids();

$self->delete_ids(@$ids);
}

1;
5 changes: 4 additions & 1 deletion lib/MetaCPAN/Ingest.pm
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use LWP::UserAgent;
use Path::Tiny qw< path >;
use PAUSE::Permissions ();
use Ref::Util qw< is_ref is_plain_arrayref is_plain_hashref >;
use Term::ANSIColor qw< colored >;
use XML::Simple qw< XMLin >;

use MetaCPAN::Config;
Expand Down Expand Up @@ -56,7 +57,9 @@ $config->init_logger;

sub config () {$config}

sub are_you_sure ( $msg ) {
sub are_you_sure ( $msg, $force=0 ) {
return 1 if $force;

my $iconfirmed = 0;

if ( -t *STDOUT ) {
Expand Down

0 comments on commit 7910662

Please sign in to comment.