Skip to content

Commit

Permalink
Optional ADFs-checked files (#11)
Browse files Browse the repository at this point in the history
* Make adf_checked files writing optional based on flag in config

* Option in AE config for using the adf_checked files

* Reverse logic to actively (not passively) skip checks
  • Loading branch information
anjaf authored Mar 19, 2021
1 parent fdf34f5 commit 1997696
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 114 deletions.
233 changes: 119 additions & 114 deletions perl_modules/EBI/FGPT/CheckSet/AEAtlas.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1390,44 +1390,44 @@ Checks ADF(s) against YAML config file to ensure they are supported by the Atlas

sub check_microarray_adf_support {

my ( $self ) = @_;
my ($self) = @_;

$self->info("Checking for array design support in Atlas...");

$self->info( "Checking for array design support in Atlas..." );

my $microarrayAssays = $self->_collect_microarray_assays;

unless( @{ $microarrayAssays } ) {
unless (@{$microarrayAssays}) {

$self->debug( "No microarray assays found, not checking array design support." );
$self->debug("No microarray assays found, not checking array design support.");

return;
}

# Get the unique array design accessions from these assays.

my $assaysWithAdfs = {};

foreach my $assay ( @{ $microarrayAssays } ) {
foreach my $assay (@{$microarrayAssays}) {

unless( $assay->has_arrayDesign ) {
unless ($assay->has_arrayDesign) {

$self->warn(
"No array design found for assay \"",
$assay->get_name,
"\" -- cannot check array design support."
);

$self->_add_atlas_fail_code( -2 );
$self->_add_atlas_fail_code(-2);

next;
}
else {
else {
$assaysWithAdfs->{ $assay->get_name } = $assay;
}
}

# If there are no assays with ADFs, quit here.
unless( keys %{ $assaysWithAdfs } ) {
unless (keys %{$assaysWithAdfs}) {

$self->error(
"No assays with array designs found."
Expand All @@ -1436,131 +1436,136 @@ sub check_microarray_adf_support {
return;
}

my %magetabAdfAccs = map { $_->get_arrayDesign->get_name => 1 } ( values %{ $assaysWithAdfs } );



#######################################
#######################################
# TODO: can/should we consolidate files?
my $adf_tracking_file_path = $CONFIG->get_ADF_CHECKED_LIST;
my %magetabAdfAccs = map {$_->get_arrayDesign->get_name => 1} (values %{$assaysWithAdfs});

my $expt_tracking_file_path = $CONFIG->get_ATLAS_EXPT_CHECKED_LIST;

my ( %absent_adf_acc_count, @checked_expt_list );

open( IN, $adf_tracking_file_path )
|| $self->logdie(
"Can't open file $adf_tracking_file_path to fetch the list of ADFs which are not in the Atlas database."
);

while (<IN>) {
my ( $old_adf_acc, $count ) = $_ =~ /^(A-[A-Z]{4}-\d+)\t(\d+)$/;
$absent_adf_acc_count{$old_adf_acc} = $count;
}

close IN;
# A "true" flag in the config skips both reading and writing the "checked" tracking files
my $skip_tracking_files = $CONFIG->get_SKIP_CHECKED_LIST_FILES eq "true";
if ($skip_tracking_files) {
$self->debug("Skipping writing to ADF checked files");
}
my (%absent_adf_acc_count, @checked_expt_list);

open( IN2, $expt_tracking_file_path )
|| $self->logdie(
"Can't open file $expt_tracking_file_path to fetch the list of experiments already checked for Atlas eligibility."
);

while (<IN2>) {
chomp $_;
push( @checked_expt_list, $_ );
}
my @acc_comments =
grep {$_->get_name eq "ArrayExpressAccession"}
@{$self->get_investigation->get_comments || []};
my $expt_acc;
if ($acc_comments[0]) { # In case the spreadsheet is in AE curation stage still and has no such comment yet
$expt_acc = $acc_comments[0]->get_value if ($acc_comments[0]);
}
else {
$expt_acc = "dummy_expt_acc";
}

close IN2;

# We need to keep track of whether this experiment has been checked before for Atlas eligibility
# If yes, and if the experiment's ADF is not in Atlas database, we don't increment the ADF count
# in adfs_not_in_atlas.txt file (or else many ADFs will be counted multiple times as the cause
# of failing Atlas eligibility)
my $atlasSiteConfig = $self->get_atlas_site_config;

my @acc_comments =
grep { $_->get_name eq "ArrayExpressAccession" }
@{ $self->get_investigation->get_comments || [] };
my $expt_acc;
if ( $acc_comments[0] )
{ # In case the spreadsheet is in AE curation stage still and has no such comment yet
$expt_acc = $acc_comments[0]->get_value if ( $acc_comments[0] );
}
else {
$expt_acc = "dummy_expt_acc";
}
# TODO: end
#######################################
#######################################
my %supportedAdfs = map {$_ => 1} (keys %{$atlasSiteConfig->get_atlas_supported_adfs});

foreach my $arrayDesignAcc (keys %magetabAdfAccs) {

my $atlasSiteConfig = $self->get_atlas_site_config;
if ($arrayDesignAcc =~ /A-[A-Z]{4}-\d+$/) {

my %supportedAdfs = map { $_ => 1 } ( keys %{ $atlasSiteConfig->get_atlas_supported_adfs } );
### FIXME: Need to get ADF synonyms and parse them too. get from AE2 DB?

foreach my $arrayDesignAcc ( keys %magetabAdfAccs ) {

if ( $arrayDesignAcc =~ /A-[A-Z]{4}-\d+$/ ) {

### FIXME: Need to get ADF synonyms and parse them too. get from AE2 DB?
unless ($supportedAdfs{ $arrayDesignAcc }) {

unless( $supportedAdfs{ $arrayDesignAcc } ) {

$self->error(
"Array design \"$arrayDesignAcc\" is not currently supported by Atlas."
"Array design \"$arrayDesignAcc\" is not currently supported by Atlas."
);
$self->_add_atlas_fail_code( 2 );
# if this experiment is checked for Atlas eligibility for the first time
if ( ( !grep $expt_acc eq $_, @checked_expt_list ) ) {

push( @checked_expt_list, $expt_acc );
# if this ADF acc has been flagged before, increment the count
if ( $absent_adf_acc_count{ $arrayDesignAcc } ) {
$absent_adf_acc_count{ $arrayDesignAcc }++;
}
else {
# initiate a record of this ADF acc and start with count 1
$absent_adf_acc_count{ $arrayDesignAcc } = 1;
}
}
}
}

$self->_add_atlas_fail_code(2);

# if this experiment is checked for Atlas eligibility for the first time
if ((!grep $expt_acc eq $_, @checked_expt_list)) {

push(@checked_expt_list, $expt_acc);

# if this ADF acc has been flagged before, increment the count
if ($absent_adf_acc_count{ $arrayDesignAcc }) {
$absent_adf_acc_count{ $arrayDesignAcc }++;
}
else {
# initiate a record of this ADF acc and start with count 1
$absent_adf_acc_count{ $arrayDesignAcc } = 1;
}
}
}
}
# Catch cases where non AE ADF accession is provided.
else {
else {

$self->error(
$self->error(
"Array design \"",
$arrayDesignAcc,
"\" is not a valid ArrayExpress array design accession and hence is not supported by Atlas."
);
);

$self->_add_atlas_fail_code( 2 );
}
}
$self->_add_atlas_fail_code(2);
}
}

# TODO: investigate consolidating the two files.
# Now update the tracking files
open( OUT, ">$adf_tracking_file_path" )
|| $self->logdie(
"Can't open file $adf_tracking_file_path to write the list of updated ADFs which are not supported by Atlas."
);
foreach my $key ( keys %absent_adf_acc_count ) {
print OUT "$key\t$absent_adf_acc_count{$key}\n";
}
close OUT;

open( OUT2, ">$expt_tracking_file_path" )
|| $self->logdie(
"Can't open file $expt_tracking_file_path to write the list of experiments already checked for Atlas eligiblity."
);
foreach (@checked_expt_list) {
print OUT2 "$_\n";
}
close OUT2;
unless ($skip_tracking_files) {

#######################################
#######################################
# TODO: can/should we consolidate files?

my $adf_tracking_file_path = $CONFIG->get_ADF_CHECKED_LIST;

my $expt_tracking_file_path = $CONFIG->get_ATLAS_EXPT_CHECKED_LIST;

open(IN, $adf_tracking_file_path)
|| $self->logdie(
"Can't open file $adf_tracking_file_path to fetch the list of ADFs which are not in the Atlas database."
);

while (<IN>) {
my ($old_adf_acc, $count) = $_ =~ /^(A-[A-Z]{4}-\d+)\t(\d+)$/;
$absent_adf_acc_count{$old_adf_acc} = $count;
}

close IN;

open(IN2, $expt_tracking_file_path)
|| $self->logdie(
"Can't open file $expt_tracking_file_path to fetch the list of experiments already checked for Atlas eligibility."
);

while (<IN2>) {
chomp $_;
push(@checked_expt_list, $_);
}

close IN2;

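# We need to keep track of whether this experiment has been checked before for Atlas eligibility.
# If yes, and if the experiment's ADF is not in Atlas database, we don't increment the ADF count
# in adfs_not_in_atlas.txt file (or else many ADFs will be counted multiple times as the cause
# of failing Atlas eligibility).
# NOTE(review): the lookup this comment describes happens in the array design loop above, which
# runs before the two lists are read from the tracking files here -- confirm the intended ordering.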

# TODO: investigate consolidating the two files.
# Now update the tracking files
open(OUT, ">$adf_tracking_file_path")
|| $self->logdie(
"Can't open file $adf_tracking_file_path to write the list of updated ADFs which are not supported by Atlas."
);
foreach my $key (keys %absent_adf_acc_count) {
print OUT "$key\t$absent_adf_acc_count{$key}\n";
}
close OUT;

open(OUT2, ">$expt_tracking_file_path")
|| $self->logdie(
"Can't open file $expt_tracking_file_path to write the list of experiments already checked for Atlas eligiblity."
);
foreach (@checked_expt_list) {
print OUT2 "$_\n";
}
close OUT2;
}
$self->info( "Finshed checking for Atlas array design support..." );
}

Expand Down
1 change: 1 addition & 0 deletions supporting_files/ArrayExpressSiteConfig.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ VALIDATION_SCRIPT:
# Location of ADF and Experiments checked in Atlas.pm
ADF_CHECKED_LIST: /nfs/production3/ma/home/atlas3-production/sw/configs/adfs_not_in_atlas.txt
ATLAS_EXPT_CHECKED_LIST: /nfs/production3/ma/home/atlas3-production/sw/configs/expts_checked_for_atlas.txt
SKIP_CHECKED_LIST_FILES: true

PRIVATE_ADF_USERNAME: xxxx
PRIVATE_ADF_PASSWORD: xxxxx
Expand Down

0 comments on commit 1997696

Please sign in to comment.