Skip to content
This repository has been archived by the owner on Jul 24, 2021. It is now read-only.

Commit

Permalink
run all incoming reports through validations, and then possibly delet…
Browse files Browse the repository at this point in the history
…e older passing reports

We do not keep a device report if it follows a passing result and also is followed by a passing
result. This means that we consider *previous* reports for deletion, never the most recent one.

- remove duplicate device report detection logic (see #460, #492): we now once again process
  all device reports as they come in.
- reorder validation_status_enum for nicer queries
- create a command-line script to thin out historical reports.

closes #619, #461.
  • Loading branch information
karenetheridge committed Feb 13, 2019
1 parent 778bbdf commit d088336
Show file tree
Hide file tree
Showing 10 changed files with 458 additions and 138 deletions.
1 change: 1 addition & 0 deletions cpanfile
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ on 'test' => sub {
requires 'Test::Warnings';
requires 'Test::Fatal';
requires 'Test::Deep';
requires 'Test::Deep::JSON';
requires 'Test::Memory::Cycle';
requires 'Module::CPANfile';
requires 'DBIx::Class::EasyFixture', '0.13'; # Moo not Moose
Expand Down
21 changes: 21 additions & 0 deletions cpanfile.snapshot
Original file line number Diff line number Diff line change
Expand Up @@ -1583,6 +1583,16 @@ DISTRIBUTIONS
perl 5.008001
strict 0
warnings 0
Exporter-Lite-0.08
pathname: N/NE/NEILB/Exporter-Lite-0.08.tar.gz
provides:
Exporter::Lite 0.08
requirements:
Carp 0
ExtUtils::MakeMaker 6.3
perl 5.006
strict 0
warnings 0
Exporter-Tiny-1.002001
pathname: T/TO/TOBYINK/Exporter-Tiny-1.002001.tar.gz
provides:
Expand Down Expand Up @@ -3070,6 +3080,17 @@ DISTRIBUTIONS
List::Util 1.09
Scalar::Util 1.09
Test::Builder 0
Test-Deep-JSON-0.05
pathname: M/MO/MOTEMEN/Test-Deep-JSON-0.05.tar.gz
provides:
Test::Deep::JSON 0.05
requirements:
Exporter::Lite 0
ExtUtils::MakeMaker 6.59
JSON::MaybeXS 0
Module::Build::Tiny 0.035
Test::Deep 0
perl 5.008001
Test-Differences-0.64
pathname: D/DC/DCANTRELL/Test-Differences-0.64.tar.gz
provides:
Expand Down
210 changes: 210 additions & 0 deletions lib/Conch/Command/thin_device_reports.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
package Conch::Command::thin_device_reports;

=pod

=head1 NAME

thin_device_reports - remove unwanted device reports

=head1 SYNOPSIS

    bin/conch thin_device_reports [long options...]

        -n --dry-run  dry-run (no changes are made)

        --help        print usage message and exit

=cut

use Mojo::Base 'Mojolicious::Command', -signatures;
use Getopt::Long::Descriptive;
use Try::Tiny;

has description => 'remove unwanted device reports';

has usage => sub { shift->extract_usage }; # extracts from SYNOPSIS

has 'dry_run';

# Command entry point.
#
# Parses the command-line options in @opts, then visits every active device
# (ordered by creation time, 100 per page) and thins out its device reports.
# Table row counts are printed before and after; a summary of what was deleted
# is printed at the end.
sub run ($self, @opts) {
    # If the operator hits ^C, show where the tables stand before bailing out.
    local $SIG{INT} = sub {
        say "\naborting! We now have this many records:";
        $self->_print_stats;
        exit;
    };

    # Option parsing works on @ARGV, so temporarily substitute our arguments.
    # The per-option descriptions are not what gets shown to the user (mojo
    # displays the SYNOPSIS instead), but the generated usage block remains
    # available through $usage->text.
    local @ARGV = @opts;
    my ($opt, $usage) = describe_options(
        'thin_device_reports %o',
        [ 'dry-run|n', 'dry-run (no changes are made)' ],
        [],
        [ 'help', 'print usage message and exit', { shortcircuit => 1 } ],
    );

    $self->dry_run($opt->dry_run);

    say 'at start, we have this many records:';
    $self->_print_stats;

    # A dry run reads devices through the read-only resultset.
    my $all_devices = $self->dry_run ? $self->app->db_ro_devices : $self->app->db_devices;
    my $paged_devices = $all_devices
        ->active
        ->rows(100)
        ->page(1)
        ->order_by('created');

    my %total = (devices => 0, device_reports => 0, validation_results => 0);

    for my $page_number (1 .. $paged_devices->pager->last_page) {
        my $devices = $paged_devices->page($page_number);

        while (my $device = $devices->next) {
            # One transaction per device: an interrupted run can be resumed
            # later without repeating the devices that were already handled.
            try {
                my ($reports_gone, $results_gone) = $self->app->schema->txn_do(
                    sub { $self->_process_device($device) }
                );
                $total{devices}++;
                $total{device_reports} += $reports_gone;
                $total{validation_results} += $results_gone;
            }
            catch {
                my $error = $_;

                # A failed rollback is not something we can carry on from:
                # re-throw it. Anything else only costs us this one device.
                if ($error =~ /Rollback failed/) {
                    local $@ = $error;
                    die;
                }

                print STDERR "\n", 'aborted processing of device ' . $device->id . ': ', $error, "\n";
            };
        }
    }

    say "\n$total{devices} devices processed.";
    say $total{device_reports}.' device_reports deleted.' if $total{device_reports};
    say $total{validation_results}.' validation_results deleted.' if $total{validation_results};

    say 'at finish, we have this many records:';
    $self->_print_stats;
}

# Print the current row count of each table this command can affect, one
# "<table>: <count>" line per table, using the read-only resultsets.
sub _print_stats ($self) {
    my $app = $self->app;

    # [ label to print, name of the app method returning the resultset ]
    my @tables = (
        [ device_report           => 'db_ro_device_reports' ],
        [ validation_state        => 'db_ro_validation_states' ],
        [ validation_state_member => 'db_ro_validation_state_members' ],
        [ validation_result       => 'db_ro_validation_results' ],
    );

    foreach my $table (@tables) {
        my ($label, $resultset_method) = $table->@*;
        say $label, ': ', $app->$resultset_method->count;
    }
}

# Work out which of one device's reports are redundant and, unless this is a
# dry run, delete them together with any validation_result rows of the device
# that are no longer attached to a validation_state.
#
# A report is redundant only when it passed AND the report before it passed
# AND the report after it passed. It follows that these are always kept:
#   - the first and the most recent report of the device
#   - every report that did not pass
#   - the first pass after a non-pass, and the last pass before a non-pass
#
# Progress is printed to STDOUT (one '.' per 100 reports examined).
#
# Returns the list (device_reports deleted, validation_results deleted);
# both are 0 for a dry run.
sub _process_device ($self, $device) {
    my $device_id = $device->id;
    print 'device id ', $device_id, ': ';

    # All reports of this device, oldest first, fetched 100 at a time as plain
    # hashrefs carrying 'id' and 'status'. According to the original author's
    # note, a valid report with no validation results counts as a 'pass' here
    # (that is decided inside with_report_status, which is not visible in this
    # file).
    my $reports = $self->app->db_device_reports
        ->search({ 'device_report.device_id' => $device_id })
        ->columns('device_report.id')
        ->with_report_status
        ->order_by({ -asc => 'device_report.created' })
        ->rows(100)
        ->page(1)
        ->hri;

    # Ids are only collected while iterating; the actual delete happens once
    # at the end so that the pages do not shift underneath us.
    my @doomed_ids;
    my $examined = 0;

    # The two reports seen immediately before the current one (undef until we
    # have seen that many). Together with the current report they form the
    # three-report window in which the middle one is judged.
    my ($before_last, $last);

    for my $page_number (1 .. $reports->pager->last_page) {
        my $page = $reports->page($page_number);

        while (my $current = $page->next) {
            print '.' if ++$examined % 100 == 0;

            #   before_last  last  current   delete last?
            #   (any combination involving a missing or
            #    non-passing report)           no
            #   PASS         PASS  PASS        yes
            my $sandwiched = $current->{status} eq 'pass'
                && $last && $last->{status} eq 'pass'
                && $before_last && $before_last->{status} eq 'pass';

            push @doomed_ids, $last->{id} if $sandwiched;

            # slide the window forward by one report
            ($before_last, $last) = ($last, $current);
        }
    }

    print "\n";

    my $doomed_count = @doomed_ids;

    if ($self->dry_run) {
        say 'Would delete ', $doomed_count, ' reports for device id ', $device_id,
            ' out of ', $examined, ' examined.';
        print "\n";
        return (0, 0);
    }

    # Remove everything we flagged. This may in turn cascade to
    # validation_state and validation_state_member rows.
    say 'deleting ', $doomed_count, ' reports for device id ', $device_id,
        ' out of ', $examined, ' examined...';
    my $device_reports_deleted = $device
        ->search_related('device_reports', { id => { -in => \@doomed_ids } })
        ->delete;

    # Then sweep up this device's validation_result rows that no longer belong
    # to any validation_state.
    my $validation_results_deleted = $device
        ->search_related(
            'validation_results',
            { 'validation_state_members.validation_state_id' => undef },
            { join => 'validation_state_members' },
        )
        ->delete;

    print "\n";

    return ($device_reports_deleted, $validation_results_deleted);
}

1;
__END__
=pod

=head1 LICENSING

Copyright Joyent, Inc.

This Source Code Form is subject to the terms of the Mozilla Public License,
v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
one at http://mozilla.org/MPL/2.0/.

=cut
# vim: set ts=4 sts=4 sw=4 et :
90 changes: 39 additions & 51 deletions lib/Conch/Controller/DeviceReport.pm
Original file line number Diff line number Diff line change
Expand Up @@ -88,56 +88,19 @@ sub process ($c) {

my $existing_device = $c->db_devices->active->find($c->stash('device_id'));

if ($existing_device
and $existing_device->latest_report_matches($raw_report)) {

$existing_device->self_rs->latest_device_report->update({
last_received => \'now()',
received_count => \'received_count + 1',
});

if ($unserialized_report->{relay}) {
$existing_device
->search_related('device_relay_connections',
{ relay_id => $unserialized_report->{relay}{serial} })
->update_or_create({ last_seen => \'NOW()' });
} else {
$c->log->warn('received report without relay id (device_id '. $existing_device->id.')');
}

# this magically DTRT, without having to inject a ->as_subselect_rs,
# because DBIx::Class::ResultSet::_chain_relationship understands how to wrap
# joins using order by/limit into a subquery
my $validation_state = $existing_device->self_rs->latest_device_report
->related_resultset('validation_states')
->order_by({ -desc => 'validation_states.created' })
->rows(1)
->single;

if (not $validation_state) {
# normally we should always find an associated validation_state record, because all
# incoming device reports (that get stored) have validations run against them.
$c->log->warn('Duplicate device report detected (device_report_id '
. $existing_device->self_rs->latest_device_report->get_column('id')->single
. ' but could not find an associated validation_state record to return');

# but we can try harder to find *something* to return, in most cases...
$validation_state = $c->db_device_reports
->matches_jsonb($raw_report)
->related_resultset('validation_states')
->order_by({ -desc => 'validation_states.created' })
->rows(1)
->single;

return $c->status(400, { error => 'duplicate report; could not find relevant validation_state record to return from matching reports' }) if not $validation_state;
}

$c->log->debug('Duplicate device report detected (device_report_id '
. $validation_state->device_report_id
. '; returning previous validation_state (id ' . $validation_state->id .')');

return $c->status(200, $validation_state);
}
# capture information about the last report before we store the new one
# state can be: error, fail, processing, pass, where no validations on a valid report is
# considered to be a pass.
my ($previous_report_id, $previous_report_status);
if ($existing_device) {
($previous_report_id, $previous_report_status) =
$existing_device->self_rs->latest_device_report
->columns('device_reports.id')
->with_report_status
->hri
->single
->@{qw(id status)};
}

# Update/create the device and create the device report
$c->log->debug("Updating or creating device ".$c->stash('device_id'));
Expand All @@ -161,7 +124,10 @@ sub process ($c) {
$c->log->debug("Creating device report");
my $device_report = $device->create_related('device_reports', {
report => $raw_report,
# invalid, created, last_received, received_count all use defaults.
# we will always keep this report if the previous report failed, or this is the first
# report (in its phase).
!$previous_report_status || $previous_report_status ne 'pass' ? ( retain => 1 ) : (),
# invalid, created use defaults.
});
$c->log->info("Created device report ".$device_report->id);

Expand Down Expand Up @@ -208,6 +174,28 @@ sub process ($c) {

$device->update( { health => uc( $validation_state->status ), updated => \'NOW()' } );

# save some state about this report that will help us out next time, when we consider
# deleting it... we always keep all failing reports (we also keep the first report after a
# failure)
$device_report->update({ retain => 1 })
if $validation_state->status ne 'pass' and not $device_report->retain;

# now delete that previous report, if we can
if ($previous_report_id and $previous_report_status eq 'pass') {
if ($c->db_device_reports
->search({ id => $previous_report_id, retain => \'is not TRUE' })
->delete > 0)
{
$c->log->debug('deleted previous device report id '.$previous_report_id);
# deleting device_report cascaded to validation_state and validation_state_member;
# now clean up orphaned results
$device->search_related('validation_results',
{ 'validation_state_members.validation_state_id' => undef },
{ join => 'validation_state_members' },
)->delete;
}
}

$c->status( 200, $validation_state );
}

Expand Down
Loading

0 comments on commit d088336

Please sign in to comment.