Skip to content
This repository has been archived by the owner on Jul 24, 2021. It is now read-only.

Remove duplicate device reports; shunt environment data through a different endpoint #550

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion json-schema/input.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,35 @@ definitions:
type: string
vendor_name:
type: string

# Schema for environment readings (temperatures and PSU voltages) for a device.
# NOTE(review): presumably this is the payload for the /device/:id/environment
# endpoint referred to by the TODO comments in DeviceReport -- confirm.
EnvironmentData:
properties:
# temperature readings; only the two CPU readings are mandatory
temp:
type: object
required:
- cpu0
- cpu1
properties:
cpu0:
type: integer
cpu1:
type: integer
exhaust:
type: integer
inlet:
type: integer
# one integer reading per disk (presumably its temperature -- TODO confirm);
# any non-whitespace key is accepted
disks:
type: object
patternProperties:
^\S+$:
description: key = device_disk.serial_number
type: integer
# power supply voltages; unlike the temperatures, these may be non-integer
voltage:
type: object
properties:
psu0:
type: number
psu1:
type: number
DeviceReport:
required:
- bios_version
Expand Down Expand Up @@ -121,6 +149,7 @@ definitions:
drive_type:
type: string
temp:
# TODO: remove, once /device/:id/environment endpoint starts getting used
$ref: /definitions/int_or_stringy_int
enclosure:
type: string
Expand Down Expand Up @@ -219,6 +248,7 @@ definitions:
system_uuid:
$ref: /definitions/uuid
temp:
# TODO: remove, once /device/:id/environment endpoint starts getting used
type: object
required:
- cpu0
Expand Down
180 changes: 180 additions & 0 deletions lib/Conch/Command/dedupe_device_reports.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
package Conch::Command::dedupe_device_reports;

=pod

=head1 NAME

dedupe_device_reports - remove duplicate device reports

=head1 SYNOPSIS

dedupe_device_reports [long options...]

-n, --dry-run  do not make any changes; only report what would be deleted
--help         print usage message and exit

=cut

use Mojo::Base 'Mojolicious::Command', -signatures;
use Getopt::Long::Descriptive;
use Try::Tiny;

# short summary of what this command does
has description => 'remove duplicate device reports';

# usage text for this command
has usage => sub { shift->extract_usage }; # extracts from SYNOPSIS

# set by run() from the --dry-run option; when true, run() reads devices through
# the read-only resultset and _process_device only reports what it would delete.
has 'dry_run';

# Command entry point.
#
# Parses the command-line arguments, prints the current record counts, then
# visits every active device (oldest first) and hands it to _process_device
# inside its own transaction. A failure on one device is reported on STDERR and
# the next device is tried; a failed rollback is fatal. Record counts are printed
# again at the end, or when the user interrupts with ^C.
sub run {
    my ($self, @args) = @_;

    # on ^C, show where the record counts stand now, and stop.
    local $SIG{INT} = sub {
        say "\naborting! We now have this many records:";
        $self->_print_stats;
        exit;
    };

    local @ARGV = @args;
    my ($opt, $usage) = describe_options(
        # mojo builds its help output from the SYNOPSIS rather than from these
        # descriptions, but they remain available through $usage->text
        'dedupe_device_reports %o',
        [ 'dry-run|n', 'dry-run (no changes are made)' ],
        [],
        [ 'help', 'print usage message and exit', { shortcircuit => 1 } ],
    );

    $self->dry_run($opt->dry_run);

    say 'at start, we have this many records:';
    $self->_print_stats;

    # a dry run never writes, so the read-only resultset is sufficient for it
    my $all_devices = $self->dry_run
        ? $self->app->db_ro_devices
        : $self->app->db_devices;

    # active devices only, oldest first, fetched 100 rows at a time
    my $paged_devices = $all_devices->active->rows(100)->page(1)->order_by('created');
    my $final_page = $paged_devices->pager->last_page;

    my $processed = 0;

    for my $page_number (1 .. $final_page) {
        $paged_devices = $paged_devices->page($page_number);

        while (my $device = $paged_devices->next) {
            # one transaction per device: an aborted run can be resumed later
            # without losing the work already done on earlier devices
            try {
                $self->app->schema->txn_do(sub { $self->_process_device($device) });
                ++$processed;
            }
            catch {
                my $error = $_;

                # a failed rollback leaves things in an unknown state: give up entirely
                if ($error =~ /Rollback failed/) {
                    local $@ = $error;
                    die;
                }

                print STDERR "\n", 'aborted processing of device ' . $device->id . ': ', $error, "\n";
            };
        }
    }

    say "\n$processed devices processed.";

    say 'at finish, we have this many records:';
    $self->_print_stats;
}

# Prints the current number of rows in each of the tables that this command can
# affect, one "label: count" line per table. Counts are read through the
# application's read-only resultsets.
sub _print_stats ($self) {
    my $app = $self->app;

    # output label => name of the application's read-only resultset accessor
    my @tables = (
        [ 'device_report: '           => 'db_ro_device_reports' ],
        [ 'validation_state: '        => 'db_ro_validation_states' ],
        [ 'validation_state_member: ' => 'db_ro_validation_state_members' ],
        [ 'validation_result: '       => 'db_ro_validation_results' ],
    );

    my $printed;
    foreach my $table (@tables) {
        my ($label, $resultset_accessor) = $table->@*;
        $printed = say $label, $app->$resultset_accessor->count;
    }

    # as before, the result of the last 'say' is what the caller receives
    return $printed;
}

# Finds and removes the duplicate device reports belonging to one device.
#
# Parameters:
#   $device - a device row object; it must provide ->id, ->search_related and
#             ->related_resultset.
#
# Every report linked to a passing validation_state is examined, newest first.
# A report is marked for deletion when matches_report_id finds a matching report
# among the device's reports. Unless this is a dry run, the marked reports are
# then deleted, followed by the device's validation_result rows that no longer
# have any validation_state_member row.
#
# Progress is printed to STDOUT: a '.' for each page of reports and an 'x' for
# each report marked for deletion, followed by a summary line.
#
# run() calls this inside a transaction, so an exception thrown here rolls back
# this device's deletions.
sub _process_device ($self, $device) {

my $report_count = 0;
print 'device id ', $device->id, ': ';

# consider all PASSING device reports, newest first, in pages of 100 rows each
# NOTE(review): the reports are reached through validation_states, so a report
# linked to more than one passing validation_state would presumably be returned
# (and counted) once per link -- confirm against the schema.
my $device_report_rs = $device
->search_related('validation_states', { status => 'pass' })
->related_resultset('device_report')
->columns([ qw(id created) ])
->rows(100)
->page(1)
->order_by({ -desc => 'created' })
->hri; # raw hashref data; do not inflate to objects or alter timestamps

# we accumulate report ids to delete so we can safely iterate through reports
# without the pages changing strangely
my @delete_report_ids;

foreach my $page (1 .. $device_report_rs->pager->last_page) {
# progress output: one dot per page, with a line break every 100 pages
print "\n" if $page % 100 == 0;
print '.';

$device_report_rs = $device_report_rs->page($page);

while (my $device_report = $device_report_rs->next) {
++$report_count;
# NOTE(review): this tests the *page* number, so an additional dot is printed for
# every report on pages 100, 200, ... only. It looks like $report_count may have
# been intended here -- confirm.
print '.' if $page % 100 == 0;

# delete this report if it is identical (excluding time-series data)
# to another report. (a *newer* report may be found if it did not have a
# validation_state record linked to it, but usually the matching duplicate will be
# older.)
# NOTE(review): matches_report_id is defined elsewhere. If it does not restrict
# the match to *older* reports, two identical passing reports would each match
# the other and both be deleted, leaving no copy behind -- verify.
if ($device->related_resultset('device_reports')
->matches_report_id($device_report->{id})
->exists)
{
print 'x';
push @delete_report_ids, $device_report->{id};
}
}
}

print "\n";

if ($self->dry_run) {
# dry run: report what would happen, but change nothing
say 'Would delete ', scalar(@delete_report_ids), ' reports for device id ', $device->id,
' out of ', $report_count, ' examined.';
}
else {
# delete all duplicate reports that we found
# this may also cause cascade deletes on validation_state, validation_state_member.
say 'deleting ', scalar(@delete_report_ids), ' reports for device id ', $device->id,
' out of ', $report_count, ' examined...';
$device
->search_related('device_reports', { id => { -in => \@delete_report_ids } })
->delete;

# delete all newly-orphaned validation_result rows for this device
# (the join finds validation_result rows with no validation_state_member row.
# This is not limited to rows orphaned by the delete above: any such row
# belonging to this device is removed.)
$device
->search_related('validation_results',
{ 'validation_state_members.validation_result_id' => undef },
{ join => 'validation_state_members' },
)->delete;
}

print "\n";
}

1;
__END__

=pod

=head1 LICENSING

Copyright Joyent, Inc.

This Source Code Form is subject to the terms of the Mozilla Public License,
v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
one at http://mozilla.org/MPL/2.0/.

=cut
# vim: set ts=4 sts=4 sw=4 et :
138 changes: 138 additions & 0 deletions lib/Conch/Command/extract_temperatures.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
package Conch::Command::extract_temperatures;

=pod

=head1 NAME

extract_temperatures - extract temperatures from historical device reports

=head1 SYNOPSIS

extract_temperatures [long options...]

--dir directory to create data files in
--help print usage message and exit

=cut

use Mojo::Base 'Mojolicious::Command', -signatures;
use Getopt::Long::Descriptive;
use Mojo::JSON 'from_json', 'encode_json';

# short summary of what this command does
has description => 'extract temperatures from historical device reports';

# usage text for this command
has usage => sub { shift->extract_usage }; # extracts from SYNOPSIS

# Command entry point.
#
# Reads every device report, oldest first, in pages of 100 rows. From each
# report's JSON it extracts the temperature, fan speed and PSU readings, and
# writes them as one JSON document per line to a file named after the report's
# creation date (see _fh_for_date). Reports containing none of these readings
# are skipped.
#
# Options:
#   --dir, -d   directory to create the data files in. It is created if it does
#               not exist. Without this option the current directory is used.
#
# Dies if the directory cannot be created or entered, so that output never lands
# somewhere unexpected.
sub run {
    my $self = shift;

    local @ARGV = @_;
    my ($opt, $usage) = describe_options(
        # the descriptions aren't actually used anymore (mojo uses the synopsis instead)... but
        # the 'usage' text block can be accessed with $usage->text
        'extract_temperatures %o',
        [ 'dir|d=s', 'directory to create data files in' ],
        [],
        [ 'help', 'print usage message and exit', { shortcircuit => 1 } ],
    );

    if (my $dir = $opt->dir) {
        say 'creating files in ', $dir, '...';
        if (not -d $dir) {
            mkdir $dir or die "could not create directory $dir: $!";
        }
        chdir $dir or die "could not change to directory $dir: $!";
    }

    # process reports in pages of 100 rows each.
    # An explicit, unambiguous ordering is required: without one the database may
    # return rows in a different order for each page, so reports could be skipped
    # or repeated. Ordering by creation time also means each date's reports are
    # contiguous, so each output file is written in a single pass.
    my $device_report_rs = $self->app->db_ro_device_reports
        ->search(undef, { order_by => [ qw(created id) ] })
        ->rows(100)
        ->page(1)
        ->hri; # raw hashref data; do not inflate to objects or alter timestamps

    my $num_pages = $device_report_rs->pager->last_page;
    foreach my $page (1 .. $num_pages) {
        # progress output: one dot per page, with a line break every 100 pages
        print "\n" if $page % 100 == 0;
        print '.';

        $device_report_rs = $device_report_rs->page($page);
        while (my $device_report = $device_report_rs->next) {

            my $data = from_json($device_report->{report});

            # any of these sections may be absent from a report: treat a missing
            # section as empty rather than dying or warning part-way through.
            my $temps     = $data->{temp}  // {};
            my $disks     = $data->{disks} // {};
            my $fan_count = $data->{fans}{count} // 0;
            my $psu_count = $data->{psus}{count} // 0;

            # TODO: I have no idea if this is the desired format! adjust as needed.

            # the report's own temperature readings, plus one "disk_<key>" entry
            # for each disk that reports a temperature
            my $temp = +{
                $temps->%*,

                (map {; "disk_$_" => $disks->{$_}{temp} }
                    grep { exists $disks->{$_}{temp} }
                    keys $disks->%*),
            };

            # "fan_<index>" => speed percentage, for each fan unit that reports one
            my $fan_speeds = +{
                (map {; "fan_$_" => $data->{fans}{units}[$_]{speed_pct} }
                    grep { exists $data->{fans}{units}[$_]{speed_pct} }
                    0 .. $fan_count - 1),
            };

            # "psu_<index>" => the unit's fields whose names start with amps, volts or watts
            my $psus = +{
                (map {;
                    my $num = $_;
                    "psu_$num" => +{
                        map { $_ => $data->{psus}{units}[$num]{$_} }
                        grep { /^(amps|volts|watts)/ }
                        keys $data->{psus}{units}[$num]->%*
                    }
                }
                0 .. $psu_count - 1),
            };

            my $output_data = {
                date => $device_report->{created},
                device_id => $device_report->{device_id},
                ( keys %$temp ? ( temp => $temp ) : () ),
                ( keys %$fan_speeds ? ( fan_speeds => $fan_speeds ) : () ),
                ( keys %$psus ? ( psus => $psus ) : () ),
            };

            # only 'date' and 'device_id' are present: this report has no readings
            next if keys %$output_data == 2;

            my $fh = $self->_fh_for_date($device_report->{created});
            print {$fh} encode_json($output_data), "\n";
        }
    }

    print "\n\ndone.\n";
}

# the currently-open output filehandle, keyed by its date (YYYY-MM-DD).
# Only one file is kept open at a time.
my %fh_cache;

# dates whose output file has already been created during this run
my %file_started;

# Returns a filehandle, open for writing, for the file "temperatures-<date>.json"
# in the current directory, where <date> is the date part of $timestamp.
#
# Parameters:
#   $timestamp - a timestamp in any form accepted by Conch::Time->new
#
# When the date differs from that of the currently-open file, that file is closed
# first. The first time a date is seen its file is created (replacing any file
# left by a previous run); if the same date comes up again later, the file is
# re-opened for appending, so nothing already written in this run is lost.
#
# Dies if a file cannot be opened, or if closing the previous file fails (which
# would mean that buffered output was not written).
sub _fh_for_date ($self, $timestamp) {

    my $date = Conch::Time->new($timestamp)->strftime('%Y-%m-%d');

    return $fh_cache{$date} if exists $fh_cache{$date};

    # we're on a new date; close the old file and forget its handle, so that a
    # closed handle can never be handed out again.
    foreach my $old_date (keys %fh_cache) {
        my $old_fh = delete $fh_cache{$old_date};
        close $old_fh
            or die "could not close temperatures-$old_date.json: $!";
    }

    # truncate only the first time we see a date; append afterwards.
    my $mode = $file_started{$date}++ ? '>>' : '>';

    # use raw binmode, as json data will be utf8-encoded if needed.
    open my $fh, $mode, "temperatures-$date.json"
        or die "could not open temperatures-$date.json for writing: $!";

    return $fh_cache{$date} = $fh;
}

1;
__END__

=pod

=head1 LICENSING

Copyright Joyent, Inc.

This Source Code Form is subject to the terms of the Mozilla Public License,
v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
one at http://mozilla.org/MPL/2.0/.

=cut
# vim: set ts=4 sts=4 sw=4 et :
Loading