diff --git a/cgi/CRMS.pm b/cgi/CRMS.pm index 24af31ca..757b4b4a 100755 --- a/cgi/CRMS.pm +++ b/cgi/CRMS.pm @@ -83,8 +83,7 @@ sub SetupUser my $note = ''; my $sdr_dbh = $self->get('ht_repository'); - if (!defined $sdr_dbh) - { + if (!defined $sdr_dbh) { $sdr_dbh = $self->ConnectToSdrDb('ht_repository'); $self->set('ht_repository', $sdr_dbh) if defined $sdr_dbh; } @@ -95,66 +94,53 @@ sub SetupUser my $candidate = $ENV{'REMOTE_USER'}; $candidate = lc $candidate if defined $candidate; $note .= sprintf "ENV{REMOTE_USER}=%s\n", (defined $candidate)? $candidate:''; - if ($candidate) - { - my $candidate2; + if ($candidate) { + my $ht_users_email; my $ref = $sdr_dbh->selectall_arrayref($htsql, undef, $candidate); - if ($ref && scalar @{$ref}) - { + if ($ref && scalar @{$ref}) { $ht_user = $candidate; $note .= "Set ht_user=$ht_user\n"; - $candidate2 = $ref->[0]->[0]; + $ht_users_email = $ref->[0]->[0]; } - if ($self->SimpleSqlGet($usersql, $candidate)) - { + if ($self->SimpleSqlGet($usersql, $candidate)) { $crms_user = $candidate; $note .= "Set crms_user=$crms_user from lc ENV{REMOTE_USER}\n"; } - if (!$crms_user && $self->SimpleSqlGet($usersql, $candidate2)) - { - $crms_user = $candidate2; + if (!$crms_user && $self->SimpleSqlGet($usersql, $ht_users_email)) { + $crms_user = $ht_users_email; $note .= "Set crms_user=$crms_user from ht_users.email\n"; } } - if (!$crms_user || !$ht_user) - { - $candidate = $ENV{'email'}; - $candidate = lc $candidate if defined $candidate; - $candidate =~ s/\@umich.edu// if defined $candidate; - $note .= sprintf "ENV{email}=%s\n", (defined $candidate)? $candidate:''; - if ($candidate) - { - my $candidate2; + if (!$crms_user || !$ht_user) { + $note .= sprintf "ENV{email}=%s\n", (defined $ENV{email}) ? $ENV{email} : ''; + foreach my $candidate (@{$self->extract_env_email}) { + my $ht_users_email; my $ref = $sdr_dbh->selectall_arrayref($htsql, undef, $candidate); - if ($ref && scalar @{$ref} && !$ht_user) - { + if ($ref && scalar @$ref && !$ht_user) { $ht_user = $candidate; $note .= "Set ht_user=$ht_user\n"; - $candidate2 = $ref->[0]->[0]; + $ht_users_email = $ref->[0]->[0]; } - if ($self->SimpleSqlGet($usersql, $candidate) && !$crms_user) - { + if ($self->SimpleSqlGet($usersql, $candidate) && !$crms_user) { $crms_user = $candidate; - $note .= "Set crms_user=$crms_user from lc ENV{email}\n"; + $note .= "Set crms_user=$crms_user from ENV{email} candidate $candidate\n"; } - if (!$crms_user && $self->SimpleSqlGet($usersql, $candidate2) && !$crms_user) - { - $crms_user = $candidate2; + if (!$crms_user && $self->SimpleSqlGet($usersql, $ht_users_email)) { + $crms_user = $ht_users_email; $note .= "Set crms_user=$crms_user from ht_users.email\n"; } + # No need to iterate further if we have a match in both crms.users and ht.ht_users + last if $ht_user && $crms_user; } } - if ($ht_user) - { - if ($self->NeedStepUpAuth($ht_user)) - { + if ($ht_user) { + if ($self->NeedStepUpAuth($ht_user)) { $note .= "HT user $ht_user step-up auth required.\n"; $self->set('stepup', 1); } $self->set('ht_user', $ht_user); } - if ($crms_user) - { + if ($crms_user) { $note .= "Setting CRMS user to $crms_user.\n"; $self->set('remote_user', $crms_user); my $alias = $self->GetAlias($crms_user); @@ -165,6 +151,28 @@ sub SetupUser return $crms_user; } +# Extract potentially multiple values of ENV{email} as an array ref. +# We have seen multiple (duplicate) values of email separated by semicolons +# coming from Shib. +# Values are downcased, unique, nonempty strings with any "@umich.edu" stripped. +sub extract_env_email { + my $self = shift; + my $env_email = shift || $ENV{email}; + + my $emails = []; + if (defined $env_email) { + my %seen; + foreach my $email (split(';', lc $env_email)) { + $email =~ s/\@umich.edu//; + if (length $email && !$seen{$email}) { + push @$emails, $email; + $seen{$email} = 1; + } + } + } + return $emails; +} + # Construct redirect URL based on template # replace __HOST__ with $ENV{SERVER_NAME} # replace __TARGET__ with something like CGI::self_url($cgi) diff --git a/t/CRMS.t b/t/CRMS.t index 369eb82d..f35f2b07 100755 --- a/t/CRMS.t +++ b/t/CRMS.t @@ -18,6 +18,29 @@ subtest '#Version' => sub { ok($crms->Version); }; +subtest '#extract_env_email' => sub { + my $tests = [ + # Each is [INPUT, OUTPUT, TEST_COMMENT] + [undef, [], 'empty array if no email defined'], + ['', [], 'empty array if empty string'], + ['someone@somewhere.edu', ['someone@somewhere.edu'], 'extracts single value'], + ['someone@somewhere.edu;someone_else@somewhere.edu', ['someone@somewhere.edu', 'someone_else@somewhere.edu'], 'extracts multiple values'], + [';someone@somewhere.edu', ['someone@somewhere.edu'], 'ignores leading semicolon'], + ['someone@somewhere.edu;', ['someone@somewhere.edu'], 'ignores trailing semicolon'], + ['someone@umich.edu', ['someone'], 'strips @umich.edu'], + ['someone@somewhere.edu;someone@somewhere.edu', ['someone@somewhere.edu'], 'merges duplicates'], + ['SOMEONE@SOMEWHERE.EDU', ['someone@somewhere.edu'], 'downcases'] + ]; + my $save_email = $ENV{email}; + delete $ENV{email}; + foreach my $test (@$tests) { + is_deeply($crms->extract_env_email($test->[0]), $test->[1], $test->[2]); + } + $ENV{email} = 'someone@somewhere.edu'; + is_deeply($crms->extract_env_email, ['someone@somewhere.edu'], 'uses ENV{email} if no parameter'); + $ENV{email} = $save_email; +}; + subtest 'CRMS::MoveToHathitrustFiles' => sub { my $tempdir = File::Temp::tempdir(CLEANUP => 1); my $save_hathitrust_files_directory = $ENV{'CRMS_HATHITRUST_FILES_DIRECTORY'};