From 8619de7a19c907effe78694d375c399d19af9f1e Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Wed, 18 Mar 2026 12:04:27 -0400 Subject: [PATCH 01/10] ETT-1346: testing for populate_rights_data.pl * wrap populate_rights_data.pl in "main" function so we can unit test * use Test2::Tools::Spec (similar to Test::Spec but maintained) * remove unused --source option from populate_rights * don't set a default source (from old HT-1733) --- Dockerfile | 2 + bin/populate_rights_data.pl | 818 ++++++++++++++++++------------------ t/populate_rights_test.t | 293 +++++++++++++ 3 files changed, 708 insertions(+), 405 deletions(-) create mode 100644 t/populate_rights_test.t diff --git a/Dockerfile b/Dockerfile index 180ada9..f19feba 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,9 @@ RUN apt-get update && apt-get install -y \ libmarc-xml-perl \ libmariadb-dev \ libnet-ssleay-perl \ + libtest-exception-perl \ libtest-output-perl \ + libtest-simple-perl \ libwww-perl \ libxml-libxml-perl \ libyaml-libyaml-perl \ diff --git a/bin/populate_rights_data.pl b/bin/populate_rights_data.pl index edf8076..6e8cd9a 100755 --- a/bin/populate_rights_data.pl +++ b/bin/populate_rights_data.pl @@ -24,22 +24,22 @@ my $exit = 0; my $priorities = { - $PRIORITY_BIB => [qw(ic/bib und/bib pd/bib pdus/bib)], - $PRIORITY_GFV => [qw(pdus/gfv)], - $PRIORITY_COPYRIGHT => [qw(ic/unp pd/ncn pdus/ncn pdus/crms pd/ren pdus/ren - ic/ren und/nfi pd/cdpp ic/cdpp pdus/cdpp ic/add pdus/add pd/add pd/exp - icus/gatt icus/ren op/ipma ic/ipma und/ipma und/ren ic/crms pd/crms - und/crms)], - $PRIORITY_ACCESS => [qw(ic-world/con und-world/con nobody/pvt cc-by-3.0/con - cc-by-4.0/con cc-by-nd-3.0/con cc-by-nd-4.0/con orph/ddd orphcand/ddd - cc-by-nc-3.0/con cc-by-nc-4.0/con cc-by-sa-3.0/con cc-by-sa-4.0/con - cc-by-nc-nd-3.0/con cc-by-nc-nd-4.0/con cc-by-nc-sa-3.0/con - cc-by-nc-sa-4.0/con cc-zero/con pd/con)], - $PRIORITY_MAN => [qw(pd/man pdus/man ic-world/man und-world/man ic/man - nobody/man nobody/del pd-pvt/pvt supp/supp cc-by-3.0/man cc-by-4.0/man - cc-by-nd-3.0/man cc-by-nd-4.0/man cc-by-nc-3.0/man cc-by-nc-4.0/man - cc-by-sa-3.0/man cc-by-sa-4.0/man cc-by-nc-nd-3.0/man cc-by-nc-nd-4.0/man - cc-by-nc-sa-3.0/man cc-by-nc-sa-4.0/man cc-zero/man)], + $PRIORITY_BIB => [qw(ic/bib und/bib pd/bib pdus/bib)], + $PRIORITY_GFV => [qw(pdus/gfv)], + $PRIORITY_COPYRIGHT => [qw(ic/unp pd/ncn pdus/ncn pdus/crms pd/ren pdus/ren + ic/ren und/nfi pd/cdpp ic/cdpp pdus/cdpp ic/add pdus/add pd/add pd/exp + icus/gatt icus/ren op/ipma ic/ipma und/ipma und/ren ic/crms pd/crms + und/crms)], + $PRIORITY_ACCESS => [qw(ic-world/con und-world/con nobody/pvt cc-by-3.0/con + cc-by-4.0/con cc-by-nd-3.0/con cc-by-nd-4.0/con orph/ddd orphcand/ddd + cc-by-nc-3.0/con cc-by-nc-4.0/con cc-by-sa-3.0/con cc-by-sa-4.0/con + cc-by-nc-nd-3.0/con cc-by-nc-nd-4.0/con cc-by-nc-sa-3.0/con + cc-by-nc-sa-4.0/con cc-zero/con pd/con)], + $PRIORITY_MAN => [qw(pd/man pdus/man ic-world/man und-world/man ic/man + nobody/man nobody/del pd-pvt/pvt supp/supp cc-by-3.0/man cc-by-4.0/man + cc-by-nd-3.0/man cc-by-nd-4.0/man cc-by-nc-3.0/man cc-by-nc-4.0/man + cc-by-sa-3.0/man cc-by-sa-4.0/man cc-by-nc-nd-3.0/man cc-by-nc-nd-4.0/man + cc-by-nc-sa-3.0/man cc-by-nc-sa-4.0/man cc-zero/man)], }; # prepared statements @@ -49,11 +49,11 @@ my $reason_sth = undef; my $source_sth = undef; my $queue_update_sth = undef; +my $no_wait = undef; # Command line args my $data = 0; my $note = undef; -my $new_source_cmdline = 'null'; my $force_override = 0; my $mock_tracker = 0; @@ -64,76 +64,86 @@ my $archive = $config->{rights}->{archive}; my $rights_dir = $config->{rights}->{rights_dir}; -# allow overriding some of the vars set by yaml config -# with commandline options. -GetOptions( +my $thisprog = 'populate_rights_data'; + +# global state +my $tracker = undef; + +my $user = `whoami`; +chomp($user); + +# Structure to keep track of results - which records were created. +my %results; + +sub main { + + # allow overriding some of the vars set by yaml config + # with commandline options. + GetOptions( 'data=s' => \$data, 'archive=s' => \$archive, 'rights_dir=s' => \$rights_dir, 'note=s' => \$note, - 'source=s' => \$new_source_cmdline, 'force-override!' => \$force_override, + 'no-wait!' => \$no_wait, 'mock-tracker!' => \$mock_tracker, -) or pod2usage(); + ) or pod2usage(); -# pass in --mock-tracker to disable ProgressTracker (for dev/test purposes). -my $tracker = $mock_tracker ? MockTracker->new() : ProgressTracker->new(report_interval => '10000'); + # pass in --mock-tracker to disable ProgressTracker (for dev/test purposes). + $tracker = $mock_tracker ? MockTracker->new() : ProgressTracker->new(report_interval => '10000'); -my $thisprog = 'populate_rights_data'; -print "$thisprog -INFO- START: " . CORE::localtime() . "\n"; + print "$thisprog -INFO- START: " . CORE::localtime() . "\n"; -if ($force_override and not defined $note) { + if ($force_override and not defined $note) { print "You must provide a note (with --note) with the --force-override option.\n"; exit 1; -} + } + + # TODO - consider getting rid of this since we don't run this in interactive mode any more. + # Need to document current process for manual rights loading. + if ($force_override and !$no_wait) { + print <<~EOT ; -if ($force_override) { - print <finish(); @@ -143,371 +153,368 @@ print "Results:\n"; # TODO: Export these result metrics -- right now we only export a metric for lines processed if(defined $results{'inserted'}) { - print " Rows inserted: " . @{$results{'inserted'}} . "\n"; + print " Rows inserted: " . @{$results{'inserted'}} . "\n"; } if ($force_override) { - export_barcodes($results{'inserted'}) + export_barcodes($results{'inserted'}) } if (defined $results{'skipped'}) { - print " Items skipped (manually set): " . @{$results{'skipped'}} . "\n"; - foreach (@{$results{'skipped'}}) { - print "\t$_\n"; - } + print " Items skipped (manually set): " . @{$results{'skipped'}} . "\n"; + foreach (@{$results{'skipped'}}) { + print "\t$_\n"; + } } if (defined $results{'already_in_db'}) { - print " Items skipped (already in database with same attribute and reason): " . @{$results{'already_in_db'}} . "\n"; - foreach (@{$results{'already_in_db'}}) { - print "\t$_\n"; - } + print " Items skipped (already in database with same attribute and reason): " . @{$results{'already_in_db'}} . "\n"; + foreach (@{$results{'already_in_db'}}) { + print "\t$_\n"; + } } print "$thisprog -INFO- Done\n"; -} else { + } else { print "$thisprog -INFO- No rights files to process.\n"; + } + $tracker->finalize; } -$tracker->finalize; sub prepare_statements { - # Prepare SQL statements - my $replace_sql = "REPLACE INTO rights_current (namespace, id, attr, reason, source, access_profile, user, note) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"; - $insert_sth = $dbh->prepare($replace_sql) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); + $dbh ||= Database::get_rights_rw_dbh(); + # Prepare SQL statements + my $replace_sql = "REPLACE INTO rights_current (namespace, id, attr, reason, source, access_profile, user, note) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"; + $insert_sth = $dbh->prepare($replace_sql) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); - my $reason_sql = "SELECT name FROM reasons WHERE id = (SELECT reason FROM rights_current WHERE namespace = ? AND id = ?)"; - $reason_sth = $dbh->prepare($reason_sql) || die ("$thisprog -ERR- Database error: " . $dbh->errstr()); + my $reason_sql = "SELECT name FROM reasons WHERE id = (SELECT reason FROM rights_current WHERE namespace = ? AND id = ?)"; + $reason_sth = $dbh->prepare($reason_sql) || die ("$thisprog -ERR- Database error: " . $dbh->errstr()); - my $attr_sql = "SELECT name FROM attributes WHERE id = (SELECT attr FROM rights_current WHERE namespace = ? AND id = ?)"; - $attr_sth = $dbh->prepare($attr_sql) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); + my $attr_sql = "SELECT name FROM attributes WHERE id = (SELECT attr FROM rights_current WHERE namespace = ? AND id = ?)"; + $attr_sth = $dbh->prepare($attr_sql) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); - my $source_sql = "SELECT name FROM sources WHERE id = (SELECT source FROM rights_current WHERE namespace = ? AND id = ?)"; - $source_sth = $dbh->prepare($source_sql) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); + my $source_sql = "SELECT name FROM sources WHERE id = (SELECT source FROM rights_current WHERE namespace = ? AND id = ?)"; + $source_sth = $dbh->prepare($source_sql) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); - # To consider moving this to an event-based thing at some point, but for - # now we reach our tendrils into somebody else's database table... - my $queue_update_sql = "UPDATE ht.feed_queue SET status = 'done' WHERE - namespace = ? and id = ? AND status = 'rights'"; - $queue_update_sth = $dbh->prepare($queue_update_sql) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); + # To consider moving this to an event-based thing at some point, but for + # now we reach our tendrils into somebody else's database table... + my $queue_update_sql = "UPDATE ht.feed_queue SET status = 'done' WHERE + namespace = ? and id = ? AND status = 'rights'"; + $queue_update_sth = $dbh->prepare($queue_update_sql) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); } sub process_file { - my $file = shift; - - # Open input file, Loop through lines of input file - open(IN, '<:encoding(UTF-8)', $file) or die("$thisprog -ERR- Could not open $file for reading: $!"); - while (my $line = ) { - process_rights_line($line); - $tracker->inc(); + my $file = shift; + + # Open input file, Loop through lines of input file + open(IN, '<:encoding(UTF-8)', $file) or die("$thisprog -ERR- Could not open $file for reading: $!"); + while (my $line = ) { + process_rights_line($line); + $tracker->inc(); + } + close(IN); + + if ($archive) { + # After populating rights database from *.rights file(s), + # move file(s) to archive directory. + + my $cmd = "cp $file $archive"; + if (!$data) { + $cmd = "mv $file $archive"; } - close(IN); - - if ($archive) { - # After populating rights database from *.rights file(s), - # move file(s) to archive directory. - - my $cmd = "cp $file $archive"; - if (!$data) { - $cmd = "mv $file $archive"; - } - if (my $res = `$cmd`) { - die("Error moving/copying rights file to archive: $res"); - } + if (my $res = `$cmd`) { + die("Error moving/copying rights file to archive: $res"); } + } } sub process_rights_line { - my $line = shift; - - chomp($line); - - # get rid of trailing tab - $line =~ s/\t$//; - - # Format of line had better be: - # - # namespace.barcode attribute reason username source note - # - # ... where any of those fields can contain the string - # /null/i to default to default values. - # Also, the line could contain less than those fields, - # like just 'barcode attribute reason username', - # in which case 'source' will be the default value. - - my ($namespace_and_barcode, $new_attr, $new_reason, $uniqname, $new_source, $new_note) = split("\t", $line); - - if (defined $namespace_and_barcode && $namespace_and_barcode !~ /\bnull\b/i) { - $namespace_and_barcode =~ s/\"//g; + my $line = shift; + + chomp($line); + + # get rid of trailing tab + $line =~ s/\t$//; + + # Format of line had better be: + # + # namespace.barcode attribute reason username source note + # + # ... where any of those fields can contain the string + # /null/i to default to default values. + # Also, the line could contain less than those fields, + # like just 'barcode attribute reason username', + # in which case 'source' will be the default value. + + my ($namespace_and_barcode, $new_attr, $new_reason, $uniqname, $new_source, $new_note) = split("\t", $line); + + if (defined $namespace_and_barcode && $namespace_and_barcode !~ /\bnull\b/i) { + $namespace_and_barcode =~ s/\"//g; + } else { + die("namespace and barcode missing from input: $namespace_and_barcode"); + } + + # The ? is needed right where it is in the regex below + # (greedy matching - so the namespace will be everything up to the first period). + $namespace_and_barcode =~ /(.+?)\.(.+)/ || die("Invalid namespace/barcode: $namespace_and_barcode"); + my $namespace = $1; + my $barcode = $2; + + my $attribute; + if (defined $new_attr && $new_attr !~ /\bnull\b/i) { + $new_attr =~ s/\"//g; + + # Make sure attribute is a valid attribute in the db + my $hr = $dbh->selectcol_arrayref("SELECT id FROM attributes WHERE name = '$new_attr'"); + if (! defined $$hr[0]) { + die("Invalid attribute: $new_attr ($barcode)"); } else { - die("namespace and barcode missing from input: $namespace_and_barcode"); + $attribute = $$hr[0]; } - - # The ? is needed right where it is in the regex below - # (greedy matching - so the namespace will be everything up to the first period). - $namespace_and_barcode =~ /(.+?)\.(.+)/ || die("Invalid namespace/barcode: $namespace_and_barcode"); - my $namespace = $1; - my $barcode = $2; - - my $attribute; - if (defined $new_attr && $new_attr !~ /\bnull\b/i) { - $new_attr =~ s/\"//g; - - # Make sure attribute is a valid attribute in the db - my $hr = $dbh->selectcol_arrayref("SELECT id FROM attributes WHERE name = '$new_attr'"); - if (! defined $$hr[0]) { - die("Invalid attribute: $attribute ($barcode)"); - } else { - $attribute = $$hr[0]; - } + } else { + die("attribute missing from input"); + } + + my $reason; + if (defined $new_reason && $new_reason !~ /\bnull\b/i) { + $new_reason =~ s/\"//g; + + # Make sure reason is a valid reason in the db + my $hr = $dbh->selectcol_arrayref("SELECT id FROM reasons WHERE name = '$new_reason'"); + if (! defined $$hr[0]) { + die("Invalid reason: $new_reason ($barcode)"); } else { - die("attribute missing from input"); + $reason = $$hr[0]; } - - my $reason; - if (defined $new_reason && $new_reason !~ /\bnull\b/i) { - $new_reason =~ s/\"//g; - - # Make sure reason is a valid reason in the db - my $hr = $dbh->selectcol_arrayref("SELECT id FROM reasons WHERE name = '$new_reason'"); - if (! defined $$hr[0]) { - die("Invalid reason: $new_reason ($barcode)"); - } else { - $reason = $$hr[0]; - } - } else { - # default: - $new_reason = 'bib'; - my $hr = $dbh->selectcol_arrayref("SELECT id FROM reasons WHERE name = '$new_reason'"); - $reason = $$hr[0]; + } else { + # default: + $new_reason = 'bib'; + my $hr = $dbh->selectcol_arrayref("SELECT id FROM reasons WHERE name = '$new_reason'"); + $reason = $$hr[0]; + } + + if (defined $uniqname && $uniqname !~ /\bnull\b/i) { + $uniqname =~ s/\"//g; + + if ($uniqname =~ /\W/) { + die("Invalid user: $uniqname for $namespace.$barcode"); } - if (defined $uniqname && $uniqname !~ /\bnull\b/i) { - $uniqname =~ s/\"//g; + $user = $uniqname; + } else { + # the default was set above + } - if ($uniqname =~ /\W/) { - die("Invalid user: $uniqname for $namespace.$barcode"); - } + my $source; + my $access_profile; - $user = $uniqname; - } else { - # the default was set above - } - - my $source; - my $access_profile; - if (defined $new_source_cmdline && $new_source_cmdline !~ /\bnull\b/i) { - # source on command line trumps source in input file - $new_source = $new_source_cmdline; - } + if (defined $new_source && $new_source !~ /\bnull\b/i) { + $new_source =~ s/\"//g; - if (defined $new_source && $new_source !~ /\bnull\b/i) { - $new_source =~ s/\"//g; - - # Make sure source is a valid value in the db - my $hr = $dbh->selectrow_arrayref("SELECT id, access_profile FROM sources WHERE name = '$new_source'"); - if (! defined $$hr[0]) { - die("Invalid source: $new_source ($barcode)"); - } else { - $source = $$hr[0]; - $access_profile = $$hr[1]; - } + # Make sure source is a valid value in the db + my $hr = $dbh->selectrow_arrayref("SELECT id, access_profile FROM sources WHERE name = '$new_source'"); + if (! defined $$hr[0]) { + die("Invalid source: $new_source ($barcode)"); } else { - # Default source should be whatever the source value was - # in any previous rights db rows for this ID, or 'google' - my $hr = $dbh->selectrow_arrayref( - "SELECT source, access_profile FROM rights_current WHERE namespace = '$namespace' AND id = '$barcode'" - ); - - if (! defined $$hr[0]) { - $source = 1; # 'google' - $access_profile = 2; - } else { - $source = $$hr[0]; - $access_profile = $$hr[1]; - } + $source = $$hr[0]; + $access_profile = $$hr[1]; } + } else { + # Default source should be whatever the source value was + # in any previous rights db rows for this ID, or 'google' + my $hr = $dbh->selectrow_arrayref( + "SELECT source, access_profile FROM rights_current WHERE namespace = '$namespace' AND id = '$barcode'" + ); - if (defined $new_note && $new_note !~ /\bnull\b/i) { - if (defined $note && $note && $note ne $new_note) { - die("Command-line note conflicts with .rights note ($barcode)"); - } + if (! defined $$hr[0]) { + die("Missing source (not already in rights) ($barcode)"); } else { - $new_note = $note; + $source = $$hr[0]; + $access_profile = $$hr[1]; } + } - my ($old_attr, $old_reason, $old_source) = get_old_rights($namespace, $barcode); - my $do_insert = 0; - - if (defined $old_reason && defined $old_attr) { - # If the new reason, attribute and source are the same as the most - # recent ones, ignore at this point. - if ( ($new_reason eq $old_reason) && ($new_attr eq $old_attr) ) { - # Update if the source is different, but the attribute and reason are the same - # or if a note was provided - if ( - (defined $new_source and $new_source ne 'null' and $new_source ne $old_source) - or (defined $new_note and $new_note) - or ($uniqname eq 'crms' or $uniqname eq 'crmsworld') - ) { - $do_insert = 1; - } else { - set_queue_done($namespace, $barcode); - push @{$results{'already_in_db'}}, "$namespace.$barcode"; - next; - } - } else { - $do_insert = should_update_rights( - $namespace, - $barcode, - $old_attr, - $old_reason, - $old_source, - $new_attr, - $new_reason, - $new_source, - $new_note - ); - } - } else { - # No rights in the db yet for this barcode so just insert whatever we have here + if (defined $new_note && $new_note !~ /\bnull\b/i) { + if (defined $note && $note && $note ne $new_note) { + die("Command-line note conflicts with .rights note ($barcode)"); + } + } else { + $new_note = $note; + } + + my ($old_attr, $old_reason, $old_source) = get_old_rights($namespace, $barcode); + my $do_insert = 0; + + if (defined $old_reason && defined $old_attr) { + # If the new reason, attribute and source are the same as the most + # recent ones, ignore at this point. + if ( ($new_reason eq $old_reason) && ($new_attr eq $old_attr) ) { + # Update if the source is different, but the attribute and reason are the same + # or if a note was provided + if ( + (defined $new_source and $new_source ne 'null' and $new_source ne $old_source) + or (defined $new_note and $new_note) + or ($uniqname eq 'crms' or $uniqname eq 'crmsworld') + ) { $do_insert = 1; + } else { + set_queue_done($namespace, $barcode); + push @{$results{'already_in_db'}}, "$namespace.$barcode"; + return; + } + } else { + $do_insert = should_update_rights( + $namespace, + $barcode, + $old_attr, + $old_reason, + $old_source, + $new_attr, + $new_reason, + $new_source, + $new_note + ); } - - # Insert new row with most recent rights data - if ($do_insert) { - eval { - $insert_sth->execute( - $namespace, - $barcode, - $attribute, - $reason, - $source, - $access_profile, - $user, - $new_note - ) or die("$thisprog -ERR- Database error: " . $dbh->errstr()); - }; - if ($@) { - warn($@); - } else { - push @{$results{'inserted'}}, "$namespace.$barcode"; - set_queue_done($namespace, $barcode); - } + } else { + # No rights in the db yet for this barcode so just insert whatever we have here + $do_insert = 1; + } + + # Insert new row with most recent rights data + if ($do_insert) { + eval { + $insert_sth->execute( + $namespace, + $barcode, + $attribute, + $reason, + $source, + $access_profile, + $user, + $new_note + ) or die("$thisprog -ERR- Database error: " . $dbh->errstr()); + }; + if ($@) { + warn($@); } else { - push @{$results{'skipped'}}, "$namespace.$barcode"; - set_queue_done($namespace, $barcode); - next; + push @{$results{'inserted'}}, "$namespace.$barcode"; + set_queue_done($namespace, $barcode); } + } else { + push @{$results{'skipped'}}, "$namespace.$barcode"; + set_queue_done($namespace, $barcode); + next; + } } sub get_old_rights { - my ($namespace, $barcode) = @_; - - # Determine if a row already exists for this barcode. - # Get reason from most recent rights data: - $reason_sth->execute($namespace, $barcode) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); - my $hr = $reason_sth->fetchrow_hashref(); - my $old_reason = $$hr{'name'} || undef; - $reason_sth->finish(); - - # Get attribute from most recent rights data: - $attr_sth->execute($namespace, $barcode) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); - $hr = $attr_sth->fetchrow_hashref(); - my $old_attr = $$hr{'name'} || undef; - $attr_sth->finish(); - - # Get source from most recent rights data: - $source_sth->execute($namespace, $barcode) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); - $hr = $source_sth->fetchrow_hashref(); - my $old_source = $$hr{'name'} || undef; - $source_sth->finish(); - - return ($old_attr, $old_reason, $old_source); + my ($namespace, $barcode) = @_; + + # Determine if a row already exists for this barcode. + # Get reason from most recent rights data: + $reason_sth->execute($namespace, $barcode) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); + my $hr = $reason_sth->fetchrow_hashref(); + my $old_reason = $$hr{'name'} || undef; + $reason_sth->finish(); + + # Get attribute from most recent rights data: + $attr_sth->execute($namespace, $barcode) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); + $hr = $attr_sth->fetchrow_hashref(); + my $old_attr = $$hr{'name'} || undef; + $attr_sth->finish(); + + # Get source from most recent rights data: + $source_sth->execute($namespace, $barcode) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); + $hr = $source_sth->fetchrow_hashref(); + my $old_source = $$hr{'name'} || undef; + $source_sth->finish(); + + return ($old_attr, $old_reason, $old_source); } sub should_update_rights { - my ( + my ( + $namespace, + $barcode, + $old_attr, + $old_reason, + $old_source, + $new_attr, + $new_reason, + $new_source, + $new_note + ) = @_; + + my $do_insert = 0; + + # $old_reason -> Most recent reason + # $new_reason -> New reason + + # Does the old one or the new one win? + my $old_priority = get_priority($old_attr, $old_reason); + my $new_priority = get_priority($new_attr, $new_reason); + + # Make sure we were able to determine the priority + if (not defined $old_priority) { + warn("Unknown old rights $old_attr/$old_reason for $namespace.$barcode"); + $exit = 1; + $do_insert = 0; + } + if (not defined $new_priority) { + warn("Unknown new rights $new_attr/$new_reason for $namespace.$barcode"); + $exit = 1; + $do_insert = 0; + } + if (defined $old_priority and defined $new_priority) { + if ($force_override) { + $do_insert = 1; + } elsif ( + ($old_reason eq 'gfv' or $new_reason eq 'gfv') and + ($old_reason eq 'bib' or $new_reason eq 'bib') + ) { + # handle pdus/gfv precedence + ($exit, $do_insert) = gfv_overrides( $namespace, $barcode, $old_attr, $old_reason, - $old_source, $new_attr, - $new_reason, - $new_source, - $new_note - ) = @_; - - my $do_insert = 0; - - # $old_reason -> Most recent reason - # $new_reason -> New reason - - # Does the old one or the new one win? - my $old_priority = get_priority($old_attr, $old_reason); - my $new_priority = get_priority($new_attr, $new_reason); - - # Make sure we were able to determine the priority - if (not defined $old_priority) { - warn("Unknown old rights $old_attr/$old_reason for $namespace.$barcode"); - $exit = 1; - $do_insert = 0; - } - if (not defined $new_priority) { - warn("Unknown new rights $new_attr/$new_reason for $namespace.$barcode"); - $exit = 1; - $do_insert = 0; + $new_reason + ); + } elsif ($new_priority >= $old_priority) { + $do_insert = 1; + } else { + # new priority is too low; ignore + $do_insert = 0; } - if (defined $old_priority and defined $new_priority) { - if ($force_override) { - $do_insert = 1; - } elsif ( - ($old_reason eq 'gfv' or $new_reason eq 'gfv') and - ($old_reason eq 'bib' or $new_reason eq 'bib') - ) { - # handle pdus/gfv precedence - ($exit, $do_insert) = gfv_overrides( - $namespace, - $barcode, - $old_attr, - $old_reason, - $new_attr, - $new_reason - ); - } elsif ($new_priority >= $old_priority) { - $do_insert = 1; - } else { - # new priority is too low; ignore - $do_insert = 0; - } - - if ($new_priority == $PRIORITY_MAN && not defined $new_note) { - warn("$new_attr/$new_reason requested for $namespace.$barcode but note not provided"); - $exit = 1; - $do_insert = 0; - } + + if ($new_priority == $PRIORITY_MAN && not defined $new_note) { + warn("$new_attr/$new_reason requested for $namespace.$barcode but note not provided"); + $exit = 1; + $do_insert = 0; } + } - return $do_insert; + return $do_insert; } sub get_priority { - my ($attr, $reason) = @_; + my ($attr, $reason) = @_; - my $code = "$attr/$reason"; - my $toreturn = undef; + my $code = "$attr/$reason"; + my $toreturn = undef; - while (my ($priority, $codes) = each %$priorities) { - if (grep {$_ eq $code} @$codes) { - warn("$attr/$reason has two priorities ($priority and $toreturn)??") if defined $toreturn; - $exit = 1; - $toreturn = $priority - } + while (my ($priority, $codes) = each %$priorities) { + if (grep {$_ eq $code} @$codes) { + warn("$attr/$reason has two priorities ($priority and $toreturn)??") if defined $toreturn; + $exit = 1; + $toreturn = $priority } + } - if (not defined $toreturn) { - warn("Unknown code $attr/$reason"); - $exit = 1; - } + if (not defined $toreturn) { + warn("Unknown code $attr/$reason"); + $exit = 1; + } - return $toreturn; + return $toreturn; } # returns $exit, $do_insert @@ -516,64 +523,65 @@ sub get_priority { # pd/bib and pdus/bib should override pdus/gfv # all bibs should override each other sub gfv_overrides { - my ( - $namespace, - $barcode, - $old_attr, - $old_reason, - $new_attr, - $new_reason - ) = @_; - - # pd/bib and pdus/bib override pdus/gfv - if ($old_attr eq 'pdus' and $old_reason eq 'gfv') { - if ($new_reason eq 'bib' and ($new_attr eq 'pd' or $new_attr eq 'pdus')) { - return (0, 1); # use new rights - pd/bib, pdus/bib overrides pdus/gfv - } elsif ($new_reason eq 'bib' and ($new_attr eq 'ic' or $new_attr eq 'und')) { - return (0, 0); # ignore new rights - } else { - warn("unknown new attr/reason: $old_attr/$old_reason, new: $new_attr/$new_reason for $namespace.$barcode"); - return (1, 0); - } - - } elsif ($new_attr eq 'pdus' and $new_reason = 'gfv') { - if ($old_reason eq 'bib' and ($old_attr eq 'ic' or $old_attr eq 'und')) { - return (0, 1); # use new rights - pdus/gfv overrides ic/bib and und/bib - } elsif ($old_reason eq 'bib' and ($old_attr eq 'pd' or $old_attr eq 'pdus')) { - return (0, 0); # ignore new rights - } else { - warn("unknown old attr/reason: $old_attr/$old_reason, new: $new_attr/$new_reason for $namespace.$barcode"); - return (1, 0); - } + my ( + $namespace, + $barcode, + $old_attr, + $old_reason, + $new_attr, + $new_reason + ) = @_; + + # pd/bib and pdus/bib override pdus/gfv + if ($old_attr eq 'pdus' and $old_reason eq 'gfv') { + if ($new_reason eq 'bib' and ($new_attr eq 'pd' or $new_attr eq 'pdus')) { + return (0, 1); # use new rights - pd/bib, pdus/bib overrides pdus/gfv + } elsif ($new_reason eq 'bib' and ($new_attr eq 'ic' or $new_attr eq 'und')) { + return (0, 0); # ignore new rights + } else { + warn("unknown new attr/reason: $old_attr/$old_reason, new: $new_attr/$new_reason for $namespace.$barcode"); + return (1, 0); + } + } elsif ($new_attr eq 'pdus' and $new_reason = 'gfv') { + if ($old_reason eq 'bib' and ($old_attr eq 'ic' or $old_attr eq 'und')) { + return (0, 1); # use new rights - pdus/gfv overrides ic/bib and und/bib + } elsif ($old_reason eq 'bib' and ($old_attr eq 'pd' or $old_attr eq 'pdus')) { + return (0, 0); # ignore new rights } else { - warn("gfv but not pdus?? old: $old_attr/$old_reason, new: $new_attr/$new_reason for $namespace.$barcode"); - return (1, 0); + warn("unknown old attr/reason: $old_attr/$old_reason, new: $new_attr/$new_reason for $namespace.$barcode"); + return (1, 0); } + + } else { + warn("gfv but not pdus?? old: $old_attr/$old_reason, new: $new_attr/$new_reason for $namespace.$barcode"); + return (1, 0); + } } sub set_queue_done { - my $namespace = shift; - my $barcode = shift; + my $namespace = shift; + my $barcode = shift; - $queue_update_sth->execute($namespace, $barcode); + $queue_update_sth->execute($namespace, $barcode); } sub export_barcodes { - my $htids = shift; - - my $barcode_log = sprintf( - '%s/barcodes_%s_override_feed', - $rights_dir, - UnixDate(ParseDate("now"), '%Y-%m-%d_%H-%M-%S') - ); - open(my $fh, ">>", $barcode_log) or die("can't open $barcode_log: $!"); - - foreach my $htid (@$htids) { - print $fh $htid, "\n"; - } + my $htids = shift; + + my $barcode_log = sprintf( + '%s/barcodes_%s_override_feed', + $rights_dir, + UnixDate(ParseDate("now"), '%Y-%m-%d_%H-%M-%S') + ); + open(my $fh, ">>", $barcode_log) or die("can't open $barcode_log: $!"); + + foreach my $htid (@$htids) { + print $fh $htid, "\n"; + } } +main unless caller; =head1 NAME @@ -643,16 +651,16 @@ package MockTracker; sub new { - my $class = shift; - my $self = {}; - print "MockTracker->new\n"; - bless($self, $class); + my $class = shift; + my $self = {}; + print "MockTracker->new\n"; + bless($self, $class); } sub finalize { - print "MockTracker->finalize\n"; + print "MockTracker->finalize\n"; } sub inc { - print "MockTracker->inc\n"; + print "MockTracker->inc\n"; } diff --git a/t/populate_rights_test.t b/t/populate_rights_test.t new file mode 100644 index 0000000..a7b0e34 --- /dev/null +++ b/t/populate_rights_test.t @@ -0,0 +1,293 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use utf8; + +use Data::Dumper; +use File::Temp qw(tempdir); +use POSIX qw(strftime); +use Test2::Bundle::Extended; +use Test2::Tools::Spec; +use Test2::Tools::Compare; +use Test::Exception; +# use Test::More; + +use lib "$ENV{ROOTDIR}/perl_lib"; +use lib "$ENV{ROOTDIR}/bin"; +use Database; +use grin_gfv; + +require "populate_rights_data.pl"; + +my $dbh = Database::get_rights_rw_dbh; + +# These are used by `write_reversion_from_gfv` and `write_reversion_from_gfv` tests, as well as `load_test_fixtures` +my $rights_current_sql = "INSERT INTO rights_current (namespace, id, attr, reason, source, access_profile, user) VALUES (?, ?, ?, ?, ?, ?, 'defaultuser')"; +my $rights_current_sth = $dbh->prepare($rights_current_sql); + +sub delete_rights_current_prtest { + $dbh->prepare("DELETE FROM rights_current WHERE namespace = 'prtest'")->execute; +} + +# Clean up any previously failed tests + +describe "populate_rights_data.pl" => sub { + + before_all "prepare statement" => sub { prepare_statements(); }; + before_each "clean up" => sub { delete_rights_current_prtest; }; + + describe "should_update_rights" => sub { + + sub should_update { + + my ($old_attr, $old_reason, $new_attr, $new_reason, $note) = @_; + + should_update_rights( + "prtest","testitem", + $old_attr,$old_reason,"open", + $new_attr,$new_reason,"open", + $note); + } + + sub expect_update { + ok(should_update(@_)); + } + + sub expect_no_update { + ok(!should_update(@_)); + } + + # arguments: namespace, id, old + + it "bib overrides bib" => sub { expect_update('pd','bib','ic','bib'); }; + + it "copyright overrides bib" => sub { expect_update('ic','bib','pd','ren'); }; + + it "bib doesn't override copyright" => sub { expect_no_update('pd','ren','ic','bib') }; + + it "access overrides copyright" => sub { expect_update('ic','ren','cc-by-4.0','con') }; + + it "copyright doesn't override access" => sub { expect_no_update('cc-by-4.0','con','ic','ren') }; + + it "man with note overrides man" => sub { expect_update('nobody','man','pd','man','test note') }; + + it "man without note doesn't override bib" => sub { expect_no_update('nobody','man','pd','bib') }; + + it "access doesn't override man" => sub { expect_no_update('nobody','man','cc-by-4.0','con') }; + + describe "gfv overrides" => sub { + it "pdus/gfv overrides ic/bib" => sub { expect_update('ic','bib','pdus','gfv') }; + it "pdus/gfv overrides und/bib" => sub { expect_update('und','bib','pdus','gfv') }; + it "pdus/gfv does not override pd/bib" => sub { expect_no_update('pd','bib','pdus','gfv') }; + }; + + }; + + describe "process_rights_line" => sub { + + it "requires attr" => sub { + dies_ok { process_rights_line("prtest.123456") }; + like $@, qr(attribute missing); + }; + + it "requires valid id" => sub { + dies_ok { process_rights_line("not_an_id\tpd\tbib\ttestuser\tgoogle\n") }; + like $@, qr(Invalid namespace/barcode); + }; + + it "requires valid attr" => sub { + dies_ok { process_rights_line("prtest.123456\tnot_attr\tbib\ttestuser\tgoogle\n") }; + like $@, qr(Invalid attribute); + }; + + it "requires valid reason" => sub { + dies_ok { process_rights_line("prtest.123456\tpd\tnot_reason\ttestuser\tgoogle\n") }; + like $@, qr(Invalid reason); + }; + + it "requires valid source" => sub { + dies_ok { process_rights_line("prtest.123456\tpd\tbib\ttestuser\tnot_source\n") }; + like $@, qr(Invalid source); + }; + + it "requires source if not previously loaded" => sub { + dies_ok { process_rights_line("prtest.123456\tpd\tbib\n") }; + like $@, qr(Missing source); + }; + + it "loads bib rights for something not there" => sub { + process_rights_line("prtest.newitem\tpd\tbib\ttestuser\tgoogle\n"); + + my $rights = $dbh->selectrow_arrayref("SELECT attr, reason FROM rights_current WHERE namespace = 'prtest' and id = 'newitem'"); + + # numerical values for pd/bib + is([1,1],$rights); + }; + + it "doesn't update rights with same attr/reason/source" => sub { + # pd/bib/google/google -- sets user to 'defaultuser' by default + $rights_current_sth->execute("prtest","samevals","1","1","1","2"); + + # add it with a different user, shouldn't reload + process_rights_line("prtest.samevals\tpd\tbib\tnewuser\tgoogle"); + + my $user = $dbh->selectrow_arrayref("SELECT user FROM rights_current WHERE namespace = 'prtest' and id = 'samevals'"); + is(["defaultuser"], $user); + }; + + it "retains source if not given & rights previously loaded" => sub { + # pd/bib/ia/open + $rights_current_sth->execute("prtest","keepsource","1","1","4","1"); + + process_rights_line("prtest.keepsource\tic\tbib"); + my $rights = $dbh->selectrow_arrayref("SELECT attr, reason, source FROM rights_current WHERE namespace = 'prtest' and id = 'keepsource'"); + + # ic/bib/ia + is([2,1,4],$rights); + }; + + it "new source updates access profile (as specified in sources)" => sub { + # pd/bib/google/google + $rights_current_sth->execute("prtest","newsource","1","1","1","2"); + + process_rights_line("prtest.newsource\tpd\tbib\ttestuser\tia"); + my $access_profile = $dbh->selectrow_arrayref("SELECT access_profile FROM rights_current WHERE namespace = 'prtest' and id = 'newsource'"); + + # access profile open + is([1],$access_profile); + }; + + }; + + describe "get_old_rights" => sub { + + it "gets old attribute, reason, source" => sub { + # pd/bib/google/google + $rights_current_sth->execute("prtest","oldrights","1","1","1","2"); + + my ($old_attr, $old_reason, $old_source) = get_old_rights("prtest", "oldrights"); + is("pd",$old_attr); + is("bib",$old_reason); + is("google",$old_source); + }; + + it "returns undef if there are no rights" => sub { + my ($old_attr, $old_reason, $old_source) = get_old_rights("prtest", "nonexistent"); + + is(undef,$old_attr); + is(undef,$old_reason); + is(undef,$old_source); + } + }; + + describe "data loading/integration" => sub { + my $tempdir; + + before_all "set up temp dir" => sub { + $tempdir = tempdir( "/tmp/populate-rights-test-XXXXXX", CLEANUP => 1 ); + mkdir("$tempdir/rights"); + mkdir("$tempdir/archive"); + }; + + it "loads files in rights_dir" => sub { + open(my $rights, ">", "$tempdir/rights/testfile1.rights"); + print $rights "prtest.loadfile\tpd\tbib\ttestuser\tgoogle\n"; + close($rights); + + my $res = qx(perl -w bin/populate_rights_data.pl --rights_dir=$tempdir/rights --archive=$tempdir/archive 2>&1); + # 0 (ok) exit code + ok(!$?); + ok($res =~ /Rows inserted: 1/m); + + my $count = $dbh->selectrow_arrayref("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id = 'loadfile'"); + + is([1],$count); + ok(!-e "$tempdir/rights/testfile1.rights"); + ok(-e "$tempdir/archive/testfile1.rights"); + }; + + it "accepts --data for individual file; processes all lines" => sub { + open(my $rights, ">", "$tempdir/testfile2.rights"); + print $rights "prtest.procfile1\tic\tbib\ttestuser\tia\n"; + print $rights "prtest.procfile2\tpd\tbib\ttestuser\tgoogle\n"; + close($rights); + + my $res = qx(perl -w bin/populate_rights_data.pl --data=$tempdir/testfile2.rights --archive=$tempdir/archive 2>&1); + # 0 (ok) exit code + ok(!$?); + ok($res =~ /Rows inserted: 2/m); + + my $count = $dbh->selectrow_arrayref("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id in ('procfile1','procfile2')"); + + is([2],$count); + }; + + it "bails out when encountering invalid data" => sub { + open(my $rights, ">", "$tempdir/testfile3.rights"); + print $rights "prtest.goodline1\tic\tbib\ttestuser\tia\n"; + print $rights "badline\n"; + print $rights "prtest.goodline2\tpd\tbib\ttestuser\tgoogle\n"; + close($rights); + + my $res = qx(perl -w bin/populate_rights_data.pl --data=$tempdir/testfile3.rights --archive=$tempdir/archive 2>&1); + # nonzero exit code (error) + ok($?); + ok($res =~ /Invalid namespace\/barcode/); + + # Should have loaded goodline1, but not goodline2 (since it bailed out after badline) + my $count = $dbh->selectrow_arrayref("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id = 'goodline1'"); + is([1],$count); + + $count = $dbh->selectrow_arrayref("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id = 'goodline2'"); + is([0],$count); + }; + + it "force-override requires note" => sub { + open(my $rights, ">", "$tempdir/testfile4.rights"); + print $rights "prtest.override1\tpd\tbib\ttestuser\tia\n"; + close($rights); + + my $res = qx(perl -w bin/populate_rights_data.pl --force-override --data=$tempdir/testfile4.rights --archive=$tempdir/archive 2>&1); + # nonzero exit code (error) + ok($?); + ok($res =~ /must provide a note/m); + + # Should not have loaded anything + my $count = $dbh->selectrow_arrayref("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id = 'override1'"); + is([0],$count); + }; + + + it "force-override allows bib to override man & exports barcodes" => sub { + # preload 'man' rights + # nobody/man/google/google + $rights_current_sth->execute("prtest","override2","8","5","1","2"); + + open(my $rights, ">", "$tempdir/testfile5.rights"); + print $rights "prtest.override2\tpd\tbib\ttestuser\tia\n"; + close($rights); + + my $res = qx(perl -w bin/populate_rights_data.pl --force-override --no-wait --note="override note" --data=$tempdir/testfile5.rights --archive=$tempdir/archive --rights_dir=$tempdir/rights 2>&1); + + # zero exit code (success) + ok(!$?) or print STDERR $res; + ok($res =~ /Rows inserted: 1/m); + + # Should have loaded + my $count = $dbh->selectrow_arrayref("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id = 'override2'"); + is([1],$count); + + my @override_feed_barcodes = glob("$tempdir/rights/barcodes_*_override_feed"); + + is(1,scalar @override_feed_barcodes); + + open(my $fh, "<", $override_feed_barcodes[0]); + my $line = <$fh>; + is($line,"prtest.override2\n"); + }; + }; + +}; + +done_testing(); From c22f2f20ab20b2708ec58eef232852f10d735e9c Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Tue, 12 May 2026 16:26:11 -0400 Subject: [PATCH 02/10] Back out $no_wait; remove 10 second delay --- bin/populate_rights_data.pl | 25 ------------------------- t/populate_rights_test.t | 2 +- 2 files changed, 1 insertion(+), 26 deletions(-) diff --git a/bin/populate_rights_data.pl b/bin/populate_rights_data.pl index 6e8cd9a..10f0b89 100755 --- a/bin/populate_rights_data.pl +++ b/bin/populate_rights_data.pl @@ -49,7 +49,6 @@ my $reason_sth = undef; my $source_sth = undef; my $queue_update_sth = undef; -my $no_wait = undef; # Command line args my $data = 0; @@ -85,7 +84,6 @@ sub main { 'rights_dir=s' => \$rights_dir, 'note=s' => \$note, 'force-override!' => \$force_override, - 'no-wait!' => \$no_wait, 'mock-tracker!' => \$mock_tracker, ) or pod2usage(); @@ -99,29 +97,6 @@ sub main { exit 1; } - # TODO - consider getting rid of this since we don't run this in interactive mode any more. - # Need to document current process for manual rights loading. - if ($force_override and !$no_wait) { - print <<~EOT ; - - You have selected the --force-override option. This option will allow you to - override any rights with any other rights (for example, reverting a temporary - manual override to bib-determined rights). This option will export a "barcodes" - - file so that Zephir will see these items as updated. - - You have 10 seconds to abort (press Ctrl-C) before these rights are loaded. - - EOT - - for (my $i = 0; $i < 10; $i++) { - print STDERR '.'; - sleep 1; - } - print STDERR "\n"; - } - - # Build list of rights data files # Rights files had better be tab-delimited CSV files with no header lines or comments diff --git a/t/populate_rights_test.t b/t/populate_rights_test.t index a7b0e34..66a8b6c 100644 --- a/t/populate_rights_test.t +++ b/t/populate_rights_test.t @@ -268,7 +268,7 @@ describe "populate_rights_data.pl" => sub { print $rights "prtest.override2\tpd\tbib\ttestuser\tia\n"; close($rights); - my $res = qx(perl -w bin/populate_rights_data.pl --force-override --no-wait --note="override note" --data=$tempdir/testfile5.rights --archive=$tempdir/archive --rights_dir=$tempdir/rights 2>&1); + my $res = qx(perl -w bin/populate_rights_data.pl --force-override --note="override note" --data=$tempdir/testfile5.rights --archive=$tempdir/archive --rights_dir=$tempdir/rights 2>&1); # zero exit code (success) ok(!$?) or print STDERR $res; From ddc1765e6914272e388bc61233f4274ced120fe4 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Wed, 13 May 2026 14:35:06 -0400 Subject: [PATCH 03/10] ETT-1346: refactoring tests & populate_rights_data; stub out hvd tests * add constants for numerical values * make process_rights_line tests more readable by using join("\t",...) * stub out tests for harvard access profile (skipped; currently failing) * use selectrow_array when only fetching a single value * use placeholders for values selectrow_arrayref (more to do) * add additional gfv tests --- bin/populate_rights_data.pl | 68 ++++++++----- t/populate_rights_test.t | 198 +++++++++++++++++++++++++----------- 2 files changed, 180 insertions(+), 86 deletions(-) diff --git a/bin/populate_rights_data.pl b/bin/populate_rights_data.pl index 10f0b89..f186da7 100755 --- a/bin/populate_rights_data.pl +++ b/bin/populate_rights_data.pl @@ -277,34 +277,8 @@ sub process_rights_line { # the default was set above } - my $source; - my $access_profile; - - if (defined $new_source && $new_source !~ /\bnull\b/i) { - $new_source =~ s/\"//g; - - # Make sure source is a valid value in the db - my $hr = $dbh->selectrow_arrayref("SELECT id, access_profile FROM sources WHERE name = '$new_source'"); - if (! defined $$hr[0]) { - die("Invalid source: $new_source ($barcode)"); - } else { - $source = $$hr[0]; - $access_profile = $$hr[1]; - } - } else { - # Default source should be whatever the source value was - # in any previous rights db rows for this ID, or 'google' - my $hr = $dbh->selectrow_arrayref( - "SELECT source, access_profile FROM rights_current WHERE namespace = '$namespace' AND id = '$barcode'" - ); - - if (! defined $$hr[0]) { - die("Missing source (not already in rights) ($barcode)"); - } else { - $source = $$hr[0]; - $access_profile = $$hr[1]; - } - } + my $source = get_source($namespace, $barcode, $new_source); + my $access_profile = get_access_profile($namespace, $barcode, $source); if (defined $new_note && $new_note !~ /\bnull\b/i) { if (defined $note && $note && $note ne $new_note) { @@ -379,6 +353,44 @@ sub process_rights_line { } } +sub get_source { + my ($namespace, $barcode, $new_source) = @_; + + if (defined $new_source && $new_source !~ /\bnull\b/i) { + $new_source =~ s/\"//g; + + # Make sure source is a valid value in the db + my $hr = $dbh->selectrow_arrayref("SELECT id FROM sources WHERE name = '$new_source'"); + if (! defined $$hr[0]) { + die("Invalid source: $new_source ($barcode)"); + } else { + return $$hr[0]; + } + } else { + # Default source should be whatever the source value was + # in any previous rights db rows for this ID + my $hr = $dbh->selectrow_arrayref( + "SELECT source FROM rights_current WHERE namespace = '$namespace' AND id = '$barcode'" + ); + + if (! defined $$hr[0]) { + die("Missing source (not already in rights) ($barcode)"); + } else { + return $$hr[0]; + } + } + +} + +sub get_access_profile { + my ($namespace, $barcode, $source) = @_; + + my $hr = $dbh->selectrow_arrayref("SELECT access_profile FROM sources WHERE id = '$source'"); + + return $$hr[0]; + +} + sub get_old_rights { my ($namespace, $barcode) = @_; diff --git a/t/populate_rights_test.t b/t/populate_rights_test.t index 66a8b6c..0860e22 100644 --- a/t/populate_rights_test.t +++ b/t/populate_rights_test.t @@ -20,22 +20,50 @@ use grin_gfv; require "populate_rights_data.pl"; +# numerical constants corresponding to various rights values +use constant { + ATTR_PD => 1, + ATTR_IC => 2, + ATTR_PDUS => 9, + ATTR_NOBODY => 8, + + REASON_BIB => 1, + REASON_MAN => 5, + + SOURCE_GOOGLE => 1, + SOURCE_IA => 4, + + ACCESS_PROFILE_OPEN => 1, + ACCESS_PROFILE_GOOGLE => 2 + +}; + my $dbh = Database::get_rights_rw_dbh; -# These are used by `write_reversion_from_gfv` and `write_reversion_from_gfv` tests, as well as `load_test_fixtures` +# These are used by `write_reversion_from_gfv` and `write_reversion_from_gfv` describe, as well as `load_test_fixtures` my $rights_current_sql = "INSERT INTO rights_current (namespace, id, attr, reason, source, access_profile, user) VALUES (?, ?, ?, ?, ?, ?, 'defaultuser')"; my $rights_current_sth = $dbh->prepare($rights_current_sql); -sub delete_rights_current_prtest { - $dbh->prepare("DELETE FROM rights_current WHERE namespace = 'prtest'")->execute; +my $feed_grin_sth = $dbh->prepare("INSERT INTO feed_grin (namespace, id, scan_date) VALUES (?,?,?)"); + +sub joinline { + return join("\t", @_) . "\n"; } -# Clean up any previously failed tests +sub test_process_rights_line { + process_rights_line(joinline(@_)); +} + +# Clean up any previously failed describe describe "populate_rights_data.pl" => sub { before_all "prepare statement" => sub { prepare_statements(); }; - before_each "clean up" => sub { delete_rights_current_prtest; }; + + before_each "clean up database" => sub { + $dbh->do("DELETE FROM rights_current"); + $dbh->do("DELETE FROM feed_grin"); + }; describe "should_update_rights" => sub { @@ -58,28 +86,23 @@ describe "populate_rights_data.pl" => sub { ok(!should_update(@_)); } - # arguments: namespace, id, old - it "bib overrides bib" => sub { expect_update('pd','bib','ic','bib'); }; - it "copyright overrides bib" => sub { expect_update('ic','bib','pd','ren'); }; - it "bib doesn't override copyright" => sub { expect_no_update('pd','ren','ic','bib') }; - it "access overrides copyright" => sub { expect_update('ic','ren','cc-by-4.0','con') }; - it "copyright doesn't override access" => sub { expect_no_update('cc-by-4.0','con','ic','ren') }; - it "man with note overrides man" => sub { expect_update('nobody','man','pd','man','test note') }; - it "man without note doesn't override bib" => sub { expect_no_update('nobody','man','pd','bib') }; - it "access doesn't override man" => sub { expect_no_update('nobody','man','cc-by-4.0','con') }; describe "gfv overrides" => sub { it "pdus/gfv overrides ic/bib" => sub { expect_update('ic','bib','pdus','gfv') }; it "pdus/gfv overrides und/bib" => sub { expect_update('und','bib','pdus','gfv') }; it "pdus/gfv does not override pd/bib" => sub { expect_no_update('pd','bib','pdus','gfv') }; + it "ic/bib does not override pdus/gfv" => sub { expect_no_update('pdus','gfv','ic','bib') }; + it "und/bib does not override pdus/gfv" => sub { expect_no_update('pdus','gfv','und','bib') }; + it "pd/bib overrides pdus/gfv" => sub { expect_update('pdus','gfv','pd','bib') }; + it "pdus/bib overrides pdus/gfv" => sub { expect_update('pdus','gfv','pdus','bib') }; }; }; @@ -92,70 +115,65 @@ describe "populate_rights_data.pl" => sub { }; it "requires valid id" => sub { - dies_ok { process_rights_line("not_an_id\tpd\tbib\ttestuser\tgoogle\n") }; + dies_ok { test_process_rights_line("not_an_id","pd","bib","testuser","google") }; like $@, qr(Invalid namespace/barcode); }; it "requires valid attr" => sub { - dies_ok { process_rights_line("prtest.123456\tnot_attr\tbib\ttestuser\tgoogle\n") }; + dies_ok { test_process_rights_line("prtest.123456","not_attr","bib","testuser","google") }; like $@, qr(Invalid attribute); }; it "requires valid reason" => sub { - dies_ok { process_rights_line("prtest.123456\tpd\tnot_reason\ttestuser\tgoogle\n") }; + dies_ok { test_process_rights_line("prtest.123456","pd","not_reason","testuser","google") }; like $@, qr(Invalid reason); }; it "requires valid source" => sub { - dies_ok { process_rights_line("prtest.123456\tpd\tbib\ttestuser\tnot_source\n") }; + dies_ok { test_process_rights_line("prtest.123456","pd","bib","testuser","not_source") }; like $@, qr(Invalid source); }; it "requires source if not previously loaded" => sub { - dies_ok { process_rights_line("prtest.123456\tpd\tbib\n") }; + dies_ok { test_process_rights_line("prtest.123456","pd","bib") }; like $@, qr(Missing source); }; it "loads bib rights for something not there" => sub { - process_rights_line("prtest.newitem\tpd\tbib\ttestuser\tgoogle\n"); + test_process_rights_line("prtest.newitem","pd","bib","testuser","google"); my $rights = $dbh->selectrow_arrayref("SELECT attr, reason FROM rights_current WHERE namespace = 'prtest' and id = 'newitem'"); - # numerical values for pd/bib - is([1,1],$rights); + is([ATTR_PD, REASON_BIB],$rights); }; it "doesn't update rights with same attr/reason/source" => sub { - # pd/bib/google/google -- sets user to 'defaultuser' by default - $rights_current_sth->execute("prtest","samevals","1","1","1","2"); + $rights_current_sth->execute("prtest","samevals",ATTR_PD, REASON_BIB, SOURCE_GOOGLE, ACCESS_PROFILE_GOOGLE); # add it with a different user, shouldn't reload - process_rights_line("prtest.samevals\tpd\tbib\tnewuser\tgoogle"); + test_process_rights_line("prtest.samevals","pd","bib","newuser","google"); - my $user = $dbh->selectrow_arrayref("SELECT user FROM rights_current WHERE namespace = 'prtest' and id = 'samevals'"); - is(["defaultuser"], $user); + my ($user) = $dbh->selectrow_array("SELECT user FROM rights_current WHERE namespace = 'prtest' and id = 'samevals'"); + is("defaultuser", $user); }; it "retains source if not given & rights previously loaded" => sub { - # pd/bib/ia/open - $rights_current_sth->execute("prtest","keepsource","1","1","4","1"); + $rights_current_sth->execute("prtest","keepsource",ATTR_PD, REASON_BIB, SOURCE_IA, ACCESS_PROFILE_OPEN); - process_rights_line("prtest.keepsource\tic\tbib"); + test_process_rights_line("prtest.keepsource","ic","bib"); my $rights = $dbh->selectrow_arrayref("SELECT attr, reason, source FROM rights_current WHERE namespace = 'prtest' and id = 'keepsource'"); - # ic/bib/ia - is([2,1,4],$rights); + is([ATTR_IC, REASON_BIB, SOURCE_IA], $rights); }; it "new source updates access profile (as specified in sources)" => sub { # pd/bib/google/google - $rights_current_sth->execute("prtest","newsource","1","1","1","2"); + $rights_current_sth->execute("prtest","newsource",ATTR_PD, REASON_BIB, SOURCE_GOOGLE, ACCESS_PROFILE_GOOGLE); - process_rights_line("prtest.newsource\tpd\tbib\ttestuser\tia"); - my $access_profile = $dbh->selectrow_arrayref("SELECT access_profile FROM rights_current WHERE namespace = 'prtest' and id = 'newsource'"); + test_process_rights_line("prtest.newsource","pd","bib","testuser","ia"); + my ($access_profile) = $dbh->selectrow_array("SELECT access_profile FROM rights_current WHERE namespace = 'prtest' and id = 'newsource'"); - # access profile open - is([1],$access_profile); + is(ACCESS_PROFILE_OPEN, $access_profile); }; }; @@ -164,7 +182,7 @@ describe "populate_rights_data.pl" => sub { it "gets old attribute, reason, source" => sub { # pd/bib/google/google - $rights_current_sth->execute("prtest","oldrights","1","1","1","2"); + $rights_current_sth->execute("prtest","oldrights",ATTR_PD, REASON_BIB, SOURCE_GOOGLE, ACCESS_PROFILE_GOOGLE); my ($old_attr, $old_reason, $old_source) = get_old_rights("prtest", "oldrights"); is("pd",$old_attr); @@ -192,7 +210,7 @@ describe "populate_rights_data.pl" => sub { it "loads files in rights_dir" => sub { open(my $rights, ">", "$tempdir/rights/testfile1.rights"); - print $rights "prtest.loadfile\tpd\tbib\ttestuser\tgoogle\n"; + print $rights joinline("prtest.loadfile","pd","bib","testuser","google"); close($rights); my $res = qx(perl -w bin/populate_rights_data.pl --rights_dir=$tempdir/rights --archive=$tempdir/archive 2>&1); @@ -200,17 +218,17 @@ describe "populate_rights_data.pl" => sub { ok(!$?); ok($res =~ /Rows inserted: 1/m); - my $count = $dbh->selectrow_arrayref("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id = 'loadfile'"); + my ($count) = $dbh->selectrow_array("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id = 'loadfile'"); - is([1],$count); + is(1,$count); ok(!-e "$tempdir/rights/testfile1.rights"); ok(-e "$tempdir/archive/testfile1.rights"); }; it "accepts --data for individual file; processes all lines" => sub { open(my $rights, ">", "$tempdir/testfile2.rights"); - print $rights "prtest.procfile1\tic\tbib\ttestuser\tia\n"; - print $rights "prtest.procfile2\tpd\tbib\ttestuser\tgoogle\n"; + print $rights joinline("prtest.procfile1","ic","bib","testuser","ia"); + print $rights joinline("prtest.procfile2","pd","bib","testuser","google"); close($rights); my $res = qx(perl -w bin/populate_rights_data.pl --data=$tempdir/testfile2.rights --archive=$tempdir/archive 2>&1); @@ -218,16 +236,16 @@ describe "populate_rights_data.pl" => sub { ok(!$?); ok($res =~ /Rows inserted: 2/m); - my $count = $dbh->selectrow_arrayref("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id in ('procfile1','procfile2')"); + my ($count) = $dbh->selectrow_array("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id in ('procfile1','procfile2')"); - is([2],$count); + is(2,$count); }; it "bails out when encountering invalid data" => sub { open(my $rights, ">", "$tempdir/testfile3.rights"); - print $rights "prtest.goodline1\tic\tbib\ttestuser\tia\n"; + print $rights joinline("prtest.goodline1","ic","bib","testuser","ia"); print $rights "badline\n"; - print $rights "prtest.goodline2\tpd\tbib\ttestuser\tgoogle\n"; + print $rights joinline("prtest.goodline2","pd","bib","testuser","google"); close($rights); my $res = qx(perl -w bin/populate_rights_data.pl --data=$tempdir/testfile3.rights --archive=$tempdir/archive 2>&1); @@ -236,16 +254,16 @@ describe "populate_rights_data.pl" => sub { ok($res =~ /Invalid namespace\/barcode/); # Should have loaded goodline1, but not goodline2 (since it bailed out after badline) - my $count = $dbh->selectrow_arrayref("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id = 'goodline1'"); - is([1],$count); + my ($count) = $dbh->selectrow_array("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id = 'goodline1'"); + is(1,$count); - $count = $dbh->selectrow_arrayref("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id = 'goodline2'"); - is([0],$count); + ($count) = $dbh->selectrow_array("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id = 'goodline2'"); + is(0,$count); }; it "force-override requires note" => sub { open(my $rights, ">", "$tempdir/testfile4.rights"); - print $rights "prtest.override1\tpd\tbib\ttestuser\tia\n"; + print $rights joinline("prtest.override1","pd","bib","testuser","ia"); close($rights); my $res = qx(perl -w bin/populate_rights_data.pl --force-override --data=$tempdir/testfile4.rights --archive=$tempdir/archive 2>&1); @@ -254,18 +272,18 @@ describe "populate_rights_data.pl" => sub { ok($res =~ /must provide a note/m); # Should not have loaded anything - my $count = $dbh->selectrow_arrayref("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id = 'override1'"); - is([0],$count); + my ($count) = $dbh->selectrow_array("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id = 'override1'"); + is(0,$count); }; it "force-override allows bib to override man & exports barcodes" => sub { # preload 'man' rights # nobody/man/google/google - $rights_current_sth->execute("prtest","override2","8","5","1","2"); + $rights_current_sth->execute("prtest","override2",ATTR_NOBODY, REASON_MAN, SOURCE_GOOGLE, ACCESS_PROFILE_GOOGLE); open(my $rights, ">", "$tempdir/testfile5.rights"); - print $rights "prtest.override2\tpd\tbib\ttestuser\tia\n"; + print $rights joinline("prtest.override2","pd","bib","testuser","ia"); close($rights); my $res = qx(perl -w bin/populate_rights_data.pl --force-override --note="override note" --data=$tempdir/testfile5.rights --archive=$tempdir/archive --rights_dir=$tempdir/rights 2>&1); @@ -275,8 +293,8 @@ describe "populate_rights_data.pl" => sub { ok($res =~ /Rows inserted: 1/m); # Should have loaded - my $count = $dbh->selectrow_arrayref("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id = 'override2'"); - is([1],$count); + my ($count) = $dbh->selectrow_array("SELECT count(*) FROM rights_current WHERE namespace = 'prtest' and id = 'override2'"); + is(1,$count); my @override_feed_barcodes = glob("$tempdir/rights/barcodes_*_override_feed"); @@ -288,6 +306,70 @@ describe "populate_rights_data.pl" => sub { }; }; + describe "access profile for Google-scanned harvard material" => {skip => "not implemented"}, sub { + + sub expect_access_profile { + my $id = shift; + my $expected_access_profile = shift; + + my ($actual_access_profile) = $dbh->selectrow_array("SELECT access_profile FROM rights_current WHERE namespace = 'hvd' and id = ?",{},$id); + + is($expected_access_profile,$actual_access_profile); + } + + describe "scan_date before 2025-03-24" => sub { + it "sets profile to open for pd" => sub { + $feed_grin_sth->execute("hvd","testitem1","2010-01-01 00:00:00"); + + test_process_rights_line("hvd.testitem1","pd","bib","testuser","google"); + + expect_access_profile("testitem1",ACCESS_PROFILE_OPEN); + }; + + it "updates profile for item that was ic and is now pd" => sub { + $feed_grin_sth->execute("hvd","testitem2","2010-01-01 00:00:00"); + $rights_current_sth->execute("hvd","testitem2",ATTR_IC, REASON_BIB, SOURCE_GOOGLE, ACCESS_PROFILE_GOOGLE); + test_process_rights_line("hvd.testitem2","pd","bib","testuser","google"); + + expect_access_profile("testitem2", ACCESS_PROFILE_OPEN); + }; + + it "updates profile for pd item" => sub { + # load data: pd/bib/google/google + $feed_grin_sth->execute("hvd","testitem3","2010-01-01 00:00:00"); + $rights_current_sth->execute("hvd","testitem3",ATTR_PD, REASON_BIB, SOURCE_GOOGLE, ACCESS_PROFILE_GOOGLE); + test_process_rights_line("hvd.testitem3","pd","bib","testuser","google"); + + expect_access_profile("testitem3", ACCESS_PROFILE_OPEN); + }; + + it "sets profile to open for pdus" => sub { + $feed_grin_sth->execute("hvd","testitem4","2010-01-01 00:00:00"); + test_process_rights_line("hvd.testitem4","pdus","bib","testuser","google"); + + expect_access_profile("testitem4", ACCESS_PROFILE_OPEN); + }; + + # load data: pd/bib/google/open + it "sets profile to google for ic" => sub { + test_process_rights_line("hvd.testitem5","ic","bib","testuser","google"); + + $feed_grin_sth->execute("hvd","testitem5","2010-01-01 00:00:00"); + expect_access_profile("testitem5", ACCESS_PROFILE_GOOGLE); + }; + }; + + # describe "scan_date after 2025-03-24" => sub { + # # load data: pd/bib/google/open + # it "sets profile to google for pd"; + # it "sets profile to google for pdus"; + # it "sets profile to google for ic"; + # }; + + # it "sets profile to google if item is not in feed_grin"; + + }; + }; done_testing(); From f27f44b294f9b47062a9a03f6fb1f55b24c5749c Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Wed, 13 May 2026 15:03:05 -0400 Subject: [PATCH 04/10] Database refactoring * Avoid explicit error checking -- if there is an error, the DBI functions themselves raise an exception. * Prepare statements for things previously using selectrow_arrayref / selectcol_arrayref in populate_rights_data.pl --- bin/populate_rights_data.pl | 47 +++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/bin/populate_rights_data.pl b/bin/populate_rights_data.pl index f186da7..7a87d6d 100755 --- a/bin/populate_rights_data.pl +++ b/bin/populate_rights_data.pl @@ -49,6 +49,10 @@ my $reason_sth = undef; my $source_sth = undef; my $queue_update_sth = undef; +my $attr_id_sth = undef; +my $reason_id_sth = undef; +my $source_id_sth = undef; +my $current_source_id_sth = undef; # Command line args my $data = 0; @@ -156,22 +160,28 @@ sub prepare_statements { $dbh ||= Database::get_rights_rw_dbh(); # Prepare SQL statements my $replace_sql = "REPLACE INTO rights_current (namespace, id, attr, reason, source, access_profile, user, note) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"; - $insert_sth = $dbh->prepare($replace_sql) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); + $insert_sth = $dbh->prepare($replace_sql); my $reason_sql = "SELECT name FROM reasons WHERE id = (SELECT reason FROM rights_current WHERE namespace = ? AND id = ?)"; - $reason_sth = $dbh->prepare($reason_sql) || die ("$thisprog -ERR- Database error: " . $dbh->errstr()); + $reason_sth = $dbh->prepare($reason_sql); my $attr_sql = "SELECT name FROM attributes WHERE id = (SELECT attr FROM rights_current WHERE namespace = ? AND id = ?)"; - $attr_sth = $dbh->prepare($attr_sql) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); + $attr_sth = $dbh->prepare($attr_sql); my $source_sql = "SELECT name FROM sources WHERE id = (SELECT source FROM rights_current WHERE namespace = ? AND id = ?)"; - $source_sth = $dbh->prepare($source_sql) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); + $source_sth = $dbh->prepare($source_sql); # To consider moving this to an event-based thing at some point, but for # now we reach our tendrils into somebody else's database table... my $queue_update_sql = "UPDATE ht.feed_queue SET status = 'done' WHERE namespace = ? and id = ? AND status = 'rights'"; - $queue_update_sth = $dbh->prepare($queue_update_sql) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); + $queue_update_sth = $dbh->prepare($queue_update_sql); + + $attr_id_sth = $dbh->prepare("SELECT id FROM attributes WHERE name = ?"); + $reason_id_sth = $dbh->prepare("SELECT id FROM reasons WHERE name = ?"); + $source_id_sth = $dbh->prepare("SELECT id FROM sources WHERE name = ?"); + $current_source_id_sth = $dbh->prepare("SELECT source FROM rights_current WHERE namespace = ? AND id = ?"); + } sub process_file { @@ -237,7 +247,8 @@ sub process_rights_line { $new_attr =~ s/\"//g; # Make sure attribute is a valid attribute in the db - my $hr = $dbh->selectcol_arrayref("SELECT id FROM attributes WHERE name = '$new_attr'"); + $attr_id_sth->execute($new_attr); + my $hr = $attr_id_sth->fetchrow_arrayref(); if (! defined $$hr[0]) { die("Invalid attribute: $new_attr ($barcode)"); } else { @@ -252,7 +263,8 @@ sub process_rights_line { $new_reason =~ s/\"//g; # Make sure reason is a valid reason in the db - my $hr = $dbh->selectcol_arrayref("SELECT id FROM reasons WHERE name = '$new_reason'"); + $reason_id_sth->execute($new_reason); + my $hr = $reason_id_sth->fetchrow_arrayref(); if (! defined $$hr[0]) { die("Invalid reason: $new_reason ($barcode)"); } else { @@ -261,7 +273,8 @@ sub process_rights_line { } else { # default: $new_reason = 'bib'; - my $hr = $dbh->selectcol_arrayref("SELECT id FROM reasons WHERE name = '$new_reason'"); + $reason_id_sth->execute($new_reason); + my $hr = $reason_id_sth->fetchrow_arrayref(); $reason = $$hr[0]; } @@ -338,7 +351,7 @@ sub process_rights_line { $access_profile, $user, $new_note - ) or die("$thisprog -ERR- Database error: " . $dbh->errstr()); + ) }; if ($@) { warn($@); @@ -360,7 +373,8 @@ sub get_source { $new_source =~ s/\"//g; # Make sure source is a valid value in the db - my $hr = $dbh->selectrow_arrayref("SELECT id FROM sources WHERE name = '$new_source'"); + $source_id_sth->execute($new_source); + my $hr = $source_id_sth->fetchrow_arrayref; if (! defined $$hr[0]) { die("Invalid source: $new_source ($barcode)"); } else { @@ -369,9 +383,8 @@ sub get_source { } else { # Default source should be whatever the source value was # in any previous rights db rows for this ID - my $hr = $dbh->selectrow_arrayref( - "SELECT source FROM rights_current WHERE namespace = '$namespace' AND id = '$barcode'" - ); + $current_source_id_sth->execute($namespace,$barcode); + my $hr = $current_source_id_sth->fetchrow_arrayref; if (! defined $$hr[0]) { die("Missing source (not already in rights) ($barcode)"); @@ -385,7 +398,7 @@ sub get_source { sub get_access_profile { my ($namespace, $barcode, $source) = @_; - my $hr = $dbh->selectrow_arrayref("SELECT access_profile FROM sources WHERE id = '$source'"); + my $hr = $dbh->selectrow_arrayref("SELECT access_profile FROM sources WHERE id = ?",{},$source); return $$hr[0]; @@ -396,19 +409,19 @@ sub get_old_rights { # Determine if a row already exists for this barcode. # Get reason from most recent rights data: - $reason_sth->execute($namespace, $barcode) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); + $reason_sth->execute($namespace, $barcode); my $hr = $reason_sth->fetchrow_hashref(); my $old_reason = $$hr{'name'} || undef; $reason_sth->finish(); # Get attribute from most recent rights data: - $attr_sth->execute($namespace, $barcode) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); + $attr_sth->execute($namespace, $barcode); $hr = $attr_sth->fetchrow_hashref(); my $old_attr = $$hr{'name'} || undef; $attr_sth->finish(); # Get source from most recent rights data: - $source_sth->execute($namespace, $barcode) || die("$thisprog -ERR- Database error: " . $dbh->errstr()); + $source_sth->execute($namespace, $barcode); $hr = $source_sth->fetchrow_hashref(); my $old_source = $$hr{'name'} || undef; $source_sth->finish(); From ac12efcb7174a3759cc68ad7cd56a3a53a57b593 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Wed, 13 May 2026 17:48:04 -0400 Subject: [PATCH 05/10] ETT-1346 - access profile for Harvard material Initial pass at updating access profile for Harvard material based on scan date. --- bin/populate_rights_data.pl | 116 ++++++++++++++++++++++++++++-------- t/populate_rights_test.t | 2 +- 2 files changed, 91 insertions(+), 27 deletions(-) diff --git a/bin/populate_rights_data.pl b/bin/populate_rights_data.pl index 7a87d6d..8aae5c2 100755 --- a/bin/populate_rights_data.pl +++ b/bin/populate_rights_data.pl @@ -21,6 +21,11 @@ my $PRIORITY_COPYRIGHT = 3; my $PRIORITY_ACCESS = 4; my $PRIORITY_MAN = 5; + +use constant { + HARVARD_CUTOFF_DATE => '2025-03-24' +}; + my $exit = 0; my $priorities = { @@ -48,11 +53,14 @@ my $attr_sth = undef; my $reason_sth = undef; my $source_sth = undef; +my $access_profile_sth = undef; +my $access_profile_for_source_sth = undef; my $queue_update_sth = undef; my $attr_id_sth = undef; my $reason_id_sth = undef; my $source_id_sth = undef; -my $current_source_id_sth = undef; +my $access_profile_id_sth = undef; +my $scan_date_sth = undef; # Command line args my $data = 0; @@ -78,6 +86,13 @@ # Structure to keep track of results - which records were created. my %results; +### TODO +# +# * (now) preload attributes, reasons, sources, access profiles +# * (now) wrapper for getting a single value from a select +# * (maybe now) clean up some of the nested conditionals +# * (later) object-orientify -- rights file, rights line + sub main { # allow overriding some of the vars set by yaml config @@ -171,6 +186,10 @@ sub prepare_statements { my $source_sql = "SELECT name FROM sources WHERE id = (SELECT source FROM rights_current WHERE namespace = ? AND id = ?)"; $source_sth = $dbh->prepare($source_sql); + $access_profile_sth = $dbh->prepare("SELECT name FROM access_profiles WHERE id = (SELECT access_profile FROM rights_current WHERE namespace = ? AND id = ?)"); + + $access_profile_for_source_sth = $dbh->prepare("SELECT ap.name FROM sources s, access_profiles ap WHERE s.access_profile = ap.id AND s.name = ?"); + # To consider moving this to an event-based thing at some point, but for # now we reach our tendrils into somebody else's database table... my $queue_update_sql = "UPDATE ht.feed_queue SET status = 'done' WHERE @@ -180,7 +199,8 @@ sub prepare_statements { $attr_id_sth = $dbh->prepare("SELECT id FROM attributes WHERE name = ?"); $reason_id_sth = $dbh->prepare("SELECT id FROM reasons WHERE name = ?"); $source_id_sth = $dbh->prepare("SELECT id FROM sources WHERE name = ?"); - $current_source_id_sth = $dbh->prepare("SELECT source FROM rights_current WHERE namespace = ? AND id = ?"); + $access_profile_id_sth = $dbh->prepare("SELECT id FROM access_profiles WHERE name = ?"); + $scan_date_sth = $dbh->prepare("SELECT DATE(scan_date) FROM feed_grin WHERE namespace = ? and id = ?"); } @@ -290,8 +310,6 @@ sub process_rights_line { # the default was set above } - my $source = get_source($namespace, $barcode, $new_source); - my $access_profile = get_access_profile($namespace, $barcode, $source); if (defined $new_note && $new_note !~ /\bnull\b/i) { if (defined $note && $note && $note ne $new_note) { @@ -301,7 +319,33 @@ sub process_rights_line { $new_note = $note; } - my ($old_attr, $old_reason, $old_source) = get_old_rights($namespace, $barcode); + my ($old_attr, $old_reason, $old_source, $old_access_profile) = get_old_rights($namespace, $barcode); + + my $final_new_source = final_new_source($namespace, $barcode, $old_source, $new_source); + my $new_access_profile = get_access_profile($namespace, $barcode, $final_new_source, $new_attr); + + my $source; + # Make sure source is a valid value in the db + $source_id_sth->execute($final_new_source); + my $hr = $source_id_sth->fetchrow_arrayref; + $source_id_sth->finish; + if (! defined $$hr[0]) { + die("Invalid source: $final_new_source ($namespace.$barcode)"); + } else { + $source = $$hr[0]; + } + + my $access_profile; + # Make sure access_profile is a valid value in the db + $access_profile_id_sth->execute($new_access_profile); + $hr = $access_profile_id_sth->fetchrow_arrayref; + $access_profile_id_sth->finish; + if (! defined $$hr[0]) { + die("Invalid access profile: $new_access_profile ($namespace.$barcode)"); + } else { + $access_profile = $$hr[0]; + } + my $do_insert = 0; if (defined $old_reason && defined $old_attr) { @@ -312,6 +356,7 @@ sub process_rights_line { # or if a note was provided if ( (defined $new_source and $new_source ne 'null' and $new_source ne $old_source) + or ($new_access_profile ne $old_access_profile) or (defined $new_note and $new_note) or ($uniqname eq 'crms' or $uniqname eq 'crmsworld') ) { @@ -366,39 +411,51 @@ sub process_rights_line { } } -sub get_source { - my ($namespace, $barcode, $new_source) = @_; +# Use the new source if given; otherwise the old source +sub final_new_source { + my ($namespace, $barcode, $old_source, $new_source) = @_; if (defined $new_source && $new_source !~ /\bnull\b/i) { $new_source =~ s/\"//g; - # Make sure source is a valid value in the db - $source_id_sth->execute($new_source); - my $hr = $source_id_sth->fetchrow_arrayref; - if (! defined $$hr[0]) { - die("Invalid source: $new_source ($barcode)"); - } else { - return $$hr[0]; - } + return $new_source; } else { - # Default source should be whatever the source value was - # in any previous rights db rows for this ID - $current_source_id_sth->execute($namespace,$barcode); - my $hr = $current_source_id_sth->fetchrow_arrayref; - - if (! defined $$hr[0]) { - die("Missing source (not already in rights) ($barcode)"); + if (! defined $old_source) { + die("Missing source (not already in rights) ($namespace.$barcode)"); } else { - return $$hr[0]; + return $old_source; } } +} + +# Use access profile 'open' for pd and pdus Harvard material scanned by Google +# prior to 2025-03-24; access profile 'google' otherwise. +sub harvard_access_profile { + my ($namespace, $barcode, $new_attr) = @_; + + return 'google' unless ($new_attr eq 'pd' || $new_attr eq 'pdus'); + + $scan_date_sth->execute($namespace, $barcode); + my ($scan_date) = $scan_date_sth->fetchrow_array; + $scan_date_sth->finish; + + # FIXME should use date comparison, not string comparison + if ($scan_date le HARVARD_CUTOFF_DATE) { + return 'open'; + } else { + return 'google'; + } } sub get_access_profile { - my ($namespace, $barcode, $source) = @_; + my ($namespace, $barcode, $source, $new_attr) = @_; + + return harvard_access_profile($namespace, $barcode, $new_attr) if $namespace eq 'hvd' and $source eq 'google'; - my $hr = $dbh->selectrow_arrayref("SELECT access_profile FROM sources WHERE id = ?",{},$source); + $access_profile_for_source_sth->execute($source); + my $hr = $access_profile_for_source_sth->fetchrow_arrayref(); + $access_profile_for_source_sth->finish; return $$hr[0]; @@ -407,6 +464,8 @@ sub get_access_profile { sub get_old_rights { my ($namespace, $barcode) = @_; + # TODO get one row and use mapping tables + # Determine if a row already exists for this barcode. # Get reason from most recent rights data: $reason_sth->execute($namespace, $barcode); @@ -426,7 +485,12 @@ sub get_old_rights { my $old_source = $$hr{'name'} || undef; $source_sth->finish(); - return ($old_attr, $old_reason, $old_source); + $access_profile_sth->execute($namespace, $barcode); + $hr = $access_profile_sth->fetchrow_hashref(); + my $old_access_profile = $$hr{'name'} || undef; + $access_profile_sth->finish(); + + return ($old_attr, $old_reason, $old_source, $old_access_profile); } sub should_update_rights { diff --git a/t/populate_rights_test.t b/t/populate_rights_test.t index 0860e22..ad12f4d 100644 --- a/t/populate_rights_test.t +++ b/t/populate_rights_test.t @@ -306,7 +306,7 @@ describe "populate_rights_data.pl" => sub { }; }; - describe "access profile for Google-scanned harvard material" => {skip => "not implemented"}, sub { + describe "access profile for Google-scanned harvard material" => sub { sub expect_access_profile { my $id = shift; From a1c83eac87f2198ec632b74dc5f8eb8c85098ebe Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Wed, 13 May 2026 17:54:54 -0400 Subject: [PATCH 06/10] Remove finish calls https://metacpan.org/pod/DBI#finish says: "When all the data has been fetched from a SELECT statement, the driver will automatically call finish for you. So you should not call it explicitly except when you know that you've not fetched all the data from a statement handle and the handle won't be destroyed soon." Generally we're fetching one row from things that should return one or zero rows, so there isn't a need to use it. --- bin/populate_rights_data.pl | 9 --------- 1 file changed, 9 deletions(-) diff --git a/bin/populate_rights_data.pl b/bin/populate_rights_data.pl index 8aae5c2..fe61e5d 100755 --- a/bin/populate_rights_data.pl +++ b/bin/populate_rights_data.pl @@ -140,7 +140,6 @@ sub main { process_file($file); } - $insert_sth->finish(); $dbh->disconnect(); # Print results @@ -328,7 +327,6 @@ sub process_rights_line { # Make sure source is a valid value in the db $source_id_sth->execute($final_new_source); my $hr = $source_id_sth->fetchrow_arrayref; - $source_id_sth->finish; if (! defined $$hr[0]) { die("Invalid source: $final_new_source ($namespace.$barcode)"); } else { @@ -339,7 +337,6 @@ sub process_rights_line { # Make sure access_profile is a valid value in the db $access_profile_id_sth->execute($new_access_profile); $hr = $access_profile_id_sth->fetchrow_arrayref; - $access_profile_id_sth->finish; if (! defined $$hr[0]) { die("Invalid access profile: $new_access_profile ($namespace.$barcode)"); } else { @@ -438,7 +435,6 @@ sub harvard_access_profile { $scan_date_sth->execute($namespace, $barcode); my ($scan_date) = $scan_date_sth->fetchrow_array; - $scan_date_sth->finish; # FIXME should use date comparison, not string comparison if ($scan_date le HARVARD_CUTOFF_DATE) { @@ -455,7 +451,6 @@ sub get_access_profile { $access_profile_for_source_sth->execute($source); my $hr = $access_profile_for_source_sth->fetchrow_arrayref(); - $access_profile_for_source_sth->finish; return $$hr[0]; @@ -471,24 +466,20 @@ sub get_old_rights { $reason_sth->execute($namespace, $barcode); my $hr = $reason_sth->fetchrow_hashref(); my $old_reason = $$hr{'name'} || undef; - $reason_sth->finish(); # Get attribute from most recent rights data: $attr_sth->execute($namespace, $barcode); $hr = $attr_sth->fetchrow_hashref(); my $old_attr = $$hr{'name'} || undef; - $attr_sth->finish(); # Get source from most recent rights data: $source_sth->execute($namespace, $barcode); $hr = $source_sth->fetchrow_hashref(); my $old_source = $$hr{'name'} || undef; - $source_sth->finish(); $access_profile_sth->execute($namespace, $barcode); $hr = $access_profile_sth->fetchrow_hashref(); my $old_access_profile = $$hr{'name'} || undef; - $access_profile_sth->finish(); return ($old_attr, $old_reason, $old_source, $old_access_profile); } From c72ec193d79d0253652e98102fa5cfc924f0a8cf Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Wed, 27 May 2026 10:55:33 -0400 Subject: [PATCH 07/10] Preload data for static tables * reduces number of database calls * simplifies nested conditionals --- bin/populate_rights_data.pl | 151 +++++++++++++----------------------- t/populate_rights_test.t | 5 +- 2 files changed, 60 insertions(+), 96 deletions(-) diff --git a/bin/populate_rights_data.pl b/bin/populate_rights_data.pl index fe61e5d..c801491 100755 --- a/bin/populate_rights_data.pl +++ b/bin/populate_rights_data.pl @@ -49,19 +49,22 @@ # prepared statements my $dbh = undef; +my $rights_current_sth = undef; my $insert_sth = undef; -my $attr_sth = undef; -my $reason_sth = undef; -my $source_sth = undef; -my $access_profile_sth = undef; my $access_profile_for_source_sth = undef; my $queue_update_sth = undef; -my $attr_id_sth = undef; -my $reason_id_sth = undef; -my $source_id_sth = undef; -my $access_profile_id_sth = undef; my $scan_date_sth = undef; +my $attribute_names = {}; +my $reason_names = {}; +my $source_names = {}; +my $access_profile_names = {}; + +my $attribute_ids = {}; +my $reason_ids = {}; +my $source_ids = {}; +my $access_profile_ids = {}; + # Command line args my $data = 0; my $note = undef; @@ -88,8 +91,6 @@ ### TODO # -# * (now) preload attributes, reasons, sources, access profiles -# * (now) wrapper for getting a single value from a select # * (maybe now) clean up some of the nested conditionals # * (later) object-orientify -- rights file, rights line @@ -135,6 +136,7 @@ sub main { if (@rights_files) { $dbh = Database::get_rights_rw_dbh(); prepare_statements(); + load_tables(); foreach my $file (@rights_files) { process_file($file); @@ -170,22 +172,34 @@ sub main { $tracker->finalize; } +sub load_table { + $dbh ||= Database::get_rights_rw_dbh(); + + my ($table, $names, $ids) = @_; + + foreach my $row ( @{ $dbh->selectall_arrayref("select id, name from $table") } ) { + my ($id, $name) = @$row; + $names->{$id} = $name; + $ids->{$name} = $id; + } +} + +sub load_tables { + $dbh ||= Database::get_rights_rw_dbh(); + + load_table('attributes',$attribute_names,$attribute_ids); + load_table('reasons',$reason_names,$reason_ids); + load_table('sources',$source_names,$source_ids); + load_table('access_profiles',$access_profile_names,$access_profile_ids); +}; + sub prepare_statements { $dbh ||= Database::get_rights_rw_dbh(); # Prepare SQL statements my $replace_sql = "REPLACE INTO rights_current (namespace, id, attr, reason, source, access_profile, user, note) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"; $insert_sth = $dbh->prepare($replace_sql); - my $reason_sql = "SELECT name FROM reasons WHERE id = (SELECT reason FROM rights_current WHERE namespace = ? AND id = ?)"; - $reason_sth = $dbh->prepare($reason_sql); - - my $attr_sql = "SELECT name FROM attributes WHERE id = (SELECT attr FROM rights_current WHERE namespace = ? AND id = ?)"; - $attr_sth = $dbh->prepare($attr_sql); - - my $source_sql = "SELECT name FROM sources WHERE id = (SELECT source FROM rights_current WHERE namespace = ? AND id = ?)"; - $source_sth = $dbh->prepare($source_sql); - - $access_profile_sth = $dbh->prepare("SELECT name FROM access_profiles WHERE id = (SELECT access_profile FROM rights_current WHERE namespace = ? AND id = ?)"); + $rights_current_sth = $dbh->prepare("SELECT * FROM rights_current WHERE namespace = ? AND id = ?"); $access_profile_for_source_sth = $dbh->prepare("SELECT ap.name FROM sources s, access_profiles ap WHERE s.access_profile = ap.id AND s.name = ?"); @@ -195,10 +209,6 @@ sub prepare_statements { namespace = ? and id = ? AND status = 'rights'"; $queue_update_sth = $dbh->prepare($queue_update_sql); - $attr_id_sth = $dbh->prepare("SELECT id FROM attributes WHERE name = ?"); - $reason_id_sth = $dbh->prepare("SELECT id FROM reasons WHERE name = ?"); - $source_id_sth = $dbh->prepare("SELECT id FROM sources WHERE name = ?"); - $access_profile_id_sth = $dbh->prepare("SELECT id FROM access_profiles WHERE name = ?"); $scan_date_sth = $dbh->prepare("SELECT DATE(scan_date) FROM feed_grin WHERE namespace = ? and id = ?"); } @@ -261,41 +271,17 @@ sub process_rights_line { my $namespace = $1; my $barcode = $2; - my $attribute; - if (defined $new_attr && $new_attr !~ /\bnull\b/i) { - $new_attr =~ s/\"//g; + # attribute is required + die("attribute missing from input") unless defined $new_attr && $new_attr !~ /\bnull\b/i; + $new_attr =~ s/\"//g; + my $attribute = $attribute_ids->{$new_attr}; + die("Invalid attribute: $new_attr ($barcode)") unless defined $attribute; - # Make sure attribute is a valid attribute in the db - $attr_id_sth->execute($new_attr); - my $hr = $attr_id_sth->fetchrow_arrayref(); - if (! defined $$hr[0]) { - die("Invalid attribute: $new_attr ($barcode)"); - } else { - $attribute = $$hr[0]; - } - } else { - die("attribute missing from input"); - } - - my $reason; - if (defined $new_reason && $new_reason !~ /\bnull\b/i) { - $new_reason =~ s/\"//g; - - # Make sure reason is a valid reason in the db - $reason_id_sth->execute($new_reason); - my $hr = $reason_id_sth->fetchrow_arrayref(); - if (! defined $$hr[0]) { - die("Invalid reason: $new_reason ($barcode)"); - } else { - $reason = $$hr[0]; - } - } else { - # default: - $new_reason = 'bib'; - $reason_id_sth->execute($new_reason); - my $hr = $reason_id_sth->fetchrow_arrayref(); - $reason = $$hr[0]; - } + # reason has a default + $new_reason = 'bib' unless defined $new_reason && $new_reason !~ /\bnull\b/i; + $new_reason =~ s/\"//g; + my $reason = $reason_ids->{$new_reason}; + die("Invalid reason: $new_reason ($barcode)") unless defined $reason; if (defined $uniqname && $uniqname !~ /\bnull\b/i) { $uniqname =~ s/\"//g; @@ -323,25 +309,11 @@ sub process_rights_line { my $final_new_source = final_new_source($namespace, $barcode, $old_source, $new_source); my $new_access_profile = get_access_profile($namespace, $barcode, $final_new_source, $new_attr); - my $source; - # Make sure source is a valid value in the db - $source_id_sth->execute($final_new_source); - my $hr = $source_id_sth->fetchrow_arrayref; - if (! defined $$hr[0]) { - die("Invalid source: $final_new_source ($namespace.$barcode)"); - } else { - $source = $$hr[0]; - } + my $source = $source_ids->{$final_new_source}; + die("Invalid source: $final_new_source ($namespace.$barcode)") if not defined $source; - my $access_profile; - # Make sure access_profile is a valid value in the db - $access_profile_id_sth->execute($new_access_profile); - $hr = $access_profile_id_sth->fetchrow_arrayref; - if (! defined $$hr[0]) { - die("Invalid access profile: $new_access_profile ($namespace.$barcode)"); - } else { - $access_profile = $$hr[0]; - } + my $access_profile = $access_profile_ids->{$new_access_profile}; + die("Invalid access profile: $new_access_profile ($namespace.$barcode)") if not defined $access_profile; my $do_insert = 0; @@ -460,26 +432,15 @@ sub get_old_rights { my ($namespace, $barcode) = @_; # TODO get one row and use mapping tables + $rights_current_sth->execute($namespace, $barcode); + my $rights = $rights_current_sth->fetchrow_hashref; + + return undef unless $rights; - # Determine if a row already exists for this barcode. - # Get reason from most recent rights data: - $reason_sth->execute($namespace, $barcode); - my $hr = $reason_sth->fetchrow_hashref(); - my $old_reason = $$hr{'name'} || undef; - - # Get attribute from most recent rights data: - $attr_sth->execute($namespace, $barcode); - $hr = $attr_sth->fetchrow_hashref(); - my $old_attr = $$hr{'name'} || undef; - - # Get source from most recent rights data: - $source_sth->execute($namespace, $barcode); - $hr = $source_sth->fetchrow_hashref(); - my $old_source = $$hr{'name'} || undef; - - $access_profile_sth->execute($namespace, $barcode); - $hr = $access_profile_sth->fetchrow_hashref(); - my $old_access_profile = $$hr{'name'} || undef; + my $old_reason = $reason_names->{$rights->{reason}}; + my $old_attr = $attribute_names->{$rights->{attr}}; + my $old_source = $source_names->{$rights->{source}}; + my $old_access_profile = $access_profile_names->{$rights->{access_profile}}; return ($old_attr, $old_reason, $old_source, $old_access_profile); } diff --git a/t/populate_rights_test.t b/t/populate_rights_test.t index ad12f4d..03d1a37 100644 --- a/t/populate_rights_test.t +++ b/t/populate_rights_test.t @@ -58,7 +58,10 @@ sub test_process_rights_line { describe "populate_rights_data.pl" => sub { - before_all "prepare statement" => sub { prepare_statements(); }; + before_all "prepare statement" => sub { + prepare_statements(); + load_tables(); + }; before_each "clean up database" => sub { $dbh->do("DELETE FROM rights_current"); From 15aa66520e50d8229d16465fe63bbe12db84b304 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Wed, 27 May 2026 11:33:06 -0400 Subject: [PATCH 08/10] Use Date_Cmp for comparing scan date & harvard date --- bin/populate_rights_data.pl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/bin/populate_rights_data.pl b/bin/populate_rights_data.pl index c801491..e7ab04b 100755 --- a/bin/populate_rights_data.pl +++ b/bin/populate_rights_data.pl @@ -90,9 +90,8 @@ my %results; ### TODO -# -# * (maybe now) clean up some of the nested conditionals -# * (later) object-orientify -- rights file, rights line +# * clean up the nested conditionals +# * object-orientify -- rights file, rights line sub main { @@ -398,7 +397,7 @@ sub final_new_source { } # Use access profile 'open' for pd and pdus Harvard material scanned by Google -# prior to 2025-03-24; access profile 'google' otherwise. +# on or before 2025-03-24; access profile 'google' otherwise. sub harvard_access_profile { my ($namespace, $barcode, $new_attr) = @_; @@ -408,8 +407,9 @@ sub harvard_access_profile { $scan_date_sth->execute($namespace, $barcode); my ($scan_date) = $scan_date_sth->fetchrow_array; - # FIXME should use date comparison, not string comparison - if ($scan_date le HARVARD_CUTOFF_DATE) { + # Date_Cmp works like the <=> operator; we want to ensure $scan_date is less + # than or equal to the cutoff date. + if (Date_Cmp($scan_date,HARVARD_CUTOFF_DATE) < 1) { return 'open'; } else { return 'google'; @@ -431,7 +431,6 @@ sub get_access_profile { sub get_old_rights { my ($namespace, $barcode) = @_; - # TODO get one row and use mapping tables $rights_current_sth->execute($namespace, $barcode); my $rights = $rights_current_sth->fetchrow_hashref; From cbbd8fedf536fe93646d5ffff462b3c0457a613a Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Wed, 27 May 2026 11:36:21 -0400 Subject: [PATCH 09/10] Remove crmsworld --- bin/populate_rights_data.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/populate_rights_data.pl b/bin/populate_rights_data.pl index e7ab04b..8fdd417 100755 --- a/bin/populate_rights_data.pl +++ b/bin/populate_rights_data.pl @@ -321,12 +321,12 @@ sub process_rights_line { # recent ones, ignore at this point. if ( ($new_reason eq $old_reason) && ($new_attr eq $old_attr) ) { # Update if the source is different, but the attribute and reason are the same - # or if a note was provided + # if a note was provided, or if the rights came from CRMS if ( (defined $new_source and $new_source ne 'null' and $new_source ne $old_source) or ($new_access_profile ne $old_access_profile) or (defined $new_note and $new_note) - or ($uniqname eq 'crms' or $uniqname eq 'crmsworld') + or ($uniqname eq 'crms') ) { $do_insert = 1; } else { From bb2e0869d6be4d5c3b10341896edb06e1e6c8c4b Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Wed, 27 May 2026 12:51:31 -0400 Subject: [PATCH 10/10] Swap order of priorities hash Avoids issue of needing to search arrays and the possibility of values being present for multiple keys. Removes unused 'orph/ddd' and 'orphcand/ddd' combinations. --- bin/populate_rights_data.pl | 102 ++++++++++++++++++++++++++---------- 1 file changed, 75 insertions(+), 27 deletions(-) diff --git a/bin/populate_rights_data.pl b/bin/populate_rights_data.pl index 8fdd417..5eaae0a 100755 --- a/bin/populate_rights_data.pl +++ b/bin/populate_rights_data.pl @@ -28,23 +28,79 @@ my $exit = 0; -my $priorities = { - $PRIORITY_BIB => [qw(ic/bib und/bib pd/bib pdus/bib)], - $PRIORITY_GFV => [qw(pdus/gfv)], - $PRIORITY_COPYRIGHT => [qw(ic/unp pd/ncn pdus/ncn pdus/crms pd/ren pdus/ren - ic/ren und/nfi pd/cdpp ic/cdpp pdus/cdpp ic/add pdus/add pd/add pd/exp - icus/gatt icus/ren op/ipma ic/ipma und/ipma und/ren ic/crms pd/crms - und/crms)], - $PRIORITY_ACCESS => [qw(ic-world/con und-world/con nobody/pvt cc-by-3.0/con - cc-by-4.0/con cc-by-nd-3.0/con cc-by-nd-4.0/con orph/ddd orphcand/ddd - cc-by-nc-3.0/con cc-by-nc-4.0/con cc-by-sa-3.0/con cc-by-sa-4.0/con - cc-by-nc-nd-3.0/con cc-by-nc-nd-4.0/con cc-by-nc-sa-3.0/con - cc-by-nc-sa-4.0/con cc-zero/con pd/con)], - $PRIORITY_MAN => [qw(pd/man pdus/man ic-world/man und-world/man ic/man - nobody/man nobody/del pd-pvt/pvt supp/supp cc-by-3.0/man cc-by-4.0/man - cc-by-nd-3.0/man cc-by-nd-4.0/man cc-by-nc-3.0/man cc-by-nc-4.0/man - cc-by-sa-3.0/man cc-by-sa-4.0/man cc-by-nc-nd-3.0/man cc-by-nc-nd-4.0/man - cc-by-nc-sa-3.0/man cc-by-nc-sa-4.0/man cc-zero/man)], +my $attr_reason_priorities = { + 'ic/bib' => $PRIORITY_BIB, + 'pd/bib' => $PRIORITY_BIB, + 'pdus/bib' => $PRIORITY_BIB, + 'und/bib' => $PRIORITY_BIB, + + 'pdus/gfv' => $PRIORITY_GFV, + + 'ic/add' => $PRIORITY_COPYRIGHT, + 'ic/cdpp' => $PRIORITY_COPYRIGHT, + 'ic/crms' => $PRIORITY_COPYRIGHT, + 'ic/ipma' => $PRIORITY_COPYRIGHT, + 'ic/ren' => $PRIORITY_COPYRIGHT, + 'ic/unp' => $PRIORITY_COPYRIGHT, + 'icus/gatt' => $PRIORITY_COPYRIGHT, + 'icus/ren' => $PRIORITY_COPYRIGHT, + 'op/ipma' => $PRIORITY_COPYRIGHT, + 'pd/add' => $PRIORITY_COPYRIGHT, + 'pd/cdpp' => $PRIORITY_COPYRIGHT, + 'pd/crms' => $PRIORITY_COPYRIGHT, + 'pd/exp' => $PRIORITY_COPYRIGHT, + 'pd/ncn' => $PRIORITY_COPYRIGHT, + 'pd/ren' => $PRIORITY_COPYRIGHT, + 'pdus/add' => $PRIORITY_COPYRIGHT, + 'pdus/cdpp' => $PRIORITY_COPYRIGHT, + 'pdus/crms' => $PRIORITY_COPYRIGHT, + 'pdus/ncn' => $PRIORITY_COPYRIGHT, + 'pdus/ren' => $PRIORITY_COPYRIGHT, + 'und/crms' => $PRIORITY_COPYRIGHT, + 'und/ipma' => $PRIORITY_COPYRIGHT, + 'und/nfi' => $PRIORITY_COPYRIGHT, + 'und/ren' => $PRIORITY_COPYRIGHT, + + 'cc-by-3.0/con' => $PRIORITY_ACCESS, + 'cc-by-nd-3.0/con' => $PRIORITY_ACCESS, + 'cc-by-sa-3.0/con' => $PRIORITY_ACCESS, + 'cc-by-nc-3.0/con' => $PRIORITY_ACCESS, + 'cc-by-nc-nd-3.0/con' => $PRIORITY_ACCESS, + 'cc-by-nc-sa-3.0/con' => $PRIORITY_ACCESS, + 'cc-by-4.0/con' => $PRIORITY_ACCESS, + 'cc-by-nd-4.0/con' => $PRIORITY_ACCESS, + 'cc-by-sa-4.0/con' => $PRIORITY_ACCESS, + 'cc-by-nc-4.0/con' => $PRIORITY_ACCESS, + 'cc-by-nc-nd-4.0/con' => $PRIORITY_ACCESS, + 'cc-by-nc-sa-4.0/con' => $PRIORITY_ACCESS, + 'cc-zero/con' => $PRIORITY_ACCESS, + 'ic-world/con' => $PRIORITY_ACCESS, + 'nobody/pvt' => $PRIORITY_ACCESS, + 'pd/con' => $PRIORITY_ACCESS, + 'und-world/con' => $PRIORITY_ACCESS, + + 'cc-by-3.0/man' => $PRIORITY_MAN, + 'cc-by-4.0/man' => $PRIORITY_MAN, + 'cc-by-nc-3.0/man' => $PRIORITY_MAN, + 'cc-by-nc-4.0/man' => $PRIORITY_MAN, + 'cc-by-nc-nd-3.0/man' => $PRIORITY_MAN, + 'cc-by-nc-nd-4.0/man' => $PRIORITY_MAN, + 'cc-by-nc-sa-3.0/man' => $PRIORITY_MAN, + 'cc-by-nc-sa-4.0/man' => $PRIORITY_MAN, + 'cc-by-nd-3.0/man' => $PRIORITY_MAN, + 'cc-by-nd-4.0/man' => $PRIORITY_MAN, + 'cc-by-sa-3.0/man' => $PRIORITY_MAN, + 'cc-by-sa-4.0/man' => $PRIORITY_MAN, + 'cc-zero/man' => $PRIORITY_MAN, + 'ic-world/man' => $PRIORITY_MAN, + 'ic/man' => $PRIORITY_MAN, + 'nobody/del' => $PRIORITY_MAN, + 'nobody/man' => $PRIORITY_MAN, + 'pd-pvt/pvt' => $PRIORITY_MAN, + 'pd/man' => $PRIORITY_MAN, + 'pdus/man' => $PRIORITY_MAN, + 'supp/supp' => $PRIORITY_MAN, + 'und-world/man' => $PRIORITY_MAN, }; # prepared statements @@ -514,18 +570,10 @@ sub get_priority { my ($attr, $reason) = @_; my $code = "$attr/$reason"; - my $toreturn = undef; - - while (my ($priority, $codes) = each %$priorities) { - if (grep {$_ eq $code} @$codes) { - warn("$attr/$reason has two priorities ($priority and $toreturn)??") if defined $toreturn; - $exit = 1; - $toreturn = $priority - } - } + my $toreturn = $attr_reason_priorities->{$code}; if (not defined $toreturn) { - warn("Unknown code $attr/$reason"); + die("Unknown code $attr/$reason"); $exit = 1; }