Mary,
Here's a perl script I used to make some changes to 856 tags in our
system. You might be able to use it as a model for what you're trying to
do. Let me know if I can answer any questions about it.
J
***
#!/usr/bin/perl
use strict;
use DBI;
use Config::IniFiles;
use MARC::Record;
use MARC::File::XML;
use LWP::UserAgent;
my $cfg = Config::IniFiles-new( -file =
"/usr/local/vufind/web/conf/Evergreen-production.ini" );
my $port = $cfg-val( 'Catalog', 'port' );
my $hostname = $cfg-val( 'Catalog', 'hostname' );
my $database = $cfg-val( 'Catalog', 'database' );
my $username = $cfg-val( 'Catalog', 'user' );
my $password = $cfg-val( 'Catalog', 'password' );
my $dsn = "dbi:Pg:dbname=$database;host=$hostname;port=$port";
my $dbh = DBI-connect($dsn, $username, $password, {AutoCommit =
0, RaiseError = 0, PrintError = 0});
die("Could not connect to database!") unless $dbh;
$/ = "\035";
$| = 1;
my $total = 0;
my $updated = 0;
my $errors = 0;
my @ids = ();
my $sql = "
SELECT record_entry.id
FROM biblio.record_entry
WHERE record_entry.deleted IS FALSE
AND record_entry.active IS TRUE
";
print "$sql\n";
my $sth = $dbh-prepare($sql);
$sth-execute;
my $rv = $sth-err;
die($sth-errstr) if $rv;
while ( my $id = $sth-fetchrow ) {
push @ids, $id;
}
$sth-finish;
LOOP: while ( my $record_id = pop @ids ) {
$total++;
my $sql2 = "
SELECT record_entry.marc
FROM biblio.record_entry
WHERE record_entry.id = $record_id
";
my $sth2 = $dbh-prepare($sql2);
$sth2-execute;
my $rv2 = $sth2-err;
die($sth2-errstr) if $rv2;
my $hashref = $sth2-fetchrow_hashref;
my $marc_xml = $hashref-{'marc'};
$sth2-finish;
# Read in MARC and set values
my $record = '';
eval { $record = MARC::Record-new_from_xml($marc_xml, 'UTF-8');
};
if ( $@ ) {
print STDERR "ERROR LOADING TCN $record_id\n";
next LOOP;
}
# Fix the leader to indicate UTF-8
$record-encoding('UTF-8');
my @f856 = $record-field('856');
if ( @f856 ) {
print "\nTCN $record_id\n";
FOR: foreach my $field (@f856) {
print 'FOUND ', $field-as_formatted(), "\n";
# Check for URL in the 856 $u
my $u = $field-subfield('u');
if ( ! $u ) {
$record-delete_fields($field);
next FOR;
}
# First make the fixes
my $z = $field-subfield('z');
my $s3 = $field-subfield('3');
# Remove spaces from the URL
$u =~ s/\s//g;
# Copy $3 to $z if no $z
if ( ! $z $s3 ) {
$field-add_subfields('z' = $s3);
$z = $s3;
}
# Fix LC links
if ( $u =~ m/www\.loc\.gov\/catdir\/bios/ ) {
$field-delete_subfield(code = 'z');
$field-add_subfields('z' = 'Contributor
biographical information');
} elsif ( $u =~ m/www\.loc\.gov\/catdir\/samples/ ) {
$field-delete_subfield(code = 'z');
$field-add_subfields('z' = 'Sample text');
} elsif ( $u =~ m/www\.loc\.gov\/catdir\/description/ ) {
$field-delete_subfield(code = 'z');
$field-add_subfields('z' = 'Publisher
description');
} elsif ( $u =~ m/www\.loc\.gov\/catdir\/toc/ ) {
$field-delete_subfield(code = 'z');
$field-add_subfields('z' = 'Table of contents');
}
# Change first "www.http" to "http"
$u =~ s/www\.http/http/i;
# Change "www.loc/gov" to "www.loc.gov"
$u =~ s/www\.loc\/gov/www\.loc\.gov/i;
# Change "hhtp" to "http" at beginning of $u
$u =~ s/^hhtp/http/i;
# Add "http://" if protocol missing from start of $u
unless ( $u =~ m/^http/i || $u =~ m/^ftp/i ) {
$u = 'http://' . $u;
}
# Save changes
$field-delete_subfield(code = 'u');
$field-add_subfields('u' = $u);
# Now check the link
my $ua = LWP::UserAgent-new;
my $response = $ua-get($u);
if (! $response-is_success) {
print 'ERROR ', $response-status_line, "\n";
$errors++;
my $status = $response-status_line;
if ( $status =~ m/400 URL missing/i ||
$status =~ m/^401 Unauthorized/i ||
$status =~ m/^403 Forbidden/i ||
$status =~ m/^404 Can't chdir to/i ||
$status =~ m/^404 - File not found/i ||
$status =~ m/^404 Object Not Found/i ||
$status =~ m/^406 Not Acceptable/i ||
$status =~ m/^410 Gone/i ||
$status =~ m/^500 Can't connect to .* \(certificate
verify failed\)/i ||
$status =~ m/^500 Can't connect to .* \(No route to
host\)/i ||
$status =~ m/^500 No Host option provided/i ||
$status =~ m/^501 Protocol scheme .* is not
supported/i ||
$status =~ m/^503 Server Error/i
) {
$record-delete_fields($field);
print "DELETED 856\n";
next FOR;
} elsif ( $status =~ m/500 read timeout/i ) {
next FOR;
} else {
# Copy subfield 856 $z to 999 $z
my $f999 = $record-field('999');
if ( $f999 ) {
$f999-add_subfields('z' = $z);
} else {
my $new = MARC::Field-new('999', ' ', ' ',
'z' = $z);
$record-insert_fields_ordered($new);
}
# Delete subfield z from 856 (causes link not to
appear)
$field-delete_subfield(code = 'z');
# Show what we did
$f999