#!/usr/local/bin/perl
### Extract MARC data from a Voyager database to standard output, either as
### raw MARC records or as human-readable text.
###
### Usage: perl example1.pl [auth | bib | mfhd] startID endID [raw]

use strict;
use warnings;

# this is required for database access
use DBI;

# Layout constants from the MARC record structure: a fixed 24-character
# leader followed by a directory of 12-character entries
# (3-char tag, 4-char field length, 5-char starting offset).
use constant {
    LEADER_LEN    => 24,
    DIR_ENTRY_LEN => 12,
};

### get and validate input arguments

# The record type and both ends of the ID range are mandatory.
usage() if @ARGV < 3;
my ($searchtype, $idstart, $idend) = @ARGV[0 .. 2];

# Optional 4th argument: any true value selects raw output.
my $raw = (@ARGV >= 4 && $ARGV[3]) ? 1 : 0;

# The record type is spliced into table/column names below, so it must be
# one of the known values; the IDs must be plain non-negative integers.
usage() unless $searchtype =~ /\A(?:auth|bib|mfhd)\z/;
usage() unless $idstart =~ /\A\d+\z/ and $idend =~ /\A\d+\z/;

### connect to database
# specifying which type of database, the host name, the SID
# and the database username and password;
# receive a database handle for this database connection.
# RaiseError makes every later DBI call die with the driver's message
# instead of failing silently.
# NOTE(review): host and credentials are hard-coded here; consider moving
# them to configuration.
my $dbh = DBI->connect(
    'DBI:Oracle:host=voyager.library.wmich.edu;sid=LIBR',
    'dbread', 'dbread',
    { RaiseError => 1, PrintError => 0 },
) or die "connecting: $DBI::errstr";

# Formulate the query. Identifiers cannot be bound, so the (whitelisted)
# record type is interpolated; the ID range is passed as bind values.
# Segments are ordered by descending seqnum within each record.
my $sqlquery =
      "select ${searchtype}_id, record_segment, seqnum "
    . "from wmichdb.${searchtype}_data "
    . "where ${searchtype}_id >= ? and ${searchtype}_id <= ? "
    . "order by ${searchtype}_id asc, seqnum desc";

my $sth = $dbh->prepare($sqlquery);
$sth->execute($idstart, $idend);

### assemble marc records from their segments, by auth/bib/mfhd id

# Raw MARC is binary data; keep the output layer from translating it.
binmode STDOUT if $raw;

# In raw mode complete records go straight to stdout; otherwise they are
# accumulated for the human-readable pass below.
my $marcstuff = '';
my $emit = sub {
    my ($record) = @_;
    if ($raw) {
        print $record;
    }
    else {
        $marcstuff .= $record;
    }
    return;
};

my $marc = '';
my $current_id;    # undef until the first row has been seen

# fetch one row at a time from the query result set
while (my ($rec_id, $recseg) = $sth->fetchrow_array) {
    $recseg = '' unless defined $recseg;

    if (defined $current_id and $rec_id == $current_id) {
        # Same record: rows arrive in descending seqnum order, so each
        # new segment belongs in front of what has been built so far.
        $marc = $recseg . $marc;
    }
    else {
        # Transition to a new record: flush the previous one (if any)
        # and start building this one.
        $emit->($marc) if defined $current_id;
        $current_id = $rec_id;
        $marc       = $recseg;
    }
}

# handle the last record at the end
$emit->($marc) if defined $current_id;

# release resources associated with this statement handle
$sth->finish;

# release the database connection associated with this database handle
$dbh->disconnect;

### human-readable output
if (!$raw) {
    # marc records are delimited by the record terminator character;
    # this creates the array of marc records from the accumulated data
    my @marcrec = split /\x1d/, $marcstuff;

    my $count = 0;
    for my $record (@marcrec) {
        print "\n" if $count++;    # blank line between records
        print_readable_record($record);
    }

    # provide count of marc records handled
    my $plural = $count > 1 ? "s read" : " read";
    printf("\n<<%d Marc record%s>>\n\n", $count, $plural);
}

# Print one MARC record in human-readable form: the leader on an "LDR:"
# line, then one "tag:length:offset:data" line per directory entry.
# Takes the record as a single string (without its record terminator).
# Malformed leaders or directory entries produce a warning on STDERR and
# end the listing for that record.
sub print_readable_record {
    my ($record) = @_;

    # output the leader
    printf("LDR:%s\n", substr($record, 0, LEADER_LEN));
    return if length($record) < LEADER_LEN;

    # Leader positions 12-16 hold the base address of data: the offset of
    # the first data field. The directory lies between the leader and the
    # field terminator that sits just before the base address.
    my ($base) = substr($record, 12, 5) =~ /\A\s*(\d+)\s*\z/;
    if (!defined $base) {
        warn "record has a non-numeric base address; directory skipped\n";
        return;
    }

    my $dir_end = $base - 1;
    $dir_end = length($record) if $dir_end > length($record);

    # loop through the directory, one complete 12-character entry at a time
    for (my $pos = LEADER_LEN;
         $pos + DIR_ENTRY_LEN <= $dir_end;
         $pos += DIR_ENTRY_LEN)
    {
        # get the tag id, the field's length, and the field's offset
        my ($tagid, $taglen, $offset) =
            unpack 'a3 a4 a5', substr($record, $pos, DIR_ENTRY_LEN);
        if ($taglen !~ /\A\d{4}\z/ or $offset !~ /\A\d{5}\z/) {
            warn "malformed directory entry at position $pos; "
               . "rest of directory skipped\n";
            last;
        }

        # Read the field: it starts at base address + offset and runs for
        # the stated length, which includes its trailing field terminator.
        my $start   = $base + $offset;
        my $tagdata = $start < length($record)
                    ? substr($record, $start, $taglen)
                    : '';

        # do the pretty printing formatting for human readability:
        # show each subfield delimiter + code as " |x ". Subfield codes
        # may be lowercase letters or digits.
        $tagdata =~ s/\x1f([a-z0-9])/ |$1 /g;
        # remove any remaining subfield delimiters and field terminators
        $tagdata =~ tr/\x1e\x1f//d;
        # drop the space between the two indicators and the first subfield
        if (length($tagdata) >= 4 and substr($tagdata, 2, 2) eq ' |') {
            substr($tagdata, 2, 1, '');
        }

        # output the tag parameters and its data
        printf("%3s:%4s:%5s:%s\n", $tagid, $taglen, $offset, $tagdata);
    }
    return;
}

# Show program usage and exit. Does not return.
# The exit status is 0, as it has always been for this script.
sub usage {
    print
        "\nUsage: perl example1.pl [auth | bib | mfhd] startID endID [raw]\n",
        " Pick one of the 3 data types.\n",
        " Specify record ID numbers; specified range is inclusive.\n",
        " Parameters must be in the above order.\n",
        " All parameters are required except for the last one.\n",
        " Program extracts marc data from blobs in Oracle.\n",
        " Output is human-formatted unless *raw* is specified\n",
        " and it goes to STDOUT.\n";
    exit(0);
}