#!/usr/local/bin/perl # from incoming authority records, removes those whose # 010 |a fields begin with "sj" # remaining records are stripped of the 9xx fields if ($#ARGV < 0) {usage();} $sub_a = sprintf("%ca", 0x1f); #### BEGIN NOT SO GOOD CODE ## read the file to be processed #$marcin = $ARGV[0]; #$fopen = sprintf("Cannot open %s for input\n", $marcin); #open(marcin, $marcin) or die $fopen; #@marclines = ; #close(marcin); # #$marcstuff = ''; #$idx = 0; #while ($idx < @marclines) #{ # $marcstuff = $marcstuff . $marclines[$idx++]; #} #@marcrecords = split /\x1d/, $marcstuff; #### END NOT SO GOOD CODE #### BEGIN BETTER CODE $/ = chr(0x1d); # so can easily read a file of MARC records # read the file to be processed $marcin = $ARGV[0]; $fopen = sprintf("Cannot open %s for input\n", $marcin); open(marcin, $marcin) or die $fopen; @marcrecords = ; close(marcin); #### END BETTER CODE # open the output files $marcpre = sprintf(">%s.preprocessed", $marcin); $fopen = sprintf("Cannot open %s for output\n", substr($marcpre,1)); open(marcpre, $marcpre) or die $fopen; $marcdel = sprintf(">%s.deleted", $marcin); $fopen = sprintf("Cannot open %s for output\n", substr($marcdel,1)); open(marcdel, $marcdel) or die $fopen; # process the file foreach $marcrec (@marcrecords) { @deleteme = (); $iskeeper = 1; $baseaddr = substr($marcrec, 12, 5); $mptr = 24; while ($mptr < ($baseaddr-1)) { $tagid = substr($marcrec, $mptr, 3); $taglen = substr($marcrec, $mptr+3, 4); $offset = substr($marcrec, $mptr+7, 5); $tagaddr = $baseaddr + $offset - 1; $tagdata = substr($marcrec, $tagaddr, $taglen); if ($tagid eq '010') { $pos = index($tagdata, $sub_a); if ($pos != -1) {if (substr($tagdata, $pos+2, 2) eq 'sj') {$iskeeper = 0;}} } if ($iskeeper) {if ($tagid =~ /^9/) {push @deleteme, $tagid;}} $mptr += 12; } if ($iskeeper) { foreach $fieldtodelete (@deleteme) {$marcrec = deletetag($fieldtodelete, $marcrec);} printf marcpre ("%s%c", $marcrec, 0x1d); } else {printf marcdel ("%s%c", $marcrec, 0x1d);} } close(marcpre); close(marcdel); sub deletetag { my ($deltag, $oldmarcrec) = @_; my $newmarcrec = ''; my $leader = ''; my $baseaddr = 0; my $mustdelete = 0; my $deletepoint = 0; my $taghole = 0; my $didx = 0; my $tagctr = -1; my $strptr = 24; my $tagidx = 0; my @tagid = (); my @taglen = (); my @offset = (); my @tagdata = (); my @deltaglength = (); my $subfdelim = chr(0x1f); my $fdelim = chr(0x1e); my $recdelim = chr(0x1d); $leader = substr($oldmarcrec, 0, 24); $baseaddr = substr($oldmarcrec, 12, 5) - 1; # go through tags and find deletion points while ($strptr < $baseaddr-1) { $tagctr++; $tagid[$tagctr] = substr($oldmarcrec, $strptr, 3); $taglen[$tagctr] = substr($oldmarcrec, $strptr+3, 4); $offset[$tagctr] = substr($oldmarcrec, $strptr+7, 5); $tagdata[$tagctr] = substr($oldmarcrec, $baseaddr+$offset[$tagctr], $taglen[$tagctr]); # check if current tag should be deleted if ($tagid[$tagctr] eq $deltag) { $deltaglength[$didx++] = $taglen[$tagctr]; $mustdelete = 1; } $strptr += 12; } if ($mustdelete) { for ($didx=0; $didx<@deltaglength; $didx++) { # modify record length $leader = sprintf("%5.5d%s", substr($leader,0,5)-12-$deltaglength[$didx], substr($leader,5)); # modify base address $leader = sprintf("%s%5.5d%s", substr($leader,0,12), substr($leader,12,5)-12, substr($leader,17)); # now modify tag directory; no changes up to tag to be deleted $tagidx = 0; while (($tagidx <= $tagctr) and ($tagid[$tagidx] ne $deltag)) {$tagidx++;} # now at tag to be deleted $taghole = $tagidx; # remember tag's number in array $tagidx++; # step over tag to delete # keep rest of tags while ($tagidx <= $tagctr) { $offset[$tagidx] -= $deltaglength[$didx]; # data location has to shift over $tagidx++; } # shrink array to fill deleted tag's hole for ($tagidx=$taghole; $tagidx<$tagctr; $tagidx++) { $tagid[$tagidx] = $tagid[$tagidx+1]; $taglen[$tagidx] = $taglen[$tagidx+1]; $offset[$tagidx] = $offset[$tagidx+1]; $tagdata[$tagidx] = $tagdata[$tagidx+1]; } $tagctr--; # one less tag } # write leader and tag directory $newmarcrec = $leader; for ($tagidx=0; $tagidx<=$tagctr; $tagidx++) {$newmarcrec .= sprintf ("%3.3d%4.4d%5.5d", $tagid[$tagidx], $taglen[$tagidx], $offset[$tagidx]); } # write tag data for ($tagidx=0; $tagidx<=$tagctr; $tagidx++) {$newmarcrec .= $tagdata[$tagidx];} $newmarcrec .= sprintf ("$fdelim$recdelim"); return $newmarcrec; } else {return $oldmarcrec;} } sub usage() { printf("\nUsage: noauthsj.pl authfile\n"); printf(" Removes records where the 010 |a starts with \"sj\".\n"); printf(" Those records are written to authfile.deleted.\n"); printf(" Untouched records are written to authfile.preprocessed.\n"); printf(" Kept records have the 9xx fields removed.\n"); exit(0); }