[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Koha-cvs] koha C4/AuthoritiesMarc.pm C4/Biblio.pm C4/Sear...
From: |
paul poulain |
Subject: |
[Koha-cvs] koha C4/AuthoritiesMarc.pm C4/Biblio.pm C4/Sear... |
Date: |
Thu, 10 May 2007 14:45:15 +0000 |
CVSROOT: /sources/koha
Module name: koha
Changes by: paul poulain <tipaul> 07/05/10 14:45:15
Modified files:
C4 : AuthoritiesMarc.pm Biblio.pm Search.pm
authorities : authorities.pl
misc/migration_tools: rebuild_nozebra.pl rebuild_zebra.pl
opac : opac-rss.pl
Log message:
Koha NoZebra :
- support for authorities
- some bugfixes in ordering and "CCL" parsing
- support for authorities <=> biblios walking
Seems I can do what I want now, so I consider its done, except for
bugfixes that will be needed i m sure !
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/koha/C4/AuthoritiesMarc.pm?cvsroot=koha&r1=1.45&r2=1.46
http://cvs.savannah.gnu.org/viewcvs/koha/C4/Biblio.pm?cvsroot=koha&r1=1.203&r2=1.204
http://cvs.savannah.gnu.org/viewcvs/koha/C4/Search.pm?cvsroot=koha&r1=1.141&r2=1.142
http://cvs.savannah.gnu.org/viewcvs/koha/authorities/authorities.pl?cvsroot=koha&r1=1.23&r2=1.24
http://cvs.savannah.gnu.org/viewcvs/koha/misc/migration_tools/rebuild_nozebra.pl?cvsroot=koha&r1=1.4&r2=1.5
http://cvs.savannah.gnu.org/viewcvs/koha/misc/migration_tools/rebuild_zebra.pl?cvsroot=koha&r1=1.7&r2=1.8
http://cvs.savannah.gnu.org/viewcvs/koha/opac/opac-rss.pl?cvsroot=koha&r1=1.2&r2=1.3
Patches:
Index: C4/AuthoritiesMarc.pm
===================================================================
RCS file: /sources/koha/koha/C4/AuthoritiesMarc.pm,v
retrieving revision 1.45
retrieving revision 1.46
diff -u -b -r1.45 -r1.46
--- C4/AuthoritiesMarc.pm 6 Apr 2007 14:48:45 -0000 1.45
+++ C4/AuthoritiesMarc.pm 10 May 2007 14:45:15 -0000 1.46
@@ -93,7 +93,100 @@
=cut
sub SearchAuthorities {
my ($tags, $and_or, $excluding, $operator, $value,
$offset,$length,$authtypecode,$sortby) = @_;
+# warn "CALL : $tags, $and_or, $excluding, $operator, $value,
$offset,$length,$authtypecode,$sortby";
my $dbh=C4::Context->dbh;
+ if (C4::Context->preference('NoZebra')) {
+
+ #
+ # build the query
+ #
+ my $query;
+ my @auths=split / /,$authtypecode ;
+ foreach my $auth (@auths){
+ $query .="AND auth_type= $auth ";
+ }
+ $query =~ s/^AND //;
+ my $dosearch;
+ for(my $i = 0 ; $i <= $#{$value} ; $i++)
+ {
+ if (@$value[$i]){
+ if (@$tags[$i] eq "mainmainentry") {
+ $query .=" AND mainmainentry";
+ }elsif (@$tags[$i] eq "mainentry") {
+ $query .=" AND mainentry";
+ } else {
+ $query .=" AND ";
+ }
+ if (@$operator[$i] eq 'is') {
+ $query.=(@$tags[$i]?"=":""). '"'address@hidden'"';
+ }elsif (@$operator[$i] eq "="){
+ $query.=(@$tags[$i]?"=":""). '"'address@hidden'"';
+ }elsif (@$operator[$i] eq "start"){
+ $query.=(@$tags[$i]?"=":"").'"'address@hidden'%"';
+ } else {
+ $query.=(@$tags[$i]?"=":"").'"'address@hidden'%"';
+ }
+ $dosearch=1;
+ }#if value
+ }
+ #
+ # do the query (if we had some search term
+ #
+ if ($dosearch) {
+# warn "QUERY : $query";
+ my $result = C4::Search::NZanalyse($query,'authorityserver');
+# warn "result : $result";
+ my %result;
+ foreach (split /;/,$result) {
+ my ($authid,$title) = split /,/,$_;
+ # hint : the result is sorted by title.biblionumber because we
can have X biblios with the same title
+ # and we don't want to get only 1 result for each of them !!!
+ # hint & speed improvement : we can order without reading the
record
+ # so order, and read records only for the requested page !
+ $result{$title.$authid}=$authid;
+ }
+ # sort the hash and return the same structure as GetRecords (Zebra
querying)
+ my @finalresult = ();
+ my $numbers=0;
+ if ($sortby eq 'HeadingDsc') { # sort by mainmainentry desc
+ foreach my $key (sort {$b cmp $a} (keys %result)) {
+ push @finalresult, $result{$key};
+# warn "push..."$#finalresult;
+ $numbers++;
+ }
+ } else { # sort by mainmainentry ASC
+ foreach my $key (sort (keys %result)) {
+ push @finalresult, $result{$key};
+# warn "push..."$#finalresult;
+ $numbers++;
+ }
+ }
+ # limit the $results_per_page to result size if it's more
+ $length = $numbers-1 if $numbers < $length;
+ # for the requested page, replace authid by the complete record
+ # speed improvement : avoid reading too much things
+ for (my $counter=$offset;$counter<=$offset+$length;$counter++) {
+# $finalresult[$counter] =
GetAuthority($finalresult[$counter])->as_usmarc;
+ my $separator=C4::Context->preference('authoritysep');
+ my $authrecord =
MARC::File::USMARC::decode(GetAuthority($finalresult[$counter])->as_usmarc);
+ my $authid=$authrecord->field('001')->data();
+ my $summary=BuildSummary($authrecord,$authid,$authtypecode);
+ my $query_auth_tag = "SELECT auth_tag_to_report FROM
auth_types WHERE authtypecode=?";
+ my $sth = $dbh->prepare($query_auth_tag);
+ $sth->execute($authtypecode);
+ my $auth_tag_to_report = $sth->fetchrow;
+ my %newline;
+ $newline{used}=CountUsage($authid);
+ $newline{summary} = $summary;
+ $newline{authid} = $authid;
+ $newline{even} = $counter % 2;
+ $finalresult[$counter]= \%newline;
+ }
+ return (address@hidden, $numbers);
+ } else {
+ return;
+ }
+ } else {
my $query;
my $attr;
# the marclist may contain "mainentry". In this case, search the
tag_to_report, that depends on
@@ -147,7 +240,6 @@
## Adding order
$query=' @or @attr 7=1 @attr 1=Heading 0 @or @attr 7=1 @attr
1=Heading-Entity 1'.$query if ($sortby eq "HeadingAsc");
$query=' @or @attr 7=2 @attr 1=Heading 0 @or @attr 7=1 @attr
1=Heading-Entity 1'.$query if ($sortby eq "HeadingDsc");
- warn $query;
$offset=0 unless $offset;
my $counter = $offset;
@@ -209,11 +301,12 @@
}# all $z's
}## if nbresult
-NOLUCK:
-# $oAResult->destroy();
-# $oAuth[0]->destroy();
+ NOLUCK:
+ # $oAResult->destroy();
+ # $oAuth[0]->destroy();
return (address@hidden, $nbresults);
+ }
}
=head2 CountUsage
@@ -228,11 +321,15 @@
=cut
sub CountUsage {
my ($authid) = @_;
- ### try ZOOM search here
+ if (C4::Context->preference('NoZebra')) {
+ # Read the index Koha-Auth-Number for this authid and count the lines
+ my $result = C4::Search::NZanalyse("an=$authid");
+ return scalar split /;/,$result;
+ } else {
+ ### ZOOM search here
my $oConnection=C4::Context->Zconn("biblioserver",1);
my $query;
$query= "an=".$authid;
-
my $oResult = $oConnection->search(new ZOOM::Query::CCL2RPN( $query,
$oConnection ));
my $result;
while ((my $i = ZOOM::event([ $oConnection ])) != 0) {
@@ -242,6 +339,7 @@
}
}
return ($result);
+ }
}
=head2 CountUsageChildren
@@ -402,7 +500,7 @@
##Both authid and authtypecode is expected to be in the same field. Modify
if other requirements arise
$record->add_fields('001',$authid) unless $record->field('001');
$record->add_fields('152','','','b'=>$authtypecode) unless
$record->field('152');
- warn $record->as_formatted;
+# warn $record->as_formatted;
$dbh->do("lock tables auth_header WRITE");
$sth=$dbh->prepare("insert into auth_header
(authid,datecreated,authtypecode,marc) values (?,now(),?,?)");
$sth->execute($authid,$authtypecode,$record->as_usmarc);
@@ -417,8 +515,7 @@
$sth->finish;
}
$dbh->do("unlock tables");
- ModZebra($authid,'specialUpdate',"authorityserver");
-
+ ModZebra($authid,'specialUpdate',"authorityserver",$record);
return ($authid);
}
@@ -439,7 +536,7 @@
my ($authid) = @_;
my $dbh=C4::Context->dbh;
- ModZebra($authid,"recordDelete","authorityserver");
+ ModZebra($authid,"recordDelete","authorityserver",GetAuthority($authid));
$dbh->do("delete from auth_header where authid=$authid") ;
}
@@ -950,6 +1047,9 @@
push @tags_using_authtype,$tagfield."9" ;
}
+ if (C4::Context->preference('NoZebra')) {
+ warn "MERGE TO DO";
+ } else {
# now, find every biblio using this authority
my $oConnection=C4::Context->Zconn("biblioserver");
my $query;
@@ -994,6 +1094,7 @@
}
}#foreach $marc
+ }
# now, find every other authority linked with this authority
# my $oConnection=C4::Context->Zconn("authorityserver");
# my $query;
@@ -1054,8 +1155,16 @@
=cut
-# $Id: AuthoritiesMarc.pm,v 1.45 2007/04/06 14:48:45 hdl Exp $
+# $Id: AuthoritiesMarc.pm,v 1.46 2007/05/10 14:45:15 tipaul Exp $
# $Log: AuthoritiesMarc.pm,v $
+# Revision 1.46 2007/05/10 14:45:15 tipaul
+# Koha NoZebra :
+# - support for authorities
+# - some bugfixes in ordering and "CCL" parsing
+# - support for authorities <=> biblios walking
+#
+# Seems I can do what I want now, so I consider its done, except for bugfixes
that will be needed i m sure !
+#
# Revision 1.45 2007/04/06 14:48:45 hdl
# Code Cleaning : AuthoritiesMARC.
#
Index: C4/Biblio.pm
===================================================================
RCS file: /sources/koha/koha/C4/Biblio.pm,v
retrieving revision 1.203
retrieving revision 1.204
diff -u -b -r1.203 -r1.204
--- C4/Biblio.pm 3 May 2007 15:16:02 -0000 1.203
+++ C4/Biblio.pm 10 May 2007 14:45:15 -0000 1.204
@@ -33,7 +33,7 @@
use vars qw($VERSION @ISA @EXPORT);
# set the version for version checking
-$VERSION = do { my @v = '$Revision: 1.203 $' =~ /\d+/g; shift(@v).".".join(
"_", map { sprintf "%03d", $_ } @v ); };
+$VERSION = do { my @v = '$Revision: 1.204 $' =~ /\d+/g; shift(@v).".".join(
"_", map { sprintf "%03d", $_ } @v ); };
@ISA = qw( Exporter );
@@ -2712,6 +2712,7 @@
sub ModZebra {
###Accepts a $server variable thus we can use it for biblios authorities or
other zebra dbs
my ( $biblionumber, $op, $server, $newRecord ) = @_;
+# warn "ModZebra with : ( $biblionumber, $op, $server, $newRecord )";
my $dbh=C4::Context->dbh;
#warn "SERVER:".$server;
#
@@ -2724,25 +2725,31 @@
# lock the nozebra table : we will read index lines, update them in
Perl process
# and write everything in 1 transaction.
# lock the table to avoid someone else overwriting what we are doing
- $dbh->do('LOCK TABLES nozebra WRITE,biblio WRITE,biblioitems WRITE,
systempreferences WRITE');
+ $dbh->do('LOCK TABLES nozebra WRITE,biblio WRITE,biblioitems WRITE,
systempreferences WRITE, auth_types WRITE, auth_header WRITE');
my %result; # the result hash that will be builded by deletion / add,
and written on mySQL at the end, to improve speed
- my $record= GetMarcBiblio($biblionumber);
+ my $record;
+ if ($server eq 'biblioserver') {
+ $record= GetMarcBiblio($biblionumber);
+ } else {
+ $record= C4::AuthoritiesMarc::GetAuthority($biblionumber);
+ }
if ($op eq 'specialUpdate') {
# OK, we have to add or update the record
# 1st delete (virtually, in indexes) ...
- %result = _DelBiblioNoZebra($biblionumber,$record);
+ %result = _DelBiblioNoZebra($biblionumber,$record,$server);
# ... add the record
- %result=_AddBiblioNoZebra($biblionumber,$newRecord, %result);
+ %result=_AddBiblioNoZebra($biblionumber,$newRecord, $server,
%result);
} else {
# it's a deletion, delete the record...
- %result=_DelBiblioNoZebra($biblionumber,$record);
+# warn "DELETE the record $biblionumber on
$server".$record->as_formatted;
+ %result=_DelBiblioNoZebra($biblionumber,$record,$server);
}
# ok, now update the database...
- my $sth = $dbh->prepare("UPDATE nozebra SET biblionumbers=? WHERE
indexname=? AND value=?");
+ my $sth = $dbh->prepare("UPDATE nozebra SET biblionumbers=? WHERE
server=? AND indexname=? AND value=?");
foreach my $key (keys %result) {
foreach my $index (keys %{$result{$key}}) {
- warn "UPDATING : $key , $index with :".$result{$key}->{$index};
- $sth->execute($result{$key}->{$index},$key,$index);
+# warn "UPDATING : $server $key , $index with
:".$result{$key}->{$index};
+ $sth->execute($result{$key}->{$index}, $server, $key, $index);
}
}
$dbh->do('UNLOCK TABLES');
@@ -2777,7 +2784,7 @@
=head1 INTERNAL FUNCTIONS
-=head2 _DelBiblioNoZebra($biblionumber,$record);
+=head2 _DelBiblioNoZebra($biblionumber,$record,$server);
function to delete a biblio in NoZebra indexes
This function does NOT delete anything in database : it reads all the
indexes entries
@@ -2785,21 +2792,33 @@
The SQL part is done either :
- after the Add if we are modifying a biblio (delete + add again)
- immediatly after this sub if we are doing a true deletion.
+ $server can be 'biblioserver' or 'authorityserver' : it indexes biblios or
authorities (in the same table, $server being part of the table itself
=cut
sub _DelBiblioNoZebra {
- my ($biblionumber,$record)address@hidden;
+ my ($biblionumber, $record, $server)address@hidden;
- warn "DELETING".$record->as_formatted;
# Get the indexes
my $dbh = C4::Context->dbh;
# Get the indexes
- my %index=GetNoZebraIndexes;
+ my %index;
+ my $title;
+ if ($server eq 'biblioserver') {
+ %index=GetNoZebraIndexes;
# get title of the record (to store the 10 first letters with the index)
my ($titletag,$titlesubfield) = GetMarcFromKohaField('biblio.title');
- my $title = lc($record->subfield($titletag,$titlesubfield));
+ $title = lc($record->subfield($titletag,$titlesubfield));
+ } else {
+ # for authorities, the "title" is the $a mainentry
+ my $authref =
C4::AuthoritiesMarc::GetAuthType($record->subfield(152,'b'));
+ warn "ERROR : authtype undefined for ".$record->as_formatted unless
$authref;
+ $title = $record->subfield($authref->{auth_tag_to_report},'a');
+ $index{'mainmainentry'}= $authref->{'auth_tag_to_report'}.'a';
+ $index{'mainentry'} = $authref->{'auth_tag_to_report'}.'*';
+ $index{'auth_type'} = '152b';
+ }
my %result;
# remove blancks comma (that could cause problem when decoding the string
for CQL retrieval) and regexp specific values
@@ -2807,7 +2826,7 @@
# limit to 10 char, should be enough, and limit the DB size
$title = substr($title,0,10);
#parse each field
- my $sth2=$dbh->prepare('SELECT biblionumbers FROM nozebra WHERE
indexname=? AND value=?');
+ my $sth2=$dbh->prepare('SELECT biblionumbers FROM nozebra WHERE server=?
AND indexname=? AND value=?');
foreach my $field ($record->fields()) {
#parse each subfield
next if $field->tag <10;
@@ -2823,22 +2842,20 @@
$indexed=1;
my $line= lc $subfield->[1];
# remove meaningless value in the field...
- $line =~
s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g;
+ $line =~
s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g;
# ... and split in words
- warn "DELETING : $key / $tag / $subfieldcode / $line";
foreach (split / /,$line) {
next unless $_; # skip empty values (multiple spaces)
# if the entry is already here, do nothing, the
biblionumber has already be removed
unless ($result{$key}->{$_} =~
/$biblionumber,$title\-(\d);/) {
# get the index value if it exist in the nozebra
table and remove the entry, otherwise, do nothing
- $sth2->execute($key,$_);
+ $sth2->execute($server,$key,$_);
my $existing_biblionumbers = $sth2->fetchrow;
# it exists
if ($existing_biblionumbers) {
# warn " existing for $key $_:
$existing_biblionumbers";
$result{$key}->{$_} =$existing_biblionumbers;
$result{$key}->{$_} =~
s/$biblionumber,$title\-(\d);//;
- warn "after cleaning : $key / $_ =
".$result{$key}->{$_};
}
}
}
@@ -2847,18 +2864,17 @@
# the subfield is not indexed, store it in __RAW__ index anyway
unless ($indexed) {
my $line= lc $subfield->[1];
- $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/
/g;
+ $line =~
s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g;
# ... and split in words
foreach (split / /,$line) {
next unless $_; # skip empty values (multiple spaces)
# if the entry is already here, do nothing, the
biblionumber has already be removed
unless ($result{'__RAW__'}->{$_} =~
/$biblionumber,$title\-(\d);/) {
# get the index value if it exist in the nozebra table
and remove the entry, otherwise, do nothing
- $sth2->execute('__RAW__',$_);
+ $sth2->execute($server,'__RAW__',$_);
my $existing_biblionumbers = $sth2->fetchrow;
# it exists
if ($existing_biblionumbers) {
- warn " existing for __RAW__ $_ :
$existing_biblionumbers";
$result{'__RAW__'}->{$_} =$existing_biblionumbers;
$result{'__RAW__'}->{$_} =~
s/$biblionumber,$title\-(\d);//;
}
@@ -2878,20 +2894,33 @@
sub _AddBiblioNoZebra {
- my ($biblionumber,$record,%result)address@hidden;
+ my ($biblionumber, $record, $server, %result)address@hidden;
my $dbh = C4::Context->dbh;
# Get the indexes
- my %index=GetNoZebraIndexes;
+ my %index;
+ my $title;
+ if ($server eq 'biblioserver') {
+ %index=GetNoZebraIndexes;
# get title of the record (to store the 10 first letters with the index)
my ($titletag,$titlesubfield) = GetMarcFromKohaField('biblio.title');
- my $title = lc($record->subfield($titletag,$titlesubfield));
+ $title = lc($record->subfield($titletag,$titlesubfield));
+ } else {
+# warn "server : $server";
+ # for authorities, the "title" is the $a mainentry
+ my $authref =
C4::AuthoritiesMarc::GetAuthType($record->subfield(152,'b'));
+ warn "ERROR : authtype undefined for ".$record->as_formatted unless
$authref;
+ $title = $record->subfield($authref->{auth_tag_to_report},'a');
+ $index{'mainmainentry'}=$authref->{auth_tag_to_report}.'a';
+ $index{'mainentry'} = $authref->{auth_tag_to_report}.'*';
+ $index{'auth_type'} = '152b';
+ }
# remove blancks comma (that could cause problem when decoding the string
for CQL retrieval) and regexp specific values
$title =~ s/ |,|;|\[|\]|\(|\)|\*|-|'|=//g;
# limit to 10 char, should be enough, and limit the DB size
$title = substr($title,0,10);
#parse each field
- my $sth2=$dbh->prepare('SELECT biblionumbers FROM nozebra WHERE
indexname=? AND value=?');
+ my $sth2=$dbh->prepare('SELECT biblionumbers FROM nozebra WHERE server=?
AND indexname=? AND value=?');
foreach my $field ($record->fields()) {
#parse each subfield
next if $field->tag <10;
@@ -2907,7 +2936,7 @@
$indexed=1;
my $line= lc $subfield->[1];
# remove meaningless value in the field...
- $line =~
s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g;
+ $line =~
s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g;
# ... and split in words
foreach (split / /,$line) {
next unless $_; # skip empty values (multiple spaces)
@@ -2919,18 +2948,18 @@
$result{$key}->{$_} .=
"$biblionumber,$title-$weight;";
} else {
# get the value if it exist in the nozebra table,
otherwise, create it
- $sth2->execute($key,$_);
+ $sth2->execute($server,$key,$_);
my $existing_biblionumbers = $sth2->fetchrow;
# it exists
if ($existing_biblionumbers) {
- warn" existing : $existing_biblionumbers";
$result{$key}->{$_} =$existing_biblionumbers;
my $weight=$1+1;
$result{$key}->{$_} =~
s/$biblionumber,$title\-(\d);//;
$result{$key}->{$_} .=
"$biblionumber,$title-$weight;";
# create a new ligne for this entry
} else {
- $dbh->do('INSERT INTO nozebra SET
indexname='.$dbh->quote($key).',value='.$dbh->quote($_));
+# warn "INSERT : $server / $key / $_";
+ $dbh->do('INSERT INTO nozebra SET
server='.$dbh->quote($server).',
indexname='.$dbh->quote($key).',value='.$dbh->quote($_));
$result{$key}->{$_}.="$biblionumber,$title-1;";
}
}
@@ -2940,7 +2969,7 @@
# the subfield is not indexed, store it in __RAW__ index anyway
unless ($indexed) {
my $line= lc $subfield->[1];
- $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/
/g;
+ $line =~
s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g;
# ... and split in words
foreach (split / /,$line) {
next unless $_; # skip empty values (multiple spaces)
@@ -2951,7 +2980,7 @@
$result{'__RAW__'}->{$_} .=
"$biblionumber,$title-$weight;";
} else {
# get the value if it exist in the nozebra table,
otherwise, create it
- $sth2->execute('__RAW__',$_);
+ $sth2->execute($server,'__RAW__',$_);
my $existing_biblionumbers = $sth2->fetchrow;
# it exists
if ($existing_biblionumbers) {
@@ -2961,7 +2990,7 @@
$result{'__RAW__'}->{$_} .=
"$biblionumber,$title-$weight;";
# create a new ligne for this entry
} else {
- $dbh->do('INSERT INTO nozebra SET
indexname="__RAW__",value='.$dbh->quote($_));
+ $dbh->do('INSERT INTO nozebra SET
server='.$dbh->quote($server).', indexname="__RAW__",value='.$dbh->quote($_));
$result{'__RAW__'}->{$_}.="$biblionumber,$title-1;";
}
}
@@ -3174,7 +3203,7 @@
$dbh->do($query);
if ( $dbh->errstr ) {
- warn "$query";
+ warn "ERROR in _koha_modify_biblioitem $query";
}
}
@@ -3866,8 +3895,16 @@
=cut
-# $Id: Biblio.pm,v 1.203 2007/05/03 15:16:02 tipaul Exp $
+# $Id: Biblio.pm,v 1.204 2007/05/10 14:45:15 tipaul Exp $
# $Log: Biblio.pm,v $
+# Revision 1.204 2007/05/10 14:45:15 tipaul
+# Koha NoZebra :
+# - support for authorities
+# - some bugfixes in ordering and "CCL" parsing
+# - support for authorities <=> biblios walking
+#
+# Seems I can do what I want now, so I consider its done, except for bugfixes
that will be needed i m sure !
+#
# Revision 1.203 2007/05/03 15:16:02 tipaul
# BUGFIX for : NoZebra
# - NoZebra features : seems they work fine now (adding, modifying, deleting)
Index: C4/Search.pm
===================================================================
RCS file: /sources/koha/koha/C4/Search.pm,v
retrieving revision 1.141
retrieving revision 1.142
diff -u -b -r1.141 -r1.142
--- C4/Search.pm 9 May 2007 19:42:48 -0000 1.141
+++ C4/Search.pm 10 May 2007 14:45:15 -0000 1.142
@@ -25,7 +25,7 @@
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
# set the version for version checking
-$VERSION = do { my @v = '$Revision: 1.141 $' =~ /\d+/g;
+$VERSION = do { my @v = '$Revision: 1.142 $' =~ /\d+/g;
shift(@v) . "." . join( "_", map { sprintf "%03d", $_ } @v );
};
@@ -1145,7 +1145,10 @@
=cut
sub NZanalyse {
- my ($string) = @_;
+ my ($string,$server) = @_;
+ # $server contains biblioserver or authorities, depending on what we
search on.
+ warn "querying : $string on $server";
+ $server='biblioserver' unless $server;
# if we have a ", replace the content to discard temporarily any
and/or/not inside
my $commacontent;
if ($string =~/"/) {
@@ -1156,32 +1159,32 @@
# split the query string in 3 parts : X AND Y means : $left="X",
$operand="AND" and $right="Y"
# then, call again NZanalyse with $left and $right
# (recursive until we find a leaf (=> something without and/or/not)
- $string =~ /(.*)( and | or | not )(.*)/;
+ $string =~ /(.*)( and | or | not | AND | OR | NOT )(.*)/;
my $left = $1;
my $right = $3;
- my $operand = $2;
+ my $operand = lc($2);
# it's not a leaf, we have a and/or/not
if ($operand) {
# reintroduce comma content if needed
$right =~ s/__X__/"$commacontent"/ if $commacontent;
$left =~ s/__X__/"$commacontent"/ if $commacontent;
-# print "noeud : $left / $operand / $right\n";
- my $leftresult = NZanalyse($left);
- my $rightresult = NZanalyse($right);
+# warn "node : $left / $operand / $right\n";
+ my $leftresult = NZanalyse($left,$server);
+ my $rightresult = NZanalyse($right,$server);
# OK, we have the results for right and left part of the query
# depending of operand, intersect, union or exclude both lists
# to get a result list
if ($operand eq ' and ') {
- my @leftresult = split /,/, $leftresult;
-# my @rightresult = split /,/,$leftresult;
+ my @leftresult = split /;/, $leftresult;
+# my @rightresult = split /;/,$leftresult;
my $finalresult;
# parse the left results, and if the biblionumber exist in the
right result, save it in finalresult
# the result is stored twice, to have the same weight for AND than
OR.
# example : TWO : 61,61,64,121 (two is twice in the biblio #61) /
TOWER : 61,64,130
# result : 61,61,61,61,64,64 for two AND tower : 61 has more
weight than 64
foreach (@leftresult) {
- if ($rightresult =~ "$_,") {
- $finalresult .= "$_,$_,";
+ if ($rightresult =~ "$_;") {
+ $finalresult .= "$_;$_;";
}
}
return $finalresult;
@@ -1189,12 +1192,12 @@
# just merge the 2 strings
return $leftresult.$rightresult;
} elsif ($operand eq ' not ') {
- my @leftresult = split /,/, $leftresult;
-# my @rightresult = split /,/,$leftresult;
+ my @leftresult = split /;/, $leftresult;
+# my @rightresult = split /;/,$leftresult;
my $finalresult;
foreach (@leftresult) {
- unless ($rightresult =~ "$_,") {
- $finalresult .= "$_,";
+ unless ($rightresult =~ "$_;") {
+ $finalresult .= "$_;";
}
}
return $finalresult;
@@ -1206,28 +1209,32 @@
} else {
$string =~ s/__X__/"$commacontent"/ if $commacontent;
$string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\// /g;
-# print "feuille : $string\n";
+# warn "leaf : $string\n";
# parse the string in in operator/operand/value again
$string =~ /(.*)(=|>|>=|<|<=)(.*)/;
my $left = $1;
my $operator = $2;
my $right = $3;
my $results;
- # automatic replace for short operator
+ # automatic replace for short operators
$left='title' if $left eq 'ti';
$left='author' if $left eq 'au';
+ $left='koha-Auth-Number' if $left eq 'an';
if ($operator) {
#do a specific search
my $dbh = C4::Context->dbh;
$operator='LIKE' if $operator eq '=' and $right=~ /%/;
- my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE
indexname=? AND value $operator ?");
-# print "$left / $operator / $right\n";
+ my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE
server=? AND indexname=? AND value $operator ?");
+ warn "$left / $operator / $right\n";
# split each word, query the DB and build the biblionumbers result
foreach (split / /,$right) {
my $biblionumbers;
- $sth->execute($left,$_);
+ next unless $_;
+# warn "EXECUTE : $server, $left, $_";
+ $sth->execute($server, $left, $_);
while (my $line = $sth->fetchrow) {
$biblionumbers .= $line;
+# warn "result : $line";
}
# do a AND with existing list if there is one, otherwise, use
the biblionumbers list as 1st result list
if ($results) {
@@ -1246,17 +1253,19 @@
} else {
#do a complete search (all indexes)
my $dbh = C4::Context->dbh;
- my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE
value LIKE ?");
+ my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE
server=? AND value LIKE ?");
# split each word, query the DB and build the biblionumbers result
foreach (split / /,$string) {
+# warn "search on all indexes on $_";
my $biblionumbers;
- $sth->execute($_);
+ next unless $_;
+ $sth->execute($server, $_);
while (my $line = $sth->fetchrow) {
$biblionumbers .= $line;
}
# do a AND with existing list if there is one, otherwise, use
the biblionumbers list as 1st result list
if ($results) {
- my @leftresult = split /,/, $biblionumbers;
+ my @leftresult = split /;/, $biblionumbers;
my $temp;
foreach (@leftresult) {
if ($results =~ "$_;") {
@@ -1269,6 +1278,7 @@
}
}
}
+# warn "return : $results for LEAF : $string";
return $results;
}
}
@@ -1303,7 +1313,7 @@
my $result_hash;
my $numbers=0;
if ($ordering eq '1=9523 >i') { # sort popularity DESC
- foreach my $key (sort {$b <=> $a} (keys %popularity)) {
+ foreach my $key (sort {$b cmp $a} (keys %popularity)) {
$result_hash->{'RECORDS'}[$numbers++] =
$result{$popularity{$key}}->as_usmarc();
}
} else { # sort popularity ASC
@@ -1337,12 +1347,12 @@
# sort the hash and return the same structure as GetRecords (Zebra
querying)
my $result_hash;
my $numbers=0;
- if ($ordering eq '1=1003 <i') { # sort by title desc
+ if ($ordering eq '1=1003 <i') { # sort by author desc
foreach my $key (sort (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] =
$result{$key}->as_usmarc();
}
- } else { # sort by title ASC
- foreach my $key (sort { $a <=> $b } (keys %result)) {
+ } else { # sort by author ASC
+ foreach my $key (sort { $a cmp $b } (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] =
$result{$key}->as_usmarc();
}
}
@@ -1378,7 +1388,7 @@
$result_hash->{'RECORDS'}[$numbers++] =
$result{$key}->as_usmarc();
}
} else { # sort by title ASC
- foreach my $key (sort { $a <=> $b } (keys %result)) {
+ foreach my $key (sort { $a cmp $b } (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] =
$result{$key}->as_usmarc();
}
}
@@ -1400,12 +1410,12 @@
# sort the hash and return the same structure as GetRecords (Zebra
querying)
my $result_hash;
my $numbers=0;
- if ($ordering eq '1=31 <i') { # sort by title desc
+ if ($ordering eq '1=31 <i') { # sort by pubyear desc
foreach my $key (sort (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] =
$result{$key}->as_usmarc();
}
- } else { # sort by title ASC
- foreach my $key (sort { $a <=> $b } (keys %result)) {
+ } else { # sort by pub year ASC
+ foreach my $key (sort { $b cmp $a } (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] =
$result{$key}->as_usmarc();
}
}
@@ -1435,7 +1445,7 @@
$result_hash->{'RECORDS'}[$numbers++] = $result{$key};
}
} else { # sort by title ASC
- foreach my $key (sort { $a <=> $b } (keys %result)) {
+ foreach my $key (sort { $b cmp $a } (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] = $result{$key};
}
}
@@ -1476,7 +1486,7 @@
# sort the hash and return the same structure as GetRecords (Zebra
querying)
my $result_hash;
my $numbers=0;
- foreach my $key (sort {$b <=> $a} (keys %result)) {
+ foreach my $key (sort {$b cmp $a} (keys %result)) {
$result_hash->{'RECORDS'}[$numbers++] = $result{$key};
}
# limit the $results_per_page to result size if it's more
Index: authorities/authorities.pl
===================================================================
RCS file: /sources/koha/koha/authorities/authorities.pl,v
retrieving revision 1.23
retrieving revision 1.24
diff -u -b -r1.23 -r1.24
--- authorities/authorities.pl 24 Apr 2007 13:54:29 -0000 1.23
+++ authorities/authorities.pl 10 May 2007 14:45:15 -0000 1.24
@@ -1,6 +1,6 @@
#!/usr/bin/perl
-# $Id: authorities.pl,v 1.23 2007/04/24 13:54:29 hdl Exp $
+# $Id: authorities.pl,v 1.24 2007/05/10 14:45:15 tipaul Exp $
# Copyright 2000-2002 Katipo Communications
#
@@ -392,7 +392,6 @@
#warn $record->as_formatted;
# check for a duplicate
my ($duplicateauthid,$duplicateauthvalue) =
FindDuplicateAuthority($record,$authtypecode) if ($op eq "add") &&
(!$is_a_modif);
-warn "duplicate:$duplicateauthid,$duplicateauthvalue";
my $confirm_not_duplicate = $input->param('confirm_not_duplicate');
# it is not a duplicate (determined either by Koha itself or by user checking
it's not a duplicate)
if (!$duplicateauthid or $confirm_not_duplicate) {
Index: misc/migration_tools/rebuild_nozebra.pl
===================================================================
RCS file: /sources/koha/koha/misc/migration_tools/rebuild_nozebra.pl,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -b -r1.4 -r1.5
--- misc/migration_tools/rebuild_nozebra.pl 4 May 2007 16:24:09 -0000
1.4
+++ misc/migration_tools/rebuild_nozebra.pl 10 May 2007 14:45:15 -0000
1.5
@@ -34,18 +34,15 @@
my $dbh=C4::Context->dbh;
$dbh->do("update systempreferences set value=1 where variable='NoZebra'");
$dbh->do("CREATE TABLE `nozebra` (
- `indexname` varchar(40) character set latin1 NOT NULL,
- `value` varchar(250) character set latin1 NOT NULL,
- `biblionumbers` longtext character set latin1 NOT NULL,
- KEY `indexname` (`indexname`),
- KEY `value` (`value`))
+ `server` varchar(20) NOT NULL,
+ `indexname` varchar(40) NOT NULL,
+ `value` varchar(250) NOT NULL,
+ `biblionumbers` longtext NOT NULL,
+ KEY `indexname` (`server`,`indexname`),
+ KEY `value` (`server`,`value`))
ENGINE=InnoDB DEFAULT CHARSET=utf8");
+
$dbh->do("truncate nozebra");
-my $sth;
-$sth=$dbh->prepare("select biblionumber from biblioitems order by biblionumber
$limit");
-$sth->execute();
-my $i=0;
-my %result;
my %index = GetNoZebraIndexes();
@@ -57,11 +54,11 @@
'issn' => '011a',
'biblionumber' =>'0909',
'itemtype' => '200b',
- 'language' => '010a',
+ 'language' => '101a',
'publisher' => '210x',
'date' => '210d',
'note' =>
'300a,301a,302a,303a,304a,305a,306az,307a,308a,309a,310a,311a,312a,313a,314a,315a,316a,317a,318a,319a,320a,321a,322a,323a,324a,325a,326a,327a,328a,330a,332a,333a,336a,337a,345a',
- 'Koha-Auth-Number' => '6009,6019,6029,6039,6049,6059,6069,6109',
+ 'Koha-Auth-Number' =>
'6009,6019,6029,6039,6049,6059,6069,6109,7009,7019,7029,7109,7119,7129',
'subject' => '600*,601*,606*,610*',
'dewey' => '676a',
'host-item' => '995a,995c',\" where variable='NoZebraIndexes'");
@@ -71,6 +68,15 @@
}
}
$|=1;
+
+print "***********************************\n";
+print "***** building BIBLIO indexes *****\n";
+print "***********************************\n";
+my $sth;
+$sth=$dbh->prepare("select biblionumber from biblioitems order by biblionumber
$limit");
+$sth->execute();
+my $i=0;
+my %result;
while (my ($biblionumber) = $sth->fetchrow) {
$i++;
print "\r$i";
@@ -99,7 +105,7 @@
$indexed=1;
my $line= lc $subfield->[1];
# remove meaningless value in the field...
- $line =~
s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/ /g;
+ $line =~
s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g;
# ... and split in words
foreach (split / /,$line) {
next unless $_; # skip empty values (multiple spaces)
@@ -135,7 +141,97 @@
}
}
}
-my $sth = $dbh->prepare("INSERT INTO nozebra (indexname,value,biblionumbers)
VALUES (?,?,?)");
+my $sth = $dbh->prepare("INSERT INTO nozebra
(server,indexname,value,biblionumbers) VALUES ('biblioserver',?,?,?)");
+foreach my $key (keys %result) {
+ foreach my $index (keys %{$result{$key}}) {
+ if (length($result{$key}->{$index}) > 1000000) {
+ print "very long index (".length($result{$key}->{$index}).")for
$key / $index. update mySQL config file if you have an error just after this
warning (max_paquet_size parameter)\n";
+ }
+ $sth->execute($key,$index,$result{$key}->{$index});
+ }
+}
+
+print "\n***********************************\n";
+print "***** building AUTHORITIES indexes *****\n";
+print "***********************************\n";
+
+my $sth;
+$sth=$dbh->prepare("select authid from auth_header order by authid $limit");
+$sth->execute();
+my $i=0;
+my %result;
+while (my ($authid) = $sth->fetchrow) {
+ $i++;
+ print "\r$i";
+ my $record = GetAuthority($authid);
+
+ my %index;
+ # for authorities, the "title" is the $a mainentry
+ my $authref = C4::AuthoritiesMarc::GetAuthType($record->subfield(152,'b'));
+ use Data::Dumper;
+# warn "for $authid / ".$record->as_formatted. "Dumper :
".Dumper($authref);
+ warn "ERROR : authtype undefined for ".$record->as_formatted unless
$authref;
+ my $title = $record->subfield($authref->{auth_tag_to_report},'a');
+ $index{'mainmainentry'}= $authref->{'auth_tag_to_report'}.'a';
+ $index{'mainentry'} = $authref->{'auth_tag_to_report'}.'*';
+ $index{'auth_type'} = '152b';
+
+ # remove blancks comma (that could cause problem when decoding the string
for CQL retrieval) and regexp specific values
+ $title =~ s/ |,|;|\[|\]|\(|\)|\*|-|'|=//g;
+ # limit to 10 char, should be enough, and limit the DB size
+ $title = substr($title,0,10);
+ #parse each field
+ foreach my $field ($record->fields()) {
+ #parse each subfield
+ next if $field->tag <10;
+ foreach my $subfield ($field->subfields()) {
+ my $tag = $field->tag();
+ my $subfieldcode = $subfield->[0];
+ my $indexed=0;
+ # check each index to see if the subfield is stored somewhere
+ # otherwise, store it in __RAW__ index
+ foreach my $key (keys %index) {
+ if ($index{$key} =~ /$tag\*/ or $index{$key} =~
/$tag$subfieldcode/) {
+ $indexed=1;
+ my $line= lc $subfield->[1];
+ # remove meaningless value in the field...
+ $line =~
s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g;
+ # ... and split in words
+ foreach (split / /,$line) {
+ next unless $_; # skip empty values (multiple spaces)
+ # if the entry is already here, improve weight
+ if ($result{$key}->{$_} =~ /$authid,$title\-(\d);/) {
+ my $weight=$1+1;
+ $result{$key}->{$_} =~ s/$authid,$title\-(\d);//;
+ $result{$key}->{$_} .= "$authid,$title-$weight;";
+ # otherwise, create it, with weight=1
+ } else {
+ $result{$key}->{$_}.="$authid,$title-1;";
+ }
+ }
+ }
+ }
+ # the subfield is not indexed, store it in __RAW__ index anyway
+ unless ($indexed) {
+ my $line= lc $subfield->[1];
+ $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=/
/g;
+ foreach (split / /,$line) {
+ next unless $_;
+# warn $record->as_formatted."$_ =>".$title;
+ if ($result{__RAW__}->{$_} =~ /$authid,$title\-(\d);/)
{
+ my $weight=$1+1;
+# $weight++;
+ $result{__RAW__}->{$_} =~
s/$authid,$title\-(\d);//;
+ $result{__RAW__}->{$_} .=
"$authid,$title-$weight;";
+ } else {
+ $result{__RAW__}->{$_}.="$authid,$title-1;";
+ }
+ }
+ }
+ }
+ }
+}
+my $sth = $dbh->prepare("INSERT INTO nozebra
(server,indexname,value,biblionumbers) VALUES ('authorityserver',?,?,?)");
foreach my $key (keys %result) {
foreach my $index (keys %{$result{$key}}) {
if (length($result{$key}->{$index}) > 1000000) {
Index: misc/migration_tools/rebuild_zebra.pl
===================================================================
RCS file: /sources/koha/koha/misc/migration_tools/rebuild_zebra.pl,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -b -r1.7 -r1.8
--- misc/migration_tools/rebuild_zebra.pl 17 Apr 2007 08:50:33 -0000
1.7
+++ misc/migration_tools/rebuild_zebra.pl 10 May 2007 14:45:15 -0000
1.8
@@ -14,7 +14,7 @@
$|=1; # flushes output
# limit for database dumping
-my $limit = "LIMIT 500";
+my $limit;# = "LIMIT 500";
my $directory;
my $skip_export;
my $keep_export;
Index: opac/opac-rss.pl
===================================================================
RCS file: /sources/koha/koha/opac/opac-rss.pl,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -b -r1.2 -r1.3
--- opac/opac-rss.pl 9 May 2007 10:18:11 -0000 1.2
+++ opac/opac-rss.pl 10 May 2007 14:45:15 -0000 1.3
@@ -68,7 +68,7 @@
$query =~ s/:/=/g;
# the number of lines to retrieve
-my $size=$cgi->param('size') || 20;
+my $size=$cgi->param('size') || 50;
# the filename of the cached rdf file.
my $filename = md5_base64($query);
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Koha-cvs] koha C4/AuthoritiesMarc.pm C4/Biblio.pm C4/Sear...,
paul poulain <=