[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Koha-cvs] CVS: koha/misc/translator TmplTokenizer.pm,1.5,1.6 text-extra
From: |
Ambrose Li |
Subject: |
[Koha-cvs] CVS: koha/misc/translator TmplTokenizer.pm,1.5,1.6 text-extract2.pl,1.38,1.39 |
Date: |
Mon, 16 Feb 2004 21:07:06 -0800 |
Update of /cvsroot/koha/koha/misc/translator
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30220
Modified Files:
TmplTokenizer.pm text-extract2.pl
Log Message:
Converted TmplTokenizer into a class. Everything still seems ok, but it is
not tested thoroughly.
Index: TmplTokenizer.pm
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/TmplTokenizer.pm,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -r1.5 -r1.6
*** TmplTokenizer.pm 17 Feb 2004 03:17:48 -0000 1.5
--- TmplTokenizer.pm 17 Feb 2004 05:07:04 -0000 1.6
***************
*** 13,17 ****
=head1 NAME
! TmplTokenizer.pm - Simple-minded tokenizer for HTML::Template .tmpl files
=head1 DESCRIPTION
--- 13,17 ----
=head1 NAME
! TmplTokenizer.pm - Simple-minded tokenizer class for HTML::Template .tmpl
files
=head1 DESCRIPTION
***************
*** 32,45 ****
###############################################################################
! $VERSION = 0.01;
@ISA = qw(Exporter);
@EXPORT_OK = qw();
- use vars qw( $input );
- use vars qw( $debug_dump_only_p );
use vars qw( $pedantic_attribute_error_in_nonpedantic_mode_p );
use vars qw( $pedantic_tmpl_var_use_in_nonpedantic_mode_p );
- use vars qw( $fatal_p );
###############################################################################
--- 32,42 ----
###############################################################################
! $VERSION = 0.02;
@ISA = qw(Exporter);
@EXPORT_OK = qw();
use vars qw( $pedantic_attribute_error_in_nonpedantic_mode_p );
use vars qw( $pedantic_tmpl_var_use_in_nonpedantic_mode_p );
###############################################################################
***************
*** 85,106 ****
# End of the hideous stuff
! use vars qw( @readahead $lc_0 $lc $syntaxerror_p );
! use vars qw( $cdata_mode_p $cdata_close );
###############################################################################
! # Easy accessors
! sub fatal_p () {
! return $fatal_p;
}
! sub syntaxerror_p () {
! return $syntaxerror_p;
}
###############################################################################
! sub extract_attributes ($;$) {
my($s, $lc) = @_;
my %attr;
--- 82,234 ----
# End of the hideous stuff
! use vars qw( $serial );
###############################################################################
! sub FATAL_P () {'fatal-p'}
! sub SYNTAXERROR_P () {'syntaxerror-p'}
! sub FILENAME () {'input'}
! sub HANDLE () {'handle'}
!
! sub READAHEAD () {'readahead'}
! sub LINENUM_START () {'lc_0'}
! sub LINENUM () {'lc'}
! sub CDATA_MODE_P () {'cdata-mode-p'}
! sub CDATA_CLOSE () {'cdata-close'}
!
! sub new {
! my $this = shift;
! my($input) = @_;
! my $class = ref($this) || $this;
! my $self = {};
! bless $self, $class;
!
! my $handle = sprintf('TMPLTOKENIZER%d', $serial);
! $serial += 1;
!
! no strict;
! open($handle, "<$input") || die "$input: $!\n";
! use strict;
! $self->{+FILENAME} = $input;
! $self->{+HANDLE} = $handle;
! $self->{+READAHEAD} = [];
! return $self;
! }
!
!
###############################################################################
!
! # Simple getters
!
! sub _handle {
! my $this = shift;
! return $this->{+HANDLE};
! }
!
! sub fatal_p {
! my $this = shift;
! return $this->{+FATAL_P};
! }
!
! sub syntaxerror_p {
! my $this = shift;
! return $this->{+SYNTAXERROR_P};
! }
!
! sub has_readahead_p {
! my $this = shift;
! return @{$this->{+READAHEAD}};
! }
!
! sub _peek_readahead {
! my $this = shift;
! return $this->{+READAHEAD}->[$#{$this->{+READAHEAD}}];
}
! sub line_number_start {
! my $this = shift;
! return $this->{+LINENUM_START};
! }
!
! sub line_number {
! my $this = shift;
! return $this->{+LINENUM};
! }
!
! sub cdata_mode_p {
! my $this = shift;
! return $this->{+CDATA_MODE_P};
! }
!
! sub cdata_close {
! my $this = shift;
! return $this->{+CDATA_CLOSE};
! }
!
! # Simple setters
!
! sub _set_fatal {
! my $this = shift;
! $this->{+FATAL_P} = $_[0];
! return $this;
! }
!
! sub _set_syntaxerror {
! my $this = shift;
! $this->{+SYNTAXERROR_P} = $_[0];
! return $this;
! }
!
! sub _push_readahead {
! my $this = shift;
! push @{$this->{+READAHEAD}}, $_[0];
! return $this;
! }
!
! sub _pop_readahead {
! my $this = shift;
! return pop @{$this->{+READAHEAD}};
! }
!
! sub _append_readahead {
! my $this = shift;
! $this->{+READAHEAD}->[$#{$this->{+READAHEAD}}] .= $_[0];
! return $this;
! }
!
! sub _set_readahead {
! my $this = shift;
! $this->{+READAHEAD}->[$#{$this->{+READAHEAD}}] = $_[0];
! return $this;
! }
!
! sub _increment_line_number {
! my $this = shift;
! $this->{+LINENUM} += 1;
! return $this;
! }
!
! sub _set_line_number_start {
! my $this = shift;
! $this->{+LINENUM_START} = $_[0];
! return $this;
! }
!
! sub _set_cdata_mode {
! my $this = shift;
! $this->{+CDATA_MODE_P} = $_[0];
! return $this;
! }
!
! sub _set_cdata_close {
! my $this = shift;
! $this->{+CDATA_CLOSE} = $_[0];
! return $this;
}
###############################################################################
! sub _extract_attributes ($;$) {
! my $this = shift;
my($s, $lc) = @_;
my %attr;
***************
*** 140,144 ****
error_normal("Completely confused while extracting attributes: $1",
$lc);
error_normal((scalar(split(/\n/, $s)) - 1) . " more line(s) not
shown.", undef);
! $fatal_p = 1;
} else {
warn_normal "Strange attribute syntax: $s\n", $lc;
--- 268,272 ----
error_normal("Completely confused while extracting attributes: $1",
$lc);
error_normal((scalar(split(/\n/, $s)) - 1) . " more line(s) not
shown.", undef);
! $this->_set_fatal( 1 );
} else {
warn_normal "Strange attribute syntax: $s\n", $lc;
***************
*** 148,198 ****
}
! sub next_token_internal (*) {
my($h) = @_;
my($it, $kind);
my $eof_p = 0;
! pop @readahead if @readahead && !ref $readahead[$#readahead]
! && !length $readahead[$#readahead];
! if (address@hidden) {
my $next = scalar <$h>;
$eof_p = !defined $next;
if (!$eof_p) {
! $lc += 1;
! push @readahead, $next;
}
}
! $lc_0 = $lc; # remember line number of first line
! if (@readahead && ref $readahead[$#readahead]) { # TmplToken object
! my $t = pop @readahead;
! ($it, $kind, local $lc) = ($t->string, $t->type, $t->line_number);
! } elsif ($eof_p && address@hidden) { # nothing left to do
;
! } elsif ($readahead[$#readahead] =~ /^\s+/s) { # whitespace
! ($kind, $it, $readahead[$#readahead]) = (TmplTokenType::TEXT, $&, $');
# FIXME the following (the [<\s] part) is an unreliable HACK :-(
! } elsif ($readahead[$#readahead] =~ /^(?:[^<]|<[<\s])+/s) { #
non-space normal text
! ($kind, $it, $readahead[$#readahead]) = (TmplTokenType::TEXT, $&, $');
! warn_normal "Unescaped < in $it\n", $lc_0
! if !$cdata_mode_p && $it =~ /</s;
} else { # tag/declaration/processing instruction
my $ok_p = 0;
! for (;;) {
! if ($cdata_mode_p) {
! if ($readahead[$#readahead] =~ /^$cdata_close/) {
! ($kind, $it, $readahead[$#readahead]) =
(TmplTokenType::TAG, $&, $');
$ok_p = 1;
} else {
! ($kind, $it) = (TmplTokenType::TEXT, pop @readahead);
$ok_p = 1;
}
! } elsif ($readahead[$#readahead] =~ /^$re_tag_compat/os) {
! ($kind, $it, $readahead[$#readahead]) = (TmplTokenType::TAG,
"$1>", $3);
$ok_p = 1;
! warn_normal "SGML \"closed start tag\" notation: $1<\n", $lc_0
if $2 eq '';
! } elsif ($readahead[$#readahead] =~ /^<!--(?:(?!-->).)*-->/s) {
! ($kind, $it, $readahead[$#readahead]) =
(TmplTokenType::COMMENT, $&, $');
$ok_p = 1;
! warn_normal "Syntax error in comment: $&\n", $lc_0;
! $syntaxerror_p = 1;
}
last if $ok_p;
--- 276,332 ----
}
! sub _next_token_internal {
! my $this = shift;
my($h) = @_;
my($it, $kind);
my $eof_p = 0;
! $this->_pop_readahead if $this->has_readahead_p
! && !ref $this->_peek_readahead
! && !length $this->_peek_readahead;
! if (!$this->has_readahead_p) {
my $next = scalar <$h>;
$eof_p = !defined $next;
if (!$eof_p) {
! $this->_increment_line_number;
! $this->_push_readahead( $next );
}
}
! $this->_set_line_number_start( $this->line_number ); # remember 1st line
num
! if ($this->has_readahead_p && ref $this->_peek_readahead) { #
TmplToken obj.
! ($it, $kind) = ($this->_pop_readahead, undef);
! } elsif ($eof_p && !$this->has_readahead_p) { # nothing left to do
;
! } elsif ($this->_peek_readahead =~ /^\s+/s) { # whitespace
! ($kind, $it) = (TmplTokenType::TEXT, $&);
! $this->_set_readahead( $' );
# FIXME the following (the [<\s] part) is an unreliable HACK :-(
! } elsif ($this->_peek_readahead =~ /^(?:[^<]|<[<\s])+/s) { #
non-space normal text
! ($kind, $it) = (TmplTokenType::TEXT, $&);
! $this->_set_readahead( $' );
! warn_normal "Unescaped < in $it\n", $this->line_number_start
! if !$this->cdata_mode_p && $it =~ /</s;
} else { # tag/declaration/processing instruction
my $ok_p = 0;
! for (my $cdata_close = $this->cdata_close;;) {
! if ($this->cdata_mode_p) {
! if ($this->_peek_readahead =~ /^$cdata_close/) {
! ($kind, $it) = (TmplTokenType::TAG, $&);
! $this->_set_readahead( $' );
$ok_p = 1;
} else {
! ($kind, $it) = (TmplTokenType::TEXT, $this->_pop_readahead);
$ok_p = 1;
}
! } elsif ($this->_peek_readahead =~ /^$re_tag_compat/os) {
! ($kind, $it) = (TmplTokenType::TAG, "$1>");
! $this->_set_readahead( $3 );
$ok_p = 1;
! warn_normal "SGML \"closed start tag\" notation: $1<\n",
$this->line_number_start if $2 eq '';
! } elsif ($this->_peek_readahead =~ /^<!--(?:(?!-->).)*-->/s) {
! ($kind, $it) = (TmplTokenType::COMMENT, $&);
! $this->_set_readahead( $' );
$ok_p = 1;
! warn_normal "Syntax error in comment: $&\n",
$this->line_number_start;
! $this->_set_syntaxerror( 1 );
}
last if $ok_p;
***************
*** 200,205 ****
$eof_p = !defined $next;
last if $eof_p;
! $lc += 1;
! $readahead[$#readahead] .= $next;
}
if ($kind ne TmplTokenType::TAG) {
--- 334,339 ----
$eof_p = !defined $next;
last if $eof_p;
! $this->_increment_line_number;
! $this->_append_readahead( $next );
}
if ($kind ne TmplTokenType::TAG) {
***************
*** 211,252 ****
$kind = TmplTokenType::PI;
}
! if ($it =~ /^$re_directive/ios && !$cdata_mode_p) {
$kind = TmplTokenType::DIRECTIVE;
}
if (!$ok_p && $eof_p) {
! ($kind, $it, $readahead[$#readahead]) = (TmplTokenType::UNKNOWN,
$readahead[$#readahead], undef);
! $syntaxerror_p = 1;
}
}
! warn_normal "Unrecognizable token found: $it\n", $lc_0
if $kind eq TmplTokenType::UNKNOWN;
! return defined $it? TmplToken->new($it, $kind, $lc): undef;
}
! sub next_token (*) {
! my($h) = @_;
my $it;
! if (!$cdata_mode_p) {
! $it = next_token_internal($h);
if (defined $it && $it->type eq TmplTokenType::TAG) {
! ($cdata_mode_p, $cdata_close) = (1, "</$1\\s*>")
! if $it->string =~ /^<(script|style|textarea)\b/i;
! $it->set_attributes( extract_attributes($it->string, $lc_0) );
}
} else {
! for ($it = '';;) {
! my $lc_prev = $lc;
! my $next = next_token_internal($h);
last if !defined $next;
if (defined $next && $next->string =~ /$cdata_close/i) {
! push @readahead, $next; # push the entire TmplToken object
! #$lc = $lc_prev; XXX
! $cdata_mode_p = 0;
}
! last unless $cdata_mode_p;
$it .= $next->string;
}
! $it = TmplToken->new( $it, TmplTokenType::CDATA, $lc );
! $cdata_close = undef;
}
return $it;
--- 345,388 ----
$kind = TmplTokenType::PI;
}
! if ($it =~ /^$re_directive/ios && !$this->cdata_mode_p) {
$kind = TmplTokenType::DIRECTIVE;
}
if (!$ok_p && $eof_p) {
! ($kind, $it) = (TmplTokenType::UNKNOWN, $this->_peek_readahead);
! $this->_set_readahead, undef;
! $this->_set_syntaxerror( 1 );
}
}
! warn_normal "Unrecognizable token found: $it\n", $this->line_number_start
if $kind eq TmplTokenType::UNKNOWN;
! return defined $it? (ref $it? $it: TmplToken->new($it, $kind,
$this->line_number)): undef;
}
! sub next_token {
! my $this = shift;
! my $h = $this->_handle;
my $it;
! if (!$this->cdata_mode_p) {
! $it = $this->_next_token_internal($h);
if (defined $it && $it->type eq TmplTokenType::TAG) {
! if ($it->string =~ /^<(script|style|textarea)\b/i) {
! $this->_set_cdata_mode( 1 );
! $this->_set_cdata_close( "</$1\\s*>" );
! }
! $it->set_attributes( $this->_extract_attributes($it->string,
$it->line_number) );
}
} else {
! for ($it = '', my $cdata_close = $this->cdata_close;;) {
! my $next = $this->_next_token_internal($h);
last if !defined $next;
if (defined $next && $next->string =~ /$cdata_close/i) {
! $this->_push_readahead( $next ); # push entire TmplToken object
! $this->_set_cdata_mode( 0 );
}
! last unless $this->cdata_mode_p;
$it .= $next->string;
}
! $it = TmplToken->new( $it, TmplTokenType::CDATA, $this->line_number );
! $this->_set_cdata_close, undef;
}
return $it;
Index: text-extract2.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/text-extract2.pl,v
retrieving revision 1.38
retrieving revision 1.39
diff -C2 -r1.38 -r1.39
*** text-extract2.pl 17 Feb 2004 03:02:39 -0000 1.38
--- text-extract2.pl 17 Feb 2004 05:07:04 -0000 1.39
***************
*** 26,30 ****
###############################################################################
! sub debug_dump (*) { # for testing only
my($h) = @_;
print "re_tag_compat is /", TmplTokenizer::re_tag(1), "/\n";
--- 26,30 ----
###############################################################################
! sub debug_dump ($) { # for testing only
my($h) = @_;
print "re_tag_compat is /", TmplTokenizer::re_tag(1), "/\n";
***************
*** 51,55 ****
###############################################################################
! sub text_extract (*) {
my($h) = @_;
my %text = ();
--- 51,55 ----
###############################################################################
! sub text_extract ($) {
my($h) = @_;
my %text = ();
***************
*** 125,133 ****
usage_error('Missing mandatory option -f') unless defined $input;
! open(INPUT, "<$input") || die "$0: $input: $!\n";
if ($debug_dump_only_p) {
! debug_dump(*INPUT);
} else {
! text_extract(*INPUT);
}
--- 125,133 ----
usage_error('Missing mandatory option -f') unless defined $input;
! my $h = TmplTokenizer->new( $input );
if ($debug_dump_only_p) {
! debug_dump( $h );
} else {
! text_extract( $h );
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Koha-cvs] CVS: koha/misc/translator TmplTokenizer.pm,1.5,1.6 text-extract2.pl,1.38,1.39,
Ambrose Li <=
- Prev by Date:
[Koha-cvs] CVS: koha/misc/translator TmplTokenizer.pm,1.4,1.5
- Next by Date:
[Koha-cvs] CVS: koha/misc/translator TmplTokenizer.pm,1.6,1.7 TmplToken.pm,1.1,1.2
- Previous by thread:
[Koha-cvs] CVS: koha/misc/translator TmplTokenizer.pm,1.4,1.5
- Next by thread:
[Koha-cvs] CVS: koha/misc/translator TmplTokenizer.pm,1.6,1.7 TmplToken.pm,1.1,1.2
- Index(es):