# XML::Parser # # Copyright (c) 1998-2000 Larry Wall and Clark Cooper # All rights reserved. # # This program is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. package XML::Parser; use Carp; use IO::File; BEGIN { require XML::Parser::Expat; $VERSION = '2.30'; die "Parser.pm and Expat.pm versions don't match" unless $VERSION eq $XML::Parser::Expat::VERSION; } use strict; use vars qw($VERSION %Built_In_Styles $LWP_load_failed); $LWP_load_failed = 0; sub new { my ($class, %args) = @_; my $style = $args{Style}; my $nonexopt = $args{Non_Expat_Options} ||= {}; $nonexopt->{Style} = 1; $nonexopt->{Non_Expat_Options} = 1; $nonexopt->{Handlers} = 1; $nonexopt->{_HNDL_TYPES} = 1; $nonexopt->{NoLWP} = 1; $args{_HNDL_TYPES} = {%XML::Parser::Expat::Handler_Setters}; $args{_HNDL_TYPES}->{Init} = 1; $args{_HNDL_TYPES}->{Final} = 1; $args{Handlers} ||= {}; my $handlers = $args{Handlers}; if (defined($style)) { my $stylepkg = $style; if ($stylepkg !~ /::/) { $stylepkg = "\u$style"; # I'm using the Built_In_Styles hash to define # valid internal styles, since a style doesn't # need to define any particular Handler to be valid. # So I can't check for the existence of a particular sub. croak "Undefined style: $style" unless defined($Built_In_Styles{$stylepkg}); $stylepkg = 'XML::Parser::' . $stylepkg; } my $htype; foreach $htype (keys %{$args{_HNDL_TYPES}}) { # Handlers explicity given override # handlers from the Style package unless (defined($handlers->{$htype})) { # A handler in the style package must either have # exactly the right case as the type name or a # completely lower case version of it. my $hname = "${stylepkg}::$htype"; if (defined(&$hname)) { $handlers->{$htype} = \&$hname; next; } $hname = "${stylepkg}::\L$htype"; if (defined(&$hname)) { $handlers->{$htype} = \&$hname; next; } } } } unless (defined($handlers->{ExternEnt}) or defined ($handlers->{ExternEntFin})) { if ($args{NoLWP} or $LWP_load_failed) { $handlers->{ExternEnt} = \&file_ext_ent_handler; $handlers->{ExternEntFin} = \&file_ext_ent_cleanup; } else { # The following just bootstraps the real LWP external entity # handler $handlers->{ExternEnt} = \&initial_ext_ent_handler; # No cleanup function available until LWPExternEnt.pl loaded } } $args{Pkg} ||= caller; bless \%args, $class; } # End of new sub setHandlers { my ($self, @handler_pairs) = @_; croak("Uneven number of arguments to setHandlers method") if (int(@handler_pairs) & 1); my @ret; while (@handler_pairs) { my $type = shift @handler_pairs; my $handler = shift @handler_pairs; unless (defined($self->{_HNDL_TYPES}->{$type})) { my @types = sort keys %{$self->{_HNDL_TYPES}}; croak("Unknown Parser handler type: $type\n Valid types: @types"); } push(@ret, $type, $self->{Handlers}->{$type}); $self->{Handlers}->{$type} = $handler; } return @ret; } # End of setHandlers sub parse_start { my $self = shift; my @expat_options = (); my ($key, $val); while (($key, $val) = each %{$self}) { push (@expat_options, $key, $val) unless exists $self->{Non_Expat_Options}->{$key}; } my %handlers = %{$self->{Handlers}}; my $init = delete $handlers{Init}; my $final = delete $handlers{Final}; my $expatnb = new XML::Parser::ExpatNB(@expat_options, @_); $expatnb->setHandlers(%handlers); &$init($expatnb) if defined($init); $expatnb->{_State_} = 1; $expatnb->{FinalHandler} = $final if defined($final); return $expatnb; } sub parse { my $self = shift; my $arg = shift; my @expat_options = (); my ($key, $val); while (($key, $val) = each %{$self}) { push(@expat_options, $key, $val) unless exists $self->{Non_Expat_Options}->{$key}; } my $expat = new XML::Parser::Expat(@expat_options, @_); my %handlers = %{$self->{Handlers}}; my $init = delete $handlers{Init}; my $final = delete $handlers{Final}; $expat->setHandlers(%handlers); if ($self->{Base}) { $expat->base($self->{Base}); } &$init($expat) if defined($init); my @result = (); my $result; eval { $result = $expat->parse($arg); }; my $err = $@; if ($err) { $expat->release; die $err; } if ($result and defined($final)) { if (wantarray) { @result = &$final($expat); } else { $result = &$final($expat); } } $expat->release; return unless defined wantarray; return wantarray ? @result : $result; } # End of parse sub parsestring { my $self = shift; $self->parse(@_); } # End of parsestring sub parsefile { my $self = shift; my $file = shift; local(*FILE); open(FILE, $file) or croak "Couldn't open $file:\n$!"; binmode(FILE); my @ret; my $ret; $self->{Base} = $file; if (wantarray) { eval { @ret = $self->parse(*FILE, @_); }; } else { eval { $ret = $self->parse(*FILE, @_); }; } my $err = $@; close(FILE); die $err if $err; return unless defined wantarray; return wantarray ? @ret : $ret; } # End of parsefile sub initial_ext_ent_handler { # This just bootstraps in the real lwp_ext_ent_handler which # also loads the URI and LWP modules. unless ($LWP_load_failed) { local($^W) = 0; my $stat = eval { require('XML/Parser/LWPExternEnt.pl'); }; if ($stat) { $_[0]->setHandlers(ExternEnt => \&lwp_ext_ent_handler, ExternEntFin => \&lwp_ext_ent_cleanup); goto &lwp_ext_ent_handler; } # Failed to load lwp handler, act as if NoLWP $LWP_load_failed = 1; my $cmsg = "Couldn't load LWP based external entity handler\n"; $cmsg .= "Switching to file-based external entity handler\n"; $cmsg .= " (To avoid this message, use NoLWP option to XML::Parser)\n"; warn($cmsg); } $_[0]->setHandlers(ExternEnt => \&file_ext_ent_handler, ExternEntFin => \&file_ext_ent_cleanup); goto &file_ext_ent_handler; } # End initial_ext_ent_handler sub file_ext_ent_handler { my ($xp, $base, $path) = @_; # Prepend base only for relative paths if (defined($base) and not ($path =~ m!^(?:[\\/]|\w+:)!)) { my $newpath = $base; $newpath =~ s![^\\/:]*$!$path!; $path = $newpath; } if ($path =~ /^\s*[|>+]/ or $path =~ /\|\s*$/) { $xp->{ErrorMessage} .= "System ID ($path) contains Perl IO control characters"; return undef; } my $fh = new IO::File($path); unless (defined $fh) { $xp->{ErrorMessage} .= "Failed to open $path:\n$!"; return undef; } $xp->{_BaseStack} ||= []; $xp->{_FhStack} ||= []; push(@{$xp->{_BaseStack}}, $base); push(@{$xp->{_FhStack}}, $fh); $xp->base($path); return $fh; } # End file_ext_ent_handler sub file_ext_ent_cleanup { my ($xp) = @_; my $fh = pop(@{$xp->{_FhStack}}); $fh->close; my $base = pop(@{$xp->{_BaseStack}}); $xp->base($base); } # End file_ext_ent_cleanup ################################################################### package XML::Parser::Debug; $XML::Parser::Built_In_Styles{Debug} = 1; sub Start { my $expat = shift; my $tag = shift; print STDERR "@{$expat->{Context}} \\\\ (@_)\n"; } sub End { my $expat = shift; my $tag = shift; print STDERR "@{$expat->{Context}} //\n"; } sub Char { my $expat = shift; my $text = shift; $text =~ s/([\x80-\xff])/sprintf "#x%X;", ord $1/eg; $text =~ s/([\t\n])/sprintf "#%d;", ord $1/eg; print STDERR "@{$expat->{Context}} || $text\n"; } sub Proc { my $expat = shift; my $target = shift; my $text = shift; my @foo = @{$expat->{Context}}; print STDERR "@foo $target($text)\n"; } ################################################################### package XML::Parser::Subs; $XML::Parser::Built_In_Styles{Subs} = 1; sub Start { no strict 'refs'; my $expat = shift; my $tag = shift; my $sub = $expat->{Pkg} . "::$tag"; eval { &$sub($expat, $tag, @_) }; } sub End { no strict 'refs'; my $expat = shift; my $tag = shift; my $sub = $expat->{Pkg} . "::${tag}_"; eval { &$sub($expat, $tag) }; } ################################################################### package XML::Parser::Tree; $XML::Parser::Built_In_Styles{Tree} = 1; sub Init { my $expat = shift; $expat->{Lists} = []; $expat->{Curlist} = $expat->{Tree} = []; } sub Start { my $expat = shift; my $tag = shift; my $newlist = [ { @_ } ]; push @{ $expat->{Lists} }, $expat->{Curlist}; push @{ $expat->{Curlist} }, $tag => $newlist; $expat->{Curlist} = $newlist; } sub End { my $expat = shift; my $tag = shift; $expat->{Curlist} = pop @{ $expat->{Lists} }; } sub Char { my $expat = shift; my $text = shift; my $clist = $expat->{Curlist}; my $pos = $#$clist; if ($pos > 0 and $clist->[$pos - 1] eq '0') { $clist->[$pos] .= $text; } else { push @$clist, 0 => $text; } } sub Final { my $expat = shift; delete $expat->{Curlist}; delete $expat->{Lists}; $expat->{Tree}; } ################################################################### package XML::Parser::Objects; $XML::Parser::Built_In_Styles{Objects} = 1; sub Init { my $expat = shift; $expat->{Lists} = []; $expat->{Curlist} = $expat->{Tree} = []; } sub Start { my $expat = shift; my $tag = shift; my $newlist = [ ]; my $class = "${$expat}{Pkg}::$tag"; my $newobj = bless { @_, Kids => $newlist }, $class; push @{ $expat->{Lists} }, $expat->{Curlist}; push @{ $expat->{Curlist} }, $newobj; $expat->{Curlist} = $newlist; } sub End { my $expat = shift; my $tag = shift; $expat->{Curlist} = pop @{ $expat->{Lists} }; } sub Char { my $expat = shift; my $text = shift; my $class = "${$expat}{Pkg}::Characters"; my $clist = $expat->{Curlist}; my $pos = $#$clist; if ($pos >= 0 and ref($clist->[$pos]) eq $class) { $clist->[$pos]->{Text} .= $text; } else { push @$clist, bless { Text => $text }, $class; } } sub Final { my $expat = shift; delete $expat->{Curlist}; delete $expat->{Lists}; $expat->{Tree}; } ################################################################ package XML::Parser::Stream; $XML::Parser::Built_In_Styles{Stream} = 1; # This style invented by Tim Bray
sub Init { no strict 'refs'; my $expat = shift; $expat->{Text} = ''; my $sub = $expat->{Pkg} ."::StartDocument"; &$sub($expat) if defined(&$sub); } sub Start { no strict 'refs'; my $expat = shift; my $type = shift; doText($expat); $_ = "<$type"; %_ = @_; while (@_) { $_ .= ' ' . shift() . '="' . shift() . '"'; } $_ .= '>'; my $sub = $expat->{Pkg} . "::StartTag"; if (defined(&$sub)) { &$sub($expat, $type); } else { print; } } sub End { no strict 'refs'; my $expat = shift; my $type = shift; # Set right context for Text handler push(@{$expat->{Context}}, $type); doText($expat); pop(@{$expat->{Context}}); $_ = "$type>"; my $sub = $expat->{Pkg} . "::EndTag"; if (defined(&$sub)) { &$sub($expat, $type); } else { print; } } sub Char { my $expat = shift; $expat->{Text} .= shift; } sub Proc { no strict 'refs'; my $expat = shift; my $target = shift; my $text = shift; doText($expat); $_ = "$target $text?>"; my $sub = $expat->{Pkg} . "::PI"; if (defined(&$sub)) { &$sub($expat, $target, $text); } else { print; } } sub Final { no strict 'refs'; my $expat = shift; my $sub = $expat->{Pkg} . "::EndDocument"; &$sub($expat) if defined(&$sub); } sub doText { no strict 'refs'; my $expat = shift; $_ = $expat->{Text}; if (length($_)) { my $sub = $expat->{Pkg} . "::Text"; if (defined(&$sub)) { &$sub($expat); } else { print; } $expat->{Text} = ''; } } 1; __END__ =head1 NAME XML::Parser - A perl module for parsing XML documents =head1 SYNOPSIS use XML::Parser; $p1 = new XML::Parser(Style => 'Debug'); $p1->parsefile('REC-xml-19980210.xml'); $p1->parse('