diff options
| author | Paul Buetow (pluto.buetow.org) <paul@buetow.org> | 2013-09-15 11:51:10 +0200 |
|---|---|---|
| committer | Paul Buetow (pluto.buetow.org) <paul@buetow.org> | 2013-09-15 11:51:10 +0200 |
| commit | 6aa12ae5f556ab884b7705379c41a566df86d028 (patch) | |
| tree | 85441b3a046f58970ebe9ad55460c16e4f4d3d01 /Xerl/XML/Reader.pm | |
| parent | c183faa4d53b6e4f091d6b38397847e55b5d2251 (diff) | |
temp remove includedirs tag, perltidy indention set to 2, initial XML::LibXML
Diffstat (limited to 'Xerl/XML/Reader.pm')
| -rw-r--r-- | Xerl/XML/Reader.pm | 282 |
1 files changed, 123 insertions, 159 deletions
diff --git a/Xerl/XML/Reader.pm b/Xerl/XML/Reader.pm index 2562fea..e31ef11 100644 --- a/Xerl/XML/Reader.pm +++ b/Xerl/XML/Reader.pm @@ -1,6 +1,6 @@ # Xerl (c) 2005-2011,2013 Dipl.-Inform. (FH) Paul C. Buetow # -# E-Mail: xerl@dev.buetow.org WWW: http://xerl.buetow.org +# E-Mail: xerl@dev.buetow.org WWW: http://xerl.buetow.org # # All rights reserved. # @@ -12,8 +12,8 @@ # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of buetow.org nor the names of its contributors may -# be used to endorse or promote products derived from this software -# without specific prior written permission. +# be used to endorse or promote products derived from this software +# without specific prior written permission. # # THIS SOFTWARE IS PROVIDED Paul C. Buetow ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED @@ -32,172 +32,136 @@ package Xerl::XML::Reader; use strict; use warnings; +use XML::LibXML; + use Xerl::Base; use Xerl::XML::Element; +sub newparse($) { + my Xerl::XML::Reader $self = shift; + + return undef; +} + sub open($) { - my Xerl::XML::Reader $self = $_[0]; + my Xerl::XML::Reader $self = $_[0]; - my Xerl::Tools::FileIO $xmlfile = - Xerl::Tools::FileIO->new( path => $self->get_path() ); + my Xerl::Tools::FileIO $xmlfile = + Xerl::Tools::FileIO->new( path => $self->get_path() ); - return -1 if -1 == $xmlfile->fslurp(); - $self->set_array( $xmlfile->get_array() ); + return -1 if -1 == $xmlfile->fslurp(); + $self->set_array( $xmlfile->get_array() ); - return 0; + return 0; } sub parse($) { - my Xerl::XML::Reader $self = $_[0]; - - my $rarray = $self->get_array(); - return $self unless ref $rarray eq 'ARRAY'; - - my Xerl::XML::Element $element = Xerl::XML::Element->new(); - my Xerl::XML::Element( $root, $next, $prev, $insert ); - - # Prove and remove XML Header. - Xerl::Main::Global::ERROR( 'No valid XML header', caller() ) - unless $rarray->[0] =~ s/<\?xml .*?version.+?\?>//io; - - my ( $newlineadd, $linecount, $notrim ) = ( 0, 0, 0 ); - - #for my $line (@$rarray) { - for my $line (@$rarray) { - $newlineadd = 1 if length $line == 1 and $linecount > 3; - ++$linecount; - - $line =~ s/\\</!!LT!!/g; - $line =~ s/\\>/!!GT!!/g; - - # Allow <tag /> - my $is_single_tag = $line =~ s#<([^/].+?)( (.*?))? ?/ *>#<$1 $3></$1>#o; - - my $flag = 0; - - do { - - # Open XML tag - if ( $line =~ s#<([^/].+?)( (.*?))? *>##o ) { - my ( $name, $params ) = ( $1, $3 ); - $flag = 1; - - # Ignore XML comments - next if $name =~ /^!--/o; - - $next = Xerl::XML::Element->new(); - $next->set_name($name); - $next->set_prev($element); - $next->set_single($is_single_tag); - - # Handle tag parameters - if ( defined $params ) { - my %params = $params =~ / - (?: ( [^\s]+? ) \s*=\s* ( - (?: '(?:.|(?:\\'))*?' ) | - (?: "(?:.|(?:\\"))*?" ) | - (?: [^\s]+ ) ) ) - /gox; - - # Remove " and ' - $params{$_} =~ s/^(?:"|')|(?:"|')$//go for keys %params; - $next->set_params( \%params ); - $notrim = 1 if exists $params{notrim}; - } - - $element->push_array($next); - - $root = $element unless defined $root; - $element = $next; - $insert = $element; - - redo; - } - - # Close XML tag - if ( $line =~ s#<(/.+?)>##o ) { - $flag = 1; - - #print "XML::<$1>\n"; - if ( $element->get_name() eq 'includefiles' ) { - my $config = $self->get_config(); - my $params = $element->get_params(); - my $path = - $config->get_hostpath() . 'content/' . $params->{reldir}; - my $pattern = $params->{pattern}; - my $maxitems = - exists $params->{maxitems} ? $params->{maxitems} : 100; - my $startindex = - exists $params->{startindex} ? $params->{startindex} : 0; - - my Xerl::Tools::FileIO $io = - Xerl::Tools::FileIO->new( path => $path ); - - $io->dslurp(); - $io->reverse_array() if exists $params->{reversed}; - - for - my $include ( grep { /$pattern/o } @{ $io->get_array() } ) - { - last unless $maxitems--; - next if 0 < $startindex--; - - my Xerl::XML::Reader $reader = Xerl::XML::Reader->new( - path => $include, - config => $config - ); - - if ( -1 == $reader->open() ) { - $config->set_finish_request(1); - return undef; - } - $reader->parse(); - - my Xerl::XML::Element $starttag = - $reader->get_root()->starttag('content'); - - my $sep = - exists $params->{separator} - ? $params->{separator} - : 'noop'; - $starttag->set_name($sep); - $element->set_name('noop'); - $element->push_array($starttag); - } - } - - $insert = $element; - $prev = $element->get_prev(); - $element = $prev if defined $prev; - $notrim = 0 if $notrim; - - redo; - } - - # XML text - if ( defined $insert - and $line =~ s/^( *)(.+?) *$/$notrim ? $1.$2 : $2/oe ) - { - - if ($newlineadd) { - $insert->append_text("\n"); - $newlineadd = 0; - } - - $line =~ s/!!LT!!/</g; - $line =~ s/!!GT!!/>/g; - - $insert->append_text($line); - } - } while ( $flag == 1 ); - } - - $root->set_name('root'); - - # $root->print(); - $self->set_root($root); - - return undef; + my Xerl::XML::Reader $self = $_[0]; + + $self->newparse( $self->get_path() ); + + my $rarray = $self->get_array(); + return $self unless ref $rarray eq 'ARRAY'; + + my Xerl::XML::Element $element = Xerl::XML::Element->new(); + my Xerl::XML::Element( $root, $next, $prev, $insert ); + + # Prove and remove XML Header. + Xerl::Main::Global::ERROR( 'No valid XML header', caller() ) + unless $rarray->[0] =~ s/<\?xml .*?version.+?\?>//io; + + my ( $newlineadd, $linecount, $notrim ) = ( 0, 0, 0 ); + + #for my $line (@$rarray) { + for my $line (@$rarray) { + $newlineadd = 1 if length $line == 1 and $linecount > 3; + ++$linecount; + + $line =~ s/\\</!!LT!!/g; + $line =~ s/\\>/!!GT!!/g; + + # Allow <tag /> + my $is_single_tag = $line =~ s#<([^/].+?)( (.*?))? ?/ *>#<$1 $3></$1>#o; + + my $flag = 0; + + do { + + # Open XML tag + if ( $line =~ s#<([^/].+?)( (.*?))? *>##o ) { + my ( $name, $params ) = ( $1, $3 ); + $flag = 1; + + # Ignore XML comments + next if $name =~ /^!--/o; + + $next = Xerl::XML::Element->new(); + $next->set_name($name); + $next->set_prev($element); + $next->set_single($is_single_tag); + + # Handle tag parameters + if ( defined $params ) { + my %params = $params =~ / + (?: ( [^\s]+? ) \s*=\s* ( + (?: '(?:.|(?:\\'))*?' ) | + (?: "(?:.|(?:\\"))*?" ) | + (?: [^\s]+ ) ) ) + /gox; + + # Remove " and ' + $params{$_} =~ s/^(?:"|')|(?:"|')$//go for keys %params; + $next->set_params( \%params ); + $notrim = 1 if exists $params{notrim}; + } + + $element->push_array($next); + + $root = $element unless defined $root; + $element = $next; + $insert = $element; + + redo; + } + + # Close XML tag + if ( $line =~ s#<(/.+?)>##o ) { + $flag = 1; + + #print "XML::<$1>\n"; + + $insert = $element; + $prev = $element->get_prev(); + $element = $prev if defined $prev; + $notrim = 0 if $notrim; + + redo; + } + + # XML text + if ( defined $insert + and $line =~ s/^( *)(.+?) *$/$notrim ? $1.$2 : $2/oe ) + { + + if ($newlineadd) { + $insert->append_text("\n"); + $newlineadd = 0; + } + + $line =~ s/!!LT!!/</g; + $line =~ s/!!GT!!/>/g; + + $insert->append_text($line); + } + } while ( $flag == 1 ); + } + + $root->set_name('root'); + + # $root->print(); + $self->set_root($root); + + return undef; } 1; |
