[Affelio-cvs 7] CVS update: affelio/extlib/XML/Parser

Back to archive index

Tadashi Okoshi slash****@users*****
2005年 6月 17日 (金) 00:57:06 JST


Index: affelio/extlib/XML/Parser/Lite.pm
diff -u /dev/null affelio/extlib/XML/Parser/Lite.pm:1.1
--- /dev/null	Fri Jun 17 00:57:06 2005
+++ affelio/extlib/XML/Parser/Lite.pm	Fri Jun 17 00:57:05 2005
@@ -0,0 +1,202 @@
+# ======================================================================
+#
+# Copyright (C) 2000-2001 Paul Kulchenko (paulc****@yahoo*****)
+# SOAP::Lite is free software; you can redistribute it
+# and/or modify it under the same terms as Perl itself.
+#
+# $Id: Lite.pm,v 1.1 2005/06/16 15:57:05 slash5234 Exp $
+#
+# ======================================================================
+
+package XML::Parser::Lite;
+
+use strict;
+use vars qw($VERSION);
+$VERSION = sprintf("%d.%s", map {s/_//g; $_} q$Name:  $ =~ /-(\d+)_([\d_]+)/);
+
+sub new { 
+  my $self = shift;
+  my $class = ref($self) || $self;
+  return $self if ref $self;
+
+  $self = bless {} => $class;
+  my %parameters = @_;
+  $self->setHandlers(); # clear first 
+  $self->setHandlers(%{$parameters{Handlers} || {}});
+  return $self;
+}
+
+sub setHandlers {
+  my $self = shift; 
+  no strict 'refs'; local $^W;
+  # clear all handlers if called without parameters
+  unless (@_) { foreach (qw(Start End Char Final Init)) { *$_ = sub {} } }
+  while (@_) { my($name => $func) = splice(@_, 0, 2); *$name = defined $func ? $func : sub {} }
+  return $self;
+}
+
+sub regexp {
+  my $patch = shift || '';
+  my $package = __PACKAGE__;
+
+  # This parser is based on "shallow parser" http://www.cs.sfu.ca/~cameron/REX.html 
+
+  # Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
+  # Technical Report TR 1998-17, School of Computing Science, Simon Fraser University, November, 1998.
+  # Copyright (c) 1998, Robert D. Cameron. 
+  # The following code may be freely used and distributed provided that
+  # this copyright and citation notice remains intact and that modifications
+  # or additions are clearly identified.
+
+  my $TextSE = "[^<]+";
+  my $UntilHyphen = "[^-]*-";
+  my $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-";
+  my $CommentCE = "$Until2Hyphens>?";
+  my $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+";
+  my $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>";
+  my $S = "[ \\n\\t\\r]+";
+  my $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";
+  my $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";
+  my $Name = "(?:$NameStrt)(?:$NameChar)*";
+  my $QuoteSE = "\"[^\"]*\"|'[^']*'";
+  my $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*";
+  my $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
+  my $S1 = "[\\n\\r\\t ]";
+  my $UntilQMs = "[^?]*\\?+";
+  my $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>";
+  my $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S";
+  my $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?";
+  my $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?";
+  my $PI_CE = "$Name(?:$PI_Tail)?";
+
+  # these expressions were modified for backtracking and events
+  my $EndTagCE = "($Name)(?{${package}::end(\$2)})(?:$S)?>";
+  my $AttValSE = "\"([^<\"]*)\"|'([^<']*)'";
+  my $ElemTagCE = "($Name)(?:$S($Name)(?:$S)?=(?:$S)?(?:$AttValSE)(?{[\@{\$^R||[]},\$4=>defined\$5?\$5:\$6]}))*(?:$S)?(/)?>(?{${package}::start(\$3,\@{\$^R||[]})})(?{\${7} and ${package}::end(\$3)})";
+  my $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)";
+
+  # Next expression is under "black magic".
+  # Ideally it should be '($TextSE)(?{${package}::char(\$1)})|$MarkupSPE',
+  # but it doesn't work under Perl 5.005 and only magic with
+  # (?:....)?? solved the problem. 
+  # I would appreciate if someone let me know what is the right thing to do 
+  # and what's the reason for all this magic. 
+  # Seems like a problem related to (?:....)? rather than to ?{} feature.
+  # Tests are in t/31-xmlparserlite.t if you decide to play with it.
+  "(?:($TextSE)(?{${package}::char(\$1)}))$patch|$MarkupSPE";
+}
+
+sub compile { local $^W; 
+  # try regexp as it should be, apply patch if doesn't work
+  foreach (regexp(), regexp('??')) {
+    eval qq{sub parse_re { use re "eval"; 1 while \$_[0] =~ m{$_}go }; 1} or die;
+    last if eval { parse_re('<foo>bar</foo>'); 1 }
+  };
+
+  *compile = sub {};
+}
+
+setHandlers();
+compile();
+
+sub parse { 
+  init(); 
+  parse_re($_[1]);
+  final(); 
+}
+
+my(@stack, $level);
+
+sub init { 
+  @stack = (); $level = 0;
+  Init(__PACKAGE__, @_);  
+}
+
+sub final { 
+  die "not properly closed tag '$stack[-1]'\n" if @stack;
+  die "no element found\n" unless $level;
+  Final(__PACKAGE__, @_) 
+} 
+
+sub start { 
+  die "multiple roots, wrong element '$_[0]'\n" if $level++ && !@stack;
+  push(@stack, $_[0]);
+  Start(__PACKAGE__, @_); 
+}
+
+sub char { 
+  Char(__PACKAGE__, $_[0]), return if @stack;
+
+  # check for junk before or after element
+  # can't use split or regexp due to limitations in ?{} implementation, 
+  # will iterate with loop, but we'll do it no more than two times, so
+  # it shouldn't affect performance
+  for (my $i=0; $i < length $_[0]; $i++) {
+    die "junk '$_[0]' @{[$level ? 'after' : 'before']} XML element\n"
+      if index("\n\r\t ", substr($_[0],$i,1)) < 0; # or should '< $[' be there
+  }
+}
+
+sub end { 
+  pop(@stack) eq $_[0] or die "mismatched tag '$_[0]'\n";
+  End(__PACKAGE__, $_[0]);
+}
+
+# ======================================================================
+
+1;
+
+__END__
+
+=head1 NAME
+
+XML::Parser::Lite - Lightweight regexp-based XML parser
+
+=head1 SYNOPSIS
+
+  use XML::Parser::Lite;
+  
+  $p1 = new XML::Parser::Lite;
+  $p1->setHandlers(
+    Start => sub { shift; print "start: @_\n" },
+    Char => sub { shift; print "char: @_\n" },
+    End => sub { shift; print "end: @_\n" },
+  );
+  $p1->parse('<foo id="me">Hello World!</foo>');
+
+  $p2 = new XML::Parser::Lite
+    Handlers => {
+      Start => sub { shift; print "start: @_\n" },
+      Char => sub { shift; print "char: @_\n" },
+      End => sub { shift; print "end: @_\n" },
+    }
+  ;
+  $p2->parse('<foo id="me">Hello <bar>cruel</bar> World!</foo>');
+
+=head1 DESCRIPTION
+
+This Perl module gives you access to XML parser with interface similar to
+XML::Parser interface. Though only basic calls are supported (init, final,
+start, char, and end) you should be able to use it in the same way you use
+XML::Parser. Due to using experimantal regexp features it'll work only on
+Perl 5.6 and may behave differently on different platforms.
+ 
+=head1 SEE ALSO
+
+ XML::Parser
+
+=head1 COPYRIGHT
+
+Copyright (C) 2000-2001 Paul Kulchenko. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+This parser is based on "shallow parser" http://www.cs.sfu.ca/~cameron/REX.html
+Copyright (c) 1998, Robert D. Cameron.
+
+=head1 AUTHOR
+
+Paul Kulchenko (paulc****@yahoo*****)
+
+=cut


Affelio-cvs メーリングリストの案内
Back to archive index