Masato Kikuhara
en-sf****@users*****
2005年 6月 30日 (木) 02:38:51 JST
Index: affelio/apps/diary/extlib/XML/RSS/LP.pm diff -u /dev/null affelio/apps/diary/extlib/XML/RSS/LP.pm:1.1 --- /dev/null Thu Jun 30 02:38:50 2005 +++ affelio/apps/diary/extlib/XML/RSS/LP.pm Thu Jun 30 02:38:50 2005 @@ -0,0 +1,272 @@ +package XML::RSS::LP; + +# XML::RSS:LP +# +# A liberal parser for RSS Feeds. +# +# Copyright (c) 2002 Timothy Appnel +# http://tima.mplode.com/ +# +# This software is provided 'as-is', without any express or implied +# warranty. In no event will the authors be held liable for any damages +# arising from the use of this software. +# +# Permission is granted to anyone to use this software for any purpose, +# including commercial applications, and to alter it and redistribute it +# freely, subject to the following restrictions: +# +# 1. The origin of this software must not be misrepresented; you must not +# claim that you wrote the original software. If you use this software +# in a product, an acknowledgment in the product documentation would be +# appreciated but is not required. +# 2. Altered source versions must be plainly marked as such, and must not be +# misrepresented as being the original software. +# 3. This notice may not be removed or altered from any source distribution. +# +# +# Requires the XML::Parser or XML::Parser::Lite. +# +# Usage +# ----- +# +# use XML::RSS::LP; +# +# my $rss = new XML::RSS::LP; +# $rss->parsefile('foo.rss'); +# print "$rss->{channel}->{title}\n"; +# foreach (@{ $rss->{items} }) { +# print "$_->{title} [$_->{link}]\n"; +# } +# +# +# Changelog +# --------- +# 0.1 September 02 2002 +# - Initial Release +# +# 0.2 October 30 2002 +# - Borrowing heavily from XML::Parser::Expat improved and proper namespace handling including... +# - namespace, eq_name, generate_ns_name, expand_prefix, current_ns_prefixes routines added. +# - Added tag_name and namespace_prefix utility routines. +# - Added namespace parsing and store code to start handler +# - Modified character handler to take advantage of namespace awareness +# +# +# Implementation Notes +# -------------------- +# Originally inspired by my own work developing a homegrown RSS feed aggregator with +# Blagg [] and Mark Pilgrim's writing []. +# this package is similar in function to the parsing capabilities of XML::RSS, but +# more lightweight and liberal in its enforcement of RSS feeds. XML::RSS:LP is not +# "ultra" liberal -- it still requires feeds to be well-formed XML. Utilizes +# XML::Parser, but falls back on XML:Parser:Lite if not available. +# +# * requires RSS feed to be well-formed XML but... +# * does not validate or force conformance to a specific RSS spec thereby... +# * allowing mixes of .9x and 1.0 tags and 1.0 modules +# * allowing for unknown and made-up tags. +# * implements very "loose" namespace functionality. +# * ignores skiphours, skipdays, textinput. (did i miss any defined others?) +# + +sub xmlparser { + my $self = shift; + return eval { require XML::Parser; XML::Parser->new } || + eval { require XML::Parser::Lite; XML::Parser::Lite->new } || + die 'XML::Parser is not available'; +} + +sub parser { + my $self = shift->new; + @_ ? ($self->{'_parser'} = shift, return $self) : return ($self->{'_parser'} ||= $self->xmlparser); +} + +sub new { + my $self = shift; + my $class = ref($self) || $self; + return $self if ref $self; # ???? + return bless {_parser => shift} => $class; +} + +sub init { } + +sub parse { + my $self = shift; + $self->parser->setHandlers( + Init => sub { shift; $self->init_handler(@_) }, + Final => sub { shift; $self->final_handler(@_) }, + Start => sub { shift; $self->start_handler(@_) }, + End => sub { shift; $self->end_handler(@_) }, + Char => sub { shift; $self->char_handler(@_) } + ); + $self->{_unparsed}=$_[0]; + $self->parser->parse($_[0]); +} + +sub parsefile { + my $self=shift; + my $file=shift; + open(FH, "< $file"); + my $contents=join('',<FH>); + close(FH); + $self->parse($contents); +} + +sub as_string { shift->{_unparsed} } + +sub init_handler { undef shift->{_values}; } #inits vars. others? item_num? _done? + +sub final_handler { shift->{_done} } + +sub start_handler { + my $self=shift; +#warn "start_handler $self\n"; + my $element=shift; + my $flag; + + # store namespaces mapping + while (@_) { + my($prefix,$url) = splice(@_, 0, 2); + if (substr($prefix,0,5) eq 'xmlns') { + $prefix=length($prefix)>5?substr($prefix,6):'#default'; +#warn "$prefix, $url\n"; + my $stack = $self->{prefix_table}->{$prefix}; + if (defined $stack) { push(@$stack, $url); } + else { + $self->{prefix_table}->{$prefix} = [$url]; # why an array? + push(@{$self->{namespace_list}},$url); + } + $flag=1; + } + } + + #generate namespace_list if new namespaces where taken in. + if ($flag) { + my $count; + foreach (@{ $self->{namespace_list} }) { + $count++; + $self->{namespace_table}->{$_}=$count; }; + $flag=0; + } + + # namespace qualify element if necessary. + if ($self->{prefix_table}) { + if ($self->namespace_prefix($element)) { + $element=$self->generate_ns_name($self->tag_name($element),$self->expand_ns_prefix($self->namespace_prefix($element))); + } + elsif ($self->{prefix_table}->{'#default'}) { + $element=$self->generate_ns_name($element,$self->expand_ns_prefix('#default')); + } + } + push @{$self->{_values}}, [$element, {@_}]; + $self->{num_items}++ if ($self->eq_name($element,$self->generate_ns_name('item',$self->expand_ns_prefix('#default')))); # can combine with the above line. +} + +sub char_handler { + my $self= shift; + my $cdata =shift; +#warn "start char handler\n"; + + if ($self->within_element($self->generate_ns_name('image',$self->expand_ns_prefix('#default')))) { + $self->{'image'}->{$self->current_element} .= $cdata; + } elsif ($self->within_element($self->generate_ns_name('item',$self->expand_ns_prefix('#default')))) { + $self->{'items'}->[$self->{num_items}-1]->{$self->current_element} .= $cdata; + } elsif ($self->within_element($self->generate_ns_name('channel',$self->expand_ns_prefix('#default')))) { + $self->{'channel'}->{$self->current_element} .= $cdata; + } +} + +sub end_handler { + my $self = shift; + my $done = pop @{$self->{_values}}; + $self->{_done}=$done unless(@{$self->{_values}}); +} + +sub within_element { + my $self=shift; + my $element=shift; + my $count=0; + foreach (@{ $self->{_values} }) { $count++ if $_->[0] eq $element; } + $count-- unless ($self->current_element ne $element); # SHOULD THIS SUB COUNT THE CURRENT ITEM? GUESSING NO. + return $count; +} + +sub current_element { return shift->{_values}->[-1]->[0] } #if defined. + +# Namespace routines. + +sub namespace { # assuming tag with prefix handed in. + my $self = shift; + my $prefix = shift; + if ($self->{prefix_table}) { + if ($self->namespace_prefix($prefix)) { return $self->expand_ns_prefix($self->namespace_prefix($prefix)); } + else { return $self->expand_ns_prefix('#default') } + } + return undef; +} + +sub eq_name { # unlike Expat it will take in an array of namespace qualified or prefixed elements to compare. + my $self = shift; + my $count=0; + my $compare; + foreach (@_) { + my $nsq_name=$_; + if ($self->namespace_prefix($_)) { $nsq_name=$self->generate_ns_name($self->tag_name($_),$self->namespace($self->namespace_prefix($_))); } + elsif ($self->{prefix_table}->{'#default'}) { $nsq_name=$self->generate_ns_name($_,'#default'); } + (defined $compare && ($nsq_name ne $compare)) ? return 0 : 1; + $compare=$nsq_name; + $count++; + } + return $count; +} + +sub generate_ns_name { + my ($self, $name, $namespace) = @_; + if ($self->{namespace_table}->{$namespace}) { + $namespace .= '/' unless $namespace=~/\/$/; # necessary? + return $namespace .$name; + } + else { return $name; } +} + +sub expand_ns_prefix { + my $self = shift; + my $prefix = shift; + my $stack = $self->{prefix_table}->{$prefix}; + return (defined($stack) and @$stack) ? $stack->[-1] : ''; # switched to null string from undef +} + +sub current_ns_prefixes { + my $self = shift; + my %set = %{ $self->{prefix_table} }; + if (exists $set{'#default'} and not defined($set{'#default'}->[-1])) { + delete $set{'#default'}; + } + return keys %set; +} + +# Namespace utility subroutines. + +sub tag_name { + shift; + my($prefix,$name)=split(/:/,shift); + return $name || $prefix; +} + +sub namespace_prefix { + shift; + my($prefix,$name)=split(/:/,shift); + return $name?$prefix:undef; +} + + +# Would be helpful if in a streamed parser. +# +# sub context { return @{ shift->{_values} } } #returns array in context. +# sub in_element { return shift->{_values}->[-1] eq shift } +# sub depth { return int(@{ shift->{_values} }) } #Returns the size of the context list. + +#sub xml_decode { } +# NG LP.pm: Override handlers? + +1;