1: #!/usr/local/bin/perl 2: # Copyright (c) 1998 Robert Braddock. All rights reserved. 3: # This program is free software; you can redistribute it and/or 4: # modify it under the same terms as Perl itself. 6: package SGML::ElementMap::Driver::Grove; 7: use strict; 9: use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION $REVISION); BEGIN { 10: @ISA = qw( SGML::ElementMap::Driver ); 11: @EXPORT = (); 12: @EXPORT_OK = (); 13: %EXPORT_TAGS = (); 14: $VERSION = 0.1; 15: $REVISION = q$Revision: 1.6 $;# $REVISION =~ s/^\s*\w*:\s*(.*?)\s*$/$1/; 16: } 17: use SGML::ElementMap::Driver (); 18: use Exporter; 21: BEGIN { 22: SGML::ElementMap::Driver::set_obj_constants('IDX',&SGML::ElementMap::Driver::IDX_LAST(), qw(context on_next current_element)); 23: sub IDX_parser () { return &SGML::ElementMap::Driver::IDX_parser(); } 24: sub IDX_input_mode () { return &SGML::ElementMap::Driver::IDX_input_mode(); } 25: sub IDX_markup_mode () { return &SGML::ElementMap::Driver::IDX_markup_mode(); } 26: } 29: use Carp; 30: use Data::Grove::Visitor; 31: use XML::Grove::Builder (); 32: use XML::Parser::PerlSAX (); 33: use XML::ESISParser (); 35: my $debug = 0; 36: my $debug_visit = 0; 38: if (!caller()) { die "$0: perl module, not program\n"; } 39: #else { warn __PACKAGE__." V".$VERSION." R".$REVISION." loaded from ". 40: # join(', ', ((caller(0))[1,2]))."\n"; } 42: # execute transformation on a grove object 43: sub process_grove { 44: my ($self,$main,$grove,@extra) = @_; 45: @{ $self->[&IDX_context()] } = (), 46: %{ $self->[&IDX_on_next()] } = (), 47: $self->[&IDX_current_element()] = $grove; 48: return $self->dispatch_subtrees($main,'',@extra); 49: } 51: # execute transformation on a handle or file 52: sub process { 53: my ($self,$main,$source,@extra) = @_; 54: my $grove = eval { 55: my $input_type = $self->[&IDX_input_mode()]; 56: if (!$input_type || $input_type eq 'default') { # infer type 57: if (ref($source) eq 'SCALAR') { 58: $input_type = 'literal'; 59: } elsif (ref $source) { 60: $input_type = 'handle'; 61: } else { 62: $input_type = 'file'; 63: } 64: } 65: my %source; 66: if ($input_type eq 'handle') { 67: %source = ('ByteStream', $source); 68: } elsif ($input_type eq 'literal' || $input_type eq 'string') { 69: %source = ('String', $source); 70: } elsif ($input_type eq 'file' || $input_type eq 'path') { 71: %source = ('SystemId', $source); 72: } else { 73: die "unknown input type '".$input_type."'"; 74: } 76: if (! $self->[&IDX_parser()]) { # make default parser object 77: if ($self->[&IDX_markup_mode()] eq 'sgml') { 78: $self->parser(XML::ESISParser->new ( IsSGML=>1 )); 79: } else { 80: #if ($input_type eq 'esis') { 81: # $self->parser(XML::ESISParser->new ( IsSGML=>0 )); 82: #} else { 83: $self->parser(XML::Parser::PerlSAX->new()); 84: #} 85: } 86: } 87: die 'failed to instantiate parser' unless defined $self->[&IDX_parser()]; 88: $self->[&IDX_parser()]->parse ( Source => \%source ); 89: }; 90: die "parsing failed: ".$@ unless defined $grove; 91: return $self->process_grove($main,$grove,@extra); 92: } 94: # set and retrieve the current parser object 95: sub parser { 96: my ($self,$parser) = @_; 97: if (defined $parser) { 98: #use XML::Grove::Builder; 99: my $builder = XML::Grove::Builder->new(); 100: $parser->{'Handler'} = $builder; 101: # delete other handlers, just in case? 102: } 103: return $self->SUPER::parser($parser); 104: } 106: #create empty object 107: sub new { 108: my $proto = shift; 109: my $class = ref($proto) || $proto; 110: my $self = $class->SUPER::new(@_); 111: return undef unless defined $self; 112: #die "failed to construct parent class object" 114: $self->[&IDX_context()] = []; 115: $self->[&IDX_on_next()] = {}; 117: $self = bless $self, $class; 118: return $self; 119: } 121: sub context_path { 122: my $self = shift; 123: return '/'.join '/', (map { 124: $_->{'Name'} || '' 125: } @{ $self->[&IDX_context()] }); 126: } 128: #reparent current event subtree 129: sub reparent_current_subtree { 130: my ($self,$psname,@extraargs) = @_; 131: my ($state,$element,$newelement,$parent,$saved_env,$ps_env,$cname, 132: @elcontent,%elattrs,%elflags); 133: 134: #get current object/element 135: $element = $self->[&IDX_current_element()]; 136: 137: #make pseudo element 138: $newelement = 139: XML::Grove::Element->new('Name'=>$psname, 140: 'Contents'=> [ $element ], 141: 'Attributes'=> { @extraargs } 142: ); 143: 144: #warn "DBG:context:reparent pseudo:".$newelement->{"Name"}. 145: # ":FROM:".$element->{"Name"}."\n" 146: # if SGML::ElementMap::$debug_elpath; 147: 148: $self->[&IDX_current_element()] = $newelement; 149: $self->[&IDX_on_next()]->{'repeat current'} = 1; 150: $self->[&IDX_on_next()]->{'hide current context'} = 1; 151: return 1; 152: } 154: #insert event over next event subtree 155: sub reparent_subtrees { 156: my ($self,$psname,@extraargs) = @_; 157: my ($state,$element,$newelement); 159: #get current object/element 160: $element = $self->[&IDX_current_element()]; 161: 162: #make pseudo element 163: $newelement = 164: XML::Grove::Element->new('Name'=>$psname, 165: 'Contents'=>$element->{'Contents'}, 166: 'Attributes'=>{ @extraargs } 167: ); 168: 169: #warn "DBG:context:insert pseudo:". 170: # $newelement->{"Name"}."\n" if $debug_elpath; 171: 172: $self->[&IDX_current_element()] = $newelement; 173: $self->[&IDX_on_next()]->{'repeat current'} = 1; 174: return 1; 175: } 177: #dispatch on next event subtree 178: sub dispatch_subtrees { 179: my ($self,$main,$pattern,@extra) = @_; 180: my ($element,$tmp,@result); 181: $element = $self->[&IDX_current_element()]; 182: if (!$element->isa("XML::Grove")) { 183: croak ref($self).":ERROR:content processing function called out of context"; 184: } 185: my %onetime = %{ $self->[&IDX_on_next()] }; 186: %{ $self->[&IDX_on_next()] } = (); 187: if (defined $onetime{'hide current context'}) { 188: $tmp = pop @{ $self->[&IDX_context()] }; 189: } 190: #here we enter the sub tree 191: if (defined $onetime{'repeat current'}) { 192: @result = $element->accept ($self,$main,'',@extra); 193: } else { 194: #@result = $element->children_accept ($self,$main, @extra); 195: my ($children,$c,@res); 196: $children = $element->{'Contents'}; 197: $c = $onetime{'start content at'} || 0; 198: for (; $c<@$children; $c+=1) { 199: @res = $children->[$c]->accept($self,$main,$pattern,@extra); 200: if (@res == 1 && !defined $res[0]) { # pattern did not match 201: $self->[&IDX_on_next()]->{'start content at'} = $c; 202: last; 203: } else { 204: push @result, @res; 205: } 206: } 207: } 208: #finished with subtree, do clean up 209: # WARNING: I am not certain if hide current content can interact with 210: # selecting partial content. Should test if cases not disjoint. 211: if (defined $onetime{'hide current context'}) { 212: push @{ $self->[&IDX_context()] }, $tmp; 213: } 214: return @result; 215: } 217: #skip over next event subtree 218: sub skip_subtrees { 219: my $self = shift; 220: %{ $self->[&IDX_on_next()] } = (); 221: return; 222: } 224: #common inner code for visitor calls 225: sub _dispatch { 226: my $self = shift; 227: my $main = shift; 228: push @{ $self->[&IDX_context()] }, $_[2]; #save the event object 229: my @results = $main->_dispatch_event(@_); 230: pop @{ $self->[&IDX_context()] }; 231: return @results; 232: } 234: ##### Visitor hooks 236: #Set up and start the transformation (not called in current code) 237: sub visit_document { 238: warn "DBG:VISIT:document\n" if $debug_visit; 239: my $self = shift; 240: my $grove = shift; 241: return $grove->children_accept ($self, @_); 242: } 244: sub visit_element { 245: warn "DBG:VISIT:element\n" if $debug_visit; 246: my ($self,$element,$main,$pattern) = splice @_, 0, 4, (); 247: my (@results,$saved); 248: #my $state = $$self[$st]; 249: #$element->{"_parent"} = $$state[&IDX_current_element()]; #set parent 250: #print STDERR "DBG:context:".$$state{'context environment'}.":".$element->{"Name"}."\n" if $debug_elpath; 251: if ($pattern ne '') { 252: return undef unless $element->{"Name"} =~ m/^$pattern$/; 253: } 254: $saved = $self->[&IDX_current_element()]; 255: $self->[&IDX_current_element()] = $element; 256: @results = $self->_dispatch($main,'Element',$element->{"Name"}, 257: $element,@_); 258: #delete $element->{"_parent"}; #break circular reference 259: $self->[&IDX_current_element()] = $saved; 260: return @results; 261: } 263: sub visit_entity { 264: warn "DBG:VISIT:entity (sdata)\n" if $debug_visit; 265: my ($self,$sdata,$main,$pattern) = splice @_, 0, 4, (); 266: return undef if $pattern ne ''; # patterns only match elements 267: return $self->_dispatch($main,'SData',$sdata->{'Name'},$sdata,@_); 268: } 270: sub visit_pi { 271: warn "DBG:VISIT:pi\n" if $debug_visit; 272: my ($self,$pi,$main,$pattern) = splice @_, 0, 4, (); 273: return undef if $pattern ne ''; # patterns only match elements 274: if (exists $pi->{'Target'}) { 275: return $self->_dispatch($main,'PI',$pi->{"Target"},$pi,@_); 276: } else { 277: return $self->_dispatch($main,'PI',$pi->{"Data"},$pi,@_); 278: } 279: } 281: sub visit_characters { 282: warn "DBG:VISIT:characters\n" if $debug_visit; 283: my ($self,$cdata,$main,$pattern) = splice @_, 0, 4, (); 284: return undef if $pattern ne ''; # patterns only match elements 285: #cdata has no keys 286: return $self->_dispatch($main,'CData','',$cdata,@_); 287: } 289: #I think characters has superceded this 290: sub visit_cdata { 291: warn "DBG:VISIT:cdata\n" if $debug_visit; 292: my ($self,$cdata,$main,$pattern) = splice @_, 0, 4, (); 293: return undef if $pattern ne ''; # patterns only match elements 294: #cdata has no keys 295: return $self->_dispatch($main,'CData','',$cdata,@_); 296: } 298: #can this be a content object? 299: sub visit_sgml_entity { 300: warn "DBG:VISIT:sgml_entity\n" if $debug_visit; 301: warn "DBG:VISIT:entity:untested\n" if $debug; 302: my ($self,$entity,$main,$pattern) = splice @_, 0, 4, (); 303: return undef if $pattern ne ''; # patterns only match elements 304: return $self->_dispatch($main,'Entity',$entity->{"Name"},$entity,@_); 305: } 307: sub visit_external_entity { 308: warn "DBG:VISIT:external_entity\n" if $debug_visit; 309: warn "DBG:VISIT:entity:untested\n" if $debug; 310: my ($self,$ext_entity,$main,$pattern) = splice @_, 0, 4, (); 311: return undef if $pattern ne ''; # patterns only match elements 312: return $self->_dispatch($main,'Entity',$ext_entity->{"Name"}, 313: $ext_entity,@_); 314: } 316: sub visit_subdoc_entity { 317: warn "DBG:VISIT:subdoc_entity\n" if $debug_visit; 318: warn "DBG:VISIT:entity:untested\n" if $debug; 319: my ($self,$subdoc_entity,$main,$pattern) = splice @_, 0, 4, (); 320: return undef if $pattern ne ''; # patterns only match elements 321: return $self->_dispatch($main,'SubDoc',$subdoc_entity->{"Name"}, 322: $subdoc_entity,@_); 323: } 325: sub visit_comment { 326: warn "DBG:VISIT:comment\n" if $debug_visit; 327: my ($self,$comment,$main,$pattern) = splice @_, 0, 4, (); 328: return undef if $pattern ne ''; # patterns only match elements 329: #no key for comments 330: return $self->_dispatch($main,'Comment','',$comment,@_); 331: } 333: #sub visit_ { 334: # warn "DBG:VISIT:\n" if $debug_visit; 335: # my $self = shift; 336: # warn "No internal support for '' objects" if $^W; 337: #} 339: sub visit_notation { 340: warn "DBG:VISIT:notation\n" if $debug_visit; 341: my $self = shift; 342: warn "No internal support for notation objects" if $^W; 343: return undef; 344: } 346: sub visit_subdoc { 347: warn "DBG:VISIT:subdoc\n" if $debug_visit; 348: my $self = shift; 349: warn "No internal support for subdoc objects" if $^W; 350: return undef; 351: } 354: #this becomes cdata, for now 355: #sub visit_scalar { 356: # warn "DBG:VISIT:scalar\n" if $debug_visit; 357: # my ($self) = shift; 358: # my ($scalar) = shift; 359: # $self->DispatchEvent('CData','',$scalar,@_); 360: #} 362: #does not catch visitor calls 363: #sub AUTOLOAD { 364: # my $self = shift; 365: # warn "DBG:missing method:$AUTOLOAD\n" if $debug; 366: # my $method = $AUTOLOAD; 367: # $method =~ s/.*:://; 368: # return if $method eq 'DESTROY'; 369: # 370: # print "UNRECOGNIZED $method\n"; 371: #} 373: 1;