1: #!/usr/local/bin/perl 2: # Copyright (c) 1998 Robert Braddock. All rights reserved. 3: # This program is free software; you can redistribute it and/or 4: # modify it under the same terms as Perl itself. 6: package SGML::ElementMap::Driver::EventQueue; 7: use strict; 9: use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION $REVISION); BEGIN { 10: @ISA = qw( SGML::ElementMap::Driver ); 11: @EXPORT = (); 12: @EXPORT_OK = (); 13: %EXPORT_TAGS = (); 14: $VERSION = 0.2; 15: $REVISION = q$Revision: 1.1 $; 16: } 17: use SGML::ElementMap::Driver (); 18: use Exporter; 20: use Carp; 22: my $debug = 0; 23: my $trace = 0; 24: my $trace_hnd = 0; 26: BEGIN { 27: SGML::ElementMap::Driver::set_obj_constants('IDX',&SGML::ElementMap::Driver::IDX_LAST(), qw(context current_context control_stack parse_queue)); 28: sub IDX_parser () { return &SGML::ElementMap::Driver::IDX_parser(); } 29: sub IDX_input_mode () { return &SGML::ElementMap::Driver::IDX_input_mode(); } 30: sub IDX_markup_mode () { return &SGML::ElementMap::Driver::IDX_markup_mode(); } 31: } 34: eval "use Data::Dumper; 35: $Data::Dumper::Indent = $Data::Dumper::Indent = 0; 36: $Data::Dumper::Terse = $Data::Dumper::Terse = 1;" if $debug; 38: if (!caller()) { die "$0: perl module, not program\n"; } 40: # create empty object 41: # no arguments means default parser setup 42: sub new { 43: my $proto = shift; 44: my $class = ref($proto) || $proto; 46: carp __PACKAGE__.' does not know what to do with arguments' if @_; 47: my $self = $class->SUPER::new(); 48: $self->[&IDX_context()] = ''; 49: $self->[&IDX_current_context()] = ''; 50: $self->[&IDX_control_stack()] = ''; 51: $self->[&IDX_parse_queue()] = ''; 52: 53: warn 'TRACE new('.__PACKAGE__.') from '.join(', line ',(caller())[1,2]). 54: "\n" if $trace; 55: 56: $self = bless $self,$class; 57: return $self; 58: } 61: #start new traversal (for use by users) 62: #passed main module as first (normal) argument, rest are process specific 63: sub process { 64: my ($self,$main,$source,@list) = @_; 65: 66: warn '--PROCESS from '.join(', line ',(caller())[1,2])."\n" if $trace; 67: 68: # init state vars 69: my (@control,@context,@queue) = ('','until close'); 70: $self->[&IDX_context()] = \@context; 71: $self->[&IDX_control_stack()] = \@control; 72: $self->[&IDX_parse_queue()] = \@queue; 73: # make an element-like object to use as outermost context 74: my %obj = ('Name' => '', 'Type' => 'Element'); 75: $self->[&IDX_current_context()] = \%obj; 76: 77: # enter the processing proper 78: my $es = eval { 79: @list = $self->dispatch_subtrees($main,'',@list); 80: #arn join("][",$queue[1]); 81: #if (@queue > 0 && $queue[1] eq 'conforming') { 1; } else { 0; } 82: 1; 83: }; 85: # remove unnecessary references 86: @{ $self }[&IDX_current_context(), &IDX_control_stack(), &IDX_context(), 87: &IDX_parse_queue()] = ( ('') x 10 ); 88: 89: die "parse failed: ".$@ unless defined $es; 90: #croak "final event not found:".join('][',@queue)."\n" unless $es; 92: warn "return from process" if $debug; 93: return @list; 94: } 97: sub trace { 98: my ($arg) = @_; 99: if (defined $arg) { 100: $debug = $arg; 101: } else { 102: $debug = 1; 103: } 104: } 106: #reparent current event subtree 107: sub reparent_current_subtree { 108: warn 'TRACE reparent_current_subtree'."\n" if $trace; 109: my $self = shift @_; 110: my $psname = shift @_; 112: my $cur = pop @{ $self->[&IDX_context()] }; #save the event object 113: my $control = $self->[&IDX_control_stack()]; 114: my $parse = $self->[&IDX_parse_queue()]; 116: # set up and initiate new element event 117: my $obj = { 118: 'Name' => $psname, 119: 'Type' => 'Element', 120: 'Attributes' => { @_ }, 121: }; 122: 123: push @$control, ( # READ THESE BOTTOW TO TOP 125: # replace the current context (was popped of at ins end) 126: $cur, 'push context', 127: # close the new element 128: ['end', $obj], 'return', 129: # close the inserted current element 130: ['end'=>$cur], 'return', 131: # wait for the current element to close 132: # -- and keep it in the queue 133: 'push', $cur->{'Name'}, 'until close', 134: # resend a start even for the current element 135: ['start'=>$cur], 'return', 136: # insert the new start element 137: ['start'=>$obj], 'return', 138: ); 140: return 1; 141: } 143: #insert event over next event subtree 144: sub reparent_subtrees { 145: warn 'TRACE reparent_subtrees'."\n" if $trace; 146: my $self = shift; 147: my $psname = shift; 149: my $context = $self->[&IDX_current_context()]; 150: my $control = $self->[&IDX_control_stack()]; 152: # set up and initiate new element event 153: my $obj = { 154: 'Name' => $psname, 155: 'Type' => 'Element', 156: 'Attributes' => { @_ } 157: }; 158: 159: push @$control, ( # these go backwards to pop off 160: # return inserted element end event (actual el close in Q) 161: ['end', $obj], 'return', 162: # run until actual element close 163: # -- but keep that tag in the queue 164: 'push', $context->{'Name'}, 'until close', 165: # insert the start tag ahead in the queue 166: ['start', $obj], 'return'); 167: return 1; 168: } 171: # control stack commands processed here 172: sub get_event { 173: my $self = shift; 174: my $control_stack = $self->[&IDX_control_stack()]; 175: my $parse_stack = $self->[&IDX_parse_queue()]; 176: return '' unless ref $control_stack; 178: my $event = ''; 179: while (! ref $event) { 180: warn Data::Dumper->Dump([$control_stack], 181: ['*CONTROL_STACK'])."\n" if $debug; 182: my $control = pop @$control_stack; 183: 184: if ($control eq 'until close') { 185: # process events from the parser until the specified event 186: # closes. 'push' => and stick that close element back in the queue 188: push @$control_stack, $control; #this event stays on the stack 189: while (@$parse_stack == 0) { 190: last unless $self->queue_more_events($parse_stack); 191: #last unless $event; # no infinite loop on error 192: } 193: if (@$parse_stack == 0) { 194: $event = ['', 'nonconforming', '']; 195: } else { 196: $event = [ splice @$parse_stack, 0, 3, () ]; 197: } 198: } elsif ($control eq 'return') { 199: # return the next control object as the next event 200: $event = pop @$control_stack; 201: } elsif ($control eq 'push context') { 202: # insert the next control object onto the context stack 203: push @{ $self->[&IDX_context()] }, pop @$control_stack; 204: $event = ''; 205: } else { 206: die "unexpected event on control stack: ".$control.""; 207: } 208: } 209: warn "NEXT ".Data::Dumper->Dump([$event],['*EVENT'])."\n" if $debug; 210: warn 'QUEUE '.Data::Dumper->Dump([$parse_stack],['*PARSE'])."\n" if $debug; 211: 212: return $event; 213: } 215: #dispatch on next event subtree 216: #passed main module as first (normal) argument 217: sub dispatch_subtrees { 218: warn 'TRACE dispatch_subtrees'."\n" if $trace; 219: my ($self,$main,$pattern,@other_args) = @_; 220: my $context = $self->[&IDX_current_context()]; 221: my $current = $self->[&IDX_control_stack()]; 222: my $parse = $self->[&IDX_parse_queue()]; 223: my $top = 0; 224: if ($debug && $self->[&IDX_current_context()]->{'Name'} eq '') { 225: warn 'ENTER TOP EVENT LOOP' if $trace; 226: $top = 1; 227: } 229: my ($event,$type,$data,$third,$saved_element,@results,$obj,%obj,%aux, 230: $name); 232: warn "dispatch Context [".$self->context_path."]\n" if $debug; 233: @results = (); 234: while ($event = $self->get_event) { 235: ($type,$data,$third) = @$event; 236: warn "dispatch_loop($top) '".$type."' event at". 237: $self->context_path."\n" if $debug; 238: 239: if ($type eq 'start_element' || $type eq 'start') { 240: $obj = $data; 241: if ($pattern ne '' && $obj->{"Name"} !~ m/^$pattern$/) { 242: push @$current, $event, 'return'; 243: last; 244: } 245: 246: $self->[&IDX_current_context()] = $obj; 247: #push @$current,['until close',$obj->{'Name'}]; 248: $obj->{'Type'} = 'Element'; 249: push @results, $self->_dispatch($main,'Element',$obj->{"Name"}, 250: $obj,@other_args); 251: #die "context mismatch" 252: # unless $self->[&IDX_current_context()]->{'Name'} eq $obj{'Name'}; 253: $self->[&IDX_current_context()] = $context; 254: 255: } else { 256: # pattern only matches elements and we don't end if we have a pattern 257: if ($pattern ne '') { 258: push @$current, $event, 'return'; 259: last; 260: } 261: if ($type eq 'end_element' || $type eq 'end') { 262: if (ref $data) { 263: # element object already constructed 264: $name = $data->{'Name'}; 265: } else { 266: $name = $data; 267: } 268: if ( @$current > 0 269: && $current->[$#$current] eq 'until close' 270: && $current->[$#$current-1] eq $name ) { 271: # got an element we've been looking for 272: my ($c_data,$c_type) = splice @$current,-2, 2, (); 273: # clean up element event and return up 274: if (@$current > 0 && $current->[$#$current] eq 'push') { 275: pop @$current; 276: # un-get this event 277: unshift @$parse, @$event; 278: } 279: next; 280: } elsif ($name eq $context->{'Name'}) { 281: # end of the element for this dispatch 282: last; 283: } else { 284: die "context mismatch: $type $name <=> ".$context->{'Name'}; 285: } 287: } elsif ($type eq 'cdata') { 288: #%obj = ( 'Data' => $data ); 289: push @results, $self->_dispatch($main, 'CData', '', 290: $data, @other_args); 292: } elsif ($type eq 'sdata') { #data is string 293: #%obj = ('Name' => $data, 'Data' => $third); 294: push @results, $self->_dispatch($main, 'SData', $data->{'Data'}, 295: $data, @other_args); 296: 297: } elsif ($type eq 'pi') { #data is string 298: $type = $data->{'Target'}; 299: $type = $data->{'Data'} unless defined $type; 300: push @results, $self->_dispatch($main, 'PI', $type, 301: $data, @other_args); 303: } elsif ($type eq 'comment') { #data is string 304: push @results, $self->_dispatch($main, 'Comment', $data->{'Data'}, 305: $data, @other_args); 307: } elsif ($type eq 'entity') { #data is external entity 308: die __PACKAGE__." has no support for entity event"; 309: 310: #} elsif ($type eq 'start_subdoc') { #data is SGMLS::Entity 311: # %obj = ('Name' => $data->name, 312: # 'Data' => $data->value # TODO needs processing 313: # ); 314: # $self->[&IDX_current_context()] = \%obj; 315: # push @results, $self->_dispatch($main,'SubDoc',$obj{'Name'}, 316: # \%obj,@other_args); 317: # die "context mismatch" 318: # unless $self->[&IDX_current_context()]->{'Name'} eq $obj{'Name'}; 319: # $self->[&IDX_current_context()] = $context; 320: #} elsif ($type eq 'end_subdoc') { #data is SGMLS::Entity 321: # die "context mismatch" 322: # unless $self->{'Name'} eq $data->name; 323: # $self->[&IDX_current_context()] = $context; 324: # last; 326: } elsif ($type eq '') { # end 327: if (defined($data) && $data ne 'conforming') { 328: warn 'non-conforming end of parser event stream'."\n"; 329: } 330: if (@$current == 2) { 331: ($data,$type) = @$current; 332: @$current = (); 333: if ($type eq 'until close' && $data eq '') { 334: push @$parse, @$event; 335: if ($self->[&IDX_current_context()]->{'Name'} eq '') { 336: last; 337: } elsif ($top) { 338: warn 'CONTEXT WRONG AT TOP:'."\n[". 339: join('][',%$context)."]\n[". 340: join('][',%{$self->[&IDX_current_context()]}). 341: "]\n"; 342: } else { warn 'boom'; } 343: } else { warn 'boom'; } 344: } else { warn 'boom'; } 345: die 'oops, hit correct document ending unexpectedly'; 347: } elsif ($type eq '[error]') { #no data 348: die 'boom [error from get_event]'; 349: } else { 350: warn "WARNING ignoring unknown event '$type' from parser"; 351: warn "[".join('][',@$event)."]\n"; 352: warn "[".join('][',@$parse)."]\n"; 353: } 354: } 355: } 356: warn "dispatch end Context ".$self->context_path."\n" if $debug; 358: #if ($event && $^W) { 359: # my $o_name = $context->{'Name'}; 360: # my $c_name = $self->[&IDX_current_context()]->{'Name'}; 361: # die "context mismatch: $c_name <=> $o_name" 362: # if ($c_name ne $o_name); 363: #} 364: return @results; 365: } 368: #skip over next event subtree 369: sub skip_subtrees { 370: warn "TRACE skip_subtrees\n" if $trace; 371: my $self = shift; 372: my $context = $self->[&IDX_context()]->[-1]; 374: warn 'SKIP at '.Data::Dumper->Dump([$context],['*context'])."\n" if $debug; 375: # trivial for non-element handlers 376: return 1 if 'Element' ne ($context->{'Type'} || ''); 377: my $cur_name = $context->{'Name'}; 379: my ($event,$type,$name,$data); 380: my $depth = 1; 382: warn "BEGIN_SKIP_SUBTREE: ".$cur_name."\n" if $debug; 383: while ($event = $self->get_event) { 384: croak "end document during skip_subtree(".$cur_name.")" unless $event; 385: ($type, $data) = @$event; 386: if (ref $data) { 387: $name = $data->{'Name'} || ''; 388: } else { 389: $name = $data; # if it's an element, at least... 390: } 392: if ($type eq 'start' || $type eq 'start_element') { 393: $depth += 1 if $name eq $cur_name; 394: } elsif ($type eq 'end' || $type eq 'end_element') { 395: $depth -= 1 if $name eq $cur_name; 396: last if $depth == 0; 397: } 398: } 399: warn "END_SKIP_SUBTREE: ".$cur_name."\n" if $debug; 400: return 1; 401: } 403: #return the context as a simple path 404: sub context_path { 405: my $self = shift; 406: return '/'.join('/', (map { 407: (ref($_) && $_->{'Name'}) || '' 408: } @{ $self->[&IDX_context()] })); 409: } 411: #common inner code 412: sub _dispatch { 413: my $self = shift; 414: my $main = shift; 415: # ($type,$key,$object,@args) = @_; 416: push @{ $self->[&IDX_context()] }, $_[2]; #save the event object 417: warn "CONTEXT out of sync! (PRE)" 418: if ($_[1] eq 'Element' && $self->[&IDX_context()]->[-1]->{'Name'} ne 419: $self->[&IDX_current_context()]->{'Name'}); 420: my @results = $main->_dispatch_event(@_); 421: warn "CONTEXT out of sync! (POST)" 422: if ($_[1] eq 'Element' && $self->[&IDX_context()]->[-1]->{'Name'} ne 423: $self->[&IDX_current_context()]->{'Name'}); 424: pop @{ $self->[&IDX_context()] }; 425: return @results; 426: } 428: 1;