1: #!/usr/local/bin/perl
2: # Copyright (c) 1998 Robert Braddock. All rights reserved.
3: # This program is free software; you can redistribute it and/or
4: # modify it under the same terms as Perl itself.
6: package SGML::ElementMap::Driver::EventQueue;
7: use strict;
9: use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION $REVISION); BEGIN {
10: @ISA = qw( SGML::ElementMap::Driver );
11: @EXPORT = ();
12: @EXPORT_OK = ();
13: %EXPORT_TAGS = ();
14: $VERSION = 0.2;
15: $REVISION = q$Revision: 1.1 $;
16: }
17: use SGML::ElementMap::Driver ();
18: use Exporter;
20: use Carp;
22: my $debug = 0;
23: my $trace = 0;
24: my $trace_hnd = 0;
26: BEGIN {
27: SGML::ElementMap::Driver::set_obj_constants('IDX',&SGML::ElementMap::Driver::IDX_LAST(), qw(context current_context control_stack parse_queue));
28: sub IDX_parser () { return &SGML::ElementMap::Driver::IDX_parser(); }
29: sub IDX_input_mode () { return &SGML::ElementMap::Driver::IDX_input_mode(); }
30: sub IDX_markup_mode () { return &SGML::ElementMap::Driver::IDX_markup_mode(); }
31: }
34: eval "use Data::Dumper;
35: $Data::Dumper::Indent = $Data::Dumper::Indent = 0;
36: $Data::Dumper::Terse = $Data::Dumper::Terse = 1;" if $debug;
38: if (!caller()) { die "$0: perl module, not program\n"; }
40: # create empty object
41: # no arguments means default parser setup
42: sub new {
43: my $proto = shift;
44: my $class = ref($proto) || $proto;
46: carp __PACKAGE__.' does not know what to do with arguments' if @_;
47: my $self = $class->SUPER::new();
48: $self->[&IDX_context()] = '';
49: $self->[&IDX_current_context()] = '';
50: $self->[&IDX_control_stack()] = '';
51: $self->[&IDX_parse_queue()] = '';
52:
53: warn 'TRACE new('.__PACKAGE__.') from '.join(', line ',(caller())[1,2]).
54: "\n" if $trace;
55:
56: $self = bless $self,$class;
57: return $self;
58: }
61: #start new traversal (for use by users)
62: #passed main module as first (normal) argument, rest are process specific
63: sub process {
64: my ($self,$main,$source,@list) = @_;
65:
66: warn '--PROCESS from '.join(', line ',(caller())[1,2])."\n" if $trace;
67:
68: # init state vars
69: my (@control,@context,@queue) = ('','until close');
70: $self->[&IDX_context()] = \@context;
71: $self->[&IDX_control_stack()] = \@control;
72: $self->[&IDX_parse_queue()] = \@queue;
73: # make an element-like object to use as outermost context
74: my %obj = ('Name' => '', 'Type' => 'Element');
75: $self->[&IDX_current_context()] = \%obj;
76:
77: # enter the processing proper
78: my $es = eval {
79: @list = $self->dispatch_subtrees($main,'',@list);
80: #arn join("][",$queue[1]);
81: #if (@queue > 0 && $queue[1] eq 'conforming') { 1; } else { 0; }
82: 1;
83: };
85: # remove unnecessary references
86: @{ $self }[&IDX_current_context(), &IDX_control_stack(), &IDX_context(),
87: &IDX_parse_queue()] = ( ('') x 10 );
88:
89: die "parse failed: ".$@ unless defined $es;
90: #croak "final event not found:".join('][',@queue)."\n" unless $es;
92: warn "return from process" if $debug;
93: return @list;
94: }
97: sub trace {
98: my ($arg) = @_;
99: if (defined $arg) {
100: $debug = $arg;
101: } else {
102: $debug = 1;
103: }
104: }
106: #reparent current event subtree
107: sub reparent_current_subtree {
108: warn 'TRACE reparent_current_subtree'."\n" if $trace;
109: my $self = shift @_;
110: my $psname = shift @_;
112: my $cur = pop @{ $self->[&IDX_context()] }; #save the event object
113: my $control = $self->[&IDX_control_stack()];
114: my $parse = $self->[&IDX_parse_queue()];
116: # set up and initiate new element event
117: my $obj = {
118: 'Name' => $psname,
119: 'Type' => 'Element',
120: 'Attributes' => { @_ },
121: };
122:
123: push @$control, ( # READ THESE BOTTOW TO TOP
125: # replace the current context (was popped of at ins end)
126: $cur, 'push context',
127: # close the new element
128: ['end', $obj], 'return',
129: # close the inserted current element
130: ['end'=>$cur], 'return',
131: # wait for the current element to close
132: # -- and keep it in the queue
133: 'push', $cur->{'Name'}, 'until close',
134: # resend a start even for the current element
135: ['start'=>$cur], 'return',
136: # insert the new start element
137: ['start'=>$obj], 'return',
138: );
140: return 1;
141: }
143: #insert event over next event subtree
144: sub reparent_subtrees {
145: warn 'TRACE reparent_subtrees'."\n" if $trace;
146: my $self = shift;
147: my $psname = shift;
149: my $context = $self->[&IDX_current_context()];
150: my $control = $self->[&IDX_control_stack()];
152: # set up and initiate new element event
153: my $obj = {
154: 'Name' => $psname,
155: 'Type' => 'Element',
156: 'Attributes' => { @_ }
157: };
158:
159: push @$control, ( # these go backwards to pop off
160: # return inserted element end event (actual el close in Q)
161: ['end', $obj], 'return',
162: # run until actual element close
163: # -- but keep that tag in the queue
164: 'push', $context->{'Name'}, 'until close',
165: # insert the start tag ahead in the queue
166: ['start', $obj], 'return');
167: return 1;
168: }
171: # control stack commands processed here
172: sub get_event {
173: my $self = shift;
174: my $control_stack = $self->[&IDX_control_stack()];
175: my $parse_stack = $self->[&IDX_parse_queue()];
176: return '' unless ref $control_stack;
178: my $event = '';
179: while (! ref $event) {
180: warn Data::Dumper->Dump([$control_stack],
181: ['*CONTROL_STACK'])."\n" if $debug;
182: my $control = pop @$control_stack;
183:
184: if ($control eq 'until close') {
185: # process events from the parser until the specified event
186: # closes. 'push' => and stick that close element back in the queue
188: push @$control_stack, $control; #this event stays on the stack
189: while (@$parse_stack == 0) {
190: last unless $self->queue_more_events($parse_stack);
191: #last unless $event; # no infinite loop on error
192: }
193: if (@$parse_stack == 0) {
194: $event = ['', 'nonconforming', ''];
195: } else {
196: $event = [ splice @$parse_stack, 0, 3, () ];
197: }
198: } elsif ($control eq 'return') {
199: # return the next control object as the next event
200: $event = pop @$control_stack;
201: } elsif ($control eq 'push context') {
202: # insert the next control object onto the context stack
203: push @{ $self->[&IDX_context()] }, pop @$control_stack;
204: $event = '';
205: } else {
206: die "unexpected event on control stack: ".$control."";
207: }
208: }
209: warn "NEXT ".Data::Dumper->Dump([$event],['*EVENT'])."\n" if $debug;
210: warn 'QUEUE '.Data::Dumper->Dump([$parse_stack],['*PARSE'])."\n" if $debug;
211:
212: return $event;
213: }
215: #dispatch on next event subtree
216: #passed main module as first (normal) argument
217: sub dispatch_subtrees {
218: warn 'TRACE dispatch_subtrees'."\n" if $trace;
219: my ($self,$main,$pattern,@other_args) = @_;
220: my $context = $self->[&IDX_current_context()];
221: my $current = $self->[&IDX_control_stack()];
222: my $parse = $self->[&IDX_parse_queue()];
223: my $top = 0;
224: if ($debug && $self->[&IDX_current_context()]->{'Name'} eq '') {
225: warn 'ENTER TOP EVENT LOOP' if $trace;
226: $top = 1;
227: }
229: my ($event,$type,$data,$third,$saved_element,@results,$obj,%obj,%aux,
230: $name);
232: warn "dispatch Context [".$self->context_path."]\n" if $debug;
233: @results = ();
234: while ($event = $self->get_event) {
235: ($type,$data,$third) = @$event;
236: warn "dispatch_loop($top) '".$type."' event at".
237: $self->context_path."\n" if $debug;
238:
239: if ($type eq 'start_element' || $type eq 'start') {
240: $obj = $data;
241: if ($pattern ne '' && $obj->{"Name"} !~ m/^$pattern$/) {
242: push @$current, $event, 'return';
243: last;
244: }
245:
246: $self->[&IDX_current_context()] = $obj;
247: #push @$current,['until close',$obj->{'Name'}];
248: $obj->{'Type'} = 'Element';
249: push @results, $self->_dispatch($main,'Element',$obj->{"Name"},
250: $obj,@other_args);
251: #die "context mismatch"
252: # unless $self->[&IDX_current_context()]->{'Name'} eq $obj{'Name'};
253: $self->[&IDX_current_context()] = $context;
254:
255: } else {
256: # pattern only matches elements and we don't end if we have a pattern
257: if ($pattern ne '') {
258: push @$current, $event, 'return';
259: last;
260: }
261: if ($type eq 'end_element' || $type eq 'end') {
262: if (ref $data) {
263: # element object already constructed
264: $name = $data->{'Name'};
265: } else {
266: $name = $data;
267: }
268: if ( @$current > 0
269: && $current->[$#$current] eq 'until close'
270: && $current->[$#$current-1] eq $name ) {
271: # got an element we've been looking for
272: my ($c_data,$c_type) = splice @$current,-2, 2, ();
273: # clean up element event and return up
274: if (@$current > 0 && $current->[$#$current] eq 'push') {
275: pop @$current;
276: # un-get this event
277: unshift @$parse, @$event;
278: }
279: next;
280: } elsif ($name eq $context->{'Name'}) {
281: # end of the element for this dispatch
282: last;
283: } else {
284: die "context mismatch: $type $name <=> ".$context->{'Name'};
285: }
287: } elsif ($type eq 'cdata') {
288: #%obj = ( 'Data' => $data );
289: push @results, $self->_dispatch($main, 'CData', '',
290: $data, @other_args);
292: } elsif ($type eq 'sdata') { #data is string
293: #%obj = ('Name' => $data, 'Data' => $third);
294: push @results, $self->_dispatch($main, 'SData', $data->{'Data'},
295: $data, @other_args);
296:
297: } elsif ($type eq 'pi') { #data is string
298: $type = $data->{'Target'};
299: $type = $data->{'Data'} unless defined $type;
300: push @results, $self->_dispatch($main, 'PI', $type,
301: $data, @other_args);
303: } elsif ($type eq 'comment') { #data is string
304: push @results, $self->_dispatch($main, 'Comment', $data->{'Data'},
305: $data, @other_args);
307: } elsif ($type eq 'entity') { #data is external entity
308: die __PACKAGE__." has no support for entity event";
309:
310: #} elsif ($type eq 'start_subdoc') { #data is SGMLS::Entity
311: # %obj = ('Name' => $data->name,
312: # 'Data' => $data->value # TODO needs processing
313: # );
314: # $self->[&IDX_current_context()] = \%obj;
315: # push @results, $self->_dispatch($main,'SubDoc',$obj{'Name'},
316: # \%obj,@other_args);
317: # die "context mismatch"
318: # unless $self->[&IDX_current_context()]->{'Name'} eq $obj{'Name'};
319: # $self->[&IDX_current_context()] = $context;
320: #} elsif ($type eq 'end_subdoc') { #data is SGMLS::Entity
321: # die "context mismatch"
322: # unless $self->{'Name'} eq $data->name;
323: # $self->[&IDX_current_context()] = $context;
324: # last;
326: } elsif ($type eq '') { # end
327: if (defined($data) && $data ne 'conforming') {
328: warn 'non-conforming end of parser event stream'."\n";
329: }
330: if (@$current == 2) {
331: ($data,$type) = @$current;
332: @$current = ();
333: if ($type eq 'until close' && $data eq '') {
334: push @$parse, @$event;
335: if ($self->[&IDX_current_context()]->{'Name'} eq '') {
336: last;
337: } elsif ($top) {
338: warn 'CONTEXT WRONG AT TOP:'."\n[".
339: join('][',%$context)."]\n[".
340: join('][',%{$self->[&IDX_current_context()]}).
341: "]\n";
342: } else { warn 'boom'; }
343: } else { warn 'boom'; }
344: } else { warn 'boom'; }
345: die 'oops, hit correct document ending unexpectedly';
347: } elsif ($type eq '[error]') { #no data
348: die 'boom [error from get_event]';
349: } else {
350: warn "WARNING ignoring unknown event '$type' from parser";
351: warn "[".join('][',@$event)."]\n";
352: warn "[".join('][',@$parse)."]\n";
353: }
354: }
355: }
356: warn "dispatch end Context ".$self->context_path."\n" if $debug;
358: #if ($event && $^W) {
359: # my $o_name = $context->{'Name'};
360: # my $c_name = $self->[&IDX_current_context()]->{'Name'};
361: # die "context mismatch: $c_name <=> $o_name"
362: # if ($c_name ne $o_name);
363: #}
364: return @results;
365: }
368: #skip over next event subtree
369: sub skip_subtrees {
370: warn "TRACE skip_subtrees\n" if $trace;
371: my $self = shift;
372: my $context = $self->[&IDX_context()]->[-1];
374: warn 'SKIP at '.Data::Dumper->Dump([$context],['*context'])."\n" if $debug;
375: # trivial for non-element handlers
376: return 1 if 'Element' ne ($context->{'Type'} || '');
377: my $cur_name = $context->{'Name'};
379: my ($event,$type,$name,$data);
380: my $depth = 1;
382: warn "BEGIN_SKIP_SUBTREE: ".$cur_name."\n" if $debug;
383: while ($event = $self->get_event) {
384: croak "end document during skip_subtree(".$cur_name.")" unless $event;
385: ($type, $data) = @$event;
386: if (ref $data) {
387: $name = $data->{'Name'} || '';
388: } else {
389: $name = $data; # if it's an element, at least...
390: }
392: if ($type eq 'start' || $type eq 'start_element') {
393: $depth += 1 if $name eq $cur_name;
394: } elsif ($type eq 'end' || $type eq 'end_element') {
395: $depth -= 1 if $name eq $cur_name;
396: last if $depth == 0;
397: }
398: }
399: warn "END_SKIP_SUBTREE: ".$cur_name."\n" if $debug;
400: return 1;
401: }
403: #return the context as a simple path
404: sub context_path {
405: my $self = shift;
406: return '/'.join('/', (map {
407: (ref($_) && $_->{'Name'}) || ''
408: } @{ $self->[&IDX_context()] }));
409: }
411: #common inner code
412: sub _dispatch {
413: my $self = shift;
414: my $main = shift;
415: # ($type,$key,$object,@args) = @_;
416: push @{ $self->[&IDX_context()] }, $_[2]; #save the event object
417: warn "CONTEXT out of sync! (PRE)"
418: if ($_[1] eq 'Element' && $self->[&IDX_context()]->[-1]->{'Name'} ne
419: $self->[&IDX_current_context()]->{'Name'});
420: my @results = $main->_dispatch_event(@_);
421: warn "CONTEXT out of sync! (POST)"
422: if ($_[1] eq 'Element' && $self->[&IDX_context()]->[-1]->{'Name'} ne
423: $self->[&IDX_current_context()]->{'Name'});
424: pop @{ $self->[&IDX_context()] };
425: return @results;
426: }
428: 1;