1: #!/usr/local/bin/perl
2: # Copyright (c) 1998 Robert Braddock. All rights reserved.
3: # This program is free software; you can redistribute it and/or
4: # modify it under the same terms as Perl itself.
6: package SGML::ElementMap::Driver::Grove;
7: use strict;
9: use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION $REVISION); BEGIN {
10: @ISA = qw( SGML::ElementMap::Driver );
11: @EXPORT = ();
12: @EXPORT_OK = ();
13: %EXPORT_TAGS = ();
14: $VERSION = 0.1;
15: $REVISION = q$Revision: 1.6 $;# $REVISION =~ s/^\s*\w*:\s*(.*?)\s*$/$1/;
16: }
17: use SGML::ElementMap::Driver ();
18: use Exporter;
21: BEGIN {
22: SGML::ElementMap::Driver::set_obj_constants('IDX',&SGML::ElementMap::Driver::IDX_LAST(), qw(context on_next current_element));
23: sub IDX_parser () { return &SGML::ElementMap::Driver::IDX_parser(); }
24: sub IDX_input_mode () { return &SGML::ElementMap::Driver::IDX_input_mode(); }
25: sub IDX_markup_mode () { return &SGML::ElementMap::Driver::IDX_markup_mode(); }
26: }
29: use Carp;
30: use Data::Grove::Visitor;
31: use XML::Grove::Builder ();
32: use XML::Parser::PerlSAX ();
33: use XML::ESISParser ();
35: my $debug = 0;
36: my $debug_visit = 0;
38: if (!caller()) { die "$0: perl module, not program\n"; }
39: #else { warn __PACKAGE__." V".$VERSION." R".$REVISION." loaded from ".
40: # join(', ', ((caller(0))[1,2]))."\n"; }
42: # execute transformation on a grove object
43: sub process_grove {
44: my ($self,$main,$grove,@extra) = @_;
45: @{ $self->[&IDX_context()] } = (),
46: %{ $self->[&IDX_on_next()] } = (),
47: $self->[&IDX_current_element()] = $grove;
48: return $self->dispatch_subtrees($main,'',@extra);
49: }
51: # execute transformation on a handle or file
52: sub process {
53: my ($self,$main,$source,@extra) = @_;
54: my $grove = eval {
55: my $input_type = $self->[&IDX_input_mode()];
56: if (!$input_type || $input_type eq 'default') { # infer type
57: if (ref($source) eq 'SCALAR') {
58: $input_type = 'literal';
59: } elsif (ref $source) {
60: $input_type = 'handle';
61: } else {
62: $input_type = 'file';
63: }
64: }
65: my %source;
66: if ($input_type eq 'handle') {
67: %source = ('ByteStream', $source);
68: } elsif ($input_type eq 'literal' || $input_type eq 'string') {
69: %source = ('String', $source);
70: } elsif ($input_type eq 'file' || $input_type eq 'path') {
71: %source = ('SystemId', $source);
72: } else {
73: die "unknown input type '".$input_type."'";
74: }
76: if (! $self->[&IDX_parser()]) { # make default parser object
77: if ($self->[&IDX_markup_mode()] eq 'sgml') {
78: $self->parser(XML::ESISParser->new ( IsSGML=>1 ));
79: } else {
80: #if ($input_type eq 'esis') {
81: # $self->parser(XML::ESISParser->new ( IsSGML=>0 ));
82: #} else {
83: $self->parser(XML::Parser::PerlSAX->new());
84: #}
85: }
86: }
87: die 'failed to instantiate parser' unless defined $self->[&IDX_parser()];
88: $self->[&IDX_parser()]->parse ( Source => \%source );
89: };
90: die "parsing failed: ".$@ unless defined $grove;
91: return $self->process_grove($main,$grove,@extra);
92: }
94: # set and retrieve the current parser object
95: sub parser {
96: my ($self,$parser) = @_;
97: if (defined $parser) {
98: #use XML::Grove::Builder;
99: my $builder = XML::Grove::Builder->new();
100: $parser->{'Handler'} = $builder;
101: # delete other handlers, just in case?
102: }
103: return $self->SUPER::parser($parser);
104: }
106: #create empty object
107: sub new {
108: my $proto = shift;
109: my $class = ref($proto) || $proto;
110: my $self = $class->SUPER::new(@_);
111: return undef unless defined $self;
112: #die "failed to construct parent class object"
114: $self->[&IDX_context()] = [];
115: $self->[&IDX_on_next()] = {};
117: $self = bless $self, $class;
118: return $self;
119: }
121: sub context_path {
122: my $self = shift;
123: return '/'.join '/', (map {
124: $_->{'Name'} || ''
125: } @{ $self->[&IDX_context()] });
126: }
128: #reparent current event subtree
129: sub reparent_current_subtree {
130: my ($self,$psname,@extraargs) = @_;
131: my ($state,$element,$newelement,$parent,$saved_env,$ps_env,$cname,
132: @elcontent,%elattrs,%elflags);
133:
134: #get current object/element
135: $element = $self->[&IDX_current_element()];
136:
137: #make pseudo element
138: $newelement =
139: XML::Grove::Element->new('Name'=>$psname,
140: 'Contents'=> [ $element ],
141: 'Attributes'=> { @extraargs }
142: );
143:
144: #warn "DBG:context:reparent pseudo:".$newelement->{"Name"}.
145: # ":FROM:".$element->{"Name"}."\n"
146: # if SGML::ElementMap::$debug_elpath;
147:
148: $self->[&IDX_current_element()] = $newelement;
149: $self->[&IDX_on_next()]->{'repeat current'} = 1;
150: $self->[&IDX_on_next()]->{'hide current context'} = 1;
151: return 1;
152: }
154: #insert event over next event subtree
155: sub reparent_subtrees {
156: my ($self,$psname,@extraargs) = @_;
157: my ($state,$element,$newelement);
159: #get current object/element
160: $element = $self->[&IDX_current_element()];
161:
162: #make pseudo element
163: $newelement =
164: XML::Grove::Element->new('Name'=>$psname,
165: 'Contents'=>$element->{'Contents'},
166: 'Attributes'=>{ @extraargs }
167: );
168:
169: #warn "DBG:context:insert pseudo:".
170: # $newelement->{"Name"}."\n" if $debug_elpath;
171:
172: $self->[&IDX_current_element()] = $newelement;
173: $self->[&IDX_on_next()]->{'repeat current'} = 1;
174: return 1;
175: }
177: #dispatch on next event subtree
178: sub dispatch_subtrees {
179: my ($self,$main,$pattern,@extra) = @_;
180: my ($element,$tmp,@result);
181: $element = $self->[&IDX_current_element()];
182: if (!$element->isa("XML::Grove")) {
183: croak ref($self).":ERROR:content processing function called out of context";
184: }
185: my %onetime = %{ $self->[&IDX_on_next()] };
186: %{ $self->[&IDX_on_next()] } = ();
187: if (defined $onetime{'hide current context'}) {
188: $tmp = pop @{ $self->[&IDX_context()] };
189: }
190: #here we enter the sub tree
191: if (defined $onetime{'repeat current'}) {
192: @result = $element->accept ($self,$main,'',@extra);
193: } else {
194: #@result = $element->children_accept ($self,$main, @extra);
195: my ($children,$c,@res);
196: $children = $element->{'Contents'};
197: $c = $onetime{'start content at'} || 0;
198: for (; $c<@$children; $c+=1) {
199: @res = $children->[$c]->accept($self,$main,$pattern,@extra);
200: if (@res == 1 && !defined $res[0]) { # pattern did not match
201: $self->[&IDX_on_next()]->{'start content at'} = $c;
202: last;
203: } else {
204: push @result, @res;
205: }
206: }
207: }
208: #finished with subtree, do clean up
209: # WARNING: I am not certain if hide current content can interact with
210: # selecting partial content. Should test if cases not disjoint.
211: if (defined $onetime{'hide current context'}) {
212: push @{ $self->[&IDX_context()] }, $tmp;
213: }
214: return @result;
215: }
217: #skip over next event subtree
218: sub skip_subtrees {
219: my $self = shift;
220: %{ $self->[&IDX_on_next()] } = ();
221: return;
222: }
224: #common inner code for visitor calls
225: sub _dispatch {
226: my $self = shift;
227: my $main = shift;
228: push @{ $self->[&IDX_context()] }, $_[2]; #save the event object
229: my @results = $main->_dispatch_event(@_);
230: pop @{ $self->[&IDX_context()] };
231: return @results;
232: }
234: ##### Visitor hooks
236: #Set up and start the transformation (not called in current code)
237: sub visit_document {
238: warn "DBG:VISIT:document\n" if $debug_visit;
239: my $self = shift;
240: my $grove = shift;
241: return $grove->children_accept ($self, @_);
242: }
244: sub visit_element {
245: warn "DBG:VISIT:element\n" if $debug_visit;
246: my ($self,$element,$main,$pattern) = splice @_, 0, 4, ();
247: my (@results,$saved);
248: #my $state = $$self[$st];
249: #$element->{"_parent"} = $$state[&IDX_current_element()]; #set parent
250: #print STDERR "DBG:context:".$$state{'context environment'}.":".$element->{"Name"}."\n" if $debug_elpath;
251: if ($pattern ne '') {
252: return undef unless $element->{"Name"} =~ m/^$pattern$/;
253: }
254: $saved = $self->[&IDX_current_element()];
255: $self->[&IDX_current_element()] = $element;
256: @results = $self->_dispatch($main,'Element',$element->{"Name"},
257: $element,@_);
258: #delete $element->{"_parent"}; #break circular reference
259: $self->[&IDX_current_element()] = $saved;
260: return @results;
261: }
263: sub visit_entity {
264: warn "DBG:VISIT:entity (sdata)\n" if $debug_visit;
265: my ($self,$sdata,$main,$pattern) = splice @_, 0, 4, ();
266: return undef if $pattern ne ''; # patterns only match elements
267: return $self->_dispatch($main,'SData',$sdata->{'Name'},$sdata,@_);
268: }
270: sub visit_pi {
271: warn "DBG:VISIT:pi\n" if $debug_visit;
272: my ($self,$pi,$main,$pattern) = splice @_, 0, 4, ();
273: return undef if $pattern ne ''; # patterns only match elements
274: if (exists $pi->{'Target'}) {
275: return $self->_dispatch($main,'PI',$pi->{"Target"},$pi,@_);
276: } else {
277: return $self->_dispatch($main,'PI',$pi->{"Data"},$pi,@_);
278: }
279: }
281: sub visit_characters {
282: warn "DBG:VISIT:characters\n" if $debug_visit;
283: my ($self,$cdata,$main,$pattern) = splice @_, 0, 4, ();
284: return undef if $pattern ne ''; # patterns only match elements
285: #cdata has no keys
286: return $self->_dispatch($main,'CData','',$cdata,@_);
287: }
289: #I think characters has superceded this
290: sub visit_cdata {
291: warn "DBG:VISIT:cdata\n" if $debug_visit;
292: my ($self,$cdata,$main,$pattern) = splice @_, 0, 4, ();
293: return undef if $pattern ne ''; # patterns only match elements
294: #cdata has no keys
295: return $self->_dispatch($main,'CData','',$cdata,@_);
296: }
298: #can this be a content object?
299: sub visit_sgml_entity {
300: warn "DBG:VISIT:sgml_entity\n" if $debug_visit;
301: warn "DBG:VISIT:entity:untested\n" if $debug;
302: my ($self,$entity,$main,$pattern) = splice @_, 0, 4, ();
303: return undef if $pattern ne ''; # patterns only match elements
304: return $self->_dispatch($main,'Entity',$entity->{"Name"},$entity,@_);
305: }
307: sub visit_external_entity {
308: warn "DBG:VISIT:external_entity\n" if $debug_visit;
309: warn "DBG:VISIT:entity:untested\n" if $debug;
310: my ($self,$ext_entity,$main,$pattern) = splice @_, 0, 4, ();
311: return undef if $pattern ne ''; # patterns only match elements
312: return $self->_dispatch($main,'Entity',$ext_entity->{"Name"},
313: $ext_entity,@_);
314: }
316: sub visit_subdoc_entity {
317: warn "DBG:VISIT:subdoc_entity\n" if $debug_visit;
318: warn "DBG:VISIT:entity:untested\n" if $debug;
319: my ($self,$subdoc_entity,$main,$pattern) = splice @_, 0, 4, ();
320: return undef if $pattern ne ''; # patterns only match elements
321: return $self->_dispatch($main,'SubDoc',$subdoc_entity->{"Name"},
322: $subdoc_entity,@_);
323: }
325: sub visit_comment {
326: warn "DBG:VISIT:comment\n" if $debug_visit;
327: my ($self,$comment,$main,$pattern) = splice @_, 0, 4, ();
328: return undef if $pattern ne ''; # patterns only match elements
329: #no key for comments
330: return $self->_dispatch($main,'Comment','',$comment,@_);
331: }
333: #sub visit_ {
334: # warn "DBG:VISIT:\n" if $debug_visit;
335: # my $self = shift;
336: # warn "No internal support for '' objects" if $^W;
337: #}
339: sub visit_notation {
340: warn "DBG:VISIT:notation\n" if $debug_visit;
341: my $self = shift;
342: warn "No internal support for notation objects" if $^W;
343: return undef;
344: }
346: sub visit_subdoc {
347: warn "DBG:VISIT:subdoc\n" if $debug_visit;
348: my $self = shift;
349: warn "No internal support for subdoc objects" if $^W;
350: return undef;
351: }
354: #this becomes cdata, for now
355: #sub visit_scalar {
356: # warn "DBG:VISIT:scalar\n" if $debug_visit;
357: # my ($self) = shift;
358: # my ($scalar) = shift;
359: # $self->DispatchEvent('CData','',$scalar,@_);
360: #}
362: #does not catch visitor calls
363: #sub AUTOLOAD {
364: # my $self = shift;
365: # warn "DBG:missing method:$AUTOLOAD\n" if $debug;
366: # my $method = $AUTOLOAD;
367: # $method =~ s/.*:://;
368: # return if $method eq 'DESTROY';
369: #
370: # print "UNRECOGNIZED $method\n";
371: #}
373: 1;