1: #!/usr/local/bin/perl
  2: # Copyright (c) 1998 Robert Braddock. All rights reserved.
  3: # This program is free software; you can redistribute it and/or
  4: # modify it under the same terms as Perl itself.

  6: package SGML::ElementMap::Driver::SGMLS;
  7: use strict;

  9: use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION $REVISION);  BEGIN {
 10:     @ISA =       qw( SGML::ElementMap::Driver::EventQueue );
 11:     @EXPORT =      ();
 12:     @EXPORT_OK =   ();
 13:     %EXPORT_TAGS = ();
 14:     $VERSION =     0.2;
 15:     $REVISION =   q$Revision: $;
 16: }
 17: use SGML::ElementMap::Driver::EventQueue ();
 18: use Exporter;

 20: use Carp;
 21: use SGMLS 1.4;

 23: my $debug = 0;
 24: my $trace = 0;
 25: #if ($debug)
 26: eval ' use Data::Dumper;
 27: $Data::Dumper::Indent = 0;
 28: $Data::Dumper::Terse = 1;
 29: ' if $debug;

 31: BEGIN {
 32: SGML::ElementMap::Driver::set_obj_constants('IDX',&SGML::ElementMap::Driver::EventQueue::IDX_LAST(), qw(source));
 33: sub IDX_parser () { return &SGML::ElementMap::Driver::EventQueue::IDX_parser(); }
 34: sub IDX_input_mode () { return &SGML::ElementMap::Driver::EventQueue::IDX_input_mode(); }
 35: sub IDX_markup_mode () { return &SGML::ElementMap::Driver::EventQueue::IDX_markup_mode(); }
 36: }


 39: #create empty object
 40: sub new {
 41:     my $proto = shift;
 42:     my $class = ref($proto) || $proto;
 43:     my $self = $class->SUPER::new();
 44:     $self->[&IDX_parser()] = '';
 45:     $self = bless $self, $class;
 46:     return $self;
 47: }

 49: sub parser {
 50:     my ($self,$parser) = @_;
 51:     croak __PACKAGE__." cannot change parser\n";
 52:     return $self->SUPER::parser($parser);
 53: }
 54: #start new traversal (for use by users)
 55: #passed main module as first (normal) argument, rest are process specific
 56: sub process {
 57:     my ($self,$main,$source,@list) = @_;

 59:     warn '--PROCESS from '.join(', line ',(caller())[1,2])."\n" if $trace;
 60:     #if (! $self->[&IDX_parser()]) { # make default parser object
 61:     #}

 63:     my @env = ();
 64:     my @args = ();
 65:     if ($self->[&IDX_markup_mode()] eq 'sgml') {

 67:     } elsif ($self->[&IDX_markup_mode()] eq 'xml') {
 68:         #croak "failed to instantiate parser" unless
 69:         #    defined $self->parser('XML::Parser::ExpatNB');
 70:         @env = ('SP_CHARSET_FIXED' => 'YES',
 71:                 'SP_ENCODING' => 'XML');
 72:         @args = ("-wxml","-cdeclaration/xml.soc");

 74:     } else {
 75:         croak "unknown markup mode '".$self->[&IDX_markup_mode()]."'\n";
 76:     }

 78:     # set output options
 79:     push @args,(map { '-o'.$_; } qw(entity id line included empty
 80:                                    notation-sysid nonsgml 
 81:                                    comment omitted));

 83:     my $input_type = $self->[&IDX_input_mode()];
 84:     if (!$input_type || $input_type eq 'default') { # infer type
 85:         if (ref($source) eq 'SCALAR') {
 86:             $input_type = 'literal';
 87:         } elsif (ref $source) {
 88:             $input_type = 'handle';
 89:         } else {
 90:             $input_type = 'file';
 91:         }
 92:     }

 94:     my $esis_fh;
 95:     my $esis_cmd = 'onsgmls';
 96:     if ($input_type eq 'file' || $input_type eq 'path') {
 97:         my $pid = open(ESIS, '-|');
 98:         croak "fork failed: $!\n" unless defined $pid;
 99:         if ($pid == 0) {
100:             close STDIN;
101:             while (@env) { $ENV{shift @env} = shift @env; }
102:             exec $esis_cmd,@args,$source;
103:         }
104:         warn "attempting FILE ($pid)\n" if $debug;
105:         $esis_fh = \*ESIS; #new IO::Handle;
106:         #warn "<>".scalar(<ESIS>);
107:         #die  "~~".$esis_fh->getline;
108:         #$esis_fh->open("<&ESIS");
109:         #$esis_fh->fdopen(fileno(ESIS),'r');
110:         
111:         croak "failed setting handle to parser output: $!"
112:             unless defined $esis_fh;
113:         #close ESIS;
114:         warn "filehandles open" if $debug;

116:     } elsif ($input_type eq 'handle') {
117:         warn "attempting HANDLE\n" if $debug;
118:         my $in_fh = new IO::Handle;
119:         my $pipe = new IO::Pipe($in_fh, $source);
120:         croak "failed setting handle to parser output: $!"
121:             unless defined $pipe;

123:         my $pid = open(ESIS, '-|');
124:         croak "fork failed: $!\n" unless defined $pid;
125:         if ($pid == 0) {
126:             close $source;
127:             #$pipe->reader;
128:             my $in_fh = 'STDIN'->fdopen($pipe->fileno,'w');

130:             while (@env) { $ENV{shift @env} = shift @env; }
131:             exec $esis_cmd,@args,'-';
132:         }
133:         #$pipe->writer;

135:         my $esis_fh = new IO::Handle;
136:         $esis_fh->fdopen(fileno(ESIS),'r');
137:         croak "failed setting handle to parser output: $!"
138:             unless defined $esis_fh;
139:         close ESIS;
140:         warn "filehandles open" if $debug;
141:     } elsif (0) {

143:         $esis_fh = new IO::Pipe;
144:         my $pid = fork; # open(PIPE,'-|');
145:         die "fork failed: $!" unless defined $pid;
146:             $esis_fh = new IO::Handle('-','r');
147:         if ($pid == 0) {
148:             $esis_fh->writer();
149:             
150:             while (@env) { $ENV{shift @env} = shift @env; }
151:             exec $esis_cmd,@args,$source;
152:         }
153:         my $fh = new IO::File($source,'r');
154:         die "failed to open $source: $!\n" unless defined $fh;
155:         $source = $fh;
156:         $input_type = 'handle';
157:     #} elsif ($input_type eq 'literal' || $input_type eq 'string') {
158:     #    $self->{'offset'} = 0;
159:         $self->[&IDX_source()] = ref($source) ? $source : \$source;
160:     } elsif ($input_type eq 'esis-file') {
161:         $esis_fh = new IO::File($source, 'r');
162:         die "failed to open $source: $!\n" unless defined $esis_fh;
163:     } else {
164:         die "unknown input type '".$input_type."'";
165:     }

167:     die "esis stream open failed: $!"
168:         unless defined($esis_fh) && !eof($esis_fh);

170:     my $parse = new SGMLS('handle'=>$esis_fh, 'use_re_events'=>0,
171:                           'filter_rs' =>1);
172:     return undef unless defined $parse;
173:     $parse->handle_attr_sdata(\&SGMLS::bracketed_sdata_to_entity,
174:                               \&SGMLS::optional_list_join);
175:     $parse->handle_sdata(\&SGMLS::bracketed_sdata_to_entity);

177:     #$main->debug('events', 'handler_lookup', 'modes');

179:     $self->[&IDX_parser()] = $parse;
180:     @list = $self->SUPER::process($main,'',@list);

182:     warn "processing complete" if $debug;
183:     undef $parse;
184:     return @list;
185: }

187: #start new traversal of file (for default use by main)
188: #passed main module and file name as first (normal) argument, 
189: #   rest are process specific
190: #sub process_xml_file {
191: #    my ($self,$main,$file,@list) = @_;
192: #    use IO::File;
193: #    my $fh = new IO::File;
194:     #need some command and argument string options?
195:     #$ENV{SP_CHARSET_FIXED} = 'YES';
196:     #$ENV{SP_ENCODING} = 'XML';
197:     #$ENV{SGML_CATALOG_FILES} = 'pubtext/xml.soc';
198: #    $fh->open("SP_CHARSET_FIXED=YES SP_ENCODING=XML onsgmls -wxml -cdeclaration/xml.soc $file |");
199: #    croak "Failed to start parser: $!\n" unless defined $fh;
200: #    @list = $self->process($main,$fh,@list);
201: #    $fh->close;
202: #
203: #    return @list;
204: #}

206: #start new traversal of file (for default use by main)
207: #passed main module and file name as first (normal) argument, 
208: #   rest are process specific
209: #sub process_sgml_file {
210: #    my ($self,$main,$file,@list) = @_;
211: #    use IO::File;
212: #    my $fh = new IO::File;
213: #    #need some command and argument string options
214: #    $fh->open("onsgmls $file |");
215: #    croak "Failed to start parser: $!\n" unless defined $fh;
216: #    @list = $self->process($main,$fh,@list);
217: #    $fh->close;
218: #
219: #    return @list;
220: #}

222: sub trace {
223:     my ($arg) = @_;
224:     if (defined $arg) {
225:         $debug = $arg;
226:     } else {
227:         $debug = 1;
228:     }
229: }

231: sub queue_more_events {
232:     my ($self,$queue) = @_;
233:     my $parser = $self->[&IDX_parser()];
234:     my ($event,$data,$type, @aux);
235:     $event = $parser->next_event;
236:     if ($event) {
237:         $type = $event->type;
238:         $data = $event->data;

240:         if ($type eq 'start_element') {
241:             #     TODO attributes need further processing!
242:             my @order = $data->attribute_order('all');
243:             my $c = $data->attribute_order('specified');
244:             #if (@order > 0 && !defined $order[0]) {
245:             #    @order = $data->attribute_names;
246:             #}
247:             if ($debug) {
248:                 warn "ATTRIBUTE LIST:".join(':',map {
249:                     $_.'['.$data->attribute($_)->is_implied.'/'.
250:                         $data->attribute($_)->is_specified.'/'.
251:                             $data->attribute($_)->position.']' } @order);
252:             }
253:             my @extra = ();
254:             @aux = ();

256:             for(my $i=0; $i<@order; $i+=1) { # was $i<$c
257:                 my $aname = $order[$i];
258:                 my $adata = $data->attribute($aname);
259:                 if ($adata->is_implied) {
260:                     push @aux, ($aname, undef);
261:                 } else {
262:                     push @aux, ($aname, $adata->value);
263:                 }
264:             }
265:             #if ($data->attribute($_)->is_implied) { push @extra,$_; }
266:             #else {
267:             #}
268:             if (defined($c) && $c < @order) {
269:                 warn "ATTRIBUTE READ($c):".join(':',@aux)."\nORDER:".
270:                     join(':',@order) if $debug;
271:                 #die 'attribute construction error' if $c != (@aux / 2);
272:             } else {
273:                 $c = @aux / 2;
274:             }

276:             #warn "ATTRIBUTE DUMP:".Dumper($data->attributes) if $debug;
277:             warn "ATTRIBUTE READ($c):".join(':',@aux)."\nORDER:".join(':',@order) if $debug;
278:             $data =  {'Name' => $data->name,
279:                       'Attributes' => { @aux },
280:                       'AttributeOrder' => [@order],
281:                       'Defaulted' => $c };
282:             
283:         } elsif ($type eq 'end_element') {
284:             $data = $data->name;

286:         } elsif ($type eq 'cdata') { #data is string
287:             warn "CHAR_DATA:[".$data."]\n" if $debug;
288:             $data = { 'Data'=>$data };

290:         } elsif ($type eq 're') { #no data
291:             warn "RE\n" if $debug;
292:             $data = { 'Data'=>"\n" };
293:             $type = 'cdata';

295:         } elsif ($type eq 'comment') { #no data
296:             warn "COMMENT:[".$data."]\n" if $debug;
297:             $data = { 'Data'=>$data };

299:         } elsif ($type eq 'sdata') { #data is entity object or string
300:             warn "SPECIAL_DATA:[".$data."]\n" if $debug;
301:             if (ref $data) {
302:                 $data = { 'Name' => $data->name, 'Data' => $data->value };
303:             } elsif ($data =~ m/\[\s*(\S+)\s*\]/) {
304:                 $data = { 'Name' => $1, 'Data' => $data };
305:             } else {
306:                 $data = { 'Data' => $data };
307:             }
308:         } elsif ($type eq 'pi') { #data is string
309:             warn "PI:[".$data."]\n" if $debug;
310:             if ($self->[&IDX_markup_mode()] eq 'sgml') {
311:                 $data = { 'Data' => $data };
312:             } else {
313:                 my @pi = split /\s+/, $data, 2;
314:                 if (@pi == 2 && $pi[0] =~ m/\S/) {
315:                     $data = { 'Target' => $pi[0], 'Data' => $pi[1] };
316:                 } else {
317:                     $data = { 'Data' => $data };
318:                 }
319:             }

321:         } elsif ($type eq 'entity') {  #data is SGMLS::Entity
322:             die "unimplemented";

324:         } elsif ($type eq 'start_subdoc') { #data is SGMLS::Entity
325:             $data =  {'Name' => $data->name,
326:                       'Data' => $data->value # TODO needs processing
327:                       };

329:         } elsif ($type eq 'end_subdoc') { #data is SGMLS::Entity
330:             $data =  { 'Name' => $data->name,
331:                        'Data' => $data->value, # TODO needs processing
332:                        };
333:         } elsif ($type eq 'conforming') { #no data
334:             $data = 'conforming';
335:             $type = '';
336:             $event = $parser->next_event;
337:             die 'conforming event not final event' if $event;
338:         } else {
339:             die "Unknown event '$type' from parser";
340:         }

342:         push @$queue, $type, $data, '';
343:         
344:     } else {
345:         return 0;
346:     }
347:     return 1;
348: }

350: 1;