1: #!/usr/local/bin/perl 2: # Copyright (c) 1998 Robert Braddock. All rights reserved. 3: # This program is free software; you can redistribute it and/or 4: # modify it under the same terms as Perl itself. 6: package SGML::ElementMap::Driver::SGMLS; 7: use strict; 9: use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION $REVISION); BEGIN { 10: @ISA = qw( SGML::ElementMap::Driver::EventQueue ); 11: @EXPORT = (); 12: @EXPORT_OK = (); 13: %EXPORT_TAGS = (); 14: $VERSION = 0.2; 15: $REVISION = q$Revision: $; 16: } 17: use SGML::ElementMap::Driver::EventQueue (); 18: use Exporter; 20: use Carp; 21: use SGMLS 1.4; 23: my $debug = 0; 24: my $trace = 0; 25: #if ($debug) 26: eval ' use Data::Dumper; 27: $Data::Dumper::Indent = 0; 28: $Data::Dumper::Terse = 1; 29: ' if $debug; 31: BEGIN { 32: SGML::ElementMap::Driver::set_obj_constants('IDX',&SGML::ElementMap::Driver::EventQueue::IDX_LAST(), qw(source)); 33: sub IDX_parser () { return &SGML::ElementMap::Driver::EventQueue::IDX_parser(); } 34: sub IDX_input_mode () { return &SGML::ElementMap::Driver::EventQueue::IDX_input_mode(); } 35: sub IDX_markup_mode () { return &SGML::ElementMap::Driver::EventQueue::IDX_markup_mode(); } 36: } 39: #create empty object 40: sub new { 41: my $proto = shift; 42: my $class = ref($proto) || $proto; 43: my $self = $class->SUPER::new(); 44: $self->[&IDX_parser()] = ''; 45: $self = bless $self, $class; 46: return $self; 47: } 49: sub parser { 50: my ($self,$parser) = @_; 51: croak __PACKAGE__." cannot change parser\n"; 52: return $self->SUPER::parser($parser); 53: } 54: #start new traversal (for use by users) 55: #passed main module as first (normal) argument, rest are process specific 56: sub process { 57: my ($self,$main,$source,@list) = @_; 59: warn '--PROCESS from '.join(', line ',(caller())[1,2])."\n" if $trace; 60: #if (! $self->[&IDX_parser()]) { # make default parser object 61: #} 63: my @env = (); 64: my @args = (); 65: if ($self->[&IDX_markup_mode()] eq 'sgml') { 67: } elsif ($self->[&IDX_markup_mode()] eq 'xml') { 68: #croak "failed to instantiate parser" unless 69: # defined $self->parser('XML::Parser::ExpatNB'); 70: @env = ('SP_CHARSET_FIXED' => 'YES', 71: 'SP_ENCODING' => 'XML'); 72: @args = ("-wxml","-cdeclaration/xml.soc"); 74: } else { 75: croak "unknown markup mode '".$self->[&IDX_markup_mode()]."'\n"; 76: } 78: # set output options 79: push @args,(map { '-o'.$_; } qw(entity id line included empty 80: notation-sysid nonsgml 81: comment omitted)); 83: my $input_type = $self->[&IDX_input_mode()]; 84: if (!$input_type || $input_type eq 'default') { # infer type 85: if (ref($source) eq 'SCALAR') { 86: $input_type = 'literal'; 87: } elsif (ref $source) { 88: $input_type = 'handle'; 89: } else { 90: $input_type = 'file'; 91: } 92: } 94: my $esis_fh; 95: my $esis_cmd = 'onsgmls'; 96: if ($input_type eq 'file' || $input_type eq 'path') { 97: my $pid = open(ESIS, '-|'); 98: croak "fork failed: $!\n" unless defined $pid; 99: if ($pid == 0) { 100: close STDIN; 101: while (@env) { $ENV{shift @env} = shift @env; } 102: exec $esis_cmd,@args,$source; 103: } 104: warn "attempting FILE ($pid)\n" if $debug; 105: $esis_fh = \*ESIS; #new IO::Handle; 106: #warn "<>".scalar(<ESIS>); 107: #die "~~".$esis_fh->getline; 108: #$esis_fh->open("<&ESIS"); 109: #$esis_fh->fdopen(fileno(ESIS),'r'); 110: 111: croak "failed setting handle to parser output: $!" 112: unless defined $esis_fh; 113: #close ESIS; 114: warn "filehandles open" if $debug; 116: } elsif ($input_type eq 'handle') { 117: warn "attempting HANDLE\n" if $debug; 118: my $in_fh = new IO::Handle; 119: my $pipe = new IO::Pipe($in_fh, $source); 120: croak "failed setting handle to parser output: $!" 121: unless defined $pipe; 123: my $pid = open(ESIS, '-|'); 124: croak "fork failed: $!\n" unless defined $pid; 125: if ($pid == 0) { 126: close $source; 127: #$pipe->reader; 128: my $in_fh = 'STDIN'->fdopen($pipe->fileno,'w'); 130: while (@env) { $ENV{shift @env} = shift @env; } 131: exec $esis_cmd,@args,'-'; 132: } 133: #$pipe->writer; 135: my $esis_fh = new IO::Handle; 136: $esis_fh->fdopen(fileno(ESIS),'r'); 137: croak "failed setting handle to parser output: $!" 138: unless defined $esis_fh; 139: close ESIS; 140: warn "filehandles open" if $debug; 141: } elsif (0) { 143: $esis_fh = new IO::Pipe; 144: my $pid = fork; # open(PIPE,'-|'); 145: die "fork failed: $!" unless defined $pid; 146: $esis_fh = new IO::Handle('-','r'); 147: if ($pid == 0) { 148: $esis_fh->writer(); 149: 150: while (@env) { $ENV{shift @env} = shift @env; } 151: exec $esis_cmd,@args,$source; 152: } 153: my $fh = new IO::File($source,'r'); 154: die "failed to open $source: $!\n" unless defined $fh; 155: $source = $fh; 156: $input_type = 'handle'; 157: #} elsif ($input_type eq 'literal' || $input_type eq 'string') { 158: # $self->{'offset'} = 0; 159: $self->[&IDX_source()] = ref($source) ? $source : \$source; 160: } elsif ($input_type eq 'esis-file') { 161: $esis_fh = new IO::File($source, 'r'); 162: die "failed to open $source: $!\n" unless defined $esis_fh; 163: } else { 164: die "unknown input type '".$input_type."'"; 165: } 167: die "esis stream open failed: $!" 168: unless defined($esis_fh) && !eof($esis_fh); 170: my $parse = new SGMLS('handle'=>$esis_fh, 'use_re_events'=>0, 171: 'filter_rs' =>1); 172: return undef unless defined $parse; 173: $parse->handle_attr_sdata(\&SGMLS::bracketed_sdata_to_entity, 174: \&SGMLS::optional_list_join); 175: $parse->handle_sdata(\&SGMLS::bracketed_sdata_to_entity); 177: #$main->debug('events', 'handler_lookup', 'modes'); 179: $self->[&IDX_parser()] = $parse; 180: @list = $self->SUPER::process($main,'',@list); 182: warn "processing complete" if $debug; 183: undef $parse; 184: return @list; 185: } 187: #start new traversal of file (for default use by main) 188: #passed main module and file name as first (normal) argument, 189: # rest are process specific 190: #sub process_xml_file { 191: # my ($self,$main,$file,@list) = @_; 192: # use IO::File; 193: # my $fh = new IO::File; 194: #need some command and argument string options? 195: #$ENV{SP_CHARSET_FIXED} = 'YES'; 196: #$ENV{SP_ENCODING} = 'XML'; 197: #$ENV{SGML_CATALOG_FILES} = 'pubtext/xml.soc'; 198: # $fh->open("SP_CHARSET_FIXED=YES SP_ENCODING=XML onsgmls -wxml -cdeclaration/xml.soc $file |"); 199: # croak "Failed to start parser: $!\n" unless defined $fh; 200: # @list = $self->process($main,$fh,@list); 201: # $fh->close; 202: # 203: # return @list; 204: #} 206: #start new traversal of file (for default use by main) 207: #passed main module and file name as first (normal) argument, 208: # rest are process specific 209: #sub process_sgml_file { 210: # my ($self,$main,$file,@list) = @_; 211: # use IO::File; 212: # my $fh = new IO::File; 213: # #need some command and argument string options 214: # $fh->open("onsgmls $file |"); 215: # croak "Failed to start parser: $!\n" unless defined $fh; 216: # @list = $self->process($main,$fh,@list); 217: # $fh->close; 218: # 219: # return @list; 220: #} 222: sub trace { 223: my ($arg) = @_; 224: if (defined $arg) { 225: $debug = $arg; 226: } else { 227: $debug = 1; 228: } 229: } 231: sub queue_more_events { 232: my ($self,$queue) = @_; 233: my $parser = $self->[&IDX_parser()]; 234: my ($event,$data,$type, @aux); 235: $event = $parser->next_event; 236: if ($event) { 237: $type = $event->type; 238: $data = $event->data; 240: if ($type eq 'start_element') { 241: # TODO attributes need further processing! 242: my @order = $data->attribute_order('all'); 243: my $c = $data->attribute_order('specified'); 244: #if (@order > 0 && !defined $order[0]) { 245: # @order = $data->attribute_names; 246: #} 247: if ($debug) { 248: warn "ATTRIBUTE LIST:".join(':',map { 249: $_.'['.$data->attribute($_)->is_implied.'/'. 250: $data->attribute($_)->is_specified.'/'. 251: $data->attribute($_)->position.']' } @order); 252: } 253: my @extra = (); 254: @aux = (); 256: for(my $i=0; $i<@order; $i+=1) { # was $i<$c 257: my $aname = $order[$i]; 258: my $adata = $data->attribute($aname); 259: if ($adata->is_implied) { 260: push @aux, ($aname, undef); 261: } else { 262: push @aux, ($aname, $adata->value); 263: } 264: } 265: #if ($data->attribute($_)->is_implied) { push @extra,$_; } 266: #else { 267: #} 268: if (defined($c) && $c < @order) { 269: warn "ATTRIBUTE READ($c):".join(':',@aux)."\nORDER:". 270: join(':',@order) if $debug; 271: #die 'attribute construction error' if $c != (@aux / 2); 272: } else { 273: $c = @aux / 2; 274: } 276: #warn "ATTRIBUTE DUMP:".Dumper($data->attributes) if $debug; 277: warn "ATTRIBUTE READ($c):".join(':',@aux)."\nORDER:".join(':',@order) if $debug; 278: $data = {'Name' => $data->name, 279: 'Attributes' => { @aux }, 280: 'AttributeOrder' => [@order], 281: 'Defaulted' => $c }; 282: 283: } elsif ($type eq 'end_element') { 284: $data = $data->name; 286: } elsif ($type eq 'cdata') { #data is string 287: warn "CHAR_DATA:[".$data."]\n" if $debug; 288: $data = { 'Data'=>$data }; 290: } elsif ($type eq 're') { #no data 291: warn "RE\n" if $debug; 292: $data = { 'Data'=>"\n" }; 293: $type = 'cdata'; 295: } elsif ($type eq 'comment') { #no data 296: warn "COMMENT:[".$data."]\n" if $debug; 297: $data = { 'Data'=>$data }; 299: } elsif ($type eq 'sdata') { #data is entity object or string 300: warn "SPECIAL_DATA:[".$data."]\n" if $debug; 301: if (ref $data) { 302: $data = { 'Name' => $data->name, 'Data' => $data->value }; 303: } elsif ($data =~ m/\[\s*(\S+)\s*\]/) { 304: $data = { 'Name' => $1, 'Data' => $data }; 305: } else { 306: $data = { 'Data' => $data }; 307: } 308: } elsif ($type eq 'pi') { #data is string 309: warn "PI:[".$data."]\n" if $debug; 310: if ($self->[&IDX_markup_mode()] eq 'sgml') { 311: $data = { 'Data' => $data }; 312: } else { 313: my @pi = split /\s+/, $data, 2; 314: if (@pi == 2 && $pi[0] =~ m/\S/) { 315: $data = { 'Target' => $pi[0], 'Data' => $pi[1] }; 316: } else { 317: $data = { 'Data' => $data }; 318: } 319: } 321: } elsif ($type eq 'entity') { #data is SGMLS::Entity 322: die "unimplemented"; 324: } elsif ($type eq 'start_subdoc') { #data is SGMLS::Entity 325: $data = {'Name' => $data->name, 326: 'Data' => $data->value # TODO needs processing 327: }; 329: } elsif ($type eq 'end_subdoc') { #data is SGMLS::Entity 330: $data = { 'Name' => $data->name, 331: 'Data' => $data->value, # TODO needs processing 332: }; 333: } elsif ($type eq 'conforming') { #no data 334: $data = 'conforming'; 335: $type = ''; 336: $event = $parser->next_event; 337: die 'conforming event not final event' if $event; 338: } else { 339: die "Unknown event '$type' from parser"; 340: } 342: push @$queue, $type, $data, ''; 343: 344: } else { 345: return 0; 346: } 347: return 1; 348: } 350: 1;