1: #!/usr/local/bin/perl
2: # Copyright (c) 1998 Robert Braddock. All rights reserved.
3: # This program is free software; you can redistribute it and/or
4: # modify it under the same terms as Perl itself.
6: package SGML::ElementMap::Driver::SGMLS;
7: use strict;
9: use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION $REVISION); BEGIN {
10: @ISA = qw( SGML::ElementMap::Driver::EventQueue );
11: @EXPORT = ();
12: @EXPORT_OK = ();
13: %EXPORT_TAGS = ();
14: $VERSION = 0.2;
15: $REVISION = q$Revision: $;
16: }
17: use SGML::ElementMap::Driver::EventQueue ();
18: use Exporter;
20: use Carp;
21: use SGMLS 1.4;
23: my $debug = 0;
24: my $trace = 0;
25: #if ($debug)
26: eval ' use Data::Dumper;
27: $Data::Dumper::Indent = 0;
28: $Data::Dumper::Terse = 1;
29: ' if $debug;
31: BEGIN {
32: SGML::ElementMap::Driver::set_obj_constants('IDX',&SGML::ElementMap::Driver::EventQueue::IDX_LAST(), qw(source));
33: sub IDX_parser () { return &SGML::ElementMap::Driver::EventQueue::IDX_parser(); }
34: sub IDX_input_mode () { return &SGML::ElementMap::Driver::EventQueue::IDX_input_mode(); }
35: sub IDX_markup_mode () { return &SGML::ElementMap::Driver::EventQueue::IDX_markup_mode(); }
36: }
39: #create empty object
40: sub new {
41: my $proto = shift;
42: my $class = ref($proto) || $proto;
43: my $self = $class->SUPER::new();
44: $self->[&IDX_parser()] = '';
45: $self = bless $self, $class;
46: return $self;
47: }
49: sub parser {
50: my ($self,$parser) = @_;
51: croak __PACKAGE__." cannot change parser\n";
52: return $self->SUPER::parser($parser);
53: }
54: #start new traversal (for use by users)
55: #passed main module as first (normal) argument, rest are process specific
56: sub process {
57: my ($self,$main,$source,@list) = @_;
59: warn '--PROCESS from '.join(', line ',(caller())[1,2])."\n" if $trace;
60: #if (! $self->[&IDX_parser()]) { # make default parser object
61: #}
63: my @env = ();
64: my @args = ();
65: if ($self->[&IDX_markup_mode()] eq 'sgml') {
67: } elsif ($self->[&IDX_markup_mode()] eq 'xml') {
68: #croak "failed to instantiate parser" unless
69: # defined $self->parser('XML::Parser::ExpatNB');
70: @env = ('SP_CHARSET_FIXED' => 'YES',
71: 'SP_ENCODING' => 'XML');
72: @args = ("-wxml","-cdeclaration/xml.soc");
74: } else {
75: croak "unknown markup mode '".$self->[&IDX_markup_mode()]."'\n";
76: }
78: # set output options
79: push @args,(map { '-o'.$_; } qw(entity id line included empty
80: notation-sysid nonsgml
81: comment omitted));
83: my $input_type = $self->[&IDX_input_mode()];
84: if (!$input_type || $input_type eq 'default') { # infer type
85: if (ref($source) eq 'SCALAR') {
86: $input_type = 'literal';
87: } elsif (ref $source) {
88: $input_type = 'handle';
89: } else {
90: $input_type = 'file';
91: }
92: }
94: my $esis_fh;
95: my $esis_cmd = 'onsgmls';
96: if ($input_type eq 'file' || $input_type eq 'path') {
97: my $pid = open(ESIS, '-|');
98: croak "fork failed: $!\n" unless defined $pid;
99: if ($pid == 0) {
100: close STDIN;
101: while (@env) { $ENV{shift @env} = shift @env; }
102: exec $esis_cmd,@args,$source;
103: }
104: warn "attempting FILE ($pid)\n" if $debug;
105: $esis_fh = \*ESIS; #new IO::Handle;
106: #warn "<>".scalar(<ESIS>);
107: #die "~~".$esis_fh->getline;
108: #$esis_fh->open("<&ESIS");
109: #$esis_fh->fdopen(fileno(ESIS),'r');
110:
111: croak "failed setting handle to parser output: $!"
112: unless defined $esis_fh;
113: #close ESIS;
114: warn "filehandles open" if $debug;
116: } elsif ($input_type eq 'handle') {
117: warn "attempting HANDLE\n" if $debug;
118: my $in_fh = new IO::Handle;
119: my $pipe = new IO::Pipe($in_fh, $source);
120: croak "failed setting handle to parser output: $!"
121: unless defined $pipe;
123: my $pid = open(ESIS, '-|');
124: croak "fork failed: $!\n" unless defined $pid;
125: if ($pid == 0) {
126: close $source;
127: #$pipe->reader;
128: my $in_fh = 'STDIN'->fdopen($pipe->fileno,'w');
130: while (@env) { $ENV{shift @env} = shift @env; }
131: exec $esis_cmd,@args,'-';
132: }
133: #$pipe->writer;
135: my $esis_fh = new IO::Handle;
136: $esis_fh->fdopen(fileno(ESIS),'r');
137: croak "failed setting handle to parser output: $!"
138: unless defined $esis_fh;
139: close ESIS;
140: warn "filehandles open" if $debug;
141: } elsif (0) {
143: $esis_fh = new IO::Pipe;
144: my $pid = fork; # open(PIPE,'-|');
145: die "fork failed: $!" unless defined $pid;
146: $esis_fh = new IO::Handle('-','r');
147: if ($pid == 0) {
148: $esis_fh->writer();
149:
150: while (@env) { $ENV{shift @env} = shift @env; }
151: exec $esis_cmd,@args,$source;
152: }
153: my $fh = new IO::File($source,'r');
154: die "failed to open $source: $!\n" unless defined $fh;
155: $source = $fh;
156: $input_type = 'handle';
157: #} elsif ($input_type eq 'literal' || $input_type eq 'string') {
158: # $self->{'offset'} = 0;
159: $self->[&IDX_source()] = ref($source) ? $source : \$source;
160: } elsif ($input_type eq 'esis-file') {
161: $esis_fh = new IO::File($source, 'r');
162: die "failed to open $source: $!\n" unless defined $esis_fh;
163: } else {
164: die "unknown input type '".$input_type."'";
165: }
167: die "esis stream open failed: $!"
168: unless defined($esis_fh) && !eof($esis_fh);
170: my $parse = new SGMLS('handle'=>$esis_fh, 'use_re_events'=>0,
171: 'filter_rs' =>1);
172: return undef unless defined $parse;
173: $parse->handle_attr_sdata(\&SGMLS::bracketed_sdata_to_entity,
174: \&SGMLS::optional_list_join);
175: $parse->handle_sdata(\&SGMLS::bracketed_sdata_to_entity);
177: #$main->debug('events', 'handler_lookup', 'modes');
179: $self->[&IDX_parser()] = $parse;
180: @list = $self->SUPER::process($main,'',@list);
182: warn "processing complete" if $debug;
183: undef $parse;
184: return @list;
185: }
187: #start new traversal of file (for default use by main)
188: #passed main module and file name as first (normal) argument,
189: # rest are process specific
190: #sub process_xml_file {
191: # my ($self,$main,$file,@list) = @_;
192: # use IO::File;
193: # my $fh = new IO::File;
194: #need some command and argument string options?
195: #$ENV{SP_CHARSET_FIXED} = 'YES';
196: #$ENV{SP_ENCODING} = 'XML';
197: #$ENV{SGML_CATALOG_FILES} = 'pubtext/xml.soc';
198: # $fh->open("SP_CHARSET_FIXED=YES SP_ENCODING=XML onsgmls -wxml -cdeclaration/xml.soc $file |");
199: # croak "Failed to start parser: $!\n" unless defined $fh;
200: # @list = $self->process($main,$fh,@list);
201: # $fh->close;
202: #
203: # return @list;
204: #}
206: #start new traversal of file (for default use by main)
207: #passed main module and file name as first (normal) argument,
208: # rest are process specific
209: #sub process_sgml_file {
210: # my ($self,$main,$file,@list) = @_;
211: # use IO::File;
212: # my $fh = new IO::File;
213: # #need some command and argument string options
214: # $fh->open("onsgmls $file |");
215: # croak "Failed to start parser: $!\n" unless defined $fh;
216: # @list = $self->process($main,$fh,@list);
217: # $fh->close;
218: #
219: # return @list;
220: #}
222: sub trace {
223: my ($arg) = @_;
224: if (defined $arg) {
225: $debug = $arg;
226: } else {
227: $debug = 1;
228: }
229: }
231: sub queue_more_events {
232: my ($self,$queue) = @_;
233: my $parser = $self->[&IDX_parser()];
234: my ($event,$data,$type, @aux);
235: $event = $parser->next_event;
236: if ($event) {
237: $type = $event->type;
238: $data = $event->data;
240: if ($type eq 'start_element') {
241: # TODO attributes need further processing!
242: my @order = $data->attribute_order('all');
243: my $c = $data->attribute_order('specified');
244: #if (@order > 0 && !defined $order[0]) {
245: # @order = $data->attribute_names;
246: #}
247: if ($debug) {
248: warn "ATTRIBUTE LIST:".join(':',map {
249: $_.'['.$data->attribute($_)->is_implied.'/'.
250: $data->attribute($_)->is_specified.'/'.
251: $data->attribute($_)->position.']' } @order);
252: }
253: my @extra = ();
254: @aux = ();
256: for(my $i=0; $i<@order; $i+=1) { # was $i<$c
257: my $aname = $order[$i];
258: my $adata = $data->attribute($aname);
259: if ($adata->is_implied) {
260: push @aux, ($aname, undef);
261: } else {
262: push @aux, ($aname, $adata->value);
263: }
264: }
265: #if ($data->attribute($_)->is_implied) { push @extra,$_; }
266: #else {
267: #}
268: if (defined($c) && $c < @order) {
269: warn "ATTRIBUTE READ($c):".join(':',@aux)."\nORDER:".
270: join(':',@order) if $debug;
271: #die 'attribute construction error' if $c != (@aux / 2);
272: } else {
273: $c = @aux / 2;
274: }
276: #warn "ATTRIBUTE DUMP:".Dumper($data->attributes) if $debug;
277: warn "ATTRIBUTE READ($c):".join(':',@aux)."\nORDER:".join(':',@order) if $debug;
278: $data = {'Name' => $data->name,
279: 'Attributes' => { @aux },
280: 'AttributeOrder' => [@order],
281: 'Defaulted' => $c };
282:
283: } elsif ($type eq 'end_element') {
284: $data = $data->name;
286: } elsif ($type eq 'cdata') { #data is string
287: warn "CHAR_DATA:[".$data."]\n" if $debug;
288: $data = { 'Data'=>$data };
290: } elsif ($type eq 're') { #no data
291: warn "RE\n" if $debug;
292: $data = { 'Data'=>"\n" };
293: $type = 'cdata';
295: } elsif ($type eq 'comment') { #no data
296: warn "COMMENT:[".$data."]\n" if $debug;
297: $data = { 'Data'=>$data };
299: } elsif ($type eq 'sdata') { #data is entity object or string
300: warn "SPECIAL_DATA:[".$data."]\n" if $debug;
301: if (ref $data) {
302: $data = { 'Name' => $data->name, 'Data' => $data->value };
303: } elsif ($data =~ m/\[\s*(\S+)\s*\]/) {
304: $data = { 'Name' => $1, 'Data' => $data };
305: } else {
306: $data = { 'Data' => $data };
307: }
308: } elsif ($type eq 'pi') { #data is string
309: warn "PI:[".$data."]\n" if $debug;
310: if ($self->[&IDX_markup_mode()] eq 'sgml') {
311: $data = { 'Data' => $data };
312: } else {
313: my @pi = split /\s+/, $data, 2;
314: if (@pi == 2 && $pi[0] =~ m/\S/) {
315: $data = { 'Target' => $pi[0], 'Data' => $pi[1] };
316: } else {
317: $data = { 'Data' => $data };
318: }
319: }
321: } elsif ($type eq 'entity') { #data is SGMLS::Entity
322: die "unimplemented";
324: } elsif ($type eq 'start_subdoc') { #data is SGMLS::Entity
325: $data = {'Name' => $data->name,
326: 'Data' => $data->value # TODO needs processing
327: };
329: } elsif ($type eq 'end_subdoc') { #data is SGMLS::Entity
330: $data = { 'Name' => $data->name,
331: 'Data' => $data->value, # TODO needs processing
332: };
333: } elsif ($type eq 'conforming') { #no data
334: $data = 'conforming';
335: $type = '';
336: $event = $parser->next_event;
337: die 'conforming event not final event' if $event;
338: } else {
339: die "Unknown event '$type' from parser";
340: }
342: push @$queue, $type, $data, '';
343:
344: } else {
345: return 0;
346: }
347: return 1;
348: }
350: 1;