File Coverage

File:lib/Parse/EBNF/Rule.pm
Coverage:81.6%

linestmtbrancondsubtimecode
1package Parse::EBNF::Rule;
2
3
2
2
2
1761
6
22
use Parse::EBNF::Token;
4
5sub new {
6
2
9
  my ($class, $rule) = @_;
7
2
10
        my $self = bless {}, $class;
8
2
8
        $self->{error} = 0;
9
10
2
11
        $self->parse($rule) if defined $rule;
11
12
2
8
        return $self;
13}
14
15sub parse {
16
22
72
        my ($self, $rule) = @_;
17
18
22
58
        $self->{error} = 0;
19
20        # strip comments
21
22
59
        $rule =~ s!/\*([^\*]|\*[^\/])*\*\/!!g;
22
23
22
110
        unless ($rule =~ m!^\s*\[(\d+)\]\s*([A-Z][a-zA-Z]*)\s*\:\:=!){
24
25
2
13
                $self->{error} = "can't parse rule $rule";
26
2
5
                return;
27        }
28
29
20
73
        $self->{index} = $1;
30
20
55
        $self->{name} = $2;
31
32
20
60
        $rule =~ s!^(.*?)\:\:=!!;
33
34
20
54
        $self->{rule} = $rule;
35
36
37        # now try and tokenise the rule
38        # we first tokenise it, and *then* split it into alternations,
39        # since finding the pipes will be tricky if they occur inside
40        # literals or character classes
41
42
20
45
        my $tokens = [];
43
44
20
46
        $rule =~ s/^\s+//;
45
46
20
56
        while($rule){
47
32
61
                my $token = undef;
48
49
32
281
                if ($rule =~ m!^'([^']+)'!){
50
51
3
11
                        $token = Parse::EBNF::Token->new();
52
3
10
                        $token->{content} = $1;
53
3
9
                        $token->{type} = 'literal';
54
3
14
                        $rule = substr $rule, 2 + length $1;
55
56                }elsif ($rule =~ m!^"([^"]+)"!){
57
58
1
3
                        $token = Parse::EBNF::Token->new();
59
1
4
                        $token->{content} = $1;
60
1
3
                        $token->{type} = 'literal';
61
1
5
                        $rule = substr $rule, 2 + length $1;
62
63                }elsif ($rule =~ m!^\|!){
64
65
2
7
                        $token = Parse::EBNF::Token->new();
66
2
6
                        $token->{type} = 'alt';
67
2
6
                        $rule = substr $rule, 1;
68
69                }elsif ($rule =~ m!^([A-Z][a-zA-Z]*)!){
70
71
12
41
                        $token = Parse::EBNF::Token->new();
72
12
42
                        $token->{content} = $1;
73
12
36
                        $token->{type} = 'subrule';
74
12
35
                        $rule = substr $rule, length $1;
75
76                }elsif ($rule =~ m!^\[(\^?)(([^\]]|\\\])+)\]!){
77
78                        # some sort of class - sub-parse it
79
80
8
21
                        my $neg = $1;
81
8
16
                        my $inner = $2;
82
83
8
30
                        $rule = substr $rule, 2 + length($neg) + length($inner);
84
85
8
18
                        my $rx = '['.$neg;
86
8
25
                        while(length $inner){
87
88
18
93
                                if ($inner =~ m!^#x([0-9a-f]+)-#x([0-9a-f]+)!i){
89
90
2
9
                                        $inner = substr $inner, 5 + length($1) + length($2);
91
2
6
                                        $rx .= $self->hexchar($1).'-'.$self->hexchar($2);
92
93                                }elsif ($inner =~ m!^#x([0-9a-f]+)!i){
94
95
6
19
                                        $inner = substr $inner, 2 + length($1);
96
6
19
                                        $rx .= $self->hexchar($1);
97
98                                }elsif ($inner =~ m!^([^-])-([^-])!i){
99
100
4
10
                                        $inner = substr $inner, 3;
101
4
24
                                        $rx .= quotemeta($1).'-'.quotemeta($2);
102
103                                }elsif ($inner =~ m!^([^-])!i){
104
105
6
15
                                        $inner = substr $inner, 1;
106
6
25
                                        $rx .= quotemeta($1);
107
108                                }else{
109
110
0
0
                                        $self->{error} = "couldn't parse class rx at $inner";
111
0
0
                                        exit;
112                                }
113                        }
114
8
17
                        $rx .= ']';
115
116
8
28
                        $token = Parse::EBNF::Token->new();
117
8
22
                        $token->{content} = $rx;
118
8
27
                        $token->{type} = 'rx';
119
120
121                }elsif ($rule =~ m!^\[(([^\]]|\\\])+)\]!){
122
123
0
0
                        $token = Parse::EBNF::Token->new();
124
0
0
                        $token->{content} = $1;
125
0
0
                        $token->{type} = 'class';
126
0
0
                        $rule = substr $rule, 2 + length $1;
127
128                }elsif ($rule =~ m!^\*!){
129
130
1
5
                        $token = Parse::EBNF::Token->new();
131
1
4
                        $token->{type} = 'rep star';
132
1
3
                        $rule = substr $rule, 1;
133
134                }elsif ($rule =~ m!^\+!){
135
136
1
3
                        $token = Parse::EBNF::Token->new();
137
1
3
                        $token->{type} = 'rep plus';
138
1
4
                        $rule = substr $rule, 1;
139
140                }elsif ($rule =~ m!^\?!){
141
142
1
4
                        $token = Parse::EBNF::Token->new();
143
1
3
                        $token->{type} = 'rep quest';
144
1
4
                        $rule = substr $rule, 1;
145
146                }elsif ($rule =~ m!^\(!){
147
148
1
4
                        $token = Parse::EBNF::Token->new();
149
1
4
                        $token->{type} = 'group start';
150
1
3
                        $rule = substr $rule, 1;
151
152                }elsif ($rule =~ m!^\)!){
153
154
1
3
                        $token = Parse::EBNF::Token->new();
155
1
3
                        $token->{type} = 'group end';
156
1
3
                        $rule = substr $rule, 1;
157
158
159                }elsif ($rule =~ m!^\-!){
160
161
0
0
                        $token = Parse::EBNF::Token->new();
162
0
0
                        $token->{type} = 'dash';
163
0
0
                        $rule = substr $rule, 1;
164
165                }elsif ($rule =~ m!^#x([0-9a-f]+)!i){
166
167
1
4
                        $token = Parse::EBNF::Token->new();
168
1
4
                        $token->{content} = $self->hexchar($1);
169
1
5
                        $token->{type} = 'rx';
170
1
5
                        $rule = substr $rule, 2 + length $1;
171
172                }else{
173
174
0
0
                        $self->{error} = "couldn't parse token at start of $rule";
175
0
0
                        return;
176                }
177
178
32
32
52
83
                push @{$tokens}, $token;
179
180
32
128
                $rule =~ s/^\s+//;
181        }
182
183        #
184        # first we create a base token (of type list)
185        # which will represent a list of tokens for this rule
186        #
187
188
20
63
        my $base = Parse::EBNF::Token->new();
189
20
67
        $base->{type} = 'list';
190
20
52
        $base->{tokens} = $tokens;
191
20
50
        $self->{base} = $base;
192
193
194        #
195        # now we create a node tree from the flat list
196        #
197
198
20
81
        return unless $self->produce_groups($base);
199
200
201        #
202        # and perform recursive cleanups
203        #
204
205
20
61
        unless ($base->reduce_alternations()){
206
0
0
                $self->{error} = $base->{error};
207
0
0
                return;
208        }
209
210
20
61
        unless ($base->reduce_repetition()){
211
0
0
                $self->{error} = $base->{error};
212
0
0
                return;
213        }
214
215        # TODO: negations
216
217
20
61
        unless ($base->reduce_empty()){
218
0
0
                $self->{error} = $base->{error};
219
0
0
                return;
220        }
221
222
20
60
        unless ($base->reduce_rx()){
223
0
0
                $self->{error} = $base->{error};
224
0
0
                return;
225        }
226}
227
228sub hexchar {
229
11
32
        my ($self, $char) = @_;
230
231
11
20
        $char =~ s!^0+!!;
232
233
11
39
        if (hex($char) > 255){
234
235
0
0
                return '\\x{'.lc($char).'}';
236        }else{
237
238
11
60
                return '\\x'.lc($char);
239        }
240}
241
242sub produce_groups {
243
20
51
        my ($self, $base) = @_;
244
245
20
52
        my $tokens = $base->{tokens};
246
20
55
        $base->{tokens} = [];
247
20
36
        my $current = $base;
248
249
20
52
33
196
        while(my $token = shift @{$tokens}){
250
251
32
171
                if ($token->{type} eq 'group start'){
252
253
1
3
                        my $parent = Parse::EBNF::Token->new();
254
1
4
                        $parent->{type} = 'list';
255
1
3
                        $parent->{parent} = $current;
256
1
3
                        $parent->{tokens} = [];
257
258
1
1
2
3
                        push @{$current->{tokens}}, $parent;
259
260
1
6
                        $current = $parent;
261
262                }elsif ($token->{type} eq 'group end'){
263
264
1
2
                        $current = $current->{parent};
265
266
1
4
                        if (!defined($current)){
267
0
0
                                $self->{error} = "end of group found without matching begin in rule $self->{rule}";
268
0
0
                                return 0;
269                        }
270
271                }else{
272
30
30
51
110
                        push @{$current->{tokens}}, $token;
273                }
274
275        }
276
277
20
63
        return 1;
278}
279
280sub has_error {
281
21
52
        my ($self) = @_;
282
21
119
        return $self->{error} ? 1 : 0;
283}
284
285sub error {
286
0
0
        my ($self) = @_;
287
0
0
        return $self->{error} ? $self->{error} : '';
288}
289
290sub base_token {
291
54
140
        my ($self) = @_;
292
54
358
        return $self->{base};
293}
294
2951;
296