File Coverage

File:lib/XML/Parser/Lite/Tree/XPath/Tree.pm
Coverage:84.0%

linestmtbrancondsubtimecode
1package XML::Parser::Lite::Tree::XPath::Tree;
2
3
30
30
30
116
47
123
use strict;
4
30
30
30
145
49
135
use XML::Parser::Lite::Tree::XPath::Tokener;
5
6sub new {
7
179
530
        my ($class) = @_;
8
179
759
        my $self = bless {}, $class;
9
179
556
        $self->{error} = 0;
10
179
500
        return $self;
11}
12
13sub build_tree {
14
179
642
        my ($self, $tokens) = @_;
15
16
179
470
        $self->{error} = 0;
17
179
470
        $self->{tokens} = $tokens;
18
19        #
20        # build a basic tree using the brackets
21        #
22
23
179
575
        return 0 unless $self->make_groups();
24
179
823
        $self->recurse_before($self, 'del_links');
25
26
27        #
28        # simple groupings
29        #
30
31
179
577
        return 0 unless $self->recurse_before($self, 'clean_axis_and_abbreviations');
32
179
562
        return 0 unless $self->recurse_before($self, 'claim_groups');
33
176
637
        return 0 unless $self->recurse_after($self, 'build_steps');
34
176
545
        return 0 unless $self->recurse_after($self, 'build_paths');
35
36
37        #
38        # get operator oprands
39        #
40
41
176
712
        return 0 unless $self->binops(['|'], 'UnionExpr');
42
176
608
        return 0 unless $self->recurse_before($self, 'unary_minus');
43
176
831
        return 0 unless $self->binops(['*','div','mod'], 'MultiplicativeExpr');
44
176
765
        return 0 unless $self->binops(['+','-'], 'AdditiveExpr');
45
176
900
        return 0 unless $self->binops(['<','<=','>','>='], 'RelationalExpr');
46
176
785
        return 0 unless $self->binops(['=','!='], 'EqualityExpr');
47
176
777
        return 0 unless $self->binops(['and'], 'AndExpr');
48
176
676
        return 0 unless $self->binops(['or'], 'OrExpr');
49
50        #return 0 unless $self->find_expressions(['UnionExpr', 'MultiplicativeExpr', 'AdditiveExpr', 'RelationalExpr', 'EqualityExpr', 'AndExpr', 'OrExpr']);
51
52
53
176
738
        return 1;
54}
55
56sub dump_flat {
57
41
105
        my ($self) = @_;
58
41
114
        $self->{dump} = '';
59
60
41
41
69
136
        for my $token(@{$self->{tokens}}){
61
41
124
                $self->dump_flat_go($token);
62        }
63
64
41
111
        my $dump = $self->{dump};
65
41
91
        delete $self->{dump};
66
41
125
        return $dump;
67}
68
69sub dump_flat_go {
70
279
696
        my ($self, $node) = @_;
71
72
279
1024
        $self->{dump} .= '['.$node->dump();
73
74
279
279
531
1058
        for my $token(@{$node->{tokens}}){
75
76
238
702
                $self->dump_flat_go($token);
77        }
78
79
279
1025
        $self->{dump} .= ']';
80}
81
82sub dump_tree {
83
0
0
        my ($self) = @_;
84
0
0
        $self->{dump} = '';
85
0
0
        $self->{indent} = [''];
86
87
0
0
0
0
        for my $token(@{$self->{tokens}}){
88
0
0
                $self->dump_tree_go($token);
89        }
90
91
0
0
        my $dump = $self->{dump};
92
0
0
        delete $self->{dump};
93
0
0
        delete $self->{indent};
94
0
0
        return $dump;
95}
96
97sub dump_tree_go {
98
0
0
        my ($self, $node) = @_;
99
100
0
0
0
0
        $self->{dump} .= @{$self->{indent}}[-1].$node->dump()."\n";
101
102
0
0
0
0
0
0
        push @{$self->{indent}}, @{$self->{indent}}[-1].' - ';
103
104
0
0
0
0
        for my $token(@{$node->{tokens}}){
105
106
0
0
                $self->dump_tree_go($token);
107        }
108
109
0
0
0
0
        pop @{$self->{indent}};
110}
111
112sub make_groups {
113
179
524
        my ($self) = @_;
114
115
179
549
        my $tokens = $self->{tokens};
116
179
503
        $self->{tokens} = [];
117
118
179
469
        my $parent = $self;
119
120
179
179
316
521
        for my $token(@{$tokens}){
121
122
1223
3905
                if ($token->match('Symbol', '(')){
123
124
112
366
                        my $group = XML::Parser::Lite::Tree::XPath::Token->new();
125
112
337
                        $group->{type} = 'Group()';
126
112
317
                        $group->{tokens} = [];
127
112
290
                        $group->{parent} = $parent;
128
129
112
112
241
384
                        push @{$parent->{tokens}}, $group;
130
112
335
                        $parent = $group;
131
132                }elsif ($token->match('Symbol', '[')){
133
134
50
331
                        my $group = XML::Parser::Lite::Tree::XPath::Token->new();
135
50
272
                        $group->{type} = 'Predicate';
136
50
152
                        $group->{tokens} = [];
137
50
127
                        $group->{parent} = $parent;
138
139
50
50
304
165
                        push @{$parent->{tokens}}, $group;
140
50
152
                        $parent = $group;
141
142                }elsif ($token->match('Symbol', ')')){
143
144
112
432
                        if ($parent->{type} ne 'Group()'){
145
0
0
                                $self->{error} = "Found unexpected closing bracket ')'.";
146
0
0
                                return 0;
147                        }
148
149
112
417
                        $parent = $parent->{parent};
150
151                }elsif ($token->match('Symbol', ']')){
152
153
50
201
                        if ($parent->{type} ne 'Predicate'){
154
0
0
                                $self->{error} = "Found unexpected closing bracket ']'.";
155
0
0
                                return 0;
156                        }
157
158
50
209
                        $parent = $parent->{parent};
159
160                }else{
161
899
2508
                        $token->{parent} = $parent;
162
899
899
1501
4260
                        push @{$parent->{tokens}}, $token;
163                }
164        }
165
166
179
928
        return 1;
167}
168
169sub recurse_before {
170
4300
12898
        my ($self, $root, $method) = @_;
171
172
4300
13205
        return 0 unless $self->$method($root);
173
174
4118
4118
6767
14342
        for my $token(@{$root->{tokens}}){
175
176
3587
10666
                return 0 unless $self->recurse_before($token, $method);
177        }
178
179
4118
16258
        return 1;
180}
181
182sub recurse_after {
183
12633
35922
        my ($self, $root, $method) = @_;
184
185
12633
12633
20041
42890
        for my $token(@{$root->{tokens}}){
186
187
11049
31461
                return 0 unless $self->recurse_after($token, $method);
188        }
189
190
12633
39186
        return 0 unless $self->$method($root);
191
192
12633
49986
        return 1;
193}
194
195sub binops {
196
1232
3645
        my ($self, $ops, $production) = @_;
197
1232
3240
        $self->{binops} = $ops;
198
1232
3265
        $self->{binop_production} = $production;
199
200
1232
3564
        my $ret = $self->recurse_after($self, 'do_binops');
201
202
1232
2822
        delete $self->{binops};
203
1232
2797
        delete $self->{binop_production};
204
205
1232
4263
        return $ret;
206}
207
208sub claim_groups {
209
1443
3816
        my ($self, $root) = @_;
210
211
1443
3655
        my $tokens = $root->{tokens};
212
1443
4028
        $root->{tokens} = [];
213
214
1443
2802
2702
10296
        while(my $token = shift @{$tokens}){
215
216
217                #
218                # makes claims
219                #
220
221
1362
4306
                if ($token->match('NodeType')){
222
223                        # node type's claim the follow group node
224
225
6
6
9
15
                        my $next = shift @{$tokens};
226
227
6
21
                        if (!$next->match('Group()')){
228
0
0
                                $self->{error} = "Found NodeType '$token->{content}' without a following '(' (found a following '$next->{type}').";
229
0
0
                                return 0;
230                        }
231
232
6
6
8
19
                        my $childs = scalar(@{$next->{tokens}});
233
234
6
77
                        if ($token->{content} eq 'processing-instruction'){
235
236
4
17
                                if ($childs == 0){
237
238                                        #ok
239
240                                }elsif ($childs == 1){
241
242
2
10
                                        if ($next->{tokens}->[0]->{type} eq 'Literal'){
243
244
1
7
                                                $token->{argument} = $next->{tokens}->[0]->{content};
245
246                                        }else{
247
1
8
                                                $self->{error} = "processing-instruction node has a non-Literal child node (of type '$next->{tokens}->[0]->{type}').";
248
1
9
                                                return 0;
249                                        }
250                                }else{
251
1
4
                                        $self->{error} = "processing-instruction node has more than one child node.";
252
1
8
                                        return 0;
253                                }
254
255                        }else{
256
2
7
                                if ($childs > 0){
257
1
6
                                        $self->{error} = "NodeType $token->{content} node has unexpected children.";
258
1
8
                                        return 0;
259                                }
260                        }
261
262
3
9
                        $token->{type} = 'NodeTypeTest';
263
3
3
6
19
                        push @{$root->{tokens}}, $token;
264
265                }elsif ($token->match('FunctionName')){
266
267                        # FunctionNames's claim the follow group node - it should be an arglist
268
269
106
106
169
276
                        my $next = shift @{$tokens};
270
271
106
338
                        if (!$next->match('Group()')){
272
0
0
                                $self->{error} = "Found FunctionName '$token->{content}' without a following '(' (found a following '$next->{type}').";
273
0
0
                                return 0;
274                        }
275
276                        #
277                        # recurse manually - this node will never be scanned by this loop
278                        #
279
280
106
386
                        return 0 unless $self->claim_groups($next);
281
282
283                        #
284                        # organise it into an arg list
285                        #
286
287
106
336
                        return 0 unless $self->make_arg_list($token, $next);
288
289
290
291
106
106
185
510
                        push @{$root->{tokens}}, $token;
292
293
294                }elsif ($token->match('Group()')){
295
296
0
0
                        $token->{type} = 'PrimaryExpr';
297
298
0
0
0
0
                        push @{$root->{tokens}}, $token;
299
300                }else{
301
302
1250
1250
2027
4778
                        push @{$root->{tokens}}, $token;
303                }
304
305        }
306
307
1440
4827
        return 1;
308}
309
310sub make_arg_list {
311
106
318
        my ($self, $root, $arg_group) = @_;
312
313
106
269
        $root->{type} = 'FunctionCall';
314
106
286
        $root->{tokens} = [];
315
316        # no need to construct an arg list if there aren't any args
317
106
106
174
477
        return 1 unless scalar @{$arg_group->{tokens}};
318
319
71
236
        my $arg = XML::Parser::Lite::Tree::XPath::Token->new();
320
71
217
        $arg->{type} = 'FunctionArg';
321
71
207
        $arg->{tokens} = [];
322
323
71
272
128
1142
        while(my $token = shift @{$arg_group->{tokens}}){
324
325
201
633
                if ($token->match('Symbol', ',')){
326
327
25
25
41
83
                        push @{$root->{tokens}}, $arg;
328
329
25
76
                        $arg = XML::Parser::Lite::Tree::XPath::Token->new();
330
25
73
                        $arg->{type} = 'FunctionArg';
331
25
85
                        $arg->{tokens} = [];
332
333                }else{
334
335
176
456
                        $token->{parent} = $arg;
336
176
176
273
699
                        push @{$arg->{tokens}}, $token;
337                }
338        }
339
340
71
191
        $arg->{parent} = $root;
341
71
71
123
210
        push @{$root->{tokens}}, $arg;
342
343
344
71
243
        return 1;
345}
346
347sub clean_axis_and_abbreviations {
348
349
1386
3787
        my ($self, $root) = @_;
350
351
1386
3345
        my $tokens = $root->{tokens};
352
1386
3851
        $root->{tokens} = [];
353
354
1386
2408
2480
9187
        while(my $token = shift @{$tokens}){
355
356
1022
3305
                if ($token->match('AxisName')){
357
358
39
39
63
108
                        my $next = shift @{$tokens};
359
360
39
241
                        unless ($next->match('Symbol', '::')){
361
362
0
0
                                $self->{error} = "Found an AxisName '$token->{content}' without a following ::";
363
0
0
                                return 0;
364                        }
365
366
39
114
                        $token->{type} = 'AxisSpecifier';
367
368
39
39
68
154
                        push @{$root->{tokens}}, $token;
369
370
371                }elsif ($token->match('Symbol', '@')){
372
373
15
44
                        $token->{type} = 'AxisSpecifier';
374
15
41
                        $token->{content} = 'attribute';
375
376
15
15
23
58
                        push @{$root->{tokens}}, $token;
377
378
379                }elsif ($token->match('Operator', '//')){
380
381                        # // == /descendant-or-self::node()/
382
383
61
217
                        $token = XML::Parser::Lite::Tree::XPath::Token->new();
384
61
203
                        $token->{type} = 'Operator';
385
61
180
                        $token->{content} = '/';
386
61
61
104
201
                        push @{$root->{tokens}}, $token;
387
388
61
199
                        $token = XML::Parser::Lite::Tree::XPath::Token->new();
389
61
242
                        $token->{type} = 'AxisSpecifier';
390
61
168
                        $token->{content} = 'descendant-or-self';
391
61
61
107
186
                        push @{$root->{tokens}}, $token;
392
393
61
195
                        $token = XML::Parser::Lite::Tree::XPath::Token->new();
394
61
182
                        $token->{type} = 'NodeTypeTest';
395
61
166
                        $token->{content} = 'node';
396
61
61
111
195
                        push @{$root->{tokens}}, $token;
397
398
61
191
                        $token = XML::Parser::Lite::Tree::XPath::Token->new();
399
61
176
                        $token->{type} = 'Operator';
400
61
165
                        $token->{content} = '/';
401
61
61
107
295
                        push @{$root->{tokens}}, $token;
402
403
404                }elsif ($token->match('Symbol', '.')){
405
406
1
5
                        $token = XML::Parser::Lite::Tree::XPath::Token->new();
407
1
3
                        $token->{type} = 'AxisSpecifier';
408
1
4
                        $token->{content} = 'self';
409
1
1
2
3
                        push @{$root->{tokens}}, $token;
410
411
1
3
                        $token = XML::Parser::Lite::Tree::XPath::Token->new();
412
1
3
                        $token->{type} = 'NodeTypeTest';
413
1
3
                        $token->{content} = 'node';
414
1
1
2
4
                        push @{$root->{tokens}}, $token;
415
416
417                }elsif ($token->match('Symbol', '..')){
418
419
1
4
                        $token = XML::Parser::Lite::Tree::XPath::Token->new();
420
1
4
                        $token->{type} = 'AxisSpecifier';
421
1
3
                        $token->{content} = 'parent';
422
1
1
3
4
                        push @{$root->{tokens}}, $token;
423
424
1
3
                        $token = XML::Parser::Lite::Tree::XPath::Token->new();
425
1
3
                        $token->{type} = 'NodeTypeTest';
426
1
3
                        $token->{content} = 'node';
427
1
1
2
3
                        push @{$root->{tokens}}, $token;
428
429
430                }else{
431
432
905
905
1493
3523
                        push @{$root->{tokens}}, $token;
433                }
434        }
435
436
1386
4405
        return 1;
437}
438
439sub build_steps {
440
1334
3522
        my ($self, $root) = @_;
441
442
1334
3308
        my $tokens = $root->{tokens};
443
1334
3637
        $root->{tokens} = [];
444
445
1334
2325
2204
8796
        while(my $token = shift @{$tokens}){
446
447
991
3125
                if ($token->match('AxisSpecifier')){
448
449
117
117
240
302
                        my $next = shift @{$tokens};
450
451
117
356
                        unless (defined $next){
452
453
0
0
                                $self->{error} = "AxisSpecifier found without following NodeTest.";
454
0
0
                                return 0;
455                        }
456
457
117
368
                        unless ($next->match('NodeTypeTest') || $next->match('NameTest')){
458
459
0
0
                                $self->{error} = "AxisSpecifier found without following NodeTest (NodeTypeTest | NameTest) (found $next->{type} instead).";
460
0
0
                                return 0;
461                        }
462
463
117
405
                        my $step = XML::Parser::Lite::Tree::XPath::Token->new();
464
117
365
                        $step->{type} = 'Step';
465
117
458
                        $step->{axis} = $token->{content};
466
117
402
                        $step->{tokens} = [];
467
468
117
117
220
377
                        push @{$step->{tokens}}, $next;
469
470
471
117
117
210
474
                        while(my $token = shift @{$tokens}){
472
473
73
243
                                if ($token->match('Predicate')){
474
475
0
0
0
0
                                        push @{$step->{tokens}}, $token;
476                                }else{
477
73
73
142
196
                                        unshift @{$tokens}, $token;
478
73
154
                                        last;
479                                }
480                        }
481
482
117
117
315
551
                        push @{$root->{tokens}}, $step;
483
484
485                }elsif ($token->match('NodeTypeTest') || $token->match('NameTest')){
486
487
179
668
                        my $step = XML::Parser::Lite::Tree::XPath::Token->new();
488
179
556
                        $step->{type} = 'Step';
489
179
575
                        $step->{tokens} = [];
490
491
179
179
366
589
                        push @{$step->{tokens}}, $token;
492
493
494
179
229
381
1001
                        while(my $token = shift @{$tokens}){
495
496
134
431
                                if ($token->match('Predicate')){
497
498
50
50
88
188
                                        push @{$step->{tokens}}, $token;
499                                }else{
500
84
84
151
218
                                        unshift @{$tokens}, $token;
501
84
161
                                        last;
502                                }
503                        }
504
505
179
179
314
685
                        push @{$root->{tokens}}, $step;
506
507
508                }elsif ($token->match('Predicate')){
509
510
0
0
                        $self->{error} = "Predicate found without preceeding NodeTest.";
511
0
0
                        return 0;
512
513                }else{
514
515
695
695
1157
2738
                        push @{$root->{tokens}}, $token;
516                }
517        }
518
519
1334
4379
        return 1;
520}
521
522sub build_paths {
523
1513
4380
        my ($self, $root) = @_;
524
525
1513
3727
        my $tokens = $root->{tokens};
526
1513
4070
        $root->{tokens} = [];
527
528
1513
2439
2518
9253
        while(my $token = shift @{$tokens}){
529
530
926
2915
                if ($token->match('Step')){
531
532
25
81
                        my $path = XML::Parser::Lite::Tree::XPath::Token->new();
533
25
78
                        $path->{type} = 'LocationPath';
534
25
68
                        $path->{absolute} = 0;
535
25
81
                        $path->{tokens} = [$token];
536
537
25
88
                        return 0 unless $self->slurp_path($path, $tokens);
538
539
25
25
42
101
                        push @{$root->{tokens}}, $path;
540
541                }elsif ($token->match('Operator', '/')){
542
543
131
131
240
340
                        unshift @{$tokens}, $token;
544
545
131
420
                        my $path = XML::Parser::Lite::Tree::XPath::Token->new();
546
131
409
                        $path->{type} = 'LocationPath';
547
131
363
                        $path->{absolute} = 1;
548
131
382
                        $path->{tokens} = [];
549
550
131
438
                        return 0 unless $self->slurp_path($path, $tokens);
551
552
131
131
256
514
                        unless (scalar @{$path->{tokens}}){
553
0
0
                                $self->{error} = "Slash found at end of path.";
554
0
0
                                return 0;
555                        }
556
557
131
131
226
601
                        push @{$root->{tokens}}, $path;
558
559                }else{
560
561
770
770
1262
2968
                        push @{$root->{tokens}}, $token;
562                }
563        }
564
565
1513
4971
        return 1;
566}
567
568sub slurp_path {
569
156
470
        my ($self, $path, $tokens) = @_;
570
571
156
256
        while(1){
572
573
427
427
663
1078
                my $t1 = shift @{$tokens};
574
575
427
1222
                if (defined $t1){
576
288
925
                        if ($t1->match('Operator', '/')){
577
578
271
271
399
749
                                my $t2 = shift @{$tokens};
579
580
271
797
                                if (defined $t2){
581
271
859
                                        if ($t2->match('Step')){
582
583
271
271
453
1245
                                                push @{$path->{tokens}}, $t2;
584                                        }else{
585
0
0
                                                $self->{error} = "Non Step token ($t2->{type}) found after slash.";
586
0
0
                                                return 0;
587                                        }
588                                }else{
589
0
0
                                        $self->{error} = "Slash found at end of path.";
590
0
0
                                        return 0;
591                                }
592                        }else{
593
17
17
35
43
                                unshift @{$tokens}, $t1;
594
17
61
                                return 1;
595                        }
596                }else{
597
139
519
                        return 1;
598                }
599        }
600}
601
602sub do_binops {
603
9786
25388
        my ($self, $root) = @_;
604
605
9786
24466
        my $tokens = $root->{tokens};
606
9786
26191
        $root->{tokens} = [];
607
608
9786
18264
16950
68826
        while(my $token = shift @{$tokens}){
609
610
611
8478
8478
14547
27654
                for my $op(@{$self->{binops}}){
612
613
16895
52587
                        if ($token->match('Operator', $op)){
614
615
76
76
118
313
                                if (!scalar(@{$root->{tokens}})){
616
0
0
                                        $self->{error} = "Found a binop $token->{content} with no preceeding token";
617
0
0
                                        return 0;
618                                }
619
620
76
76
119
244
                                if (!scalar(@{$tokens})){
621
0
0
                                        $self->{error} = "Found a binop $token->{content} with no following token";
622
0
0
                                        return 0;
623                                }
624
625
76
76
121
252
                                my $prev = pop @{$root->{tokens}};
626
76
76
116
198
                                my $next = shift @{$tokens};
627
628
76
76
124
239
                                push @{$token->{tokens}}, $prev;
629
76
76
137
226
                                push @{$token->{tokens}}, $next;
630
76
244
                                $token->{type} = $self->{binop_production};
631
632
76
146
                                last;
633                        }
634                }
635
636
8478
8478
15232
30265
                push @{$root->{tokens}}, $token;
637        }
638
639
9786
31888
        return 1;
640}
641
642sub add_links {
643
1398
3628
        my ($self, $root) = @_;
644
645
1398
2537
        my $prev = undef;
646
647
1398
1398
2263
4900
        for my $token(@{$root->{tokens}}){
648
649
1230
3167
                $token->{prev} = $prev;
650
1230
3737
                $prev->{next} = $token if defined $prev;
651
652
1230
3732
                $prev = $token;
653        }
654}
655
656sub del_links {
657
179
513
        my ($self, $root) = @_;
658
659
179
179
315
663
        for my $token(@{$root->{tokens}}){
660
661
683
1664
                delete $token->{parent};
662
683
1383
                delete $token->{prev};
663
683
2519
                delete $token->{next};
664        }
665}
666
667sub unary_minus {
668
1398
3573
        my ($self, $root) = @_;
669
670
1398
3802
        $self->add_links($root);
671
672
1398
3644
        my $tokens = $root->{tokens};
673
1398
3697
        $root->{tokens} = [];
674
675
1398
2620
2360
9804
        while(my $token = shift @{$tokens}){
676
677
1222
3983
                if ($token->match('Operator', '-')){
678
679
13
136
                        if (defined($token->{next}) && defined($token->{prev}) && $token->{prev}->is_expression){
680
681                                # not unary
682                        }else{
683                                # unary minus
684
685
8
23
                                $token->{type} = 'UnaryExpr';
686
8
8
8
13
23
24
                                push @{$token->{tokens}}, shift @{$tokens};
687                        }
688                }
689
690
1222
1222
2148
4375
                push @{$root->{tokens}}, $token;
691        }
692
693
1398
4538
        return 1;
694}
695
6961;