File Coverage

File:blib/lib/XML/Parser/LiteCopy.pm
Coverage:93.6%

linestmtbrancondsubtimecode
1# NOTE: This module originally came from SOAP::Lite, which you probably
2# don't have. It was first repackaged here just to avoid the huge
3# dependancy tree, but this version has several features (CDATA
4# support, better PI and Comment support) that have been added.
5
6#
7# Copyright (C) 2000-2007 Paul Kulchenko (paulclinger@yahoo.com)
8# Copyright (C) 2008 Martin Kutter (martin.kutter@fen-net.de)
9# Copyright (C) 2009 Cal Henderson (cal@iamcal.com)
10#
11# SOAP::Lite is free software; you can redistribute it
12# and/or modify it under the same terms as Perl itself.
13#
14
15package XML::Parser::LiteCopy;
16
17
9
9
9
36
16
42
use strict;
18
9
9
9
44
19
59
use vars qw($VERSION);
19$VERSION = '0.720.00';
20
21my $ReturnErrors = 0;
22
23sub new {
24
48
995
    my $class = shift;
25
26
48
156
    return $class if ref $class;
27
48
162
    my $self = bless {} => $class;
28
29
48
157
    my %parameters = @_;
30
48
152
    $self->setHandlers(); # clear first
31
48
48
95
309
    $self->setHandlers(%{$parameters{Handlers} || {}});
32
33
48
317
    $ReturnErrors = $parameters{ReturnErrors} || 0;
34
35
48
170
    return $self;
36}
37
38sub setHandlers {
39
106
284
    my $self = shift;
40
41    # allow symbolic refs, avoid "subroutine redefined" warnings
42
9
9
9
106
53
15
37
239
    no strict 'refs'; local $^W;
43    # clear all handlers if called without parameters
44
106
380
    if (not @_) {
45
64
291
        for (qw(Start End Char Final Init CData Comment Doctype PI Error)) {
46
163
340
            *$_ = sub {}
47
640
2594
        }
48    }
49
50    # we could use each here, too...
51
106
370
    while (@_) {
52
112
401
        my($name, $func) = splice(@_, 0, 2);
53        *$name = defined $func
54            ? $func
55
0
0
            : sub {}
56
112
623
    }
57
106
259
    return $self;
58}
59
60sub _regexp {
61
18
126
    my $patch = shift || '';
62
18
41
    my $package = __PACKAGE__;
63
64    # This parser is based on "shallow parser" http://www.cs.sfu.ca/~cameron/REX.html
65
66    # Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
67    # Technical Report TR 1998-17, School of Computing Science, Simon Fraser University, November, 1998.
68    # Copyright (c) 1998, Robert D. Cameron.
69    # The following code may be freely used and distributed provided that
70    # this copyright and citation notice remains intact and that modifications
71    # or additions are clearly identified.
72
73
9
9
9
58
15
71
    use re 'eval';
74
18
49
    my $TextSE = "[^<]+";
75
76    # the following backrefs have been added:
77    # 1 : TextSE
78    # 2 : MarkupSPE / DeclCE / CommentCE
79    # 3 : MarkupSPE / DeclCE / CDATA_CE
80    # 4 : MarkupSPE / DeclCE / DocTypeCE
81    # 5 : MarkupSPE / PI_CE
82    # 6 : MarkupSPE / EndTagCE
83    # 7+: MarkupSPE / ElemTagCE
84
85
18
43
    my $Until2Hyphens = "(?:[^-]*)-(?:[^-]+-)*-";
86
18
85
    my $CommentCE = "($Until2Hyphens)(?{${package}::comment(\$2)})>?";
87
88
18
42
    my $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+";
89
18
94
    my $CDATA_CE = "($UntilRSBs(?:[^\\]>]$UntilRSBs)*)(?{${package}::cdata(\$3)})>";
90
91
18
43
    my $S = "[ \\n\\t\\r]+";
92
18
42
    my $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";
93
18
40
    my $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";
94
18
79
    my $Name = "(?:$NameStrt)(?:$NameChar)*";
95
18
47
    my $QuoteSE = "\"[^\"]*\"|'[^']*'";
96
18
105
    my $DT_IdentSE = "$Name(?:$S(?:$Name|$QuoteSE))*";
97
18
57
    my $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
98
18
43
    my $S1 = "[\\n\\r\\t ]";
99
18
40
    my $UntilQMs = "[^?]*\\?+";
100
101
18
86
    my $PI_Tail = "\\?|$S1$UntilQMs(?:[^>?]$UntilQMs)*";
102
18
140
    my $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail>))|%$Name;|$S";
103
18
153
    my $DocTypeCE = "$S($DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?)>(?{${package}::_doctype(\$4)})";
104
105
18
101
    my $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?";
106
107
18
101
    my $PI_CE = "($Name(?:$PI_Tail))>(?{${package}::_pi(\$5)})";
108
109    # these expressions were modified for backtracking and events
110
111
18
92
    my $EndTagCE = "($Name)(?{${package}::_end(\$6)})(?:$S)?>";
112
18
38
    my $AttValSE = "\"([^<\"]*)\"|'([^<']*)'";
113
114
18
238
    my $ElemTagCE = "($Name)"
115        . "(?:$S($Name)(?:$S)?=(?:$S)?(?:$AttValSE)"
116        . "(?{[\@{\$^R||[]},\$8=>defined\$9?\$9:\$10]}))*(?:$S)?(/)?>"
117        . "(?{${package}::_start(\$7,\@{\$^R||[]}),\$^R=[]})(?{\$11 and ${package}::_end(\$7)})";
118
119
18
136
    my $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)";
120
121    # Next expression is under "black magic".
122    # Ideally it should be '($TextSE)(?{${package}::char(\$1)})|$MarkupSPE',
123    # but it doesn't work under Perl 5.005 and only magic with
124    # (?:....)?? solved the problem.
125    # I would appreciate if someone let me know what is the right thing to do
126    # and what's the reason for all this magic.
127    # Seems like a problem related to (?:....)? rather than to ?{} feature.
128    # Tests are in t/31-xmlparserlite.t if you decide to play with it.
129    #"(?{[]})(?:($TextSE)(?{${package}::_char(\$1)}))$patch|$MarkupSPE";
130
18
151
    "(?:($TextSE)(?{${package}::_char(\$1)}))$patch|$MarkupSPE";
131}
132
133setHandlers();
134
135# Try 5.6 and 5.10 regex first
136my $REGEXP = _regexp('??');
137
138sub _parse_re {
139
9
9
9
55
14
39
    use re "eval";
140    undef $^R;
141    1 while $_[0] =~ m{$REGEXP}go
142};
143
144# fixup regex if it does not work...
145{
146    if (not eval { _parse_re('<soap:foo xmlns:soap="foo">bar</soap:foo>'); 1; } ) {
147        $REGEXP = _regexp();
148        local $^W;
149        *_parse_re = sub {
150
9
9
9
54
19
36
                use re "eval";
151
48
92
                undef $^R;
152
48
74
                1 while $_[0] =~ m{$REGEXP}go
153            };
154    }
155}
156
157sub parse {
158
48
428
    _init();
159
48
170
    _parse_re($_[1]);
160
44
376
    _final();
161}
162
163my(@stack, $level);
164
165sub _init {
166
48
108
    @stack = ();
167
48
99
    $level = 0;
168
48
146
    Init(__PACKAGE__, @_);
169}
170
171sub _final {
172
44
141
    return _error("not properly closed tag '$stack[-1]'") if @stack;
173
42
122
    return _error("no element found") unless $level;
174
40
116
    Final(__PACKAGE__, @_)
175}
176
177sub _start {
178
71
378
    return _error("multiple roots, wrong element '$_[0]'") if $level++ && !@stack;
179
69
218
    push(@stack, $_[0]);
180
69
238
    Start(__PACKAGE__, @_);
181}
182
183sub _char {
184
65
302
    Char(__PACKAGE__, $_[0]), return if @stack;
185
186    # check for junk before or after element
187    # can't use split or regexp due to limitations in ?{} implementation,
188    # will iterate with loop, but we'll do it no more than two times, so
189    # it shouldn't affect performance
190    for (my $i=0; $i < length $_[0]; $i++) {
191
33
13
334
101
        return _error("junk '$_[0]' @{[$level ? 'after' : 'before']} XML element")
192        if index("\n\r\t ", substr($_[0],$i,1)) < 0; # or should '< $[' be there
193
25
62
    }
194}
195
196sub _end {
197
67
210
    return _error("unexpected closing tag '$_[0]'") if !@stack;
198
66
277
    pop(@stack) eq $_[0] or return _error("mismatched tag '$_[0]'");
199
63
245
    End(__PACKAGE__, $_[0]);
200}
201
202sub comment {
203
9
44
    Comment(__PACKAGE__, substr $_[0], 0, -2);
204}
205
206sub end {
207
0
0
     pop(@stack) eq $_[0] or return _error("mismatched tag '$_[0]'");
208
0
0
     End(__PACKAGE__, $_[0]);
209}
210
211sub cdata {
212
9
25
    return _error("CDATA outside of tag stack") unless @stack;
213
9
47
    CData(__PACKAGE__, substr $_[0], 0, -2);
214}
215
216sub _doctype {
217
1
4
    Doctype(__PACKAGE__, $_[0]);
218}
219
220sub _pi {
221
6
32
    PI(__PACKAGE__, substr $_[0], 0, -1);
222}
223
224sub _error {
225
23
71
    if ($ReturnErrors){
226
8
29
      Error(__PACKAGE__, $_[0]);
227
8
51
      return;
228    }
229
15
38
    die "$_[0]\n";
230}
231
232# ======================================================================
2331;
234