| File: | lib/XML/Parser/Lite/Tree/XPath/Tokener.pm |
| Coverage: | 70.4% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | package XML::Parser::Lite::Tree::XPath::Tokener; | |||||
| 2 | ||||||
| 3 | 31 31 31 | 170 119 280 | use XML::Parser::Lite::Tree::XPath::Token; | |||
| 4 | ||||||
| 5 | sub new { | |||||
| 6 | 180 | 669 | my $class = shift; | |||
| 7 | 180 | 796 | my $self = bless {}, $class; | |||
| 8 | ||||||
| 9 | 180 | 553 | return $self; | |||
| 10 | } | |||||
| 11 | ||||||
| 12 | sub parse { | |||||
| 13 | 216 | 921 | my ($self, $input) = @_; | |||
| 14 | ||||||
| 15 | 216 | 727 | $self->{tokens} = []; | |||
| 16 | 216 | 832 | $self->{input} = $input; | |||
| 17 | 216 | 585 | $self->{error} = 0; | |||
| 18 | 216 | 659 | $self->{rx} = XML::Parser::Lite::Tree::XPath::Tokener::Rx::fetch(); | |||
| 19 | ||||||
| 20 | 216 | 781 | $self->trim(); | |||
| 21 | ||||||
| 22 | 216 | 902 | while($self->{input}){ | |||
| 23 | 1507 | 3928 | $self->step(); | |||
| 24 | 1507 | 8643 | last if $self->{error}; | |||
| 25 | } | |||||
| 26 | ||||||
| 27 | 216 | 616 | $self->{rx} = 0; | |||
| 28 | ||||||
| 29 | 216 | 1413 | warn $self->{error} if $self->{error}; | |||
| 30 | ||||||
| 31 | 216 | 656 | $self->special_rules(); | |||
| 32 | ||||||
| 33 | 216 | 784 | warn $self->{error} if $self->{error}; | |||
| 34 | ||||||
| 35 | 216 | 808 | return 1; | |||
| 36 | } | |||||
| 37 | ||||||
| 38 | sub step { | |||||
| 39 | 1507 | 3842 | my ($self) = @_; | |||
| 40 | ||||||
| 41 | 1507 | 3939 | $self->trim(); | |||
| 42 | ||||||
| 43 | ||||||
| 44 | # | |||||
| 45 | # Symbols | |||||
| 46 | # | |||||
| 47 | ||||||
| 48 | 1507 | 6239 | if ($self->{input} =~ m!^(\(|\)|\[|\]|\.\.|\.|\@|,|::)!){ | |||
| 49 | ||||||
| 50 | 538 | 1714 | $self->push_token('Symbol', $1); | |||
| 51 | 538 | 1843 | $self->consume(length $1); | |||
| 52 | 538 | 972 | return; | |||
| 53 | } | |||||
| 54 | ||||||
| 55 | # | |||||
| 56 | # NameTest | |||||
| 57 | # | |||||
| 58 | ||||||
| 59 | 969 | 3501 | if ($self->{input} =~ m!^(\*)!){ | |||
| 60 | ||||||
| 61 | 81 | 311 | $self->push_token('Star', '*'); | |||
| 62 | 81 | 296 | $self->consume(1); | |||
| 63 | 81 | 159 | return; | |||
| 64 | } | |||||
| 65 | ||||||
| 66 | 888 0 0 0 | 17921 0 0 0 | if ($self->{input} =~ m!^($self->{rx}->{NCName})\:\*!){ | |||
| 67 | ||||||
| 68 | 0 | 0 | $self->push_token('NCName', $1); | |||
| 69 | 0 | 0 | $self->push_token('NameTestPostfix', ':*'); | |||
| 70 | ||||||
| 71 | 0 | 0 | $self->consume(2 + length $1); | |||
| 72 | 0 | 0 | return; | |||
| 73 | } | |||||
| 74 | ||||||
| 75 | # QName test | |||||
| 76 | ||||||
| 77 | 888 0 0 0 | 19157 0 0 0 | if ($self->{input} =~ m!^((($self->{rx}->{NCName})\\x3a)?($self->{rx}->{NCName}))!){ | |||
| 78 | ||||||
| 79 | 424 | 1737 | $self->push_token('NCName', $3) if defined $3; | |||
| 80 | 424 | 1358 | $self->push_token('QNameSep', ':') if defined $3; | |||
| 81 | 424 | 1407 | $self->push_token('NCName', $4); | |||
| 82 | 424 | 1577 | $self->consume(length $1); | |||
| 83 | 424 | 874 | return; | |||
| 84 | } | |||||
| 85 | ||||||
| 86 | ||||||
| 87 | # | |||||
| 88 | # NodeType | |||||
| 89 | # | |||||
| 90 | ||||||
| 91 | 464 | 1905 | if ($self->{input} =~ m!^(comment|text|processing-instruction|node)!){ | |||
| 92 | ||||||
| 93 | 0 | 0 | $self->push_token('NodeType', $1); | |||
| 94 | 0 | 0 | $self->consume(length $1); | |||
| 95 | 0 | 0 | return; | |||
| 96 | } | |||||
| 97 | ||||||
| 98 | # | |||||
| 99 | # Operator | |||||
| 100 | # | |||||
| 101 | ||||||
| 102 | 464 | 1952 | if ($self->{input} =~ m!^(and|or|mod|div|//|/|\||\+|-|=|\!=|<=|<|>=|>)!){ | |||
| 103 | ||||||
| 104 | 304 | 1052 | $self->push_token('Operator', $1); | |||
| 105 | 304 | 1184 | $self->consume(length $1); | |||
| 106 | 304 | 592 | return; | |||
| 107 | } | |||||
| 108 | ||||||
| 109 | # | |||||
| 110 | # FunctionName (no need to test - it's a QName - it'll be found later on via special rules) | |||||
| 111 | # | |||||
| 112 | ||||||
| 113 | # | |||||
| 114 | # AxisName (no test - it's an NCName) | |||||
| 115 | # | |||||
| 116 | ||||||
| 117 | # | |||||
| 118 | # Literal | |||||
| 119 | # | |||||
| 120 | ||||||
| 121 | 160 | 676 | if ($self->{input} =~ m!^(('[^']*')|("[^"]*"))!){ | |||
| 122 | ||||||
| 123 | 68 | 173 | my $inner = $1; | |||
| 124 | 68 | 179 | $inner =~ m!^.(.*).$!; | |||
| 125 | ||||||
| 126 | 68 | 223 | $self->push_token('Literal', $1); | |||
| 127 | 68 | 292 | $self->consume(2 + length $1); | |||
| 128 | 68 | 131 | return; | |||
| 129 | } | |||||
| 130 | ||||||
| 131 | # | |||||
| 132 | # Number | |||||
| 133 | # | |||||
| 134 | ||||||
| 135 | 92 | 881 | if ($self->{input} =~ m!^($self->{rx}->{Number})!){ | |||
| 136 | ||||||
| 137 | 92 | 293 | $self->push_token('Number', $1); | |||
| 138 | 92 | 334 | $self->consume(length $1); | |||
| 139 | 92 | 183 | return; | |||
| 140 | } | |||||
| 141 | ||||||
| 142 | # | |||||
| 143 | # VariableReference | |||||
| 144 | # | |||||
| 145 | ||||||
| 146 | 0 | 0 | if ($self->{input} =~ m!^\$($self->{rx}->{QName})!){ | |||
| 147 | ||||||
| 148 | 0 | 0 | $self->push_token('VariableReference', $1); | |||
| 149 | 0 | 0 | $self->consume(1 + length $1); | |||
| 150 | 0 | 0 | return; | |||
| 151 | } | |||||
| 152 | ||||||
| 153 | ||||||
| 154 | ||||||
| 155 | 0 | 0 | $self->{error} = "couldn't toke at >>>$self->{input}<<<"; | |||
| 156 | } | |||||
| 157 | ||||||
| 158 | sub push_token { | |||||
| 159 | 1507 | 5109 | my ($self, $type, $content) = @_; | |||
| 160 | ||||||
| 161 | 1507 | 5260 | my $token = XML::Parser::Lite::Tree::XPath::Token->new(); | |||
| 162 | 1507 | 4655 | $token->{type} = $type; | |||
| 163 | 1507 | 6421 | $token->{content} = $content if defined $content; | |||
| 164 | ||||||
| 165 | 1507 1507 | 2738 5510 | push @{$self->{tokens}}, $token; | |||
| 166 | } | |||||
| 167 | ||||||
| 168 | sub consume { | |||||
| 169 | 1507 | 4268 | my ($self, $count) = @_; | |||
| 170 | 1507 | 6124 | $self->{input} = substr $self->{input}, $count; | |||
| 171 | } | |||||
| 172 | ||||||
| 173 | sub trim { | |||||
| 174 | 1723 | 4333 | my ($self) = @_; | |||
| 175 | 1723 | 5789 | $self->{input} =~ s!^[\x20\x09\x0D\x0A]+!!; | |||
| 176 | } | |||||
| 177 | ||||||
| 178 | sub special_rules { | |||||
| 179 | 216 | 578 | my ($self) = @_; | |||
| 180 | ||||||
| 181 | # | |||||
| 182 | # set up node chain | |||||
| 183 | # | |||||
| 184 | ||||||
| 185 | 216 | 434 | my $prev = undef; | |||
| 186 | 216 216 | 375 831 | for my $token(@{$self->{tokens}}){ | |||
| 187 | ||||||
| 188 | 1507 | 3779 | $token->{prev} = $prev; | |||
| 189 | 1507 | 3623 | $token->{next} = undef; | |||
| 190 | 1507 | 5705 | $prev->{next} = $token if defined $prev; | |||
| 191 | 1507 | 4115 | $prev = $token; | |||
| 192 | } | |||||
| 193 | ||||||
| 194 | ||||||
| 195 | # | |||||
| 196 | # special rules | |||||
| 197 | # | |||||
| 198 | ||||||
| 199 | 216 216 | 440 814 | for my $token(@{$self->{tokens}}){ | |||
| 200 | ||||||
| 201 | # | |||||
| 202 | # rule 1 | |||||
| 203 | # | |||||
| 204 | # If there is a preceding token and the preceding token is not one of @, ::, (, [, , or an Operator, | |||||
| 205 | # then a * must be recognized as a MultiplyOperator and an NCName must be recognized as an OperatorName. | |||||
| 206 | # | |||||
| 207 | ||||||
| 208 | 1507 | 5488 | if (defined $token->{prev}){ | |||
| 209 | 1291 | 3233 | my $p = $token->{prev}; | |||
| 210 | ||||||
| 211 | 1291 | 3978 | unless ($p->match('Symbol', '@') | |||
| 212 | || $p->match('Symbol', '::') | |||||
| 213 | || $p->match('Symbol', '(') | |||||
| 214 | || $p->match('Symbol', '[') | |||||
| 215 | || $p->match('Symbol', ',') | |||||
| 216 | || $p->match('Operator')){ | |||||
| 217 | ||||||
| 218 | 641 | 2272 | if ($token->{type} eq 'Star'){ | |||
| 219 | ||||||
| 220 | 1 | 4 | $token->{type} = 'Operator'; | |||
| 221 | }else{ | |||||
| 222 | 640 | 2808 | if ($token->{type} eq 'NCName'){ | |||
| 223 | ||||||
| 224 | 18 | 66 | if ($self->is_OperatorName($token->{content})){ | |||
| 225 | ||||||
| 226 | 18 | 71 | $token->{type} = 'Operator'; | |||
| 227 | ||||||
| 228 | }else{ | |||||
| 229 | 0 | 0 | $self->{error} = "Found NCName '$token->{content}' when an OperatorName was required"; | |||
| 230 | 0 | 0 | return; | |||
| 231 | } | |||||
| 232 | } | |||||
| 233 | } | |||||
| 234 | } | |||||
| 235 | } | |||||
| 236 | ||||||
| 237 | # | |||||
| 238 | # rule 2 | |||||
| 239 | # | |||||
| 240 | # If the character following an NCName (possibly after intervening ExprWhitespace) is (, | |||||
| 241 | # then the token must be recognized as a NodeType or a FunctionName. | |||||
| 242 | # | |||||
| 243 | ||||||
| 244 | 1507 | 4925 | if ($token->match('NCName')){ | |||
| 245 | ||||||
| 246 | 406 | 1573 | if (defined $token->{next}){ | |||
| 247 | ||||||
| 248 | 366 | 1449 | if ($token->{next}->match('Symbol', '(')){ | |||
| 249 | ||||||
| 250 | 130 | 501 | if ($self->is_NodeType($token->{content})){ | |||
| 251 | ||||||
| 252 | 9 | 36 | $token->{type} = 'NodeType'; | |||
| 253 | }else{ | |||||
| 254 | 121 | 495 | $token->{type} = 'FunctionName'; | |||
| 255 | } | |||||
| 256 | } | |||||
| 257 | } | |||||
| 258 | } | |||||
| 259 | ||||||
| 260 | # | |||||
| 261 | # rule 3 | |||||
| 262 | # | |||||
| 263 | # If the two characters following an NCName (possibly after intervening ExprWhitespace) are ::, | |||||
| 264 | # then the token must be recognized as an AxisName. | |||||
| 265 | # | |||||
| 266 | ||||||
| 267 | 1507 | 4798 | if ($token->match('NCName')){ | |||
| 268 | ||||||
| 269 | 276 | 1127 | if (defined $token->{next}){ | |||
| 270 | ||||||
| 271 | 236 | 907 | if ($token->{next}->match('Symbol', '::')){ | |||
| 272 | ||||||
| 273 | 84 | 332 | if ($self->is_AxisName($token->{content})){ | |||
| 274 | ||||||
| 275 | 84 | 429 | $token->{type} = 'AxisName'; | |||
| 276 | }else{ | |||||
| 277 | 0 | 0 | $self->{error} = "Found NCName '$token->{content}' when an AxisName was required"; | |||
| 278 | 0 | 0 | return; | |||
| 279 | } | |||||
| 280 | } | |||||
| 281 | } | |||||
| 282 | } | |||||
| 283 | } | |||||
| 284 | ||||||
| 285 | 216 216 | 451 959 | for my $token(@{$self->{tokens}}){ | |||
| 286 | ||||||
| 287 | # | |||||
| 288 | # rule 4 | |||||
| 289 | # | |||||
| 290 | # Otherwise, the token must not be recognized as a MultiplyOperator, an OperatorName, | |||||
| 291 | # a NodeType, a FunctionName, or an AxisName. | |||||
| 292 | # | |||||
| 293 | # (this means we need to clean up Star and NCName tokens) | |||||
| 294 | # | |||||
| 295 | ||||||
| 296 | 1507 | 4464 | if ($token->match('Star')){ | |||
| 297 | 80 | 239 | $token->{type} = 'NameTest'; | |||
| 298 | } | |||||
| 299 | ||||||
| 300 | 1507 | 4611 | if ($token->match('NCName')){ | |||
| 301 | 192 | 1238 | if (defined $token->{next} && $token->{next}->match('NameTestPostfix')){ | |||
| 302 | ||||||
| 303 | 0 | 0 | $token->{type} = 'NameTestBase'; | |||
| 304 | ||||||
| 305 | }else{ | |||||
| 306 | ||||||
| 307 | 192 | 1322 | if (defined $token->{next} && $token->{next}->match('QNameSep') | |||
| 308 | && defined $token->{next}->{next} && $token->{next}->{next}->match('NCName')){ | |||||
| 309 | ||||||
| 310 | 0 | 0 | $token->{type} = 'QNamePre'; | |||
| 311 | 0 | 0 | $token->{next}->{next}->{type} = 'QNamePost'; | |||
| 312 | ||||||
| 313 | }else{ | |||||
| 314 | ||||||
| 315 | 192 | 826 | $token->{type} = 'NameTest'; | |||
| 316 | } | |||||
| 317 | } | |||||
| 318 | } | |||||
| 319 | } | |||||
| 320 | ||||||
| 321 | # | |||||
| 322 | # remove the node chain | |||||
| 323 | # (it's a pain for debugging) | |||||
| 324 | # | |||||
| 325 | ||||||
| 326 | 216 216 | 438 780 | for my $token(@{$self->{tokens}}){ | |||
| 327 | ||||||
| 328 | 1507 | 3348 | delete $token->{prev}; | |||
| 329 | 1507 | 4431 | delete $token->{next}; | |||
| 330 | } | |||||
| 331 | ||||||
| 332 | ||||||
| 333 | # | |||||
| 334 | # squish temp token sequences together | |||||
| 335 | # | |||||
| 336 | ||||||
| 337 | 216 | 598 | my $old_tokens = $self->{tokens}; | |||
| 338 | 216 | 708 | $self->{tokens} = []; | |||
| 339 | ||||||
| 340 | 216 1723 | 426 6345 | while(my $token = shift @{$old_tokens}){ | |||
| 341 | ||||||
| 342 | 1507 | 4884 | if ($token->match('NameTestBase')){ | |||
| 343 | ||||||
| 344 | 0 | 0 | $token->{type} = 'NameTest'; | |||
| 345 | 0 | 0 | $token->{content} .= ':*'; | |||
| 346 | ||||||
| 347 | 0 0 | 0 0 | shift @{$old_tokens}; | |||
| 348 | } | |||||
| 349 | ||||||
| 350 | 1507 | 4551 | if ($token->match('QNamePre')){ | |||
| 351 | ||||||
| 352 | 0 0 | 0 0 | shift @{$old_tokens}; | |||
| 353 | 0 0 | 0 0 | my $post = shift @{$old_tokens}; | |||
| 354 | ||||||
| 355 | 0 | 0 | $token->{type} = 'NameTest'; | |||
| 356 | 0 | 0 | $token->{content} .= ':'.$post->{content}; | |||
| 357 | } | |||||
| 358 | ||||||
| 359 | 1507 1507 | 2783 5489 | push @{$self->{tokens}}, $token; | |||
| 360 | } | |||||
| 361 | ||||||
| 362 | # | |||||
| 363 | # TODO - need to check we don't have any temporary tokens still in the list | |||||
| 364 | # i.e. invalid sub-sequences. not sure which ones we could end up with | |||||
| 365 | # | |||||
| 366 | } | |||||
| 367 | ||||||
| 368 | sub is_OperatorName { | |||||
| 369 | 18 | 56 | my ($self, $content) = @_; | |||
| 370 | ||||||
| 371 | 18 | 108 | return 1 if $content =~ m!^(and|or|mod|div)$!; | |||
| 372 | 0 | 0 | return 0; | |||
| 373 | } | |||||
| 374 | ||||||
| 375 | sub is_NodeType { | |||||
| 376 | 130 | 374 | my ($self, $content) = @_; | |||
| 377 | ||||||
| 378 | 130 | 540 | return 1 if $content =~ m!^(comment|text|processing-instruction|node)$!; | |||
| 379 | 121 | 382 | return 0; | |||
| 380 | } | |||||
| 381 | ||||||
| 382 | sub is_AxisName { | |||||
| 383 | 84 | 254 | my ($self, $content) = @_; | |||
| 384 | ||||||
| 385 | 84 | 543 | return 1 if $content =~ m!^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self| | |||
| 386 | following|following-sibling|namespace|parent|preceding|preceding-sibling|self)$!x; | |||||
| 387 | 0 | 0 | return 0; | |||
| 388 | } | |||||
| 389 | ||||||
| 390 | ||||||
| 391 | package XML::Parser::Lite::Tree::XPath::Tokener::Token; | |||||
| 392 | ||||||
| 393 | sub new { | |||||
| 394 | 0 | 0 | my $class = shift; | |||
| 395 | 0 | 0 | my $self = bless {}, $class; | |||
| 396 | 0 | 0 | return $self; | |||
| 397 | } | |||||
| 398 | ||||||
| 399 | sub match { | |||||
| 400 | 0 | 0 | my ($self, $type, $content) = @_; | |||
| 401 | ||||||
| 402 | 0 | 0 | return 0 unless $self->{type} eq $type; | |||
| 403 | ||||||
| 404 | 0 | 0 | return 0 if (defined($content) && ($self->{content} ne $content)); | |||
| 405 | ||||||
| 406 | 0 | 0 | return 1; | |||
| 407 | } | |||||
| 408 | ||||||
| 409 | sub dump { | |||||
| 410 | 0 | 0 | my ($self) = @_; | |||
| 411 | ||||||
| 412 | 0 | 0 | my $ret = $self->{type}; | |||
| 413 | 0 | 0 | $ret .= ':absolute' if $self->{absolute}; | |||
| 414 | 0 | 0 | $ret .= ':'.$self->{content} if defined $self->{content}; | |||
| 415 | 0 | 0 | $ret .= $self->{axis} if defined $self->{axis}; | |||
| 416 | ||||||
| 417 | 0 | 0 | return $ret; | |||
| 418 | } | |||||
| 419 | ||||||
| 420 | package XML::Parser::Lite::Tree::XPath::Tokener::Rx; | |||||
| 421 | ||||||
| 422 | sub fetch { | |||||
| 423 | ||||||
| 424 | 216 | 352 | my %rx; | |||
| 425 | ||||||
| 426 | 216 | 1004 | $rx{CombiningChar} = '\\x{300}-\\x{345}\\x{360}-\\x{361}\\x{483}-\\x{486}\\x{591}-\\x{5a1}\\x{5a3}-\\x{5b9}\\x{5bb}' | |||
| 427 | .'-\\x{5bd}\\x{5bf}\\x{5c1}-\\x{5c2}\\x{5c4}\\x{64b}-\\x{652}\\x{670}\\x{6d6}-\\x{6dc}\\x{6dd}-\\' | |||||
| 428 | .'x{6df}\\x{6e0}-\\x{6e4}\\x{6e7}-\\x{6e8}\\x{6ea}-\\x{6ed}\\x{901}-\\x{903}\\x{93c}\\x{93e}-\\x' | |||||
| 429 | .'{94c}\\x{94d}\\x{951}-\\x{954}\\x{962}-\\x{963}\\x{981}-\\x{983}\\x{9bc}\\x{9be}\\x{9bf}\\x{9c' | |||||
| 430 | .'0}-\\x{9c4}\\x{9c7}-\\x{9c8}\\x{9cb}-\\x{9cd}\\x{9d7}\\x{9e2}-\\x{9e3}\\x{a02}\\x{a3c}\\x{a3e}' | |||||
| 431 | .'\\x{a3f}\\x{a40}-\\x{a42}\\x{a47}-\\x{a48}\\x{a4b}-\\x{a4d}\\x{a70}-\\x{a71}\\x{a81}-\\x{a83}\\' | |||||
| 432 | .'x{abc}\\x{abe}-\\x{ac5}\\x{ac7}-\\x{ac9}\\x{acb}-\\x{acd}\\x{b01}-\\x{b03}\\x{b3c}\\x{b3e}-\\x' | |||||
| 433 | .'{b43}\\x{b47}-\\x{b48}\\x{b4b}-\\x{b4d}\\x{b56}-\\x{b57}\\x{b82}-\\x{b83}\\x{bbe}-\\x{bc2}\\x{' | |||||
| 434 | .'bc6}-\\x{bc8}\\x{bca}-\\x{bcd}\\x{bd7}\\x{c01}-\\x{c03}\\x{c3e}-\\x{c44}\\x{c46}-\\x{c48}\\x{c' | |||||
| 435 | .'4a}-\\x{c4d}\\x{c55}-\\x{c56}\\x{c82}-\\x{c83}\\x{cbe}-\\x{cc4}\\x{cc6}-\\x{cc8}\\x{cca}-\\x{c' | |||||
| 436 | .'cd}\\x{cd5}-\\x{cd6}\\x{d02}-\\x{d03}\\x{d3e}-\\x{d43}\\x{d46}-\\x{d48}\\x{d4a}-\\x{d4d}\\x{d5' | |||||
| 437 | .'7}\\x{e31}\\x{e34}-\\x{e3a}\\x{e47}-\\x{e4e}\\x{eb1}\\x{eb4}-\\x{eb9}\\x{ebb}-\\x{ebc}\\x{ec8}' | |||||
| 438 | .'-\\x{ecd}\\x{f18}-\\x{f19}\\x{f35}\\x{f37}\\x{f39}\\x{f3e}\\x{f3f}\\x{f71}-\\x{f84}\\x{f86}-\\' | |||||
| 439 | .'x{f8b}\\x{f90}-\\x{f95}\\x{f97}\\x{f99}-\\x{fad}\\x{fb1}-\\x{fb7}\\x{fb9}\\x{20d0}-\\x{20dc}\\' | |||||
| 440 | .'x{20e1}\\x{302a}-\\x{302f}\\x{3099}\\x{309a}'; | |||||
| 441 | ||||||
| 442 | 216 | 647 | $rx{Extender} = '\\xb7\\x{2d0}\\x{2d1}\\x{387}\\x{640}\\x{e46}\\x{ec6}\\x{3005}\\x{3031}-\\x{3035}\\x{309d}-\\' | |||
| 443 | .'x{309e}\\x{30fc}-\\x{30fe}'; | |||||
| 444 | ||||||
| 445 | 216 | 657 | $rx{Digit} = '\\x30-\\x39\\x{660}-\\x{669}\\x{6f0}-\\x{6f9}\\x{966}-\\x{96f}\\x{9e6}-\\x{9ef}\\x{a66}-\\x{a' | |||
| 446 | .'6f}\\x{ae6}-\\x{aef}\\x{b66}-\\x{b6f}\\x{be7}-\\x{bef}\\x{c66}-\\x{c6f}\\x{ce6}-\\x{cef}\\x{d6' | |||||
| 447 | .'6}-\\x{d6f}\\x{e50}-\\x{e59}\\x{ed0}-\\x{ed9}\\x{f20}-\\x{f29}'; | |||||
| 448 | ||||||
| 449 | 216 | 824 | $rx{BaseChar} = '\\x41-\\x5a\\x61-\\x7a\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\xff\\x{100}-\\x{131}\\x{134}-\\x{13e}\\x{' | |||
| 450 | .'141}-\\x{148}\\x{14a}-\\x{17e}\\x{180}-\\x{1c3}\\x{1cd}-\\x{1f0}\\x{1f4}-\\x{1f5}\\x{1fa}-\\x{' | |||||
| 451 | .'217}\\x{250}-\\x{2a8}\\x{2bb}-\\x{2c1}\\x{386}\\x{388}-\\x{38a}\\x{38c}\\x{38e}-\\x{3a1}\\x{3a' | |||||
| 452 | .'3}-\\x{3ce}\\x{3d0}-\\x{3d6}\\x{3da}\\x{3dc}\\x{3de}\\x{3e0}\\x{3e2}-\\x{3f3}\\x{401}-\\x{40c}' | |||||
| 453 | .'\\x{40e}-\\x{44f}\\x{451}-\\x{45c}\\x{45e}-\\x{481}\\x{490}-\\x{4c4}\\x{4c7}-\\x{4c8}\\x{4cb}-' | |||||
| 454 | .'\\x{4cc}\\x{4d0}-\\x{4eb}\\x{4ee}-\\x{4f5}\\x{4f8}-\\x{4f9}\\x{531}-\\x{556}\\x{559}\\x{561}-\\' | |||||
| 455 | .'x{586}\\x{5d0}-\\x{5ea}\\x{5f0}-\\x{5f2}\\x{621}-\\x{63a}\\x{641}-\\x{64a}\\x{671}-\\x{6b7}\\x' | |||||
| 456 | .'{6ba}-\\x{6be}\\x{6c0}-\\x{6ce}\\x{6d0}-\\x{6d3}\\x{6d5}\\x{6e5}-\\x{6e6}\\x{905}-\\x{939}\\x{' | |||||
| 457 | .'93d}\\x{958}-\\x{961}\\x{985}-\\x{98c}\\x{98f}-\\x{990}\\x{993}-\\x{9a8}\\x{9aa}-\\x{9b0}\\x{9' | |||||
| 458 | .'b2}\\x{9b6}-\\x{9b9}\\x{9dc}-\\x{9dd}\\x{9df}-\\x{9e1}\\x{9f0}-\\x{9f1}\\x{a05}-\\x{a0a}\\x{a0' | |||||
| 459 | .'f}-\\x{a10}\\x{a13}-\\x{a28}\\x{a2a}-\\x{a30}\\x{a32}-\\x{a33}\\x{a35}-\\x{a36}\\x{a38}-\\x{a3' | |||||
| 460 | .'9}\\x{a59}-\\x{a5c}\\x{a5e}\\x{a72}-\\x{a74}\\x{a85}-\\x{a8b}\\x{a8d}\\x{a8f}-\\x{a91}\\x{a93}' | |||||
| 461 | .'-\\x{aa8}\\x{aaa}-\\x{ab0}\\x{ab2}-\\x{ab3}\\x{ab5}-\\x{ab9}\\x{abd}\\x{ae0}\\x{b05}-\\x{b0c}\\' | |||||
| 462 | .'x{b0f}-\\x{b10}\\x{b13}-\\x{b28}\\x{b2a}-\\x{b30}\\x{b32}-\\x{b33}\\x{b36}-\\x{b39}\\x{b3d}\\x' | |||||
| 463 | .'{b5c}-\\x{b5d}\\x{b5f}-\\x{b61}\\x{b85}-\\x{b8a}\\x{b8e}-\\x{b90}\\x{b92}-\\x{b95}\\x{b99}-\\x' | |||||
| 464 | .'{b9a}\\x{b9c}\\x{b9e}-\\x{b9f}\\x{ba3}-\\x{ba4}\\x{ba8}-\\x{baa}\\x{bae}-\\x{bb5}\\x{bb7}-\\x{' | |||||
| 465 | .'bb9}\\x{c05}-\\x{c0c}\\x{c0e}-\\x{c10}\\x{c12}-\\x{c28}\\x{c2a}-\\x{c33}\\x{c35}-\\x{c39}\\x{c' | |||||
| 466 | .'60}-\\x{c61}\\x{c85}-\\x{c8c}\\x{c8e}-\\x{c90}\\x{c92}-\\x{ca8}\\x{caa}-\\x{cb3}\\x{cb5}-\\x{c' | |||||
| 467 | .'b9}\\x{cde}\\x{ce0}-\\x{ce1}\\x{d05}-\\x{d0c}\\x{d0e}-\\x{d10}\\x{d12}-\\x{d28}\\x{d2a}-\\x{d3' | |||||
| 468 | .'9}\\x{d60}-\\x{d61}\\x{e01}-\\x{e2e}\\x{e30}\\x{e32}-\\x{e33}\\x{e40}-\\x{e45}\\x{e81}-\\x{e82' | |||||
| 469 | .'}\\x{e84}\\x{e87}-\\x{e88}\\x{e8a}\\x{e8d}\\x{e94}-\\x{e97}\\x{e99}-\\x{e9f}\\x{ea1}-\\x{ea3}\\' | |||||
| 470 | .'x{ea5}\\x{ea7}\\x{eaa}-\\x{eab}\\x{ead}-\\x{eae}\\x{eb0}\\x{eb2}-\\x{eb3}\\x{ebd}\\x{ec0}-\\x{' | |||||
| 471 | .'ec4}\\x{f40}-\\x{f47}\\x{f49}-\\x{f69}\\x{10a0}-\\x{10c5}\\x{10d0}-\\x{10f6}\\x{1100}\\x{1102}' | |||||
| 472 | .'-\\x{1103}\\x{1105}-\\x{1107}\\x{1109}\\x{110b}-\\x{110c}\\x{110e}-\\x{1112}\\x{113c}\\x{113e}' | |||||
| 473 | .'\\x{1140}\\x{114c}\\x{114e}\\x{1150}\\x{1154}-\\x{1155}\\x{1159}\\x{115f}-\\x{1161}\\x{1163}\\' | |||||
| 474 | .'x{1165}\\x{1167}\\x{1169}\\x{116d}-\\x{116e}\\x{1172}-\\x{1173}\\x{1175}\\x{119e}\\x{11a8}\\x{' | |||||
| 475 | .'11ab}\\x{11ae}-\\x{11af}\\x{11b7}-\\x{11b8}\\x{11ba}\\x{11bc}-\\x{11c2}\\x{11eb}\\x{11f0}\\x{1' | |||||
| 476 | .'1f9}\\x{1e00}-\\x{1e9b}\\x{1ea0}-\\x{1ef9}\\x{1f00}-\\x{1f15}\\x{1f18}-\\x{1f1d}\\x{1f20}-\\x{' | |||||
| 477 | .'1f45}\\x{1f48}-\\x{1f4d}\\x{1f50}-\\x{1f57}\\x{1f59}\\x{1f5b}\\x{1f5d}\\x{1f5f}-\\x{1f7d}\\x{1' | |||||
| 478 | .'f80}-\\x{1fb4}\\x{1fb6}-\\x{1fbc}\\x{1fbe}\\x{1fc2}-\\x{1fc4}\\x{1fc6}-\\x{1fcc}\\x{1fd0}-\\x{' | |||||
| 479 | .'1fd3}\\x{1fd6}-\\x{1fdb}\\x{1fe0}-\\x{1fec}\\x{1ff2}-\\x{1ff4}\\x{1ff6}-\\x{1ffc}\\x{2126}\\x{' | |||||
| 480 | .'212a}-\\x{212b}\\x{212e}\\x{2180}-\\x{2182}\\x{3041}-\\x{3094}\\x{30a1}-\\x{30fa}\\x{3105}-\\x' | |||||
| 481 | .'{312c}\\x{ac00}-\\x{d7a3}'; | |||||
| 482 | ||||||
| 483 | 216 | 589 | $rx{IdeoGraphic} = '\\x{4e00}-\\x{9fa5}\\x{3007}\\x{3021}-\\x{3029}'; | |||
| 484 | ||||||
| 485 | 216 | 1151 | $rx{Letter} = $rx{BaseChar} . $rx{IdeoGraphic}; | |||
| 486 | ||||||
| 487 | 216 | 1706 | $rx{NCNameChar} = $rx{Letter} . $rx{Digit} . '\\x2e\\x2d\\x5f' . $rx{CombiningChar} . $rx{Extender}; | |||
| 488 | ||||||
| 489 | 216 | 1910 | $rx{NCName} = '['.$rx{Letter}.'\\x5f]['.$rx{NCNameChar}.']*'; | |||
| 490 | ||||||
| 491 | 216 | 2849 | $rx{QName} = '('.$rx{NCName}.'\\x3a)?'.$rx{NCName}; | |||
| 492 | ||||||
| 493 | 216 | 586 | $rx{Digits} = '[0-9]+'; | |||
| 494 | 216 | 586 | $rx{Number} = '([0-9]+(\\.([0-9]+)?)?)|(\\.[0-9]+)'; | |||
| 495 | ||||||
| 496 | 216 | 998 | return \%rx; | |||
| 497 | } | |||||
| 498 | ||||||
| 499 | 1; | |||||