| File: | blib/lib/CSS/Grammar/CSS10.pm |
| Coverage: | 98.9% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | package CSS::Grammar::CSS10; | |||||
| 2 | ||||||
| 3 | 4 4 4 | 22 8 28 | use strict; | |||
| 4 | 4 4 4 | 27 8 25 | use warnings; | |||
| 5 | 4 4 4 | 24 10 40 | use Data::Dumper; | |||
| 6 | ||||||
| 7 | 4 4 4 | 30 14 33 | use base 'CSS::Grammar'; | |||
| 8 | ||||||
| 9 | # | |||||
| 10 | # http://www.w3.org/TR/REC-CSS1-961217.html#appendix-b | |||||
| 11 | # | |||||
| 12 | ||||||
| 13 | sub init { | |||||
| 14 | 4 | 12 | my ($self) = @_; | |||
| 15 | # registers the CSS1 toke (token regex) and lex (grammar) rules on $self, then sets the base rule to 'stylesheet' | |||||
| 16 | 4 | 7 | my %rx; | |||
| 17 | ||||||
| 18 | ##################################################################################### | |||||
| 19 | ||||||
| 20 | 4 | 10 | $self->{case_insensitive} = 1; | |||
| 21 | ||||||
| 22 | ||||||
| 23 | ##################################################################################### | |||||
| 24 | ||||||
| 25 | #unicode \\[0-9a-f]{1,4} | |||||
| 26 | #latin1 [¡-ÿ] | |||||
| 27 | #escape {unicode}|\\[ -~¡-ÿ] | |||||
| 28 | #stringchar {escape}|{latin1}|[ !#$%&(-~] | |||||
| 29 | #nmstrt [a-z]|{latin1}|{escape} | |||||
| 30 | #nmchar [-a-z0-9]|{latin1}|{escape} | |||||
| 31 | #ident {nmstrt}{nmchar}* | |||||
| 32 | #name {nmchar}+ | |||||
| 33 | #d [0-9] | |||||
| 34 | #notnm [^-a-z0-9\\]|{latin1} | |||||
| 35 | #w [ \t\n]* | |||||
| 36 | #num {d}+|{d}*\.{d}+ | |||||
| 37 | #string \"({stringchar}|\')*\"|\'({stringchar}|\")*\' | |||||
| 38 | # quoting note: four backslashes in the perl source reach the regex as \\ (one literal backslash), and \$ stops perl interpolating $% inside double quotes | |||||
| 39 | 4 | 13 | $rx{unicode} = '(\\\\[0-9a-f]{1,4})'; | |||
| 40 | 4 | 11 | $rx{ascii} = '[\x20-\x7e]'; | |||
| 41 | 4 | 10 | $rx{latin1} = '[\xa1-\xff]'; | |||
| 42 | 4 | 34 | $rx{escape} = "($rx{unicode}|\\\\$rx{ascii}|\\\\$rx{latin1})"; | |||
| 43 | 4 | 47 | $rx{stringchar} = "($rx{escape}|$rx{latin1}|[ !#\$%&(-~])"; | |||
| 44 | 4 | 23 | $rx{nmstrt} = "([a-z]|$rx{latin1}|$rx{escape})"; | |||
| 45 | 4 | 22 | $rx{nmchar} = "([-a-z0-9]|$rx{latin1}|$rx{escape})"; | |||
| 46 | 4 | 24 | $rx{ident} = "($rx{nmstrt}$rx{nmchar}*)"; | |||
| 47 | 4 | 19 | $rx{name} = "($rx{nmchar}+)"; | |||
| 48 | 4 | 15 | $rx{d} = "[0-9]"; | |||
| 49 | 4 | 16 | $rx{notnm} = "([^-a-z0-9\\\\]|$rx{latin1})"; | |||
| 50 | 4 | 11 | $rx{w} = "([ \\t\\n]*)"; | |||
| 51 | 4 | 27 | $rx{num} = "($rx{d}+|$rx{d}*\\.$rx{d}+)"; | |||
| 52 | 4 | 26 | $rx{string} = "(\"($rx{stringchar}|\')*\"|\'($rx{stringchar}|\")*\')"; | |||
| 53 | ||||||
| 54 | ||||||
| 55 | ##################################################################################### | |||||
| 56 | ||||||
| 57 | #"/*" {BEGIN(COMMENT);} | |||||
| 58 | #<COMMENT>"*/" {BEGIN(0);} | |||||
| 59 | #<COMMENT>\n {/* ignore */} | |||||
| 60 | #<COMMENT>. {/* ignore */} | |||||
| 61 | #@import {BEGIN(0); return IMPORT_SYM;} | |||||
| 62 | #"!"{w}important {BEGIN(0); return IMPORTANT_SYM;} | |||||
| 63 | #{ident} {BEGIN(AFTER_IDENT); return IDENT;} | |||||
| 64 | #{string} {BEGIN(0); return STRING;} | |||||
| 65 | # COMMENT uses the usual single-regex comment pattern so that runs of stars before the closing slash (e.g. /***/) match and the match stops at the first */ | |||||
| 66 | 4 | 23 | $self->add_toke_rule('COMMENT' , "\\/\\*[^*]*\\*+([^\\/*][^*]*\\*+)*\\/"); | |||
| 67 | 4 | 15 | $self->add_toke_rule('IMPORT_SYM' , "\@import"); | |||
| 68 | 4 | 25 | $self->add_toke_rule('IMPORTANT_SYM' , "!$rx{w}important"); | |||
| 69 | 4 | 18 | $self->add_toke_rule('IDENT' , $rx{ident}); | |||
| 70 | 4 | 16 | $self->add_toke_rule('STRING' , $rx{string}); | |||
| 71 | ||||||
| 72 | ||||||
| 73 | #{num} {BEGIN(0); return NUMBER;} | |||||
| 74 | #{num}"%" {BEGIN(0); return PERCENTAGE;} | |||||
| 75 | #{num}pt/{notnm} {BEGIN(0); return LENGTH;} | |||||
| 76 | #{num}mm/{notnm} {BEGIN(0); return LENGTH;} | |||||
| 77 | #{num}cm/{notnm} {BEGIN(0); return LENGTH;} | |||||
| 78 | #{num}pc/{notnm} {BEGIN(0); return LENGTH;} | |||||
| 79 | #{num}in/{notnm} {BEGIN(0); return LENGTH;} | |||||
| 80 | #{num}px/{notnm} {BEGIN(0); return LENGTH;} | |||||
| 81 | #{num}em/{notnm} {BEGIN(0); return EMS;} | |||||
| 82 | #{num}ex/{notnm} {BEGIN(0); return EXS;} | |||||
| 83 | ||||||
| 84 | 4 | 19 | $self->add_toke_rule('NUMBER' , $rx{num}); | |||
| 85 | # TODO: | |||||
| 86 | # the slash above is lex "trailing context": {num}pt/{notnm} matches {num}pt only when it is | |||||
| 87 | # followed by {notnm} (a lookahead in perl terms). the percentage and unit rules are still to do | |||||
| 88 | ||||||
| 89 | ||||||
| 90 | #<AFTER_IDENT>":"link {return LINK_PSCLASS_AFTER_IDENT;} | |||||
| 91 | #<AFTER_IDENT>":"visited {return VISITED_PSCLASS_AFTER_IDENT;} | |||||
| 92 | #<AFTER_IDENT>":"active {return ACTIVE_PSCLASS_AFTER_IDENT;} | |||||
| 93 | #<AFTER_IDENT>":"first-line {return FIRST_LINE_AFTER_IDENT;} | |||||
| 94 | #<AFTER_IDENT>":"first-letter {return FIRST_LETTER_AFTER_IDENT;} | |||||
| 95 | #<AFTER_IDENT>"#"{name} {return HASH_AFTER_IDENT;} | |||||
| 96 | #<AFTER_IDENT>"."{name} {return CLASS_AFTER_IDENT;} | |||||
| 97 | ||||||
| 98 | #":"link {BEGIN(AFTER_IDENT); return LINK_PSCLASS;} | |||||
| 99 | #":"visited {BEGIN(AFTER_IDENT); return VISITED_PSCLASS;} | |||||
| 100 | #":"active {BEGIN(AFTER_IDENT); return ACTIVE_PSCLASS;} | |||||
| 101 | #":"first-line {BEGIN(AFTER_IDENT); return FIRST_LINE;} | |||||
| 102 | #":"first-letter {BEGIN(AFTER_IDENT); return FIRST_LETTER;} | |||||
| 103 | #"#"{name} {BEGIN(AFTER_IDENT); return HASH;} | |||||
| 104 | #"."{name} {BEGIN(AFTER_IDENT); return CLASS;} | |||||
| 105 | ||||||
| 106 | # all this <AFTER_IDENT> state isn't really needed - it's just for context once we | |||||
| 107 | # finish the parse (or so it seems to me) and doesn't actually affect the toke/lex | |||||
| 108 | ||||||
| 109 | 4 | 16 | $self->add_toke_rule('LINK_PSCLASS' , "\\:link"); | |||
| 110 | 4 | 15 | $self->add_toke_rule('VISITED_PSCLASS' , "\\:visited"); | |||
| 111 | 4 | 13 | $self->add_toke_rule('ACTIVE_PSCLASS' , "\\:active"); | |||
| 112 | 4 | 15 | $self->add_toke_rule('FIRST_LINE' , "\\:first-line"); | |||
| 113 | 4 | 59 | $self->add_toke_rule('FIRST_LETTER' , "\\:first-letter"); | |||
| 114 | ||||||
| 115 | 4 | 20 | $self->add_toke_rule('HASH' , "\\#$rx{name}"); | |||
| 116 | 4 | 20 | $self->add_toke_rule('CLASS' , "\\.$rx{name}"); | |||
| 117 | ||||||
| 118 | ||||||
| 119 | #url\({w}{string}{w}\) | | |||||
| 120 | #url\({w}([^ \n\'\")]|\\\ |\\\'|\\\"|\\\))+{w}\) {BEGIN(0); return URL;} | |||||
| 121 | #rgb\({w}{num}%?{w}\,{w}{num}%?{w}\,{w}{num}%?{w}\) {BEGIN(0); return RGB;} | |||||
| 122 | ||||||
| 123 | # TODO: | |||||
| 124 | # the URL and RGB toke rules above have not been done yet | |||||
| 125 | ||||||
| 126 | ||||||
| 127 | #[ \t]+ {BEGIN(0); /* ignore whitespace */} | |||||
| 128 | #\n {BEGIN(0); /* ignore whitespace */} | |||||
| 129 | #\<\!\-\- {BEGIN(0); return CDO;} | |||||
| 130 | #\-\-\> {BEGIN(0); return CDC;} | |||||
| 131 | ||||||
| 132 | 4 | 15 | $self->add_toke_rule('CDO' , "<!--"); | |||
| 133 | 4 | 15 | $self->add_toke_rule('CDC' , "-->"); | |||
| 134 | ||||||
| 135 | 4 | 15 | $self->add_toke_rule('WHITESPACE' , '[ \t\n]+'); | |||
| 136 | ||||||
| 137 | ||||||
| 138 | ##################################################################################### | |||||
| 139 | # single-character punctuation tokens used by the lex rules below | |||||
| 140 | 4 | 17 | $self->add_toke_rule('_SEMICOLON' , ';'); | |||
| 141 | 4 | 15 | $self->add_toke_rule('_MINUS' , '-'); | |||
| 142 | 4 | 15 | $self->add_toke_rule('_PLUS' , '\\+'); | |||
| 143 | 4 | 15 | $self->add_toke_rule('_SLASH' , '/'); | |||
| 144 | 4 | 16 | $self->add_toke_rule('_COMMA' , ','); | |||
| 145 | 4 | 17 | $self->add_toke_rule('_BRACE_OPEN' , '\\{'); | |||
| 146 | 4 | 23 | $self->add_toke_rule('_BRACE_CLOSE' , '\\}'); | |||
| 147 | 4 | 14 | $self->add_toke_rule('_COLON' , '\\:'); | |||
| 148 | ||||||
| 149 | ||||||
| 150 | ##################################################################################### | |||||
| 151 | ||||||
| 152 | #stylesheet | |||||
| 153 | # : [CDO|CDC]* [ import [CDO|CDC]* ]* [ ruleset [CDO|CDC]* ]* | |||||
| 154 | # ; | |||||
| 155 | #import | |||||
| 156 | # : IMPORT_SYM [STRING|URL] ';' /* E.g., @import url(fun.css); */ | |||||
| 157 | # ; | |||||
| 158 | #unary_operator | |||||
| 159 | # : '-' | '+' | |||||
| 160 | # ; | |||||
| 161 | #operator | |||||
| 162 | # : '/' | ',' | /* empty */ | |||||
| 163 | # ; | |||||
| 164 | ||||||
| 165 | 4 | 75 | $self->add_lex_rule('stylesheet' , '[CDO|CDC]* [ import [CDO|CDC]* ]* [ ruleset [CDO|CDC]* ]*'); | |||
| 166 | 4 | 15 | $self->add_lex_rule('import' , 'IMPORT_SYM [STRING|URL] _SEMICOLON'); | |||
| 167 | 4 | 16 | $self->add_lex_rule('unary_operator' , '_MINUS | _PLUS'); | |||
| 168 | 4 | 15 | $self->add_lex_rule('operator' , '_SLASH | _COMMA | '); | |||
| 169 | ||||||
| 170 | ||||||
| 171 | #property | |||||
| 172 | # : IDENT | |||||
| 173 | # ; | |||||
| 174 | #ruleset | |||||
| 175 | # : selector [ ',' selector ]* | |||||
| 176 | # '{' declaration [ ';' declaration ]* '}' | |||||
| 177 | # ; | |||||
| 178 | #selector | |||||
| 179 | # : simple_selector+ [ pseudo_element | solitary_pseudo_element ]? | |||||
| 180 | # | solitary_pseudo_element | |||||
| 181 | # ; | |||||
| 182 | ||||||
| 183 | 4 | 16 | $self->add_lex_rule('property' , 'IDENT'); | |||
| 184 | 4 | 15 | $self->add_lex_rule('ruleset' , 'selector [ _COMMA selector ]* _BRACE_OPEN declaration [ _SEMICOLON declaration ]* _BRACE_CLOSE'); | |||
| 185 | 4 | 16 | $self->add_lex_rule('selector' , 'simple_selector+ [ pseudo_element ]? | pseudo_element'); | |||
| 186 | ||||||
| 187 | ||||||
| 188 | #simple_selector | |||||
| 189 | # : element_name id? class? pseudo_class? /* eg: H1.subject */ | |||||
| 190 | # | solitary_id class? pseudo_class? /* eg: #xyz33 */ | |||||
| 191 | # | solitary_class pseudo_class? /* eg: .author */ | |||||
| 192 | # | solitary_pseudo_class /* eg: :link */ | |||||
| 193 | # ; | |||||
| 194 | #element_name | |||||
| 195 | # : IDENT | |||||
| 196 | # ; | |||||
| 197 | #pseudo_class /* as in: A:link */ | |||||
| 198 | # : LINK_PSCLASS_AFTER_IDENT | |||||
| 199 | # | VISITED_PSCLASS_AFTER_IDENT | |||||
| 200 | # | ACTIVE_PSCLASS_AFTER_IDENT | |||||
| 201 | # ; | |||||
| 202 | #solitary_pseudo_class /* as in: :link */ | |||||
| 203 | # : LINK_PSCLASS | |||||
| 204 | # | VISITED_PSCLASS | |||||
| 205 | # | ACTIVE_PSCLASS | |||||
| 206 | # ; | |||||
| 207 | ||||||
| 208 | 4 | 17 | $self->add_lex_rule('simple_selector' , 'element_name id? class? pseudo_class? | id class? pseudo_class? | class pseudo_class? | pseudo_class'); | |||
| 209 | 4 | 16 | $self->add_lex_rule('element_name' , 'IDENT'); | |||
| 210 | 4 | 16 | $self->add_lex_rule('pseudo_class' , 'LINK_PSCLASS | VISITED_PSCLASS | ACTIVE_PSCLASS'); | |||
| 211 | ||||||
| 212 | ||||||
| 213 | #class /* as in: P.note */ | |||||
| 214 | # : CLASS_AFTER_IDENT | |||||
| 215 | # ; | |||||
| 216 | #solitary_class /* as in: .note */ | |||||
| 217 | # : CLASS | |||||
| 218 | # ; | |||||
| 219 | #pseudo_element /* as in: P:first-line */ | |||||
| 220 | # : FIRST_LETTER_AFTER_IDENT | |||||
| 221 | # | FIRST_LINE_AFTER_IDENT | |||||
| 222 | # ; | |||||
| 223 | #solitary_pseudo_element /* as in: :first-line */ | |||||
| 224 | # : FIRST_LETTER | |||||
| 225 | # | FIRST_LINE | |||||
| 226 | # ; | |||||
| 227 | ||||||
| 228 | 4 | 16 | $self->add_lex_rule('class' , 'CLASS'); | |||
| 229 | 4 | 14 | $self->add_lex_rule('pseudo_element' , 'FIRST_LETTER | FIRST_LINE'); | |||
| 230 | ||||||
| 231 | ||||||
| 232 | #id | |||||
| 233 | # : HASH_AFTER_IDENT | |||||
| 234 | # ; | |||||
| 235 | #solitary_id | |||||
| 236 | # : HASH | |||||
| 237 | # ; | |||||
| 238 | #declaration | |||||
| 239 | # : property ':' expr prio? | |||||
| 240 | # | /* empty */ /* Prevents syntax errors... */ | |||||
| 241 | # ; | |||||
| 242 | #prio | |||||
| 243 | # : IMPORTANT_SYM /* !important */ | |||||
| 244 | # ; | |||||
| 245 | #expr | |||||
| 246 | # : term [ operator term ]* | |||||
| 247 | # ; | |||||
| 248 | ||||||
| 249 | 4 | 14 | $self->add_lex_rule('id' , 'HASH'); | |||
| 250 | 4 | 15 | $self->add_lex_rule('declaration' , 'property _COLON expr prio? | '); | |||
| 251 | 4 | 73 | $self->add_lex_rule('prio' , 'IMPORTANT_SYM'); | |||
| 252 | 4 | 14 | $self->add_lex_rule('expr' , 'term [ operator term ]*'); | |||
| 253 | ||||||
| 254 | ||||||
| 255 | #term | |||||
| 256 | # : unary_operator? | |||||
| 257 | # [ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS | |||||
| 258 | # | IDENT | hexcolor | URL | RGB ] | |||||
| 259 | # ; | |||||
| 260 | #hexcolor | |||||
| 261 | # : HASH | HASH_AFTER_IDENT | |||||
| 262 | # ; | |||||
| 263 | ||||||
| 264 | 4 | 15 | $self->add_lex_rule('term' , 'unary_operator? [ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS | IDENT | hexcolor | URL | RGB ]'); | |||
| 265 | 4 | 16 | $self->add_lex_rule('hexcolor' , 'HASH | HASH_AFTER_IDENT'); | |||
| 266 | # NOTE(review): PERCENTAGE, LENGTH, EMS, EXS, URL, RGB and HASH_AFTER_IDENT have no toke rule in this file - confirm how the lexer treats unknown token names | |||||
| 267 | ||||||
| 268 | ##################################################################################### | |||||
| 269 | ||||||
| 270 | 4 | 25 | $self->set_base('stylesheet'); | |||
| 271 | } | |||||
| 272 | ||||||
| 273 | sub toke { | |||||
| 274 | 3 | 18 | my ($self, $input) = @_; | |||
| 275 | # tokenises $input via the parent class toke(), then drops WHITESPACE and COMMENT tokens | |||||
| 276 | # | |||||
| 277 | # CSS 1 treats whitespace oddly - we need to toke and then remove all | |||||
| 278 | # WHITESPACE tokens before we try to lex. we do the same with | |||||
| 279 | # comments (the other modules do this differently right now). maybe | |||||
| 280 | # we should remove these tokens at the start of lexing instead? | |||||
| 281 | # | |||||
| 282 | ||||||
| 283 | 3 | 26 | my $tokens = $self->SUPER::toke($input); | |||
| 284 | 3 | 8 | my $out = []; | |||
| 285 | # $tokens is used as an array ref of hash refs, each read through its 'type' key | |||||
| 286 | 3 3 | 6 9 | for my $token (@{$tokens}){ | |||
| 287 | ||||||
| 288 | 33 | 124 | next if $token->{type} eq 'WHITESPACE'; | |||
| 289 | 21 | 68 | next if $token->{type} eq 'COMMENT'; | |||
| 290 | ||||||
| 291 | 21 21 | 33 66 | push @{$out}, $token; | |||
| 292 | } | |||||
| 293 | # returns a new array ref holding the remaining tokens in their original order | |||||
| 294 | 3 | 85 | return $out; | |||
| 295 | } | |||||
| 296 | ||||||
| 297 | 1; | |||||