File: | blib/lib/CSS/Grammar/CSS10.pm |
Coverage: | 98.9% |
line | stmt | bran | cond | sub | time | code |
---|---|---|---|---|---|---|
1 | package CSS::Grammar::CSS10; | |||||
2 | ||||||
3 | 4 4 4 | 22 8 28 | use strict; | |||
4 | 4 4 4 | 27 8 25 | use warnings; | |||
5 | 4 4 4 | 24 10 40 | use Data::Dumper; | |||
6 | ||||||
7 | 4 4 4 | 30 14 33 | use base 'CSS::Grammar'; | |||
8 | ||||||
9 | # | |||||
10 | # http://www.w3.org/TR/REC-CSS1-961217.html#appendix-b | |||||
11 | # | |||||
12 | ||||||
13 | sub init { | |||||
14 | 4 | 12 | my ($self) = @_; | |||
15 | ||||||
16 | 4 | 7 | my %rx; | |||
17 | ||||||
18 | ##################################################################################### | |||||
19 | ||||||
20 | 4 | 10 | $self->{case_insensitive} = 1; | |||
21 | ||||||
22 | ||||||
23 | ##################################################################################### | |||||
24 | ||||||
25 | #unicode \\[0-9a-f]{1,4} | |||||
26 | #latin1 [¡-ÿ] | |||||
27 | #escape {unicode}|\\[ -~¡-ÿ] | |||||
28 | #stringchar {escape}|{latin1}|[ !#$%&(-~] | |||||
29 | #nmstrt [a-z]|{latin1}|{escape} | |||||
30 | #nmchar [-a-z0-9]|{latin1}|{escape} | |||||
31 | #ident {nmstrt}{nmchar}* | |||||
32 | #name {nmchar}+ | |||||
33 | #d [0-9] | |||||
34 | #notnm [^-a-z0-9\\]|{latin1} | |||||
35 | #w [ \t\n]* | |||||
36 | #num {d}+|{d}*\.{d}+ | |||||
37 | #string \"({stringchar}|\')*\"|\'({stringchar}|\")*\' | |||||
38 | ||||||
39 | 4 | 13 | $rx{unicode} = '(\\[0-9a-f]{1,4})'; | |||
40 | 4 | 11 | $rx{ascii} = '[\x20-\x7e]'; | |||
41 | 4 | 10 | $rx{latin1} = '[\xa1-\xff]'; | |||
42 | 4 | 34 | $rx{escape} = "($rx{unicode}|\\\\$rx{ascii}|\\\\$rx{latin1})"; | |||
43 | 4 | 47 | $rx{stringchar} = "($rx{escape}|$rx{latin1}|[ !#$%&(-~])"; | |||
44 | 4 | 23 | $rx{nmstrt} = "([a-z]|$rx{latin1}|$rx{escape})"; | |||
45 | 4 | 22 | $rx{nmchar} = "([-a-z0-9]|$rx{latin1}|$rx{escape})"; | |||
46 | 4 | 24 | $rx{ident} = "($rx{nmstrt}$rx{nmchar}*)"; | |||
47 | 4 | 19 | $rx{name} = "($rx{nmchar}+)"; | |||
48 | 4 | 15 | $rx{d} = "[0-9]"; | |||
49 | 4 | 16 | $rx{notnm} = "([^-a-z0-9\\\\]|$rx{latin1})"; | |||
50 | 4 | 11 | $rx{w} = "([ \\t\\n]*)"; | |||
51 | 4 | 27 | $rx{num} = "($rx{d}+|$rx{d}*\\.$rx{d}+)"; | |||
52 | 4 | 26 | $rx{string} = "(\"($rx{stringchar}|\')*\"|\'($rx{stringchar}|\")*\')"; | |||
53 | ||||||
54 | ||||||
55 | ##################################################################################### | |||||
56 | ||||||
57 | #"/*" {BEGIN(COMMENT);} | |||||
58 | #<COMMENT>"*/" {BEGIN(0);} | |||||
59 | #<COMMENT>\n {/* ignore */} | |||||
60 | #<COMMENT>. {/* ignore */} | |||||
61 | #@import {BEGIN(0); return IMPORT_SYM;} | |||||
62 | #"!"{w}important {BEGIN(0); return IMPORTANT_SYM;} | |||||
63 | #{ident} {BEGIN(AFTER_IDENT); return IDENT;} | |||||
64 | #{string} {BEGIN(0); return STRING;} | |||||
65 | ||||||
66 | 4 | 23 | $self->add_toke_rule('COMMENT' , "\\/\\*(\\*[^\\/]|[^\\*])*\\*\\/"); | |||
67 | 4 | 15 | $self->add_toke_rule('IMPORT_SYM' , "\@import"); | |||
68 | 4 | 25 | $self->add_toke_rule('IMPORTANT_SYM' , "!$rx{w}important"); | |||
69 | 4 | 18 | $self->add_toke_rule('IDENT' , $rx{ident}); | |||
70 | 4 | 16 | $self->add_toke_rule('STRING' , $rx{string}); | |||
71 | ||||||
72 | ||||||
73 | #{num} {BEGIN(0); return NUMBER;} | |||||
74 | #{num}"%" {BEGIN(0); return PERCENTAGE;} | |||||
75 | #{num}pt/{notnm} {BEGIN(0); return LENGTH;} | |||||
76 | #{num}mm/{notnm} {BEGIN(0); return LENGTH;} | |||||
77 | #{num}cm/{notnm} {BEGIN(0); return LENGTH;} | |||||
78 | #{num}pc/{notnm} {BEGIN(0); return LENGTH;} | |||||
79 | #{num}in/{notnm} {BEGIN(0); return LENGTH;} | |||||
80 | #{num}px/{notnm} {BEGIN(0); return LENGTH;} | |||||
81 | #{num}em/{notnm} {BEGIN(0); return EMS;} | |||||
82 | #{num}ex/{notnm} {BEGIN(0); return EXS;} | |||||
83 | ||||||
84 | 4 | 19 | $self->add_toke_rule('NUMBER' , $rx{num}); | |||
85 | # TODO: | |||||
86 | # the slash here is not literal: in lex/flex, r/s is the trailing-context operator (match r only when followed by s, without consuming s). | |||||
87 | # need to revisit this block of rules | |||||
88 | ||||||
89 | ||||||
90 | #<AFTER_IDENT>":"link {return LINK_PSCLASS_AFTER_IDENT;} | |||||
91 | #<AFTER_IDENT>":"visited {return VISITED_PSCLASS_AFTER_IDENT;} | |||||
92 | #<AFTER_IDENT>":"active {return ACTIVE_PSCLASS_AFTER_IDENT;} | |||||
93 | #<AFTER_IDENT>":"first-line {return FIRST_LINE_AFTER_IDENT;} | |||||
94 | #<AFTER_IDENT>":"first-letter {return FIRST_LETTER_AFTER_IDENT;} | |||||
95 | #<AFTER_IDENT>"#"{name} {return HASH_AFTER_IDENT;} | |||||
96 | #<AFTER_IDENT>"."{name} {return CLASS_AFTER_IDENT;} | |||||
97 | ||||||
98 | #":"link {BEGIN(AFTER_IDENT); return LINK_PSCLASS;} | |||||
99 | #":"visited {BEGIN(AFTER_IDENT); return VISITED_PSCLASS;} | |||||
100 | #":"active {BEGIN(AFTER_IDENT); return ACTIVE_PSCLASS;} | |||||
101 | #":"first-line {BEGIN(AFTER_IDENT); return FIRST_LINE;} | |||||
102 | #":"first-letter {BEGIN(AFTER_IDENT); return FIRST_LETTER;} | |||||
103 | #"#"{name} {BEGIN(AFTER_IDENT); return HASH;} | |||||
104 | #"."{name} {BEGIN(AFTER_IDENT); return CLASS;} | |||||
105 | ||||||
106 | # all this <AFTER_IDENT> crap isn't really needed - it's just for context once we | |||||
107 | # finish the parse (or so it seems to me) and doesn't actually affect the toke/lex | |||||
108 | ||||||
109 | 4 | 16 | $self->add_toke_rule('LINK_PSCLASS' , "\\:link"); | |||
110 | 4 | 15 | $self->add_toke_rule('VISITED_PSCLASS' , "\\:visited"); | |||
111 | 4 | 13 | $self->add_toke_rule('ACTIVE_PSCLASS' , "\\:active"); | |||
112 | 4 | 15 | $self->add_toke_rule('FIRST_LINE' , "\\:first-line"); | |||
113 | 4 | 59 | $self->add_toke_rule('FIRST_LETTER' , "\\:first-letter"); | |||
114 | ||||||
115 | 4 | 20 | $self->add_toke_rule('HASH' , "\\#$rx{name}"); | |||
116 | 4 | 20 | $self->add_toke_rule('CLASS' , "\\.$rx{name}"); | |||
117 | ||||||
118 | ||||||
119 | #url\({w}{string}{w}\) | | |||||
120 | #url\({w}([^ \n\'\")]|\\\ |\\\'|\\\"|\\\))+{w}\) {BEGIN(0); return URL;} | |||||
121 | #rgb\({w}{num}%?{w}\,{w}{num}%?{w}\,{w}{num}%?{w}\) {BEGIN(0); return RGB;} | |||||
122 | ||||||
123 | # TODO: | |||||
124 | # the URL and RGB token rules above have not been implemented yet | |||||
125 | ||||||
126 | ||||||
127 | #[ \t]+ {BEGIN(0); /* ignore whitespace */} | |||||
128 | #\n {BEGIN(0); /* ignore whitespace */} | |||||
129 | #\<\!\-\- {BEGIN(0); return CDO;} | |||||
130 | #\-\-\> {BEGIN(0); return CDC;} | |||||
131 | ||||||
132 | 4 | 15 | $self->add_toke_rule('CDO' , "<!--"); | |||
133 | 4 | 15 | $self->add_toke_rule('CDC' , "-->"); | |||
134 | ||||||
135 | 4 | 15 | $self->add_toke_rule('WHITESPACE' , '[ \t\n]+'); | |||
136 | ||||||
137 | ||||||
138 | ##################################################################################### | |||||
139 | ||||||
140 | 4 | 17 | $self->add_toke_rule('_SEMICOLON' , ';'); | |||
141 | 4 | 15 | $self->add_toke_rule('_MINUS' , '-'); | |||
142 | 4 | 15 | $self->add_toke_rule('_PLUS' , '\\+'); | |||
143 | 4 | 15 | $self->add_toke_rule('_SLASH' , '/'); | |||
144 | 4 | 16 | $self->add_toke_rule('_COMMA' , ','); | |||
145 | 4 | 17 | $self->add_toke_rule('_BRACE_OPEN' , '\\{'); | |||
146 | 4 | 23 | $self->add_toke_rule('_BRACE_CLOSE' , '\\}'); | |||
147 | 4 | 14 | $self->add_toke_rule('_COLON' , '\\:'); | |||
148 | ||||||
149 | ||||||
150 | ##################################################################################### | |||||
151 | ||||||
152 | #stylesheet | |||||
153 | # : [CDO|CDC]* [ import [CDO|CDC]* ]* [ ruleset [CDO|CDC]* ]* | |||||
154 | # ; | |||||
155 | #import | |||||
156 | # : IMPORT_SYM [STRING|URL] ';' /* E.g., @import url(fun.css); */ | |||||
157 | # ; | |||||
158 | #unary_operator | |||||
159 | # : '-' | '+' | |||||
160 | # ; | |||||
161 | #operator | |||||
162 | # : '/' | ',' | /* empty */ | |||||
163 | # ; | |||||
164 | ||||||
165 | 4 | 75 | $self->add_lex_rule('stylesheet' , '[CDO|CDC]* [ import [CDO|CDC]* ]* [ ruleset [CDO|CDC]* ]*'); | |||
166 | 4 | 15 | $self->add_lex_rule('import' , 'IMPORT_SYM [STRING|URL] _SEMICOLON'); | |||
167 | 4 | 16 | $self->add_lex_rule('unary_operator' , '_MINUS | _PLUS'); | |||
168 | 4 | 15 | $self->add_lex_rule('operator' , '_SLASH | _COMMA | '); | |||
169 | ||||||
170 | ||||||
171 | #property | |||||
172 | # : IDENT | |||||
173 | # ; | |||||
174 | #ruleset | |||||
175 | # : selector [ ',' selector ]* | |||||
176 | # '{' declaration [ ';' declaration ]* '}' | |||||
177 | # ; | |||||
178 | #selector | |||||
179 | # : simple_selector+ [ pseudo_element | solitary_pseudo_element ]? | |||||
180 | # | solitary_pseudo_element | |||||
181 | # ; | |||||
182 | ||||||
183 | 4 | 16 | $self->add_lex_rule('property' , 'IDENT'); | |||
184 | 4 | 15 | $self->add_lex_rule('ruleset' , 'selector [ _COMMA selector ]* _BRACE_OPEN declaration [ _SEMICOLON declaration ]* _BRACE_CLOSE'); | |||
185 | 4 | 16 | $self->add_lex_rule('selector' , 'simple_selector+ [ pseudo_element ]? | pseudo_element'); | |||
186 | ||||||
187 | ||||||
188 | #simple_selector | |||||
189 | # : element_name id? class? pseudo_class? /* eg: H1.subject */ | |||||
190 | # | solitary_id class? pseudo_class? /* eg: #xyz33 */ | |||||
191 | # | solitary_class pseudo_class? /* eg: .author */ | |||||
192 | # | solitary_pseudo_class /* eg: :link */ | |||||
193 | # ; | |||||
194 | #element_name | |||||
195 | # : IDENT | |||||
196 | # ; | |||||
197 | #pseudo_class /* as in: A:link */ | |||||
198 | # : LINK_PSCLASS_AFTER_IDENT | |||||
199 | # | VISITED_PSCLASS_AFTER_IDENT | |||||
200 | # | ACTIVE_PSCLASS_AFTER_IDENT | |||||
201 | # ; | |||||
202 | #solitary_pseudo_class /* as in: :link */ | |||||
203 | # : LINK_PSCLASS | |||||
204 | # | VISITED_PSCLASS | |||||
205 | # | ACTIVE_PSCLASS | |||||
206 | # ; | |||||
207 | ||||||
208 | 4 | 17 | $self->add_lex_rule('simple_selector' , 'element_name id? class? pseudo_class? | id class? pseudo_class? | class pseudo_class? | pseudo_class'); | |||
209 | 4 | 16 | $self->add_lex_rule('element_name' , 'IDENT'); | |||
210 | 4 | 16 | $self->add_lex_rule('pseudo_class' , 'LINK_PSCLASS | VISITED_PSCLASS | ACTIVE_PSCLASS'); | |||
211 | ||||||
212 | ||||||
213 | #class /* as in: P.note */ | |||||
214 | # : CLASS_AFTER_IDENT | |||||
215 | # ; | |||||
216 | #solitary_class /* as in: .note */ | |||||
217 | # : CLASS | |||||
218 | # ; | |||||
219 | #pseudo_element /* as in: P:first-line */ | |||||
220 | # : FIRST_LETTER_AFTER_IDENT | |||||
221 | # | FIRST_LINE_AFTER_IDENT | |||||
222 | # ; | |||||
223 | #solitary_pseudo_element /* as in: :first-line */ | |||||
224 | # : FIRST_LETTER | |||||
225 | # | FIRST_LINE | |||||
226 | # ; | |||||
227 | ||||||
228 | 4 | 16 | $self->add_lex_rule('class' , 'CLASS'); | |||
229 | 4 | 14 | $self->add_lex_rule('pseudo_element' , 'FIRST_LETTER | FIRST_LINE'); | |||
230 | ||||||
231 | ||||||
232 | #id | |||||
233 | # : HASH_AFTER_IDENT | |||||
234 | # ; | |||||
235 | #solitary_id | |||||
236 | # : HASH | |||||
237 | # ; | |||||
238 | #declaration | |||||
239 | # : property ':' expr prio? | |||||
240 | # | /* empty */ /* Prevents syntax errors... */ | |||||
241 | # ; | |||||
242 | #prio | |||||
243 | # : IMPORTANT_SYM /* !important */ | |||||
244 | # ; | |||||
245 | #expr | |||||
246 | # : term [ operator term ]* | |||||
247 | # ; | |||||
248 | ||||||
249 | 4 | 14 | $self->add_lex_rule('id' , 'HASH'); | |||
250 | 4 | 15 | $self->add_lex_rule('declaration' , 'property _COLON expr prio? | '); | |||
251 | 4 | 73 | $self->add_lex_rule('prio' , 'IMPORTANT_SYM'); | |||
252 | 4 | 14 | $self->add_lex_rule('expr' , 'term [ operator term ]*'); | |||
253 | ||||||
254 | ||||||
255 | #term | |||||
256 | # : unary_operator? | |||||
257 | # [ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS | |||||
258 | # | IDENT | hexcolor | URL | RGB ] | |||||
259 | # ; | |||||
260 | #hexcolor | |||||
261 | # : HASH | HASH_AFTER_IDENT | |||||
262 | # ; | |||||
263 | ||||||
264 | 4 | 15 | $self->add_lex_rule('term' , 'unary_operator? [ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS | IDENT | hexcolor | URL | RGB ]'); | |||
265 | 4 | 16 | $self->add_lex_rule('hexcolor' , 'HASH | HASH_AFTER_IDENT'); | |||
266 | ||||||
267 | ||||||
268 | ##################################################################################### | |||||
269 | ||||||
270 | 4 | 25 | $self->set_base('stylesheet'); | |||
271 | } | |||||
272 | ||||||
273 | sub toke { | |||||
274 | 3 | 18 | my ($self, $input) = @_; | |||
275 | ||||||
276 | # | |||||
277 | # CSS 1 treats whitespace oddly - we need to toke and then remove all | |||||
278 | # WHITESPACE tokens before we try and lex. we'll do the same with | |||||
279 | # comments (the other modules do this differently right now). maybe | |||||
280 | # we should remove these tokens at the start of lexing instead? | |||||
281 | # | |||||
282 | ||||||
283 | 3 | 26 | my $tokens = $self->SUPER::toke($input); | |||
284 | 3 | 8 | my $out = []; | |||
285 | ||||||
286 | 3 3 | 6 9 | for my $token (@{$tokens}){ | |||
287 | ||||||
288 | 33 | 124 | next if $token->{type} eq 'WHITESPACE'; | |||
289 | 21 | 68 | next if $token->{type} eq 'COMMENT'; | |||
290 | ||||||
291 | 21 21 | 33 66 | push @{$out}, $token; | |||
292 | } | |||||
293 | ||||||
294 | 3 | 85 | return $out; | |||
295 | } | |||||
296 | ||||||
297 | 1; |