File: | blib/lib/CSS/Grammar/CSS20.pm |
Coverage: | 100.0% |
line | stmt | bran | cond | sub | time | code |
---|---|---|---|---|---|---|
1 | package CSS::Grammar::CSS20; | |||||
2 | ||||||
3 | 4 4 4 | 21 7 22 | use strict; | |||
4 | 4 4 4 | 23 101 20 | use warnings; | |||
5 | ||||||
6 | 4 4 4 | 26 8 24 | use base 'CSS::Grammar'; | |||
7 | ||||||
8 | # | |||||
9 | # http://www.w3.org/TR/1998/REC-CSS2-19980512/Overview.html | |||||
10 | # http://www.w3.org/TR/REC-CSS2/grammar.html | |||||
11 | # http://www.w3.org/TR/REC-CSS2/syndata.html#parsing-errors | |||||
12 | # | |||||
13 | ||||||
14 | sub init { | |||||
15 | 4 | 11 | my ($self) = @_; | |||
16 | ||||||
17 | 4 | 8 | my %rx; | |||
18 | ||||||
19 | ##################################################################################### | |||||
20 | ||||||
21 | #%option case-insensitive | |||||
22 | ||||||
23 | 4 | 10 | $self->{case_insensitive} = 1; | |||
24 | ||||||
25 | ||||||
26 | ##################################################################################### | |||||
27 | ||||||
28 | #h [0-9a-f] | |||||
29 | #nonascii [\200-\377] | |||||
30 | #unicode \\{h}{1,6}[ \t\r\n\f]? | |||||
31 | #escape {unicode}|\\[ -~\200-\377] | |||||
32 | #w [ \t\r\n\f]* | |||||
33 | #nl \n|\r\n|\r|\f | |||||
34 | ||||||
35 | 4 | 13 | $rx{h} = '[0-9a-f]'; | |||
36 | 4 | 11 | $rx{nonascii} = '[\\x80-\\xff]'; | |||
37 | 4 | 20 | $rx{unicode} = '(\\'.$rx{h}.'{1,6}(\\r\\n|[ \\t\\r\\n\\f])?)'; | |||
38 | 4 | 18 | $rx{escape} = '('.$rx{unicode}.'|\\\\[ -~\\x80-\\xff])'; | |||
39 | 4 | 11 | $rx{w} = '[ \t\r\n\f]*'; | |||
40 | 4 | 12 | $rx{nl} = '(\\n|\\r\\n|\\r|\\f)'; | |||
41 | ||||||
42 | ||||||
43 | #nmstart [a-z]|{nonascii}|{escape} | |||||
44 | #nmchar [a-z0-9-]|{nonascii}|{escape} | |||||
45 | #string1 \"([\t !#$%&(-~]|\\{nl}|\'|{nonascii}|{escape})*\" | |||||
46 | #string2 \'([\t !#$%&(-~]|\\{nl}|\"|{nonascii}|{escape})*\' | |||||
47 | ||||||
48 | 4 | 24 | $rx{nmstart} = '([_a-z]|'.$rx{nonascii}.'|'.$rx{escape}.')'; | |||
49 | 4 | 26 | $rx{nmchar} = '([_a-zA-Z0-9-]|'.$rx{nonascii}.'|'.$rx{escape}.')'; | |||
50 | 4 | 29 | $rx{string1} = '("([\\t !#$%&(-~]|\\\\('.$rx{nl}.')|\'|('.$rx{nonascii}.')|('.$rx{escape}.'))*")'; | |||
51 | 4 | 27 | $rx{string2} = '(\'([\\t !#$%&(-~]|\\\\('.$rx{nl}.')|"|('.$rx{nonascii}.')|('.$rx{escape}.'))*\')'; | |||
52 | ||||||
53 | ||||||
54 | #ident {nmstart}{nmchar}* | |||||
55 | #name {nmchar}+ | |||||
56 | #num [0-9]+|[0-9]*"."[0-9]+ | |||||
57 | #string {string1}|{string2} | |||||
58 | #url ([!#$%&*-~]|{nonascii}|{escape})* | |||||
59 | #range \?{1,6}|{h}(\?{0,5}|{h}(\?{0,4}|{h}(\?{0,3}|{h}(\?{0,2}|{h}(\??|{h}))))) | |||||
60 | ||||||
61 | 4 | 20 | $rx{ident} = "$rx{nmstart}$rx{nmchar}*"; | |||
62 | 4 | 20 | $rx{name} = "$rx{nmchar}+"; | |||
63 | 4 | 11 | $rx{num} = '([0-9]+|[0-9]*\\.[0-9]+)'; | |||
64 | 4 | 24 | $rx{string} = "($rx{string1}|$rx{string2})"; | |||
65 | 4 | 23 | $rx{url} = "(([!#\$%&*-~]|$rx{nonascii}|$rx{escape})*)"; | |||
66 | 4 | 48 | $rx{range} = "(\\?{1,6}|$rx{h}(\\?{0,5}|$rx{h}(\\?{0,4}|$rx{h}(\\?{0,3}|$rx{h}(\\?{0,2}|$rx{h}(\\??|$rx{h}))))))"; | |||
67 | ||||||
68 | ||||||
69 | ##################################################################################### | |||||
70 | ||||||
71 | #[ \t\r\n\f]+ {return S;} | |||||
72 | #"<!--" {return CDO;} | |||||
73 | #"-->" {return CDC;} | |||||
74 | #"~=" {return INCLUDES;} | |||||
75 | #"|=" {return DASHMATCH;} | |||||
76 | #{string} {return STRING;} | |||||
77 | #{ident} {return IDENT;} | |||||
78 | #"#"{name} {return HASH;} | |||||
79 | ||||||
80 | 4 | 19 | $self->add_toke_rule('S' , "[ \t\r\n\f]+"); | |||
81 | 4 | 14 | $self->add_toke_rule('CDO' , '<!--'); | |||
82 | 4 | 15 | $self->add_toke_rule('CDC' , '-->'); | |||
83 | 4 | 14 | $self->add_toke_rule('INCLUDES' , '~='); | |||
84 | 4 | 15 | $self->add_toke_rule('DASHMATCH' , '\\|='); | |||
85 | 4 | 17 | $self->add_toke_rule('STRING' , $rx{string}); | |||
86 | 4 | 18 | $self->add_toke_rule('IDENT' , $rx{ident}); | |||
87 | 4 | 23 | $self->add_toke_rule('HASH' , "#$rx{name}"); | |||
88 | ||||||
89 | ||||||
90 | #"@import" {return IMPORT_SYM;} | |||||
91 | #"@page" {return PAGE_SYM;} | |||||
92 | #"@media" {return MEDIA_SYM;} | |||||
93 | #"@font-face" {return FONT_FACE_SYM;} | |||||
94 | #"@charset" {return CHARSET_SYM;} | |||||
95 | #"@"{ident} {return ATKEYWORD;} | |||||
96 | #"!{w}important" {return IMPORTANT_SYM;} | |||||
97 | ||||||
98 | 4 | 15 | $self->add_toke_rule('IMPORT_SYM' , '@import'); | |||
99 | 4 | 15 | $self->add_toke_rule('PAGE_SYM' , '@page'); | |||
100 | 4 | 15 | $self->add_toke_rule('MEDIA_SYM' , '@media'); | |||
101 | 4 | 15 | $self->add_toke_rule('FONT_FACE_SYM' , '@font-face'); | |||
102 | 4 | 17 | $self->add_toke_rule('CHARSET_SYM' , '@charset'); | |||
103 | 4 | 23 | $self->add_toke_rule('ATKEYWORD' , "\@$rx{ident}"); # tokenised as in the spec's scanner, but no grammar (lex) rule below references ATKEYWORD | |||
104 | 4 | 23 | $self->add_toke_rule('IMPORTANT_SYM' , "!$rx{w}important"); | |||
105 | ||||||
106 | ||||||
107 | #{num}em {return EMS;} | |||||
108 | #{num}ex {return EXS;} | |||||
109 | #{num}px {return LENGTH;} | |||||
110 | #{num}cm {return LENGTH;} | |||||
111 | #{num}mm {return LENGTH;} | |||||
112 | #{num}in {return LENGTH;} | |||||
113 | #{num}pt {return LENGTH;} | |||||
114 | #{num}pc {return LENGTH;} | |||||
115 | #{num}deg {return ANGLE;} | |||||
116 | #{num}rad {return ANGLE;} | |||||
117 | #{num}grad {return ANGLE;} | |||||
118 | #{num}ms {return TIME;} | |||||
119 | #{num}s {return TIME;} | |||||
120 | #{num}Hz {return FREQ;} | |||||
121 | #{num}kHz {return FREQ;} | |||||
122 | #{num}{ident} {return DIMEN;} | |||||
123 | #{num}% {return PERCENTAGE;} | |||||
124 | #{num} {return NUMBER;} | |||||
125 | ||||||
126 | 4 | 23 | $self->add_toke_rule('EMS' , "$rx{num}em"); | |||
127 | 4 | 21 | $self->add_toke_rule('EXS' , "$rx{num}ex"); | |||
128 | 4 | 19 | $self->add_toke_rule('LENGTH' , "$rx{num}(px|cm|mm|in|pt|pc)"); | |||
129 | 4 | 20 | $self->add_toke_rule('ANGLE' , "$rx{num}(deg|rad|grad)"); | |||
130 | 4 | 19 | $self->add_toke_rule('TIME' , "$rx{num}(ms|s)"); | |||
131 | 4 | 21 | $self->add_toke_rule('FREQ' , "$rx{num}(Hz|kHz)"); | |||
132 | 4 | 42 | $self->add_toke_rule('DIMEN' , "$rx{num}$rx{ident}"); | |||
133 | 4 | 21 | $self->add_toke_rule('PERCENTAGE' , "$rx{num}%"); | |||
134 | 4 | 18 | $self->add_toke_rule('NUMBER' , "$rx{num}"); | |||
135 | ||||||
136 | ||||||
137 | #"url("{w}{string}{w}")" {return URI;} | |||||
138 | #"url("{w}{url}{w}")" {return URI;} | |||||
139 | #{ident}"(" {return FUNCTION;} | |||||
140 | ||||||
141 | 4 | 41 | $self->add_toke_rule('URI' , "url\\($rx{w}($rx{string}|$rx{url}$rx{w})\\)"); | |||
142 | 4 | 21 | $self->add_toke_rule('FUNCTION' , "$rx{ident}\\("); | |||
143 | ||||||
144 | ||||||
145 | ##################################################################################### | |||||
146 | ||||||
147 | 4 | 111 | $self->add_toke_rule('_BRACE_OPEN' , '{'); | |||
148 | 4 | 15 | $self->add_toke_rule('_PLUS' , '\\+'); | |||
149 | 4 | 16 | $self->add_toke_rule('_GREATER_THAN' , '>'); | |||
150 | 4 | 15 | $self->add_toke_rule('_COMMA' , ','); | |||
151 | 4 | 16 | $self->add_toke_rule('_SEMICOLON' , ';'); | |||
152 | 4 | 17 | $self->add_toke_rule('_BRACE_CLOSE' , '}'); | |||
153 | 4 | 16 | $self->add_toke_rule('_COLON' , ':'); | |||
154 | 4 | 17 | $self->add_toke_rule('_SLASH' , '/'); | |||
155 | 4 | 17 | $self->add_toke_rule('_MINUS' , '-'); | |||
156 | 4 | 14 | $self->add_toke_rule('_PERIOD' , '\\.'); | |||
157 | 4 | 18 | $self->add_toke_rule('_STAR' , '\\*'); | |||
158 | 4 | 15 | $self->add_toke_rule('_SQUARE_OPEN' , '\\['); | |||
159 | 4 | 26 | $self->add_toke_rule('_SQUARE_CLOSE' , '\\]'); | |||
160 | 4 | 16 | $self->add_toke_rule('_EQUALS' , '='); | |||
161 | 4 | 14 | $self->add_toke_rule('_ROUND_CLOSE' , '\\)'); | |||
162 | ||||||
163 | ||||||
164 | ##################################################################################### | |||||
165 | ||||||
166 | #stylesheet | |||||
167 | # : [ CHARSET_SYM S* STRING S* ';' ]? | |||||
168 | # [S|CDO|CDC]* [ import [S|CDO|CDC]* ]* | |||||
169 | # [ [ ruleset | media | page | font_face ] [S|CDO|CDC]* ]* | |||||
170 | # ; | |||||
171 | #import | |||||
172 | # : IMPORT_SYM S* | |||||
173 | # [STRING|URI] S* [ medium [ ',' S* medium]* ]? ';' S* | |||||
174 | # ; | |||||
175 | #media | |||||
176 | # : MEDIA_SYM S* medium [ ',' S* medium ]* '{' S* ruleset* '}' S* | |||||
177 | # ; | |||||
178 | ||||||
179 | 4 | 25 | $self->add_lex_rule('stylesheet', '[ CHARSET_SYM S* STRING S* _SEMICOLON ]? [S|CDO|CDC]* [ import [S|CDO|CDC]* ]* [ [ ruleset | media | '. | |||
180 | 'page | font_face ] [S|CDO|CDC]* ]*'); | |||||
181 | 4 | 15 | $self->add_lex_rule('import', 'IMPORT_SYM S* [STRING|URI] S* [ medium [ _COMMA S* medium]* ]? _SEMICOLON S*'); | |||
182 | 4 | 15 | $self->add_lex_rule('media', 'MEDIA_SYM S* medium [ _COMMA S* medium ]* _BRACE_OPEN S* ruleset* _BRACE_CLOSE S*'); | |||
183 | ||||||
184 | ||||||
185 | #medium | |||||
186 | # : IDENT S* | |||||
187 | # ; | |||||
188 | #page | |||||
189 | # : PAGE_SYM S* IDENT? pseudo_page? S* | |||||
190 | # '{' S* declaration [ ';' S* declaration ]* '}' S* | |||||
191 | # ; | |||||
192 | #pseudo_page | |||||
193 | # : ':' IDENT | |||||
194 | # ; | |||||
195 | #font_face | |||||
196 | # : FONT_FACE_SYM S* | |||||
197 | # '{' S* declaration [ ';' S* declaration ]* '}' S* | |||||
198 | # ; | |||||
199 | ||||||
200 | 4 | 15 | $self->add_lex_rule('medium', 'IDENT S*'); | |||
201 | 4 | 15 | $self->add_lex_rule('page', 'PAGE_SYM S* IDENT? pseudo_page? S* _BRACE_OPEN S* declaration [ _SEMICOLON S* declaration ]* _BRACE_CLOSE S*'); | |||
202 | 4 | 15 | $self->add_lex_rule('pseudo_page', '_COLON IDENT'); | |||
203 | 4 | 16 | $self->add_lex_rule('font_face', 'FONT_FACE_SYM S* _BRACE_OPEN S* declaration [ _SEMICOLON S* declaration ]* _BRACE_CLOSE S*'); | |||
204 | ||||||
205 | ||||||
206 | #operator | |||||
207 | # : '/' S* | ',' S* | /* empty */ | |||||
208 | # ; | |||||
209 | #combinator | |||||
210 | # : '+' S* | '>' S* | /* empty */ | |||||
211 | # ; | |||||
212 | #unary_operator | |||||
213 | # : '-' | '+' | |||||
214 | # ; | |||||
215 | #property | |||||
216 | # : IDENT S* | |||||
217 | # ; | |||||
218 | ||||||
219 | 4 | 14 | $self->add_lex_rule('operator', '_SLASH S* | _COMMA S* |'); | |||
220 | 4 | 17 | $self->add_lex_rule('combinator', '_PLUS S* | _GREATER_THAN S* |'); | |||
221 | 4 | 16 | $self->add_lex_rule('unary_operator', '_MINUS | _PLUS'); | |||
222 | 4 | 15 | $self->add_lex_rule('property', 'IDENT S*'); | |||
223 | ||||||
224 | ||||||
225 | #ruleset | |||||
226 | # : selector [ ',' S* selector ]* | |||||
227 | # '{' S* declaration [ ';' S* declaration ]* '}' S* | |||||
228 | # ; | |||||
229 | #selector | |||||
230 | # : simple_selector [ combinator simple_selector ]* | |||||
231 | # ; | |||||
232 | #simple_selector | |||||
233 | # : element_name? [ HASH | class | attrib | pseudo ]* S* | |||||
234 | # ; | |||||
235 | #class | |||||
236 | # : '.' IDENT | |||||
237 | # ; | |||||
238 | ||||||
239 | 4 | 15 | $self->add_lex_rule('ruleset', 'selector [ _COMMA S* selector ]* _BRACE_OPEN S* declaration [ _SEMICOLON S* declaration ]* _BRACE_CLOSE S*'); | |||
240 | 4 | 16 | $self->add_lex_rule('selector', 'simple_selector [ combinator simple_selector ]*'); | |||
241 | 4 | 16 | $self->add_lex_rule('simple_selector', 'element_name? [ HASH | class | attrib | pseudo ]* S*'); | |||
242 | 4 | 16 | $self->add_lex_rule('class', '_PERIOD IDENT'); | |||
243 | ||||||
244 | ||||||
245 | #element_name | |||||
246 | # : IDENT | '*' | |||||
247 | # ; | |||||
248 | #attrib | |||||
249 | # : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S* | |||||
250 | # [ IDENT | STRING ] S* ]? ']' | |||||
251 | # ; | |||||
252 | #pseudo | |||||
253 | # : ':' [ IDENT | FUNCTION S* IDENT S* ')' ] | |||||
254 | # ; | |||||
255 | #declaration | |||||
256 | # : property ':' S* expr prio? | |||||
257 | # | /* empty */ | |||||
258 | # ; | |||||
259 | ||||||
260 | 4 | 16 | $self->add_lex_rule('element_name', 'IDENT | _STAR'); | |||
261 | 4 | 15 | $self->add_lex_rule('attrib', '_SQUARE_OPEN S* IDENT S* [ [ _EQUALS | INCLUDES | DASHMATCH ] S* [ IDENT | STRING ] S* ]? _SQUARE_CLOSE'); | |||
262 | 4 | 17 | $self->add_lex_rule('pseudo', '_COLON [ IDENT | FUNCTION S* IDENT S* _ROUND_CLOSE ]'); | |||
263 | 4 | 14 | $self->add_lex_rule('declaration', 'property _COLON S* expr prio? |'); | |||
264 | ||||||
265 | ||||||
266 | #prio | |||||
267 | # : IMPORTANT_SYM S* | |||||
268 | # ; | |||||
269 | #expr | |||||
270 | # : term [ operator term ]* | |||||
271 | # ; | |||||
272 | #term | |||||
273 | # : unary_operator? | |||||
274 | # [ NUMBER S* | PERCENTAGE S* | LENGTH S* | EMS S* | EXS S* | ANGLE S* | | |||||
275 | # TIME S* | FREQ S* | function ] | |||||
276 | # | STRING S* | IDENT S* | URI S* | RGB S* | UNICODERANGE S* | hexcolor | |||||
277 | # ; | |||||
278 | ||||||
279 | 4 | 15 | $self->add_lex_rule('prio', 'IMPORTANT_SYM S*'); | |||
280 | 4 | 14 | $self->add_lex_rule('expr', 'term [ operator term ]*'); | |||
281 | 4 | 16 | $self->add_lex_rule('term', 'unary_operator? [ NUMBER S* | PERCENTAGE S* | LENGTH S* | EMS S* | EXS S* | ANGLE S* | TIME S* | FREQ S* | function ] '. | |||
282 | '| STRING S* | IDENT S* | URI S* | RGB S* | UNICODERANGE S* | hexcolor'); | |||||
283 | ||||||
284 | ||||||
285 | #function | |||||
286 | # : FUNCTION S* expr ')' S* | |||||
287 | # ; | |||||
288 | #hexcolor | |||||
289 | # : HASH S* | |||||
290 | # ; | |||||
291 | ||||||
292 | 4 | 16 | $self->add_lex_rule('function', 'FUNCTION S* expr _ROUND_CLOSE S*'); | |||
293 | 4 | 16 | $self->add_lex_rule('hexcolor', 'HASH S*'); | |||
294 | ||||||
295 | ||||||
296 | ##################################################################################### | |||||
297 | ||||||
298 | 4 | 27 | $self->set_base('stylesheet'); | |||
299 | } | |||||
300 | ||||||
301 | sub toke { | |||||
302 | 3 | 20 | my ($self, $input) = @_; | |||
303 | ||||||
304 | # strip comments first | |||||
305 | 3 | 8 | $input =~ s!/\*[^*]*\*+([^/*][^*]*\*+)*/!!g; | |||
306 | ||||||
307 | 3 | 27 | $self->SUPER::toke($input); | |||
308 | } | |||||
309 | ||||||
310 | 1; |