File Coverage

File: blib/lib/CSS/Grammar/CSS10.pm
Coverage: 98.9%

line stmt bran cond sub time code
1package CSS::Grammar::CSS10;
2
3
4
4
4
22
8
28
use strict;
4
4
4
4
27
8
25
use warnings;
5
4
4
4
24
10
40
use Data::Dumper;
6
7
4
4
4
30
14
33
use base 'CSS::Grammar';
8
9#
10# http://www.w3.org/TR/REC-CSS1-961217.html#appendix-b
11#
12
13sub init {
14
4
12
        my ($self) = @_;
15
16
4
7
        my %rx;
17
18        #####################################################################################
19
20
4
10
        $self->{case_insensitive} = 1;
21
22
23        #####################################################################################
24
25        #unicode \\[0-9a-f]{1,4}
26        #latin1 [¡-ÿ]
27        #escape {unicode}|\\[ -~¡-ÿ]
28        #stringchar {escape}|{latin1}|[ !#$%&(-~]
29        #nmstrt [a-z]|{latin1}|{escape}
30        #nmchar [-a-z0-9]|{latin1}|{escape}
31        #ident {nmstrt}{nmchar}*
32        #name {nmchar}+
33        #d [0-9]
34        #notnm [^-a-z0-9\\]|{latin1}
35        #w [ \t\n]*
36        #num {d}+|{d}*\.{d}+
37        #string \"({stringchar}|\')*\"|\'({stringchar}|\")*\'
38
39
4
13
        $rx{unicode} = '(\\[0-9a-f]{1,4})';
40
4
11
        $rx{ascii} = '[\x20-\x7e]';
41
4
10
        $rx{latin1} = '[\xa1-\xff]';
42
4
34
        $rx{escape} = "($rx{unicode}|\\\\$rx{ascii}|\\\\$rx{latin1})";
43
4
47
        $rx{stringchar} = "($rx{escape}|$rx{latin1}|[ !#$%&(-~])";
44
4
23
        $rx{nmstrt} = "([a-z]|$rx{latin1}|$rx{escape})";
45
4
22
        $rx{nmchar} = "([-a-z0-9]|$rx{latin1}|$rx{escape})";
46
4
24
        $rx{ident} = "($rx{nmstrt}$rx{nmchar}*)";
47
4
19
        $rx{name} = "($rx{nmchar}+)";
48
4
15
        $rx{d} = "[0-9]";
49
4
16
        $rx{notnm} = "([^-a-z0-9\\\\]|$rx{latin1})";
50
4
11
        $rx{w} = "([ \\t\\n]*)";
51
4
27
        $rx{num} = "($rx{d}+|$rx{d}*\\.$rx{d}+)";
52
4
26
        $rx{string} = "(\"($rx{stringchar}|\')*\"|\'($rx{stringchar}|\")*\')";
53
54
55        #####################################################################################
56
57        #"/*" {BEGIN(COMMENT);}
58        #<COMMENT>"*/" {BEGIN(0);}
59        #<COMMENT>\n {/* ignore */}
60        #<COMMENT>. {/* ignore */}
61        #@import {BEGIN(0); return IMPORT_SYM;}
62        #"!"{w}important {BEGIN(0); return IMPORTANT_SYM;}
63        #{ident} {BEGIN(AFTER_IDENT); return IDENT;}
64        #{string} {BEGIN(0); return STRING;}
65
66
4
23
        $self->add_toke_rule('COMMENT' , "\\/\\*(\\*[^\\/]|[^\\*])*\\*\\/");
67
4
15
        $self->add_toke_rule('IMPORT_SYM' , "\@import");
68
4
25
        $self->add_toke_rule('IMPORTANT_SYM' , "!$rx{w}important");
69
4
18
        $self->add_toke_rule('IDENT' , $rx{ident});
70
4
16
        $self->add_toke_rule('STRING' , $rx{string});
71
72
73        #{num} {BEGIN(0); return NUMBER;}
74        #{num}"%" {BEGIN(0); return PERCENTAGE;}
75        #{num}pt/{notnm} {BEGIN(0); return LENGTH;}
76        #{num}mm/{notnm} {BEGIN(0); return LENGTH;}
77        #{num}cm/{notnm} {BEGIN(0); return LENGTH;}
78        #{num}pc/{notnm} {BEGIN(0); return LENGTH;}
79        #{num}in/{notnm} {BEGIN(0); return LENGTH;}
80        #{num}px/{notnm} {BEGIN(0); return LENGTH;}
81        #{num}em/{notnm} {BEGIN(0); return EMS;}
82        #{num}ex/{notnm} {BEGIN(0); return EXS;}
83
84
4
19
        $self->add_toke_rule('NUMBER' , $rx{num});
85        # TODO:
86        # this is kinda weird - what does the slash mean here? it's not literal: in lex, r/s is trailing context (match r only when it is followed by s).
87        # need to revisit this block of rules
88
89
90        #<AFTER_IDENT>":"link {return LINK_PSCLASS_AFTER_IDENT;}
91        #<AFTER_IDENT>":"visited {return VISITED_PSCLASS_AFTER_IDENT;}
92        #<AFTER_IDENT>":"active {return ACTIVE_PSCLASS_AFTER_IDENT;}
93        #<AFTER_IDENT>":"first-line {return FIRST_LINE_AFTER_IDENT;}
94        #<AFTER_IDENT>":"first-letter {return FIRST_LETTER_AFTER_IDENT;}
95        #<AFTER_IDENT>"#"{name} {return HASH_AFTER_IDENT;}
96        #<AFTER_IDENT>"."{name} {return CLASS_AFTER_IDENT;}
97
98        #":"link {BEGIN(AFTER_IDENT); return LINK_PSCLASS;}
99        #":"visited {BEGIN(AFTER_IDENT); return VISITED_PSCLASS;}
100        #":"active {BEGIN(AFTER_IDENT); return ACTIVE_PSCLASS;}
101        #":"first-line {BEGIN(AFTER_IDENT); return FIRST_LINE;}
102        #":"first-letter {BEGIN(AFTER_IDENT); return FIRST_LETTER;}
103        #"#"{name} {BEGIN(AFTER_IDENT); return HASH;}
104        #"."{name} {BEGIN(AFTER_IDENT); return CLASS;}
105
106        # all this <AFTER_IDENT> crap isn't really needed - it's just for context once we
107        # finish the parse (or so it seems to me) and doesn't actually affect the toke/lex
108
109
4
16
        $self->add_toke_rule('LINK_PSCLASS' , "\\:link");
110
4
15
        $self->add_toke_rule('VISITED_PSCLASS' , "\\:visited");
111
4
13
        $self->add_toke_rule('ACTIVE_PSCLASS' , "\\:active");
112
4
15
        $self->add_toke_rule('FIRST_LINE' , "\\:first-line");
113
4
59
        $self->add_toke_rule('FIRST_LETTER' , "\\:first-letter");
114
115
4
20
        $self->add_toke_rule('HASH' , "\\#$rx{name}");
116
4
20
        $self->add_toke_rule('CLASS' , "\\.$rx{name}");
117
118
119        #url\({w}{string}{w}\) |
120        #url\({w}([^ \n\'\")]|\\\ |\\\'|\\\"|\\\))+{w}\) {BEGIN(0); return URL;}
121        #rgb\({w}{num}%?{w}\,{w}{num}%?{w}\,{w}{num}%?{w}\) {BEGIN(0); return RGB;}
122
123        # TODO:
124        # the URL and RGB token rules above have not been implemented yet
125
126
127        #[ \t]+ {BEGIN(0); /* ignore whitespace */}
128        #\n {BEGIN(0); /* ignore whitespace */}
129        #\<\!\-\- {BEGIN(0); return CDO;}
130        #\-\-\> {BEGIN(0); return CDC;}
131
132
4
15
        $self->add_toke_rule('CDO' , "<!--");
133
4
15
        $self->add_toke_rule('CDC' , "-->");
134
135
4
15
        $self->add_toke_rule('WHITESPACE' , '[ \t\n]+');
136
137
138        #####################################################################################
139
140
4
17
        $self->add_toke_rule('_SEMICOLON' , ';');
141
4
15
        $self->add_toke_rule('_MINUS' , '-');
142
4
15
        $self->add_toke_rule('_PLUS' , '\\+');
143
4
15
        $self->add_toke_rule('_SLASH' , '/');
144
4
16
        $self->add_toke_rule('_COMMA' , ',');
145
4
17
        $self->add_toke_rule('_BRACE_OPEN' , '\\{');
146
4
23
        $self->add_toke_rule('_BRACE_CLOSE' , '\\}');
147
4
14
        $self->add_toke_rule('_COLON' , '\\:');
148
149
150        #####################################################################################
151
152        #stylesheet
153        # : [CDO|CDC]* [ import [CDO|CDC]* ]* [ ruleset [CDO|CDC]* ]*
154        # ;
155        #import
156        # : IMPORT_SYM [STRING|URL] ';' /* E.g., @import url(fun.css); */
157        # ;
158        #unary_operator
159        # : '-' | '+'
160        # ;
161        #operator
162        # : '/' | ',' | /* empty */
163        # ;
164
165
4
75
        $self->add_lex_rule('stylesheet' , '[CDO|CDC]* [ import [CDO|CDC]* ]* [ ruleset [CDO|CDC]* ]*');
166
4
15
        $self->add_lex_rule('import' , 'IMPORT_SYM [STRING|URL] _SEMICOLON');
167
4
16
        $self->add_lex_rule('unary_operator' , '_MINUS | _PLUS');
168
4
15
        $self->add_lex_rule('operator' , '_SLASH | _COMMA | ');
169
170
171        #property
172        # : IDENT
173        # ;
174        #ruleset
175        # : selector [ ',' selector ]*
176        # '{' declaration [ ';' declaration ]* '}'
177        # ;
178        #selector
179        # : simple_selector+ [ pseudo_element | solitary_pseudo_element ]?
180        # | solitary_pseudo_element
181        # ;
182
183
4
16
        $self->add_lex_rule('property' , 'IDENT');
184
4
15
        $self->add_lex_rule('ruleset' , 'selector [ _COMMA selector ]* _BRACE_OPEN declaration [ _SEMICOLON declaration ]* _BRACE_CLOSE');
185
4
16
        $self->add_lex_rule('selector' , 'simple_selector+ [ pseudo_element ]? | pseudo_element');
186
187
188        #simple_selector
189        # : element_name id? class? pseudo_class? /* eg: H1.subject */
190        # | solitary_id class? pseudo_class? /* eg: #xyz33 */
191        # | solitary_class pseudo_class? /* eg: .author */
192        # | solitary_pseudo_class /* eg: :link */
193        # ;
194        #element_name
195        # : IDENT
196        # ;
197        #pseudo_class /* as in: A:link */
198        # : LINK_PSCLASS_AFTER_IDENT
199        # | VISITED_PSCLASS_AFTER_IDENT
200        # | ACTIVE_PSCLASS_AFTER_IDENT
201        # ;
202        #solitary_pseudo_class /* as in: :link */
203        # : LINK_PSCLASS
204        # | VISITED_PSCLASS
205        # | ACTIVE_PSCLASS
206        # ;
207
208
4
17
        $self->add_lex_rule('simple_selector' , 'element_name id? class? pseudo_class? | id class? pseudo_class? | class pseudo_class? | pseudo_class');
209
4
16
        $self->add_lex_rule('element_name' , 'IDENT');
210
4
16
        $self->add_lex_rule('pseudo_class' , 'LINK_PSCLASS | VISITED_PSCLASS | ACTIVE_PSCLASS');
211
212
213        #class /* as in: P.note */
214        # : CLASS_AFTER_IDENT
215        # ;
216        #solitary_class /* as in: .note */
217        # : CLASS
218        # ;
219        #pseudo_element /* as in: P:first-line */
220        # : FIRST_LETTER_AFTER_IDENT
221        # | FIRST_LINE_AFTER_IDENT
222        # ;
223        #solitary_pseudo_element /* as in: :first-line */
224        # : FIRST_LETTER
225        # | FIRST_LINE
226        # ;
227
228
4
16
        $self->add_lex_rule('class' , 'CLASS');
229
4
14
        $self->add_lex_rule('pseudo_element' , 'FIRST_LETTER | FIRST_LINE');
230
231
232        #id
233        # : HASH_AFTER_IDENT
234        # ;
235        #solitary_id
236        # : HASH
237        # ;
238        #declaration
239        # : property ':' expr prio?
240        # | /* empty */ /* Prevents syntax errors... */
241        # ;
242        #prio
243        # : IMPORTANT_SYM /* !important */
244        # ;
245        #expr
246        # : term [ operator term ]*
247        # ;
248
249
4
14
        $self->add_lex_rule('id' , 'HASH');
250
4
15
        $self->add_lex_rule('declaration' , 'property _COLON expr prio? | ');
251
4
73
        $self->add_lex_rule('prio' , 'IMPORTANT_SYM');
252
4
14
        $self->add_lex_rule('expr' , 'term [ operator term ]*');
253
254
255        #term
256        # : unary_operator?
257        # [ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS
258        # | IDENT | hexcolor | URL | RGB ]
259        # ;
260        #hexcolor
261        # : HASH | HASH_AFTER_IDENT
262        # ;
263
264
4
15
        $self->add_lex_rule('term' , 'unary_operator? [ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS | IDENT | hexcolor | URL | RGB ]');
265
4
16
        $self->add_lex_rule('hexcolor' , 'HASH | HASH_AFTER_IDENT');
266
267
268        #####################################################################################
269
270
4
25
        $self->set_base('stylesheet');
271}
272
273sub toke {
274
3
18
        my ($self, $input) = @_;
275
276        #
277        # CSS 1 treats whitespace oddly - we need to toke and then remove all
278        # WHITESPACE tokens before we try and lex. we'll do the same with
279        # comments (the other modules do this differently right now). maybe
280        # we should remove these tokens at the start of lexing instead?
281        #
282
283
3
26
        my $tokens = $self->SUPER::toke($input);
284
3
8
        my $out = [];
285
286
3
3
6
9
        for my $token (@{$tokens}){
287
288
33
124
                next if $token->{type} eq 'WHITESPACE';
289
21
68
                next if $token->{type} eq 'COMMENT';
290
291
21
21
33
66
                push @{$out}, $token;
292        }
293
294
3
85
        return $out;
295}
296
2971;