File Coverage

File:	blib/lib/CSS/Grammar/CSS10.pm
Coverage:	98.9%

line	stmt	bran	sub	time	code
1					package CSS::Grammar::CSS10;
2
3	4 4 4		4	22 8 28	use strict;
4	4 4 4		4	27 8 25	use warnings;
5	4 4 4		4	24 10 40	use Data::Dumper;
6
7	4 4 4		4	30 14 33	use base 'CSS::Grammar';
8
9					#
10					# http://www.w3.org/TR/REC-CSS1-961217.html#appendix-b
11					#
12
13					sub init {
14	4		4	12	my ($self) = @_;
15
16	4			7	my %rx;
17
18					#####################################################################################
19
20	4			10	$self->{case_insensitive} = 1;
21
22
23					#####################################################################################
24
25					#unicode \\[0-9a-f]{1,4}
26					#latin1 [¡-ÿ]
27					#escape {unicode}\|\\[ -~¡-ÿ]
28					#stringchar {escape}\|{latin1}\|[ !#$%&(-~]
29					#nmstrt [a-z]\|{latin1}\|{escape}
30					#nmchar [-a-z0-9]\|{latin1}\|{escape}
31					#ident {nmstrt}{nmchar}*
32					#name {nmchar}+
33					#d [0-9]
34					#notnm [^-a-z0-9\\]\|{latin1}
35					#w [ \t\n]*
36					#num {d}+\|{d}*\.{d}+
37					#string \"({stringchar}\|\')\"\|\'({stringchar}\|\")\'
38
39	4			13	$rx{unicode} = '(\\[0-9a-f]{1,4})';
40	4			11	$rx{ascii} = '[\x20-\x7e]';
41	4			10	$rx{latin1} = '[\xa1-\xff]';
42	4			34	$rx{escape} = "($rx{unicode}\|\\\\$rx{ascii}\|\\\\$rx{latin1})";
43	4			47	$rx{stringchar} = "($rx{escape}\|$rx{latin1}\|[ !#$%&(-~])";
44	4			23	$rx{nmstrt} = "([a-z]\|$rx{latin1}\|$rx{escape})";
45	4			22	$rx{nmchar} = "([-a-z0-9]\|$rx{latin1}\|$rx{escape})";
46	4			24	$rx{ident} = "($rx{nmstrt}$rx{nmchar}*)";
47	4			19	$rx{name} = "($rx{nmchar}+)";
48	4			15	$rx{d} = "[0-9]";
49	4			16	$rx{notnm} = "([^-a-z0-9\\\\]\|$rx{latin1})";
50	4			11	$rx{w} = "([ \\t\\n]*)";
51	4			27	$rx{num} = "($rx{d}+\|$rx{d}*\\.$rx{d}+)";
52	4			26	$rx{string} = "(\"($rx{stringchar}\|\')\"\|\'($rx{stringchar}\|\")\')";
53
54
55					#####################################################################################
56
57					#"/*" {BEGIN(COMMENT);}
58					#<COMMENT>"*/" {BEGIN(0);}
59					#<COMMENT>\n {/* ignore */}
60					#<COMMENT>. {/* ignore */}
61					#@import {BEGIN(0); return IMPORT_SYM;}
62					#"!"{w}important {BEGIN(0); return IMPORTANT_SYM;}
63					#{ident} {BEGIN(AFTER_IDENT); return IDENT;}
64					#{string} {BEGIN(0); return STRING;}
65
66	4			23	$self->add_toke_rule('COMMENT' , "\\/\\(\\[^\\/]\|[^\\])\\*\\/");
67	4			15	$self->add_toke_rule('IMPORT_SYM' , "\@import");
68	4			25	$self->add_toke_rule('IMPORTANT_SYM' , "!$rx{w}important");
69	4			18	$self->add_toke_rule('IDENT' , $rx{ident});
70	4			16	$self->add_toke_rule('STRING' , $rx{string});
71
72
73					#{num} {BEGIN(0); return NUMBER;}
74					#{num}"%" {BEGIN(0); return PERCENTAGE;}
75					#{num}pt/{notnm} {BEGIN(0); return LENGTH;}
76					#{num}mm/{notnm} {BEGIN(0); return LENGTH;}
77					#{num}cm/{notnm} {BEGIN(0); return LENGTH;}
78					#{num}pc/{notnm} {BEGIN(0); return LENGTH;}
79					#{num}in/{notnm} {BEGIN(0); return LENGTH;}
80					#{num}px/{notnm} {BEGIN(0); return LENGTH;}
81					#{num}em/{notnm} {BEGIN(0); return EMS;}
82					#{num}ex/{notnm} {BEGIN(0); return EXS;}
83
84	4			19	$self->add_toke_rule('NUMBER' , $rx{num});
85					# TODO:
86					# this is kinda weird - what does the slash mean here? it's not literal.
87					# need to revisit this block of rules
88
89
90					#<AFTER_IDENT>":"link {return LINK_PSCLASS_AFTER_IDENT;}
91					#<AFTER_IDENT>":"visited {return VISITED_PSCLASS_AFTER_IDENT;}
92					#<AFTER_IDENT>":"active {return ACTIVE_PSCLASS_AFTER_IDENT;}
93					#<AFTER_IDENT>":"first-line {return FIRST_LINE_AFTER_IDENT;}
94					#<AFTER_IDENT>":"first-letter {return FIRST_LETTER_AFTER_IDENT;}
95					#<AFTER_IDENT>"#"{name} {return HASH_AFTER_IDENT;}
96					#<AFTER_IDENT>"."{name} {return CLASS_AFTER_IDENT;}
97
98					#":"link {BEGIN(AFTER_IDENT); return LINK_PSCLASS;}
99					#":"visited {BEGIN(AFTER_IDENT); return VISITED_PSCLASS;}
100					#":"active {BEGIN(AFTER_IDENT); return ACTIVE_PSCLASS;}
101					#":"first-line {BEGIN(AFTER_IDENT); return FIRST_LINE;}
102					#":"first-letter {BEGIN(AFTER_IDENT); return FIRST_LETTER;}
103					#"#"{name} {BEGIN(AFTER_IDENT); return HASH;}
104					#"."{name} {BEGIN(AFTER_IDENT); return CLASS;}
105
106					# all this <AFTER_IDENT> crap isn't really needed - it's just for context once we
107					# finish the parse (or so it seems to me) and doesn't actually affect the toke/lex
108
109	4			16	$self->add_toke_rule('LINK_PSCLASS' , "\\:link");
110	4			15	$self->add_toke_rule('VISITED_PSCLASS' , "\\:visited");
111	4			13	$self->add_toke_rule('ACTIVE_PSCLASS' , "\\:active");
112	4			15	$self->add_toke_rule('FIRST_LINE' , "\\:first-line");
113	4			59	$self->add_toke_rule('FIRST_LETTER' , "\\:first-letter");
114
115	4			20	$self->add_toke_rule('HASH' , "\\#$rx{name}");
116	4			20	$self->add_toke_rule('CLASS' , "\\.$rx{name}");
117
118
119					#url\({w}{string}{w}\) \|
120					#url\({w}([^ \n\'\")]\|\\\ \|\\\'\|\\\"\|\\\))+{w}\) {BEGIN(0); return URL;}
121					#rgb\({w}{num}%?{w}\,{w}{num}%?{w}\,{w}{num}%?{w}\) {BEGIN(0); return RGB;}
122
123					# TODO:
124					# i've not done these rules yet
125
126
127					#[ \t]+ {BEGIN(0); /* ignore whitespace */}
128					#\n {BEGIN(0); /* ignore whitespace */}
129					#\<\!\-\- {BEGIN(0); return CDO;}
130					#\-\-\> {BEGIN(0); return CDC;}
131
132	4			15	$self->add_toke_rule('CDO' , "<!--");
133	4			15	$self->add_toke_rule('CDC' , "-->");
134
135	4			15	$self->add_toke_rule('WHITESPACE' , '[ \t\n]+');
136
137
138					#####################################################################################
139
140	4			17	$self->add_toke_rule('_SEMICOLON' , ';');
141	4			15	$self->add_toke_rule('_MINUS' , '-');
142	4			15	$self->add_toke_rule('_PLUS' , '\\+');
143	4			15	$self->add_toke_rule('_SLASH' , '/');
144	4			16	$self->add_toke_rule('_COMMA' , ',');
145	4			17	$self->add_toke_rule('_BRACE_OPEN' , '\\{');
146	4			23	$self->add_toke_rule('_BRACE_CLOSE' , '\\}');
147	4			14	$self->add_toke_rule('_COLON' , '\\:');
148
149
150					#####################################################################################
151
152					#stylesheet
153					# : [CDO\|CDC]* [ import [CDO\|CDC]* ]* [ ruleset [CDO\|CDC]* ]*
154					# ;
155					#import
156					# : IMPORT_SYM [STRING\|URL] ';' /* E.g., @import url(fun.css); */
157					# ;
158					#unary_operator
159					# : '-' \| '+'
160					# ;
161					#operator
162					# : '/' \| ',' \| /* empty */
163					# ;
164
165	4			75	$self->add_lex_rule('stylesheet' , '[CDO\|CDC]* [ import [CDO\|CDC]* ]* [ ruleset [CDO\|CDC]* ]*');
166	4			15	$self->add_lex_rule('import' , 'IMPORT_SYM [STRING\|URL] _SEMICOLON');
167	4			16	$self->add_lex_rule('unary_operator' , '_MINUS \| _PLUS');
168	4			15	$self->add_lex_rule('operator' , '_SLASH \| _COMMA \| ');
169
170
171					#property
172					# : IDENT
173					# ;
174					#ruleset
175					# : selector [ ',' selector ]*
176					# '{' declaration [ ';' declaration ]* '}'
177					# ;
178					#selector
179					# : simple_selector+ [ pseudo_element \| solitary_pseudo_element ]?
180					# \| solitary_pseudo_element
181					# ;
182
183	4			16	$self->add_lex_rule('property' , 'IDENT');
184	4			15	$self->add_lex_rule('ruleset' , 'selector [ _COMMA selector ]* _BRACE_OPEN declaration [ _SEMICOLON declaration ]* _BRACE_CLOSE');
185	4			16	$self->add_lex_rule('selector' , 'simple_selector+ [ pseudo_element ]? \| pseudo_element');
186
187
188					#simple_selector
189					# : element_name id? class? pseudo_class? /* eg: H1.subject */
190					# \| solitary_id class? pseudo_class? /* eg: #xyz33 */
191					# \| solitary_class pseudo_class? /* eg: .author */
192					# \| solitary_pseudo_class /* eg: :link */
193					# ;
194					#element_name
195					# : IDENT
196					# ;
197					#pseudo_class /* as in: A:link */
198					# : LINK_PSCLASS_AFTER_IDENT
199					# \| VISITED_PSCLASS_AFTER_IDENT
200					# \| ACTIVE_PSCLASS_AFTER_IDENT
201					# ;
202					#solitary_pseudo_class /* as in: :link */
203					# : LINK_PSCLASS
204					# \| VISITED_PSCLASS
205					# \| ACTIVE_PSCLASS
206					# ;
207
208	4			17	$self->add_lex_rule('simple_selector' , 'element_name id? class? pseudo_class? \| id class? pseudo_class? \| class pseudo_class? \| pseudo_class');
209	4			16	$self->add_lex_rule('element_name' , 'IDENT');
210	4			16	$self->add_lex_rule('pseudo_class' , 'LINK_PSCLASS \| VISITED_PSCLASS \| ACTIVE_PSCLASS');
211
212
213					#class /* as in: P.note */
214					# : CLASS_AFTER_IDENT
215					# ;
216					#solitary_class /* as in: .note */
217					# : CLASS
218					# ;
219					#pseudo_element /* as in: P:first-line */
220					# : FIRST_LETTER_AFTER_IDENT
221					# \| FIRST_LINE_AFTER_IDENT
222					# ;
223					#solitary_pseudo_element /* as in: :first-line */
224					# : FIRST_LETTER
225					# \| FIRST_LINE
226					# ;
227
228	4			16	$self->add_lex_rule('class' , 'CLASS');
229	4			14	$self->add_lex_rule('pseudo_element' , 'FIRST_LETTER \| FIRST_LINE');
230
231
232					#id
233					# : HASH_AFTER_IDENT
234					# ;
235					#solitary_id
236					# : HASH
237					# ;
238					#declaration
239					# : property ':' expr prio?
240					# \| /* empty */ /* Prevents syntax errors... */
241					# ;
242					#prio
243					# : IMPORTANT_SYM /* !important */
244					# ;
245					#expr
246					# : term [ operator term ]*
247					# ;
248
249	4			14	$self->add_lex_rule('id' , 'HASH');
250	4			15	$self->add_lex_rule('declaration' , 'property _COLON expr prio? \| ');
251	4			73	$self->add_lex_rule('prio' , 'IMPORTANT_SYM');
252	4			14	$self->add_lex_rule('expr' , 'term [ operator term ]*');
253
254
255					#term
256					# : unary_operator?
257					# [ NUMBER \| STRING \| PERCENTAGE \| LENGTH \| EMS \| EXS
258					# \| IDENT \| hexcolor \| URL \| RGB ]
259					# ;
260					#hexcolor
261					# : HASH \| HASH_AFTER_IDENT
262					# ;
263
264	4			15	$self->add_lex_rule('term' , 'unary_operator? [ NUMBER \| STRING \| PERCENTAGE \| LENGTH \| EMS \| EXS \| IDENT \| hexcolor \| URL \| RGB ]');
265	4			16	$self->add_lex_rule('hexcolor' , 'HASH \| HASH_AFTER_IDENT');
266
267
268					#####################################################################################
269
270	4			25	$self->set_base('stylesheet');
271					}
272
273					sub toke {
274	3		3	18	my ($self, $input) = @_;
275
276					#
277					# CSS 1 treats whitespace oddly - we need to toke and then remove all
278					# WHITESPACE tokens before we try and lex. we'll do the same with
279					# comments (the other modules do this differently right now). maybe
280					# we should remove these tokens at the start of lexing instead?
281					#
282
283	3			26	my $tokens = $self->SUPER::toke($input);
284	3			8	my $out = [];
285
286	3 3			6 9	for my $token (@{$tokens}){
287
288	33	100		124	next if $token->{type} eq 'WHITESPACE';
289	21	50		68	next if $token->{type} eq 'COMMENT';
290
291	21 21			33 66	push @{$out}, $token;
292					}
293
294	3			85	return $out;
295					}
296
297					1;