Parser.cs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488
  1. #region Copyright and License
  2. //
  3. // Fizzler - CSS Selector Engine for Microsoft .NET Framework
  4. // Copyright (c) 2009 Atif Aziz, Colin Ramsay. All rights reserved.
  5. //
  6. // This library is free software; you can redistribute it and/or modify it under
  7. // the terms of the GNU Lesser General Public License as published by the Free
  8. // Software Foundation; either version 3 of the License, or (at your option)
  9. // any later version.
  10. //
  11. // This library is distributed in the hope that it will be useful, but WITHOUT
  12. // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  13. // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  14. // details.
  15. //
  16. // You should have received a copy of the GNU Lesser General Public License
  17. // along with this library; if not, write to the Free Software Foundation, Inc.,
  18. // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  19. //
  20. #endregion
  21. #pragma warning disable
  22. namespace Fizzler
  23. {
  24. #region Imports
  25. using System;
  26. using System.Collections.Generic;
  27. using System.Diagnostics;
  28. using System.Globalization;
  29. using System.Linq;
  30. using TokenSpec = Either<TokenKind, Token>;
  31. #endregion
  /// <summary>
  /// Semantic parser for CSS selector grammar.
  /// </summary>
  /// <remarks>
  /// The parser is a hand-written recursive descent over a token stream. Each
  /// private method handles one production of the selector grammar (quoted in
  /// a comment at the top of the method) and reports what it recognizes by
  /// calling the <see cref="ISelectorGenerator"/> passed to <c>Parse</c>.
  /// An unexpected token, an unknown pseudo-class or an unknown functional
  /// pseudo is reported as a <see cref="FormatException"/>.
  /// </remarks>
  public sealed class Parser
  {
      private readonly Reader<Token> _reader;
      private readonly ISelectorGenerator _generator;

      // Instances are created only by the static Parse methods, which have
      // already validated both arguments.
      private Parser(Reader<Token> reader, ISelectorGenerator generator)
      {
          Debug.Assert(reader != null);
          Debug.Assert(generator != null);
          _reader = reader;
          _generator = generator;
      }

      /// <summary>
      /// Parses a CSS selector group and generates its implementation.
      /// </summary>
      /// <param name="selectors">Text of the selector group; must not be null or empty.</param>
      /// <param name="generator">Generator that receives the parse events.</param>
      /// <returns>The <paramref name="generator"/> that was passed in.</returns>
      /// <exception cref="ArgumentNullException">
      /// <paramref name="selectors"/> or <paramref name="generator"/> is null.</exception>
      /// <exception cref="ArgumentException"><paramref name="selectors"/> is empty.</exception>
      public static TGenerator Parse<TGenerator>(string selectors, TGenerator generator)
          where TGenerator : ISelectorGenerator
      {
          return Parse(selectors, generator, g => g);
      }

      /// <summary>
      /// Parses a CSS selector group and generates its implementation.
      /// </summary>
      /// <param name="selectors">Text of the selector group; must not be null or empty.</param>
      /// <param name="generator">Generator that receives the parse events.</param>
      /// <param name="resultor">
      /// Function applied to <paramref name="generator"/> after parsing to produce the result.</param>
      /// <returns>The value returned by <paramref name="resultor"/>.</returns>
      /// <exception cref="ArgumentNullException">
      /// <paramref name="selectors"/>, <paramref name="generator"/> or
      /// <paramref name="resultor"/> is null.</exception>
      /// <exception cref="ArgumentException"><paramref name="selectors"/> is empty.</exception>
      public static T Parse<TGenerator, T>(string selectors, TGenerator generator, Func<TGenerator, T> resultor)
          where TGenerator : ISelectorGenerator
      {
          if (selectors == null) throw new ArgumentNullException("selectors");
          if (selectors.Length == 0) throw new ArgumentException(null, "selectors");
          return Parse(Tokener.Tokenize(selectors), generator, resultor);
      }

      /// <summary>
      /// Parses a tokenized stream representing a CSS selector group and
      /// generates its implementation.
      /// </summary>
      /// <param name="tokens">Tokens of the selector group.</param>
      /// <param name="generator">Generator that receives the parse events.</param>
      /// <returns>The <paramref name="generator"/> that was passed in.</returns>
      /// <exception cref="ArgumentNullException">
      /// <paramref name="tokens"/> or <paramref name="generator"/> is null.</exception>
      public static TGenerator Parse<TGenerator>(IEnumerable<Token> tokens, TGenerator generator)
          where TGenerator : ISelectorGenerator
      {
          return Parse(tokens, generator, g => g);
      }

      /// <summary>
      /// Parses a tokenized stream representing a CSS selector group and
      /// generates its implementation.
      /// </summary>
      /// <param name="tokens">Tokens of the selector group.</param>
      /// <param name="generator">Generator that receives the parse events.</param>
      /// <param name="resultor">
      /// Function applied to <paramref name="generator"/> after parsing to produce the result.</param>
      /// <returns>The value returned by <paramref name="resultor"/>.</returns>
      /// <exception cref="ArgumentNullException">
      /// <paramref name="tokens"/>, <paramref name="generator"/> or
      /// <paramref name="resultor"/> is null.</exception>
      public static T Parse<TGenerator, T>(IEnumerable<Token> tokens, TGenerator generator, Func<TGenerator, T> resultor)
          where TGenerator : ISelectorGenerator
      {
          if (tokens == null) throw new ArgumentNullException("tokens");
          if (resultor == null) throw new ArgumentNullException("resultor");
          // Fail with a meaningful argument exception instead of a
          // NullReferenceException on the first generator call. For a value
          // type TGenerator this comparison is simply false.
          if (generator == null) throw new ArgumentNullException("generator");

          // The enumerator is obtained here, so it is released here, whether
          // parsing succeeds or throws.
          using (var enumerator = tokens.GetEnumerator())
          {
              new Parser(new Reader<Token>(enumerator), generator).Parse();
          }

          return resultor(generator);
      }

      // Runs a complete parse, bracketed by the generator's init/close calls.
      private void Parse()
      {
          _generator.OnInit();
          SelectorGroup();
          _generator.OnClose();
      }

      private void SelectorGroup()
      {
          //selectors_group
          //  : selector [ COMMA S* selector ]*
          //  ;

          Selector();
          while (TryRead(ToTokenSpec(Token.Comma())) != null)
          {
              TryRead(ToTokenSpec(TokenKind.WhiteSpace));
              Selector();
          }

          // Nothing may follow the last selector.
          Read(ToTokenSpec(TokenKind.Eoi));
      }

      private void Selector()
      {
          _generator.OnSelector();

          //selector
          //  : simple_selector_sequence [ combinator simple_selector_sequence ]*
          //  ;

          SimpleSelectorSequence();
          while (TryCombinator())
              SimpleSelectorSequence();
      }

      // Consumes a combinator if one is next and notifies the generator.
      // Returns false, consuming nothing, when the next token is not a combinator.
      private bool TryCombinator()
      {
          //combinator
          //  /* combinators can be surrounded by whitespace */
          //  : PLUS S* | GREATER S* | TILDE S* | S+
          //  ;

          var token = TryRead(
              ToTokenSpec(TokenKind.Plus),
              ToTokenSpec(TokenKind.Greater),
              ToTokenSpec(TokenKind.Tilde),
              ToTokenSpec(TokenKind.WhiteSpace));

          if (token == null)
              return false;

          switch (token.Value.Kind)
          {
              case TokenKind.WhiteSpace:
                  // The white space is itself the (descendant) combinator,
                  // so there is no trailing S* to skip.
                  _generator.Descendant();
                  return true;
              case TokenKind.Tilde: _generator.GeneralSibling(); break;
              case TokenKind.Greater: _generator.Child(); break;
              case TokenKind.Plus: _generator.Adjacent(); break;
          }

          // Optional white space after an explicit combinator.
          TryRead(ToTokenSpec(TokenKind.WhiteSpace));
          return true;
      }

      private void SimpleSelectorSequence()
      {
          //simple_selector_sequence
          //  : [ type_selector | universal ]
          //    [ HASH | class | attrib | pseudo | negation ]*
          //  | [ HASH | class | attrib | pseudo | negation ]+
          //  ;

          var named = false;
          for (var modifiers = 0; ; modifiers++)
          {
              var token = TryRead(
                  ToTokenSpec(TokenKind.Hash),
                  ToTokenSpec(Token.Dot()),
                  ToTokenSpec(Token.LeftBracket()),
                  ToTokenSpec(Token.Colon()));

              if (token == null)
              {
                  // No (further) modifier. The sequence ends here unless
                  // nothing has been read yet, in which case a type or
                  // universal selector is required.
                  if (named || modifiers > 0)
                      break;
                  TypeSelectorOrUniversal();
                  named = true;
              }
              else
              {
                  // A sequence that starts directly with a modifier applies
                  // to any element.
                  if (modifiers == 0 && !named)
                      _generator.Universal(NamespacePrefix.None); // implied

                  if (token.Value.Kind == TokenKind.Hash)
                  {
                      _generator.Id(token.Value.Text);
                  }
                  else
                  {
                      // Push the introducer back so that the production
                      // method can read it itself.
                      Unread(token.Value);
                      switch (token.Value.Text[0])
                      {
                          case '.': Class(); break;
                          case '[': Attrib(); break;
                          case ':': Pseudo(); break;
                          // Unreachable as long as the TryRead above only
                          // admits the three introducers handled here.
                          default: throw new InvalidOperationException("Internal error.");
                      }
                  }
              }
          }
      }

      private void Pseudo()
      {
          //pseudo
          //  /* '::' starts a pseudo-element, ':' a pseudo-class */
          //  /* Exceptions: :first-line, :first-letter, :before and :after. */
          //  /* Note that pseudo-elements are restricted to one per selector and */
          //  /* occur only in the last simple_selector_sequence. */
          //  : ':' ':'? [ IDENT | functional_pseudo ]
          //  ;

          PseudoClass(); // We do pseudo-class only for now
      }

      private void PseudoClass()
      {
          //pseudo
          //  : ':' [ IDENT | functional_pseudo ]
          //  ;

          Read(ToTokenSpec(Token.Colon()));
          if (TryFunctionalPseudo())
              return;

          var clazz = Read(ToTokenSpec(TokenKind.Ident)).Text;
          switch (clazz)
          {
              case "first-child": _generator.FirstChild(); break;
              case "last-child": _generator.LastChild(); break;
              case "only-child": _generator.OnlyChild(); break;
              case "empty": _generator.Empty(); break;
              default:
              {
                  throw new FormatException(string.Format(
                      "Unknown pseudo-class '{0}'. Use either first-child, last-child, only-child or empty.", clazz));
              }
          }
      }

      // Consumes a functional pseudo-class if one is next. Returns false,
      // consuming nothing, when the next token is not a FUNCTION token.
      private bool TryFunctionalPseudo()
      {
          //functional_pseudo
          //  : FUNCTION S* expression ')'
          //  ;

          var token = TryRead(ToTokenSpec(TokenKind.Function));
          if (token == null)
              return false;

          TryRead(ToTokenSpec(TokenKind.WhiteSpace));

          var func = token.Value.Text;
          switch (func)
          {
              case "nth-child": Nth(); break;
              case "nth-last-child": NthLast(); break;
              default:
              {
                  throw new FormatException(string.Format(
                      "Unknown functional pseudo '{0}'. Only nth-child and nth-last-child are supported.", func));
              }
          }

          Read(ToTokenSpec(Token.RightParenthesis()));
          return true;
      }

      private void Nth()
      {
          //nth
          //  : S* [ ['-'|'+']? INTEGER? {N} [ S* ['-'|'+'] S* INTEGER ]? |
          //         ['-'|'+']? INTEGER | {O}{D}{D} | {E}{V}{E}{N} ] S*
          //  ;

          // TODO Add support for the full syntax
          // At present, only INTEGER is allowed

          _generator.NthChild(1, NthB());
      }

      private void NthLast()
      {
          //nth
          //  : S* [ ['-'|'+']? INTEGER? {N} [ S* ['-'|'+'] S* INTEGER ]? |
          //         ['-'|'+']? INTEGER | {O}{D}{D} | {E}{V}{E}{N} ] S*
          //  ;

          // TODO Add support for the full syntax
          // At present, only INTEGER is allowed

          _generator.NthLastChild(1, NthB());
      }

      // Reads the INTEGER argument of nth-child / nth-last-child. The text is
      // parsed with the invariant culture so the result does not depend on
      // the current thread's culture.
      private int NthB()
      {
          return int.Parse(Read(ToTokenSpec(TokenKind.Integer)).Text, CultureInfo.InvariantCulture);
      }

      private void Attrib()
      {
          //attrib
          //  : '[' S* [ namespace_prefix ]? IDENT S*
          //        [ [ PREFIXMATCH |
          //            SUFFIXMATCH |
          //            SUBSTRINGMATCH |
          //            '=' |
          //            INCLUDES |
          //            DASHMATCH ] S* [ IDENT | STRING ] S*
          //        ]? ']'
          //  ;

          Read(ToTokenSpec(Token.LeftBracket()));
          var prefix = TryNamespacePrefix() ?? NamespacePrefix.None;
          var name = Read(ToTokenSpec(TokenKind.Ident)).Text;

          // The operator/value part is optional and, per the grammar above,
          // occurs at most once. A second operator is therefore left in the
          // stream and rejected by the read of ']' below.
          var op = TryRead(
              ToTokenSpec(Token.Equals()),
              ToTokenSpec(TokenKind.Includes),
              ToTokenSpec(TokenKind.DashMatch),
              ToTokenSpec(TokenKind.PrefixMatch),
              ToTokenSpec(TokenKind.SuffixMatch),
              ToTokenSpec(TokenKind.SubstringMatch));

          if (op == null)
          {
              // Bare [name]: only the presence of the attribute is tested.
              _generator.AttributeExists(prefix, name);
          }
          else
          {
              var value = Read(ToTokenSpec(TokenKind.String), ToTokenSpec(TokenKind.Ident)).Text;
              if (op.Value == Token.Equals())
              {
                  _generator.AttributeExact(prefix, name, value);
              }
              else
              {
                  switch (op.Value.Kind)
                  {
                      case TokenKind.Includes: _generator.AttributeIncludes(prefix, name, value); break;
                      case TokenKind.DashMatch: _generator.AttributeDashMatch(prefix, name, value); break;
                      case TokenKind.PrefixMatch: _generator.AttributePrefixMatch(prefix, name, value); break;
                      case TokenKind.SuffixMatch: _generator.AttributeSuffixMatch(prefix, name, value); break;
                      case TokenKind.SubstringMatch: _generator.AttributeSubstring(prefix, name, value); break;
                  }
              }
          }

          Read(ToTokenSpec(Token.RightBracket()));
      }

      private void Class()
      {
          //class
          //  : '.' IDENT
          //  ;

          Read(ToTokenSpec(Token.Dot()));
          _generator.Class(Read(ToTokenSpec(TokenKind.Ident)).Text);
      }

      // Consumes a namespace prefix if one is next. Returns null, leaving
      // the token stream as it was, when there is none.
      private NamespacePrefix? TryNamespacePrefix()
      {
          //namespace_prefix
          //  : [ IDENT | '*' ]? '|'
          //  ;

          var pipe = Token.Pipe();
          var token = TryRead(ToTokenSpec(TokenKind.Ident), ToTokenSpec(Token.Star()), ToTokenSpec(pipe));

          if (token == null)
              return null;

          // A lone '|' with nothing in front of it.
          if (token.Value == pipe)
              return NamespacePrefix.Empty;

          var prefix = token.Value;
          if (TryRead(ToTokenSpec(pipe)) == null)
          {
              // IDENT or '*' without a following '|' is not a prefix;
              // hand the token back for the caller to read.
              Unread(prefix);
              return null;
          }

          return prefix.Kind == TokenKind.Ident
               ? new NamespacePrefix(prefix.Text)
               : NamespacePrefix.Any;
      }

      private void TypeSelectorOrUniversal()
      {
          //type_selector
          //  : [ namespace_prefix ]? element_name
          //  ;
          //element_name
          //  : IDENT
          //  ;
          //universal
          //  : [ namespace_prefix ]? '*'
          //  ;

          var prefix = TryNamespacePrefix() ?? NamespacePrefix.None;
          var token = Read(ToTokenSpec(TokenKind.Ident), ToTokenSpec(Token.Star()));
          if (token.Kind == TokenKind.Ident)
              _generator.Type(prefix, token.Text);
          else
              _generator.Universal(prefix);
      }

      // Returns the next token without consuming it.
      private Token Peek()
      {
          return _reader.Peek();
      }

      // Consumes and returns the next token, which must match the given
      // spec; otherwise throws FormatException.
      private Token Read(TokenSpec spec)
      {
          var token = TryRead(spec);
          if (token == null)
          {
              throw new FormatException(
                  string.Format(@"Unexpected token {{{0}}} where {{{1}}} was expected.",
                  Peek().Kind, spec));
          }
          return token.Value;
      }

      // Consumes and returns the next token, which must match one of the
      // given specs; otherwise throws FormatException.
      private Token Read(params TokenSpec[] specs)
      {
          var token = TryRead(specs);
          if (token == null)
          {
              throw new FormatException(string.Format(
                  @"Unexpected token {{{0}}} where one of [{1}] was expected.",
                  Peek().Kind, string.Join(", ", specs.Select(k => k.ToString()).ToArray())));
          }
          return token.Value;
      }

      // Consumes and returns the next token if it matches any of the given
      // specs; otherwise returns null and consumes nothing.
      private Token? TryRead(params TokenSpec[] specs)
      {
          foreach (var kind in specs)
          {
              var token = TryRead(kind);
              if (token != null)
                  return token;
          }
          return null;
      }

      // Consumes and returns the next token if it matches the spec;
      // otherwise returns null and consumes nothing. A spec holding a
      // TokenKind is compared with the token's Kind, a spec holding a Token
      // is compared with the token itself.
      private Token? TryRead(TokenSpec spec)
      {
          var token = Peek();
          if (!spec.Fold(a => a == token.Kind, b => b == token))
              return null;
          _reader.Read();
          return token;
      }

      // Hands a token back to the reader.
      private void Unread(Token token)
      {
          _reader.Unread(token);
      }

      // Wraps a token kind as a spec.
      private static TokenSpec ToTokenSpec(TokenKind kind)
      {
          return TokenSpec.A(kind);
      }

      // Wraps a token as a spec.
      private static TokenSpec ToTokenSpec(Token token)
      {
          return TokenSpec.B(token);
      }
  }
  413. }
  414. #pragma warning restore