// SqlTokenizer.cs
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Text;
  /// <summary>
  /// Splits a SQL script into individual statements at top-level semicolons.
  /// </summary>
  /// <remarks>
  /// A semicolon is not treated as a terminator inside single- or double-quoted text,
  /// <c>--</c> line comments, <c>/* */</c> block comments, (optionally) dollar-quoted
  /// bodies, or while a <c>BEGIN</c>/<c>CASE</c> ... <c>END</c> block is open.
  /// Keyword detection is a heuristic; no SQL dialect is fully parsed, and nested
  /// block comments are not recognised.
  /// </remarks>
  public static class SqlTokenizer
  {
      // Words that, directly after BEGIN, mean "start a transaction" rather than "open a block".
      private static readonly string[] TransactionQualifiers =
      {
          "TRANSACTION", "TRAN", "WORK", "DEFERRED", "IMMEDIATE", "EXCLUSIVE", "DISTRIBUTED", "ISOLATION"
      };

      // Qualifiers of compound closers such as "END IF;" whose opener is never counted as nesting.
      private static readonly string[] UncountedClosers = { "IF", "LOOP", "WHILE", "REPEAT" };

      /// <summary>
      /// Splits <paramref name="sql"/> into statements without dollar-quote support.
      /// </summary>
      /// <param name="sql">The script to split.</param>
      /// <param name="keepTerminator">When true, each terminated statement keeps its trailing ';'.</param>
      /// <returns>The trimmed, non-empty statements in source order.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="sql"/> is null.</exception>
      public static List<string> SplitStatements(string sql, bool keepTerminator = false)
      {
          return SplitStatements(sql, keepTerminator, false);
      }

      /// <summary>
      /// Splits <paramref name="sql"/> into statements.
      /// </summary>
      /// <param name="sql">The script to split.</param>
      /// <param name="keepTerminator">When true, each terminated statement keeps its trailing ';'.</param>
      /// <param name="dollarQuoting">
      /// When true, PostgreSQL-style <c>$tag$ ... $tag$</c> bodies are copied verbatim,
      /// so semicolons and keywords inside them are ignored.
      /// </param>
      /// <returns>The trimmed, non-empty statements in source order.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="sql"/> is null.</exception>
      public static List<string> SplitStatements(string sql, bool keepTerminator, bool dollarQuoting)
      {
          if (sql == null) throw new ArgumentNullException(nameof(sql));

          var results = new List<string>();
          var sb = new StringBuilder(sql.Length);
          int nested = 0; // number of currently open BEGIN/CASE blocks
          int i = 0;

          while (i < sql.Length)
          {
              char c = sql[i];

              // 1) Statement terminator: splits only when no block is open.
              if (c == ';')
              {
                  if (nested == 0)
                  {
                      if (keepTerminator)
                      {
                          sb.Append(c);
                      }
                      FlushStatement(sb, results);
                  }
                  else
                  {
                      sb.Append(c);
                  }
                  i++;
                  continue;
              }

              // 2) Quoted text: '...' or "..." (a doubled quote is an escaped quote).
              if (c == '\'' || c == '"')
              {
                  ConsumeQuoted(sql, ref i, sb);
                  continue;
              }

              bool hasNext = i + 1 < sql.Length;

              // 3) Line comment: -- ... (to end of line)
              if (c == '-' && hasNext && sql[i + 1] == '-')
              {
                  ConsumeLineComment(sql, ref i, sb);
                  continue;
              }

              // 4) Block comment: /* ... */
              if (c == '/' && hasNext && sql[i + 1] == '*')
              {
                  ConsumeBlockComment(sql, ref i, sb);
                  continue;
              }

              // 5) Dollar-quoted body (opt-in). A '$' glued to a preceding word is part
              //    of an identifier, not an opening delimiter.
              if (dollarQuoting && c == '$'
                  && (i == 0 || !IsWordChar(sql[i - 1]))
                  && TryConsumeDollarQuoted(sql, ref i, sb))
              {
                  continue;
              }

              // 6) Words are consumed whole so keywords only match on word boundaries
              //    ("BACKEND" is not END) and are copied with their original casing.
              if (IsWordChar(c))
              {
                  int wordEnd = ScanWord(sql, i);
                  if (!IsNamePrefixed(sql, i))
                  {
                      nested = UpdateNesting(sql, i, ref wordEnd, nested);
                  }
                  sb.Append(sql, i, wordEnd - i);
                  i = wordEnd;
                  continue;
              }

              // Default: copy char
              sb.Append(c);
              i++;
          }

          // Remainder after the last terminator.
          FlushStatement(sb, results);
          return results;
      }

      // Adds the trimmed buffer to results when it is non-empty, then resets the buffer.
      private static void FlushStatement(StringBuilder sb, List<string> results)
      {
          string statement = sb.ToString().Trim();
          if (statement.Length > 0)
          {
              results.Add(statement);
          }
          sb.Clear();
      }

      // True for characters that can appear inside a keyword or plain identifier.
      private static bool IsWordChar(char ch)
      {
          return char.IsLetterOrDigit(ch) || ch == '_';
      }

      // Returns the index just past the run of word characters starting at pos.
      private static int ScanWord(string s, int pos)
      {
          while (pos < s.Length && IsWordChar(s[pos]))
          {
              pos++;
          }
          return pos;
      }

      // Returns the index of the first non-whitespace character at or after pos.
      private static int SkipWhitespace(string s, int pos)
      {
          while (pos < s.Length && char.IsWhiteSpace(s[pos]))
          {
              pos++;
          }
          return pos;
      }

      // True when s[start, start + length) equals keyword, ignoring case.
      private static bool IsKeyword(string s, int start, int length, string keyword)
      {
          return length == keyword.Length
              && string.Compare(s, start, keyword, 0, length, StringComparison.OrdinalIgnoreCase) == 0;
      }

      // True when the word at wordStart is a parameter, variable or qualified name
      // ("@End", ":end", "t.end", "#end") and therefore must not be read as a keyword.
      private static bool IsNamePrefixed(string s, int wordStart)
      {
          if (wordStart == 0)
          {
              return false;
          }
          char prev = s[wordStart - 1];
          if (prev == '@' || prev == '#' || prev == '.')
          {
              return true;
          }
          // ':' marks a bind parameter unless it is glued to a label ("lbl:BEGIN").
          return prev == ':' && !(wordStart >= 2 && IsWordChar(s[wordStart - 2]));
      }

      // True when the BEGIN ending at pos starts a transaction ("BEGIN;", "BEGIN TRANSACTION")
      // rather than opening a block.
      private static bool StartsTransaction(string s, int pos)
      {
          pos = SkipWhitespace(s, pos);
          if (pos >= s.Length || s[pos] == ';')
          {
              return true;
          }
          int length = ScanWord(s, pos) - pos;
          foreach (string qualifier in TransactionQualifiers)
          {
              if (IsKeyword(s, pos, length, qualifier))
              {
                  return true;
              }
          }
          return false;
      }

      // True when only whitespace separates pos from a ';' or the end of input.
      private static bool IsStatementEnd(string s, int pos)
      {
          pos = SkipWhitespace(s, pos);
          return pos >= s.Length || s[pos] == ';';
      }

      // Returns the nesting depth after the word s[start, end). For "END <qualifier>;"
      // forms, end is advanced past the qualifier so it is copied with END and not
      // re-read as an opener.
      private static int UpdateNesting(string s, int start, ref int end, int nested)
      {
          int length = end - start;
          if (IsKeyword(s, start, length, "CASE"))
          {
              return nested + 1;
          }
          if (IsKeyword(s, start, length, "BEGIN"))
          {
              return StartsTransaction(s, end) ? nested : nested + 1;
          }
          if (!IsKeyword(s, start, length, "END"))
          {
              return nested;
          }

          // Never go negative: a stray END (e.g. END as a COMMIT synonym) must not
          // disable splitting for the rest of the script.
          int closed = nested > 0 ? nested - 1 : 0;

          // Look for a qualifier on the same line ("END IF;"). Requiring the ';'
          // keeps "END" followed by a new "IF ..." statement from being misread.
          int qualifierStart = end;
          while (qualifierStart < s.Length && (s[qualifierStart] == ' ' || s[qualifierStart] == '\t'))
          {
              qualifierStart++;
          }
          int qualifierEnd = ScanWord(s, qualifierStart);
          int qualifierLength = qualifierEnd - qualifierStart;
          if (qualifierLength == 0 || !IsStatementEnd(s, qualifierEnd))
          {
              return closed;
          }

          if (IsKeyword(s, qualifierStart, qualifierLength, "CASE"))
          {
              // "END CASE;" closes the CASE statement; the trailing CASE opens nothing.
              end = qualifierEnd;
              return closed;
          }
          foreach (string closer in UncountedClosers)
          {
              if (IsKeyword(s, qualifierStart, qualifierLength, closer))
              {
                  // The matching IF/LOOP/... was never counted, so depth is unchanged.
                  end = qualifierEnd;
                  return nested;
              }
          }
          return closed;
      }

      // Copies a quoted run starting at the opening quote s[i]; a doubled quote
      // character inside the run is an escape. Unterminated text runs to the end.
      private static void ConsumeQuoted(string s, ref int i, StringBuilder sb)
      {
          char quote = s[i];
          int start = i;
          i++;
          while (i < s.Length)
          {
              if (s[i] != quote)
              {
                  i++;
                  continue;
              }
              if (i + 1 < s.Length && s[i + 1] == quote)
              {
                  i += 2; // escaped quote, still inside the run
                  continue;
              }
              i++; // closing quote
              break;
          }
          sb.Append(s, start, i - start);
      }

      // Copies a "--" comment starting at s[i] through the end of its line (inclusive).
      private static void ConsumeLineComment(string s, ref int i, StringBuilder sb)
      {
          int start = i;
          int newline = s.IndexOf('\n', i + 2);
          i = newline < 0 ? s.Length : newline + 1;
          sb.Append(s, start, i - start);
      }

      // Copies a "/* ... */" comment starting at s[i]; unterminated comments run to the end.
      private static void ConsumeBlockComment(string s, ref int i, StringBuilder sb)
      {
          int start = i;
          int close = s.IndexOf("*/", i + 2, StringComparison.Ordinal);
          i = close < 0 ? s.Length : close + 2;
          sb.Append(s, start, i - start);
      }

      // Tries to copy a dollar-quoted body starting at the '$' at s[i].
      // The opener is $tag$ where tag is empty or [letter or _][letter, digit or _]*.
      // Returns false, leaving i and sb untouched, when s[i] does not start a valid
      // opener. An unterminated body is consumed to the end of input.
      private static bool TryConsumeDollarQuoted(string s, ref int i, StringBuilder sb)
      {
          int tagEnd = i + 1;
          while (tagEnd < s.Length && s[tagEnd] != '$')
          {
              char ch = s[tagEnd];
              bool isFirstTagChar = tagEnd == i + 1;
              // A leading digit means a positional parameter such as $1, not a tag.
              if (!IsWordChar(ch) || (isFirstTagChar && char.IsDigit(ch)))
              {
                  return false;
              }
              tagEnd++;
          }
          if (tagEnd >= s.Length)
          {
              return false; // no closing '$' for the opener
          }

          string delimiter = s.Substring(i, tagEnd - i + 1); // includes both '$'
          int close = s.IndexOf(delimiter, tagEnd + 1, StringComparison.Ordinal);
          int stop = close < 0 ? s.Length : close + delimiter.Length;
          sb.Append(s, i, stop - i);
          i = stop;
          return true;
      }
  }