/*
 * Copyright 2013 ZXing authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
  16. using System;
  17. using System.Collections.Generic;
  18. using System.Collections.ObjectModel;
  19. namespace FastReport.Barcode.Aztec
  20. {
  /// <summary>
  /// Produces nearly optimal encodings of text into the first level of
  /// encoding used by Aztec code.
  /// <para>
  /// It uses a dynamic algorithm. For each prefix of the input, it keeps a
  /// set of candidate encodings that could lead to this prefix. A character
  /// (or a special two-character pair) is added at a time and a new set of
  /// candidate encodings is generated, until the entire input has been read.
  /// </para>
  /// @author Frank Yellin
  /// @author Rustam Abdullaev
  /// </summary>
  internal sealed class HighLevelEncoder
  {
      /// <summary>Human-readable mode names, indexed by the MODE_* constants.</summary>
      internal static readonly String[] MODE_NAMES = { "UPPER", "LOWER", "DIGIT", "MIXED", "PUNCT" };

      internal const int MODE_UPPER = 0; // 5 bits
      internal const int MODE_LOWER = 1; // 5 bits
      internal const int MODE_DIGIT = 2; // 4 bits
      internal const int MODE_MIXED = 3; // 5 bits
      internal const int MODE_PUNCT = 4; // 5 bits

      // The Latch Table shows, for each pair of modes [from][to], the optimal
      // method for getting from one mode to another. In the worst possible
      // case, this can be up to 14 bits. In the best possible case, we are
      // already there (entry 0)!
      // The high half-word of each entry gives the number of bits.
      // The low half-word of each entry holds the actual bits necessary to
      // change modes.
      internal static readonly int[][] LATCH_TABLE = new int[][]
      {
          new int[]
          {
              0,
              (5 << 16) + 28,              // UPPER -> LOWER
              (5 << 16) + 30,              // UPPER -> DIGIT
              (5 << 16) + 29,              // UPPER -> MIXED
              (10 << 16) + (29 << 5) + 30, // UPPER -> MIXED -> PUNCT
          },
          new int[]
          {
              (9 << 16) + (30 << 4) + 14,  // LOWER -> DIGIT -> UPPER
              0,
              (5 << 16) + 30,              // LOWER -> DIGIT
              (5 << 16) + 29,              // LOWER -> MIXED
              (10 << 16) + (29 << 5) + 30, // LOWER -> MIXED -> PUNCT
          },
          new int[]
          {
              (4 << 16) + 14,              // DIGIT -> UPPER
              (9 << 16) + (14 << 5) + 28,  // DIGIT -> UPPER -> LOWER
              0,
              (9 << 16) + (14 << 5) + 29,  // DIGIT -> UPPER -> MIXED
              (14 << 16) + (14 << 10) + (29 << 5) + 30,
                                           // DIGIT -> UPPER -> MIXED -> PUNCT
          },
          new int[]
          {
              (5 << 16) + 29,              // MIXED -> UPPER
              (5 << 16) + 28,              // MIXED -> LOWER
              (10 << 16) + (29 << 5) + 30, // MIXED -> UPPER -> DIGIT
              0,
              (5 << 16) + 30,              // MIXED -> PUNCT
          },
          new int[]
          {
              (5 << 16) + 31,              // PUNCT -> UPPER
              (10 << 16) + (31 << 5) + 28, // PUNCT -> UPPER -> LOWER
              (10 << 16) + (31 << 5) + 30, // PUNCT -> UPPER -> DIGIT
              (10 << 16) + (31 << 5) + 29, // PUNCT -> UPPER -> MIXED
              0,
          }
      };

      // A reverse mapping from [mode][char] to the encoding for that character
      // in that mode. An entry of 0 indicates no mapping exists.
      internal static readonly int[][] CHAR_MAP = new int[5][];

      // A map showing the available shift codes, indexed [fromMode][toMode].
      // An entry of -1 means no shift is available. (The shifts to BINARY are
      // not shown.)
      internal static readonly int[][] SHIFT_TABLE = new int[6][]; // mode shift codes, per table

      // The raw bytes being encoded.
      private readonly byte[] text;

      /// <summary>
      /// Builds the static character and shift lookup tables.
      /// </summary>
      static HighLevelEncoder()
      {
          for (int mode = 0; mode < CHAR_MAP.Length; mode++)
          {
              CHAR_MAP[mode] = new int[256];
          }

          CHAR_MAP[MODE_UPPER][' '] = 1;
          for (int c = 'A'; c <= 'Z'; c++)
          {
              CHAR_MAP[MODE_UPPER][c] = c - 'A' + 2;
          }

          CHAR_MAP[MODE_LOWER][' '] = 1;
          for (int c = 'a'; c <= 'z'; c++)
          {
              CHAR_MAP[MODE_LOWER][c] = c - 'a' + 2;
          }

          CHAR_MAP[MODE_DIGIT][' '] = 1;
          for (int c = '0'; c <= '9'; c++)
          {
              CHAR_MAP[MODE_DIGIT][c] = c - '0' + 2;
          }
          CHAR_MAP[MODE_DIGIT][','] = 12;
          CHAR_MAP[MODE_DIGIT]['.'] = 13;

          // The position of each character in this table is its MIXED code.
          int[] mixedTable =
          {
              '\0', ' ', 1, 2, 3, 4, 5, 6, 7, '\b', '\t', '\n', 11, '\f', '\r',
              27, 28, 29, 30, 31, '@', '\\', '^', '_', '`', '|', '~', 127
          };
          for (int i = 0; i < mixedTable.Length; i++)
          {
              CHAR_MAP[MODE_MIXED][mixedTable[i]] = i;
          }

          // The position of each character in this table is its PUNCT code.
          // Slots holding '\0' are skipped below; codes 2..5 are the
          // two-character pairs that encode() detects separately.
          // Index 7 is the double quote: a second apostrophe in that slot
          // would leave '"' without any PUNCT mapping.
          int[] punctTable =
          {
              '\0', '\r', '\0', '\0', '\0', '\0', '!', '"', '#', '$', '%', '&', '\'',
              '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?',
              '[', ']', '{', '}'
          };
          for (int i = 0; i < punctTable.Length; i++)
          {
              if (punctTable[i] > 0)
              {
                  CHAR_MAP[MODE_PUNCT][punctTable[i]] = i;
              }
          }

          // Start with "no shift available" everywhere, then record the
          // shifts that do exist.
          for (int mode = 0; mode < SHIFT_TABLE.Length; mode++)
          {
              SHIFT_TABLE[mode] = new int[6];
              SupportClass.Fill(SHIFT_TABLE[mode], -1);
          }
          SHIFT_TABLE[MODE_UPPER][MODE_PUNCT] = 0;
          SHIFT_TABLE[MODE_LOWER][MODE_PUNCT] = 0;
          SHIFT_TABLE[MODE_LOWER][MODE_UPPER] = 28;
          SHIFT_TABLE[MODE_MIXED][MODE_PUNCT] = 0;
          SHIFT_TABLE[MODE_DIGIT][MODE_PUNCT] = 0;
          SHIFT_TABLE[MODE_DIGIT][MODE_UPPER] = 15;
      }

      /// <summary>
      /// Creates an encoder for the given bytes.
      /// </summary>
      /// <param name="text">The bytes to encode; must not be null.</param>
      /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
      public HighLevelEncoder(byte[] text)
      {
          if (text == null)
          {
              // Fail here rather than with a NullReferenceException inside encode().
              throw new ArgumentNullException("text");
          }
          this.text = text;
      }

      /// <summary>
      /// Convert the text represented by this High Level Encoder into a BitArray.
      /// </summary>
      /// <returns>text represented by this encoder encoded as a <see cref="BitArray"/></returns>
      public BitArray encode()
      {
          ICollection<State> states = new Collection<State>();
          states.Add(State.INITIAL_STATE);

          for (int index = 0; index < text.Length; index++)
          {
              // don't remove the (int) type cast, mono compiler needs it
              int nextChar = (index + 1 < text.Length) ? (int)text[index + 1] : 0;
              int pairCode = getPairCode(text[index], nextChar);

              if (pairCode > 0)
              {
                  // We have one of the four special PUNCT pairs. Treat them specially.
                  // Get a new set of states for the two new characters.
                  states = updateStateListForPair(states, index, pairCode);
                  index++; // the pair consumed the following byte as well
              }
              else
              {
                  // Get a new set of states for the new character.
                  states = updateStateListForChar(states, index);
              }
          }

          // We are left with a set of states. Find the shortest one; on a tie
          // the first one encountered wins.
          State minState = null;
          foreach (State state in states)
          {
              if (minState == null || state.BitCount < minState.BitCount)
              {
                  minState = state;
              }
          }

          // Convert it to a bit array, and return.
          return minState.toBitArray(text);
      }

      // Returns the PUNCT code of the two-character pair formed by "current"
      // followed by "next" (2 = CR LF, 3 = ". ", 4 = ", ", 5 = ": "), or 0 when
      // the two bytes do not form one of those pairs.
      private static int getPairCode(byte current, int next)
      {
          switch (current)
          {
              case (byte)'\r':
                  return next == '\n' ? 2 : 0;
              case (byte)'.':
                  return next == ' ' ? 3 : 0;
              case (byte)',':
                  return next == ' ' ? 4 : 0;
              case (byte)':':
                  return next == ' ' ? 5 : 0;
              default:
                  return 0;
          }
      }

      // We update a set of states for a new character by updating each state
      // for the new character, merging the results, and then removing the
      // non-optimal states.
      private ICollection<State> updateStateListForChar(IEnumerable<State> states, int index)
      {
          LinkedList<State> result = new LinkedList<State>();
          foreach (State state in states)
          {
              updateStateForChar(state, index, result);
          }
          return simplifyStates(result);
      }

      // Return a set of states that represent the possible ways of updating this
      // state for the next character. The resulting set of states are added to
      // the "result" list.
      private void updateStateForChar(State state, int index, ICollection<State> result)
      {
          char ch = (char)(text[index] & 0xFF);
          bool charInCurrentTable = CHAR_MAP[state.Mode][ch] > 0;
          State stateNoBinary = null;

          for (int mode = 0; mode <= MODE_PUNCT; mode++)
          {
              int charInMode = CHAR_MAP[mode][ch];
              if (charInMode <= 0)
              {
                  continue; // this mode cannot represent the character
              }

              if (stateNoBinary == null)
              {
                  // Only create stateNoBinary the first time it's required.
                  stateNoBinary = state.endBinaryShift(index);
              }

              // Try generating the character by latching to its mode.
              if (!charInCurrentTable || mode == state.Mode || mode == MODE_DIGIT)
              {
                  // If the character is in the current table, we don't want to latch to
                  // any other mode except possibly digit (which uses only 4 bits). Any
                  // other latch would be equally successful *after* this character, and
                  // so wouldn't save any bits.
                  result.Add(stateNoBinary.latchAndAppend(mode, charInMode));
              }

              // Try generating the character by switching to its mode.
              if (!charInCurrentTable && SHIFT_TABLE[state.Mode][mode] >= 0)
              {
                  // It never makes sense to temporarily shift to another mode if the
                  // character exists in the current mode. That can never save bits.
                  result.Add(stateNoBinary.shiftAndAppend(mode, charInMode));
              }
          }

          if (state.BinaryShiftByteCount > 0 || !charInCurrentTable)
          {
              // It's never worthwhile to go into binary shift mode if you're not already
              // in binary shift mode, and the character exists in your current mode.
              // That can never save bits over just outputting the char in the current mode.
              result.Add(state.addBinaryShiftChar(index));
          }
      }

      // Updates every state for a two-character PUNCT pair starting at "index",
      // merges the results, and removes the non-optimal states.
      private static ICollection<State> updateStateListForPair(IEnumerable<State> states, int index, int pairCode)
      {
          LinkedList<State> result = new LinkedList<State>();
          foreach (State state in states)
          {
              updateStateForPair(state, index, pairCode, result);
          }
          return simplifyStates(result);
      }

      // Adds to "result" the possible ways of updating this state for the
      // two-character pair identified by "pairCode" that starts at "index".
      private static void updateStateForPair(State state, int index, int pairCode, ICollection<State> result)
      {
          State stateNoBinary = state.endBinaryShift(index);

          // Possibility 1. Latch to MODE_PUNCT, and then append this code.
          result.Add(stateNoBinary.latchAndAppend(MODE_PUNCT, pairCode));

          if (state.Mode != MODE_PUNCT)
          {
              // Possibility 2. Shift to MODE_PUNCT, and then append this code.
              // Every state except MODE_PUNCT (handled above) can shift.
              result.Add(stateNoBinary.shiftAndAppend(MODE_PUNCT, pairCode));
          }

          if (pairCode == 3 || pairCode == 4)
          {
              // Both characters are in DIGIT. Sometimes better to just add two digits.
              State digitState = stateNoBinary
                  .latchAndAppend(MODE_DIGIT, 16 - pairCode) // period or comma in DIGIT
                  .latchAndAppend(MODE_DIGIT, 1);            // space in DIGIT
              result.Add(digitState);
          }

          if (state.BinaryShiftByteCount > 0)
          {
              // It only makes sense to do the characters as binary if we're already
              // in binary mode.
              State binaryState = state.addBinaryShiftChar(index).addBinaryShiftChar(index + 1);
              result.Add(binaryState);
          }
      }

      // Reduces a list of states to those that are not dominated by another:
      // a new state is dropped when a kept state is better than or equal to
      // it, and kept states that the new state is better than or equal to
      // are removed.
      private static ICollection<State> simplifyStates(IEnumerable<State> states)
      {
          LinkedList<State> result = new LinkedList<State>();
          List<State> removeList = new List<State>();

          foreach (State newState in states)
          {
              bool add = true;
              removeList.Clear();

              foreach (State oldState in result)
              {
                  if (oldState.isBetterThanOrEqualTo(newState))
                  {
                      add = false;
                      break;
                  }
                  if (newState.isBetterThanOrEqualTo(oldState))
                  {
                      // Removal is deferred: "result" cannot be modified while
                      // it is being enumerated.
                      removeList.Add(oldState);
                  }
              }

              if (add)
              {
                  result.AddLast(newState);
              }
              foreach (State removeItem in removeList)
              {
                  result.Remove(removeItem);
              }
          }

          return result;
      }
  }
  358. }