From 11dbdde1c576760fb12ded977c9d2c73b3368955 Mon Sep 17 00:00:00 2001 From: "srowen@gmail.com" Date: Sat, 1 Jun 2013 13:56:31 +0000 Subject: [PATCH] Issue 1693 Aztec encoder improvements git-svn-id: https://zxing.googlecode.com/svn/trunk@2814 59b500cc-1b3d-0410-9834-0bbf25fbcc57 --- .../google/zxing/aztec/decoder/Decoder.java | 9 +- .../zxing/aztec/encoder/BinaryShiftToken.java | 62 ++++ .../google/zxing/aztec/encoder/Encoder.java | 254 +-------------- .../zxing/aztec/encoder/HighLevelEncoder.java | 307 ++++++++++++++++++ .../zxing/aztec/encoder/SimpleToken.java | 45 +++ .../com/google/zxing/aztec/encoder/State.java | 169 ++++++++++ .../com/google/zxing/aztec/encoder/Token.java | 52 +++ .../zxing/aztec/encoder/EncoderTest.java | 221 ++++++++----- 8 files changed, 793 insertions(+), 326 deletions(-) create mode 100644 core/src/com/google/zxing/aztec/encoder/BinaryShiftToken.java create mode 100644 core/src/com/google/zxing/aztec/encoder/HighLevelEncoder.java create mode 100644 core/src/com/google/zxing/aztec/encoder/SimpleToken.java create mode 100644 core/src/com/google/zxing/aztec/encoder/State.java create mode 100644 core/src/com/google/zxing/aztec/encoder/Token.java diff --git a/core/src/com/google/zxing/aztec/decoder/Decoder.java b/core/src/com/google/zxing/aztec/decoder/Decoder.java index 7ff82de86..b2293bf7a 100644 --- a/core/src/com/google/zxing/aztec/decoder/Decoder.java +++ b/core/src/com/google/zxing/aztec/decoder/Decoder.java @@ -113,12 +113,19 @@ public final class Decoder { * @throws FormatException if the input is not valid */ private String getEncodedData(boolean[] correctedBits) throws FormatException { - int endIndex = codewordSize * ddata.getNbDatablocks() - invertedBitCount; if (endIndex > correctedBits.length) { throw FormatException.getFormatInstance(); } + return getEncodedData(correctedBits, endIndex); + } + // This method is used for testing the high-level encoder + public static String highLevelDecode(boolean[] correctedBits) { + return 
getEncodedData(correctedBits, correctedBits.length); + } + + private static String getEncodedData(boolean[] correctedBits, int endIndex) { Table lastTable = Table.UPPER; Table table = Table.UPPER; int startIndex = 0; diff --git a/core/src/com/google/zxing/aztec/encoder/BinaryShiftToken.java b/core/src/com/google/zxing/aztec/encoder/BinaryShiftToken.java new file mode 100644 index 000000000..7d8940cea --- /dev/null +++ b/core/src/com/google/zxing/aztec/encoder/BinaryShiftToken.java @@ -0,0 +1,62 @@ +/* + * Copyright 2013 ZXing authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.zxing.aztec.encoder; + +import com.google.zxing.common.BitArray; + +final class BinaryShiftToken extends Token { + + private final short binaryShiftStart; + private final short binaryShiftByteCount; + + BinaryShiftToken(Token previous, + int totalBitCount, + int binaryShiftStart, + int binaryShiftByteCount) { + super(previous, totalBitCount); + this.binaryShiftStart = (short) binaryShiftStart; + this.binaryShiftByteCount = (short) binaryShiftByteCount; + } + + @Override + public void appendTo(BitArray bitArray, byte[] text) { + for (int i = 0; i < binaryShiftByteCount; i++) { + if (i == 0 || (i == 31 && binaryShiftByteCount <= 62)) { + // We need a header before the first character, and before + // character 31 when the total byte code is <= 62 + bitArray.appendBits(31, 5); + if (binaryShiftByteCount > 62) { + bitArray.appendBits(binaryShiftByteCount - 31, 16); + } else if (i == 0) { + // 1 <= binaryShiftByteCode <= 62 + bitArray.appendBits(Math.min(binaryShiftByteCount, 31), 5); + } else { + // 32 <= binaryShiftCount <= 62 and i == 31 + bitArray.appendBits(binaryShiftByteCount - 31, 5); + } + } + bitArray.appendBits(text[binaryShiftStart + i], 8); + } + //assert bitArray.getSize() == getTotalBitCount(); + } + + @Override + public String toString() { + return "<" + binaryShiftStart + "::" + (binaryShiftStart + binaryShiftByteCount - 1) + '>'; + } + +} diff --git a/core/src/com/google/zxing/aztec/encoder/Encoder.java b/core/src/com/google/zxing/aztec/encoder/Encoder.java index 387c3b7cf..9d18618bc 100644 --- a/core/src/com/google/zxing/aztec/encoder/Encoder.java +++ b/core/src/com/google/zxing/aztec/encoder/Encoder.java @@ -16,8 +16,6 @@ package com.google.zxing.aztec.encoder; -import java.util.Arrays; - import com.google.zxing.common.BitArray; import com.google.zxing.common.BitMatrix; import com.google.zxing.common.reedsolomon.GenericGF; @@ -29,78 +27,12 @@ import com.google.zxing.common.reedsolomon.ReedSolomonEncoder; * @author Rustam 
Abdullaev */ public final class Encoder { - - public static final int DEFAULT_EC_PERCENT = 33; // default minimal percentage of error check words - private static final int TABLE_UPPER = 0; // 5 bits - private static final int TABLE_LOWER = 1; // 5 bits - private static final int TABLE_DIGIT = 2; // 4 bits - private static final int TABLE_MIXED = 3; // 5 bits - private static final int TABLE_PUNCT = 4; // 5 bits - private static final int TABLE_BINARY = 5; // 8 bits - - private static final int[][] CHAR_MAP = new int[5][256]; // reverse mapping ASCII -> table offset, per table - private static final int[][] SHIFT_TABLE = new int[6][6]; // mode shift codes, per table - private static final int[][] LATCH_TABLE = new int[6][6]; // mode latch codes, per table + public static final int DEFAULT_EC_PERCENT = 33; // default minimal percentage of error check words private static final int[] NB_BITS; // total bits per compact symbol for a given number of layers private static final int[] NB_BITS_COMPACT; // total bits per full symbol for a given number of layers static { - CHAR_MAP[TABLE_UPPER][' '] = 1; - for (int c = 'A'; c <= 'Z'; c++) { - CHAR_MAP[TABLE_UPPER][c] = c - 'A' + 2; - } - CHAR_MAP[TABLE_LOWER][' '] = 1; - for (int c = 'a'; c <= 'z'; c++) { - CHAR_MAP[TABLE_LOWER][c] = c - 'a' + 2; - } - CHAR_MAP[TABLE_DIGIT][' '] = 1; - for (int c = '0'; c <= '9'; c++) { - CHAR_MAP[TABLE_DIGIT][c] = c - '0' + 2; - } - CHAR_MAP[TABLE_DIGIT][','] = 12; - CHAR_MAP[TABLE_DIGIT]['.'] = 13; - int[] mixedTable = { - '\0', ' ', '\1', '\2', '\3', '\4', '\5', '\6', '\7', '\b', '\t', '\n', '\13', '\f', '\r', - '\33', '\34', '\35', '\36', '\37', '@', '\\', '^', '_', '`', '|', '~', '\177' - }; - for (int i = 0; i < mixedTable.length; i++) { - CHAR_MAP[TABLE_MIXED][mixedTable[i]] = i; - } - int[] punctTable = { - '\0', '\r', '\0', '\0', '\0', '\0', '!', '\'', '#', '$', '%', '&', '\'', '(', ')', '*', '+', - ',', '-', '.', '/', ':', ';', '<', '=', '>', '?', '[', ']', '{', '}' - }; - for (int 
i = 0; i < punctTable.length; i++) { - if (punctTable[i] > 0) { - CHAR_MAP[TABLE_PUNCT][punctTable[i]] = i; - } - } - for (int[] table : SHIFT_TABLE) { - Arrays.fill(table, -1); - } - for (int[] table : LATCH_TABLE) { - Arrays.fill(table, -1); - } - SHIFT_TABLE[TABLE_UPPER][TABLE_PUNCT] = 0; - LATCH_TABLE[TABLE_UPPER][TABLE_LOWER] = 28; - LATCH_TABLE[TABLE_UPPER][TABLE_MIXED] = 29; - LATCH_TABLE[TABLE_UPPER][TABLE_DIGIT] = 30; - SHIFT_TABLE[TABLE_UPPER][TABLE_BINARY] = 31; - SHIFT_TABLE[TABLE_LOWER][TABLE_PUNCT] = 0; - SHIFT_TABLE[TABLE_LOWER][TABLE_UPPER] = 28; - LATCH_TABLE[TABLE_LOWER][TABLE_MIXED] = 29; - LATCH_TABLE[TABLE_LOWER][TABLE_DIGIT] = 30; - SHIFT_TABLE[TABLE_LOWER][TABLE_BINARY] = 31; - SHIFT_TABLE[TABLE_MIXED][TABLE_PUNCT] = 0; - LATCH_TABLE[TABLE_MIXED][TABLE_LOWER] = 28; - LATCH_TABLE[TABLE_MIXED][TABLE_UPPER] = 29; - LATCH_TABLE[TABLE_MIXED][TABLE_PUNCT] = 30; - SHIFT_TABLE[TABLE_MIXED][TABLE_BINARY] = 31; - LATCH_TABLE[TABLE_PUNCT][TABLE_UPPER] = 31; - SHIFT_TABLE[TABLE_DIGIT][TABLE_PUNCT] = 0; - LATCH_TABLE[TABLE_DIGIT][TABLE_UPPER] = 30; - SHIFT_TABLE[TABLE_DIGIT][TABLE_UPPER] = 31; NB_BITS_COMPACT = new int[5]; for (int i = 1; i < NB_BITS_COMPACT.length; i++) { NB_BITS_COMPACT[i] = (88 + 16 * i) * i; @@ -140,7 +72,7 @@ public final class Encoder { public static AztecCode encode(byte[] data, int minECCPercent) { // High-level encode - BitArray bits = highLevelEncode(data); + BitArray bits = new HighLevelEncoder(data).encode(); // stuff bits and choose symbol size int eccBits = bits.getSize() * minECCPercent / 100 + 11; @@ -183,12 +115,9 @@ public final class Encoder { // pad the end int messageSizeInWords = (stuffedBits.getSize() + wordSize - 1) / wordSize; - // This seems to be redundant? 
- /* for (int i = messageSizeInWords * wordSize - stuffedBits.getSize(); i > 0; i--) { stuffedBits.appendBit(true); } - */ // generate check words ReedSolomonEncoder rs = new ReedSolomonEncoder(getGF(wordSize)); @@ -277,7 +206,7 @@ public final class Encoder { return aztec; } - static void drawBullsEye(BitMatrix matrix, int center, int size) { + private static void drawBullsEye(BitMatrix matrix, int center, int size) { for (int i = 0; i < size; i += 2) { for (int j = center - i; j <= center + i; j++) { matrix.set(j, center - i); @@ -308,7 +237,7 @@ public final class Encoder { return modeMessage; } - static void drawModeMessage(BitMatrix matrix, boolean compact, int matrixSize, BitArray modeMessage) { + private static void drawModeMessage(BitMatrix matrix, boolean compact, int matrixSize, BitArray modeMessage) { if (compact) { for (int i = 0; i < 7; i++) { if (modeMessage.get(i)) { @@ -342,7 +271,7 @@ public final class Encoder { } } - static BitArray generateCheckWords(BitArray stuffedBits, int totalSymbolBits, int wordSize) { + private static BitArray generateCheckWords(BitArray stuffedBits, int totalSymbolBits, int wordSize) { int messageSizeInWords = (stuffedBits.getSize() + wordSize - 1) / wordSize; for (int i = messageSizeInWords * wordSize - stuffedBits.getSize(); i > 0; i--) { stuffedBits.appendBit(true); @@ -360,7 +289,7 @@ public final class Encoder { return messageBits; } - static int[] bitsToWords(BitArray stuffedBits, int wordSize, int totalWords) { + private static int[] bitsToWords(BitArray stuffedBits, int wordSize, int totalWords) { int[] message = new int[totalWords]; int i; int n; @@ -374,7 +303,7 @@ public final class Encoder { return message; } - static GenericGF getGF(int wordSize) { + private static GenericGF getGF(int wordSize) { switch (wordSize) { case 4: return GenericGF.AZTEC_PARAM; @@ -416,8 +345,6 @@ public final class Encoder { } // 2. pad last word to wordSize - // This seems to be redundant? 
- /* n = out.getSize(); int remainder = n % wordSize; if (remainder != 0) { @@ -432,173 +359,6 @@ public final class Encoder { } out.appendBit(j == 0); } - */ return out; } - - static BitArray highLevelEncode(byte[] data) { - BitArray bits = new BitArray(); - int mode = TABLE_UPPER; - int[] idx = new int[5]; - int[] idxnext = new int[5]; - for (int i = 0; i < data.length; i++) { - int c = data[i] & 0xFF; - int next = i < data.length - 1 ? data[i + 1] & 0xFF : 0; - int punctWord = 0; - // special case: double-character codes - if (c == '\r' && next == '\n') { - punctWord = 2; - } else if (c == '.' && next == ' ') { - punctWord = 3; - } else if (c == ',' && next == ' ') { - punctWord = 4; - } else if (c == ':' && next == ' ') { - punctWord = 5; - } - if (punctWord > 0) { - if (mode == TABLE_PUNCT) { - outputWord(bits, TABLE_PUNCT, punctWord); - i++; - continue; - } else if (SHIFT_TABLE[mode][TABLE_PUNCT] >= 0) { - outputWord(bits, mode, SHIFT_TABLE[mode][TABLE_PUNCT]); - outputWord(bits, TABLE_PUNCT, punctWord); - i++; - continue; - } else if (LATCH_TABLE[mode][TABLE_PUNCT] >= 0) { - outputWord(bits, mode, LATCH_TABLE[mode][TABLE_PUNCT]); - outputWord(bits, TABLE_PUNCT, punctWord); - mode = TABLE_PUNCT; - i++; - continue; - } - } - // find the best matching table, taking current mode and next character into account - int firstMatch = -1; - int shiftMode = -1; - int latchMode = -1; - int j; - for (j = 0; j < TABLE_BINARY; j++) { - idx[j] = CHAR_MAP[j][c]; - if (idx[j] > 0 && firstMatch < 0) { - firstMatch = j; - } - if (shiftMode < 0 && idx[j] > 0 && SHIFT_TABLE[mode][j] >= 0) { - shiftMode = j; - } - idxnext[j] = CHAR_MAP[j][next]; - if (latchMode < 0 && idx[j] > 0 && (next == 0 || idxnext[j] > 0) && LATCH_TABLE[mode][j] >= 0) { - latchMode = j; - } - } - if (shiftMode < 0 && latchMode < 0) { - for (j = 0; j < TABLE_BINARY; j++) { - if (idx[j] > 0 && LATCH_TABLE[mode][j] >= 0) { - latchMode = j; - break; - } - } - } - if (idx[mode] > 0) { - // found character in 
current table - stay in current table - outputWord(bits, mode, idx[mode]); - } else { - if (latchMode >= 0) { - // latch into mode latchMode - outputWord(bits, mode, LATCH_TABLE[mode][latchMode]); - outputWord(bits, latchMode, idx[latchMode]); - mode = latchMode; - } else if (shiftMode >= 0) { - // shift into shiftMode - outputWord(bits, mode, SHIFT_TABLE[mode][shiftMode]); - outputWord(bits, shiftMode, idx[shiftMode]); - } else { - if (firstMatch >= 0) { - // can't switch into this mode from current mode - switch in two steps - if (mode == TABLE_PUNCT) { - outputWord(bits, TABLE_PUNCT, LATCH_TABLE[TABLE_PUNCT][TABLE_UPPER]); - mode = TABLE_UPPER; - i--; - continue; - } else if (mode == TABLE_DIGIT) { - outputWord(bits, TABLE_DIGIT, LATCH_TABLE[TABLE_DIGIT][TABLE_UPPER]); - mode = TABLE_UPPER; - i--; - continue; - } - } - // use binary table - // find the binary string length - int k; - int lookahead; - for (k = i + 1, lookahead = 0; k < data.length; k++) { - next = data[k] & 0xFF; - boolean binary = true; - for (j = 0; j < TABLE_BINARY; j++) { - if (CHAR_MAP[j][next] > 0) { - binary = false; - break; - } - } - if (binary) { - lookahead = 0; - } else { - // skip over single character in between binary bytes - if (lookahead >= 1) { - k -= lookahead; - break; - } - lookahead++; - } - } - k -= i; - // switch into binary table - switch (mode) { - case TABLE_UPPER: - case TABLE_LOWER: - case TABLE_MIXED: - outputWord(bits, mode, SHIFT_TABLE[mode][TABLE_BINARY]); - break; - case TABLE_DIGIT: - outputWord(bits, mode, LATCH_TABLE[mode][TABLE_UPPER]); - mode = TABLE_UPPER; - outputWord(bits, mode, SHIFT_TABLE[mode][TABLE_BINARY]); - break; - case TABLE_PUNCT: - outputWord(bits, mode, LATCH_TABLE[mode][TABLE_UPPER]); - mode = TABLE_UPPER; - outputWord(bits, mode, SHIFT_TABLE[mode][TABLE_BINARY]); - break; - } - if (k >= 32 && k < 63) { // optimization: split one long form into two short forms, saves 1 bit - k = 31; - } - if (k > 542) { // maximum encodable binary length in 
long form is 511 + 31 - k = 542; - } - if (k < 32) { - bits.appendBits(k, 5); - } else { - bits.appendBits(k - 31, 16); - } - for (; k > 0; k--, i++) { - bits.appendBits(data[i], 8); - } - i--; - } - } - } - return bits; - } - - static void outputWord(BitArray bits, int mode, int value) { - if (mode == TABLE_DIGIT) { - bits.appendBits(value, 4); - } else if (mode < TABLE_BINARY) { - bits.appendBits(value, 5); - } else { - bits.appendBits(value, 8); - } - } - } diff --git a/core/src/com/google/zxing/aztec/encoder/HighLevelEncoder.java b/core/src/com/google/zxing/aztec/encoder/HighLevelEncoder.java new file mode 100644 index 000000000..cf022769a --- /dev/null +++ b/core/src/com/google/zxing/aztec/encoder/HighLevelEncoder.java @@ -0,0 +1,307 @@ +/* + * Copyright 2013 ZXing authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.zxing.aztec.encoder; + +import com.google.zxing.common.BitArray; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; + +/** + * This produces nearly optimal encodings of text into the first-level of + * encoding used by Aztec code. + * + * It uses a dynamic algorithm. For each prefix of the string, it determines + * a set of encodings that could lead to this prefix. 
We repeatedly add a + * character and generate a new set of optimal encodings until we have read + * through the entire input. + * + * @author Frank Yellin + * @author Rustam Abdullaev + */ +public final class HighLevelEncoder { + + static final String[] MODE_NAMES = {"UPPER", "LOWER", "DIGIT", "MIXED", "PUNCT"}; + + static final int MODE_UPPER = 0; // 5 bits + static final int MODE_LOWER = 1; // 5 bits + static final int MODE_DIGIT = 2; // 4 bits + static final int MODE_MIXED = 3; // 5 bits + static final int MODE_PUNCT = 4; // 5 bits + + // The Latch Table shows, for each pair of Modes, the optimal method for + // getting from one mode to another. In the worst possible case, this can + // be up to 14 bits. In the best possible case, we are already there! + // The high half-word of each entry gives the number of bits. + // The low half-word of each entry are the actual bits necessary to change + static final int[][] LATCH_TABLE = { + { + 0, + (5 << 16) + 28, // UPPER -> LOWER + (5 << 16) + 30, // UPPER -> DIGIT + (5 << 16) + 29, // UPPER -> MIXED + (10 << 16) + (29 << 5) + 30, // UPPER -> MIXED -> PUNCT + }, + { + (9 << 16) + (30 << 4) + 14, // LOWER -> DIGIT -> UPPER + 0, + (5 << 16) + 30, // LOWER -> DIGIT + (5 << 16) + 29, // LOWER -> MIXED + (10 << 16) + (29 << 5) + 30, // LOWER -> MIXED -> PUNCT + }, + { + (4 << 16) + 14, // DIGIT -> UPPER + (9 << 16) + (14 << 5) + 28, // DIGIT -> UPPER -> LOWER + 0, + (9 << 16) + (14 << 5) + 29, // DIGIT -> UPPER -> MIXED + (14 << 16) + (14 << 10) + (29 << 5) + 30, + // DIGIT -> UPPER -> MIXED -> PUNCT + }, + { + (5 << 16) + 29, // MIXED -> UPPER + (5 << 16) + 28, // MIXED -> LOWER + (10 << 16) + (29 << 5) + 30, // MIXED -> UPPER -> DIGIT + 0, + (5 << 16) + 30, // MIXED -> PUNCT + }, + { + (5 << 16) + 31, // PUNCT -> UPPER + (10 << 16) + (31 << 5) + 28, // PUNCT -> UPPER -> LOWER + (10 << 16) + (31 << 5) + 30, // PUNCT -> UPPER -> DIGIT + (10 << 16) + (31 << 5) + 29, // PUNCT -> UPPER -> MIXED + 0, + }, + }; + + // A reverse 
mapping from [mode][char] to the encoding for that character + // in that mode. An entry of 0 indicates no mapping exists. + private static final int[][] CHAR_MAP = new int[5][256]; + static { + CHAR_MAP[MODE_UPPER][' '] = 1; + for (int c = 'A'; c <= 'Z'; c++) { + CHAR_MAP[MODE_UPPER][c] = c - 'A' + 2; + } + CHAR_MAP[MODE_LOWER][' '] = 1; + for (int c = 'a'; c <= 'z'; c++) { + CHAR_MAP[MODE_LOWER][c] = c - 'a' + 2; + } + CHAR_MAP[MODE_DIGIT][' '] = 1; + for (int c = '0'; c <= '9'; c++) { + CHAR_MAP[MODE_DIGIT][c] = c - '0' + 2; + } + CHAR_MAP[MODE_DIGIT][','] = 12; + CHAR_MAP[MODE_DIGIT]['.'] = 13; + int[] mixedTable = { + '\0', ' ', '\1', '\2', '\3', '\4', '\5', '\6', '\7', '\b', '\t', '\n', + '\13', '\f', '\r', '\33', '\34', '\35', '\36', '\37', '@', '\\', '^', + '_', '`', '|', '~', '\177' + }; + for (int i = 0; i < mixedTable.length; i++) { + CHAR_MAP[MODE_MIXED][mixedTable[i]] = i; + } + int[] punctTable = { + '\0', '\r', '\0', '\0', '\0', '\0', '!', '\'', '#', '$', '%', '&', '\'', + '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?', + '[', ']', '{', '}' + }; + for (int i = 0; i < punctTable.length; i++) { + if (punctTable[i] > 0) { + CHAR_MAP[MODE_PUNCT][punctTable[i]] = i; + } + } + } + + // A map showing the available shift coodes. (The shifts to BINARY are not + // shown + static final int[][] SHIFT_TABLE = new int[6][6]; // mode shift codes, per table + static { + for (int[] table : SHIFT_TABLE) { + Arrays.fill(table, -1); + } + SHIFT_TABLE[MODE_UPPER][MODE_PUNCT] = 0; + + SHIFT_TABLE[MODE_LOWER][MODE_PUNCT] = 0; + SHIFT_TABLE[MODE_LOWER][MODE_UPPER] = 28; + + SHIFT_TABLE[MODE_MIXED][MODE_PUNCT] = 0; + + SHIFT_TABLE[MODE_DIGIT][MODE_PUNCT] = 0; + SHIFT_TABLE[MODE_DIGIT][MODE_UPPER] = 15; + } + + private final byte[] text; + + public HighLevelEncoder(byte[] text) { + this.text = text; + } + + /** + * Convert the text represented by this High Level Encoder into a BitArray. 
+   */
+  public BitArray encode() {
+    List<State> states = Collections.singletonList(State.INITIAL_STATE);
+    for (int index = 0; index < text.length; index++) {
+      int pairCode;
+      int nextChar = index + 1 < text.length ? text[index + 1] : 0; // 0 when there is no next byte
+      switch (text[index]) {
+        case '\r':
+          pairCode = nextChar == '\n' ? 2 : 0; // CR LF
+          break;
+        case '.' :
+          pairCode = nextChar == ' ' ? 3 : 0; // period + space
+          break;
+        case ',' :
+          pairCode = nextChar == ' ' ? 4 : 0; // comma + space
+          break;
+        case ':' :
+          pairCode = nextChar == ' ' ? 5 : 0; // colon + space
+          break;
+        default:
+          pairCode = 0; // not the start of a two-character pair
+      }
+      if (pairCode > 0) {
+        // We have one of the four special PUNCT pairs. Treat them specially.
+        // Get a new set of states for the two new characters.
+        states = updateStateListForPair(states, index, pairCode);
+        index++; // the pair consumed two input bytes
+      } else {
+        // Get a new set of states for the new character.
+        states = updateStateListForChar(states, index);
+      }
+    }
+    // We are left with a set of states. Find the shortest one.
+    State minState = Collections.min(states, new Comparator<State>() {
+      @Override
+      public int compare(State a, State b) {
+        return a.getBitCount() - b.getBitCount(); // bit counts are never negative, so this cannot overflow
+      }
+    });
+    // Convert it to a bit array, and return.
+    return minState.toBitArray(text);
+  }
+
+  // We update a set of states for a new character by updating each state
+  // for the new character, merging the results, and then removing the
+  // non-optimal states.
+  private List<State> updateStateListForChar(Iterable<State> states, int index) {
+    Collection<State> result = new LinkedList<State>();
+    for (State state : states) {
+      updateStateForChar(state, index, result); // appends every candidate successor of "state"
+    }
+    return simplifyStates(result);
+  }
+
+  // Return a set of states that represent the possible ways of updating this
+  // state for the next character. The resulting set of states are added to
+  // the "result" list.
+  private void updateStateForChar(State state, int index, Collection<State> result) {
+    char ch = (char) (text[index] & 0xFF); // unsigned value, so it can index the 256-entry CHAR_MAP rows
+    boolean charInCurrentTable = CHAR_MAP[state.getMode()][ch] > 0;
+    State stateNoBinary = null;
+    for (int mode = 0; mode <= MODE_PUNCT; mode++) {
+      int charInMode = CHAR_MAP[mode][ch];
+      if (charInMode > 0) {
+        if (stateNoBinary == null) {
+          // Only create stateNoBinary the first time it's required.
+          stateNoBinary = state.endBinaryShift(index);
+        }
+        // Try generating the character by latching to its mode
+        if (!charInCurrentTable || mode == state.getMode() || mode == MODE_DIGIT) {
+          // If the character is in the current table, we don't want to latch to
+          // any other mode except possibly digit (which uses only 4 bits). Any
+          // other latch would be equally successful *after* this character, and
+          // so wouldn't save any bits.
+          State latch_state = stateNoBinary.latchAndAppend(mode, charInMode);
+          result.add(latch_state);
+        }
+        // Try generating the character by switching to its mode.
+        if (!charInCurrentTable && SHIFT_TABLE[state.getMode()][mode] >= 0) {
+          // It never makes sense to temporarily shift to another mode if the
+          // character exists in the current mode. That can never save bits.
+          State shift_state = stateNoBinary.shiftAndAppend(mode, charInMode);
+          result.add(shift_state);
+        }
+      }
+    }
+    if (state.getBinaryShiftByteCount() > 0 || CHAR_MAP[state.getMode()][ch] == 0) {
+      // It's never worthwhile to go into binary shift mode if you're not already
+      // in binary shift mode, and the character exists in your current mode.
+      // That can never save bits over just outputting the char in the current mode.
+      State binaryState = state.addBinaryShiftChar(index);
+      result.add(binaryState);
+    }
+  }
+
+  private static List<State> updateStateListForPair(Iterable<State> states, int index, int pairCode) {
+    Collection<State> result = new LinkedList<State>();
+    for (State state : states) {
+      updateStateForPair(state, index, pairCode, result); // appends every candidate successor of "state"
+    }
+    return simplifyStates(result);
+  }
+
+  private static void updateStateForPair(State state, int index, int pairCode, Collection<State> result) {
+    State stateNoBinary = state.endBinaryShift(index);
+    // Possibility 1. Latch to MODE_PUNCT, and then append this code
+    result.add(stateNoBinary.latchAndAppend(MODE_PUNCT, pairCode));
+    if (state.getMode() != MODE_PUNCT) {
+      // Possibility 2. Shift to MODE_PUNCT, and then append this code.
+      // Every state except MODE_PUNCT (handled above) can shift
+      result.add(stateNoBinary.shiftAndAppend(MODE_PUNCT, pairCode));
+    }
+    if (pairCode == 3 || pairCode == 4) {
+      // both characters are in DIGITS. Sometimes better to just add two digits
+      State digit_state = stateNoBinary
+          .latchAndAppend(MODE_DIGIT, 16 - pairCode) // period or comma in DIGIT
+          .latchAndAppend(MODE_DIGIT, 1); // space in DIGIT
+      result.add(digit_state);
+    }
+    if (state.getBinaryShiftByteCount() > 0) {
+      // It only makes sense to do the characters as binary if we're already
+      // in binary mode.
+      State binaryState = state.addBinaryShiftChar(index).addBinaryShiftChar(index + 1);
+      result.add(binaryState);
+    }
+  }
+
+  private static List<State> simplifyStates(Iterable<State> states) {
+    List<State> result = new LinkedList<State>();
+    for (State newState : states) {
+      boolean add = true;
+      for (Iterator<State> iterator = result.iterator(); iterator.hasNext(); ) {
+        State oldState = iterator.next();
+        if (oldState.isBetterThanOrEqualTo(newState)) {
+          add = false; // a kept state already dominates newState, so drop newState
+          break;
+        }
+        if (newState.isBetterThanOrEqualTo(oldState)) {
+          iterator.remove(); // newState dominates this kept state, so evict it
+        }
+      }
+      if (add) {
+        result.add(newState);
+      }
+    }
+    return result;
+  }
+
+}
diff --git a/core/src/com/google/zxing/aztec/encoder/SimpleToken.java b/core/src/com/google/zxing/aztec/encoder/SimpleToken.java
new file mode 100644
index 000000000..da3276486
--- /dev/null
+++ b/core/src/com/google/zxing/aztec/encoder/SimpleToken.java
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2013 ZXing authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.zxing.aztec.encoder;
+
+import com.google.zxing.common.BitArray;
+
+final class SimpleToken extends Token {
+
+  // The code word to emit and how many bits of it appendTo() asks the BitArray to write
+  private final short value;
+  private final short bitCount;
+
+  SimpleToken(Token previous, int totalBitCount, int value, int bitCount) {
+    super(previous, totalBitCount);
+    this.value = (short) value; // narrowing cast: callers must pass values that fit in a short
+    this.bitCount = (short) bitCount;
+  }
+
+  @Override
+  void appendTo(BitArray bitArray, byte[] text) {
+    bitArray.appendBits(value, bitCount); // "text" is not needed for a plain code word
+  }
+
+  @Override
+  public String toString() {
+    int value = this.value & ((1 << bitCount) - 1); // keep only the low bitCount bits
+    value |= 1 << bitCount; // sentinel bit so that leading zeros survive toBinaryString
+    return '<' + Integer.toBinaryString(value | (1 << bitCount)).substring(1) + '>'; // substring(1) drops the sentinel; the second OR is redundant
+  }
+
+}
diff --git a/core/src/com/google/zxing/aztec/encoder/State.java b/core/src/com/google/zxing/aztec/encoder/State.java
new file mode 100644
index 000000000..bd8e2035f
--- /dev/null
+++ b/core/src/com/google/zxing/aztec/encoder/State.java
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2013 ZXing authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.zxing.aztec.encoder;
+
+import java.util.Deque;
+import java.util.LinkedList;
+
+import com.google.zxing.common.BitArray;
+
+/**
+ * State represents all information about a sequence necessary to generate the current output.
+ * Note that a state is immutable.
+ */
+final class State {
+
+  static final State INITIAL_STATE = new State(Token.EMPTY, HighLevelEncoder.MODE_UPPER, 0, 0);
+
+  // The current mode of the encoding (or the mode to which we'll return if
+  // we're in Binary Shift mode).
+  private final int mode;
+  // The list of tokens that we output. If we are in Binary Shift mode, this
+  // token list does *not* yet include the token for those bytes.
+  private final Token token;
+  // If non-zero, the number of most recent bytes that should be output
+  // in Binary Shift mode.
+  private final int binaryShiftByteCount;
+  // The total number of bits generated (including Binary Shift).
+  private final int bitCount;
+
+  private State(Token token, int mode, int binaryBytes, int bitCount) {
+    this.token = token;
+    this.mode = mode;
+    this.binaryShiftByteCount = binaryBytes;
+    this.bitCount = bitCount;
+    // Make sure we match the token
+    //int binaryShiftBitCount = (binaryShiftByteCount * 8) +
+    //    (binaryShiftByteCount == 0 ? 0 :
+    //     binaryShiftByteCount <= 31 ? 10 :
+    //     binaryShiftByteCount <= 62 ? 20 : 21);
+    //assert this.bitCount == token.getTotalBitCount() + binaryShiftBitCount;
+  }
+
+  int getMode() {
+    return mode;
+  }
+
+  Token getToken() {
+    return token;
+  }
+
+  int getBinaryShiftByteCount() {
+    return binaryShiftByteCount;
+  }
+
+  int getBitCount() {
+    return bitCount;
+  }
+
+  // Create a new state representing this state with a latch to a (not
+  // necessarily different) mode, and then a code.
+  State latchAndAppend(int mode, int value) {
+    //assert binaryShiftByteCount == 0;
+    int bitCount = this.bitCount;
+    Token token = this.token;
+    if (mode != this.mode) {
+      int latch = HighLevelEncoder.LATCH_TABLE[this.mode][mode];
+      token = token.add(latch & 0xFFFF, latch >> 16); // low half-word: latch bits; high half-word: their bit count
+      bitCount += latch >> 16;
+    }
+    int latchModeBitCount = mode == HighLevelEncoder.MODE_DIGIT ? 4 : 5;
+    token = token.add(value, latchModeBitCount);
+    return new State(token, mode, 0, bitCount + latchModeBitCount);
+  }
+
+  // Create a new state representing this state, with a temporary shift
+  // to a different mode to output a single value.
+  State shiftAndAppend(int mode, int value) {
+    //assert binaryShiftByteCount == 0 && this.mode != mode;
+    Token token = this.token;
+    int thisModeBitCount = this.mode == HighLevelEncoder.MODE_DIGIT ? 4 : 5;
+    // Shifts exist only to UPPER and PUNCT, both with tokens size 5.
+    token = token.add(HighLevelEncoder.SHIFT_TABLE[this.mode][mode], thisModeBitCount);
+    token = token.add(value, 5);
+    return new State(token, this.mode, 0, this.bitCount + thisModeBitCount + 5); // mode is unchanged: a shift covers one value only
+  }
+
+  // Create a new state representing this state, but an additional character
+  // output in Binary Shift mode.
+  State addBinaryShiftChar(int index) {
+    Token token = this.token;
+    int mode = this.mode;
+    int bitCount = this.bitCount;
+    if (this.mode == HighLevelEncoder.MODE_PUNCT || this.mode == HighLevelEncoder.MODE_DIGIT) {
+      //assert binaryShiftByteCount == 0;
+      int latch = HighLevelEncoder.LATCH_TABLE[mode][HighLevelEncoder.MODE_UPPER]; // latch to UPPER before the binary run starts
+      token = token.add(latch & 0xFFFF, latch >> 16);
+      bitCount += latch >> 16;
+      mode = HighLevelEncoder.MODE_UPPER;
+    }
+    int deltaBitCount =
+        (binaryShiftByteCount == 0 || binaryShiftByteCount == 31) ? 18 : // 8 data bits + a new 10-bit header
+        (binaryShiftByteCount == 62) ? 9 : 8; // 63rd byte: header cost grows from 20 to 21 bits (see Token.addBinaryShift)
+    State result = new State(token, mode, binaryShiftByteCount + 1, bitCount + deltaBitCount);
+    if (result.binaryShiftByteCount == 2047 + 31) { // NOTE(review): presumably the largest count the long-form length field holds; confirm
+      // The string is as long as it's allowed to be. We should end it.
+      result = result.endBinaryShift(index + 1);
+    }
+    return result;
+  }
+
+  // Create the state identical to this one, but we are no longer in
+  // Binary Shift mode.
+ State endBinaryShift(int index) { + if (binaryShiftByteCount == 0) { + return this; + } + Token token = this.token; + token = token.addBinaryShift(index - binaryShiftByteCount, binaryShiftByteCount); + //assert token.getTotalBitCount() == this.bitCount; + return new State(token, mode, 0, this.bitCount); + } + + // Returns true if "this" state is better (or equal) to be in than "that" + // state under all possible circumstances. + boolean isBetterThanOrEqualTo(State other) { + int mySize = this.bitCount + (HighLevelEncoder.LATCH_TABLE[this.mode][other.mode] >> 16); + if (other.binaryShiftByteCount > 0 && + (this.binaryShiftByteCount == 0 || this.binaryShiftByteCount > other.binaryShiftByteCount)) { + mySize += 10; // Cost of entering Binary Shift mode. + } + return mySize <= other.bitCount; + } + + BitArray toBitArray(byte[] text) { + // Reverse the tokens, so that they are in the order that they should + // be output + Deque symbols = new LinkedList(); + for (Token token = endBinaryShift(text.length).token; token != null; token = token.getPrevious()) { + symbols.addFirst(token); + } + BitArray bitArray = new BitArray(); + // Add each token to the result. + for (Token symbol : symbols) { + symbol.appendTo(bitArray, text); + } + //assert bitArray.getSize() == this.bitCount; + return bitArray; + } + + @Override + public String toString() { + return String.format("%s bits=%d bytes=%d", HighLevelEncoder.MODE_NAMES[mode], bitCount, binaryShiftByteCount); + } + +} diff --git a/core/src/com/google/zxing/aztec/encoder/Token.java b/core/src/com/google/zxing/aztec/encoder/Token.java new file mode 100644 index 000000000..a41da2c3f --- /dev/null +++ b/core/src/com/google/zxing/aztec/encoder/Token.java @@ -0,0 +1,52 @@ +/* + * Copyright 2013 ZXing authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.zxing.aztec.encoder; + +import com.google.zxing.common.BitArray; + +abstract class Token { + + static final Token EMPTY = new SimpleToken(null, 0, 0, 0); // Chain terminator: no previous token, zero bits + + private final Token previous; + private final int totalBitCount; // Running bit total through this token; for debugging purposes only + + Token(Token previous, int totalBitCount) { + this.previous = previous; + this.totalBitCount = totalBitCount; + } + + final Token getPrevious() { + return previous; + } + + final int getTotalBitCount() { + return totalBitCount; + } + + final Token add(int value, int bitCount) { // Returns a new token chained after this one; this token is not modified + return new SimpleToken(this, this.totalBitCount + bitCount, value, bitCount); + } + + final Token addBinaryShift(int start, int byteCount) { + int bitCount = (byteCount * 8) + (byteCount <= 31 ? 10 : byteCount <= 62 ? 20 : 21); // 8 bits per byte plus a length-dependent header (presumably the Binary Shift code and its length field) + return new BinaryShiftToken(this, this.totalBitCount + bitCount, start, byteCount); + } + + abstract void appendTo(BitArray bitArray, byte[] text); + +} diff --git a/core/test/src/com/google/zxing/aztec/encoder/EncoderTest.java b/core/test/src/com/google/zxing/aztec/encoder/EncoderTest.java index b468a7075..3418cbf48 100644 --- a/core/test/src/com/google/zxing/aztec/encoder/EncoderTest.java +++ b/core/test/src/com/google/zxing/aztec/encoder/EncoderTest.java @@ -41,12 +41,13 @@ import com.google.zxing.common.DecoderResult; * Aztec 2D generator unit tests. 
* * @author Rustam Abdullaev + * @author Frank Yellin */ public final class EncoderTest extends Assert { private static final Charset LATIN_1 = Charset.forName("ISO-8859-1"); private static final Pattern DOTX = Pattern.compile("[^.X]"); - public static final ResultPoint[] NO_POINTS = new ResultPoint[0]; + private static final ResultPoint[] NO_POINTS = new ResultPoint[0]; // real life tests @@ -81,49 +82,49 @@ public final class EncoderTest extends Assert { @Test public void testEncode2() throws Exception { testEncode("Aztec Code is a public domain 2D matrix barcode symbology" + - " of nominally square symbols built on a square grid with a " + - "distinctive square bullseye pattern at their center.", false, 6, - " X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X X X X X X 
X X X X X X X \n" + - " X X X X X X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X X X X X X X X X \n" + - " X X X X X X X X X X X X X X X X \n" + - "X X X X X X X X X X X X X \n"); + " of nominally square symbols built on a square grid with a " + + "distinctive square bullseye pattern at their center.", false, 6, + " X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X 
X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X X X X X X X X X \n" + + " X X X X X X X X X X X X X X X X \n" + + "X X X X X X X X X X X X X \n"); } @Test @@ -167,7 +168,8 @@ public final class EncoderTest extends Assert { @Test public void testEncodeDecode5() throws Exception { - testEncodeDecode("http://test/~!@#*^%&)__ ;:'\"[]{}\\|-+-=`1029384756<>/?abc", false, 5); + testEncodeDecode("http://test/~!@#*^%&)__ ;:'\"[]{}\\|-+-=`1029384756<>/?abc" + + "Four score and seven our forefathers brought forth", false, 5); } @Test @@ -277,59 +279,106 @@ public final class EncoderTest extends Assert { @Test public void testHighLevelEncode() throws Exception { testHighLevelEncodeString("A. b.", + // 'A' P/S '. ' L/L b D/L '.' "...X. ..... ...XX XXX.. ...XX XXXX. XX.X"); testHighLevelEncodeString("Lorem ipsum.", + // 'L' L/L 'o' 'r' 'e' 'm' ' ' 'i' 'p' 's' 'u' 'm' D/L '.' ".XX.X XXX.. X.... X..XX ..XX. .XXX. ....X .X.X. X...X X.X.. X.XX. .XXX. XXXX. XX.X"); testHighLevelEncodeString("Lo. Test 123.", - ".XX.X XXX.. X.... ..... ...XX XXX.. X.X.X ..XX. X.X.. X.X.X ....X XXXX. ..XX .X.. .X.X XX.X"); + // 'L' L/L 'o' P/S '. ' U/S 'T' 'e' 's' 't' D/L ' ' '1' '2' '3' '.' + ".XX.X XXX.. X.... ..... ...XX XXX.. X.X.X ..XX. X.X.. X.X.X XXXX. ...X ..XX .X.. .X.X XX.X"); testHighLevelEncodeString("Lo...x", + // 'L' L/L 'o' D/L '.' '.' '.' 
U/L L/L 'x' ".XX.X XXX.. X.... XXXX. XX.X XX.X XX.X XXX. XXX.. XX..X"); testHighLevelEncodeString(". x://abc/.", + //P/S '. ' L/L 'x' P/S ':' P/S '/' P/S '/' 'a' 'b' 'c' P/S '/' D/L '.' "..... ...XX XXX.. XX..X ..... X.X.X ..... X.X.. ..... X.X.. ...X. ...XX ..X.. ..... X.X.. XXXX. XX.X"); + // Uses Binary/Shift rather than Lower/Shift to save two bits. + testHighLevelEncodeString("ABCdEFG", + //'A' 'B' 'C' B/S =1 'd' 'E' 'F' 'G' + "...X. ...XX ..X.. XXXXX ....X .XX..X.. ..XX. ..XXX .X..."); + + testHighLevelEncodeString( + // Found on an airline boarding pass. Several stretches of Binary shift are + // necessary to keep the bitcount so low. + "09 UAG ^160MEUCIQC0sYS/HpKxnBELR1uB85R20OoqqwFGa0q2uEi" + + "Ygh6utAIgLl1aBVM4EOTQtMQQYH9M2Z3Dp4qnA/fwWuQ+M8L3V8U=", + 823); } @Test public void testHighLevelEncodeBinary() throws Exception { // binary short form single byte testHighLevelEncodeString("N\0N", - ".XXXX XXXXX ...X. ........ .X..XXX."); + // 'N' B/S =1 '\0' N + ".XXXX XXXXX ....X ........ .XXXX"); // Encode "N" in UPPER + + testHighLevelEncodeString("N\0n", + // 'N' B/S =2 '\0' 'n' + ".XXXX XXXXX ...X. ........ .XX.XXX."); // Encode "n" in BINARY + // binary short form consecutive bytes testHighLevelEncodeString("N\0\u0080 A", + // 'N' B/S =2 '\0' \u0080 ' ' 'A' ".XXXX XXXXX ...X. ........ X....... ....X ...X."); + // binary skipping over single character testHighLevelEncodeString("\0a\u00FF\u0080 A", + // B/S =4 '\0' 'a' '\3ff' '\200' ' ' 'A' "XXXXX ..X.. ........ .XX....X XXXXXXXX X....... ....X ...X."); - // binary long form optimization into 2 short forms (saves 1 bit) - testHighLevelEncodeString( - "\0\0\0\0 \0\0\0\0 \0\0\0\0 \0\0\0\0 \0\0\0\0 \0\0\0\0 \u0082\u0084\u0088\0 \0\0\0\0 \0\0\0\0 ", - "XXXXX XXXXX ........ ........ ........ ........ ..X....." + - " ........ ........ ........ ........ ..X....." + - " ........ ........ ........ ........ ..X....." + - " ........ ........ ........ ........ ..X....." + - " ........ ........ ........ ........ ..X....." 
+ - " ........ ........ ........ ........ ..X....." + - " X.....X. XXXXX .XXX. X....X.. X...X... ........ ..X....." + - " ........ ........ ........ ........ ..X....." + - " ........ ........ ........ ........ ..X....."); - // binary long form - testHighLevelEncodeString( - "\0\0\0\0 \0\0\1\0 \0\0\2\0 \0\0\3\0 \0\0\4\0 \0\0\5\0 \0\0\6\0 \0\0\7\0 \0\0\u0008" + - "\0 \0\0\u0009\0 \0\0\u00F0\0 \0\0\u00F1\0 \0\0\u00F2\0A", - "XXXXX ..... .....X...X. ........ ........ ........ ........ ..X....." + - " ........ ........ .......X ........ ..X....." + - " ........ ........ ......X. ........ ..X....." + - " ........ ........ ......XX ........ ..X....." + - " ........ ........ .....X.. ........ ..X....." + - " ........ ........ .....X.X ........ ..X....." + - " ........ ........ .....XX. ........ ..X....." + - " ........ ........ .....XXX ........ ..X....." + - " ........ ........ ....X... ........ ..X....." + - " ........ ........ ....X..X ........ ..X....." + - " ........ ........ XXXX.... ........ ..X....." + - " ........ ........ XXXX...X ........ ..X....." + - " ........ ........ XXXX..X. ........ .X.....X"); + + // getting into binary mode from digit mode + testHighLevelEncodeString("1234\0", + //D/L '1' '2' '3' '4' U/L B/S =1 \0 + "XXXX. ..XX .X.. .X.X .XX. XXX. XXXXX ....X ........" + ); + + // Create a string in which every character requires binary + StringBuilder sb = new StringBuilder(); + for (int i = 0; i <= 3000; i++) { + sb.append((char)(128 + (i % 30))); + } + // Test the output generated by Binary/Shift, particularly near the + // places where the encoding changes: 31, 62, and 2047+31=2078 + for (int i : new int[] { 1, 2, 3, 10, 29, 30, 31, 32, 33, + 60, 61, 62, 63, 64, 2076, 2077, 2078, 2079, 2080, 2100}) { + // This is the expected length of a binary string of length "i" + int expectedLength = (8 * i) + + ( (i <= 31) ? 10 : (i <= 62) ? 20 : (i <= 2078) ? 21 : 31); + // Verify that we are correct about the length. 
+ testHighLevelEncodeString(sb.substring(0, i), expectedLength); + if (i != 1 && i != 32 && i != 2079) { // A lower case letter at either end merges into binary mode, except at sizes where that byte would open a new Binary Shift header (18 bits) and latching to LOWER (10 bits) is cheaper + testHighLevelEncodeString('a' + sb.substring(0, i - 1), expectedLength); + testHighLevelEncodeString(sb.substring(0, i - 1) + 'a', expectedLength); + } + // A lower case letter at both ends will be enough to latch us into LOWER. + testHighLevelEncodeString('a' + sb.substring(0, i) + 'b', expectedLength + 15); + } } + @Test + public void testHighLevelEncodePairs() throws Exception { + // Typical usage + testHighLevelEncodeString("ABC. DEF\r\n", + // A B C P/S . D E F P/S \r\n + "...X. ...XX ..X.. ..... ...XX ..X.X ..XX. ..XXX ..... ...X."); + + // We should latch to PUNCT mode, rather than shift. Also check all pairs + testHighLevelEncodeString("A. : , \r\n", + // 'A' M/L P/L ". " ": " ", " "\r\n" + "...X. XXX.X XXXX. ...XX ..X.X ..X.. ...X."); + + // Latch to DIGIT rather than shift to PUNCT + testHighLevelEncodeString("A. 1234", + // 'A' D/L '.' ' ' '1' '2' '3' '4' + "...X. XXXX. XX.X ...X ..XX .X.. .X.X .X X." + ); + // Don't bother leaving Binary Shift. + testHighLevelEncodeString("A\200. \200", + // 'A' B/S =4 \200 "." " " \200 + "...X. XXXXX ..X.. X....... ..X.XXX. ..X..... 
X......."); + } + // Helper routines private static void testEncode(String data, boolean compact, int layers, String expected) throws Exception { @@ -400,7 +449,7 @@ public final class EncoderTest extends Assert { assertEquals(expectedData, res.getText()); } - static Random getPseudoRandom() { + private static Random getPseudoRandom() { return new SecureRandom(new byte[] {(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF}); } @@ -426,10 +475,26 @@ public final class EncoderTest extends Assert { return in; } + private static boolean[] toBooleanArray(BitArray bitArray) { + boolean[] result = new boolean[bitArray.getSize()]; + for (int i = 0; i < result.length; i++) { + result[i] = bitArray.get(i); + } + return result; + } + private static void testHighLevelEncodeString(String s, String expectedBits) { - BitArray bits = Encoder.highLevelEncode(s.getBytes(LATIN_1)); + BitArray bits = new HighLevelEncoder(s.getBytes(LATIN_1)).encode(); String receivedBits = bits.toString().replace(" ", ""); assertEquals("highLevelEncode() failed for input string: " + s, expectedBits.replace(" ", ""), receivedBits); + assertEquals(s, Decoder.highLevelDecode(toBooleanArray(bits))); + } + + private static void testHighLevelEncodeString(String s, int expectedReceivedBits) { + BitArray bits = new HighLevelEncoder(s.getBytes(LATIN_1)).encode(); + int receivedBitCount = bits.toString().replace(" ", "").length(); + assertEquals("highLevelEncode() failed for input string: " + s, expectedReceivedBits, receivedBitCount); // compare against the caller's expectation, not against itself + assertEquals(s, Decoder.highLevelDecode(toBooleanArray(bits))); + } }