Issue 1693 Aztec encoder improvements

git-svn-id: https://zxing.googlecode.com/svn/trunk@2814 59b500cc-1b3d-0410-9834-0bbf25fbcc57
2025-03-05 20:48:51 -08:00 · 2013-06-01 13:56:31 +00:00 · 2013-06-01 13:56:31 +00:00 · 11dbdde1c5
parent f9bf974d1c
commit 11dbdde1c5
8 changed files with 793 additions and 326 deletions
--- a/core/src/com/google/zxing/aztec/decoder/Decoder.java
+++ b/core/src/com/google/zxing/aztec/decoder/Decoder.java
@ -113,12 +113,19 @@ public final class Decoder {
   * @throws FormatException if the input is not valid
   */
  private String getEncodedData(boolean[] correctedBits) throws FormatException {
-
    int endIndex = codewordSize * ddata.getNbDatablocks() - invertedBitCount;
    if (endIndex > correctedBits.length) {
      throw FormatException.getFormatInstance();
    }
+    return getEncodedData(correctedBits, endIndex);
+  }

+  // This method is used for testing the high-level encoder
+  public static String highLevelDecode(boolean[] correctedBits) {
+    return getEncodedData(correctedBits, correctedBits.length);
+  }
+
+  private static String getEncodedData(boolean[] correctedBits, int endIndex) {
    Table lastTable = Table.UPPER;
    Table table = Table.UPPER;
    int startIndex = 0;
--- a/core/src/com/google/zxing/aztec/encoder/BinaryShiftToken.java
+++ b/core/src/com/google/zxing/aztec/encoder/BinaryShiftToken.java
@ -0,0 +1,62 @@
+/*
+ * Copyright 2013 ZXing authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.zxing.aztec.encoder;
+
+import com.google.zxing.common.BitArray;
+
+final class BinaryShiftToken extends Token {
+  
+  private final short binaryShiftStart;
+  private final short binaryShiftByteCount;
+  
+  BinaryShiftToken(Token previous, 
+                   int totalBitCount,
+                   int binaryShiftStart, 
+                   int binaryShiftByteCount) {
+    super(previous, totalBitCount);
+    this.binaryShiftStart = (short) binaryShiftStart;
+    this.binaryShiftByteCount = (short) binaryShiftByteCount;
+  }
+
+  @Override
+  public void appendTo(BitArray bitArray, byte[] text) {
+    for (int i = 0; i < binaryShiftByteCount; i++) {
+      if (i == 0 || (i == 31 && binaryShiftByteCount <= 62))  {
+        // We need a header before the first character, and before
+        // character 31 when the total byte count is <= 62
+        bitArray.appendBits(31, 5);
+        if (binaryShiftByteCount > 62) {
+          bitArray.appendBits(binaryShiftByteCount - 31, 16);
+        } else if (i == 0) {
+          // 1 <= binaryShiftByteCount <= 62
+          bitArray.appendBits(Math.min(binaryShiftByteCount, 31), 5);
+        } else {
+          // 32 <= binaryShiftByteCount <= 62 and i == 31
+          bitArray.appendBits(binaryShiftByteCount - 31, 5);
+        }
+      }
+      bitArray.appendBits(text[binaryShiftStart + i], 8);
+    }
+    //assert bitArray.getSize() == getTotalBitCount();
+  }
+
+  @Override
+  public String toString() {
+    return "<" + binaryShiftStart + "::" + (binaryShiftStart + binaryShiftByteCount - 1) + '>';
+  }
+
+}
--- a/core/src/com/google/zxing/aztec/encoder/Encoder.java
+++ b/core/src/com/google/zxing/aztec/encoder/Encoder.java
@ -16,8 +16,6 @@

 package com.google.zxing.aztec.encoder;

-import java.util.Arrays;
-
 import com.google.zxing.common.BitArray;
 import com.google.zxing.common.BitMatrix;
 import com.google.zxing.common.reedsolomon.GenericGF;
@ -29,78 +27,12 @@ import com.google.zxing.common.reedsolomon.ReedSolomonEncoder;
 * @author Rustam Abdullaev
 */
 public final class Encoder {
-  
-  public static final int DEFAULT_EC_PERCENT = 33; // default minimal percentage of error check words

-  private static final int TABLE_UPPER  = 0; // 5 bits
-  private static final int TABLE_LOWER  = 1; // 5 bits
-  private static final int TABLE_DIGIT  = 2; // 4 bits
-  private static final int TABLE_MIXED  = 3; // 5 bits
-  private static final int TABLE_PUNCT  = 4; // 5 bits
-  private static final int TABLE_BINARY = 5; // 8 bits
-  
-  private static final int[][] CHAR_MAP = new int[5][256]; // reverse mapping ASCII -> table offset, per table
-  private static final int[][] SHIFT_TABLE = new int[6][6]; // mode shift codes, per table
-  private static final int[][] LATCH_TABLE = new int[6][6]; // mode latch codes, per table
+  public static final int DEFAULT_EC_PERCENT = 33; // default minimal percentage of error check words
  private static final int[] NB_BITS; // total bits per full symbol for a given number of layers
  private static final int[] NB_BITS_COMPACT; // total bits per compact symbol for a given number of layers

  static {
-    CHAR_MAP[TABLE_UPPER][' '] = 1;
-    for (int c = 'A'; c <= 'Z'; c++) {
-      CHAR_MAP[TABLE_UPPER][c] = c - 'A' + 2;
-    }
-    CHAR_MAP[TABLE_LOWER][' '] = 1;
-    for (int c = 'a'; c <= 'z'; c++) {
-      CHAR_MAP[TABLE_LOWER][c] = c - 'a' + 2;
-    }
-    CHAR_MAP[TABLE_DIGIT][' '] = 1;
-    for (int c = '0'; c <= '9'; c++) {
-      CHAR_MAP[TABLE_DIGIT][c] = c - '0' + 2;
-    }
-    CHAR_MAP[TABLE_DIGIT][','] = 12;
-    CHAR_MAP[TABLE_DIGIT]['.'] = 13;
-    int[] mixedTable = { 
-        '\0', ' ', '\1', '\2', '\3', '\4', '\5', '\6', '\7', '\b', '\t', '\n', '\13', '\f', '\r', 
-        '\33', '\34', '\35', '\36', '\37', '@', '\\', '^', '_', '`', '|', '~', '\177' 
-    };
-    for (int i = 0; i < mixedTable.length; i++) {
-      CHAR_MAP[TABLE_MIXED][mixedTable[i]] = i;
-    }
-    int[] punctTable = { 
-        '\0', '\r', '\0', '\0', '\0', '\0', '!', '\'', '#', '$', '%', '&', '\'', '(', ')', '*', '+',
-        ',', '-', '.', '/', ':', ';', '<', '=', '>', '?', '[', ']', '{', '}' 
-    };
-    for (int i = 0; i < punctTable.length; i++) {
-      if (punctTable[i] > 0) {
-        CHAR_MAP[TABLE_PUNCT][punctTable[i]] = i;
-      }
-    }
-    for (int[] table : SHIFT_TABLE) {
-      Arrays.fill(table, -1);
-    }
-    for (int[] table : LATCH_TABLE) {
-      Arrays.fill(table, -1);
-    }
-    SHIFT_TABLE[TABLE_UPPER][TABLE_PUNCT] = 0;
-    LATCH_TABLE[TABLE_UPPER][TABLE_LOWER] = 28;
-    LATCH_TABLE[TABLE_UPPER][TABLE_MIXED] = 29;
-    LATCH_TABLE[TABLE_UPPER][TABLE_DIGIT] = 30;
-    SHIFT_TABLE[TABLE_UPPER][TABLE_BINARY] = 31;
-    SHIFT_TABLE[TABLE_LOWER][TABLE_PUNCT] = 0;
-    SHIFT_TABLE[TABLE_LOWER][TABLE_UPPER] = 28;
-    LATCH_TABLE[TABLE_LOWER][TABLE_MIXED] = 29;
-    LATCH_TABLE[TABLE_LOWER][TABLE_DIGIT] = 30;
-    SHIFT_TABLE[TABLE_LOWER][TABLE_BINARY] = 31;
-    SHIFT_TABLE[TABLE_MIXED][TABLE_PUNCT] = 0;
-    LATCH_TABLE[TABLE_MIXED][TABLE_LOWER] = 28;
-    LATCH_TABLE[TABLE_MIXED][TABLE_UPPER] = 29;
-    LATCH_TABLE[TABLE_MIXED][TABLE_PUNCT] = 30;
-    SHIFT_TABLE[TABLE_MIXED][TABLE_BINARY] = 31;
-    LATCH_TABLE[TABLE_PUNCT][TABLE_UPPER] = 31;
-    SHIFT_TABLE[TABLE_DIGIT][TABLE_PUNCT] = 0;
-    LATCH_TABLE[TABLE_DIGIT][TABLE_UPPER] = 30;
-    SHIFT_TABLE[TABLE_DIGIT][TABLE_UPPER] = 31;
    NB_BITS_COMPACT = new int[5];
    for (int i = 1; i < NB_BITS_COMPACT.length; i++) {
      NB_BITS_COMPACT[i] = (88 + 16 * i) * i;
@ -140,7 +72,7 @@ public final class Encoder {
  public static AztecCode encode(byte[] data, int minECCPercent) {
    
    // High-level encode
-    BitArray bits = highLevelEncode(data);
+    BitArray bits = new HighLevelEncoder(data).encode();
    
    // stuff bits and choose symbol size
    int eccBits = bits.getSize() * minECCPercent / 100 + 11;
@ -183,12 +115,9 @@ public final class Encoder {

    // pad the end
    int messageSizeInWords = (stuffedBits.getSize() + wordSize - 1) / wordSize;
-    // This seems to be redundant? 
-    /*    
    for (int i = messageSizeInWords * wordSize - stuffedBits.getSize(); i > 0; i--) {
      stuffedBits.appendBit(true);
    }
-     */

    // generate check words
    ReedSolomonEncoder rs = new ReedSolomonEncoder(getGF(wordSize));
@ -277,7 +206,7 @@ public final class Encoder {
    return aztec;
  }
  
-  static void drawBullsEye(BitMatrix matrix, int center, int size) {
+  private static void drawBullsEye(BitMatrix matrix, int center, int size) {
    for (int i = 0; i < size; i += 2) {
      for (int j = center - i; j <= center + i; j++) {
        matrix.set(j, center - i);
@ -308,7 +237,7 @@ public final class Encoder {
    return modeMessage;
  }
  
-  static void drawModeMessage(BitMatrix matrix, boolean compact, int matrixSize, BitArray modeMessage) {
+  private static void drawModeMessage(BitMatrix matrix, boolean compact, int matrixSize, BitArray modeMessage) {
    if (compact) {
      for (int i = 0; i < 7; i++) {
        if (modeMessage.get(i)) {
@ -342,7 +271,7 @@ public final class Encoder {
    }
  }
  
-  static BitArray generateCheckWords(BitArray stuffedBits, int totalSymbolBits, int wordSize) {
+  private static BitArray generateCheckWords(BitArray stuffedBits, int totalSymbolBits, int wordSize) {
    int messageSizeInWords = (stuffedBits.getSize() + wordSize - 1) / wordSize;
    for (int i = messageSizeInWords * wordSize - stuffedBits.getSize(); i > 0; i--) {
      stuffedBits.appendBit(true);
@ -360,7 +289,7 @@ public final class Encoder {
    return messageBits;
  }
  
-  static int[] bitsToWords(BitArray stuffedBits, int wordSize, int totalWords) {
+  private static int[] bitsToWords(BitArray stuffedBits, int wordSize, int totalWords) {
    int[] message = new int[totalWords];
    int i;
    int n;
@ -374,7 +303,7 @@ public final class Encoder {
    return message;
  }
  
-  static GenericGF getGF(int wordSize) {
+  private static GenericGF getGF(int wordSize) {
    switch (wordSize) {
      case 4:
        return GenericGF.AZTEC_PARAM;
@ -416,8 +345,6 @@ public final class Encoder {
    }
    
    // 2. pad last word to wordSize
-    // This seems to be redundant?
-    /*
    n = out.getSize();
    int remainder = n % wordSize;
    if (remainder != 0) {
@ -432,173 +359,6 @@ public final class Encoder {
      }
      out.appendBit(j == 0);
    }
-     */
    return out;
  }
-  
-  static BitArray highLevelEncode(byte[] data) {
-    BitArray bits = new BitArray();
-    int mode = TABLE_UPPER;
-    int[] idx = new int[5];
-    int[] idxnext = new int[5];
-    for (int i = 0; i < data.length; i++) {
-      int c = data[i] & 0xFF;
-      int next = i < data.length - 1 ? data[i + 1] & 0xFF : 0;
-      int punctWord = 0;
-      // special case: double-character codes
-      if (c == '\r' && next == '\n') {
-        punctWord = 2;
-      } else if (c == '.' && next == ' ') {
-        punctWord = 3;
-      } else if (c == ',' && next == ' ') {
-        punctWord = 4;
-      } else if (c == ':' && next == ' ') {
-        punctWord = 5;
-      }
-      if (punctWord > 0) {
-        if (mode == TABLE_PUNCT) {
-          outputWord(bits, TABLE_PUNCT, punctWord);
-          i++;
-          continue;
-        } else if (SHIFT_TABLE[mode][TABLE_PUNCT] >= 0) {
-          outputWord(bits, mode, SHIFT_TABLE[mode][TABLE_PUNCT]);
-          outputWord(bits, TABLE_PUNCT, punctWord);
-          i++;
-          continue;
-        } else if (LATCH_TABLE[mode][TABLE_PUNCT] >= 0) {
-          outputWord(bits, mode, LATCH_TABLE[mode][TABLE_PUNCT]);
-          outputWord(bits, TABLE_PUNCT, punctWord);
-          mode = TABLE_PUNCT;
-          i++;
-          continue;
-        }
-      }
-      // find the best matching table, taking current mode and next character into account
-      int firstMatch = -1;
-      int shiftMode = -1;
-      int latchMode = -1;
-      int j;
-      for (j = 0; j < TABLE_BINARY; j++) {
-        idx[j] = CHAR_MAP[j][c];
-        if (idx[j] > 0 && firstMatch < 0) {
-          firstMatch = j;
-        }
-        if (shiftMode < 0 && idx[j] > 0 && SHIFT_TABLE[mode][j] >= 0) {
-          shiftMode = j;
-        }
-        idxnext[j] = CHAR_MAP[j][next];
-        if (latchMode < 0 && idx[j] > 0 && (next == 0 || idxnext[j] > 0) && LATCH_TABLE[mode][j] >= 0) {
-          latchMode = j;
-        }
-      }
-      if (shiftMode < 0 && latchMode < 0) {
-        for (j = 0; j < TABLE_BINARY; j++) {
-          if (idx[j] > 0 && LATCH_TABLE[mode][j] >= 0) {
-            latchMode = j;
-            break;
-          }
-        }
-      }
-      if (idx[mode] > 0) {
-        // found character in current table - stay in current table
-        outputWord(bits, mode, idx[mode]);
-      } else {
-        if (latchMode >= 0) {
-          // latch into mode latchMode
-          outputWord(bits, mode, LATCH_TABLE[mode][latchMode]);
-          outputWord(bits, latchMode, idx[latchMode]);
-          mode = latchMode;
-        } else if (shiftMode >= 0) {
-          // shift into shiftMode
-          outputWord(bits, mode, SHIFT_TABLE[mode][shiftMode]);
-          outputWord(bits, shiftMode, idx[shiftMode]);
-        } else {
-          if (firstMatch >= 0) {
-            // can't switch into this mode from current mode - switch in two steps
-            if (mode == TABLE_PUNCT) {
-              outputWord(bits, TABLE_PUNCT, LATCH_TABLE[TABLE_PUNCT][TABLE_UPPER]);
-              mode = TABLE_UPPER;
-              i--;
-              continue;
-            } else if (mode == TABLE_DIGIT) {
-              outputWord(bits, TABLE_DIGIT, LATCH_TABLE[TABLE_DIGIT][TABLE_UPPER]);
-              mode = TABLE_UPPER;
-              i--;
-              continue;
-            }
-          }
-          // use binary table
-          // find the binary string length
-          int k;
-          int lookahead;
-          for (k = i + 1, lookahead = 0; k < data.length; k++) {
-            next = data[k] & 0xFF;
-            boolean binary = true;
-            for (j = 0; j < TABLE_BINARY; j++) {
-              if (CHAR_MAP[j][next] > 0) {
-                binary = false;
-                break;
-              }
-            }
-            if (binary) {
-              lookahead = 0;
-            } else {
-              // skip over single character in between binary bytes
-              if (lookahead >= 1) {
-                k -= lookahead;
-                break;
-              }
-              lookahead++;
-            }
-          }
-          k -= i;
-          // switch into binary table
-          switch (mode) {
-          case TABLE_UPPER:
-          case TABLE_LOWER:
-          case TABLE_MIXED:
-            outputWord(bits, mode, SHIFT_TABLE[mode][TABLE_BINARY]);
-            break;
-          case TABLE_DIGIT:
-            outputWord(bits, mode, LATCH_TABLE[mode][TABLE_UPPER]);
-            mode = TABLE_UPPER;
-            outputWord(bits, mode, SHIFT_TABLE[mode][TABLE_BINARY]);
-            break;
-          case TABLE_PUNCT:
-            outputWord(bits, mode, LATCH_TABLE[mode][TABLE_UPPER]);
-            mode = TABLE_UPPER;
-            outputWord(bits, mode, SHIFT_TABLE[mode][TABLE_BINARY]);
-            break;
-          }
-          if (k >= 32 && k < 63) { // optimization: split one long form into two short forms, saves 1 bit
-            k = 31;
-          }
-          if (k > 542) { // maximum encodable binary length in long form is 511 + 31
-            k = 542;
-          }
-          if (k < 32) {
-            bits.appendBits(k, 5);
-          } else {
-            bits.appendBits(k - 31, 16);
-          }
-          for (; k > 0; k--, i++) {
-            bits.appendBits(data[i], 8);
-          }
-          i--;
-        }
-      }
-    }
-    return bits;
-  }
-
-  static void outputWord(BitArray bits, int mode, int value) {
-    if (mode == TABLE_DIGIT) {
-      bits.appendBits(value, 4);
-    } else if (mode < TABLE_BINARY) {
-      bits.appendBits(value, 5);
-    } else {
-      bits.appendBits(value, 8);
-    }
-  }
-
 }
--- a/core/src/com/google/zxing/aztec/encoder/HighLevelEncoder.java
+++ b/core/src/com/google/zxing/aztec/encoder/HighLevelEncoder.java
@ -0,0 +1,307 @@
+/*
+ * Copyright 2013 ZXing authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.zxing.aztec.encoder;
+
+import com.google.zxing.common.BitArray;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * This produces nearly optimal encodings of text into the first-level of
+ * encoding used by Aztec code.
+ *
+ * It uses a dynamic algorithm.  For each prefix of the string, it determines
+ * a set of encodings that could lead to this prefix.  We repeatedly add a
+ * character and generate a new set of optimal encodings until we have read
+ * through the entire input.
+ *
+ * @author Frank Yellin
+ * @author Rustam Abdullaev
+ */
+public final class HighLevelEncoder {
+
+  static final String[] MODE_NAMES = {"UPPER", "LOWER", "DIGIT", "MIXED", "PUNCT"};
+
+  static final int MODE_UPPER = 0; // 5 bits
+  static final int MODE_LOWER = 1; // 5 bits
+  static final int MODE_DIGIT = 2; // 4 bits
+  static final int MODE_MIXED = 3; // 5 bits
+  static final int MODE_PUNCT = 4; // 5 bits
+
+  // The Latch Table shows, for each pair of Modes, the optimal method for
+  // getting from one mode to another.  In the worst possible case, this can
+  // be up to 14 bits.  In the best possible case, we are already there!
+  // The high half-word of each entry gives the number of bits.
+  // The low half-word of each entry holds the actual bits needed to change modes.
+  static final int[][] LATCH_TABLE = {
+    {
+      0,
+      (5 << 16) + 28,              // UPPER -> LOWER
+      (5 << 16) + 30,              // UPPER -> DIGIT
+      (5 << 16) + 29,              // UPPER -> MIXED
+      (10 << 16) + (29 << 5) + 30, // UPPER -> MIXED -> PUNCT
+    },
+    {
+      (9 << 16) + (30 << 4) + 14,  // LOWER -> DIGIT -> UPPER
+      0,
+      (5 << 16) + 30,              // LOWER -> DIGIT
+      (5 << 16) + 29,              // LOWER -> MIXED
+      (10 << 16) + (29 << 5) + 30, // LOWER -> MIXED -> PUNCT
+    },
+    {
+      (4 << 16) + 14,              // DIGIT -> UPPER
+      (9 << 16) + (14 << 5) + 28,  // DIGIT -> UPPER -> LOWER
+      0,
+      (9 << 16) + (14 << 5) + 29,  // DIGIT -> UPPER -> MIXED
+      (14 << 16) + (14 << 10) + (29 << 5) + 30,
+                                   // DIGIT -> UPPER -> MIXED -> PUNCT
+    },
+    {
+      (5 << 16) + 29,              // MIXED -> UPPER
+      (5 << 16) + 28,              // MIXED -> LOWER
+      (10 << 16) + (29 << 5) + 30, // MIXED -> UPPER -> DIGIT
+      0,
+      (5 << 16) + 30,              // MIXED -> PUNCT
+    },
+    {
+      (5 << 16) + 31,              // PUNCT -> UPPER
+      (10 << 16) + (31 << 5) + 28, // PUNCT -> UPPER -> LOWER
+      (10 << 16) + (31 << 5) + 30, // PUNCT -> UPPER -> DIGIT
+      (10 << 16) + (31 << 5) + 29, // PUNCT -> UPPER -> MIXED
+      0,
+    },
+  };
+
+  // A reverse mapping from [mode][char] to the encoding for that character
+  // in that mode.  An entry of 0 indicates no mapping exists.
+  private static final int[][] CHAR_MAP = new int[5][256];
+  static {
+    CHAR_MAP[MODE_UPPER][' '] = 1;
+    for (int c = 'A'; c <= 'Z'; c++) {
+      CHAR_MAP[MODE_UPPER][c] = c - 'A' + 2;
+    }
+    CHAR_MAP[MODE_LOWER][' '] = 1;
+    for (int c = 'a'; c <= 'z'; c++) {
+      CHAR_MAP[MODE_LOWER][c] = c - 'a' + 2;
+    }
+    CHAR_MAP[MODE_DIGIT][' '] = 1;
+    for (int c = '0'; c <= '9'; c++) {
+      CHAR_MAP[MODE_DIGIT][c] = c - '0' + 2;
+    }
+    CHAR_MAP[MODE_DIGIT][','] = 12;
+    CHAR_MAP[MODE_DIGIT]['.'] = 13;
+    int[] mixedTable = {
+        '\0', ' ', '\1', '\2', '\3', '\4', '\5', '\6', '\7', '\b', '\t', '\n',
+        '\13', '\f', '\r', '\33', '\34', '\35', '\36', '\37', '@', '\\', '^',
+        '_', '`', '|', '~', '\177'
+    };
+    for (int i = 0; i < mixedTable.length; i++) {
+      CHAR_MAP[MODE_MIXED][mixedTable[i]] = i;
+    }
+    int[] punctTable = {
+        '\0', '\r', '\0', '\0', '\0', '\0', '!', '\'', '#', '$', '%', '&', '\'',
+        '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?',
+        '[', ']', '{', '}'
+    };
+    for (int i = 0; i < punctTable.length; i++) {
+      if (punctTable[i] > 0) {
+        CHAR_MAP[MODE_PUNCT][punctTable[i]] = i;
+      }
+    }
+  }
+
+  // A map showing the available shift codes.  (The shifts to BINARY are not
+  // shown.)
+  static final int[][] SHIFT_TABLE = new int[6][6]; // mode shift codes, per table
+  static {
+    for (int[] table : SHIFT_TABLE) {
+      Arrays.fill(table, -1);
+    }
+    SHIFT_TABLE[MODE_UPPER][MODE_PUNCT] = 0;
+
+    SHIFT_TABLE[MODE_LOWER][MODE_PUNCT] = 0;
+    SHIFT_TABLE[MODE_LOWER][MODE_UPPER] = 28;
+
+    SHIFT_TABLE[MODE_MIXED][MODE_PUNCT] = 0;
+
+    SHIFT_TABLE[MODE_DIGIT][MODE_PUNCT] = 0;
+    SHIFT_TABLE[MODE_DIGIT][MODE_UPPER] = 15;
+  }
+
+  private final byte[] text;
+
+  public HighLevelEncoder(byte[] text) {
+    this.text = text;
+  }
+
+  /**
+   * Convert the text represented by this High Level Encoder into a BitArray.
+   */
+  public BitArray encode() {
+    List<State> states = Collections.singletonList(State.INITIAL_STATE);
+    for (int index = 0; index < text.length; index++) {
+      int pairCode;
+      int nextChar = index + 1 < text.length ? text[index + 1] : 0;
+      switch (text[index]) {
+        case '\r':  
+          pairCode = nextChar == '\n' ? 2 : 0; 
+          break;
+        case '.' :  
+          pairCode = nextChar == ' '  ? 3 : 0; 
+          break;
+        case ',' :  
+          pairCode = nextChar == ' ' ? 4 : 0; 
+          break;
+        case ':' :  
+          pairCode = nextChar == ' ' ? 5 : 0; 
+          break;
+        default:    
+          pairCode = 0;
+      }
+      if (pairCode > 0) {
+        // We have one of the four special PUNCT pairs.  Treat them specially.
+        // Get a new set of states for the two new characters.
+        states = updateStateListForPair(states, index, pairCode);
+        index++;
+      } else {
+        // Get a new set of states for the new character.
+        states = updateStateListForChar(states, index);
+      }
+    }
+    // We are left with a set of states.  Find the shortest one.
+    State minState = Collections.min(states, new Comparator<State>() {
+      @Override
+      public int compare(State a, State b) {
+        return a.getBitCount() - b.getBitCount();
+      }
+    });
+    // Convert it to a bit array, and return.
+    return minState.toBitArray(text);
+  }
+
+  // We update a set of states for a new character by updating each state
+  // for the new character, merging the results, and then removing the
+  // non-optimal states.
+  private List<State> updateStateListForChar(Iterable<State> states, int index) {
+    Collection<State> result = new LinkedList<State>();
+    for (State state : states) {
+      updateStateForChar(state, index, result);
+    }
+    return simplifyStates(result);
+  }
+
+  // Return a set of states that represent the possible ways of updating this
+  // state for the next character.  The resulting set of states are added to
+  // the "result" list.
+  private void updateStateForChar(State state, int index, Collection<State> result) {
+    char ch = (char) (text[index] & 0xFF);
+    boolean charInCurrentTable = CHAR_MAP[state.getMode()][ch] > 0;
+    State stateNoBinary = null;
+    for (int mode = 0; mode <= MODE_PUNCT; mode++) {
+      int charInMode = CHAR_MAP[mode][ch];
+      if (charInMode > 0) {
+        if (stateNoBinary == null) {
+          // Only create stateNoBinary the first time it's required.
+          stateNoBinary = state.endBinaryShift(index);
+        }
+        // Try generating the character by latching to its mode
+        if (!charInCurrentTable || mode == state.getMode() || mode == MODE_DIGIT) {
+          // If the character is in the current table, we don't want to latch to
+          // any other mode except possibly digit (which uses only 4 bits).  Any
+          // other latch would be equally successful *after* this character, and
+          // so wouldn't save any bits.
+          State latch_state = stateNoBinary.latchAndAppend(mode, charInMode);
+          result.add(latch_state);
+        }
+        // Try generating the character by shifting to its mode.
+        if (!charInCurrentTable && SHIFT_TABLE[state.getMode()][mode] >= 0) {
+          // It never makes sense to temporarily shift to another mode if the
+          // character exists in the current mode.  That can never save bits.
+          State shift_state = stateNoBinary.shiftAndAppend(mode, charInMode);
+          result.add(shift_state);
+        }
+      }
+    }
+    if (state.getBinaryShiftByteCount() > 0 || CHAR_MAP[state.getMode()][ch] == 0) {
+      // It's never worthwhile to go into binary shift mode if you're not already
+      // in binary shift mode, and the character exists in your current mode.
+      // That can never save bits over just outputting the char in the current mode.
+      State binaryState = state.addBinaryShiftChar(index);
+      result.add(binaryState);
+    }
+  }
+
+  private static List<State> updateStateListForPair(Iterable<State> states, int index, int pairCode) {
+    Collection<State> result = new LinkedList<State>();
+    for (State state : states) {
+      updateStateForPair(state, index, pairCode, result);
+    }
+    return simplifyStates(result);
+  }
+
+  private static void updateStateForPair(State state, int index, int pairCode, Collection<State> result) {
+    State stateNoBinary = state.endBinaryShift(index);
+    // Possibility 1.  Latch to MODE_PUNCT, and then append this code
+    result.add(stateNoBinary.latchAndAppend(MODE_PUNCT, pairCode));
+    if (state.getMode() != MODE_PUNCT) {
+      // Possibility 2.  Shift to MODE_PUNCT, and then append this code.
+      // Every state except MODE_PUNCT (handled above) can shift
+      result.add(stateNoBinary.shiftAndAppend(MODE_PUNCT, pairCode));
+    }
+    if (pairCode == 3 || pairCode == 4) {
+      // both characters are in DIGITS.  Sometimes better to just add two digits
+      State digit_state = stateNoBinary
+          .latchAndAppend(MODE_DIGIT, 16 - pairCode)  // period or comma in DIGIT
+          .latchAndAppend(MODE_DIGIT, 1);             // space in DIGIT
+      result.add(digit_state);
+    }
+    if (state.getBinaryShiftByteCount() > 0) {
+      // It only makes sense to do the characters as binary if we're already
+      // in binary mode.
+      State binaryState = state.addBinaryShiftChar(index).addBinaryShiftChar(index + 1);
+      result.add(binaryState);
+    }
+  }
+
+  private static List<State> simplifyStates(Iterable<State> states) {
+    List<State> result = new LinkedList<State>();
+    for (State newState : states) {
+      boolean add = true;
+      for (Iterator<State> iterator = result.iterator(); iterator.hasNext(); ) {
+        State oldState = iterator.next();
+        if (oldState.isBetterThanOrEqualTo(newState)) {
+          add = false;
+          break;
+        }
+        if (newState.isBetterThanOrEqualTo(oldState)) {
+          iterator.remove();
+        }
+      }
+      if (add) {
+        result.add(newState);
+      }
+    }
+    return result;
+  }
+
+}
--- a/core/src/com/google/zxing/aztec/encoder/SimpleToken.java
+++ b/core/src/com/google/zxing/aztec/encoder/SimpleToken.java
@ -0,0 +1,45 @@
+/*
+ * Copyright 2013 ZXing authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.zxing.aztec.encoder;
+
+import com.google.zxing.common.BitArray;
+
+final class SimpleToken extends Token {
+  
+  // For normal words, indicates value and bitCount
+  private final short value;
+  private final short bitCount;
+
+  SimpleToken(Token previous, int totalBitCount, int value, int bitCount)  {
+    super(previous, totalBitCount);
+    this.value = (short) value;
+    this.bitCount = (short) bitCount;
+  }
+
+  @Override
+  void appendTo(BitArray bitArray, byte[] text) {
+    bitArray.appendBits(value, bitCount);
+  }
+
+  @Override
+  public String toString() {
+    int value = this.value & ((1 << bitCount) - 1);
+    value |= 1 << bitCount;
+    return '<' + Integer.toBinaryString(value | (1 << bitCount)).substring(1) + '>';
+  }
+
+}
--- a/core/src/com/google/zxing/aztec/encoder/State.java
+++ b/core/src/com/google/zxing/aztec/encoder/State.java
@ -0,0 +1,169 @@
+/*
+ * Copyright 2013 ZXing authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.zxing.aztec.encoder;
+
+import java.util.Deque;
+import java.util.LinkedList;
+
+import com.google.zxing.common.BitArray;
+
+/**
+ * Immutable snapshot of the high-level encoder after it has consumed a prefix of the input.
+ * It records the tokens produced so far, the mode in effect, how many trailing input bytes are
+ * pending in Binary Shift, and the total number of bits this path costs. Every transition
+ * method returns a new State and leaves the receiver untouched.
+ */
+final class State {
+
+  static final State INITIAL_STATE = new State(Token.EMPTY, HighLevelEncoder.MODE_UPPER, 0, 0);
+
+  // The encoding mode in effect or, while a Binary Shift is pending, the mode
+  // we will be back in once it ends.
+  private final int mode;
+  // Most recent token of the chain emitted so far. While a Binary Shift is
+  // pending, the token for the pending bytes has *not* been added yet.
+  private final Token token;
+  // Number of most recent input bytes waiting to be written in Binary Shift
+  // mode; zero when no Binary Shift is pending.
+  private final int binaryShiftByteCount;
+  // Total number of bits generated so far, pending Binary Shift bytes included.
+  private final int bitCount;
+
+  private State(Token token, int mode, int binaryBytes, int bitCount) {
+    // Intended invariant (not checked at runtime): bitCount equals
+    // token.getTotalBitCount() plus 8 bits per pending byte plus the Binary
+    // Shift header, which is 0 bits for no pending bytes, 10 for 1..31,
+    // 20 for 32..62 and 21 beyond that.
+    this.bitCount = bitCount;
+    this.binaryShiftByteCount = binaryBytes;
+    this.mode = mode;
+    this.token = token;
+  }
+
+  int getMode() {
+    return mode;
+  }
+
+  Token getToken() {
+    return token;
+  }
+
+  int getBinaryShiftByteCount() {
+    return binaryShiftByteCount;
+  }
+
+  int getBitCount() {
+    return bitCount;
+  }
+
+  // Returns the state reached by latching to the given mode (skipped when we
+  // are already in it) and then emitting one code in that mode.
+  // Intended precondition (unchecked): no Binary Shift is pending.
+  State latchAndAppend(int mode, int value) {
+    Token tokens = this.token;
+    int totalBits = this.bitCount;
+    if (this.mode != mode) {
+      // A latch entry packs the bit length in its high 16 bits and the code
+      // in its low 16 bits.
+      int latchEntry = HighLevelEncoder.LATCH_TABLE[this.mode][mode];
+      int latchBits = latchEntry >> 16;
+      tokens = tokens.add(latchEntry & 0xFFFF, latchBits);
+      totalBits += latchBits;
+    }
+    // DIGIT codes are 4 bits wide; every other mode uses 5.
+    int codeBits = 5;
+    if (mode == HighLevelEncoder.MODE_DIGIT) {
+      codeBits = 4;
+    }
+    tokens = tokens.add(value, codeBits);
+    return new State(tokens, mode, 0, totalBits + codeBits);
+  }
+
+  // Returns the state reached by temporarily shifting to another mode to
+  // emit a single value; the current mode is kept.
+  // Intended precondition (unchecked): no Binary Shift is pending and the
+  // target mode differs from the current one.
+  State shiftAndAppend(int mode, int value) {
+    // The shift code itself is as wide as a code of the current mode.
+    int shiftBits = this.mode == HighLevelEncoder.MODE_DIGIT ? 4 : 5;
+    int shiftCode = HighLevelEncoder.SHIFT_TABLE[this.mode][mode];
+    // Shifts exist only to UPPER and PUNCT, whose codes are both 5 bits wide.
+    Token tokens = this.token.add(shiftCode, shiftBits).add(value, 5);
+    return new State(tokens, this.mode, 0, this.bitCount + shiftBits + 5);
+  }
+
+  // Returns the state reached by putting one more input byte into the pending
+  // Binary Shift run. "index" is the position of that byte in the input.
+  State addBinaryShiftChar(int index) {
+    Token tokens = this.token;
+    int nextMode = this.mode;
+    int totalBits = this.bitCount;
+    if (nextMode == HighLevelEncoder.MODE_PUNCT || nextMode == HighLevelEncoder.MODE_DIGIT) {
+      // From PUNCT or DIGIT we first latch to UPPER. Intended precondition
+      // (unchecked): no Binary Shift is pending in that case.
+      int latchEntry = HighLevelEncoder.LATCH_TABLE[nextMode][HighLevelEncoder.MODE_UPPER];
+      int latchBits = latchEntry >> 16;
+      tokens = tokens.add(latchEntry & 0xFFFF, latchBits);
+      totalBits += latchBits;
+      nextMode = HighLevelEncoder.MODE_UPPER;
+    }
+    // Cost of this byte: 8 data bits plus whatever the header grows by when
+    // the run length passes 0 (0 -> 10), 31 (10 -> 20) or 62 (20 -> 21).
+    int addedBits;
+    if (binaryShiftByteCount == 0 || binaryShiftByteCount == 31) {
+      addedBits = 18;
+    } else if (binaryShiftByteCount == 62) {
+      addedBits = 9;
+    } else {
+      addedBits = 8;
+    }
+    State next = new State(tokens, nextMode, binaryShiftByteCount + 1, totalBits + addedBits);
+    if (next.binaryShiftByteCount != 2047 + 31) {
+      return next;
+    }
+    // The run is as long as it is allowed to be, so it has to end here.
+    return next.endBinaryShift(index + 1);
+  }
+
+  // Returns this state with the pending Binary Shift run (if any) turned into
+  // a token. "index" is the position just past the last pending byte.
+  State endBinaryShift(int index) {
+    if (binaryShiftByteCount != 0) {
+      int start = index - binaryShiftByteCount;
+      Token tokens = this.token.addBinaryShift(start, binaryShiftByteCount);
+      // At this point tokens.getTotalBitCount() is expected to equal bitCount.
+      return new State(tokens, mode, 0, this.bitCount);
+    }
+    return this;
+  }
+
+  // Returns true if being in "this" state is at least as good as being in the
+  // "other" state under all possible circumstances.
+  boolean isBetterThanOrEqualTo(State other) {
+    // Start from our own cost plus the cost of latching into the other's mode.
+    int latchBits = HighLevelEncoder.LATCH_TABLE[this.mode][other.mode] >> 16;
+    int cost = this.bitCount + latchBits;
+    boolean otherInBinaryShift = other.binaryShiftByteCount > 0;
+    boolean notCovered =
+        this.binaryShiftByteCount == 0 || this.binaryShiftByteCount > other.binaryShiftByteCount;
+    if (otherInBinaryShift && notCovered) {
+      cost += 10;     // Cost of entering Binary Shift mode.
+    }
+    return cost <= other.bitCount;
+  }
+
+  // Produces the encoded bits for this state; "text" is the input the
+  // tokens refer to.
+  BitArray toBitArray(byte[] text) {
+    // Tokens are chained newest to oldest, so push each one on the front to
+    // obtain output order. Any pending Binary Shift run is closed first.
+    Deque<Token> ordered = new LinkedList<Token>();
+    Token current = endBinaryShift(text.length).token;
+    while (current != null) {
+      ordered.addFirst(current);
+      current = current.getPrevious();
+    }
+    BitArray bits = new BitArray();
+    for (Token each : ordered) {
+      each.appendTo(bits, text);
+    }
+    // bits.getSize() is expected to equal this.bitCount here.
+    return bits;
+  }
+
+  @Override
+  public String toString() {
+    return String.format("%s bits=%d bytes=%d", HighLevelEncoder.MODE_NAMES[mode], bitCount, binaryShiftByteCount);
+  }
+
+}
--- a/core/src/com/google/zxing/aztec/encoder/Token.java
+++ b/core/src/com/google/zxing/aztec/encoder/Token.java
@ -0,0 +1,52 @@
+/*
+ * Copyright 2013 ZXing authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.zxing.aztec.encoder;
+
+import com.google.zxing.common.BitArray;
+
+/**
+ * One element of the encoder's output, linked to the token emitted before it.
+ */
+abstract class Token {
+
+  // Start of every chain: no predecessor and no bits.
+  static final Token EMPTY = new SimpleToken(null, 0, 0, 0);
+
+  // The token emitted immediately before this one; null for the first token.
+  private final Token previous;
+  // Bits in the chain up to and including this token. For debugging purposes only.
+  private final int totalBitCount;
+
+  Token(Token previous, int totalBitCount) {
+    this.totalBitCount = totalBitCount;
+    this.previous = previous;
+  }
+
+  final Token getPrevious() {
+    return previous;
+  }
+
+  final int getTotalBitCount() {
+    return totalBitCount;
+  }
+
+  // Returns a new chain ending in a simple code of the given value and width.
+  final Token add(int value, int bitCount) {
+    int newTotal = this.totalBitCount + bitCount;
+    return new SimpleToken(this, newTotal, value, bitCount);
+  }
+
+  // Returns a new chain ending in a Binary Shift run of byteCount input bytes
+  // beginning at position start.
+  final Token addBinaryShift(int start, int byteCount) {
+    // Header size depends on the run length: 10 bits up to 31 bytes,
+    // 20 bits up to 62 bytes, 21 bits beyond that.
+    int headerBits;
+    if (byteCount <= 31) {
+      headerBits = 10;
+    } else if (byteCount <= 62) {
+      headerBits = 20;
+    } else {
+      headerBits = 21;
+    }
+    int runBits = (byteCount * 8) + headerBits;
+    return new BinaryShiftToken(this, this.totalBitCount + runBits, start, byteCount);
+  }
+
+  // Writes this token's bits to bitArray; text is the input being encoded.
+  abstract void appendTo(BitArray bitArray, byte[] text);
+
+}
--- a/core/test/src/com/google/zxing/aztec/encoder/EncoderTest.java
+++ b/core/test/src/com/google/zxing/aztec/encoder/EncoderTest.java
@ -41,12 +41,13 @@ import com.google.zxing.common.DecoderResult;
 * Aztec 2D generator unit tests.
 *
 * @author Rustam Abdullaev
+ * @author Frank Yellin
 */
 public final class EncoderTest extends Assert {

  private static final Charset LATIN_1 = Charset.forName("ISO-8859-1");
  private static final Pattern DOTX = Pattern.compile("[^.X]");
-  public static final ResultPoint[] NO_POINTS = new ResultPoint[0];
+  private static final ResultPoint[] NO_POINTS = new ResultPoint[0];

  // real life tests

@ -81,49 +82,49 @@ public final class EncoderTest extends Assert {
  @Test
  public void testEncode2() throws Exception {
    testEncode("Aztec Code is a public domain 2D matrix barcode symbology" +
-    		" of nominally square symbols built on a square grid with a " +
-    		"distinctive square bullseye pattern at their center.", false, 6,
-        "        X X     X X     X     X     X   X X X         X   X         X   X X       \n" +
-        "  X       X X     X   X X   X X       X             X     X   X X   X           X \n" +
-        "  X   X X X     X   X   X X     X X X   X   X X               X X       X X     X \n" +
-        "X X X             X   X         X         X     X     X   X     X X       X   X   \n" +
-        "X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X \n" +
-        "    X X   X   X   X X X               X       X       X X     X X   X X       X   \n" +
-        "X X     X       X       X X X X   X   X X       X   X X   X       X X   X X   X   \n" +
-        "  X       X   X         X     X   X         X X       X         X     X   X   X X \n" +
-        "X X   X X   X   X   X X       X X     X X     X X X   X X   X X   X X   X X X     \n" +
-        "  X       X   X   X X     X X   X X         X X X   X     X     X X   X     X X X \n" +
-        "  X   X X X   X X X   X   X X   X   X   X X   X X   X X X X X   X X X   X X     X \n" +
-        "    X     X   X X   X   X X X X       X       X       X X X         X X     X   X \n" +
-        "X X X   X           X X X X     X X X X X X X X   X       X X X     X   X   X   X \n" +
-        "          X       X   X X X X     X   X           X   X X       X                 \n" +
-        "  X     X X   X   X X   X X X X X X X X X X X X X X X X   X X       X   X X X     \n" +
-        "    X X           X X       X                       X X X X X X             X X X \n" +
-        "        X   X X   X X X   X X   X X X X X X X X X   X   X               X X X X   \n" +
-        "          X X X       X     X   X               X   X X   X       X X X           \n" +
-        "X X     X     X   X     X X X   X   X X X X X   X   X X       X         X   X X X \n" +
-        "X X X X       X     X   X X X   X   X       X   X   X       X X X   X X       X X \n" +
-        "X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X \n" +
-        "    X     X       X     X   X   X   X       X   X   X       X                     \n" +
-        "        X X     X X X X X   X   X   X X X X X   X   X X X     X     X   X         \n" +
-        "X     X   X   X   X X X X   X   X               X   X X X   X X     X     X   X   \n" +
-        "  X   X X X   X     X X X X X   X X X X X X X X X   X X X X X           X X X X   \n" +
-        "    X X   X   X     X X     X                       X X X X       X   X     X     \n" +
-        "    X X X X   X       X     X X X X X X X X X X X X X X       X     X   X X   X X \n" +
-        "            X   X X     X     X X X X X     X X X       X X X X X   X         X   \n" +
-        "X       X         X           X X   X X X X   X X   X X X     X X   X   X       X \n" +
-        "X     X       X X     X     X X     X             X X   X       X     X   X X     \n" +
-        "  X X X X X       X   X     X           X     X   X X X X   X X X X     X X   X X \n" +
-        "X             X   X X X     X X       X       X X   X   X X     X X X         X X \n" +
-        "    X   X X       X     X       X   X X X X X X   X X   X X X X X X X X X   X X   \n" +
-        "    X         X X   X       X     X   X   X       X     X X X     X       X X     \n" +
-        "X     X X     X X X X X X             X X X   X               X   X     X       X \n" +
-        "X   X X     X               X X X X X     X X     X X X X X X X X     X   X   X X \n" +
-        "X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X \n" +
-        "X           X     X X X X     X     X         X         X   X       X X   X X X   \n" +
-        "X   X   X X   X X X   X         X X     X X X X     X X   X   X     X   X       X \n" +
-        "      X     X     X     X X     X   X X   X X   X         X X       X       X   X \n" +
-        "X       X           X   X   X     X X   X               X     X     X X X         \n");
+                " of nominally square symbols built on a square grid with a " +
+                "distinctive square bullseye pattern at their center.", false, 6,
+          "        X X     X X     X     X     X   X X X         X   X         X   X X       \n" +
+          "  X       X X     X   X X   X X       X             X     X   X X   X           X \n" +
+          "  X   X X X     X   X   X X     X X X   X   X X               X X       X X     X \n" +
+          "X X X             X   X         X         X     X     X   X     X X       X   X   \n" +
+          "X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X \n" +
+          "    X X   X   X   X X X               X       X       X X     X X   X X       X   \n" +
+          "X X     X       X       X X X X   X   X X       X   X X   X       X X   X X   X   \n" +
+          "  X       X   X     X X   X   X X   X X   X X X X X X   X X           X   X   X X \n" +
+          "X X   X X   X   X X X X   X X X X X X X X   X   X       X X   X X X X   X X X     \n" +
+          "  X       X   X     X       X X     X X   X   X   X     X X   X X X   X     X X X \n" +
+          "  X   X X X   X X       X X X         X X           X   X   X   X X X   X X     X \n" +
+          "    X     X   X X     X X X X     X   X     X X X X   X X   X X   X X X     X   X \n" +
+          "X X X   X             X         X X X X X   X   X X   X   X   X X   X   X   X   X \n" +
+          "          X       X X X   X X     X   X           X   X X X X   X X               \n" +
+          "  X     X X   X   X       X X X X X X X X X X X X X X X   X   X X   X   X X X     \n" +
+          "    X X                 X   X                       X X   X       X         X X X \n" +
+          "        X   X X   X X X X X X   X X X X X X X X X   X     X X           X X X X   \n" +
+          "          X X X   X     X   X   X               X   X X     X X X   X X           \n" +
+          "X X     X     X   X   X   X X   X   X X X X X   X   X X X X X X X       X   X X X \n" +
+          "X X X X       X       X   X X   X   X       X   X   X     X X X     X X       X X \n" +
+          "X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X \n" +
+          "    X     X       X         X   X   X       X   X   X     X   X X                 \n" +
+          "        X X     X X X X X   X   X   X X X X X   X   X X X     X X X X   X         \n" +
+          "X     X   X   X         X   X   X               X   X X   X X   X X X     X   X   \n" +
+          "  X   X X X   X   X X   X X X   X X X X X X X X X   X X         X X     X X X X   \n" +
+          "    X X   X   X   X X X     X                       X X X   X X   X   X     X     \n" +
+          "    X X X X   X         X   X X X X X X X X X X X X X X   X       X X   X X   X X \n" +
+          "            X   X   X X       X X X X X     X X X       X       X X X         X   \n" +
+          "X       X         X   X X X X   X     X X     X X     X X           X   X       X \n" +
+          "X     X       X X X X X     X   X X X X   X X X     X       X X X X   X   X X   X \n" +
+          "  X X X X X               X     X X X   X       X X   X X   X X X X     X X       \n" +
+          "X             X         X   X X   X X     X     X     X   X   X X X X             \n" +
+          "    X   X X       X     X       X   X X X X X X   X X   X X X X X X X X X   X   X \n" +
+          "    X         X X   X       X     X   X   X       X     X X X     X       X X X X \n" +
+          "X     X X     X X X X X X             X X X   X               X   X     X     X X \n" +
+          "X   X X     X               X X X X X     X X     X X X X X X X X     X   X   X X \n" +
+          "X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X   X \n" +
+          "X           X     X X X X     X     X         X         X   X       X X   X X X   \n" +
+          "X   X   X X   X X X   X         X X     X X X X     X X   X   X     X   X       X \n" +
+          "      X     X     X     X X     X   X X   X X   X         X X       X       X   X \n" +
+          "X       X           X   X   X     X X   X               X     X     X X X         \n");
  }
  
  @Test
@ -167,7 +168,8 @@ public final class EncoderTest extends Assert {
  
  @Test
  public void testEncodeDecode5() throws Exception {
+    // The input mixes punctuation, digits, lower-case and upper-case text in one string.
+    // NOTE(review): the trailing arguments are presumably (compact = false, layers = 5),
+    // mirroring testEncode's parameter order -- confirm against testEncodeDecode.
-    testEncodeDecode("http://test/~!@#*^%&)__ ;:'\"[]{}\\|-+-=`1029384756<>/?abc", false, 5);
+    testEncodeDecode("http://test/~!@#*^%&)__ ;:'\"[]{}\\|-+-=`1029384756<>/?abc"
+        + "Four score and seven our forefathers brought forth", false, 5);
  }
  
  @Test
@ -277,59 +279,106 @@ public final class EncoderTest extends Assert {
  @Test
  public void testHighLevelEncode() throws Exception {
+    // Each case gives an input string and the exact bit string the high-level encoder must
+    // produce. Spaces in the expected string only group the codes for readability; the helper
+    // strips them before comparing. In the comments above each expectation, "x/S" marks a
+    // shift code and "x/L" a latch code (P = PUNCT, L = LOWER, U = UPPER, D = DIGIT,
+    // B = Binary), and "=n" is the byte count that follows a Binary Shift.
    testHighLevelEncodeString("A. b.",
+        // 'A'  P/S   '. ' L/L    b    D/L    '.'
        "...X. ..... ...XX XXX.. ...XX XXXX. XX.X");
    testHighLevelEncodeString("Lorem ipsum.",
+        // 'L'  L/L   'o'   'r'   'e'   'm'   ' '   'i'   'p'   's'   'u'   'm'   D/L   '.'
        ".XX.X XXX.. X.... X..XX ..XX. .XXX. ....X .X.X. X...X X.X.. X.XX. .XXX. XXXX. XX.X");
    testHighLevelEncodeString("Lo. Test 123.",
-        ".XX.X XXX.. X.... ..... ...XX XXX.. X.X.X ..XX. X.X.. X.X.X ....X XXXX. ..XX .X.. .X.X XX.X");
+        // 'L'  L/L   'o'   P/S   '. '  U/S   'T'   'e'   's'   't'    D/L   ' '  '1'  '2'  '3'  '.'
+        ".XX.X XXX.. X.... ..... ...XX XXX.. X.X.X ..XX. X.X.. X.X.X  XXXX. ...X ..XX .X.. .X.X XX.X");
    testHighLevelEncodeString("Lo...x",
+        // 'L'  L/L   'o'   D/L   '.'  '.'  '.'  U/L  L/L   'x'
        ".XX.X XXX.. X.... XXXX. XX.X XX.X XX.X XXX. XXX.. XX..X");
    testHighLevelEncodeString(". x://abc/.",
+        //P/S   '. '  L/L   'x'   P/S   ':'   P/S   '/'   P/S   '/'   'a'   'b'   'c'   P/S   '/'   D/L   '.'
        "..... ...XX XXX.. XX..X ..... X.X.X ..... X.X.. ..... X.X.. ...X. ...XX ..X.. ..... X.X.. XXXX. XX.X");
+    // Uses Binary/Shift rather than Lower/Shift to save two bits.
+    testHighLevelEncodeString("ABCdEFG",
+        //'A'   'B'   'C'   B/S    =1    'd'     'E'   'F'   'G'
+        "...X. ...XX ..X.. XXXXX ....X .XX..X.. ..XX. ..XXX .X...");
+
+    testHighLevelEncodeString(
+        // Found on an airline boarding pass.  Several stretches of Binary Shift are
+        // necessary to keep the bit count this low. Only the total length is checked here.
+        "09  UAG    ^160MEUCIQC0sYS/HpKxnBELR1uB85R20OoqqwFGa0q2uEi"
+            + "Ygh6utAIgLl1aBVM4EOTQtMQQYH9M2Z3Dp4qnA/fwWuQ+M8L3V8U=",
+        823);
  }

  @Test
  public void testHighLevelEncodeBinary() throws Exception {
    // binary short form single byte
    testHighLevelEncodeString("N\0N",
-        ".XXXX XXXXX ...X. ........ .X..XXX.");
+        // 'N'  B/S    =1   '\0'     'N'
+        ".XXXX XXXXX ....X ........ .XXXX");   // Encode "N" in UPPER
+
+    testHighLevelEncodeString("N\0n",
+        // 'N'  B/S    =2   '\0'       'n'
+        ".XXXX XXXXX ...X. ........ .XX.XXX.");   // Encode "n" in BINARY
+
    // binary short form consecutive bytes
    testHighLevelEncodeString("N\0\u0080 A",
+        // 'N'  B/S    =2    '\0'    \u0080   ' '  'A'
        ".XXXX XXXXX ...X. ........ X....... ....X ...X.");
+
    // binary skipping over single character
    testHighLevelEncodeString("\0a\u00FF\u0080 A",
+        // B/S  =4    '\0'      'a'     '\377'   '\200'   ' '   'A'
        "XXXXX ..X.. ........ .XX....X XXXXXXXX X....... ....X ...X.");
-    // binary long form optimization into 2 short forms (saves 1 bit)
-    testHighLevelEncodeString(
-        "\0\0\0\0 \0\0\0\0 \0\0\0\0 \0\0\0\0 \0\0\0\0 \0\0\0\0 \u0082\u0084\u0088\0 \0\0\0\0 \0\0\0\0 ",
-        "XXXXX XXXXX ........ ........ ........ ........ ..X....." +
-        " ........ ........ ........ ........ ..X....." +
-        " ........ ........ ........ ........ ..X....." +
-        " ........ ........ ........ ........ ..X....." +
-        " ........ ........ ........ ........ ..X....." +
-        " ........ ........ ........ ........ ..X....." +
-        " X.....X. XXXXX .XXX. X....X.. X...X... ........ ..X....." +
-        " ........ ........ ........ ........ ..X....." +
-        " ........ ........ ........ ........ ..X.....");
-    // binary long form
-    testHighLevelEncodeString(
-        "\0\0\0\0 \0\0\1\0 \0\0\2\0 \0\0\3\0 \0\0\4\0 \0\0\5\0 \0\0\6\0 \0\0\7\0 \0\0\u0008" +
-            "\0 \0\0\u0009\0 \0\0\u00F0\0 \0\0\u00F1\0 \0\0\u00F2\0A",
-        "XXXXX ..... .....X...X. ........ ........ ........ ........ ..X....." +
-        " ........ ........ .......X ........ ..X....." +
-        " ........ ........ ......X. ........ ..X....." +
-        " ........ ........ ......XX ........ ..X....." +
-        " ........ ........ .....X.. ........ ..X....." +
-        " ........ ........ .....X.X ........ ..X....." +
-        " ........ ........ .....XX. ........ ..X....." +
-        " ........ ........ .....XXX ........ ..X....." +
-        " ........ ........ ....X... ........ ..X....." +
-        " ........ ........ ....X..X ........ ..X....." +
-        " ........ ........ XXXX.... ........ ..X....." +
-        " ........ ........ XXXX...X ........ ..X....." +
-        " ........ ........ XXXX..X. ........ .X.....X");
+
+    // getting into binary mode from digit mode
+    testHighLevelEncodeString("1234\0",
+        //D/L   '1'  '2'  '3'  '4'  U/L  B/S    =1    \0
+        "XXXX. ..XX .X.. .X.X .XX. XXX. XXXXX ....X ........"
+    );
+
+    // Create a string in which every character requires binary
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i <= 3000; i++) {
+      sb.append((char)(128 + (i % 30)));
+    }
+    // Test the output generated by Binary/Shift, particularly near the
+    // places where the encoding changes: 31, 62, and 2047+31=2078
+    for (int i : new int[] { 1, 2, 3, 10, 29, 30, 31, 32, 33,
+                             60, 61, 62, 63, 64, 2076, 2077, 2078, 2079, 2080, 3000}) {
+      // This is the expected length of a binary string of length "i":
+      // 8 bits per byte plus a header of 10, 20 or 21 bits, plus 10 more bits
+      // for a second Binary Shift once the 2078-byte maximum is exceeded.
+      int expectedLength = (8 * i) +
+          ( (i <= 31) ? 10 : (i <= 62) ? 20 : (i <= 2078) ? 21 : 31);
+      // Verify that we are correct about the length.
+      testHighLevelEncodeString(sb.substring(0, i), expectedLength);
+      // Replacing the first or last byte with a lower case letter keeps the same length
+      // only when that letter is absorbed into the binary run, i.e. when it costs 8 or 9
+      // bits there.  At i = 1, 32 and 2079 the byte being replaced costs 18 bits in
+      // binary (it starts or extends a header), so latching to LOWER for the letter
+      // (5 + 5 bits) is cheaper and the total is shorter than expectedLength.
+      if (i != 1 && i != 32 && i != 2079) {
+        // A lower case letter at the beginning will be merged into binary mode
+        testHighLevelEncodeString('a' + sb.substring(0, i - 1), expectedLength);
+        // A lower case letter at the end will also be merged into binary mode
+        testHighLevelEncodeString(sb.substring(0, i - 1) + 'a', expectedLength);
+      }
+      // A lower case letter at both ends is enough to latch us into LOWER:
+      // 10 bits for the latch plus 'a' in front, 5 bits for 'b' at the end.
+      testHighLevelEncodeString('a' + sb.substring(0, i) + 'b', expectedLength + 15);
+    }
  }
  
+  @Test
+  public void testHighLevelEncodePairs() throws Exception {
+    // Covers the two-character PUNCT codes (". ", ", ", ": ", "\r\n") and the choice between
+    // shifting and latching around them. Spaces in the expected strings are only grouping;
+    // the helper removes them before comparing.
+
+    // Typical usage
+    testHighLevelEncodeString("ABC. DEF\r\n",
+        //  A     B    C    P/S   .<sp>   D    E     F    P/S   \r\n
+        "...X. ...XX ..X.. ..... ...XX ..X.X ..XX. ..XXX ..... ...X.");
+
+    // We should latch to PUNCT mode, rather than shift.  Also check all pairs
+    testHighLevelEncodeString("A. : , \r\n",
+        // 'A'    M/L   P/L   ". "  ": "   ", " "\r\n"
+        "...X. XXX.X XXXX. ...XX ..X.X  ..X.. ...X.");
+
+    // Latch to DIGIT rather than shift to PUNCT
+    testHighLevelEncodeString("A. 1234",
+        // 'A'  D/L   '.'  ' '  '1' '2'   '3'  '4'
+        // (the last group reads ".XX."; the space inside it is stripped like all the others)
+        "...X. XXXX. XX.X ...X ..XX .X.. .X.X .X X."
+        );
+    // Don't bother leaving Binary Shift.
+    testHighLevelEncodeString("A\200. \200",
+        // 'A'  B/S    =4    \200      "."     " "     \200
+        "...X. XXXXX ..X.. X....... ..X.XXX. ..X..... X.......");
+  }
+
  // Helper routines

  private static void testEncode(String data, boolean compact, int layers, String expected) throws Exception {
@ -400,7 +449,7 @@ public final class EncoderTest extends Assert {
    assertEquals(expectedData, res.getText());
  }

-  static Random getPseudoRandom() {
+  private static Random getPseudoRandom() {
+    // Builds a SecureRandom from the fixed seed bytes DE AD BE EF.
+    // NOTE(review): the method name suggests a repeatable sequence, but whether a seeded
+    // SecureRandom is deterministic depends on the security provider in use -- confirm.
    return new SecureRandom(new byte[] {(byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF});
  }

@ -426,10 +475,26 @@ public final class EncoderTest extends Assert {
    return in;
  }

+  // Copies a BitArray, bit by bit, into a boolean[] of the same size.
+  private static boolean[] toBooleanArray(BitArray bitArray) {
+    int size = bitArray.getSize();
+    boolean[] bits = new boolean[size];
+    int index = 0;
+    while (index < size) {
+      bits[index] = bitArray.get(index);
+      index++;
+    }
+    return bits;
+  }
+
  private static void testHighLevelEncodeString(String s, String expectedBits) {
+    // Encodes s (as ISO-8859-1 bytes) with the high-level encoder and compares the rendered
+    // bits with expectedBits; spaces are removed from both sides first, so they may be used
+    // freely to group codes. Then decodes the bits again and checks that s comes back.
-    BitArray bits = Encoder.highLevelEncode(s.getBytes(LATIN_1));
+    BitArray bits = new HighLevelEncoder(s.getBytes(LATIN_1)).encode();
    String receivedBits = bits.toString().replace(" ", "");
    assertEquals("highLevelEncode() failed for input string: " + s, expectedBits.replace(" ", ""), receivedBits);
+    assertEquals(s, Decoder.highLevelDecode(toBooleanArray(bits)));
+  }
+
+  /**
+   * Encodes {@code s} (as ISO-8859-1 bytes) with the high-level encoder, checks that the
+   * result is exactly {@code expectedBitCount} bits long, and checks that decoding those
+   * bits yields {@code s} again.
+   *
+   * @param s the text to encode
+   * @param expectedBitCount the number of bits the encoder is expected to produce
+   */
+  private static void testHighLevelEncodeString(String s, int expectedBitCount) {
+    BitArray bits = new HighLevelEncoder(s.getBytes(LATIN_1)).encode();
+    // The rendered form may contain grouping spaces; count only the bit characters.
+    int receivedBitCount = bits.toString().replace(" ", "").length();
+    // Compare against the caller's expectation. Comparing the received count with itself
+    // could never fail and left the expected length unchecked.
+    assertEquals("highLevelEncode() failed for input string: " + s, expectedBitCount, receivedBitCount);
+    assertEquals(s, Decoder.highLevelDecode(toBooleanArray(bits)));
+  }

 }