Aztec Code: decode ECI and FNC1 (#1328)

* decode Aztec FLG(n): Per https://en.wikipedia.org/wiki/Aztec_Code#Character_set, FLG(0) → FNC1, FLG(1-6) → ECI identifier. Decode FNC1 as 0x1d (ASCII group separator, GS; same as Code128, DataMatrix, others). Not doing anything with ECI yet, but this at least fixes the mis-synchronization of the bit stream that comes from ignoring the bits that follow it.
* include ecLevel (percentage) in DecoderResult for Aztec
* Aztec decoder: follow character set indicated by ECI codes. Modeled on PDF417's DecodedBitStreamParser.
* add testAztecResultECI and testHighLevelDecode to aztec.decoder.DecoderTest. The latter includes non-ECI, ECI, and mixed test cases.
2025-03-05 20:48:51 -08:00 · 2020-10-27 18:04:41 -07:00 · 2020-10-27 18:04:41 -07:00 · 6e349275f7
parent b1c85db64e
commit 6e349275f7
4 changed files with 149 additions and 28 deletions
--- a/core/src/main/java/com/google/zxing/aztec/decoder/Decoder.java
+++ b/core/src/main/java/com/google/zxing/aztec/decoder/Decoder.java
@ -19,11 +19,15 @@ package com.google.zxing.aztec.decoder;
 import com.google.zxing.FormatException;
 import com.google.zxing.aztec.AztecDetectorResult;
 import com.google.zxing.common.BitMatrix;
+import com.google.zxing.common.CharacterSetECI;
 import com.google.zxing.common.DecoderResult;
 import com.google.zxing.common.reedsolomon.GenericGF;
 import com.google.zxing.common.reedsolomon.ReedSolomonDecoder;
 import com.google.zxing.common.reedsolomon.ReedSolomonException;

+import java.io.ByteArrayOutputStream;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;

 /**
@ -60,7 +64,7 @@ public final class Decoder {
  };

  private static final String[] PUNCT_TABLE = {
-      "", "\r", "\r\n", ". ", ", ", ": ", "!", "\"", "#", "$", "%", "&", "'", "(", ")",
+      "FLG(n)", "\r", "\r\n", ". ", ", ", ": ", "!", "\"", "#", "$", "%", "&", "'", "(", ")",
      "*", "+", ",", "-", ".", "/", ":", ";", "<", "=", ">", "?", "[", "]", "{", "}", "CTRL_UL"
  };

@ -68,22 +72,24 @@ public final class Decoder {
      "CTRL_PS", " ", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ",", ".", "CTRL_UL", "CTRL_US"
  };

+  private static final Charset DEFAULT_ENCODING = StandardCharsets.ISO_8859_1;
+
  private AztecDetectorResult ddata;

  /**
   * Decodes a detected Aztec symbol into text: extracts the raw bits from the detector's
   * bit matrix, runs error correction on them, and decodes the corrected bits.
   *
   * @param detectorResult detection result whose bit matrix is read; also stored in {@code ddata}
   * @return result holding the raw bytes, the decoded text, the error-correction percentage
   *     formatted as a string such as {@code "23%"}, and the number of corrected bits
   * @throws FormatException if the input contains too many errors to correct, or the
   *     corrected bits cannot be decoded
   */
  public DecoderResult decode(AztecDetectorResult detectorResult) throws FormatException {
    ddata = detectorResult;
    BitMatrix matrix = detectorResult.getBits();
    boolean[] rawbits = extractBits(matrix);
-    boolean[] correctedBits = correctBits(rawbits);
-    byte[] rawBytes = convertBoolArrayToByteArray(correctedBits);
-    String result = getEncodedData(correctedBits);
-    DecoderResult decoderResult = new DecoderResult(rawBytes, result, null, null);
-    decoderResult.setNumBits(correctedBits.length);
+    CorrectedBitsResult correctedBits = correctBits(rawbits); // corrected bits plus the EC percentage
+    byte[] rawBytes = convertBoolArrayToByteArray(correctedBits.correctBits);
+    String result = getEncodedData(correctedBits.correctBits);
+    DecoderResult decoderResult = new DecoderResult(rawBytes, result, null, String.format("%d%%", correctedBits.ecLevel)); // "%d%%" renders the integer percentage followed by a literal '%'
+    decoderResult.setNumBits(correctedBits.correctBits.length);
    return decoderResult;
  }

  // Test hook for the high-level encoder: decodes already-corrected bits straight to text
  // via getEncodedData, with no bit extraction or error correction of its own.
  // NOTE(review): getEncodedData sizes its StringBuilder as (length - 5) / 4, which is
  // negative for arrays of fewer than 2 bits -- confirm callers never pass those.
-  public static String highLevelDecode(boolean[] correctedBits) {
+  public static String highLevelDecode(boolean[] correctedBits) throws FormatException {
    return getEncodedData(correctedBits);
  }

@ -92,11 +98,20 @@ public final class Decoder {
   *
   * @return the decoded string
   */
-  private static String getEncodedData(boolean[] correctedBits) {
+  private static String getEncodedData(boolean[] correctedBits) throws FormatException {
    int endIndex = correctedBits.length;
    Table latchTable = Table.UPPER; // table most recently latched to
    Table shiftTable = Table.UPPER; // table to use for the next read
-    StringBuilder result = new StringBuilder(20);
+
+    // Final decoded string result
+    // (correctedBits.length - 5) / 4 is an upper bound on the size (all-digit result)
+    StringBuilder result = new StringBuilder((correctedBits.length - 5) / 4);
+
+    // Intermediary buffer of decoded bytes, which is decoded into a string and flushed
+    // when character encoding changes (ECI) or input ends.
+    ByteArrayOutputStream decodedBytes = new ByteArrayOutputStream();
+    Charset encoding = DEFAULT_ENCODING;
+
    int index = 0;
    while (index < endIndex) {
      if (shiftTable == Table.BINARY) {
@ -118,7 +133,7 @@ public final class Decoder {
            break;
          }
          int code = readCode(correctedBits, index, 8);
-          result.append((char) code);
+          decodedBytes.write((byte) code);
          index += 8;
        }
        // Go back to whatever mode we had been in
@ -131,7 +146,42 @@ public final class Decoder {
        int code = readCode(correctedBits, index, size);
        index += size;
        String str = getCharacter(shiftTable, code);
-        if (str.startsWith("CTRL_")) {
+        if ("FLG(n)".equals(str)) {
+          if (endIndex - index < 3) {
+            break;
+          }
+          int n = readCode(correctedBits, index, 3);
+          index += 3;
+          switch (n) {
+            case 0:
+              result.append((char) 29);  // translate FNC1 as ASCII 29
+              break;
+            case 7:
+              throw FormatException.getFormatInstance(); // FLG(7) is reserved and illegal
+            default:
+              // flush bytes before changing character set
+              result.append(new String(decodedBytes.toByteArray(), encoding));
+              decodedBytes.reset();
+
+              // ECI is decimal integer encoded as 1-6 codes in DIGIT mode
+              int eci = 0;
+              if (endIndex - index < 4 * n) {
+                break;
+              }
+              while (n-- > 0) {
+                int nextDigit = readCode(correctedBits, index, 4);
+                index += 4;
+                if (nextDigit < 2 || nextDigit > 11) {
+                  throw FormatException.getFormatInstance(); // Not a decimal digit
+                }
+                eci = eci * 10 + (nextDigit - 2);
+              }
+              CharacterSetECI charsetECI = CharacterSetECI.getCharacterSetECIByValue(eci);
+              encoding = Charset.forName(charsetECI.name());
+          }
+          // Go back to whatever mode we had been in
+          shiftTable = latchTable;
+        } else if (str.startsWith("CTRL_")) {
          // Table changes
          // ISO/IEC 24778:2008 prescribes ending a shift sequence in the mode from which it was invoked.
          // That's including when that mode is a shift.
@ -142,12 +192,15 @@ public final class Decoder {
            latchTable = shiftTable;
          }
        } else {
-          result.append(str);
+          // Though stored as a table of strings for convenience, codes actually represent 1 or 2 *bytes*.
+          byte[] b = str.getBytes(StandardCharsets.US_ASCII);
+          decodedBytes.write(b, 0, b.length);
          // Go back to whatever mode we had been in
          shiftTable = latchTable;
        }
      }
    }
+    result.append(new String(decodedBytes.toByteArray(), encoding));
    return result.toString();
  }

@ -196,13 +249,23 @@ public final class Decoder {
    }
  }

+  static final class CorrectedBitsResult { // immutable holder pairing the corrected bits with the EC percentage
+    private final boolean[] correctBits; // bit array produced by correctBits()
+    private final int ecLevel; // integer percent; correctBits() passes 100 * (numCodewords - numDataCodewords) / numCodewords
+
+    CorrectedBitsResult(boolean[] correctBits, int ecLevel) {
+      this.correctBits = correctBits; // stored by reference, not copied
+      this.ecLevel = ecLevel;
+    }
+  }
+
  /**
   * <p>Performs RS error correction on an array of bits.</p>
   *
   * @return the corrected array
   * @throws FormatException if the input contains too many errors
   */
-  private boolean[] correctBits(boolean[] rawbits) throws FormatException {
+  private CorrectedBitsResult correctBits(boolean[] rawbits) throws FormatException {
    GenericGF gf;
    int codewordSize;

@ -266,7 +329,8 @@ public final class Decoder {
        }
      }
    }
-    return correctedBits;
+
+    return new CorrectedBitsResult(correctedBits, 100 * (numCodewords - numDataCodewords) / numCodewords);
  }

  /**
--- a/core/src/test/java/com/google/zxing/aztec/decoder/DecoderTest.java
+++ b/core/src/test/java/com/google/zxing/aztec/decoder/DecoderTest.java
@ -15,9 +15,12 @@
 */
 package com.google.zxing.aztec.decoder;

+import com.google.zxing.aztec.encoder.EncoderTest;
+
 import com.google.zxing.FormatException;
 import com.google.zxing.ResultPoint;
 import com.google.zxing.aztec.AztecDetectorResult;
+import com.google.zxing.common.BitArray;
 import com.google.zxing.common.BitMatrix;
 import com.google.zxing.common.DecoderResult;
 import org.junit.Test;
@ -29,7 +32,32 @@ import org.junit.Assert;
 public final class DecoderTest extends Assert {

  private static final ResultPoint[] NO_POINTS = new ResultPoint[0];
-  
+
+  @Test
+  public void testHighLevelDecode() throws FormatException { // decodes hand-written bit patterns; the comment above each pattern labels its codes
+      // no ECI codes
+      testHighLevelDecodeString("A. b.",
+          // 'A'  P/S   '. ' L/L    b    D/L    '.'
+          "...X. ..... ...XX XXX.. ...XX XXXX. XX.X");
+
+      // initial ECI code 26 (switch to UTF-8)
+      testHighLevelDecodeString("Ça",
+          // P/S FLG(n) 2  '2'  '6'  B/S   2     0xc3     0x87     L/L   'a'
+          "..... ..... .X. .X.. X... XXXXX ...X. XX....XX X....XXX XXX.. ...X.");
+
+      // initial character without ECI (must be interpreted as ISO_8859_1)
+      // followed by ECI code 26 (= UTF-8) and UTF-8 text
+      testHighLevelDecodeString("±Ça",
+         // B/S 1     0xb1     P/S   FLG(n) 2  '2'  '6'  B/S   2     0xc3     0x87     L/L   'a'
+         "XXXXX ....X X.XX...X ..... ..... .X. .X.. X... XXXXX ...X. XX....XX X....XXX XXX.. ...X.");
+  }
+
+  private static void testHighLevelDecodeString(String expectedString, String b) throws FormatException { // b is a textual bit pattern; expectedString is the text it must decode to
+      BitArray bits = EncoderTest.toBitArray(EncoderTest.stripSpace(b)); // spaces in b are only visual grouping and are stripped first
+      assertEquals("highLevelDecode() failed for input bits: " + b,
+                   expectedString, Decoder.highLevelDecode(EncoderTest.toBooleanArray(bits)));
+  }
+
  @Test
  public void testAztecResult() throws FormatException {
    BitMatrix matrix = BitMatrix.parse(
@ -68,6 +96,34 @@ public final class DecoderTest extends Assert {
    assertEquals(180, result.getNumBits());
  }

+  @Test
+  public void testAztecResultECI() throws FormatException { // full decode of a bit matrix whose expected text contains the non-ASCII 'ç'
+    BitMatrix matrix = BitMatrix.parse(
+        "      X     X X X   X           X     \n" +
+        "    X X   X X   X X X X X X X   X     \n" +
+        "    X X                         X   X \n" +
+        "  X X X X X X X X X X X X X X X X X   \n" +
+        "      X                       X       \n" +
+        "      X   X X X X X X X X X   X   X   \n" +
+        "  X X X   X               X   X X X   \n" +
+        "  X   X   X   X X X X X   X   X X X   \n" +
+        "      X   X   X       X   X   X X X   \n" +
+        "  X   X   X   X   X   X   X   X   X   \n" +
+        "X   X X   X   X       X   X   X     X \n" +
+        "  X X X   X   X X X X X   X   X X     \n" +
+        "      X   X               X   X X   X \n" +
+        "      X   X X X X X X X X X   X   X X \n" +
+        "  X   X                       X       \n" +
+        "X X   X X X X X X X X X X X X X X X   \n" +
+        "X X     X   X         X X X       X X \n" +
+        "  X   X   X   X X X X X     X X   X   \n" +
+        "X     X       X X   X X X       X     \n",
+        "X ", "  ");
+    AztecDetectorResult r = new AztecDetectorResult(matrix, NO_POINTS, false, 15, 1); // NOTE(review): trailing args look like (compact, data blocks, layers) -- confirm against AztecDetectorResult
+    DecoderResult result = new Decoder().decode(r);
+    assertEquals("Français", result.getText());
+  }
+
  @Test(expected = FormatException.class)
  public void testDecodeTooManyErrors() throws FormatException {
    BitMatrix matrix = BitMatrix.parse(""
--- a/core/src/test/java/com/google/zxing/aztec/encoder/EncoderTest.java
+++ b/core/src/test/java/com/google/zxing/aztec/encoder/EncoderTest.java
@ -51,7 +51,7 @@ public final class EncoderTest extends Assert {
  // real life tests

  @Test
-  public void testEncode1() {
+  public void testEncode1() throws FormatException {
    testEncode("This is an example Aztec symbol for Wikipedia.", true, 3,
        "X     X X       X     X X     X     X         \n" +
        "X         X     X X     X   X X   X X       X \n" +
@ -79,7 +79,7 @@ public final class EncoderTest extends Assert {
  }

  @Test
-  public void testEncode2() {
+  public void testEncode2() throws FormatException {
    testEncode("Aztec Code is a public domain 2D matrix barcode symbology" +
                " of nominally square symbols built on a square grid with a " +
                "distinctive square bullseye pattern at their center.", false, 6,
@ -277,7 +277,7 @@ public final class EncoderTest extends Assert {
  }

  @Test
-  public void testHighLevelEncode() {
+  public void testHighLevelEncode() throws FormatException {
    testHighLevelEncodeString("A. b.",
        // 'A'  P/S   '. ' L/L    b    D/L    '.'
        "...X. ..... ...XX XXX.. ...XX XXXX. XX.X");
@ -307,7 +307,7 @@ public final class EncoderTest extends Assert {
  }

  @Test
-  public void testHighLevelEncodeBinary() {
+  public void testHighLevelEncodeBinary() throws FormatException {
    // binary short form single byte
    testHighLevelEncodeString("N\0N",
        // 'N'  B/S    =1   '\0'      N
@ -394,7 +394,7 @@ public final class EncoderTest extends Assert {
  }

  @Test
-  public void testHighLevelEncodePairs() {
+  public void testHighLevelEncodePairs() throws FormatException {
    // Typical usage
    testHighLevelEncodeString("ABC. DEF\r\n",
        //  A     B    C    P/S   .<sp>   D    E     F    P/S   \r\n
@ -417,7 +417,7 @@ public final class EncoderTest extends Assert {
  }

  @Test
-  public void testUserSpecifiedLayers() {
+  public void testUserSpecifiedLayers() throws FormatException {
    byte[] alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".getBytes(StandardCharsets.ISO_8859_1);
    AztecCode aztec = Encoder.encode(alphabet, 25, -2);
    assertEquals(2, aztec.getLayers());
@ -443,7 +443,7 @@ public final class EncoderTest extends Assert {
  }

  @Test
-  public void testBorderCompact4Case() {
+  public void testBorderCompact4Case() throws FormatException {
    // Compact(4) can hold 608 bits of information, but at most 504 can be data.  Rest must
    // be error correction
    String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
@ -471,7 +471,7 @@ public final class EncoderTest extends Assert {

  // Helper routines

-  private static void testEncode(String data, boolean compact, int layers, String expected) {
+  private static void testEncode(String data, boolean compact, int layers, String expected) throws FormatException {
    AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1), 33, Encoder.DEFAULT_AZTEC_LAYERS);
    assertEquals("Unexpected symbol format (compact)", compact, aztec.isCompact());
    assertEquals("Unexpected nr. of layers", layers, aztec.getLayers());
@ -556,7 +556,7 @@ public final class EncoderTest extends Assert {
                 stripSpace(expected), stripSpace(stuffed.toString()));
  }

-  private static BitArray toBitArray(CharSequence bits) {
+  public static BitArray toBitArray(CharSequence bits) {
    BitArray in = new BitArray();
    char[] str = DOTX.matcher(bits).replaceAll("").toCharArray();
    for (char aStr : str) {
@ -565,7 +565,7 @@ public final class EncoderTest extends Assert {
    return in;
  }

-  private static boolean[] toBooleanArray(BitArray bitArray) {
+  public static boolean[] toBooleanArray(BitArray bitArray) {
    boolean[] result = new boolean[bitArray.getSize()];
    for (int i = 0; i < result.length; i++) {
      result[i] = bitArray.get(i);
@ -573,14 +573,14 @@ public final class EncoderTest extends Assert {
    return result;
  }

-  private static void testHighLevelEncodeString(String s, String expectedBits) {
+  private static void testHighLevelEncodeString(String s, String expectedBits) throws FormatException { // round trip: encode s, compare the bits, then decode them back
    BitArray bits = new HighLevelEncoder(s.getBytes(StandardCharsets.ISO_8859_1)).encode(); // input is encoded from its ISO-8859-1 bytes
    String receivedBits = stripSpace(bits.toString());
    assertEquals("highLevelEncode() failed for input string: " + s, stripSpace(expectedBits), receivedBits);
    assertEquals(s, Decoder.highLevelDecode(toBooleanArray(bits))); // decoding the produced bits must give back s
  }

-  private static void testHighLevelEncodeString(String s, int expectedReceivedBits) {
+  private static void testHighLevelEncodeString(String s, int expectedReceivedBits) throws FormatException {
    BitArray bits = new HighLevelEncoder(s.getBytes(StandardCharsets.ISO_8859_1)).encode();
    int receivedBitCount = stripSpace(bits.toString()).length();
    assertEquals("highLevelEncode() failed for input string: " + s,
@ -588,7 +588,7 @@ public final class EncoderTest extends Assert {
    assertEquals(s, Decoder.highLevelDecode(toBooleanArray(bits)));
  }

-  private static String stripSpace(String s) {
+  public static String stripSpace(String s) { // public so other test classes (DecoderTest calls it) can reuse it
    return SPACES.matcher(s).replaceAll(""); // deletes every match of the SPACES pattern
  }

--- a/core/src/test/resources/blackbox/.gitattributes
+++ b/core/src/test/resources/blackbox/.gitattributes
@ -0,0 +1 @@
+* -text