Aztec Code: decode ECI and FNC1 (#1328)

* decode Aztec FLG(n)

Per https://en.wikipedia.org/wiki/Aztec_Code#Character_set,
FLG(0) → FNC1, FLG(1-6) → ECI identifier

Decode FNC1 as 0x1d (ASCII group separator, GS; same as Code128,
DataMatrix, others)

Not doing anything with ECI yet, but this at least fixes
the mis-synchronization of the bit stream that comes from
ignoring the bits that follow it.

* include ecLevel (percentage) in DecoderResult for Aztec

* Aztec decoder: follow character set indicated by ECI codes

Modeled on PDF417's DecodedBitStreamParser

* add testAztecResultECI and testHighLevelDecode to aztec.decoder.DecoderTest

The latter includes non-ECI, ECI, and mixed test cases.
This commit is contained in:
Dan Lenski 2020-10-27 18:04:41 -07:00 committed by GitHub
parent b1c85db64e
commit 6e349275f7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 149 additions and 28 deletions

View file

@ -19,11 +19,15 @@ package com.google.zxing.aztec.decoder;
import com.google.zxing.FormatException; import com.google.zxing.FormatException;
import com.google.zxing.aztec.AztecDetectorResult; import com.google.zxing.aztec.AztecDetectorResult;
import com.google.zxing.common.BitMatrix; import com.google.zxing.common.BitMatrix;
import com.google.zxing.common.CharacterSetECI;
import com.google.zxing.common.DecoderResult; import com.google.zxing.common.DecoderResult;
import com.google.zxing.common.reedsolomon.GenericGF; import com.google.zxing.common.reedsolomon.GenericGF;
import com.google.zxing.common.reedsolomon.ReedSolomonDecoder; import com.google.zxing.common.reedsolomon.ReedSolomonDecoder;
import com.google.zxing.common.reedsolomon.ReedSolomonException; import com.google.zxing.common.reedsolomon.ReedSolomonException;
import java.io.ByteArrayOutputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays; import java.util.Arrays;
/** /**
@ -60,7 +64,7 @@ public final class Decoder {
}; };
private static final String[] PUNCT_TABLE = { private static final String[] PUNCT_TABLE = {
"", "\r", "\r\n", ". ", ", ", ": ", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "FLG(n)", "\r", "\r\n", ". ", ", ", ": ", "!", "\"", "#", "$", "%", "&", "'", "(", ")",
"*", "+", ",", "-", ".", "/", ":", ";", "<", "=", ">", "?", "[", "]", "{", "}", "CTRL_UL" "*", "+", ",", "-", ".", "/", ":", ";", "<", "=", ">", "?", "[", "]", "{", "}", "CTRL_UL"
}; };
@ -68,22 +72,24 @@ public final class Decoder {
"CTRL_PS", " ", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ",", ".", "CTRL_UL", "CTRL_US" "CTRL_PS", " ", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ",", ".", "CTRL_UL", "CTRL_US"
}; };
private static final Charset DEFAULT_ENCODING = StandardCharsets.ISO_8859_1;
private AztecDetectorResult ddata; private AztecDetectorResult ddata;
public DecoderResult decode(AztecDetectorResult detectorResult) throws FormatException { public DecoderResult decode(AztecDetectorResult detectorResult) throws FormatException {
ddata = detectorResult; ddata = detectorResult;
BitMatrix matrix = detectorResult.getBits(); BitMatrix matrix = detectorResult.getBits();
boolean[] rawbits = extractBits(matrix); boolean[] rawbits = extractBits(matrix);
boolean[] correctedBits = correctBits(rawbits); CorrectedBitsResult correctedBits = correctBits(rawbits);
byte[] rawBytes = convertBoolArrayToByteArray(correctedBits); byte[] rawBytes = convertBoolArrayToByteArray(correctedBits.correctBits);
String result = getEncodedData(correctedBits); String result = getEncodedData(correctedBits.correctBits);
DecoderResult decoderResult = new DecoderResult(rawBytes, result, null, null); DecoderResult decoderResult = new DecoderResult(rawBytes, result, null, String.format("%d%%", correctedBits.ecLevel));
decoderResult.setNumBits(correctedBits.length); decoderResult.setNumBits(correctedBits.correctBits.length);
return decoderResult; return decoderResult;
} }
// This method is used for testing the high-level encoder // This method is used for testing the high-level encoder
public static String highLevelDecode(boolean[] correctedBits) { public static String highLevelDecode(boolean[] correctedBits) throws FormatException {
return getEncodedData(correctedBits); return getEncodedData(correctedBits);
} }
@ -92,11 +98,20 @@ public final class Decoder {
* *
* @return the decoded string * @return the decoded string
*/ */
private static String getEncodedData(boolean[] correctedBits) { private static String getEncodedData(boolean[] correctedBits) throws FormatException {
int endIndex = correctedBits.length; int endIndex = correctedBits.length;
Table latchTable = Table.UPPER; // table most recently latched to Table latchTable = Table.UPPER; // table most recently latched to
Table shiftTable = Table.UPPER; // table to use for the next read Table shiftTable = Table.UPPER; // table to use for the next read
StringBuilder result = new StringBuilder(20);
// Final decoded string result
// (correctedBits.length - 5) / 4 is an upper bound on the size (all-digit result)
StringBuilder result = new StringBuilder((correctedBits.length - 5) / 4);
// Intermediary buffer of decoded bytes, which is decoded into a string and flushed
// when character encoding changes (ECI) or input ends.
ByteArrayOutputStream decodedBytes = new ByteArrayOutputStream();
Charset encoding = DEFAULT_ENCODING;
int index = 0; int index = 0;
while (index < endIndex) { while (index < endIndex) {
if (shiftTable == Table.BINARY) { if (shiftTable == Table.BINARY) {
@ -118,7 +133,7 @@ public final class Decoder {
break; break;
} }
int code = readCode(correctedBits, index, 8); int code = readCode(correctedBits, index, 8);
result.append((char) code); decodedBytes.write((byte) code);
index += 8; index += 8;
} }
// Go back to whatever mode we had been in // Go back to whatever mode we had been in
@ -131,7 +146,42 @@ public final class Decoder {
int code = readCode(correctedBits, index, size); int code = readCode(correctedBits, index, size);
index += size; index += size;
String str = getCharacter(shiftTable, code); String str = getCharacter(shiftTable, code);
if (str.startsWith("CTRL_")) { if ("FLG(n)".equals(str)) {
if (endIndex - index < 3) {
break;
}
int n = readCode(correctedBits, index, 3);
index += 3;
switch (n) {
case 0:
result.append((char) 29); // translate FNC1 as ASCII 29
break;
case 7:
throw FormatException.getFormatInstance(); // FLG(7) is reserved and illegal
default:
// flush bytes before changing character set
result.append(new String(decodedBytes.toByteArray(), encoding));
decodedBytes.reset();
// ECI is decimal integer encoded as 1-6 codes in DIGIT mode
int eci = 0;
if (endIndex - index < 4 * n) {
break;
}
while (n-- > 0) {
int nextDigit = readCode(correctedBits, index, 4);
index += 4;
if (nextDigit < 2 || nextDigit > 11) {
throw FormatException.getFormatInstance(); // Not a decimal digit
}
eci = eci * 10 + (nextDigit - 2);
}
CharacterSetECI charsetECI = CharacterSetECI.getCharacterSetECIByValue(eci);
encoding = Charset.forName(charsetECI.name());
}
// Go back to whatever mode we had been in
shiftTable = latchTable;
} else if (str.startsWith("CTRL_")) {
// Table changes // Table changes
// ISO/IEC 24778:2008 prescribes ending a shift sequence in the mode from which it was invoked. // ISO/IEC 24778:2008 prescribes ending a shift sequence in the mode from which it was invoked.
// That's including when that mode is a shift. // That's including when that mode is a shift.
@ -142,12 +192,15 @@ public final class Decoder {
latchTable = shiftTable; latchTable = shiftTable;
} }
} else { } else {
result.append(str); // Though stored as a table of strings for convenience, codes actually represent 1 or 2 *bytes*.
byte[] b = str.getBytes(StandardCharsets.US_ASCII);
decodedBytes.write(b, 0, b.length);
// Go back to whatever mode we had been in // Go back to whatever mode we had been in
shiftTable = latchTable; shiftTable = latchTable;
} }
} }
} }
result.append(new String(decodedBytes.toByteArray(), encoding));
return result.toString(); return result.toString();
} }
@ -196,13 +249,23 @@ public final class Decoder {
} }
} }
/**
 * Bundles what {@code correctBits} hands back to {@code decode}: the bit array
 * after error correction plus the symbol's error-correction level.
 */
static final class CorrectedBitsResult {

  /** Corrected bits; the array is stored by reference, not copied. */
  private final boolean[] correctBits;

  /** Error-correction level; formatted by {@code decode} as an integer percentage. */
  private final int ecLevel;

  /**
   * @param bits  bit array produced by error correction
   * @param level error-correction level to report alongside the bits
   */
  CorrectedBitsResult(boolean[] bits, int level) {
    this.ecLevel = level;
    this.correctBits = bits;
  }
}
/** /**
* <p>Performs RS error correction on an array of bits.</p> * <p>Performs RS error correction on an array of bits.</p>
* *
* @return the corrected array * @return the corrected array
* @throws FormatException if the input contains too many errors * @throws FormatException if the input contains too many errors
*/ */
private boolean[] correctBits(boolean[] rawbits) throws FormatException { private CorrectedBitsResult correctBits(boolean[] rawbits) throws FormatException {
GenericGF gf; GenericGF gf;
int codewordSize; int codewordSize;
@ -266,7 +329,8 @@ public final class Decoder {
} }
} }
} }
return correctedBits;
return new CorrectedBitsResult(correctedBits, 100 * (numCodewords - numDataCodewords) / numCodewords);
} }
/** /**

View file

@ -15,9 +15,12 @@
*/ */
package com.google.zxing.aztec.decoder; package com.google.zxing.aztec.decoder;
import com.google.zxing.aztec.encoder.EncoderTest;
import com.google.zxing.FormatException; import com.google.zxing.FormatException;
import com.google.zxing.ResultPoint; import com.google.zxing.ResultPoint;
import com.google.zxing.aztec.AztecDetectorResult; import com.google.zxing.aztec.AztecDetectorResult;
import com.google.zxing.common.BitArray;
import com.google.zxing.common.BitMatrix; import com.google.zxing.common.BitMatrix;
import com.google.zxing.common.DecoderResult; import com.google.zxing.common.DecoderResult;
import org.junit.Test; import org.junit.Test;
@ -30,6 +33,31 @@ public final class DecoderTest extends Assert {
private static final ResultPoint[] NO_POINTS = new ResultPoint[0]; private static final ResultPoint[] NO_POINTS = new ResultPoint[0];
/**
 * Feeds hand-built bit streams to the high-level decoder: plain text, text that
 * opens with an ECI switch, and text whose character set changes part-way through.
 */
@Test
public void testHighLevelDecode() throws FormatException {
  // Plain message, no ECI codes at all:
  //   'A' P/S '. ' L/L b D/L '.'
  String plainBits = "...X. ..... ...XX XXX.. ...XX XXXX. XX.X";
  testHighLevelDecodeString("A. b.", plainBits);

  // Message that starts with ECI code 26 (switch to UTF-8):
  //   P/S FLG(n) 2 '2' '6' B/S 2 0xc3 0x87 L/L 'a'
  String utf8Bits = "..... ..... .X. .X.. X... XXXXX ...X. XX....XX X....XXX XXX.. ...X.";
  testHighLevelDecodeString("Ça", utf8Bits);

  // One byte before any ECI (must be read as ISO_8859_1), then ECI code 26
  // (= UTF-8) followed by UTF-8 text:
  //   B/S 1 0xb1 P/S FLG(n) 2 '2' '6' B/S 2 0xc3 0x87 L/L 'a'
  String mixedBits =
      "XXXXX ....X X.XX...X ..... ..... .X. .X.. X... XXXXX ...X. XX....XX X....XXX XXX.. ...X.";
  testHighLevelDecodeString("±Ça", mixedBits);
}
/**
 * Decodes a bit pattern written in the test notation and checks the resulting text.
 *
 * @param expectedString text the decoder is expected to produce
 * @param b bit pattern as a string; whitespace is stripped before conversion
 * @throws FormatException if the decoder rejects the bit stream
 */
private static void testHighLevelDecodeString(String expectedString, String b) throws FormatException {
  String compactPattern = EncoderTest.stripSpace(b);
  BitArray parsedBits = EncoderTest.toBitArray(compactPattern);
  String failureMessage = "highLevelDecode() failed for input bits: " + b;
  boolean[] correctedBits = EncoderTest.toBooleanArray(parsedBits);
  String decodedText = Decoder.highLevelDecode(correctedBits);
  assertEquals(failureMessage, expectedString, decodedText);
}
@Test @Test
public void testAztecResult() throws FormatException { public void testAztecResult() throws FormatException {
BitMatrix matrix = BitMatrix.parse( BitMatrix matrix = BitMatrix.parse(
@ -68,6 +96,34 @@ public final class DecoderTest extends Assert {
assertEquals(180, result.getNumBits()); assertEquals(180, result.getNumBits());
} }
// End-to-end decoder check: parses a textual bit matrix, wraps it in an
// AztecDetectorResult, runs Decoder.decode and expects the text "Français".
@Test
public void testAztecResultECI() throws FormatException {
// NOTE(review): the rows below have different lengths as shown here, which suggests
// runs of spaces were collapsed somewhere between the original file and this view.
// The literal is whitespace-sensitive ("X " marks a set cell, " " an unset one), so
// verify it against the upstream file before relying on this test.
BitMatrix matrix = BitMatrix.parse(
" X X X X X X \n" +
" X X X X X X X X X X X X \n" +
" X X X X \n" +
" X X X X X X X X X X X X X X X X X \n" +
" X X \n" +
" X X X X X X X X X X X X \n" +
" X X X X X X X X \n" +
" X X X X X X X X X X X X \n" +
" X X X X X X X X \n" +
" X X X X X X X X X \n" +
"X X X X X X X X X \n" +
" X X X X X X X X X X X X \n" +
" X X X X X X \n" +
" X X X X X X X X X X X X X \n" +
" X X X \n" +
"X X X X X X X X X X X X X X X X X \n" +
"X X X X X X X X X \n" +
" X X X X X X X X X X X \n" +
"X X X X X X X X \n",
"X ", " ");
// presumably (bits, points, compact=false, 15 data blocks, 1 layer) — TODO confirm
// against the AztecDetectorResult constructor
AztecDetectorResult r = new AztecDetectorResult(matrix, NO_POINTS, false, 15, 1);
DecoderResult result = new Decoder().decode(r);
// The expected text contains a non-ASCII character ('ç'); per the test name this
// symbol is meant to exercise ECI-driven character-set selection.
assertEquals("Français", result.getText());
}
@Test(expected = FormatException.class) @Test(expected = FormatException.class)
public void testDecodeTooManyErrors() throws FormatException { public void testDecodeTooManyErrors() throws FormatException {
BitMatrix matrix = BitMatrix.parse("" BitMatrix matrix = BitMatrix.parse(""

View file

@ -51,7 +51,7 @@ public final class EncoderTest extends Assert {
// real life tests // real life tests
@Test @Test
public void testEncode1() { public void testEncode1() throws FormatException {
testEncode("This is an example Aztec symbol for Wikipedia.", true, 3, testEncode("This is an example Aztec symbol for Wikipedia.", true, 3,
"X X X X X X X X \n" + "X X X X X X X X \n" +
"X X X X X X X X X X \n" + "X X X X X X X X X X \n" +
@ -79,7 +79,7 @@ public final class EncoderTest extends Assert {
} }
@Test @Test
public void testEncode2() { public void testEncode2() throws FormatException {
testEncode("Aztec Code is a public domain 2D matrix barcode symbology" + testEncode("Aztec Code is a public domain 2D matrix barcode symbology" +
" of nominally square symbols built on a square grid with a " + " of nominally square symbols built on a square grid with a " +
"distinctive square bullseye pattern at their center.", false, 6, "distinctive square bullseye pattern at their center.", false, 6,
@ -277,7 +277,7 @@ public final class EncoderTest extends Assert {
} }
@Test @Test
public void testHighLevelEncode() { public void testHighLevelEncode() throws FormatException {
testHighLevelEncodeString("A. b.", testHighLevelEncodeString("A. b.",
// 'A' P/S '. ' L/L b D/L '.' // 'A' P/S '. ' L/L b D/L '.'
"...X. ..... ...XX XXX.. ...XX XXXX. XX.X"); "...X. ..... ...XX XXX.. ...XX XXXX. XX.X");
@ -307,7 +307,7 @@ public final class EncoderTest extends Assert {
} }
@Test @Test
public void testHighLevelEncodeBinary() { public void testHighLevelEncodeBinary() throws FormatException {
// binary short form single byte // binary short form single byte
testHighLevelEncodeString("N\0N", testHighLevelEncodeString("N\0N",
// 'N' B/S =1 '\0' N // 'N' B/S =1 '\0' N
@ -394,7 +394,7 @@ public final class EncoderTest extends Assert {
} }
@Test @Test
public void testHighLevelEncodePairs() { public void testHighLevelEncodePairs() throws FormatException {
// Typical usage // Typical usage
testHighLevelEncodeString("ABC. DEF\r\n", testHighLevelEncodeString("ABC. DEF\r\n",
// A B C P/S .<sp> D E F P/S \r\n // A B C P/S .<sp> D E F P/S \r\n
@ -417,7 +417,7 @@ public final class EncoderTest extends Assert {
} }
@Test @Test
public void testUserSpecifiedLayers() { public void testUserSpecifiedLayers() throws FormatException {
byte[] alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".getBytes(StandardCharsets.ISO_8859_1); byte[] alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".getBytes(StandardCharsets.ISO_8859_1);
AztecCode aztec = Encoder.encode(alphabet, 25, -2); AztecCode aztec = Encoder.encode(alphabet, 25, -2);
assertEquals(2, aztec.getLayers()); assertEquals(2, aztec.getLayers());
@ -443,7 +443,7 @@ public final class EncoderTest extends Assert {
} }
@Test @Test
public void testBorderCompact4Case() { public void testBorderCompact4Case() throws FormatException {
// Compact(4) con hold 608 bits of information, but at most 504 can be data. Rest must // Compact(4) con hold 608 bits of information, but at most 504 can be data. Rest must
// be error correction // be error correction
String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
@ -471,7 +471,7 @@ public final class EncoderTest extends Assert {
// Helper routines // Helper routines
private static void testEncode(String data, boolean compact, int layers, String expected) { private static void testEncode(String data, boolean compact, int layers, String expected) throws FormatException {
AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1), 33, Encoder.DEFAULT_AZTEC_LAYERS); AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1), 33, Encoder.DEFAULT_AZTEC_LAYERS);
assertEquals("Unexpected symbol format (compact)", compact, aztec.isCompact()); assertEquals("Unexpected symbol format (compact)", compact, aztec.isCompact());
assertEquals("Unexpected nr. of layers", layers, aztec.getLayers()); assertEquals("Unexpected nr. of layers", layers, aztec.getLayers());
@ -556,7 +556,7 @@ public final class EncoderTest extends Assert {
stripSpace(expected), stripSpace(stuffed.toString())); stripSpace(expected), stripSpace(stuffed.toString()));
} }
private static BitArray toBitArray(CharSequence bits) { public static BitArray toBitArray(CharSequence bits) {
BitArray in = new BitArray(); BitArray in = new BitArray();
char[] str = DOTX.matcher(bits).replaceAll("").toCharArray(); char[] str = DOTX.matcher(bits).replaceAll("").toCharArray();
for (char aStr : str) { for (char aStr : str) {
@ -565,7 +565,7 @@ public final class EncoderTest extends Assert {
return in; return in;
} }
private static boolean[] toBooleanArray(BitArray bitArray) { public static boolean[] toBooleanArray(BitArray bitArray) {
boolean[] result = new boolean[bitArray.getSize()]; boolean[] result = new boolean[bitArray.getSize()];
for (int i = 0; i < result.length; i++) { for (int i = 0; i < result.length; i++) {
result[i] = bitArray.get(i); result[i] = bitArray.get(i);
@ -573,14 +573,14 @@ public final class EncoderTest extends Assert {
return result; return result;
} }
private static void testHighLevelEncodeString(String s, String expectedBits) { private static void testHighLevelEncodeString(String s, String expectedBits) throws FormatException {
BitArray bits = new HighLevelEncoder(s.getBytes(StandardCharsets.ISO_8859_1)).encode(); BitArray bits = new HighLevelEncoder(s.getBytes(StandardCharsets.ISO_8859_1)).encode();
String receivedBits = stripSpace(bits.toString()); String receivedBits = stripSpace(bits.toString());
assertEquals("highLevelEncode() failed for input string: " + s, stripSpace(expectedBits), receivedBits); assertEquals("highLevelEncode() failed for input string: " + s, stripSpace(expectedBits), receivedBits);
assertEquals(s, Decoder.highLevelDecode(toBooleanArray(bits))); assertEquals(s, Decoder.highLevelDecode(toBooleanArray(bits)));
} }
private static void testHighLevelEncodeString(String s, int expectedReceivedBits) { private static void testHighLevelEncodeString(String s, int expectedReceivedBits) throws FormatException {
BitArray bits = new HighLevelEncoder(s.getBytes(StandardCharsets.ISO_8859_1)).encode(); BitArray bits = new HighLevelEncoder(s.getBytes(StandardCharsets.ISO_8859_1)).encode();
int receivedBitCount = stripSpace(bits.toString()).length(); int receivedBitCount = stripSpace(bits.toString()).length();
assertEquals("highLevelEncode() failed for input string: " + s, assertEquals("highLevelEncode() failed for input string: " + s,
@ -588,7 +588,7 @@ public final class EncoderTest extends Assert {
assertEquals(s, Decoder.highLevelDecode(toBooleanArray(bits))); assertEquals(s, Decoder.highLevelDecode(toBooleanArray(bits)));
} }
private static String stripSpace(String s) { public static String stripSpace(String s) {
return SPACES.matcher(s).replaceAll(""); return SPACES.matcher(s).replaceAll("");
} }

View file

@ -0,0 +1 @@
* -text