Aztec Code: decode ECI and FNC1 (#1328)

* decode Aztec FLG(n)

Per https://en.wikipedia.org/wiki/Aztec_Code#Character_set,
FLG(0) → FNC1, FLG(1-6) → ECI identifier

Decode FNC1 as 0x1d (ASCII group separator, GS; same as Code128,
DataMatrix, others)

Not doing anything with ECI yet, but this at least fixes
the mis-synchronization of the bit stream that comes from
ignoring the bits that follow it.

* include ecLevel (percentage) in DecoderResult for Aztec

* Aztec decoder: follow character set indicated by ECI codes

Modeled on PDF417's DecodedBitStreamParser

* add testAztecResultECI and testHighLevelDecode to aztec.decoder.DecoderTest

The latter includes non-ECI, ECI, and mixed test cases.
This commit is contained in:
Dan Lenski 2020-10-27 18:04:41 -07:00 committed by GitHub
parent b1c85db64e
commit 6e349275f7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 149 additions and 28 deletions

View file

@ -19,11 +19,15 @@ package com.google.zxing.aztec.decoder;
import com.google.zxing.FormatException;
import com.google.zxing.aztec.AztecDetectorResult;
import com.google.zxing.common.BitMatrix;
import com.google.zxing.common.CharacterSetECI;
import com.google.zxing.common.DecoderResult;
import com.google.zxing.common.reedsolomon.GenericGF;
import com.google.zxing.common.reedsolomon.ReedSolomonDecoder;
import com.google.zxing.common.reedsolomon.ReedSolomonException;
import java.io.ByteArrayOutputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
/**
@ -60,7 +64,7 @@ public final class Decoder {
};
private static final String[] PUNCT_TABLE = {
"", "\r", "\r\n", ". ", ", ", ": ", "!", "\"", "#", "$", "%", "&", "'", "(", ")",
"FLG(n)", "\r", "\r\n", ". ", ", ", ": ", "!", "\"", "#", "$", "%", "&", "'", "(", ")",
"*", "+", ",", "-", ".", "/", ":", ";", "<", "=", ">", "?", "[", "]", "{", "}", "CTRL_UL"
};
@ -68,22 +72,24 @@ public final class Decoder {
"CTRL_PS", " ", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ",", ".", "CTRL_UL", "CTRL_US"
};
private static final Charset DEFAULT_ENCODING = StandardCharsets.ISO_8859_1;
private AztecDetectorResult ddata;
public DecoderResult decode(AztecDetectorResult detectorResult) throws FormatException {
ddata = detectorResult;
BitMatrix matrix = detectorResult.getBits();
boolean[] rawbits = extractBits(matrix);
boolean[] correctedBits = correctBits(rawbits);
byte[] rawBytes = convertBoolArrayToByteArray(correctedBits);
String result = getEncodedData(correctedBits);
DecoderResult decoderResult = new DecoderResult(rawBytes, result, null, null);
decoderResult.setNumBits(correctedBits.length);
CorrectedBitsResult correctedBits = correctBits(rawbits);
byte[] rawBytes = convertBoolArrayToByteArray(correctedBits.correctBits);
String result = getEncodedData(correctedBits.correctBits);
DecoderResult decoderResult = new DecoderResult(rawBytes, result, null, String.format("%d%%", correctedBits.ecLevel));
decoderResult.setNumBits(correctedBits.correctBits.length);
return decoderResult;
}
// This method is used for testing the high-level encoder
public static String highLevelDecode(boolean[] correctedBits) {
public static String highLevelDecode(boolean[] correctedBits) throws FormatException {
return getEncodedData(correctedBits);
}
@ -92,11 +98,20 @@ public final class Decoder {
*
* @return the decoded string
*/
private static String getEncodedData(boolean[] correctedBits) {
private static String getEncodedData(boolean[] correctedBits) throws FormatException {
int endIndex = correctedBits.length;
Table latchTable = Table.UPPER; // table most recently latched to
Table shiftTable = Table.UPPER; // table to use for the next read
StringBuilder result = new StringBuilder(20);
// Final decoded string result.
// (correctedBits.length - 5) / 4 is an upper bound on the size (all-digit result).
// For inputs of fewer than two bits that expression is negative, and a negative capacity
// makes the StringBuilder constructor throw NegativeArraySizeException, so clamp it at zero.
StringBuilder result = new StringBuilder(Math.max(0, (correctedBits.length - 5) / 4));
// Intermediary buffer of decoded bytes, which is decoded into a string and flushed
// when character encoding changes (ECI) or input ends.
ByteArrayOutputStream decodedBytes = new ByteArrayOutputStream();
Charset encoding = DEFAULT_ENCODING;
int index = 0;
while (index < endIndex) {
if (shiftTable == Table.BINARY) {
@ -118,7 +133,7 @@ public final class Decoder {
break;
}
int code = readCode(correctedBits, index, 8);
result.append((char) code);
decodedBytes.write((byte) code);
index += 8;
}
// Go back to whatever mode we had been in
@ -131,7 +146,42 @@ public final class Decoder {
int code = readCode(correctedBits, index, size);
index += size;
String str = getCharacter(shiftTable, code);
if (str.startsWith("CTRL_")) {
if ("FLG(n)".equals(str)) {
// FLG(n) is followed by a 3-bit value n: 0 selects FNC1, 1-6 give the number of
// ECI digits that follow, and 7 is reserved.
if (endIndex - index < 3) {
  break;
}
int n = readCode(correctedBits, index, 3);
index += 3;
// Flush the pending bytes first, using the character set that was in effect while they
// were read. This keeps them ahead of an FNC1 character in the output and prevents a
// following ECI from re-interpreting them.
result.append(new String(decodedBytes.toByteArray(), encoding));
decodedBytes.reset();
switch (n) {
  case 0:
    result.append((char) 29); // translate FNC1 as ASCII 29
    break;
  case 7:
    throw FormatException.getFormatInstance(); // FLG(7) is reserved and illegal
  default:
    if (endIndex - index < 4 * n) {
      // Truncated ECI designator. A bare break would only leave this switch and the
      // leftover bits would then be decoded as ordinary characters, so move the read
      // position to the end of the input to make the enclosing loop stop.
      index = endIndex;
      break;
    }
    // ECI is decimal integer encoded as 1-6 codes in DIGIT mode
    int eci = 0;
    while (n-- > 0) {
      int nextDigit = readCode(correctedBits, index, 4);
      index += 4;
      if (nextDigit < 2 || nextDigit > 11) {
        throw FormatException.getFormatInstance(); // Not a decimal digit
      }
      eci = eci * 10 + (nextDigit - 2);
    }
    CharacterSetECI charsetECI = CharacterSetECI.getCharacterSetECIByValue(eci);
    if (charsetECI == null) {
      // No character set is known for this ECI value; report it as malformed input
      // instead of failing with a NullPointerException on the next line.
      throw FormatException.getFormatInstance();
    }
    try {
      encoding = Charset.forName(charsetECI.name());
    } catch (IllegalArgumentException e) {
      // Charset.forName signals an illegal or unsupported charset name with unchecked
      // IllegalArgumentException subclasses; surface that as the checked decode failure
      // this method already declares. The factory used in this file takes no cause.
      throw FormatException.getFormatInstance();
    }
}
// Go back to whatever mode we had been in
shiftTable = latchTable;
} else if (str.startsWith("CTRL_")) {
// Table changes
// ISO/IEC 24778:2008 prescribes ending a shift sequence in the mode from which it was invoked.
// That's including when that mode is a shift.
@ -142,12 +192,15 @@ public final class Decoder {
latchTable = shiftTable;
}
} else {
result.append(str);
// Though stored as a table of strings for convenience, codes actually represent 1 or 2 *bytes*.
byte[] b = str.getBytes(StandardCharsets.US_ASCII);
decodedBytes.write(b, 0, b.length);
// Go back to whatever mode we had been in
shiftTable = latchTable;
}
}
}
result.append(new String(decodedBytes.toByteArray(), encoding));
return result.toString();
}
@ -196,13 +249,23 @@ public final class Decoder {
}
}
/**
 * Immutable holder pairing the bit array produced by error correction with the
 * error-correction level, expressed as the integer percentage of codewords that
 * are not data codewords.
 */
static final class CorrectedBitsResult {

  private final int ecLevel;
  private final boolean[] correctBits;

  /**
   * @param correctBits bits after error correction (stored as given, not copied)
   * @param ecLevel error-correction level as an integer percentage
   */
  CorrectedBitsResult(boolean[] correctBits, int ecLevel) {
    this.ecLevel = ecLevel;
    this.correctBits = correctBits;
  }
}
/**
* <p>Performs RS error correction on an array of bits.</p>
*
* @return the corrected array
* @throws FormatException if the input contains too many errors
*/
private boolean[] correctBits(boolean[] rawbits) throws FormatException {
private CorrectedBitsResult correctBits(boolean[] rawbits) throws FormatException {
GenericGF gf;
int codewordSize;
@ -266,7 +329,8 @@ public final class Decoder {
}
}
}
return correctedBits;
return new CorrectedBitsResult(correctedBits, 100 * (numCodewords - numDataCodewords) / numCodewords);
}
/**

View file

@ -15,9 +15,12 @@
*/
package com.google.zxing.aztec.decoder;
import com.google.zxing.aztec.encoder.EncoderTest;
import com.google.zxing.FormatException;
import com.google.zxing.ResultPoint;
import com.google.zxing.aztec.AztecDetectorResult;
import com.google.zxing.common.BitArray;
import com.google.zxing.common.BitMatrix;
import com.google.zxing.common.DecoderResult;
import org.junit.Test;
@ -29,7 +32,32 @@ import org.junit.Assert;
public final class DecoderTest extends Assert {
private static final ResultPoint[] NO_POINTS = new ResultPoint[0];
/**
 * Runs the high-level decoder over hand-written bit streams: one without ECI, one that
 * starts with an ECI, and one that switches character set part-way through.
 */
@Test
public void testHighLevelDecode() throws FormatException {
  // Each row is { expected text, bit stream }; rows are checked in order.
  String[][] cases = {
    // no ECI codes
    // 'A' P/S '. ' L/L b D/L '.'
    {"A. b.",
     "...X. ..... ...XX XXX.. ...XX XXXX. XX.X"},
    // initial ECI code 26 (switch to UTF-8)
    // P/S FLG(n) 2 '2' '6' B/S 2 0xc3 0x87 L/L 'a'
    {"Ça",
     "..... ..... .X. .X.. X... XXXXX ...X. XX....XX X....XXX XXX.. ...X."},
    // initial character without ECI (must be interpreted as ISO_8859_1)
    // followed by ECI code 26 (= UTF-8) and UTF-8 text
    // B/S 1 0xb1 P/S FLG(n) 2 '2' '6' B/S 2 0xc3 0x87 L/L 'a'
    {"±Ça",
     "XXXXX ....X X.XX...X ..... ..... .X. .X.. X... XXXXX ...X. XX....XX X....XXX XXX.. ...X."},
  };
  for (String[] testCase : cases) {
    testHighLevelDecodeString(testCase[0], testCase[1]);
  }
}
/**
 * Asserts that high-level decoding of a bit pattern yields the expected text.
 *
 * @param expectedString text the decoder must produce
 * @param b bit pattern string; whitespace is stripped before it is converted to bits
 * @throws FormatException if the decoder rejects the bit stream
 */
private static void testHighLevelDecodeString(String expectedString, String b) throws FormatException {
  String compactPattern = EncoderTest.stripSpace(b);
  boolean[] rawBits = EncoderTest.toBooleanArray(EncoderTest.toBitArray(compactPattern));
  String decoded = Decoder.highLevelDecode(rawBits);
  assertEquals("highLevelDecode() failed for input bits: " + b, expectedString, decoded);
}
@Test
public void testAztecResult() throws FormatException {
BitMatrix matrix = BitMatrix.parse(
@ -68,6 +96,34 @@ public final class DecoderTest extends Assert {
assertEquals(180, result.getNumBits());
}
// Decodes a fixed symbol matrix through the full Decoder.decode() path and expects the
// non-ASCII text "Français"; judging by the test name the symbol carries an ECI
// designator — TODO confirm against the encoded data.
@Test
public void testAztecResultECI() throws FormatException {
// NOTE(review): the row literals below look whitespace-collapsed in this view (rows have
// unequal lengths); verify them against the repository copy before relying on them.
BitMatrix matrix = BitMatrix.parse(
" X X X X X X \n" +
" X X X X X X X X X X X X \n" +
" X X X X \n" +
" X X X X X X X X X X X X X X X X X \n" +
" X X \n" +
" X X X X X X X X X X X X \n" +
" X X X X X X X X \n" +
" X X X X X X X X X X X X \n" +
" X X X X X X X X \n" +
" X X X X X X X X X \n" +
"X X X X X X X X X \n" +
" X X X X X X X X X X X X \n" +
" X X X X X X \n" +
" X X X X X X X X X X X X X \n" +
" X X X \n" +
"X X X X X X X X X X X X X X X X X \n" +
"X X X X X X X X X \n" +
" X X X X X X X X X X X \n" +
"X X X X X X X X \n",
"X ", " ");
// presumably compact=false, 15 data blocks, 1 layer — confirm against AztecDetectorResult
AztecDetectorResult r = new AztecDetectorResult(matrix, NO_POINTS, false, 15, 1);
DecoderResult result = new Decoder().decode(r);
assertEquals("Français", result.getText());
}
@Test(expected = FormatException.class)
public void testDecodeTooManyErrors() throws FormatException {
BitMatrix matrix = BitMatrix.parse(""

View file

@ -51,7 +51,7 @@ public final class EncoderTest extends Assert {
// real life tests
@Test
public void testEncode1() {
public void testEncode1() throws FormatException {
testEncode("This is an example Aztec symbol for Wikipedia.", true, 3,
"X X X X X X X X \n" +
"X X X X X X X X X X \n" +
@ -79,7 +79,7 @@ public final class EncoderTest extends Assert {
}
@Test
public void testEncode2() {
public void testEncode2() throws FormatException {
testEncode("Aztec Code is a public domain 2D matrix barcode symbology" +
" of nominally square symbols built on a square grid with a " +
"distinctive square bullseye pattern at their center.", false, 6,
@ -277,7 +277,7 @@ public final class EncoderTest extends Assert {
}
@Test
public void testHighLevelEncode() {
public void testHighLevelEncode() throws FormatException {
testHighLevelEncodeString("A. b.",
// 'A' P/S '. ' L/L b D/L '.'
"...X. ..... ...XX XXX.. ...XX XXXX. XX.X");
@ -307,7 +307,7 @@ public final class EncoderTest extends Assert {
}
@Test
public void testHighLevelEncodeBinary() {
public void testHighLevelEncodeBinary() throws FormatException {
// binary short form single byte
testHighLevelEncodeString("N\0N",
// 'N' B/S =1 '\0' N
@ -394,7 +394,7 @@ public final class EncoderTest extends Assert {
}
@Test
public void testHighLevelEncodePairs() {
public void testHighLevelEncodePairs() throws FormatException {
// Typical usage
testHighLevelEncodeString("ABC. DEF\r\n",
// A B C P/S .<sp> D E F P/S \r\n
@ -417,7 +417,7 @@ public final class EncoderTest extends Assert {
}
@Test
public void testUserSpecifiedLayers() {
public void testUserSpecifiedLayers() throws FormatException {
byte[] alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".getBytes(StandardCharsets.ISO_8859_1);
AztecCode aztec = Encoder.encode(alphabet, 25, -2);
assertEquals(2, aztec.getLayers());
@ -443,7 +443,7 @@ public final class EncoderTest extends Assert {
}
@Test
public void testBorderCompact4Case() {
public void testBorderCompact4Case() throws FormatException {
// Compact(4) can hold 608 bits of information, but at most 504 can be data. Rest must
// be error correction
String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
@ -471,7 +471,7 @@ public final class EncoderTest extends Assert {
// Helper routines
private static void testEncode(String data, boolean compact, int layers, String expected) {
private static void testEncode(String data, boolean compact, int layers, String expected) throws FormatException {
AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1), 33, Encoder.DEFAULT_AZTEC_LAYERS);
assertEquals("Unexpected symbol format (compact)", compact, aztec.isCompact());
assertEquals("Unexpected nr. of layers", layers, aztec.getLayers());
@ -556,7 +556,7 @@ public final class EncoderTest extends Assert {
stripSpace(expected), stripSpace(stuffed.toString()));
}
private static BitArray toBitArray(CharSequence bits) {
public static BitArray toBitArray(CharSequence bits) {
BitArray in = new BitArray();
char[] str = DOTX.matcher(bits).replaceAll("").toCharArray();
for (char aStr : str) {
@ -565,7 +565,7 @@ public final class EncoderTest extends Assert {
return in;
}
private static boolean[] toBooleanArray(BitArray bitArray) {
public static boolean[] toBooleanArray(BitArray bitArray) {
boolean[] result = new boolean[bitArray.getSize()];
for (int i = 0; i < result.length; i++) {
result[i] = bitArray.get(i);
@ -573,14 +573,14 @@ public final class EncoderTest extends Assert {
return result;
}
private static void testHighLevelEncodeString(String s, String expectedBits) {
private static void testHighLevelEncodeString(String s, String expectedBits) throws FormatException {
BitArray bits = new HighLevelEncoder(s.getBytes(StandardCharsets.ISO_8859_1)).encode();
String receivedBits = stripSpace(bits.toString());
assertEquals("highLevelEncode() failed for input string: " + s, stripSpace(expectedBits), receivedBits);
assertEquals(s, Decoder.highLevelDecode(toBooleanArray(bits)));
}
private static void testHighLevelEncodeString(String s, int expectedReceivedBits) {
private static void testHighLevelEncodeString(String s, int expectedReceivedBits) throws FormatException {
BitArray bits = new HighLevelEncoder(s.getBytes(StandardCharsets.ISO_8859_1)).encode();
int receivedBitCount = stripSpace(bits.toString()).length();
assertEquals("highLevelEncode() failed for input string: " + s,
@ -588,7 +588,7 @@ public final class EncoderTest extends Assert {
assertEquals(s, Decoder.highLevelDecode(toBooleanArray(bits)));
}
private static String stripSpace(String s) {
public static String stripSpace(String s) {
return SPACES.matcher(s).replaceAll("");
}

View file

@ -0,0 +1 @@
* -text