Aztec encode with ECI for non-default character sets (#1330)

* Aztec encoder: add ECI codes according to character set

Added backward-compatible overloads (delegating to the new charset-aware methods) to avoid modifying existing tests.

* fix testAztecWriter

- ISO-8859-1 cannot actually encode the Euro symbol ('€'); this test case only
  passed before because the Decoder wasn't actually doing the bytes→String
  decode, but simply round-tripping an unknown byte.
- Add extra test cases for implicit ISO-8859-1 (without ECI code), explicit
  ISO-8859-1 (with ECI code), and Shift_JIS

* remove unnecessary conversion between String and byte[] in Aztec EncoderTest and DetectorTest

* Aztec DecoderTest: use constants for charsets

* Aztec Code: remove unnecessary conversion between Charset and Charset.name() strings

* PDF417, QR, DataMatrix: remove unnecessary conversion between Charset and Charset.name() strings

Includes replacing StringUtils.guessEncoding() with .guessCharset(), to return
Charset rather than String.

This change makes the tacit assumption that the Shift_JIS charset *will* be
available.  There are existing comments suggesting that it might not always
be available… but the existing *tests* assume it will be.
Dan Lenski 2020-11-07 14:26:24 -08:00 committed by GitHub
parent 28d339e67d
commit 515688992b
16 changed files with 244 additions and 129 deletions
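
To make the intent concrete, here is a minimal usage sketch of the charset-aware path added by this commit (class name and sample text are illustrative; the writer reads the CHARACTER_SET hint exactly as the updated tests below do):

import com.google.zxing.BarcodeFormat;
import com.google.zxing.EncodeHintType;
import com.google.zxing.aztec.AztecWriter;
import com.google.zxing.common.BitMatrix;
import java.util.EnumMap;
import java.util.Map;

public class AztecEciExample {
  public static void main(String[] args) {
    Map<EncodeHintType, Object> hints = new EnumMap<>(EncodeHintType.class);
    // With a CHARACTER_SET hint the encoder now emits the matching ECI (FLG(n)) sequence
    // before the data; without it, no ECI is written and ISO-8859-1 is implied.
    hints.put(EncodeHintType.CHARACTER_SET, "UTF-8");
    BitMatrix matrix = new AztecWriter().encode("\u20AC 1 sample data.", BarcodeFormat.AZTEC, 0, 0, hints);
    System.out.println(matrix.getWidth() + "x" + matrix.getHeight());
  }
}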

View file

@ -24,7 +24,6 @@ import com.google.zxing.aztec.encoder.Encoder;
import com.google.zxing.common.BitMatrix;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Map;
/**
@ -39,7 +38,7 @@ public final class AztecWriter implements Writer {
@Override
public BitMatrix encode(String contents, BarcodeFormat format, int width, int height, Map<EncodeHintType,?> hints) {
Charset charset = StandardCharsets.ISO_8859_1;
Charset charset = null; // Do not add any ECI code by default
int eccPercent = Encoder.DEFAULT_EC_PERCENT;
int layers = Encoder.DEFAULT_AZTEC_LAYERS;
if (hints != null) {
@ -62,7 +61,7 @@ public final class AztecWriter implements Writer {
if (format != BarcodeFormat.AZTEC) {
throw new IllegalArgumentException("Can only encode AZTEC, but got " + format);
}
AztecCode aztec = Encoder.encode(contents.getBytes(charset), eccPercent, layers);
AztecCode aztec = Encoder.encode(contents, eccPercent, layers, charset);
return renderResult(aztec, width, height);
}

View file

@ -177,7 +177,7 @@ public final class Decoder {
eci = eci * 10 + (nextDigit - 2);
}
CharacterSetECI charsetECI = CharacterSetECI.getCharacterSetECIByValue(eci);
encoding = Charset.forName(charsetECI.name());
encoding = charsetECI.getCharset();
}
// Go back to whatever mode we had been in
shiftTable = latchTable;

View file

@ -21,6 +21,9 @@ import com.google.zxing.common.BitMatrix;
import com.google.zxing.common.reedsolomon.GenericGF;
import com.google.zxing.common.reedsolomon.ReedSolomonEncoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
/**
* Generates Aztec 2D barcodes.
*
@ -42,13 +45,66 @@ public final class Encoder {
}
/**
* Encodes the given binary content as an Aztec symbol
* Encodes the given string content as an Aztec symbol (without ECI code)
*
* @param data input data string; must be encodable as ISO/IEC 8859-1 (Latin-1)
* @return Aztec symbol matrix with metadata
*/
public static AztecCode encode(String data) {
return encode(data.getBytes(StandardCharsets.ISO_8859_1));
}
/**
* Encodes the given string content as an Aztec symbol (without ECI code)
*
* @param data input data string; must be encodable as ISO/IEC 8859-1 (Latin-1)
* @param minECCPercent minimal percentage of error check words (According to ISO/IEC 24778:2008,
* a minimum of 23% + 3 words is recommended)
* @param userSpecifiedLayers if non-zero, a user-specified value for the number of layers
* @return Aztec symbol matrix with metadata
*/
public static AztecCode encode(String data, int minECCPercent, int userSpecifiedLayers) {
return encode(data.getBytes(StandardCharsets.ISO_8859_1), minECCPercent, userSpecifiedLayers, null);
}
/**
* Encodes the given string content as an Aztec symbol
*
* @param data input data string
* @param minECCPercent minimal percentage of error check words (According to ISO/IEC 24778:2008,
* a minimum of 23% + 3 words is recommended)
* @param userSpecifiedLayers if non-zero, a user-specified value for the number of layers
* @param charset character set in which to encode string using ECI; if null, no ECI code
* will be inserted, and the string must be encodable as ISO/IEC 8859-1
* (Latin-1), the default encoding of the symbol.
* @return Aztec symbol matrix with metadata
*/
public static AztecCode encode(String data, int minECCPercent, int userSpecifiedLayers, Charset charset) {
byte[] bytes = data.getBytes(null != charset ? charset : StandardCharsets.ISO_8859_1);
return encode(bytes, minECCPercent, userSpecifiedLayers, charset);
}
/**
* Encodes the given binary content as an Aztec symbol (without ECI code)
*
* @param data input data bytes
* @return Aztec symbol matrix with metadata
*/
public static AztecCode encode(byte[] data) {
return encode(data, DEFAULT_EC_PERCENT, DEFAULT_AZTEC_LAYERS);
return encode(data, DEFAULT_EC_PERCENT, DEFAULT_AZTEC_LAYERS, null);
}
/**
* Encodes the given binary content as an Aztec symbol (without ECI code)
*
* @param data input data bytes
* @param minECCPercent minimal percentage of error check words (According to ISO/IEC 24778:2008,
* a minimum of 23% + 3 words is recommended)
* @param userSpecifiedLayers if non-zero, a user-specified value for the number of layers
* @return Aztec symbol matrix with metadata
*/
public static AztecCode encode(byte[] data, int minECCPercent, int userSpecifiedLayers) {
return encode(data, minECCPercent, userSpecifiedLayers, null);
}
/**
@ -58,11 +114,13 @@ public final class Encoder {
* @param minECCPercent minimal percentage of error check words (According to ISO/IEC 24778:2008,
* a minimum of 23% + 3 words is recommended)
* @param userSpecifiedLayers if non-zero, a user-specified value for the number of layers
* @param charset character set to mark using ECI; if null, no ECI code will be inserted, and the
* default encoding of ISO/IEC 8859-1 will be assumed by readers.
* @return Aztec symbol matrix with metadata
*/
public static AztecCode encode(byte[] data, int minECCPercent, int userSpecifiedLayers) {
public static AztecCode encode(byte[] data, int minECCPercent, int userSpecifiedLayers, Charset charset) {
// High-level encode
BitArray bits = new HighLevelEncoder(data).encode();
BitArray bits = new HighLevelEncoder(data, charset).encode();
// stuff bits and choose symbol size
int eccBits = bits.getSize() * minECCPercent / 100 + 11;
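
The new four-argument overload can also be called directly; a sketch (values mirror the existing tests, class name is illustrative):

import com.google.zxing.aztec.encoder.AztecCode;
import com.google.zxing.aztec.encoder.Encoder;
import java.nio.charset.StandardCharsets;

public class EncodeWithCharsetSketch {
  public static void main(String[] args) {
    // Passing a charset prepends the corresponding ECI code; passing null keeps the old
    // behaviour (no ECI, data must be Latin-1 encodable).
    AztecCode code = Encoder.encode("\u20AC 1 sample data.", 25,
        Encoder.DEFAULT_AZTEC_LAYERS, StandardCharsets.UTF_8);
    System.out.println("compact=" + code.isCompact() + " layers=" + code.getLayers());
  }
}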

View file

@ -17,6 +17,9 @@
package com.google.zxing.aztec.encoder;
import com.google.zxing.common.BitArray;
import com.google.zxing.common.CharacterSetECI;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Collection;
@ -148,16 +151,31 @@ public final class HighLevelEncoder {
}
private final byte[] text;
private final Charset charset;
public HighLevelEncoder(byte[] text) {
this.text = text;
this.charset = null;
}
public HighLevelEncoder(byte[] text, Charset charset) {
this.text = text;
this.charset = charset;
}
/**
* @return text represented by this encoder encoded as a {@link BitArray}
*/
public BitArray encode() {
Collection<State> states = Collections.singletonList(State.INITIAL_STATE);
State initialState = State.INITIAL_STATE;
if (charset != null) {
CharacterSetECI eci = CharacterSetECI.getCharacterSetECI(charset);
if (null == eci) {
throw new IllegalArgumentException("No ECI code for character set " + charset.toString());
}
initialState = initialState.appendFLGn(eci.getValue());
}
Collection<State> states = Collections.singletonList(initialState);
for (int index = 0; index < text.length; index++) {
int pairCode;
int nextChar = index + 1 < text.length ? text[index + 1] : 0;

View file

@ -16,6 +16,8 @@
package com.google.zxing.aztec.encoder;
import java.nio.charset.StandardCharsets;
import java.util.Deque;
import java.util.LinkedList;
@ -70,6 +72,25 @@ final class State {
return bitCount;
}
State appendFLGn(int eci) {
State result = shiftAndAppend(HighLevelEncoder.MODE_PUNCT, 0); // 0: FLG(n)
Token token = result.token;
int bitsAdded = 3;
if (eci < 0) {
token = token.add(0, 3); // 0: FNC1
} else if (eci > 999999) {
throw new IllegalArgumentException("ECI code must be between 0 and 999999");
} else {
byte[] eciDigits = Integer.toString(eci).getBytes(StandardCharsets.ISO_8859_1);
token = token.add(eciDigits.length, 3); // 1-6: number of ECI digits
for (int ii = 0; ii < eciDigits.length; ii++) {
token = token.add(eciDigits[ii] - '0' + 2, 4);
}
bitsAdded += eciDigits.length * 4;
}
return new State(token, mode, 0, bitCount + bitsAdded);
}
// Create a new state representing this state with a latch to a (not
// necessary different) mode, and then a code.
State latchAndAppend(int mode, int value) {
@ -143,7 +164,7 @@ final class State {
newModeBitCount += calculateBinaryShiftCost(other) - calculateBinaryShiftCost(this);
} else if (this.binaryShiftByteCount > other.binaryShiftByteCount && other.binaryShiftByteCount > 0) {
// maximum possible additional cost (we end up exceeding the 31 byte boundary and other state can stay beneath it)
newModeBitCount += 10;
newModeBitCount += 10;
}
return newModeBitCount <= other.bitCount;
}
@ -168,7 +189,7 @@ final class State {
public String toString() {
return String.format("%s bits=%d bytes=%d", HighLevelEncoder.MODE_NAMES[mode], bitCount, binaryShiftByteCount);
}
private static int calculateBinaryShiftCost(State state) {
if (state.binaryShiftByteCount > 62) {
return 21; // B/S with extended length
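
To illustrate the bit layout State.appendFLGn produces after the PUNCT shift and the FLG(n) indicator (code 0): a 3-bit digit count is followed by each decimal ECI digit in 4 bits as digit + 2. A standalone sketch of just that tail (hypothetical helper, not part of the commit):

public class FlgnBitsSketch {
  // Mirrors the digit-encoding loop in State.appendFLGn (shift/indicator bits omitted).
  static String flgnDigitBits(int eci) {
    StringBuilder sb = new StringBuilder();
    String digits = Integer.toString(eci);
    sb.append(toBits(digits.length(), 3));      // 1-6: number of ECI digits
    for (char d : digits.toCharArray()) {
      sb.append(toBits(d - '0' + 2, 4));        // each digit encoded as its value + 2
    }
    return sb.toString();
  }

  static String toBits(int value, int width) {
    StringBuilder s = new StringBuilder(Integer.toBinaryString(value));
    while (s.length() < width) {
      s.insert(0, '0');
    }
    return s.toString();
  }

  public static void main(String[] args) {
    // ECI 26 (UTF-8): count "010", '2' -> 4 -> "0100", '6' -> 8 -> "1000"
    System.out.println(flgnDigitBits(26));      // 01001001000
  }
}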

View file

@ -18,6 +18,8 @@ package com.google.zxing.common;
import com.google.zxing.FormatException;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
@ -93,6 +95,19 @@ public enum CharacterSetECI {
return values[0];
}
public Charset getCharset() {
return Charset.forName(name());
}
/**
* @param charset Java character set object
* @return CharacterSetECI representing ECI for character encoding, or null if it is legal
* but unsupported
*/
public static CharacterSetECI getCharacterSetECI(Charset charset) {
return NAME_TO_ECI.get(charset.name());
}
/**
* @param value character set ECI value
* @return {@code CharacterSetECI} representing ECI of given value, or null if it is legal but
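
A quick sketch of the two new CharacterSetECI helpers used above (the printed ECI value 26 for UTF-8 follows the standard ECI assignments; class name is illustrative):

import com.google.zxing.common.CharacterSetECI;
import java.nio.charset.StandardCharsets;

public class EciLookupSketch {
  public static void main(String[] args) {
    // Charset -> enum constant (null if the charset has no ECI assignment)
    CharacterSetECI eci = CharacterSetECI.getCharacterSetECI(StandardCharsets.UTF_8);
    // enum constant -> java.nio Charset
    System.out.println(eci.getValue() + " -> " + eci.getCharset());  // 26 -> UTF-8
  }
}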

View file

@ -17,6 +17,7 @@
package com.google.zxing.common;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import com.google.zxing.DecodeHintType;
@ -29,15 +30,17 @@ import com.google.zxing.DecodeHintType;
*/
public final class StringUtils {
private static final String PLATFORM_DEFAULT_ENCODING = Charset.defaultCharset().name();
private static final Charset PLATFORM_DEFAULT_ENCODING = Charset.defaultCharset();
public static final Charset SHIFT_JIS_CHARSET = Charset.forName("SJIS");
public static final Charset GB2312_CHARSET = Charset.forName("GB2312");
private static final Charset EUC_JP = Charset.forName("EUC_JP");
private static final boolean ASSUME_SHIFT_JIS =
SHIFT_JIS_CHARSET.equals(PLATFORM_DEFAULT_ENCODING) ||
EUC_JP.equals(PLATFORM_DEFAULT_ENCODING);
// Retained for ABI compatibility with earlier versions
public static final String SHIFT_JIS = "SJIS";
public static final String GB2312 = "GB2312";
private static final String EUC_JP = "EUC_JP";
private static final String UTF8 = "UTF8";
private static final String ISO88591 = "ISO8859_1";
private static final boolean ASSUME_SHIFT_JIS =
SHIFT_JIS.equalsIgnoreCase(PLATFORM_DEFAULT_ENCODING) ||
EUC_JP.equalsIgnoreCase(PLATFORM_DEFAULT_ENCODING);
private StringUtils() { }
@ -45,12 +48,32 @@ public final class StringUtils {
* @param bytes bytes encoding a string, whose encoding should be guessed
* @param hints decode hints if applicable
* @return name of guessed encoding; at the moment will only guess one of:
* {@link #SHIFT_JIS}, {@link #UTF8}, {@link #ISO88591}, or the platform
* default encoding if none of these can possibly be correct
* "SJIS", "UTF8", "ISO8859_1", or the platform default encoding if none
* of these can possibly be correct
*/
public static String guessEncoding(byte[] bytes, Map<DecodeHintType,?> hints) {
Charset c = guessCharset(bytes, hints);
if (c == SHIFT_JIS_CHARSET) {
return "SJIS";
} else if (c == StandardCharsets.UTF_8) {
return "UTF8";
} else if (c == StandardCharsets.ISO_8859_1) {
return "ISO8859_1";
}
return c.name();
}
/**
* @param bytes bytes encoding a string, whose encoding should be guessed
* @param hints decode hints if applicable
* @return Charset of guessed encoding; at the moment will only guess one of:
* {@link #SHIFT_JIS_CHARSET}, {@link StandardCharsets#UTF_8},
* {@link StandardCharsets#ISO_8859_1}, or the platform default encoding if
* none of these can possibly be correct
*/
public static Charset guessCharset(byte[] bytes, Map<DecodeHintType,?> hints) {
if (hints != null && hints.containsKey(DecodeHintType.CHARACTER_SET)) {
return hints.get(DecodeHintType.CHARACTER_SET).toString();
return Charset.forName(hints.get(DecodeHintType.CHARACTER_SET).toString());
}
// For now, merely tries to distinguish ISO-8859-1, UTF-8 and Shift_JIS,
// which should be by far the most common encodings.
@ -164,11 +187,11 @@ public final class StringUtils {
// Easy -- if there is BOM or at least 1 valid not-single byte character (and no evidence it can't be UTF-8), done
if (canBeUTF8 && (utf8bom || utf2BytesChars + utf3BytesChars + utf4BytesChars > 0)) {
return UTF8;
return StandardCharsets.UTF_8;
}
// Easy -- if assuming Shift_JIS or >= 3 valid consecutive not-ascii characters (and no evidence it can't be), done
if (canBeShiftJIS && (ASSUME_SHIFT_JIS || sjisMaxKatakanaWordLength >= 3 || sjisMaxDoubleBytesWordLength >= 3)) {
return SHIFT_JIS;
return SHIFT_JIS_CHARSET;
}
// Distinguishing Shift_JIS and ISO-8859-1 can be a little tough for short words. The crude heuristic is:
// - If we saw
@ -177,18 +200,18 @@ public final class StringUtils {
// - then we conclude Shift_JIS, else ISO-8859-1
if (canBeISO88591 && canBeShiftJIS) {
return (sjisMaxKatakanaWordLength == 2 && sjisKatakanaChars == 2) || isoHighOther * 10 >= length
? SHIFT_JIS : ISO88591;
? SHIFT_JIS_CHARSET : StandardCharsets.ISO_8859_1;
}
// Otherwise, try in order ISO-8859-1, Shift JIS, UTF-8 and fall back to default platform encoding
if (canBeISO88591) {
return ISO88591;
return StandardCharsets.ISO_8859_1;
}
if (canBeShiftJIS) {
return SHIFT_JIS;
return SHIFT_JIS_CHARSET;
}
if (canBeUTF8) {
return UTF8;
return StandardCharsets.UTF_8;
}
// Otherwise, we take a wild guess with platform encoding
return PLATFORM_DEFAULT_ENCODING;
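
A sketch of the new Charset-returning guess next to the retained String-returning wrapper (the bytes are the UTF-8 "Español" sample from the test below; class name is illustrative):

import com.google.zxing.common.StringUtils;
import java.nio.charset.Charset;

public class GuessCharsetSketch {
  public static void main(String[] args) {
    // "Español" as UTF-8 (0xC3 0xB1 is the two-byte encoding of 'ñ')
    byte[] bytes = { 0x45, 0x73, 0x70, 0x61, (byte) 0xC3, (byte) 0xB1, 0x6F, 0x6C };
    Charset guessed = StringUtils.guessCharset(bytes, null);   // java.nio Charset (UTF-8)
    String legacy = StringUtils.guessEncoding(bytes, null);    // "UTF8", kept for compatibility
    System.out.println(guessed + " / " + legacy);
  }
}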

View file

@ -20,7 +20,7 @@ import com.google.zxing.FormatException;
import com.google.zxing.common.BitSource;
import com.google.zxing.common.DecoderResult;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
@ -505,11 +505,7 @@ final class DecodedBitStreamParser {
bytes[i] = (byte) unrandomize255State(bits.readBits(8), codewordPosition++);
}
byteSegments.add(bytes);
try {
result.append(new String(bytes, "ISO8859_1"));
} catch (UnsupportedEncodingException uee) {
throw new IllegalStateException("Platform does not support required encoding: " + uee);
}
result.append(new String(bytes, StandardCharsets.ISO_8859_1));
}
/**

View file

@ -125,7 +125,7 @@ final class DecodedBitStreamParser {
case ECI_CHARSET:
CharacterSetECI charsetECI =
CharacterSetECI.getCharacterSetECIByValue(codewords[codeIndex++]);
encoding = Charset.forName(charsetECI.name());
encoding = charsetECI.getCharset();
break;
case ECI_GENERAL_PURPOSE:
// Can't do anything with generic ECI; skip its 2 characters

View file

@ -169,7 +169,7 @@ final class PDF417HighLevelEncoder {
if (encoding == null) {
encoding = DEFAULT_ENCODING;
} else if (!DEFAULT_ENCODING.equals(encoding)) {
CharacterSetECI eci = CharacterSetECI.getCharacterSetECIByName(encoding.name());
CharacterSetECI eci = CharacterSetECI.getCharacterSetECI(encoding);
if (eci != null) {
encodingECI(eci.getValue(), sb);
}

View file

@ -23,7 +23,7 @@ import com.google.zxing.common.CharacterSetECI;
import com.google.zxing.common.DecoderResult;
import com.google.zxing.common.StringUtils;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
@ -173,11 +173,7 @@ final class DecodedBitStreamParser {
count--;
}
try {
result.append(new String(buffer, StringUtils.GB2312));
} catch (UnsupportedEncodingException ignored) {
throw FormatException.getFormatInstance();
}
result.append(new String(buffer, StringUtils.GB2312_CHARSET));
}
private static void decodeKanjiSegment(BitSource bits,
@ -208,12 +204,7 @@ final class DecodedBitStreamParser {
offset += 2;
count--;
}
// Shift_JIS may not be supported in some environments:
try {
result.append(new String(buffer, StringUtils.SHIFT_JIS));
} catch (UnsupportedEncodingException ignored) {
throw FormatException.getFormatInstance();
}
result.append(new String(buffer, StringUtils.SHIFT_JIS_CHARSET));
}
private static void decodeByteSegment(BitSource bits,
@ -231,22 +222,18 @@ final class DecodedBitStreamParser {
for (int i = 0; i < count; i++) {
readBytes[i] = (byte) bits.readBits(8);
}
String encoding;
Charset encoding;
if (currentCharacterSetECI == null) {
// The spec isn't clear on this mode; see
// section 6.4.5: it does not say which encoding to assume
// upon decoding. I have seen ISO-8859-1 used as well as
// Shift_JIS -- without anything like an ECI designator to
// give a hint.
encoding = StringUtils.guessEncoding(readBytes, hints);
encoding = StringUtils.guessCharset(readBytes, hints);
} else {
encoding = currentCharacterSetECI.name();
}
try {
result.append(new String(readBytes, encoding));
} catch (UnsupportedEncodingException ignored) {
throw FormatException.getFormatInstance();
encoding = currentCharacterSetECI.getCharset();
}
result.append(new String(readBytes, encoding));
byteSegments.add(readBytes);
}

View file

@ -19,6 +19,7 @@ package com.google.zxing.qrcode.encoder;
import com.google.zxing.EncodeHintType;
import com.google.zxing.WriterException;
import com.google.zxing.common.BitArray;
import com.google.zxing.common.StringUtils;
import com.google.zxing.common.CharacterSetECI;
import com.google.zxing.common.reedsolomon.GenericGF;
import com.google.zxing.common.reedsolomon.ReedSolomonEncoder;
@ -26,7 +27,8 @@ import com.google.zxing.qrcode.decoder.ErrorCorrectionLevel;
import com.google.zxing.qrcode.decoder.Mode;
import com.google.zxing.qrcode.decoder.Version;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
@ -47,7 +49,7 @@ public final class Encoder {
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, // 0x50-0x5f
};
static final String DEFAULT_BYTE_MODE_ENCODING = "ISO-8859-1";
static final Charset DEFAULT_BYTE_MODE_ENCODING = StandardCharsets.ISO_8859_1;
private Encoder() {
}
@ -77,10 +79,10 @@ public final class Encoder {
Map<EncodeHintType,?> hints) throws WriterException {
// Determine what character encoding has been specified by the caller, if any
String encoding = DEFAULT_BYTE_MODE_ENCODING;
Charset encoding = DEFAULT_BYTE_MODE_ENCODING;
boolean hasEncodingHint = hints != null && hints.containsKey(EncodeHintType.CHARACTER_SET);
if (hasEncodingHint) {
encoding = hints.get(EncodeHintType.CHARACTER_SET).toString();
encoding = Charset.forName(hints.get(EncodeHintType.CHARACTER_SET).toString());
}
// Pick an encoding mode appropriate for the content. Note that this will not attempt to use
@ -93,7 +95,7 @@ public final class Encoder {
// Append ECI segment if applicable
if (mode == Mode.BYTE && hasEncodingHint) {
CharacterSetECI eci = CharacterSetECI.getCharacterSetECIByName(encoding);
CharacterSetECI eci = CharacterSetECI.getCharacterSetECI(encoding);
if (eci != null) {
appendECI(eci, headerBits);
}
@ -221,8 +223,8 @@ public final class Encoder {
* Choose the best mode by examining the content. Note that 'encoding' is used as a hint;
* if it is Shift_JIS, and the input is only double-byte Kanji, then we return {@link Mode#KANJI}.
*/
private static Mode chooseMode(String content, String encoding) {
if ("Shift_JIS".equals(encoding) && isOnlyDoubleByteKanji(content)) {
private static Mode chooseMode(String content, Charset encoding) {
if (StringUtils.SHIFT_JIS_CHARSET.equals(encoding) && isOnlyDoubleByteKanji(content)) {
// Choose Kanji mode if all input are double-byte characters
return Mode.KANJI;
}
@ -248,12 +250,7 @@ public final class Encoder {
}
private static boolean isOnlyDoubleByteKanji(String content) {
byte[] bytes;
try {
bytes = content.getBytes("Shift_JIS");
} catch (UnsupportedEncodingException ignored) {
return false;
}
byte[] bytes = content.getBytes(StringUtils.SHIFT_JIS_CHARSET);
int length = bytes.length;
if (length % 2 != 0) {
return false;
@ -512,7 +509,7 @@ public final class Encoder {
static void appendBytes(String content,
Mode mode,
BitArray bits,
String encoding) throws WriterException {
Charset encoding) throws WriterException {
switch (mode) {
case NUMERIC:
appendNumericBytes(content, bits);
@ -579,26 +576,15 @@ public final class Encoder {
}
}
static void append8BitBytes(String content, BitArray bits, String encoding)
throws WriterException {
byte[] bytes;
try {
bytes = content.getBytes(encoding);
} catch (UnsupportedEncodingException uee) {
throw new WriterException(uee);
}
static void append8BitBytes(String content, BitArray bits, Charset encoding) {
byte[] bytes = content.getBytes(encoding);
for (byte b : bytes) {
bits.appendBits(b, 8);
}
}
static void appendKanjiBytes(String content, BitArray bits) throws WriterException {
byte[] bytes;
try {
bytes = content.getBytes("Shift_JIS");
} catch (UnsupportedEncodingException uee) {
throw new WriterException(uee);
}
byte[] bytes = content.getBytes(StringUtils.SHIFT_JIS_CHARSET);
if (bytes.length % 2 != 0) {
throw new WriterException("Kanji byte size not even");
}

View file

@ -27,7 +27,6 @@ import com.google.zxing.common.DecoderResult;
import org.junit.Assert;
import org.junit.Test;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@ -62,7 +61,7 @@ public final class DetectorTest extends Assert {
// Test that we can tolerate errors in the parameter locator bits
private static void testErrorInParameterLocator(String data) throws Exception {
AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1), 25, Encoder.DEFAULT_AZTEC_LAYERS);
AztecCode aztec = Encoder.encode(data, 25, Encoder.DEFAULT_AZTEC_LAYERS);
Random random = new Random(aztec.getMatrix().hashCode()); // pseudo-random, but deterministic
int layers = aztec.getLayers();
boolean compact = aztec.isCompact();

View file

@ -44,6 +44,12 @@ import java.util.regex.Pattern;
*/
public final class EncoderTest extends Assert {
private static final Charset ISO_8859_1 = StandardCharsets.ISO_8859_1;
private static final Charset UTF_8 = StandardCharsets.UTF_8;
private static final Charset SHIFT_JIS = Charset.forName("Shift_JIS");
private static final Charset ISO_8859_15 = Charset.forName("ISO-8859-15");
private static final Charset WINDOWS_1252 = Charset.forName("Windows-1252");
private static final Pattern DOTX = Pattern.compile("[^.X]");
private static final Pattern SPACES = Pattern.compile("\\s+");
private static final ResultPoint[] NO_POINTS = new ResultPoint[0];
@ -128,17 +134,20 @@ public final class EncoderTest extends Assert {
@Test
public void testAztecWriter() throws Exception {
testWriter("\u20AC 1 sample data.", "ISO-8859-1", 25, true, 2);
testWriter("\u20AC 1 sample data.", "ISO-8859-15", 25, true, 2);
testWriter("\u20AC 1 sample data.", "UTF-8", 25, true, 2);
testWriter("\u20AC 1 sample data.", "UTF-8", 100, true, 3);
testWriter("\u20AC 1 sample data.", "UTF-8", 300, true, 4);
testWriter("\u20AC 1 sample data.", "UTF-8", 500, false, 5);
testWriter("Espa\u00F1ol", null, 25, true, 1); // Without ECI (implicit ISO-8859-1)
testWriter("Espa\u00F1ol", ISO_8859_1, 25, true, 1); // Explicit ISO-8859-1
testWriter("\u20AC 1 sample data.", WINDOWS_1252, 25, true, 2); // Standard ISO-8859-1 cannot encode Euro symbol; Windows-1252 superset can
testWriter("\u20AC 1 sample data.", ISO_8859_15, 25, true, 2);
testWriter("\u20AC 1 sample data.", UTF_8, 25, true, 2);
testWriter("\u20AC 1 sample data.", UTF_8, 100, true, 3);
testWriter("\u20AC 1 sample data.", UTF_8, 300, true, 4);
testWriter("\u20AC 1 sample data.", UTF_8, 500, false, 5);
testWriter("The capital of Japan is named \u6771\u4EAC.", SHIFT_JIS, 25, true, 3);
// Test AztecWriter defaults
String data = "In ut magna vel mauris malesuada";
AztecWriter writer = new AztecWriter();
BitMatrix matrix = writer.encode(data, BarcodeFormat.AZTEC, 0, 0);
AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1),
AztecCode aztec = Encoder.encode(data,
Encoder.DEFAULT_EC_PERCENT, Encoder.DEFAULT_AZTEC_LAYERS);
BitMatrix expectedMatrix = aztec.getMatrix();
assertEquals(matrix, expectedMatrix);
@ -418,7 +427,7 @@ public final class EncoderTest extends Assert {
@Test
public void testUserSpecifiedLayers() throws FormatException {
byte[] alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".getBytes(StandardCharsets.ISO_8859_1);
String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
AztecCode aztec = Encoder.encode(alphabet, 25, -2);
assertEquals(2, aztec.getLayers());
assertTrue(aztec.isCompact());
@ -449,22 +458,21 @@ public final class EncoderTest extends Assert {
String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
// encodes as 26 * 5 * 4 = 520 bits of data
String alphabet4 = alphabet + alphabet + alphabet + alphabet;
byte[] data = alphabet4.getBytes(StandardCharsets.ISO_8859_1);
try {
Encoder.encode(data, 0, -4);
Encoder.encode(alphabet4, 0, -4);
fail("Encode should have failed. Text can't fit in 1-layer compact");
} catch (IllegalArgumentException expected) {
// continue
}
// If we just try to encode it normally, it will go to a non-compact 4 layer
AztecCode aztecCode = Encoder.encode(data, 0, Encoder.DEFAULT_AZTEC_LAYERS);
AztecCode aztecCode = Encoder.encode(alphabet4, 0, Encoder.DEFAULT_AZTEC_LAYERS);
assertFalse(aztecCode.isCompact());
assertEquals(4, aztecCode.getLayers());
// But shortening the string to 100 bytes (500 bits of data), compact works fine, even if we
// include more error checking.
aztecCode = Encoder.encode(alphabet4.substring(0, 100).getBytes(StandardCharsets.ISO_8859_1), 10, Encoder.DEFAULT_AZTEC_LAYERS);
aztecCode = Encoder.encode(alphabet4.substring(0, 100), 10, Encoder.DEFAULT_AZTEC_LAYERS);
assertTrue(aztecCode.isCompact());
assertEquals(4, aztecCode.getLayers());
}
@ -472,7 +480,7 @@ public final class EncoderTest extends Assert {
// Helper routines
private static void testEncode(String data, boolean compact, int layers, String expected) throws FormatException {
AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1), 33, Encoder.DEFAULT_AZTEC_LAYERS);
AztecCode aztec = Encoder.encode(data, 33, Encoder.DEFAULT_AZTEC_LAYERS);
assertEquals("Unexpected symbol format (compact)", compact, aztec.isCompact());
assertEquals("Unexpected nr. of layers", layers, aztec.getLayers());
BitMatrix matrix = aztec.getMatrix();
@ -480,7 +488,7 @@ public final class EncoderTest extends Assert {
}
private static void testEncodeDecode(String data, boolean compact, int layers) throws Exception {
AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1), 25, Encoder.DEFAULT_AZTEC_LAYERS);
AztecCode aztec = Encoder.encode(data, 25, Encoder.DEFAULT_AZTEC_LAYERS);
assertEquals("Unexpected symbol format (compact)", compact, aztec.isCompact());
assertEquals("Unexpected nr. of layers", layers, aztec.getLayers());
BitMatrix matrix = aztec.getMatrix();
@ -500,20 +508,20 @@ public final class EncoderTest extends Assert {
}
private static void testWriter(String data,
String charset,
Charset charset,
int eccPercent,
boolean compact,
int layers) throws FormatException {
// 1. Perform an encode-decode round-trip because it can be lossy.
// 2. Aztec Decoder currently always decodes the data with a LATIN-1 charset:
String expectedData = new String(data.getBytes(Charset.forName(charset)), StandardCharsets.ISO_8859_1);
// Perform an encode-decode round-trip because it can be lossy.
Map<EncodeHintType,Object> hints = new EnumMap<>(EncodeHintType.class);
hints.put(EncodeHintType.CHARACTER_SET, charset);
if (null != charset) {
hints.put(EncodeHintType.CHARACTER_SET, charset.name());
}
hints.put(EncodeHintType.ERROR_CORRECTION, eccPercent);
AztecWriter writer = new AztecWriter();
BitMatrix matrix = writer.encode(data, BarcodeFormat.AZTEC, 0, 0, hints);
AztecCode aztec = Encoder.encode(data.getBytes(Charset.forName(charset)), eccPercent,
Encoder.DEFAULT_AZTEC_LAYERS);
AztecCode aztec = Encoder.encode(data, eccPercent,
Encoder.DEFAULT_AZTEC_LAYERS, charset);
assertEquals("Unexpected symbol format (compact)", compact, aztec.isCompact());
assertEquals("Unexpected nr. of layers", layers, aztec.getLayers());
BitMatrix matrix2 = aztec.getMatrix();
@ -521,7 +529,7 @@ public final class EncoderTest extends Assert {
AztecDetectorResult r =
new AztecDetectorResult(matrix, NO_POINTS, aztec.isCompact(), aztec.getCodeWords(), aztec.getLayers());
DecoderResult res = new Decoder().decode(r);
assertEquals(expectedData, res.getText());
assertEquals(data, res.getText());
// Check error correction by introducing up to eccPercent/2 errors
int ecWords = aztec.getCodeWords() * eccPercent / 100 / 2;
Random random = getPseudoRandom();
@ -537,7 +545,7 @@ public final class EncoderTest extends Assert {
}
r = new AztecDetectorResult(matrix, NO_POINTS, aztec.isCompact(), aztec.getCodeWords(), aztec.getLayers());
res = new Decoder().decode(r);
assertEquals(expectedData, res.getText());
assertEquals(data, res.getText());
}
private static Random getPseudoRandom() {

View file

@ -19,6 +19,7 @@ package com.google.zxing.common;
import org.junit.Assert;
import org.junit.Test;
import java.nio.charset.StandardCharsets;
import java.nio.charset.Charset;
/**
@ -28,34 +29,42 @@ public final class StringUtilsTestCase extends Assert {
@Test
public void testShortShiftJIS1() {
// ÈáëÈö
doTest(new byte[] { (byte) 0x8b, (byte) 0xe0, (byte) 0x8b, (byte) 0x9b, }, "SJIS");
// 金魚
doTest(new byte[] { (byte) 0x8b, (byte) 0xe0, (byte) 0x8b, (byte) 0x9b, }, StringUtils.SHIFT_JIS_CHARSET, "SJIS");
}
@Test
public void testShortISO885911() {
// bd
doTest(new byte[] { (byte) 0x62, (byte) 0xe5, (byte) 0x64, }, "ISO-8859-1");
// båd
doTest(new byte[] { (byte) 0x62, (byte) 0xe5, (byte) 0x64, }, StandardCharsets.ISO_8859_1, "ISO8859_1");
}
@Test
public void testShortUTF81() {
// Español
doTest(new byte[] { (byte) 0x45, (byte) 0x73, (byte) 0x70, (byte) 0x61, (byte) 0xc3,
(byte) 0xb1, (byte) 0x6f, (byte) 0x6c },
StandardCharsets.UTF_8, "UTF8");
}
@Test
public void testMixedShiftJIS1() {
// Hello Èáë!
// Hello 金!
doTest(new byte[] { (byte) 0x48, (byte) 0x65, (byte) 0x6c, (byte) 0x6c, (byte) 0x6f,
(byte) 0x20, (byte) 0x8b, (byte) 0xe0, (byte) 0x21, },
"SJIS");
StringUtils.SHIFT_JIS_CHARSET, "SJIS");
}
private static void doTest(byte[] bytes, String charsetName) {
Charset charset = Charset.forName(charsetName);
String guessedName = StringUtils.guessEncoding(bytes, null);
Charset guessedEncoding = Charset.forName(guessedName);
assertEquals(charset, guessedEncoding);
private static void doTest(byte[] bytes, Charset charset, String encoding) {
Charset guessedCharset = StringUtils.guessCharset(bytes, null);
String guessedEncoding = StringUtils.guessEncoding(bytes, null);
assertEquals(charset, guessedCharset);
assertEquals(encoding, guessedEncoding);
}
/**
* Utility for printing out a string in given encoding as a Java statement, since it's better
* to write that into the Java source file rather than risk character encoding issues in the
* to write that into the Java source file rather than risk character encoding issues in the
* source file itself.
*
* @param args command line arguments

View file

@ -19,6 +19,7 @@ package com.google.zxing.qrcode.encoder;
import com.google.zxing.EncodeHintType;
import com.google.zxing.WriterException;
import com.google.zxing.common.BitArray;
import com.google.zxing.common.StringUtils;
import com.google.zxing.qrcode.decoder.ErrorCorrectionLevel;
import com.google.zxing.qrcode.decoder.Mode;
import com.google.zxing.qrcode.decoder.Version;
@ -26,7 +27,6 @@ import com.google.zxing.qrcode.decoder.Version;
import org.junit.Assert;
import org.junit.Test;
import java.io.UnsupportedEncodingException;
import java.util.EnumMap;
import java.util.Map;
@ -127,7 +127,7 @@ public final class EncoderTestCase extends Assert {
">>\n";
assertEquals(expected, qrCode.toString());
}
@Test
public void testEncodeWithVersion() throws WriterException {
Map<EncodeHintType, Object> hints = new EnumMap<>(EncodeHintType.class);
@ -135,7 +135,7 @@ public final class EncoderTestCase extends Assert {
QRCode qrCode = Encoder.encode("ABCDEF", ErrorCorrectionLevel.H, hints);
assertTrue(qrCode.toString().contains(" version: 7\n"));
}
@Test(expected = WriterException.class)
public void testEncodeWithVersionTooSmall() throws WriterException {
Map<EncodeHintType, Object> hints = new EnumMap<>(EncodeHintType.class);
@ -742,12 +742,8 @@ public final class EncoderTestCase extends Assert {
assertEquals(expected, qrCode.toString());
}
private static String shiftJISString(byte[] bytes) throws WriterException {
try {
return new String(bytes, "Shift_JIS");
} catch (UnsupportedEncodingException uee) {
throw new WriterException(uee.toString());
}
private static String shiftJISString(byte[] bytes) {
return new String(bytes, StringUtils.SHIFT_JIS_CHARSET);
}
}