mirror of
https://github.com/zxing/zxing.git
synced 2025-02-02 05:41:08 -08:00
Aztec encode with ECI for non-default character sets (#1330)
* Aztec encoder: add ECI codes according to character set Added redundant methods to avoid modifying existing tests. * fix testAztecWriter - ISO-8859-1 cannot actually encode Euro symbol ('€'); this test case only passed before because the Decoder wasn't actually doing the bytes→String decode, but simply round-tripping an unknown byte. - Add extra test cases for implicit ISO-8859-1 (without ECI code), explicit ISO-8859-1 (with ECI code), and Shift_JIS * remove unnecessary conversion between String and byte[] in Aztec EncoderTest and DetectorTest * Aztec DecoderTest: use constants for charsets * Aztec Code: remove unnecessary conversion between Charset and Charset.name() strings * PDF417, QR, DataMatrix: remove unnecessary conversion between Charset and Charset.name() strings Includes replacing StringUtils.guessEncoding() with .guessCharset(), to return Charset rather than String. This change makes the tacit assumption that Shift_JIS charset *will* be available. There are existing comments suggesting that it might not always be available… but the existing *tests* assume it will be.
This commit is contained in:
parent
28d339e67d
commit
515688992b
|
@ -24,7 +24,6 @@ import com.google.zxing.aztec.encoder.Encoder;
|
|||
import com.google.zxing.common.BitMatrix;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
|
@ -39,7 +38,7 @@ public final class AztecWriter implements Writer {
|
|||
|
||||
@Override
|
||||
public BitMatrix encode(String contents, BarcodeFormat format, int width, int height, Map<EncodeHintType,?> hints) {
|
||||
Charset charset = StandardCharsets.ISO_8859_1;
|
||||
Charset charset = null; // Do not add any ECI code by default
|
||||
int eccPercent = Encoder.DEFAULT_EC_PERCENT;
|
||||
int layers = Encoder.DEFAULT_AZTEC_LAYERS;
|
||||
if (hints != null) {
|
||||
|
@ -62,7 +61,7 @@ public final class AztecWriter implements Writer {
|
|||
if (format != BarcodeFormat.AZTEC) {
|
||||
throw new IllegalArgumentException("Can only encode AZTEC, but got " + format);
|
||||
}
|
||||
AztecCode aztec = Encoder.encode(contents.getBytes(charset), eccPercent, layers);
|
||||
AztecCode aztec = Encoder.encode(contents, eccPercent, layers, charset);
|
||||
return renderResult(aztec, width, height);
|
||||
}
|
||||
|
||||
|
|
|
@ -177,7 +177,7 @@ public final class Decoder {
|
|||
eci = eci * 10 + (nextDigit - 2);
|
||||
}
|
||||
CharacterSetECI charsetECI = CharacterSetECI.getCharacterSetECIByValue(eci);
|
||||
encoding = Charset.forName(charsetECI.name());
|
||||
encoding = charsetECI.getCharset();
|
||||
}
|
||||
// Go back to whatever mode we had been in
|
||||
shiftTable = latchTable;
|
||||
|
|
|
@ -21,6 +21,9 @@ import com.google.zxing.common.BitMatrix;
|
|||
import com.google.zxing.common.reedsolomon.GenericGF;
|
||||
import com.google.zxing.common.reedsolomon.ReedSolomonEncoder;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
/**
|
||||
* Generates Aztec 2D barcodes.
|
||||
*
|
||||
|
@ -42,13 +45,66 @@ public final class Encoder {
|
|||
}
|
||||
|
||||
/**
|
||||
* Encodes the given binary content as an Aztec symbol
|
||||
* Encodes the given string content as an Aztec symbol (without ECI code)
|
||||
*
|
||||
* @param data input data string; must be encodable as ISO/IEC 8859-1 (Latin-1)
|
||||
* @return Aztec symbol matrix with metadata
|
||||
*/
|
||||
public static AztecCode encode(String data) {
|
||||
return encode(data.getBytes(StandardCharsets.ISO_8859_1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes the given string content as an Aztec symbol (without ECI code)
|
||||
*
|
||||
* @param data input data string; must be encodable as ISO/IEC 8859-1 (Latin-1)
|
||||
* @param minECCPercent minimal percentage of error check words (According to ISO/IEC 24778:2008,
|
||||
* a minimum of 23% + 3 words is recommended)
|
||||
* @param userSpecifiedLayers if non-zero, a user-specified value for the number of layers
|
||||
* @return Aztec symbol matrix with metadata
|
||||
*/
|
||||
public static AztecCode encode(String data, int minECCPercent, int userSpecifiedLayers) {
|
||||
return encode(data.getBytes(StandardCharsets.ISO_8859_1), minECCPercent, userSpecifiedLayers, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes the given string content as an Aztec symbol
|
||||
*
|
||||
* @param data input data string
|
||||
* @param minECCPercent minimal percentage of error check words (According to ISO/IEC 24778:2008,
|
||||
* a minimum of 23% + 3 words is recommended)
|
||||
* @param userSpecifiedLayers if non-zero, a user-specified value for the number of layers
|
||||
* @param charset character set in which to encode string using ECI; if null, no ECI code
|
||||
* will be inserted, and the string must be encodable as ISO/IEC 8859-1
|
||||
* (Latin-1), the default encoding of the symbol.
|
||||
* @return Aztec symbol matrix with metadata
|
||||
*/
|
||||
public static AztecCode encode(String data, int minECCPercent, int userSpecifiedLayers, Charset charset) {
|
||||
byte[] bytes = data.getBytes(null != charset ? charset : StandardCharsets.ISO_8859_1);
|
||||
return encode(bytes, minECCPercent, userSpecifiedLayers, charset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes the given binary content as an Aztec symbol (without ECI code)
|
||||
*
|
||||
* @param data input data bytes
|
||||
* @return Aztec symbol matrix with metadata
|
||||
*/
|
||||
public static AztecCode encode(byte[] data) {
|
||||
return encode(data, DEFAULT_EC_PERCENT, DEFAULT_AZTEC_LAYERS);
|
||||
return encode(data, DEFAULT_EC_PERCENT, DEFAULT_AZTEC_LAYERS, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes the given binary content as an Aztec symbol (without ECI code)
|
||||
*
|
||||
* @param data input data string
|
||||
* @param minECCPercent minimal percentage of error check words (According to ISO/IEC 24778:2008,
|
||||
* a minimum of 23% + 3 words is recommended)
|
||||
* @param userSpecifiedLayers if non-zero, a user-specified value for the number of layers
|
||||
* @return Aztec symbol matrix with metadata
|
||||
*/
|
||||
public static AztecCode encode(byte[] data, int minECCPercent, int userSpecifiedLayers) {
|
||||
return encode(data, minECCPercent, userSpecifiedLayers, null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -58,11 +114,13 @@ public final class Encoder {
|
|||
* @param minECCPercent minimal percentage of error check words (According to ISO/IEC 24778:2008,
|
||||
* a minimum of 23% + 3 words is recommended)
|
||||
* @param userSpecifiedLayers if non-zero, a user-specified value for the number of layers
|
||||
* @param charset character set to mark using ECI; if null, no ECI code will be inserted, and the
|
||||
* default encoding of ISO/IEC 8859-1 will be assumed by readers.
|
||||
* @return Aztec symbol matrix with metadata
|
||||
*/
|
||||
public static AztecCode encode(byte[] data, int minECCPercent, int userSpecifiedLayers) {
|
||||
public static AztecCode encode(byte[] data, int minECCPercent, int userSpecifiedLayers, Charset charset) {
|
||||
// High-level encode
|
||||
BitArray bits = new HighLevelEncoder(data).encode();
|
||||
BitArray bits = new HighLevelEncoder(data, charset).encode();
|
||||
|
||||
// stuff bits and choose symbol size
|
||||
int eccBits = bits.getSize() * minECCPercent / 100 + 11;
|
||||
|
|
|
@ -17,6 +17,9 @@
|
|||
package com.google.zxing.aztec.encoder;
|
||||
|
||||
import com.google.zxing.common.BitArray;
|
||||
import com.google.zxing.common.CharacterSetECI;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
|
@ -148,16 +151,31 @@ public final class HighLevelEncoder {
|
|||
}
|
||||
|
||||
private final byte[] text;
|
||||
private final Charset charset;
|
||||
|
||||
/**
 * Creates an encoder for the given bytes with a null character set.
 *
 * @param text bytes to encode
 */
public HighLevelEncoder(byte[] text) {
  this(text, null);
}
|
||||
|
||||
/**
 * Creates an encoder for the given bytes and character set.
 *
 * @param text bytes to encode
 * @param charset character set stored for later use by this encoder; may be null
 */
public HighLevelEncoder(byte[] text, Charset charset) {
  this.charset = charset;
  this.text = text;
}
|
||||
|
||||
/**
|
||||
* @return text represented by this encoder encoded as a {@link BitArray}
|
||||
*/
|
||||
public BitArray encode() {
|
||||
Collection<State> states = Collections.singletonList(State.INITIAL_STATE);
|
||||
State initialState = State.INITIAL_STATE;
|
||||
if (charset != null) {
|
||||
CharacterSetECI eci = CharacterSetECI.getCharacterSetECI(charset);
|
||||
if (null == eci) {
|
||||
throw new IllegalArgumentException("No ECI code for character set " + charset.toString());
|
||||
}
|
||||
initialState = initialState.appendFLGn(eci.getValue());
|
||||
}
|
||||
Collection<State> states = Collections.singletonList(initialState);
|
||||
for (int index = 0; index < text.length; index++) {
|
||||
int pairCode;
|
||||
int nextChar = index + 1 < text.length ? text[index + 1] : 0;
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
|
||||
package com.google.zxing.aztec.encoder;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import java.util.Deque;
|
||||
import java.util.LinkedList;
|
||||
|
||||
|
@ -70,6 +72,25 @@ final class State {
|
|||
return bitCount;
|
||||
}
|
||||
|
||||
State appendFLGn(int eci) {
|
||||
State result = shiftAndAppend(HighLevelEncoder.MODE_PUNCT, 0); // 0: FLG(n)
|
||||
Token token = result.token;
|
||||
int bitsAdded = 3;
|
||||
if (eci < 0) {
|
||||
token = token.add(0, 3); // 0: FNC1
|
||||
} else if (eci > 999999) {
|
||||
throw new IllegalArgumentException("ECI code must be between 0 and 999999");
|
||||
} else {
|
||||
byte[] eciDigits = Integer.toString(eci).getBytes(StandardCharsets.ISO_8859_1);
|
||||
token = token.add(eciDigits.length, 3); // 1-6: number of ECI digits
|
||||
for (int ii = 0; ii < eciDigits.length; ii++) {
|
||||
token = token.add(eciDigits[ii] - '0' + 2, 4);
|
||||
}
|
||||
bitsAdded += eciDigits.length * 4;
|
||||
}
|
||||
return new State(token, mode, 0, bitCount + bitsAdded);
|
||||
}
|
||||
|
||||
// Create a new state representing this state with a latch to a (not
|
||||
// necessarily different) mode, and then a code.
|
||||
State latchAndAppend(int mode, int value) {
|
||||
|
@ -143,7 +164,7 @@ final class State {
|
|||
newModeBitCount += calculateBinaryShiftCost(other) - calculateBinaryShiftCost(this);
|
||||
} else if (this.binaryShiftByteCount > other.binaryShiftByteCount && other.binaryShiftByteCount > 0) {
|
||||
// maximum possible additional cost (we end up exceeding the 31 byte boundary and other state can stay beneath it)
|
||||
newModeBitCount += 10;
|
||||
newModeBitCount += 10;
|
||||
}
|
||||
return newModeBitCount <= other.bitCount;
|
||||
}
|
||||
|
@ -168,7 +189,7 @@ final class State {
|
|||
public String toString() {
|
||||
return String.format("%s bits=%d bytes=%d", HighLevelEncoder.MODE_NAMES[mode], bitCount, binaryShiftByteCount);
|
||||
}
|
||||
|
||||
|
||||
private static int calculateBinaryShiftCost(State state) {
|
||||
if (state.binaryShiftByteCount > 62) {
|
||||
return 21; // B/S with extended length
|
||||
|
|
|
@ -18,6 +18,8 @@ package com.google.zxing.common;
|
|||
|
||||
import com.google.zxing.FormatException;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -93,6 +95,19 @@ public enum CharacterSetECI {
|
|||
return values[0];
|
||||
}
|
||||
|
||||
public Charset getCharset() {
|
||||
return Charset.forName(name());
|
||||
}
|
||||
|
||||
/**
|
||||
* @param charset Java character set object
|
||||
* @return CharacterSetECI representing ECI for character encoding, or null if it is legal
|
||||
* but unsupported
|
||||
*/
|
||||
public static CharacterSetECI getCharacterSetECI(Charset charset) {
|
||||
return NAME_TO_ECI.get(charset.name());
|
||||
}
|
||||
|
||||
/**
|
||||
* @param value character set ECI value
|
||||
* @return {@code CharacterSetECI} representing ECI of given value, or null if it is legal but
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package com.google.zxing.common;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.zxing.DecodeHintType;
|
||||
|
@ -29,15 +30,17 @@ import com.google.zxing.DecodeHintType;
|
|||
*/
|
||||
public final class StringUtils {
|
||||
|
||||
private static final String PLATFORM_DEFAULT_ENCODING = Charset.defaultCharset().name();
|
||||
private static final Charset PLATFORM_DEFAULT_ENCODING = Charset.defaultCharset();
|
||||
public static final Charset SHIFT_JIS_CHARSET = Charset.forName("SJIS");
|
||||
public static final Charset GB2312_CHARSET = Charset.forName("GB2312");
|
||||
private static final Charset EUC_JP = Charset.forName("EUC_JP");
|
||||
private static final boolean ASSUME_SHIFT_JIS =
|
||||
SHIFT_JIS_CHARSET.equals(PLATFORM_DEFAULT_ENCODING) ||
|
||||
EUC_JP.equals(PLATFORM_DEFAULT_ENCODING);
|
||||
|
||||
// Retained for ABI compatibility with earlier versions
|
||||
public static final String SHIFT_JIS = "SJIS";
|
||||
public static final String GB2312 = "GB2312";
|
||||
private static final String EUC_JP = "EUC_JP";
|
||||
private static final String UTF8 = "UTF8";
|
||||
private static final String ISO88591 = "ISO8859_1";
|
||||
private static final boolean ASSUME_SHIFT_JIS =
|
||||
SHIFT_JIS.equalsIgnoreCase(PLATFORM_DEFAULT_ENCODING) ||
|
||||
EUC_JP.equalsIgnoreCase(PLATFORM_DEFAULT_ENCODING);
|
||||
|
||||
private StringUtils() { }
|
||||
|
||||
|
@ -45,12 +48,32 @@ public final class StringUtils {
|
|||
* @param bytes bytes encoding a string, whose encoding should be guessed
|
||||
* @param hints decode hints if applicable
|
||||
* @return name of guessed encoding; at the moment will only guess one of:
|
||||
* {@link #SHIFT_JIS}, {@link #UTF8}, {@link #ISO88591}, or the platform
|
||||
* default encoding if none of these can possibly be correct
|
||||
* "SJIS", "UTF8", "ISO8859_1", or the platform default encoding if none
|
||||
* of these can possibly be correct
|
||||
*/
|
||||
public static String guessEncoding(byte[] bytes, Map<DecodeHintType,?> hints) {
|
||||
Charset c = guessCharset(bytes, hints);
|
||||
if (c == SHIFT_JIS_CHARSET) {
|
||||
return "SJIS";
|
||||
} else if (c == StandardCharsets.UTF_8) {
|
||||
return "UTF8";
|
||||
} else if (c == StandardCharsets.ISO_8859_1) {
|
||||
return "ISO8859_1";
|
||||
}
|
||||
return c.name();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param bytes bytes encoding a string, whose encoding should be guessed
|
||||
* @param hints decode hints if applicable
|
||||
* @return Charset of guessed encoding; at the moment will only guess one of:
|
||||
* {@link #SHIFT_JIS_CHARSET}, {@link StandardCharsets#UTF_8},
|
||||
* {@link StandardCharsets#ISO_8859_1}, or the platform default encoding if
|
||||
* none of these can possibly be correct
|
||||
*/
|
||||
public static Charset guessCharset(byte[] bytes, Map<DecodeHintType,?> hints) {
|
||||
if (hints != null && hints.containsKey(DecodeHintType.CHARACTER_SET)) {
|
||||
return hints.get(DecodeHintType.CHARACTER_SET).toString();
|
||||
return Charset.forName(hints.get(DecodeHintType.CHARACTER_SET).toString());
|
||||
}
|
||||
// For now, merely tries to distinguish ISO-8859-1, UTF-8 and Shift_JIS,
|
||||
// which should be by far the most common encodings.
|
||||
|
@ -164,11 +187,11 @@ public final class StringUtils {
|
|||
|
||||
// Easy -- if there is BOM or at least 1 valid not-single byte character (and no evidence it can't be UTF-8), done
|
||||
if (canBeUTF8 && (utf8bom || utf2BytesChars + utf3BytesChars + utf4BytesChars > 0)) {
|
||||
return UTF8;
|
||||
return StandardCharsets.UTF_8;
|
||||
}
|
||||
// Easy -- if assuming Shift_JIS or >= 3 valid consecutive not-ascii characters (and no evidence it can't be), done
|
||||
if (canBeShiftJIS && (ASSUME_SHIFT_JIS || sjisMaxKatakanaWordLength >= 3 || sjisMaxDoubleBytesWordLength >= 3)) {
|
||||
return SHIFT_JIS;
|
||||
return SHIFT_JIS_CHARSET;
|
||||
}
|
||||
// Distinguishing Shift_JIS and ISO-8859-1 can be a little tough for short words. The crude heuristic is:
|
||||
// - If we saw
|
||||
|
@ -177,18 +200,18 @@ public final class StringUtils {
|
|||
// - then we conclude Shift_JIS, else ISO-8859-1
|
||||
if (canBeISO88591 && canBeShiftJIS) {
|
||||
return (sjisMaxKatakanaWordLength == 2 && sjisKatakanaChars == 2) || isoHighOther * 10 >= length
|
||||
? SHIFT_JIS : ISO88591;
|
||||
? SHIFT_JIS_CHARSET : StandardCharsets.ISO_8859_1;
|
||||
}
|
||||
|
||||
// Otherwise, try in order ISO-8859-1, Shift JIS, UTF-8 and fall back to default platform encoding
|
||||
if (canBeISO88591) {
|
||||
return ISO88591;
|
||||
return StandardCharsets.ISO_8859_1;
|
||||
}
|
||||
if (canBeShiftJIS) {
|
||||
return SHIFT_JIS;
|
||||
return SHIFT_JIS_CHARSET;
|
||||
}
|
||||
if (canBeUTF8) {
|
||||
return UTF8;
|
||||
return StandardCharsets.UTF_8;
|
||||
}
|
||||
// Otherwise, we take a wild guess with platform encoding
|
||||
return PLATFORM_DEFAULT_ENCODING;
|
||||
|
|
|
@ -20,7 +20,7 @@ import com.google.zxing.FormatException;
|
|||
import com.google.zxing.common.BitSource;
|
||||
import com.google.zxing.common.DecoderResult;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
@ -505,11 +505,7 @@ final class DecodedBitStreamParser {
|
|||
bytes[i] = (byte) unrandomize255State(bits.readBits(8), codewordPosition++);
|
||||
}
|
||||
byteSegments.add(bytes);
|
||||
try {
|
||||
result.append(new String(bytes, "ISO8859_1"));
|
||||
} catch (UnsupportedEncodingException uee) {
|
||||
throw new IllegalStateException("Platform does not support required encoding: " + uee);
|
||||
}
|
||||
result.append(new String(bytes, StandardCharsets.ISO_8859_1));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -125,7 +125,7 @@ final class DecodedBitStreamParser {
|
|||
case ECI_CHARSET:
|
||||
CharacterSetECI charsetECI =
|
||||
CharacterSetECI.getCharacterSetECIByValue(codewords[codeIndex++]);
|
||||
encoding = Charset.forName(charsetECI.name());
|
||||
encoding = charsetECI.getCharset();
|
||||
break;
|
||||
case ECI_GENERAL_PURPOSE:
|
||||
// Can't do anything with generic ECI; skip its 2 characters
|
||||
|
|
|
@ -169,7 +169,7 @@ final class PDF417HighLevelEncoder {
|
|||
if (encoding == null) {
|
||||
encoding = DEFAULT_ENCODING;
|
||||
} else if (!DEFAULT_ENCODING.equals(encoding)) {
|
||||
CharacterSetECI eci = CharacterSetECI.getCharacterSetECIByName(encoding.name());
|
||||
CharacterSetECI eci = CharacterSetECI.getCharacterSetECI(encoding);
|
||||
if (eci != null) {
|
||||
encodingECI(eci.getValue(), sb);
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ import com.google.zxing.common.CharacterSetECI;
|
|||
import com.google.zxing.common.DecoderResult;
|
||||
import com.google.zxing.common.StringUtils;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
@ -173,11 +173,7 @@ final class DecodedBitStreamParser {
|
|||
count--;
|
||||
}
|
||||
|
||||
try {
|
||||
result.append(new String(buffer, StringUtils.GB2312));
|
||||
} catch (UnsupportedEncodingException ignored) {
|
||||
throw FormatException.getFormatInstance();
|
||||
}
|
||||
result.append(new String(buffer, StringUtils.GB2312_CHARSET));
|
||||
}
|
||||
|
||||
private static void decodeKanjiSegment(BitSource bits,
|
||||
|
@ -208,12 +204,7 @@ final class DecodedBitStreamParser {
|
|||
offset += 2;
|
||||
count--;
|
||||
}
|
||||
// Shift_JIS may not be supported in some environments:
|
||||
try {
|
||||
result.append(new String(buffer, StringUtils.SHIFT_JIS));
|
||||
} catch (UnsupportedEncodingException ignored) {
|
||||
throw FormatException.getFormatInstance();
|
||||
}
|
||||
result.append(new String(buffer, StringUtils.SHIFT_JIS_CHARSET));
|
||||
}
|
||||
|
||||
private static void decodeByteSegment(BitSource bits,
|
||||
|
@ -231,22 +222,18 @@ final class DecodedBitStreamParser {
|
|||
for (int i = 0; i < count; i++) {
|
||||
readBytes[i] = (byte) bits.readBits(8);
|
||||
}
|
||||
String encoding;
|
||||
Charset encoding;
|
||||
if (currentCharacterSetECI == null) {
|
||||
// The spec isn't clear on this mode; see
|
||||
// section 6.4.5: it does not say which encoding to assume
|
||||
// upon decoding. I have seen ISO-8859-1 used as well as
|
||||
// Shift_JIS -- without anything like an ECI designator to
|
||||
// give a hint.
|
||||
encoding = StringUtils.guessEncoding(readBytes, hints);
|
||||
encoding = StringUtils.guessCharset(readBytes, hints);
|
||||
} else {
|
||||
encoding = currentCharacterSetECI.name();
|
||||
}
|
||||
try {
|
||||
result.append(new String(readBytes, encoding));
|
||||
} catch (UnsupportedEncodingException ignored) {
|
||||
throw FormatException.getFormatInstance();
|
||||
encoding = currentCharacterSetECI.getCharset();
|
||||
}
|
||||
result.append(new String(readBytes, encoding));
|
||||
byteSegments.add(readBytes);
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@ package com.google.zxing.qrcode.encoder;
|
|||
import com.google.zxing.EncodeHintType;
|
||||
import com.google.zxing.WriterException;
|
||||
import com.google.zxing.common.BitArray;
|
||||
import com.google.zxing.common.StringUtils;
|
||||
import com.google.zxing.common.CharacterSetECI;
|
||||
import com.google.zxing.common.reedsolomon.GenericGF;
|
||||
import com.google.zxing.common.reedsolomon.ReedSolomonEncoder;
|
||||
|
@ -26,7 +27,8 @@ import com.google.zxing.qrcode.decoder.ErrorCorrectionLevel;
|
|||
import com.google.zxing.qrcode.decoder.Mode;
|
||||
import com.google.zxing.qrcode.decoder.Version;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
|
@ -47,7 +49,7 @@ public final class Encoder {
|
|||
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, // 0x50-0x5f
|
||||
};
|
||||
|
||||
static final String DEFAULT_BYTE_MODE_ENCODING = "ISO-8859-1";
|
||||
static final Charset DEFAULT_BYTE_MODE_ENCODING = StandardCharsets.ISO_8859_1;
|
||||
|
||||
private Encoder() {
|
||||
}
|
||||
|
@ -77,10 +79,10 @@ public final class Encoder {
|
|||
Map<EncodeHintType,?> hints) throws WriterException {
|
||||
|
||||
// Determine what character encoding has been specified by the caller, if any
|
||||
String encoding = DEFAULT_BYTE_MODE_ENCODING;
|
||||
Charset encoding = DEFAULT_BYTE_MODE_ENCODING;
|
||||
boolean hasEncodingHint = hints != null && hints.containsKey(EncodeHintType.CHARACTER_SET);
|
||||
if (hasEncodingHint) {
|
||||
encoding = hints.get(EncodeHintType.CHARACTER_SET).toString();
|
||||
encoding = Charset.forName(hints.get(EncodeHintType.CHARACTER_SET).toString());
|
||||
}
|
||||
|
||||
// Pick an encoding mode appropriate for the content. Note that this will not attempt to use
|
||||
|
@ -93,7 +95,7 @@ public final class Encoder {
|
|||
|
||||
// Append ECI segment if applicable
|
||||
if (mode == Mode.BYTE && hasEncodingHint) {
|
||||
CharacterSetECI eci = CharacterSetECI.getCharacterSetECIByName(encoding);
|
||||
CharacterSetECI eci = CharacterSetECI.getCharacterSetECI(encoding);
|
||||
if (eci != null) {
|
||||
appendECI(eci, headerBits);
|
||||
}
|
||||
|
@ -221,8 +223,8 @@ public final class Encoder {
|
|||
* Choose the best mode by examining the content. Note that 'encoding' is used as a hint;
|
||||
* if it is Shift_JIS, and the input is only double-byte Kanji, then we return {@link Mode#KANJI}.
|
||||
*/
|
||||
private static Mode chooseMode(String content, String encoding) {
|
||||
if ("Shift_JIS".equals(encoding) && isOnlyDoubleByteKanji(content)) {
|
||||
private static Mode chooseMode(String content, Charset encoding) {
|
||||
if (StringUtils.SHIFT_JIS_CHARSET.equals(encoding) && isOnlyDoubleByteKanji(content)) {
|
||||
// Choose Kanji mode if all input are double-byte characters
|
||||
return Mode.KANJI;
|
||||
}
|
||||
|
@ -248,12 +250,7 @@ public final class Encoder {
|
|||
}
|
||||
|
||||
private static boolean isOnlyDoubleByteKanji(String content) {
|
||||
byte[] bytes;
|
||||
try {
|
||||
bytes = content.getBytes("Shift_JIS");
|
||||
} catch (UnsupportedEncodingException ignored) {
|
||||
return false;
|
||||
}
|
||||
byte[] bytes = content.getBytes(StringUtils.SHIFT_JIS_CHARSET);
|
||||
int length = bytes.length;
|
||||
if (length % 2 != 0) {
|
||||
return false;
|
||||
|
@ -512,7 +509,7 @@ public final class Encoder {
|
|||
static void appendBytes(String content,
|
||||
Mode mode,
|
||||
BitArray bits,
|
||||
String encoding) throws WriterException {
|
||||
Charset encoding) throws WriterException {
|
||||
switch (mode) {
|
||||
case NUMERIC:
|
||||
appendNumericBytes(content, bits);
|
||||
|
@ -579,26 +576,15 @@ public final class Encoder {
|
|||
}
|
||||
}
|
||||
|
||||
static void append8BitBytes(String content, BitArray bits, String encoding)
|
||||
throws WriterException {
|
||||
byte[] bytes;
|
||||
try {
|
||||
bytes = content.getBytes(encoding);
|
||||
} catch (UnsupportedEncodingException uee) {
|
||||
throw new WriterException(uee);
|
||||
}
|
||||
static void append8BitBytes(String content, BitArray bits, Charset encoding) {
|
||||
byte[] bytes = content.getBytes(encoding);
|
||||
for (byte b : bytes) {
|
||||
bits.appendBits(b, 8);
|
||||
}
|
||||
}
|
||||
|
||||
static void appendKanjiBytes(String content, BitArray bits) throws WriterException {
|
||||
byte[] bytes;
|
||||
try {
|
||||
bytes = content.getBytes("Shift_JIS");
|
||||
} catch (UnsupportedEncodingException uee) {
|
||||
throw new WriterException(uee);
|
||||
}
|
||||
byte[] bytes = content.getBytes(StringUtils.SHIFT_JIS_CHARSET);
|
||||
if (bytes.length % 2 != 0) {
|
||||
throw new WriterException("Kanji byte size not even");
|
||||
}
|
||||
|
|
|
@ -27,7 +27,6 @@ import com.google.zxing.common.DecoderResult;
|
|||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
|
@ -62,7 +61,7 @@ public final class DetectorTest extends Assert {
|
|||
|
||||
// Test that we can tolerate errors in the parameter locator bits
|
||||
private static void testErrorInParameterLocator(String data) throws Exception {
|
||||
AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1), 25, Encoder.DEFAULT_AZTEC_LAYERS);
|
||||
AztecCode aztec = Encoder.encode(data, 25, Encoder.DEFAULT_AZTEC_LAYERS);
|
||||
Random random = new Random(aztec.getMatrix().hashCode()); // pseudo-random, but deterministic
|
||||
int layers = aztec.getLayers();
|
||||
boolean compact = aztec.isCompact();
|
||||
|
|
|
@ -44,6 +44,12 @@ import java.util.regex.Pattern;
|
|||
*/
|
||||
public final class EncoderTest extends Assert {
|
||||
|
||||
private static final Charset ISO_8859_1 = StandardCharsets.ISO_8859_1;
|
||||
private static final Charset UTF_8 = StandardCharsets.UTF_8;
|
||||
private static final Charset SHIFT_JIS = Charset.forName("Shift_JIS");
|
||||
private static final Charset ISO_8859_15 = Charset.forName("ISO-8859-15");
|
||||
private static final Charset WINDOWS_1252 = Charset.forName("Windows-1252");
|
||||
|
||||
private static final Pattern DOTX = Pattern.compile("[^.X]");
|
||||
private static final Pattern SPACES = Pattern.compile("\\s+");
|
||||
private static final ResultPoint[] NO_POINTS = new ResultPoint[0];
|
||||
|
@ -128,17 +134,20 @@ public final class EncoderTest extends Assert {
|
|||
|
||||
@Test
|
||||
public void testAztecWriter() throws Exception {
|
||||
testWriter("\u20AC 1 sample data.", "ISO-8859-1", 25, true, 2);
|
||||
testWriter("\u20AC 1 sample data.", "ISO-8859-15", 25, true, 2);
|
||||
testWriter("\u20AC 1 sample data.", "UTF-8", 25, true, 2);
|
||||
testWriter("\u20AC 1 sample data.", "UTF-8", 100, true, 3);
|
||||
testWriter("\u20AC 1 sample data.", "UTF-8", 300, true, 4);
|
||||
testWriter("\u20AC 1 sample data.", "UTF-8", 500, false, 5);
|
||||
testWriter("Espa\u00F1ol", null, 25, true, 1); // Without ECI (implicit ISO-8859-1)
|
||||
testWriter("Espa\u00F1ol", ISO_8859_1, 25, true, 1); // Explicit ISO-8859-1
|
||||
testWriter("\u20AC 1 sample data.", WINDOWS_1252, 25, true, 2); // Standard ISO-8859-1 cannot encode Euro symbol; Windows-1252 superset can
|
||||
testWriter("\u20AC 1 sample data.", ISO_8859_15, 25, true, 2);
|
||||
testWriter("\u20AC 1 sample data.", UTF_8, 25, true, 2);
|
||||
testWriter("\u20AC 1 sample data.", UTF_8, 100, true, 3);
|
||||
testWriter("\u20AC 1 sample data.", UTF_8, 300, true, 4);
|
||||
testWriter("\u20AC 1 sample data.", UTF_8, 500, false, 5);
|
||||
testWriter("The capital of Japan is named \u6771\u4EAC.", SHIFT_JIS, 25, true, 3);
|
||||
// Test AztecWriter defaults
|
||||
String data = "In ut magna vel mauris malesuada";
|
||||
AztecWriter writer = new AztecWriter();
|
||||
BitMatrix matrix = writer.encode(data, BarcodeFormat.AZTEC, 0, 0);
|
||||
AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1),
|
||||
AztecCode aztec = Encoder.encode(data,
|
||||
Encoder.DEFAULT_EC_PERCENT, Encoder.DEFAULT_AZTEC_LAYERS);
|
||||
BitMatrix expectedMatrix = aztec.getMatrix();
|
||||
assertEquals(matrix, expectedMatrix);
|
||||
|
@ -418,7 +427,7 @@ public final class EncoderTest extends Assert {
|
|||
|
||||
@Test
|
||||
public void testUserSpecifiedLayers() throws FormatException {
|
||||
byte[] alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".getBytes(StandardCharsets.ISO_8859_1);
|
||||
String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
AztecCode aztec = Encoder.encode(alphabet, 25, -2);
|
||||
assertEquals(2, aztec.getLayers());
|
||||
assertTrue(aztec.isCompact());
|
||||
|
@ -449,22 +458,21 @@ public final class EncoderTest extends Assert {
|
|||
String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
||||
// encodes as 26 * 5 * 4 = 520 bits of data
|
||||
String alphabet4 = alphabet + alphabet + alphabet + alphabet;
|
||||
byte[] data = alphabet4.getBytes(StandardCharsets.ISO_8859_1);
|
||||
try {
|
||||
Encoder.encode(data, 0, -4);
|
||||
Encoder.encode(alphabet4, 0, -4);
|
||||
fail("Encode should have failed. Text can't fit in 1-layer compact");
|
||||
} catch (IllegalArgumentException expected) {
|
||||
// continue
|
||||
}
|
||||
|
||||
// If we just try to encode it normally, it will go to a non-compact 4 layer
|
||||
AztecCode aztecCode = Encoder.encode(data, 0, Encoder.DEFAULT_AZTEC_LAYERS);
|
||||
AztecCode aztecCode = Encoder.encode(alphabet4, 0, Encoder.DEFAULT_AZTEC_LAYERS);
|
||||
assertFalse(aztecCode.isCompact());
|
||||
assertEquals(4, aztecCode.getLayers());
|
||||
|
||||
// But shortening the string to 100 bytes (500 bits of data), compact works fine, even if we
|
||||
// include more error checking.
|
||||
aztecCode = Encoder.encode(alphabet4.substring(0, 100).getBytes(StandardCharsets.ISO_8859_1), 10, Encoder.DEFAULT_AZTEC_LAYERS);
|
||||
aztecCode = Encoder.encode(alphabet4.substring(0, 100), 10, Encoder.DEFAULT_AZTEC_LAYERS);
|
||||
assertTrue(aztecCode.isCompact());
|
||||
assertEquals(4, aztecCode.getLayers());
|
||||
}
|
||||
|
@ -472,7 +480,7 @@ public final class EncoderTest extends Assert {
|
|||
// Helper routines
|
||||
|
||||
private static void testEncode(String data, boolean compact, int layers, String expected) throws FormatException {
|
||||
AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1), 33, Encoder.DEFAULT_AZTEC_LAYERS);
|
||||
AztecCode aztec = Encoder.encode(data, 33, Encoder.DEFAULT_AZTEC_LAYERS);
|
||||
assertEquals("Unexpected symbol format (compact)", compact, aztec.isCompact());
|
||||
assertEquals("Unexpected nr. of layers", layers, aztec.getLayers());
|
||||
BitMatrix matrix = aztec.getMatrix();
|
||||
|
@ -480,7 +488,7 @@ public final class EncoderTest extends Assert {
|
|||
}
|
||||
|
||||
private static void testEncodeDecode(String data, boolean compact, int layers) throws Exception {
|
||||
AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1), 25, Encoder.DEFAULT_AZTEC_LAYERS);
|
||||
AztecCode aztec = Encoder.encode(data, 25, Encoder.DEFAULT_AZTEC_LAYERS);
|
||||
assertEquals("Unexpected symbol format (compact)", compact, aztec.isCompact());
|
||||
assertEquals("Unexpected nr. of layers", layers, aztec.getLayers());
|
||||
BitMatrix matrix = aztec.getMatrix();
|
||||
|
@ -500,20 +508,20 @@ public final class EncoderTest extends Assert {
|
|||
}
|
||||
|
||||
private static void testWriter(String data,
|
||||
String charset,
|
||||
Charset charset,
|
||||
int eccPercent,
|
||||
boolean compact,
|
||||
int layers) throws FormatException {
|
||||
// 1. Perform an encode-decode round-trip because it can be lossy.
|
||||
// 2. Aztec Decoder currently always decodes the data with a LATIN-1 charset:
|
||||
String expectedData = new String(data.getBytes(Charset.forName(charset)), StandardCharsets.ISO_8859_1);
|
||||
// Perform an encode-decode round-trip because it can be lossy.
|
||||
Map<EncodeHintType,Object> hints = new EnumMap<>(EncodeHintType.class);
|
||||
hints.put(EncodeHintType.CHARACTER_SET, charset);
|
||||
if (null != charset) {
|
||||
hints.put(EncodeHintType.CHARACTER_SET, charset.name());
|
||||
}
|
||||
hints.put(EncodeHintType.ERROR_CORRECTION, eccPercent);
|
||||
AztecWriter writer = new AztecWriter();
|
||||
BitMatrix matrix = writer.encode(data, BarcodeFormat.AZTEC, 0, 0, hints);
|
||||
AztecCode aztec = Encoder.encode(data.getBytes(Charset.forName(charset)), eccPercent,
|
||||
Encoder.DEFAULT_AZTEC_LAYERS);
|
||||
AztecCode aztec = Encoder.encode(data, eccPercent,
|
||||
Encoder.DEFAULT_AZTEC_LAYERS, charset);
|
||||
assertEquals("Unexpected symbol format (compact)", compact, aztec.isCompact());
|
||||
assertEquals("Unexpected nr. of layers", layers, aztec.getLayers());
|
||||
BitMatrix matrix2 = aztec.getMatrix();
|
||||
|
@ -521,7 +529,7 @@ public final class EncoderTest extends Assert {
|
|||
AztecDetectorResult r =
|
||||
new AztecDetectorResult(matrix, NO_POINTS, aztec.isCompact(), aztec.getCodeWords(), aztec.getLayers());
|
||||
DecoderResult res = new Decoder().decode(r);
|
||||
assertEquals(expectedData, res.getText());
|
||||
assertEquals(data, res.getText());
|
||||
// Check error correction by introducing up to eccPercent/2 errors
|
||||
int ecWords = aztec.getCodeWords() * eccPercent / 100 / 2;
|
||||
Random random = getPseudoRandom();
|
||||
|
@ -537,7 +545,7 @@ public final class EncoderTest extends Assert {
|
|||
}
|
||||
r = new AztecDetectorResult(matrix, NO_POINTS, aztec.isCompact(), aztec.getCodeWords(), aztec.getLayers());
|
||||
res = new Decoder().decode(r);
|
||||
assertEquals(expectedData, res.getText());
|
||||
assertEquals(data, res.getText());
|
||||
}
|
||||
|
||||
private static Random getPseudoRandom() {
|
||||
|
|
|
@ -19,6 +19,7 @@ package com.google.zxing.common;
|
|||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
/**
|
||||
|
@ -28,34 +29,42 @@ public final class StringUtilsTestCase extends Assert {
|
|||
|
||||
@Test
|
||||
public void testShortShiftJIS1() {
|
||||
// 金魚
|
||||
doTest(new byte[] { (byte) 0x8b, (byte) 0xe0, (byte) 0x8b, (byte) 0x9b, }, "SJIS");
|
||||
// 金魚
|
||||
doTest(new byte[] { (byte) 0x8b, (byte) 0xe0, (byte) 0x8b, (byte) 0x9b, }, StringUtils.SHIFT_JIS_CHARSET, "SJIS");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testShortISO885911() {
|
||||
// båd
|
||||
doTest(new byte[] { (byte) 0x62, (byte) 0xe5, (byte) 0x64, }, "ISO-8859-1");
|
||||
// båd
|
||||
doTest(new byte[] { (byte) 0x62, (byte) 0xe5, (byte) 0x64, }, StandardCharsets.ISO_8859_1, "ISO8859_1");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testShortUTF81() {
|
||||
// Español
|
||||
doTest(new byte[] { (byte) 0x45, (byte) 0x73, (byte) 0x70, (byte) 0x61, (byte) 0xc3,
|
||||
(byte) 0xb1, (byte) 0x6f, (byte) 0x6c },
|
||||
StandardCharsets.UTF_8, "UTF8");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMixedShiftJIS1() {
|
||||
// Hello Èáë!
|
||||
// Hello 金!
|
||||
doTest(new byte[] { (byte) 0x48, (byte) 0x65, (byte) 0x6c, (byte) 0x6c, (byte) 0x6f,
|
||||
(byte) 0x20, (byte) 0x8b, (byte) 0xe0, (byte) 0x21, },
|
||||
"SJIS");
|
||||
StringUtils.SHIFT_JIS_CHARSET, "SJIS");
|
||||
}
|
||||
|
||||
private static void doTest(byte[] bytes, String charsetName) {
|
||||
Charset charset = Charset.forName(charsetName);
|
||||
String guessedName = StringUtils.guessEncoding(bytes, null);
|
||||
Charset guessedEncoding = Charset.forName(guessedName);
|
||||
assertEquals(charset, guessedEncoding);
|
||||
private static void doTest(byte[] bytes, Charset charset, String encoding) {
|
||||
Charset guessedCharset = StringUtils.guessCharset(bytes, null);
|
||||
String guessedEncoding = StringUtils.guessEncoding(bytes, null);
|
||||
assertEquals(charset, guessedCharset);
|
||||
assertEquals(encoding, guessedEncoding);
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility for printing out a string in given encoding as a Java statement, since it's better
|
||||
* to write that into the Java source file rather than risk character encoding issues in the
|
||||
* to write that into the Java source file rather than risk character encoding issues in the
|
||||
* source file itself.
|
||||
*
|
||||
* @param args command line arguments
|
||||
|
|
|
@ -19,6 +19,7 @@ package com.google.zxing.qrcode.encoder;
|
|||
import com.google.zxing.EncodeHintType;
|
||||
import com.google.zxing.WriterException;
|
||||
import com.google.zxing.common.BitArray;
|
||||
import com.google.zxing.common.StringUtils;
|
||||
import com.google.zxing.qrcode.decoder.ErrorCorrectionLevel;
|
||||
import com.google.zxing.qrcode.decoder.Mode;
|
||||
import com.google.zxing.qrcode.decoder.Version;
|
||||
|
@ -26,7 +27,6 @@ import com.google.zxing.qrcode.decoder.Version;
|
|||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.EnumMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -127,7 +127,7 @@ public final class EncoderTestCase extends Assert {
|
|||
">>\n";
|
||||
assertEquals(expected, qrCode.toString());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testEncodeWithVersion() throws WriterException {
|
||||
Map<EncodeHintType, Object> hints = new EnumMap<>(EncodeHintType.class);
|
||||
|
@ -135,7 +135,7 @@ public final class EncoderTestCase extends Assert {
|
|||
QRCode qrCode = Encoder.encode("ABCDEF", ErrorCorrectionLevel.H, hints);
|
||||
assertTrue(qrCode.toString().contains(" version: 7\n"));
|
||||
}
|
||||
|
||||
|
||||
@Test(expected = WriterException.class)
|
||||
public void testEncodeWithVersionTooSmall() throws WriterException {
|
||||
Map<EncodeHintType, Object> hints = new EnumMap<>(EncodeHintType.class);
|
||||
|
@ -742,12 +742,8 @@ public final class EncoderTestCase extends Assert {
|
|||
assertEquals(expected, qrCode.toString());
|
||||
}
|
||||
|
||||
private static String shiftJISString(byte[] bytes) throws WriterException {
|
||||
try {
|
||||
return new String(bytes, "Shift_JIS");
|
||||
} catch (UnsupportedEncodingException uee) {
|
||||
throw new WriterException(uee.toString());
|
||||
}
|
||||
private static String shiftJISString(byte[] bytes) {
|
||||
return new String(bytes, StringUtils.SHIFT_JIS_CHARSET);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue