Aztec encode with ECI for non-default character sets (#1330)

* Aztec encoder: add ECI codes according to character set. Added redundant methods to avoid modifying existing tests. * fix testAztecWriter - ISO-8859-1 cannot actually encode Euro symbol ('€'); this test case only passed before because the Decoder wasn't actually doing the bytes→String decode, but simply round-tripping an unknown byte. - Add extra test cases for implicit ISO-8859-1 (without ECI code), explicit ISO-8859-1 (with ECI code), and Shift_JIS * remove unnecessary conversion between String and byte[] in Aztec EncoderTest and DetectorTest * Aztec DecoderTest: use constants for charsets * Aztec Code: remove unnecessary conversion between Charset and Charset.name() strings * PDF417, QR, DataMatrix: remove unnecessary conversion between Charset and Charset.name() strings Includes replacing StringUtils.guessEncoding() with .guessCharset(), to return Charset rather than String. This change makes the tacit assumption that Shift_JIS charset *will* be available. There are existing comments suggesting that it might not always be available… but the existing *tests* assume it will be.
2025-03-05 20:48:51 -08:00 · 2020-11-07 14:26:24 -08:00 · 2020-11-07 14:26:24 -08:00 · 515688992b
parent 28d339e67d
commit 515688992b
16 changed files with 244 additions and 129 deletions
--- a/core/src/main/java/com/google/zxing/aztec/AztecWriter.java
+++ b/core/src/main/java/com/google/zxing/aztec/AztecWriter.java
@ -24,7 +24,6 @@ import com.google.zxing.aztec.encoder.Encoder;
 import com.google.zxing.common.BitMatrix;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.Map;
 /**
@ -39,7 +38,7 @@ public final class AztecWriter implements Writer {
  @Override
  public BitMatrix encode(String contents, BarcodeFormat format, int width, int height, Map<EncodeHintType,?> hints) {
-    Charset charset = StandardCharsets.ISO_8859_1;
+    Charset charset = null; // Do not add any ECI code by default
    int eccPercent = Encoder.DEFAULT_EC_PERCENT;
    int layers = Encoder.DEFAULT_AZTEC_LAYERS;
    if (hints != null) {
@ -62,7 +61,7 @@ public final class AztecWriter implements Writer {
    if (format != BarcodeFormat.AZTEC) {
      throw new IllegalArgumentException("Can only encode AZTEC, but got " + format);
    }
-    AztecCode aztec = Encoder.encode(contents.getBytes(charset), eccPercent, layers);
+    AztecCode aztec = Encoder.encode(contents, eccPercent, layers, charset);
    return renderResult(aztec, width, height);
  }
--- a/core/src/main/java/com/google/zxing/aztec/decoder/Decoder.java
+++ b/core/src/main/java/com/google/zxing/aztec/decoder/Decoder.java
@ -177,7 +177,7 @@ public final class Decoder {
                eci = eci * 10 + (nextDigit - 2);
              }
              CharacterSetECI charsetECI = CharacterSetECI.getCharacterSetECIByValue(eci);
-              encoding = Charset.forName(charsetECI.name());
+              encoding = charsetECI.getCharset();
          }
          // Go back to whatever mode we had been in
          shiftTable = latchTable;
--- a/core/src/main/java/com/google/zxing/aztec/encoder/Encoder.java
+++ b/core/src/main/java/com/google/zxing/aztec/encoder/Encoder.java
@ -21,6 +21,9 @@ import com.google.zxing.common.BitMatrix;
 import com.google.zxing.common.reedsolomon.GenericGF;
 import com.google.zxing.common.reedsolomon.ReedSolomonEncoder;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 /**
 * Generates Aztec 2D barcodes.
 *
@ -42,13 +45,66 @@ public final class Encoder {
  }
  /**
-   * Encodes the given binary content as an Aztec symbol
+   * Encodes the given string content as an Aztec symbol (without ECI code)
   *
   * @param data input data string; must be encodable as ISO/IEC 8859-1 (Latin-1)
   * @return Aztec symbol matrix with metadata
   */
  public static AztecCode encode(String data) {
    // This overload never adds an ECI code, so the payload is simply the Latin-1 bytes of the text.
    byte[] latin1Bytes = data.getBytes(StandardCharsets.ISO_8859_1);
    return encode(latin1Bytes);
  }
  /**
   * Encodes a string as an Aztec symbol without emitting any ECI code.
   *
   * @param data text to encode; it is converted to bytes as ISO/IEC 8859-1 (Latin-1), so it
   *             must be representable in that character set
   * @param minECCPercent minimal percentage of error check words (According to ISO/IEC 24778:2008,
   *                      a minimum of 23% + 3 words is recommended)
   * @param userSpecifiedLayers if non-zero, a user-specified value for the number of layers
   * @return Aztec symbol matrix with metadata
   */
  public static AztecCode encode(String data, int minECCPercent, int userSpecifiedLayers) {
    byte[] latin1Bytes = data.getBytes(StandardCharsets.ISO_8859_1);
    // Passing null as the charset means no ECI code is requested.
    return encode(latin1Bytes, minECCPercent, userSpecifiedLayers, null);
  }
  /**
   * Encodes a string as an Aztec symbol, converting it to bytes with the requested character
   * set and passing that character set on so that it can be marked with an ECI code.
   *
   * @param data input data string
   * @param minECCPercent minimal percentage of error check words (According to ISO/IEC 24778:2008,
   *                      a minimum of 23% + 3 words is recommended)
   * @param userSpecifiedLayers if non-zero, a user-specified value for the number of layers
   * @param charset character set in which to encode the string using ECI; if null, no ECI code
   *                will be inserted, and the string must be encodable as ISO/IEC 8859-1
   *                (Latin-1), the default encoding of the symbol.
   * @return Aztec symbol matrix with metadata
   */
  public static AztecCode encode(String data, int minECCPercent, int userSpecifiedLayers, Charset charset) {
    // Without an explicit charset the text is converted as Latin-1; the (possibly null) charset
    // itself is still forwarded unchanged so that the callee decides about the ECI code.
    Charset conversionCharset;
    if (charset == null) {
      conversionCharset = StandardCharsets.ISO_8859_1;
    } else {
      conversionCharset = charset;
    }
    byte[] payload = data.getBytes(conversionCharset);
    return encode(payload, minECCPercent, userSpecifiedLayers, charset);
  }
  /**
   * Encodes the given binary content as an Aztec symbol (without ECI code)
   *
   * @param data input data string
   * @return Aztec symbol matrix with metadata
   */
  public static AztecCode encode(byte[] data) {
-    return encode(data, DEFAULT_EC_PERCENT, DEFAULT_AZTEC_LAYERS);
+    return encode(data, DEFAULT_EC_PERCENT, DEFAULT_AZTEC_LAYERS, null);
  }
  /**
   * Encodes the given binary content as an Aztec symbol (without ECI code)
   *
   * @param data input data bytes
   * @param minECCPercent minimal percentage of error check words (According to ISO/IEC 24778:2008,
   *                      a minimum of 23% + 3 words is recommended)
   * @param userSpecifiedLayers if non-zero, a user-specified value for the number of layers
   * @return Aztec symbol matrix with metadata
   */
  public static AztecCode encode(byte[] data, int minECCPercent, int userSpecifiedLayers) {
    // A null charset is forwarded to request a symbol without an ECI code.
    Charset noEciCharset = null;
    return encode(data, minECCPercent, userSpecifiedLayers, noEciCharset);
  }
  /**
@ -58,11 +114,13 @@ public final class Encoder {
   * @param minECCPercent minimal percentage of error check words (According to ISO/IEC 24778:2008,
   *                      a minimum of 23% + 3 words is recommended)
   * @param userSpecifiedLayers if non-zero, a user-specified value for the number of layers
   * @param charset character set to mark using ECI; if null, no ECI code will be inserted, and the
   *                default encoding of ISO/IEC 8859-1 will be assumed by readers.
   * @return Aztec symbol matrix with metadata
   */
-  public static AztecCode encode(byte[] data, int minECCPercent, int userSpecifiedLayers) {
+  public static AztecCode encode(byte[] data, int minECCPercent, int userSpecifiedLayers, Charset charset) {
    // High-level encode
-    BitArray bits = new HighLevelEncoder(data).encode();
+    BitArray bits = new HighLevelEncoder(data, charset).encode();
    // stuff bits and choose symbol size
    int eccBits = bits.getSize() * minECCPercent / 100 + 11;
--- a/core/src/main/java/com/google/zxing/aztec/encoder/HighLevelEncoder.java
+++ b/core/src/main/java/com/google/zxing/aztec/encoder/HighLevelEncoder.java
@ -17,6 +17,9 @@
 package com.google.zxing.aztec.encoder;
 import com.google.zxing.common.BitArray;
 import com.google.zxing.common.CharacterSetECI;
 import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.Collection;
@ -148,16 +151,31 @@ public final class HighLevelEncoder {
  }
  private final byte[] text;
  private final Charset charset;
  /**
   * Creates an encoder for the given bytes with no character set attached.
   *
   * @param text bytes to encode
   */
  public HighLevelEncoder(byte[] text) {
    this(text, null);
  }
  /**
   * Creates an encoder for the given bytes and remembers the character set they are in.
   *
   * @param text bytes to encode
   * @param charset character set associated with {@code text}; may be {@code null}
   */
  public HighLevelEncoder(byte[] text, Charset charset) {
    this.charset = charset;
    this.text = text;
  }
  /**
   * @return text represented by this encoder encoded as a {@link BitArray}
   */
  public BitArray encode() {
-    Collection<State> states = Collections.singletonList(State.INITIAL_STATE);
+    State initialState = State.INITIAL_STATE;
    if (charset != null) {
      CharacterSetECI eci = CharacterSetECI.getCharacterSetECI(charset);
      if (null == eci) {
        throw new IllegalArgumentException("No ECI code for character set " + charset.toString());
      }
      initialState = initialState.appendFLGn(eci.getValue());
    }
    Collection<State> states = Collections.singletonList(initialState);
    for (int index = 0; index < text.length; index++) {
      int pairCode;
      int nextChar = index + 1 < text.length ? text[index + 1] : 0;
--- a/core/src/main/java/com/google/zxing/aztec/encoder/State.java
+++ b/core/src/main/java/com/google/zxing/aztec/encoder/State.java
@ -16,6 +16,8 @@
 package com.google.zxing.aztec.encoder;
 import java.nio.charset.StandardCharsets;
 import java.util.Deque;
 import java.util.LinkedList;
@ -70,6 +72,25 @@ final class State {
    return bitCount;
  }
  /**
   * Creates a new state representing this state followed by an FLG(n) sequence: either FNC1
   * (for a negative argument) or an ECI designator made of the decimal digits of {@code eci}.
   *
   * @param eci ECI value to encode; a negative value selects FNC1 instead
   * @return the resulting state, in the same mode as this one
   * @throws IllegalArgumentException if {@code eci} is greater than 999999
   */
  State appendFLGn(int eci) {
    // Code 0 of the PUNCT table is FLG(n); it is reached through a shift.
    State shifted = shiftAndAppend(HighLevelEncoder.MODE_PUNCT, 0);
    if (eci > 999999) {
      throw new IllegalArgumentException("ECI code must be between 0 and 999999");
    }
    Token tail = shifted.token;
    int extraBits = 3; // the 3-bit field that follows FLG(n)
    if (eci < 0) {
      tail = tail.add(0, 3); // 0: FNC1
    } else {
      char[] digits = Integer.toString(eci).toCharArray();
      tail = tail.add(digits.length, 3); // 1-6: number of ECI digits
      for (char digit : digits) {
        // each digit d is written as the 4-bit value d + 2
        tail = tail.add(digit - '0' + 2, 4);
      }
      extraBits += digits.length * 4;
    }
    // NOTE(review): the count below starts from this state's bitCount, not from the shifted
    // state's -- confirm that leaving out the shift cost is intentional.
    return new State(tail, mode, 0, bitCount + extraBits);
  }
  // Create a new state representing this state with a latch to a (not
  // necessary different) mode, and then a code.
  State latchAndAppend(int mode, int value) {
@ -143,7 +164,7 @@ final class State {
      newModeBitCount += calculateBinaryShiftCost(other) - calculateBinaryShiftCost(this);
    } else if (this.binaryShiftByteCount > other.binaryShiftByteCount && other.binaryShiftByteCount > 0) {
      // maximum possible additional cost (we end up exceeding the 31 byte boundary and other state can stay beneath it)
-      newModeBitCount += 10; 
+      newModeBitCount += 10;
    }
    return newModeBitCount <= other.bitCount;
  }
@ -168,7 +189,7 @@ final class State {
  public String toString() {
    return String.format("%s bits=%d bytes=%d", HighLevelEncoder.MODE_NAMES[mode], bitCount, binaryShiftByteCount);
  }
-  
+
  private static int calculateBinaryShiftCost(State state) {
    if (state.binaryShiftByteCount > 62) {
      return 21; // B/S with extended length
--- a/core/src/main/java/com/google/zxing/common/CharacterSetECI.java
+++ b/core/src/main/java/com/google/zxing/common/CharacterSetECI.java
@ -18,6 +18,8 @@ package com.google.zxing.common;
 import com.google.zxing.FormatException;
 import java.nio.charset.Charset;
 import java.util.HashMap;
 import java.util.Map;
@ -93,6 +95,19 @@ public enum CharacterSetECI {
    return values[0];
  }
  /**
   * @return the Java {@link Charset} obtained by looking up this constant's enum name
   */
  public Charset getCharset() {
    String charsetName = name();
    return Charset.forName(charsetName);
  }
  /**
   * Looks up the ECI constant registered under the canonical name of a Java character set.
   *
   * @param charset Java character set object
   * @return CharacterSetECI representing ECI for character encoding, or null if it is legal
   *   but unsupported
   */
  public static CharacterSetECI getCharacterSetECI(Charset charset) {
    String canonicalName = charset.name();
    return NAME_TO_ECI.get(canonicalName);
  }
  /**
   * @param value character set ECI value
   * @return {@code CharacterSetECI} representing ECI of given value, or null if it is legal but
--- a/core/src/main/java/com/google/zxing/common/StringUtils.java
+++ b/core/src/main/java/com/google/zxing/common/StringUtils.java
@ -17,6 +17,7 @@
 package com.google.zxing.common;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.Map;
 import com.google.zxing.DecodeHintType;
@ -29,15 +30,17 @@ import com.google.zxing.DecodeHintType;
 */
 public final class StringUtils {
-  private static final String PLATFORM_DEFAULT_ENCODING = Charset.defaultCharset().name();
+  private static final Charset PLATFORM_DEFAULT_ENCODING = Charset.defaultCharset();
  public static final Charset SHIFT_JIS_CHARSET = Charset.forName("SJIS");
  public static final Charset GB2312_CHARSET = Charset.forName("GB2312");
  private static final Charset EUC_JP = Charset.forName("EUC_JP");
  private static final boolean ASSUME_SHIFT_JIS =
      SHIFT_JIS_CHARSET.equals(PLATFORM_DEFAULT_ENCODING) ||
      EUC_JP.equals(PLATFORM_DEFAULT_ENCODING);
  // Retained for ABI compatibility with earlier versions
  public static final String SHIFT_JIS = "SJIS";
  public static final String GB2312 = "GB2312";
  private static final String EUC_JP = "EUC_JP";
  private static final String UTF8 = "UTF8";
  private static final String ISO88591 = "ISO8859_1";
  private static final boolean ASSUME_SHIFT_JIS =
      SHIFT_JIS.equalsIgnoreCase(PLATFORM_DEFAULT_ENCODING) ||
      EUC_JP.equalsIgnoreCase(PLATFORM_DEFAULT_ENCODING);
  private StringUtils() { }
@ -45,12 +48,32 @@ public final class StringUtils {
   * @param bytes bytes encoding a string, whose encoding should be guessed
   * @param hints decode hints if applicable
   * @return name of guessed encoding; at the moment will only guess one of:
-   *  {@link #SHIFT_JIS}, {@link #UTF8}, {@link #ISO88591}, or the platform
+   *  "SJIS", "UTF8", "ISO8859_1", or the platform default encoding if none
-   *  default encoding if none of these can possibly be correct
+   *  of these can possibly be correct
   */
  public static String guessEncoding(byte[] bytes, Map<DecodeHintType,?> hints) {
    Charset c = guessCharset(bytes, hints);
    // Compare with equals() rather than ==: a charset resolved from a decode hint through
    // Charset.forName() is not guaranteed to be the very same instance as these constants.
    // The three guessable encodings are reported under the legacy names this String-based
    // API has always returned; anything else is reported by its canonical charset name.
    if (SHIFT_JIS_CHARSET.equals(c)) {
      return "SJIS";
    }
    if (StandardCharsets.UTF_8.equals(c)) {
      return "UTF8";
    }
    if (StandardCharsets.ISO_8859_1.equals(c)) {
      return "ISO8859_1";
    }
    return c.name();
  }
  /**
   * @param bytes bytes encoding a string, whose encoding should be guessed
   * @param hints decode hints if applicable
   * @return Charset of guessed encoding; at the moment will only guess one of:
   *  {@link #SHIFT_JIS_CHARSET}, {@link StandardCharsets#UTF_8},
   *  {@link StandardCharsets#ISO_8859_1}, or the platform default encoding if
   *  none of these can possibly be correct
   */
  public static Charset guessCharset(byte[] bytes, Map<DecodeHintType,?> hints) {
    if (hints != null && hints.containsKey(DecodeHintType.CHARACTER_SET)) {
-      return hints.get(DecodeHintType.CHARACTER_SET).toString();
+      return Charset.forName(hints.get(DecodeHintType.CHARACTER_SET).toString());
    }
    // For now, merely tries to distinguish ISO-8859-1, UTF-8 and Shift_JIS,
    // which should be by far the most common encodings.
@ -164,11 +187,11 @@ public final class StringUtils {
    // Easy -- if there is BOM or at least 1 valid not-single byte character (and no evidence it can't be UTF-8), done
    if (canBeUTF8 && (utf8bom || utf2BytesChars + utf3BytesChars + utf4BytesChars > 0)) {
-      return UTF8;
+      return StandardCharsets.UTF_8;
    }
    // Easy -- if assuming Shift_JIS or >= 3 valid consecutive not-ascii characters (and no evidence it can't be), done
    if (canBeShiftJIS && (ASSUME_SHIFT_JIS || sjisMaxKatakanaWordLength >= 3 || sjisMaxDoubleBytesWordLength >= 3)) {
-      return SHIFT_JIS;
+      return SHIFT_JIS_CHARSET;
    }
    // Distinguishing Shift_JIS and ISO-8859-1 can be a little tough for short words. The crude heuristic is:
    // - If we saw
@ -177,18 +200,18 @@ public final class StringUtils {
    // - then we conclude Shift_JIS, else ISO-8859-1
    if (canBeISO88591 && canBeShiftJIS) {
      return (sjisMaxKatakanaWordLength == 2 && sjisKatakanaChars == 2) || isoHighOther * 10 >= length
-          ? SHIFT_JIS : ISO88591;
+          ? SHIFT_JIS_CHARSET : StandardCharsets.ISO_8859_1;
    }
    // Otherwise, try in order ISO-8859-1, Shift JIS, UTF-8 and fall back to default platform encoding
    if (canBeISO88591) {
-      return ISO88591;
+      return StandardCharsets.ISO_8859_1;
    }
    if (canBeShiftJIS) {
-      return SHIFT_JIS;
+      return SHIFT_JIS_CHARSET;
    }
    if (canBeUTF8) {
-      return UTF8;
+      return StandardCharsets.UTF_8;
    }
    // Otherwise, we take a wild guess with platform encoding
    return PLATFORM_DEFAULT_ENCODING;
--- a/core/src/main/java/com/google/zxing/datamatrix/decoder/DecodedBitStreamParser.java
+++ b/core/src/main/java/com/google/zxing/datamatrix/decoder/DecodedBitStreamParser.java
@ -20,7 +20,7 @@ import com.google.zxing.FormatException;
 import com.google.zxing.common.BitSource;
 import com.google.zxing.common.DecoderResult;
-import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
@ -505,11 +505,7 @@ final class DecodedBitStreamParser {
      bytes[i] = (byte) unrandomize255State(bits.readBits(8), codewordPosition++);
    }
    byteSegments.add(bytes);
-    try {
+    result.append(new String(bytes, StandardCharsets.ISO_8859_1));
      result.append(new String(bytes, "ISO8859_1"));
    } catch (UnsupportedEncodingException uee) {
      throw new IllegalStateException("Platform does not support required encoding: " + uee);
    }
  }
  /**
--- a/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java
+++ b/core/src/main/java/com/google/zxing/pdf417/decoder/DecodedBitStreamParser.java
@ -125,7 +125,7 @@ final class DecodedBitStreamParser {
        case ECI_CHARSET:
          CharacterSetECI charsetECI =
              CharacterSetECI.getCharacterSetECIByValue(codewords[codeIndex++]);
-          encoding = Charset.forName(charsetECI.name());
+          encoding = charsetECI.getCharset();
          break;
        case ECI_GENERAL_PURPOSE:
          // Can't do anything with generic ECI; skip its 2 characters
--- a/core/src/main/java/com/google/zxing/pdf417/encoder/PDF417HighLevelEncoder.java
+++ b/core/src/main/java/com/google/zxing/pdf417/encoder/PDF417HighLevelEncoder.java
@ -169,7 +169,7 @@ final class PDF417HighLevelEncoder {
    if (encoding == null) {
      encoding = DEFAULT_ENCODING;
    } else if (!DEFAULT_ENCODING.equals(encoding)) {
-      CharacterSetECI eci = CharacterSetECI.getCharacterSetECIByName(encoding.name());
+      CharacterSetECI eci = CharacterSetECI.getCharacterSetECI(encoding);
      if (eci != null) {
        encodingECI(eci.getValue(), sb);
      }
--- a/core/src/main/java/com/google/zxing/qrcode/decoder/DecodedBitStreamParser.java
+++ b/core/src/main/java/com/google/zxing/qrcode/decoder/DecodedBitStreamParser.java
@ -23,7 +23,7 @@ import com.google.zxing.common.CharacterSetECI;
 import com.google.zxing.common.DecoderResult;
 import com.google.zxing.common.StringUtils;
-import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
@ -173,11 +173,7 @@ final class DecodedBitStreamParser {
      count--;
    }
-    try {
+    result.append(new String(buffer, StringUtils.GB2312_CHARSET));
      result.append(new String(buffer, StringUtils.GB2312));
    } catch (UnsupportedEncodingException ignored) {
      throw FormatException.getFormatInstance();
    }
  }
  private static void decodeKanjiSegment(BitSource bits,
@ -208,12 +204,7 @@ final class DecodedBitStreamParser {
      offset += 2;
      count--;
    }
-    // Shift_JIS may not be supported in some environments:
+    result.append(new String(buffer, StringUtils.SHIFT_JIS_CHARSET));
    try {
      result.append(new String(buffer, StringUtils.SHIFT_JIS));
    } catch (UnsupportedEncodingException ignored) {
      throw FormatException.getFormatInstance();
    }
  }
  private static void decodeByteSegment(BitSource bits,
@ -231,22 +222,18 @@ final class DecodedBitStreamParser {
    for (int i = 0; i < count; i++) {
      readBytes[i] = (byte) bits.readBits(8);
    }
-    String encoding;
+    Charset encoding;
    if (currentCharacterSetECI == null) {
      // The spec isn't clear on this mode; see
      // section 6.4.5: it does not say which encoding to assume
      // upon decoding. I have seen ISO-8859-1 used as well as
      // Shift_JIS -- without anything like an ECI designator to
      // give a hint.
-      encoding = StringUtils.guessEncoding(readBytes, hints);
+      encoding = StringUtils.guessCharset(readBytes, hints);
    } else {
-      encoding = currentCharacterSetECI.name();
+      encoding = currentCharacterSetECI.getCharset();
    }
    try {
      result.append(new String(readBytes, encoding));
    } catch (UnsupportedEncodingException ignored) {
      throw FormatException.getFormatInstance();
    }
    result.append(new String(readBytes, encoding));
    byteSegments.add(readBytes);
  }
--- a/core/src/main/java/com/google/zxing/qrcode/encoder/Encoder.java
+++ b/core/src/main/java/com/google/zxing/qrcode/encoder/Encoder.java
@ -19,6 +19,7 @@ package com.google.zxing.qrcode.encoder;
 import com.google.zxing.EncodeHintType;
 import com.google.zxing.WriterException;
 import com.google.zxing.common.BitArray;
 import com.google.zxing.common.StringUtils;
 import com.google.zxing.common.CharacterSetECI;
 import com.google.zxing.common.reedsolomon.GenericGF;
 import com.google.zxing.common.reedsolomon.ReedSolomonEncoder;
@ -26,7 +27,8 @@ import com.google.zxing.qrcode.decoder.ErrorCorrectionLevel;
 import com.google.zxing.qrcode.decoder.Mode;
 import com.google.zxing.qrcode.decoder.Version;
-import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Map;
@ -47,7 +49,7 @@ public final class Encoder {
      25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1,  // 0x50-0x5f
  };
-  static final String DEFAULT_BYTE_MODE_ENCODING = "ISO-8859-1";
+  static final Charset DEFAULT_BYTE_MODE_ENCODING = StandardCharsets.ISO_8859_1;
  private Encoder() {
  }
@ -77,10 +79,10 @@ public final class Encoder {
                              Map<EncodeHintType,?> hints) throws WriterException {
    // Determine what character encoding has been specified by the caller, if any
-    String encoding = DEFAULT_BYTE_MODE_ENCODING;
+    Charset encoding = DEFAULT_BYTE_MODE_ENCODING;
    boolean hasEncodingHint = hints != null && hints.containsKey(EncodeHintType.CHARACTER_SET);
    if (hasEncodingHint) {
-      encoding = hints.get(EncodeHintType.CHARACTER_SET).toString();
+      encoding = Charset.forName(hints.get(EncodeHintType.CHARACTER_SET).toString());
    }
    // Pick an encoding mode appropriate for the content. Note that this will not attempt to use
@ -93,7 +95,7 @@ public final class Encoder {
    // Append ECI segment if applicable
    if (mode == Mode.BYTE && hasEncodingHint) {
-      CharacterSetECI eci = CharacterSetECI.getCharacterSetECIByName(encoding);
+      CharacterSetECI eci = CharacterSetECI.getCharacterSetECI(encoding);
      if (eci != null) {
        appendECI(eci, headerBits);
      }
@ -221,8 +223,8 @@ public final class Encoder {
   * Choose the best mode by examining the content. Note that 'encoding' is used as a hint;
   * if it is Shift_JIS, and the input is only double-byte Kanji, then we return {@link Mode#KANJI}.
   */
-  private static Mode chooseMode(String content, String encoding) {
+  private static Mode chooseMode(String content, Charset encoding) {
-    if ("Shift_JIS".equals(encoding) && isOnlyDoubleByteKanji(content)) {
+    if (StringUtils.SHIFT_JIS_CHARSET.equals(encoding) && isOnlyDoubleByteKanji(content)) {
      // Choose Kanji mode if all input are double-byte characters
      return Mode.KANJI;
    }
@ -248,12 +250,7 @@ public final class Encoder {
  }
  private static boolean isOnlyDoubleByteKanji(String content) {
-    byte[] bytes;
+    byte[] bytes = content.getBytes(StringUtils.SHIFT_JIS_CHARSET);
    try {
      bytes = content.getBytes("Shift_JIS");
    } catch (UnsupportedEncodingException ignored) {
      return false;
    }
    int length = bytes.length;
    if (length % 2 != 0) {
      return false;
@ -512,7 +509,7 @@ public final class Encoder {
  static void appendBytes(String content,
                          Mode mode,
                          BitArray bits,
-                          String encoding) throws WriterException {
+                          Charset encoding) throws WriterException {
    switch (mode) {
      case NUMERIC:
        appendNumericBytes(content, bits);
@ -579,26 +576,15 @@ public final class Encoder {
    }
  }
-  static void append8BitBytes(String content, BitArray bits, String encoding)
+  static void append8BitBytes(String content, BitArray bits, Charset encoding) {
-      throws WriterException {
+    byte[] bytes = content.getBytes(encoding);
    byte[] bytes;
    try {
      bytes = content.getBytes(encoding);
    } catch (UnsupportedEncodingException uee) {
      throw new WriterException(uee);
    }
    for (byte b : bytes) {
      bits.appendBits(b, 8);
    }
  }
  static void appendKanjiBytes(String content, BitArray bits) throws WriterException {
-    byte[] bytes;
+    byte[] bytes = content.getBytes(StringUtils.SHIFT_JIS_CHARSET);
    try {
      bytes = content.getBytes("Shift_JIS");
    } catch (UnsupportedEncodingException uee) {
      throw new WriterException(uee);
    }
    if (bytes.length % 2 != 0) {
      throw new WriterException("Kanji byte size not even");
    }
--- a/core/src/test/java/com/google/zxing/aztec/detector/DetectorTest.java
+++ b/core/src/test/java/com/google/zxing/aztec/detector/DetectorTest.java
@ -27,7 +27,6 @@ import com.google.zxing.common.DecoderResult;
 import org.junit.Assert;
 import org.junit.Test;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@ -62,7 +61,7 @@ public final class DetectorTest extends Assert {
  // Test that we can tolerate errors in the parameter locator bits
  private static void testErrorInParameterLocator(String data) throws Exception {
-    AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1), 25, Encoder.DEFAULT_AZTEC_LAYERS);
+    AztecCode aztec = Encoder.encode(data, 25, Encoder.DEFAULT_AZTEC_LAYERS);
    Random random = new Random(aztec.getMatrix().hashCode());   // pseudo-random, but deterministic
    int layers = aztec.getLayers();
    boolean compact = aztec.isCompact();
--- a/core/src/test/java/com/google/zxing/aztec/encoder/EncoderTest.java
+++ b/core/src/test/java/com/google/zxing/aztec/encoder/EncoderTest.java
@ -44,6 +44,12 @@ import java.util.regex.Pattern;
 */
 public final class EncoderTest extends Assert {
  private static final Charset ISO_8859_1 = StandardCharsets.ISO_8859_1;
  private static final Charset UTF_8 = StandardCharsets.UTF_8;
  private static final Charset SHIFT_JIS = Charset.forName("Shift_JIS");
  private static final Charset ISO_8859_15 = Charset.forName("ISO-8859-15");
  private static final Charset WINDOWS_1252 = Charset.forName("Windows-1252");
  private static final Pattern DOTX = Pattern.compile("[^.X]");
  private static final Pattern SPACES = Pattern.compile("\\s+");
  private static final ResultPoint[] NO_POINTS = new ResultPoint[0];
@ -128,17 +134,20 @@ public final class EncoderTest extends Assert {
  @Test
  public void testAztecWriter() throws Exception {
-    testWriter("\u20AC 1 sample data.", "ISO-8859-1", 25, true, 2);
+    testWriter("Espa\u00F1ol", null, 25, true, 1);                   // Without ECI (implicit ISO-8859-1)
-    testWriter("\u20AC 1 sample data.", "ISO-8859-15", 25, true, 2);
+    testWriter("Espa\u00F1ol", ISO_8859_1, 25, true, 1);             // Explicit ISO-8859-1
-    testWriter("\u20AC 1 sample data.", "UTF-8", 25, true, 2);
+    testWriter("\u20AC 1 sample data.", WINDOWS_1252, 25, true, 2);  // Standard ISO-8859-1 cannot encode Euro symbol; Windows-1252 superset can
-    testWriter("\u20AC 1 sample data.", "UTF-8", 100, true, 3);
+    testWriter("\u20AC 1 sample data.", ISO_8859_15, 25, true, 2);
-    testWriter("\u20AC 1 sample data.", "UTF-8", 300, true, 4);
+    testWriter("\u20AC 1 sample data.", UTF_8, 25, true, 2);
-    testWriter("\u20AC 1 sample data.", "UTF-8", 500, false, 5);
+    testWriter("\u20AC 1 sample data.", UTF_8, 100, true, 3);
    testWriter("\u20AC 1 sample data.", UTF_8, 300, true, 4);
    testWriter("\u20AC 1 sample data.", UTF_8, 500, false, 5);
    testWriter("The capital of Japan is named \u6771\u4EAC.", SHIFT_JIS, 25, true, 3);
    // Test AztecWriter defaults
    String data = "In ut magna vel mauris malesuada";
    AztecWriter writer = new AztecWriter();
    BitMatrix matrix = writer.encode(data, BarcodeFormat.AZTEC, 0, 0);
-    AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1),
+    AztecCode aztec = Encoder.encode(data,
        Encoder.DEFAULT_EC_PERCENT, Encoder.DEFAULT_AZTEC_LAYERS);
    BitMatrix expectedMatrix = aztec.getMatrix();
    assertEquals(matrix, expectedMatrix);
@ -418,7 +427,7 @@ public final class EncoderTest extends Assert {
  @Test
  public void testUserSpecifiedLayers() throws FormatException {
-    byte[] alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".getBytes(StandardCharsets.ISO_8859_1);
+    String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    AztecCode aztec = Encoder.encode(alphabet, 25, -2);
    assertEquals(2, aztec.getLayers());
    assertTrue(aztec.isCompact());
@ -449,22 +458,21 @@ public final class EncoderTest extends Assert {
    String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    // encodes as 26 * 5 * 4 = 520 bits of data
    String alphabet4 = alphabet + alphabet + alphabet + alphabet;
    byte[] data = alphabet4.getBytes(StandardCharsets.ISO_8859_1);
    try {
-      Encoder.encode(data, 0, -4);
+      Encoder.encode(alphabet4, 0, -4);
      fail("Encode should have failed.  Text can't fit in 1-layer compact");
    } catch (IllegalArgumentException expected) {
      // continue
    }
    // If we just try to encode it normally, it will go to a non-compact 4 layer
-    AztecCode aztecCode = Encoder.encode(data, 0, Encoder.DEFAULT_AZTEC_LAYERS);
+    AztecCode aztecCode = Encoder.encode(alphabet4, 0, Encoder.DEFAULT_AZTEC_LAYERS);
    assertFalse(aztecCode.isCompact());
    assertEquals(4, aztecCode.getLayers());
    // But shortening the string to 100 bytes (500 bits of data), compact works fine, even if we
    // include more error checking.
-    aztecCode = Encoder.encode(alphabet4.substring(0, 100).getBytes(StandardCharsets.ISO_8859_1), 10, Encoder.DEFAULT_AZTEC_LAYERS);
+    aztecCode = Encoder.encode(alphabet4.substring(0, 100), 10, Encoder.DEFAULT_AZTEC_LAYERS);
    assertTrue(aztecCode.isCompact());
    assertEquals(4, aztecCode.getLayers());
  }
@ -472,7 +480,7 @@ public final class EncoderTest extends Assert {
  // Helper routines
  private static void testEncode(String data, boolean compact, int layers, String expected) throws FormatException {
-    AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1), 33, Encoder.DEFAULT_AZTEC_LAYERS);
+    AztecCode aztec = Encoder.encode(data, 33, Encoder.DEFAULT_AZTEC_LAYERS);
    assertEquals("Unexpected symbol format (compact)", compact, aztec.isCompact());
    assertEquals("Unexpected nr. of layers", layers, aztec.getLayers());
    BitMatrix matrix = aztec.getMatrix();
@ -480,7 +488,7 @@ public final class EncoderTest extends Assert {
  }
  private static void testEncodeDecode(String data, boolean compact, int layers) throws Exception {
-    AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1), 25, Encoder.DEFAULT_AZTEC_LAYERS);
+    AztecCode aztec = Encoder.encode(data, 25, Encoder.DEFAULT_AZTEC_LAYERS);
    assertEquals("Unexpected symbol format (compact)", compact, aztec.isCompact());
    assertEquals("Unexpected nr. of layers", layers, aztec.getLayers());
    BitMatrix matrix = aztec.getMatrix();
@ -500,20 +508,20 @@ public final class EncoderTest extends Assert {
  }
  private static void testWriter(String data,
-                                 String charset,
+                                 Charset charset,
                                 int eccPercent,
                                 boolean compact,
                                 int layers) throws FormatException {
-    // 1. Perform an encode-decode round-trip because it can be lossy.
+    // Perform an encode-decode round-trip because it can be lossy.
-    // 2. Aztec Decoder currently always decodes the data with a LATIN-1 charset:
-    String expectedData = new String(data.getBytes(Charset.forName(charset)), StandardCharsets.ISO_8859_1);
    Map<EncodeHintType,Object> hints = new EnumMap<>(EncodeHintType.class);
-    hints.put(EncodeHintType.CHARACTER_SET, charset);
+    if (null != charset) {
+        hints.put(EncodeHintType.CHARACTER_SET, charset.name());
+    }
    hints.put(EncodeHintType.ERROR_CORRECTION, eccPercent);
    AztecWriter writer = new AztecWriter();
    BitMatrix matrix = writer.encode(data, BarcodeFormat.AZTEC, 0, 0, hints);
-    AztecCode aztec = Encoder.encode(data.getBytes(Charset.forName(charset)), eccPercent,
+    AztecCode aztec = Encoder.encode(data, eccPercent,
-        Encoder.DEFAULT_AZTEC_LAYERS);
+        Encoder.DEFAULT_AZTEC_LAYERS, charset);
    assertEquals("Unexpected symbol format (compact)", compact, aztec.isCompact());
    assertEquals("Unexpected nr. of layers", layers, aztec.getLayers());
    BitMatrix matrix2 = aztec.getMatrix();
@ -521,7 +529,7 @@ public final class EncoderTest extends Assert {
    AztecDetectorResult r =
        new AztecDetectorResult(matrix, NO_POINTS, aztec.isCompact(), aztec.getCodeWords(), aztec.getLayers());
    DecoderResult res = new Decoder().decode(r);
-    assertEquals(expectedData, res.getText());
+    assertEquals(data, res.getText());
    // Check error correction by introducing up to eccPercent/2 errors
    int ecWords = aztec.getCodeWords() * eccPercent / 100 / 2;
    Random random = getPseudoRandom();
@ -537,7 +545,7 @@ public final class EncoderTest extends Assert {
    }
    r = new AztecDetectorResult(matrix, NO_POINTS, aztec.isCompact(), aztec.getCodeWords(), aztec.getLayers());
    res = new Decoder().decode(r);
-    assertEquals(expectedData, res.getText());
+    assertEquals(data, res.getText());
  }
  private static Random getPseudoRandom() {
--- a/core/src/test/java/com/google/zxing/common/StringUtilsTestCase.java
+++ b/core/src/test/java/com/google/zxing/common/StringUtilsTestCase.java
@ -19,6 +19,7 @@ package com.google.zxing.common;
 import org.junit.Assert;
 import org.junit.Test;
+import java.nio.charset.StandardCharsets;
 import java.nio.charset.Charset;
 /**
@ -28,34 +29,42 @@ public final class StringUtilsTestCase extends Assert {
  @Test
  public void testShortShiftJIS1() {
-    // ÈáëÈ≠ö
+    // 金魚
-    doTest(new byte[] { (byte) 0x8b, (byte) 0xe0, (byte) 0x8b, (byte) 0x9b, }, "SJIS");
+    doTest(new byte[] { (byte) 0x8b, (byte) 0xe0, (byte) 0x8b, (byte) 0x9b, }, StringUtils.SHIFT_JIS_CHARSET, "SJIS");
  }
  @Test
  public void testShortISO885911() {
-    // b√•d
+    // båd
-    doTest(new byte[] { (byte) 0x62, (byte) 0xe5, (byte) 0x64, }, "ISO-8859-1");
+    doTest(new byte[] { (byte) 0x62, (byte) 0xe5, (byte) 0x64, }, StandardCharsets.ISO_8859_1, "ISO8859_1");
  }
+  @Test
+  public void testShortUTF81() {
+    // Español
+    doTest(new byte[] { (byte) 0x45, (byte) 0x73, (byte) 0x70, (byte) 0x61, (byte) 0xc3,
+                        (byte) 0xb1, (byte) 0x6f, (byte) 0x6c },
+           StandardCharsets.UTF_8, "UTF8");
+  }
  @Test
  public void testMixedShiftJIS1() {
-    // Hello Èáë!
+    // Hello 金!
    doTest(new byte[] { (byte) 0x48, (byte) 0x65, (byte) 0x6c, (byte) 0x6c, (byte) 0x6f,
                        (byte) 0x20, (byte) 0x8b, (byte) 0xe0, (byte) 0x21, },
-           "SJIS");
+           StringUtils.SHIFT_JIS_CHARSET, "SJIS");
  }
-  private static void doTest(byte[] bytes, String charsetName) {
+  private static void doTest(byte[] bytes, Charset charset, String encoding) {
-    Charset charset = Charset.forName(charsetName);
+    Charset guessedCharset = StringUtils.guessCharset(bytes, null);
-    String guessedName = StringUtils.guessEncoding(bytes, null);
+    String guessedEncoding = StringUtils.guessEncoding(bytes, null);
-    Charset guessedEncoding = Charset.forName(guessedName);
+    assertEquals(charset, guessedCharset);
-    assertEquals(charset, guessedEncoding);
+    assertEquals(encoding, guessedEncoding);
  }
  /**
   * Utility for printing out a string in given encoding as a Java statement, since it's better
-   * to write that into the Java source file rather than risk character encoding issues in the 
+   * to write that into the Java source file rather than risk character encoding issues in the
   * source file itself.
   *
   * @param args command line arguments
--- a/core/src/test/java/com/google/zxing/qrcode/encoder/EncoderTestCase.java
+++ b/core/src/test/java/com/google/zxing/qrcode/encoder/EncoderTestCase.java
@ -19,6 +19,7 @@ package com.google.zxing.qrcode.encoder;
 import com.google.zxing.EncodeHintType;
 import com.google.zxing.WriterException;
 import com.google.zxing.common.BitArray;
+import com.google.zxing.common.StringUtils;
 import com.google.zxing.qrcode.decoder.ErrorCorrectionLevel;
 import com.google.zxing.qrcode.decoder.Mode;
 import com.google.zxing.qrcode.decoder.Version;
@ -26,7 +27,6 @@ import com.google.zxing.qrcode.decoder.Version;
 import org.junit.Assert;
 import org.junit.Test;
-import java.io.UnsupportedEncodingException;
 import java.util.EnumMap;
 import java.util.Map;
@ -127,7 +127,7 @@ public final class EncoderTestCase extends Assert {
          ">>\n";
    assertEquals(expected, qrCode.toString());
  }
-  
+
  @Test
  public void testEncodeWithVersion() throws WriterException {
    Map<EncodeHintType, Object> hints = new EnumMap<>(EncodeHintType.class);
@ -135,7 +135,7 @@ public final class EncoderTestCase extends Assert {
    QRCode qrCode = Encoder.encode("ABCDEF", ErrorCorrectionLevel.H, hints);
    assertTrue(qrCode.toString().contains(" version: 7\n"));
  }
-  
+
  @Test(expected = WriterException.class)
  public void testEncodeWithVersionTooSmall() throws WriterException {
    Map<EncodeHintType, Object> hints = new EnumMap<>(EncodeHintType.class);
@ -742,12 +742,8 @@ public final class EncoderTestCase extends Assert {
    assertEquals(expected, qrCode.toString());
  }
-  private static String shiftJISString(byte[] bytes) throws WriterException {
+  private static String shiftJISString(byte[] bytes) {
-    try {
+    return new String(bytes, StringUtils.SHIFT_JIS_CHARSET);
-      return new String(bytes, "Shift_JIS");
-    } catch (UnsupportedEncodingException uee) {
-      throw new WriterException(uee.toString());
-    }
  }
 }