Aztec encode with ECI for non-default character sets (#1330)

* Aztec encoder: add ECI codes according to character set

Added backward-compatible overloads (delegating to the new charset-aware methods) to avoid modifying existing tests.

* fix testAztecWriter

- ISO-8859-1 cannot actually encode the Euro symbol ('€'); this test case only
  passed before because the Decoder wasn't actually doing the bytes→String
  decode, but simply round-tripping an unknown byte.
- Add extra test cases for implicit ISO-8859-1 (without ECI code), explicit
  ISO-8859-1 (with ECI code), and Shift_JIS

* remove unnecessary conversion between String and byte[] in Aztec EncoderTest and DetectorTest

* Aztec DecoderTest: use constants for charsets

* Aztec Code: remove unnecessary conversion between Charset and Charset.name() strings

* PDF417, QR, DataMatrix: remove unnecessary conversion between Charset and Charset.name() strings

Includes replacing StringUtils.guessEncoding() with .guessCharset(), to return
Charset rather than String.

This change makes the tacit assumption that the Shift_JIS charset *will* be
available.  There are existing comments suggesting that it might not always
be available… but the existing *tests* assume it will be.
Dan Lenski 2020-11-07 14:26:24 -08:00 committed by GitHub
parent 28d339e67d
commit 515688992b
16 changed files with 244 additions and 129 deletions
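
To make the intent concrete, here is a minimal usage sketch of the charset-aware path added by this commit (class name and sample text are illustrative; the writer reads the CHARACTER_SET hint exactly as the updated tests below do):

import com.google.zxing.BarcodeFormat;
import com.google.zxing.EncodeHintType;
import com.google.zxing.aztec.AztecWriter;
import com.google.zxing.common.BitMatrix;
import java.util.EnumMap;
import java.util.Map;

public class AztecEciExample {
  public static void main(String[] args) {
    Map<EncodeHintType, Object> hints = new EnumMap<>(EncodeHintType.class);
    // With a CHARACTER_SET hint the encoder now emits the matching ECI (FLG(n)) sequence
    // before the data; without it, no ECI is written and ISO-8859-1 is implied.
    hints.put(EncodeHintType.CHARACTER_SET, "UTF-8");
    BitMatrix matrix = new AztecWriter().encode("\u20AC 1 sample data.", BarcodeFormat.AZTEC, 0, 0, hints);
    System.out.println(matrix.getWidth() + "x" + matrix.getHeight());
  }
}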

View file

@ -24,7 +24,6 @@ import com.google.zxing.aztec.encoder.Encoder;
import com.google.zxing.common.BitMatrix;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Map;
/**
@ -39,7 +38,7 @@ public final class AztecWriter implements Writer {
@Override
public BitMatrix encode(String contents, BarcodeFormat format, int width, int height, Map<EncodeHintType,?> hints) {
Charset charset = StandardCharsets.ISO_8859_1;
Charset charset = null; // Do not add any ECI code by default
int eccPercent = Encoder.DEFAULT_EC_PERCENT;
int layers = Encoder.DEFAULT_AZTEC_LAYERS;
if (hints != null) {
@ -62,7 +61,7 @@ public final class AztecWriter implements Writer {
if (format != BarcodeFormat.AZTEC) {
throw new IllegalArgumentException("Can only encode AZTEC, but got " + format);
}
AztecCode aztec = Encoder.encode(contents.getBytes(charset), eccPercent, layers);
AztecCode aztec = Encoder.encode(contents, eccPercent, layers, charset);
return renderResult(aztec, width, height);
}

View file

@ -177,7 +177,7 @@ public final class Decoder {
eci = eci * 10 + (nextDigit - 2);
}
CharacterSetECI charsetECI = CharacterSetECI.getCharacterSetECIByValue(eci);
encoding = Charset.forName(charsetECI.name());
encoding = charsetECI.getCharset();
}
// Go back to whatever mode we had been in
shiftTable = latchTable;

View file

@ -21,6 +21,9 @@ import com.google.zxing.common.BitMatrix;
import com.google.zxing.common.reedsolomon.GenericGF;
import com.google.zxing.common.reedsolomon.ReedSolomonEncoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
/**
* Generates Aztec 2D barcodes.
*
@ -42,13 +45,66 @@ public final class Encoder {
}
/**
* Encodes the given binary content as an Aztec symbol
* Encodes the given string content as an Aztec symbol (without ECI code)
*
* @param data input data string; must be encodable as ISO/IEC 8859-1 (Latin-1)
* @return Aztec symbol matrix with metadata
*/
public static AztecCode encode(String data) {
return encode(data.getBytes(StandardCharsets.ISO_8859_1));
}
/**
* Encodes the given string content as an Aztec symbol (without ECI code)
*
* @param data input data string; must be encodable as ISO/IEC 8859-1 (Latin-1)
* @param minECCPercent minimal percentage of error check words (According to ISO/IEC 24778:2008,
* a minimum of 23% + 3 words is recommended)
* @param userSpecifiedLayers if non-zero, a user-specified value for the number of layers
* @return Aztec symbol matrix with metadata
*/
public static AztecCode encode(String data, int minECCPercent, int userSpecifiedLayers) {
return encode(data.getBytes(StandardCharsets.ISO_8859_1), minECCPercent, userSpecifiedLayers, null);
}
/**
* Encodes the given string content as an Aztec symbol
*
* @param data input data string
* @param minECCPercent minimal percentage of error check words (According to ISO/IEC 24778:2008,
* a minimum of 23% + 3 words is recommended)
* @param userSpecifiedLayers if non-zero, a user-specified value for the number of layers
* @param charset character set in which to encode string using ECI; if null, no ECI code
* will be inserted, and the string must be encodable as ISO/IEC 8859-1
* (Latin-1), the default encoding of the symbol.
* @return Aztec symbol matrix with metadata
*/
public static AztecCode encode(String data, int minECCPercent, int userSpecifiedLayers, Charset charset) {
byte[] bytes = data.getBytes(null != charset ? charset : StandardCharsets.ISO_8859_1);
return encode(bytes, minECCPercent, userSpecifiedLayers, charset);
}
/**
* Encodes the given binary content as an Aztec symbol (without ECI code)
*
* @param data input data bytes
* @return Aztec symbol matrix with metadata
*/
public static AztecCode encode(byte[] data) {
return encode(data, DEFAULT_EC_PERCENT, DEFAULT_AZTEC_LAYERS);
return encode(data, DEFAULT_EC_PERCENT, DEFAULT_AZTEC_LAYERS, null);
}
/**
* Encodes the given binary content as an Aztec symbol (without ECI code)
*
* @param data input data bytes
* @param minECCPercent minimal percentage of error check words (According to ISO/IEC 24778:2008,
* a minimum of 23% + 3 words is recommended)
* @param userSpecifiedLayers if non-zero, a user-specified value for the number of layers
* @return Aztec symbol matrix with metadata
*/
public static AztecCode encode(byte[] data, int minECCPercent, int userSpecifiedLayers) {
return encode(data, minECCPercent, userSpecifiedLayers, null);
}
/**
@ -58,11 +114,13 @@ public final class Encoder {
* @param minECCPercent minimal percentage of error check words (According to ISO/IEC 24778:2008,
* a minimum of 23% + 3 words is recommended)
* @param userSpecifiedLayers if non-zero, a user-specified value for the number of layers
* @param charset character set to mark using ECI; if null, no ECI code will be inserted, and the
* default encoding of ISO/IEC 8859-1 will be assumed by readers.
* @return Aztec symbol matrix with metadata
*/
public static AztecCode encode(byte[] data, int minECCPercent, int userSpecifiedLayers) {
public static AztecCode encode(byte[] data, int minECCPercent, int userSpecifiedLayers, Charset charset) {
// High-level encode
BitArray bits = new HighLevelEncoder(data).encode();
BitArray bits = new HighLevelEncoder(data, charset).encode();
// stuff bits and choose symbol size
int eccBits = bits.getSize() * minECCPercent / 100 + 11;
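
The new four-argument overload can also be called directly; a sketch (values mirror the existing tests, class name is illustrative):

import com.google.zxing.aztec.encoder.AztecCode;
import com.google.zxing.aztec.encoder.Encoder;
import java.nio.charset.StandardCharsets;

public class EncodeWithCharsetSketch {
  public static void main(String[] args) {
    // Passing a charset prepends the corresponding ECI code; passing null keeps the old
    // behaviour (no ECI, data must be Latin-1 encodable).
    AztecCode code = Encoder.encode("\u20AC 1 sample data.", 25,
        Encoder.DEFAULT_AZTEC_LAYERS, StandardCharsets.UTF_8);
    System.out.println("compact=" + code.isCompact() + " layers=" + code.getLayers());
  }
}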

View file

@ -17,6 +17,9 @@
package com.google.zxing.aztec.encoder;
import com.google.zxing.common.BitArray;
import com.google.zxing.common.CharacterSetECI;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Collection;
@ -148,16 +151,31 @@ public final class HighLevelEncoder {
}
private final byte[] text;
private final Charset charset;
public HighLevelEncoder(byte[] text) {
this.text = text;
this.charset = null;
}
public HighLevelEncoder(byte[] text, Charset charset) {
this.text = text;
this.charset = charset;
}
/**
* @return text represented by this encoder encoded as a {@link BitArray}
*/
public BitArray encode() {
Collection<State> states = Collections.singletonList(State.INITIAL_STATE);
State initialState = State.INITIAL_STATE;
if (charset != null) {
CharacterSetECI eci = CharacterSetECI.getCharacterSetECI(charset);
if (null == eci) {
throw new IllegalArgumentException("No ECI code for character set " + charset.toString());
}
initialState = initialState.appendFLGn(eci.getValue());
}
Collection<State> states = Collections.singletonList(initialState);
for (int index = 0; index < text.length; index++) {
int pairCode;
int nextChar = index + 1 < text.length ? text[index + 1] : 0;

View file

@ -16,6 +16,8 @@
package com.google.zxing.aztec.encoder;
import java.nio.charset.StandardCharsets;
import java.util.Deque;
import java.util.LinkedList;
@ -70,6 +72,25 @@ final class State {
return bitCount;
}
State appendFLGn(int eci) {
State result = shiftAndAppend(HighLevelEncoder.MODE_PUNCT, 0); // 0: FLG(n)
Token token = result.token;
int bitsAdded = 3;
if (eci < 0) {
token = token.add(0, 3); // 0: FNC1
} else if (eci > 999999) {
throw new IllegalArgumentException("ECI code must be between 0 and 999999");
} else {
byte[] eciDigits = Integer.toString(eci).getBytes(StandardCharsets.ISO_8859_1);
token = token.add(eciDigits.length, 3); // 1-6: number of ECI digits
for (int ii = 0; ii < eciDigits.length; ii++) {
token = token.add(eciDigits[ii] - '0' + 2, 4);
}
bitsAdded += eciDigits.length * 4;
}
return new State(token, mode, 0, bitCount + bitsAdded);
}
// Create a new state representing this state with a latch to a (not
// necessary different) mode, and then a code.
State latchAndAppend(int mode, int value) {
@ -143,7 +164,7 @@ final class State {
newModeBitCount += calculateBinaryShiftCost(other) - calculateBinaryShiftCost(this);
} else if (this.binaryShiftByteCount > other.binaryShiftByteCount && other.binaryShiftByteCount > 0) {
// maximum possible additional cost (we end up exceeding the 31 byte boundary and other state can stay beneath it)
newModeBitCount += 10;
newModeBitCount += 10;
}
return newModeBitCount <= other.bitCount;
}
@ -168,7 +189,7 @@ final class State {
public String toString() {
return String.format("%s bits=%d bytes=%d", HighLevelEncoder.MODE_NAMES[mode], bitCount, binaryShiftByteCount);
}
private static int calculateBinaryShiftCost(State state) {
if (state.binaryShiftByteCount > 62) {
return 21; // B/S with extended length
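
To illustrate the bit layout State.appendFLGn produces after the PUNCT shift and the FLG(n) indicator (code 0): a 3-bit digit count is followed by each decimal ECI digit in 4 bits as digit + 2. A standalone sketch of just that tail (hypothetical helper, not part of the commit):

public class FlgnBitsSketch {
  // Mirrors the digit-encoding loop in State.appendFLGn (shift/indicator bits omitted).
  static String flgnDigitBits(int eci) {
    StringBuilder sb = new StringBuilder();
    String digits = Integer.toString(eci);
    sb.append(toBits(digits.length(), 3));      // 1-6: number of ECI digits
    for (char d : digits.toCharArray()) {
      sb.append(toBits(d - '0' + 2, 4));        // each digit encoded as its value + 2
    }
    return sb.toString();
  }

  static String toBits(int value, int width) {
    StringBuilder s = new StringBuilder(Integer.toBinaryString(value));
    while (s.length() < width) {
      s.insert(0, '0');
    }
    return s.toString();
  }

  public static void main(String[] args) {
    // ECI 26 (UTF-8): count "010", '2' -> 4 -> "0100", '6' -> 8 -> "1000"
    System.out.println(flgnDigitBits(26));      // 01001001000
  }
}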

View file

@ -18,6 +18,8 @@ package com.google.zxing.common;
import com.google.zxing.FormatException;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
@ -93,6 +95,19 @@ public enum CharacterSetECI {
return values[0];
}
public Charset getCharset() {
return Charset.forName(name());
}
/**
* @param charset Java character set object
* @return CharacterSetECI representing ECI for character encoding, or null if it is legal
* but unsupported
*/
public static CharacterSetECI getCharacterSetECI(Charset charset) {
return NAME_TO_ECI.get(charset.name());
}
/**
* @param value character set ECI value
* @return {@code CharacterSetECI} representing ECI of given value, or null if it is legal but
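
A quick sketch of the two new CharacterSetECI helpers used above (the printed ECI value 26 for UTF-8 follows the standard ECI assignments; class name is illustrative):

import com.google.zxing.common.CharacterSetECI;
import java.nio.charset.StandardCharsets;

public class EciLookupSketch {
  public static void main(String[] args) {
    // Charset -> enum constant (null if the charset has no ECI assignment)
    CharacterSetECI eci = CharacterSetECI.getCharacterSetECI(StandardCharsets.UTF_8);
    // enum constant -> java.nio Charset
    System.out.println(eci.getValue() + " -> " + eci.getCharset());  // 26 -> UTF-8
  }
}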

View file

@ -17,6 +17,7 @@
package com.google.zxing.common;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import com.google.zxing.DecodeHintType;
@ -29,15 +30,17 @@ import com.google.zxing.DecodeHintType;
*/
public final class StringUtils {
private static final String PLATFORM_DEFAULT_ENCODING = Charset.defaultCharset().name();
private static final Charset PLATFORM_DEFAULT_ENCODING = Charset.defaultCharset();
public static final Charset SHIFT_JIS_CHARSET = Charset.forName("SJIS");
public static final Charset GB2312_CHARSET = Charset.forName("GB2312");
private static final Charset EUC_JP = Charset.forName("EUC_JP");
private static final boolean ASSUME_SHIFT_JIS =
SHIFT_JIS_CHARSET.equals(PLATFORM_DEFAULT_ENCODING) ||
EUC_JP.equals(PLATFORM_DEFAULT_ENCODING);
// Retained for ABI compatibility with earlier versions
public static final String SHIFT_JIS = "SJIS";
public static final String GB2312 = "GB2312";
private static final String EUC_JP = "EUC_JP";
private static final String UTF8 = "UTF8";
private static final String ISO88591 = "ISO8859_1";
private static final boolean ASSUME_SHIFT_JIS =
SHIFT_JIS.equalsIgnoreCase(PLATFORM_DEFAULT_ENCODING) ||
EUC_JP.equalsIgnoreCase(PLATFORM_DEFAULT_ENCODING);
private StringUtils() { }
@ -45,12 +48,32 @@ public final class StringUtils {
* @param bytes bytes encoding a string, whose encoding should be guessed
* @param hints decode hints if applicable
* @return name of guessed encoding; at the moment will only guess one of:
* {@link #SHIFT_JIS}, {@link #UTF8}, {@link #ISO88591}, or the platform
* default encoding if none of these can possibly be correct
* "SJIS", "UTF8", "ISO8859_1", or the platform default encoding if none
* of these can possibly be correct
*/
public static String guessEncoding(byte[] bytes, Map<DecodeHintType,?> hints) {
Charset c = guessCharset(bytes, hints);
if (c == SHIFT_JIS_CHARSET) {
return "SJIS";
} else if (c == StandardCharsets.UTF_8) {
return "UTF8";
} else if (c == StandardCharsets.ISO_8859_1) {
return "ISO8859_1";
}
return c.name();
}
/**
* @param bytes bytes encoding a string, whose encoding should be guessed
* @param hints decode hints if applicable
* @return Charset of guessed encoding; at the moment will only guess one of:
* {@link #SHIFT_JIS_CHARSET}, {@link StandardCharsets#UTF_8},
* {@link StandardCharsets#ISO_8859_1}, or the platform default encoding if
* none of these can possibly be correct
*/
public static Charset guessCharset(byte[] bytes, Map<DecodeHintType,?> hints) {
if (hints != null && hints.containsKey(DecodeHintType.CHARACTER_SET)) {
return hints.get(DecodeHintType.CHARACTER_SET).toString();
return Charset.forName(hints.get(DecodeHintType.CHARACTER_SET).toString());
}
// For now, merely tries to distinguish ISO-8859-1, UTF-8 and Shift_JIS,
// which should be by far the most common encodings.
@ -164,11 +187,11 @@ public final class StringUtils {
// Easy -- if there is BOM or at least 1 valid not-single byte character (and no evidence it can't be UTF-8), done
if (canBeUTF8 && (utf8bom || utf2BytesChars + utf3BytesChars + utf4BytesChars > 0)) {
return UTF8;
return StandardCharsets.UTF_8;
}
// Easy -- if assuming Shift_JIS or >= 3 valid consecutive not-ascii characters (and no evidence it can't be), done
if (canBeShiftJIS && (ASSUME_SHIFT_JIS || sjisMaxKatakanaWordLength >= 3 || sjisMaxDoubleBytesWordLength >= 3)) {
return SHIFT_JIS;
return SHIFT_JIS_CHARSET;
}
// Distinguishing Shift_JIS and ISO-8859-1 can be a little tough for short words. The crude heuristic is:
// - If we saw
@ -177,18 +200,18 @@ public final class StringUtils {
// - then we conclude Shift_JIS, else ISO-8859-1
if (canBeISO88591 && canBeShiftJIS) {
return (sjisMaxKatakanaWordLength == 2 && sjisKatakanaChars == 2) || isoHighOther * 10 >= length
? SHIFT_JIS : ISO88591;
? SHIFT_JIS_CHARSET : StandardCharsets.ISO_8859_1;
}
// Otherwise, try in order ISO-8859-1, Shift JIS, UTF-8 and fall back to default platform encoding
if (canBeISO88591) {
return ISO88591;
return StandardCharsets.ISO_8859_1;
}
if (canBeShiftJIS) {
return SHIFT_JIS;
return SHIFT_JIS_CHARSET;
}
if (canBeUTF8) {
return UTF8;
return StandardCharsets.UTF_8;
}
// Otherwise, we take a wild guess with platform encoding
return PLATFORM_DEFAULT_ENCODING;
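
A sketch of the new Charset-returning guess next to the retained String-returning wrapper (the bytes are the UTF-8 "Español" sample from the test below; class name is illustrative):

import com.google.zxing.common.StringUtils;
import java.nio.charset.Charset;

public class GuessCharsetSketch {
  public static void main(String[] args) {
    // "Español" as UTF-8 (0xC3 0xB1 is the two-byte encoding of 'ñ')
    byte[] bytes = { 0x45, 0x73, 0x70, 0x61, (byte) 0xC3, (byte) 0xB1, 0x6F, 0x6C };
    Charset guessed = StringUtils.guessCharset(bytes, null);   // java.nio Charset (UTF-8)
    String legacy = StringUtils.guessEncoding(bytes, null);    // "UTF8", kept for compatibility
    System.out.println(guessed + " / " + legacy);
  }
}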

View file

@ -20,7 +20,7 @@ import com.google.zxing.FormatException;
import com.google.zxing.common.BitSource;
import com.google.zxing.common.DecoderResult;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
@ -505,11 +505,7 @@ final class DecodedBitStreamParser {
bytes[i] = (byte) unrandomize255State(bits.readBits(8), codewordPosition++);
}
byteSegments.add(bytes);
try {
result.append(new String(bytes, "ISO8859_1"));
} catch (UnsupportedEncodingException uee) {
throw new IllegalStateException("Platform does not support required encoding: " + uee);
}
result.append(new String(bytes, StandardCharsets.ISO_8859_1));
}
/**

View file

@ -125,7 +125,7 @@ final class DecodedBitStreamParser {
case ECI_CHARSET:
CharacterSetECI charsetECI =
CharacterSetECI.getCharacterSetECIByValue(codewords[codeIndex++]);
encoding = Charset.forName(charsetECI.name());
encoding = charsetECI.getCharset();
break;
case ECI_GENERAL_PURPOSE:
// Can't do anything with generic ECI; skip its 2 characters

View file

@ -169,7 +169,7 @@ final class PDF417HighLevelEncoder {
if (encoding == null) {
encoding = DEFAULT_ENCODING;
} else if (!DEFAULT_ENCODING.equals(encoding)) {
CharacterSetECI eci = CharacterSetECI.getCharacterSetECIByName(encoding.name());
CharacterSetECI eci = CharacterSetECI.getCharacterSetECI(encoding);
if (eci != null) {
encodingECI(eci.getValue(), sb);
}

View file

@ -23,7 +23,7 @@ import com.google.zxing.common.CharacterSetECI;
import com.google.zxing.common.DecoderResult;
import com.google.zxing.common.StringUtils;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
@ -173,11 +173,7 @@ final class DecodedBitStreamParser {
count--;
}
try {
result.append(new String(buffer, StringUtils.GB2312));
} catch (UnsupportedEncodingException ignored) {
throw FormatException.getFormatInstance();
}
result.append(new String(buffer, StringUtils.GB2312_CHARSET));
}
private static void decodeKanjiSegment(BitSource bits,
@ -208,12 +204,7 @@ final class DecodedBitStreamParser {
offset += 2;
count--;
}
// Shift_JIS may not be supported in some environments:
try {
result.append(new String(buffer, StringUtils.SHIFT_JIS));
} catch (UnsupportedEncodingException ignored) {
throw FormatException.getFormatInstance();
}
result.append(new String(buffer, StringUtils.SHIFT_JIS_CHARSET));
}
private static void decodeByteSegment(BitSource bits,
@ -231,22 +222,18 @@ final class DecodedBitStreamParser {
for (int i = 0; i < count; i++) {
readBytes[i] = (byte) bits.readBits(8);
}
String encoding;
Charset encoding;
if (currentCharacterSetECI == null) {
// The spec isn't clear on this mode; see
// section 6.4.5: it does not say which encoding to assume
// upon decoding. I have seen ISO-8859-1 used as well as
// Shift_JIS -- without anything like an ECI designator to
// give a hint.
encoding = StringUtils.guessEncoding(readBytes, hints);
encoding = StringUtils.guessCharset(readBytes, hints);
} else {
encoding = currentCharacterSetECI.name();
}
try {
result.append(new String(readBytes, encoding));
} catch (UnsupportedEncodingException ignored) {
throw FormatException.getFormatInstance();
encoding = currentCharacterSetECI.getCharset();
}
result.append(new String(readBytes, encoding));
byteSegments.add(readBytes);
}

View file

@ -19,6 +19,7 @@ package com.google.zxing.qrcode.encoder;
import com.google.zxing.EncodeHintType;
import com.google.zxing.WriterException;
import com.google.zxing.common.BitArray;
import com.google.zxing.common.StringUtils;
import com.google.zxing.common.CharacterSetECI;
import com.google.zxing.common.reedsolomon.GenericGF;
import com.google.zxing.common.reedsolomon.ReedSolomonEncoder;
@ -26,7 +27,8 @@ import com.google.zxing.qrcode.decoder.ErrorCorrectionLevel;
import com.google.zxing.qrcode.decoder.Mode;
import com.google.zxing.qrcode.decoder.Version;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
@ -47,7 +49,7 @@ public final class Encoder {
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, // 0x50-0x5f
};
static final String DEFAULT_BYTE_MODE_ENCODING = "ISO-8859-1";
static final Charset DEFAULT_BYTE_MODE_ENCODING = StandardCharsets.ISO_8859_1;
private Encoder() {
}
@ -77,10 +79,10 @@ public final class Encoder {
Map<EncodeHintType,?> hints) throws WriterException {
// Determine what character encoding has been specified by the caller, if any
String encoding = DEFAULT_BYTE_MODE_ENCODING;
Charset encoding = DEFAULT_BYTE_MODE_ENCODING;
boolean hasEncodingHint = hints != null && hints.containsKey(EncodeHintType.CHARACTER_SET);
if (hasEncodingHint) {
encoding = hints.get(EncodeHintType.CHARACTER_SET).toString();
encoding = Charset.forName(hints.get(EncodeHintType.CHARACTER_SET).toString());
}
// Pick an encoding mode appropriate for the content. Note that this will not attempt to use
@ -93,7 +95,7 @@ public final class Encoder {
// Append ECI segment if applicable
if (mode == Mode.BYTE && hasEncodingHint) {
CharacterSetECI eci = CharacterSetECI.getCharacterSetECIByName(encoding);
CharacterSetECI eci = CharacterSetECI.getCharacterSetECI(encoding);
if (eci != null) {
appendECI(eci, headerBits);
}
@ -221,8 +223,8 @@ public final class Encoder {
* Choose the best mode by examining the content. Note that 'encoding' is used as a hint;
* if it is Shift_JIS, and the input is only double-byte Kanji, then we return {@link Mode#KANJI}.
*/
private static Mode chooseMode(String content, String encoding) {
if ("Shift_JIS".equals(encoding) && isOnlyDoubleByteKanji(content)) {
private static Mode chooseMode(String content, Charset encoding) {
if (StringUtils.SHIFT_JIS_CHARSET.equals(encoding) && isOnlyDoubleByteKanji(content)) {
// Choose Kanji mode if all input are double-byte characters
return Mode.KANJI;
}
@ -248,12 +250,7 @@ public final class Encoder {
}
private static boolean isOnlyDoubleByteKanji(String content) {
byte[] bytes;
try {
bytes = content.getBytes("Shift_JIS");
} catch (UnsupportedEncodingException ignored) {
return false;
}
byte[] bytes = content.getBytes(StringUtils.SHIFT_JIS_CHARSET);
int length = bytes.length;
if (length % 2 != 0) {
return false;
@ -512,7 +509,7 @@ public final class Encoder {
static void appendBytes(String content,
Mode mode,
BitArray bits,
String encoding) throws WriterException {
Charset encoding) throws WriterException {
switch (mode) {
case NUMERIC:
appendNumericBytes(content, bits);
@ -579,26 +576,15 @@ public final class Encoder {
}
}
static void append8BitBytes(String content, BitArray bits, String encoding)
throws WriterException {
byte[] bytes;
try {
bytes = content.getBytes(encoding);
} catch (UnsupportedEncodingException uee) {
throw new WriterException(uee);
}
static void append8BitBytes(String content, BitArray bits, Charset encoding) {
byte[] bytes = content.getBytes(encoding);
for (byte b : bytes) {
bits.appendBits(b, 8);
}
}
static void appendKanjiBytes(String content, BitArray bits) throws WriterException {
byte[] bytes;
try {
bytes = content.getBytes("Shift_JIS");
} catch (UnsupportedEncodingException uee) {
throw new WriterException(uee);
}
byte[] bytes = content.getBytes(StringUtils.SHIFT_JIS_CHARSET);
if (bytes.length % 2 != 0) {
throw new WriterException("Kanji byte size not even");
}

View file

@ -27,7 +27,6 @@ import com.google.zxing.common.DecoderResult;
import org.junit.Assert;
import org.junit.Test;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@ -62,7 +61,7 @@ public final class DetectorTest extends Assert {
// Test that we can tolerate errors in the parameter locator bits
private static void testErrorInParameterLocator(String data) throws Exception {
AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1), 25, Encoder.DEFAULT_AZTEC_LAYERS);
AztecCode aztec = Encoder.encode(data, 25, Encoder.DEFAULT_AZTEC_LAYERS);
Random random = new Random(aztec.getMatrix().hashCode()); // pseudo-random, but deterministic
int layers = aztec.getLayers();
boolean compact = aztec.isCompact();

View file

@ -44,6 +44,12 @@ import java.util.regex.Pattern;
*/
public final class EncoderTest extends Assert {
private static final Charset ISO_8859_1 = StandardCharsets.ISO_8859_1;
private static final Charset UTF_8 = StandardCharsets.UTF_8;
private static final Charset SHIFT_JIS = Charset.forName("Shift_JIS");
private static final Charset ISO_8859_15 = Charset.forName("ISO-8859-15");
private static final Charset WINDOWS_1252 = Charset.forName("Windows-1252");
private static final Pattern DOTX = Pattern.compile("[^.X]");
private static final Pattern SPACES = Pattern.compile("\\s+");
private static final ResultPoint[] NO_POINTS = new ResultPoint[0];
@ -128,17 +134,20 @@ public final class EncoderTest extends Assert {
@Test
public void testAztecWriter() throws Exception {
testWriter("\u20AC 1 sample data.", "ISO-8859-1", 25, true, 2);
testWriter("\u20AC 1 sample data.", "ISO-8859-15", 25, true, 2);
testWriter("\u20AC 1 sample data.", "UTF-8", 25, true, 2);
testWriter("\u20AC 1 sample data.", "UTF-8", 100, true, 3);
testWriter("\u20AC 1 sample data.", "UTF-8", 300, true, 4);
testWriter("\u20AC 1 sample data.", "UTF-8", 500, false, 5);
testWriter("Espa\u00F1ol", null, 25, true, 1); // Without ECI (implicit ISO-8859-1)
testWriter("Espa\u00F1ol", ISO_8859_1, 25, true, 1); // Explicit ISO-8859-1
testWriter("\u20AC 1 sample data.", WINDOWS_1252, 25, true, 2); // Standard ISO-8859-1 cannot encode Euro symbol; Windows-1252 superset can
testWriter("\u20AC 1 sample data.", ISO_8859_15, 25, true, 2);
testWriter("\u20AC 1 sample data.", UTF_8, 25, true, 2);
testWriter("\u20AC 1 sample data.", UTF_8, 100, true, 3);
testWriter("\u20AC 1 sample data.", UTF_8, 300, true, 4);
testWriter("\u20AC 1 sample data.", UTF_8, 500, false, 5);
testWriter("The capital of Japan is named \u6771\u4EAC.", SHIFT_JIS, 25, true, 3);
// Test AztecWriter defaults
String data = "In ut magna vel mauris malesuada";
AztecWriter writer = new AztecWriter();
BitMatrix matrix = writer.encode(data, BarcodeFormat.AZTEC, 0, 0);
AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1),
AztecCode aztec = Encoder.encode(data,
Encoder.DEFAULT_EC_PERCENT, Encoder.DEFAULT_AZTEC_LAYERS);
BitMatrix expectedMatrix = aztec.getMatrix();
assertEquals(matrix, expectedMatrix);
@ -418,7 +427,7 @@ public final class EncoderTest extends Assert {
@Test
public void testUserSpecifiedLayers() throws FormatException {
byte[] alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".getBytes(StandardCharsets.ISO_8859_1);
String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
AztecCode aztec = Encoder.encode(alphabet, 25, -2);
assertEquals(2, aztec.getLayers());
assertTrue(aztec.isCompact());
@ -449,22 +458,21 @@ public final class EncoderTest extends Assert {
String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
// encodes as 26 * 5 * 4 = 520 bits of data
String alphabet4 = alphabet + alphabet + alphabet + alphabet;
byte[] data = alphabet4.getBytes(StandardCharsets.ISO_8859_1);
try {
Encoder.encode(data, 0, -4);
Encoder.encode(alphabet4, 0, -4);
fail("Encode should have failed. Text can't fit in 1-layer compact");
} catch (IllegalArgumentException expected) {
// continue
}
// If we just try to encode it normally, it will go to a non-compact 4 layer
AztecCode aztecCode = Encoder.encode(data, 0, Encoder.DEFAULT_AZTEC_LAYERS);
AztecCode aztecCode = Encoder.encode(alphabet4, 0, Encoder.DEFAULT_AZTEC_LAYERS);
assertFalse(aztecCode.isCompact());
assertEquals(4, aztecCode.getLayers());
// But shortening the string to 100 bytes (500 bits of data), compact works fine, even if we
// include more error checking.
aztecCode = Encoder.encode(alphabet4.substring(0, 100).getBytes(StandardCharsets.ISO_8859_1), 10, Encoder.DEFAULT_AZTEC_LAYERS);
aztecCode = Encoder.encode(alphabet4.substring(0, 100), 10, Encoder.DEFAULT_AZTEC_LAYERS);
assertTrue(aztecCode.isCompact());
assertEquals(4, aztecCode.getLayers());
}
@ -472,7 +480,7 @@ public final class EncoderTest extends Assert {
// Helper routines
private static void testEncode(String data, boolean compact, int layers, String expected) throws FormatException {
AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1), 33, Encoder.DEFAULT_AZTEC_LAYERS);
AztecCode aztec = Encoder.encode(data, 33, Encoder.DEFAULT_AZTEC_LAYERS);
assertEquals("Unexpected symbol format (compact)", compact, aztec.isCompact());
assertEquals("Unexpected nr. of layers", layers, aztec.getLayers());
BitMatrix matrix = aztec.getMatrix();
@ -480,7 +488,7 @@ public final class EncoderTest extends Assert {
}
private static void testEncodeDecode(String data, boolean compact, int layers) throws Exception {
AztecCode aztec = Encoder.encode(data.getBytes(StandardCharsets.ISO_8859_1), 25, Encoder.DEFAULT_AZTEC_LAYERS);
AztecCode aztec = Encoder.encode(data, 25, Encoder.DEFAULT_AZTEC_LAYERS);
assertEquals("Unexpected symbol format (compact)", compact, aztec.isCompact());
assertEquals("Unexpected nr. of layers", layers, aztec.getLayers());
BitMatrix matrix = aztec.getMatrix();
@ -500,20 +508,20 @@ public final class EncoderTest extends Assert {
}
private static void testWriter(String data,
String charset,
Charset charset,
int eccPercent,
boolean compact,
int layers) throws FormatException {
// 1. Perform an encode-decode round-trip because it can be lossy.
// 2. Aztec Decoder currently always decodes the data with a LATIN-1 charset:
String expectedData = new String(data.getBytes(Charset.forName(charset)), StandardCharsets.ISO_8859_1);
// Perform an encode-decode round-trip because it can be lossy.
Map<EncodeHintType,Object> hints = new EnumMap<>(EncodeHintType.class);
hints.put(EncodeHintType.CHARACTER_SET, charset);
if (null != charset) {
hints.put(EncodeHintType.CHARACTER_SET, charset.name());
}
hints.put(EncodeHintType.ERROR_CORRECTION, eccPercent);
AztecWriter writer = new AztecWriter();
BitMatrix matrix = writer.encode(data, BarcodeFormat.AZTEC, 0, 0, hints);
AztecCode aztec = Encoder.encode(data.getBytes(Charset.forName(charset)), eccPercent,
Encoder.DEFAULT_AZTEC_LAYERS);
AztecCode aztec = Encoder.encode(data, eccPercent,
Encoder.DEFAULT_AZTEC_LAYERS, charset);
assertEquals("Unexpected symbol format (compact)", compact, aztec.isCompact());
assertEquals("Unexpected nr. of layers", layers, aztec.getLayers());
BitMatrix matrix2 = aztec.getMatrix();
@ -521,7 +529,7 @@ public final class EncoderTest extends Assert {
AztecDetectorResult r =
new AztecDetectorResult(matrix, NO_POINTS, aztec.isCompact(), aztec.getCodeWords(), aztec.getLayers());
DecoderResult res = new Decoder().decode(r);
assertEquals(expectedData, res.getText());
assertEquals(data, res.getText());
// Check error correction by introducing up to eccPercent/2 errors
int ecWords = aztec.getCodeWords() * eccPercent / 100 / 2;
Random random = getPseudoRandom();
@ -537,7 +545,7 @@ public final class EncoderTest extends Assert {
}
r = new AztecDetectorResult(matrix, NO_POINTS, aztec.isCompact(), aztec.getCodeWords(), aztec.getLayers());
res = new Decoder().decode(r);
assertEquals(expectedData, res.getText());
assertEquals(data, res.getText());
}
private static Random getPseudoRandom() {

View file

@ -19,6 +19,7 @@ package com.google.zxing.common;
import org.junit.Assert;
import org.junit.Test;
import java.nio.charset.StandardCharsets;
import java.nio.charset.Charset;
/**
@ -28,34 +29,42 @@ public final class StringUtilsTestCase extends Assert {
@Test
public void testShortShiftJIS1() {
// ÈáëÈö
doTest(new byte[] { (byte) 0x8b, (byte) 0xe0, (byte) 0x8b, (byte) 0x9b, }, "SJIS");
// 金魚
doTest(new byte[] { (byte) 0x8b, (byte) 0xe0, (byte) 0x8b, (byte) 0x9b, }, StringUtils.SHIFT_JIS_CHARSET, "SJIS");
}
@Test
public void testShortISO885911() {
// bd
doTest(new byte[] { (byte) 0x62, (byte) 0xe5, (byte) 0x64, }, "ISO-8859-1");
// båd
doTest(new byte[] { (byte) 0x62, (byte) 0xe5, (byte) 0x64, }, StandardCharsets.ISO_8859_1, "ISO8859_1");
}
@Test
public void testShortUTF81() {
// Español
doTest(new byte[] { (byte) 0x45, (byte) 0x73, (byte) 0x70, (byte) 0x61, (byte) 0xc3,
(byte) 0xb1, (byte) 0x6f, (byte) 0x6c },
StandardCharsets.UTF_8, "UTF8");
}
@Test
public void testMixedShiftJIS1() {
// Hello Èáë!
// Hello 金!
doTest(new byte[] { (byte) 0x48, (byte) 0x65, (byte) 0x6c, (byte) 0x6c, (byte) 0x6f,
(byte) 0x20, (byte) 0x8b, (byte) 0xe0, (byte) 0x21, },
"SJIS");
StringUtils.SHIFT_JIS_CHARSET, "SJIS");
}
private static void doTest(byte[] bytes, String charsetName) {
Charset charset = Charset.forName(charsetName);
String guessedName = StringUtils.guessEncoding(bytes, null);
Charset guessedEncoding = Charset.forName(guessedName);
assertEquals(charset, guessedEncoding);
private static void doTest(byte[] bytes, Charset charset, String encoding) {
Charset guessedCharset = StringUtils.guessCharset(bytes, null);
String guessedEncoding = StringUtils.guessEncoding(bytes, null);
assertEquals(charset, guessedCharset);
assertEquals(encoding, guessedEncoding);
}
/**
* Utility for printing out a string in given encoding as a Java statement, since it's better
* to write that into the Java source file rather than risk character encoding issues in the
* to write that into the Java source file rather than risk character encoding issues in the
* source file itself.
*
* @param args command line arguments

View file

@ -19,6 +19,7 @@ package com.google.zxing.qrcode.encoder;
import com.google.zxing.EncodeHintType;
import com.google.zxing.WriterException;
import com.google.zxing.common.BitArray;
import com.google.zxing.common.StringUtils;
import com.google.zxing.qrcode.decoder.ErrorCorrectionLevel;
import com.google.zxing.qrcode.decoder.Mode;
import com.google.zxing.qrcode.decoder.Version;
@ -26,7 +27,6 @@ import com.google.zxing.qrcode.decoder.Version;
import org.junit.Assert;
import org.junit.Test;
import java.io.UnsupportedEncodingException;
import java.util.EnumMap;
import java.util.Map;
@ -127,7 +127,7 @@ public final class EncoderTestCase extends Assert {
">>\n";
assertEquals(expected, qrCode.toString());
}
@Test
public void testEncodeWithVersion() throws WriterException {
Map<EncodeHintType, Object> hints = new EnumMap<>(EncodeHintType.class);
@ -135,7 +135,7 @@ public final class EncoderTestCase extends Assert {
QRCode qrCode = Encoder.encode("ABCDEF", ErrorCorrectionLevel.H, hints);
assertTrue(qrCode.toString().contains(" version: 7\n"));
}
@Test(expected = WriterException.class)
public void testEncodeWithVersionTooSmall() throws WriterException {
Map<EncodeHintType, Object> hints = new EnumMap<>(EncodeHintType.class);
@ -742,12 +742,8 @@ public final class EncoderTestCase extends Assert {
assertEquals(expected, qrCode.toString());
}
private static String shiftJISString(byte[] bytes) throws WriterException {
try {
return new String(bytes, "Shift_JIS");
} catch (UnsupportedEncodingException uee) {
throw new WriterException(uee.toString());
}
private static String shiftJISString(byte[] bytes) {
return new String(bytes, StringUtils.SHIFT_JIS_CHARSET);
}
}