Minimal encoding for Data Matrix with multi ECI and GS1-FNC1 support (2nd) (#1479)

* Added Data Matrix minimal encoding with multi-ECI and GS1-FNC1 support.

* Added support for MACRO-5 and MACRO-6

* Improved comment
AlexGeller1 2022-01-12 18:58:24 +01:00 committed by GitHub
parent 491006ac3b
commit ab23778d4b
7 changed files with 1683 additions and 121 deletions


@@ -44,6 +44,20 @@ public enum EncodeHintType {
*/
DATA_MATRIX_SHAPE,
/**
* Specifies whether to use compact mode for Data Matrix (type {@link Boolean}, or "true" or "false"
* {@link String} value).
* The compact encoding mode also supports the encoding of characters that are not in the ISO-8859-1
* character set via ECIs.
* Please note that in that case, the most compact character encoding is chosen for characters in
* the input that are not in the ISO-8859-1 character set. Based on experience, some scanners do not
* support encodings like cp-1256 (Arabic). In such cases the encoding can be forced to UTF-8 by
* means of the {@link #CHARACTER_SET} encoding hint.
* Compact encoding also provides GS1-FNC1 support when {@link #GS1_FORMAT} is selected. In this case,
* the group-separator character (ASCII 29 decimal) can be used to encode the positions of FNC1 codewords
* for the purpose of delimiting AIs.
*/
DATA_MATRIX_COMPACT,
/**
* Specifies a minimum barcode size (type {@link Dimension}). Only applicable to Data Matrix now.
*

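A minimal usage sketch of the new hint (a sketch, not part of the commit): it relies only on the public DataMatrixWriter and EncodeHintType API shown in this diff; the class name and the GS1 element string (AIs 10 and 21, separated by the GS character) are illustrative assumptions.

import com.google.zxing.BarcodeFormat;
import com.google.zxing.EncodeHintType;
import com.google.zxing.common.BitMatrix;
import com.google.zxing.datamatrix.DataMatrixWriter;
import java.util.EnumMap;
import java.util.Map;

public final class CompactDataMatrixExample {
  public static void main(String[] args) throws Exception {
    Map<EncodeHintType, Object> hints = new EnumMap<>(EncodeHintType.class);
    hints.put(EncodeHintType.DATA_MATRIX_COMPACT, true); // enable the minimal (compact) encoder
    hints.put(EncodeHintType.GS1_FORMAT, true);          // GS (ASCII 29) in the input marks FNC1 positions

    // Illustrative GS1 element string: AI 10 (batch) terminated by GS, then AI 21 (serial).
    String contents = "10" + "ABC123" + "\u001D" + "21" + "12345";

    BitMatrix matrix = new DataMatrixWriter().encode(contents, BarcodeFormat.DATA_MATRIX, 0, 0, hints);
    System.out.println(matrix.getWidth() + "x" + matrix.getHeight());
  }
}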

@@ -0,0 +1,200 @@
/*
* Copyright 2021 ZXing authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.zxing.common;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.List;
/**
* Set of CharsetEncoders for a given input string
*
* Invariants:
* - The list contains only encoders from CharacterSetECI (this list is shorter than the list of encoders available on
* the platform for which ECI values are defined).
* - The list contains at least one encoder for every character in the input.
* - The first encoder in the list is always the ISO-8859-1 encoder, even if no character in the input can be encoded
* by it.
* - If the input contains a character that is not in ISO-8859-1 then the last two entries in the list will be the
* UTF-8 encoder and the UTF-16BE encoder.
*
* @author Alex Geller
*/
public final class ECIEncoderSet {
// List of encoders that potentially encode characters not in ISO-8859-1 in one byte.
private static final List<CharsetEncoder> ENCODERS = new ArrayList<>();
static {
final String[] names = { "IBM437",
"ISO-8859-2",
"ISO-8859-3",
"ISO-8859-4",
"ISO-8859-5",
"ISO-8859-6",
"ISO-8859-7",
"ISO-8859-8",
"ISO-8859-9",
"ISO-8859-10",
"ISO-8859-11",
"ISO-8859-13",
"ISO-8859-14",
"ISO-8859-15",
"ISO-8859-16",
"windows-1250",
"windows-1251",
"windows-1252",
"windows-1256",
"Shift_JIS" };
for (String name : names) {
if (CharacterSetECI.getCharacterSetECIByName(name) != null) {
try {
ENCODERS.add(Charset.forName(name).newEncoder());
} catch (UnsupportedCharsetException e) {
// continue
}
}
}
}
private final CharsetEncoder[] encoders;
private final int priorityEncoderIndex;
/**
* Constructs an encoder set
*
* @param stringToEncode the string that needs to be encoded
* @param priorityCharset The preferred {@link Charset} or null.
* @param fnc1 denotes the character in the input that represents the FNC1 character, or -1 for a non-GS1
* barcode. When specified, it is considered an error to pass it as an argument to the methods canEncode() or encode().
*/
public ECIEncoderSet(String stringToEncode, Charset priorityCharset, int fnc1) {
List<CharsetEncoder> neededEncoders = new ArrayList<>();
//we always need the ISO-8859-1 encoder. It is the default encoding
neededEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
boolean needUnicodeEncoder = priorityCharset != null && priorityCharset.name().startsWith("UTF");
//Walk over the input string and see if all characters can be encoded with the list of encoders
for (int i = 0; i < stringToEncode.length(); i++) {
boolean canEncode = false;
for (CharsetEncoder encoder : neededEncoders) {
char c = stringToEncode.charAt(i);
if (c == fnc1 || encoder.canEncode(c)) {
canEncode = true;
break;
}
}
if (!canEncode) {
//for the character at position i we don't yet have an encoder in the list
for (CharsetEncoder encoder : ENCODERS) {
if (encoder.canEncode(stringToEncode.charAt(i))) {
//Good, we found an encoder that can encode the character. We add it to the list and continue scanning
//the input
neededEncoders.add(encoder);
canEncode = true;
break;
}
}
}
if (!canEncode) {
//The character is not encodable by any of the single-byte encoders, so we remember that we will need a
//Unicode encoder.
needUnicodeEncoder = true;
}
}
if (neededEncoders.size() == 1 && !needUnicodeEncoder) {
//the entire input can be encoded by the ISO-8859-1 encoder
encoders = new CharsetEncoder[] { neededEncoders.get(0) };
} else {
// we need more than one single byte encoder or we need a Unicode encoder.
// In this case we append a UTF-8 and UTF-16 encoder to the list
encoders = new CharsetEncoder[neededEncoders.size() + 2];
int index = 0;
for (CharsetEncoder encoder : neededEncoders) {
encoders[index++] = encoder;
}
encoders[index] = StandardCharsets.UTF_8.newEncoder();
encoders[index + 1] = StandardCharsets.UTF_16BE.newEncoder();
}
//Compute priorityEncoderIndex by looking up priorityCharset in encoders
int priorityEncoderIndexValue = -1;
if (priorityCharset != null) {
for (int i = 0; i < encoders.length; i++) {
if (encoders[i] != null && priorityCharset.name().equals(encoders[i].charset().name())) {
priorityEncoderIndexValue = i;
break;
}
}
}
priorityEncoderIndex = priorityEncoderIndexValue;
//invariants
assert encoders.length > 0;
assert encoders[0].charset().equals(StandardCharsets.ISO_8859_1);
}
public int length() {
return encoders.length;
}
public String getCharsetName(int index) {
assert index < length();
return encoders[index].charset().name();
}
public Charset getCharset(int index) {
assert index < length();
return encoders[index].charset();
}
public int getECIValue(int encoderIndex) {
return CharacterSetECI.getCharacterSetECI(encoders[encoderIndex].charset()).getValue();
}
/*
* returns -1 if no priority charset was defined
*/
public int getPriorityEncoderIndex() {
return priorityEncoderIndex;
}
public boolean canEncode(char c, int encoderIndex) {
assert encoderIndex < length();
CharsetEncoder encoder = encoders[encoderIndex];
return encoder.canEncode("" + c);
}
public byte[] encode(char c, int encoderIndex) {
assert encoderIndex < length();
CharsetEncoder encoder = encoders[encoderIndex];
assert encoder.canEncode("" + c);
return ("" + c).getBytes(encoder.charset());
}
public byte[] encode(String s, int encoderIndex) {
assert encoderIndex < length();
CharsetEncoder encoder = encoders[encoderIndex];
return s.getBytes(encoder.charset());
}
}
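
To make the API of the new helper concrete, here is a small sketch (not part of the commit) that builds an ECIEncoderSet for mixed Latin/Polish input and inspects the resulting encoders. It uses only the constructor and accessors defined above; the expected charset choice assumes a standard JRE.

import com.google.zxing.common.ECIEncoderSet;

public final class ECIEncoderSetDemo {
  public static void main(String[] args) {
    // No priority charset, non-GS1 input (fnc1 = -1).
    ECIEncoderSet encoders = new ECIEncoderSet("abc\u0105", null, -1);

    // Per the invariants: index 0 is always ISO-8859-1; the 'a with ogonek' forces an additional
    // single-byte encoder (ISO-8859-2 on a standard JRE) plus the trailing UTF-8/UTF-16BE pair.
    for (int i = 0; i < encoders.length(); i++) {
      System.out.println(encoders.getCharsetName(i)
          + " canEncode(\u0105): " + encoders.canEncode('\u0105', i));
    }
  }
}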


@@ -24,11 +24,13 @@ import com.google.zxing.datamatrix.encoder.DefaultPlacement;
import com.google.zxing.Dimension;
import com.google.zxing.datamatrix.encoder.ErrorCorrection;
import com.google.zxing.datamatrix.encoder.HighLevelEncoder;
import com.google.zxing.datamatrix.encoder.MinimalEncoder;
import com.google.zxing.datamatrix.encoder.SymbolInfo;
import com.google.zxing.datamatrix.encoder.SymbolShapeHint;
import com.google.zxing.qrcode.encoder.ByteMatrix;
import java.util.Map;
import java.nio.charset.Charset;
/**
* This object renders a Data Matrix code as a BitMatrix 2D array of greyscale values.
@@ -81,7 +83,24 @@ public final class DataMatrixWriter implements Writer {
//1. step: Data encodation
String encoded = HighLevelEncoder.encodeHighLevel(contents, shape, minSize, maxSize);
String encoded;
boolean hasCompactionHint = hints != null && hints.containsKey(EncodeHintType.DATA_MATRIX_COMPACT) &&
Boolean.parseBoolean(hints.get(EncodeHintType.DATA_MATRIX_COMPACT).toString());
if (hasCompactionHint) {
boolean hasGS1FormatHint = hints.containsKey(EncodeHintType.GS1_FORMAT) &&
Boolean.parseBoolean(hints.get(EncodeHintType.GS1_FORMAT).toString());
Charset charset = null;
boolean hasEncodingHint = hints.containsKey(EncodeHintType.CHARACTER_SET);
if (hasEncodingHint) {
charset = Charset.forName(hints.get(EncodeHintType.CHARACTER_SET).toString());
}
encoded = MinimalEncoder.encodeHighLevel(contents, charset, hasGS1FormatHint ? 0x1D : -1, shape);
} else {
encoded = HighLevelEncoder.encodeHighLevel(contents, shape, minSize, maxSize);
}
SymbolInfo symbolInfo = SymbolInfo.lookup(encoded.length(), shape, minSize, maxSize, true);
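
Related to the charset handling above: as the DATA_MATRIX_COMPACT documentation suggests, the automatically chosen single-byte ECI can be overridden by forcing UTF-8 through the CHARACTER_SET hint. A hedged sketch under that assumption (class name and sample text are illustrative):

import com.google.zxing.BarcodeFormat;
import com.google.zxing.EncodeHintType;
import com.google.zxing.common.BitMatrix;
import com.google.zxing.datamatrix.DataMatrixWriter;
import java.util.EnumMap;
import java.util.Map;

public final class ForcedUtf8DataMatrixExample {
  public static void main(String[] args) throws Exception {
    Map<EncodeHintType, Object> hints = new EnumMap<>(EncodeHintType.class);
    hints.put(EncodeHintType.DATA_MATRIX_COMPACT, true);
    // Without this hint the minimal encoder may choose a compact single-byte ECI such as
    // windows-1256 for Arabic input; forcing UTF-8 trades a few codewords for scanner compatibility.
    hints.put(EncodeHintType.CHARACTER_SET, "UTF-8");

    BitMatrix matrix = new DataMatrixWriter()
        .encode("\u0645\u0631\u062D\u0628\u0627", BarcodeFormat.DATA_MATRIX, 0, 0, hints);
    System.out.println(matrix.getWidth() + "x" + matrix.getHeight());
  }
}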


@@ -91,15 +91,15 @@ public final class HighLevelEncoder {
/**
* 05 Macro header
*/
private static final String MACRO_05_HEADER = "[)>\u001E05\u001D";
static final String MACRO_05_HEADER = "[)>\u001E05\u001D";
/**
* 06 Macro header
*/
private static final String MACRO_06_HEADER = "[)>\u001E06\u001D";
static final String MACRO_06_HEADER = "[)>\u001E06\u001D";
/**
* Macro trailer
*/
private static final String MACRO_TRAILER = "\u001E\u0004";
static final String MACRO_TRAILER = "\u001E\u0004";
static final int ASCII_ENCODATION = 0;
static final int C40_ENCODATION = 1;
@@ -406,15 +406,15 @@ public final class HighLevelEncoder {
return ch >= 128 && ch <= 255;
}
private static boolean isNativeC40(char ch) {
static boolean isNativeC40(char ch) {
return (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z');
}
private static boolean isNativeText(char ch) {
static boolean isNativeText(char ch) {
return (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z');
}
private static boolean isNativeX12(char ch) {
static boolean isNativeX12(char ch) {
return isX12TermSep(ch) || (ch == ' ') || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z');
}
@@ -424,7 +424,7 @@ public final class HighLevelEncoder {
|| (ch == '>');
}
private static boolean isNativeEDIFACT(char ch) {
static boolean isNativeEDIFACT(char ch) {
return ch >= ' ' && ch <= '^';
}
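
The macro header and trailer constants above were made package-visible, presumably so the new MinimalEncoder can reuse them. A hedged sketch of what the added MACRO-05 support enables (the literals mirror MACRO_05_HEADER and MACRO_TRAILER; class name and payload are illustrative):

import com.google.zxing.BarcodeFormat;
import com.google.zxing.EncodeHintType;
import com.google.zxing.common.BitMatrix;
import com.google.zxing.datamatrix.DataMatrixWriter;
import java.util.EnumMap;
import java.util.Map;

public final class Macro05Example {
  public static void main(String[] args) throws Exception {
    // "[)>" RS "05" GS ... RS EOT, i.e. MACRO_05_HEADER + payload + MACRO_TRAILER.
    String contents = "[)>\u001E05\u001D" + "ABCDEFG" + "\u001E\u0004";

    Map<EncodeHintType, Object> hints = new EnumMap<>(EncodeHintType.class);
    hints.put(EncodeHintType.DATA_MATRIX_COMPACT, true);

    // With compaction enabled, the header/trailer pair should be collapsed into the dedicated
    // 05 Macro codeword rather than being encoded character by character.
    BitMatrix matrix = new DataMatrixWriter().encode(contents, BarcodeFormat.DATA_MATRIX, 0, 0, hints);
    System.out.println(matrix.getWidth() + "x" + matrix.getHeight());
  }
}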

File diff suppressed because it is too large


@@ -19,17 +19,14 @@ package com.google.zxing.qrcode.encoder;
import com.google.zxing.qrcode.decoder.Mode;
import com.google.zxing.qrcode.decoder.Version;
import com.google.zxing.common.BitArray;
import com.google.zxing.common.CharacterSetECI;
import com.google.zxing.common.ECIEncoderSet;
import com.google.zxing.WriterException;
import com.google.zxing.qrcode.decoder.ErrorCorrectionLevel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.nio.charset.UnsupportedCharsetException;
/**
* Encoder that encodes minimally
@@ -76,49 +73,9 @@ final class MinimalEncoder {
}
}
// List of encoders that potentially encode characters not in ISO-8859-1 in one byte.
private static final List<CharsetEncoder> ENCODERS = new ArrayList<>();
static {
final String[] names = { "ISO-8859-2",
"ISO-8859-3",
"ISO-8859-4",
"ISO-8859-5",
"ISO-8859-6",
"ISO-8859-7",
"ISO-8859-8",
"ISO-8859-9",
"ISO-8859-10",
"ISO-8859-11",
"ISO-8859-13",
"ISO-8859-14",
"ISO-8859-15",
"ISO-8859-16",
"windows-1250",
"windows-1251",
"windows-1252",
"windows-1253",
"windows-1254",
"windows-1255",
"windows-1256",
"windows-1257",
"windows-1258",
"Shift_JIS" };
for (String name : names) {
if (CharacterSetECI.getCharacterSetECIByName(name) != null) {
try {
ENCODERS.add(Charset.forName(name).newEncoder());
} catch (UnsupportedCharsetException e) {
// continue
}
}
}
}
private final String stringToEncode;
private final boolean isGS1;
private final CharsetEncoder[] encoders;
private final int priorityEncoderIndex;
private final ECIEncoderSet encoders;
private final ErrorCorrectionLevel ecLevel;
/**
@@ -138,59 +95,8 @@ final class MinimalEncoder {
this.stringToEncode = stringToEncode;
this.isGS1 = isGS1;
this.encoders = new ECIEncoderSet(stringToEncode, priorityCharset, -1);
this.ecLevel = ecLevel;
List<CharsetEncoder> neededEncoders = new ArrayList<>();
neededEncoders.add(StandardCharsets.ISO_8859_1.newEncoder());
boolean needUnicodeEncoder = priorityCharset != null && priorityCharset.name().startsWith("UTF");
for (int i = 0; i < stringToEncode.length(); i++) {
boolean canEncode = false;
for (CharsetEncoder encoder : neededEncoders) {
if (encoder.canEncode(stringToEncode.charAt(i))) {
canEncode = true;
break;
}
}
if (!canEncode) {
for (CharsetEncoder encoder : ENCODERS) {
if (encoder.canEncode(stringToEncode.charAt(i))) {
neededEncoders.add(encoder);
canEncode = true;
break;
}
}
}
if (!canEncode) {
needUnicodeEncoder = true;
}
}
if (neededEncoders.size() == 1 && !needUnicodeEncoder) {
encoders = new CharsetEncoder[] { neededEncoders.get(0) };
} else {
encoders = new CharsetEncoder[neededEncoders.size() + 2];
int index = 0;
for (CharsetEncoder encoder : neededEncoders) {
encoders[index++] = encoder;
}
encoders[index] = StandardCharsets.UTF_8.newEncoder();
encoders[index + 1] = StandardCharsets.UTF_16BE.newEncoder();
}
int priorityEncoderIndexValue = -1;
if (priorityCharset != null) {
for (int i = 0; i < encoders.length; i++) {
if (encoders[i] != null && priorityCharset.name().equals(encoders[i].charset().name())) {
priorityEncoderIndexValue = i;
break;
}
}
}
priorityEncoderIndex = priorityEncoderIndexValue;
}
/**
@@ -315,14 +221,15 @@ final class MinimalEncoder {
void addEdges(Version version, Edge[][][] edges, int from, Edge previous) {
int start = 0;
int end = encoders.length;
if (priorityEncoderIndex >= 0 && encoders[priorityEncoderIndex].canEncode(stringToEncode.charAt(from))) {
int end = encoders.length();
int priorityEncoderIndex = encoders.getPriorityEncoderIndex();
if (priorityEncoderIndex >= 0 && encoders.canEncode(stringToEncode.charAt(from), priorityEncoderIndex)) {
start = priorityEncoderIndex;
end = priorityEncoderIndex + 1;
}
for (int i = start; i < end; i++) {
if (encoders[i].canEncode(stringToEncode.charAt(from))) {
if (encoders.canEncode(stringToEncode.charAt(from), i)) {
addEdge(edges, from, new Edge(Mode.BYTE, from, i, 1, previous, version));
}
}
@@ -464,11 +371,11 @@ final class MinimalEncoder {
// The last dimension in the array below encodes the 4 modes KANJI, ALPHANUMERIC, NUMERIC and BYTE via the
// function getCompactedOrdinal(Mode)
@SuppressWarnings("unchecked")
Edge[][][] edges = new Edge[inputLength + 1][encoders.length][4];
Edge[][][] edges = new Edge[inputLength + 1][encoders.length()][4];
addEdges(version, edges, 0, null);
for (int i = 1; i <= inputLength; i++) {
for (int j = 0; j < encoders.length; j++) {
for (int j = 0; j < encoders.length(); j++) {
for (int k = 0; k < 4; k++) {
if (edges[i][j][k] != null && i < inputLength) {
addEdges(version, edges, i, edges[i][j][k]);
@@ -480,7 +387,7 @@ final class MinimalEncoder {
int minimalJ = -1;
int minimalK = -1;
int minimalSize = Integer.MAX_VALUE;
for (int j = 0; j < encoders.length; j++) {
for (int j = 0; j < encoders.length(); j++) {
for (int k = 0; k < 4; k++) {
if (edges[inputLength][j][k] != null) {
Edge edge = edges[inputLength][j][k];
@@ -535,8 +442,8 @@ final class MinimalEncoder {
size += characterLength == 1 ? 4 : characterLength == 2 ? 7 : 10;
break;
case BYTE:
size += 8 * stringToEncode.substring(fromPosition, fromPosition + characterLength).getBytes(
encoders[charsetEncoderIndex].charset()).length;
size += 8 * encoders.encode(stringToEncode.substring(fromPosition, fromPosition + characterLength),
charsetEncoderIndex).length;
if (needECI) {
size += 4 + 8; // the ECI assignment numbers for ISO-8859-x, UTF-8 and UTF-16 are all 8 bit long
}
@@ -712,8 +619,9 @@ final class MinimalEncoder {
* for multi byte encoded characters)
*/
private int getCharacterCountIndicator() {
return mode == Mode.BYTE ? stringToEncode.substring(fromPosition, fromPosition + characterLength).getBytes(
encoders[charsetEncoderIndex].charset()).length : characterLength;
return mode == Mode.BYTE ?
encoders.encode(stringToEncode.substring(fromPosition, fromPosition + characterLength),
charsetEncoderIndex).length : characterLength;
}
/**
@@ -726,11 +634,11 @@ final class MinimalEncoder {
bits.appendBits(length, mode.getCharacterCountBits(version));
}
if (mode == Mode.ECI) {
bits.appendBits(CharacterSetECI.getCharacterSetECI(encoders[charsetEncoderIndex].charset()).getValue(), 8);
bits.appendBits(encoders.getECIValue(charsetEncoderIndex), 8);
} else if (characterLength > 0) {
// append data
Encoder.appendBytes(stringToEncode.substring(fromPosition, fromPosition + characterLength), mode, bits,
encoders[charsetEncoderIndex].charset());
encoders.getCharset(charsetEncoderIndex));
}
}
@@ -738,7 +646,7 @@ final class MinimalEncoder {
StringBuilder result = new StringBuilder();
result.append(mode).append('(');
if (mode == Mode.ECI) {
result.append(encoders[charsetEncoderIndex].charset().displayName());
result.append(encoders.getCharset(charsetEncoderIndex).displayName());
} else {
result.append(makePrintable(stringToEncode.substring(fromPosition, fromPosition + characterLength)));
}


@@ -19,9 +19,10 @@ package com.google.zxing.datamatrix.encoder;
import junit.framework.ComparisonFailure;
import org.junit.Assert;
import org.junit.Test;
import java.nio.charset.StandardCharsets;
/**
* Tests for {@link HighLevelEncoder}.
* Tests for {@link HighLevelEncoder} and {@link MinimalEncoder}.
*/
public final class HighLevelEncodeTestCase extends Assert {
@@ -111,11 +112,11 @@ public final class HighLevelEncodeTestCase extends Assert {
//with the 16x48 symbol (47 data codewords)
useTestSymbols();
String visualized = encodeHighLevel("AIMAIMAIMAIMAIMAIM");
String visualized = encodeHighLevel("AIMAIMAIMAIMAIMAIM", false);
assertEquals("230 91 11 91 11 91 11 91 11 91 11 91 11", visualized);
//case "a": Unlatch is not required
visualized = encodeHighLevel("AIMAIMAIMAIMAIMAI");
visualized = encodeHighLevel("AIMAIMAIMAIMAIMAI", false);
assertEquals("230 91 11 91 11 91 11 91 11 91 11 90 241", visualized);
//case "b": Add trailing shift 0 and Unlatch is not required
@@ -379,9 +380,153 @@ public final class HighLevelEncodeTestCase extends Assert {
"191 89 191 89 191 254 66 66", visualized);
}
@Test
public void testSizes() {
int[] sizes = new int[2];
encodeHighLevel("A", sizes);
assertEquals(3, sizes[0]);
assertEquals(1, sizes[1]);
encodeHighLevel("AB", sizes);
assertEquals(3, sizes[0]);
assertEquals(2, sizes[1]);
encodeHighLevel("ABC", sizes);
assertEquals(3, sizes[0]);
assertEquals(3, sizes[1]);
encodeHighLevel("ABCD", sizes);
assertEquals(5, sizes[0]);
assertEquals(4, sizes[1]);
encodeHighLevel("ABCDE", sizes);
assertEquals(5, sizes[0]);
assertEquals(5, sizes[1]);
encodeHighLevel("ABCDEF", sizes);
assertEquals(5, sizes[0]);
assertEquals(5, sizes[1]);
encodeHighLevel("ABCDEFG", sizes);
assertEquals(8, sizes[0]);
assertEquals(7, sizes[1]);
encodeHighLevel("ABCDEFGH", sizes);
assertEquals(8, sizes[0]);
assertEquals(7, sizes[1]);
encodeHighLevel("ABCDEFGHI", sizes);
assertEquals(8, sizes[0]);
assertEquals(8, sizes[1]);
encodeHighLevel("ABCDEFGHIJ", sizes);
assertEquals(8, sizes[0]);
assertEquals(8, sizes[1]);
encodeHighLevel("a", sizes);
assertEquals(3, sizes[0]);
assertEquals(1, sizes[1]);
encodeHighLevel("ab", sizes);
assertEquals(3, sizes[0]);
assertEquals(2, sizes[1]);
encodeHighLevel("abc", sizes);
assertEquals(3, sizes[0]);
assertEquals(3, sizes[1]);
encodeHighLevel("abcd", sizes);
assertEquals(5, sizes[0]);
assertEquals(4, sizes[1]);
encodeHighLevel("abcdef", sizes);
assertEquals(5, sizes[0]);
assertEquals(5, sizes[1]);
encodeHighLevel("abcdefg", sizes);
assertEquals(8, sizes[0]);
assertEquals(7, sizes[1]);
encodeHighLevel("abcdefgh", sizes);
assertEquals(8, sizes[0]);
assertEquals(8, sizes[1]);
encodeHighLevel("+", sizes);
assertEquals(3, sizes[0]);
assertEquals(1, sizes[1]);
encodeHighLevel("++", sizes);
assertEquals(3, sizes[0]);
assertEquals(2, sizes[1]);
encodeHighLevel("+++", sizes);
assertEquals(3, sizes[0]);
assertEquals(3, sizes[1]);
encodeHighLevel("++++", sizes);
assertEquals(5, sizes[0]);
assertEquals(4, sizes[1]);
encodeHighLevel("+++++", sizes);
assertEquals(5, sizes[0]);
assertEquals(5, sizes[1]);
encodeHighLevel("++++++", sizes);
assertEquals(8, sizes[0]);
assertEquals(6, sizes[1]);
encodeHighLevel("+++++++", sizes);
assertEquals(8, sizes[0]);
assertEquals(7, sizes[1]);
encodeHighLevel("++++++++", sizes);
assertEquals(8, sizes[0]);
assertEquals(7, sizes[1]);
encodeHighLevel("+++++++++", sizes);
assertEquals(8, sizes[0]);
assertEquals(8, sizes[1]);
encodeHighLevel("\u00F0\u00F0" +
"ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEF", sizes);
assertEquals(114, sizes[0]);
assertEquals(62, sizes[1]);
}
@Test
public void testECIs() {
String visualized = visualize(MinimalEncoder.encodeHighLevel("that particularly stands out to me is \u0625\u0650" +
"\u062C\u064E\u0651\u0627\u0635 (\u02BE\u0101\u1E63) \"pear\", suggested to have originated from Hebrew " +
"\u05D0\u05B7\u05D2\u05B8\u05BC\u05E1 (ag\u00E1s)"));
assertEquals("239 209 151 206 214 92 122 140 35 158 144 162 52 205 55 171 137 23 67 206 218 175 147 113 15 254" +
" 116 33 241 25 231 186 14 212 64 253 151 252 159 33 41 241 27 231 83 171 53 209 35 25 134 6 42 33 35 239 184" +
" 31 193 234 7 252 205 101 127 241 209 34 24 5 22 23 221 148 179 239 128 140 92 187 106 204 198 59 19 25 114" +
" 248 118 36 254 231 106 196 19 239 101 27 107 69 189 112 236 156 252 16 174 125 24 10 125 116 42", visualized);
visualized = visualize(MinimalEncoder.encodeHighLevel("that particularly stands out to me is \u0625\u0650" +
"\u062C\u064E\u0651\u0627\u0635 (\u02BE\u0101\u1E63) \"pear\", suggested to have originated from Hebrew " +
"\u05D0\u05B7\u05D2\u05B8\u05BC\u05E1 (ag\u00E1s)", StandardCharsets.UTF_8, -1 , SymbolShapeHint.FORCE_NONE));
assertEquals("241 27 239 209 151 206 214 92 122 140 35 158 144 162 52 205 55 171 137 23 67 206 218 175 147 113" +
" 15 254 116 33 231 202 33 131 77 154 119 225 163 238 206 28 249 93 36 150 151 53 108 246 145 228 217 71" +
" 199 42 33 35 239 184 31 193 234 7 252 205 101 127 241 209 34 24 5 22 23 221 148 179 239 128 140 92 187 106" +
" 204 198 59 19 25 114 248 118 36 254 231 43 133 212 175 38 220 44 6 125 49 172 93 189 209 111 61 217 203 62" +
" 116 42", visualized);
}
private static void encodeHighLevel(String msg, int[] sizes) {
sizes[0] = HighLevelEncoder.encodeHighLevel(msg).length();
sizes[1] = MinimalEncoder.encodeHighLevel(msg).length();
}
private static String encodeHighLevel(String msg) {
return encodeHighLevel(msg, true);
}
private static String encodeHighLevel(String msg, boolean compareSizeToMinimalEncoder) {
CharSequence encoded = HighLevelEncoder.encodeHighLevel(msg);
CharSequence encoded2 = MinimalEncoder.encodeHighLevel(msg);
assert !compareSizeToMinimalEncoder || encoded2.length() <= encoded.length();
//DecodeHighLevel.decode(encoded);
return visualize(encoded);
}