Issue 581, Hanzi mode support from Shiyuan Guo

git-svn-id: https://zxing.googlecode.com/svn/trunk@1623 59b500cc-1b3d-0410-9834-0bbf25fbcc57
This commit is contained in:
srowen 2010-10-12 16:01:31 +00:00
parent eaecf26bf6
commit c3f4602678
8 changed files with 81 additions and 15 deletions

View file

@ -51,6 +51,7 @@ Roman Nurik (Google)
Ryan Alford
Sanford Squires
Sean Owen (Google)
Shiyuan Guo / 郭世元
Simon Flannery (Ericsson)
Steven Parkes
Suraj Supekar

View file

@ -30,6 +30,7 @@ public final class StringUtils {
private static final String PLATFORM_DEFAULT_ENCODING =
System.getProperty("file.encoding");
public static final String SHIFT_JIS = "SJIS";
public static final String GB2312 = "GB2312";
private static final String EUC_JP = "EUC_JP";
private static final String UTF8 = "UTF8";
private static final String ISO88591 = "ISO8859_1";

View file

@ -45,6 +45,7 @@ final class DecodedBitStreamParser {
'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
' ', '$', '%', '*', '+', '-', '.', '/', ':'
};
private static final int GB2312_SUBSET = 1;
private DecodedBitStreamParser() {
}
@ -85,18 +86,29 @@ final class DecodedBitStreamParser {
throw FormatException.getFormatInstance();
}
} else {
// How many characters will follow, encoded in this mode?
int count = bits.readBits(mode.getCharacterCountBits(version));
if (mode.equals(Mode.NUMERIC)) {
decodeNumericSegment(bits, result, count);
} else if (mode.equals(Mode.ALPHANUMERIC)) {
decodeAlphanumericSegment(bits, result, count, fc1InEffect);
} else if (mode.equals(Mode.BYTE)) {
decodeByteSegment(bits, result, count, currentCharacterSetECI, byteSegments, hints);
} else if (mode.equals(Mode.KANJI)) {
decodeKanjiSegment(bits, result, count);
// First handle Hanzi mode which does not start with character count
if (mode.equals(Mode.HANZI)) {
// Chinese (Hanzi) mode contains a subset indicator right after the mode indicator
int subset = bits.readBits(4);
int countHanzi = bits.readBits(mode.getCharacterCountBits(version));
if (subset == GB2312_SUBSET) {
decodeHanziSegment(bits, result, countHanzi);
}
} else {
throw FormatException.getFormatInstance();
// "Normal" QR code modes:
// How many characters will follow, encoded in this mode?
int count = bits.readBits(mode.getCharacterCountBits(version));
if (mode.equals(Mode.NUMERIC)) {
decodeNumericSegment(bits, result, count);
} else if (mode.equals(Mode.ALPHANUMERIC)) {
decodeAlphanumericSegment(bits, result, count, fc1InEffect);
} else if (mode.equals(Mode.BYTE)) {
decodeByteSegment(bits, result, count, currentCharacterSetECI, byteSegments, hints);
} else if (mode.equals(Mode.KANJI)) {
decodeKanjiSegment(bits, result, count);
} else {
throw FormatException.getFormatInstance();
}
}
}
}
@ -105,6 +117,40 @@ final class DecodedBitStreamParser {
return new DecoderResult(bytes, result.toString(), byteSegments.isEmpty() ? null : byteSegments, ecLevel);
}
/**
 * Decodes one Hanzi-mode segment and appends the resulting text to {@code result}.
 * See specification GBT 18284-2000.
 *
 * <p>Each character is read as a 13-bit value, expanded back into a 2-byte GB2312
 * code, and the collected bytes are then decoded as GB2312 in one step.
 *
 * @param bits source of the encoded bits; 13 bits are consumed per character
 * @param result buffer the decoded characters are appended to
 * @param count number of characters encoded in this segment
 * @throws FormatException if the GB2312 charset is not supported by the platform
 */
private static void decodeHanziSegment(BitSource bits,
                                       StringBuffer result,
                                       int count) throws FormatException {
  // Each character will require 2 bytes. Read the characters as 2-byte pairs
  // and decode as GB2312 afterwards
  byte[] buffer = new byte[2 * count];
  int offset = 0;
  while (count > 0) {
    // Each 13 bits encodes a 2-byte character
    int twoBytes = bits.readBits(13);
    // The quotient is the lead-byte offset, the remainder the trail-byte offset
    int assembledTwoBytes = ((twoBytes / 0x060) << 8) | (twoBytes % 0x060);
    // The lead-byte offset sits in the high byte. Offsets 0..9 map to lead bytes
    // 0xA1..0xAA, so the first range is everything below 0x0A00. The earlier
    // threshold of 0x003BF pushed lead bytes 0xA5..0xAA into the second range.
    if (assembledTwoBytes < 0x00A00) {
      // In the 0xA1A1 to 0xAAFE range
      assembledTwoBytes += 0x0A1A1;
    } else {
      // In the 0xB0A1 to 0xFAFE range
      assembledTwoBytes += 0x0A6A1;
    }
    buffer[offset] = (byte) ((assembledTwoBytes >> 8) & 0xFF);
    buffer[offset + 1] = (byte) (assembledTwoBytes & 0xFF);
    offset += 2;
    count--;
  }
  try {
    result.append(new String(buffer, StringUtils.GB2312));
  } catch (UnsupportedEncodingException uee) {
    throw FormatException.getFormatInstance();
  }
}
private static void decodeKanjiSegment(BitSource bits,
StringBuffer result,
int count) throws FormatException {

View file

@ -35,6 +35,8 @@ public final class Mode {
public static final Mode KANJI = new Mode(new int[]{8, 10, 12}, 0x08, "KANJI");
public static final Mode FNC1_FIRST_POSITION = new Mode(null, 0x05, "FNC1_FIRST_POSITION");
public static final Mode FNC1_SECOND_POSITION = new Mode(null, 0x09, "FNC1_SECOND_POSITION");
/** See GBT 18284-2000; "Hanzi" is a transliteration of this mode name. */
public static final Mode HANZI = new Mode(new int[]{8, 10, 12}, 0x0D, "HANZI");
private final int[] characterCountBitsForVersions;
private final int bits;
@ -71,6 +73,9 @@ public final class Mode {
return KANJI;
case 0x9:
return FNC1_SECOND_POSITION;
case 0xD:
// 0xD is defined in GBT 18284-2000; it may not be supported outside China
return HANZI;
default:
throw new IllegalArgumentException();
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

View file

@ -0,0 +1 @@
AD:SUB:阿;;

View file

@ -27,10 +27,10 @@ public final class QRCodeBlackBox2TestCase extends AbstractBlackBoxTestCase {
/**
 * Passes the "test/data/blackbox/qrcode-2" path, a new MultiFormatReader and
 * BarcodeFormat.QR_CODE to the superclass constructor, then registers one
 * addTest entry per rotation (0, 90, 180 and 270).
 */
public QRCodeBlackBox2TestCase() {
super("test/data/blackbox/qrcode-2", new MultiFormatReader(), BarcodeFormat.QR_CODE);
// NOTE(review): the arguments are presumably (must-pass count, try-harder count,
// rotation in degrees) -- confirm against AbstractBlackBoxTestCase.
// NOTE(review): this listing appears to come from a diff view; the 26/26/26/25
// group and the 27/27/27/26 group look like the before and after versions of the
// same four calls -- confirm which group is current before relying on it.
addTest(26, 26, 0.0f);
addTest(26, 26, 90.0f);
addTest(26, 26, 180.0f);
addTest(25, 25, 270.0f);
addTest(27, 27, 0.0f);
addTest(27, 27, 90.0f);
addTest(27, 27, 180.0f);
addTest(26, 26, 270.0f);
}
}

View file

@ -69,6 +69,18 @@ public final class DecodedBitStreamParserTestCase extends Assert {
assertEquals("\u00ed\u00f3\u00fa", result);
}
@Test
public void testHanzi() throws Exception {
  // Assemble a minimal Hanzi segment holding a single character
  BitSourceBuilder bitStream = new BitSourceBuilder();
  bitStream.write(0x0D, 4); // Mode indicator: Hanzi
  bitStream.write(0x01, 4); // Subset indicator: 1 = GB2312 encoding
  bitStream.write(0x01, 8); // Character count: 1 character
  bitStream.write(0x03C1, 13); // 13-bit encoded value of that character
  byte[] encoded = bitStream.toByteArray();
  Version versionOne = Version.getVersionForNumber(1);
  String decodedText =
      DecodedBitStreamParser.decode(encoded, versionOne, null, null).getText();
  assertEquals("\u963f", decodedText);
}
// TODO definitely need more tests here
}