Added some degree of support for Character Set ECIs

git-svn-id: https://zxing.googlecode.com/svn/trunk@467 59b500cc-1b3d-0410-9834-0bbf25fbcc57
This commit is contained in:
srowen 2008-06-20 22:38:35 +00:00
parent 72dfe0c28f
commit c9ead4ce8a
4 changed files with 196 additions and 25 deletions

View file

@ -0,0 +1,73 @@
/*
* Copyright 2008 ZXing authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.zxing.qrcode.decoder;
import java.util.Hashtable;
/**
 * A Character Set ECI, as described in section 5.3.1.1 of "Extended Channel
 * Interpretations": an ECI value paired with the name of the character encoding
 * that the value designates.
 *
 * <p>Instances are created once, when the class is initialized, and are obtained
 * through {@link #getCharacterSetECIByValue(int)}.</p>
 *
 * @author srowen@google.com (Sean Owen)
 */
final class CharacterSetECI extends ECI {

  /** Maps a boxed ECI value ({@link Integer}) to the {@link CharacterSetECI} registered for it. */
  private static final Hashtable VALUE_TO_ECI = new Hashtable(29);

  static {
    // TODO figure out if these values are even right!
    register(3, "ISO8859_1");
    register(4, "ISO8859_2");
    register(5, "ISO8859_3");
    register(6, "ISO8859_4");
    register(7, "ISO8859_5");
    register(8, "ISO8859_6");
    register(9, "ISO8859_7");
    register(10, "ISO8859_8");
    register(11, "ISO8859_9");
    register(12, "ISO8859_10");
    register(13, "ISO8859_11");
    // Note: no entry is registered for value 14.
    register(15, "ISO8859_13");
    register(16, "ISO8859_14");
    register(17, "ISO8859_15");
    register(18, "ISO8859_16");
    // Note: no entry is registered for value 19.
    register(20, "Shift_JIS");
  }

  /** Name of the character encoding designated by this ECI. */
  private final String encodingName;

  /**
   * @param value ECI value, handed to the {@link ECI} superclass
   * @param encodingName name of the character encoding designated by the value
   */
  private CharacterSetECI(int value, String encodingName) {
    super(value);
    this.encodingName = encodingName;
  }

  /**
   * @return name of the character encoding designated by this ECI
   */
  String getEncodingName() {
    return encodingName;
  }

  /**
   * Looks up the character set ECI registered for a value.
   *
   * @param value ECI value to look up
   * @return the {@link CharacterSetECI} registered for that value
   * @throws IllegalArgumentException if no character set is registered for the value
   */
  static CharacterSetECI getCharacterSetECIByValue(int value) {
    Object registered = VALUE_TO_ECI.get(new Integer(value));
    if (registered != null) {
      return (CharacterSetECI) registered;
    }
    throw new IllegalArgumentException("Unsupported value: " + value);
  }

  /** Creates the ECI for the given value and encoding name and stores it in the lookup table. */
  private static void register(int value, String encodingName) {
    CharacterSetECI eci = new CharacterSetECI(value, encodingName);
    VALUE_TO_ECI.put(new Integer(value), eci);
  }

}

View file

@ -57,6 +57,7 @@ final class DecodedBitStreamParser {
static String decode(byte[] bytes, Version version) throws ReaderException {
BitSource bits = new BitSource(bytes);
StringBuffer result = new StringBuffer();
CharacterSetECI currentCharacterSetECI = null;
Mode mode;
do {
// While still another segment to read...
@ -69,11 +70,12 @@ final class DecodedBitStreamParser {
if (!mode.equals(Mode.TERMINATOR)) {
if (mode.equals(Mode.ECI)) {
// Count doesn't apply to ECI
parseECI(bits);
// We don't currently do anything with ECI, since there seems to be no reference
// defining what each value means. AIM's "Extended Channel Interpretations" does
// not define it. I have never observed a QR Code using it. So for now, we at least
// parse it but don't know how to take action on it.
int value = ECI.parseECI(bits);
try {
currentCharacterSetECI = CharacterSetECI.getCharacterSetECIByValue(value);
} catch (IllegalArgumentException iae) {
// unsupported... just continue?
}
} else {
// How many characters will follow, encoded in this mode?
int count = bits.readBits(mode.getCharacterCountBits(version));
@ -82,7 +84,7 @@ final class DecodedBitStreamParser {
} else if (mode.equals(Mode.ALPHANUMERIC)) {
decodeAlphanumericSegment(bits, result, count);
} else if (mode.equals(Mode.BYTE)) {
decodeByteSegment(bits, result, count);
decodeByteSegment(bits, result, count, currentCharacterSetECI);
} else if (mode.equals(Mode.KANJI)) {
decodeKanjiSegment(bits, result, count);
} else {
@ -104,23 +106,6 @@ final class DecodedBitStreamParser {
return result.toString();
}
/**
 * Reads an ECI value from the bit source. The value occupies one, two or three
 * 8-bit groups; the leading bits of the first group select which form is used.
 *
 * @param bits source to read the ECI bits from
 * @return the decoded ECI value
 * @throws IllegalArgumentException if the first byte matches none of the three
 *  recognized leading-bit patterns
 */
private static int parseECI(BitSource bits) {
int firstByte = bits.readBits(8);
// Leading bit 0: the remaining 7 bits are the value.
if ((firstByte & 0x80) == 0) {
// just one byte
return firstByte & 0x7F;
// Leading bits 10: the low 6 bits are combined with one further byte.
} else if ((firstByte & 0xC0) == 0x80) {
// two bytes
int secondByte = bits.readBits(8);
return ((firstByte & 0x3F) << 8) | secondByte;
// Leading bits 110: the low 5 bits are combined with a further 16 bits.
} else if ((firstByte & 0xE0) == 0xC0) {
// three bytes
int secondThirdBytes = bits.readBits(16);
return ((firstByte & 0x1F) << 16) | secondThirdBytes;
}
// None of the three patterns matched.
throw new IllegalArgumentException("Bad ECI bits starting with byte " + firstByte);
}
private static void decodeKanjiSegment(BitSource bits,
StringBuffer result,
int count) throws ReaderException {
@ -154,7 +139,8 @@ final class DecodedBitStreamParser {
private static void decodeByteSegment(BitSource bits,
StringBuffer result,
int count) throws ReaderException {
int count,
CharacterSetECI currentCharacterSetECI) throws ReaderException {
byte[] readBytes = new byte[count];
if (count << 3 > bits.available()) {
throw new ReaderException("Count too large: " + count);
@ -162,12 +148,17 @@ final class DecodedBitStreamParser {
for (int i = 0; i < count; i++) {
readBytes[i] = (byte) bits.readBits(8);
}
String encoding;
if (currentCharacterSetECI == null) {
// The spec isn't clear on this mode; see
// section 6.4.5: it does not say which encoding to assume
// upon decoding. I have seen ISO-8859-1 used as well as
// Shift_JIS -- without anything like an ECI designator to
// give a hint.
String encoding = guessEncoding(readBytes);
encoding = guessEncoding(readBytes);
} else {
encoding = currentCharacterSetECI.getEncodingName();
}
try {
result.append(new String(readBytes, encoding));
} catch (UnsupportedEncodingException uce) {

View file

@ -0,0 +1,65 @@
/*
* Copyright 2008 ZXing authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.zxing.qrcode.decoder;
import com.google.zxing.common.BitSource;
/**
 * Base class for the classes that represent the different types of ECI described
 * in section 5.3 of "Extended Channel Interpretations".
 *
 * @author srowen@google.com (Sean Owen)
 */
abstract class ECI {

  /** The ECI value this object stands for. */
  private final int value;

  /**
   * @param value the ECI value this object stands for
   */
  ECI(int value) {
    this.value = value;
  }

  /**
   * @return the ECI value this object stands for
   */
  int getValue() {
    return value;
  }

  /**
   * Returns the ECI object that corresponds to a value.
   *
   * @param value ECI value; accepted range is 0 to 999999 inclusive
   * @return the matching ECI; only character set ECIs (values below 900) are handled
   * @throws IllegalArgumentException if the value is out of range, is 900 or above,
   *  or is a character set value that {@link CharacterSetECI} does not know
   */
  static ECI getECIByValue(int value) {
    boolean withinRange = value >= 0 && value <= 999999;
    if (!withinRange) {
      throw new IllegalArgumentException("Bad ECI value: " + value);
    }
    if (value >= 900) {
      throw new IllegalArgumentException("Unsupported ECI value: " + value);
    }
    // Character set ECIs use 000000 - 000899
    return CharacterSetECI.getCharacterSetECIByValue(value);
  }

  /**
   * Reads an ECI value from the bit source. The value occupies one, two or three
   * 8-bit groups; the leading bits of the first group select which form is used.
   *
   * @param bits source to read the ECI bits from
   * @return the decoded ECI value
   * @throws IllegalArgumentException if the first byte matches none of the three
   *  recognized leading-bit patterns
   */
  static int parseECI(BitSource bits) {
    int lead = bits.readBits(8);

    // 0xxxxxxx: single byte, 7 value bits
    if ((lead & 0x80) == 0) {
      return lead & 0x7F;
    }

    // 10xxxxxx: two bytes, 6 value bits here plus 8 more
    if ((lead & 0xC0) == 0x80) {
      int low8 = bits.readBits(8);
      return ((lead & 0x3F) << 8) | low8;
    }

    // 110xxxxx: three bytes, 5 value bits here plus 16 more
    if ((lead & 0xE0) == 0xC0) {
      int low16 = bits.readBits(16);
      return ((lead & 0x1F) << 16) | low16;
    }

    throw new IllegalArgumentException("Bad ECI bits starting with byte " + lead);
  }

}

View file

@ -0,0 +1,42 @@
/*
* Copyright 2008 ZXing authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.zxing.qrcode.decoder;
import com.google.zxing.common.BitSource;
import junit.framework.TestCase;
/**
 * Tests {@link ECI#parseECI(BitSource)} with values on either side of the
 * transitions between its one-, two- and three-byte forms.
 *
 * @author srowen@google.com (Sean Owen)
 */
public final class ECITestCase extends TestCase {

  /**
   * Feeds each encoded byte sequence to the parser and checks that it yields the
   * value at the same index of the expected-values table.
   */
  public void testParseECI() {
    int[] expectedValues = { 0, 127, 128, 16383, 16384, 2097151 };
    byte[][] encodings = {
      { (byte) 0x00 },
      { (byte) 0x7F },
      { (byte) 0x80, (byte) 0x80 },
      { (byte) 0xBF, (byte) 0xFF },
      { (byte) 0xC0, (byte) 0x40, (byte) 0x00 },
      { (byte) 0xDF, (byte) 0xFF, (byte) 0xFF },
    };
    for (int i = 0; i < expectedValues.length; i++) {
      BitSource source = new BitSource(encodings[i]);
      int parsed = ECI.parseECI(source);
      assertEquals(expectedValues[i], parsed);
    }
  }

}