From 2c787d844030c896e8eec9a2c37a0b7bea4c8f52 Mon Sep 17 00:00:00 2001 From: "smparkes@smparkes.net" Date: Thu, 13 Oct 2011 15:21:13 +0000 Subject: [PATCH] Port r1623 changes to C++ This provides C++ support for Hanzi from issue 582. With it, the C++ qrcode blackbox test results again match the Java results. The changes are mostly a re-port of various pieces of the Java code for supporting modes and bit stream parsing in QR codes. It expands the modes/parsing including ECI and FNC, refactors StringUtils as was done in Java, and makes DecoderResult include the fields that the Java code has. Although the non-qrcode blackbox tests don't all match the Java code (a known issue), the entire blackbox suite was run under valgrind and produced no errors -- no invalid memory references and no leaks. This commit also includes a few cleanups for things like files that had mixed line endings, i.e., a combination of LF and CR-LF. git-svn-id: https://zxing.googlecode.com/svn/trunk@1966 59b500cc-1b3d-0410-9834-0bbf25fbcc57 --- cpp/.valgrind.supp | 14 + cpp/.valgrindrc | 1 + cpp/SConscript | 2 +- cpp/core/src/zxing/DecodeHints.cpp | 2 + cpp/core/src/zxing/DecodeHints.h | 13 +- cpp/core/src/zxing/Exception.cpp | 17 +- cpp/core/src/zxing/Exception.h | 1 + cpp/core/src/zxing/FormatException.cpp | 35 ++ cpp/core/src/zxing/FormatException.h | 35 ++ cpp/core/src/zxing/MultiFormatReader.cpp | 2 +- cpp/core/src/zxing/ReaderException.cpp | 5 +- cpp/core/src/zxing/ReaderException.h | 1 + cpp/core/src/zxing/common/Array.h | 2 +- cpp/core/src/zxing/common/CharacterSetECI.cpp | 87 ++++ cpp/core/src/zxing/common/CharacterSetECI.h | 53 +++ cpp/core/src/zxing/common/DecoderResult.cpp | 23 +- cpp/core/src/zxing/common/DecoderResult.h | 8 + cpp/core/src/zxing/common/ECI.cpp | 39 ++ cpp/core/src/zxing/common/ECI.h | 40 ++ .../zxing/common/GlobalHistogramBinarizer.cpp | 7 +- cpp/core/src/zxing/common/HybridBinarizer.cpp | 3 + cpp/core/src/zxing/common/StringUtils.cpp | 175 ++++++++ 
cpp/core/src/zxing/common/StringUtils.h | 52 +++ .../src/zxing/datamatrix/decoder/Decoder.cpp | 60 +-- cpp/core/src/zxing/oned/Code128Reader.cpp | 2 +- cpp/core/src/zxing/oned/ITFReader.cpp | 6 +- cpp/core/src/zxing/oned/OneDReader.cpp | 4 +- .../src/zxing/qrcode/ErrorCorrectionLevel.cpp | 34 +- .../src/zxing/qrcode/ErrorCorrectionLevel.h | 10 +- .../qrcode/decoder/DecodedBitStreamParser.cpp | 406 ++++++++---------- .../qrcode/decoder/DecodedBitStreamParser.h | 33 +- cpp/core/src/zxing/qrcode/decoder/Decoder.cpp | 11 +- cpp/core/src/zxing/qrcode/decoder/Mode.cpp | 43 +- .../src/zxing/qrcode/detector/Detector.cpp | 2 +- cpp/magick/src/main.cpp | 1 + 35 files changed, 909 insertions(+), 320 deletions(-) create mode 100644 cpp/.valgrind.supp create mode 100644 cpp/.valgrindrc create mode 100644 cpp/core/src/zxing/FormatException.cpp create mode 100644 cpp/core/src/zxing/FormatException.h create mode 100644 cpp/core/src/zxing/common/CharacterSetECI.cpp create mode 100644 cpp/core/src/zxing/common/CharacterSetECI.h create mode 100644 cpp/core/src/zxing/common/ECI.cpp create mode 100644 cpp/core/src/zxing/common/ECI.h create mode 100644 cpp/core/src/zxing/common/StringUtils.cpp create mode 100644 cpp/core/src/zxing/common/StringUtils.h diff --git a/cpp/.valgrind.supp b/cpp/.valgrind.supp new file mode 100644 index 000000000..a77f590ab --- /dev/null +++ b/cpp/.valgrind.supp @@ -0,0 +1,14 @@ +{ + zxing::common::CharacterSetECI::init_tables + Memcheck:Leak + fun:malloc + ... + fun:_ZN5zxing6common15CharacterSetECI11init_tablesEv +} + +{ + ImageMagick + Memcheck:Leak + ... 
+ fun:AcquireImage +} \ No newline at end of file diff --git a/cpp/.valgrindrc b/cpp/.valgrindrc new file mode 100644 index 000000000..5c54def98 --- /dev/null +++ b/cpp/.valgrindrc @@ -0,0 +1 @@ +--memcheck:leak-check=full --suppressions=.valgrind.supp --gen-suppressions=all \ No newline at end of file diff --git a/cpp/SConscript b/cpp/SConscript index 6cf9724b2..8e07367ac 100644 --- a/cpp/SConscript +++ b/cpp/SConscript @@ -28,7 +28,7 @@ def all_files(dir, ext='.cpp', level=5): -magick_include = ['/usr/include/ImageMagick/'] +magick_include = ['/usr/include/ImageMagick/', '/opt/local/include/ImageMagick/'] magick_libs = ['Magick++', 'MagickWand', 'MagickCore'] cppunit_libs = ['cppunit'] diff --git a/cpp/core/src/zxing/DecodeHints.cpp b/cpp/core/src/zxing/DecodeHints.cpp index 8209f767f..82f9e30bd 100644 --- a/cpp/core/src/zxing/DecodeHints.cpp +++ b/cpp/core/src/zxing/DecodeHints.cpp @@ -21,6 +21,8 @@ #include namespace zxing { +const DecodeHintType DecodeHints::CHARACTER_SET; + const DecodeHints DecodeHints::PRODUCT_HINT( BARCODEFORMAT_UPC_E_HINT | BARCODEFORMAT_UPC_A_HINT | diff --git a/cpp/core/src/zxing/DecodeHints.h b/cpp/core/src/zxing/DecodeHints.h index f5c27f52d..7eb120194 100644 --- a/cpp/core/src/zxing/DecodeHints.h +++ b/cpp/core/src/zxing/DecodeHints.h @@ -30,6 +30,12 @@ class DecodeHints { private: + DecodeHintType hints; + + Ref callback; + + public: + static const DecodeHintType BARCODEFORMAT_QR_CODE_HINT = 1 << BarcodeFormat_QR_CODE; static const DecodeHintType BARCODEFORMAT_DATA_MATRIX_HINT = 1 << BarcodeFormat_DATA_MATRIX; static const DecodeHintType BARCODEFORMAT_UPC_E_HINT = 1 << BarcodeFormat_UPC_E; @@ -39,14 +45,9 @@ class DecodeHints { static const DecodeHintType BARCODEFORMAT_CODE_128_HINT = 1 << BarcodeFormat_CODE_128; static const DecodeHintType BARCODEFORMAT_CODE_39_HINT = 1 << BarcodeFormat_CODE_39; static const DecodeHintType BARCODEFORMAT_ITF_HINT = 1 << BarcodeFormat_ITF; + static const DecodeHintType CHARACTER_SET = 1 << 30; static 
const DecodeHintType TRYHARDER_HINT = 1 << 31; - DecodeHintType hints; - - Ref callback; - - public: - static const DecodeHints PRODUCT_HINT; static const DecodeHints ONED_HINT; static const DecodeHints DEFAULT_HINT; diff --git a/cpp/core/src/zxing/Exception.cpp b/cpp/core/src/zxing/Exception.cpp index d20b6e3c1..ca8c21d05 100644 --- a/cpp/core/src/zxing/Exception.cpp +++ b/cpp/core/src/zxing/Exception.cpp @@ -1,16 +1,31 @@ +// -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*- /* * Exception.cpp * ZXing * * Created by Christian Brunschen on 03/06/2008. - * Copyright 2008 ZXing authors All rights reserved. + * Copyright 2008-2011 ZXing authors All rights reserved. * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ #include namespace zxing { +Exception::Exception() {} + Exception::Exception(const char *msg) : message(msg) { } diff --git a/cpp/core/src/zxing/Exception.h b/cpp/core/src/zxing/Exception.h index aaaeb9aab..67dd97abe 100644 --- a/cpp/core/src/zxing/Exception.h +++ b/cpp/core/src/zxing/Exception.h @@ -30,6 +30,7 @@ private: std::string message; public: + Exception(); Exception(const char *msg); virtual const char* what() const throw(); virtual ~Exception() throw(); diff --git a/cpp/core/src/zxing/FormatException.cpp b/cpp/core/src/zxing/FormatException.cpp new file mode 100644 index 000000000..d3981c50f --- /dev/null +++ b/cpp/core/src/zxing/FormatException.cpp @@ -0,0 +1,35 @@ +// -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*- +/* + * FormatException.cpp + * zxing + * + * Created by Christian Brunschen on 13/05/2008. + * Copyright 2008 ZXing authors All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace zxing { + +FormatException::FormatException() {} + +FormatException::FormatException(const char *msg) : + ReaderException(msg) { +} + +FormatException::~FormatException() throw() { +} + +} diff --git a/cpp/core/src/zxing/FormatException.h b/cpp/core/src/zxing/FormatException.h new file mode 100644 index 000000000..da50b501e --- /dev/null +++ b/cpp/core/src/zxing/FormatException.h @@ -0,0 +1,35 @@ +#ifndef __FORMAT_EXCEPTION_H__ +#define __FORMAT_EXCEPTION_H__ + +/* + * FormatException.h + * zxing + * + * Copyright 2010 ZXing authors All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace zxing { + +class FormatException : public ReaderException { +public: + FormatException(); + FormatException(const char *msg); + ~FormatException() throw(); +}; + +} +#endif // __FORMAT_EXCEPTION_H__ diff --git a/cpp/core/src/zxing/MultiFormatReader.cpp b/cpp/core/src/zxing/MultiFormatReader.cpp index 2132fcb6b..6b423e8e1 100644 --- a/cpp/core/src/zxing/MultiFormatReader.cpp +++ b/cpp/core/src/zxing/MultiFormatReader.cpp @@ -89,7 +89,7 @@ namespace zxing { for (unsigned int i = 0; i < readers_.size(); i++) { try { return readers_[i]->decode(image, hints_); - } catch (ReaderException re) { + } catch (ReaderException const& re) { // continue } } diff --git a/cpp/core/src/zxing/ReaderException.cpp b/cpp/core/src/zxing/ReaderException.cpp index 7d2bc9d98..07d4bb697 100644 --- a/cpp/core/src/zxing/ReaderException.cpp +++ b/cpp/core/src/zxing/ReaderException.cpp @@ -1,9 +1,10 @@ +// -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*- /* * ReaderException.cpp * zxing * * Created by Christian Brunschen on 13/05/2008. - * Copyright 2008 ZXing authors All rights reserved. + * Copyright 2008-2011 ZXing authors All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +23,8 @@ namespace zxing { +ReaderException::ReaderException() {} + ReaderException::ReaderException(const char *msg) : Exception(msg) { } diff --git a/cpp/core/src/zxing/ReaderException.h b/cpp/core/src/zxing/ReaderException.h index 6777e99a2..d54ae62b4 100644 --- a/cpp/core/src/zxing/ReaderException.h +++ b/cpp/core/src/zxing/ReaderException.h @@ -26,6 +26,7 @@ namespace zxing { class ReaderException : public Exception { public: + ReaderException(); ReaderException(const char *msg); ~ReaderException() throw(); }; diff --git a/cpp/core/src/zxing/common/Array.h b/cpp/core/src/zxing/common/Array.h index 2cd848737..037eea6b7 100644 --- a/cpp/core/src/zxing/common/Array.h +++ b/cpp/core/src/zxing/common/Array.h @@ -93,7 +93,7 @@ public: } }; -template class ArrayRef { +template class ArrayRef : public Counted { private: public: Array *array_; diff --git a/cpp/core/src/zxing/common/CharacterSetECI.cpp b/cpp/core/src/zxing/common/CharacterSetECI.cpp new file mode 100644 index 000000000..534138cac --- /dev/null +++ b/cpp/core/src/zxing/common/CharacterSetECI.cpp @@ -0,0 +1,87 @@ +// -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*- +/* + * Copyright 2008-2011 ZXing authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +using std::string; + +using zxing::common::CharacterSetECI; +using zxing::IllegalArgumentException; + +std::map CharacterSetECI::VALUE_TO_ECI; +std::map CharacterSetECI::NAME_TO_ECI; + +const bool CharacterSetECI::inited = CharacterSetECI::init_tables(); + +bool CharacterSetECI::init_tables() { + addCharacterSet(0, "Cp437"); + { char const* s[] = {"ISO8859_1", "ISO-8859-1", 0}; + addCharacterSet(1, s); } + addCharacterSet(2, "Cp437"); + { char const* s[] = {"ISO8859_1", "ISO-8859-1", 0}; + addCharacterSet(3, s); } + addCharacterSet(4, "ISO8859_2"); + addCharacterSet(5, "ISO8859_3"); + addCharacterSet(6, "ISO8859_4"); + addCharacterSet(7, "ISO8859_5"); + addCharacterSet(8, "ISO8859_6"); + addCharacterSet(9, "ISO8859_7"); + addCharacterSet(10, "ISO8859_8"); + addCharacterSet(11, "ISO8859_9"); + addCharacterSet(12, "ISO8859_10"); + addCharacterSet(13, "ISO8859_11"); + addCharacterSet(15, "ISO8859_13"); + addCharacterSet(16, "ISO8859_14"); + addCharacterSet(17, "ISO8859_15"); + addCharacterSet(18, "ISO8859_16"); + { char const* s[] = {"SJIS", "Shift_JIS", 0}; + addCharacterSet(20, s ); } + return true; +} + +CharacterSetECI::CharacterSetECI(int value, char const* encodingName_) + : ECI(value), encodingName(encodingName_) {} + +char const* CharacterSetECI::getEncodingName() { + return encodingName; +} + +void CharacterSetECI::addCharacterSet(int value, char const* encodingName) { + CharacterSetECI* eci = new CharacterSetECI(value, encodingName); + VALUE_TO_ECI[value] = eci; // can't use valueOf + NAME_TO_ECI[string(encodingName)] = eci; +} + +void CharacterSetECI::addCharacterSet(int value, char const* const* encodingNames) { + CharacterSetECI* eci = new CharacterSetECI(value, encodingNames[0]); + VALUE_TO_ECI[value] = eci; + for (int i = 0; encodingNames[i]; i++) { + NAME_TO_ECI[string(encodingNames[i])] = eci; + } +} + +CharacterSetECI* CharacterSetECI::getCharacterSetECIByValue(int value) { + if (value < 0 || value >= 900) { + throw 
IllegalArgumentException("Bad ECI value: " + value); + } + return VALUE_TO_ECI[value]; +} + +CharacterSetECI* CharacterSetECI::getCharacterSetECIByName(string const& name) { + return NAME_TO_ECI[name]; +} diff --git a/cpp/core/src/zxing/common/CharacterSetECI.h b/cpp/core/src/zxing/common/CharacterSetECI.h new file mode 100644 index 000000000..59dd41309 --- /dev/null +++ b/cpp/core/src/zxing/common/CharacterSetECI.h @@ -0,0 +1,53 @@ +// -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*- + +#ifndef __CHARACTERSET_ECI__ +#define __CHARACTERSET_ECI__ + +/* + * Copyright 2008-2011 ZXing authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +namespace zxing { + namespace common { + class CharacterSetECI; + } +} + +class zxing::common::CharacterSetECI : public ECI { +private: + static std::map VALUE_TO_ECI; + static std::map NAME_TO_ECI; + static const bool inited; + static bool init_tables(); + + char const* const encodingName; + + CharacterSetECI(int value, char const* encodingName); + + static void addCharacterSet(int value, char const* encodingName); + static void addCharacterSet(int value, char const* const* encodingNames); + +public: + char const* getEncodingName(); + + static CharacterSetECI* getCharacterSetECIByValue(int value); + static CharacterSetECI* getCharacterSetECIByName(std::string const& name); +}; + +#endif diff --git a/cpp/core/src/zxing/common/DecoderResult.cpp b/cpp/core/src/zxing/common/DecoderResult.cpp index 86b11f810..af7e5e2e3 100644 --- a/cpp/core/src/zxing/common/DecoderResult.cpp +++ b/cpp/core/src/zxing/common/DecoderResult.cpp @@ -1,9 +1,10 @@ +// -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*- /* * DecoderResult.cpp * zxing * * Created by Christian Brunschen on 20/05/2008. - * Copyright 2008 ZXing authors All rights reserved. + * Copyright 2008-2011 ZXing authors All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,11 +21,21 @@ #include -namespace zxing { +using namespace std; +using namespace zxing; -DecoderResult::DecoderResult(ArrayRef rawBytes, Ref text) : - rawBytes_(rawBytes), text_(text) { -} +DecoderResult::DecoderResult(ArrayRef rawBytes, + Ref text, + ArrayRef< ArrayRef >& byteSegments, + string const& ecLevel) : + rawBytes_(rawBytes), + text_(text), + byteSegments_(byteSegments), + ecLevel_(ecLevel) {} + +DecoderResult::DecoderResult(ArrayRef rawBytes, + Ref text) + : rawBytes_(rawBytes), text_(text) {} ArrayRef DecoderResult::getRawBytes() { return rawBytes_; @@ -33,5 +44,3 @@ ArrayRef DecoderResult::getRawBytes() { Ref DecoderResult::getText() { return text_; } - -} diff --git a/cpp/core/src/zxing/common/DecoderResult.h b/cpp/core/src/zxing/common/DecoderResult.h index 2cd6a4ad6..d26caa3aa 100644 --- a/cpp/core/src/zxing/common/DecoderResult.h +++ b/cpp/core/src/zxing/common/DecoderResult.h @@ -31,9 +31,17 @@ class DecoderResult : public Counted { private: ArrayRef rawBytes_; Ref text_; + ArrayRef< ArrayRef > byteSegments_; + std::string ecLevel_; public: + DecoderResult(ArrayRef rawBytes, + Ref text, + ArrayRef< ArrayRef >& byteSegments, + std::string const& ecLevel); + DecoderResult(ArrayRef rawBytes, Ref text); + ArrayRef getRawBytes(); Ref getText(); }; diff --git a/cpp/core/src/zxing/common/ECI.cpp b/cpp/core/src/zxing/common/ECI.cpp new file mode 100644 index 000000000..dc382dfb5 --- /dev/null +++ b/cpp/core/src/zxing/common/ECI.cpp @@ -0,0 +1,39 @@ +// -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*- +/* + * Copyright 2008-2011 ZXing authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +using zxing::common::ECI; +using zxing::IllegalArgumentException; + +ECI::ECI(int value_) : value(value_) {} + +int ECI::getValue() const { + return value; +} + +ECI* ECI::getECIByValue(int value) { + if (value < 0 || value > 999999) { + throw IllegalArgumentException("Bad ECI value: " + value); + } + if (value < 900) { // Character set ECIs use 000000 - 000899 + return CharacterSetECI::getCharacterSetECIByValue(value); + } + return 0; +} diff --git a/cpp/core/src/zxing/common/ECI.h b/cpp/core/src/zxing/common/ECI.h new file mode 100644 index 000000000..e1e6ca01c --- /dev/null +++ b/cpp/core/src/zxing/common/ECI.h @@ -0,0 +1,40 @@ +// -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*- + +#ifndef __ECI__ +#define __ECI__ + +/* + * Copyright 2008-2011 ZXing authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +namespace zxing { + namespace common { + class ECI; + } +} +class zxing::common::ECI { +private: + const int value; + +protected: + ECI(int value); + +public: + int getValue() const; + + static ECI* getECIByValue(int value); +}; + +#endif diff --git a/cpp/core/src/zxing/common/GlobalHistogramBinarizer.cpp b/cpp/core/src/zxing/common/GlobalHistogramBinarizer.cpp index e0ce81ed0..2a1467331 100644 --- a/cpp/core/src/zxing/common/GlobalHistogramBinarizer.cpp +++ b/cpp/core/src/zxing/common/GlobalHistogramBinarizer.cpp @@ -19,8 +19,8 @@ */ #include - #include +#include namespace zxing { using namespace std; @@ -107,7 +107,8 @@ Ref GlobalHistogramBinarizer::getBlackMatrix() { // Quickly calculates the histogram by sampling four rows from the image. // This proved to be more robust on the blackbox tests than sampling a // diagonal as we used to do. - unsigned char* row = new unsigned char[width]; + ArrayRef ref (width); + unsigned char* row = &ref[0]; for (int y = 1; y < 5; y++) { int rownum = height * y / 5; int right = (width << 2) / 5; @@ -130,7 +131,7 @@ Ref GlobalHistogramBinarizer::getBlackMatrix() { } cached_matrix_ = matrix_ref; - delete [] row; + // delete [] row; return matrix_ref; } diff --git a/cpp/core/src/zxing/common/HybridBinarizer.cpp b/cpp/core/src/zxing/common/HybridBinarizer.cpp index 0bab00a50..f36ab40c9 100644 --- a/cpp/core/src/zxing/common/HybridBinarizer.cpp +++ b/cpp/core/src/zxing/common/HybridBinarizer.cpp @@ -70,6 +70,9 @@ Ref HybridBinarizer::getBlackMatrix() { calculateThresholdForBlock(luminances, subWidth, subHeight, width, height, blackPoints, newMatrix); matrix_ = newMatrix; + // N.B.: these deletes are inadequate if anything between the new and this point can throw. + // As of this writing, it doesn't look like they do. 
+ delete [] blackPoints; delete [] luminances; } else { diff --git a/cpp/core/src/zxing/common/StringUtils.cpp b/cpp/core/src/zxing/common/StringUtils.cpp new file mode 100644 index 000000000..01ac8405c --- /dev/null +++ b/cpp/core/src/zxing/common/StringUtils.cpp @@ -0,0 +1,175 @@ +// -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*- + +/* + * Copyright (C) 2010-2011 ZXing authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +using namespace std; +using namespace zxing; +using namespace zxing::common; + +// N.B.: these are the iconv strings for at least some versions of iconv + +char const* const StringUtils::PLATFORM_DEFAULT_ENCODING = "UTF-8"; +char const* const StringUtils::ASCII = "ASCII"; +char const* const StringUtils::SHIFT_JIS = "SHIFT_JIS"; +char const* const StringUtils::GB2312 = "GBK"; +char const* const StringUtils::EUC_JP = "EUC-JP"; +char const* const StringUtils::UTF8 = "UTF-8"; +char const* const StringUtils::ISO88591 = "ISO8859-1"; +const bool StringUtils::ASSUME_SHIFT_JIS = false; + +string +StringUtils::guessEncoding(unsigned char* bytes, int length, Hashtable const& hints) { + Hashtable::const_iterator i = hints.find(DecodeHints::CHARACTER_SET); + if (i != hints.end()) { + return i->second; + } + // Does it start with the UTF-8 byte order mark? 
then guess it's UTF-8 + if (length > 3 && + bytes[0] == (unsigned char) 0xEF && + bytes[1] == (unsigned char) 0xBB && + bytes[2] == (unsigned char) 0xBF) { + return UTF8; + } + // For now, merely tries to distinguish ISO-8859-1, UTF-8 and Shift_JIS, + // which should be by far the most common encodings. ISO-8859-1 + // should not have bytes in the 0x80 - 0x9F range, while Shift_JIS + // uses this as a first byte of a two-byte character. If we see this + // followed by a valid second byte in Shift_JIS, assume it is Shift_JIS. + // If we see something else in that second byte, we'll make the risky guess + // that it's UTF-8. + bool canBeISO88591 = true; + bool canBeShiftJIS = true; + bool canBeUTF8 = true; + int utf8BytesLeft = 0; + int maybeDoubleByteCount = 0; + int maybeSingleByteKatakanaCount = 0; + bool sawLatin1Supplement = false; + bool sawUTF8Start = false; + bool lastWasPossibleDoubleByteStart = false; + + for (int i = 0; + i < length && (canBeISO88591 || canBeShiftJIS || canBeUTF8); + i++) { + + int value = bytes[i] & 0xFF; + + // UTF-8 stuff + if (value >= 0x80 && value <= 0xBF) { + if (utf8BytesLeft > 0) { + utf8BytesLeft--; + } + } else { + if (utf8BytesLeft > 0) { + canBeUTF8 = false; + } + if (value >= 0xC0 && value <= 0xFD) { + sawUTF8Start = true; + int valueCopy = value; + while ((valueCopy & 0x40) != 0) { + utf8BytesLeft++; + valueCopy <<= 1; + } + } + } + + // ISO-8859-1 stuff + + if ((value == 0xC2 || value == 0xC3) && i < length - 1) { + // This is really a poor hack. The slightly more exotic characters people might want to put in + // a QR Code, by which I mean the Latin-1 supplement characters (e.g. u-umlaut) have encodings + // that start with 0xC2 followed by [0xA0,0xBF], or start with 0xC3 followed by [0x80,0xBF]. 
+ int nextValue = bytes[i + 1] & 0xFF; + if (nextValue <= 0xBF && + ((value == 0xC2 && nextValue >= 0xA0) || (value == 0xC3 && nextValue >= 0x80))) { + sawLatin1Supplement = true; + } + } + if (value >= 0x7F && value <= 0x9F) { + canBeISO88591 = false; + } + + // Shift_JIS stuff + + if (value >= 0xA1 && value <= 0xDF) { + // count the number of characters that might be a Shift_JIS single-byte Katakana character + if (!lastWasPossibleDoubleByteStart) { + maybeSingleByteKatakanaCount++; + } + } + if (!lastWasPossibleDoubleByteStart && + ((value >= 0xF0 && value <= 0xFF) || value == 0x80 || value == 0xA0)) { + canBeShiftJIS = false; + } + if ((value >= 0x81 && value <= 0x9F) || (value >= 0xE0 && value <= 0xEF)) { + // These start double-byte characters in Shift_JIS. Let's see if it's followed by a valid + // second byte. + if (lastWasPossibleDoubleByteStart) { + // If we just checked this and the last byte for being a valid double-byte + // char, don't check starting on this byte. If this and the last byte + // formed a valid pair, then this shouldn't be checked to see if it starts + // a double byte pair of course. + lastWasPossibleDoubleByteStart = false; + } else { + // ... otherwise do check to see if this plus the next byte form a valid + // double byte pair encoding a character. + lastWasPossibleDoubleByteStart = true; + if (i >= length - 1) { + canBeShiftJIS = false; + } else { + int nextValue = bytes[i + 1] & 0xFF; + if (nextValue < 0x40 || nextValue > 0xFC) { + canBeShiftJIS = false; + } else { + maybeDoubleByteCount++; + } + // There is some conflicting information out there about which bytes can follow which in + // double-byte Shift_JIS characters. The rule above seems to be the one that matches practice. 
+ } + } + } else { + lastWasPossibleDoubleByteStart = false; + } + } + if (utf8BytesLeft > 0) { + canBeUTF8 = false; + } + + // Easy -- if assuming Shift_JIS and no evidence it can't be, done + if (canBeShiftJIS && ASSUME_SHIFT_JIS) { + return SHIFT_JIS; + } + if (canBeUTF8 && sawUTF8Start) { + return UTF8; + } + // Distinguishing Shift_JIS and ISO-8859-1 can be a little tough. The crude heuristic is: + // - If we saw + // - at least 3 bytes that starts a double-byte value (bytes that are rare in ISO-8859-1), or + // - over 5% of bytes could be single-byte Katakana (also rare in ISO-8859-1), + // - and, saw no sequences that are invalid in Shift_JIS, then we conclude Shift_JIS + if (canBeShiftJIS && (maybeDoubleByteCount >= 3 || 20 * maybeSingleByteKatakanaCount > length)) { + return SHIFT_JIS; + } + // Otherwise, we default to ISO-8859-1 unless we know it can't be + if (!sawLatin1Supplement && canBeISO88591) { + return ISO88591; + } + // Otherwise, we take a wild guess with platform encoding + return PLATFORM_DEFAULT_ENCODING; +} diff --git a/cpp/core/src/zxing/common/StringUtils.h b/cpp/core/src/zxing/common/StringUtils.h new file mode 100644 index 000000000..87ffb3b15 --- /dev/null +++ b/cpp/core/src/zxing/common/StringUtils.h @@ -0,0 +1,52 @@ +// -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*- + +#ifndef __STRING_UTILS__ +#define __STRING_UTILS__ + +/* + * Copyright (C) 2010-2011 ZXing authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +namespace zxing { + namespace common { + class StringUtils; + } +} + +class zxing::common::StringUtils { +private: + static char const* const PLATFORM_DEFAULT_ENCODING; + + StringUtils() {} + +public: + static char const* const ASCII; + static char const* const SHIFT_JIS; + static char const* const GB2312; + static char const* const EUC_JP; + static char const* const UTF8; + static char const* const ISO88591; + static const bool ASSUME_SHIFT_JIS; + + typedef std::map Hashtable; + + static std::string guessEncoding(unsigned char* bytes, int length, Hashtable const& hints); +}; + +#endif diff --git a/cpp/core/src/zxing/datamatrix/decoder/Decoder.cpp b/cpp/core/src/zxing/datamatrix/decoder/Decoder.cpp index 72172dc41..f841d3a95 100644 --- a/cpp/core/src/zxing/datamatrix/decoder/Decoder.cpp +++ b/cpp/core/src/zxing/datamatrix/decoder/Decoder.cpp @@ -34,7 +34,7 @@ using namespace std; Decoder::Decoder() : rsDecoder_(GF256::DATA_MATRIX_FIELD) { } - + void Decoder::correctErrors(ArrayRef codewordBytes, int numDataCodewords) { int numCodewords = codewordBytes->size(); @@ -46,7 +46,7 @@ void Decoder::correctErrors(ArrayRef codewordBytes, int numDataCo try { rsDecoder_.decode(codewordInts, numECCodewords); - } catch (ReedSolomonException ex) { + } catch (ReedSolomonException const& ex) { ReaderException rex(ex.what()); throw rex; } @@ -56,34 +56,34 @@ void Decoder::correctErrors(ArrayRef codewordBytes, int numDataCo } } -Ref Decoder::decode(Ref bits) { - // Construct a parser and read version, error-correction level - BitMatrixParser parser(bits); - Version *version = parser.readVersion(bits); - - // Read codewords - ArrayRef codewords(parser.readCodewords()); - // Separate into data blocks - std::vector > dataBlocks = DataBlock::getDataBlocks(codewords, version); - - // Count total number of data bytes - int totalBytes = 0; - for 
(unsigned int i = 0; i < dataBlocks.size(); i++) { - totalBytes += dataBlocks[i]->getNumDataCodewords(); - } - ArrayRef resultBytes(totalBytes); - int resultOffset = 0; - - // Error-correct and copy data blocks together into a stream of bytes - for (unsigned int j = 0; j < dataBlocks.size(); j++) { - Ref dataBlock(dataBlocks[j]); - ArrayRef codewordBytes = dataBlock->getCodewords(); - int numDataCodewords = dataBlock->getNumDataCodewords(); - correctErrors(codewordBytes, numDataCodewords); - for (int i = 0; i < numDataCodewords; i++) { - resultBytes[resultOffset++] = codewordBytes[i]; - } - } +Ref Decoder::decode(Ref bits) { + // Construct a parser and read version, error-correction level + BitMatrixParser parser(bits); + Version *version = parser.readVersion(bits); + + // Read codewords + ArrayRef codewords(parser.readCodewords()); + // Separate into data blocks + std::vector > dataBlocks = DataBlock::getDataBlocks(codewords, version); + + // Count total number of data bytes + int totalBytes = 0; + for (unsigned int i = 0; i < dataBlocks.size(); i++) { + totalBytes += dataBlocks[i]->getNumDataCodewords(); + } + ArrayRef resultBytes(totalBytes); + int resultOffset = 0; + + // Error-correct and copy data blocks together into a stream of bytes + for (unsigned int j = 0; j < dataBlocks.size(); j++) { + Ref dataBlock(dataBlocks[j]); + ArrayRef codewordBytes = dataBlock->getCodewords(); + int numDataCodewords = dataBlock->getNumDataCodewords(); + correctErrors(codewordBytes, numDataCodewords); + for (int i = 0; i < numDataCodewords; i++) { + resultBytes[resultOffset++] = codewordBytes[i]; + } + } // Decode the contents of that stream of bytes DecodedBitStreamParser decodedBSParser; diff --git a/cpp/core/src/zxing/oned/Code128Reader.cpp b/cpp/core/src/zxing/oned/Code128Reader.cpp index b76536a4f..fa3fc7142 100644 --- a/cpp/core/src/zxing/oned/Code128Reader.cpp +++ b/cpp/core/src/zxing/oned/Code128Reader.cpp @@ -279,7 +279,7 @@ namespace zxing { // Decode another code 
from image try { code = decodeCode(row, counters, sizeof(counters)/sizeof(int), nextStart); - } catch (ReaderException re) { + } catch (ReaderException const& re) { throw re; } diff --git a/cpp/core/src/zxing/oned/ITFReader.cpp b/cpp/core/src/zxing/oned/ITFReader.cpp index 2c0518ceb..e37bad7f2 100644 --- a/cpp/core/src/zxing/oned/ITFReader.cpp +++ b/cpp/core/src/zxing/oned/ITFReader.cpp @@ -103,7 +103,7 @@ namespace zxing { delete [] endRange; ArrayRef resultBytes(1); return Ref(new Result(resultString, resultBytes, resultPoints, BarcodeFormat_ITF)); - } catch (ReaderException re) { + } catch (ReaderException const& re) { delete [] startRange; delete [] endRange; return Ref(); @@ -179,7 +179,7 @@ namespace zxing { narrowLineWidth = (startPattern[1] - startPattern[0]) >> 2; validateQuietZone(row, startPattern[0]); return startPattern; - } catch (ReaderException re) { + } catch (ReaderException const& re) { delete [] startPattern; throw re; } @@ -217,7 +217,7 @@ namespace zxing { row->reverse(); return endPattern; - } catch (ReaderException re) { + } catch (ReaderException const& re) { delete [] endPattern; row->reverse(); throw re; diff --git a/cpp/core/src/zxing/oned/OneDReader.cpp b/cpp/core/src/zxing/oned/OneDReader.cpp index 3a82a04a8..d73057b15 100644 --- a/cpp/core/src/zxing/oned/OneDReader.cpp +++ b/cpp/core/src/zxing/oned/OneDReader.cpp @@ -89,9 +89,9 @@ namespace zxing { // Estimate black point for this row and load it: try { row = image->getBlackRow(rowNumber, row); - } catch (ReaderException re) { + } catch (ReaderException const& re) { continue; - } catch (IllegalArgumentException re) { + } catch (IllegalArgumentException const& re) { continue; } diff --git a/cpp/core/src/zxing/qrcode/ErrorCorrectionLevel.cpp b/cpp/core/src/zxing/qrcode/ErrorCorrectionLevel.cpp index e8faf9091..16659a131 100644 --- a/cpp/core/src/zxing/qrcode/ErrorCorrectionLevel.cpp +++ b/cpp/core/src/zxing/qrcode/ErrorCorrectionLevel.cpp @@ -1,9 +1,10 @@ +// -*- mode:c++; tab-width:2; 
indent-tabs-mode:nil; c-basic-offset:2 -*- /* * ErrorCorrectionLevel.cpp * zxing * * Created by Christian Brunschen on 15/05/2008. - * Copyright 2008 ZXing authors All rights reserved. + * Copyright 2008-2011 ZXing authors All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,15 +21,30 @@ #include +using std::string; + namespace zxing { namespace qrcode { -ErrorCorrectionLevel::ErrorCorrectionLevel(int inOrdinal) : - ordinal_(inOrdinal) { +ErrorCorrectionLevel::ErrorCorrectionLevel(int inOrdinal, + int bits, + char const* name) : + ordinal_(inOrdinal), bits_(bits), name_(name) {} + +int ErrorCorrectionLevel::ordinal() const { + return ordinal_; } -int ErrorCorrectionLevel::ordinal() { - return ordinal_; +int ErrorCorrectionLevel::bits() const { + return bits_; +} + +string const& ErrorCorrectionLevel::name() const { + return name_; +} + +ErrorCorrectionLevel::operator string const& () const { + return name_; } ErrorCorrectionLevel& ErrorCorrectionLevel::forBits(int bits) { @@ -38,10 +54,10 @@ ErrorCorrectionLevel& ErrorCorrectionLevel::forBits(int bits) { return *FOR_BITS[bits]; } -ErrorCorrectionLevel ErrorCorrectionLevel::L(0); -ErrorCorrectionLevel ErrorCorrectionLevel::M(1); -ErrorCorrectionLevel ErrorCorrectionLevel::Q(2); -ErrorCorrectionLevel ErrorCorrectionLevel::H(3); + ErrorCorrectionLevel ErrorCorrectionLevel::L(0, 0x01, "L"); + ErrorCorrectionLevel ErrorCorrectionLevel::M(1, 0x00, "M"); + ErrorCorrectionLevel ErrorCorrectionLevel::Q(2, 0x03, "Q"); + ErrorCorrectionLevel ErrorCorrectionLevel::H(3, 0x02, "H"); ErrorCorrectionLevel *ErrorCorrectionLevel::FOR_BITS[] = { &M, &L, &H, &Q }; int ErrorCorrectionLevel::N_LEVELS = 4; diff --git a/cpp/core/src/zxing/qrcode/ErrorCorrectionLevel.h b/cpp/core/src/zxing/qrcode/ErrorCorrectionLevel.h index d953f17f5..ad8d64e21 100644 --- a/cpp/core/src/zxing/qrcode/ErrorCorrectionLevel.h +++ 
b/cpp/core/src/zxing/qrcode/ErrorCorrectionLevel.h @@ -28,7 +28,9 @@ namespace qrcode { class ErrorCorrectionLevel { private: int ordinal_; - ErrorCorrectionLevel(int inOrdinal); + int bits_; + std::string name_; + ErrorCorrectionLevel(int inOrdinal, int bits, char const* name); static ErrorCorrectionLevel *FOR_BITS[]; static int N_LEVELS; public: @@ -37,7 +39,11 @@ public: static ErrorCorrectionLevel Q; static ErrorCorrectionLevel H; - int ordinal(); + int ordinal() const; + int bits() const; + std::string const& name() const; + operator std::string const& () const; + static ErrorCorrectionLevel& forBits(int bits); }; } diff --git a/cpp/core/src/zxing/qrcode/decoder/DecodedBitStreamParser.cpp b/cpp/core/src/zxing/qrcode/decoder/DecodedBitStreamParser.cpp index 0c5dedae4..2d482de62 100644 --- a/cpp/core/src/zxing/qrcode/decoder/DecodedBitStreamParser.cpp +++ b/cpp/core/src/zxing/qrcode/decoder/DecodedBitStreamParser.cpp @@ -20,6 +20,9 @@ */ #include +#include +#include +#include #include #ifndef NO_ICONV #include @@ -38,6 +41,7 @@ using namespace std; using namespace zxing; using namespace zxing::qrcode; +using namespace zxing::common; const char DecodedBitStreamParser::ALPHANUMERIC_CHARS[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', @@ -46,19 +50,29 @@ const char DecodedBitStreamParser::ALPHANUMERIC_CHARS[] = 'Y', 'Z', ' ', '$', '%', '*', '+', '-', '.', '/', ':' }; -const char *DecodedBitStreamParser::ASCII = "ASCII"; -const char *DecodedBitStreamParser::ISO88591 = "ISO-8859-1"; -const char *DecodedBitStreamParser::UTF8 = "UTF-8"; -const char *DecodedBitStreamParser::SHIFT_JIS = "SHIFT_JIS"; -const char *DecodedBitStreamParser::EUC_JP = "EUC-JP"; +namespace {int GB2312_SUBSET = 1;} -void DecodedBitStreamParser::append(std::string &result, const unsigned char *bufIn, size_t nIn, const char *src) { +void DecodedBitStreamParser::append(std::string &result, + string const& in, + const char *src) { + append(result, (unsigned char const*)in.c_str(), 
in.length(), src); +} + +void DecodedBitStreamParser::append(std::string &result, + const unsigned char *bufIn, + size_t nIn, + const char *src) { #ifndef NO_ICONV if (nIn == 0) { return; } - iconv_t cd = iconv_open(UTF8, src); + iconv_t cd = iconv_open(StringUtils::UTF8, src); + if (cd == (iconv_t)-1) { + result.append((const char *)bufIn, nIn); + return; + } + const int maxOut = 4 * nIn + 1; unsigned char* bufOut = new unsigned char[maxOut]; @@ -86,6 +100,47 @@ void DecodedBitStreamParser::append(std::string &result, const unsigned char *bu #endif } +void DecodedBitStreamParser::decodeHanziSegment(Ref bits_, + string& result, + int count) { + BitSource& bits (*bits_); + // Don't crash trying to read more bits than we have available. + if (count * 13 > bits.available()) { + throw FormatException(); + } + + // Each character will require 2 bytes. Read the characters as 2-byte pairs + // and decode as GB2312 afterwards + size_t nBytes = 2 * count; + unsigned char* buffer = new unsigned char[nBytes]; + int offset = 0; + while (count > 0) { + // Each 13 bits encodes a 2-byte character + int twoBytes = bits.readBits(13); + int assembledTwoBytes = ((twoBytes / 0x060) << 8) | (twoBytes % 0x060); + if (assembledTwoBytes < 0x003BF) { + // In the 0xA1A1 to 0xAAFE range + assembledTwoBytes += 0x0A1A1; + } else { + // In the 0xB0A1 to 0xFAFE range + assembledTwoBytes += 0x0A6A1; + } + buffer[offset] = (unsigned char) ((assembledTwoBytes >> 8) & 0xFF); + buffer[offset + 1] = (unsigned char) (assembledTwoBytes & 0xFF); + offset += 2; + count--; + } + + try { + append(result, buffer, nBytes, StringUtils::GB2312); + } catch (ReaderException const& re) { + delete [] buffer; + throw FormatException(); + } + + delete [] buffer; + } + void DecodedBitStreamParser::decodeKanjiSegment(Ref bits, std::string &result, int count) { // Each character will require 2 bytes. 
Read the characters as 2-byte pairs // and decode as Shift_JIS afterwards @@ -110,30 +165,45 @@ void DecodedBitStreamParser::decodeKanjiSegment(Ref bits, std::string count--; } - append(result, buffer, nBytes, SHIFT_JIS); + append(result, buffer, nBytes, StringUtils::SHIFT_JIS); delete[] buffer; } -void DecodedBitStreamParser::decodeByteSegment(Ref bits, std::string &result, int count) { +void DecodedBitStreamParser::decodeByteSegment(Ref bits_, + string& result, + int count, + CharacterSetECI* currentCharacterSetECI, + ArrayRef< ArrayRef >& byteSegments, + Hashtable const& hints) { int nBytes = count; - unsigned char* readBytes = new unsigned char[nBytes]; - if (count << 3 > bits->available()) { - ostringstream s; - s << "Count too large: " << count; - delete[] readBytes; - throw ReaderException(s.str().c_str()); + BitSource& bits (*bits_); + // Don't crash trying to read more bits than we have available. + if (count << 3 > bits.available()) { + throw FormatException(); } + + ArrayRef bytes_ (count); + unsigned char* readBytes = &(*bytes_)[0]; for (int i = 0; i < count; i++) { - readBytes[i] = (unsigned char)bits->readBits(8); + readBytes[i] = (unsigned char) bits.readBits(8); } - // The spec isn't clear on this mode; see - // section 6.4.5: t does not say which encoding to assuming - // upon decoding. I have seen ISO-8859-1 used as well as - // Shift_JIS -- without anything like an ECI designator to - // give a hint. - const char *encoding = guessEncoding(readBytes, nBytes); - append(result, readBytes, nBytes, encoding); - delete[] readBytes; + string encoding; + if (currentCharacterSetECI == 0) { + // The spec isn't clear on this mode; see + // section 6.4.5: it does not say which encoding to assume + // upon decoding. I have seen ISO-8859-1 used as well as + // Shift_JIS -- without anything like an ECI designator to + // give a hint. 
+ encoding = StringUtils::guessEncoding(readBytes, count, hints); + } else { + encoding = currentCharacterSetECI->getEncodingName(); + } + try { + append(result, readBytes, nBytes, encoding.c_str()); + } catch (ReaderException const& re) { + throw FormatException(); + } + byteSegments->values().push_back(bytes_); } void DecodedBitStreamParser::decodeNumericSegment(Ref bits, std::string &result, int count) { @@ -186,249 +256,147 @@ void DecodedBitStreamParser::decodeNumericSegment(Ref bits, std::stri } bytes[i++] = ALPHANUMERIC_CHARS[digitBits]; } - append(result, bytes, nBytes, ASCII); + append(result, bytes, nBytes, StringUtils::ASCII); delete[] bytes; } -void DecodedBitStreamParser::decodeAlphanumericSegment(Ref bits, std::string &result, int count) { - int nBytes = count; - unsigned char* bytes = new unsigned char[nBytes]; - int i = 0; +char DecodedBitStreamParser::toAlphaNumericChar(size_t value) { + if (value >= sizeof(DecodedBitStreamParser::ALPHANUMERIC_CHARS)) { + throw FormatException(); + } + return ALPHANUMERIC_CHARS[value]; +} + +void DecodedBitStreamParser::decodeAlphanumericSegment(Ref bits_, + string& result, + int count, + bool fc1InEffect) { + BitSource& bits (*bits_); + ostringstream bytes; // Read two characters at a time while (count > 1) { - int nextTwoCharsBits = bits->readBits(11); - bytes[i++] = ALPHANUMERIC_CHARS[nextTwoCharsBits / 45]; - bytes[i++] = ALPHANUMERIC_CHARS[nextTwoCharsBits % 45]; + int nextTwoCharsBits = bits.readBits(11); + bytes << toAlphaNumericChar(nextTwoCharsBits / 45); + bytes << toAlphaNumericChar(nextTwoCharsBits % 45); count -= 2; } if (count == 1) { - bytes[i++] = ALPHANUMERIC_CHARS[bits->readBits(6)]; + // special case: one character left + bytes << toAlphaNumericChar(bits.readBits(6)); } - append(result, bytes, nBytes, ASCII); - delete[] bytes; -} - -const char * -DecodedBitStreamParser::guessEncoding(unsigned char *bytes, int length) { - const bool ASSUME_SHIFT_JIS = false; - char const* const 
PLATFORM_DEFAULT_ENCODING="UTF-8"; - - // Does it start with the UTF-8 byte order mark? then guess it's UTF-8 - if (length > 3 && bytes[0] == (unsigned char)0xEF && bytes[1] == (unsigned char)0xBB && bytes[2] - == (unsigned char)0xBF) { - return UTF8; - } - // For now, merely tries to distinguish ISO-8859-1, UTF-8 and Shift_JIS, - // which should be by far the most common encodings. ISO-8859-1 - // should not have bytes in the 0x80 - 0x9F range, while Shift_JIS - // uses this as a first byte of a two-byte character. If we see this - // followed by a valid second byte in Shift_JIS, assume it is Shift_JIS. - // If we see something else in that second byte, we'll make the risky guess - // that it's UTF-8. - bool canBeISO88591 = true; - bool canBeShiftJIS = true; - bool canBeUTF8 = true; - int utf8BytesLeft = 0; - int maybeDoubleByteCount = 0; - int maybeSingleByteKatakanaCount = 0; - bool sawLatin1Supplement = false; - bool sawUTF8Start = false; - bool lastWasPossibleDoubleByteStart = false; - for (int i = 0; - i < length && (canBeISO88591 || canBeShiftJIS || canBeUTF8); - i++) { - int value = bytes[i] & 0xFF; - - // UTF-8 stuff - if (value >= 0x80 && value <= 0xBF) { - if (utf8BytesLeft > 0) { - utf8BytesLeft--; - } - } else { - if (utf8BytesLeft > 0) { - canBeUTF8 = false; - } - if (value >= 0xC0 && value <= 0xFD) { - sawUTF8Start = true; - int valueCopy = value; - while ((valueCopy & 0x40) != 0) { - utf8BytesLeft++; - valueCopy <<= 1; - } - } - } - - // Shift_JIS stuff - - if (value >= 0xA1 && value <= 0xDF) { - // count the number of characters that might be a Shift_JIS single-byte Katakana character - if (!lastWasPossibleDoubleByteStart) { - maybeSingleByteKatakanaCount++; - } - } - if (!lastWasPossibleDoubleByteStart && - ((value >= 0xF0 && value <= 0xFF) || value == 0x80 || value == 0xA0)) { - canBeShiftJIS = false; - } - if (((value >= 0x81 && value <= 0x9F) || (value >= 0xE0 && value <= 0xEF))) { - // These start double-byte characters in Shift_JIS. 
Let's see if it's followed by a valid - // second byte. - if (lastWasPossibleDoubleByteStart) { - // If we just checked this and the last byte for being a valid double-byte - // char, don't check starting on this byte. If this and the last byte - // formed a valid pair, then this shouldn't be checked to see if it starts - // a double byte pair of course. - lastWasPossibleDoubleByteStart = false; + // See section 6.4.8.1, 6.4.8.2 + string s = bytes.str(); + if (fc1InEffect) { + // We need to massage the result a bit if in an FNC1 mode: + ostringstream r; + for (size_t i = 0; i < s.length(); i++) { + if (s[i] != '%') { + r << s[i]; } else { - // ... otherwise do check to see if this plus the next byte form a valid - // double byte pair encoding a character. - lastWasPossibleDoubleByteStart = true; - if (i >= length - 1) { - canBeShiftJIS = false; + if (i < s.length() - 1 && s[i + 1] == '%') { + // %% is rendered as % + r << s[i++]; } else { - int nextValue = bytes[i + 1] & 0xFF; - if (nextValue < 0x40 || nextValue > 0xFC) { - canBeShiftJIS = false; - } else { - maybeDoubleByteCount++; - } - // There is some conflicting information out there about which bytes can follow which in - // double-byte Shift_JIS characters. The rule above seems to be the one that matches practice. + // In alpha mode, % should be converted to FNC1 separator 0x1D + r << (char)0x1D; } } - } else { - lastWasPossibleDoubleByteStart = false; } + s = r.str(); } - if (utf8BytesLeft > 0) { - canBeUTF8 = false; - } - - // Easy -- if assuming Shift_JIS and no evidence it can't be, done - if (canBeShiftJIS && ASSUME_SHIFT_JIS) { - return SHIFT_JIS; - } - if (canBeUTF8 && sawUTF8Start) { - return UTF8; - } - // Distinguishing Shift_JIS and ISO-8859-1 can be a little tough. 
The crude heuristic is: - // - If we saw - // - at least 3 bytes that starts a double-byte value (bytes that are rare in ISO-8859-1), or - // - over 5% of bytes could be single-byte Katakana (also rare in ISO-8859-1), - // - and, saw no sequences that are invalid in Shift_JIS, then we conclude Shift_JIS - if (canBeShiftJIS && (maybeDoubleByteCount >= 3 || 20 * maybeSingleByteKatakanaCount > length)) { - return SHIFT_JIS; - } - // Otherwise, we default to ISO-8859-1 unless we know it can't be - if (!sawLatin1Supplement && canBeISO88591) { - return ISO88591; - } - // Otherwise, we take a wild guess with platform encoding - return PLATFORM_DEFAULT_ENCODING; + append(result, s, StringUtils::ASCII); } -/* -string DecodedBitStreamParser::decode(ArrayRef bytes, Version *version) { +namespace { + int parseECIValue(BitSource& bits) { // Reads a 1-, 2- or 3-byte ECI designator and returns its value; the source is taken by reference so the bits consumed here are also consumed for the caller + int firstByte = bits.readBits(8); + if ((firstByte & 0x80) == 0) { + // just one byte + return firstByte & 0x7F; + } + if ((firstByte & 0xC0) == 0x80) { + // two bytes + int secondByte = bits.readBits(8); + return ((firstByte & 0x3F) << 8) | secondByte; + } + if ((firstByte & 0xE0) == 0xC0) { + // three bytes + int secondThirdBytes = bits.readBits(16); + return ((firstByte & 0x1F) << 16) | secondThirdBytes; + } + ostringstream s; s << "Bad ECI bits starting with byte " << firstByte; throw IllegalArgumentException(s.str().c_str()); // format the value through a stream: adding an int to a string literal is pointer arithmetic, not concatenation + } +} + +Ref +DecodedBitStreamParser::decode(ArrayRef bytes, + Version* version, + ErrorCorrectionLevel const& ecLevel, + Hashtable const& hints) { + Ref bits_ (new BitSource(bytes)); + BitSource& bits (*bits_); string result; - Ref bits(new BitSource(bytes)); - Mode *mode = &Mode::TERMINATOR; - do { - // While still another segment to read... - if (bits->available() < 4) { - // OK, assume we're done. 
Really, a TERMINATOR mode should have been recorded here - mode = &Mode::TERMINATOR; - } else { - mode = &Mode::forBits(bits->readBits(4)); // mode is encoded by 4 bits - } - if (mode != &Mode::TERMINATOR) { - // How many characters will follow, encoded in this mode? - int count = bits->readBits(mode->getCharacterCountBits(version)); - if (mode == &Mode::NUMERIC) { - decodeNumericSegment(bits, result, count); - } else if (mode == &Mode::ALPHANUMERIC) { - decodeAlphanumericSegment(bits, result, count); - } else if (mode == &Mode::BYTE) { - decodeByteSegment(bits, result, count); - } else if (mode == &Mode::KANJI) { - decodeKanjiSegment(bits, result, count); - } else { - throw ReaderException("Unsupported mode indicator"); - } - } - } while (mode != &Mode::TERMINATOR); - return result; -} -*/ - -DecoderResult DecodedBitStreamParser::decode(ArrayRef bytes, - Version* version, - ErrorCorrectionLevel ecLevel, - Hashtable hints) { - BitSource bits = new BitSource(bytes); - StringBuffer result = new StringBuffer(50); - CharacterSetECI currentCharacterSetECI = null; - boolean fc1InEffect = false; - Vector byteSegments = new Vector(1); - Mode mode; + CharacterSetECI* currentCharacterSetECI = 0; + bool fc1InEffect = false; + ArrayRef< ArrayRef > byteSegments (size_t(0)); + Mode* mode = 0; do { // While still another segment to read... if (bits.available() < 4) { // OK, assume we're done. 
Really, a TERMINATOR mode should have been recorded here - mode = Mode.TERMINATOR; + mode = &Mode::TERMINATOR; } else { try { - mode = Mode.forBits(bits.readBits(4)); // mode is encoded by 4 bits - } catch (IllegalArgumentException iae) { - throw FormatException.getFormatInstance(); + mode = &Mode::forBits(bits.readBits(4)); // mode is encoded by 4 bits + } catch (IllegalArgumentException const& iae) { + throw iae; + // throw FormatException.getFormatInstance(); } } - if (!mode.equals(Mode.TERMINATOR)) { - if (mode.equals(Mode.FNC1_FIRST_POSITION) || mode.equals(Mode.FNC1_SECOND_POSITION)) { + if (mode != &Mode::TERMINATOR) { + if ((mode == &Mode::FNC1_FIRST_POSITION) || (mode == &Mode::FNC1_SECOND_POSITION)) { // We do little with FNC1 except alter the parsed result a bit according to the spec fc1InEffect = true; - } else if (mode.equals(Mode.STRUCTURED_APPEND)) { + } else if (mode == &Mode::STRUCTURED_APPEND) { // not really supported; all we do is ignore it // Read next 8 bits (symbol sequence #) and 8 bits (parity data), then continue bits.readBits(16); - } else if (mode.equals(Mode.ECI)) { + } else if (mode == &Mode::ECI) { // Count doesn't apply to ECI int value = parseECIValue(bits); - currentCharacterSetECI = CharacterSetECI.getCharacterSetECIByValue(value); - if (currentCharacterSetECI == null) { - throw FormatException.getFormatInstance(); + currentCharacterSetECI = CharacterSetECI::getCharacterSetECIByValue(value); + if (currentCharacterSetECI == 0) { + throw FormatException(); } } else { // First handle Hanzi mode which does not start with character count - if (mode.equals(Mode.HANZI)) { + if (mode == &Mode::HANZI) { //chinese mode contains a sub set indicator right after mode indicator int subset = bits.readBits(4); - int countHanzi = bits.readBits(mode.getCharacterCountBits(version)); + int countHanzi = bits.readBits(mode->getCharacterCountBits(version)); if (subset == GB2312_SUBSET) { - decodeHanziSegment(bits, result, countHanzi); + 
decodeHanziSegment(bits_, result, countHanzi); } } else { // "Normal" QR code modes: // How many characters will follow, encoded in this mode? - int count = bits.readBits(mode.getCharacterCountBits(version)); - if (mode.equals(Mode.NUMERIC)) { - decodeNumericSegment(bits, result, count); - } else if (mode.equals(Mode.ALPHANUMERIC)) { - decodeAlphanumericSegment(bits, result, count, fc1InEffect); - } else if (mode.equals(Mode.BYTE)) { - decodeByteSegment(bits, result, count, currentCharacterSetECI, byteSegments, hints); - } else if (mode.equals(Mode.KANJI)) { - decodeKanjiSegment(bits, result, count); + int count = bits.readBits(mode->getCharacterCountBits(version)); + if (mode == &Mode::NUMERIC) { + decodeNumericSegment(bits_, result, count); + } else if (mode == &Mode::ALPHANUMERIC) { + decodeAlphanumericSegment(bits_, result, count, fc1InEffect); + } else if (mode == &Mode::BYTE) { + decodeByteSegment(bits_, result, count, currentCharacterSetECI, byteSegments, hints); + } else if (mode == &Mode::KANJI) { + decodeKanjiSegment(bits_, result, count); } else { - throw FormatException.getFormatInstance(); + throw FormatException(); } } } } - } while (!mode.equals(Mode.TERMINATOR)); + } while (mode != &Mode::TERMINATOR); - return new DecoderResult(bytes, - result.toString(), - byteSegments.isEmpty() ? null : byteSegments, - ecLevel == null ? 
null : ecLevel.toString()); + return Ref(new DecoderResult(bytes, Ref(new String(result)), byteSegments, (string)ecLevel)); } diff --git a/cpp/core/src/zxing/qrcode/decoder/DecodedBitStreamParser.h b/cpp/core/src/zxing/qrcode/decoder/DecodedBitStreamParser.h index eb35f8827..086c31c96 100644 --- a/cpp/core/src/zxing/qrcode/decoder/DecodedBitStreamParser.h +++ b/cpp/core/src/zxing/qrcode/decoder/DecodedBitStreamParser.h @@ -24,33 +24,46 @@ #include #include +#include #include #include #include #include +#include +#include +#include namespace zxing { namespace qrcode { class DecodedBitStreamParser { +public: + typedef std::map Hashtable; + private: - static const char ALPHANUMERIC_CHARS[]; - - static const char *ASCII; - static const char *ISO88591; - static const char *UTF8; - static const char *SHIFT_JIS; - static const char *EUC_JP; + static char const ALPHANUMERIC_CHARS[]; + static char toAlphaNumericChar(size_t value); + static void decodeHanziSegment(Ref bits, std::string &result, int count); static void decodeKanjiSegment(Ref bits, std::string &result, int count); static void decodeByteSegment(Ref bits, std::string &result, int count); - static void decodeAlphanumericSegment(Ref bits, std::string &result, int count); + static void decodeByteSegment(Ref bits_, + std::string& result, + int count, + zxing::common::CharacterSetECI* currentCharacterSetECI, + ArrayRef< ArrayRef >& byteSegments, + Hashtable const& hints); + static void decodeAlphanumericSegment(Ref bits, std::string &result, int count, bool fc1InEffect); static void decodeNumericSegment(Ref bits, std::string &result, int count); - static const char *guessEncoding(unsigned char *bytes, int length); + static void append(std::string &ost, const unsigned char *bufIn, size_t nIn, const char *src); + static void append(std::string &ost, std::string const& in, const char *src); public: - static DecoderResulta decode(ArrayRef bytes, Version *version); + static Ref decode(ArrayRef bytes, + Version *version, + 
ErrorCorrectionLevel const& ecLevel, + Hashtable const& hints); }; } diff --git a/cpp/core/src/zxing/qrcode/decoder/Decoder.cpp b/cpp/core/src/zxing/qrcode/decoder/Decoder.cpp index 13023131d..7b86e652d 100644 --- a/cpp/core/src/zxing/qrcode/decoder/Decoder.cpp +++ b/cpp/core/src/zxing/qrcode/decoder/Decoder.cpp @@ -46,7 +46,7 @@ void Decoder::correctErrors(ArrayRef codewordBytes, int numDataCo try { rsDecoder_.decode(codewordInts, numECCodewords); - } catch (ReedSolomonException ex) { + } catch (ReedSolomonException const& ex) { ReaderException rex(ex.what()); throw rex; } @@ -92,11 +92,10 @@ Ref Decoder::decode(Ref bits) { } } - // Decode the contents of that stream of bytes - Ref text(new String(DecodedBitStreamParser::decode(resultBytes, version))); - - Ref result(new DecoderResult(resultBytes, text)); - return result; + return DecodedBitStreamParser::decode(resultBytes, + version, + ecLevel, + DecodedBitStreamParser::Hashtable()); } } diff --git a/cpp/core/src/zxing/qrcode/decoder/Mode.cpp b/cpp/core/src/zxing/qrcode/decoder/Mode.cpp index 6dcb24956..67b2f27cd 100644 --- a/cpp/core/src/zxing/qrcode/decoder/Mode.cpp +++ b/cpp/core/src/zxing/qrcode/decoder/Mode.cpp @@ -45,22 +45,33 @@ Mode::Mode(int cbv0_9, int cbv10_26, int cbv27, int bits, char const* name) : } Mode& Mode::forBits(int bits) { - switch (bits) { - case 0x0: - return TERMINATOR; - case 0x1: - return NUMERIC; - case 0x2: - return ALPHANUMERIC; - case 0x4: - return BYTE; - case 0x8: - return KANJI; - default: - ostringstream s; - s << "Illegal mode bits: " << bits; - throw ReaderException(s.str().c_str()); - } + switch (bits) { + case 0x0: + return TERMINATOR; + case 0x1: + return NUMERIC; + case 0x2: + return ALPHANUMERIC; + case 0x3: + return STRUCTURED_APPEND; + case 0x4: + return BYTE; + case 0x5: + return FNC1_FIRST_POSITION; + case 0x7: + return ECI; + case 0x8: + return KANJI; + case 0x9: + return FNC1_SECOND_POSITION; + case 0xD: + // 0xD is defined in GBT 18284-2000, may not be supported 
in foreign country + return HANZI; + default: + ostringstream s; + s << "Illegal mode bits: " << bits; + throw ReaderException(s.str().c_str()); + } } int Mode::getCharacterCountBits(Version *version) { diff --git a/cpp/core/src/zxing/qrcode/detector/Detector.cpp b/cpp/core/src/zxing/qrcode/detector/Detector.cpp index 527aadbb0..6e1bb14da 100644 --- a/cpp/core/src/zxing/qrcode/detector/Detector.cpp +++ b/cpp/core/src/zxing/qrcode/detector/Detector.cpp @@ -82,7 +82,7 @@ Ref Detector::detect(DecodeHints const& hints) { try { alignmentPattern = findAlignmentInRegion(moduleSize, estAlignmentX, estAlignmentY, (float)i); break; - } catch (zxing::ReaderException re) { + } catch (zxing::ReaderException const& re) { // try next round } } diff --git a/cpp/magick/src/main.cpp b/cpp/magick/src/main.cpp index 7dfbb6df6..91056bfaf 100644 --- a/cpp/magick/src/main.cpp +++ b/cpp/magick/src/main.cpp @@ -1,3 +1,4 @@ +// -*- mode:c++; tab-width:2; indent-tabs-mode:nil; c-basic-offset:2 -*- /* * main.cpp * zxing