Issue 1347 retrieve HTML title as supplemental info

git-svn-id: https://zxing.googlecode.com/svn/trunk@2397 59b500cc-1b3d-0410-9834-0bbf25fbcc57
This commit is contained in:
srowen 2012-08-28 09:30:13 +00:00
parent a3db02fa49
commit 82ab2cd2cd
6 changed files with 106 additions and 35 deletions

View file

@ -18,10 +18,9 @@ package com.google.zxing.client.android;
import android.util.Log;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URISyntaxException;
@ -56,13 +55,23 @@ public final class HttpHelper {
TEXT,
}
/**
* Downloads the entire resource instead of part.
*
* @see #downloadViaHttp(String, HttpHelper.ContentType, int)
*/
public static CharSequence downloadViaHttp(String uri, ContentType type) throws IOException {
return downloadViaHttp(uri, type, Integer.MAX_VALUE);
}
/**
* @param uri URI to retrieve
* @param type expected text-like MIME type of that content
* @param maxChars approximate maximum characters to read from the source
* @return content as a {@code CharSequence}
* @throws IOException if the content can't be retrieved because of a bad URI, network problem, etc.
*/
public static String downloadViaHttp(String uri, ContentType type) throws IOException {
public static CharSequence downloadViaHttp(String uri, ContentType type, int maxChars) throws IOException {
String contentTypes;
switch (type) {
case HTML:
@ -75,10 +84,10 @@ public final class HttpHelper {
default:
contentTypes = "text/*,*/*";
}
return downloadViaHttp(uri, contentTypes);
return downloadViaHttp(uri, contentTypes, maxChars);
}
private static String downloadViaHttp(String uri, String contentTypes) throws IOException {
private static CharSequence downloadViaHttp(String uri, String contentTypes, int maxChars) throws IOException {
Log.i(TAG, "Downloading " + uri);
URL url = new URL(uri);
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
@ -91,7 +100,7 @@ public final class HttpHelper {
throw new IOException("Bad HTTP response: " + connection.getResponseCode());
}
Log.i(TAG, "Consuming " + uri);
return consume(connection);
return consume(connection, maxChars);
} finally {
connection.disconnect();
}
@ -108,34 +117,27 @@ public final class HttpHelper {
return "UTF-8";
}
private static String consume(URLConnection connection) throws IOException {
private static CharSequence consume(URLConnection connection, int maxChars) throws IOException {
String encoding = getEncoding(connection);
ByteArrayOutputStream out = new ByteArrayOutputStream();
InputStream in = connection.getInputStream();
StringBuilder out = new StringBuilder();
Reader in = null;
try {
in = connection.getInputStream();
byte[] buffer = new byte[1024];
int bytesRead;
while ((bytesRead = in.read(buffer)) > 0) {
out.write(buffer, 0, bytesRead);
in = new InputStreamReader(connection.getInputStream(), encoding);
char[] buffer = new char[1024];
int charsRead;
while (out.length() < maxChars && (charsRead = in.read(buffer)) > 0) {
out.append(buffer, 0, charsRead);
}
} finally {
try {
in.close();
} catch (IOException ioe) {
// continue
}
}
try {
return new String(out.toByteArray(), encoding);
} catch (UnsupportedEncodingException uee) {
try {
return new String(out.toByteArray(), "UTF-8");
} catch (UnsupportedEncodingException uee2) {
// can't happen
throw new IllegalStateException(uee2);
if (in != null) {
try {
in.close();
} catch (IOException ioe) {
// continue
}
}
}
return out;
}
public static URI unredirect(URI uri) throws IOException {

View file

@ -187,8 +187,8 @@ public final class SearchBookContentsActivity extends Activity {
} else {
uri = "http://www.google.com/books?vid=isbn" + theIsbn + "&jscmd=SearchWithinVolume2&q=" + theQuery;
}
String content = HttpHelper.downloadViaHttp(uri, HttpHelper.ContentType.JSON);
return new JSONObject(content);
CharSequence content = HttpHelper.downloadViaHttp(uri, HttpHelper.ContentType.JSON);
return new JSONObject(content.toString());
} catch (IOException ioe) {
Log.w(TAG, "Error accessing book search", ioe);
return null;

View file

@ -52,8 +52,8 @@ final class BookResultInfoRetriever extends SupplementalInfoRetriever {
@Override
void retrieveSupplementalInfo() throws IOException {
String contents = HttpHelper.downloadViaHttp("https://www.googleapis.com/books/v1/volumes?q=isbn:" + isbn,
HttpHelper.ContentType.JSON);
CharSequence contents = HttpHelper.downloadViaHttp("https://www.googleapis.com/books/v1/volumes?q=isbn:" + isbn,
HttpHelper.ContentType.JSON);
if (contents.length() == 0) {
return;
@ -65,7 +65,7 @@ final class BookResultInfoRetriever extends SupplementalInfoRetriever {
try {
JSONObject topLevel = (JSONObject) new JSONTokener(contents).nextValue();
JSONObject topLevel = (JSONObject) new JSONTokener(contents.toString()).nextValue();
JSONArray items = topLevel.optJSONArray("items");
if (items == null || items.isNull(0)) {
return;

View file

@ -53,7 +53,7 @@ final class ProductResultInfoRetriever extends SupplementalInfoRetriever {
String encodedProductID = URLEncoder.encode(productID, "UTF-8");
String uri = "http://www.google." + LocaleManager.getProductSearchCountryTLD(context)
+ "/m/products?ie=utf8&oe=utf8&scoring=p&source=zxing&q=" + encodedProductID;
String content = HttpHelper.downloadViaHttp(uri, HttpHelper.ContentType.HTML);
CharSequence content = HttpHelper.downloadViaHttp(uri, HttpHelper.ContentType.HTML);
for (Pattern p : PRODUCT_NAME_PRICE_PATTERNS) {
Matcher matcher = p.matcher(content);

View file

@ -50,6 +50,7 @@ public abstract class SupplementalInfoRetriever extends AsyncTask<Object,Object,
AsyncTaskExecInterface taskExec = new AsyncTaskExecManager().build();
if (result instanceof URIParsedResult) {
taskExec.execute(new URIResultInfoRetriever(textView, (URIParsedResult) result, historyManager, context));
taskExec.execute(new TitleRetriever(textView, (URIParsedResult) result, historyManager));
} else if (result instanceof ProductParsedResult) {
String productID = ((ProductParsedResult) result).getProductID();
taskExec.execute(new ProductResultInfoRetriever(textView, productID, historyManager, context));

View file

@ -0,0 +1,68 @@
/*
* Copyright 2012 ZXing authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.zxing.client.android.result.supplement;
import android.widget.TextView;
import com.google.zxing.client.android.HttpHelper;
import com.google.zxing.client.android.history.HistoryManager;
import com.google.zxing.client.result.URIParsedResult;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Retrieves the title of a web page as supplemental info.
 *
 * <p>Only the beginning of the page is requested, and the title is located with a regular
 * expression rather than an HTML parser. Character entities in the title (such as
 * {@code &amp;}) are not decoded.</p>
 *
 * @author Sean Owen
 */
final class TitleRetriever extends SupplementalInfoRetriever {

  /**
   * Matches an opening {@code title} tag, in any letter case and with optional attributes,
   * and captures the text that follows it up to the next {@code '<'}.
   */
  private static final Pattern TITLE_PATTERN =
      Pattern.compile("<title(?:\\s[^>]*)?>([^<]+)", Pattern.CASE_INSENSITIVE);
  /** Matches a run of whitespace, including the line breaks often found inside title elements. */
  private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");
  /** Longest title, in chars, that is shown before it is cut and suffixed with "...". */
  private static final int MAX_TITLE_LEN = 100;
  /** Approximate number of characters of the page to request when looking for the title. */
  private static final int MAX_CHARS_TO_READ = 4096;

  private final String httpUrl;

  /**
   * @param textView view passed through to the superclass constructor
   * @param result parsed URI result whose URI is the page to fetch
   * @param historyManager history manager passed through to the superclass constructor
   */
  TitleRetriever(TextView textView, URIParsedResult result, HistoryManager historyManager) {
    super(textView, historyManager);
    this.httpUrl = result.getURI();
  }

  /**
   * Downloads the start of the page at the URI and, if a non-blank title is found in it,
   * appends that title as supplemental info. Download failures are deliberately ignored:
   * in that case nothing is appended.
   */
  @Override
  void retrieveSupplementalInfo() {
    CharSequence contents;
    try {
      contents = HttpHelper.downloadViaHttp(httpUrl, HttpHelper.ContentType.HTML, MAX_CHARS_TO_READ);
    } catch (IOException ioe) {
      // The title is optional extra info; a page that can't be fetched simply has none.
      return;
    }
    String title = extractTitle(contents);
    if (title != null) {
      append(httpUrl, null, new String[] {title}, httpUrl);
    }
  }

  /**
   * Finds the page title in a fragment of HTML.
   *
   * @param html beginning of an HTML document; may be {@code null} or empty
   * @return the title with surrounding whitespace removed, internal whitespace runs collapsed
   *  to single spaces, and shortened to about {@link #MAX_TITLE_LEN} chars plus "..." if longer;
   *  or {@code null} if no non-blank title is present
   */
  private static String extractTitle(CharSequence html) {
    if (html == null || html.length() == 0) {
      return null;
    }
    Matcher m = TITLE_PATTERN.matcher(html);
    if (!m.find()) {
      return null;
    }
    // Titles are frequently written across several indented lines; normalise before display.
    String title = WHITESPACE_RUN.matcher(m.group(1)).replaceAll(" ").trim();
    if (title.isEmpty()) {
      return null;
    }
    if (title.length() > MAX_TITLE_LEN) {
      int end = MAX_TITLE_LEN;
      // Don't cut a surrogate pair in half; drop the dangling high surrogate instead.
      if (Character.isHighSurrogate(title.charAt(end - 1))) {
        end--;
      }
      title = title.substring(0, end) + "...";
    }
    return title;
  }

}