| 1 | |
package org.paneris.bibliomania.metasearch.amazon; |
| 2 | |
|
| 3 | |
import java.io.FileInputStream; |
| 4 | |
|
| 5 | |
import org.melati.util.IoUtils; |
| 6 | |
import org.paneris.bibliomania.metasearch.util.HackParser; |
| 7 | |
|
| 8 | |
public class BookPage extends HackParser { |
| 9 | |
public String publisher; |
| 10 | |
public String image; |
| 11 | |
|
| 12 | 0 | private static final byte[] isbnColon = "ISBN:".getBytes(); |
| 13 | |
|
| 14 | |
public BookPage(byte[] text, String isbn) { |
| 15 | 0 | super(text); |
| 16 | |
|
| 17 | |
try { |
| 18 | 0 | skipTo(isbnColon); |
| 19 | 0 | while (text[here] != ';') |
| 20 | 0 | --here; |
| 21 | 0 | int end = here; |
| 22 | 0 | while (text[here] != '\n') |
| 23 | 0 | --here; |
| 24 | 0 | ++here; |
| 25 | 0 | while (text[here] == ' ') |
| 26 | 0 | ++here; |
| 27 | 0 | publisher = new String(text, here, end - here); |
| 28 | |
|
| 29 | 0 | here = 0; |
| 30 | |
|
| 31 | 0 | skipTo(("src=\"http://images.amazon.com/images/P/" + isbn).getBytes()); |
| 32 | 0 | image = quotedStringFromBack(20); |
| 33 | 0 | } catch (Exception e) { |
| 34 | 0 | } |
| 35 | 0 | } |
| 36 | |
|
| 37 | |
public static void main(String[] args) throws Exception { |
| 38 | 0 | BookPage p = |
| 39 | |
new BookPage(IoUtils.slurp(new FileInputStream(args[0]), 1000), args[1]); |
| 40 | 0 | System.out.println(p.publisher); |
| 41 | 0 | System.out.println(p.image); |
| 42 | 0 | } |
| 43 | |
} |