View Javadoc

1   package org.paneris.bibliomania.metasearch.util;
2   
/**
 * A minimal cursor-based scanner over a byte buffer (typically a fetched
 * HTML page).  Subclasses drive the cursor {@link #here} forward with the
 * <code>skip*</code> methods and pull out fragments with
 * {@link #plaintext()}, {@link #digits()} and
 * {@link #quotedStringFromBack(int)}.
 *
 * Bytes are compared raw; where bytes are turned into strings the platform
 * default charset is used.
 */
public class HackParser {

  /**
   * Thrown when the buffer does not contain what the parser was asked
   * to extract at the current position.
   */
  protected static class ParseException extends RuntimeException {
    private static final long serialVersionUID = 1L;

    /** Cursor position at which the problem was detected. */
    private final int offset;
    /** Short description of what went wrong. */
    private final String reason;

    /**
     * @param offset   cursor position at which the problem was detected
     * @param expected description of the problem, used as the start of
     *                 the message
     */
    public ParseException(int offset, String expected) {
      super();
      this.offset = offset;
      this.reason = expected;
    }

    /**
     * {@inheritDoc}
     * @return the reason followed by the offset
     * @see java.lang.Throwable#getMessage()
     */
    public String getMessage() {
      return reason + " at  " + offset;
    }
  }

  /** The buffer being scanned. */
  protected byte[] text;
  /** Index of the next byte to be examined. */
  protected int here = 0;

  /**
   * @param text the buffer to scan; it is used directly, not copied
   */
  public HackParser(byte[] text) {
    this.text = text;
  }

  /**
   * Advance the cursor to just past the next occurrence of a byte.
   *
   * @param c the byte to look for
   * @throws ArrayIndexOutOfBoundsException if <code>c</code> does not occur
   *         at or after the cursor
   */
  protected final void skipTo(byte c) {
    while (text[here++] != c) {
      // keep scanning; running off the end raises the documented exception
    }
  }

  /**
   * Advance the cursor to just past the next occurrence of either of
   * two bytes, whichever comes first.
   *
   * @param c first byte to look for
   * @param d second byte to look for
   * @throws ArrayIndexOutOfBoundsException if neither byte occurs at or
   *         after the cursor
   */
  protected final void skipTo(byte c, byte d) {
    for (;;) {
      byte t = text[here++];
      if (t == c || t == d) {
        break;
      }
    }
  }

  /**
   * Find the first occurrence of one byte sequence within another.
   *
   * @param text the buffer to search
   * @param s    the sequence to look for
   * @return the index in <code>text</code> at which <code>s</code> first
   *         starts, <code>0</code> if <code>s</code> is empty, or
   *         <code>-1</code> if <code>s</code> does not occur
   */
  public static int indexOf(byte[] text, byte[] s) {
    // An empty pattern matches at the start, as String.indexOf("") does.
    if (s.length == 0) {
      return 0;
    }

    byte s0 = s[0];
    int l = text.length - s.length; // last index at which s could still fit
    for (int h = 0; h <= l; ++h) {
      if (text[h] == s0) {
        for (int i = 1;; ++i) {
          if (i >= s.length) {
            return h;
          }
          if (text[h + i] != s[i]) {
            break;
          }
        }
      }
    }

    return -1;
  }

  /**
   * Advance the cursor to just past the next occurrence of a byte
   * sequence.  An empty sequence leaves the cursor where it is.
   *
   * @param s the sequence to look for
   * @throws RuntimeException if the buffer ends part-way through a
   *         candidate match
   * @throws ArrayIndexOutOfBoundsException if the first byte of
   *         <code>s</code> does not occur at or after the cursor
   */
  protected final void skipTo(byte[] s) {
    if (s.length == 0) {
      return;
    }

    // FIXME dumb way: could use skip table etc.
    int startFrom = here;
    for (;;) {
      skipTo(s[0]);
      // 'here' stays just past the first byte so that a failed candidate
      // resumes the search from there; herePointer probes the remainder.
      int herePointer = here;
      for (int i = 1;; ++i, ++herePointer) {
        if (i >= s.length) {
          here = herePointer;
          return;
        }
        if (herePointer == text.length) {
          // Dump the buffer to help diagnose why the marker is missing.
          System.err.println(new String(text));
          throw new RuntimeException("String \"" + new String(s) +
              "\" not found starting at " + startFrom + " and looking to " + herePointer );
        }
        if (text[herePointer] != s[i]) {
          break;
        }
      }
    }
  }

  /**
   * Advance the cursor over any whitespace bytes, stopping at the first
   * non-whitespace byte or at the end of the buffer.
   */
  protected final void skipRealSpace() {
    while (here < text.length && Character.isWhitespace((char)text[here])) {
      ++here;
    }
  }

  /**
   * If the bytes at the cursor are exactly <code>s</code>, advance past
   * them; otherwise leave the cursor alone.
   *
   * @param s the sequence expected at the cursor
   * @return whether <code>s</code> was present and skipped
   */
  protected final boolean skipMaybe(byte[] s) {
    if (here + s.length > text.length) {
      return false;
    }

    for (int i = 0; i < s.length; ++i) {
      if (text[here + i] != s[i]) {
        return false;
      }
    }

    here += s.length;
    return true;
  }

  /** The bytes of the HTML non-breaking-space entity. */
  protected static final byte[] nbsp = "&nbsp;".getBytes();

  /**
   * Advance the cursor over any mixture of whitespace bytes and
   * <code>&amp;nbsp;</code> entities.
   */
  protected final void skipSpace() {
    do {
      skipRealSpace();
    } while (skipMaybe(nbsp));
  }

  /**
   * Extract the text between the cursor and the next <code>&lt;</code>
   * (or <code>term</code>, if given), with surrounding whitespace and
   * trailing <code>&amp;nbsp;</code> entities removed.  The cursor is left
   * just past the terminating byte.
   *
   * @param term an additional terminating byte, or <code>0</code> to
   *             stop only at <code>&lt;</code>
   * @return the trimmed text
   * @throws ParseException if there is no text before the terminator
   * @throws ArrayIndexOutOfBoundsException if no terminator occurs at or
   *         after the cursor
   */
  protected final String plaintext(byte term) {
    skipSpace();
    int start = here;
    if (term == 0) {
      skipTo((byte)'<');
    } else {
      skipTo((byte)'<', term);
    }

    // here - 1 is the terminator, so here - 2 is the last candidate byte.
    int end = here - 2;
    // Trim trailing whitespace, but never scan back past the start of the
    // text: otherwise an empty text preceded only by whitespace would run
    // off the front of the buffer instead of reporting a ParseException.
    while (end >= start && Character.isWhitespace((char)text[end])) {
      --end;
    }

    ++end; // make end exclusive

    if (end <= start) {
      throw new ParseException(here, "at end");
    }

    String it = new String(text, start, end - start);

    while (it.endsWith("&nbsp;")) {
      it = it.substring(0, it.length() - 6).trim();
    }

    return it;
  }

  /**
   * Extract the text between the cursor and the next <code>&lt;</code>.
   *
   * @return the trimmed text
   * @see #plaintext(byte)
   */
  protected final String plaintext() {
    return plaintext((byte)'\0');
  }

  /**
   * Extract the run of digits starting at the cursor, leaving the cursor
   * just past it.  The run may extend to the end of the buffer.
   *
   * @return the digits, at least one
   * @throws ParseException if the byte at the cursor is not a digit
   * @throws ArrayIndexOutOfBoundsException if the cursor is already at
   *         the end of the buffer
   */
  protected final String digits() {
    int start = here;
    if (!Character.isDigit((char)text[here])) {
      throw new ParseException(here, "not digit");
    }
    ++here;
    while (here < text.length && Character.isDigit((char)text[here])) {
      ++here;
    }
    return new String(text, start, here - start);
  }

  /**
   * Extract a double-quoted string that the cursor is inside of.  The
   * opening quote is found by scanning backwards from
   * <code>here - off</code>, the closing quote by scanning forwards from
   * the cursor, which is left just past the closing quote.
   *
   * @param off how far before the cursor to begin the backward scan
   * @return the text between the two quotes
   * @throws ArrayIndexOutOfBoundsException if either quote is missing
   */
  protected final String quotedStringFromBack(int off) {
    int start = here - off;
    while (text[start] != '"') {
      --start;
    }
    skipTo((byte)'"');
    // start is the opening quote and here - 1 the closing one.
    return new String(text, start + 1, here - start - 2);
  }
}