1 package org.paneris.bibliomania.fti;
2
3 import java.io.BufferedInputStream;
4 import java.io.File;
5 import java.io.FileInputStream;
6 import java.io.IOException;
7 import java.io.InputStream;
8
9 import com.sleepycat.db.DbException;
10
11 public class ContextSearchResults implements SearchResults {
12
13 private static byte[] skipBuffer = new byte[5000];
14
15 public static final String contextUnavailable = "(context not available)";
16
17 public static final int contextWordsAfterHit = 5;
18
19 public static final int wordsGapBetweenAreas = 6;
20
21 private Library library;
22
23 private Text currentText = null;
24
25 private SearchResults results;
26
27 private AnchorFinder blockmarks;
28
29
30
31
32 private InputStream body = null;
33
34 private long bodyPosition;
35
36 public ContextSearchResults(Library library, SearchResults results,
37 IndexOther fti) {
38 this.library = library;
39 this.results = results;
40 blockmarks = new AnchorFinder(fti, true);
41 }
42
43 public int frequency() {
44 return results.frequency();
45 }
46
47 public int hitWordsCount() {
48 return results.hitWordsCount();
49 }
50
51 public void init() {
52 closeBody();
53 results.init();
54 }
55
56 public void skipToNextHit() {
57 results.skipToNextHit();
58 }
59
60 public void skipToWordIndex(int wordIndex) {
61 results.skipToWordIndex(wordIndex);
62 }
63
64 public int currentWordIndex() {
65 return results.currentWordIndex();
66 }
67
68 public int currentOffset() {
69 return results.currentOffset();
70 }
71
72 public void gotoText(long textID) throws DbException {
73 closeBody();
74 results.gotoText(textID);
75 long it = results.currentTextID();
76 currentText = it == -1 ? null : library.text(it);
77 }
78
79 public void gotoPosition(long position) throws DbException {
80 gotoText(position >> 32);
81 if (currentTextID() != -1)
82 skipToWordIndex((int) position);
83 }
84
85 public long getPosition() {
86 int wi = currentWordIndex();
87 return wi == -1 ? -1 : (currentTextID() << 32 | wi);
88 }
89
90 public long currentTextID() {
91 return results.currentTextID();
92 }
93
94 public Text currentText() {
95 return currentText;
96 }
97
98 public String currentAnchor() throws DbException {
99
100
101 int index = currentWordIndex();
102 return index == -1 ? null : blockmarks
103 .anchorOfIndex(currentTextID(), index);
104 }
105
106 private synchronized void closeBody() {
107 if (body != null) {
108 try {
109 body.close();
110 } catch (IOException e) {
111 }
112 body = null;
113 }
114 }
115
116 public synchronized String nextArea() {
117 try {
118 long target = results.currentOffset();
119 if (target == -1) {
120 closeBody();
121 return null;
122 }
123
124 if (body == null) {
125 if (currentText == null)
126 return null;
127
128 body = new BufferedInputStream(currentText.bodyForFragment());
129 bodyPosition = 0;
130 }
131
132 if (bodyPosition > target)
133 throw new IOException("tried to skip backwards");
134
135
136
137
138
139 if (bodyPosition == 0)
140 bodyPosition += body.skip(target - bodyPosition);
141 else {
142 long read;
143 do {
144 bodyPosition += (read = body.read(skipBuffer, 0, (int) Math.min(
145 target - bodyPosition, skipBuffer.length)));
146 } while (read > 0);
147 }
148
149 if (bodyPosition < target)
150 throw new IOException("skipped only to " + bodyPosition + " not "
151 + target);
152
153 StringBuffer buf = new StringBuffer();
154 IndexTokenizer words = new IndexTokenizer(body, true);
155 int baseIndex = Math.max(results.currentWordIndex()
156 - IndexOther.contextWordsBeforeHit, 0);
157
158 do {
159 int limitIndex = results.currentWordIndex() + results.hitWordsCount()
160 + contextWordsAfterHit - baseIndex;
161
162 while (words.hasMoreWords() && words.wordIndex() < limitIndex) {
163 String word = words.nextWord();
164 if (!word.startsWith("#")) {
165 if (words.hadBreak() && buf.length() > 0)
166 buf.append(' ');
167 buf.append(word);
168 }
169 }
170
171
172
173 results.skipToNextHit();
174 } while (results.currentWordIndex() != -1
175 && (results.currentWordIndex() - baseIndex) - words.wordIndex() < wordsGapBetweenAreas);
176
177 bodyPosition += words.bytesReadFromUnderlyingStream();
178
179 return buf.toString();
180 } catch (IOException e) {
181 System.err.println(e);
182
183 try {
184 body.close();
185 } catch (Exception ee) {
186 }
187
188 return null;
189 }
190 }
191
192 public static void main(String[] args) throws Exception {
193 IndexOther fti = new IndexOther(new File("/tmp"));
194
195 SearchResults rawResults;
196 if (args[0].charAt(0) == '_') {
197 args[0] = args[0].substring(1);
198 rawResults = fti.groupSearchResults(args);
199 System.out.println("phrase");
200 } else if (args[0].equals("-query")) {
201 rawResults = fti.querySearchResults(args[1]);
202 } else {
203 rawResults = fti.andSearchResults(args);
204 System.out.println("and");
205 }
206
207 ContextSearchResults results = new ContextSearchResults(new Library() {
208 public Text text(final long textID) {
209 return new Text() {
210 public InputStream body() throws IOException {
211 return new FileInputStream(
212 "/usr/doc/HOWTO/other-formats/html/CDROM-HOWTO-3.html");
213 }
214
215 public InputStream bodyForFragment() throws IOException {
216 return body();
217 }
218
219 public long ftiTextID() {
220 return textID;
221 }
222 };
223 }
224 }, rawResults, fti);
225
226 for (results.gotoText(0); results.currentTextID() != -1;
227
228 results.gotoText(results.currentTextID() + 1)) {
229 System.out.println("== " + results.currentTextID());
230 for (;;) {
231 String anchor = results.currentAnchor();
232 String area = results.nextArea();
233 if (area == null)
234 break;
235 System.out.println("-- A HREF=http://doc#" + anchor);
236 System.out.println(area);
237 }
238 }
239 }
240
241 public void close() {
242 results.close();
243 blockmarks.close();
244 closeBody();
245 }
246 }