Coverage Report

Coverage Report - org.paneris.bibliomania.fti.ContextSearchResults

Classes in this File

Line Coverage

Branch Coverage

Complexity

ContextSearchResults

0/106

0/46

2.545

ContextSearchResults$1

0/2

N/A

2.545

ContextSearchResults$1$1

0/4

N/A

2.545

 package org.paneris.bibliomania.fti;
 
 import java.io.BufferedInputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 
 import org.melati.poem.NoSuchRowPoemException;
 
 import com.sleepycat.db.DatabaseException;
 
 public class ContextSearchResults implements SearchResults {
 
   private static byte[] skipBuffer = new byte[5000];
 
   public static final String contextUnavailable = "(context not available)";
 
   public static final int contextWordsAfterHit = 5;
 
   public static final int wordsGapBetweenAreas = 6;
 
   private Library library;
 
   private Text currentText = null;
 
   private SearchResults results;
 
   private AnchorFinder blockmarks;
 
   // we don't want to use a Reader for this because then skip is implemented
   // using repeated reads ...
 
   private InputStream body = null;
 
   private long bodyPosition;
 
   public ContextSearchResults(Library library, SearchResults results,
       IndexOther fti) {
     this.library = library;
     this.results = results;
     blockmarks = new AnchorFinder(fti, true);
   }
 
   public int frequency() {
     return results.frequency();
   }
 
   public int hitWordsCount() {
     return results.hitWordsCount();
   }
 
   public void init() {
     closeBody();
     results.init();
   }
 
   public void skipToNextHit() {
     results.skipToNextHit();
   }
 
   public void skipToWordIndex(int wordIndex) {
     results.skipToWordIndex(wordIndex);
   }
 
   public int currentWordIndex() {
     return results.currentWordIndex();
   }
 
   public int currentOffset() {
     return results.currentOffset();
   }
 
   public void gotoText(long textID) throws DatabaseException {
     closeBody();
     results.gotoText(textID);
     long it = results.currentTextID();
     currentText = it == -1 ? null : library.text(it);
   }
 
   public void gotoPosition(long position) throws DatabaseException {
     gotoText(position >> 32);
     if (currentTextID() != -1)
       skipToWordIndex((int) position);
   }
 
   public long getPosition() {
     int wi = currentWordIndex();
     return wi == -1 ? -1 : (currentTextID() << 32 | wi);
   }
 
   public long currentTextID() {
     return results.currentTextID();
   }
 
   public Text currentText() {
     return currentText;
   }
 
   public String currentAnchor() throws DatabaseException {
     // FIXME not necessarily very optimal
     // could exploit forward-moving definition of these cursors
     int index = currentWordIndex();
     return index == -1 ? null : blockmarks
         .anchorOfIndex(currentTextID(), index);
   }
 
   private synchronized void closeBody() {
     if (body != null) {
       try {
         body.close();
       } catch (IOException e) {
       }
       body = null;
     }
   }
 
   public synchronized String nextArea() {
     try {
       long target = results.currentOffset();
       if (target == -1) {
         closeBody();
         return null;
       }
 
       if (body == null) {
         if (currentText == null)
           return null;
         try { 
           body = new BufferedInputStream(currentText.bodyForFragment());
           bodyPosition = 0;
         } catch (NoSuchRowPoemException e) { 
           // This should not happen in a coherent database, but 
           // the one on my machine isn't coherent
           return null;
         }
       }
 
       if (bodyPosition > target)
         throw new IOException("tried to skip backwards");
 
       // FIXME BufferedInputStream seems genuinely not to work well
       // wrt to skipping ... so we use skip for the first move and dummy
       // reads thereafter
 
       if (bodyPosition == 0)
         bodyPosition += body.skip(target - bodyPosition);
       else {
         long read;
         do {
           bodyPosition += (read = body.read(skipBuffer, 0, (int) Math.min(
               target - bodyPosition, skipBuffer.length)));
         } while (read > 0);
       }
 
       if (bodyPosition < target)
         throw new IOException("skipped only to " + bodyPosition + " not "
             + target);
 
       StringBuffer buf = new StringBuffer();
       IndexTokenizer words = new IndexTokenizer(body, true);
       int baseIndex = Math.max(results.currentWordIndex()
           - IndexOther.contextWordsBeforeHit, 0);
       // int lastIndex = -1;
       do {
         int limitIndex = results.currentWordIndex() + results.hitWordsCount()
             + contextWordsAfterHit - baseIndex;
 
         while (words.hasMoreWords() && words.wordIndex() < limitIndex) {
           String word = words.nextWord();
           if (!word.startsWith("#")) {
             if (words.hadBreak() && buf.length() > 0)
               buf.append(' ');
             buf.append(word);
           }
         }
 
         // lastIndex = results.currentWordIndex() + results.hitWordsCount();
 
         results.skipToNextHit();
       } while (results.currentWordIndex() != -1
           && (results.currentWordIndex() - baseIndex) - words.wordIndex() < wordsGapBetweenAreas);
 
       bodyPosition += words.bytesReadFromUnderlyingStream();
 
       return buf.toString();
     } catch (IOException e) {
       System.err.println(e);
 
       try {
         body.close();
       } catch (Exception ee) {
       }
 
       return null;
     }
   }
 
   public static void main(String[] args) throws Exception {
     IndexOther fti = new IndexOther(new File("/tmp"));
 
     SearchResults rawResults;
     if (args[0].charAt(0) == '_') {
       args[0] = args[0].substring(1);
       rawResults = fti.groupSearchResults(args);
       System.out.println("phrase");
     } else if (args[0].equals("-query")) {
       rawResults = fti.querySearchResults(args[1]);
     } else {
       rawResults = fti.andSearchResults(args);
       System.out.println("and");
     }
 
     ContextSearchResults results = new ContextSearchResults(new Library() {
       public Text text(final long textID) {
         return new Text() {
           public InputStream body() throws IOException {
             return new FileInputStream(
                 "/usr/doc/HOWTO/other-formats/html/CDROM-HOWTO-3.html");
           }
 
           public InputStream bodyForFragment() throws IOException {
             return body();
           }
 
           public long ftiTextID() {
             return textID;
           }
         };
       }
     }, rawResults, fti);
 
     for (results.gotoText(0); results.currentTextID() != -1;
     // FIXME this isn't very clever! (maybe?)
     results.gotoText(results.currentTextID() + 1)) {
       System.out.println("== " + results.currentTextID());
       for (;;) {
         String anchor = results.currentAnchor();
         String area = results.nextArea();
         if (area == null)
           break;
         System.out.println("-- A HREF=http://doc#" + anchor);
         System.out.println(area);
       }
     }
   }
 
   public void close() {
     results.close();
     blockmarks.close();
     closeBody();
   }
 }

1		package org.paneris.bibliomania.fti;
2
3		import java.io.BufferedInputStream;
4		import java.io.File;
5		import java.io.FileInputStream;
6		import java.io.IOException;
7		import java.io.InputStream;
8
9		import org.melati.poem.NoSuchRowPoemException;
10
11		import com.sleepycat.db.DatabaseException;
12
13		public class ContextSearchResults implements SearchResults {
14
15	0	private static byte[] skipBuffer = new byte[5000];
16
17		public static final String contextUnavailable = "(context not available)";
18
19		public static final int contextWordsAfterHit = 5;
20
21		public static final int wordsGapBetweenAreas = 6;
22
23		private Library library;
24
25	0	private Text currentText = null;
26
27		private SearchResults results;
28
29		private AnchorFinder blockmarks;
30
31		// we don't want to use a Reader for this because then skip is implemented
32		// using repeated reads ...
33
34	0	private InputStream body = null;
35
36		private long bodyPosition;
37
38		public ContextSearchResults(Library library, SearchResults results,
39	0	IndexOther fti) {
40	0	this.library = library;
41	0	this.results = results;
42	0	blockmarks = new AnchorFinder(fti, true);
43	0	}
44
45		public int frequency() {
46	0	return results.frequency();
47		}
48
49		public int hitWordsCount() {
50	0	return results.hitWordsCount();
51		}
52
53		public void init() {
54	0	closeBody();
55	0	results.init();
56	0	}
57
58		public void skipToNextHit() {
59	0	results.skipToNextHit();
60	0	}
61
62		public void skipToWordIndex(int wordIndex) {
63	0	results.skipToWordIndex(wordIndex);
64	0	}
65
66		public int currentWordIndex() {
67	0	return results.currentWordIndex();
68		}
69
70		public int currentOffset() {
71	0	return results.currentOffset();
72		}
73
74		public void gotoText(long textID) throws DatabaseException {
75	0	closeBody();
76	0	results.gotoText(textID);
77	0	long it = results.currentTextID();
78	0	currentText = it == -1 ? null : library.text(it);
79	0	}
80
81		public void gotoPosition(long position) throws DatabaseException {
82	0	gotoText(position >> 32);
83	0	if (currentTextID() != -1)
84	0	skipToWordIndex((int) position);
85	0	}
86
87		public long getPosition() {
88	0	int wi = currentWordIndex();
89	0	return wi == -1 ? -1 : (currentTextID() << 32 | wi);
90		}
91
92		public long currentTextID() {
93	0	return results.currentTextID();
94		}
95
96		public Text currentText() {
97	0	return currentText;
98		}
99
100		public String currentAnchor() throws DatabaseException {
101		// FIXME not necessarily very optimal
102		// could exploit forward-moving definition of these cursors
103	0	int index = currentWordIndex();
104	0	return index == -1 ? null : blockmarks
105		.anchorOfIndex(currentTextID(), index);
106		}
107
108		private synchronized void closeBody() {
109	0	if (body != null) {
110		try {
111	0	body.close();
112	0	} catch (IOException e) {
113	0	}
114	0	body = null;
115		}
116	0	}
117
118		public synchronized String nextArea() {
119		try {
120	0	long target = results.currentOffset();
121	0	if (target == -1) {
122	0	closeBody();
123	0	return null;
124		}
125
126	0	if (body == null) {
127	0	if (currentText == null)
128	0	return null;
129		try {
130	0	body = new BufferedInputStream(currentText.bodyForFragment());
131	0	bodyPosition = 0;
132	0	} catch (NoSuchRowPoemException e) {
133		// This should not happen in a coherent database, but
134		// the one on my machine isn't coherent
135	0	return null;
136	0	}
137		}
138
139	0	if (bodyPosition > target)
140	0	throw new IOException("tried to skip backwards");
141
142		// FIXME BufferedInputStream seems genuinely not to work well
143		// wrt to skipping ... so we use skip for the first move and dummy
144		// reads thereafter
145
146	0	if (bodyPosition == 0)
147	0	bodyPosition += body.skip(target - bodyPosition);
148		else {
149		long read;
150		do {
151	0	bodyPosition += (read = body.read(skipBuffer, 0, (int) Math.min(
152		target - bodyPosition, skipBuffer.length)));
153	0	} while (read > 0);
154		}
155
156	0	if (bodyPosition < target)
157	0	throw new IOException("skipped only to " + bodyPosition + " not "
158		+ target);
159
160	0	StringBuffer buf = new StringBuffer();
161	0	IndexTokenizer words = new IndexTokenizer(body, true);
162	0	int baseIndex = Math.max(results.currentWordIndex()
163		- IndexOther.contextWordsBeforeHit, 0);
164		// int lastIndex = -1;
165		do {
166	0	int limitIndex = results.currentWordIndex() + results.hitWordsCount()
167		+ contextWordsAfterHit - baseIndex;
168
169	0	while (words.hasMoreWords() && words.wordIndex() < limitIndex) {
170	0	String word = words.nextWord();
171	0	if (!word.startsWith("#")) {
172	0	if (words.hadBreak() && buf.length() > 0)
173	0	buf.append(' ');
174	0	buf.append(word);
175		}
176	0	}
177
178		// lastIndex = results.currentWordIndex() + results.hitWordsCount();
179
180	0	results.skipToNextHit();
181		} while (results.currentWordIndex() != -1
182	0	&& (results.currentWordIndex() - baseIndex) - words.wordIndex() < wordsGapBetweenAreas);
183
184	0	bodyPosition += words.bytesReadFromUnderlyingStream();
185
186	0	return buf.toString();
187	0	} catch (IOException e) {
188	0	System.err.println(e);
189
190		try {
191	0	body.close();
192	0	} catch (Exception ee) {
193	0	}
194
195	0	return null;
196		}
197		}
198
199		public static void main(String[] args) throws Exception {
200	0	IndexOther fti = new IndexOther(new File("/tmp"));
201
202		SearchResults rawResults;
203	0	if (args[0].charAt(0) == '_') {
204	0	args[0] = args[0].substring(1);
205	0	rawResults = fti.groupSearchResults(args);
206	0	System.out.println("phrase");
207	0	} else if (args[0].equals("-query")) {
208	0	rawResults = fti.querySearchResults(args[1]);
209		} else {
210	0	rawResults = fti.andSearchResults(args);
211	0	System.out.println("and");
212		}
213
214	0	ContextSearchResults results = new ContextSearchResults(new Library() {
215		public Text text(final long textID) {
216	0	return new Text() {
217		public InputStream body() throws IOException {
218	0	return new FileInputStream(
219		"/usr/doc/HOWTO/other-formats/html/CDROM-HOWTO-3.html");
220		}
221
222		public InputStream bodyForFragment() throws IOException {
223	0	return body();
224		}
225
226		public long ftiTextID() {
227	0	return textID;
228		}
229		};
230		}
231		}, rawResults, fti);
232
233	0	for (results.gotoText(0); results.currentTextID() != -1;
234		// FIXME this isn't very clever! (maybe?)
235	0	results.gotoText(results.currentTextID() + 1)) {
236	0	System.out.println("== " + results.currentTextID());
237		for (;;) {
238	0	String anchor = results.currentAnchor();
239	0	String area = results.nextArea();
240	0	if (area == null)
241	0	break;
242	0	System.out.println("-- A HREF=http://doc#" + anchor);
243	0	System.out.println(area);
244	0	}
245		}
246	0	}
247
248		public void close() {
249	0	results.close();
250	0	blockmarks.close();
251	0	closeBody();
252	0	}
253		}