Coverage Report - org.paneris.bibliomania.fti.ContextSearchResults
 
Classes in this File Line Coverage Branch Coverage Complexity
ContextSearchResults
0%
0/106
0%
0/46
2.545
ContextSearchResults$1
0%
0/2
N/A
2.545
ContextSearchResults$1$1
0%
0/4
N/A
2.545
 
 1  
 package org.paneris.bibliomania.fti;
 2  
 
 3  
 import java.io.BufferedInputStream;
 4  
 import java.io.File;
 5  
 import java.io.FileInputStream;
 6  
 import java.io.IOException;
 7  
 import java.io.InputStream;
 8  
 
 9  
 import org.melati.poem.NoSuchRowPoemException;
 10  
 
 11  
 import com.sleepycat.db.DatabaseException;
 12  
 
 13  
 public class ContextSearchResults implements SearchResults {
 14  
 
 15  0
   private static byte[] skipBuffer = new byte[5000];
 16  
 
 17  
   public static final String contextUnavailable = "(context not available)";
 18  
 
 19  
   public static final int contextWordsAfterHit = 5;
 20  
 
 21  
   public static final int wordsGapBetweenAreas = 6;
 22  
 
 23  
   private Library library;
 24  
 
 25  0
   private Text currentText = null;
 26  
 
 27  
   private SearchResults results;
 28  
 
 29  
   private AnchorFinder blockmarks;
 30  
 
 31  
   // we don't want to use a Reader for this because then skip is implemented
 32  
   // using repeated reads ...
 33  
 
 34  0
   private InputStream body = null;
 35  
 
 36  
   private long bodyPosition;
 37  
 
 38  
   public ContextSearchResults(Library library, SearchResults results,
 39  0
       IndexOther fti) {
 40  0
     this.library = library;
 41  0
     this.results = results;
 42  0
     blockmarks = new AnchorFinder(fti, true);
 43  0
   }
 44  
 
 45  
   public int frequency() {
 46  0
     return results.frequency();
 47  
   }
 48  
 
 49  
   public int hitWordsCount() {
 50  0
     return results.hitWordsCount();
 51  
   }
 52  
 
 53  
   public void init() {
 54  0
     closeBody();
 55  0
     results.init();
 56  0
   }
 57  
 
 58  
   public void skipToNextHit() {
 59  0
     results.skipToNextHit();
 60  0
   }
 61  
 
 62  
   public void skipToWordIndex(int wordIndex) {
 63  0
     results.skipToWordIndex(wordIndex);
 64  0
   }
 65  
 
 66  
   public int currentWordIndex() {
 67  0
     return results.currentWordIndex();
 68  
   }
 69  
 
 70  
   public int currentOffset() {
 71  0
     return results.currentOffset();
 72  
   }
 73  
 
 74  
   public void gotoText(long textID) throws DatabaseException {
 75  0
     closeBody();
 76  0
     results.gotoText(textID);
 77  0
     long it = results.currentTextID();
 78  0
     currentText = it == -1 ? null : library.text(it);
 79  0
   }
 80  
 
 81  
   public void gotoPosition(long position) throws DatabaseException {
 82  0
     gotoText(position >> 32);
 83  0
     if (currentTextID() != -1)
 84  0
       skipToWordIndex((int) position);
 85  0
   }
 86  
 
 87  
   public long getPosition() {
 88  0
     int wi = currentWordIndex();
 89  0
     return wi == -1 ? -1 : (currentTextID() << 32 | wi);
 90  
   }
 91  
 
 92  
   public long currentTextID() {
 93  0
     return results.currentTextID();
 94  
   }
 95  
 
 96  
   public Text currentText() {
 97  0
     return currentText;
 98  
   }
 99  
 
 100  
   public String currentAnchor() throws DatabaseException {
 101  
     // FIXME not necessarily very optimal
 102  
     // could exploit forward-moving definition of these cursors
 103  0
     int index = currentWordIndex();
 104  0
     return index == -1 ? null : blockmarks
 105  
         .anchorOfIndex(currentTextID(), index);
 106  
   }
 107  
 
 108  
   private synchronized void closeBody() {
 109  0
     if (body != null) {
 110  
       try {
 111  0
         body.close();
 112  0
       } catch (IOException e) {
 113  0
       }
 114  0
       body = null;
 115  
     }
 116  0
   }
 117  
 
 118  
   public synchronized String nextArea() {
 119  
     try {
 120  0
       long target = results.currentOffset();
 121  0
       if (target == -1) {
 122  0
         closeBody();
 123  0
         return null;
 124  
       }
 125  
 
 126  0
       if (body == null) {
 127  0
         if (currentText == null)
 128  0
           return null;
 129  
         try { 
 130  0
           body = new BufferedInputStream(currentText.bodyForFragment());
 131  0
           bodyPosition = 0;
 132  0
         } catch (NoSuchRowPoemException e) { 
 133  
           // This should not happen in a coherent database, but 
 134  
           // the one on my machine isn't coherent
 135  0
           return null;
 136  0
         }
 137  
       }
 138  
 
 139  0
       if (bodyPosition > target)
 140  0
         throw new IOException("tried to skip backwards");
 141  
 
 142  
       // FIXME BufferedInputStream seems genuinely not to work well
 143  
       // wrt to skipping ... so we use skip for the first move and dummy
 144  
       // reads thereafter
 145  
 
 146  0
       if (bodyPosition == 0)
 147  0
         bodyPosition += body.skip(target - bodyPosition);
 148  
       else {
 149  
         long read;
 150  
         do {
 151  0
           bodyPosition += (read = body.read(skipBuffer, 0, (int) Math.min(
 152  
               target - bodyPosition, skipBuffer.length)));
 153  0
         } while (read > 0);
 154  
       }
 155  
 
 156  0
       if (bodyPosition < target)
 157  0
         throw new IOException("skipped only to " + bodyPosition + " not "
 158  
             + target);
 159  
 
 160  0
       StringBuffer buf = new StringBuffer();
 161  0
       IndexTokenizer words = new IndexTokenizer(body, true);
 162  0
       int baseIndex = Math.max(results.currentWordIndex()
 163  
           - IndexOther.contextWordsBeforeHit, 0);
 164  
       // int lastIndex = -1;
 165  
       do {
 166  0
         int limitIndex = results.currentWordIndex() + results.hitWordsCount()
 167  
             + contextWordsAfterHit - baseIndex;
 168  
 
 169  0
         while (words.hasMoreWords() && words.wordIndex() < limitIndex) {
 170  0
           String word = words.nextWord();
 171  0
           if (!word.startsWith("#")) {
 172  0
             if (words.hadBreak() && buf.length() > 0)
 173  0
               buf.append(' ');
 174  0
             buf.append(word);
 175  
           }
 176  0
         }
 177  
 
 178  
         // lastIndex = results.currentWordIndex() + results.hitWordsCount();
 179  
 
 180  0
         results.skipToNextHit();
 181  
       } while (results.currentWordIndex() != -1
 182  0
           && (results.currentWordIndex() - baseIndex) - words.wordIndex() < wordsGapBetweenAreas);
 183  
 
 184  0
       bodyPosition += words.bytesReadFromUnderlyingStream();
 185  
 
 186  0
       return buf.toString();
 187  0
     } catch (IOException e) {
 188  0
       System.err.println(e);
 189  
 
 190  
       try {
 191  0
         body.close();
 192  0
       } catch (Exception ee) {
 193  0
       }
 194  
 
 195  0
       return null;
 196  
     }
 197  
   }
 198  
 
 199  
   public static void main(String[] args) throws Exception {
 200  0
     IndexOther fti = new IndexOther(new File("/tmp"));
 201  
 
 202  
     SearchResults rawResults;
 203  0
     if (args[0].charAt(0) == '_') {
 204  0
       args[0] = args[0].substring(1);
 205  0
       rawResults = fti.groupSearchResults(args);
 206  0
       System.out.println("phrase");
 207  0
     } else if (args[0].equals("-query")) {
 208  0
       rawResults = fti.querySearchResults(args[1]);
 209  
     } else {
 210  0
       rawResults = fti.andSearchResults(args);
 211  0
       System.out.println("and");
 212  
     }
 213  
 
 214  0
     ContextSearchResults results = new ContextSearchResults(new Library() {
 215  
       public Text text(final long textID) {
 216  0
         return new Text() {
 217  
           public InputStream body() throws IOException {
 218  0
             return new FileInputStream(
 219  
                 "/usr/doc/HOWTO/other-formats/html/CDROM-HOWTO-3.html");
 220  
           }
 221  
 
 222  
           public InputStream bodyForFragment() throws IOException {
 223  0
             return body();
 224  
           }
 225  
 
 226  
           public long ftiTextID() {
 227  0
             return textID;
 228  
           }
 229  
         };
 230  
       }
 231  
     }, rawResults, fti);
 232  
 
 233  0
     for (results.gotoText(0); results.currentTextID() != -1;
 234  
     // FIXME this isn't very clever! (maybe?)
 235  0
     results.gotoText(results.currentTextID() + 1)) {
 236  0
       System.out.println("== " + results.currentTextID());
 237  
       for (;;) {
 238  0
         String anchor = results.currentAnchor();
 239  0
         String area = results.nextArea();
 240  0
         if (area == null)
 241  0
           break;
 242  0
         System.out.println("-- A HREF=http://doc#" + anchor);
 243  0
         System.out.println(area);
 244  0
       }
 245  
     }
 246  0
   }
 247  
 
 248  
   public void close() {
 249  0
     results.close();
 250  0
     blockmarks.close();
 251  0
     closeBody();
 252  0
   }
 253  
 }