// ContextSearchResults

package org.paneris.bibliomania.fti;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.melati.poem.NoSuchRowPoemException;

import com.sleepycat.db.DatabaseException;

/**
 * A {@link SearchResults} decorator which delegates all cursor movement to an
 * underlying result set and, in addition, can produce the anchor of the
 * current hit ({@link #currentAnchor()}) and snippets of body text surrounding
 * the hits ({@link #nextArea()}).
 *
 * <p>
 * The body of the current text is opened lazily by {@link #nextArea()} and is
 * only ever read forwards; it is closed whenever the cursor is moved to another
 * text, the results are re-initialised, or {@link #close()} is called.
 */
public class ContextSearchResults implements SearchResults {

  /**
   * Scratch destination for the dummy reads used to move forward in the body.
   * Its contents are never read back, so sharing one buffer is harmless.
   */
  private static final byte[] skipBuffer = new byte[5000];

  /**
   * Placeholder text for a hit whose context cannot be shown. Not referenced
   * by this class itself; presumably used by callers -- verify before changing.
   */
  public static final String contextUnavailable = "(context not available)";

  /** Number of words of context emitted after the last word of a hit. */
  public static final int contextWordsAfterHit = 5;

  /**
   * A following hit is folded into the area being built when it starts fewer
   * than this many words beyond the last word already emitted.
   */
  public static final int wordsGapBetweenAreas = 6;

  private final Library library;

  /** The text the cursor is in, or null when it is not in any text. */
  private Text currentText = null;

  private final SearchResults results;

  private final AnchorFinder blockmarks;

  // We don't want to use a Reader for this because then skip is implemented
  // using repeated reads ...

  /** Lazily opened body of {@link #currentText}; null while not open. */
  private InputStream body = null;

  /** Number of bytes of {@link #body} consumed so far. */
  private long bodyPosition;

  /**
   * @param library used to look up the {@link Text} for a text ID
   * @param results the underlying results to decorate
   * @param fti     the index handed to the {@link AnchorFinder} that resolves
   *                anchors for {@link #currentAnchor()}
   */
  public ContextSearchResults(Library library, SearchResults results,
      IndexOther fti) {
    this.library = library;
    this.results = results;
    blockmarks = new AnchorFinder(fti, true);
  }

  /** Delegates to the underlying results. */
  public int frequency() {
    return results.frequency();
  }

  /** Delegates to the underlying results. */
  public int hitWordsCount() {
    return results.hitWordsCount();
  }

  /** Closes any open body and re-initialises the underlying results. */
  public void init() {
    closeBody();
    results.init();
  }

  /** Delegates to the underlying results. */
  public void skipToNextHit() {
    results.skipToNextHit();
  }

  /** Delegates to the underlying results. */
  public void skipToWordIndex(int wordIndex) {
    results.skipToWordIndex(wordIndex);
  }

  /** Delegates to the underlying results. */
  public int currentWordIndex() {
    return results.currentWordIndex();
  }

  /** Delegates to the underlying results. */
  public int currentOffset() {
    return results.currentOffset();
  }

  /**
   * Moves the underlying results to the given text, closing any open body and
   * looking up the {@link Text} the results actually ended up in.
   *
   * @param textID the text to move to
   * @throws DatabaseException if the underlying results fail
   */
  public void gotoText(long textID) throws DatabaseException {
    closeBody();
    results.gotoText(textID);
    long landedID = results.currentTextID();
    currentText = landedID == -1 ? null : library.text(landedID);
  }

  /**
   * Moves to a position as produced by {@link #getPosition()}: the text ID is
   * taken from the upper 32 bits and the word index from the lower 32 bits.
   *
   * @param position the packed position
   * @throws DatabaseException if the underlying results fail
   */
  public void gotoPosition(long position) throws DatabaseException {
    gotoText(position >> 32);
    if (currentTextID() != -1) {
      skipToWordIndex((int) position);
    }
  }

  /**
   * @return the current text ID in the upper 32 bits and the current word
   *         index in the lower 32 bits, or -1 if there is no current word
   */
  public long getPosition() {
    int wordIndex = currentWordIndex();
    if (wordIndex == -1) {
      return -1;
    }
    // Mask the index so that int-to-long sign extension can never overwrite
    // the text ID held in the upper half.
    return (currentTextID() << 32) | (wordIndex & 0xFFFFFFFFL);
  }

  /** Delegates to the underlying results. */
  public long currentTextID() {
    return results.currentTextID();
  }

  /**
   * @return the text located by the last {@link #gotoText(long)}, or null if
   *         the underlying results reported text ID -1
   */
  public Text currentText() {
    return currentText;
  }

  /**
   * @return the anchor for the current hit as resolved by the
   *         {@link AnchorFinder}, or null if there is no current word
   * @throws DatabaseException if the anchor lookup fails
   */
  public String currentAnchor() throws DatabaseException {
    // FIXME not necessarily very optimal
    // could exploit forward-moving definition of these cursors
    int index = currentWordIndex();
    return index == -1 ? null : blockmarks
        .anchorOfIndex(currentTextID(), index);
  }

  /** Closes the body stream if one is open and forgets it. */
  private synchronized void closeBody() {
    if (body != null) {
      try {
        body.close();
      } catch (IOException ignored) {
        // Best effort: we are discarding the stream anyway.
      }
      body = null;
    }
  }

  /**
   * Consumes bytes of {@link #body} until {@link #bodyPosition} reaches
   * {@code target}.
   *
   * @param target absolute byte offset in the body to advance to
   * @throws IOException if the stream ends or stalls before {@code target}
   */
  private void advanceBodyTo(long target) throws IOException {
    // FIXME BufferedInputStream seems genuinely not to work well
    // wrt to skipping ... so we use skip for the first move and dummy
    // reads thereafter
    if (bodyPosition == 0 && target > 0) {
      long skipped = body.skip(target);
      if (skipped > 0) {
        bodyPosition += skipped;
      }
    }

    // skip() is allowed to stop short, so any remainder is made up here too.
    while (bodyPosition < target) {
      int read = body.read(skipBuffer, 0,
          (int) Math.min(target - bodyPosition, skipBuffer.length));
      if (read <= 0) {
        // End of stream (-1) must not be added to the position.
        break;
      }
      bodyPosition += read;
    }

    if (bodyPosition < target) {
      throw new IOException("skipped only to " + bodyPosition + " not "
          + target);
    }
  }

  /**
   * Builds the next snippet of body text around the current hit, merging in
   * any following hits that lie within {@link #wordsGapBetweenAreas} words of
   * the text already emitted. As a side effect the underlying results are
   * advanced past every hit that was included.
   *
   * @return the snippet, or null if there is no current hit, no current text,
   *         the body row does not exist, or an I/O error occurred (in which
   *         case the error is printed to {@code System.err} and the body is
   *         closed)
   */
  public synchronized String nextArea() {
    try {
      long target = results.currentOffset();
      if (target == -1) {
        closeBody();
        return null;
      }

      if (body == null) {
        if (currentText == null) {
          return null;
        }
        try {
          body = new BufferedInputStream(currentText.bodyForFragment());
          bodyPosition = 0;
        } catch (NoSuchRowPoemException e) {
          // This should not happen in a coherent database, but
          // the one on my machine isn't coherent
          return null;
        }
      }

      if (bodyPosition > target) {
        throw new IOException("tried to skip backwards");
      }

      advanceBodyTo(target);

      StringBuffer buf = new StringBuffer();
      IndexTokenizer words = new IndexTokenizer(body, true);
      // Word index (within the text) that the tokenizer's own count is
      // measured from -- presumably the word found at `target`; TODO confirm
      // against the indexer.
      int baseIndex = Math.max(results.currentWordIndex()
          - IndexOther.contextWordsBeforeHit, 0);
      do {
        // Tokenizer-relative index just past the trailing context of this hit.
        int limitIndex = results.currentWordIndex() + results.hitWordsCount()
            + contextWordsAfterHit - baseIndex;

        while (words.hasMoreWords() && words.wordIndex() < limitIndex) {
          String word = words.nextWord();
          // Words starting with '#' are left out of the snippet.
          if (!word.startsWith("#")) {
            if (words.hadBreak() && buf.length() > 0) {
              buf.append(' ');
            }
            buf.append(word);
          }
        }

        results.skipToNextHit();
      } while (results.currentWordIndex() != -1
          && (results.currentWordIndex() - baseIndex) - words.wordIndex() < wordsGapBetweenAreas);

      bodyPosition += words.bytesReadFromUnderlyingStream();

      return buf.toString();
    } catch (IOException e) {
      System.err.println(e);

      // Drop the stream entirely so a later call starts from a fresh one
      // instead of reading from a closed stream.
      closeBody();

      return null;
    }
  }

  /**
   * Command-line harness: runs a search against the index in {@code /tmp} and
   * prints the anchor and context area of every hit.
   *
   * <p>
   * A first argument starting with {@code _} requests a phrase search on the
   * words given, {@code -query <string>} a query-string search, anything else
   * an AND search on the words given.
   */
  public static void main(String[] args) throws Exception {
    if (args.length == 0
        || (args[0].equals("-query") && args.length < 2)) {
      System.err.println("Usage: ContextSearchResults "
          + "(_word ... | -query <query> | word ...)");
      return;
    }

    IndexOther fti = new IndexOther(new File("/tmp"));

    SearchResults rawResults;
    if (args[0].startsWith("_")) {
      args[0] = args[0].substring(1);
      rawResults = fti.groupSearchResults(args);
      System.out.println("phrase");
    } else if (args[0].equals("-query")) {
      rawResults = fti.querySearchResults(args[1]);
    } else {
      rawResults = fti.andSearchResults(args);
      System.out.println("and");
    }

    ContextSearchResults results = new ContextSearchResults(new Library() {
      public Text text(final long textID) {
        return new Text() {
          public InputStream body() throws IOException {
            return new FileInputStream(
                "/usr/doc/HOWTO/other-formats/html/CDROM-HOWTO-3.html");
          }

          public InputStream bodyForFragment() throws IOException {
            return body();
          }

          public long ftiTextID() {
            return textID;
          }
        };
      }
    }, rawResults, fti);

    try {
      for (results.gotoText(0); results.currentTextID() != -1;
      // FIXME this isn't very clever! (maybe?)
      results.gotoText(results.currentTextID() + 1)) {
        System.out.println("== " + results.currentTextID());
        for (;;) {
          // The anchor must be read first: nextArea() moves past the hit.
          String anchor = results.currentAnchor();
          String area = results.nextArea();
          if (area == null) {
            break;
          }
          System.out.println("-- A HREF=http://doc#" + anchor);
          System.out.println(area);
        }
      }
    } finally {
      results.close();
    }
  }

  /**
   * Closes the underlying results, the anchor finder and any open body. Each
   * is attempted even if an earlier one throws.
   */
  public void close() {
    try {
      results.close();
    } finally {
      try {
        blockmarks.close();
      } finally {
        closeBody();
      }
    }
  }
}