| 1 | |
package org.paneris.bibliomania.fti; |
| 2 | |
|
| 3 | |
import org.melati.util.BugException; |
| 4 | |
import org.melati.poem.util.Order; |
| 5 | |
import org.melati.poem.util.SortUtils; |
| 6 | |
|
| 7 | |
import com.sleepycat.db.DatabaseException; |
| 8 | |
|
| 9 | 0 | public class Score { |
| 10 | |
|
| 11 | 0 | public static class Hit { |
| 12 | |
public double score; |
| 13 | |
public long textID; |
| 14 | |
|
| 15 | 0 | public Hit(SearchResults results) { |
| 16 | 0 | if ((textID = results.currentTextID()) == -1) |
| 17 | 0 | throw new BugException("Tried to score an empty hit."); |
| 18 | |
|
| 19 | 0 | score = 1.; |
| 20 | 0 | int i = results.currentWordIndex(); |
| 21 | |
for (;;) { |
| 22 | 0 | results.skipToNextHit(); |
| 23 | 0 | int j = results.currentWordIndex(); |
| 24 | 0 | if (j == -1) break; |
| 25 | 0 | score += 0.2 + Math.exp((i - j) * (j - i) / 100.); |
| 26 | 0 | i = j; |
| 27 | 0 | } |
| 28 | 0 | } |
| 29 | |
} |
| 30 | |
|
| 31 | 0 | private static final Order highScoreFirst = |
| 32 | 0 | new Order() { |
| 33 | |
public boolean lessOrEqual(Object a, Object b) { |
| 34 | 0 | return ((Hit)a).score >= ((Hit)b).score; |
| 35 | |
} |
| 36 | |
}; |
| 37 | |
|
| 38 | |
|
| 39 | |
|
| 40 | |
|
| 41 | |
|
| 42 | |
public static Hit[] scoredHits( |
| 43 | |
SearchResults r, long startTextID, long limitTextID, int max) |
| 44 | |
throws DatabaseException { |
| 45 | 0 | Hit[] them = new Hit[512]; |
| 46 | |
|
| 47 | 0 | int n = 0; |
| 48 | 0 | for (r.gotoText(startTextID); |
| 49 | 0 | n < max && r.currentTextID() != -1 && r.currentTextID() < limitTextID; |
| 50 | 0 | r.gotoText(r.currentTextID() + 1)) { |
| 51 | 0 | if (n >= them.length) { |
| 52 | 0 | Hit[] old = them; |
| 53 | 0 | them = new Hit[old.length * 2]; |
| 54 | 0 | System.arraycopy(old, 0, them, 0, old.length); |
| 55 | |
} |
| 56 | |
|
| 57 | 0 | them[n++] = new Hit(r); |
| 58 | |
} |
| 59 | |
|
| 60 | 0 | Hit[] sorted = new Hit[n]; |
| 61 | 0 | System.arraycopy(them, 0, sorted, 0, n); |
| 62 | 0 | SortUtils.qsort(highScoreFirst, sorted); |
| 63 | 0 | return sorted; |
| 64 | |
} |
| 65 | |
} |