| 1 | |
package org.paneris.bibliomania; |
| 2 | |
|
| 3 | |
import java.sql.ResultSet; |
| 4 | |
import java.util.Hashtable; |
| 5 | |
import java.util.Properties; |
| 6 | |
import java.util.Vector; |
| 7 | |
|
| 8 | |
import org.melati.poem.AccessToken; |
| 9 | |
import org.melati.poem.PoemTask; |
| 10 | |
import org.melati.poem.UnexpectedExceptionPoemException; |
| 11 | |
import org.melati.poem.util.EnumUtils; |
| 12 | |
import org.paneris.bibliomania.fti.IndexCursor; |
| 13 | |
|
| 14 | 0 | public class ShowNotIndexed { |
| 15 | |
|
| 16 | |
public static void showNotIndexed(BibliomaniaDatabase db |
| 17 | |
|
| 18 | |
) throws Exception { |
| 19 | 0 | Hashtable texts = new Hashtable(); |
| 20 | |
|
| 21 | 0 | System.err.println("Running through index: this will take a LONG time!"); |
| 22 | |
|
| 23 | 0 | int n = 1; |
| 24 | 0 | IndexCursor cursor = db.fti().allEntries(); |
| 25 | |
try { |
| 26 | 0 | while (cursor.next()) { |
| 27 | 0 | texts.put(new Long(cursor.textID()), Boolean.TRUE); |
| 28 | 0 | if (n % 10000 == 0) |
| 29 | 0 | System.err.println("... " + n); |
| 30 | 0 | ++n; |
| 31 | |
} |
| 32 | |
} |
| 33 | |
finally { |
| 34 | 0 | try { cursor.close(); } catch (Exception e) {} |
| 35 | 0 | } |
| 36 | |
|
| 37 | 0 | System.err.println("Running through chapters in database"); |
| 38 | |
|
| 39 | 0 | int currentBook = -1; |
| 40 | 0 | Vector chaps = new Vector(); |
| 41 | 0 | String path = null; |
| 42 | 0 | n = 1; |
| 43 | 0 | for (ResultSet c = db.sqlQuery( |
| 44 | |
"SELECT textid, id, path, book FROM chapter ORDER BY textid"); |
| 45 | 0 | c.next();) { |
| 46 | 0 | int book = c.getInt(4); |
| 47 | 0 | if (book != currentBook) { |
| 48 | 0 | if (chaps.size() != 0) { |
| 49 | 0 | System.out.println(currentBook + " " + path + " " + |
| 50 | |
EnumUtils.concatenated(" ", chaps.elements())); |
| 51 | 0 | chaps.setSize(0); |
| 52 | |
} |
| 53 | |
|
| 54 | 0 | currentBook = book; |
| 55 | 0 | path = c.getString(3); |
| 56 | |
} |
| 57 | |
|
| 58 | 0 | if (!texts.containsKey(new Long(c.getLong(1)))) { |
| 59 | 0 | if (chaps.size() < 10) chaps.addElement(new Integer(c.getInt(2))); |
| 60 | 0 | else if (chaps.size() == 10) chaps.addElement("..."); |
| 61 | |
} |
| 62 | |
|
| 63 | 0 | if (n % 1000 == 0) |
| 64 | 0 | System.err.println("... " + n); |
| 65 | 0 | ++n; |
| 66 | 0 | } |
| 67 | 0 | } |
| 68 | |
|
| 69 | |
public static void main(String[] args) { |
| 70 | |
|
| 71 | 0 | Properties conf = new Properties(); |
| 72 | 0 | conf.put("idOfWord.cacheSize", "0"); |
| 73 | 0 | conf.put("occurrences.cacheSize", "131072"); |
| 74 | 0 | conf.put("anchorOfIndex.cacheSize", "0"); |
| 75 | |
|
| 76 | 0 | final BibliomaniaDatabase db = new BibliomaniaDatabase(false, true, conf); |
| 77 | 0 | db.connect("bibliomania","org.melati.poem.dbms.Postgresql", |
| 78 | |
"jdbc:postgresql:bibliomania", "postgres", "*",8); |
| 79 | |
|
| 80 | 0 | db.inSession( |
| 81 | |
AccessToken.root, |
| 82 | 0 | new PoemTask() { |
| 83 | |
public void run() { |
| 84 | |
try { |
| 85 | 0 | showNotIndexed(db |
| 86 | |
|
| 87 | |
|
| 88 | |
|
| 89 | |
|
| 90 | |
|
| 91 | |
|
| 92 | |
); |
| 93 | |
} |
| 94 | 0 | catch (Exception e) { |
| 95 | 0 | throw new UnexpectedExceptionPoemException(e); |
| 96 | 0 | } |
| 97 | 0 | } |
| 98 | |
}); |
| 99 | 0 | } |
| 100 | |
} |