1 | |
package org.paneris.bibliomania; |
2 | |
|
3 | |
import java.sql.ResultSet; |
4 | |
import java.util.Hashtable; |
5 | |
import java.util.Properties; |
6 | |
import java.util.Vector; |
7 | |
|
8 | |
import org.melati.poem.AccessToken; |
9 | |
import org.melati.poem.PoemTask; |
10 | |
import org.melati.poem.UnexpectedExceptionPoemException; |
11 | |
import org.melati.poem.util.EnumUtils; |
12 | |
import org.paneris.bibliomania.fti.IndexCursor; |
13 | |
|
14 | 0 | public class ShowNotIndexed { |
15 | |
|
16 | |
public static void showNotIndexed(BibliomaniaDatabase db |
17 | |
|
18 | |
) throws Exception { |
19 | 0 | Hashtable texts = new Hashtable(); |
20 | |
|
21 | 0 | System.err.println("Running through index: this will take a LONG time!"); |
22 | |
|
23 | 0 | int n = 1; |
24 | 0 | IndexCursor cursor = db.fti().allEntries(); |
25 | |
try { |
26 | 0 | while (cursor.next()) { |
27 | 0 | texts.put(new Long(cursor.textID()), Boolean.TRUE); |
28 | 0 | if (n % 10000 == 0) |
29 | 0 | System.err.println("... " + n); |
30 | 0 | ++n; |
31 | |
} |
32 | |
} |
33 | |
finally { |
34 | 0 | try { cursor.close(); } catch (Exception e) {} |
35 | 0 | } |
36 | |
|
37 | 0 | System.err.println("Running through chapters in database"); |
38 | |
|
39 | 0 | int currentBook = -1; |
40 | 0 | Vector chaps = new Vector(); |
41 | 0 | String path = null; |
42 | 0 | n = 1; |
43 | 0 | for (ResultSet c = db.sqlQuery( |
44 | |
"SELECT textid, id, path, book FROM chapter ORDER BY textid"); |
45 | 0 | c.next();) { |
46 | 0 | int book = c.getInt(4); |
47 | 0 | if (book != currentBook) { |
48 | 0 | if (chaps.size() != 0) { |
49 | 0 | System.out.println(currentBook + " " + path + " " + |
50 | |
EnumUtils.concatenated(" ", chaps.elements())); |
51 | 0 | chaps.setSize(0); |
52 | |
} |
53 | |
|
54 | 0 | currentBook = book; |
55 | 0 | path = c.getString(3); |
56 | |
} |
57 | |
|
58 | 0 | if (!texts.containsKey(new Long(c.getLong(1)))) { |
59 | 0 | if (chaps.size() < 10) chaps.addElement(new Integer(c.getInt(2))); |
60 | 0 | else if (chaps.size() == 10) chaps.addElement("..."); |
61 | |
} |
62 | |
|
63 | 0 | if (n % 1000 == 0) |
64 | 0 | System.err.println("... " + n); |
65 | 0 | ++n; |
66 | 0 | } |
67 | 0 | } |
68 | |
|
69 | |
public static void main(String[] args) { |
70 | |
|
71 | 0 | Properties conf = new Properties(); |
72 | 0 | conf.put("idOfWord.cacheSize", "0"); |
73 | 0 | conf.put("occurrences.cacheSize", "131072"); |
74 | 0 | conf.put("anchorOfIndex.cacheSize", "0"); |
75 | |
|
76 | 0 | final BibliomaniaDatabase db = new BibliomaniaDatabase(false, true, conf); |
77 | 0 | db.connect("bibliomania","org.melati.poem.dbms.Postgresql", |
78 | |
"jdbc:postgresql:bibliomania", "postgres", "*",8); |
79 | |
|
80 | 0 | db.inSession( |
81 | |
AccessToken.root, |
82 | 0 | new PoemTask() { |
83 | |
public void run() { |
84 | |
try { |
85 | 0 | showNotIndexed(db |
86 | |
|
87 | |
|
88 | |
|
89 | |
|
90 | |
|
91 | |
|
92 | |
); |
93 | |
} |
94 | 0 | catch (Exception e) { |
95 | 0 | throw new UnexpectedExceptionPoemException(e); |
96 | 0 | } |
97 | 0 | } |
98 | |
}); |
99 | 0 | } |
100 | |
} |