1 | |
package org.paneris.bibliomania.pagination; |
2 | |
|
3 | |
import java.io.BufferedReader; |
4 | |
import java.io.BufferedWriter; |
5 | |
import java.io.ByteArrayOutputStream; |
6 | |
import java.io.File; |
7 | |
import java.io.FileInputStream; |
8 | |
import java.io.FileNotFoundException; |
9 | |
import java.io.FileWriter; |
10 | |
import java.io.IOException; |
11 | |
import java.io.InputStream; |
12 | |
import java.io.InputStreamReader; |
13 | |
import java.io.PrintWriter; |
14 | |
import java.io.Reader; |
15 | |
import java.io.StreamTokenizer; |
16 | |
import java.io.StringWriter; |
17 | |
import java.io.Writer; |
18 | |
import java.net.URL; |
19 | |
import java.util.Enumeration; |
20 | |
import java.util.Hashtable; |
21 | |
import java.util.Stack; |
22 | |
import java.util.Vector; |
23 | |
|
24 | |
import javax.swing.text.AttributeSet; |
25 | |
import javax.swing.text.BadLocationException; |
26 | |
import javax.swing.text.MutableAttributeSet; |
27 | |
import javax.swing.text.html.HTML; |
28 | |
import javax.swing.text.html.HTMLEditorKit; |
29 | |
|
30 | |
import org.melati.Melati; |
31 | |
import org.melati.MelatiConfig; |
32 | |
import org.melati.template.webmacro.MelatiFastWriter; |
33 | |
import org.paneris.bibliomania.util.FictionalNotifiableParserCallback; |
34 | |
import org.paneris.bibliomania.util.FictionalNotifyingDocumentParser; |
35 | |
import org.melati.util.FtellPrintWriter; |
36 | |
import org.melati.util.FtellWriter; |
37 | |
import org.melati.util.HTMLUtils; |
38 | |
import org.melati.util.IoUtils; |
39 | |
import org.melati.util.MelatiRuntimeException; |
40 | |
import org.melati.poem.util.StringUtils; |
41 | |
import org.melati.util.UnexpectedExceptionException; |
42 | |
import org.paneris.bibliomania.fti.DbUtils; |
43 | |
import org.paneris.bibliomania.fti.FivePacker; |
44 | |
import org.paneris.bibliomania.fti.FourPacker; |
45 | |
import org.paneris.bibliomania.fti.Text; |
46 | |
import org.webmacro.Context; |
47 | |
import org.webmacro.WM; |
48 | |
import org.webmacro.WebMacro; |
49 | |
import org.webmacro.engine.FileTemplate; |
50 | |
|
51 | |
import com.sleepycat.db.Database; |
52 | |
import com.sleepycat.db.DatabaseException; |
53 | |
import com.sleepycat.db.DatabaseEntry; |
54 | |
import com.sleepycat.db.LockMode; |
55 | |
import com.sleepycat.db.OperationStatus; |
56 | |
|
57 | 9 | public class Pagination { |
58 | |
/** Prefix of the name of the {@code <a name=...>} anchor written at each page break; the running page number is appended to it. */
public static final String pageAnchorPrefix = "__page__"; |
59 | |
|
60 | |
|
61 | |
|
62 | |
|
63 | |
|
/** Directory the TeX run changes into, and the directory against which temp-file paths are recognised. */
64 | 1 | public static final File tempDir = new File("/tmp"); |
65 | |
|
66 | |
/** Name of the external command whose output supplies the line and page break positions of a DVI file. */
public static final String dvi2ttyWithBreakinfo = "dvi2tty_breakinfo"; |
67 | |
|
68 | |
/** Database keyed by packed text ID whose values are the packed page-break offsets of that text. */
private Database pageBreaksOfTextID; |
69 | |
|
70 | 2 | public Pagination(File dbHome) throws DatabaseException, IOException { |
71 | 2 | pageBreaksOfTextID = DbUtils.openOrCreateBTreeDb(dbHome, "pageBreaksOfTextID", 0); |
72 | |
|
73 | |
|
74 | |
|
75 | |
|
76 | 2 | HTMLUtils.dtdForHTMLParser(); |
77 | 2 | } |
78 | |
|
/**
 * Describes one page of a text: its number, the offsets at which it starts
 * and ends, and how many pages the text has in total.
 */
public class PageSpan {
  /** Number of this page. */
  public int number;

  /** Offset at which this page starts. */
  public int startOffset;

  /** Offset at which this page ends. */
  public int endOffset;

  /** Number of pages in the whole text. */
  public int totalPages;

  /**
   * @param number      number of the page
   * @param startOffset offset at which the page starts
   * @param endOffset   offset at which the page ends
   * @param totalPages  number of pages in the text
   */
  public PageSpan(int number, int startOffset, int endOffset, int totalPages) {
    this.totalPages = totalPages;
    this.endOffset = endOffset;
    this.startOffset = startOffset;
    this.number = number;
  }

  /**
   * @return the span rendered as {@code number/totalPages:start-end}
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(number).append("/").append(totalPages);
    text.append(":").append(startOffset).append("-").append(endOffset);
    return text.toString();
  }
}
97 | |
|
98 | |
public static class TagInTexException extends RuntimeException { |
99 | |
private static final long serialVersionUID = 1L; |
100 | |
|
101 | |
public HTML.Tag tag; |
102 | |
|
103 | |
public AttributeSet atts; |
104 | |
|
105 | |
public TagInTexException(HTML.Tag tag, AttributeSet atts) { |
106 | |
this.tag = tag; |
107 | |
this.atts = atts; |
108 | |
} |
109 | |
|
110 | |
public String getMessage() { |
111 | |
return "The element `" |
112 | |
+ (atts == null ? tag.toString() : HTMLUtils.stringOf(tag, atts)) |
113 | |
+ "' occurs inside <span class=tex>. " |
114 | |
+ "If you really want to put < or > in the literal TeX code, you " |
115 | |
+ "should use < and >."; |
116 | |
} |
117 | |
} |
118 | |
|
119 | 2 | public static class FootnoteSequence { |
120 | 2 | public int lastNumber = 0; |
121 | |
|
122 | 2 | private int spansOnStack = 0; |
123 | |
|
124 | 2 | public static class NestingException extends RuntimeException { |
125 | |
private static final long serialVersionUID = 1L; |
126 | |
|
127 | |
public HTML.Tag tag; |
128 | |
|
129 | |
public AttributeSet atts; |
130 | |
|
131 | |
public NestingException(HTML.Tag tag, AttributeSet atts) { |
132 | |
this.tag = tag; |
133 | |
this.atts = atts; |
134 | |
} |
135 | |
|
136 | |
public String getMessage() { |
137 | |
return "The element " + HTMLUtils.stringOf(tag, atts) + " " |
138 | |
+ "constitutes a footnote-within-a-footnote, which is not allowed"; |
139 | |
} |
140 | |
} |
141 | |
|
142 | |
public int number(HTML.Tag tag, AttributeSet atts) { |
143 | 180 | boolean isSpan = tag.toString().equalsIgnoreCase("span"); |
144 | 180 | boolean isFootnote = isSpan |
145 | |
&& atts.isDefined(HTML.Attribute.CLASS) |
146 | |
&& atts.getAttribute(HTML.Attribute.CLASS).toString() |
147 | |
.equalsIgnoreCase("footnote"); |
148 | |
|
149 | 180 | if (spansOnStack > 0) { |
150 | 0 | if (isSpan) |
151 | 0 | ++spansOnStack; |
152 | |
|
153 | 0 | if (isFootnote) |
154 | 0 | throw new NestingException(tag, atts); |
155 | |
} else { |
156 | 180 | if (isFootnote) { |
157 | 0 | ++spansOnStack; |
158 | 0 | if (atts.isDefined("number")) { |
159 | |
try { |
160 | 0 | lastNumber = Integer.parseInt(atts.getAttribute("number") |
161 | |
.toString()); |
162 | 0 | } catch (Exception e) { |
163 | 0 | ++lastNumber; |
164 | 0 | } |
165 | |
} else |
166 | 0 | ++lastNumber; |
167 | |
|
168 | 0 | return lastNumber; |
169 | |
} |
170 | |
} |
171 | |
|
172 | 180 | return -1; |
173 | |
} |
174 | |
|
175 | |
public boolean inFootnote() { |
176 | 231 | return spansOnStack > 0; |
177 | |
} |
178 | |
|
179 | |
public boolean footnoteEnded(HTML.Tag tag) { |
180 | 180 | if (spansOnStack != 0 && tag.toString().equalsIgnoreCase("span") |
181 | |
&& --spansOnStack == 0) { |
182 | 0 | return true; |
183 | |
} else |
184 | 180 | return false; |
185 | |
} |
186 | |
} |
187 | |
|
188 | 9 | public class PageFinder { |
189 | 9 | private DatabaseEntry textID = DbUtils.userMemDatabaseEntry(5); |
190 | |
|
191 | 9 | private DatabaseEntry pageBreaks = DbUtils.userMemDatabaseEntry(1000); |
192 | |
|
193 | 9 | private long theTextID = -1; |
194 | |
|
195 | 9 | private byte[] thePageBreaks = null; |
196 | |
|
197 | |
private synchronized byte[] pageBreaks(long textIdP) throws DatabaseException { |
198 | 9 | if (theTextID != textIdP) { |
199 | 9 | FivePacker.set_(this.textID.getData(), 0, textIdP); |
200 | 9 | theTextID = textIdP; |
201 | 9 | if (pageBreaksOfTextID.get(null, this.textID, pageBreaks, LockMode.DEFAULT) == OperationStatus.SUCCESS) { |
202 | 9 | thePageBreaks = new byte[pageBreaks.getSize() + 4]; |
203 | 9 | System.arraycopy(pageBreaks.getData(), 0, thePageBreaks, 4, |
204 | |
thePageBreaks.length - 4); |
205 | |
} else |
206 | 0 | thePageBreaks = null; |
207 | |
} |
208 | |
|
209 | 9 | return thePageBreaks; |
210 | |
} |
211 | |
|
212 | |
public int totalPages(long textIdP) throws DatabaseException { |
213 | 7 | byte[] pageBreaksL = pageBreaks(textIdP); |
214 | 7 | return pageBreaksL == null ? 0 : pageBreaksL.length / 4 - 1; |
215 | |
} |
216 | |
|
217 | |
public PageSpan pageOfNumber(long textIdP, int number) throws DatabaseException { |
218 | 2 | byte[] pageBreaksL = pageBreaks(textIdP); |
219 | 2 | if (pageBreaksL == null || pageBreaksL.length < (number + 2) * 4) |
220 | 0 | return null; |
221 | |
else |
222 | 2 | return new PageSpan(number, |
223 | |
FourPacker.number_(pageBreaksL, number * 4), FourPacker.number_( |
224 | |
pageBreaksL, (number + 1) * 4), pageBreaksL.length / 4 - 1); |
225 | |
} |
226 | |
|
227 | |
public PageSpan pageOfOffset(long textIdP, int offset) throws DatabaseException { |
228 | 0 | byte[] pageBreaksL = pageBreaks(textIdP); |
229 | |
|
230 | 0 | if (pageBreaksL == null || pageBreaksL.length == 0) |
231 | 0 | return null; |
232 | |
|
233 | 0 | int lastBreakOffset = FourPacker.number_(pageBreaksL, |
234 | |
pageBreaksL.length - 4); |
235 | 0 | if (offset >= lastBreakOffset) |
236 | 0 | return null; |
237 | |
|
238 | 0 | int totalPages = pageBreaksL.length / 4 - 1; |
239 | |
|
240 | 0 | int p = (int)((long)offset * pageBreaksL.length / lastBreakOffset); |
241 | 0 | if ((p + 1) * 4 >= pageBreaksL.length) |
242 | 0 | p = (pageBreaksL.length / 4) - 1; |
243 | |
|
244 | 0 | int offP = FourPacker.number_(pageBreaksL, p * 4); |
245 | 0 | if (offP == offset) |
246 | 0 | return (p + 2) * 4 >= pageBreaksL.length ? null : new PageSpan(p, offP, |
247 | |
FourPacker.number_(pageBreaksL, (p + 1) * 4), totalPages); |
248 | 0 | else if (offP < offset) { |
249 | |
for (;;) { |
250 | 0 | int offP1 = FourPacker.number_(pageBreaksL, (p + 1) * 4); |
251 | 0 | if (offP1 > offset) |
252 | 0 | return new PageSpan(p, offP, offP1, totalPages); |
253 | 0 | ++p; |
254 | 0 | offP = offP1; |
255 | 0 | } |
256 | |
} else { |
257 | |
int offP1; |
258 | |
do { |
259 | 0 | --p; |
260 | 0 | if (p < 0) |
261 | 0 | return new PageSpan(0, 0, offP, totalPages); |
262 | 0 | offP1 = offP; |
263 | 0 | offP = FourPacker.number_(pageBreaksL, p * 4); |
264 | 0 | } while (offP > offset); |
265 | 0 | return new PageSpan(p, offP, offP1, totalPages); |
266 | |
} |
267 | |
} |
268 | |
} |
269 | |
|
270 | |
private static class TexGeneratingCallback extends |
271 | |
HTMLEditorKit.ParserCallback { |
272 | |
|
273 | |
private static final int NONE = 0, SPACE = 1, NOSPACE = 2; |
274 | |
|
275 | 1 | private int wordBreak = SPACE; |
276 | |
|
277 | |
private FtellPrintWriter tex; |
278 | |
|
279 | 1 | private int pos = 0; |
280 | |
|
281 | |
|
282 | |
|
283 | 1 | private FootnoteSequence footnoteSequence = new FootnoteSequence(); |
284 | |
|
285 | 1 | private boolean passthrough = false; |
286 | |
|
287 | 1 | private long lastTexLineBreak = 0; |
288 | |
|
289 | 1 | public TexGeneratingCallback(FtellPrintWriter tex) { |
290 | 1 | this.tex = tex; |
291 | 1 | } |
292 | |
|
293 | |
private void writeTexSpace() { |
294 | 196 | long here = tex.ftell(); |
295 | 196 | if (here - lastTexLineBreak > 1000) { |
296 | 2 | tex.write('\n'); |
297 | 2 | lastTexLineBreak = here; |
298 | |
} else |
299 | 194 | tex.write(' '); |
300 | 196 | } |
301 | |
|
302 | |
public void handleText(char[] text, int _pos) { |
303 | 51 | if (passthrough) { |
304 | 0 | tex.write("in passthough:"); |
305 | 0 | tex.write(text); |
306 | 0 | tex.write(":"); |
307 | 51 | } else if (!footnoteSequence.inFootnote()) { |
308 | 51 | tex.write('{'); |
309 | 850 | for (int i = 0; i < text.length; ++i) { |
310 | 799 | char c = text[i]; |
311 | 799 | if (Character.isWhitespace(c)) { |
312 | 63 | writeTexSpace(); |
313 | 63 | wordBreak = SPACE; |
314 | 736 | } else if (c == '-' || c == 173) { |
315 | 0 | tex.write(c); |
316 | 0 | wordBreak = NOSPACE; |
317 | |
} else { |
318 | 736 | if (wordBreak == SPACE) |
319 | 114 | writeTexSpace(); |
320 | |
|
321 | 736 | switch (c) { |
322 | |
case '!': |
323 | |
case '@': |
324 | |
case '*': |
325 | |
case '(': |
326 | |
case ')': |
327 | |
case '.': |
328 | |
case ',': |
329 | |
case '/': |
330 | |
case '?': |
331 | |
case '[': |
332 | |
case ']': |
333 | |
case '|': |
334 | |
case '=': |
335 | |
case '+': |
336 | 60 | tex.write(c); |
337 | 60 | break; |
338 | |
case 151: |
339 | 0 | tex.write("---"); |
340 | 0 | break; |
341 | |
case 145: |
342 | 0 | tex.write('`'); |
343 | 0 | break; |
344 | |
case 146: |
345 | 0 | tex.write('\''); |
346 | 0 | break; |
347 | |
case 147: |
348 | 0 | tex.write("``"); |
349 | 0 | break; |
350 | |
case 148: |
351 | 0 | tex.write("''"); |
352 | 0 | break; |
353 | |
default: |
354 | 676 | if ('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c |
355 | |
&& c <= '9') |
356 | 657 | tex.write(c); |
357 | 19 | else if (32 <= c && c < 128) { |
358 | 19 | tex.write("\\char"); |
359 | 19 | tex.write(Integer.toString(c)); |
360 | 19 | writeTexSpace(); |
361 | |
} |
362 | |
} |
363 | |
|
364 | 736 | if (wordBreak != NONE) { |
365 | 114 | tex.write("\\special{.="); |
366 | 114 | tex.write("" + (pos + i)); |
367 | 114 | tex.write("}"); |
368 | 114 | wordBreak = NONE; |
369 | |
} |
370 | |
} |
371 | |
} |
372 | 51 | tex.write('}'); |
373 | |
} |
374 | 0 | else tex.write("Seem to be in footnote:" + text); |
375 | 51 | } |
376 | |
|
377 | |
public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet a, int posP) { |
378 | 0 | if (passthrough) |
379 | 0 | throw new TagInTexException(tag, a); |
380 | |
|
381 | 0 | if (!footnoteSequence.inFootnote()) |
382 | 0 | this.pos = posP; |
383 | 0 | } |
384 | |
|
385 | |
private static class TagTranslation { |
386 | |
String begin, end; |
387 | |
|
388 | 10 | TagTranslation(String begin, String end) { |
389 | 10 | this.begin = begin; |
390 | 10 | this.end = end; |
391 | 10 | } |
392 | |
|
393 | |
public void begin(PrintWriter tex, HTML.Tag tag, MutableAttributeSet a) { |
394 | 35 | tex.write(begin); |
395 | 35 | } |
396 | |
|
397 | |
public void end(PrintWriter tex, HTML.Tag tag) { |
398 | 35 | tex.write(end); |
399 | 35 | } |
400 | |
} |
401 | |
|
402 | 1 | private static final Hashtable<HTML.Tag, TagTranslation> tagTranslations = new Hashtable<HTML.Tag, TagTranslation>(); |
403 | |
|
404 | |
static { |
405 | 1 | tagTranslations.put(HTML.Tag.P, new TagTranslation("\n\n", "")); |
406 | 1 | tagTranslations.put(HTML.Tag.H1, new TagTranslation("\\H{I}{", "}")); |
407 | 1 | tagTranslations.put(HTML.Tag.H2, new TagTranslation("\\H{II}{", "}")); |
408 | 1 | tagTranslations.put(HTML.Tag.H3, new TagTranslation("\\H{III}{", "}")); |
409 | 1 | tagTranslations.put(HTML.Tag.H4, new TagTranslation("\\H{IIII}{", "}")); |
410 | 1 | TagTranslation i = new TagTranslation("{\\it ", "}"); |
411 | 1 | tagTranslations.put(HTML.Tag.I, i); |
412 | 1 | tagTranslations.put(HTML.Tag.EM, i); |
413 | 1 | TagTranslation b = new TagTranslation("{\\bf ", "}"); |
414 | 1 | tagTranslations.put(HTML.Tag.B, b); |
415 | 1 | tagTranslations.put(HTML.Tag.STRONG, b); |
416 | 1 | tagTranslations.put(HTML.Tag.OL, new TagTranslation("\\orderedlist ", |
417 | |
"\\endorderedlist ")); |
418 | 1 | tagTranslations.put(HTML.Tag.UL, new TagTranslation("\\unorderedlist ", |
419 | |
"\\endunorderedlist ")); |
420 | 1 | tagTranslations.put(HTML.Tag.LI, new TagTranslation("\\li ", "")); |
421 | 1 | } |
422 | |
|
423 | |
public void handleStartTag(HTML.Tag tag, MutableAttributeSet a, int posP) { |
424 | |
|
425 | 90 | if (passthrough) |
426 | 0 | throw new TagInTexException(tag, a); |
427 | |
|
428 | 90 | if (tag.toString().equalsIgnoreCase("span") |
429 | |
&& a.isDefined(HTML.Attribute.CLASS) |
430 | |
&& a.getAttribute(HTML.Attribute.CLASS).toString().equalsIgnoreCase( |
431 | |
"tex")) { |
432 | 0 | passthrough = true; |
433 | |
} else { |
434 | 90 | int footnum = footnoteSequence.number(tag, a); |
435 | 90 | if (footnum != -1) { |
436 | 0 | tex.write("\\footnotenumber{" + footnum + "}"); |
437 | 0 | this.pos = posP; |
438 | 90 | } else if (!footnoteSequence.inFootnote()) { |
439 | 90 | TagTranslation translation = (TagTranslation)tagTranslations.get(tag); |
440 | 90 | if (translation != null) |
441 | 35 | translation.begin(tex, tag, a); |
442 | |
|
443 | 90 | if (tag.breaksFlow()) |
444 | 90 | wordBreak = SPACE; |
445 | |
|
446 | 90 | this.pos = posP; |
447 | |
} |
448 | |
} |
449 | 90 | } |
450 | |
|
451 | |
public void handleEndTag(HTML.Tag tag, int posL) { |
452 | 90 | if (passthrough) { |
453 | 0 | if (tag.toString().equalsIgnoreCase("span")) |
454 | 0 | passthrough = false; |
455 | |
else |
456 | 0 | throw new TagInTexException(tag, null); |
457 | 90 | } else if (footnoteSequence.footnoteEnded(tag)) { |
458 | 0 | wordBreak = NOSPACE; |
459 | 0 | this.pos = posL; |
460 | 90 | } else if (!footnoteSequence.inFootnote()) { |
461 | 90 | TagTranslation translation = (TagTranslation)tagTranslations.get(tag); |
462 | 90 | if (translation != null) |
463 | 35 | translation.end(tex, tag); |
464 | |
|
465 | 90 | if (tag.breaksFlow()) |
466 | 90 | wordBreak = SPACE; |
467 | |
|
468 | 90 | this.pos = posL; |
469 | |
} |
470 | 90 | } |
471 | |
|
472 | |
public void handleError(String msg, int posP) { |
473 | 34 | if (msg.startsWith("start") || msg.startsWith("end") |
474 | |
|| msg.startsWith("unmatched") || msg.startsWith("invalid")) { |
475 | |
} else |
476 | 0 | System.err.println("Bibliomania pagination: " + posP + ": " + msg); |
477 | 34 | } |
478 | |
|
479 | |
public void flush() throws BadLocationException { |
480 | |
|
481 | 0 | wordBreak = SPACE; |
482 | 0 | } |
483 | |
} |
484 | |
|
485 | |
/** Name of the resource, resolved relative to this class, that holds the TeX header used when the caller supplies none. */
private static final String defaultTexHeaderName = "header.tex"; |
486 | |
|
487 | |
public static class DefaultTexHeaderNotFoundException extends |
488 | |
MelatiRuntimeException { |
489 | |
|
490 | |
private static final long serialVersionUID = 1L; |
491 | |
|
492 | |
public DefaultTexHeaderNotFoundException(Exception problem) { |
493 | |
super(problem); |
494 | |
} |
495 | |
|
496 | |
public String getMessage() { |
497 | |
return "Strangely, a problem arose opening the default TeX header for " |
498 | |
+ "pagination; it ought to be in the software source tree as `" |
499 | |
+ defaultTexHeaderName + "'!?\n" + subException.getMessage(); |
500 | |
} |
501 | |
} |
502 | |
|
503 | |
public String defaultTexHeader() { |
504 | |
try { |
505 | 0 | URL it = getClass().getResource(defaultTexHeaderName); |
506 | 0 | if (it == null) |
507 | 0 | throw new FileNotFoundException("Resource `" + defaultTexHeaderName |
508 | |
+ "' not found by " + getClass()); |
509 | 0 | return new String(IoUtils.slurp(new InputStreamReader(it.openStream()), |
510 | |
5000)); |
511 | 0 | } catch (Exception e) { |
512 | 0 | throw new DefaultTexHeaderNotFoundException(e); |
513 | |
} |
514 | |
} |
515 | |
|
516 | |
private void htmlToTEX(Reader html, String texHeader, Writer tex) |
517 | |
throws IOException { |
518 | 1 | tex.write(texHeader == null ? defaultTexHeader() : texHeader); |
519 | 1 | tex.write('\n'); |
520 | |
|
521 | 1 | FtellPrintWriter texP = new FtellPrintWriter(tex); |
522 | |
|
523 | 1 | newDocumentParser().parse(html, new TexGeneratingCallback(texP), |
524 | |
true); |
525 | |
|
526 | 1 | if (texP.checkError()) |
527 | 0 | throw new IOException("Error writing to TeX buffer"); |
528 | 1 | tex.write("\n\\bye\n"); |
529 | 1 | } |
530 | |
|
531 | |
|
532 | |
|
533 | |
|
534 | |
public static FictionalNotifyingDocumentParser newDocumentParser() { |
535 | 2 | return new FictionalNotifyingDocumentParser(HTMLUtils.dtdForHTMLParser()); |
536 | |
} |
537 | |
|
538 | |
public static class TexFailedException extends IOException { |
539 | |
private static final long serialVersionUID = 1L; |
540 | |
|
541 | |
public String[] command; |
542 | |
|
543 | |
public File texFile; |
544 | |
|
545 | |
public TexFailedException(String[] command, File texFile) { |
546 | |
this.command = command; |
547 | |
this.texFile = texFile; |
548 | |
} |
549 | |
|
550 | |
public String getLogFile() { |
551 | |
String p = texFile.getPath(); |
552 | |
return (p.endsWith(".tex") ? p.substring(0, p.length() - 4) : p) + ".log"; |
553 | |
} |
554 | |
|
555 | |
public String getMessage() { |
556 | |
return "Nonzero exit status from [[" |
557 | |
+ StringUtils.concatenated(" ", command) + "]]; " |
558 | |
+ "look for errors in " + getLogFile(); |
559 | |
} |
560 | |
} |
561 | |
|
562 | |
public void texToDVI(File texTemp) throws IOException, InterruptedException { |
563 | 1 | String[] command = { |
564 | |
"/bin/sh", |
565 | |
"-c", |
566 | |
"cd " + tempDir + " && " + "tex " + texTemp |
567 | |
+ " </dev/null >/dev/null 2>&1" }; |
568 | |
|
569 | 1 | if (Runtime.getRuntime().exec(command).waitFor() != 0) |
570 | 0 | throw new TexFailedException(command, texTemp); |
571 | 1 | } |
572 | |
|
573 | |
public static class BreakInfoOfDVI { |
574 | |
public static final int LINE = 0, PAGE = 1; |
575 | |
|
576 | |
private Process dvi2tty; |
577 | |
|
578 | |
private StreamTokenizer tokens; |
579 | |
|
580 | 1 | private int kind = -1, position = -1; |
581 | |
|
582 | 1 | public BreakInfoOfDVI(File dviTemp) throws IOException { |
583 | 1 | String[] command = { |
584 | |
"/bin/sh", |
585 | |
"-c", |
586 | |
dvi2ttyWithBreakinfo + " -b " + dviTemp.getPath() + "| " |
587 | |
+ "sort -n -k2" }; |
588 | 1 | dvi2tty = Runtime.getRuntime().exec(command); |
589 | 1 | tokens = new StreamTokenizer(new BufferedReader(new InputStreamReader( |
590 | |
dvi2tty.getInputStream()))); |
591 | 1 | if (tokens.nextToken() == StreamTokenizer.TT_EOF) |
592 | 0 | throw new IOException("[[" + StringUtils.concatenated(" ", command) |
593 | |
+ "]] produced no output"); |
594 | 1 | tokens.pushBack(); |
595 | 1 | } |
596 | |
|
597 | |
public boolean next() throws IOException { |
598 | 35 | if (tokens.nextToken() == StreamTokenizer.TT_EOF) { |
599 | 1 | close(); |
600 | 1 | kind = -1; |
601 | 1 | position = -1; |
602 | 1 | return false; |
603 | |
} |
604 | |
|
605 | 34 | if (tokens.ttype != StreamTokenizer.TT_WORD) |
606 | 0 | throw new IOException("Unexpected nonword " + tokens + " from " |
607 | |
+ dvi2ttyWithBreakinfo); |
608 | 34 | if (tokens.sval.equals("line")) |
609 | 33 | kind = LINE; |
610 | 1 | else if (tokens.sval.equals("page")) |
611 | 1 | kind = PAGE; |
612 | |
else |
613 | 0 | throw new IOException("Unexpected " + tokens + " from " |
614 | |
+ dvi2ttyWithBreakinfo + ": " + "expected `line' or `page'"); |
615 | |
|
616 | 34 | if (tokens.nextToken() != StreamTokenizer.TT_NUMBER) |
617 | 0 | throw new IOException("Unexpected nonnumber " + tokens + " from " |
618 | |
+ dvi2ttyWithBreakinfo); |
619 | |
|
620 | 34 | position = (int)tokens.nval; |
621 | |
|
622 | 34 | return true; |
623 | |
} |
624 | |
|
625 | |
public int kind() { |
626 | 34 | return kind; |
627 | |
} |
628 | |
|
629 | |
public int position() { |
630 | 1618 | return position; |
631 | |
} |
632 | |
|
633 | |
public void close() { |
634 | 3 | dvi2tty.destroy(); |
635 | 3 | } |
636 | |
|
637 | |
protected void finalize() throws Throwable { |
638 | 1 | close(); |
639 | 1 | } |
640 | |
} |
641 | |
|
642 | |
private class LineFixatingCallback extends FictionalNotifiableParserCallback { |
643 | |
private FtellWriter linedHTML; |
644 | |
|
645 | 1 | private StringWriter footnote = null; |
646 | |
|
647 | |
private Vector pages; |
648 | |
|
649 | |
private BreakInfoOfDVI breaks; |
650 | |
|
651 | 1 | private boolean inWord = false, inLine = true, hadBreak = false; |
652 | |
|
653 | 1 | private int pos = 0; |
654 | |
|
655 | 1 | private Stack openTags = new Stack(); |
656 | |
|
657 | 1 | private Vector pendingStartTags = new Vector(); |
658 | |
|
659 | 1 | private int pageNum = 0; |
660 | |
|
661 | 1 | private FootnoteSequence footnoteSequence = new FootnoteSequence(); |
662 | |
|
663 | |
private org.webmacro.Template footnoteTemplate; |
664 | |
|
665 | |
private WebMacro footnoteWebmacro; |
666 | |
|
667 | |
private String contentEncoding; |
668 | |
|
669 | 1 | private boolean currentIsFictional = false; |
670 | |
|
671 | 1 | private boolean literalTeX = false; |
672 | |
|
673 | 1 | private int footnoteSeqInDoc = 0; |
674 | |
|
675 | |
public LineFixatingCallback(FtellWriter linedHTML, BreakInfoOfDVI breaks, |
676 | |
Vector pages, org.webmacro.Template footnoteTemplate, |
677 | 1 | WebMacro footnoteWebmacro, String contentEncoding) throws IOException { |
678 | 1 | this.linedHTML = linedHTML; |
679 | 1 | this.breaks = breaks; |
680 | 1 | this.pages = pages; |
681 | 1 | this.footnoteTemplate = footnoteTemplate; |
682 | 1 | this.footnoteWebmacro = footnoteWebmacro; |
683 | 1 | this.contentEncoding = contentEncoding; |
684 | 1 | breaks.next(); |
685 | 1 | } |
686 | |
|
687 | |
public void notifyCurrentIsFictional(boolean is) { |
688 | 180 | currentIsFictional = is; |
689 | 180 | } |
690 | |
|
691 | |
|
692 | |
|
693 | |
|
694 | |
|
695 | |
protected boolean getCurrentIsFictional() { |
696 | 0 | return currentIsFictional; |
697 | |
} |
698 | |
|
699 | |
public void checkBreaks(int posP) { |
700 | |
try { |
701 | 821 | while (breaks.position() != -1 && breaks.position() <= posP) { |
702 | 34 | if (breaks.kind() == BreakInfoOfDVI.PAGE) { |
703 | |
|
704 | 2 | for (int e = openTags.size() - 1; e >= 0; --e) { |
705 | 1 | linedHTML.write("</"); |
706 | 1 | linedHTML |
707 | |
.write(((HTMLUtils.TagInstance)openTags.elementAt(e)).tag |
708 | |
.toString()); |
709 | 1 | linedHTML.write(">"); |
710 | |
} |
711 | |
|
712 | 1 | linedHTML |
713 | |
.write("<a name=" + pageAnchorPrefix + ++pageNum + "></a>"); |
714 | |
|
715 | 1 | pages.addElement(new Integer((int)linedHTML.ftell())); |
716 | |
|
717 | 2 | for (int e = 0; e < openTags.size(); ++e) |
718 | 1 | emitTag((HTMLUtils.TagInstance)openTags.elementAt(e)); |
719 | |
|
720 | 1 | emitPendingStartTags(); |
721 | |
} |
722 | |
|
723 | 34 | inLine = false; |
724 | |
|
725 | 34 | breaks.next(); |
726 | |
} |
727 | 0 | } catch (IOException e) { |
728 | 0 | throw new UnexpectedExceptionException(e); |
729 | 787 | } |
730 | 787 | } |
731 | |
|
732 | |
public void handleText(char[] text, int _pos) { |
733 | 51 | if (!literalTeX) |
734 | |
try { |
735 | 51 | if (footnote != null) { |
736 | 0 | for (int i = 0; i < text.length; ++i) { |
737 | 0 | char c = text[i]; |
738 | 0 | if (c < 32 || 128 <= c) |
739 | 0 | footnote.write("&#" + (int)c + ';'); |
740 | |
else |
741 | 0 | footnote.write(c); |
742 | |
} |
743 | |
} else { |
744 | 51 | checkBreaks(pos); |
745 | 51 | emitPendingStartTags(); |
746 | 850 | for (int i = 0; i < text.length; ++i) { |
747 | 799 | char c = text[i]; |
748 | 799 | if (Character.isWhitespace(c)) { |
749 | 63 | if (c == '\n') |
750 | 0 | inLine = false; |
751 | 63 | inWord = false; |
752 | |
} else { |
753 | 736 | checkBreaks(pos + i); |
754 | |
|
755 | 736 | if (!inLine) { |
756 | 1 | if (!hadBreak) |
757 | 0 | linedHTML.write('\n'); |
758 | 1 | inLine = true; |
759 | 735 | } else if (!inWord) |
760 | 63 | linedHTML.write(' '); |
761 | |
|
762 | 736 | if (c < 32 || 128 <= c) |
763 | 0 | linedHTML.write("&#" + (int)c + ';'); |
764 | |
else |
765 | 736 | linedHTML.write(c); |
766 | |
|
767 | 736 | inWord = true; |
768 | 736 | hadBreak = false; |
769 | |
} |
770 | |
} |
771 | |
} |
772 | 0 | } catch (IOException e) { |
773 | 0 | throw new UnexpectedExceptionException(e); |
774 | 51 | } |
775 | 51 | } |
776 | |
|
777 | |
private void emitFootnote() { |
778 | |
try { |
779 | 0 | Context context = footnoteWebmacro.getContext(); |
780 | 0 | context.put("seqInDoc", new Integer(++footnoteSeqInDoc)); |
781 | 0 | context.put("number", new Integer(footnoteSequence.lastNumber)); |
782 | 0 | context.put("text", footnote.toString()); |
783 | |
|
784 | 0 | ByteArrayOutputStream buf = new ByteArrayOutputStream(); |
785 | 0 | MelatiFastWriter fmw = new MelatiFastWriter(footnoteWebmacro |
786 | |
.getBroker(), buf, contentEncoding); |
787 | 0 | Melati m = new Melati(new MelatiConfig(), fmw); |
788 | 0 | context.put("melati", m); |
789 | 0 | footnoteTemplate.write(fmw.getFastWriter().getOutputStream(), context); |
790 | 0 | fmw.flush(); |
791 | |
|
792 | |
|
793 | |
|
794 | |
|
795 | |
|
796 | |
|
797 | |
|
798 | |
|
799 | |
|
800 | 0 | linedHTML.write(buf.toString().trim()); |
801 | 0 | } catch (Exception e) { |
802 | 0 | throw new UnexpectedExceptionException(e); |
803 | |
} finally { |
804 | 0 | footnote = null; |
805 | 0 | } |
806 | 0 | } |
807 | |
|
808 | |
private void emitTag(HTML.Tag tag, AttributeSet attributes) |
809 | |
throws IOException { |
810 | 88 | if (!inWord) { |
811 | 0 | linedHTML.write(' '); |
812 | 0 | inWord = true; |
813 | |
} |
814 | |
|
815 | 88 | if (tag.breaksFlow()) { |
816 | 88 | inLine = true; |
817 | 88 | inWord = true; |
818 | 88 | hadBreak = true; |
819 | |
} |
820 | |
|
821 | 88 | if (tag == HTML.Tag.P || tag == HTML.Tag.LI) |
822 | 35 | linedHTML.write(" " + preStart); |
823 | |
|
824 | |
|
825 | |
|
826 | |
|
827 | |
|
828 | |
|
829 | |
|
830 | |
|
831 | 88 | linedHTML.write('<'); |
832 | 88 | linedHTML.write(tag.toString()); |
833 | 88 | for (Enumeration<?> a = attributes.getAttributeNames(); a.hasMoreElements();) { |
834 | 0 | Object n = a.nextElement(); |
835 | 0 | if (attributes.isDefined(n)) { |
836 | 0 | linedHTML.write(' '); |
837 | 0 | String name = n.toString(); |
838 | 0 | String value = attributes.getAttribute(n).toString(); |
839 | 0 | if (!(tag == HTML.Tag.P && name.equalsIgnoreCase("class") && value |
840 | |
.equalsIgnoreCase("footnote"))) { |
841 | 0 | linedHTML.write(name); |
842 | 0 | linedHTML.write("=\""); |
843 | 0 | linedHTML.write(HTMLUtils.entitied(value)); |
844 | 0 | linedHTML.write('"'); |
845 | |
} |
846 | |
} |
847 | 0 | } |
848 | 88 | linedHTML.write('>'); |
849 | 88 | } |
850 | |
|
851 | |
private void emitTag(HTMLUtils.TagInstance tag) throws IOException { |
852 | 88 | emitTag(tag.tag, tag.attributes); |
853 | 88 | } |
854 | |
|
855 | |
public static final String preStart = ""; |
856 | |
|
857 | |
public static final String preEnd = ""; |
858 | |
|
859 | |
private void emitPendingStartTags() throws IOException { |
860 | 229 | for (int i = 0; i < pendingStartTags.size(); ++i) { |
861 | 87 | HTMLUtils.TagInstance tag = (HTMLUtils.TagInstance)pendingStartTags |
862 | |
.elementAt(i); |
863 | 87 | emitTag(tag); |
864 | 87 | openTags.push(tag); |
865 | |
} |
866 | |
|
867 | 142 | pendingStartTags.clear(); |
868 | 142 | } |
869 | |
|
870 | |
public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attributes, |
871 | |
int posP) { |
872 | 0 | if (literalTeX) |
873 | 0 | throw new TagInTexException(tag, null); |
874 | |
|
875 | 0 | if (!tag.toString().equals("__EndOfLineTag__")) |
876 | |
try { |
877 | 0 | if (footnote != null) |
878 | 0 | HTMLUtils.write(footnote, tag, attributes); |
879 | |
else { |
880 | 0 | emitPendingStartTags(); |
881 | |
|
882 | 0 | emitTag(tag, attributes); |
883 | 0 | this.pos = posP; |
884 | |
} |
885 | 0 | } catch (IOException e) { |
886 | 0 | throw new UnexpectedExceptionException(e); |
887 | 0 | } |
888 | 0 | } |
889 | |
|
890 | |
public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, |
891 | |
int posP) { |
892 | 90 | if (literalTeX) |
893 | 0 | throw new TagInTexException(tag, attributes); |
894 | |
|
895 | 90 | if (tag.toString().equalsIgnoreCase("span") |
896 | |
&& attributes.isDefined(HTML.Attribute.CLASS) |
897 | |
&& attributes.getAttribute(HTML.Attribute.CLASS).toString() |
898 | |
.equalsIgnoreCase("tex")) { |
899 | 0 | literalTeX = true; |
900 | |
} else { |
901 | 90 | int footnoteNumber = footnoteSequence.number(tag, attributes); |
902 | 90 | if (footnoteNumber != -1) { |
903 | 0 | footnote = new StringWriter(); |
904 | 0 | this.pos = posP; |
905 | 90 | } else if (footnote != null) { |
906 | |
try { |
907 | 0 | HTMLUtils.write(footnote, tag, attributes); |
908 | 0 | } catch (IOException e) { |
909 | 0 | throw new UnexpectedExceptionException(e); |
910 | 0 | } |
911 | |
} else { |
912 | 90 | if (tag != HTML.Tag.HTML && tag != HTML.Tag.HEAD |
913 | |
&& tag != HTML.Tag.BODY) |
914 | 87 | pendingStartTags.addElement(new HTMLUtils.TagInstance(tag, |
915 | |
attributes.copyAttributes())); |
916 | 90 | this.pos = posP; |
917 | |
} |
918 | |
} |
919 | 90 | } |
920 | |
|
921 | |
public void handleEndTag(HTML.Tag tag, int posP) { |
922 | 90 | if (literalTeX) { |
923 | 0 | if (tag.toString().equalsIgnoreCase("span")) |
924 | 0 | literalTeX = false; |
925 | |
else |
926 | 0 | throw new TagInTexException(tag, null); |
927 | |
} else |
928 | |
try { |
929 | 90 | if (footnoteSequence.footnoteEnded(tag)) { |
930 | 0 | emitFootnote(); |
931 | 0 | this.pos = posP; |
932 | 90 | } else if (footnote != null) |
933 | 0 | footnote.write("</" + tag + ">"); |
934 | |
else { |
935 | 90 | emitPendingStartTags(); |
936 | |
|
937 | 90 | if (tag.breaksFlow()) { |
938 | 90 | inLine = false; |
939 | 90 | inWord = true; |
940 | 90 | hadBreak = true; |
941 | |
} |
942 | |
|
943 | 90 | if (tag != HTML.Tag.HTML && tag != HTML.Tag.HEAD |
944 | |
&& tag != HTML.Tag.BODY) { |
945 | 87 | linedHTML.write("</" + tag + ">"); |
946 | |
|
947 | 87 | if (tag == HTML.Tag.P || tag == HTML.Tag.LI) |
948 | 35 | linedHTML.write(preEnd + " \n"); |
949 | |
|
950 | |
|
951 | |
|
952 | |
while (!openTags.empty() |
953 | 87 | && ((HTMLUtils.TagInstance)openTags.pop()).tag != tag) |
954 | 0 | ; |
955 | |
} |
956 | |
|
957 | 90 | this.pos = posP; |
958 | |
} |
959 | 0 | } catch (IOException e) { |
960 | 0 | throw new UnexpectedExceptionException(e); |
961 | 90 | } |
962 | 90 | } |
963 | |
|
964 | |
public void handleError(String msg, int posP) { |
965 | 34 | if (msg.startsWith("start") || msg.startsWith("end") |
966 | |
|| msg.startsWith("unmatched") || msg.startsWith("invalid")) { |
967 | |
} else |
968 | 0 | System.err.println("Bibliomania pagination: " + posP + ": " + msg); |
969 | 34 | } |
970 | |
} |
971 | |
|
972 | |
private String tempPrefix() { |
973 | 1 | return getClass().getName(); |
974 | |
} |
975 | |
|
976 | |
public boolean isTempFile(File file) { |
977 | 0 | return file.getPath().startsWith(new File(tempDir, tempPrefix()).getPath()); |
978 | |
} |
979 | |
|
980 | |
public void paginate(Text text, String texHeader, Writer linedHTML, |
981 | |
org.webmacro.Template footnoteTemplate, WebMacro footnoteWebmacro, |
982 | |
String contentEncoding) throws IOException, DatabaseException { |
983 | 1 | Reader html = new InputStreamReader(text.body()); |
984 | 1 | File texTemp = File.createTempFile(tempPrefix(), ".tex", tempDir); |
985 | 1 | String base = texTemp.getPath() |
986 | |
.substring(0, texTemp.getPath().length() - 4); |
987 | 1 | File dviTemp = new File(base + ".dvi"); |
988 | |
|
989 | 1 | Writer toTEXTemp = new BufferedWriter(new FileWriter(texTemp)); |
990 | 1 | BreakInfoOfDVI breaks = null; |
991 | |
try { |
992 | 1 | htmlToTEX(html, texHeader, toTEXTemp); |
993 | 1 | html.close(); |
994 | 1 | toTEXTemp.close(); |
995 | |
|
996 | |
try { |
997 | 1 | texToDVI(texTemp); |
998 | 0 | } catch (InterruptedException e) { |
999 | 0 | throw new IOException("Interrupted while waiting for tex"); |
1000 | 1 | } |
1001 | |
|
1002 | 1 | html = new InputStreamReader(text.body()); |
1003 | 1 | breaks = new BreakInfoOfDVI(dviTemp); |
1004 | |
|
1005 | 1 | Vector pages = new Vector(); |
1006 | |
|
1007 | 1 | FtellWriter linedF = new FtellWriter(linedHTML); |
1008 | |
|
1009 | |
try { |
1010 | 1 | newDocumentParser().parse( |
1011 | |
html, |
1012 | |
new LineFixatingCallback(linedF, breaks, pages, footnoteTemplate, |
1013 | |
footnoteWebmacro, contentEncoding), true); |
1014 | 0 | } catch (UnexpectedExceptionException e) { |
1015 | 0 | if (e.subException instanceof IOException) |
1016 | 0 | throw (IOException)e.subException; |
1017 | |
else |
1018 | 0 | throw e; |
1019 | 1 | } |
1020 | |
|
1021 | 1 | linedF.flush(); |
1022 | |
|
1023 | 1 | pages.addElement(new Integer((int)linedF.ftell())); |
1024 | 1 | DatabaseEntry pageBreaks = DbUtils.userMemDatabaseEntry(pages.size() * 4); |
1025 | 3 | for (int i = 0; i < pages.size(); ++i) |
1026 | 2 | FourPacker.set_(pageBreaks.getData(), i * 4, ((Integer)pages |
1027 | |
.elementAt(i)).intValue()); |
1028 | |
|
1029 | 1 | DatabaseEntry textID = DbUtils.userMemDatabaseEntry(5); |
1030 | 1 | FivePacker.set_(textID.getData(), 0, text.ftiTextID()); |
1031 | |
|
1032 | 1 | pageBreaksOfTextID.put(null, textID, pageBreaks); |
1033 | |
|
1034 | |
|
1035 | |
|
1036 | 1 | if (!new File(tempDir, "bibliomania.Pagination.leaveTempFiles").exists()) { |
1037 | 1 | texTemp.delete(); |
1038 | 1 | dviTemp.delete(); |
1039 | 1 | new File(base + ".log").delete(); |
1040 | |
} |
1041 | |
} finally { |
1042 | 0 | try { |
1043 | 1 | if (breaks != null) |
1044 | 1 | breaks.close(); |
1045 | 0 | } catch (Exception e) { |
1046 | 1 | } |
1047 | |
try { |
1048 | 1 | html.close(); |
1049 | 0 | } catch (Exception e) { |
1050 | 1 | } |
1051 | |
try { |
1052 | 1 | toTEXTemp.close(); |
1053 | 0 | } catch (Exception e) { |
1054 | 1 | } |
1055 | 0 | } |
1056 | 1 | } |
1057 | |
|
1058 | |
public void flush() throws DatabaseException { |
1059 | 1 | pageBreaksOfTextID.sync(); |
1060 | 1 | } |
1061 | |
|
1062 | |
public void close() { |
1063 | |
try { |
1064 | 1 | pageBreaksOfTextID.close(); |
1065 | 0 | } catch (Exception e) { |
1066 | 0 | throw new RuntimeException(); |
1067 | 1 | } |
1068 | 1 | } |
1069 | |
|
1070 | |
protected void finalize() throws Throwable { |
1071 | 1 | close(); |
1072 | 1 | } |
1073 | |
|
1074 | |
public static void main(final String[] args) throws Exception { |
1075 | 0 | WebMacro wm = new WM(); |
1076 | |
|
1077 | 0 | Pagination pagination = new Pagination(new File("/tmp")); |
1078 | |
|
1079 | 0 | if (args[0].equals("-page")) |
1080 | 0 | System.out.println(pagination.new PageFinder().pageOfNumber(Integer |
1081 | |
.parseInt(args[1]), Integer.parseInt(args[2]))); |
1082 | 0 | else if (args[0].equals("-offset")) |
1083 | 0 | System.out.println(pagination.new PageFinder().pageOfOffset(Integer |
1084 | |
.parseInt(args[1]), Integer.parseInt(args[2]))); |
1085 | 0 | else if (args[0].equals("-paginate")) { |
1086 | |
|
1087 | 0 | File templateFile = new File( |
1088 | |
"/dist/Bibliomania/src/org/paneris/bibliomania/html/pagination/footnote.wm"); |
1089 | 0 | org.webmacro.Template fnt = new FileTemplate(wm.getBroker(), |
1090 | |
templateFile, "UTF8"); |
1091 | 0 | fnt.parse(); |
1092 | |
|
1093 | 0 | pagination.paginate(new Text() { |
1094 | |
public InputStream body() throws IOException { |
1095 | 0 | return new FileInputStream(args[1]); |
1096 | |
} |
1097 | |
|
1098 | |
public InputStream bodyForFragment() throws IOException { |
1099 | 0 | return body(); |
1100 | |
} |
1101 | |
|
1102 | |
public long ftiTextID() { |
1103 | 0 | return Long.parseLong(args[2]); |
1104 | |
} |
1105 | |
}, null, new BufferedWriter(new FileWriter("/tmp/lined.html")), fnt, wm, |
1106 | |
"UTF8"); |
1107 | |
} |
1108 | |
|
1109 | 0 | pagination.close(); |
1110 | 0 | } |
1111 | |
} |