View Javadoc

1   package org.paneris.bibliomania.logs;
2   
3   import java.io.BufferedReader;
4   import java.io.File;
5   import java.io.FileReader;
6   import java.io.FileWriter;
7   import java.io.FilenameFilter;
8   import java.io.InputStreamReader;
9   import java.io.PrintWriter;
10  import java.util.Calendar;
11  import java.util.Date;
12  import java.util.GregorianCalendar;
13  
14  import org.melati.poem.AccessToken;
15  import org.melati.poem.NoSuchRowPoemException;
16  import org.melati.poem.PoemTask;
17  import org.melati.poem.UnexpectedExceptionPoemException;
18  import org.melati.poem.util.StringUtils;
19  import org.paneris.bibliomania.BibliomaniaDatabase;
20  
21  /*
22   * This program will analyse the stats generated by the bibliomania.com site.
23   * 
24   * It is assumed to be run from cron at 2am EDT ie a crontab entry of
25   * 0   2 * * *  /usr/local/logs/stats.sh 
26   * where stats.sh runs this and outputs error and reports files.
27   * 
28   * It uses 2 analysis programs - weblaizer and analog.
29   * It expects log files in a directory tree:
30   * 
31   * year/month/
32   *
33   * with each directory containing 1 log file per day.  
34   * It will not do anything with today's file (as it will probably not be complete)
35   * FIXME this means that the last day of every month is missed.
36   *
37   * Output for webalizer is according to the config file bibliomania.conf
38   *
39   * Output for analog is in the file analog.html in the directory for each month,
40   * and the directory for the year.  The annual stats are recomputed every day.
41   *
42   * The stats are translated before being analysed stats to be trnaslated need
43   * to be unzipped first.
44   *
45   * You can optionally supply a single parameter of the form yyyy/mm in order to 
46   * process the stats for the specific month.  
47   * Annual stats are still calculated.
48   */  
49  
50  public class LogAnalysis {
51    
52    public static void main(final String[] args) throws Exception {
53      
54      // get today's date
55      Calendar today = new GregorianCalendar();
56      int year = today.get(Calendar.YEAR);
57      int month = today.get(Calendar.MONTH) + 1;
58      String monthString = month + "";
59      if (month < 10) monthString = "0" + month;
60      int day = today.get(Calendar.DAY_OF_MONTH);
61      String dayString = day + "";
62      if (day < 10) dayString = "0" + day;
63      String todayYearmonth = year + File.separator + monthString;
64      
65      // default 
66      String yearmonth = todayYearmonth;
67      
68      // allow us to run month by month
69      if (args.length > 0) {
70        yearmonth = args[0];
71      }
72      
73      System.out.println("Running Stats for " + yearmonth);
74      System.out.println("Started at: " + new Date());
75      File dir = new File(yearmonth);
76  
77      Translator trans = new Translator();
78  
79      // find files that need translating, translate them,
80      // analize them, and then zip up the output
81      FilenameFilter webalizerFilter = new WebalizerFilter(todayYearmonth, dayString);
82      File[] files = dir.listFiles(webalizerFilter);
83      java.util.Arrays.sort(files);
84      if (files != null) {
85        for (int i=0; i<files.length; i++) {    
86          System.out.println("Translating " + files[i]);
87          File out = trans.doIt(files[i]);
88          String outName = out.toString();
89          out = null;
90          System.out.println("Webalizering " + outName);
91          run("webalizer -c bibliomania.conf " + outName);
92          System.out.println("Gzipping " + files[i]);
93          run("gzip " + files[i]);
94          System.out.println("Gzipping " + outName);
95          run("gzip " + outName);
96        }
97      }    
98      
99      // run the files for this month through analog
100     File[] dirs = new File[1];
101     dirs[0] = dir;
102     analog(dirs, todayYearmonth, dayString, yearmonth);
103     
104     // do the annual stuff.
105     int i = yearmonth.indexOf("/");
106     if (i > 0) {
107       String yeardir = yearmonth.substring(0,i);
108       File updir =  new File(yeardir);
109       files = updir.listFiles();
110       analog(files,todayYearmonth,dayString,yeardir);
111     }
112     
113     System.out.println("Completed at: " + new Date());
114   }
115   
116   
117   /**
118    * Run analog on all translated files in the supplied list of directories.
119    */
120   static void analog(File[] dirs, String yearmonth, String dayString, String destination) throws Exception {
121     FilenameFilter translatedFilter = new TranslatedFilter(yearmonth, dayString);
122     String filenames = "";
123     for (int i=0; i<dirs.length; i++) {    
124       File[] files = dirs[i].listFiles(translatedFilter);
125       if (files != null) {
126         for (int j=0; j<files.length; j++) {    
127           filenames += " " + files[j];
128         }
129       }
130     }
131     System.out.println("Analogging " + filenames);
132     run("analog -G +ganalog.cfg -O" + destination + File.separator + "index.html" + filenames);
133   }   
134   
135   static void run(String command) throws Exception {
136     Process p = Runtime.getRuntime().exec(command);
137     InputStreamReader error = new InputStreamReader(p.getErrorStream());
138     InputStreamReader input = new InputStreamReader(p.getInputStream());
139     boolean ok = true;
140     while (ok) {
141       ok = ((error.read() != -1) || (input.read() != -1));
142     }
143     p.waitFor();
144   }
145 
146 }
147 
/**
 * Accepts every file except those that belong to today: a file is rejected
 * only when it sits in the directory named by {@code yearmonth} and its name
 * begins with the two characters held in {@code day}.
 */
class NotTodayFilter implements FilenameFilter {

  /** Directory name, as given by File.toString(), that holds today's log. */
  String yearmonth;

  /** Two character prefix that marks a file name as today's. */
  String day;

  public NotTodayFilter(String yearmonth, String day) {
    this.yearmonth = yearmonth;
    this.day = day;
  }

  /**
   * @param dir  directory containing the candidate file
   * @param name name of the candidate file
   * @return false only for a file in today's directory whose first two
   *         characters equal {@code day}; true otherwise
   */
  public boolean accept(File dir, String name) {
    boolean isTodaysFile = dir.toString().equals(yearmonth)
        && name.length() > 1
        && day.equals(name.substring(0, 2));
    return !isTodaysFile;
  }
}
168 
169 
170  
171 /**
172  * Select files that have been translated.
173  */
174 class TranslatedFilter extends NotTodayFilter {
175   
176   public TranslatedFilter(String yearmonth, String day) {
177     super(yearmonth, day);
178   }
179 
180   public boolean accept(File dir, String name) {
181     if (name.endsWith("access.log.translated.gz") && super.accept(dir, name)) return true;
182     return false;
183   }
184 }
185 
186 /**
187  *  Select files that need translating and webalizing.
188  */
189 class WebalizerFilter extends NotTodayFilter {
190   
191   public WebalizerFilter(String yearmonth, String day) {
192     super(yearmonth, day);
193   }
194 
195   public boolean accept(File dir, String name) {
196     File translated1 = new File(dir, name + ".translated.gz");
197     File translated2 = new File(dir, name + ".translated");
198     if (!translated1.exists() && !translated2.exists() && name.endsWith("access.log") && super.accept(dir, name)) return true;
199     return false;
200   }
201 }  
202 
203 /**
204  * Translate a log file.
205  */
206 class Translator {
207 
208   File fileoutput;
209   final BibliomaniaDatabase db;
210     
211   public Translator() {
212     db = new BibliomaniaDatabase(false);
213     db.connect("bibliomania", "org.melati.poem.dbms.Postgresql",
214     "jdbc:postgresql:bibliomania", "postgres", "*",4);
215   }
216   
217   public File doIt(final File file) throws Exception {
218     
219     db.inSession(
220     AccessToken.root,       // FIXME
221     new PoemTask() {
222       public void run() {
223         try {
224           doTranslate(db, file);
225         }
226         catch (Exception e) {
227           throw new UnexpectedExceptionPoemException(e);
228         }
229       }
230     }
231     );
232     return fileoutput;
233   }
234 
235   private void doTranslate(BibliomaniaDatabase dbP, File fileinput) throws Exception {
236     System.out.println("Started translation at: " + new Date());
237     fileoutput = new File(fileinput.toString() + ".translated");
238     FileReader filein = new FileReader(fileinput);
239     PrintWriter fileout = new PrintWriter(new FileWriter(fileoutput));
240     BufferedReader in = new BufferedReader(filein);
241     int totalLines = 0;
242     int translatedLines = 0;
243     int failedLines = 0;
244     int failedSectionGroup = 0;
245     int failedSection = 0;
246     int failedAuthor = 0;
247     int failedBook = 0;
248     String line = "";
249     while ((line = in.readLine()) != null) {
250       totalLines++;
251       LogLine ln = new LogLine(dbP,line, totalLines);
252       fileout.println(ln.translate());
253       if (ln.translatedAll == LogLine.TRANSLATED) translatedLines++;
254       if (ln.translatedAll == LogLine.FAILED_TRANSLATED) failedLines++;
255       if (ln.translatedSectionGroup == LogLine.FAILED_TRANSLATED) failedSectionGroup++;
256       if (ln.translatedSection == LogLine.FAILED_TRANSLATED) failedSection++;
257       if (ln.translatedAuthor == LogLine.FAILED_TRANSLATED) failedAuthor++;
258       if (ln.translatedBook == LogLine.FAILED_TRANSLATED) failedBook++;
259     }
260     in.close();
261     fileout.close();
262     System.out.println("Found " + totalLines + " lines, translated " + translatedLines + " lines, failed on "+ failedLines + " lines.");
263     System.out.println("Failed:  SectionGroup " + failedSectionGroup + ", Section " + failedSection + ", Author " + failedAuthor + ", Book " + failedBook);
264   }
265 
266 }
267 
268   class LogLine {
269     
270     static String lookForStart = "GET ";
271     static String lookForEnd = " HTTP"; 
272     static int lookForStartLength = lookForStart.length();
273     
274     static int NOT_TRANSLATED = 0;
275     static int TRANSLATED = 1;
276     static int FAILED_TRANSLATED = 2;
277     
278     int translatedAll = NOT_TRANSLATED;
279     int translatedSectionGroup = NOT_TRANSLATED;
280     int translatedSection = NOT_TRANSLATED;
281     int translatedAuthor = NOT_TRANSLATED;
282     int translatedBook = NOT_TRANSLATED;
283     
284     int lineno;
285     
286     String in;
287     String middle = "";
288     String end = "";
289     BibliomaniaDatabase db;
290     
291     public LogLine(BibliomaniaDatabase db, String in, int lineno) {
292       this.in = in;
293       this.db = db;
294       this.lineno = lineno;
295     }
296     
297     public String translate() {
298       String start = in;
299       String endBit = "";
300       String middleBit = "";
301       int length = in.length();
302       int startPoint = in.indexOf(lookForStart);
303       if (startPoint > -1) {
304         startPoint += lookForStartLength;
305         start = in.substring(0,startPoint);
306         endBit = in.substring(startPoint, length);
307         int middlePoint = in.indexOf(lookForEnd, startPoint);
308         if (middlePoint > -1) {
309           middleBit = in.substring(startPoint,middlePoint);
310           middleBit = lookup(middleBit);
311           endBit = in.substring(middlePoint, length);
312         }
313       }
314       if (translatedSectionGroup == TRANSLATED || 
315           translatedSection == TRANSLATED ||
316           translatedAuthor == TRANSLATED ||
317           translatedBook == TRANSLATED) {
318         translatedAll = TRANSLATED;
319       }
320       if (translatedSectionGroup == FAILED_TRANSLATED || 
321           translatedSection == FAILED_TRANSLATED ||
322           translatedAuthor == FAILED_TRANSLATED ||
323           translatedBook == FAILED_TRANSLATED) {
324         translatedAll = FAILED_TRANSLATED;
325       }
326       return start + middleBit + endBit;
327     }
328     
329     public String lookup(String inP) {
330       if (inP.startsWith("/")) inP = inP.substring(1,inP.length());
331       String[] split = StringUtils.split(inP, '/');
332       int len = split.length;
333       if (len > 4) len = 4;
334       try {
335         for (int i=0; i<len; i++) {
336           split[i] = look(i,split[i]);
337         }
338       } catch (NumberFormatException e) {}
339       return join(split);
340     }
341     
342     public String look(int place,String inP) throws NumberFormatException {
343       Integer i = new Integer(inP);
344       switch (place) {
345         case 0: 
346           return lookupSectionGroup(i);
347         case 1: 
348           return lookupSection(i);
349         case 2: 
350           return lookupAuthor(i);
351         case 3: 
352           return lookupBook(i);
353         default:
354           return inP;
355       }
356     }
357 
358     public String lookupSectionGroup(Integer i) {
359       try {
360         translatedSectionGroup = TRANSLATED;
361         return nospaces(db.getSectionGroupTable().getSectionGroupObject(i).getDisplayname());
362       } catch (NoSuchRowPoemException e) {
363         translatedSectionGroup = FAILED_TRANSLATED;
364         System.err.println("Line no " + lineno + "  Not Found: Section Group " + i);
365         System.err.println("        " + in);
366         return i + "";
367       }
368     }
369 
370     public String lookupSection(Integer i) throws NoSuchRowPoemException {
371       try {
372         translatedSection = TRANSLATED;
373         return nospaces(db.getSectionTable().getSectionObject(i).getDisplayname());
374       } catch (NoSuchRowPoemException e) {
375         translatedSection = FAILED_TRANSLATED;
376         System.err.println("Line no " + lineno + "  Not Found:       Section " + i);
377         System.err.println("        " + in);
378         return i + "";
379       }
380     }
381 
382     public String lookupAuthor(Integer i) throws NoSuchRowPoemException {
383       try {
384         translatedAuthor = TRANSLATED;
385         return nospaces(db.getAuthorTable().getAuthorObject(i).getSortname());
386       } catch (NoSuchRowPoemException e) {
387         translatedAuthor = FAILED_TRANSLATED;
388         System.err.println("Line no " + lineno + "  Not Found:        Author " + i);
389         System.err.println("        " + in);
390         return i + "";
391       }
392     }
393 
394     public String lookupBook(Integer i) throws NoSuchRowPoemException {
395       try {
396         translatedBook = TRANSLATED;
397         return nospaces(db.getBookTable().getBookObject(i).getTitle());
398       } catch (NoSuchRowPoemException e) {
399         translatedBook = FAILED_TRANSLATED;
400         System.err.println("Line no " + lineno + "  Not Found:          Book " + i);
401         System.err.println("        " + in);
402         return i + "";
403       }
404     }
405 
406     public String join(String[] split) {
407       String out = "";
408       for (int i=0; i<split.length; i++) {
409         out += "/" + split[i];
410       }
411       return out;
412     }
413     
414     public static String nospaces(String s) {
415       if (s == null) return null;
416       String out = s.replace(' ','_');
417       return out.replace('&','n');
418     }
419   }