View Javadoc

1   package org.paneris.bibliomania.logs;
2   
3   import java.io.BufferedReader;
4   import java.io.File;
5   import java.io.FileReader;
6   import java.io.FileWriter;
7   import java.io.FilenameFilter;
8   import java.io.InputStreamReader;
9   import java.io.PrintWriter;
10  import java.util.Calendar;
11  import java.util.Date;
12  import java.util.GregorianCalendar;
13  
14  import org.melati.poem.AccessToken;
15  import org.melati.poem.NoSuchRowPoemException;
16  import org.melati.poem.PoemTask;
17  import org.melati.poem.UnexpectedExceptionPoemException;
18  import org.melati.util.StringUtils;
19  import org.paneris.bibliomania.BibliomaniaDatabase;
20  
21  /*
22   * this program will analyse the stats generated by the bibliomania.com site
23   * 
24   * it uses 2 analysis programs - webalizer and analog
25   * it expects log files in a directory tree:
26   * 
27   * year/month/
28   *
29   * with each directory containing 1 log file per day.  it will not do anything
30   * with today's file (as it will probably not be complete)
31   *
32   * output for webalizer is according to the config file bibliomania.conf
33   *
34   * output for analog is in the file index.html in the directory for each month,
35   * and the directory for the year.  the annual stats are recomputed every day.
36   *
37   * the stats are translated before being analysed; stats to be translated need
38   * to be unzipped first
39   *
40   * you can optionally supply a single parameter of the form yyyy/mm in order to 
41   * process the stats for the specific month.  annual stats are still calculated
42   */  
43  
44  public class LogAnalysis {
45    
46    public static void main(final String[] args) throws Exception {
47      
48      // get today's date
49      Calendar today = new GregorianCalendar();
50      int year = today.get(Calendar.YEAR);
51      int month = today.get(Calendar.MONTH) + 1;
52      String monthString = month + "";
53      if (month < 10) monthString = "0" + month;
54      int day = today.get(Calendar.DAY_OF_MONTH);
55      String dayString = day + "";
56      if (day < 10) dayString = "0" + day;
57      String todayYearmonth = year + File.separator + monthString;
58      
59      // default 
60      String yearmonth = todayYearmonth;
61      
62      // allow us to run month by month
63      if (args.length > 0) {
64        yearmonth = args[0];
65      }
66      
67      System.out.println("Running Stats for " + yearmonth);
68      System.out.println("Started at: " + new Date());
69      File dir = new File(yearmonth);
70  
71      Translator trans = new Translator();
72  
73      // find files that need translating, translate them,
74      // analize them, and then zip up the output
75      FilenameFilter webalizerFilter = new WebalizerFilter(todayYearmonth, dayString);
76      File[] files = dir.listFiles(webalizerFilter);
77      java.util.Arrays.sort(files);
78      if (files != null) {
79        for (int i=0; i<files.length; i++) {    
80          System.out.println("Translating " + files[i]);
81          File out = trans.doIt(files[i]);
82          String outName = out.toString();
83          out = null;
84          System.out.println("Webalizering " + outName);
85          run("webalizer -c bibliomania.conf " + outName);
86          System.out.println("Gzipping " + files[i]);
87          run("gzip " + files[i]);
88          System.out.println("Gzipping " + outName);
89          run("gzip " + outName);
90        }
91      }    
92      
93      // run the files for this month through analog
94      File[] dirs = new File[1];
95      dirs[0] = dir;
96      analog(dirs, todayYearmonth, dayString, yearmonth);
97      
98      // do the annual stuff.
99      int i = yearmonth.indexOf("/");
100     if (i > 0) {
101       String yeardir = yearmonth.substring(0,i);
102       File updir =  new File(yeardir);
103       files = updir.listFiles();
104       analog(files,todayYearmonth,dayString,yeardir);
105     }
106     
107     System.out.println("Completed at: " + new Date());
108   }
109   
110   // run analog on all translated files in the supplied liswt of directories
111   static void analog(File[] dirs, String yearmonth, String dayString, String destination) throws Exception {
112     FilenameFilter translatedFilter = new TranslatedFilter(yearmonth, dayString);
113     String filenames = "";
114     for (int i=0; i<dirs.length; i++) {    
115       File[] files = dirs[i].listFiles(translatedFilter);
116       if (files != null) {
117         for (int j=0; j<files.length; j++) {    
118           filenames += " " + files[j];
119         }
120       }
121     }
122     System.out.println("Analogging " + filenames);
123     run("analog -G +ganalog.cfg -O" + destination + File.separator + "index.html" + filenames);
124   }   
125   
126   static void run(String command) throws Exception {
127     Process p = Runtime.getRuntime().exec(command);
128     InputStreamReader error = new InputStreamReader(p.getErrorStream());
129     InputStreamReader input = new InputStreamReader(p.getInputStream());
130     boolean ok = true;
131     while (ok) {
132       ok = ((error.read() != -1) || (input.read() != -1));
133     }
134     p.waitFor();
135   }
136 
137 }
138 
139 // select all files that are not for today
class NotTodayFilter implements FilenameFilter {

  // directory name of the current month, and the two digit current day
  String yearmonth, day;

  public NotTodayFilter(String yearmonth, String day) {
    this.yearmonth = yearmonth;
    this.day = day;
  }

  /**
   * Rejects a file only when it sits in the current month's directory and
   * its name begins with the current day; everything else is accepted.
   */
  public boolean accept(File dir, String name) {
    // only names of at least two characters in today's month directory
    // can possibly belong to today
    if (dir.toString().equals(yearmonth) && name.length() >= 2) {
      return !day.equals(name.substring(0, 2));
    }
    return true;
  }
}  
157 
158 
159 // select files that have been translated
160 class TranslatedFilter extends NotTodayFilter {
161   
162   public TranslatedFilter(String yearmonth, String day) {
163     super(yearmonth, day);
164   }
165 
166   public boolean accept(File dir, String name) {
167     if (name.endsWith("access.log.translated.gz") && super.accept(dir, name)) return true;
168     return false;
169   }
170 }
171 
172 // select files that need translating and webalizing
173 class WebalizerFilter extends NotTodayFilter {
174   
175   public WebalizerFilter(String yearmonth, String day) {
176     super(yearmonth, day);
177   }
178 
179   public boolean accept(File dir, String name) {
180     File translated1 = new File(dir, name + ".translated.gz");
181     File translated2 = new File(dir, name + ".translated");
182     if (!translated1.exists() && !translated2.exists() && name.endsWith("access.log") && super.accept(dir, name)) return true;
183     return false;
184   }
185 }  
186 
187 // translate a log file
188 class Translator {
189 
190   File fileoutput;
191   final BibliomaniaDatabase db;
192     
193   public Translator() {
194     db = new BibliomaniaDatabase(false);
195     db.connect("bibliomania", "org.melati.poem.dbms.Postgresql",
196     "jdbc:postgresql:bibliomania", "postgres", "*",4);
197   }
198   
199   public File doIt(final File file) throws Exception {
200     
201     db.inSession(
202     AccessToken.root,       // FIXME
203     new PoemTask() {
204       public void run() {
205         try {
206           doTranslate(db, file);
207         }
208         catch (Exception e) {
209           throw new UnexpectedExceptionPoemException(e);
210         }
211       }
212     }
213     );
214     return fileoutput;
215   }
216 
217   private void doTranslate(BibliomaniaDatabase dbP, File fileinput) throws Exception {
218     System.out.println("Started translation at: " + new Date());
219     fileoutput = new File(fileinput.toString() + ".translated");
220     FileReader filein = new FileReader(fileinput);
221     PrintWriter fileout = new PrintWriter(new FileWriter(fileoutput));
222     BufferedReader in = new BufferedReader(filein);
223     int totalLines = 0;
224     int translatedLines = 0;
225     int failedLines = 0;
226     int failedSectionGroup = 0;
227     int failedSection = 0;
228     int failedAuthor = 0;
229     int failedBook = 0;
230     String line = "";
231     while ((line = in.readLine()) != null) {
232       totalLines++;
233       LogLine ln = new LogLine(dbP,line, totalLines);
234       fileout.println(ln.translate());
235       if (ln.translatedAll == LogLine.TRANSLATED) translatedLines++;
236       if (ln.translatedAll == LogLine.FAILED_TRANSLATED) failedLines++;
237       if (ln.translatedSectionGroup == LogLine.FAILED_TRANSLATED) failedSectionGroup++;
238       if (ln.translatedSection == LogLine.FAILED_TRANSLATED) failedSection++;
239       if (ln.translatedAuthor == LogLine.FAILED_TRANSLATED) failedAuthor++;
240       if (ln.translatedBook == LogLine.FAILED_TRANSLATED) failedBook++;
241     }
242     in.close();
243     fileout.close();
244     System.out.println("Found " + totalLines + " lines, translated " + translatedLines + " lines, failed on "+ failedLines + " lines.");
245     System.out.println("Failed:  SectionGroup " + failedSectionGroup + ", Section " + failedSection + ", Author " + failedAuthor + ", Book " + failedBook);
246   }
247 
248 }
249 
250   class LogLine {
251     
252     static String lookForStart = "GET ";
253     static String lookForEnd = " HTTP"; 
254     static int lookForStartLength = lookForStart.length();
255     
256     static int NOT_TRANSLATED = 0;
257     static int TRANSLATED = 1;
258     static int FAILED_TRANSLATED = 2;
259     
260     int translatedAll = NOT_TRANSLATED;
261     int translatedSectionGroup = NOT_TRANSLATED;
262     int translatedSection = NOT_TRANSLATED;
263     int translatedAuthor = NOT_TRANSLATED;
264     int translatedBook = NOT_TRANSLATED;
265     
266     int lineno;
267     
268     String in;
269     String middle = "";
270     String end = "";
271     BibliomaniaDatabase db;
272     
273     public LogLine(BibliomaniaDatabase db, String in, int lineno) {
274       this.in = in;
275       this.db = db;
276       this.lineno = lineno;
277     }
278     
279     public String translate() {
280       String start = in;
281       String endBit = "";
282       String middleBit = "";
283       int length = in.length();
284       int startPoint = in.indexOf(lookForStart);
285       if (startPoint > -1) {
286         startPoint += lookForStartLength;
287         start = in.substring(0,startPoint);
288         endBit = in.substring(startPoint, length);
289         int middlePoint = in.indexOf(lookForEnd, startPoint);
290         if (middlePoint > -1) {
291           middleBit = in.substring(startPoint,middlePoint);
292           middleBit = lookup(middleBit);
293           endBit = in.substring(middlePoint, length);
294         }
295       }
296       if (translatedSectionGroup == TRANSLATED || 
297           translatedSection == TRANSLATED ||
298           translatedAuthor == TRANSLATED ||
299           translatedBook == TRANSLATED) {
300         translatedAll = TRANSLATED;
301       }
302       if (translatedSectionGroup == FAILED_TRANSLATED || 
303           translatedSection == FAILED_TRANSLATED ||
304           translatedAuthor == FAILED_TRANSLATED ||
305           translatedBook == FAILED_TRANSLATED) {
306         translatedAll = FAILED_TRANSLATED;
307       }
308       return start + middleBit + endBit;
309     }
310     
311     public String lookup(String inP) {
312       if (inP.startsWith("/")) inP = inP.substring(1,inP.length());
313       String[] split = StringUtils.split(inP, '/');
314       int len = split.length;
315       if (len > 4) len = 4;
316       try {
317         for (int i=0; i<len; i++) {
318           split[i] = look(i,split[i]);
319         }
320       } catch (NumberFormatException e) {}
321       return join(split);
322     }
323     
324     public String look(int place,String inP) throws NumberFormatException {
325       Integer i = new Integer(inP);
326       switch (place) {
327         case 0: 
328           return lookupSectionGroup(i);
329         case 1: 
330           return lookupSection(i);
331         case 2: 
332           return lookupAuthor(i);
333         case 3: 
334           return lookupBook(i);
335         default:
336           return inP;
337       }
338     }
339 
340     public String lookupSectionGroup(Integer i) {
341       try {
342         translatedSectionGroup = TRANSLATED;
343         return nospaces(db.getSectionGroupTable().getSectionGroupObject(i).getDisplayname());
344       } catch (NoSuchRowPoemException e) {
345         translatedSectionGroup = FAILED_TRANSLATED;
346         System.err.println("Line no " + lineno + "  Not Found: Section Group " + i);
347         System.err.println("        " + in);
348         return i + "";
349       }
350     }
351 
352     public String lookupSection(Integer i) throws NoSuchRowPoemException {
353       try {
354         translatedSection = TRANSLATED;
355         return nospaces(db.getSectionTable().getSectionObject(i).getDisplayname());
356       } catch (NoSuchRowPoemException e) {
357         translatedSection = FAILED_TRANSLATED;
358         System.err.println("Line no " + lineno + "  Not Found:       Section " + i);
359         System.err.println("        " + in);
360         return i + "";
361       }
362     }
363 
364     public String lookupAuthor(Integer i) throws NoSuchRowPoemException {
365       try {
366         translatedAuthor = TRANSLATED;
367         return nospaces(db.getAuthorTable().getAuthorObject(i).getSortname());
368       } catch (NoSuchRowPoemException e) {
369         translatedAuthor = FAILED_TRANSLATED;
370         System.err.println("Line no " + lineno + "  Not Found:        Author " + i);
371         System.err.println("        " + in);
372         return i + "";
373       }
374     }
375 
376     public String lookupBook(Integer i) throws NoSuchRowPoemException {
377       try {
378         translatedBook = TRANSLATED;
379         return nospaces(db.getBookTable().getBookObject(i).getTitle());
380       } catch (NoSuchRowPoemException e) {
381         translatedBook = FAILED_TRANSLATED;
382         System.err.println("Line no " + lineno + "  Not Found:          Book " + i);
383         System.err.println("        " + in);
384         return i + "";
385       }
386     }
387 
388     public String join(String[] split) {
389       String out = "";
390       for (int i=0; i<split.length; i++) {
391         out += "/" + split[i];
392       }
393       return out;
394     }
395     
396     public static String nospaces(String s) {
397       if (s == null) return null;
398       String out = s.replace(' ','_');
399       return out.replace('&','n');
400     }
401   }