View Javadoc

1   package org.paneris.bibliomania.metasearch.bob;
2   
3   import java.io.FileOutputStream;
4   import java.io.IOException;
5   import java.io.OutputStream;
6   import java.net.URL;
7   
8   import org.apache.oro.text.regex.MalformedPatternException;
9   import org.apache.oro.text.regex.Perl5Compiler;
10  import org.apache.oro.text.regex.Perl5Matcher;
11  import org.apache.oro.text.regex.Perl5Pattern;
12  import org.apache.oro.text.regex.StringSubstitution;
13  import org.apache.oro.text.regex.Util;
14  import org.melati.util.IoUtils;
15  import org.melati.util.MelatiRuntimeException;
16  import org.melati.util.UnexpectedExceptionException;
17  import org.paneris.bibliomania.metasearch.util.HackParser;
18  import org.paneris.bibliomania.metasearch.util.TimeoutThread;
19  
20  public class Session {
21    public static final String frontPageURL =
22      "http://bookshop.blackwell.co.uk/cgi-bin/BOB2?BV_Operation=Dyn_LoginReceive&BV_ServiceName=BOB&form%25login_type=guest&form%25failure=/service_error.html&form%25success=/bob/bob_main.html.tmpl";
23  
24    public String sessionID, engineID;
25  
26    public Session(String sessionID, String engineID) {
27      this.sessionID = sessionID;
28      this.engineID = engineID;
29    }
30  
31    public String queryParams() {
32      return "BV_SessionID=" + sessionID + "&BV_EngineID=" + engineID;
33    }
34  
35    public String toString() {
36      return queryParams();
37    }
38  
39    private static final Object bobSessionMon = new Object();
40  
41    private static Session bobSession = null;
42  
43    private static Exception bobSessionException = null;
44  
45    public static class NoSessionIDException extends MelatiRuntimeException {
46      /**
47       * 
48       */
49      private static final long serialVersionUID = 1L;
50  
51      public String getMessage() {
52        return "Couldn't extract session ID from front page";
53      }
54    }
55  
56    private static void newSession() throws OpenException {
57      System.err.println("BOB: establishing new session");
58      try {
59        TimeoutThread timeout = TimeoutThread.forCurrentThread(20000);
60        try {
61          FrontPage fp =
62            new FrontPage(IoUtils.slurp(new URL(frontPageURL), 32768));
63          if (fp.sessionID == null)
64            throw new NoSessionIDException();
65  
66          bobSession = new Session(fp.sessionID, fp.engineID);
67          bobSessionException = null;
68        } finally {
69          timeout.stop();
70        }
71      } catch (Exception e) {
72        bobSessionException = e;
73        throw new OpenException(e);
74      }
75    }
76  
77    private static class SessionRenewer extends Thread {
78      public SessionRenewer() {
79        setDaemon(true);
80      }
81  
82      public void run() {
83        try {
84          for (;;) {
85            try {
86              newSession();
87            } catch (Exception e) {
88              System.err.println("BOB SessionRenewer:");
89              e.printStackTrace();
90            } finally {
91              synchronized (bobSessionMon) {
92                bobSessionMon.notifyAll();
93              }
94            }
95  
96            Thread.sleep(5 * 60 * 1000);
97          }
98        } catch (InterruptedException e) {
99        } finally {
100         sessionRenewer = null;
101       }
102     }
103   }
104 
105   private static SessionRenewer sessionRenewer = null;
106 
107   private static synchronized void checkSessionRenewer() {
108     if (sessionRenewer == null)
109        (sessionRenewer = new SessionRenewer()).start();
110   }
111 
112   static Session bobSession() {
113     checkSessionRenewer();
114 
115     if (bobSession == null) {
116       try {
117         synchronized (bobSessionMon) {
118           if (bobSession == null)
119             bobSessionMon.wait();
120         }
121       } catch (InterruptedException e) {
122         throw new OpenException(e);
123       }
124     }
125 
126     if (bobSession == null)
127       throw new OpenException(bobSessionException);
128 
129     return bobSession;
130   }
131 
132   private static Perl5Pattern bv__IDParam;
133 
134   static {
135     try {
136       bv__IDParam =
137         (Perl5Pattern) (new Perl5Compiler()).compile(
138           "BV_(Session|Engine)ID=[^&]*&?",
139           0);
140     } catch (MalformedPatternException e) {
141       throw new UnexpectedExceptionException(e);
142     }
143   }
144 
145   private static final StringSubstitution empty = new StringSubstitution();
146 
147   public static String urlInCurrentSession(String url) {
148     String urlWithoutSession =
149       Util.substitute(
150         new Perl5Matcher(),
151         bv__IDParam,
152         empty,
153         url,
154         Util.SUBSTITUTE_ALL);
155 
156     return urlWithoutSession + '&' + bobSession().queryParams();
157   }
158 
159   private static String contentDumpPrefix = null;
160   private static int contentDumpIndex = 0;
161 
162   private static byte[] contentUsingCurrentSession(
163     String url,
164     int estimate,
165     int limit)
166     throws IOException {
167     byte[] content = IoUtils.slurp(new URL(urlInCurrentSession(url)), 32768);
168 
169     String dp = contentDumpPrefix;
170     if (dp != null) {
171       try {
172         OutputStream o =
173           new FileOutputStream(contentDumpPrefix + contentDumpIndex++ +".html");
174         try {
175           o.write(content);
176         } finally {
177           try {
178             o.close();
179           } catch (IOException e) {
180           }
181         }
182       } catch (Exception e) {
183         System.err.println("Can't write dump!");
184       }
185     }
186 
187     return content;
188   }
189 
190   private static final byte[] timeoutExplan =
191     "current session timing out".getBytes();
192 
193   public static byte[] content(String url, int estimate, int limit)
194     throws IOException {
195     byte[] it = contentUsingCurrentSession(url, estimate, limit);
196     if (HackParser.indexOf(it, timeoutExplan) != -1) {
197       newSession();
198       return contentUsingCurrentSession(url, estimate, limit);
199     } else
200       return it;
201   }
202 }