View Javadoc

1   /*
2   
3   This software is OSI Certified Open Source Software.
4   OSI Certified is a certification mark of the Open Source Initiative.
5   
6   The license (Mozilla version 1.0) can be read at the MMBase site.
7   See http://www.MMBase.org/license
8   
9   */
10  package net.sf.mmapps.modules.linkchecker.plugins;
11  
12  import java.io.*;
13  import java.util.*;
14  
15  import net.sf.mmapps.modules.linkchecker.*;
16  
17  import org.apache.commons.httpclient.*;
18  import org.apache.commons.httpclient.methods.GetMethod;
19  import org.apache.commons.logging.*;
20  
21  /***
22   * input plugin to fetch links from http
23   * @author Kees Jongenburger
24   */
25  public class FetchLinksFromHttpPlugins implements ReactorPlugin {
26      private static Log log = LogFactory.getLog(HTTPLinkCheckerPlugin.class);
27  
28      public final static String URLLIST_URL_PROPERTY_NAME = "linkchecker.urllist.url";
29  
30      public void init(Properties properties) {}
31  
32      public Links handle(Links links, Properties properties) throws IOException {
33          GetMethod method = null;
34          String sourceUrl = properties.getProperty(URLLIST_URL_PROPERTY_NAME);
35          try {
36              HttpClient client = new HttpClient();
37  
38              log.info("getting url list from " + sourceUrl);
39              method = new GetMethod(sourceUrl);
40  
41              try {
42                  client.executeMethod(method);
43                  //} catch (ConnectException ce){
44              } catch (Throwable t) {
45                  log.warn("failed to get the list of urls from " + sourceUrl, t);
46                  if (!(t instanceof RuntimeException)) {
47                      throw new RuntimeException(t);
48                  } else {
49                      throw (RuntimeException)t;
50                  }
51              }
52  
53              if (method.getStatusCode() != HttpStatus.SC_OK) {
54                  String message = "http status code recieved when fetching the links from {" + sourceUrl + "} was " + method.getStatusText();
55                  RuntimeException e = new RuntimeException(message);
56                  log.warn(e);
57                  throw e;
58              }
59  
60              BufferedReader reader;
61              reader = new BufferedReader(new InputStreamReader(method.getResponseBodyAsStream()));
62  
63              String line = null;
64  
65              while ((line = reader.readLine()) != null) {
66                  //TODO:find better field delimiter and make configurable
67                  line = line.trim();
68                  if (line.length() > 0) {
69                      StringTokenizer st = new StringTokenizer(line.trim(), "|");
70                      String content = null;
71                      if (st.hasMoreTokens()) {
72                          content = st.nextToken();
73                      }
74                      Link link = new Link(content);
75                      link.setExtraData(line);
76                      links.add(link);
77                  }
78              }
79          } finally {
80              log.info("imported " + links.size() + " links from " + sourceUrl);
81              if (method != null) {
82                  method.releaseConnection();
83              }
84          }
85          return links;
86      }
87  
88  }