1
2
3
4
5
6
7
8
9
10 package net.sf.mmapps.modules.linkchecker.plugins;
11
12 import java.io.*;
13 import java.util.*;
14
15 import net.sf.mmapps.modules.linkchecker.*;
16
17 import org.apache.commons.httpclient.*;
18 import org.apache.commons.httpclient.methods.GetMethod;
19 import org.apache.commons.logging.*;
20
21 /***
22 * input plugin to fetch links from http
23 * @author Kees Jongenburger
24 */
25 public class FetchLinksFromHttpPlugins implements ReactorPlugin {
26 private static Log log = LogFactory.getLog(HTTPLinkCheckerPlugin.class);
27
28 public final static String URLLIST_URL_PROPERTY_NAME = "linkchecker.urllist.url";
29
30 public void init(Properties properties) {}
31
32 public Links handle(Links links, Properties properties) throws IOException {
33 GetMethod method = null;
34 String sourceUrl = properties.getProperty(URLLIST_URL_PROPERTY_NAME);
35 try {
36 HttpClient client = new HttpClient();
37
38 log.info("getting url list from " + sourceUrl);
39 method = new GetMethod(sourceUrl);
40
41 try {
42 client.executeMethod(method);
43
44 } catch (Throwable t) {
45 log.warn("failed to get the list of urls from " + sourceUrl, t);
46 if (!(t instanceof RuntimeException)) {
47 throw new RuntimeException(t);
48 } else {
49 throw (RuntimeException)t;
50 }
51 }
52
53 if (method.getStatusCode() != HttpStatus.SC_OK) {
54 String message = "http status code recieved when fetching the links from {" + sourceUrl + "} was " + method.getStatusText();
55 RuntimeException e = new RuntimeException(message);
56 log.warn(e);
57 throw e;
58 }
59
60 BufferedReader reader;
61 reader = new BufferedReader(new InputStreamReader(method.getResponseBodyAsStream()));
62
63 String line = null;
64
65 while ((line = reader.readLine()) != null) {
66
67 line = line.trim();
68 if (line.length() > 0) {
69 StringTokenizer st = new StringTokenizer(line.trim(), "|");
70 String content = null;
71 if (st.hasMoreTokens()) {
72 content = st.nextToken();
73 }
74 Link link = new Link(content);
75 link.setExtraData(line);
76 links.add(link);
77 }
78 }
79 } finally {
80 log.info("imported " + links.size() + " links from " + sourceUrl);
81 if (method != null) {
82 method.releaseConnection();
83 }
84 }
85 return links;
86 }
87
88 }