|
|||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||||
java.lang.Object
  org.matalon.pagerankhits.crawler.Crawler
This class is responsible for crawling the web according to the given parameters.
| Field Summary | |
private int |
maxRetrievedUrlsNo
|
private java.lang.String |
startingAddress
|
private boolean |
stopCrawling
|
static Crawler |
theOnlyOne
|
private static Graph |
webGraph
|
| Constructor Summary | |
private |
Crawler()
Private constructor. |
| Method Summary | |
void |
crawl(java.lang.String startingAddress,
int maxRetrievedUrlsNo)
This method crawls over web pages on the Internet and extracts all their links. |
static Crawler |
getInstance()
|
Graph |
getWebGraph()
|
boolean |
isStopCrawling()
|
static java.lang.String |
makeShorter(java.lang.String string,
int maxLength)
Shortens the given string to a maximum of maxLength characters. |
private void |
prepareSummary(UrlsQueue visitedUrlsQueue)
Prepares a summary of crawler's progress; this summary is written to GeneralSettings.CRAWLING_PROGRESS_REPORT_FILE file. |
private static void |
printUrl(java.io.PrintWriter printWriter,
java.lang.String url,
int urlNo,
int maxUrlsNo)
Prints the given url to the given PrintWriter. |
private static void |
printUrlDetails(java.io.PrintWriter printWriter,
int urlNo,
WebPageProperties webPageProperties)
Prints the given URL details to the given PrintWriter. |
void |
run()
|
void |
stopCrawling()
Stops the Crawler. |
private static void |
updateRealTimeProgress(UrlsQueue urls2Print,
int currCrawledUrlNo,
int maxUrlsNo,
boolean lastUrl,
boolean stopCrawling)
Updates crawler's progress in real time to GeneralSettings.CRAWLING_PROGRESS_REPORT_FILE_TAIL file. |
private static boolean |
wasUrlVisited(java.lang.String url,
java.util.Map visitedUrlsMap)
|
| Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
public static Crawler theOnlyOne
private static Graph webGraph
private boolean stopCrawling
private java.lang.String startingAddress
private int maxRetrievedUrlsNo
| Constructor Detail |
private Crawler()
| Method Detail |
public static Crawler getInstance()
public void crawl(java.lang.String startingAddress,
int maxRetrievedUrlsNo)
startingAddress - maxRetrievedUrlsNo -
public void run()
Specified by: run in interface java.lang.Runnable
See Also: Runnable.run()
private static final boolean wasUrlVisited(java.lang.String url,
java.util.Map visitedUrlsMap)
url - visitedUrlsMap -
true if the given url was visited, false otherwise.
private static final void updateRealTimeProgress(UrlsQueue urls2Print,
int currCrawledUrlNo,
int maxUrlsNo,
boolean lastUrl,
boolean stopCrawling)
urls2Print - currCrawledUrlNo - maxUrlsNo - lastUrl - stopCrawling -
private static final void printUrl(java.io.PrintWriter printWriter,
java.lang.String url,
int urlNo,
int maxUrlsNo)
Prints the given url to the given PrintWriter.
printWriter - url - urlNo - maxUrlsNo -
public static final java.lang.String makeShorter(java.lang.String string,
int maxLength)
string - maxLength -
private final void prepareSummary(UrlsQueue visitedUrlsQueue)
visitedUrlsQueue -
private static final void printUrlDetails(java.io.PrintWriter printWriter,
int urlNo,
WebPageProperties webPageProperties)
Prints the given URL details to the given PrintWriter.
printWriter - urlNo - webPageProperties -
public Graph getWebGraph()
public boolean isStopCrawling()
public void stopCrawling()
|
|||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||||