I used this library in a proof-of-concept (POC) at work. We needed to extract all of the links from a page, including both hyperlinks and email addresses, and display them. This little gem fit the bill, and it quickly parsed the text file I used.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.bluelotussoftware.autolink; | |
import java.io.FileNotFoundException; | |
import java.io.FileReader; | |
import java.io.IOException; | |
import java.text.MessageFormat; | |
import java.util.EnumSet; | |
import java.util.List; | |
import org.apache.commons.io.IOUtils; | |
import org.nibor.autolink.LinkExtractor; | |
import org.nibor.autolink.LinkSpan; | |
import org.nibor.autolink.LinkType; | |
/** | |
* | |
* @author John Yeary <jyeary@bluelotussoftware.com> | |
* @version 1.0.0 | |
*/ | |
public class ExtractorExample { | |
public static void main(String[] args) throws FileNotFoundException, IOException { | |
List<String> lines = IOUtils.readLines(new FileReader("target/classes/links2.html")); | |
LinkExtractor linkExtractor = LinkExtractor.builder() | |
.linkTypes(EnumSet.of(LinkType.URL, LinkType.WWW, LinkType.EMAIL)) | |
.build(); | |
lines.forEach((String line) -> { | |
Iterable<LinkSpan> links = linkExtractor.extractLinks(line); | |
for (LinkSpan link : links) { | |
System.out.println(MessageFormat.format("{0} : {1}", link.getType(), line.substring(link.getBeginIndex(), link.getEndIndex()))); | |
} | |
}); | |
} | |
} |
1 2 3 4 5 6 7 8 9 10 11 12 | <dependencies> <dependency> <groupId>org.nibor.autolink</groupId> <artifactId>autolink</artifactId> <version>0.6.0</version> </dependency> <dependency> <groupId>commons-io</groupId> <artifactId>commons-io</artifactId> <version>2.5</version> </dependency> </dependencies> |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 | < a href = "`URLTrackRaw(" http://www.spanningsync.com/",,)`">Spanning Sync - Sync iCal and Google Calendar</ a > < a href = "`URLTrackRaw(" http://www.prototypejs.org/",,)`">Prototype Javascript Library easing the development of dynamic web applications</ a > < a href = "`URLTrackRaw(" http://blog.stevenlevithan.com/archives/faster-than-innerhtml",,)`">When innerHTML isn’t Fast Enough</ a > |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 | URL : http://www.dimdim.com/ URL : http://www.neooffice.org/neojava/en/index.php URL : http://www.openoffice.org/ URL : http://www.zimbra.com/ URL : http://bazaar-vcs.org/ URL : http://www.versionsapp.com/ URL : http://cocoondev.org/daisy/index.html URL : http://www.bacula.org/ URL : http://www.screencast-o-matic.com/ URL : http://cruisecontrol.sourceforge.net/ URL : http://getfiregpg.org/ URL : http://hotwayd.sourceforge.net/ URL : http://jott.com/ URL : http://www.spanningsync.com/ URL : http://virtualbox.org/ URL : http://www.nightskyinfo.com/ URL : http://www.kistlermorse.com/ URL : http://www.clevest.com/ URL : http://www.licenseonline.com/ URL : http://www.logodesigncreation.com/ URL : http://www.dhtmlgoodies.com/index.html URL : http://www.prototypejs.org/ URL : http://www.openrico.org/ URL : http://script.aculo.us/ URL : http://blog.stevenlevithan.com/archives/faster-than-innerhtml |
The project has been uploaded to Bitbucket and can be found here: autolink-java-extractor.