[
https://issues.apache.org/jira/browse/DROIDS-115?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Paul Rogalinski updated DROIDS-115:
-----------------------------------
Description:
The getURI() method won't resolve the URL correctly if a baseUri is provided
without the trailing slash *and* the relative URL to be resolved does not start
with "/". Under those circumstances it will resolve to:
http://example.comRelativeUrl.
Edit: the previous patch solved the problem only partially.
Modified methods (whole class attached):
/**
 * Creates a link extractor for the given task.
 *
 * @param base     the task being processed; its URI becomes the initial base URI
 * @param elements lookup table consulted for every start element: the key is
 *                 compared with the element's local name and the value names the
 *                 attribute that is read as the link
 */
public LinkExtractor(LinkTask base, Map<String, String> elements) {
    // The superclass no-arg constructor is invoked implicitly.
    this.elements = elements;
    this.base = base;
    // Use the setter instead of assigning the field so that whatever
    // adjustment setBaseUri applies is also applied to the task's own URI.
    setBaseUri(base.getURI());
}
/**
 * Handles the start of an element: picks up the document's base URI the first
 * time a valid one is seen, then records an outlink for every configured
 * element/attribute pair that matches this element.
 *
 * @param uri the namespace URI reported by the parser (not used here)
 * @param loc the local name; compared case-insensitively with the configured names
 * @param raw the qualified name reported by the parser (not used here)
 * @param att the attributes attached to the element
 * @throws SAXException declared by the handler contract
 */
@Override
public void startElement(String uri, String loc, String raw, Attributes att)
        throws SAXException {
    // Base element handling: only until the first syntactically valid base is found.
    if (checkBase && BASE_ELEMENT.equalsIgnoreCase(loc)) {
        final String baseValue = att.getValue(BASE_ATTRIBUTE);
        if (baseValue != null) {
            try {
                setBaseUri(new URI(baseValue));
                log.debug("Found base URI: " + baseUri);
                checkBase = false;
            } catch (URISyntaxException e) {
                // An unparsable base is ignored; a later base element may still be accepted.
                log.debug("Base URI not valid: " + baseValue);
            }
        }
    }

    // Link handling: key = element name, value = attribute carrying the link.
    for (Map.Entry<String, String> mapping : elements.entrySet()) {
        final String elementName = mapping.getKey();
        if (!elementName.equalsIgnoreCase(loc)) {
            continue;
        }
        final String linkValue = att.getValue(mapping.getValue());
        if (linkValue == null) {
            continue;
        }
        link = getURI(linkValue);
        log.debug("Found element: " + elementName + " with link: " + link);
        if (link == null) {
            continue;
        }
        addOutlinkURI(link.toString());
        // Reset the per-link state once the outlink has been recorded.
        link = null;
        anchorText = new StringBuilder();
    }
}
/**
 * Stores the base URI against which relative links are resolved.
 *
 * <p>A URI that has an authority but an empty path (for example
 * {@code http://example.com} or {@code http://example.com:8080}) is stored with
 * its path set to {@code "/"}. DROIDS-115 reports that resolving a relative
 * reference against such a base otherwise produces
 * {@code http://example.comRelativeUrl}. Every other URI is stored unchanged.
 *
 * @param baseUri the base URI to use; must not be {@code null}
 */
public void setBaseUri(URI baseUri) {
    final String authority = baseUri.getRawAuthority();
    final String path = baseUri.getRawPath();
    // Inspect the path component instead of comparing the end of the string with
    // the host: the host is null for relative or opaque URIs, a port suffix hides
    // the host from an endsWith test, and a path may itself end in the host name.
    final boolean lacksRootPath =
            authority != null && (path == null || path.length() == 0);
    if (!lacksRootPath) {
        this.baseUri = baseUri;
        return;
    }

    // Rebuild from the raw components so that a query or fragment keeps its place
    // after the inserted "/" and existing percent-escapes are left untouched.
    final StringBuilder fixed = new StringBuilder();
    if (baseUri.getScheme() != null) {
        fixed.append(baseUri.getScheme()).append(':');
    }
    fixed.append("//").append(authority).append('/');
    if (baseUri.getRawQuery() != null) {
        fixed.append('?').append(baseUri.getRawQuery());
    }
    if (baseUri.getRawFragment() != null) {
        fixed.append('#').append(baseUri.getRawFragment());
    }

    try {
        this.baseUri = new URI(fixed.toString());
    } catch (URISyntaxException e) {
        log.error("could not fix base URI", e);
        // Keep the caller's URI rather than leaving the field unassigned.
        this.baseUri = baseUri;
    }
}
was:
The getURI() method won't resolve the URL correctly if a baseUri is provided
without the trailing slash *and* the relative URL to be resolved does not start
with "/". Under those circumstances it will resolve to:
http://example.comRelativeUrl.
proposed Patch:
/**
 * Turns a raw link value into a URI, resolving it against {@code baseUri} when
 * it is not absolute. Whitespace is replaced by {@code %20} and any fragment
 * ({@code #...}) is dropped before parsing.
 *
 * @param target the raw attribute value; must not be {@code null}
 * @return the parsed or resolved URI without its fragment, or {@code null} when
 *         the value is a {@code javascript:} link or cannot be parsed
 */
private URI getURI(String target) {
    target = target.replaceAll("\\s", "%20");

    // Case-insensitive scheme test that does not depend on the default locale
    // (toLowerCase() maps 'I' to a dotless i under a Turkish locale). The colon
    // is included so relative links like "javascript-guide.html" are kept.
    final String scriptScheme = "javascript:";
    if (target.regionMatches(true, 0, scriptScheme, 0, scriptScheme.length())) {
        return null;
    }

    try {
        // Cut at the first '#'. Unlike split("#")[0] this also copes with a bare "#".
        final int hash = target.indexOf('#');
        final String reference = hash < 0 ? target : target.substring(0, hash);

        if (target.contains(":/")) {
            // Treated as absolute: parse on its own.
            return new URI(reference);
        }

        // A base with an authority but no path needs a leading "/" on the
        // reference. The path is inspected directly because the host can be null
        // and a string comparison with the host misjudges ports and paths.
        final String basePath = baseUri.getRawPath();
        final boolean baseLacksPath = baseUri.getRawAuthority() != null
                && (basePath == null || basePath.length() == 0);
        final String prefix = baseLacksPath && !target.startsWith("/") ? "/" : "";
        return baseUri.resolve(prefix + reference);
    } catch (Exception e) {
        // Deliberately broad: one malformed link must not abort link extraction.
        log.debug("URI not valid: " + target);
    }
    return null;
}
> LinkExtractor getURI(String target) does not resolve correctly when baseUri
> is provided
> ---------------------------------------------------------------------------------------
>
> Key: DROIDS-115
> URL: https://issues.apache.org/jira/browse/DROIDS-115
> Project: Droids
> Issue Type: Bug
> Components: core
> Reporter: Paul Rogalinski
> Attachments: LinkExtractor.java
>
>
> The getURI() method won't resolve the URL correctly if a baseUri is provided
> without the trailing slash *and* the relative URL to be resolved does not
> start with "/". Under those circumstances it will resolve to:
> http://example.comRelativeUrl.
> Edit: the previous patch solved the problem only partially.
> Modified methods (whole class attached):
> public LinkExtractor(LinkTask base, Map<String, String> elements) {
> super();
> this.base = base;
> this.elements = elements;
> this.setBaseUri(base.getURI());
> }
> @Override
> public void startElement(String uri, String loc, String raw, Attributes
> att) throws SAXException {
> if (checkBase && BASE_ELEMENT.equalsIgnoreCase(loc) &&
> att.getValue(BASE_ATTRIBUTE) != null) {
> try {
> setBaseUri(new URI(att.getValue(BASE_ATTRIBUTE)));
> log.debug("Found base URI: " + baseUri);
> checkBase = false;
> } catch (URISyntaxException e) {
> log.debug("Base URI not valid: " +
> att.getValue(BASE_ATTRIBUTE));
> }
> }
> Iterator<String> it = elements.keySet().iterator();
> String elem, linkAtt;
> while (it.hasNext()) {
> elem = it.next();
> linkAtt = elements.get(elem);
> if (elem.equalsIgnoreCase(loc) && att.getValue(linkAtt) != null) {
> link = getURI(att.getValue(linkAtt));
> log.debug("Found element: " + elem + " with link: " + link);
> if (link != null) {
> addOutlinkURI(link.toString());
> link = null;
> anchorText = new StringBuilder();
> }
> }
> }
> }
> public void setBaseUri(URI baseUri) {
> if (baseUri.toString().endsWith(baseUri.getHost())) {
> try {
> this.baseUri = new URI(baseUri.toString() + "/");
> } catch (URISyntaxException e) {
> log.error("could not fix base URI", e);
> }
> } else {
> this.baseUri = baseUri;
> }
> }
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.