[ 
https://issues.apache.org/jira/browse/DROIDS-115?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Paul Rogalinski updated DROIDS-115:
-----------------------------------

    Description: 
The getURI() method does not resolve URLs correctly if a baseUri is provided 
without a trailing slash *and* the relative URL to be resolved does not start 
with "/". Under those circumstances it resolves to: 
http://example.comRelativeUrl. 

Edit: the previous patch solved the problem only partially. 

Modified methods (whole class attached):

    /**
     * Creates a link extractor for the given task.
     *
     * @param base     the task whose URI becomes the initial base URI for
     *                 resolving relative links
     * @param elements maps an element name to the name of the attribute that
     *                 carries the link for that element
     */
    public LinkExtractor(LinkTask base, Map<String, String> elements) {
        // The superclass no-arg constructor is invoked implicitly.
        this.elements = elements;
        this.base = base;
        // Go through the setter so the task URI gets the same trailing-slash
        // treatment as a base URI discovered later while parsing.
        // NOTE(review): setBaseUri is public and non-final in the snippet shown;
        // if this class can be subclassed, an override would run here before
        // the subclass is initialised - confirm.
        setBaseUri(base.getURI());
    }

    /**
     * Handles an opening element: picks up a base URI declaration once, then
     * records an outlink for every configured element/attribute pair that
     * matches the element.
     *
     * @param uri the namespace URI (not used here)
     * @param loc the local element name, compared case-insensitively
     * @param raw the qualified element name (not used here)
     * @param att the attributes of the element
     * @throws SAXException declared by the handler contract
     */
    @Override
    public void startElement(String uri, String loc, String raw, Attributes att)
            throws SAXException {
        // Only the first valid base declaration is honoured; checkBase is
        // cleared once one has been accepted.
        if (checkBase && BASE_ELEMENT.equalsIgnoreCase(loc)) {
            final String baseValue = att.getValue(BASE_ATTRIBUTE);
            if (baseValue != null) {
                try {
                    setBaseUri(new URI(baseValue));
                    log.debug("Found base URI: " + baseUri);
                    checkBase = false;
                } catch (URISyntaxException e) {
                    log.debug("Base URI not valid: " + baseValue);
                }
            }
        }

        for (Map.Entry<String, String> mapping : elements.entrySet()) {
            final String elementName = mapping.getKey();
            if (!elementName.equalsIgnoreCase(loc)) {
                continue;
            }
            final String linkValue = att.getValue(mapping.getValue());
            if (linkValue == null) {
                continue;
            }
            link = getURI(linkValue);
            log.debug("Found element: " + elementName + " with link: " + link);
            if (link != null) {
                addOutlinkURI(link.toString());
                // Reset the per-link state for the next element.
                link = null;
                anchorText = new StringBuilder();
            }
        }
    }

    /**
     * Sets the base URI used to resolve relative links.
     * <p>
     * A base such as {@code http://example.com} has an empty path, and
     * resolving {@code RelativeUrl} against it yields
     * {@code http://example.comRelativeUrl}. To avoid that, a base that has an
     * authority but an empty path is stored with the root path {@code /}
     * instead. This also covers bases with a port or a query string. Bases
     * that already have a path, and bases without an authority (relative or
     * opaque URIs), are stored unchanged.
     *
     * @param baseUri the new base URI; must not be {@code null}
     */
    public void setBaseUri(URI baseUri) {
        URI effective = baseUri;
        final String authority = baseUri.getRawAuthority();
        final String path = baseUri.getRawPath();
        // Test the path component itself rather than "string ends with host":
        // the host may be null, may be followed by a port, or may merely be
        // the last segment of a longer path.
        if (authority != null && path != null && path.length() == 0) {
            final StringBuilder fixed = new StringBuilder();
            if (baseUri.getScheme() != null) {
                fixed.append(baseUri.getScheme()).append(':');
            }
            fixed.append("//").append(authority).append('/');
            if (baseUri.getRawQuery() != null) {
                fixed.append('?').append(baseUri.getRawQuery());
            }
            if (baseUri.getRawFragment() != null) {
                fixed.append('#').append(baseUri.getRawFragment());
            }
            try {
                effective = new URI(fixed.toString());
            } catch (URISyntaxException e) {
                // Fall back to the URI as supplied rather than silently
                // keeping a stale (or unset) base.
                log.error("could not fix base URI", e);
            }
        }
        this.baseUri = effective;
    }



  was:
The getURI() method does not resolve URLs correctly if a baseUri is provided 
without a trailing slash *and* the relative URL to be resolved does not start 
with "/". Under those circumstances it resolves to: 
http://example.comRelativeUrl. 

proposed Patch:

        /**
         * Converts a raw link value into a URI without its fragment.
         * <p>
         * Whitespace is replaced by {@code %20}. Targets starting with
         * "javascript" (in any letter case) are rejected. Targets containing
         * {@code ":/"} are parsed as they are; all others are resolved against
         * {@code baseUri}. A fragment-only target such as {@code #} or
         * {@code #top} resolves to the base document.
         *
         * @param target the raw link value; must not be {@code null}
         * @return the parsed or resolved URI, or {@code null} if the target is
         *         a javascript link or cannot be turned into a URI
         */
        private URI getURI(String target) {
                target = target.replaceAll("\\s", "%20");
                // Case-insensitive prefix test that does not depend on the
                // default locale, unlike toLowerCase().
                if (target.regionMatches(true, 0, "javascript", 0, "javascript".length())) {
                        return null;
                }
                // indexOf/substring instead of split("#")[0]: split returns an
                // empty array for a target made up only of '#' characters.
                final int hash = target.indexOf('#');
                final String withoutFragment = hash < 0 ? target : target.substring(0, hash);
                try {
                        if (target.contains(":/")) {
                                return new URI(withoutFragment);
                        }
                        // The host is null for relative or opaque bases; no
                        // slash needs to be inserted in that case.
                        final String host = baseUri.getHost();
                        final boolean needsSlash = host != null
                                        && baseUri.toString().endsWith(host)
                                        && !withoutFragment.startsWith("/");
                        return baseUri.resolve((needsSlash ? "/" : "") + withoutFragment);
                } catch (Exception e) {
                        // Deliberately best-effort: one malformed link must
                        // not abort the extraction of the remaining links.
                        log.debug("URI not valid: " + target);
                }
                return null;
        }


> LinkExtractor getURI(String target) does not resolve correctly when baseUri 
> is provided
> ---------------------------------------------------------------------------------------
>
>                 Key: DROIDS-115
>                 URL: https://issues.apache.org/jira/browse/DROIDS-115
>             Project: Droids
>          Issue Type: Bug
>          Components: core
>            Reporter: Paul Rogalinski
>         Attachments: LinkExtractor.java
>
>
> The getURI() method does not resolve URLs correctly if a baseUri is provided 
> without a trailing slash *and* the relative URL to be resolved does not 
> start with "/". Under those circumstances it resolves to: 
> http://example.comRelativeUrl. 
> Edit: the previous patch solved the problem only partially. 
> Modified methods (whole class attached):
>     public LinkExtractor(LinkTask base, Map<String, String> elements) {
>         super();
>         this.base = base;
>         this.elements = elements;
>         this.setBaseUri(base.getURI());
>     }
>     @Override
>     public void startElement(String uri, String loc, String raw, Attributes 
> att) throws SAXException {
>         if (checkBase && BASE_ELEMENT.equalsIgnoreCase(loc) && 
> att.getValue(BASE_ATTRIBUTE) != null) {
>             try {
>                 setBaseUri(new URI(att.getValue(BASE_ATTRIBUTE)));
>                 log.debug("Found base URI: " + baseUri);
>                 checkBase = false;
>             } catch (URISyntaxException e) {
>                 log.debug("Base URI not valid: " + 
> att.getValue(BASE_ATTRIBUTE));
>             }
>         }
>         Iterator<String> it = elements.keySet().iterator();
>         String elem, linkAtt;
>         while (it.hasNext()) {
>             elem = it.next();
>             linkAtt = elements.get(elem);
>             if (elem.equalsIgnoreCase(loc) && att.getValue(linkAtt) != null) {
>                 link = getURI(att.getValue(linkAtt));
>                 log.debug("Found element: " + elem + " with link: " + link);
>                 if (link != null) {
>                     addOutlinkURI(link.toString());
>                     link = null;
>                     anchorText = new StringBuilder();
>                 }
>             }
>         }
>     }
>     public void setBaseUri(URI baseUri) {
>         if (baseUri.toString().endsWith(baseUri.getHost())) {
>             try {
>                 this.baseUri = new URI(baseUri.toString() + "/");
>             } catch (URISyntaxException e) {
>                 log.error("could not fix base URI", e);
>             }
>         } else {
>             this.baseUri = baseUri;
>         }
>     }

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.

Reply via email to