What do you find that is strange?

On Aug 21, 7:32 am, Stef Mientki <stef.mien...@gmail.com> wrote:
> > Graphical representation of links or pages that don't get linked to.
>
> I tried to test the links (with two algorithms, code below) in a generated 
> webpage, but the results I
> get are very weird.
> Probably one of you knows a better way?
>
> cheers,
> Stef
>
> from BeautifulSoup import BeautifulSoup
> from urllib        import urlopen
> from httplib       import HTTP
> from urlparse      import urlparse
>
def Check_URL_1 ( URL ) :
  """Return True if an HTTP GET of *URL* answers with status 200.

  Any failure to open the URL (unreachable host, malformed URL, protocol
  error) yields False instead of raising.
  """
  try:
    fh = urlopen ( URL )
  # narrowed from a bare "except:", which also swallowed KeyboardInterrupt
  # and SystemExit and could hide real bugs
  except Exception :
    return False
  try:
    return fh.code == 200
  finally:
    fh.close()      # the handle was previously leaked on every call
>
def Check_URL_2 ( URL ) :
  """Return True if a HEAD request for *URL* answers with status 200.

  Cheaper than Check_URL_1 because HEAD transfers no response body.
  Returns False on any network failure instead of raising, mirroring
  Check_URL_1 (previously a dead host aborted the whole link scan).
  """
  p = urlparse ( URL )
  try:
    h = HTTP ( p[1] )                  # p[1] = netloc, p[2] = path
    h.putrequest ( 'HEAD', p[2] )
    h.endheaders()
    status = h.getreply()[0]
    h.close()                          # connection was previously leaked
    return status == 200
  except Exception :
    return False
>
> def Verify_Links ( URL ) :
>   Parts   = URL.split('/')
>   Site    = '/'.join ( Parts [:3] )
>   Current = '/'.join ( Parts [:-1] )
>
>   fh = urlopen ( URL )
>   lines = fh.read ()
>   fh.close()
>
>   Soup = BeautifulSoup ( lines )
>   hrefs = lines = Soup.findAll ( 'a' )
>
>   for href in hrefs :
>     href = href [ 'href' ] #[:-1]     ## <== remove "#" to generate all errors
>
>     if href.startswith ( '/' ) :
>       href = Site + href
>     elif href.startswith ('#' ) :
>       href = URL + href
>     elif href.startswith ( 'http' ) :
>       pass
>     else :
>       href = Current + href
>
>     try:
>       fh = urllib.urlopen ( href )
>     except :
>       pass
>     print Check_URL_1 ( href ), Check_URL_2 ( href ), href
>
URL = 'http://127.0.0.1:8000/welcome/default/index'
# Verify_Links prints its report and returns None; the original
# "fh = Verify_Links(URL)" binding was misleading and unused.
Verify_Links ( URL )

Reply via email to