Hi all, I am also facing the same problem and am stuck at the same place: my GWT app is still not being crawled, even after applying all the code described at https://developers.google.com/webmasters/ajax-crawling/. Please help me with some code or a link.
On Friday, March 30, 2012 10:01:56 PM UTC+5:30, erebrus wrote: > > Hi all, > I was reading https://developers.google.com/webmasters/ajax-crawling/ > on how to make ajax apps (consequently gwt apps) crawlable. > I took the code from google (summarized in point 3 of "How to create > an HTML snapshot?" to create a filter (that returns html from ajax > using HtmlUnit) and changed the web.xml accordingly. I created a new > GWT project with example code and applied the filter and the web.xml > there. It worked directly. > However, I did exactly the same on the gwt app I want to make > searchable and it doesn't work. For some reason, the only requests the > filter gets are the ones to the ones for the rpc. > I think I must be missing a terribly simple detail, but I'm a bit lost > on where to go from here. > > > Following you can see the code for the filter (CrawlServlet) and the > web.xml > > package crawltest.server; > > import com.gargoylesoftware.htmlunit.BrowserVersion; > import com.gargoylesoftware.htmlunit.WebClient; > import com.gargoylesoftware.htmlunit.html.HtmlPage; > > import java.io.IOException; > import java.io.PrintWriter; > import java.io.UnsupportedEncodingException; > import java.net.URLDecoder; > import java.util.logging.Logger; > > import javax.servlet.Filter; > import javax.servlet.FilterChain; > import javax.servlet.FilterConfig; > import javax.servlet.ServletException; > import javax.servlet.ServletRequest; > import javax.servlet.ServletResponse; > import javax.servlet.http.HttpServletRequest; > import javax.servlet.http.HttpServletResponse; > > /** > * Servlet that makes this application crawlable > */ > public final class CrawlServlet implements Filter { > > private static final Logger logger = > Logger.getLogger(CrawlServlet.class > .getName()); > private static String rewriteQueryString(String queryString) throws > UnsupportedEncodingException { > StringBuilder queryStringSb = new StringBuilder(queryString); > int i = 
queryStringSb.indexOf("&_escaped_fragment_"); > if (i != -1) { > StringBuilder tmpSb = new > StringBuilder(queryStringSb.substring(0, i)); > tmpSb.append("#!"); > tmpSb.append(URLDecoder.decode(queryStringSb.substring(i + 20, > queryStringSb.length()),"UTF-8")); > queryStringSb = tmpSb; > } > > i = queryStringSb.indexOf("_escaped_fragment_"); > if (i != -1) { > StringBuilder tmpSb = new > StringBuilder(queryStringSb.substring(0, i)); > tmpSb.append("#!"); > tmpSb.append(URLDecoder.decode(queryStringSb.substring(i + 19, > queryStringSb.length()), "UTF-8")); > queryStringSb = tmpSb; > } > if (queryStringSb.indexOf("#!") != 0) { > queryStringSb.insert(0, '?'); > } > queryString = queryStringSb.toString(); > > > > return queryString; > } > > private FilterConfig filterConfig = null; > > /** > * Destroys the filter configuration > */ > public void destroy() { > this.filterConfig = null; > } > > /** > * Filters all requests and invokes headless browser if necessary > */ > public void doFilter(ServletRequest request, ServletResponse > response, > FilterChain chain) throws IOException { > System.out.println("crawl"); > if (filterConfig == null) { > return; > } > System.out.println("crawl"); > HttpServletRequest req = (HttpServletRequest) request; > HttpServletResponse res = (HttpServletResponse) response; > String queryString = req.getQueryString(); > System.out.println("query:"+queryString); > System.out.println("param:"+req.getParameterMap().toString()); > System.out.println("req:"+req); > if ((queryString != null) && > (queryString.contains("_escaped_fragment_"))) { > System.out.println("in!!"); > StringBuilder pageNameSb = new StringBuilder("http://"); > pageNameSb.append(req.getServerName()); > if (req.getServerPort() != 0) { > pageNameSb.append(":"); > pageNameSb.append(req.getServerPort()); > } > pageNameSb.append(req.getRequestURI()); > queryString = rewriteQueryString(queryString); > pageNameSb.append(queryString); > > final WebClient webClient = new > 
WebClient(BrowserVersion.FIREFOX_3); > webClient.setJavaScriptEnabled(true); > String pageName = pageNameSb.toString(); > HtmlPage page = webClient.getPage(pageName); > webClient.waitForBackgroundJavaScriptStartingBefore(2000); > > res.setContentType("text/html;charset=UTF-8"); > PrintWriter out = res.getWriter(); > out.println("<hr>"); > out.println("<center><h3>You are viewing a non-interactive page > that is intended for the crawler. You probably want to see this page: > <a href=\"" > + pageName + "\">" + pageName + "</a></h3></center>"); > out.println("<hr>"); > > out.println(page.asXml()); > webClient.closeAllWindows(); > out.close(); > > } else { > try { > chain.doFilter(request, response); > } catch (ServletException e) { > e.printStackTrace(); > } > } > } > > /** > * Initializes the filter configuration > */ > public void init(FilterConfig filterConfig) { > this.filterConfig = filterConfig; > } > > } > > > web-xml: > > <?xml version="1.0" encoding="UTF-8"?> > <web-app xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" > xsi:schemaLocation="http://java.sun.com/xml/ns/javaee > http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd" > version="2.5" xmlns="http://java.sun.com/xml/ns/javaee"> > > <filter> > <filter-name>CrawlServlet</filter-name> > <filter-class>crawltest.server.CrawlServlet</filter-class> > </filter> > > <filter-mapping> > <filter-name>CrawlServlet</filter-name> > <url-pattern>/*</url-pattern> > </filter-mapping> > > <!-- Servlets --> > > <!-- Default page to serve --> > <welcome-file-list> > <welcome-file>CrawlTest.html</welcome-file> > </welcome-file-list> > > </web-app> > > > > -- You received this message because you are subscribed to the Google Groups "Google Web Toolkit" group. To unsubscribe from this group and stop receiving emails from it, send an email to google-web-toolkit+unsubscr...@googlegroups.com. To post to this group, send email to google-web-toolkit@googlegroups.com. 
Visit this group at http://groups.google.com/group/google-web-toolkit. For more options, visit https://groups.google.com/d/optout.