Friendly greetings! I've been stuck on the same problem for many days (an hour per day) and I can't find a solution. I have 2 indexes (see the source code below). There is no problem with the "parsed" index, but the "url" index never returns any result. I don't know if it's because the URL isn't indexed or because the query on the index is wrong. Or something else?
Could you please take a look and see what's wrong ? thank you (you can try to run the script, it works) require 'nokogiri' require 'open-uri' require 'neography' #init neography @neo = Neography::Rest.new neo_root = @neo.get_root domaine = 'http://www.over-blog.com/' parsed_idx = "ob_parsed_idx" url_idx = "ob_url_idx" #FIRST RUN #ob_root_node = @neo.create_node("domaine" => domaine, "parsed" => "false", "url" => domaine) #@neo.create_relationship("obgraph", neo_root, ob_root_node) #pidx = @neo.create_node_index(parsed_idx) #uidx = @neo.create_node_index(url_idx) #@neo.add_node_to_index(parsed_idx, "parsed", "false", ob_root_node) ##@neo.add_node_to_index(url_idx, "url", domaine, ob_root_node) #node_to_parse = @neo.get_node_index(parsed_idx, "parsed", "false") ob_root_node = @neo.traverse(neo_root, "nodes", { "relationships" => [{"type"=> "obgraph", "direction" => "out" }], "depth" => 1}) #node_to_parse = @neo.traverse(ob_root_node, "nodes", { "relationships" => [{"type"=> "link", "direction" => "out" }] }) node_to_parse = @neo.get_node_index(parsed_idx, "parsed", "false") #print @neo.list_node_indexes node_to_parse.each do |node| url_to_parse = @neo.get_node_properties(node)["url"] printf("exploring : %s\n", url_to_parse) doc = Nokogiri::HTML(open(url_to_parse)) @neo.set_node_properties(node, {"parsed" => "true"}) @neo.remove_node_from_index(parsed_idx, node) @neo.add_node_to_index(parsed_idx, "parsed", "true", node) doc.xpath('//a').each do |link| link_text = link.content.strip() link_url = link['href'].to_s().strip() link_title = link['title'].to_s().strip() link_url = link_url.sub(/#.*$/, "") if(link_url =~ /^\/.*/) link_url = link_url.sub(/^\//, '') link_url = domaine + link_url end if(link_text == '') link_text = link_title end #skiping empty stuff next if link_url.empty? next if link_text.empty? 
node_found = @neo.find_node_index(url_idx, "url", link_url) #node_found = @neo.traverse(ob_root_node, "nodes", { "relationships" => [{"direction" => "out" }], "prune evaluator" => {"language" => "javascript", "body" => "position.endNode().getProperty(url) == #{link_url};"}, "return filter" => {"language" => "builtin", "name" => "all but start node"}}) print "\nsearching url #{link_url}\n" printf("node_found : %s \n", node_found) if(node_found.nil?) printf("create node %s\n", link_url) nnode = @neo.create_node("parsed" => "false", "url" => link_url) @neo.add_node_to_index(url_idx, "url", link_url, nnode) @neo.add_node_to_index(parsed_idx, "parsed", "false", nnode) else printf("node_found : %s \n", node_found) end nrel = @neo.create_relationship("link", node, nnode) @neo.set_relationship_properties(nrel, {"text" => link_text}) #printf("%s => %s\n", link_text, link_url) end sleep(1.0) end -- Laurent "ker2x" Laborde Sysadmin & DBA at http://www.over-blog.com/ _______________________________________________ Neo4j mailing list User@lists.neo4j.org https://lists.neo4j.org/mailman/listinfo/user