Hey, I did a little cleanup so no one's stuck reading impossibly bad code ;) This also has the super-sucky way of integrating with the querying of the Lucene indexes. The biggest problem is that right now it will only work on internally mapped URIs (the uid:xxxxxxx ones). So, in addition to the real merging of queries, URI mapping/lookup should be done too.
Since some people are far too lazy to use patches (or are just that cool ;) ) there's a bzr branch here: https://code.launchpad.net/~kkubasik/beagle/kkubasik-beagle On 9/28/07, Kevin Kubasik <[EMAIL PROTECTED]> wrote: > Hey, I was chatting with DBera last night and we got off on a random > little tangent, anyways, I remembered that I still hadn't shared any > of the code or my thoughts that had started to evolve as far as > supporting the idea of 'desktop tagging'. > > I figured I would attach a copy of the patch that allows you to see > the current ITagProvider (unfortunately this is the majorly dumbed down > interface as I tried to get it integrated, once we have this worked > into the query system, I'll flesh out the API, and make my simple > sample threadsafe etc.)sketchup, I need to abstract or make an > interface for the Tag class, but I got far too tired last night after > my battle with Lucene. > > DBera mentioned that the best place to implement this was probably > inside LuceneQueryDriver, since we are already merging 2 result sets > (the primary and secondary indexes) adding a third datasource > shouldn't be too hard, should it? > > Either way, I tried a couple of things, and I've got a fair idea of > how the process works, I'm just still getting hung up on the different > BitArrays. It seems that as they are the ones holding all the result > sets, to merge results from the tagging backend at the lower level, I > need to figure those out. The other option is always to just build > hits from the tagged URIs and drop any duplicates, but I'm not sure > that's how the response works. > > Anyways, I'd love some feedback/help. This is just the core/super > simple implementation, once I figure out the results merging I'll add > back in the child tags, descriptions, etc. > -- > Cheers, > Kevin Kubasik > http://kubasik.net/blog > > -- Cheers, Kevin Kubasik http://kubasik.net/blog
=== added file 'Util/TagProvider.cs' --- Util/TagProvider.cs 1970-01-01 00:00:00 +0000 +++ Util/TagProvider.cs 2007-09-28 09:06:42 +0000 @@ -0,0 +1,183 @@ +// TagProvider.cs - An interface used to pull tags from a variety of +// sources. +// +// Copyright (C) 2007 Kevin Kubasik <[EMAIL PROTECTED]> +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to +// the following conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+// + +using System; +using System.IO; +using System.Collections; +using System.Collections.Generic; + +using Mono.Data.SqliteClient; +//using ICSharpCode.SharpZipLib.GZip; + + +namespace Beagle.Util +{ + + public interface ITagProvider{ + ITag MakeNewTag(string s); + ITag GetTag(string s); + ITag[] SearchTags(string s); + ITag[] GetTagsForUri(string s); + } + public interface ITag{ + String GetFirstUri(); + String[] GetAllUri(); + void AddUri(string s); + void DeleteUri(string s); + + } + public class BeagleTag: ITag { + string name; + SqliteConnection connection = null; + public BeagleTag (){ + name = ""; + } + public BeagleTag(SqliteConnection conn){ + connection = conn; + name= ""; + } + public BeagleTag(SqliteConnection conn, string argname){ + connection = conn; + name = argname; + } + public String GetFirstUri(){ + SqliteCommand scomm = new SqliteCommand(String.Format("select * from tags where tag='{0}' order by uri limit 1;",name),connection); + + SqliteDataReader sdr = scomm.ExecuteReader(); + if(sdr.Read()) + return sdr.GetString(0); + return null; + } + public String[] GetAllUri(){ + List<string> l = new List<string>(); + SqliteCommand scomm = new SqliteCommand(String.Format("select * from tags where tag='{0}' order by uri;",name),connection); + SqliteDataReader sdr = scomm.ExecuteReader(); + + while(sdr.Read()){ + l.Add(sdr.GetString(0)); + } + return l.ToArray(); + } + public void AddUri(string s){ + SqliteCommand scomm = new SqliteCommand(String.Format("insert into tags values ('{0}','{1}') ;",s,name),connection); + int i = scomm.ExecuteNonQuery(); + + } + public void DeleteUri(string s){ + SqliteCommand scomm = new SqliteCommand(String.Format(" delete from tags where tag='{1}' and uri='{0}' ;",s,name),connection); + int i = scomm.ExecuteNonQuery(); + } + + } + + public class BeagleTagProvider : ITagProvider + { + string tag_file = null; + SqliteConnection connection = null; + + public BeagleTagProvider() + { + Init(); + } + public virtual 
Beagle.Util.ITag GetTag (string s) + { + return new BeagleTag(connection,s); + } + + public virtual Beagle.Util.ITag[] GetTagsForUri (string s) + { + List<ITag> l = new List<ITag>(); + SqliteCommand scomm = new SqliteCommand(String.Format("select * from tags where uri='{0}' order by tag;",s),connection); + SqliteDataReader sdr = scomm.ExecuteReader(); + + while(sdr.Read()){ + l.Add(new BeagleTag(connection,sdr.GetString(1))); + } + return l.ToArray(); + //return new Tag[10]; + } + + public virtual Beagle.Util.ITag MakeNewTag (string s) + { + + return new BeagleTag(connection,s); + } + + public virtual Beagle.Util.ITag[] SearchTags (string s) + { + List<ITag> l = new List<ITag>(); + SqliteCommand scomm = new SqliteCommand(String.Format("select * from tags where tag LIKE '%{0}%' order by tag;",s),connection); + SqliteDataReader sdr = scomm.ExecuteReader(); + + while(sdr.Read()){ + l.Add(new BeagleTag(connection,sdr.GetString(1))); + } + return l.ToArray(); + //return new Tag[10]; + } + + + private void Init(){ + tag_file = Path.Combine(PathFinder.StorageDir, "beagletags.db"); + if(!File.Exists(tag_file)){ + File.Create(tag_file); + MakeTables(); + }else { + try { + connection = Open (tag_file); + } catch (Exception e) { + Log.Debug (e, "Exception opening tags {0}", tag_file); + } + } + + } + + private SqliteConnection Open (string db_filename) + { + SqliteConnection connection = new SqliteConnection (); + connection.ConnectionString = "version=" + ExternalStringsHack.SqliteVersion + + ",encoding=UTF-8,URI=file:" + db_filename; + connection.Open (); + return connection; + } + private void MakeTables(){ + try { + connection = Open (tag_file); + } catch (Exception e) { + Log.Debug (e, "Exception opening tags {0}", tag_file); + } + SqliteCommand scomm = new SqliteCommand("CREATE TABLE tags ( uri STRING NOT NULL, tag STRING NOT NULL);",connection); + scomm.ExecuteNonQuery(); + } + + public static void Main(string[] args){ + BeagleTagProvider btp = new BeagleTagProvider(); 
+ ITag t = btp.MakeNewTag("Tag"); + t.AddUri("testmoreuri"); + Console.WriteLine(t.GetFirstUri()); + Console.WriteLine("Ran"); + } + } +} === modified file 'Util/Makefile.am' --- Util/Makefile.am 2007-08-09 15:24:30 +0000 +++ Util/Makefile.am 2007-09-28 02:16:30 +0000 @@ -77,6 +77,7 @@ $(srcdir)/StringMatcher.cs \ $(srcdir)/SystemInformation.cs \ $(srcdir)/SystemPriorities.cs \ + $(srcdir)/TagProvider.cs \ $(srcdir)/TeeTextWriter.cs \ $(srcdir)/ThreadPond.cs \ $(srcdir)/Timeline.cs \ === modified file 'beagled/LuceneQueryingDriver.cs' --- beagled/LuceneQueryingDriver.cs 2007-08-05 16:10:39 +0000 +++ beagled/LuceneQueryingDriver.cs 2007-09-28 09:08:00 +0000 @@ -49,7 +49,7 @@ public class LuceneQueryingDriver : LuceneCommon { - static public bool Debug = false; + static public bool Debug = true; public delegate bool UriFilter (Uri uri); public delegate double RelevancyMultiplier (Hit hit); @@ -143,9 +143,10 @@ { if (Debug) Logger.Log.Debug ("###### {0}: Starting low-level queries", IndexName); - + Stopwatch total, a, b, c, d, e, f; - + //ITagProvider btp = new Beagle.Util.BeagleTagProvider(); + List<Uri> taggeduris = new List<Uri>(); total = new Stopwatch (); a = new Stopwatch (); b = new Stopwatch (); @@ -156,12 +157,14 @@ total.Start (); a.Start (); - + //Our tag provider, eventually will probably be dynamic in some sense. 
+ ITagProvider tag_provider = new BeagleTagProvider(); // Assemble all of the parts into a bunch of Lucene queries ArrayList primary_required_part_queries = null; ArrayList secondary_required_part_queries = null; - + List<string> tagprovider_part_queries = null; + LNS.BooleanQuery primary_prohibited_part_query = null; LNS.BooleanQuery secondary_prohibited_part_query = null; @@ -185,17 +188,23 @@ if (primary_part_query == null) continue; - + switch (part.Logic) { case QueryPartLogic.Required: if (primary_required_part_queries == null) { primary_required_part_queries = new ArrayList (); secondary_required_part_queries = new ArrayList (); + tagprovider_part_queries = new List<string>(); } primary_required_part_queries.Add (primary_part_query); secondary_required_part_queries.Add (secondary_part_query); + if(part != null && part.GetType().Equals((new QueryPart_Property()).GetType())) + if(((Beagle.QueryPart_Property) part).Key =="beagle:tag") + tagprovider_part_queries.Add(((Beagle.QueryPart_Property) part).Value); + + if (part_hit_filter != null) all_hit_filters.Add (part_hit_filter); @@ -220,8 +229,26 @@ break; } + //Well also query to see if any of the search terms are tags + foreach(Term tempterm in term_list){ + tagprovider_part_queries.Add(tempterm.Text()); + Log.Debug("Adding {0} to the tag queries",tempterm.Text()); + } + //Actually build a list of tags, here we hit the tag provider + List<ITag> tags = new List<ITag>(); + foreach(string temps in tagprovider_part_queries){ + tags.AddRange(tag_provider.SearchTags(temps)); + Log.Debug("Searching {0}",temps); + Log.Debug("First Found: {0}",tag_provider.GetTag(temps).GetFirstUri()); + } + //Build a list of all the Uri's associated with any tags found in the + //query. 
+ foreach(ITag t in tags) + foreach(string temps in t.GetAllUri()) + taggeduris.Add(UriFu.EscapedStringToUri(temps)); } - + + a.Stop (); if (Debug) Log.Debug ("###### {0}: Building queries took {1}", IndexName, a); @@ -265,19 +292,23 @@ c.Start (); // Possibly create our whitelists from the search subset. - + + LuceneBitArray primary_whitelist = null; LuceneBitArray secondary_whitelist = null; + + if (search_subset_uris != null && search_subset_uris.Count > 0) { primary_whitelist = new LuceneBitArray (primary_searcher); if (secondary_searcher != null) secondary_whitelist = new LuceneBitArray (secondary_searcher); - + foreach (Uri uri in search_subset_uris) { primary_whitelist.AddUri (uri); if (secondary_whitelist != null) secondary_whitelist.AddUri (uri); + } primary_whitelist.FlushUris (); if (secondary_whitelist != null) @@ -301,8 +332,10 @@ primary_blacklist.Join (secondary_blacklist); } } - + Lucene.Net.Search.Query qer = (Lucene.Net.Search.Query) primary_required_part_queries[primary_required_part_queries.Count-1]; + LuceneBitArray lba = new LuceneBitArray(primary_searcher,qer); + Log.Debug("Adding {0} to required queries",qer); // Combine our whitelist and blacklist into just a whitelist. if (primary_blacklist != null) { @@ -353,6 +386,8 @@ Logger.Log.Debug ("###### {0}: Low-level queries finished in {1}", IndexName, d); e.Start (); + + Log.Debug ("is primary matches null {0} does it contain a true {1}", primary_matches, primary_matches.ContainsTrue ()); // Only generate results if we got some matches if (primary_matches != null && primary_matches.ContainsTrue ()) { GenerateQueryResults (primary_reader, @@ -368,10 +403,15 @@ } e.Stop (); - + //This sucks and is expensive, it only works on internal uri's (uid:xxxxxx) + ICollection collOfTagHits = GetHitsForUris(taggeduris); + foreach(Hit h in collOfTagHits){ + //We should not be sending the same Uri twice. 
+ result.Add(collOfTagHits); + } if (Debug) Log.Debug ("###### {0}: Query results generated in {1}", IndexName, e); - + // // Finally, we clean up after ourselves. // @@ -489,22 +529,25 @@ p_matches = new LuceneBitArray (primary_searcher); if (pq != null) { p_matches.Or (pq); - if (primary_whitelist != null) - p_matches.And (primary_whitelist); +// if (primary_whitelist != null) +// p_matches.And (primary_whitelist); } - + Log.Debug("Primary Query {0}",pq); + s_matches = new LuceneBitArray (secondary_searcher); if (sq != null) { s_matches.Or (sq); if (secondary_whitelist != null) s_matches.And (secondary_whitelist); } - + MatchInfo info; info = new MatchInfo (); info.PrimaryMatches = p_matches; info.SecondaryMatches = s_matches; info.RestrictBy (null); // a hack to initialize the UpperBound + Log.Debug("MatchInfo {0}",info.PrimaryMatches); + Log.Debug("Count {0}",p_matches.Count); match_info_list.Add (info); } @@ -628,7 +671,7 @@ e = new Stopwatch (); total.Start (); - + ArrayList final_list_of_hits = null; // This is used only for scoring @@ -878,6 +921,8 @@ uri = GetUriFromDocument (doc); if (! uri_filter (uri)) continue; + + Log.Debug("Heres a primary match Uri {0}",uri); } // Get the actual hit now === modified file 'beagled/PropertyKeywordFu.cs' --- beagled/PropertyKeywordFu.cs 2007-02-18 22:23:47 +0000 +++ beagled/PropertyKeywordFu.cs 2007-09-28 07:21:19 +0000 @@ -124,6 +124,7 @@ property_table.Add ("filetype", new PropertyDetail (PropertyType.Keyword, "beagle:FileType", "Type of content for HitType File")); + property_table.Add("tag",new PropertyDetail(PropertyType.Text, "beagle:tag", "Tag of file")); } public static void RegisterMapping (PropertyKeywordMapping mapping)
_______________________________________________ Dashboard-hackers mailing list Dashboard-hackers@gnome.org http://mail.gnome.org/mailman/listinfo/dashboard-hackers