TS generates a pretty bad sql query for this index definition:

  define_index do
    indexes :name
    indexes 'trim(trailing \'0123456789\' from
"polymerases"."name")', :as => :non_numeric_name
    indexes aliases.name, :as => :alias
    indexes organism.name, :as => :organism
    indexes historical_note
    indexes family.name, :as => :family
    indexes references.title, :as => :reference_title
    indexes references.abstract, :as => :abstract
    indexes references.authors, :as => :authors


    has family_id, organism_id, contact_id, created_at, updated_at,
creator_id
    has is_wild_type, :type => :boolean
    has is_stub, :type => :boolean
    has results.question(:id), :as => :question_id
    has results.question(:topic_id), :as => :topic_ids

    set_property :delta => :delayed
  end
which generates:
 SELECT "polymerases"."id" * 2 + 0 AS "id" , "polymerases"."name" AS
"name", trim(trailing '0123456789' from "polymerases"."name") AS
"non_numeric_name",
array_to_string(array_accum(COALESCE("polymerase_aliases"."name",
'0')), ' ') AS "alias", "organisms"."name" AS "organism",
"polymerases"."historical_note" AS "historical_note",
"families"."name" AS "family",
array_to_string(array_accum(COALESCE("references"."title", '0')), ' ')
AS "reference_title",
array_to_string(array_accum(COALESCE("references"."abstract", '0')), '
') AS "abstract",
array_to_string(array_accum(COALESCE("references"."authors", '0')), '
') AS "authors", "polymerases"."id" AS "sphinx_internal_id",
1939935772 AS "class_crc", '1939935772' AS "subclass_crcs", 0 AS
"sphinx_deleted", "polymerases"."family_id" AS "family_id",
"polymerases"."organism_id" AS "organism_id",
"polymerases"."contact_id" AS "contact_id", cast(extract(epoch from
"polymerases"."created_at") as int) AS "created_at",
cast(extract(epoch from "polymerases"."updated_at") as int) AS
"updated_at", "polymerases"."creator_id" AS "creator_id",
"polymerases"."is_wild_type" AS "is_wild_type",
"polymerases"."is_stub" AS "is_stub",
array_to_string(array_accum(COALESCE("questions"."topic_id", '0')),
',') AS "topic_ids" FROM "polymerases"    LEFT OUTER JOIN
"polymerase_aliases" ON "polymerase_aliases"."polymerase_id" =
"polymerases".id   LEFT OUTER JOIN "organisms" ON "organisms".id =
"polymerases".organism_id   LEFT OUTER JOIN "families" ON
"families".id = "polymerases".family_id   LEFT OUTER JOIN
"polymerase_references" ON ("polymerases"."id" =
"polymerase_references"."polymerase_id")  LEFT OUTER JOIN "references"
ON ("references"."id" = "polymerase_references"."reference_id")   LEFT
OUTER JOIN "results" ON "results"."polymerase_id" = "polymerases".id
LEFT OUTER JOIN "questions" ON "questions".id = "results".question_id
WHERE "polymerases"."id" >= $start AND "polymerases"."id" <= $end AND
"polymerases"."delta" = TRUE GROUP BY "polymerases"."id",
"polymerases"."name", "organisms"."name",
"polymerases"."historical_note", "families"."name",
"polymerases"."id", "polymerases"."family_id",
"polymerases"."organism_id", "polymerases"."contact_id",
"polymerases"."created_at", "polymerases"."updated_at",
"polymerases"."creator_id", "polymerases"."is_wild_type",
"polymerases"."is_stub"

The problem is that the group_by explodes when i add the last 2 lines
from the index config - postgres need 200M of work_mem RAM to do it
efficiently.
Why do i need the group by?  would distinct not do it for me?

Is there some ts sanctioned way to tweak the config - or do i just do
it manually?

Any gotchas I should look out for?

Brad

-- 
You received this message because you are subscribed to the Google Groups 
"Thinking Sphinx" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to 
[email protected].
For more options, visit this group at 
http://groups.google.com/group/thinking-sphinx?hl=en.

Reply via email to