TS generates a pretty bad sql query for this index definition:
define_index do
indexes :name
indexes 'trim(trailing \'0123456789\' from
"polymerases"."name")', :as => :non_numeric_name
indexes aliases.name, :as => :alias
indexes organism.name, :as => :organism
indexes historical_note
indexes family.name, :as => :family
indexes references.title, :as => :reference_title
indexes references.abstract, :as => :abstract
indexes references.authors, :as => :authors
has family_id, organism_id, contact_id, created_at, updated_at,
creator_id
has is_wild_type, :type => :boolean
has is_stub, :type => :boolean
has results.question(:id), :as => :question_id
has results.question(:topic_id), :as => :topic_ids
set_property :delta => :delayed
end
which generates:
SELECT "polymerases"."id" * 2 + 0 AS "id" , "polymerases"."name" AS
"name", trim(trailing '0123456789' from "polymerases"."name") AS
"non_numeric_name",
array_to_string(array_accum(COALESCE("polymerase_aliases"."name",
'0')), ' ') AS "alias", "organisms"."name" AS "organism",
"polymerases"."historical_note" AS "historical_note",
"families"."name" AS "family",
array_to_string(array_accum(COALESCE("references"."title", '0')), ' ')
AS "reference_title",
array_to_string(array_accum(COALESCE("references"."abstract", '0')), '
') AS "abstract",
array_to_string(array_accum(COALESCE("references"."authors", '0')), '
') AS "authors", "polymerases"."id" AS "sphinx_internal_id",
1939935772 AS "class_crc", '1939935772' AS "subclass_crcs", 0 AS
"sphinx_deleted", "polymerases"."family_id" AS "family_id",
"polymerases"."organism_id" AS "organism_id",
"polymerases"."contact_id" AS "contact_id", cast(extract(epoch from
"polymerases"."created_at") as int) AS "created_at",
cast(extract(epoch from "polymerases"."updated_at") as int) AS
"updated_at", "polymerases"."creator_id" AS "creator_id",
"polymerases"."is_wild_type" AS "is_wild_type",
"polymerases"."is_stub" AS "is_stub",
array_to_string(array_accum(COALESCE("questions"."topic_id", '0')),
',') AS "topic_ids" FROM "polymerases" LEFT OUTER JOIN
"polymerase_aliases" ON "polymerase_aliases"."polymerase_id" =
"polymerases".id LEFT OUTER JOIN "organisms" ON "organisms".id =
"polymerases".organism_id LEFT OUTER JOIN "families" ON
"families".id = "polymerases".family_id LEFT OUTER JOIN
"polymerase_references" ON ("polymerases"."id" =
"polymerase_references"."polymerase_id") LEFT OUTER JOIN "references"
ON ("references"."id" = "polymerase_references"."reference_id") LEFT
OUTER JOIN "results" ON "results"."polymerase_id" = "polymerases".id
LEFT OUTER JOIN "questions" ON "questions".id = "results".question_id
WHERE "polymerases"."id" >= $start AND "polymerases"."id" <= $end AND
"polymerases"."delta" = TRUE GROUP BY "polymerases"."id",
"polymerases"."name", "organisms"."name",
"polymerases"."historical_note", "families"."name",
"polymerases"."id", "polymerases"."family_id",
"polymerases"."organism_id", "polymerases"."contact_id",
"polymerases"."created_at", "polymerases"."updated_at",
"polymerases"."creator_id", "polymerases"."is_wild_type",
"polymerases"."is_stub"
The problem is that the group_by explodes when i add the last 2 lines
from the index config - postgres need 200M of work_mem RAM to do it
efficiently.
Why do i need the group by? would distinct not do it for me?
Is there some ts sanctioned way to tweak the config - or do i just do
it manually?
Any gotchas I should look out for?
Brad
--
You received this message because you are subscribed to the Google Groups
"Thinking Sphinx" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/thinking-sphinx?hl=en.