Hello, my partner and me are working with the goal of improve the GEQO's
performance, we tried with Ant Colony Optimization, but it does not
improve, actually we are trying with a new variant of Genetic Algorithm,
specifically Micro-GA. This algorithm finds a better solution than GEQO
in less time, however the total query execution time is higher. The
fitness is calculated by geqo_eval function. Does anybody know why this
happens?
We attach the patch made with the changes in postgresql-9.2.0.
Regards.
diff --git a/contrib/Makefile b/contrib/Makefile
index d230451..df9ccef 100755
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -26,6 +26,7 @@ SUBDIRS = \
isn \
lo \
ltree \
+ microg \
oid2name \
pageinspect \
passwordcheck \
diff --git a/contrib/microg/Makefile b/contrib/microg/Makefile
new file mode 100644
index 0000000..7597ede
--- /dev/null
+++ b/contrib/microg/Makefile
@@ -0,0 +1,25 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+# Makefile for the genetic query optimizer module
+#
+# Copyright (c) 2015, Universidad Central Marta Abreu de Las Villas
+#
+# contrib/microg/Makefile
+#
+#-------------------------------------------------------------------------
+
+MODULE_big = microg
+OBJS = microg_main.o
+
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/microg
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/microg/microg_main.c b/contrib/microg/microg_main.c
new file mode 100644
index 0000000..0bd896d
--- /dev/null
+++ b/contrib/microg/microg_main.c
@@ -0,0 +1,451 @@
+/*------------------------------------------------------------------------
+ *
+ * microg_main.c
+ * solution to the query optimization problem
+ * by means of a Micro Genetic Algorithm (micro-GA)
+ *
+ * Portions Copyright (c) 2014-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 2015, Universidad Central de Las Villas
+ *
+ * contrib/microg_main.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* contributed by:
+ =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
+ * Martin Utesch * Institute of Automatic Control *
+ = = University of Mining and Technology =
+ * ute...@aut.tu-freiberg.de * Freiberg, Germany *
+ =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
+* Laura Perez Triana
+* Centro de Estudios Informaticos
+*== Universidad Central de Las Villas =* ltri...@uclv.cu *Villa Clara, Cuba
+ =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
+* Alejandro G. Gomez Boix
+* Centro de Estudios Informaticos
+*== Universidad Central de Las Villas =* b...@uclv.cu *Villa Clara, Cuba
+ */
+
+/* -- parts of this are adapted from D. Whitley's Genitor algorithm -- */
+
+#include "postgres.h"
+
+#include <math.h>
+
+#include "optimizer/geqo_misc.h"
+#include "optimizer/geqo_mutation.h"
+#include "optimizer/geqo_pool.h"
+#include "optimizer/geqo_random.h"
+#include "optimizer/geqo_selection.h"
+#include "optimizer/geqo_gene.h"
+#include "optimizer/paths.h"
+#include <sys/timeb.h>
+#include "fmgr.h"
+
+PG_MODULE_MAGIC
+;
+
+/*
+ * Configuration options
+ */
+int Geqo_effort;
+int Geqo_pool_size;
+int Geqo_generations;
+double Geqo_selection_bias;
+double Geqo_seed;
+
+join_search_hook_type join_search_hook = NULL;
+
+void _PG_init(void);
+void _PG_fini(void);
+
+
+/* new functions of microg */
+void random_init_poolMG(PlannerInfo *root, Pool *pool);
+int meanCommonEdgeInPool(Chromosome *pool, int pool_size, int number_of_rels);
+int existEdge(Gene a, Gene b, Gene* s2, int lenght);
+int commonEdge(Gene* s1, Gene* s2, int number_of_rels);
+Chromosome * localSerch2_opt(PlannerInfo *root, Gene* solution, int sizeProblem);
+RelOptInfo *microg(PlannerInfo *root, int number_of_rels, List *initial_rels);
+
+
+/* define edge recombination crossover [ERX] per default */
+#if !defined(ERX) && \
+ !defined(PMX) && \
+ !defined(CX) && \
+ !defined(PX) && \
+ !defined(OX1) && \
+ !defined(OX2)
+#define ERX
+#endif
+
+/*
+ * microg
+ * solution of the query optimization problem
+ * similar to a constrained Traveling Salesman Problem (TSP)
+ */
+
+RelOptInfo *
+microg(PlannerInfo *root, int number_of_rels, List *initial_rels) {
+ GeqoPrivateData
+private;
+ int generation;
+ Chromosome *momma;
+ Chromosome *daddy;
+ Chromosome *kid, *result;
+ Pool *pool;
+ int pool_size, number_generations;
+
+ struct timeb seed;
+
+#ifdef GEQO_DEBUG
+ int status_interval;
+#endif
+
+ RelOptInfo *best_rel;
+
+#if defined(ERX)
+ Edge *edge_table; /* list of edges */
+ int edge_failures = 0;
+#endif
+#if defined(CX) || defined(PX) || defined(OX1) || defined(OX2)
+ City *city_table; /* list of cities */
+#endif
+#if defined(CX)
+ int cycle_diffs = 0;
+ int mutations = 0;
+#endif
+
+ /* set up private information */
+ root->join_search_private = (void *) &private;
+private.initial_rels = initial_rels;
+
+ /*No using Geqo_seed, the new seed is initialized with ftime function*/
+ ftime(&seed);
+ geqo_set_seed(root, seed.millitm);
+
+ /* set GA parameters */
+ pool_size = 5;
+ number_generations = 250;
+#ifdef GEQO_DEBUG
+ status_interval = 10;
+#endif
+
+ /* allocate genetic pool memory */
+ pool = alloc_pool(root, pool_size, number_of_rels);
+
+ //inicializando aleatorio solo esta primera vez al primer elemento de la pobleacion
+#ifdef GEQO_DEBUG
+ elog(DEBUG1, "GEQO selected %d pool entries, best %.2f, worst %.2f",
+ pool_size,
+#endif
+
+ /* allocate chromosome momma and daddy memory */
+ momma = alloc_chromo(root, pool->string_length);
+ daddy = alloc_chromo(root, pool->string_length);
+
+#if defined (ERX)
+#ifdef GEQO_DEBUG
+ elog(DEBUG2, "using edge recombination crossover [ERX]");
+#endif
+ /* allocate edge table memory */
+ edge_table = alloc_edge_table(root, pool->string_length);
+#elif defined(PMX)
+#ifdef GEQO_DEBUG
+ elog(DEBUG2, "using partially matched crossover [PMX]");
+#endif
+ /* allocate chromosome kid memory */
+ kid = alloc_chromo(root, pool->string_length);
+#elif defined(CX)
+#ifdef GEQO_DEBUG
+ elog(DEBUG2, "using cycle crossover [CX]");
+#endif
+ /* allocate city table memory */
+ kid = alloc_chromo(root, pool->string_length);
+ city_table = alloc_city_table(root, pool->string_length);
+#elif defined(PX)
+#ifdef GEQO_DEBUG
+ elog(DEBUG2, "using position crossover [PX]");
+#endif
+ /* allocate city table memory */
+ kid = alloc_chromo(root, pool->string_length);
+ city_table = alloc_city_table(root, pool->string_length);
+#elif defined(OX1)
+#ifdef GEQO_DEBUG
+ elog(DEBUG2, "using order crossover [OX1]");
+#endif
+ /* allocate city table memory */
+ kid = alloc_chromo(root, pool->string_length);
+ city_table = alloc_city_table(root, pool->string_length);
+#elif defined(OX2)
+#ifdef GEQO_DEBUG
+ elog(DEBUG2, "using order crossover [OX2]");
+#endif
+ /* allocate city table memory */
+ kid = alloc_chromo(root, pool->string_length);
+ city_table = alloc_city_table(root, pool->string_length);
+#endif
+
+ init_tour(root, pool->data[0].string, pool->string_length);
+ pool->data[0].worth = geqo_eval(root, pool->data[0].string,
+ pool->string_length);
+ for (generation = 0; generation < number_generations; generation++) {
+
+ ftime(&seed);
+ geqo_set_seed(root, seed.millitm);
+ /* random initialization of the pool */
+ random_init_poolMG(root, pool);
+ /* sort the pool according to cheapest path as fitness */
+ sort_pool(root, pool); /* we have to do it only one time, since all
+ * kids replace the worst individuals in
+ * future (-> geqo_pool.c:spread_chromo ) */
+
+ do {
+ geqo_selection(root, momma, daddy, pool, Geqo_selection_bias);
+
+#if defined (ERX)
+ /* EDGE RECOMBINATION CROSSOVER */
+ gimme_edge_table(root, momma->string, daddy->string,
+ pool->string_length, edge_table);
+
+ kid = momma;
+
+ /* are there any edge failures ? */
+ edge_failures += gimme_tour(root, edge_table, kid->string,
+ pool->string_length);
+#elif defined(PMX)
+ /* PARTIALLY MATCHED CROSSOVER */
+ pmx(root, momma->string, daddy->string, kid->string, pool->string_length);
+#elif defined(CX)
+ /* CYCLE CROSSOVER */
+ cycle_diffs = cx(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
+ /* mutate the child */
+ if (cycle_diffs == 0)
+ {
+ mutations++;
+ geqo_mutation(root, kid->string, pool->string_length);
+ }
+#elif defined(PX)
+ /* POSITION CROSSOVER */
+ px(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
+#elif defined(OX1)
+ /* ORDER CROSSOVER */
+ ox1(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
+#elif defined(OX2)
+ /* ORDER CROSSOVER */
+ ox2(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
+#endif
+
+ /* EVALUATE FITNESS */
+ kid->worth = geqo_eval(root, kid->string, pool->string_length);
+
+ /* push the kid into the wilderness of life according to its worth */
+ spread_chromo(root, kid, pool);
+
+#ifdef GEQO_DEBUG
+ if (status_interval && !(generation % status_interval))
+ print_gen(stdout, pool, generation);
+#endif
+
+ } while (meanCommonEdgeInPool(pool->data, pool_size,
+ number_of_rels) < (number_of_rels - 1) / 4);
+
+ }
+
+#if defined(ERX) && defined(GEQO_DEBUG)
+ if (edge_failures != 0)
+ elog(LOG, "[GEQO] failures: %d, average: %d",
+ edge_failures, (int) number_generations / edge_failures);
+ else
+ elog(LOG, "[GEQO] no edge failures detected");
+#endif
+
+#if defined(CX) && defined(GEQO_DEBUG)
+ if (mutations != 0)
+ elog(LOG, "[GEQO] mutations: %d, generations: %d",
+ mutations, number_generations);
+ else
+ elog(LOG, "[GEQO] no mutations processed");
+#endif
+
+#ifdef GEQO_DEBUG
+ print_pool(stdout, pool, 0, pool_size - 1);
+#endif
+
+#ifdef GEQO_DEBUG
+ elog(DEBUG1, "GEQO best is %.2f after %d generations",
+ pool->data[0].worth, number_generations);
+#endif
+
+ /*
+ * got the cheapest query tree processed by geqo; first element of the
+ * population indicates the best query tree
+ */
+
+ result = localSerch2_opt(root, (Gene *) pool->data[0].string,
+ number_of_rels);
+
+ best_rel = gimme_tree(root, result->string, pool->string_length);
+
+ /* DBG: show the query plan */
+#ifdef NOT_USED
+ print_plan(best_plan, root);
+#endif
+
+ /* ... free memory stuff */
+ free_chromo(root, momma);
+ free_chromo(root, daddy);
+
+#if defined (ERX)
+ free_edge_table(root, edge_table);
+#elif defined(PMX)
+ free_chromo(root, kid);
+#elif defined(CX)
+ free_chromo(root, kid);
+ free_city_table(root, city_table);
+#elif defined(PX)
+ free_chromo(root, kid);
+ free_city_table(root, city_table);
+#elif defined(OX1)
+ free_chromo(root, kid);
+ free_city_table(root, city_table);
+#elif defined(OX2)
+ free_chromo(root, kid);
+ free_city_table(root, city_table);
+#endif
+
+ free_pool(root, pool);
+
+ /* ... clear root pointer to our private storage */
+ root->join_search_private = NULL;
+
+ return best_rel;
+}
+
+
+void _PG_init(void) {
+
+ join_search_hook = microg;
+}
+void _PG_fini(void) {
+
+ join_search_hook = NULL;
+}
+
+void random_init_poolMG(PlannerInfo *root, Pool *pool) {
+ Chromosome *chromo = (Chromosome *) pool->data;
+ int i;
+
+ /* new micro-GA pool with best individual
+ * from last generation */
+ for (i = 1; i < pool->size; i++) {
+ init_tour(root, chromo[i].string, pool->string_length);
+ pool->data[i].worth = geqo_eval(root, chromo[i].string,
+ pool->string_length);
+ }
+
+}
+
+/* computing mean of common edge between all individuals */
+int meanCommonEdgeInPool(Chromosome *pool, int pool_size,
+ int number_of_rels) {
+
+ int result = 0;
+ int sum = 0, count = 0;
+ int i, j;
+
+ for (i = pool_size - 1; i >= 0; i--) {
+ for (j = i - 1; j >= 0; j--) {
+ sum += commonEdge((Gene *) pool[i].string,
+ (Gene *) pool[j].string, number_of_rels);
+ count++;
+ }
+ }
+ result = sum / count;
+ return result;
+}
+
+/* computing amount of common edges between two individuals */
+int commonEdge(Gene* s1, Gene* s2, int number_of_rels) {
+
+ int countEdge = 0;
+ int i;
+ if (existEdge(s1[0], s1[1], s2, number_of_rels) == 1)
+ countEdge++;
+ for (i = 1; i < number_of_rels - 1; i++) {
+ if (existEdge(s1[i], s1[i + 1], s2, number_of_rels) == 1)
+ countEdge++;
+ }
+ return countEdge;
+}
+
+int existEdge(Gene a, Gene b, Gene* s2, int lenght) {
+ int i;
+ if (s2[0] == a || s2[lenght - 1] == a) {
+ if ((s2[0] == a && s2[1] == b)
+ || (s2[lenght - 1] == a && s2[lenght - 2] == b))
+ return 1;
+ else
+ return 0;
+ } else {
+ for (i = 1; i < lenght - 1; i++) {
+ if ((s2[i] == a && s2[i + 1] == b)
+ || (s2[i] == a && s2[i - 1] == b)) {
+ return 1;
+ }
+ }
+ return 0;
+ }
+}
+
+
+/* local search algorithm to try to improve the quality of the solution */
+Chromosome * localSerch2_opt(PlannerInfo *root, Gene* solution,
+ int sizeProblem) {
+
+ Chromosome *result;
+ Gene *aux, *aux1;
+ double costAux, costAux1;
+ int flag = 1, i, j, h;
+ Gene temp;
+
+ result = alloc_chromo(root, sizeProblem);
+
+ aux = malloc(sizeof(Gene) * sizeProblem);
+ aux1 = malloc(sizeof(Gene) * sizeProblem);
+
+ for (i = 0; i < sizeProblem; i++) {
+ aux1[i] = solution[i];
+ aux[i] = solution[i];
+ }
+ costAux = geqo_eval(root, aux, sizeProblem);
+ costAux1 = costAux;
+ while (flag == 1) {
+ flag = 0;
+ for (i = 0; i < sizeProblem; i++) {
+ j = i + 2;
+ while (j != i) {
+ for (h = 0; h < sizeProblem; h++) {
+ aux1[h] = aux[h];
+ }
+ temp = aux1[(i + 1) % sizeProblem];
+ aux1[(i + 1) % sizeProblem] = aux1[j % sizeProblem];
+ aux1[j % sizeProblem] = temp;
+
+ costAux1 = geqo_eval(root, aux1, sizeProblem);
+ if (costAux > costAux1) {
+ aux = aux1;
+ costAux = costAux1;
+ flag = 1;
+ }
+ j = (j + 1) % sizeProblem;
+ }
+ }
+ }
+
+ result->string = aux;
+ result->worth = costAux;
+ return result;
+}
+
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers