PostgreSQL 源码解读(72)- 查询语句#57(make_one_rel函数#22-...
本节大体介绍了遗传算法(geqo函数)的实现,在参与连接的关系大于等于12(默认值)个时,PG使用遗传算法生成连接访问路径,构建最终的连接关系。
遗传算法简介
遗传算法是借鉴生物科学而产生的搜索算法,在这个算法中会用到一些生物科学的相关知识,下面是PG遗传算法中所使用的的一些术语:
1、染色体(Chromosome):染色体又可称为基因型个体(individuals),一个染色体可以视为一个解(一个合法的连接访问路径)。
2、种群(Pool):一定数量的个体(染色体)组成了群体(pool/population),群体中个体的数量叫做群体大小(population size)。
3、基因(Gene):基因是染色体中的元素,用于表示个体的特征。例如有一个串(即染色体)S=1011,则其中的1,0,1,1这4个元素分别称为基因。在PG中,基因是参与连接的关系。
4、适应度(Fitness):各个个体对环境的适应程度叫做适应度(fitness)。为了体现染色体的适应能力,引入了对问题中的每一个染色体都能进行度量的函数,叫适应度函数。这个函数通常会被用来计算个体在群体中被使用的概率。在PG中适应度是连接访问路径的总成本。
一、数据结构
typedef struct
{
List *initial_rels;
unsigned short random_state[3];
} GeqoPrivateData;
typedef int Gene;//基因(整型)
typedef struct Chromosome//染色体
{
Gene *string;//基因
Cost worth;//成本
} Chromosome;
typedef struct Pool//种群
{
Chromosome *data;//染色体数组
int size;//大小
int string_length;//长度
} Pool;
typedef struct
{
RelOptInfo *joinrel;
int size;
} Clump;
二、源码解读
geqo函数实现了遗传算法,构建多表(≥12)的连接访问路径。
//----------------------------------------------------------------------- geqo
RelOptInfo *
geqo(PlannerInfo *root, int number_of_rels, List *initial_rels)
{
GeqoPrivateData private;//遗传算法私有的数据,包括参与连接的关系和随机数
int generation;
Chromosome *momma;//染色体-母亲数组
Chromosome *daddy;//染色体-父亲数组
Chromosome *kid;//染色体-孩子数组
Pool *pool;//种群指针
int pool_size,//种群大小
number_generations;//进化代数,使用最大迭代次数(进化代数)作为停止准则
#ifdef GEQO_DEBUG
int status_interval;
#endif
Gene *best_tour;
RelOptInfo *best_rel;//最优解
#if defined(ERX)
Edge *edge_table;
int edge_failures = 0;
#endif
#if defined(CX) || defined(PX) || defined(OX1) || defined(OX2)
City *city_table;
#endif
#if defined(CX)
int cycle_diffs = 0;
int mutations = 0;
#endif
root->join_search_private = (void *) &private;
private.initial_rels = initial_rels;
geqo_set_seed(root, Geqo_seed);
pool_size = gimme_pool_size(number_of_rels);//种群大小
number_generations = gimme_number_generations(pool_size);//迭代次数
#ifdef GEQO_DEBUG
status_interval = 10;
#endif
pool = alloc_pool(root, pool_size, number_of_rels);
random_init_pool(root, pool);
sort_pool(root, pool);
#ifdef GEQO_DEBUG
elog(DEBUG1, "GEQO selected %d pool entries, best %.2f, worst %.2f",
pool_size,
pool->data[0].worth,
pool->data[pool_size - 1].worth);
#endif
momma = alloc_chromo(root, pool->string_length);
daddy = alloc_chromo(root, pool->string_length);
#if defined (ERX)
#ifdef GEQO_DEBUG
elog(DEBUG2, "using edge recombination crossover [ERX]");
#endif
//申请边界表内存
edge_table = alloc_edge_table(root, pool->string_length);
#elif defined(PMX)
#ifdef GEQO_DEBUG
elog(DEBUG2, "using partially matched crossover [PMX]");
#endif
kid = alloc_chromo(root, pool->string_length);
#elif defined(CX)
#ifdef GEQO_DEBUG
elog(DEBUG2, "using cycle crossover [CX]");
#endif
kid = alloc_chromo(root, pool->string_length);
city_table = alloc_city_table(root, pool->string_length);
#elif defined(PX)
#ifdef GEQO_DEBUG
elog(DEBUG2, "using position crossover [PX]");
#endif
kid = alloc_chromo(root, pool->string_length);//申请内存
city_table = alloc_city_table(root, pool->string_length);
#elif defined(OX1)
#ifdef GEQO_DEBUG
elog(DEBUG2, "using order crossover [OX1]");
#endif
kid = alloc_chromo(root, pool->string_length);
city_table = alloc_city_table(root, pool->string_length);
#elif defined(OX2)
#ifdef GEQO_DEBUG
elog(DEBUG2, "using order crossover [OX2]");
#endif
kid = alloc_chromo(root, pool->string_length);
city_table = alloc_city_table(root, pool->string_length);
#endif
for (generation = 0; generation < number_generations; generation++)//开始迭代
{
//选择:利用线性偏差(bias)函数,从中选出momma&daddy
geqo_selection(root, momma, daddy, pool, Geqo_selection_bias);
#if defined (ERX)
//交叉遗传
gimme_edge_table(root, momma->string, daddy->string, pool->string_length, edge_table);
kid = momma;
//遍历边界
edge_failures += gimme_tour(root, edge_table, kid->string, pool->string_length);
#elif defined(PMX)
pmx(root, momma->string, daddy->string, kid->string, pool->string_length);
#elif defined(CX)
cycle_diffs = cx(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
if (cycle_diffs == 0)
{
mutations++;
geqo_mutation(root, kid->string, pool->string_length);
}
#elif defined(PX)
px(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
#elif defined(OX1)
ox1(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
#elif defined(OX2)
ox2(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
#endif
//计算适应度
kid->worth = geqo_eval(root, kid->string, pool->string_length);
//把遗传产生的染色体放到野外以进行下一轮的进化
spread_chromo(root, kid, pool);
#ifdef GEQO_DEBUG
if (status_interval && !(generation % status_interval))
print_gen(stdout, pool, generation);
#endif
}
#if defined(ERX) && defined(GEQO_DEBUG)
if (edge_failures != 0)
elog(LOG, "[GEQO] failures: %d, average: %d",
edge_failures, (int) number_generations / edge_failures);
else
elog(LOG, "[GEQO] no edge failures detected");
#endif
#if defined(CX) && defined(GEQO_DEBUG)
if (mutations != 0)
elog(LOG, "[GEQO] mutations: %d, generations: %d",
mutations, number_generations);
else
elog(LOG, "[GEQO] no mutations processed");
#endif
#ifdef GEQO_DEBUG
print_pool(stdout, pool, 0, pool_size - 1);
#endif
#ifdef GEQO_DEBUG
elog(DEBUG1, "GEQO best is %.2f after %d generations",
pool->data[0].worth, number_generations);
#endif
best_tour = (Gene *) pool->data[0].string;
best_rel = gimme_tree(root, best_tour, pool->string_length);
if (best_rel == NULL)
elog(ERROR, "geqo failed to make a valid plan");
#ifdef NOT_USED
print_plan(best_plan, root);
#endif
free_chromo(root, momma);
free_chromo(root, daddy);
#if defined (ERX)
free_edge_table(root, edge_table);
#elif defined(PMX)
free_chromo(root, kid);
#elif defined(CX)
free_chromo(root, kid);
free_city_table(root, city_table);
#elif defined(PX)
free_chromo(root, kid);
free_city_table(root, city_table);
#elif defined(OX1)
free_chromo(root, kid);
free_city_table(root, city_table);
#elif defined(OX2)
free_chromo(root, kid);
free_city_table(root, city_table);
#endif
free_pool(root, pool);
root->join_search_private = NULL;
return best_rel;
}
//--------------------------------------------------------------------------- geqo_pool.c
static int compare(const void *arg1, const void *arg2);
Pool *
alloc_pool(PlannerInfo *root, int pool_size, int string_length)
{
Pool *new_pool;
Chromosome *chromo;
int i;
new_pool = (Pool *) palloc(sizeof(Pool));
new_pool->size = (int) pool_size;
new_pool->string_length = (int) string_length;
new_pool->data = (Chromosome *) palloc(pool_size * sizeof(Chromosome));
chromo = (Chromosome *) new_pool->data;
for (i = 0; i < pool_size; i++)
chromo[i].string = palloc((string_length + 1) * sizeof(Gene));
return new_pool;
}
void
free_pool(PlannerInfo *root, Pool *pool)
{
Chromosome *chromo;
int i;
chromo = (Chromosome *) pool->data;
for (i = 0; i < pool->size; i++)
pfree(chromo[i].string);
pfree(pool->data);
pfree(pool);
}
void
random_init_pool(PlannerInfo *root, Pool *pool)
{
Chromosome *chromo = (Chromosome *) pool->data;
int i;
int bad = 0;
i = 0;
while (i < pool->size)
{
init_tour(root, chromo[i].string, pool->string_length);
pool->data[i].worth = geqo_eval(root, chromo[i].string,
pool->string_length);
if (pool->data[i].worth < DBL_MAX)
i++;
else
{
bad++;
if (i == 0 && bad >= 10000)
elog(ERROR, "geqo failed to make a valid plan");
}
}
#ifdef GEQO_DEBUG
if (bad > 0)
elog(DEBUG1, "%d invalid tours found while selecting %d pool entries",
bad, pool->size);
#endif
}
void
sort_pool(PlannerInfo *root, Pool *pool)
{
qsort(pool->data, pool->size, sizeof(Chromosome), compare);
}
static int
compare(const void *arg1, const void *arg2)
{
const Chromosome *chromo1 = (const Chromosome *) arg1;
const Chromosome *chromo2 = (const Chromosome *) arg2;
if (chromo1->worth == chromo2->worth)
return 0;
else if (chromo1->worth > chromo2->worth)
return 1;
else
return -1;
}
Chromosome *
alloc_chromo(PlannerInfo *root, int string_length)
{
Chromosome *chromo;
chromo = (Chromosome *) palloc(sizeof(Chromosome));
chromo->string = (Gene *) palloc((string_length + 1) * sizeof(Gene));
return chromo;
}
void
free_chromo(PlannerInfo *root, Chromosome *chromo)
{
pfree(chromo->string);
pfree(chromo);
}
void
spread_chromo(PlannerInfo *root, Chromosome *chromo, Pool *pool)
{
int top,
mid,
bot;
int i,
index;
Chromosome swap_chromo,
tmp_chromo;
if (chromo->worth > pool->data[pool->size - 1].worth)
return;
top = 0;
mid = pool->size / 2;
bot = pool->size - 1;
index = -1;
while (index == -1)
{
if (chromo->worth <= pool->data[top].worth)
index = top;
else if (chromo->worth == pool->data[mid].worth)
index = mid;
else if (chromo->worth == pool->data[bot].worth)
index = bot;
else if (bot - top <= 1)
index = bot;
else if (chromo->worth < pool->data[mid].worth)
{
bot = mid;
mid = top + ((bot - top) / 2);
}
else
{
top = mid;
mid = top + ((bot - top) / 2);
}
}
geqo_copy(root, &pool->data[pool->size - 1], chromo, pool->string_length);
swap_chromo.string = pool->data[pool->size - 1].string;
swap_chromo.worth = pool->data[pool->size - 1].worth;
for (i = index; i < pool->size; i++)
{
tmp_chromo.string = pool->data[i].string;
tmp_chromo.worth = pool->data[i].worth;
pool->data[i].string = swap_chromo.string;
pool->data[i].worth = swap_chromo.worth;
swap_chromo.string = tmp_chromo.string;
swap_chromo.worth = tmp_chromo.worth;
}
}
void
init_tour(PlannerInfo *root, Gene *tour, int num_gene)
{
int i,
j;
if (num_gene > 0)
tour[0] = (Gene) 1;
for (i = 1; i < num_gene; i++)
{
j = geqo_randint(root, i, 0);
if (i != j)
tour[i] = tour[j];
tour[j] = (Gene) (i + 1);
}
}
//----------------------------------------------------------- geqo_eval
Cost
geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
{
MemoryContext mycontext;
MemoryContext oldcxt;
RelOptInfo *joinrel;
Cost fitness;
int savelength;
struct HTAB *savehash;
mycontext = AllocSetContextCreate(CurrentMemoryContext,
"GEQO",
ALLOCSET_DEFAULT_SIZES);
oldcxt = MemoryContextSwitchTo(mycontext);
savelength = list_length(root->join_rel_list);
savehash = root->join_rel_hash;
Assert(root->join_rel_level == NULL);
root->join_rel_hash = NULL;
//给定的关系组合,构造最佳的访问路径
joinrel = gimme_tree(root, tour, num_gene);
if (joinrel)
{
Path *best_path = joinrel->cheapest_total_path;//获取生成的关系的最优路径
fitness = best_path->total_cost;//适应度=该路径的总成本
}
else
fitness = DBL_MAX;//连接无效,适应度为DBL_MAX,下一轮迭代会丢弃
root->join_rel_list = list_truncate(root->join_rel_list,
savelength);
root->join_rel_hash = savehash;
//释放资源
MemoryContextSwitchTo(oldcxt);
MemoryContextDelete(mycontext);
return fitness;
}
//------------------------------------------- gimme_tree
RelOptInfo *
gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
{
GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private;
List *clumps;
int rel_count;
clumps = NIL;
for (rel_count = 0; rel_count < num_gene; rel_count++)//遍历基因即参与连接的关系
{
int cur_rel_index;//当前索引
RelOptInfo *cur_rel;//当前的关系
Clump *cur_clump;//当前的clump聚类
cur_rel_index = (int) tour[rel_count];
cur_rel = (RelOptInfo *) list_nth(private->initial_rels,
cur_rel_index - 1);
cur_clump = (Clump *) palloc(sizeof(Clump));
cur_clump->joinrel = cur_rel;
cur_clump->size = 1;
//使用期望的连接方式(force=F)将它合并到clump(聚类)链表中
clumps = merge_clump(root, clumps, cur_clump, num_gene, false);
}
if (list_length(clumps) > 1)//聚类链表>1
{
//以传统的顺序加入到剩余的聚类中
List *fclumps;//链表
ListCell *lc;//元素
fclumps = NIL;
foreach(lc, clumps)
{
Clump *clump = (Clump *) lfirst(lc);
//(force=T)
fclumps = merge_clump(root, fclumps, clump, num_gene, true);
}
clumps = fclumps;
}
if (list_length(clumps) != 1)//无法形成最终的结果关系,返回NULL
return NULL;
return ((Clump *) linitial(clumps))->joinrel;//成功,则返回结果Relation
}
//------------------------------ merge_clump
static List *
merge_clump(PlannerInfo *root,//优化信息
List *clumps, //聚类链表
Clump *new_clump, //新的聚类
int num_gene,//基因格式
bool force)//是否强制加入
{
ListCell *prev;
ListCell *lc;
//验证新聚类能否加入
prev = NULL;
foreach(lc, clumps)//遍历链表
{
Clump *old_clump = (Clump *) lfirst(lc);//原有的聚类
if (force ||
desirable_join(root, old_clump->joinrel, new_clump->joinrel))//如强制加入或者可按要求加入
{
RelOptInfo *joinrel;//
joinrel = make_join_rel(root,
old_clump->joinrel,
new_clump->joinrel);//构造连接新关系RelOptInfo
if (joinrel)
{
//创建partitionwise连接
generate_partitionwise_join_paths(root, joinrel);
if (old_clump->size + new_clump->size < num_gene)
generate_gather_paths(root, joinrel, false);
//设置成本最低的路径
set_cheapest(joinrel);
//把新的clump吸纳到旧的clump中,释放new_clump
old_clump->joinrel = joinrel;
old_clump->size += new_clump->size;
pfree(new_clump);
//从链表中删除old_clump
clumps = list_delete_cell(clumps, lc, prev);
return merge_clump(root, clumps, old_clump, num_gene, force);
}
}
prev = lc;
}
if (clumps == NIL || new_clump->size == 1)
return lappend(clumps, new_clump);//直接添加
//检查是否属于前面的clump
lc = list_head(clumps);
if (new_clump->size > ((Clump *) lfirst(lc))->size)
return lcons(new_clump, clumps);
//搜索位置,插入之
for (;;)
{
ListCell *nxt = lnext(lc);
if (nxt == NULL || new_clump->size > ((Clump *) lfirst(nxt))->size)
break;
lc = nxt;
}
lappend_cell(clumps, lc, new_clump);
return clumps;
}
三、跟踪分析
测试表(13张表)和数据:
drop table if exists t01;
drop table if exists t02;
drop table if exists t03;
drop table if exists t04;
drop table if exists t05;
drop table if exists t06;
drop table if exists t07;
drop table if exists t08;
drop table if exists t09;
drop table if exists t10;
drop table if exists t11;
drop table if exists t12;
drop table if exists t13;
create table t01(c1 int,c2 varchar(20));
create table t02(c1 int,c2 varchar(20));
create table t03(c1 int,c2 varchar(20));
create table t04(c1 int,c2 varchar(20));
create table t05(c1 int,c2 varchar(20));
create table t06(c1 int,c2 varchar(20));
create table t07(c1 int,c2 varchar(20));
create table t08(c1 int,c2 varchar(20));
create table t09(c1 int,c2 varchar(20));
create table t10(c1 int,c2 varchar(20));
create table t11(c1 int,c2 varchar(20));
create table t12(c1 int,c2 varchar(20));
create table t13(c1 int,c2 varchar(20));
insert into t01 select generate_series(1,100),'TEST'||generate_series(1,100);
insert into t02 select generate_series(1,1000),'TEST'||generate_series(1,1000);
insert into t03 select generate_series(1,10000),'TEST'||generate_series(1,10000);
insert into t04 select generate_series(1,200),'TEST'||generate_series(1,200);
insert into t05 select generate_series(1,4000),'TEST'||generate_series(1,4000);
insert into t06 select generate_series(1,100000),'TEST'||generate_series(1,100000);
insert into t07 select generate_series(1,100),'TEST'||generate_series(1,100);
insert into t08 select generate_series(1,1000),'TEST'||generate_series(1,1000);
insert into t09 select generate_series(1,10000),'TEST'||generate_series(1,10000);
insert into t10 select generate_series(1,200),'TEST'||generate_series(1,200);
insert into t11 select generate_series(1,4000),'TEST'||generate_series(1,4000);
insert into t12 select generate_series(1,100000),'TEST'||generate_series(1,100000);
insert into t13 select generate_series(1,100),'TEST'||generate_series(1,100);
create index idx_t01_c1 on t01(c1);
create index idx_t06_c1 on t06(c1);
create index idx_t12_c1 on t12(c1);
测试SQL语句与执行计划如下:
testdb=# explain verbose select *
from t01,t02,t03,t04,t05,t06,t07,t08,t09,t10,t11,t12,t13
where t01.c1 = t02.c1
and t02.c1 = t03.c1
and t03.c1 = t04.c1
and t04.c1 = t05.c1
and t05.c1 = t06.c1
and t06.c1 = t07.c1
and t07.c1 = t08.c1
and t08.c1 = t09.c1
and t09.c1 = t10.c1
and t10.c1 = t11.c1
and t11.c1 = t12.c1
and t12.c1 = t13.c1;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------
Hash Join (cost=404.93..597.44 rows=1 width=148)
Output: t01.c1, t01.c2, t02.c1, t02.c2, t03.c1, t03.c2, t04.c1, t04.c2, t05.c1, t05.c2, t06.c1, t06.c2, t07.c1, t07.c2, t08.c1, t08.c2, t09.c1, t09.c2, t10.c1, t10.c2, t11.c1, t11.c2, t12.c1, t12.c2,
t13.c1, t13.c2
Hash Cond: (t03.c1 = t01.c1)
-> Seq Scan on public.t03 (cost=0.00..155.00 rows=10000 width=12)
Output: t03.c1, t03.c2
-> Hash (cost=404.92..404.92 rows=1 width=136)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2, t11.c1, t11.c2, t08.c1, t08.c2, t05.c1, t05.c2, t06.c1, t06.c2, t01.c1, t
01.c2
-> Nested Loop (cost=327.82..404.92 rows=1 width=136)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2, t11.c1, t11.c2, t08.c1, t08.c2, t05.c1, t05.c2, t06.c1, t06.c2, t01
.c1, t01.c2
Join Filter: (t02.c1 = t01.c1)
-> Nested Loop (cost=327.68..404.75 rows=1 width=126)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2, t11.c1, t11.c2, t08.c1, t08.c2, t05.c1, t05.c2, t06.c1, t06.c
2
Join Filter: (t02.c1 = t06.c1)
-> Hash Join (cost=327.38..404.39 rows=1 width=113)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2, t11.c1, t11.c2, t08.c1, t08.c2, t05.c1, t05.c2
Hash Cond: (t05.c1 = t02.c1)
-> Seq Scan on public.t05 (cost=0.00..62.00 rows=4000 width=12)
Output: t05.c1, t05.c2
-> Hash (cost=327.37..327.37 rows=1 width=101)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2, t11.c1, t11.c2, t08.c1, t08.c2
-> Hash Join (cost=307.61..327.37 rows=1 width=101)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2, t11.c1, t11.c2, t08.c1, t08.c2
Hash Cond: (t08.c1 = t02.c1)
-> Seq Scan on public.t08 (cost=0.00..16.00 rows=1000 width=11)
Output: t08.c1, t08.c2
-> Hash (cost=307.60..307.60 rows=1 width=90)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2, t11.c1, t11.c2
-> Hash Join (cost=230.59..307.60 rows=1 width=90)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2, t11.c1, t11.c2
Hash Cond: (t11.c1 = t02.c1)
-> Seq Scan on public.t11 (cost=0.00..62.00 rows=4000 width=12)
Output: t11.c1, t11.c2
-> Hash (cost=230.58..230.58 rows=1 width=78)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2
-> Nested Loop (cost=37.71..230.58 rows=1 width=78)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2, t12.c1, t12.c2
Join Filter: (t02.c1 = t12.c1)
-> Hash Join (cost=37.42..229.93 rows=1 width=65)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2, t09.c1, t09.c2
Hash Cond: (t09.c1 = t02.c1)
-> Seq Scan on public.t09 (cost=0.00..155.00 rows=10000 width=12)
Output: t09.c1, t09.c2
-> Hash (cost=37.41..37.41 rows=1 width=53)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2
-> Hash Join (cost=32.65..37.41 rows=1 width=53)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2, t04.c1, t04.c2
Hash Cond: (t04.c1 = t02.c1)
-> Seq Scan on public.t04 (cost=0.00..4.00 rows=200 width=11)
Output: t04.c1, t04.c2
-> Hash (cost=32.62..32.62 rows=2 width=42)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2
-> Hash Join (cost=27.85..32.62 rows=2 width=42)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2, t10.c1, t10.c2
Hash Cond: (t10.c1 = t02.c1)
-> Seq Scan on public.t10 (cost=0.00..4.00 rows=200 width=11)
Output: t10.c1, t10.c2
-> Hash (cost=27.73..27.73 rows=10 width=31)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2
-> Hash Join (cost=6.50..27.73 rows=10 width=31)
Output: t02.c1, t02.c2, t07.c1, t07.c2, t13.c1, t13.c2
Hash Cond: (t02.c1 = t13.c1)
-> Hash Join (cost=3.25..24.00 rows=100 width=21)
Output: t02.c1, t02.c2, t07.c1, t07.c2
Hash Cond: (t02.c1 = t07.c1)
-> Seq Scan on public.t02 (cost=0.00..16.00 rows=1000 width=11)
Output: t02.c1, t02.c2
-> Hash (cost=2.00..2.00 rows=100 width=10)
Output: t07.c1, t07.c2
-> Seq Scan on public.t07 (cost=0.00..2.00 rows=100 width=10)
Output: t07.c1, t07.c2
-> Hash (cost=2.00..2.00 rows=100 width=10)
Output: t13.c1, t13.c2
-> Seq Scan on public.t13 (cost=0.00..2.00 rows=100 width=10)
Output: t13.c1, t13.c2
-> Index Scan using idx_t12_c1 on public.t12 (cost=0.29..0.64 rows=1 width=13)
Output: t12.c1, t12.c2
Index Cond: (t12.c1 = t09.c1)
-> Index Scan using idx_t06_c1 on public.t06 (cost=0.29..0.34 rows=1 width=13)
Output: t06.c1, t06.c2
Index Cond: (t06.c1 = t12.c1)
-> Index Scan using idx_t01_c1 on public.t01 (cost=0.14..0.16 rows=1 width=10)
Output: t01.c1, t01.c2
Index Cond: (t01.c1 = t06.c1)
(83 rows)
testdb=#
启动gdb,设置断点
(gdb) b geqo
Breakpoint 1 at 0x793ec6: file geqo_main.c, line 86.
(gdb) c
Continuing.
Breakpoint 1, geqo (root=0x1fbf0f8, number_of_rels=13, initial_rels=0x1f0f698) at geqo_main.c:86
86 int edge_failures = 0;
输入参数:root为优化器信息,一共有13个参与连接的关系,initial_rels是13个参与连接关系的链表
(gdb) p *initial_rels
$1 = {type = T_List, length = 13, head = 0x1f0f670, tail = 0x1f0f888}
初始化遗传算法私有数据
86 int edge_failures = 0;
(gdb) n
97 root->join_search_private = (void *) &private;
(gdb)
98 private.initial_rels = initial_rels;
设置种子值
(gdb) n
101 geqo_set_seed(root, Geqo_seed);
计算种群大小/迭代代数
104 pool_size = gimme_pool_size(number_of_rels);
(gdb) p Geqo_seed
$2 = 0
(gdb) n
105 number_generations = gimme_number_generations(pool_size);
(gdb) p pool_size
$6 = 250
(gdb) n
111 pool = alloc_pool(root, pool_size, number_of_rels);
(gdb) p number_generations
$7 = 250
随机初始化种群,pool->data数组存储了组成该种群的染色体
(gdb) n
114 random_init_pool(root, pool);
(gdb) n
117 sort_pool(root, pool); /* we have to do it only one time, since all
(gdb) p *pool
$20 = {data = 0x1f0f8d8, size = 250, string_length = 13}
(gdb) p pool->data[0]->string
$23 = (Gene *) 0x1f108f0
(gdb) p *pool->data[0]->string
$24 = 8
(gdb) p pool->data[0].worth
$50 = 635.99087618977478
(gdb) p *pool->data[1]->string
$25 = 7
(gdb) p *pool->data[2]->string
$26 = 6
(gdb) p *pool->data[249].string
$48 = 13
(gdb) p pool->data[249].worth
$49 = 601.3463999999999
...
开始进行迭代进化
(gdb) n
129 momma = alloc_chromo(root, pool->string_length);
(gdb)
130 daddy = alloc_chromo(root, pool->string_length);
(gdb)
137 edge_table = alloc_edge_table(root, pool->string_length);
(gdb)
178 for (generation = 0; generation < number_generations; generation++)
(gdb) p number_generations
$52 = 250
利用线性偏差(bias)函数选择,然后交叉遗传
181 geqo_selection(root, momma, daddy, pool, Geqo_selection_bias);
(gdb) n
185 gimme_edge_table(root, momma->string, daddy->string, pool->string_length, edge_table);
(gdb) n
187 kid = momma;
(gdb) p *momma
$1 = {string = 0x1f30460, worth = 637.36587618977478}
(gdb) p *momma->string
$2 = 11
(gdb) p *daddy->string
$3 = 8
(gdb) p *daddy
$4 = {string = 0x1f304e0, worth = 635.57048404744364}
遍历边界表,计算kid的成本,把kid放到种群中,进一步进化
(gdb)
216 kid->worth = geqo_eval(root, kid->string, pool->string_length);
(gdb) p *kid
$5 = {string = 0x1f30460, worth = 637.36587618977478}
(gdb) n
219 spread_chromo(root, kid, pool);
(gdb) p *kid
$6 = {string = 0x1f30460, worth = 663.22435850797251}
(gdb) n
178 for (generation = 0; generation < number_generations; generation++)
下面考察进化过程中的geqo_eval函数,进入该函数,13个基因,tour为2
(gdb)
216 kid->worth = geqo_eval(root, kid->string, pool->string_length);
(gdb) step
geqo_eval (root=0x1fbf0f8, tour=0x1f30460, num_gene=13) at geqo_eval.c:75
75 mycontext = AllocSetContextCreate(CurrentMemoryContext,
(gdb) p *tour
$7 = 2
赋值/保存状态
(gdb) n
78 oldcxt = MemoryContextSwitchTo(mycontext);
(gdb)
95 savelength = list_length(root->join_rel_list);
(gdb)
96 savehash = root->join_rel_hash;
(gdb)
97 Assert(root->join_rel_level == NULL);
(gdb)
99 root->join_rel_hash = NULL;
进入geqo_eval->gimme_tree函数
(gdb)
102 joinrel = gimme_tree(root, tour, num_gene);
(gdb) step
gimme_tree (root=0x1fbf0f8, tour=0x1f30460, num_gene=13) at geqo_eval.c:165
165 GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private;
链表clumps初始化为NULL,开始循环,执行连接操作,tour数组保存了RTE的顺序
(gdb) n
180 clumps = NIL;
(gdb)
182 for (rel_count = 0; rel_count < num_gene; rel_count++)
(gdb) n
189 cur_rel_index = (int) tour[rel_count];
(gdb) p tour[0]
$9 = 2
(gdb) p tour[1]
$10 = 12
循环添加到clumps中,直至所有的表都加入到clumps中或者无法产生有效的连接
(gdb) n
190 cur_rel = (RelOptInfo *) list_nth(private->initial_rels,
(gdb)
194 cur_clump = (Clump *) palloc(sizeof(Clump));
(gdb)
195 cur_clump->joinrel = cur_rel;
(gdb)
196 cur_clump->size = 1;
(gdb)
199 clumps = merge_clump(root, clumps, cur_clump, num_gene, false);
(gdb)
(gdb)
182 for (rel_count = 0; rel_count < num_gene; rel_count++)
完成循环调用
(gdb) b geqo_eval.c:218
Breakpoint 2 at 0x793bf9: file geqo_eval.c, line 218.
(gdb) c
Continuing.
Breakpoint 2, gimme_tree (root=0x1fbf0f8, tour=0x1f30460, num_gene=13) at geqo_eval.c:219
219 if (list_length(clumps) != 1)
clumps链表中只有一个元素,该元素为13张表成功连接的访问路径
(gdb) p *clumps
$11 = {type = T_List, length = 1, head = 0x1ea2e20, tail = 0x1ea2e20}
$12 = {joinrel = 0x1ee4ef8, size = 13}
(gdb) p *((Clump *)clumps->head->data.ptr_value)->joinrel
$13 = {type = T_RelOptInfo, reloptkind = RELOPT_JOINREL, relids = 0x1ee34b0, rows = 100, consider_startup = false,
consider_param_startup = false, consider_parallel = true, reltarget = 0x1ee5110, pathlist = 0x1ee5a78, ppilist = 0x0,
partial_pathlist = 0x0, cheapest_startup_path = 0x1ee5ee0, cheapest_total_path = 0x1ee5ee0, cheapest_unique_path = 0x0,
cheapest_parameterized_paths = 0x1ee5fa0, direct_lateral_relids = 0x0, lateral_relids = 0x0, relid = 0,
reltablespace = 0, rtekind = RTE_JOIN, min_attr = 0, max_attr = 0, attr_needed = 0x0, attr_widths = 0x0,
lateral_vars = 0x0, lateral_referencers = 0x0, indexlist = 0x0, statlist = 0x0, pages = 0, tuples = 0, allvisfrac = 0,
subroot = 0x0, subplan_params = 0x0, rel_parallel_workers = -1, serverid = 0, userid = 0, useridiscurrent = false,
fdwroutine = 0x0, fdw_private = 0x0, unique_for_rels = 0x0, non_unique_for_rels = 0x0, baserestrictinfo = 0x0,
baserestrictcost = {startup = 0, per_tuple = 0}, baserestrict_min_security = 4294967295, joininfo = 0x0,
has_eclass_joins = false, consider_partitionwise_join = false, top_parent_relids = 0x0, part_scheme = 0x0, nparts = 0,
boundinfo = 0x0, partition_qual = 0x0, part_rels = 0x0, partexprs = 0x0, nullable_partexprs = 0x0,
partitioned_child_rels = 0x0}
geqo_eval->gimme_tree函数返回
(gdb) n
222 return ((Clump *) linitial(clumps))->joinrel;
回到geqo_eval函数,设置适应度,还原现场等
(gdb)
113 Path *best_path = joinrel->cheapest_total_path;
(gdb) n
115 fitness = best_path->total_cost;
(gdb)
124 root->join_rel_list = list_truncate(root->join_rel_list,
(gdb)
126 root->join_rel_hash = savehash;
(gdb)
129 MemoryContextSwitchTo(oldcxt);
(gdb)
130 MemoryContextDelete(mycontext);
(gdb)
132 return fitness;
(gdb) p fitness
$14 = 680.71399161308523
回到函数geqo,继续迭代
geqo (root=0x1fbf0f8, number_of_rels=13, initial_rels=0x1f0f698) at geqo_main.c:219
219 spread_chromo(root, kid, pool);
(gdb)
178 for (generation = 0; generation < number_generations; generation++)
完成循环迭代
(gdb) b geqo_main.c:229
Breakpoint 3 at 0x79407a: file geqo_main.c, line 229.
(gdb) c
Continuing.
Breakpoint 3, geqo (root=0x1fbf0f8, number_of_rels=13, initial_rels=0x1f0f698) at geqo_main.c:260
260 best_tour = (Gene *) pool->data[0].string;
最佳的访问节点路径(存储在best_tour数组中)
(gdb) p best_tour[0]
$17 = 2
(gdb) p best_tour[1]
$18 = 7
(gdb) p best_tour[12]
$19 = 3
(gdb) p best_tour[13]
最佳的最终结果关系
(gdb) p *best_rel
$21 = {type = T_RelOptInfo, reloptkind = RELOPT_JOINREL, relids = 0x1f3d098, rows = 1, consider_startup = false,
consider_param_startup = false, consider_parallel = true, reltarget = 0x1f3d7e0, pathlist = 0x1f3e148, ppilist = 0x0,
partial_pathlist = 0x0, cheapest_startup_path = 0x1f3e550, cheapest_total_path = 0x1f3e550, cheapest_unique_path = 0x0,
cheapest_parameterized_paths = 0x1f3e670, direct_lateral_relids = 0x0, lateral_relids = 0x0, relid = 0,
reltablespace = 0, rtekind = RTE_JOIN, min_attr = 0, max_attr = 0, attr_needed = 0x0, attr_widths = 0x0,
lateral_vars = 0x0, lateral_referencers = 0x0, indexlist = 0x0, statlist = 0x0, pages = 0, tuples = 0, allvisfrac = 0,
subroot = 0x0, subplan_params = 0x0, rel_parallel_workers = -1, serverid = 0, userid = 0, useridiscurrent = false,
fdwroutine = 0x0, fdw_private = 0x0, unique_for_rels = 0x0, non_unique_for_rels = 0x0, baserestrictinfo = 0x0,
baserestrictcost = {startup = 0, per_tuple = 0}, baserestrict_min_security = 4294967295, joininfo = 0x0,
has_eclass_joins = false, consider_partitionwise_join = false, top_parent_relids = 0x0, part_scheme = 0x0, nparts = 0,
boundinfo = 0x0, partition_qual = 0x0, part_rels = 0x0, partexprs = 0x0, nullable_partexprs = 0x0,
partitioned_child_rels = 0x0}
清理现场,并返回
(gdb) n
274 free_chromo(root, daddy);
(gdb)
277 free_edge_table(root, edge_table);
(gdb)
294 free_pool(root, pool);
(gdb)
297 root->join_search_private = NULL;
(gdb)
299 return best_rel;
(gdb)
300 }
DONE!
四、参考资料
allpaths.c
cost.h
costsize.c
PG Document:Query Planning
十分钟搞懂遗传算法
你和遗传算法的距离也许只差这一文
智能优化方法及其应用-遗传算法
免责声明:
① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的,并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据,供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。
② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341
PostgreSQL 源码解读(72)- 查询语句#57(make_one_rel函数#22-...
下载Word文档到电脑,方便收藏和打印~