PostgreSQL 源码解读(69)- 查询语句#54(make_one_rel函数#19-...
本节大体介绍了动态规划算法实现(standard_join_search)中的join_search_one_level->make_join_rel->populate_joinrel_with_paths->add_paths_to_joinrel->match_unsorted_outer中的initial_cost_nestloop和final_cost_nestloop函数,这些函数用于计算nestloop join的Cost。
一、数据结构
Cost相关
注意:实际使用的参数值通过系统配置文件定义,而不是这里的常量定义!
typedef double Cost;
#define DEFAULT_SEQ_PAGE_COST 1.0 //顺序扫描page的成本
#define DEFAULT_RANDOM_PAGE_COST 4.0 //随机扫描page的成本
#define DEFAULT_CPU_TUPLE_COST 0.01 //处理一个元组的CPU成本
#define DEFAULT_CPU_INDEX_TUPLE_COST 0.005 //处理一个索引元组的CPU成本
#define DEFAULT_CPU_OPERATOR_COST 0.0025 //执行一次操作或函数的CPU成本
#define DEFAULT_PARALLEL_TUPLE_COST 0.1 //并行执行,从一个worker传输一个元组到另一个worker的成本
#define DEFAULT_PARALLEL_SETUP_COST 1000.0 //构建并行执行环境的成本
#define DEFAULT_EFFECTIVE_CACHE_SIZE 524288
double seq_page_cost = DEFAULT_SEQ_PAGE_COST;
double random_page_cost = DEFAULT_RANDOM_PAGE_COST;
double cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
double cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
double cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
double parallel_tuple_cost = DEFAULT_PARALLEL_TUPLE_COST;
double parallel_setup_cost = DEFAULT_PARALLEL_SETUP_COST;
int effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
Cost disable_cost = 1.0e10;//1后面10个0,通过设置一个巨大的成本,让优化器自动放弃此路径
int max_parallel_workers_per_gather = 2;//每次gather使用的worker数
二、源码解读
nested loop join的算法实现伪代码如下:
FOR row#1 IN (select * from dataset#1) LOOP
FOR row#2 IN (select * from dataset#2 where row#1 is matched) LOOP
output values from row#1 and row#2
END LOOP
END LOOP
initial_cost_nestloop
该函数预估nestloop join访问路径的成本
//---------------------------------------------------------------------- initial_cost_nestloop
void
initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
JoinType jointype,
Path *outer_path, Path *inner_path,
JoinPathExtraData *extra)
{
Cost startup_cost = 0;
Cost run_cost = 0;
double outer_path_rows = outer_path->rows;
Cost inner_rescan_start_cost;
Cost inner_rescan_total_cost;
Cost inner_run_cost;
Cost inner_rescan_run_cost;
cost_rescan(root, inner_path,
&inner_rescan_start_cost,
&inner_rescan_total_cost);
startup_cost += outer_path->startup_cost + inner_path->startup_cost;
run_cost += outer_path->total_cost - outer_path->startup_cost;
if (outer_path_rows > 1)
run_cost += (outer_path_rows - 1) * inner_rescan_start_cost;
inner_run_cost = inner_path->total_cost - inner_path->startup_cost;
inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost;
if (jointype == JOIN_SEMI || jointype == JOIN_ANTI ||
extra->inner_unique)
{
workspace->inner_run_cost = inner_run_cost;
workspace->inner_rescan_run_cost = inner_rescan_run_cost;
}
else
{
//常规的情况,对于每一个外表行,将扫描整个内表
run_cost += inner_run_cost;
if (outer_path_rows > 1)
run_cost += (outer_path_rows - 1) * inner_rescan_run_cost;
}
//结果赋值
workspace->startup_cost = startup_cost;
workspace->total_cost = startup_cost + run_cost;
workspace->run_cost = run_cost;
}
//-------------------------------------- cost_rescan
static void
cost_rescan(PlannerInfo *root, Path *path,
Cost *rescan_startup_cost,
Cost *rescan_total_cost)
{
switch (path->pathtype)//路径类型
{
case T_FunctionScan:
*rescan_startup_cost = 0;
*rescan_total_cost = path->total_cost - path->startup_cost;
break;
case T_HashJoin://Hash Join
if (((HashPath *) path)->num_batches == 1)
{
*rescan_startup_cost = 0;//启动成本为0(构建hash表)
*rescan_total_cost = path->total_cost - path->startup_cost;
}
else
{
*rescan_startup_cost = path->startup_cost;
*rescan_total_cost = path->total_cost;
}
break;
case T_CteScan:
case T_WorkTableScan:
{
Cost run_cost = cpu_tuple_cost * path->rows;
double nbytes = relation_byte_size(path->rows,
path->pathtarget->width);
long work_mem_bytes = work_mem * 1024L;
if (nbytes > work_mem_bytes)
{
//如果溢出到磁盘,那么需考虑重新读取的成本
double npages = ceil(nbytes / BLCKSZ);
run_cost += seq_page_cost * npages;
}
*rescan_startup_cost = 0;
*rescan_total_cost = run_cost;
}
break;
case T_Material:
case T_Sort:
{
Cost run_cost = cpu_operator_cost * path->rows;
double nbytes = relation_byte_size(path->rows,
path->pathtarget->width);
long work_mem_bytes = work_mem * 1024L;
if (nbytes > work_mem_bytes)
{
//如果溢出到磁盘,那么需考虑重新读取的成本
double npages = ceil(nbytes / BLCKSZ);
run_cost += seq_page_cost * npages;
}
*rescan_startup_cost = 0;
*rescan_total_cost = run_cost;
}
break;
default:
*rescan_startup_cost = path->startup_cost;
*rescan_total_cost = path->total_cost;
break;
}
}
final_cost_nestloop
该函数实现nestloop join访问路径的成本和结果大小的最终估算。
//---------------------------------------------------------------------- final_cost_nestloop
void
final_cost_nestloop(PlannerInfo *root, NestPath *path,//NL访问路径
JoinCostWorkspace *workspace,//initial_cost_nestloop返回的结果
JoinPathExtraData *extra)//额外的信息
{
Path *outer_path = path->outerjoinpath;//外表访问路径
Path *inner_path = path->innerjoinpath;//内部访问路径
double outer_path_rows = outer_path->rows;//外表访问路径行数
double inner_path_rows = inner_path->rows;//内部访问路径行数
Cost startup_cost = workspace->startup_cost;//启动成本
Cost run_cost = workspace->run_cost;//运行成本
Cost cpu_per_tuple;//处理每个tuple的CPU成本
QualCost restrict_qual_cost;//表达式处理成本
double ntuples;//元组数目
//确保参数正确
if (outer_path_rows <= 0 || isnan(outer_path_rows))
outer_path_rows = 1;
if (inner_path_rows <= 0 || isnan(inner_path_rows))
inner_path_rows = 1;
//修正行数估算
if (path->path.param_info)
path->path.rows = path->path.param_info->ppi_rows;
else
path->path.rows = path->path.parent->rows;
//调整并行执行的行数估算
if (path->path.parallel_workers > 0)
{
double parallel_divisor = get_parallel_divisor(&path->path);
path->path.rows =
clamp_row_est(path->path.rows / parallel_divisor);
}
if (!enable_nestloop)
startup_cost += disable_cost;
// 内部源数据的成本
if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI ||
extra->inner_unique)//半连接/反连接或者内部返回唯一值
{
Cost inner_run_cost = workspace->inner_run_cost;
Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost;
double outer_matched_rows;
double outer_unmatched_rows;
Selectivity inner_scan_frac;
outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac);
outer_unmatched_rows = outer_path_rows - outer_matched_rows;
inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0);
ntuples = outer_matched_rows * inner_path_rows * inner_scan_frac;
if (has_indexed_join_quals(path))//连接条件上存在索引
{
run_cost += inner_run_cost * inner_scan_frac;
if (outer_matched_rows > 1)
run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
run_cost += outer_unmatched_rows *
inner_rescan_run_cost / inner_path_rows;
}
else
{
//首先,统计所有处理过程中不匹配的行数
ntuples += outer_unmatched_rows * inner_path_rows;
//现在强制添加全表扫描,并减少相应的计数
run_cost += inner_run_cost;
if (outer_unmatched_rows >= 1)
outer_unmatched_rows -= 1;
else
outer_matched_rows -= 1;
//对于已经匹配的外表行,增加内表运行成本
if (outer_matched_rows > 0)
run_cost += outer_matched_rows * inner_rescan_run_cost * inner_scan_frac;
//对于未匹配的外表行,增加内表运行成本
if (outer_unmatched_rows > 0)
run_cost += outer_unmatched_rows * inner_rescan_run_cost;
}
}
else//普通连接
{
//正常情况下,源成本已在预估计算过程中统计
//计算处理的元组数量
ntuples = outer_path_rows * inner_path_rows;
}
cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
startup_cost += restrict_qual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple;
run_cost += cpu_per_tuple * ntuples;
startup_cost += path->path.pathtarget->cost.startup;
run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
path->path.startup_cost = startup_cost;
path->path.total_cost = startup_cost + run_cost;
}
三、跟踪分析
测试脚本如下
select a.*,b.grbh,b.je
from t_dwxx a,
lateral (select t1.dwbh,t1.grbh,t2.je
from t_grxx t1
inner join t_jfxx t2 on t1.dwbh = a.dwbh and t1.grbh = t2.grbh) b
where a.dwbh = '1001'
order by b.dwbh;
启动gdb,设置断点
(gdb) b try_nestloop_path
Breakpoint 1 at 0x7ae950: file joinpath.c, line 373.
(gdb) c
Continuing.
Breakpoint 1, try_nestloop_path (root=0x2fb3b30, joinrel=0x2fc6d28, outer_path=0x2fc2540, inner_path=0x2fc1280,
pathkeys=0x0, jointype=JOIN_INNER, extra=0x7ffec5f496e0) at joinpath.c:373
373 RelOptInfo *innerrel = inner_path->parent;
进入函数initial_cost_nestloop
(gdb)
422 initial_cost_nestloop(root, &workspace, jointype,
(gdb) step
initial_cost_nestloop (root=0x2fb3b30, workspace=0x7ffec5f49540, jointype=JOIN_INNER, outer_path=0x2fc2540,
inner_path=0x2fc1280, extra=0x7ffec5f496e0) at costsize.c:2323
2323 Cost startup_cost = 0;
进入initial_cost_nestloop->cost_rescan函数
(gdb)
2332 cost_rescan(root, inner_path,
(gdb) step
cost_rescan (root=0x2fb3b30, path=0x2fc1280, rescan_startup_cost=0x7ffec5f494a0, rescan_total_cost=0x7ffec5f49498)
at costsize.c:3613
3613 switch (path->pathtype)
路径类型为T_SeqScan(在执行该SQL语句前,删除了t_grxx.dwbh上的索引)
(gdb) p path->pathtype
$1 = T_SeqScan
进入相应的处理逻辑,直接复制,启动成本&总成本与T_SeqScan一样
(gdb) n
3699 *rescan_startup_cost = path->startup_cost;
(gdb) n
3700 *rescan_total_cost = path->total_cost;
(gdb)
3701 break;
(gdb)
回到initial_cost_nestloop,执行完成,最终结果
外表存在约束条件dwbh='1001',只有一行,内表在dwbh上没有索引,使用了顺序全表扫描
...
(gdb)
2381 workspace->run_cost = run_cost;
(gdb)
2382 }
(gdb) p *workspace
$4 = {startup_cost = 0.28500000000000003, total_cost = 1984.3025, run_cost = 1984.0174999999999,
inner_run_cost = 2.4712728827210812e-316, inner_rescan_run_cost = 6.9530954948263344e-310,
outer_rows = 3.9937697668447996e-317, inner_rows = 2.4712728827210812e-316, outer_skip_rows = 6.9530954948287059e-310,
inner_skip_rows = 6.9443062041807458e-310, numbuckets = 50092024, numbatches = 0,
inner_rows_total = 2.4751428001118265e-316}
回到try_nestloop_path
(gdb) n
try_nestloop_path (root=0x2fb3b30, joinrel=0x2fc6d28, outer_path=0x2fc2540, inner_path=0x2fc1280, pathkeys=0x0,
jointype=JOIN_INNER, extra=0x7ffec5f496e0) at joinpath.c:425
425 if (add_path_precheck(joinrel,
设置断点,进入final_cost_nestloop
(gdb) b final_cost_nestloop
Breakpoint 2 at 0x79f3ff: file costsize.c, line 2397.
(gdb) c
Continuing.
Breakpoint 2, final_cost_nestloop (root=0x2fb3b30, path=0x2fc2ef0, workspace=0x7ffec5f49540, extra=0x7ffec5f496e0)
at costsize.c:2397
2397 Path *outer_path = path->outerjoinpath;
外表访问路径是索引扫描(t_dwxx),内表访问路径是全表顺序扫描
(gdb) p *outer_path
$6 = {type = T_IndexPath, pathtype = T_IndexScan, parent = 0x2fb3570, pathtarget = 0x2fb8ee0, param_info = 0x0,
parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 1, startup_cost = 0.28500000000000003,
total_cost = 8.3025000000000002, pathkeys = 0x0}
内表行数为10,PG通过统计信息准确计算了该值
(gdb) n
2400 double inner_path_rows = inner_path->rows;
(gdb)
2401 Cost startup_cost = workspace->startup_cost;
(gdb) p inner_path_rows
$8 = 10
计算成本
(gdb)
2556 cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
(gdb)
2557 startup_cost += restrict_qual_cost.startup;
(gdb) p restrict_qual_cost
$10 = {startup = 0, per_tuple = 0}
最终结果,T_NestPath,总成本total_cost = 1984.4024999999999,启动成本startup_cost = 0.28500000000000003
2567 }
(gdb) p *path
$11 = {path = {type = T_NestPath, pathtype = T_NestLoop, parent = 0x2fc6d28, pathtarget = 0x2fc6f60, param_info = 0x0,
parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 10, startup_cost = 0.28500000000000003,
total_cost = 1984.4024999999999, pathkeys = 0x0}, jointype = JOIN_INNER, inner_unique = false,
outerjoinpath = 0x2fc2540, innerjoinpath = 0x2fc1280, joinrestrictinfo = 0x0}
完成调用
(gdb) n
create_nestloop_path (root=0x2fb3b30, joinrel=0x2fc6d28, jointype=JOIN_INNER, workspace=0x7ffec5f49540,
extra=0x7ffec5f496e0, outer_path=0x2fc2540, inner_path=0x2fc1280, restrict_clauses=0x0, pathkeys=0x0,
required_outer=0x0) at pathnode.c:2229
2229 return pathnode;
DONE!
四、参考资料
allpaths.c
cost.h
costsize.c
PG Document:Query Planning
免责声明:
① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的,并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据,供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。
② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341
PostgreSQL 源码解读(69)- 查询语句#54(make_one_rel函数#19-...
下载Word文档到电脑,方便收藏和打印~