PostgreSQL 源码解读(33)- 查询语句#18(查询优化-表达式预处理#3)
本节简单介绍了PG查询优化表达式预处理中的规范化过程。规范化的具体做法:一是忽略NULL以及OR中的FALSE、AND中的TRUE(实现函数:find_duplicate_ors);二是拉平谓词(实现函数:pull_ors/pull_ands);三是清除重复的ORs(实现函数:process_duplicate_ors)。这些函数位于文件src/backend/optimizer/prep/prepqual.c中。
一、布尔代数基础
规范化处理基于布尔/逻辑代数运算的相关基本定律:
幂等律
A∪A = A
A∩A = A
交换律
A∪B = B∪A
A∩B = B∩A
结合律
A∪(B∪C) = (A∪B)∪C
A∩(B∩C) = (A∩B)∩C
吸收律
A∪(A∩B) = A
A∩(A∪B) = A
分配律
A∪(B∩C) = (A∪B)∩(A∪C)
A∩(B∪C) = (A∩B)∪(A∩C)
幺元律
0∪A = A
1∩A = A
1∪A = 1
0∩A = 0
补余律
A∪A' = 1
A∩A' = 0
二、基本概念
PG源码对规范化表达式的注释如下:
忽略NULL以及OR中的False/AND中的TRUE
Where条件语句中的NULL/FALSE/TRUE,如能忽略,忽略之.如:NULL OR FALSE OR dwbh = '1001',则忽略NULL/ FALSE
testdb=# explain verbose select * from t_dwxx where NULL OR FALSE OR dwbh = '1001' ;
QUERY PLAN
----------------------------------------------------------------
Seq Scan on public.t_dwxx (cost=0.00..12.00 rows=1 width=474)
Output: dwmc, dwbh, dwdz
Filter: ((t_dwxx.dwbh)::text = '1001'::text)
(3 rows)
拉平谓词
SQL语句中的X1 OR/AND (X2 OR/AND X3),拉平简化为OR/AND(X1,X2,X3)
X1 OR/AND (X2 OR/AND X3),在查询树中为树状结构,第一层节点是BoolExpr,该Node中的args链表有2个元素,args->1=X1,args->2=BoolExpr,args->2->1=X2,args->2->2=X3,组成树状结构.简化后args->1/2/3=X1/X2/X3,所有条件处于同一个层次上,并不是树状结构.
testdb=# explain select * from t_dwxx where dwbh = '1001' OR (dwbh = '1002' OR dwbh = '1003');
QUERY PLAN
-------------------------------------------------------------------------------------------------------------
Seq Scan on t_dwxx (cost=0.00..12.80 rows=3 width=474)
Filter: (((dwbh)::text = '1001'::text) OR ((dwbh)::text = '1002'::text) OR ((dwbh)::text = '1003'::text))
(2 rows)
清除重复ORs
清除重复ORs的数学基础是布尔(逻辑)代数:
(X1 AND X2) OR (X1 AND X3) 应用分配律可以改写为 X1 AND (X2 OR X3),这样改写的目的是把X1抽取出来,为后续下推谓词X1作准备.
如(dwbh = '1001' AND dwbh = '1002') OR (dwbh = '1001' AND dwbh = '1003')条件,会改写为dwbh = '1001' AND (dwbh = '1002' OR dwbh = '1003')
testdb=# explain verbose select * from t_dwxx where (dwbh = '1001' AND dwbh = '1002') OR (dwbh = '1001' AND dwbh = '1003');
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------
--------
Seq Scan on public.t_dwxx (cost=0.00..12.80 rows=1 width=474)
Output: dwmc, dwbh, dwdz
Filter: (((t_dwxx.dwbh)::text = '1001'::text) AND (((t_dwxx.dwbh)::text = '1002'::text) OR ((t_dwxx.dwbh)::text = '1003'::
text)))
(3 rows)
三、源码解读
主函数入口:
subquery_planner
/*
 * subquery_planner (abridged excerpt)
 *
 * Builds a PlannerInfo for one query level, runs the early tree rewrites
 * (CTEs, sublink/subquery pull-up, UNION ALL flattening) and then pushes
 * every expression hanging off the Query through preprocess_expression().
 * The later part of the function is elided in this excerpt (see the "..."
 * marker near the end); newHaving, final_rel and tuple_fraction are not
 * referenced in the portion shown.
 *
 * glob           - global planner state, stored into root->glob
 * parse          - the Query to plan; modified in place
 * parent_root    - PlannerInfo of the enclosing query level, or NULL at top
 * hasRecursion   - when true, a special param id is assigned to wt_param_id
 * tuple_fraction - not used in the portion shown
 *
 * Returns the PlannerInfo built for this query level.
 */
PlannerInfo *
subquery_planner(PlannerGlobal *glob, Query *parse,
PlannerInfo *parent_root,
bool hasRecursion, double tuple_fraction)
{
PlannerInfo *root;// planner state built here and returned
List *newWithCheckOptions;// rebuilt WITH CHECK OPTION list (entries with non-NULL qual)
List *newHaving;// for the HAVING clause; not referenced in the portion shown
bool hasOuterJoins;// does the range table contain an outer join?
RelOptInfo *final_rel;// not referenced in the portion shown
ListCell *l;// loop cursor
root = makeNode(PlannerInfo);// allocate the result node
root->parse = parse;
root->glob = glob;
// nesting depth: 1 for the outermost query, parent's level + 1 otherwise
root->query_level = parent_root ? parent_root->query_level + 1 : 1;
root->parent_root = parent_root;
root->plan_params = NIL;
root->outer_params = NULL;
root->planner_cxt = CurrentMemoryContext;
root->init_plans = NIL;
root->cte_plan_ids = NIL;
root->multiexpr_params = NIL;
root->eq_classes = NIL;
root->append_rel_list = NIL;
root->rowMarks = NIL;
memset(root->upper_rels, 0, sizeof(root->upper_rels));
memset(root->upper_targets, 0, sizeof(root->upper_targets));
root->processed_tlist = NIL;
root->grouping_map = NULL;
root->minmax_aggs = NIL;
root->qual_security_level = 0;
root->inhTargetKind = INHKIND_NONE;
root->hasRecursion = hasRecursion;
// a special param id is only assigned when hasRecursion is set; -1 otherwise
if (hasRecursion)
root->wt_param_id = SS_assign_special_param(root);
else
root->wt_param_id = -1;
root->non_recursive_path = NULL;
root->partColsUpdated = false;
if (parse->cteList)
SS_process_ctes(root);// process WITH clauses (CTEs)
if (parse->hasSubLinks)
pull_up_sublinks(root); // pull up sublinks
inline_set_returning_functions(root);// NOTE(review): presumably inlines set-returning functions, judging by the name
pull_up_subqueries(root);// pull up subqueries
if (parse->setOperations)
flatten_simple_union_all(root);// flatten simple UNION ALL
// Scan the range table: record whether any RTE_JOIN, outer join or LATERAL entry exists
root->hasJoinRTEs = false;
root->hasLateralRTEs = false;
hasOuterJoins = false;
foreach(l, parse->rtable)
{
RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
if (rte->rtekind == RTE_JOIN)
{
root->hasJoinRTEs = true;
if (IS_OUTER_JOIN(rte->jointype))
hasOuterJoins = true;
}
if (rte->lateral)
root->hasLateralRTEs = true;
}
// preprocess row-mark information
preprocess_rowmarks(root);
// expand inherited tables
expand_inherited_tables(root);
// is there a HAVING expression?
root->hasHavingQual = (parse->havingQual != NULL);
root->hasPseudoConstantQuals = false;
// preprocess expressions: targetList (projected columns)
parse->targetList = (List *)
preprocess_expression(root, (Node *) parse->targetList,
EXPRKIND_TARGET);
// recompute the flag from the preprocessed target list
if (parse->hasTargetSRFs)
parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
newWithCheckOptions = NIL;
foreach(l, parse->withCheckOptions)// WITH CHECK OPTION quals
{
WithCheckOption *wco = lfirst_node(WithCheckOption, l);
wco->qual = preprocess_expression(root, wco->qual,
EXPRKIND_QUAL);
// entries whose qual became NULL after preprocessing are dropped
if (wco->qual != NULL)
newWithCheckOptions = lappend(newWithCheckOptions, wco);
}
parse->withCheckOptions = newWithCheckOptions;
// RETURNING list
parse->returningList = (List *)
preprocess_expression(root, (Node *) parse->returningList,
EXPRKIND_TARGET);
// preprocess qual conditions in the join tree
preprocess_qual_conditions(root, (Node *) parse->jointree);
// preprocess the HAVING expression
parse->havingQual = preprocess_expression(root, parse->havingQual,
EXPRKIND_QUAL);
// window clauses: frame start/end offsets
foreach(l, parse->windowClause)
{
WindowClause *wc = lfirst_node(WindowClause, l);
wc->startOffset = preprocess_expression(root, wc->startOffset,
EXPRKIND_LIMIT);
wc->endOffset = preprocess_expression(root, wc->endOffset,
EXPRKIND_LIMIT);
}
// LIMIT / OFFSET clause
parse->limitOffset = preprocess_expression(root, parse->limitOffset,
EXPRKIND_LIMIT);
parse->limitCount = preprocess_expression(root, parse->limitCount,
EXPRKIND_LIMIT);
// ON CONFLICT clause
if (parse->onConflict)
{
parse->onConflict->arbiterElems = (List *)
preprocess_expression(root,
(Node *) parse->onConflict->arbiterElems,
EXPRKIND_ARBITER_ELEM);
parse->onConflict->arbiterWhere =
preprocess_expression(root,
parse->onConflict->arbiterWhere,
EXPRKIND_QUAL);
parse->onConflict->onConflictSet = (List *)
preprocess_expression(root,
(Node *) parse->onConflict->onConflictSet,
EXPRKIND_TARGET);
parse->onConflict->onConflictWhere =
preprocess_expression(root,
parse->onConflict->onConflictWhere,
EXPRKIND_QUAL);
}
// append_rel_list (AppendRelInfo entries)
root->append_rel_list = (List *)
preprocess_expression(root, (Node *) root->append_rel_list,
EXPRKIND_APPINFO);
// per-RTE expressions, dispatched on the RTE kind
foreach(l, parse->rtable)
{
RangeTblEntry *rte = lfirst_node(RangeTblEntry, l);
int kind;
ListCell *lcsq;
if (rte->rtekind == RTE_RELATION)
{
if (rte->tablesample)
rte->tablesample = (TableSampleClause *)
preprocess_expression(root,
(Node *) rte->tablesample,
EXPRKIND_TABLESAMPLE);// TABLESAMPLE clause
}
else if (rte->rtekind == RTE_SUBQUERY)// subquery
{
// only LATERAL subqueries are touched here, and only when join RTEs exist
if (rte->lateral && root->hasJoinRTEs)
rte->subquery = (Query *)
flatten_join_alias_vars(root, (Node *) rte->subquery);
}
else if (rte->rtekind == RTE_FUNCTION)// function in FROM
{
kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
rte->functions = (List *)
preprocess_expression(root, (Node *) rte->functions, kind);
}
else if (rte->rtekind == RTE_TABLEFUNC)// table function
{
kind = rte->lateral ? EXPRKIND_TABLEFUNC_LATERAL : EXPRKIND_TABLEFUNC;
rte->tablefunc = (TableFunc *)
preprocess_expression(root, (Node *) rte->tablefunc, kind);
}
else if (rte->rtekind == RTE_VALUES)// VALUES clause
{
kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES;
rte->values_lists = (List *)
preprocess_expression(root, (Node *) rte->values_lists, kind);
}
// security quals are preprocessed as ordinary quals, in place
foreach(lcsq, rte->securityQuals)
{
lfirst(lcsq) = preprocess_expression(root,
(Node *) lfirst(lcsq),
EXPRKIND_QUAL);
}
}
...// remainder of the function omitted in this excerpt
return root;
}
preprocess_expression
/*
 * preprocess_expression
 *		Run the standard planner preprocessing steps over one expression.
 *
 * root - planner state for the current query level
 * expr - expression tree to process; NULL is passed straight through
 * kind - one of the EXPRKIND_* codes telling where the expression came from
 *
 * Returns the processed expression.  For EXPRKIND_QUAL the result has been
 * canonicalized and passed through make_ands_implicit().
 */
static Node *
preprocess_expression(PlannerInfo *root, Node *expr, int kind)
{
	const bool	is_qual = (kind == EXPRKIND_QUAL);
	bool		flatten_allowed;

	if (!expr)
		return NULL;

	/*
	 * Join alias Vars are flattened only when the query has join RTEs and
	 * the expression is not one of the four excluded kinds.
	 */
	flatten_allowed = (kind != EXPRKIND_RTFUNC &&
					   kind != EXPRKIND_VALUES &&
					   kind != EXPRKIND_TABLESAMPLE &&
					   kind != EXPRKIND_TABLEFUNC);
	if (root->hasJoinRTEs && flatten_allowed)
		expr = flatten_join_alias_vars(root, expr);

	/* Simplify constant sub-expressions. */
	expr = eval_const_expressions(root, expr);

	/* Quals additionally get canonicalized (is_check = false). */
	if (is_qual)
	{
		expr = (Node *) canonicalize_qual((Expr *) expr, false);

#ifdef OPTIMIZER_DEBUG
		printf("After canonicalize_qual()\n");
		pprint(expr);
#endif
	}

	/* Expand sublinks into subplans. */
	if (root->parse->hasSubLinks)
		expr = SS_process_sublinks(root, expr, is_qual);

	/* Below the top query level, replace upper-level Vars with Params. */
	if (root->query_level > 1)
		expr = SS_replace_correlation_vars(root, expr);

	/* Finally, hand quals to make_ands_implicit(). */
	if (is_qual)
		expr = (Node *) make_ands_implicit((Expr *) expr);

	return expr;
}
canonicalize_qual
/*
 * canonicalize_qual
 *		Entry point for qual normalization.
 *
 * qual     - boolean expression to normalize; must not be a List
 * is_check - forwarded to find_duplicate_ors(); selects CHECK-style rather
 *            than WHERE-style handling of constant inputs
 *
 * Returns NULL for a NULL input, otherwise whatever find_duplicate_ors()
 * produces.
 */
Expr *
canonicalize_qual(Expr *qual, bool is_check)
{
	/* An absent qual has nothing to normalize. */
	if (!qual)
		return NULL;

	/* Callers must pass a single expression, not an implicit-AND list. */
	Assert(!IsA(qual, List));

	/* All the real work happens in find_duplicate_ors(). */
	return find_duplicate_ors(qual, is_check);
}
find_duplicate_ors
/*
 * find_duplicate_ors
 *		Recursively normalize an AND/OR tree: drop or short-circuit on
 *		boolean constants, flatten nested clauses of the same kind, and
 *		hand OR argument lists to process_duplicate_ors().
 *
 * qual     - expression to normalize
 * is_check - true for CHECK-style semantics (NULL counts as TRUE),
 *            false for WHERE-style semantics (NULL counts as FALSE)
 *
 * Constant handling, as implemented below:
 *	 OR,  CHECK: FALSE is dropped;         TRUE or NULL  -> new TRUE const
 *	 OR,  WHERE: FALSE or NULL is dropped; TRUE          -> that const
 *	 AND, CHECK: TRUE or NULL is dropped;  FALSE         -> that const
 *	 AND, WHERE: TRUE is dropped;          FALSE or NULL -> new FALSE const
 *
 * Anything that is neither an AND nor an OR clause is returned unchanged.
 */
static Expr *
find_duplicate_ors(Expr *qual, bool is_check)
{
	const bool	qual_is_or = or_clause((Node *) qual);
	List	   *kept = NIL;
	ListCell   *cell;

	/* Not a boolean AND/OR node: nothing to do at this level. */
	if (!qual_is_or && !and_clause((Node *) qual))
		return qual;

	if (qual_is_or)
	{
		foreach(cell, ((BoolExpr *) qual)->args)
		{
			/* Normalize the argument first. */
			Expr	   *sub = find_duplicate_ors((Expr *) lfirst(cell),
												 is_check);

			if (sub != NULL && IsA(sub, Const))
			{
				Const	   *con = (Const *) sub;
				bool		is_true = !con->constisnull &&
					DatumGetBool(con->constvalue);
				bool		is_false = !con->constisnull && !is_true;

				/* Constants that cannot affect the OR are discarded. */
				if (is_check ? is_false : !is_true)
					continue;

				/* Any other constant makes the whole OR TRUE. */
				return is_check ? (Expr *) makeBoolConst(true, false) : sub;
			}

			kept = lappend(kept, sub);
		}

		/* Flatten nested ORs, then look for sub-clauses common to all arms. */
		return process_duplicate_ors(pull_ors(kept));
	}

	/* Otherwise this is an AND clause. */
	foreach(cell, ((BoolExpr *) qual)->args)
	{
		Expr	   *sub = find_duplicate_ors((Expr *) lfirst(cell), is_check);

		if (sub != NULL && IsA(sub, Const))
		{
			Const	   *con = (Const *) sub;
			bool		is_true = !con->constisnull &&
				DatumGetBool(con->constvalue);
			bool		is_false = !con->constisnull && !is_true;

			/* Constants that cannot affect the AND are discarded. */
			if (is_check ? !is_false : is_true)
				continue;

			/* Any other constant makes the whole AND FALSE. */
			return is_check ? sub : (Expr *) makeBoolConst(false, false);
		}

		kept = lappend(kept, sub);
	}

	/* Flatten nested ANDs pulled up to this level. */
	kept = pull_ands(kept);

	/* An AND with no inputs is TRUE. */
	if (kept == NIL)
		return (Expr *) makeBoolConst(true, false);

	/* An AND with a single input is just that input. */
	if (list_length(kept) == 1)
		return (Expr *) linitial(kept);

	/* Otherwise an AND node is still required. */
	return make_andclause(kept);
}
process_duplicate_ors
/*
 * process_duplicate_ors
 *
 * Given the argument list of an OR clause, factor out sub-clauses that occur
 * in every arm, i.e. apply the inverse distributive law
 *     (A AND B) OR (A AND C)  =>  A AND (B OR C)
 *
 * orlist - list of OR arms
 *
 * Returns a FALSE constant for an empty list, the sole element for a
 * one-element list, a plain OR of the original arms when no common
 * sub-clause exists, and otherwise either the single common clause or an
 * AND of the common clauses plus (if anything remains) the reduced OR.
 */
static Expr *
process_duplicate_ors(List *orlist)
{
List *reference = NIL;
int num_subclauses = 0;
List *winners;
List *neworlist;
ListCell *temp;
// an OR with no inputs reduces to FALSE
if (orlist == NIL)
return (Expr *) makeBoolConst(false, false);
// an OR with a single input reduces to that input
if (list_length(orlist) == 1)
return (Expr *) linitial(orlist);
// Walk the OR arms and pick the AND arm with the fewest sub-clauses as the
// reference list. As with a greatest common divisor, any clause common to
// all arms must appear in the shortest arm.
foreach(temp, orlist)
{
Expr *clause = (Expr *) lfirst(temp);
if (and_clause((Node *) clause))// AND arm
{
List *subclauses = ((BoolExpr *) clause)->args;
int nclauses = list_length(subclauses);
if (reference == NIL || nclauses < num_subclauses)
{
reference = subclauses;
num_subclauses = nclauses;
}
}
else// non-AND arm, e.g. X3 in (X1 AND X2) OR X3: it is the only possible common clause
{
reference = list_make1(clause);
break;
}
}
reference = list_union(NIL, reference);// remove duplicate clauses from the reference list
winners = NIL;
foreach(temp, reference)// test each reference clause against every OR arm
{
Expr *refclause = (Expr *) lfirst(temp);
bool win = true;
ListCell *temp2;
foreach(temp2, orlist)
{
Expr *clause = (Expr *) lfirst(temp2);
if (and_clause((Node *) clause))// AND arm: is the clause one of its arguments?
{
if (!list_member(((BoolExpr *) clause)->args, refclause))
{
win = false;
break;
}
}
else// non-AND arm: is the clause equal to the arm itself?
{
if (!equal(refclause, clause))
{
win = false;
break;
}
}
}
if (win)// the clause appears in every arm
winners = lappend(winners, refclause);// record it as a common clause
}
if (winners == NIL)
return make_orclause(orlist);// no common clause found: return an OR of the original arms
// Build the reduced OR list by removing the winners from each arm.
neworlist = NIL;
foreach(temp, orlist)// walk the OR arms again
{
Expr *clause = (Expr *) lfirst(temp);
if (and_clause((Node *) clause))// AND arm
{
List *subclauses = ((BoolExpr *) clause)->args;// arguments of this arm
subclauses = list_difference(subclauses, winners);// strip the common clauses
if (subclauses != NIL)// something is left in this arm after stripping
{
if (list_length(subclauses) == 1)
neworlist = lappend(neworlist, linitial(subclauses));
else
neworlist = lappend(neworlist, make_andclause(subclauses));
}
else
{
// this arm consisted only of winners, so the OR reduces to the winners alone
neworlist = NIL;
break;
}
}
else// non-AND arm
{
if (!list_member(winners, clause))// not a common clause: keep the arm as is
neworlist = lappend(neworlist, clause);
else
// The arm itself is a common clause, so the other arms add nothing.
// By the absorption law, X OR (X AND B) is equivalent to X.
{
neworlist = NIL;
break;
}
}
}
if (neworlist != NIL)// a reduced OR remains: append it to the winners
{
if (list_length(neworlist) == 1)
winners = lappend(winners, linitial(neworlist));
else
winners = lappend(winners, make_orclause(pull_ors(neworlist)));// flatten nested ORs
}
// build the result: a single clause, or an AND of all winners
if (list_length(winners) == 1)
return (Expr *) linitial(winners);
else
return make_andclause(pull_ands(winners));// flatten nested ANDs
}
pull_ors
/*
 * pull_ors
 *		Flatten nested OR clauses into a single-level argument list.
 *
 * orlist - list of expressions, some of which may themselves be OR clauses
 *
 * Returns a list in which every nested OR has been replaced, in position,
 * by its own recursively flattened arguments; other items are kept as is.
 */
static List *
pull_ors(List *orlist)
{
	List	   *flat = NIL;
	ListCell   *cell;

	foreach(cell, orlist)
	{
		Node	   *item = (Node *) lfirst(cell);
		List	   *nested;

		/* Non-OR items are carried over unchanged. */
		if (!or_clause(item))
		{
			flat = lappend(flat, item);
			continue;
		}

		/* Nested OR: flatten its arguments and splice them in. */
		nested = pull_ors(((BoolExpr *) item)->args);
		flat = list_concat(flat, nested);
	}

	return flat;
}
pull_ands
/*
 * pull_ands
 *		Flatten nested AND clauses into a single-level argument list.
 *
 * andlist - list of expressions, some of which may themselves be AND clauses
 *
 * Returns a list in which every nested AND has been replaced, in position,
 * by its own recursively flattened arguments; other items are kept as is.
 */
static List *
pull_ands(List *andlist)
{
	List	   *flat = NIL;
	ListCell   *cell;

	foreach(cell, andlist)
	{
		Node	   *item = (Node *) lfirst(cell);
		List	   *nested;

		/* Non-AND items are carried over unchanged. */
		if (!and_clause(item))
		{
			flat = lappend(flat, item);
			continue;
		}

		/* Nested AND: flatten its arguments and splice them in. */
		nested = pull_ands(((BoolExpr *) item)->args);
		flat = list_concat(flat, nested);
	}

	return flat;
}
四、跟踪分析
测试脚本:
select * from t_dwxx
where (FALSE OR dwbh = '1001')
AND ((dwbh = '1001' AND dwbh = '1002')
OR (dwbh = '1001' AND dwbh = '1003'));
该语句经规范化后与以下SQL语句无异:
select * from t_dwxx
where dwbh = '1001' AND (dwbh='1002' OR dwbh='1003');
-- 执行计划
testdb=# explain verbose select * from t_dwxx
where (FALSE OR dwbh = '1001')
AND ((dwbh = '1001' AND dwbh = '1002')
OR (dwbh = '1001' AND dwbh = '1003'));
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------
--------
Seq Scan on public.t_dwxx (cost=0.00..12.80 rows=1 width=474)
Output: dwmc, dwbh, dwdz
Filter: (((t_dwxx.dwbh)::text = '1001'::text) AND (((t_dwxx.dwbh)::text = '1002'::text) OR ((t_dwxx.dwbh)::text = '1003'::
text)))
(3 rows)
gdb跟踪:
(gdb) b find_duplicate_ors
Breakpoint 1 at 0x7811b4: file prepqual.c, line 418.
(gdb) c
Continuing.
Breakpoint 1, find_duplicate_ors (qual=0x2727528, is_check=false) at prepqual.c:418
418 if (or_clause((Node *) qual))
#输入参数,BoolExpr
#args链表有两个元素,一个是(FALSE OR dwbh = '1001'),
#另外一个是((dwbh = '1001' AND dwbh = '1002')
OR (dwbh = '1001' AND dwbh = '1003'))
(gdb) p *(BoolExpr *)qual
$2 = {xpr = {type = T_BoolExpr}, boolop = AND_EXPR, args = 0x2726c88, location = -1}
(gdb) p *((BoolExpr *)qual)->args
$3 = {type = T_List, length = 2, head = 0x2726c68, tail = 0x2727508}
...
#进入AND分支
462 else if (and_clause((Node *) qual))
...
#获得链表第一个元素
470 Expr *arg = (Expr *) lfirst(temp);
#再次进入find_duplicate_ors
#FALSE OR dwbh='1001'在此前已被简化为dwbh='1001',所以在这里直接返回
Breakpoint 1, find_duplicate_ors (qual=0x2726bc8, is_check=false) at prepqual.c:418
418 if (or_clause((Node *) qual))
(gdb) n
462 else if (and_clause((Node *) qual))
(gdb)
#
515 return qual;
(gdb) n
516 }
(gdb)
475 if (arg && IsA(arg, Const))
...
497 andlist = lappend(andlist, arg);
(gdb) n
468 foreach(temp, ((BoolExpr *) qual)->args)
(gdb) n
470 Expr *arg = (Expr *) lfirst(temp);
(gdb)
472 arg = find_duplicate_ors(arg, is_check);
#链表的下一个元素,即
#((dwbh = '1001' AND dwbh = '1002')
OR (dwbh = '1001' AND dwbh = '1003'))
#由两个BoolExpr组成
(gdb) p *(BoolExpr *)arg
$23 = {xpr = {type = T_BoolExpr}, boolop = OR_EXPR, args = 0x27270d8, location = -1}
(gdb) p *((BoolExpr *)arg)->args
$24 = {type = T_List, length = 2, head = 0x27270b8, tail = 0x27274b8}
(gdb) p *(Node *)((BoolExpr *)arg)->args->head->data.ptr_value
$25 = {type = T_BoolExpr}
(gdb) p *(Node *)((BoolExpr *)arg)->args->head->next->data.ptr_value
$26 = {type = T_BoolExpr}
...
#args左右两边的arg处理完毕
424 foreach(temp, ((BoolExpr *) qual)->args)
(gdb)
457 orlist = pull_ors(orlist);
...
#进入process_duplicate_ors
460 return process_duplicate_ors(orlist);
(gdb) step
process_duplicate_ors (orlist=0x2727858) at prepqual.c:529
529 List *reference = NIL;
...
#获得最少谓词的链表
(gdb) p *reference
$36 = {type = T_List, length = 2, head = 0x27278a8, tail = 0x27278f8}
...
#获得了公共的谓词winner!即dwbh = '1001'
(gdb) p *winners
$40 = {type = T_List, length = 1, head = 0x2727918, tail = 0x2727918}
...
(gdb)
canonicalize_qual (qual=0x2727528, is_check=false) at prepqual.c:309
309 return newqual;
(gdb) p *newqual
$43 = {type = T_BoolExpr}
(gdb) p *(BoolExpr *)newqual
$45 = {xpr = {type = T_BoolExpr}, boolop = AND_EXPR, args = 0x2727c18, location = -1}
#DONE!
五、小结
1、优化的数学基础:布尔代数以及相关的定律;
2、表达式规范化的过程:表达式扁平化处理以及公共谓词提取等的处理逻辑。
六、参考资料
布尔代数
prepqual.c
免责声明:
① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的,并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据,供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。
② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341
PostgreSQL 源码解读(33)- 查询语句#18(查询优化-表达式预处理#3)
下载Word文档到电脑,方便收藏和打印~