PostgreSQL 源码解读(182)- 查询#98(聚合函数#3-ExecAgg)
本节简单介绍了PostgreSQL执行聚合函数的实现,主要实现函数是ExecAgg.这里先行介绍ExecAgg->agg_fill_hash_table函数,其他子函数后续再行介绍.
通过设置log输出,可得到SQL的planTree:
",,,,,"select bh,avg(c1),min(c1),max(c2) from t_agg group by bh;",,,"psql"
2019-04-30 14:33:11.998 CST,"xdb","testdb",1387,"[local]",5cc7ec00.56b,3,"SELECT",2019-04-30 14:32:32 CST,3/3,0,LOG,00000,"plan:"," {PLANNEDSTMT
:commandType 1
:queryId 0
:hasReturning false
:hasModifyingCTE false
:canSetTag true
:transientPlan false
:dependsOnRole false
:parallelModeNeeded false
:jitFlags 0
:planTree
{AGG
:startup_cost 13677.00
:total_cost 13677.06
:plan_rows 5
:plan_width 45
:parallel_aware false
:parallel_safe false
:plan_node_id 0
:targetlist (...
)
:qual <>
:lefttree
{SEQSCAN
:startup_cost 0.00
:total_cost 8677.00
:plan_rows 500000
:plan_width 13
:parallel_aware false
:parallel_safe false
:plan_node_id 1
:targetlist (...
)
:qual <>
:lefttree <>
:righttree <>
:initPlan <>
:extParam (b)
:allParam (b)
:scanrelid 1
}
:righttree <>
:initPlan <>
:extParam (b)
:allParam (b)
:aggstrategy 2
:aggsplit 0
:numCols 1
:grpColIdx 1
:grpOperators 98
:numGroups 5
:aggParams (b)
:groupingSets <>
:chain <>
}
:rtable (...
)
:resultRelations <>
:nonleafResultRelations <>
:rootResultRelations <>
:subplans <>
:rewindPlanIDs (b)
:rowMarks <>
:relationOids (o 245801)
:invalItems <>
:paramExecTypes <>
:utilityStmt <>
:stmt_location 0
:stmt_len 56
}
",,,,,"select bh,avg(c1),min(c1),max(c2) from t_agg group by bh;",,,"psql"
第一个节点为AGG,相应的实现函数为ExecAgg.
一、数据结构
AggState
聚合函数执行时状态结构体,内含AggStatePerAgg等结构体
//在nodeAgg.c中私有的结构体
typedef struct AggStatePerAggData *AggStatePerAgg;
typedef struct AggStatePerTransData *AggStatePerTrans;
typedef struct AggStatePerGroupData *AggStatePerGroup;
typedef struct AggStatePerPhaseData *AggStatePerPhase;
typedef struct AggStatePerHashData *AggStatePerHash;
typedef struct AggState
{
//第一个字段是NodeTag(继承自ScanState)
ScanState ss;
//targetlist和quals中所有的Aggref
List *aggs;
//链表的大小(可以为0)
int numaggs;
//pertrans条目大小
int numtrans;
//Agg策略模式
AggStrategy aggstrategy;
//agg-splitting模式,参见nodes.h
AggSplit aggsplit;
//指向当前步骤数据的指针
AggStatePerPhase phase;
//步骤数(包括0)
int numphases;
//当前步骤
int current_phase;
//per-Aggref信息
AggStatePerAgg peragg;
//per-Trans状态信息
AggStatePerTrans pertrans;
//长生命周期数据的ExprContexts(hashtable)
ExprContext *hashcontext;
////长生命周期数据的ExprContexts(每一个GS使用)
ExprContext **aggcontexts;
//输入表达式的ExprContext
ExprContext *tmpcontext;
#define FIELDNO_AGGSTATE_CURAGGCONTEXT 14
//当前活跃的aggcontext
ExprContext *curaggcontext;
//当前活跃的aggregate(如存在)
AggStatePerAgg curperagg;
#define FIELDNO_AGGSTATE_CURPERTRANS 16
//当前活跃的trans state
AggStatePerTrans curpertrans;
//输入结束?
bool input_done;
//Agg扫描结束?
bool agg_done;
//最后一个grouping set
int projected_set;
#define FIELDNO_AGGSTATE_CURRENT_SET 20
//将要解析的当前grouping set
int current_set;
//当前投影操作的分组列
Bitmapset *grouped_cols;
//倒序的分组列链表
List *all_grouped_cols;
//-------- 下面的列用于grouping set步骤数据
//所有步骤中最大的sets大小
int maxsets;
//所有步骤的数组
AggStatePerPhase phases;
//对于phases > 1,已排序的输入信息
Tuplesortstate *sort_in;
//对于下一个步骤,输入已拷贝
Tuplesortstate *sort_out;
//排序结果的slot
TupleTableSlot *sort_slot;
//------- 下面的列用于AGG_PLAIN和AGG_SORTED模式:
//per-group指针的grouping set编号数组
AggStatePerGroup *pergroups;
//当前组的第一个元组拷贝
HeapTuple grp_firstTuple;
//--------- 下面的列用于AGG_HASHED和AGG_MIXED模式:
//是否已填充hash表?
bool table_filled;
//hash桶数?
int num_hashes;
//相应的哈希表数据数组
AggStatePerHash perhash;
//per-group指针的grouping set编号数组
AggStatePerGroup *hash_pergroup;
//---------- agg输入表达式解析支持
#define FIELDNO_AGGSTATE_ALL_PERGROUPS 34
//首先是->pergroups,然后是hash_pergroup
AggStatePerGroup *all_pergroups;
//投影实现机制
ProjectionInfo *combinedproj;
} AggState;
//nodeag .c支持的基本选项
#define AGGSPLITOP_COMBINE 0x01
#define AGGSPLITOP_SKIPFINAL 0x02
#define AGGSPLITOP_SERIALIZE 0x04
#define AGGSPLITOP_DESERIALIZE 0x08
//支持的操作模式
typedef enum AggSplit
{
//基本 : 非split聚合
AGGSPLIT_SIMPLE = 0,
//部分聚合的初始步骤,序列化
AGGSPLIT_INITIAL_SERIAL = AGGSPLITOP_SKIPFINAL | AGGSPLITOP_SERIALIZE,
//部分聚合的最终步骤,反序列化
AGGSPLIT_FINAL_DESERIAL = AGGSPLITOP_COMBINE | AGGSPLITOP_DESERIALIZE
} AggSplit;
//测试AggSplit选择了哪些基本选项
#define DO_AGGSPLIT_COMBINE(as) (((as) & AGGSPLITOP_COMBINE) != 0)
#define DO_AGGSPLIT_SKIPFINAL(as) (((as) & AGGSPLITOP_SKIPFINAL) != 0)
#define DO_AGGSPLIT_SERIALIZE(as) (((as) & AGGSPLITOP_SERIALIZE) != 0)
#define DO_AGGSPLIT_DESERIALIZE(as) (((as) & AGGSPLITOP_DESERIALIZE) != 0)
二、源码解读
ExecAgg接收从outer子计划返回的元组合适的属性上为每一个聚合函数(出现在投影列或节点表达式)执行聚合.需要聚合的元组数量依赖于是否已分组或者选择普通聚合.在已分组的聚合操作宏,为每一个组产生结果行;普通聚合,整个查询只有一个结果行.
不管哪种情况,每一个聚合结果值都会存储在表达式上下文中(ExecProject会解析结果元组)
static TupleTableSlot *
ExecAgg(PlanState *pstate)
{
AggState *node = castNode(AggState, pstate);
TupleTableSlot *result = NULL;
CHECK_FOR_INTERRUPTS();
if (!node->agg_done)
{
//基于策略进行分发
switch (node->phase->aggstrategy)
{
case AGG_HASHED:
if (!node->table_filled)
agg_fill_hash_table(node);
//填充后,执行MIXED
case AGG_MIXED:
result = agg_retrieve_hash_table(node);
break;
case AGG_PLAIN:
case AGG_SORTED:
result = agg_retrieve_direct(node);
break;
}
if (!TupIsNull(result))
return result;
}
return NULL;
}
agg_fill_hash_table
读取输入并构建哈希表,逻辑较为简单,详细参考下面源码
static void
agg_fill_hash_table(AggState *aggstate)
{
TupleTableSlot *outerslot;
ExprContext *tmpcontext = aggstate->tmpcontext;
for (;;)
{
//--------- 循环直至完成所有元组的处理
//提取输入的元组
outerslot = fetch_input_tuple(aggstate);
if (TupIsNull(outerslot))
break;//已完成处理,退出循环
//配置lookup_hash_entries和advance_aggregates函数
//把元组放在临时内存上下文中
tmpcontext->ecxt_outertuple = outerslot;
//检索或构建哈希表条目
lookup_hash_entries(aggstate);
//增加聚合(或组合函数)
advance_aggregates(aggstate);
ResetExprContext(aggstate->tmpcontext);
}
aggstate->table_filled = true;
//初始化用于遍历第一个哈希表
select_current_set(aggstate, 0, true);
ResetTupleHashIterator(aggstate->perhash[0].hashtable,
&aggstate->perhash[0].hashiter);
}
static void
advance_aggregates(AggState *aggstate)
{
bool dummynull;
ExecEvalExprSwitchContext(aggstate->phase->evaltrans,
aggstate->tmpcontext,
&dummynull);
}
#ifndef FRONTEND
static inline Datum
ExecEvalExprSwitchContext(ExprState *state,
ExprContext *econtext,
bool *isNull)
{
Datum retDatum;
MemoryContext oldContext;
oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
retDatum = state->evalfunc(state, econtext, isNull);
MemoryContextSwitchTo(oldContext);
return retDatum;
}
#endif
static void
lookup_hash_entries(AggState *aggstate)
{
//hash个数
int numHashes = aggstate->num_hashes;
//获取pergroup
AggStatePerGroup *pergroup = aggstate->hash_pergroup;
int setno;
for (setno = 0; setno < numHashes; setno++)
{
//设置当前集合
select_current_set(aggstate, setno, true);
//检索哈希条目
pergroup[setno] = lookup_hash_entry(aggstate)->additional;
}
}
static TupleHashEntryData *
lookup_hash_entry(AggState *aggstate)
{
//输入的元组
TupleTableSlot *inputslot = aggstate->tmpcontext->ecxt_outertuple;
//perhash
AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
//hashslot
TupleTableSlot *hashslot = perhash->hashslot;
//条目入口
TupleHashEntryData *entry;
//变量
bool isnew;
int i;
//转换需要的列到hashslot中
slot_getsomeattrs(inputslot, perhash->largestGrpColIdx);
ExecClearTuple(hashslot);
for (i = 0; i < perhash->numhashGrpCols; i++)
{
//遍历分组列
//列编号
int varNumber = perhash->hashGrpColIdxInput[i] - 1;
//赋值
hashslot->tts_values[i] = inputslot->tts_values[varNumber];
hashslot->tts_isnull[i] = inputslot->tts_isnull[varNumber];
}
//存储虚拟元组
ExecStoreVirtualTuple(hashslot);
//使用已过滤的元组检索或者创建哈希表条目
entry = LookupTupleHashEntry(perhash->hashtable, hashslot, &isnew);
if (isnew)
{
//新条目
AggStatePerGroup pergroup;
int transno;
//分配内存
pergroup = (AggStatePerGroup)
MemoryContextAlloc(perhash->hashtable->tablecxt,
sizeof(AggStatePerGroupData) * aggstate->numtrans);
entry->additional = pergroup;
for (transno = 0; transno < aggstate->numtrans; transno++)
{
//遍历转换函数
AggStatePerTrans pertrans = &aggstate->pertrans[transno];
AggStatePerGroup pergroupstate = &pergroup[transno];
//初始化聚合
initialize_aggregate(aggstate, pertrans, pergroupstate);
}
}
return entry;
}
TupleHashEntry
LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
bool *isnew)
{
//哈希条目
TupleHashEntryData *entry;
MemoryContext oldContext;
bool found;
MinimalTuple key;
//在短生命周期中执行哈希函数
oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
//设置哈希和匹配函数需要的数据
hashtable->inputslot = slot;
hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
hashtable->cur_eq_func = hashtable->tab_eq_func;
//参考inputslot的flag
key = NULL;
if (isnew)
{
//新条目,插入到哈希表中
entry = tuplehash_insert(hashtable->hashtab, key, &found);
if (found)
{
//发现上一个已存在的条目
*isnew = false;
}
else
{
//创建新条目
*isnew = true;
//初始化调用者的数据
entry->additional = NULL;
MemoryContextSwitchTo(hashtable->tablecxt);
//拷贝第一个条目到数据表上下文中
entry->firstTuple = ExecCopySlotMinimalTuple(slot);
}
}
else
{
//isnew为NULL,调用tuplehash_lookup
entry = tuplehash_lookup(hashtable->hashtab, key);
}
MemoryContextSwitchTo(oldContext);
return entry;
}
static void
initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
AggStatePerGroup pergroupstate)
{
if (pertrans->numSortCols > 0)
{
if (pertrans->sortstates[aggstate->current_set])
tuplesort_end(pertrans->sortstates[aggstate->current_set]);
if (pertrans->numInputs == 1)
{
//属性信息
Form_pg_attribute attr = TupleDescAttr(pertrans->sortdesc, 0);
//Datum sorter
pertrans->sortstates[aggstate->current_set] =
tuplesort_begin_datum(attr->atttypid,
pertrans->sortOperators[0],
pertrans->sortCollations[0],
pertrans->sortNullsFirst[0],
work_mem, NULL, false);
}
else
//full tuple sorter
pertrans->sortstates[aggstate->current_set] =
tuplesort_begin_heap(pertrans->sortdesc,
pertrans->numSortCols,
pertrans->sortColIdx,
pertrans->sortOperators,
pertrans->sortCollations,
pertrans->sortNullsFirst,
work_mem, NULL, false);
}
if (pertrans->initValueIsNull)
pergroupstate->transValue = pertrans->initValue;
else
{
MemoryContext oldContext;
oldContext = MemoryContextSwitchTo(
aggstate->curaggcontext->ecxt_per_tuple_memory);
//拷贝
pergroupstate->transValue = datumCopy(pertrans->initValue,
pertrans->transtypeByVal,
pertrans->transtypeLen);
MemoryContextSwitchTo(oldContext);
}
pergroupstate->transValueIsNull = pertrans->initValueIsNull;
pergroupstate->noTransValue = pertrans->initValueIsNull;
}
static void
select_current_set(AggState *aggstate, int setno, bool is_hash)
{
//在修改的时候,会同时调整ExecInterpExpr()和友元
if (is_hash)
aggstate->curaggcontext = aggstate->hashcontext;
else
aggstate->curaggcontext = aggstate->aggcontexts[setno];
aggstate->current_set = setno;
}
三、跟踪分析
测试脚本
//禁用并行
testdb=# set max_parallel_workers_per_gather=0;
SET
testdb=# explain verbose select bh,avg(c1),min(c1),max(c2) from t_agg group by bh;
QUERY PLAN
---------------------------------------------------------------------------
HashAggregate (cost=13677.00..13677.06 rows=5 width=45)
Output: bh, avg(c1), min(c1), max(c2)
Group Key: t_agg.bh
-> Seq Scan on public.t_agg (cost=0.00..8677.00 rows=500000 width=13)
Output: bh, c1, c2, c3, c4, c5, c6
(5 rows)
跟踪分析
(gdb) b ExecAgg
Breakpoint 1 at 0x6ee444: file nodeAgg.c, line 1536.
(gdb) c
Continuing.
Breakpoint 1, ExecAgg (pstate=0x1f895a0) at nodeAgg.c:1536
1536 AggState *node = castNode(AggState, pstate);
(gdb)
输入参数,AggState,在ExecInitAgg函数中初始化
(gdb) p *pstate
$1 = {type = T_AggState, plan = 0x1f7b1e0, state = 0x1f89388, ExecProcNode = 0x6ee438 <ExecAgg>,
ExecProcNodeReal = 0x6ee438 <ExecAgg>, instrument = 0x0, worker_instrument = 0x0, worker_jit_instrument = 0x0,
qual = 0x0, lefttree = 0x1f89b10, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0,
ps_ResultTupleSlot = 0x1f8a710, ps_ExprContext = 0x1f89a50, ps_ProjInfo = 0x1f8a850, scandesc = 0x1f89e60}
使用Hash实现
(gdb) n
1537 TupleTableSlot *result = NULL;
(gdb)
1539 CHECK_FOR_INTERRUPTS();
(gdb)
1541 if (!node->agg_done)
(gdb)
1544 switch (node->phase->aggstrategy)
(gdb) p node->phase->aggstrategy
$2 = AGG_HASHED
(gdb) n
1547 if (!node->table_filled)
(gdb)
1548 agg_fill_hash_table(node);
(gdb)
进入agg_fill_hash_table
(gdb) step
agg_fill_hash_table (aggstate=0x1f895a0) at nodeAgg.c:1915
1915 ExprContext *tmpcontext = aggstate->tmpcontext;
agg_fill_hash_table->提取输入的元组
(gdb) n
1923 outerslot = fetch_input_tuple(aggstate);
(gdb) step
fetch_input_tuple (aggstate=0x1f895a0) at nodeAgg.c:396
396 if (aggstate->sort_in)
(gdb) p aggstate->sort_in
$3 = (Tuplesortstate *) 0x0
(gdb) n
406 slot = ExecProcNode(outerPlanState(aggstate));
(gdb)
408 if (!TupIsNull(slot) && aggstate->sort_out)
(gdb) p *slot
$4 = {type = T_TupleTableSlot, tts_isempty = false, tts_shouldFree = false, tts_shouldFreeMin = false, tts_slow = false,
tts_tuple = 0x1fa5998, tts_tupleDescriptor = 0x7ff7dd2d1380, tts_mcxt = 0x1f89270, tts_buffer = 124, tts_nvalid = 0,
tts_values = 0x1f89d48, tts_isnull = 0x1f89d80, tts_mintuple = 0x0, tts_minhdr = {t_len = 0, t_self = {ip_blkid = {
bi_hi = 0, bi_lo = 0}, ip_posid = 0}, t_tableOid = 0, t_data = 0x0}, tts_off = 0, tts_fixedTupleDescriptor = true}
(gdb) n
411 return slot;
(gdb)
412 }
(gdb)
agg_fill_hash_table (aggstate=0x1f895a0) at nodeAgg.c:1924
1924 if (TupIsNull(outerslot))
(gdb)
lookup_hash_entries->进入lookup_hash_entries,为当前元组在所有已完成hash的grouping sets中检索hash条目,为后续的advance_aggregates函数调用返回pergroup指针数组.
(gdb) n
1928 tmpcontext->ecxt_outertuple = outerslot;
(gdb)
1931 lookup_hash_entries(aggstate);
(gdb)
(gdb) step
lookup_hash_entries (aggstate=0x1f895a0) at nodeAgg.c:1509
1509 int numHashes = aggstate->num_hashes;
(gdb) n
1510 AggStatePerGroup *pergroup = aggstate->hash_pergroup;
(gdb) p numHashes
$5 = 1
(gdb) n
1513 for (setno = 0; setno < numHashes; setno++)
(gdb) p *pergroup
$6 = (AggStatePerGroup) 0x0
(gdb) n
1515 select_current_set(aggstate, setno, true);
(gdb) step
select_current_set (aggstate=0x1f895a0, setno=0, is_hash=true) at nodeAgg.c:306
306 if (is_hash)
(gdb) n
307 aggstate->curaggcontext = aggstate->hashcontext;
(gdb)
311 aggstate->current_set = setno;
(gdb)
312 }
(gdb)
lookup_hash_entries (aggstate=0x1f895a0) at nodeAgg.c:1516
1516 pergroup[setno] = lookup_hash_entry(aggstate)->additional;
(gdb)
lookup_hash_entry->调用lookup_hash_entry,该函数为包含当前元组的组检索或创建哈希表条目.
(gdb) step
lookup_hash_entry (aggstate=0x1f895a0) at nodeAgg.c:1451
1451 TupleTableSlot *inputslot = aggstate->tmpcontext->ecxt_outertuple;
(gdb) n
1452 AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
(gdb) p aggstate->current_set
$7 = 0
(gdb) n
1453 TupleTableSlot *hashslot = perhash->hashslot;
(gdb) p *perhash
$8 = {hashtable = 0x1f9fc98, hashiter = {cur = 0, end = 0, done = false}, hashslot = 0x1f8b198, hashfunctions = 0x1f8b230,
eqfuncoids = 0x1f9fc50, numCols = 1, numhashGrpCols = 1, largestGrpColIdx = 1, hashGrpColIdxInput = 0x1f9fbb0,
hashGrpColIdxHash = 0x1f9fbd0, aggnode = 0x1f7b1e0}
(gdb) n
1459 slot_getsomeattrs(inputslot, perhash->largestGrpColIdx);
(gdb)
1460 ExecClearTuple(hashslot);
(gdb) p *perhash
$9 = {hashtable = 0x1f9fc98, hashiter = {cur = 0, end = 0, done = false}, hashslot = 0x1f8b198, hashfunctions = 0x1f8b230,
eqfuncoids = 0x1f9fc50, numCols = 1, numhashGrpCols = 1, largestGrpColIdx = 1, hashGrpColIdxInput = 0x1f9fbb0,
hashGrpColIdxHash = 0x1f9fbd0, aggnode = 0x1f7b1e0}
(gdb) p *perhash->hashslot
$10 = {type = T_TupleTableSlot, tts_isempty = true, tts_shouldFree = false, tts_shouldFreeMin = false, tts_slow = false,
tts_tuple = 0x0, tts_tupleDescriptor = 0x1f8b080, tts_mcxt = 0x1f89270, tts_buffer = 0, tts_nvalid = 0,
tts_values = 0x1f8b1f8, tts_isnull = 0x1f8b200, tts_mintuple = 0x0, tts_minhdr = {t_len = 0, t_self = {ip_blkid = {
bi_hi = 0, bi_lo = 0}, ip_posid = 0}, t_tableOid = 0, t_data = 0x0}, tts_off = 0, tts_fixedTupleDescriptor = true}
(gdb) p *perhash->hashfunctions
$11 = {fn_addr = 0x4c8a31 <hashtext>, fn_oid = 400, fn_nargs = 1, fn_strict = true, fn_retset = false, fn_stats = 2 '\002',
fn_extra = 0x0, fn_mcxt = 0x1f89270, fn_expr = 0x0}
(gdb) p *perhash->eqfuncoids
$12 = 67
(gdb) p *perhash->hashGrpColIdxInput
$13 = 1
(gdb) p *perhash->hashGrpColIdxHash
$14 = 1
(gdb) p *perhash->aggnode
$15 = {plan = {type = T_Agg, startup_cost = 13677, total_cost = 13677.0625, plan_rows = 5, plan_width = 45,
parallel_aware = false, parallel_safe = false, plan_node_id = 0, targetlist = 0x1f84108, qual = 0x0,
lefttree = 0x1f83bc8, righttree = 0x0, initPlan = 0x0, extParam = 0x0, allParam = 0x0}, aggstrategy = AGG_HASHED,
aggsplit = AGGSPLIT_SIMPLE, numCols = 1, grpColIdx = 0x1f83eb8, grpOperators = 0x1f83e98, numGroups = 5, aggParams = 0x0,
groupingSets = 0x0, chain = 0x0}
(gdb)
lookup_hash_entry->遍历分组键(这里是bh列)
(gdb) n
1462 for (i = 0; i < perhash->numhashGrpCols; i++)
(gdb)
1464 int varNumber = perhash->hashGrpColIdxInput[i] - 1;
(gdb)
1466 hashslot->tts_values[i] = inputslot->tts_values[varNumber];
(gdb) p varNumber
$16 = 0
(gdb) n
1467 hashslot->tts_isnull[i] = inputslot->tts_isnull[varNumber];
(gdb)
1462 for (i = 0; i < perhash->numhashGrpCols; i++)
(gdb) p *hashslot
$17 = {type = T_TupleTableSlot, tts_isempty = true, tts_shouldFree = false, tts_shouldFreeMin = false, tts_slow = false,
tts_tuple = 0x0, tts_tupleDescriptor = 0x1f8b080, tts_mcxt = 0x1f89270, tts_buffer = 0, tts_nvalid = 0,
tts_values = 0x1f8b1f8, tts_isnull = 0x1f8b200, tts_mintuple = 0x0, tts_minhdr = {t_len = 0, t_self = {ip_blkid = {
bi_hi = 0, bi_lo = 0}, ip_posid = 0}, t_tableOid = 0, t_data = 0x0}, tts_off = 0, tts_fixedTupleDescriptor = true}
(gdb) p *hashslot->tts_values[0]
$18 = 811222795
(gdb)
lookup_hash_entry->调用LookupTupleHashEntry,该函数使用已过滤的元组检索或者创建哈希表条目
(gdb) step
LookupTupleHashEntry (hashtable=0x1f9fc98, slot=0x1f8b198, isnew=0x7fff7f065e17) at execGrouping.c:290
290 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
(gdb) p *hashtable
$19 = {hashtab = 0x1f9fd30, numCols = 1, keyColIdx = 0x1f9fbd0, tab_hash_funcs = 0x1f8b230, tab_eq_func = 0x1fa0050,
tablecxt = 0x1f91370, tempcxt = 0x1f9d8e0, entrysize = 24, tableslot = 0x1f9ffb8, inputslot = 0x0, in_hash_funcs = 0x0,
cur_eq_func = 0x0, hash_iv = 0, exprcontext = 0x1fa0970}
(gdb) p *hashtable->hashtab
$20 = {size = 8, members = 0, sizemask = 7, grow_threshold = 7, data = 0x1f9fd88, ctx = 0x1f89270, private_data = 0x1f9fc98}
(gdb) p *hashtable->keyColIdx
$21 = 1
(gdb) p *hashtable->tab_hash_funcs
$22 = {fn_addr = 0x4c8a31 <hashtext>, fn_oid = 400, fn_nargs = 1, fn_strict = true, fn_retset = false, fn_stats = 2 '\002',
fn_extra = 0x0, fn_mcxt = 0x1f89270, fn_expr = 0x0}
(gdb) n
293 hashtable->inputslot = slot;
(gdb)
294 hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
(gdb)
295 hashtable->cur_eq_func = hashtable->tab_eq_func;
(gdb)
297 key = NULL;
(gdb)
299 if (isnew)
(gdb)
301 entry = tuplehash_insert(hashtable->hashtab, key, &found);
(gdb) step
tuplehash_insert (tb=0x1f9fd30, key=0x0, found=0x7fff7f065dd7) at ../../../class="lazy" data-src/include/lib/simplehash.h:490
490 uint32 hash = SH_HASH_KEY(tb, key);
(gdb) finish
Run till exit from #0 tuplehash_insert (tb=0x1f9fd30, key=0x0, found=0x7fff7f065dd7)
at ../../../class="lazy" data-src/include/lib/simplehash.h:490
0x00000000006d3a1e in LookupTupleHashEntry (hashtable=0x1f9fc98, slot=0x1f8b198, isnew=0x7fff7f065e17) at execGrouping.c:301
301 entry = tuplehash_insert(hashtable->hashtab, key, &found);
Value returned is $23 = (TupleHashEntryData *) 0x1f9fdb8
(gdb) n
303 if (found)
(gdb) p found
$24 = false
(gdb)
LookupTupleHashEntry->插入新条目,返回entry
(gdb) n
311 *isnew = true;
(gdb)
313 entry->additional = NULL;
(gdb)
314 MemoryContextSwitchTo(hashtable->tablecxt);
(gdb)
316 entry->firstTuple = ExecCopySlotMinimalTuple(slot);
(gdb)
324 MemoryContextSwitchTo(oldContext);
(gdb)
326 return entry;
(gdb)
327 }
lookup_hash_entry->回到lookup_hash_entry
(gdb)
lookup_hash_entry (aggstate=0x1f895a0) at nodeAgg.c:1474
1474 if (isnew)
(gdb)
lookup_hash_entry->分配内存,设置条目的额外信息
(gdb) n
1481 sizeof(AggStatePerGroupData) * aggstate->numtrans);
(gdb) p *entry
$25 = {firstTuple = 0x1f91488, additional = 0x0, status = 1, hash = 443809650}
(gdb) n
1480 MemoryContextAlloc(perhash->hashtable->tablecxt,
(gdb)
1479 pergroup = (AggStatePerGroup)
(gdb)
1482 entry->additional = pergroup;
(gdb)
lookup_hash_entry->为新元组group初始化聚合操作, lookup_hash_entries()已选择了相应的grouping set(这里有3个聚合列)
1488 for (transno = 0; transno < aggstate->numtrans; transno++)
(gdb) p aggstate->numtrans
$26 = 3
(gdb)
(gdb) n
1490 AggStatePerTrans pertrans = &aggstate->pertrans[transno];
(gdb)
1491 AggStatePerGroup pergroupstate = &pergroup[transno];
(gdb) p *pertrans
$27 = {aggref = 0x1f84650, aggshared = false, numInputs = 1, numTransInputs = 1, transfn_oid = 768, serialfn_oid = 0,
deserialfn_oid = 0, aggtranstype = 23, transfn = {fn_addr = 0x93e877 <int4larger>, fn_oid = 768, fn_nargs = 2,
fn_strict = true, fn_retset = false, fn_stats = 2 '\002', fn_extra = 0x0, fn_mcxt = 0x1f89270, fn_expr = 0x1fa0b00},
serialfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, fn_strict = false, fn_retset = false, fn_stats = 0 '\000',
fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, deserialfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0,
fn_strict = false, fn_retset = false, fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0},
aggCollation = 0, numSortCols = 0, numDistinctCols = 0, sortColIdx = 0x0, sortOperators = 0x0, sortCollations = 0x0,
sortNullsFirst = 0x0, equalfnOne = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, fn_strict = false, fn_retset = false,
fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, equalfnMulti = 0x0, initValue = 0,
initValueIsNull = true, inputtypeLen = 0, transtypeLen = 4, inputtypeByVal = false, transtypeByVal = true,
sortslot = 0x0, uniqslot = 0x0, sortdesc = 0x0, sortstates = 0x1f9fb70, transfn_fcinfo = {flinfo = 0x1f99418,
context = 0x1f895a0, resultinfo = 0x0, fncollation = 0, isnull = false, nargs = 2, arg = {0 <repeats 100 times>},
argnull = {false <repeats 100 times>}}, serialfn_fcinfo = {flinfo = 0x0, context = 0x0, resultinfo = 0x0,
fncollation = 0, isnull = false, nargs = 0, arg = {0 <repeats 100 times>}, argnull = {false <repeats 100 times>}},
deserialfn_fcinfo = {flinfo = 0x0, context = 0x0, resultinfo = 0x0, fncollation = 0, isnull = false, nargs = 0, arg = {
0 <repeats 100 times>}, argnull = {false <repeats 100 times>}}}
(gdb) n
1493 initialize_aggregate(aggstate, pertrans, pergroupstate);
(gdb) p *pergroupstate
$28 = {transValue = 9187201950435737471, transValueIsNull = 127, noTransValue = 127}
(gdb) n
1488 for (transno = 0; transno < aggstate->numtrans; transno++)
(gdb) p *aggstate
$29 = {ss = {ps = {type = T_AggState, plan = 0x1f7b1e0, state = 0x1f89388, ExecProcNode = 0x6ee438 <ExecAgg>,
ExecProcNodeReal = 0x6ee438 <ExecAgg>, instrument = 0x0, worker_instrument = 0x0, worker_jit_instrument = 0x0,
qual = 0x0, lefttree = 0x1f89b10, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0,
ps_ResultTupleSlot = 0x1f8a710, ps_ExprContext = 0x1f89a50, ps_ProjInfo = 0x1f8a850, scandesc = 0x1f89e60},
ss_currentRelation = 0x0, ss_currentScanDesc = 0x0, ss_ScanTupleSlot = 0x1f8a3b8}, aggs = 0x1f8ad60, numaggs = 3,
numtrans = 3, aggstrategy = AGG_HASHED, aggsplit = AGGSPLIT_SIMPLE, phase = 0x1f8ae58, numphases = 1, current_phase = 0,
peragg = 0x1f9f930, pertrans = 0x1f993f0, hashcontext = 0x1f89990, aggcontexts = 0x1f897b8, tmpcontext = 0x1f897d8,
curaggcontext = 0x1f89990, curperagg = 0x0, curpertrans = 0x0, input_done = false, agg_done = false, projected_set = -1,
current_set = 0, grouped_cols = 0x0, all_grouped_cols = 0x1f8aff0, maxsets = 1, phases = 0x1f8ae58, sort_in = 0x0,
sort_out = 0x0, sort_slot = 0x0, pergroups = 0x0, grp_firstTuple = 0x0, table_filled = false, num_hashes = 1,
perhash = 0x1f8aeb0, hash_pergroup = 0x1f9fb48, all_pergroups = 0x1f9fb48, combinedproj = 0x0}
(gdb) n
1490 AggStatePerTrans pertrans = &aggstate->pertrans[transno];
(gdb)
1491 AggStatePerGroup pergroupstate = &pergroup[transno];
(gdb)
1493 initialize_aggregate(aggstate, pertrans, pergroupstate);
(gdb)
1488 for (transno = 0; transno < aggstate->numtrans; transno++)
(gdb)
1490 AggStatePerTrans pertrans = &aggstate->pertrans[transno];
(gdb)
1491 AggStatePerGroup pergroupstate = &pergroup[transno];
(gdb)
1493 initialize_aggregate(aggstate, pertrans, pergroupstate);
(gdb)
1488 for (transno = 0; transno < aggstate->numtrans; transno++)
(gdb)
1497 return entry;
(gdb)
1498 }
(gdb)
lookup_hash_entries->回到lookup_hash_entries
(gdb) n
lookup_hash_entries (aggstate=0x1f895a0) at nodeAgg.c:1513
1513 for (setno = 0; setno < numHashes; setno++)
agg_fill_hash_table->回到agg_fill_hash_table
(gdb) n
1518 }
(gdb)
agg_fill_hash_table (aggstate=0x1f895a0) at nodeAgg.c:1934
1934 advance_aggregates(aggstate);
(gdb)
advance_aggregates->进入advance_aggregates
(gdb) step
advance_aggregates (aggstate=0x1f895a0) at nodeAgg.c:680
680 ExecEvalExprSwitchContext(aggstate->phase->evaltrans,
(gdb) p *aggstate->phase->evaltrans
$30 = {tag = {type = T_ExprState}, flags = 6 '\006', resnull = false, resvalue = 0, resultslot = 0x0, steps = 0x1fa10d0,
evalfunc = 0x6cd882 <ExecInterpExprStillValid>, expr = 0x1f895a0, evalfunc_private = 0x6cb43e <ExecInterpExpr>,
steps_len = 16, steps_alloc = 16, parent = 0x1f895a0, ext_params = 0x0, innermost_caseval = 0x0,
innermost_casenull = 0x0, innermost_domainval = 0x0, innermost_domainnull = 0x0}
(gdb) step
ExecEvalExprSwitchContext (state=0x1fa1038, econtext=0x1f897d8, isNull=0x7fff7f065e9f)
at ../../../class="lazy" data-src/include/executor/executor.h:312
312 oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
(gdb) finish
Run till exit from #0 ExecEvalExprSwitchContext (state=0x1fa1038, econtext=0x1f897d8, isNull=0x7fff7f065e9f)
at ../../../class="lazy" data-src/include/executor/executor.h:312
advance_aggregates (aggstate=0x1f895a0) at nodeAgg.c:683
683 }
Value returned is $31 = 0
进入第2轮循环
(gdb) step
agg_fill_hash_table (aggstate=0x1f895a0) at nodeAgg.c:1940
1940 ResetExprContext(aggstate->tmpcontext);
(gdb) n
1941 }
查看相关信息
(gdb) n
1941 }
(gdb)
1923 outerslot = fetch_input_tuple(aggstate);
(gdb)
1924 if (TupIsNull(outerslot))
(gdb) n
1928 tmpcontext->ecxt_outertuple = outerslot;
(gdb)
1931 lookup_hash_entries(aggstate);
(gdb)
1934 advance_aggregates(aggstate);
(gdb)
1940 ResetExprContext(aggstate->tmpcontext);
(gdb) p *outerslot
$32 = {type = T_TupleTableSlot, tts_isempty = false, tts_shouldFree = false, tts_shouldFreeMin = false, tts_slow = true,
tts_tuple = 0x1fa5998, tts_tupleDescriptor = 0x7ff7dd2d1380, tts_mcxt = 0x1f89270, tts_buffer = 124, tts_nvalid = 3,
tts_values = 0x1f89d48, tts_isnull = 0x1f89d80, tts_mintuple = 0x0, tts_minhdr = {t_len = 0, t_self = {ip_blkid = {
bi_hi = 0, bi_lo = 0}, ip_posid = 0}, t_tableOid = 0, t_data = 0x0}, tts_off = 16, tts_fixedTupleDescriptor = true}
(gdb) x/32x outerslot->tts_values
0x1f89d48: 0x28 0xf6 0x0b 0xb1 0xf7 0x7f 0x00 0x00
0x1f89d50: 0x02 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x1f89d58: 0x02 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x1f89d60: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
tuple数据
(gdb) x/56x outerslot->tts_tuple->t_data->t_bits
0x7ff7b2e1365f: 0x00 0x0b 0x47 0x5a 0x30 0x31 0x00 0x00
0x7ff7b2e13667: 0x00 0x01 0x00 0x00 0x00 0x01 0x00 0x00
0x7ff7b2e1366f: 0x00 0x01 0x00 0x00 0x00 0x01 0x00 0x00
0x7ff7b2e13677: 0x00 0x01 0x00 0x00 0x00 0x01 0x00 0x00
0x7ff7b2e1367f: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x7ff7b2e13687: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x7ff7b2e1368f: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
DONE!
四、参考资料
N/A
免责声明:
① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的,并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据,供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。
② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341
PostgreSQL 源码解读(182)- 查询#98(聚合函数#3-ExecAgg)
下载Word文档到电脑,方便收藏和打印~