PostgreSQL 源码解读(88)- 查询语句#73(SeqNext函数#1)
本节介绍了SeqNext函数的主要实现逻辑以及该函数中初始化相关数据结构的实现逻辑。SeqNext函数作为参数传递到函数ExecScan中,执行实际的扫描操作。
一、数据结构
TupleTableSlot
Tuple Table Slot,用于存储元组相关信息
/*
 * TupleTableSlot - a "tuple table slot": holds a tuple together with the
 * information needed to work with it.
 *
 * Each FIELDNO_TUPLETABLESLOT_* macro equals the zero-based position of the
 * member declared immediately after it (type is member 0, tts_flags is 1, ...).
 *
 * NOTE(review): the per-field notes below are inferred from names and types
 * only; confirm against executor/tuptable.h.
 */
typedef struct TupleTableSlot
{
NodeTag type; // node tag
#define FIELDNO_TUPLETABLESLOT_FLAGS 1
uint16 tts_flags; // flag bits (individual bits are not shown in this excerpt)
#define FIELDNO_TUPLETABLESLOT_NVALID 2
AttrNumber tts_nvalid; // presumably the number of valid entries in tts_values -- confirm
// Pointer to an operations table; both the pointer and the pointee are const.
const TupleTableSlotOps *const tts_ops;
#define FIELDNO_TUPLETABLESLOT_TUPLEDESCRIPTOR 4
TupleDesc tts_tupleDescriptor; // tuple descriptor associated with the slot
#define FIELDNO_TUPLETABLESLOT_VALUES 5
Datum *tts_values; // presumably per-attribute values -- confirm
#define FIELDNO_TUPLETABLESLOT_ISNULL 6
bool *tts_isnull; // presumably per-attribute null flags, parallel to tts_values -- confirm
MemoryContext tts_mcxt; // memory context associated with the slot
} TupleTableSlot;
/*
 * tupleDesc - descriptor of a tuple's attributes; TupleDesc is a pointer to it.
 *
 * NOTE(review): field meanings other than attrs[] are inferred from names and
 * types only; confirm against access/tupdesc.h.
 */
typedef struct tupleDesc
{
int natts; // presumably the number of attributes in attrs[] -- confirm
Oid tdtypeid; // presumably the OID of the tuple's row type -- confirm
int32 tdtypmod; // presumably the typmod of the tuple's row type -- confirm
int tdrefcount; // presumably a reference count -- confirm
TupleConstr *constr; // pointer to constraint information
// attrs[N] is the descriptor of attribute number N+1
// (the array index is zero-based, attribute numbers are one-based).
FormData_pg_attribute attrs[FLEXIBLE_ARRAY_MEMBER];
} *TupleDesc;
HeapTuple
HeapTupleData是一个指向元组的内存数据结构
HeapTuple是指向HeapTupleData的指针
/*
 * HeapTupleData - in-memory structure that points to a tuple.
 * HeapTuple is a pointer to a HeapTupleData.
 *
 * FIELDNO_HEAPTUPLEDATA_DATA equals the zero-based position of t_data
 * (t_len is member 0).
 */
typedef struct HeapTupleData
{
uint32 t_len; // presumably the length of the tuple that t_data points to -- confirm
ItemPointerData t_self; // item pointer for the tuple (initscan marks it invalid for a fresh scan)
Oid t_tableOid; // OID of the owning table (heap_beginscan_internal fills it via RelationGetRelid)
#define FIELDNO_HEAPTUPLEDATA_DATA 3
HeapTupleHeader t_data; // pointer to the tuple itself (NULL until a tuple has been fetched)
} HeapTupleData;
typedef HeapTupleData *HeapTuple;
// sizeof(HeapTupleData) passed through MAXALIGN
#define HEAPTUPLESIZE MAXALIGN(sizeof(HeapTupleData))
HeapScanDesc
HeapScanDesc是指向HeapScanDescData结构体的指针
/*
 * HeapScanDescData - state of one heap scan; HeapScanDesc is a pointer to it.
 *
 * The first group of fields is filled in by heap_beginscan_internal from its
 * arguments, the second group is (re)computed by initscan, and the remaining
 * fields track the current position of the scan.
 */
typedef struct HeapScanDescData
{
// --- scan parameters, set in heap_beginscan_internal ---
Relation rs_rd; // relation being scanned
Snapshot rs_snapshot; // snapshot used by the scan
int rs_nkeys; // number of scan keys
ScanKey rs_key; // array of rs_nkeys scan keys, or NULL when rs_nkeys is 0
bool rs_bitmapscan; // caller said this is a bitmap scan
bool rs_samplescan; // caller said this is a sample scan
bool rs_pageatatime; // allow_pagemode && IsMVCCSnapshot(snapshot)
bool rs_allow_strat; // caller permits use of a buffer access strategy
bool rs_allow_sync; // caller permits a synchronized scan
bool rs_temp_snap; // caller's temp_snap flag
// --- state set up by initscan ---
BlockNumber rs_nblocks; // number of blocks in the relation
BlockNumber rs_startblock; // block number the scan starts at
BlockNumber rs_numblocks;
// rs_numblocks is normally InvalidBlockNumber, meaning "scan the whole rel"
BufferAccessStrategy rs_strategy; // access strategy in use, or NULL for none
bool rs_syncscan; // whether this scan is a synchronized scan
// --- current state of the scan ---
bool rs_inited; // false until the scan has been started (initscan resets it)
HeapTupleData rs_ctup; // current tuple of the scan
BlockNumber rs_cblock; // current block (InvalidBlockNumber after initscan)
Buffer rs_cbuf; // current buffer (InvalidBuffer after initscan)
// NB: if rs_cbuf is not InvalidBuffer, a pin is held on that buffer
ParallelHeapScanDesc rs_parallel; // parallel scan descriptor, or NULL for a non-parallel scan
// The fields below are used only in page-at-a-time mode and by bitmap scans.
int rs_cindex;
int rs_ntuples;
OffsetNumber rs_vistuples[MaxHeapTuplesPerPage];
} HeapScanDescData;
typedef struct HeapScanDescData *HeapScanDesc;
ScanState
ScanState在PlanState的基础上进行扩展,用于表示对底层关系(relation)进行扫描的各类节点。
/*
 * ScanState - extends PlanState for node types that scan an underlying
 * relation.
 */
typedef struct ScanState
{
PlanState ps; // embedded PlanState; SeqNext reads the EState through ps.state
Relation ss_currentRelation; // relation being scanned (passed to heap_beginscan)
HeapScanDesc ss_currentScanDesc; // scan descriptor; NULL until SeqNext creates it on first call
TupleTableSlot *ss_ScanTupleSlot; // slot that receives each tuple produced by the scan
} ScanState;
/*
 * SeqScanState - state node handed to SeqNext; wraps a ScanState.
 */
typedef struct SeqScanState
{
ScanState ss; // common scan state
Size pscan_len; // NOTE(review): presumably the size of the parallel scan descriptor -- confirm
} SeqScanState;
二、源码解读
SeqNext函数是ExecSeqScan的元组的实际访问方法(ExecScanAccessMtd).这里简单介绍了初始化过程,实际的元组获取过程下节再行介绍.
/*
 * SeqNext - fetch the next tuple of a sequential scan.
 *
 * On the first call the heap scan descriptor does not exist yet, so it is
 * created with heap_beginscan and cached in the node.  Every call then asks
 * heap_getnext for one tuple in the executor's current scan direction.
 *
 * Returns the node's scan slot: it holds the fetched tuple, or has been
 * cleared with ExecClearTuple when heap_getnext returned NULL.
 */
static TupleTableSlot *
SeqNext(SeqScanState *node)
{
HeapTuple tuple;
HeapScanDesc scandesc;
EState *estate;
ScanDirection direction;
TupleTableSlot *slot;
// Pull the working state out of the node and the executor state.
scandesc = node->ss.ss_currentScanDesc;
estate = node->ss.ps.state;
direction = estate->es_direction;
slot = node->ss.ss_ScanTupleSlot;
if (scandesc == NULL) // no scan descriptor yet: initialize the scan
{
scandesc = heap_beginscan(node->ss.ss_currentRelation,
estate->es_snapshot,
0, NULL); // no scan keys; returns a new HeapScanDesc
node->ss.ss_currentScanDesc = scandesc; // cache it for subsequent calls
}
// Get the next tuple from the table.
tuple = heap_getnext(scandesc, direction);
if (tuple) // got a tuple: store it in the slot together with the scan's current buffer
ExecStoreBufferHeapTuple(tuple,
slot,
scandesc->rs_cbuf);
else
ExecClearTuple(slot); // no tuple: clear the slot's contents (the slot itself is still returned)
return slot; // return the slot
}
/*
 * SeqRecheck - recheck callback paired with SeqNext.
 *
 * Neither argument is examined; the function unconditionally reports success.
 */
static bool
SeqRecheck(SeqScanState *node, TupleTableSlot *slot)
{
// Always returns true.
return true;
}
/*
 * heap_beginscan - start a plain heap scan.
 *
 * Thin wrapper around heap_beginscan_internal for the standard case:
 * no parallel scan descriptor (NULL), allow_strat / allow_sync /
 * allow_pagemode all true, and is_bitmapscan / is_samplescan / temp_snap
 * all false.
 *
 * relation, snapshot, nkeys and key are passed through unchanged.
 * Returns the scan descriptor built by heap_beginscan_internal.
 */
HeapScanDesc
heap_beginscan(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key)
{
return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL,
true, true, true, false, false, false); // standard case: delegate to heap_beginscan_internal
}
/*
 * heap_beginscan_internal - allocate and initialize a heap scan descriptor.
 *
 * Takes a reference on the relation, allocates a HeapScanDescData with palloc,
 * records the caller's parameters in it, takes a predicate lock on the
 * relation unless this is a bitmap scan, allocates room for the scan keys
 * when nkeys > 0, and finally calls initscan to set up the remaining state.
 *
 * Returns the new descriptor.
 */
static HeapScanDesc
heap_beginscan_internal(Relation relation, Snapshot snapshot, // relation to scan & snapshot to use
int nkeys, ScanKey key, // number of scan keys & the scan keys themselves
ParallelHeapScanDesc parallel_scan, // parallel scan descriptor (NULL if not parallel)
bool allow_strat, // allow use of a buffer access strategy?
bool allow_sync, // allow a synchronized scan?
bool allow_pagemode, // allow page-at-a-time mode?
bool is_bitmapscan, // is this a bitmap scan?
bool is_samplescan, // is this a sample scan?
bool temp_snap) // caller's temp_snap flag (stored in rs_temp_snap)
{
HeapScanDesc scan; // the heap scan descriptor being built
// Increment the relation's reference count for the duration of the scan.
RelationIncrementReferenceCount(relation);
// Allocate the descriptor; palloc does not zero it, so every field that
// matters is assigned below or in initscan.
scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
scan->rs_rd = relation;
scan->rs_snapshot = snapshot;
scan->rs_nkeys = nkeys;
scan->rs_bitmapscan = is_bitmapscan;
scan->rs_samplescan = is_samplescan;
scan->rs_strategy = NULL; // initscan decides whether a strategy is created
scan->rs_allow_strat = allow_strat;
scan->rs_allow_sync = allow_sync;
scan->rs_temp_snap = temp_snap;
scan->rs_parallel = parallel_scan;
// Page-at-a-time mode requires both the caller's permission and an MVCC snapshot.
scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(snapshot);
// Bitmap scans skip the relation-level predicate lock taken here.
if (!is_bitmapscan)
PredicateLockRelation(relation, snapshot);
// Record the relation's OID in the scan's current-tuple struct.
scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
// Allocate space for the scan keys only; initscan copies the keys in.
if (nkeys > 0)
scan->rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
else
scan->rs_key = NULL;
// Initialize the rest of the scan state (keep_startblock = false).
initscan(scan, key, false);
return scan;
}
/*
 * TUPLOCK_from_mxstatus - look up the MultiXactStatusLock entry at index
 * 'status'.
 * NOTE(review): judging by the name this maps a MultiXact status to a tuple
 * lock mode; it is not used by the scan code shown in this article.
 */
#define TUPLOCK_from_mxstatus(status) \
(MultiXactStatusLock[(status)])
/*
 * initscan - (re)initialize the per-scan state of a heap scan descriptor.
 *
 * scan            descriptor whose rs_rd, rs_parallel, rs_allow_*, rs_nkeys,
 *                 rs_key, rs_bitmapscan and rs_samplescan are already set
 * key             scan keys to copy into scan->rs_key, or NULL for none
 * keep_startblock when true (and the scan is not parallel), rs_startblock is
 *                 left untouched; otherwise a start block is chosen here
 */
static void
initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
{
bool allow_strat;
bool allow_sync;
// Determine the number of blocks to scan: a parallel scan takes the value
// from the shared parallel descriptor, otherwise ask the relation.
if (scan->rs_parallel != NULL)
scan->rs_nblocks = scan->rs_parallel->phs_nblocks;
else
scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd);
// An access strategy and synchronized scanning are considered only when the
// relation does not use local buffers and is larger than a quarter of
// NBuffers; in that case the caller's permissions are honored.
if (!RelationUsesLocalBuffers(scan->rs_rd) &&
scan->rs_nblocks > NBuffers / 4)
{
allow_strat = scan->rs_allow_strat;
allow_sync = scan->rs_allow_sync;
}
else
allow_strat = allow_sync = false; // otherwise both are disabled
if (allow_strat) // a buffer access strategy may be used
{
// During a rescan, keep the previous strategy object if there is one.
if (scan->rs_strategy == NULL)
scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD);
}
else
{
// Not allowed: release any existing strategy object.
if (scan->rs_strategy != NULL)
FreeAccessStrategy(scan->rs_strategy);
scan->rs_strategy = NULL; // and record that none is in use
}
if (scan->rs_parallel != NULL) // parallel scan
{
// For a parallel scan, take the sync-scan flag from the ParallelHeapScanDesc.
scan->rs_syncscan = scan->rs_parallel->phs_syncscan;
}
else if (keep_startblock)
{
// Recompute only the sync-scan flag; rs_startblock keeps its current value.
scan->rs_syncscan = (allow_sync && synchronize_seqscans);
}
else if (allow_sync && synchronize_seqscans)
{
// Synchronized scan: the start block is obtained from ss_get_location.
scan->rs_syncscan = true;
scan->rs_startblock = ss_get_location(scan->rs_rd, scan->rs_nblocks);
}
else
{
// Ordinary scan: start at block 0.
scan->rs_syncscan = false;
scan->rs_startblock = 0;
}
// Reset the scan position: no block limit, not yet started, no current
// tuple, buffer or block.
scan->rs_numblocks = InvalidBlockNumber;
scan->rs_inited = false;
scan->rs_ctup.t_data = NULL;
ItemPointerSetInvalid(&scan->rs_ctup.t_self);
scan->rs_cbuf = InvalidBuffer;
scan->rs_cblock = InvalidBlockNumber;
// The page-at-a-time fields (rs_cindex, rs_ntuples, rs_vistuples) are not
// touched here.
// Copy the caller's scan keys into the space allocated by heap_beginscan_internal.
if (key != NULL)
memcpy(scan->rs_key, key, scan->rs_nkeys * sizeof(ScanKeyData));
// Count the scan in the statistics, except for bitmap and sample scans.
if (!scan->rs_bitmapscan && !scan->rs_samplescan)
pgstat_count_heap_scan(scan->rs_rd);
}
三、跟踪分析
测试脚本如下
testdb=# explain select dw.*,grjf.grbh,grjf.xm,grjf.ny,grjf.je
testdb-# from t_dwxx dw,lateral (select gr.grbh,gr.xm,jf.ny,jf.je
testdb(# from t_grxx gr inner join t_jfxx jf
testdb(# on gr.dwbh = dw.dwbh
testdb(# and gr.grbh = jf.grbh) grjf
testdb-# order by dw.dwbh;
QUERY PLAN
------------------------------------------------------------------------------------------
Sort (cost=20070.93..20320.93 rows=100000 width=47)
Sort Key: dw.dwbh
-> Hash Join (cost=3754.00..8689.61 rows=100000 width=47)
Hash Cond: ((gr.dwbh)::text = (dw.dwbh)::text)
-> Hash Join (cost=3465.00..8138.00 rows=100000 width=31)
Hash Cond: ((jf.grbh)::text = (gr.grbh)::text)
-> Seq Scan on t_jfxx jf (cost=0.00..1637.00 rows=100000 width=20)
-> Hash (cost=1726.00..1726.00 rows=100000 width=16)
-> Seq Scan on t_grxx gr (cost=0.00..1726.00 rows=100000 width=16)
-> Hash (cost=164.00..164.00 rows=10000 width=20)
-> Seq Scan on t_dwxx dw (cost=0.00..164.00 rows=10000 width=20)
(11 rows)
启动gdb,设置断点,进入SeqNext
(gdb) b SeqNext
Breakpoint 1 at 0x7156b2: file nodeSeqscan.c, line 60.
(gdb) c
Continuing.
Breakpoint 1, SeqNext (node=0x2ed1588) at nodeSeqscan.c:60
60 scandesc = node->ss.ss_currentScanDesc;
变量赋值
60 scandesc = node->ss.ss_currentScanDesc;
(gdb) n
61 estate = node->ss.ps.state;
(gdb)
62 direction = estate->es_direction;
(gdb)
63 slot = node->ss.ss_ScanTupleSlot;
(gdb)
65 if (scandesc == NULL)
scandesc为NULL,进入初始化,调用heap_beginscan
(gdb) p scandesc
$1 = (HeapScanDesc) 0x0
进入heap_beginscan/heap_beginscan_internal函数
(gdb) n
71 scandesc = heap_beginscan(node->ss.ss_currentRelation,
(gdb) step
heap_beginscan (relation=0x7fb27c488a90, snapshot=0x2e0b8f0, nkeys=0, key=0x0) at heapam.c:1407
1407 return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL,
(gdb) step
heap_beginscan_internal (relation=0x7fb27c488a90, snapshot=0x2e0b8f0, nkeys=0, key=0x0, parallel_scan=0x0,
allow_strat=true, allow_sync=true, allow_pagemode=true, is_bitmapscan=false, is_samplescan=false, temp_snap=false)
at heapam.c:1469
1469 RelationIncrementReferenceCount(relation);
heap_beginscan_internal->增加relation的引用计数
1469 RelationIncrementReferenceCount(relation);
(gdb) n
heap_beginscan_internal->初始化HeapScanDesc结构体
1474 scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
(gdb)
1476 scan->rs_rd = relation;
(gdb)
1477 scan->rs_snapshot = snapshot;
(gdb)
1478 scan->rs_nkeys = nkeys;
(gdb)
1479 scan->rs_bitmapscan = is_bitmapscan;
(gdb)
1480 scan->rs_samplescan = is_samplescan;
(gdb)
1481 scan->rs_strategy = NULL;
(gdb)
1482 scan->rs_allow_strat = allow_strat;
(gdb)
1483 scan->rs_allow_sync = allow_sync;
(gdb)
1484 scan->rs_temp_snap = temp_snap;
(gdb)
1485 scan->rs_parallel = parallel_scan;
(gdb)
1490 scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(snapshot);
(gdb)
1503 if (!is_bitmapscan)
heap_beginscan_internal->非位图扫描,谓词锁定
1503 if (!is_bitmapscan)
(gdb) p is_bitmapscan
$2 = false
(gdb) n
1504 PredicateLockRelation(relation, snapshot);
(gdb)
1507 scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
heap_beginscan_internal->进入initscan函数
(gdb) n
1513 if (nkeys > 0)
(gdb)
1516 scan->rs_key = NULL;
(gdb)
1518 initscan(scan, key, false);
(gdb) step
initscan (scan=0x2ee4568, key=0x0, keep_startblock=false) at heapam.c:236
236 if (scan->rs_parallel != NULL)
initscan->relation的大小相对于buffer并不大(未超过NBuffers的1/4),不使用访问策略(批量读取)&同步扫描
(gdb) n
239 scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd);
(gdb)
253 if (!RelationUsesLocalBuffers(scan->rs_rd) &&
(gdb)
254 scan->rs_nblocks > NBuffers / 4)
(gdb)
253 if (!RelationUsesLocalBuffers(scan->rs_rd) &&
(gdb)
260 allow_strat = allow_sync = false;
initscan->设置其他变量
312 if (key != NULL)
(gdb)
320 if (!scan->rs_bitmapscan && !scan->rs_samplescan)
(gdb)
321 pgstat_count_heap_scan(scan->rs_rd);
(gdb)
322 }
(gdb)
initscan->执行完毕,返回heap_beginscan_internal,完成初始化
(gdb) n
heap_beginscan_internal (relation=0x7fb27c488a90, snapshot=0x2e0b8f0, nkeys=0, key=0x0, parallel_scan=0x0,
allow_strat=true, allow_sync=true, allow_pagemode=true, is_bitmapscan=false, is_samplescan=false, temp_snap=false)
at heapam.c:1520
1520 return scan;
(gdb) p *scan
$4 = {rs_rd = 0x7fb27c488a90, rs_snapshot = 0x2e0b8f0, rs_nkeys = 0, rs_key = 0x0, rs_bitmapscan = false,
rs_samplescan = false, rs_pageatatime = true, rs_allow_strat = true, rs_allow_sync = true, rs_temp_snap = false,
rs_nblocks = 726, rs_startblock = 0, rs_numblocks = 4294967295, rs_strategy = 0x0, rs_syncscan = false,
rs_inited = false, rs_ctup = {t_len = 2139062143, t_self = {ip_blkid = {bi_hi = 65535, bi_lo = 65535}, ip_posid = 0},
t_tableOid = 16742, t_data = 0x0}, rs_cblock = 4294967295, rs_cbuf = 0, rs_parallel = 0x0, rs_cindex = 2139062143,
rs_ntuples = 2139062143, rs_vistuples = {32639 <repeats 291 times>}}
(gdb)
DONE!
四、参考资料
PG Document:Query Planning
免责声明:
① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的,并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据,供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。
② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341
PostgreSQL 源码解读(88)- 查询语句#73(SeqNext函数#1)
下载Word文档到电脑,方便收藏和打印~