我的编程空间,编程开发者的网络收藏夹
学习永远不晚

PostgreSQL 源码解读(88)- 查询语句#73(SeqNext函数#1)

短信预约 -IT技能 免费直播动态提醒
省份

北京

  • 北京
  • 上海
  • 天津
  • 重庆
  • 河北
  • 山东
  • 辽宁
  • 黑龙江
  • 吉林
  • 甘肃
  • 青海
  • 河南
  • 江苏
  • 湖北
  • 湖南
  • 江西
  • 浙江
  • 广东
  • 云南
  • 福建
  • 海南
  • 山西
  • 四川
  • 陕西
  • 贵州
  • 安徽
  • 广西
  • 内蒙
  • 西藏
  • 新疆
  • 宁夏
  • 兵团
手机号立即预约

请填写图片验证码后获取短信验证码

看不清楚,换张图片

免费获取短信验证码

PostgreSQL 源码解读(88)- 查询语句#73(SeqNext函数#1)

本节介绍了SeqNext函数的主要实现逻辑以及该函数中初始化相关数据结构的实现逻辑。SeqNext函数作为参数传递到函数ExecScan中,执行实际的扫描操作。

一、数据结构

TupleTableSlot
Tuple Table Slot,用于存储元组相关信息


// Tuple table slot: holds the information kept about one tuple.
// Each FIELDNO_TUPLETABLESLOT_* macro equals the zero-based position of the
// member declared directly below it (type is member 0).
// NOTE(review): the per-member notes describe what the declarations suggest;
// the original member comments are not present here -- confirm against the
// upstream header for the server version in use.
typedef struct TupleTableSlot
{
    NodeTag     type;//Node tag
#define FIELDNO_TUPLETABLESLOT_FLAGS 1
    uint16      tts_flags;      // flag bits for this slot (meaning of bits not shown here)
#define FIELDNO_TUPLETABLESLOT_NVALID 2
    AttrNumber  tts_nvalid;     // presumably the number of valid entries in tts_values -- confirm
    const TupleTableSlotOps *const tts_ops; // const pointer to the slot's operations table
#define FIELDNO_TUPLETABLESLOT_TUPLEDESCRIPTOR 4
    TupleDesc   tts_tupleDescriptor;    // tuple descriptor associated with the slot
#define FIELDNO_TUPLETABLESLOT_VALUES 5
    Datum      *tts_values;     // array of attribute values
#define FIELDNO_TUPLETABLESLOT_ISNULL 6
    bool       *tts_isnull;     // array of null flags, presumably parallel to tts_values -- confirm
    MemoryContext tts_mcxt;     // memory context associated with the slot
} TupleTableSlot;


// Tuple descriptor; TupleDesc is a pointer to this struct.
// The struct ends in a flexible array holding one FormData_pg_attribute per
// attribute.
// NOTE(review): member notes other than attrs are read off the member names
// and types only -- confirm against the upstream header.
typedef struct tupleDesc
{
    int         natts;          // presumably the number of entries in attrs -- confirm
    Oid         tdtypeid;       // presumably the composite type OID of the tuple -- confirm
    int32       tdtypmod;       // presumably the typmod of the tuple type -- confirm
    int         tdrefcount;     // reference count (counting rules not shown here)
    TupleConstr *constr;        // constraint information; whether it may be NULL is not shown here
    
    //attrs[N] is the descriptor of the (N+1)-th attribute
    FormData_pg_attribute attrs[FLEXIBLE_ARRAY_MEMBER];
}  *TupleDesc;

HeapTuple
HeapTupleData是一个指向元组的内存数据结构
HeapTuple是指向HeapTupleData的指针


// In-memory descriptor of one heap tuple; HeapTuple is a pointer to it.
// FIELDNO_HEAPTUPLEDATA_DATA equals the zero-based position of t_data
// within the struct.
typedef struct HeapTupleData
{
    uint32      t_len;          // length value for the tuple (presumably of *t_data -- confirm)
    ItemPointerData t_self;     // item pointer of the tuple; initscan resets it with ItemPointerSetInvalid
    Oid         t_tableOid;     // table OID; heap_beginscan_internal stores RelationGetRelid(relation) here
#define FIELDNO_HEAPTUPLEDATA_DATA 3
    HeapTupleHeader t_data;     // tuple header pointer; initscan sets it to NULL
} HeapTupleData;

typedef HeapTupleData *HeapTuple;

// sizeof(HeapTupleData) passed through MAXALIGN
#define HEAPTUPLESIZE   MAXALIGN(sizeof(HeapTupleData))


HeapScanDesc
HeapScanDesc是指向HeapScanDescData结构体的指针

// State of one heap scan; HeapScanDesc is a pointer to this struct.
// The first group of members is filled from the arguments of
// heap_beginscan_internal, the second group is set up by initscan, and the
// remaining members track the scan position.
typedef struct HeapScanDescData
{
    
    Relation    rs_rd;          // relation being scanned
    Snapshot    rs_snapshot;    // snapshot passed in by the caller
    int         rs_nkeys;       // number of scan keys
    ScanKey     rs_key;         // array of rs_nkeys scan keys, or NULL when rs_nkeys is 0
    bool        rs_bitmapscan;  // copied from the is_bitmapscan argument
    bool        rs_samplescan;  // copied from the is_samplescan argument
    bool        rs_pageatatime; // allow_pagemode && IsMVCCSnapshot(snapshot)
    bool        rs_allow_strat; // caller permits use of a buffer access strategy
    bool        rs_allow_sync;  // caller permits a synchronized scan
    bool        rs_temp_snap;   // copied from the temp_snap argument

    
    //state set up at initscan time
    BlockNumber rs_nblocks;     // number of blocks in the relation (or phs_nblocks for a parallel scan)
    BlockNumber rs_startblock;  // block number to start the scan at
    BlockNumber rs_numblocks;   // initscan sets this to InvalidBlockNumber
    
    //rs_numblocks is usually InvalidBlockNumber, meaning "scan the whole rel"
    
    BufferAccessStrategy rs_strategy;   // access strategy object, or NULL when none is used
    bool        rs_syncscan;    // whether the scan runs as a synchronized scan

    
    //current state of the scan
    bool        rs_inited;      // initscan sets this to false
    HeapTupleData rs_ctup;      // tuple descriptor embedded in the scan state (t_data reset to NULL by initscan)
    BlockNumber rs_cblock;      // block number; InvalidBlockNumber after initscan
    Buffer      rs_cbuf;        // buffer; InvalidBuffer after initscan
    
    //NB: if rs_cbuf is not InvalidBuffer, a pin is held on that buffer

    ParallelHeapScanDesc rs_parallel;   // parallel scan descriptor, or NULL for a non-parallel scan

    
    //the members below are only used in page-at-a-time mode and for bitmap scans
    int         rs_cindex;      // index value (presumably into rs_vistuples -- confirm)
    int         rs_ntuples;     // tuple count (presumably valid entries of rs_vistuples -- confirm)
    OffsetNumber rs_vistuples[MaxHeapTuplesPerPage];    // offset numbers, at most one page's worth
} HeapScanDescData;


typedef struct HeapScanDescData *HeapScanDesc;

ScanState
ScanState在PlanState的基础上进行扩展,用于表示对底层关系进行扫描的各类节点。


// Executor state shared by scan nodes: a PlanState followed by the scanned
// relation, its scan descriptor and the slot that receives scanned tuples.
typedef struct ScanState
{
    PlanState   ps;             // embedded PlanState; first member of the struct
    Relation    ss_currentRelation; // relation being scanned (SeqNext passes it to heap_beginscan)
    HeapScanDesc ss_currentScanDesc; // scan descriptor; NULL until SeqNext creates it
    TupleTableSlot *ss_ScanTupleSlot; // slot SeqNext stores each fetched tuple into
} ScanState;


// Executor state of a sequential scan node: a ScanState plus one size value.
typedef struct SeqScanState
{
    ScanState   ss;             // embedded ScanState; first member of the struct
    Size        pscan_len;      // presumably the size of the parallel scan descriptor -- confirm
} SeqScanState;

二、源码解读

SeqNext函数是ExecSeqScan的元组的实际访问方法(ExecScanAccessMtd).这里简单介绍了初始化过程,实际的元组获取过程下节再行介绍.


// Access method of the sequential scan: fetch the next heap tuple.
//
// On the first call the node has no scan descriptor yet, so one is created
// with heap_beginscan (no scan keys) and remembered in the node.  The next
// tuple in the executor's current scan direction is then requested from
// heap_getnext.
//
// Returns the node's scan tuple slot: loaded with the fetched tuple (together
// with the scan's current buffer) when a tuple was found, cleared otherwise.
static TupleTableSlot *
SeqNext(SeqScanState *node)
{
    HeapScanDesc scandesc = node->ss.ss_currentScanDesc;
    EState     *estate = node->ss.ps.state;
    ScanDirection direction = estate->es_direction;
    TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
    HeapTuple   tuple;

    // open the scan lazily on first use
    if (scandesc == NULL)
    {
        scandesc = heap_beginscan(node->ss.ss_currentRelation,
                                  estate->es_snapshot,
                                  0,
                                  NULL);
        node->ss.ss_currentScanDesc = scandesc;
    }

    tuple = heap_getnext(scandesc, direction);

    // nothing fetched: hand back the slot in its cleared state
    if (tuple == NULL)
    {
        ExecClearTuple(slot);
        return slot;
    }

    // store the tuple together with the buffer the scan currently holds
    ExecStoreBufferHeapTuple(tuple, slot, scandesc->rs_cbuf);

    return slot;
}


// Recheck function of the sequential scan (presumably the counterpart that
// is handed to ExecScan together with SeqNext -- confirm).
// Neither argument is examined; the result is always true.
static bool
SeqRecheck(SeqScanState *node, TupleTableSlot *slot)
{
    
    //unconditionally return true
    return true;
}



// Begin a heap scan with the standard settings.
//
// Thin wrapper around heap_beginscan_internal: no parallel scan descriptor,
// access strategy / synchronized scan / page mode all permitted, and the scan
// is neither a bitmap scan nor a sample scan and does not use a temporary
// snapshot.
//
// Returns the scan descriptor produced by heap_beginscan_internal.
HeapScanDesc
heap_beginscan(Relation relation, Snapshot snapshot,
               int nkeys, ScanKey key)
{
    HeapScanDesc scan;

    scan = heap_beginscan_internal(relation, snapshot, nkeys, key,
                                   NULL,    // parallel_scan
                                   true,    // allow_strat
                                   true,    // allow_sync
                                   true,    // allow_pagemode
                                   false,   // is_bitmapscan
                                   false,   // is_samplescan
                                   false);  // temp_snap

    return scan;
}


// Workhorse behind heap_beginscan: allocate and fill a heap scan descriptor.
//
// Increments the relation's reference count, allocates the descriptor with
// palloc, copies the caller's settings into it, takes a predicate lock on the
// relation unless this is a bitmap scan, allocates room for the scan keys
// when there are any, and finally lets initscan set up the rest.
//
// Returns the newly built scan descriptor.
static HeapScanDesc
heap_beginscan_internal(Relation relation, Snapshot snapshot,  // relation and snapshot to scan with
                        int nkeys, ScanKey key,                // number of scan keys and the keys
                        ParallelHeapScanDesc parallel_scan,    // parallel scan descriptor, or NULL
                        bool allow_strat,                      // allow a buffer access strategy?
                        bool allow_sync,                       // allow a synchronized scan?
                        bool allow_pagemode,                   // allow page-at-a-time mode?
                        bool is_bitmapscan,                    // is this a bitmap scan?
                        bool is_samplescan,                    // is this a sample scan?
                        bool temp_snap)                        // is the snapshot a temporary one?
{
    HeapScanDesc desc;

    // bump the reference count of the relation
    RelationIncrementReferenceCount(relation);

    // allocate the descriptor
    desc = (HeapScanDesc) palloc(sizeof(HeapScanDescData));

    // what is being scanned
    desc->rs_rd = relation;
    desc->rs_snapshot = snapshot;
    desc->rs_nkeys = nkeys;
    desc->rs_parallel = parallel_scan;

    // kind of scan and what the caller permits
    desc->rs_bitmapscan = is_bitmapscan;
    desc->rs_samplescan = is_samplescan;
    desc->rs_allow_strat = allow_strat;
    desc->rs_allow_sync = allow_sync;
    desc->rs_temp_snap = temp_snap;

    // no strategy object yet; initscan decides whether to create one
    desc->rs_strategy = NULL;

    // page-at-a-time mode needs both the caller's consent and an MVCC snapshot
    desc->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(snapshot);

    // every scan except a bitmap scan takes a predicate lock on the relation
    if (!is_bitmapscan)
        PredicateLockRelation(relation, snapshot);

    // record the relation's OID in the embedded tuple descriptor
    desc->rs_ctup.t_tableOid = RelationGetRelid(relation);

    // room for the scan keys; initscan copies them in
    desc->rs_key = (nkeys > 0)
        ? (ScanKey) palloc(sizeof(ScanKeyData) * nkeys)
        : NULL;

    // finish the setup
    initscan(desc, key, false);

    return desc;
}


// Look up the MultiXactStatusLock table entry for the given status value.
// NOTE(review): this macro is not used by any code shown alongside it.
#define TUPLOCK_from_mxstatus(status) \
            (MultiXactStatusLock[(status)])




// (Re)initialise the scan-time state of a heap scan descriptor.
//
// scan            descriptor to set up; the caller-supplied members
//                 (rs_rd, rs_parallel, rs_allow_*, rs_nkeys, rs_key, ...)
//                 must already be filled in
// key             scan keys to copy into scan->rs_key, or NULL for none
// keep_startblock when true (and the scan is not parallel) rs_startblock is
//                 left untouched
//
// Steps: determine the relation size, decide whether a bulk-read access
// strategy and synchronized scanning are used, pick the start block, reset
// the current-position members, copy the scan keys and count the scan in the
// statistics.
static void
initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
{
    bool        use_strategy = false;
    bool        use_syncscan = false;

    // relation size: taken from the parallel descriptor when there is one
    if (scan->rs_parallel == NULL)
        scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd);
    else
        scan->rs_nblocks = scan->rs_parallel->phs_nblocks;

    // Strategy and syncscan are considered only for a relation that does not
    // use local buffers and is larger than a quarter of NBuffers; even then
    // the caller's permissions decide.
    if (!RelationUsesLocalBuffers(scan->rs_rd) &&
        scan->rs_nblocks > NBuffers / 4)
    {
        use_strategy = scan->rs_allow_strat;
        use_syncscan = scan->rs_allow_sync;
    }

    if (!use_strategy)
    {
        // no strategy wanted: drop any existing strategy object
        if (scan->rs_strategy != NULL)
            FreeAccessStrategy(scan->rs_strategy);
        scan->rs_strategy = NULL;
    }
    else if (scan->rs_strategy == NULL)
    {
        // an existing strategy object is kept; otherwise create one
        scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD);
    }

    if (scan->rs_parallel != NULL)
    {
        // parallel scan: take the setting from the parallel descriptor
        scan->rs_syncscan = scan->rs_parallel->phs_syncscan;
    }
    else
    {
        bool        sync_on = (use_syncscan && synchronize_seqscans);

        scan->rs_syncscan = sync_on;

        // unless the caller keeps the old start block, start where
        // ss_get_location says for a synchronized scan, else at block 0
        if (!keep_startblock)
            scan->rs_startblock = sync_on
                ? ss_get_location(scan->rs_rd, scan->rs_nblocks)
                : 0;
    }

    // no block limit, and no current tuple / block / buffer yet
    scan->rs_numblocks = InvalidBlockNumber;
    scan->rs_inited = false;
    scan->rs_cblock = InvalidBlockNumber;
    scan->rs_cbuf = InvalidBuffer;
    scan->rs_ctup.t_data = NULL;
    ItemPointerSetInvalid(&scan->rs_ctup.t_self);

    // the page-at-a-time members are not initialised here

    // copy the scan keys, if any were supplied
    if (key != NULL)
        memcpy(scan->rs_key, key, scan->rs_nkeys * sizeof(ScanKeyData));

    // bitmap scans and sample scans are not counted as heap scans here
    if (!(scan->rs_bitmapscan || scan->rs_samplescan))
        pgstat_count_heap_scan(scan->rs_rd);
}

三、跟踪分析

测试脚本如下

testdb=# explain select dw.*,grjf.grbh,grjf.xm,grjf.ny,grjf.je 
testdb-# from t_dwxx dw,lateral (select gr.grbh,gr.xm,jf.ny,jf.je 
testdb(#                         from t_grxx gr inner join t_jfxx jf 
testdb(#                                        on gr.dwbh = dw.dwbh 
testdb(#                                           and gr.grbh = jf.grbh) grjf
testdb-# order by dw.dwbh;
                                        QUERY PLAN                                        
------------------------------------------------------------------------------------------
 Sort  (cost=20070.93..20320.93 rows=100000 width=47)
   Sort Key: dw.dwbh
   ->  Hash Join  (cost=3754.00..8689.61 rows=100000 width=47)
         Hash Cond: ((gr.dwbh)::text = (dw.dwbh)::text)
         ->  Hash Join  (cost=3465.00..8138.00 rows=100000 width=31)
               Hash Cond: ((jf.grbh)::text = (gr.grbh)::text)
               ->  Seq Scan on t_jfxx jf  (cost=0.00..1637.00 rows=100000 width=20)
               ->  Hash  (cost=1726.00..1726.00 rows=100000 width=16)
                     ->  Seq Scan on t_grxx gr  (cost=0.00..1726.00 rows=100000 width=16)
         ->  Hash  (cost=164.00..164.00 rows=10000 width=20)
               ->  Seq Scan on t_dwxx dw  (cost=0.00..164.00 rows=10000 width=20)
(11 rows)

启动gdb,设置断点,进入SeqNext

(gdb) b SeqNext
Breakpoint 1 at 0x7156b2: file nodeSeqscan.c, line 60.
(gdb) c
Continuing.

Breakpoint 1, SeqNext (node=0x2ed1588) at nodeSeqscan.c:60
60      scandesc = node->ss.ss_currentScanDesc;

变量赋值

60      scandesc = node->ss.ss_currentScanDesc;
(gdb) n
61      estate = node->ss.ps.state;
(gdb) 
62      direction = estate->es_direction;
(gdb) 
63      slot = node->ss.ss_ScanTupleSlot;
(gdb) 
65      if (scandesc == NULL)

scandesc为NULL,进入初始化,调用heap_beginscan

(gdb) p scandesc
$1 = (HeapScanDesc) 0x0

进入heap_beginscan/heap_beginscan_internal函数

(gdb) n
71          scandesc = heap_beginscan(node->ss.ss_currentRelation,
(gdb) step
heap_beginscan (relation=0x7fb27c488a90, snapshot=0x2e0b8f0, nkeys=0, key=0x0) at heapam.c:1407
1407        return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL,
(gdb) step
heap_beginscan_internal (relation=0x7fb27c488a90, snapshot=0x2e0b8f0, nkeys=0, key=0x0, parallel_scan=0x0, 
    allow_strat=true, allow_sync=true, allow_pagemode=true, is_bitmapscan=false, is_samplescan=false, temp_snap=false)
    at heapam.c:1469
1469        RelationIncrementReferenceCount(relation);    

heap_beginscan_internal->增加relation的引用计数

1469        RelationIncrementReferenceCount(relation);
(gdb) n

heap_beginscan_internal->初始化HeapScanDesc结构体

1474        scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
(gdb) 
1476        scan->rs_rd = relation;
(gdb) 
1477        scan->rs_snapshot = snapshot;
(gdb) 
1478        scan->rs_nkeys = nkeys;
(gdb) 
1479        scan->rs_bitmapscan = is_bitmapscan;
(gdb) 
1480        scan->rs_samplescan = is_samplescan;
(gdb) 
1481        scan->rs_strategy = NULL;   
(gdb) 
1482        scan->rs_allow_strat = allow_strat;
(gdb) 
1483        scan->rs_allow_sync = allow_sync;
(gdb) 
1484        scan->rs_temp_snap = temp_snap;
(gdb) 
1485        scan->rs_parallel = parallel_scan;
(gdb) 
1490        scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(snapshot);
(gdb) 
1503        if (!is_bitmapscan)

heap_beginscan_internal->非位图扫描,谓词锁定

1503        if (!is_bitmapscan)
(gdb) p is_bitmapscan
$2 = false
(gdb) n
1504            PredicateLockRelation(relation, snapshot);
(gdb) 
1507        scan->rs_ctup.t_tableOid = RelationGetRelid(relation);

heap_beginscan_internal->进入initscan函数

(gdb) n
1513        if (nkeys > 0)
(gdb) 
1516            scan->rs_key = NULL;
(gdb) 
1518        initscan(scan, key, false);
(gdb) step
initscan (scan=0x2ee4568, key=0x0, keep_startblock=false) at heapam.c:236
236     if (scan->rs_parallel != NULL)

initscan->relation的大小相对于buffer并不大(未超过NBuffers的1/4),因此不使用访问策略(批量读取)和同步扫描

(gdb) n
239         scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd);
(gdb) 
253     if (!RelationUsesLocalBuffers(scan->rs_rd) &&
(gdb) 
254         scan->rs_nblocks > NBuffers / 4)
(gdb) 
253     if (!RelationUsesLocalBuffers(scan->rs_rd) &&
(gdb) 
260         allow_strat = allow_sync = false;

initscan->设置其他变量

312     if (key != NULL)
(gdb) 
320     if (!scan->rs_bitmapscan && !scan->rs_samplescan)
(gdb) 
321         pgstat_count_heap_scan(scan->rs_rd);
(gdb) 
322 }
(gdb) 

initscan->返回heap_beginscan_internal,完成初始化

(gdb) n
heap_beginscan_internal (relation=0x7fb27c488a90, snapshot=0x2e0b8f0, nkeys=0, key=0x0, parallel_scan=0x0, 
    allow_strat=true, allow_sync=true, allow_pagemode=true, is_bitmapscan=false, is_samplescan=false, temp_snap=false)
    at heapam.c:1520
1520        return scan;
(gdb) p *scan
$4 = {rs_rd = 0x7fb27c488a90, rs_snapshot = 0x2e0b8f0, rs_nkeys = 0, rs_key = 0x0, rs_bitmapscan = false, 
  rs_samplescan = false, rs_pageatatime = true, rs_allow_strat = true, rs_allow_sync = true, rs_temp_snap = false, 
  rs_nblocks = 726, rs_startblock = 0, rs_numblocks = 4294967295, rs_strategy = 0x0, rs_syncscan = false, 
  rs_inited = false, rs_ctup = {t_len = 2139062143, t_self = {ip_blkid = {bi_hi = 65535, bi_lo = 65535}, ip_posid = 0}, 
    t_tableOid = 16742, t_data = 0x0}, rs_cblock = 4294967295, rs_cbuf = 0, rs_parallel = 0x0, rs_cindex = 2139062143, 
  rs_ntuples = 2139062143, rs_vistuples = {32639 <repeats 291 times>}}
(gdb) 

DONE!

四、参考资料

PG Document:Query Planning

免责声明:

① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的,并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据,供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。

② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341

PostgreSQL 源码解读(88)- 查询语句#73(SeqNext函数#1)

下载Word文档到电脑,方便收藏和打印~

下载Word文档

猜你喜欢

PostgreSQL 源码解读(71)- 查询语句#56(make_one_rel函数#21-...

本节大体介绍了动态规划算法实现(standard_join_search)中的join_search_one_level->make_join_rel->populate_joinrel_with_paths->add_paths_to_j
2022-11-30

编程热搜

目录