我的编程空间,编程开发者的网络收藏夹
学习永远不晚

PostgreSQL 源码解读(234)- 查询#127(NOT IN实现#5)

短信预约 -IT技能 免费直播动态提醒
省份

北京

  • 北京
  • 上海
  • 天津
  • 重庆
  • 河北
  • 山东
  • 辽宁
  • 黑龙江
  • 吉林
  • 甘肃
  • 青海
  • 河南
  • 江苏
  • 湖北
  • 湖南
  • 江西
  • 浙江
  • 广东
  • 云南
  • 福建
  • 海南
  • 山西
  • 四川
  • 陕西
  • 贵州
  • 安徽
  • 广西
  • 内蒙
  • 西藏
  • 新疆
  • 宁夏
  • 兵团
手机号立即预约

请填写图片验证码后获取短信验证码

看不清楚,换张图片

免费获取短信验证码

PostgreSQL 源码解读(234)- 查询#127(NOT IN实现#5)

本节简单解释了PostgreSQL NOT IN在执行时写入临时表空间的实现。

测试数据如下:


[local]:5432 pg12@testdb=# select count(*) from tbl;
 count 
-------
     1
(1 row)
Time: 6.009 ms
[local]:5432 pg12@testdb=# select count(*) from t_big_null;
  count   
----------
 10000001
(1 row)
[local]:5432 pg12@testdb=#

一、数据结构

Tuplestorestate
Tuplestore相关操作的私有状态。




// Storage state of a tuplestore; tuplestore_puttuple_common() switches on it
// to decide where a newly added tuple goes.
typedef enum
{
    TSS_INMEM,                    // tuples are kept in the memtuples[] array
    TSS_WRITEFILE,                // tuples are written to the temp file via WRITETUP
    TSS_READFILE                // file-backed; a put must first seek back to the saved write position
} TupStoreStatus;

// Private state of one tuplestore: where tuples currently live (memory array
// or temp file), the memory budget, the per-format callbacks and the set of
// read pointers.
struct Tuplestorestate
{
    TupStoreStatus status;        // current storage state (see TupStoreStatus)
    int            eflags;            // flag bits; EXEC_FLAG_BACKWARD is tested when spilling to disk
    bool        backward;        // set from (eflags & EXEC_FLAG_BACKWARD) when the temp file is created
    bool        interXact;        // passed to BufFileCreateTemp() when the temp file is created
    bool        truncated;        // NOTE(review): not used in this excerpt - presumably "tuples were trimmed"; confirm
    int64        availMem;        // remaining memory budget; adjusted by USEMEM/FREEMEM, negative means LACKMEM
    int64        allowedMem;        // total memory budget (4194304 in the gdb session below) - TODO confirm units are bytes
    int64        tuples;            // incremented once per tuple added
    BufFile    *myfile;            // temp file created on first spill; 0x0 in the gdb dump while still TSS_INMEM
    MemoryContext context;        // memory context made current while the incoming tuple is copied
    ResourceOwner resowner;        // made CurrentResourceOwner while the temp file is created
    
    // Callback invoked through COPYTUP(state, tup).
    void       *(*copytup) (Tuplestorestate *state, void *tup);
    
    // Callback invoked through WRITETUP(state, tup); used for every tuple
    // added while the store is file-backed.
    void        (*writetup) (Tuplestorestate *state, void *tup);
    
    // Callback invoked through READTUP(state, len).
    void       *(*readtup) (Tuplestorestate *state, unsigned int len);
    
    // In-memory tuple array (meaningful while status is TSS_INMEM).
    void      **memtuples;        // array of tuple pointers
    int            memtupdeleted;    // NOTE(review): not used in this excerpt - presumably count of leading deleted entries; confirm
    int            memtupcount;    // number of entries in use; also the index the next tuple is stored at
    int            memtupsize;        // capacity compared against memtupcount before/after each insert
    bool        growmemtuples;    // NOTE(review): presumably "memtuples may still be enlarged"; confirm against grow_memtuples()
    
    // Read pointers; tuplestore_puttuple_common() walks readptrs[0..readptrcount).
    TSReadPointer *readptrs;    // array of read pointers
    int            activeptr;        // index of the active read pointer
    int            readptrcount;    // number of read pointers in use
    int            readptrsize;    // NOTE(review): presumably allocated length of readptrs[]; confirm
    int            writepos_file;    // file number of the saved write position (used by BufFileSeek in TSS_READFILE)
    off_t        writepos_offset;    // offset of the saved write position
};
// Dispatch through the callbacks stored in Tuplestorestate.
#define COPYTUP(state,tup)    ((*(state)->copytup) (state, tup))
#define WRITETUP(state,tup) ((*(state)->writetup) (state, tup))
#define READTUP(state,len)    ((*(state)->readtup) (state, len))
// Memory accounting on availMem: LACKMEM is true once the budget has gone
// negative, USEMEM charges amt against it, FREEMEM gives amt back.
#define LACKMEM(state)        ((state)->availMem < 0)
#define USEMEM(state,amt)    ((state)->availMem -= (amt))
#define FREEMEM(state,amt)    ((state)->availMem += (amt))

TSReadPointer
tuplestore读指针




// NOTE(review): this enum is an exact repeat of the TupStoreStatus definition
// already listed under the Tuplestorestate heading; it is not part of
// TSReadPointer, and a single translation unit can contain only one copy.
typedef enum
{
    TSS_INMEM,                    // tuples are kept in the memtuples[] array
    TSS_WRITEFILE,                // tuples are written to the temp file via WRITETUP
    TSS_READFILE                // file-backed; a put must first seek back to the saved write position
} TupStoreStatus;

// One read position within a tuplestore. A store holds an array of these
// (Tuplestorestate.readptrs), one of which is the active pointer.
typedef struct
{
    int            eflags;            // NOTE(review): presumably capability flags for this pointer; confirm
    bool        eof_reached;    // cleared on non-active pointers whenever a new tuple is added
    int            current;        // in-memory position: set to memtupcount when un-EOF'ing in TSS_INMEM
    int            file;            // on-disk position: file number, as filled in by BufFileTell()
    off_t        offset;            // on-disk position: byte offset, as filled in by BufFileTell()
} TSReadPointer;

BufFile
该数据结构表示包含一个或多个物理文件的buffered file(每一个通过fd.c管理的虚拟文件描述符进行访问)




// Upper bound on the size of one physical segment file: 0x40000000 bytes = 1 GiB.
#define MAX_PHYSICAL_FILESIZE    0x40000000
// The same limit expressed in units of BLCKSZ (presumably the block size in
// bytes; BLCKSZ is defined outside this excerpt).
#define BUFFILE_SEG_SIZE        (MAX_PHYSICAL_FILESIZE / BLCKSZ)

// A buffered virtual file made of one or more physical segment files, plus a
// single block-sized buffer and the current logical position.
struct BufFile
{
    // number of physical files in the set
    int            numFiles;        
    
    // all files except the last one have length exactly MAX_PHYSICAL_FILESIZE
    // array allocated with numFiles entries
    File       *files;            
    // spans transactions?
    bool        isInterXact;    
    // is the buffer dirty?
    bool        dirty;            
    // is the file read-only?
    bool        readOnly;        
    // space for the segment files, if shared
    SharedFileSet *fileset;        
    // name of this BufFile, if shared
    const char *name;            
    
    // resource owner recorded for this BufFile
    // NOTE(review): presumably the owner of the underlying temp files; confirm
    ResourceOwner resowner;
    
    // file index (0..n) part of the current position
    int            curFile;        
    // offset part of the current position; BufFileTell() adds pos to it, so
    // this is the file offset corresponding to the start of the buffer
    off_t        curOffset;        
    // next read/write position within the buffer
    int            pos;            
    // number of valid bytes in the buffer
    int            nbytes;            
    PGAlignedBlock buffer;
};

二、源码解读

tuplestore_puttupleslot
把接收到的tuple放到tuplestore中




// tuplestore_puttupleslot
//
// Append the tuple held in a TupleTableSlot to the tuplestore.
//
// The slot's contents are copied into a MinimalTuple while the tuplestore's
// own memory context is current, the space reported by GetMemoryChunkSpace()
// for that copy is charged against the memory budget, and the copy is handed
// to tuplestore_puttuple_common(). The caller's memory context is restored
// before returning.
void
tuplestore_puttupleslot(Tuplestorestate *state,
                        TupleTableSlot *slot)
{
    MemoryContext saved_cxt;
    MinimalTuple mtup;

    // Everything allocated below must live in the tuplestore's context.
    saved_cxt = MemoryContextSwitchTo(state->context);

    // Copy the tuple out of the slot and account for its memory.
    mtup = ExecCopySlotMinimalTuple(slot);
    USEMEM(state, GetMemoryChunkSpace(mtup));

    tuplestore_puttuple_common(state, (void *) mtup);

    MemoryContextSwitchTo(saved_cxt);
}

tuplestore_puttuple_common
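tuplestore_puttupleslot函数的底层实现:根据tuplestore的当前状态(TSS_INMEM/TSS_WRITEFILE/TSS_READFILE),把tuple放入内存数组或写入临时文件;内存数组放满或内存不足时切换为写文件状态。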



// Common worker behind tuplestore_puttupleslot(): add one already-copied tuple
// to the store. Depending on state->status the tuple is appended to the
// in-memory array (spilling everything to a temp file once the array or the
// memory budget is exhausted) or written straight to the temp file.
//
// state: the tuplestore being appended to
// tuple: the tuple to add
static void
tuplestore_puttuple_common(Tuplestorestate *state, void *tuple)
{
    TSReadPointer *readptr;
    int            i;
    ResourceOwner oldowner;
    // Count the tuple no matter where it ends up being stored.
    state->tuples++;
    switch (state->status)
    {
        case TSS_INMEM:
            // Update read pointers as needed: a non-active pointer that had
            // hit EOF now has a tuple to read, so clear the flag and point it
            // at the slot the new tuple is about to occupy.
            readptr = state->readptrs;
            for (i = 0; i < state->readptrcount; readptr++, i++)
            {
                if (readptr->eof_reached && i != state->activeptr)
                {
                    // at EOF and not the active pointer: reset its state and position
                    readptr->eof_reached = false;
                    readptr->current = state->memtupcount;
                }
            }
            // Try to enlarge the array when it is about to fill up; the
            // result of grow_memtuples() is deliberately ignored here.
            if (state->memtupcount >= state->memtupsize - 1)
            {
                (void) grow_memtuples(state);
                Assert(state->memtupcount < state->memtupsize);
            }
            
            // store the tuple pointer in the array
            state->memtuples[state->memtupcount++] = tuple;
            // Done if the array still has room and the memory budget is not exceeded.
            if (state->memtupcount < state->memtupsize && !LACKMEM(state))
                return;
            // otherwise the contents must be spilled to disk
            
            // NOTE(review): presumably makes sure the temp tablespaces are
            // set up before the temp file is created; confirm.
            PrepareTempTablespaces();
            
            // create the temp file under the tuplestore's resource owner,
            // then restore the caller's resource owner
            oldowner = CurrentResourceOwner;
            CurrentResourceOwner = state->resowner;
            state->myfile = BufFileCreateTemp(state->interXact);
            CurrentResourceOwner = oldowner;
            // Remember whether EXEC_FLAG_BACKWARD is set in eflags, switch to
            // file-writing mode and call dumptuples() (presumably writes the
            // in-memory tuples out to the new file).
            state->backward = (state->eflags & EXEC_FLAG_BACKWARD) != 0;
            state->status = TSS_WRITEFILE;
            dumptuples(state);
            break;
        case TSS_WRITEFILE:
            // Update read pointers as needed: a non-active pointer that had
            // hit EOF is moved to the file's current position, which
            // BufFileTell() reports just before the new tuple is written.
            readptr = state->readptrs;
            for (i = 0; i < state->readptrcount; readptr++, i++)
            {
                if (readptr->eof_reached && i != state->activeptr)
                {
                    readptr->eof_reached = false;
                    BufFileTell(state->myfile,
                                &readptr->file,
                                &readptr->offset);
                }
            }
            //#define WRITETUP(state,tup) ((*(state)->writetup) (state, tup))
            WRITETUP(state, tuple);
            break;
        case TSS_READFILE:
            // Save the active read pointer's file position (unless it is at
            // EOF), then seek back to the saved write position.
            if (!state->readptrs[state->activeptr].eof_reached)
                BufFileTell(state->myfile,
                            &state->readptrs[state->activeptr].file,
                            &state->readptrs[state->activeptr].offset);
            if (BufFileSeek(state->myfile,
                            state->writepos_file, state->writepos_offset,
                            SEEK_SET) != 0)
                ereport(ERROR,
                        (errcode_for_file_access(),
                         errmsg("could not seek in tuplestore temporary file: %m")));
            state->status = TSS_WRITEFILE;
            // Update read pointers as needed: non-active pointers that had
            // hit EOF are moved to the write position restored above.
            readptr = state->readptrs;
            for (i = 0; i < state->readptrcount; readptr++, i++)
            {
                if (readptr->eof_reached && i != state->activeptr)
                {
                    readptr->eof_reached = false;
                    readptr->file = state->writepos_file;
                    readptr->offset = state->writepos_offset;
                }
            }
            //#define WRITETUP(state,tup) ((*(state)->writetup) (state, tup))
            WRITETUP(state, tuple);
            break;
        default:
            // any other status value is reported as an error
            elog(ERROR, "invalid tuplestore state");
            break;
    }
}
// BufFileTell
//
// Report the current logical position of a BufFile.
//
// file:   the buffered file to query
// fileno: receives the index of the current segment file (curFile)
// offset: receives the position within that segment, i.e. the buffer's file
//         offset (curOffset) plus the position inside the buffer (pos)
void
BufFileTell(BufFile *file, int *fileno, off_t *offset)
{
    int         segment = file->curFile;

    *fileno = segment;
    *offset = file->curOffset + file->pos;
}

三、跟踪分析

执行SQL:


[local]:5432 pg12@testdb=# select * from tbl a where a.id not in (select b.id from t_big_null b);

启动gdb,进入断点


(gdb) b tuplestore_puttupleslot
Breakpoint 1 at 0xab9134: file tuplestore.c, line 712.
(gdb) c
Continuing.
Breakpoint 1, tuplestore_puttupleslot (state=0x1efec78, slot=0x1efd4e0) at tuplestore.c:712
712        MemoryContext oldcxt = MemoryContextSwitchTo(state->context);
(gdb)

输入参数


(gdb) n
717        tuple = ExecCopySlotMinimalTuple(slot);
(gdb) 
718        USEMEM(state, GetMemoryChunkSpace(tuple));
(gdb) 
720        tuplestore_puttuple_common(state, (void *) tuple);
(gdb) p *state
$1 = {status = TSS_INMEM, eflags = 2, backward = false, interXact = false, truncated = false, 
  availMem = 4177840, allowedMem = 4194304, tuples = 0, myfile = 0x0, context = 0x1efce00, resowner = 0x1e5d308, 
  copytup = 0xaba7bd <copytup_heap>, writetup = 0xaba811 <writetup_heap>, readtup = 0xaba9d9 <readtup_heap>, 
  memtuples = 0x1f18ed0, memtupdeleted = 0, memtupcount = 0, memtupsize = 2048, growmemtuples = true, 
  readptrs = 0x1f056a0, activeptr = 0, readptrcount = 1, readptrsize = 8, writepos_file = 0, writepos_offset = 0}
(gdb) p *slot
$2 = {type = T_TupleTableSlot, tts_flags = 16, tts_nvalid = 0, tts_ops = 0xc3e780 <TTSOpsBufferHeapTuple>, 
  tts_tupleDescriptor = 0x7f16f33f5378, tts_values = 0x1efd550, tts_isnull = 0x1efd558, tts_mcxt = 0x1efce00, 
  tts_tid = {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 1}, tts_tableOid = 49155}
(gdb) p slot->tts_values[0]
$3 = 0
(gdb)

进入tuplestore_puttuple_common


(gdb) step
tuplestore_puttuple_common (state=0x1efec78, tuple=0x1f05ce8) at tuplestore.c:771
771        state->tuples++;
(gdb)

当前状态TSS_INMEM


(gdb) p state->status
$4 = TSS_INMEM
(gdb)

如需要,更新读指针(无需更新)


(gdb) n
773        switch (state->status)
(gdb) 
780                readptr = state->readptrs;
(gdb) 
781                for (i = 0; i < state->readptrcount; readptr++, i++)
(gdb) p *readptr
$5 = {eflags = 2, eof_reached = true, current = 0, file = 2139062143, offset = 9187201950435737471}
(gdb) n
783                    if (readptr->eof_reached && i != state->activeptr)
(gdb) p state->readptrcount
$6 = 1
(gdb) p state->activeptr
$7 = 0
(gdb) n
781                for (i = 0; i < state->readptrcount; readptr++, i++)
(gdb)

如需要,扩展数组(实际不需要)


(gdb) 
796                if (state->memtupcount >= state->memtupsize - 1)
(gdb) p state->memtupcount
$8 = 0
(gdb) p state->memtupsize - 1
$9 = 2047
(gdb) n
803                state->memtuples[state->memtupcount++] = tuple;
(gdb)

放入到内存中,返回


(gdb) n
808                if (state->memtupcount < state->memtupsize && !LACKMEM(state))
(gdb) 
809                    return;
(gdb)

退出函数


(gdb) 
892    }
(gdb) 
tuplestore_puttupleslot (state=0x1efec78, slot=0x1efd4e0) at tuplestore.c:722
722        MemoryContextSwitchTo(oldcxt);
(gdb) 
723    }
(gdb) 
ExecMaterial (pstate=0x1efd1b8) at nodeMaterial.c:149
149            ExecCopySlot(slot, outerslot);
(gdb)

使用gdb的ignore命令跳过断点N次后继续执行,此时state->status状态已变为TSS_WRITEFILE


(gdb) ignore 4 4194303
Will ignore next 4194303 crossings of breakpoint 4.
(gdb) c
Continuing.
Breakpoint 3, tuplestore_puttuple_common (state=0x160ba38, tuple=0x7f2cd90cc0b0) at tuplestore.c:771
771        state->tuples++;
(gdb) 
...
tuplestore_puttupleslot (state=0x160ba38, slot=0x160a2a0) at tuplestore.c:722
722        MemoryContextSwitchTo(oldcxt);
(gdb) c
Continuing.
Breakpoint 3, tuplestore_puttuple_common (state=0x160ba38, tuple=0x7f2cd90cc0e8) at tuplestore.c:771
771        state->tuples++;
(gdb) p *state
$9 = {status = TSS_WRITEFILE, eflags = 2, backward = false, interXact = false, truncated = false, 
  availMem = 3669944, allowedMem = 4194304, tuples = 4192545, myfile = 0x162ad80, context = 0x1609bc0, 
  resowner = 0x1579170, copytup = 0xaba7bd <copytup_heap>, writetup = 0xaba811 <writetup_heap>, 
  readtup = 0xaba9d9 <readtup_heap>, memtuples = 0x7f2cd914a050, memtupdeleted = 0, memtupcount = 0, 
  memtupsize = 65535, growmemtuples = false, readptrs = 0x1627590, activeptr = 0, readptrcount = 1, 
  readptrsize = 8, writepos_file = 0, writepos_offset = 0}
(gdb) n
773        switch (state->status)
(gdb) 
841                readptr = state->readptrs;
(gdb) 
842                for (i = 0; i < state->readptrcount; readptr++, i++)
(gdb) 
844                    if (readptr->eof_reached && i != state->activeptr)
(gdb) 
842                for (i = 0; i < state->readptrcount; readptr++, i++)
(gdb) 
853                WRITETUP(state, tuple);
(gdb) 
854                break;
(gdb) p *state->myfile
$10 = {numFiles = 1, files = 0x7f2cd934c008, isInterXact = false, dirty = true, readOnly = false, fileset = 0x0, 
  name = 0x0, resowner = 0x1579170, curFile = 0, curOffset = 58687488, pos = 8156, nbytes = 8156, buffer = {
    data = "\000\t\030\000\335\366?\000\016\000\000\000\001\000\000\t\030\000\336\366?\000\016\000\000\000\001\000\000\t\030\000\337\366?\000\016\000\000\000\001\000\000\t\030\000\340\366?\000\016\000\000\000\001\000\000\t\030\000\341\366?\000\016\000\000\000\001\000\000\t\030\000\342\366?\000\016\000\000\000\001\000\000\t\030\000\343\366?\000\016\000\000\000\001\000\000\t\030\000\344\366?\000\016\000\000\000\001\000\000\t\030\000\345\366?\000\016\000\000\000\001\000\000\t\030\000\346\366?\000\016\000\000\000\001\000\000\t\030\000\347\366?\000\016\000\000\000\001\000\000\t\030\000\350\366?\000\016\000\000\000\001\000\000\t\030\000\351\366?\000\016\000\000\000\001\000\000\t\030\000\352\366?\000\016\000\000\000\001\000\000\t\030\000"..., force_align_d = 1.7780737478550286e-307, 
    force_align_i64 = 18004352582551808}}
...

DONE

四、参考资料

N/A

免责声明:

① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的,并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据,供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。

② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341

PostgreSQL 源码解读(234)- 查询#127(NOT IN实现#5)

下载Word文档到电脑,方便收藏和打印~

下载Word文档

编程热搜

目录