PostgreSQL 源码解读(236)- 后台进程#14(autovacuum进程#2)
本节简单介绍了PostgreSQL的后台进程:autovacuum,主要分析了launch_worker函数的实现逻辑。
一、数据结构
AutoVacuumShmem
主要的autovacuum共享内存结构体,存储在shared memory中,同时WorkerInfo也会存储在其中.
/*
 * Main autovacuum shared-memory structure.  The WorkerInfo entries handed out
 * through the lists below are kept in the same shared area.
 */
typedef struct
{
// one flag slot per signal kind (AutoVacNumSignals entries)
sig_atomic_t av_signal[AutoVacNumSignals];
// presumably the PID of the autovacuum launcher process -- confirm
pid_t av_launcherpid;
// list of unused worker entries; do_start_worker() pops its head
dlist_head av_freeWorkers;
// presumably the list of worker entries currently running -- confirm
dlist_head av_runningWorkers;
// worker entry being started; set by do_start_worker() before it signals the postmaster
WorkerInfo av_startingWorker;
// fixed-size array of work items (NUM_WORKITEMS entries)
AutoVacuumWorkItem av_workItems[NUM_WORKITEMS];
} AutoVacuumShmemStruct;
// file-local pointer to the structure above; do_start_worker() touches its lists while holding AutovacuumLock
static AutoVacuumShmemStruct *AutoVacuumShmem;
FullTransactionId
64 bit的事务ID
/*
 * 64-bit transaction ID, wrapped in a struct.
 * XidFromFullTransactionId() narrows it to 32 bits by casting `value` to uint32.
 */
typedef struct FullTransactionId
{
uint64 value;
} FullTransactionId;
avw_dbase
用于跟踪worker中的数据库的结构体
// Per-database record used to keep track of databases while choosing one for a worker
typedef struct avw_dbase
{
// OID of the database
Oid adw_datid;
// database name
char *adw_name;
// compared against xidForceLimit in do_start_worker() to detect XID wraparound risk
TransactionId adw_frozenxid;
// compared against multiForceLimit in do_start_worker() to detect multixact wraparound risk
MultiXactId adw_minmulti;
// statistics entry filled from pgstat_fetch_stat_dbentry(); NULL when there is none
PgStat_StatDBEntry *adw_entry;
} avw_dbase;
WorkerInfo
/*
 * NOTE(review): this definition does not look like the autovacuum WorkerInfo.
 * do_start_worker() obtains a WorkerInfo via
 * dlist_container(WorkerInfoData, wi_links, ...) and writes wi_dboid, wi_proc
 * and wi_launchtime through it, and the gdb dump of *worker lists wi_links,
 * wi_dboid, wi_tableoid, wi_proc, wi_launchtime, wi_dobalance, wi_sharedrel,
 * wi_cost_delay, wi_cost_limit and wi_cost_limit_base.  None of those members
 * exist here, so this struct presumably comes from a different module that
 * happens to use the same type name -- verify against autovacuum.c.
 */
typedef struct
{
ArchiveHandle *AH;
ParallelSlot *slot;
} WorkerInfo;
二、源码解读
主要的实现逻辑在do_start_worker中
/*
 * launch_worker
 *
 * Ask do_start_worker() to start a worker and, if it picked a database,
 * update that database's slot in DatabaseList.
 *
 * now: timestamp used as the base for the database's next scheduled worker.
 */
static void
launch_worker(TimestampTz now)
{
    dlist_iter  pos;
    Oid         started_db = do_start_worker();

    /* No worker was started: nothing to reschedule. */
    if (!OidIsValid(started_db))
        return;

    dlist_foreach(pos, &DatabaseList)
    {
        avl_dbase  *entry = dlist_container(avl_dbase, adl_node, pos.cur);

        if (entry->adl_datid != started_db)
            continue;

        /*
         * Found the entry: schedule its next worker autovacuum_naptime * 1000
         * milliseconds after "now" and move it to the head of the list.
         */
        entry->adl_next_worker =
            TimestampTzPlusMilliseconds(now, autovacuum_naptime * 1000);
        dlist_move_head(&DatabaseList, pos.cur);
        return;
    }

    /* The database is not on the list yet: rebuild the list around it. */
    rebuild_database_list(started_db);
}
do_start_worker
选择一个DB,算法如下:
选择最久未执行autovacuum(last_autovac_time最小)的DB,或者需要清理以防止XID回卷导致数据丢失的DB.
如果存在XID回卷风险的DB,那么选择datfrozenxid最老的DB,而不管该DB最近一次autovacuum是何时执行的.
除回卷风险检查外,自动忽略没有连接过(统计信息为空)的DB.
/*
 * do_start_worker
 *
 * Choose a database to autovacuum and ask the postmaster to start a worker
 * for it.
 *
 * Selection order, as implemented in the loop below:
 *   1. databases whose adw_frozenxid precedes xidForceLimit (XID wraparound
 *      risk); among them the one with the oldest adw_frozenxid;
 *   2. otherwise, databases whose adw_minmulti precedes multiForceLimit;
 *      among them the one with the oldest adw_minmulti;
 *   3. otherwise, among databases that have a statistics entry and are not
 *      skipped by the DatabaseList schedule check, the one with the smallest
 *      last_autovac_time.
 *
 * Returns the OID of the chosen database, or InvalidOid when there is no
 * free worker entry or no database was chosen.
 */
static Oid
do_start_worker(void)
{
List *dblist;// list of databases to examine
ListCell *cell;// loop cursor over dblist
//typedef uint32 TransactionId;
TransactionId xidForceLimit;// XID threshold (unsigned 32-bit); older frozenxid => wraparound risk
MultiXactId multiForceLimit;// multixact threshold; older minmulti => wraparound risk
bool for_xid_wrap;
bool for_multi_wrap;
avw_dbase *avdb;
TimestampTz current_time;// current time
bool skipit = false;// was the most recently examined candidate skipped by the schedule check?
Oid retval = InvalidOid;// OID of the chosen database (return value)
MemoryContext tmpcxt,
oldcxt;// temporary context and the context to restore afterwards
// Return immediately when there is no free worker entry (tracked in AutoVacuumShmem)
LWLockAcquire(AutovacuumLock, LW_SHARED);
if (dlist_is_empty(&AutoVacuumShmem->av_freeWorkers))
{
LWLockRelease(AutovacuumLock);
return InvalidOid;
}
LWLockRelease(AutovacuumLock);
// Do the remaining work in a temporary memory context that is deleted before returning
tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
"Start worker tmp cxt",
ALLOCSET_DEFAULT_SIZES);
oldcxt = MemoryContextSwitchTo(tmpcxt);
// Refresh statistics
autovac_refresh_stats();
// Fetch the list of databases
dblist = get_database_list();
// recentXid is not declared in this function -- presumably file-level state; confirm
recentXid = ReadNewTransactionId();
xidForceLimit = recentXid - autovacuum_freeze_max_age;
//#define FirstNormalTransactionId ((TransactionId) 3)
// If the (unsigned, wrapping) result landed below FirstNormalTransactionId (3),
// subtract FirstNormalTransactionId once more
if (xidForceLimit < FirstNormalTransactionId)
xidForceLimit -= FirstNormalTransactionId;
// Determine the oldest datminmxid to consider
// NOTE(review): per the article, ReadNextMultiXactId reads MultiXactState->nextMXact -- not visible here
recentMulti = ReadNextMultiXactId();
multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
if (multiForceLimit < FirstMultiXactId)
multiForceLimit -= FirstMultiXactId;
avdb = NULL;// database chosen so far (none yet)
for_xid_wrap = false;// set once any database at XID wraparound risk is seen
for_multi_wrap = false;// set once any database at multixact wraparound risk is seen
current_time = GetCurrentTimestamp();// current time
foreach(cell, dblist)// walk the database list
{
avw_dbase *tmp = lfirst(cell);
dlist_iter iter;
// Is this database at risk of XID wraparound?
//TransactionIdPrecedes --- is id1 logically < id2?
if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
{
if (avdb == NULL ||
TransactionIdPrecedes(tmp->adw_frozenxid,
avdb->adw_frozenxid))
avdb = tmp;// keep the one with the older frozenxid
for_xid_wrap = true;
continue;
}
// Once an XID-wraparound candidate exists, databases not at XID risk are ignored
else if (for_xid_wrap)
continue;
else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
{
if (avdb == NULL ||
MultiXactIdPrecedes(tmp->adw_minmulti, avdb->adw_minmulti))
avdb = tmp;
for_multi_wrap = true;
continue;
}
// Likewise for multixact wraparound candidates
else if (for_multi_wrap)
continue;
// No wraparound pressure so far: look up this database's statistics entry
tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
// Databases without a statistics entry are ignored
if (!tmp->adw_entry)
continue;
skipit = false;
// Look the database up in DatabaseList (scanned in reverse) and check its scheduled time
dlist_reverse_foreach(iter, &DatabaseList)
{
avl_dbase *dbp = dlist_container(avl_dbase, adl_node, iter.cur);
if (dbp->adl_datid == tmp->adw_datid)
{
// Skip when neither time difference is exceeded; presumably this means
// adl_next_worker lies between now and now + autovacuum_naptime * 1000 ms -- confirm
if (!TimestampDifferenceExceeds(dbp->adl_next_worker,
current_time, 0) &&
!TimestampDifferenceExceeds(current_time,
dbp->adl_next_worker,
autovacuum_naptime * 1000))
skipit = true;
break;
}
}
if (skipit)
continue;
// Keep the database with the smallest last_autovac_time
if (avdb == NULL ||
tmp->adw_entry->last_autovac_time < avdb->adw_entry->last_autovac_time)
avdb = tmp;
}
// A database was chosen: hand it to a free worker entry and signal the postmaster
if (avdb != NULL)
{
WorkerInfo worker;
dlist_node *wptr;
LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
// Take a worker entry from the free list (its non-emptiness was checked above,
// under a lock that has since been released)
wptr = dlist_pop_head_node(&AutoVacuumShmem->av_freeWorkers);
worker = dlist_container(WorkerInfoData, wi_links, wptr);
worker->wi_dboid = avdb->adw_datid;
worker->wi_proc = NULL;
worker->wi_launchtime = GetCurrentTimestamp();
// Publish the entry as the worker currently being started
AutoVacuumShmem->av_startingWorker = worker;
LWLockRelease(AutovacuumLock);
SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER);
retval = avdb->adw_datid;
}
// Nothing chosen and the last examined candidate was skipped by the schedule check:
// rebuild DatabaseList (presumably because the list is stale -- confirm)
else if (skipit)
{
rebuild_database_list(InvalidOid);
}
// Restore the caller's memory context and drop everything allocated above
MemoryContextSwitchTo(oldcxt);
MemoryContextDelete(tmpcxt);
return retval;
}
/*
 * ReadNewTransactionId
 *
 * Returns the value of ReadNextFullTransactionId() narrowed to a
 * TransactionId by XidFromFullTransactionId().
 */
static inline TransactionId
ReadNewTransactionId(void)
{
return XidFromFullTransactionId(ReadNextFullTransactionId());
}
/* Narrow a FullTransactionId to 32 bits by casting its `value` member to uint32. */
#define XidFromFullTransactionId(x) ((uint32) (x).value)
/*
 * ReadNextFullTransactionId
 *
 * Returns a copy of ShmemVariableCache->nextFullXid, read while holding
 * XidGenLock in shared mode.
 */
FullTransactionId
ReadNextFullTransactionId(void)
{
    FullTransactionId snapshot;

    /* Hold the lock only for the duration of the copy. */
    LWLockAcquire(XidGenLock, LW_SHARED);
    snapshot = ShmemVariableCache->nextFullXid;
    LWLockRelease(XidGenLock);

    return snapshot;
}
三、跟踪分析
启动gdb,设置信号处理,设置断点
(gdb) handle SIGINT print nostop pass
SIGINT is used by the debugger.
Are you sure you want to change it? (y or n)
Please answer y or n.
SIGINT is used by the debugger.
Are you sure you want to change it? (y or n) y
Signal Stop Print Pass to program Description
SIGINT No Yes Yes Interrupt
(gdb) b autovacuum.c:launch_worker
Breakpoint 1 at 0x82f3e7: file autovacuum.c, line 1338.
(gdb) b autovacuum.c:783
Breakpoint 2 at 0x82e8f0: file autovacuum.c, line 783.
(gdb) c
Continuing.
在其他session执行更新等操作
[pg12@localhost test]$ psql -c "update tbl set id = 1;"
Expanded display is used automatically.
UPDATE 2000000
[pg12@localhost test]$ psql -c "update t1 set id = 1;"
Expanded display is used automatically.
UPDATE 20000
[pg12@localhost test]$ psql -c "update t2 set id = 1;"
Expanded display is used automatically.
UPDATE 10000
[pg12@localhost test]$ psql -c "select txid_current();"
Expanded display is used automatically.
txid_current
--------------
2917
(1 row)
60s后在gdb console中continue
Breakpoint 2, AutoVacLauncherMain (argc=0, argv=0x0) at autovacuum.c:783
783 if (dlist_is_empty(&DatabaseList))
(gdb) n
804 avdb = dlist_tail_element(avl_dbase, adl_node, &DatabaseList);
(gdb) n
810 if (TimestampDifferenceExceeds(avdb->adl_next_worker,
(gdb)
812 launch_worker(current_time);
(gdb) p *avdb
$1 = {adl_datid = 16384, adl_next_worker = 628852948486950, adl_score = 0, adl_node = {
prev = 0xfd9880 <DatabaseList>, next = 0xfd9880 <DatabaseList>}}
(gdb) step
Breakpoint 1, launch_worker (now=628853296722794) at autovacuum.c:1338
1338 dbid = do_start_worker();
进入do_start_worker
(gdb) step
do_start_worker () at autovacuum.c:1128
1128 bool skipit = false;
(gdb) n
1129 Oid retval = InvalidOid;
(gdb)
1134 LWLockAcquire(AutovacuumLock, LW_SHARED);
(gdb)
1135 if (dlist_is_empty(&AutoVacuumShmem->av_freeWorkers))
查看AutoVacuumShmem结构体
(gdb) p *AutoVacuumShmem
$2 = {av_signal = {0, 0}, av_launcherpid = 5476, av_freeWorkers = {head = {prev = 0x7f8ccf1a4938,
next = 0x7f8ccf1a49b8}}, av_runningWorkers = {head = {prev = 0x7f8ccf1a3520, next = 0x7f8ccf1a3520}},
av_startingWorker = 0x0, av_workItems = {{avw_type = AVW_BRINSummarizeRange, avw_used = false,
avw_active = false, avw_database = 0, avw_relation = 0, avw_blockNumber = 0} <repeats 256 times>}}
(gdb) n
1140 LWLockRelease(AutovacuumLock);
(gdb) p AutoVacuumShmem->av_runningWorkers
$3 = {head = {prev = 0x7f8ccf1a3520, next = 0x7f8ccf1a3520}}
(gdb) n
找到需要vacuum的database
1146 tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
(gdb)
1149 oldcxt = MemoryContextSwitchTo(tmpcxt);
(gdb)
1152 autovac_refresh_stats();
(gdb) n
1155 dblist = get_database_list();
(gdb)
1162 recentXid = ReadNewTransactionId();
(gdb) p *dblist
$8 = {type = T_List, length = 5, head = 0x2382d48, tail = 0x2382f90}
(gdb) n
1163 xidForceLimit = recentXid - autovacuum_freeze_max_age;
(gdb) p recentXid
$9 = 2917
(gdb) p autovacuum_freeze_max_age
$10 = 200000000
(gdb) n
1166 if (xidForceLimit < FirstNormalTransactionId)
(gdb) p xidForceLimit
$11 = 4094970213
(gdb) p FirstNormalTransactionId
$12 = 3
(gdb) n
1170 recentMulti = ReadNextMultiXactId();
(gdb)
1171 multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
(gdb)
1172 if (multiForceLimit < FirstMultiXactId)
(gdb) p recentMulti
$13 = 1
(gdb) p MultiXactMemberFreezeThreshold()
$14 = 400000000
(gdb) n
1196 avdb = NULL;
(gdb)
1197 for_xid_wrap = false;
(gdb)
1198 for_multi_wrap = false;
(gdb)
1199 current_time = GetCurrentTimestamp();
(gdb)
1200 foreach(cell, dblist)
(gdb)
1202 avw_dbase *tmp = lfirst(cell);
(gdb)
1206 if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
(gdb) p *tmp --> 这是postgres数据库
$15 = {adw_datid = 13591, adw_name = 0x2382d20 "postgres", adw_frozenxid = 479, adw_minmulti = 1,
adw_entry = 0x0}
(gdb) n
1215 else if (for_xid_wrap)
(gdb)
1217 else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
(gdb)
1225 else if (for_multi_wrap)
(gdb)
1229 tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
(gdb)
1235 if (!tmp->adw_entry)
(gdb)
1236 continue;
(gdb)
1200 foreach(cell, dblist)
(gdb)
1202 avw_dbase *tmp = lfirst(cell);
(gdb)
1206 if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
(gdb) p *tmp --> 这是testdb数据库
$16 = {adw_datid = 16384, adw_name = 0x2382de0 "testdb", adw_frozenxid = 531, adw_minmulti = 1, adw_entry = 0x0}
(gdb) p tmp->adw_frozenxid
$17 = 531
(gdb) p xidForceLimit
$18 = 4094970213
(gdb) n
1215 else if (for_xid_wrap)
(gdb)
1217 else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
(gdb)
1225 else if (for_multi_wrap)
(gdb)
1229 tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
(gdb)
1235 if (!tmp->adw_entry)
(gdb)
1245 skipit = false;
(gdb)
1247 dlist_reverse_foreach(iter, &DatabaseList)
(gdb)
1249 avl_dbase *dbp = dlist_container(avl_dbase, adl_node, iter.cur);
(gdb)
1251 if (dbp->adl_datid == tmp->adw_datid)
(gdb)
1257 if (!TimestampDifferenceExceeds(dbp->adl_next_worker,
(gdb)
1267 if (skipit)
(gdb)
1274 if (avdb == NULL ||
(gdb)
1276 avdb = tmp;
(gdb) n
1200 foreach(cell, dblist)
(gdb)
1202 avw_dbase *tmp = lfirst(cell);
(gdb)
1206 if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
(gdb)
1215 else if (for_xid_wrap)
(gdb) p *tmp
$19 = {adw_datid = 1, adw_name = 0x2382e60 "template1", adw_frozenxid = 479, adw_minmulti = 1, adw_entry = 0x0}
(gdb) n
1217 else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
(gdb)
1225 else if (for_multi_wrap)
(gdb)
1229 tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
(gdb)
1235 if (!tmp->adw_entry)
(gdb)
1236 continue; --> 没有统计信息的,忽略
(gdb)
1200 foreach(cell, dblist)
(gdb)
1202 avw_dbase *tmp = lfirst(cell);
(gdb)
1206 if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
(gdb)
1215 else if (for_xid_wrap)
(gdb)
1217 else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
(gdb)
1225 else if (for_multi_wrap)
(gdb)
1229 tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
(gdb)
1235 if (!tmp->adw_entry)
(gdb)
1236 continue;
(gdb)
1200 foreach(cell, dblist)
(gdb)
1202 avw_dbase *tmp = lfirst(cell);
(gdb)
1206 if (TransactionIdPrecedes(tmp->adw_frozenxid, xidForceLimit))
(gdb)
1215 else if (for_xid_wrap)
(gdb)
1217 else if (MultiXactIdPrecedes(tmp->adw_minmulti, multiForceLimit))
(gdb)
1225 else if (for_multi_wrap)
(gdb)
1229 tmp->adw_entry = pgstat_fetch_stat_dbentry(tmp->adw_datid);
(gdb)
1235 if (!tmp->adw_entry)
(gdb)
1236 continue;
(gdb)
1200 foreach(cell, dblist)
(gdb)
完成db遍历,找到了需要处理的数据库->testdb,接下来就是找空闲worker并启动此worker执行vacuum
1280 if (avdb != NULL)
(gdb)
1285 LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
(gdb)
1291 wptr = dlist_pop_head_node(&AutoVacuumShmem->av_freeWorkers);
(gdb)
1293 worker = dlist_container(WorkerInfoData, wi_links, wptr);
(gdb) p *wptr
$20 = {prev = 0x7f8ccf1a3510, next = 0x7f8ccf1a4978}
(gdb) n
1294 worker->wi_dboid = avdb->adw_datid;
(gdb) p *worker
$21 = {wi_links = {prev = 0x7f8ccf1a3510, next = 0x7f8ccf1a4978}, wi_dboid = 0, wi_tableoid = 0, wi_proc = 0x0,
wi_launchtime = 0, wi_dobalance = false, wi_sharedrel = false, wi_cost_delay = 0, wi_cost_limit = 0,
wi_cost_limit_base = 0}
(gdb) n
1295 worker->wi_proc = NULL;
(gdb)
1296 worker->wi_launchtime = GetCurrentTimestamp();
(gdb)
1298 AutoVacuumShmem->av_startingWorker = worker;
(gdb)
1300 LWLockRelease(AutovacuumLock);
(gdb)
1302 SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER);
(gdb) p *AutoVacuumShmem
$22 = {av_signal = {0, 0}, av_launcherpid = 5476, av_freeWorkers = {head = {prev = 0x7f8ccf1a4938,
next = 0x7f8ccf1a4978}}, av_runningWorkers = {head = {prev = 0x7f8ccf1a3520, next = 0x7f8ccf1a3520}},
av_startingWorker = 0x7f8ccf1a49b8, av_workItems = {{avw_type = AVW_BRINSummarizeRange, avw_used = false,
avw_active = false, avw_database = 0, avw_relation = 0, avw_blockNumber = 0} <repeats 256 times>}}
(gdb) n
1304 retval = avdb->adw_datid;
(gdb)
Program received signal SIGUSR2, User defined signal 2.
do_start_worker () at autovacuum.c:1304
1304 retval = avdb->adw_datid;
(gdb)
avl_sigusr2_handler (postgres_signal_arg=32764) at autovacuum.c:1405
1405 {
(gdb)
DONE!
四、参考资料
PG Source Code
免责声明:
① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的,并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据,供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。
② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341
PostgreSQL 源码解读(236)- 后台进程#14(autovacuum进程#2)
下载Word文档到电脑,方便收藏和打印~