PostgreSQLDBA(27)-MVCC#7(避免长事务)

对于Update/Delete操作,PostgreSQL的MVCC机制仍会保留先前版本的数据,这些数据在VACUUM时将被清除.但如果在执行VACUUM时,存在先于VACUUM操作的活动事务,假定这些活动事务中最小的事务ID为OldestXmin,那么VACUUM不会清理删除事务ID(即xmax) > OldestXmin的元组,如果业务繁忙并且OldestXmin事务一直不提交,会导致存储空间一直膨胀,直至耗尽空间.

创新互联是一家集网站建设,青原企业网站建设,青原品牌网站建设,网站定制,青原网站建设报价,网络营销,网络优化,青原网站推广为一体的创新建站企业,帮助传统企业提升企业形象加强企业竞争力。可充分满足这一群体相比中小企业更为丰富、高端、多元的互联网需求。同时我们时刻保持专业、时尚、前沿,时刻以成就客户成长自我,坚持不断学习、思考、沉淀、净化自己,让我们为更多的企业打造出实用型网站。

实验验证
数据准备
创建一张普通表,插入一行数据


drop table if exists t_page;
create table t_page (id int,c1 char(8),c2 varchar(16));
insert into t_page values(1,'1','a');

获取该表对应的数据文件


10:26:31 (xdb@[local]:5432)testdb=# select pg_relation_filepath('t_page');
 pg_relation_filepath 
----------------------
 base/16402/50824
(1 row)

查询该数据表占用的空间


10:42:43 (xdb@[local]:5432)testdb=# \set v_tablename t_page
10:43:09 (xdb@[local]:5432)testdb=# SELECT pg_size_pretty( pg_total_relation_size(:'v_tablename') );
 pg_size_pretty 
----------------
 8192 bytes
(1 row)

过程
session 1启动事务,session 2执行pg_bench进行测试(在session 1后启动),session 3监控数据表的空间增长/执行vacuum
session 1


10:46:48 (xdb@[local]:5432)testdb=# begin;
BEGIN
10:46:50 (xdb@[local]:5432)testdb=#* select txid_current();
 txid_current 
--------------
       397083
(1 row)
10:46:54 (xdb@[local]:5432)testdb=#*

session 2执行pg_bench


[xdb@localhost script]$ cat test.sql 
\set id random(1,10000)
begin;
update t_page set c1='c1'||:id;
commit;

session 3监控数据表的空间增长/执行vacuum


-- 空间
10:49:00 (xdb@[local]:5432)testdb=# SELECT pg_size_pretty( pg_total_relation_size(:'v_tablename') );
 pg_size_pretty 
----------------
 192 kB --> 在执行压力测试过程中从8K增长到192KB
(1 row)
-- 执行vacuum
10:49:16 (xdb@[local]:5432)testdb=# vacuum verbose t_page;
INFO:  vacuuming "public.t_page"
INFO:  "t_page": found 0 removable, 10825 nonremovable row versions in 59 out of 59 pages
DETAIL:  10824 dead row versions cannot be removed yet, oldest xmin: 397083
There were 0 unused item pointers.
Skipped 0 pages due to buffer pins, 0 frozen pages.
0 pages are entirely empty.
CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s.
VACUUM

vacuum命令的输出信息:10824 dead row versions cannot be removed yet, oldest xmin: 397083
因为删除的xmax > OldestXmin,因此这些元组不能被清除.

源码分析
元组对VACUUM的可见性判断与元组对SELECT操作的可见性判断类似,SELECT查询调用的可见性判断函数是HeapTupleSatisfiesMVCC,而VACUUM的可见性判断函数是HeapTupleSatisfiesVacuum,该函数由lazy_scan_heap调用.
lazy_scan_heap
lazy_scan_heap函数的逻辑先前已介绍过,这里不再详述,下面简单梳理与元组清理的相关逻辑


static void
lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
               Relation *Irel, int nindexes, bool aggressive)
{
    ...
    for (blkno = 0; blkno < nblocks; blkno++)
    {
        //遍历每个block
        ...
        //遍历block中的每个元组
        for (offnum = FirstOffsetNumber;
             offnum <= maxoff;
             offnum = OffsetNumberNext(offnum))
        {
            ...
            if (ItemIdIsDead(itemid))
            {
                //记录需删除的tuple
                //vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
                //vacrelstats->num_dead_tuples++;
                lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
                all_visible = false;
                continue;
            }
            ...
             //在这里,主要目的是一个元组是否可能对所有正在运行中的事务可见.
            switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
            {
                case HEAPTUPLE_DEAD:
                ...
                case HEAPTUPLE_LIVE:
                ...
                case HEAPTUPLE_RECENTLY_DEAD:
                    //这些元组不能被清除!
                    nkeep += 1;
                    all_visible = false;
                    break;
                ...
            }
         }
    }
    ...
}

如果ItemIdIsDead,则记录该元组,继续下一元组;如ItemIdIsNormal,调用HeapTupleSatisfiesVacuum函数,判断元组可见性.
ItemIdIsDead的判断很简单,判断ItemId的lp_flags标记是否为LP_DEAD


((itemId)->lp_flags == LP_DEAD)

实际上,在执行update的时候,ItemId的lp_flags仍然是0x01,即Normal状态.


11:20:49 (xdb@[local]:5432)testdb=# select lp,lp_off,lp_flags,t_xmin,t_xmax,t_field3 as t_cid,t_ctid,t_infomask2,t_infomask from heap_page_items(get_raw_page('t_page',0));
 lp  | lp_off | lp_flags | t_xmin | t_xmax | t_cid | t_ctid  | t_infomask2 | t_infomask 
-----+--------+----------+--------+--------+-------+---------+-------------+------------
   1 |   8152 |        1 | 457343 | 457343 |     0 | (0,1)   |           3 |      10642
   2 |   8112 |        1 | 457342 | 457343 |     0 | (0,1)   |           3 |       9474
   3 |   8072 |        1 | 457341 | 457342 |     0 | (0,2)   |           3 |       9474
...

查看该block中3号元组的信息


[xdb@localhost pg111db]$ hexdump -C base/16402/50831 -s 32 -n 2
00000020  88 9f                                             |..|
00000022
[xdb@localhost pg111db]$ hexdump -C base/16402/50831 -s 34 -n 2
00000022  4e 00                                             |N.|
00000024

计算偏移/大小/标记


[xdb@localhost pg111db]$ #偏移
[xdb@localhost pg111db]$ echo $((0x9f88 & ~$((1<<15))))
8072
[xdb@localhost pg111db]$ #大小
[xdb@localhost pg111db]$ echo $((0x004e >> 1))
39
[xdb@localhost pg111db]$ #lp_flags
[xdb@localhost pg111db]$ echo $((0x004e & 0x0001))
[xdb@localhost pg111db]$ echo $((0x9f88 >> 15))
1

lp_flags=0x01,即LP_NORMAL

下面,简述HeapTupleSatisfiesVacuum的实现逻辑,该函数判断元组对于VACUUM操作的可见性.
HeapTupleSatisfiesVacuum


HTSV_Result
HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
                         Buffer buffer)
{
    ...
    if (!HeapTupleHeaderXminCommitted(tuple))
    {
        //xmin事务未提交
        ...
    }
    //不符合以上条件,则可确定xmin事务已提交
    if (tuple->t_infomask & HEAP_XMAX_INVALID)
        //xmax为无效事务ID
        return HEAPTUPLE_LIVE;
    ...
    if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
    {
        //xmax事务未提交
        ...
    }
    ...
    //不符合以上条件,则可确定xmax事务已提交
    if (!TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin))
        //6.元组xmax ≥ OldestXmin,标记为最近删除
        return HEAPTUPLE_RECENTLY_DEAD;
    ...

元组xmax ≥ OldestXmin,标记为HEAPTUPLE_RECENTLY_DEAD,这些元组不能被清理!
之所以需要避免长事务是因为如果OldestXmin事务一直不提交,那么后续被删除的元组都一直保留,无法通过VACUUM清除.

上述案例,压力测试完成后,空间是原来的330倍,而且通过VACUUM(包括VACUUM FULL)无法清理!


10:50:13 (xdb@[local]:5432)testdb=# SELECT pg_size_pretty( pg_total_relation_size(:'v_tablename') );
 pg_size_pretty 
----------------
 2640 kB
(1 row)
11:01:28 (xdb@[local]:5432)testdb=# vacuum verbose t_page;
INFO:  vacuuming "public.t_page"
INFO:  "t_page": found 0 removable, 60255 nonremovable row versions in 326 out of 326 pages
DETAIL:  60254 dead row versions cannot be removed yet, oldest xmin: 397083
There were 0 unused item pointers.
Skipped 0 pages due to buffer pins, 0 frozen pages.
0 pages are entirely empty.
CPU: user: 0.02 s, system: 0.00 s, elapsed: 0.02 s.
VACUUM
11:01:31 (xdb@[local]:5432)testdb=# 
11:10:14 (xdb@[local]:5432)testdb=# vacuum full;
VACUUM
11:17:56 (xdb@[local]:5432)testdb=# SELECT pg_size_pretty( pg_total_relation_size(:'v_tablename') );
 pg_size_pretty 
----------------
 2640 kB
(1 row)

应用建议
在实际应用中,应尽可能避免长事务,跑批操作尽可能安排在非繁忙时段执行.如确定为只读事务,建议开启自动提交.
对于Java应用并且启用了连接池,如JDBC设置自动提交为false,就算是select操作,也务必在执行完毕后执行commit(Oracle不需要,但PG需要!).

参考资料
PostgreSQL Source Code
PostgreSQL 源码解读(134)- MVCC#18(vacuum过程-HeapTupleSatisfiesVacuum函数)
PostgreSQL 源码解读(130)- MVCC#14(vacuum过程-lazy_scan_heap函数)
PostgreSQL 源码解读(118)- MVCC#3(Tuple可见性判断)


标题名称:PostgreSQLDBA(27)-MVCC#7(避免长事务)
文章地址:http://scyanting.com/article/ppjsed.html