Postgresql 10 分区探密 原作者:听雨 创作时间:2016-12-10 23:50:38+08 |
doudou586 发布于2016-12-20 23:50:38 评论: 1 浏览: 12072 顶: 999 踩: 1433 |
Postgresql官方终于要出分区了,开发线上已经看到提交分区代码了,下一个版本10带有分区功能应该没问题了,那么这个分区功能如何呢?且和我深入源码一探究竟。
首先介绍下原有的“分区”功能,这个很早就有了,是通过继承表创建子表的方式曲线实现的分区,例子如下:
create table tbl( a int, b varchar(10) ); create table tbl_1 ( check ( a <= 1000 ) ) INHERITS (tbl); create table tbl_2 ( check ( a <= 10000 and a >1000 ) ) INHERITS (tbl); create table tbl_3 ( check ( a <= 100000 and a >10000 ) ) INHERITS (tbl);
再通过创建触发器或者规则,实现数据分发,只需要向主表插入数据,可以自动分发到子分区表中,下面以触发器为例如下:
CREATE OR REPLACE FUNCTION tbl_part_tg() RETURNS TRIGGER AS $$ BEGIN IF ( NEW. a <= 1000 ) THEN INSERT INTO tbl_1 VALUES (NEW.*); ELSIF ( NEW. a > 1000 and NEW.a <= 10000 ) THEN INSERT INTO tbl_2 VALUES (NEW.*); ELSIF ( NEW. a > 10000 and NEW.a <= 100000 ) THEN INSERT INTO tbl_3 VALUES (NEW.*); ELSIF ( NEW. a > 100000 and NEW.a <= 1000000 ) THEN INSERT INTO tbl_4 VALUES (NEW.*); ELSE RAISE EXCEPTION 'data out of range!'; END IF; RETURN NULL; END; $$ LANGUAGE plpgsql; CREATE TRIGGER insert_tbl_part_tg BEFORE INSERT ON tbl FOR EACH ROW EXECUTE PROCEDURE tbl_part_tg();
这样一个postgres分区表就创建完毕,对应用来说是透明的,插入、查询都对主表操作,非常方便。
对于分区表来说,最大的好处在于分区剪枝功能,如果有50个分区表,对于某个条件值如果能确定,那么很可能就直接过滤掉了49个分区,大大提高扫描速度,当然也能将不同子分区表放在不同物理盘上,提高IO速度。那么对于查询是怎么实现子分区表过滤的呢?答案是约束排除,是否能使用约束排除由constraint_exclusion 参数控制,它有三个可设值:on、off、partition。on代表无条件打开,所有情况都会检查约束;off代表关闭,任何表都不会做约束排除;partition代表对分区表(或者说继承表)会进行约束排除,其它表则不会。因为检查约束在生成计划时会有额外开销,为了精准定位才有了这三个取值,默认值是partition,即只对分区表做约束排除。
如:select *from tbl where a = 12345; 首先找到主表tbl,然后通过tbl找到它的子表,找到后再拿着谓词条件a = 12345对一个个子表的约束进行检查,不符合条件的表就去掉不扫描,实现分区表过滤,下面简单介绍下约束排除源码逻辑。
//从set_rel_size 基表大小估计函数开始介绍 static void set_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { //检查是否需要扫描 if (rel->reloptkind == RELOPT_BASEREL && relation_excluded_by_constraints(root, rel, rte)) //检查约束是否能排除掉该表 { set_dummy_rel_pathlist(rel); //可以排除,不需要扫描该表 } else if (rte->inh)//检查是否有子表 { set_append_rel_size(root, rel, rti, rte);//有子表则开始检查所有子表并把不需要的去掉 } //设置需要扫描的表 static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { int parentRTindex = rti; ... 为减少篇幅,忽略不重要代码 ... foreach(l, root->append_rel_list) { //遍历所有,root->append_rel_list 是含父表、子表所有relation的list,在前面已经准备好 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); int childRTindex; ... //拿到真实条件表达式,如这个用例就是拿到 a = 12345 这个条件 childquals = get_all_actual_clauses(rel->baserestrictinfo); //调整append的relation属性,可能需要对一些特殊的表达式或查询结构复制一份并转换,本用例中不涉及 childquals = (List *) adjust_appendrel_attrs(root, (Node *) childquals, //常量表达式处理,对一些常量表达式会将值直接算出来 //显然本例中a=12345是一个列的OpExpr表达式 ,因此这里不会发生改变 childqual = eval_const_expressions(root, (Node *) make_ands_explicit(childquals)); //下面条件成立直接判断不需要扫描该表,本例中均不会成立 if (childqual && IsA(childqual, Const) && (((Const *) childqual)->constisnull || !DatumGetBool(((Const *) childqual)->constvalue))) { set_dummy_rel_pathlist(childrel); continue; } //可以简单认为make_ands_implicit与上面make_ands_explicit互逆 childquals = make_ands_implicit((Expr *) childqual); //根据clause生成一个RestrictInfo结构 childquals = make_restrictinfos_from_actual_clauses(root, childquals); childrel->baserestrictinfo = childquals; //检查约束是否能排除掉该表,即判断某个分区是要要扫描 if (relation_excluded_by_constraints(root, childrel, childRTE)) { set_dummy_rel_pathlist(childrel);//进来即是排除掉,继续下一个表检查 continue; } ... ... //若约束无法排除掉某个分区,后续代码继续正常执行分区表相关计算 //表约束排查函数relation_excluded_by_constraints 简介 bool relation_excluded_by_constraints(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { List *safe_restrictions; ... 
//初步判断是否需要进行约束排除,return false则是不能,如constraint_exclusion 是off状态时 //这时根本没开约束排除功能,约束自然不能生效 if (constraint_exclusion == CONSTRAINT_EXCLUSION_OFF || (constraint_exclusion == CONSTRAINT_EXCLUSION_PARTITION && !(rel->reloptkind == RELOPT_OTHER_MEMBER_REL || (root->hasInheritedTarget && rel->reloptkind == RELOPT_BASEREL && rel->relid == root->parse->resultRelation)))) return false; //检查 谓词条件(a=12345)调用的函数是否结果稳定,若稳定结果则将条件挂到safe_restrictions上 safe_restrictions = NIL; foreach(lc, rel->baserestrictinfo) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); if (!contain_mutable_functions((Node *) rinfo->clause)) safe_restrictions = lappend(safe_restrictions, rinfo->clause); } //检查safe_restrictions条件本身是不是冲突,自身冲突则排除掉 if (predicate_refuted_by(safe_restrictions, safe_restrictions)) return true; /* Only plain relations have constraints */ if (rte->rtekind != RTE_RELATION || rte->inh) return false; //把这个表的约束取出来 constraint_pred = get_relation_constraints(root, rte->relid, rel, true); //检查这些约束的条件所调用的函数结果是否稳定,不稳定的不能作为排查条件 safe_constraints = NIL; foreach(lc, constraint_pred) { Node *pred = (Node *) lfirst(lc); if (!contain_mutable_functions(pred)) safe_constraints = lappend(safe_constraints, pred); } //约束条件和谓词条件进行排查,如果冲突得返回true去掉该分区表 //如:约束条件为 a>1000 and a<=10000,谓词条件为a=12345,它们冲突则返回true if (predicate_refuted_by(safe_constraints, rel->baserestrictinfo)) return true;
上面的例子如下,有四个分区的表,直接定位到了子表tbl_3,注意父表不能过滤,默认都要扫描,但如果分区表设计合理,父表不应该有数据,扫描代价为0,对性能几乎无影响
postgres=# explain select *from tbl where a =11111; QUERY PLAN ------------------------------------------------------------- Append (cost=0.00..24.50 rows=7 width=42) -> Seq Scan on tbl (cost=0.00..0.00 rows=1 width=42) Filter: (a = 11111) -> Seq Scan on tbl_3 (cost=0.00..24.50 rows=6 width=42) Filter: (a = 11111) (5 rows)
简单来说,postgresql原来分区是通过谓词条件和表上约束条件之间关系实现过滤的,当然这只能实现静态剪枝。
基于规则的话,会在查询重写阶段按照规则进行替换,生成新的插入语句;基于触发器的话,会在insert主表前触发另外一个insert操作。这两个逻辑都比较简单,相关代码不再介绍。
那么postgres10分区具有什么样的功能呢?先看用例
postgres=# CREATE TABLE list_parted ( postgres(# a int postgres(# ) PARTITION BY LIST (a); CREATE TABLE postgres=# CREATE TABLE part_1 PARTITION OF list_parted FOR VALUES IN (1); CREATE TABLE postgres=# CREATE TABLE part_2 PARTITION OF list_parted FOR VALUES IN (2); CREATE TABLE postgres=# CREATE TABLE part_3 PARTITION OF list_parted FOR VALUES IN (3); CREATE TABLE postgres=# CREATE TABLE part_4 PARTITION OF list_parted FOR VALUES IN (4); CREATE TABLE postgres=# CREATE TABLE part_5 PARTITION OF list_parted FOR VALUES IN (5); CREATE TABLE postgres=# postgres=# insert into list_parted values(32); --faled ERROR: no partition of relation "list_parted" found for row DETAIL: Failing row contains (32). postgres=# insert into part_1 values(1); INSERT 0 1 postgres=# insert into part_1 values(2);--faled ERROR: new row for relation "part_1" violates partition constraint DETAIL: Failing row contains (2). postgres=# explain select *from list_parted where a =1; QUERY PLAN ----------------------------------------------------------------- Append (cost=0.00..41.88 rows=14 width=4) -> Seq Scan on list_parted (cost=0.00..0.00 rows=1 width=4) Filter: (a = 1) -> Seq Scan on part_1 (cost=0.00..41.88 rows=13 width=4) Filter: (a = 1) (5 rows)
上面是LIST表,建表是先建主表,再建子表,子表以 PARTITION OF 方式说明和主表的关系,约束条件应该就是后面in里面的值,再来个范围表的例子。
postgres=# CREATE TABLE range_parted ( postgres(# a int postgres(# ) PARTITION BY RANGE (a); CREATE TABLE postgres=# CREATE TABLE range_parted1 PARTITION OF range_parted FOR VALUES from (1) TO (1000); CREATE TABLE postgres=# CREATE TABLE range_parted2 PARTITION OF range_parted FOR VALUES FROM (1000) TO (10000); CREATE TABLE postgres=# CREATE TABLE range_parted3 PARTITION OF range_parted FOR VALUES FROM (10000) TO (100000); CREATE TABLE postgres=# postgres=# insert into range_parted1 values(343); INSERT 0 1 postgres=# postgres=# explain select *from range_parted where a=32425; QUERY PLAN --------------------------------------------------------------------- Append (cost=0.00..41.88 rows=14 width=4) -> Seq Scan on range_parted (cost=0.00..0.00 rows=1 width=4) Filter: (a = 32425) -> Seq Scan on range_parted3 (cost=0.00..41.88 rows=13 width=4) Filter: (a = 32425) (5 rows) postgres=# set constraint_exclusion = off; SET postgres=# explain select *from range_parted where a=32425; QUERY PLAN --------------------------------------------------------------------- Append (cost=0.00..125.63 rows=40 width=4) -> Seq Scan on range_parted (cost=0.00..0.00 rows=1 width=4) Filter: (a = 32425) -> Seq Scan on range_parted1 (cost=0.00..41.88 rows=13 width=4) Filter: (a = 32425) -> Seq Scan on range_parted2 (cost=0.00..41.88 rows=13 width=4) Filter: (a = 32425) -> Seq Scan on range_parted3 (cost=0.00..41.88 rows=13 width=4) Filter: (a = 32425) (9 rows)
和LIST差不多,就是语法略有不同,范围表值是一个连续的范围,LIST表是单点或多点的集合。从上面例子可以看到,显然还是走的约束排除过滤子表的方式。
postgres=# CREATE TABLE hash_parted ( postgres(# a int postgres(# ) PARTITION BY HASH (a); ERROR: unrecognized partitioning strategy "hash" postgres=# postgres=# postgres=# CREATE TABLE cccc_parted ( postgres(# a int postgres(# ) PARTITION BY cccc (a); ERROR: unrecognized partitioning strategy "cccc" postgres=#
HASH分区语法还不支持,以后或许会支持。
与原来老的建分区方式比,简单了不少,不用建约束了,应该是内部创建了。插入能自动计算出子表插入位置,应该是在执行器中增加了根据给定值直接计算出目标分区的逻辑,从而提高性能。分区过滤和原来一样,通过约束排除,目前没看到增强迹象,但毕竟只是开始。总的来说语法功能初步完善,但这仅是一个开始,以后肯定会越来越丰富强大的。
这里对分区测试主要关注两个点,1是分区剪枝,2是导入数据性能,参考德哥测试方法结果如下:
测试环境:pg10 DEBUG版,全默认编译,win7 i74770s cpu,普通硬盘。
传统方式建分区1000个,时间33.65秒,建规则22.2秒,总时间55.85秒。
第一个分区插入100W行用时185.75秒,第996个分区488.9秒。
第一个分区select用时172ms,几乎全是生成计划时间
第996个分区select用时171 ms,几乎全是生成计划时间
pg10分区方式建分区1000个,时间40.2秒,总时间40.2秒,环境同上。
第一个分区插入100W行用时3.5秒,第996个分区4.72秒。
第一个分区select用时133.5ms,几乎全是生成计划时间
第996个分区select用时133.8 ms,几乎全是生成计划时间
总的来说,新老分区剪枝走的策略一样,提升有限,但还是略有一点,主要是pg10计划时间稍短点,但insert性能实实在在的实现了质的飞跃, 两个数量级差别!
测试实况如下:
--传统方式 create table test1(id int8, info text, crt_time timestamp); do language plpgsql $$ declare i int; begin for i in 1..1000 loop execute 'create table test1_'||i||'(like test1 including all) inherits(test1)'; execute 'alter table test1_'||i||' add constraint ck_test1_'||i||' check(id>='||20000000::int8*(i-1)+1||' and id<'||20000000::int8*i+1||')'; end loop; end; $$; --规则 do language plpgsql $$ declare i int; begin for i in 1..1000 loop execute 'create or replace rule r'||i||' as on insert to test1 where id >= '||20000000::int8*(i-1)+1||' and id<'||20000000::int8*i+1||' do instead (insert into test1_'||i||' values (new.id,new.info,new.crt_time))'; end loop; end; $$; postgres=# insert into test1 select generate_series (19990000,21000000); INSERT 0 0 Time: 185755.382 ms (03:05.755) postgres=# insert into test1 select generate_series (19919990000,19921000000); INSERT 0 0 Time: 488937.560 ms (08:08.938) postgres=# explain analyze select *from test1 where id=20000000; QUERY PLAN ----------------------------------------------------------------------------------------------------- Append (cost=0.00..170.01 rows=2 width=48) (actual time=1.493..1.493 rows=1 loops=1) -> Seq Scan on test1 (cost=0.00..0.00 rows=1 width=48) (actual time=0.163..0.163 rows=0 loops=1) Filter: (id = 20000000) -> Seq Scan on test1_1 (cost=0.00..170.01 rows=1 width=48) (actual time=1.328..1.328 rows=1 loops=1) Filter: (id = 20000000) Rows Removed by Filter: 10000 Planning time: 166.882 ms Execution time: 1.552 ms (8 rows) Time: 172.326 ms postgres=# explain analyze select *from test1 where id=19919990000; QUERY PLAN -------------------------------------------------------------------------------------------------------- Append (cost=0.00..170.01 rows=2 width=48) (actual time=0.155..1.628 rows=1 loops=1) -> Seq Scan on test1 (cost=0.00..0.00 rows=1 width=48) (actual time=0.135..0.135 rows=0 loops=1) Filter: (id = '19919990000'::bigint) -> Seq Scan on test1_996 (cost=0.00..170.01 rows=1 width=48) 
(actual time=0.019..1.491 rows=1 loops=1) Filter: (id = '19919990000'::bigint) Rows Removed by Filter: 10000 Planning time: 165.187 ms Execution time: 1.690 ms (8 rows) Time: 171.021 ms
pg10分区方式 postgres=# create table test(id int8, info text, crt_time timestamp)partition by range(id); CREATE TABLE Time: 41.593 ms postgres=# postgres=# do language plpgsql $$ postgres$# declare postgres$# i int; postgres$# begin postgres$# for i in 1..1000 loop postgres$# execute 'create table test_'||i||' PARTITION OF test FOR VALUES FROM ('||20000000::int8*(i-1)+1||') to ('||20000000::int8*i+1||')'; postgres$# end loop; postgres$# end; postgres$# $$; DO Time: 40272.109 ms (00:40.272) postgres=# insert into test select generate_series (19990000,21000000); INSERT 0 1010001 Time: 3500.975 ms (00:03.501) postgres=# insert into test select generate_series (19919990000,19921000000); INSERT 0 1010001 Time: 4723.782 ms (00:04.724) postgres=# explain analyze select *from test where id=20000000; QUERY PLAN --------------------------------------------------------------------------------------------------------- Append (cost=0.00..170.01 rows=2 width=48) (actual time=1.449..1.450 rows=1 loops=1) -> Seq Scan on test (cost=0.00..0.00 rows=1 width=48) (actual time=0.121..0.121 rows=0 loops=1) Filter: (id = 20000000) -> Seq Scan on test_1 (cost=0.00..170.01 rows=1 width=48) (actual time=1.328..1.328 rows=1 loops=1) Filter: (id = 20000000) Rows Removed by Filter: 10000 Planning time: 128.244 ms Execution time: 1.491 ms (8 rows) Time: 133.588 ms postgres=# explain analyze select *from test where id=19919990000; QUERY PLAN ------------------------------------------------------------------------------------------------------- Append (cost=0.00..170.01 rows=2 width=48) (actual time=0.153..1.566 rows=1 loops=1) -> Seq Scan on test (cost=0.00..0.00 rows=1 width=48) (actual time=0.136..0.136 rows=0 loops=1) Filter: (id = '19919990000'::bigint) -> Seq Scan on test_996 (cost=0.00..170.01 rows=1 width=48) (actual time=0.015..1.429 rows=1 loops=1) Filter: (id = '19919990000'::bigint) Rows Removed by Filter: 10000 Planning time: 128.298 ms Execution time: 1.623 ms (8 rows) Time: 133.836 ms
AlterTableStmt: ... | ALTER TABLE relation_expr partition_cmd | ALTER TABLE IF_P EXISTS relation_expr partition_cmd: ATTACH PARTITION qualified_name ForValues ... | DETACH PARTITION qualified_name ... ; ...
alter table 增加了分区支持,如:
ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2);
注意这里的子表part_2表得先建好,然后才能alter table 挂到主表上。
CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')' OptInherit OptPartitionSpec OptWith OnCommitOption OptTableSpace ... | CREATE OptTemp TABLE qualified_name PARTITION OF qualified_name OptPartitionElementList ForValues OptPartitionSpec OptWith OnCommitOption OptTableSpace ... OptPartitionSpec: PartitionSpec { $$ = $1; } | /*EMPTY*/ { $$ = NULL; } ; PartitionSpec: PARTITION BY part_strategy '(' part_params ')' ... ; part_strategy: IDENT { $$ = $1; } | unreserved_keyword { $$ = pstrdup($1); } ;
create table在OptInherit 后面OptPartitionSpec 说明的是主表是一个分区表,ForValues 后面的OptPartitionSpec 则说明子分区的下面还可以挂子分区,至少代码上表现是这样的,试了试效果如下:
postgres=# CREATE TABLE range_list ( postgres(# a int postgres(# ) PARTITION BY RANGE (a); CREATE TABLE postgres=# CREATE TABLE range_pa1 PARTITION OF range_list FOR VALUES from (1) TO (1000) PARTITION BY LIST (a);; CREATE TABLE postgres=# CREATE TABLE range_pa2 PARTITION OF range_list FOR VALUES FROM (1000) TO (10000); CREATE TABLE postgres=# CREATE TABLE range_list1 PARTITION OF range_pa1 FOR VALUES IN (10); CREATE TABLE postgres=# CREATE TABLE range_list2 PARTITION OF range_pa1 FOR VALUES IN (20); CREATE TABLE postgres=# insert into range_pa1 values(20); INSERT 0 1 postgres=# insert into range_list2 values(20); INSERT 0 1 postgres=# explain select *from range_list where a =20; QUERY PLAN ------------------------------------------------------------------- Append (cost=0.00..41.88 rows=15 width=4) -> Seq Scan on range_list (cost=0.00..0.00 rows=1 width=4) Filter: (a = 20) -> Seq Scan on range_pa1 (cost=0.00..0.00 rows=1 width=4) Filter: (a = 20) -> Seq Scan on range_list2 (cost=0.00..41.88 rows=13 width=4) Filter: (a = 20) (7 rows)
果然能支持多级分区,且各级子分区都能起到过滤作用,但是子分区都能插入数据,即使它的下面还有下一级子分区。
CreateForeignTableStmt: ... | CREATE FOREIGN TABLE qualified_name PARTITION OF qualified_name OptPartitionElementList ForValues SERVER name create_generic_options ... | CREATE FOREIGN TABLE IF_P NOT EXISTS qualified_name PARTITION OF qualified_name OptPartitionElementList ForValues SERVER name create_generic_options
看起来似乎是支持直接把外部表作为某个表的子表,这个功能若好用会很实用,不过还没做测试。
这是建表DDL函数,建一个普通表都会进入该接口,里面增加了分区处理信息的逻辑.
ObjectAddress DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, ObjectAddress *typaddress, const char *queryString) { //增加了queryString存建表语句,主表不用,子表创建时用 char relname[NAMEDATALEN]; Oid namespaceId; List *schema = stmt->tableElts; ...//中间省略部分代码 if (stmt->partbound)//如果有分区键约束信息 { Node *bound; ParseState *pstate; Oid parentId = linitial_oid(inheritOids); Relation parent; parent = heap_open(parentId, NoLock); if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("\"%s\" is not partitioned", RelationGetRelationName(parent)))); pstate = make_parsestate(NULL); pstate->p_sourcetext = queryString; //解析处理分区条件,如 from (1) to (10) 转成一个list bound = transformPartitionBound(pstate, parent, stmt->partbound); //分区范围检查,如果是否和已有分区冲突等 check_new_partition_bound(relname, parent, bound); heap_close(parent, NoLock); //保存子分区范围信息,实际上子表已经在上面创建好了,这里把分区范围信息更新就行 //pg_class中新增加了一列relpartbound存范围信息,这存的是一个就Node转出来的字符串结构 StorePartitionBound(rel, bound); //更新本地立即可见 CommandCounterIncrement(); } if (stmt->partspec) { char strategy; ... //解析处理分区键,如 partition by range(a),将这个转成一个PartitionSpec Node。 //这里有一点实现比较奇怪,对于分区类型,list,range,在parser语法解析阶段并不精确确定,存的是一个 //字符串,进入此函数中再做字符串比较,只允许list,range这两种情况,这为什么不放到gram.y中处理? 
//放gram.y中处理更高效,也更清晰 stmt->partspec = transformPartitionSpec(rel, stmt->partspec, &strategy); //计算分区键属性值,拿着分区键去pg_attribute 表找全信息 ComputePartitionAttrs(rel, stmt->partspec->partParams, partattrs, &partexprs, partopclass, partcollation); //分区键个数 partnatts = list_length(stmt->partspec->partParams); //把分区信息存到 pg_partitioned_table 表中 StorePartitionKey(rel, strategy, partnatts, partattrs, partexprs, partopclass, partcollation); //如果是分区键,必须设成非空 if (strategy == PARTITION_STRATEGY_RANGE) { for (i = 0; i < partnatts; i++) { AttrNumber partattno = partattrs[i]; Form_pg_attribute attform = descriptor->attrs[partattno-1]; if (partattno != 0 && !attform->attnotnull) { /* Add a subcommand to make this one NOT NULL */ AlterTableCmd *cmd = makeNode(AlterTableCmd); cmd->subtype = AT_SetNotNull; cmd->name = pstrdup(NameStr(attform->attname)); cmds = lappend(cmds, cmd); } } /* * Although, there cannot be any partitions yet, we still need to * pass true for recurse; ATPrepSetNotNull() complains if we don't */ if (cmds != NIL) AlterTableInternal(RelationGetRelid(rel), cmds, true); } } ... return address; }
整体来说,建表逻辑主要增加了对主表的分区键处理逻辑(partition by range 和partition by list)和对子表的分区范围处理逻辑(for values from …to … 和for values in ..)。
分区键相关信息放在pg_partitioned_table系统表中,而分区范围值放在pg_class的relpartbound中,并且建主表时是不知道有多少子表的,也不需要知道,使用时可以动态取到,这对list、range分区表没问题,但是对于hash分区表就不行了,子表数不定就无法确定数据分发规则,因此基于这一套逻辑基本是无法实现hash分区表。
当对分区表执行查询时,如果constraint_exclusion 设置为on或者partition时,最终会在relation_excluded_by_constraints 函数中通过约束排除掉不需要扫描的表,排除逻辑和原来继承表的逻辑是一样的,不再赘述。其中关键的区别是约束或者分区范围的来源途径不同,不管是普通表还是分区表,约束都在这里拿到:
constraint_pred = get_relation_constraints(root, rte->relid, rel, true);
在get_relation_constraints函数里面会区别约束和分区表范围,具体请看代码如下:
static List * get_relation_constraints(PlannerInfo *root, Oid relationObjectId, RelOptInfo *rel, bool include_notnull) { List *result = NIL; Index varno = rel->relid; Relation relation; TupleConstr *constr; List *pcqual; relation = heap_open(relationObjectId, NoLock); //constr取到约束结构,普通表的 constr = relation->rd_att->constr; //当然上面的约束还不是可直接用的,还需要转换成约束排除接口可用的 if (constr != NULL) { //如果这个表有约束则在这里面进行转换计算 } //如果是分区表,那么下面的pcqual将取到分区范围,实际上这个pcqual取到的就是 //relation->rd_partcheck,如果这个值不空就直接拿,为空说明是第一次拿,还要从系统表中取一下, //从pg_class的relpartbound字段拿到,就是上面建表时存的。 pcqual = RelationGetPartitionQual(relation, true); if (pcqual) { ... //拿到后进行一些简单计算处理 } } //最终返回和约束完全相同的结构,分区范围也是表的一种约束,但与普通约束分开逻辑更加清晰,以后扩展功能也更方便
前面语法篇测insert性能与继承表+触发器实现的分区比有了质的飞跃,性能提升2个数量级,初步猜测执行时增加了计算插入目标分区功能,实际情况如何呢?请看下面ExecInsert插入函数
static TupleTableSlot * ExecInsert(ModifyTableState *mtstate, TupleTableSlot *slot, TupleTableSlot *planSlot, List *arbiterIndexes, OnConflictAction onconflict, EState *estate, bool canSetTag) { HeapTuple tuple; ResultRelInfo *resultRelInfo; ResultRelInfo *saved_resultRelInfo = NULL; Relation resultRelationDesc; Oid newId; ... //如果有分区进入下面逻辑 if (mtstate->mt_partition_dispatch_info) { int leaf_part_index; ... //直接用要插入的值slot去找目标分区 leaf_part_index = ExecFindPartition(resultRelInfo, mtstate->mt_partition_dispatch_info, slot, estate); ... //mtstate->mt_partitions上存的是所有的分区ResultRelInfo结构,leaf_part_index是代表 //目标表是第几个,直接跳转换到目标所在的内存取到目标分区表ResultRelInfo结构 resultRelInfo = mtstate->mt_partitions + leaf_part_index; ... } //其它基本同原来insert逻辑 int //找分区 ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd, TupleTableSlot *slot, EState *estate) { int result; ... //找到是第几个分区 result = get_partition_for_tuple(pd, slot, estate, &failed_at); if (result < 0) { ... //负数即分区不存在,报错,要插的值有问题 } //返回要插的分区数 return result; } //get_partition_for_tuple函数中怎么找不再具体介绍,大意是拿着要插的分区键值去和所有的分区条件做比较, //以二分法搜索,找到符合的就返回分区编号,如第三个符合则返回3
显然可以看到,在执行时增加了计算插入目标分区的函数,直接确定插入哪个分区,因此插入性能暴涨。
对于update表操作,实际就是select+insert操作,新分区表目前对这块并没做优化,这块不再介绍。
以上是基于postgresql10分区当前已经提交代码的一些个人见解和想法,不一定正确,欢迎大家讨论。