diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile index d941b4305d5..494b4c25e09 100644 --- a/contrib/pageinspect/Makefile +++ b/contrib/pageinspect/Makefile @@ -14,7 +14,7 @@ OBJS = \ rawpage.o EXTENSION = pageinspect -DATA = pageinspect--1.8--1.9.sql \ +DATA = pageinspect--1.8--1.9.sql \ pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \ pageinspect--1.5.sql pageinspect--1.5--1.6.sql \ pageinspect--1.4--1.5.sql pageinspect--1.3--1.4.sql \ diff --git a/contrib/pageinspect/brinfuncs.c b/contrib/pageinspect/brinfuncs.c index 2c3da717e71..72f6408a8d9 100644 --- a/contrib/pageinspect/brinfuncs.c +++ b/contrib/pageinspect/brinfuncs.c @@ -22,16 +22,21 @@ #include "lib/stringinfo.h" #include "miscadmin.h" #include "pageinspect.h" +#include "storage/bufmgr.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/rel.h" +#include "miscadmin.h" PG_FUNCTION_INFO_V1(brin_page_type); PG_FUNCTION_INFO_V1(brin_page_items); PG_FUNCTION_INFO_V1(brin_metapage_info); PG_FUNCTION_INFO_V1(brin_revmap_data); +/* GPDB specific */ +PG_FUNCTION_INFO_V1(brin_revmap_chain); + #define IS_BRIN(r) ((r)->rd_rel->relam == BRIN_AM_OID) typedef struct brin_column_state @@ -361,8 +366,11 @@ brin_metapage_info(PG_FUNCTION_ARGS) Page page; BrinMetaPageData *meta; TupleDesc tupdesc; - Datum values[4]; - bool nulls[4]; + Datum values[8]; + bool nulls[8]; + Datum *firstrevmappages; + Datum *lastrevmappages; + Datum *lastrevmappagenums; HeapTuple htup; if (!superuser()) @@ -388,6 +396,41 @@ brin_metapage_info(PG_FUNCTION_ARGS) values[2] = Int32GetDatum(meta->pagesPerRange); values[3] = Int64GetDatum(meta->lastRevmapPage); + /* GPDB specific fields */ + values[4] = Int64GetDatum(meta->isAo); + if (!meta->isAo) + { + nulls[5] = true; + nulls[6] = true; + nulls[7] = true; + } + else + { + firstrevmappages = palloc(sizeof(Datum) * MAX_AOREL_CONCURRENCY); + lastrevmappages = palloc(sizeof(Datum) * MAX_AOREL_CONCURRENCY); + 
lastrevmappagenums = palloc(sizeof(Datum) * MAX_AOREL_CONCURRENCY); + + for (int i = 0; i < MAX_AOREL_CONCURRENCY; i++) + { + firstrevmappages[i] = UInt32GetDatum(meta->aoChainInfo[i].firstPage); + lastrevmappages[i] = UInt32GetDatum(meta->aoChainInfo[i].lastPage); + lastrevmappagenums[i] = UInt32GetDatum(meta->aoChainInfo[i].lastLogicalPageNum); + } + + values[5] = PointerGetDatum(construct_array(firstrevmappages, + MAX_AOREL_CONCURRENCY, + INT8OID, + sizeof(int64), true, 'i')); + values[6] = PointerGetDatum(construct_array(lastrevmappages, + MAX_AOREL_CONCURRENCY, + INT8OID, + sizeof(int64), true, 'i')); + values[7] = PointerGetDatum(construct_array(lastrevmappagenums, + MAX_AOREL_CONCURRENCY, + INT8OID, + sizeof(int64), true, 'i')); + } + htup = heap_form_tuple(tupdesc, values, nulls); PG_RETURN_DATUM(HeapTupleGetDatum(htup)); @@ -449,3 +492,70 @@ brin_revmap_data(PG_FUNCTION_ARGS) SRF_RETURN_DONE(fctx); } + +/* + * GPDB: Returns the chain of revmap block numbers for a given segno (aka block + * sequence). 
+ */ +Datum +brin_revmap_chain(PG_FUNCTION_ARGS) +{ + bytea *raw_page = PG_GETARG_BYTEA_P(0); + Oid indexRelid = PG_GETARG_OID(1); + int segno = PG_GETARG_UINT32(2); + Page metapage; + BrinMetaPageData *meta; + ArrayBuildState *astate = NULL; + BlockNumber currRevmapBlk; + + Relation indexRel = index_open(indexRelid, AccessShareLock); + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + (errmsg("must be superuser to use raw page functions")))); + + if (!IS_BRIN(indexRel)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a %s index", + RelationGetRelationName(indexRel), "BRIN"))); + + if (segno < 0 || segno > AOTupleId_MaxSegmentFileNum) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("\"%u\" is not a valid segno value (valid values are in [0,127])", + segno))); + + metapage = verify_brin_page(raw_page, BRIN_PAGETYPE_META, "metapage"); + + if (PageIsNew(metapage)) + { + index_close(indexRel, AccessShareLock); + PG_RETURN_NULL(); + } + + meta = (BrinMetaPageData *) PageGetContents(metapage); + currRevmapBlk = meta->aoChainInfo[segno].firstPage; + while (currRevmapBlk != InvalidBlockNumber) + { + /* Look at the chain link to see what the next revmap blknum is */ + Buffer curr; + + astate = accumArrayResult(astate, UInt32GetDatum(currRevmapBlk), false, + INT8OID, CurrentMemoryContext); + + curr = ReadBuffer(indexRel, currRevmapBlk); + LockBuffer(curr, BUFFER_LOCK_SHARE); + currRevmapBlk = BrinNextRevmapPage(BufferGetPage(curr)); + UnlockReleaseBuffer(curr); + } + + index_close(indexRel, AccessShareLock); + + if (astate) + PG_RETURN_DATUM(makeArrayResult(astate, + CurrentMemoryContext)); + else + PG_RETURN_NULL(); +} diff --git a/contrib/pageinspect/pageinspect--1.8--1.9.sql b/contrib/pageinspect/pageinspect--1.8--1.9.sql index be89a64ca14..158695ce3d3 100644 --- a/contrib/pageinspect/pageinspect--1.8--1.9.sql +++ b/contrib/pageinspect/pageinspect--1.8--1.9.sql @@ -135,3 +135,24 @@ 
CREATE FUNCTION brin_page_items(IN page bytea, IN index_oid regclass, RETURNS SETOF record AS 'MODULE_PATHNAME', 'brin_page_items' LANGUAGE C STRICT PARALLEL SAFE; +-- brin_metapage_info() +-- +DROP FUNCTION brin_metapage_info(IN page bytea, OUT magic text, + OUT version integer, OUT pagesperrange integer, OUT lastrevmappage bigint); +CREATE FUNCTION brin_metapage_info(IN page bytea, OUT magic text, + OUT version integer, OUT pagesperrange integer, OUT lastrevmappage bigint, + /* GPDB specific for AO/CO tables */ + OUT isAo boolean, + OUT firstrevmappages bigint[], + OUT lastrevmappages bigint[], + OUT lastrevmappagenums bigint[]) +AS 'MODULE_PATHNAME', 'brin_metapage_info' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- brin_revmap_chain() +-- +CREATE FUNCTION brin_revmap_chain(IN page bytea, IN indexrelid regclass, IN segno int) + RETURNS bigint[] +AS 'MODULE_PATHNAME', 'brin_revmap_chain' + LANGUAGE C STRICT PARALLEL SAFE; diff --git a/src/backend/access/brin/README b/src/backend/access/brin/README index c1203c11c2f..4fd4e3cd8a3 100644 --- a/src/backend/access/brin/README +++ b/src/backend/access/brin/README @@ -191,6 +191,38 @@ Future improvements GPDB: +(1) Main design problem: + +BRIN needs special handling for append-optimized tables. The revmap relies on +the assumption that block numbers are consecutive, there are no gaps in the +sequence of block numbers for a given relation. This assumption does not hold +for append-optimized tables. The AO tid is comprised of +<segment file number, row number>. Concurrent inserts into an AO table result in +multiple segment files, one per insert, being populated. + +The existing revmap structure is simple in the sense that it is easy to +calculate the block number for a revmap page (the block layout is always: +{meta page, [revmap pages], [data pages]}). The number of revmap pages is +directly proportional to the logical heap block numbers we are covering in the +index.
+ +If we continue with this representation, we will have to create revmap entries +for all the nonexistent TIDs in the gaps between segments, leading to large amounts of wasted +space. For example, in a simple AO table with segment 1, having 10 logical heap +blocks: [33554432, 33554441], we would have to create revmap pages covering the +range [0, 33554431], and if pages_per_range = 1, that would mean creating close +to (33554432 / REVMAP_PAGE_MAXITEMS) = (33554432 / 5456) ~= 6150 revmap pages! +And an AO/CO table can have 128 such segments! + +We discuss how we change the internal structure for the metapage and revmap to +tackle this problem (See Section (3)). + +There is also the question of how we can ensure that most of the code between +heap and AO/CO tables is unified. Section (2) describes how we tackle that +through the introduction of new table AM APIs and BlockSequences. + +(2) BlockSequences and Table AM APIs: + We have introduced a new table AM API relation_get_block_sequences() that helps unify code for block-based iteration for BRIN scan and summarization, in a table AM agnostic manner. @@ -216,52 +248,102 @@ Sometimes, an alternative API is also needed: to get the block sequence, given a logical heap block number. For that purpose, we have introduced relation_get_block_sequence(). -BRIN on append only tables -------------------------- - -Cloudberry has a new kind of table - append only table. BRIN needs special -handling for append-optimized tables. The revmap relies on the assumption -that block numbers are consecutive, there are no gaps in the sequence of block -numbers for a given relation. This assumption does not hold for append-optimized -tables. The AO tid is comprised of . Concurrent -inserts into an AO table result in multiple segment files, one per insert, being -populated. When mapped to heap TIDs, there is a large gap between the block -number of the last TID on segment number 1 and the first TID on segment -number 2.
If we continue to represent this using just the revmap, we will have -to create revmap entries for all the nonexistent TIDs in this gap, leading to -large amount of wasted space.The structure of revmap has been improved to adapt -to append only table. An upper block on top of revmap is introduced to avoid -wasting space due to non-existent AO TIDs. - -The Ao table is logically composed of 128 aosegs to support concurrent inserts. -Each tuple in the Ao table corresponds to a virtual tid. The virtual tid of -the first tuple of each Aoseg is equal to (248/128)*segnum, then the first -virtual block number of each Aoseg is equal to (232/128) * segnum. - -If there are three blocks in aoseg0, aoseg1, and aoseg127, their block numbers -are 0x0000 0000 0x0000 0001 0x0000 0002, 0x0200 0000 0x0200 0001 0x0200 0002, -0xFE00 0000 0xFE00 0001 0xFE00 0002. Then the largest index in the revmap array -is 0xFE00 0002. In this way, the revmap array contains 4,261,412,866 tids, -taking up 24GB of space. This is clearly unacceptable. - -So we added an extra upper level on top of the revmap. In this way, at the -level of revmap, tid and the corresponding block are initialized only when -the corresponding block number has data. The upper level block stores the -revmap level block number. In this way, the revmap level will only store the -tid corresponding to the block that has been filled with data. The upper -level will initialize all the blocks corresponding to the block number at -one time. But because the upper level only stores the block number of the -revmap, the number of records in the upper level is 232/TidNumPerPage which -is approximately equal to 800,000. Takes up 3.2MB of space. 
- -The corresponding relationship between the block number and the upper level -array index is: -upper_index=blocknum/TidNumPerPage -Stored in the upper level array is the block number of the revmap, and the -offset in the block of the revmap tid is: -revmap_offset=blocknum%TidNumPerPage -TidNumPerPage: The number of tids that each revmap page can hold. -All the discussions above have ignored the pagesPerRange variable. - - - +(3) Changes to the internal page structure: + +BRIN data pages remain unchanged. Only the metapage and revmap pages undergo a +change in structure, in order to deal with the main design problem highlighted +in Section (1). Also, these changes are made only for AO/CO tables - for heap +tables, the fields added to the internal structures are unused. + +We completely break away from the restriction that the revmap pages follow one +another right after the metapage, in contiguous block numbers. Instead, we now +have them point to one another in a singly linked list. We have introduced the +nextRevmapPage pointer in BrinSpecialSpace to this end. + +Note: Since revmap pages are not contiguous, we don't have to follow the page +evacuation protocol (that we have to follow for indexes on heap tables), which +had to move data pages to the end of the index relation, to make room for +revmap pages. + +Furthermore, there are up to MAX_AOREL_CONCURRENCY such linked lists of revmap +pages. There is one list per block sequence. The heads and tails of these lists +(or chains) are maintained in the metapage (and cached in the revmap access +struct). + +We have depicted the logical chain structure below: + + +----------+ + | meta | + | | + | | + +-----+----+ + | + +----------------+------------------+ + seq0| seq1| ... 
seqN| + | | | + +----v-----+ +-----v----+ +-----v----+ + | rev | | rev | | rev | + | +--+--+ | +--+--+ | +--+--+ + | | 1| | | | 1| | | | 1| | + +----+--++-+ +----+--++-+ +----+--++-+ + | | | + | | | + +--------v-+ +--------v-+ +--------v-+ + | rev | | rev | | rev | + | +--+--+ | +--+--+ | +--+--+ + | | 2| | | | 2| | | | 2| | + +----+--++-+ +----+--++-+ +----+--++-+ + | | | + v v v + ... + +----------+ +----------+ +----------+ + | rev | | rev | | rev | + | +--+--+ | +--+--+ | +--+--+ + | |n1| | | |n2| | | |nN| | + +----+--+--+ +----+--+--+ +----+--+--+ + +Omitted from the diagram are the tail pointers to the revmap chains and the +data pages, for clarity. + +Since revmap pages are no longer contiguous for AO/CO tables, we have to +additionally maintain logical page numbers (in the BrinSpecialSpace) for all +revmap pages (depicted in the diagram above). The need can be highlighted with +the following example: + +For heap tables, let's say we have metapage: Block0 and revmap pages: Block1,2,3 +and let's say we have pages_per_range = 1. If we wanted to look up the summary +info for heapBlk=6000, that would map to Block2 (we know that from simple math. +See: HEAPBLK_TO_REVMAP_BLK()). However, for AO/CO tables, we have no idea what +revmap block number this would map to since revmap pages are not contiguous. +This is where the 1-based logical page number comes in. With it we can say, +heapBlk 6000 maps to the 2nd revmap page for block sequence 0 (seg0) +(See: HEAPBLK_TO_REVMAP_PAGENUM_AO()). We can then traverse the revmap chain for +seg0 until we find the revmap page with pagenum=2. + +These logical page numbers are used for both iterating over the revmap during +scans and also while extending the revmap (see revmap_extend_and_get_blkno_ao()).
+The logical revmap page number for a given logical heap block is calculated by +paying attention to the segment to which the logical heap block belongs and the +fixed number of items that can fit in a revmap page (See +HEAPBLK_TO_REVMAP_PAGENUM_AO()). The logical page numbers of the last chain +members are also stored in the metapage (and cached in the revmap access struct). + +For operations such as scan, build and summarize: +We always traverse each chain in order, justifying their singly-linked-ness. +Also, these chains are always traversed in block sequence order - the chain for +seg0 is traversed, then the chain for seg1, and so on. We use a revmap iterator to attain +this goal. Before traversing each chain, we position the iterator at the start +of the chain. + +We never have to lock more than 1 revmap page at a time during chain traversal. +Only for revmap extension, do we have to lock two revmap pages: the last revmap +page in the chain and the new revmap page being added. + +For operations such as insert, we make use of the chain tail pointer in the +metapage. Due to the appendonly nature of AO/CO tables, we would always write to +the last logical heap block within a block sequence. Thus, unlike for heap, +blocks other than the last block would never be summarized as a result of an +insert. So, we can safely position the revmap iterator at the end of the chain +(instead of traversing the chain unnecessarily from the front). + +Note: Multiple revmap pages across chains can map to the same data page. 
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 60e6757c647..5c4546515ec 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -61,6 +61,8 @@ typedef struct BrinBuildState BrinRevmap *bs_rmAccess; BrinDesc *bs_bdesc; BrinMemTuple *bs_dtuple; + /* GPDB specific state for AO/CO tables */ + bool bs_isAo; } BrinBuildState; /* @@ -75,8 +77,11 @@ typedef struct BrinOpaque #define BRIN_ALL_BLOCKRANGES InvalidBlockNumber -static BrinBuildState *initialize_brin_buildstate(Relation idxRel, - BrinRevmap *revmap, BlockNumber pagesPerRange); +static BrinBuildState * +initialize_brin_buildstate(Relation idxRel, + BrinRevmap *revmap, + BlockNumber pagesPerRange, + bool isAo); static void terminate_brin_buildstate(BrinBuildState *state); static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, bool include_partial, double *numSummarized, double *numExisting); @@ -173,6 +178,16 @@ brininsert(Relation idxRel, Datum *values, bool *nulls, MemoryContext oldcxt = CurrentMemoryContext; bool autosummarize = BrinGetAutoSummarize(idxRel); + /* + * GPDB: XXX: We initialize the revmap per-tuple. This routine has + * non-trivial CPU overhead (including a snapshot test and meta-page lock) + * Also, there is definitely memory overhead (even more so for GPDB, due to + * the added AO/CO specific state) + * + * Can we cache the access struct somehow, maybe in BrinDesc (as + * part of IndexInfo->ii_AmCache)? Both heap tables and AO/CO tables can + * definitely benefit from it. There might be concurrency concerns, however. 
+ */ revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL); /* @@ -182,6 +197,22 @@ brininsert(Relation idxRel, Datum *values, bool *nulls, origHeapBlk = ItemPointerGetBlockNumber(heaptid); heapBlk = (origHeapBlk / pagesPerRange) * pagesPerRange; + /* + * GPDB: Due to the appendonly nature of AO/CO tables, we would always write + * to the last logical heap block within a block sequence (due to + * monotonically increasing gp_fastsequence allocations). Thus, unlike for + * heap, blocks other than the last block would never be summarized as a + * result of an insert. + * + * This holds true even for INSERTs following a VACUUM on a given segment, + * since VACUUM does not reset gp_fastsequence on the VACUUMed segment. + * + * So, we can safely position the revmap iterator at the end of the chain + * (instead of traversing the chain unnecessarily from the front). + */ + if (RelationIsAppendOptimized(heapRel)) + brinRevmapAOPositionAtEnd(revmap, AOSegmentGet_blockSequenceNum(heapBlk)); + for (;;) { bool need_insert = false; @@ -561,6 +592,11 @@ bringetbitmap(IndexScanDesc scan, Node **bmNodeP) */ BlockNumber startblknum = sequences[i].startblknum; BlockNumber endblknum = sequences[i].startblknum + sequences[i].nblocks; + int currseq = AOSegmentGet_blockSequenceNum(startblknum); + + if (RelationIsAppendOptimized(heapRel)) + brinRevmapAOPositionAtStart(opaque->bo_rmAccess, currseq); + for (heapBlk = startblknum; heapBlk < endblknum; heapBlk += opaque->bo_pagesPerRange) { bool addrange; @@ -823,8 +859,25 @@ brinbuildCallback(Relation index, * tuples for those too. */ - if (state->bs_currRangeStart < heapBlockGetCurrentAosegStart(thisblock)) - state->bs_currRangeStart = heapBlockGetCurrentAosegStart(thisblock); + /* + * GPDB: Adjust build state depending on latest logical heap block + * + * XXX: We can move this out of brinbuildCallback() if we refactor + * brinbuild() to loop over BlockSequences, much like we do in + * bringetbitmap() and brinsummarize(). 
+ */ + if (state->bs_isAo) + { + BlockNumber seqStartBlk = AOHeapBlockGet_startHeapBlock(thisblock); + if (state->bs_currRangeStart < seqStartBlk) + { + /* adjust the current block sequence */ + int seqNum = AOSegmentGet_blockSequenceNum(thisblock); + brinRevmapAOPositionAtStart(state->bs_rmAccess, seqNum); + /* readjust the range lower bound */ + state->bs_currRangeStart = seqStartBlk; + } + } while (thisblock > state->bs_currRangeStart + state->bs_pagesPerRange - 1) { @@ -838,6 +891,7 @@ brinbuildCallback(Relation index, form_and_insert_tuple(state); /* set state to correspond to the next range */ + /* XXX: This needs clamping for AO/CO tables for seg i full case. */ state->bs_currRangeStart += state->bs_pagesPerRange; /* re-initialize state for it */ @@ -911,7 +965,10 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo) * Initialize our state, including the deformed tuple state. */ revmap = brinRevmapInitialize(index, &pagesPerRange, NULL); - state = initialize_brin_buildstate(index, revmap, pagesPerRange); + state = initialize_brin_buildstate(index, revmap, pagesPerRange, isAo); + + /* GPDB: AO/CO tables: position iterator to start of sequence 0's chain. */ + brinRevmapAOPositionAtStart(revmap, 0); /* * Now scan the relation. No syncscan allowed here because we want the @@ -921,7 +978,14 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo) brinbuildCallback, (void *) state, NULL); /* process the final batch */ - form_and_insert_tuple(state); + /* + * GPDB: Avoid this for AO/CO tables with no rows. We opt to not create a + * revmap page and data page with a placeholder tuple for empty relations, + * as is done for heap. If we did, we would have to do so for all 128 + * possible block sequences, creating unnecessary bloat. 
+ */ + if (!isAo || reltuples != 0) + form_and_insert_tuple(state); /* release resources */ idxtuples = state->bs_numtuples; @@ -1324,7 +1388,26 @@ brinGetStats(Relation index, BrinStatsData *stats) metadata = (BrinMetaPageData *) PageGetContents(metapage); stats->pagesPerRange = metadata->pagesPerRange; + +/* + * GPDB: Since planning is done on the QD and since there is no data on the QD, + * there are no revmap pages on the QD. So, it is currently not possible to get + * an estimate on the number of revmap pages (since we want to avoid dispatching + * during planning). + * + * For AO/CO tables, the following wouldn't be applicable anyway (we would have + * to look at the revmap chains etc). + * + * Even though we are unable to get an estimate on the number of revmap pages, + * it works out fine for AO/CO tables as these pages get treated like data pages + * (i.e. they are costed as random access), as well as they should be (due to + * chaining, please refer to the BRIN README). For heap tables, we end up losing + * out a little as we would be costing a BRIN plan higher, due to this limitation. 
+ */ +#if 0 stats->revmapNumPages = metadata->lastRevmapPage - 1; +#endif + stats->revmapNumPages = 0; UnlockReleaseBuffer(metabuffer); } @@ -1334,7 +1417,7 @@ brinGetStats(Relation index, BrinStatsData *stats) */ static BrinBuildState * initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap, - BlockNumber pagesPerRange) + BlockNumber pagesPerRange, bool isAo) { BrinBuildState *state; @@ -1349,6 +1432,9 @@ initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap, state->bs_bdesc = brin_build_desc(idxRel); state->bs_dtuple = brin_new_memtuple(state->bs_bdesc); + /* GPDB specific state for AO/CO tables */ + state->bs_isAo = isAo; + brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc); return state; @@ -1631,6 +1717,10 @@ brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, } } + if (RelationIsAppendOptimized(heapRel)) + brinRevmapAOPositionAtStart(revmap, + AOSegmentGet_blockSequenceNum(startBlk)); + /* * Scan the revmap to find unsummarized items for each block sequence * involved. 
@@ -1663,7 +1753,8 @@ brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, /* first time through */ Assert(!indexInfo); state = initialize_brin_buildstate(index, revmap, - pagesPerRange); + pagesPerRange, + RelationIsAppendOptimized(heapRel)); indexInfo = BuildIndexInfo(index); } summarize_range(indexInfo, state, heapRel, startBlk, endBlk); diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c index e352c9910e0..fd35dced6f1 100644 --- a/src/backend/access/brin/brin_pageops.c +++ b/src/backend/access/brin/brin_pageops.c @@ -476,11 +476,16 @@ brin_page_init(Page page, uint16 type) { PageInit(page, BLCKSZ, sizeof(BrinSpecialSpace)); - BrinPageType(page) = type; + BrinPageType(page) = type; + /* GPDB: AO/CO tables: pageNum, nextRevmapPage is to be assigned later */ + BrinLogicalPageNum(page) = InvalidLogicalPageNum; + BrinNextRevmapPage(page) = InvalidBlockNumber; } /* * Initialize a new BRIN index's metapage. + * GPDB: We have the additional argument 'isAo' which is true if the base table + * is append-optimized (false otherwise, like for heap tables). */ void brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version, bool isAo) @@ -503,6 +508,14 @@ brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version, bool is */ metadata->lastRevmapPage = 0; + /* GPDB: AO table metadata initialization */ + for (int i = 0; i < MAX_AOREL_CONCURRENCY; i++) + { + metadata->aoChainInfo[i].firstPage = InvalidBlockNumber; + metadata->aoChainInfo[i].lastPage = InvalidBlockNumber; + metadata->aoChainInfo[i].lastLogicalPageNum = InvalidLogicalPageNum; + } + /* * Set pd_lower just past the end of the metadata. 
This is essential, * because without doing so, metadata will be lost if xlog.c compresses diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c index 71669009233..a124aede5d5 100644 --- a/src/backend/access/brin/brin_revmap.c +++ b/src/backend/access/brin/brin_revmap.c @@ -35,12 +35,19 @@ struct BrinRevmap { - Relation rm_irel; + Relation rm_irel; BlockNumber rm_pagesPerRange; BlockNumber rm_lastRevmapPage; /* cached from the metapage */ - Buffer rm_metaBuf; - Buffer rm_currBuf; - bool rm_isAo; + Buffer rm_metaBuf; + Buffer rm_currBuf; + bool rm_isAo; + + /* GPDB: Cached state from metapage for AO/CO tables */ + AOChainInfo rm_aoChainInfo[MAX_AOREL_CONCURRENCY]; + /* GPDB: Revmap iterator state for AO/CO tables */ + int rm_aoIterBlockSeqNum; + BlockNumber rm_aoIterRevmapPage; + LogicalPageNum rm_aoIterRevmapPageNum; }; /* typedef appears in brin_revmap.h */ @@ -49,10 +56,12 @@ struct BrinRevmap static BlockNumber revmap_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk); static Buffer revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk); +static BlockNumber revmap_extend_and_get_blkno_heap(BrinRevmap *revmap, BlockNumber heapBlk); +static BlockNumber revmap_extend_and_get_blkno_ao(BrinRevmap *revmap, BlockNumber heapBlk); static BlockNumber revmap_extend_and_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk); -static void revmap_physical_extend(BrinRevmap *revmap); - +static void revmap_physical_extend(BrinRevmap *revmap, LogicalPageNum targetLogicalPageNum); +static void set_ao_revmap_chain(BrinRevmap *revmap, BrinMetaPageData *metadata, int seqnum); /* * Initialize an access object for a range map. This must be freed by * brinRevmapTerminate when caller is done with it. 
@@ -78,7 +87,13 @@ brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange, revmap->rm_lastRevmapPage = metadata->lastRevmapPage; revmap->rm_metaBuf = meta; revmap->rm_currBuf = InvalidBuffer; + + /* GPDB AO/CO specific initialization (barring iterator state) */ revmap->rm_isAo = metadata->isAo; + memcpy(revmap->rm_aoChainInfo, metadata->aoChainInfo, sizeof(metadata->aoChainInfo)); + revmap->rm_aoIterBlockSeqNum = InvalidBlockSequenceNum; + revmap->rm_aoIterRevmapPage = InvalidBlockNumber; + revmap->rm_aoIterRevmapPageNum = InvalidLogicalPageNum; *pagesPerRange = metadata->pagesPerRange; @@ -112,7 +127,8 @@ brinRevmapExtend(BrinRevmap *revmap, BlockNumber heapBlk) /* Ensure the buffer we got is in the expected range */ Assert(mapBlk != InvalidBlockNumber && mapBlk != BRIN_METAPAGE_BLKNO && - mapBlk <= revmap->rm_lastRevmapPage); + ((!revmap->rm_isAo && mapBlk <= revmap->rm_lastRevmapPage) || + (revmap->rm_isAo && mapBlk == revmap->rm_aoChainInfo[revmap->rm_aoIterBlockSeqNum].lastPage))); } /* @@ -227,6 +243,8 @@ brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk, Assert(mapBlk != InvalidBlockNumber); revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk); + if (revmap->rm_isAo) + revmap->rm_aoIterRevmapPageNum = BrinLogicalPageNum(BufferGetPage(revmap->rm_currBuf)); } LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_SHARE); @@ -435,9 +453,49 @@ brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk) } /* - * Given a heap block number, find the corresponding physical revmap block - * number and return it. If the revmap page hasn't been allocated yet, return - * InvalidBlockNumber. + * Position the AO revmap iterator at the beginning of the revmap chain for the + * given block sequence. This does temporarily lock the first page in the chain. 
+ */ +void +brinRevmapAOPositionAtStart(BrinRevmap *revmap, int seqNum) +{ + Assert(seqNum != InvalidBlockSequenceNum); + + revmap->rm_aoIterBlockSeqNum = seqNum; + revmap->rm_aoIterRevmapPage = revmap->rm_aoChainInfo[seqNum].firstPage; + + if (revmap->rm_aoChainInfo[seqNum].firstPage != InvalidBlockNumber) + { + /* chain exists, read the first page to get its logical page number */ + Buffer buf = ReadBuffer(revmap->rm_irel, + revmap->rm_aoChainInfo[seqNum].firstPage); + LockBuffer(buf, BUFFER_LOCK_SHARE); + revmap->rm_aoIterRevmapPageNum = BrinLogicalPageNum(BufferGetPage(buf)); + UnlockReleaseBuffer(buf); + } + else + { + /* chain doesn't exist yet */ + revmap->rm_aoIterRevmapPageNum = InvalidLogicalPageNum; + } +} + +/* + * Position the AO revmap iterator at the end of the revmap chain for the given + * block sequence. This is a lockless operation. + */ +void +brinRevmapAOPositionAtEnd(BrinRevmap *revmap, int seqNum) +{ + Assert(seqNum != InvalidBlockSequenceNum); + + revmap->rm_aoIterBlockSeqNum = seqNum; + revmap->rm_aoIterRevmapPage = revmap->rm_aoChainInfo[seqNum].lastPage; + revmap->rm_aoIterRevmapPageNum = revmap->rm_aoChainInfo[seqNum].lastLogicalPageNum; +} + +/* + * Upstream version of revmap_get_blkno() for heap tables. */ static BlockNumber revmap_get_blkno_heap(BrinRevmap *revmap, BlockNumber heapBlk) @@ -454,6 +512,73 @@ revmap_get_blkno_heap(BrinRevmap *revmap, BlockNumber heapBlk) return InvalidBlockNumber; } +/* + * Similar in spirit to revmap_get_blkno_heap(), except here we traverse the + * revmap chain maintained for the block sequence in which 'heapBlk' falls. Our + * access struct buffer is used to read in each chain member. The iterator + * state is always kept up-to-date with the traversal. 
+ */ +static BlockNumber +revmap_get_blkno_ao(BrinRevmap *revmap, BlockNumber heapBlk) +{ + BlockNumber mapBlk; + BlockNumber targetRevmapPageNum = + HEAPBLK_TO_REVMAP_PAGENUM_AO(revmap->rm_pagesPerRange, heapBlk); + + Assert(targetRevmapPageNum >= 1); + + /* There are no revmap pages for the current block sequence */ + if (revmap->rm_aoIterRevmapPageNum == InvalidLogicalPageNum) + return InvalidBlockNumber; + + Assert(revmap->rm_aoIterRevmapPage != InvalidBlockNumber); + + /* + * Traverse the revmap chain, looking for the target logical page number. + * Once found, the iterator will point to the required revmap page. + */ + mapBlk = revmap->rm_aoIterRevmapPage; + while (revmap->rm_aoIterRevmapPageNum < targetRevmapPageNum && mapBlk != InvalidBlockNumber) + { + Page currPage; + + if (!BufferIsValid(revmap->rm_currBuf)) + { + /* Read the next chain member */ + revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk); + } + else + { + /* Our access struct buffer already is what the iterator points to */ + Assert(revmap->rm_aoIterRevmapPage == BufferGetBlockNumber(revmap->rm_currBuf)); + } + + LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_SHARE); + + currPage = BufferGetPage(revmap->rm_currBuf); + + /* Update the iterator position */ + revmap->rm_aoIterRevmapPage = mapBlk; + revmap->rm_aoIterRevmapPageNum = BrinLogicalPageNum(currPage); + + /* Traverse to the next chain member */ + mapBlk = BrinNextRevmapPage(currPage); + + /* Release, so we can read in the next member */ + UnlockReleaseBuffer(revmap->rm_currBuf); + revmap->rm_currBuf = InvalidBuffer; + } + + if (revmap->rm_aoIterRevmapPageNum == targetRevmapPageNum) + { + /* Reached our destination */ + return revmap->rm_aoIterRevmapPage; + } + + /* Destination doesn't exist yet */ + return InvalidBlockNumber; +} + /* * Given a heap block number, find the corresponding physical revmap block * number and return it. 
If the revmap page hasn't been allocated yet, return @@ -463,7 +588,7 @@ static BlockNumber revmap_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk) { if (revmap->rm_isAo) - return -1; + return revmap_get_blkno_ao(revmap, heapBlk); else return revmap_get_blkno_heap(revmap, heapBlk); } @@ -487,7 +612,8 @@ revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk) /* Ensure the buffer we got is in the expected range */ Assert(mapBlk != BRIN_METAPAGE_BLKNO && - mapBlk <= revmap->rm_lastRevmapPage); + ((!revmap->rm_isAo && mapBlk <= revmap->rm_lastRevmapPage) || + (revmap->rm_isAo && mapBlk <= revmap->rm_aoChainInfo[revmap->rm_aoIterBlockSeqNum].lastPage))); /* * Obtain the buffer from which we need to read. If we already have the @@ -501,6 +627,8 @@ revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk) ReleaseBuffer(revmap->rm_currBuf); revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk); + if (revmap->rm_isAo) + revmap->rm_aoIterRevmapPageNum = BrinLogicalPageNum(BufferGetPage(revmap->rm_currBuf)); } return revmap->rm_currBuf; @@ -513,6 +641,19 @@ revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk) */ static BlockNumber revmap_extend_and_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk) +{ + if (revmap->rm_isAo) + return revmap_extend_and_get_blkno_ao(revmap, heapBlk); + + return revmap_extend_and_get_blkno_heap(revmap, heapBlk); +} + +/* + * GPDB: The upstream code from revmap_extend_and_get_blkno(), which applies to + * heap tables has been moved here. 
+ */ +static BlockNumber +revmap_extend_and_get_blkno_heap(BrinRevmap *revmap, BlockNumber heapBlk) { BlockNumber targetblk; @@ -523,18 +664,55 @@ revmap_extend_and_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk) while (targetblk > revmap->rm_lastRevmapPage) { CHECK_FOR_INTERRUPTS(); - revmap_physical_extend(revmap); + revmap_physical_extend(revmap, InvalidLogicalPageNum); } return targetblk; } +/* + * Similar in spirit to revmap_extend_and_get_blkno_heap(), except here we know + * when we are done based on the positioning of the AO revmap iterator with + * respect to the target logical page number. We can simply derive this target + * page number based on some math. + * The reason why we need to take this approach is that unlike for heap, revmap + * pages don't reside in deterministic block numbers. + */ +static BlockNumber +revmap_extend_and_get_blkno_ao(BrinRevmap *revmap, BlockNumber heapBlk) +{ + int currSeqNum = revmap->rm_aoIterBlockSeqNum; + LogicalPageNum targetLogicalPageNum; + + Assert(currSeqNum == AOSegmentGet_blockSequenceNum(heapBlk)); + + /* set up the target page number state */ + targetLogicalPageNum = HEAPBLK_TO_REVMAP_PAGENUM_AO(revmap->rm_pagesPerRange, + heapBlk); + /* + * Extend the revmap, only if necessary. It is not necessary if the iterator + * is already positioned on the target logical page number. + */ + while (targetLogicalPageNum > revmap->rm_aoIterRevmapPageNum) + { + CHECK_FOR_INTERRUPTS(); + revmap_physical_extend(revmap, targetLogicalPageNum); + /* Make sure the iterator is positioned at the end of the current chain */ + brinRevmapAOPositionAtEnd(revmap, currSeqNum); + } + + return revmap->rm_aoIterRevmapPage; +} + /* * Try to extend the revmap by one page. This might not happen for a number of * reasons; caller is expected to retry until the expected outcome is obtained. + * + * GPDB: For AO/CO tables, 'targetLogicalPageNum' contains the logical page + * number of the to-be-added revmap page. 
(It is InvalidLogicalPageNum otherwise) */ static void -revmap_physical_extend(BrinRevmap *revmap) +revmap_physical_extend(BrinRevmap *revmap, LogicalPageNum targetLogicalPageNum) { Buffer buf; Page page; @@ -545,6 +723,13 @@ revmap_physical_extend(BrinRevmap *revmap) Relation irel = revmap->rm_irel; bool needLock = !RELATION_IS_LOCAL(irel); + /* GPDB: AO/CO specific state */ + bool isAo = revmap->rm_isAo; + Buffer currLastRevmapBuf = InvalidBuffer; + Page currLastRevmapPage = NULL; + bool ao_chain_exists = false; + int currSeq = revmap->rm_aoIterBlockSeqNum; + /* * Lock the metapage. This locks out concurrent extensions of the revmap, * but note that we still need to grab the relation extension lock because @@ -554,6 +739,12 @@ revmap_physical_extend(BrinRevmap *revmap) metapage = BufferGetPage(revmap->rm_metaBuf); metadata = (BrinMetaPageData *) PageGetContents(metapage); + if (!isAo) + { + /* unindented to prevent merge conflicts */ + + Assert(targetLogicalPageNum == InvalidLogicalPageNum); + /* * Check that our cached lastRevmapPage value was up-to-date; if it * wasn't, update the cached copy and have caller start over. @@ -566,7 +757,36 @@ revmap_physical_extend(BrinRevmap *revmap) } mapBlk = metadata->lastRevmapPage + 1; + /* end if */ + } + else + { + Assert(currSeq != InvalidBlockSequenceNum); + /* assert that we have a valid target page number to assign */ + Assert(targetLogicalPageNum != InvalidLogicalPageNum); + + /* + * GPDB: AO/CO: Check that our cached last revmap page and logical page + * number values were up-to-date; if they weren't, update the cached + * copies and have caller start over.
+ */ + if (metadata->aoChainInfo[currSeq].lastPage != revmap->rm_aoChainInfo[currSeq].lastPage) + { + set_ao_revmap_chain(revmap, metadata, currSeq); + LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK); + return; + } + } + nblocks = RelationGetNumberOfBlocks(irel); + + /* + * GPDB: For AO/CO tables, the new revmap page would always be allocated at + * the end of the relation. + */ + if (isAo) + mapBlk = nblocks; + if (mapBlk < nblocks) { buf = ReadBuffer(irel, mapBlk); @@ -579,7 +799,7 @@ revmap_physical_extend(BrinRevmap *revmap) LockRelationForExtension(irel, ExclusiveLock); buf = ReadBuffer(irel, P_NEW); - if (BufferGetBlockNumber(buf) != mapBlk) + if (!isAo && BufferGetBlockNumber(buf) != mapBlk) { /* * Very rare corner case: somebody extended the relation @@ -598,10 +818,42 @@ revmap_physical_extend(BrinRevmap *revmap) if (needLock) UnlockRelationForExtension(irel, ExclusiveLock); + + if (isAo) + { + Assert(mapBlk == BufferGetBlockNumber(buf)); + + if (metadata->aoChainInfo[currSeq].lastPage != InvalidBlockNumber) + { + /* + * We are extending the chain for the current block sequence. So, + * read and lock the last chain member. + */ + ao_chain_exists = true; + + currLastRevmapBuf = ReadBuffer(irel, + metadata->aoChainInfo[currSeq].lastPage); + LockBuffer(currLastRevmapBuf, BUFFER_LOCK_EXCLUSIVE); + currLastRevmapPage = BufferGetPage(currLastRevmapBuf); + + Assert(!PageIsNew(currLastRevmapPage)); + } + else + { + /* + * We have no revmap pages yet for the current BlockSequence. + * A new chain will be started for the current block sequence + * below. Consequently, there is no last chain member to read. 
+ */ + Assert(revmap->rm_aoChainInfo[currSeq].lastLogicalPageNum == InvalidLogicalPageNum); + } + } } + AssertImply(isAo, PageIsNew(page)); + /* Check that it's a regular block (or an empty page) */ - if (!PageIsNew(page) && !BRIN_IS_REGULAR_PAGE(page)) + if (!isAo && !PageIsNew(page) && !BRIN_IS_REGULAR_PAGE(page)) ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg("unexpected page type 0x%04X in BRIN index \"%s\" block %u", @@ -610,7 +862,8 @@ revmap_physical_extend(BrinRevmap *revmap) BufferGetBlockNumber(buf)))); /* If the page is in use, evacuate it and restart */ - if (brin_start_evacuating_page(irel, buf)) + /* GPDB: We don't follow the page evacuation protocol for AO/CO tables */ + if (!isAo && brin_start_evacuating_page(irel, buf)) { LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK); brin_evacuate_page(irel, revmap->rm_pagesPerRange, revmap, buf); @@ -627,9 +880,36 @@ revmap_physical_extend(BrinRevmap *revmap) /* the rm_tids array is initialized to all invalid by PageInit */ brin_page_init(page, BRIN_PAGETYPE_REVMAP); + + /* Set the logical page number for AO/CO tables */ + if (isAo) + BrinLogicalPageNum(page) = targetLogicalPageNum; + MarkBufferDirty(buf); - metadata->lastRevmapPage = mapBlk; + if (!isAo) + metadata->lastRevmapPage = mapBlk; + else + { + /* GPDB: Revmap chain bookkeeping for AO/CO tables */ + if (ao_chain_exists) + { + /* Extend the chain */ + BrinNextRevmapPage(currLastRevmapPage) = mapBlk; + MarkBufferDirty(currLastRevmapBuf); + } + else + { + /* Begin a new chain */ + metadata->aoChainInfo[currSeq].firstPage = mapBlk; + } + + metadata->aoChainInfo[currSeq].lastPage = mapBlk; + metadata->aoChainInfo[currSeq].lastLogicalPageNum = targetLogicalPageNum; + + /* And refresh the revmap's cached state as well. */ + set_ao_revmap_chain(revmap, metadata, currSeq); + } /* * Set pd_lower just past the end of the metadata.
This is essential, @@ -649,6 +929,13 @@ revmap_physical_extend(BrinRevmap *revmap) XLogRecPtr recptr; xlrec.targetBlk = mapBlk; + xlrec.isAo = isAo; + + if (isAo) + { + xlrec.blockSeq = currSeq; + xlrec.targetPageNum = targetLogicalPageNum; + } XLogBeginInsert(); XLogRegisterData((char *) &xlrec, SizeOfBrinRevmapExtend); @@ -656,9 +943,19 @@ revmap_physical_extend(BrinRevmap *revmap) XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT); + /* + * GPDB: Register the last chain member, so that we can link the new + * revmap page to it during replay. Pass empty flags as revmap pages + * don't follow the "standard" layout. + */ + if (ao_chain_exists) + XLogRegisterBuffer(2, currLastRevmapBuf, 0); + recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND); PageSetLSN(metapage, recptr); PageSetLSN(page, recptr); + if (ao_chain_exists) + PageSetLSN(currLastRevmapPage, recptr); } END_CRIT_SECTION(); @@ -666,17 +963,18 @@ revmap_physical_extend(BrinRevmap *revmap) LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK); UnlockReleaseBuffer(buf); + if (ao_chain_exists) + UnlockReleaseBuffer(currLastRevmapBuf); } /* - * Get the start block number of the current aoseg by block number. - * - * append-optimized table logically has 128 segment files. The highest 7 bits - * of the logical Tid represent the segment file number. So, segment file number - * with zero after is the start block number in a segment file. + * Set the cache of chain metadata maintained in the revmap access struct, + * for the chain with the given 'seqnum', using the metapage contents. 
*/ -BlockNumber -heapBlockGetCurrentAosegStart(BlockNumber heapBlk) +static void +set_ao_revmap_chain(BrinRevmap *revmap, BrinMetaPageData *metadata, int seqnum) { - return heapBlk & 0xFE000000; + revmap->rm_aoChainInfo[seqnum].firstPage = metadata->aoChainInfo[seqnum].firstPage; + revmap->rm_aoChainInfo[seqnum].lastPage = metadata->aoChainInfo[seqnum].lastPage; + revmap->rm_aoChainInfo[seqnum].lastLogicalPageNum = metadata->aoChainInfo[seqnum].lastLogicalPageNum; } diff --git a/src/backend/access/brin/brin_xlog.c b/src/backend/access/brin/brin_xlog.c index a4bceda39c9..e120e8dd779 100644 --- a/src/backend/access/brin/brin_xlog.c +++ b/src/backend/access/brin/brin_xlog.c @@ -216,11 +216,40 @@ brin_xlog_revmap_extend(XLogReaderState *record) BlockNumber targetBlk; XLogRedoAction action; + /* GPDB AO/CO specific */ + bool ao_chain_exists = false; + Buffer currLastRevmapBuf = InvalidBuffer; + xlrec = (xl_brin_revmap_extend *) XLogRecGetData(record); XLogRecGetBlockTag(record, 1, NULL, NULL, &targetBlk); Assert(xlrec->targetBlk == targetBlk); + /* + * GPDB: If we have registered backup block id = 2, it means that this index + * is on an AO/CO relation, and we are extending a revmap chain. + */ + ao_chain_exists = XLogRecGetBlockTag(record, 2, NULL, NULL, NULL); + if (ao_chain_exists) + { + XLogRedoAction currLastRevmapBufAction = + XLogReadBufferForRedo(record, 2, &currLastRevmapBuf); + + Assert(xlrec->isAo); + + if (currLastRevmapBufAction == BLK_NEEDS_REDO) + { + /* Extend the chain for the current block sequence. 
*/ + Page currLastRevmapPage = BufferGetPage(currLastRevmapBuf); + + Assert(!PageIsNew(currLastRevmapPage)); + + BrinNextRevmapPage(currLastRevmapPage) = xlrec->targetBlk; + PageSetLSN(currLastRevmapPage, lsn); + MarkBufferDirty(currLastRevmapBuf); + } + } + /* Update the metapage */ action = XLogReadBufferForRedo(record, 0, &metabuf); if (action == BLK_NEEDS_REDO) @@ -231,8 +260,30 @@ brin_xlog_revmap_extend(XLogReaderState *record) metapg = BufferGetPage(metabuf); metadata = (BrinMetaPageData *) PageGetContents(metapg); - Assert(metadata->lastRevmapPage == xlrec->targetBlk - 1); - metadata->lastRevmapPage = xlrec->targetBlk; + AssertImply(xlrec->isAo, metadata->isAo); + + if (!metadata->isAo) + { + Assert(metadata->lastRevmapPage == xlrec->targetBlk - 1); + metadata->lastRevmapPage = xlrec->targetBlk; + Assert(!ao_chain_exists); + } + else + { + /* GPDB AO/CO: Update the metapage's revmap chain info */ + int blockSeq = xlrec->blockSeq; + + if (!ao_chain_exists) + { + /* Begin a new chain */ + metadata->aoChainInfo[blockSeq].firstPage = xlrec->targetBlk; + } + + Assert(xlrec->targetBlk != InvalidBlockNumber); + Assert(xlrec->targetPageNum != InvalidLogicalPageNum); + metadata->aoChainInfo[blockSeq].lastPage = xlrec->targetBlk; + metadata->aoChainInfo[blockSeq].lastLogicalPageNum = xlrec->targetPageNum; + } PageSetLSN(metapg, lsn); @@ -258,12 +309,18 @@ brin_xlog_revmap_extend(XLogReaderState *record) page = (Page) BufferGetPage(buf); brin_page_init(page, BRIN_PAGETYPE_REVMAP); + /* GPDB: Set the logical page number for AO/CO tables */ + if (xlrec->isAo) + BrinLogicalPageNum(page) = xlrec->targetPageNum; + PageSetLSN(page, lsn); MarkBufferDirty(buf); UnlockReleaseBuffer(buf); if (BufferIsValid(metabuf)) UnlockReleaseBuffer(metabuf); + if (BufferIsValid(currLastRevmapBuf)) + UnlockReleaseBuffer(currLastRevmapBuf); } static void diff --git a/src/backend/access/rmgrdesc/brindesc.c b/src/backend/access/rmgrdesc/brindesc.c index b6265a49bc0..e239cdd4f82 100644 --- 
a/src/backend/access/rmgrdesc/brindesc.c +++ b/src/backend/access/rmgrdesc/brindesc.c @@ -27,8 +27,9 @@ brin_desc(StringInfo buf, XLogReaderState *record) { xl_brin_createidx *xlrec = (xl_brin_createidx *) rec; - appendStringInfo(buf, "v%d pagesPerRange %u", - xlrec->version, xlrec->pagesPerRange); + appendStringInfo(buf, "v%d pagesPerRange %u isAO %s", + xlrec->version, xlrec->pagesPerRange, + xlrec->isAo ? "true" : "false"); } else if (info == XLOG_BRIN_INSERT) { @@ -59,7 +60,8 @@ brin_desc(StringInfo buf, XLogReaderState *record) { xl_brin_revmap_extend *xlrec = (xl_brin_revmap_extend *) rec; - appendStringInfo(buf, "targetBlk %u", xlrec->targetBlk); + appendStringInfo(buf, "targetBlk %u isAO %s", xlrec->targetBlk, + xlrec->isAo ? "true" : "false"); } else if (info == XLOG_BRIN_DESUMMARIZE) { diff --git a/src/include/access/appendonlytid.h b/src/include/access/appendonlytid.h index d4acad89d31..8ad70b6ec95 100755 --- a/src/include/access/appendonlytid.h +++ b/src/include/access/appendonlytid.h @@ -84,6 +84,19 @@ typedef struct AOTupleId */ #define AOSegmentGet_startHeapBlock(segno) ((segno) << 25) +/* + * Get the start block number of the current aoseg/block sequence from a given + * logical heap block number. + * + * The highest 7 bits of the BlockNumber represent the segment file number. So, + * the starting block number in a specific segment (or block sequence) is just + * those bits with the lower order bits masked out.
+ */ +#define AOHeapBlockGet_startHeapBlock(heapBlk) ((heapBlk) & 0xFE000000) + +#define AOSegmentGet_blockSequenceNum(heapBlk) (AOSegmentGet_segno((heapBlk))) +#define InvalidBlockSequenceNum (-1) + static inline uint64 AOTupleIdGet_rowNum(AOTupleId *h) { diff --git a/src/include/access/brin_page.h b/src/include/access/brin_page.h index 3b76c5ae730..468e81ebc9e 100644 --- a/src/include/access/brin_page.h +++ b/src/include/access/brin_page.h @@ -17,6 +17,7 @@ #ifndef BRIN_PAGE_H #define BRIN_PAGE_H +#include "appendonlywriter.h" #include "storage/block.h" #include "storage/itemptr.h" @@ -28,7 +29,9 @@ */ typedef struct BrinSpecialSpace { - uint16 vector[MAXALIGN(1) / sizeof(uint16)]; + BlockNumber logicalPageNum; /* AO/CO: 1-based logical page number */ + BlockNumber nextRevmapPage; /* AO/CO: Only for revmap pages */ + uint16 vector[MAXALIGN(1) / sizeof(uint16)]; } BrinSpecialSpace; /* @@ -47,6 +50,14 @@ typedef struct BrinSpecialSpace (((BrinSpecialSpace *) \ PageGetSpecialPointer(page))->vector[MAXALIGN(1) / sizeof(uint16) - 2]) +/* GPDB: We maintain a chain of revmap pages for AO/CO tables */ +#define BrinLogicalPageNum(page) \ + (((BrinSpecialSpace *) \ + PageGetSpecialPointer(page))->logicalPageNum) + +#define BrinNextRevmapPage(page) \ + (((BrinSpecialSpace *) \ + PageGetSpecialPointer(page))->nextRevmapPage) /* special space on all BRIN pages stores a "type" identifier */ #define BRIN_PAGETYPE_META 0xF091 #define BRIN_PAGETYPE_REVMAP 0xF092 @@ -59,6 +70,29 @@ typedef struct BrinSpecialSpace /* flags for BrinSpecialSpace */ #define BRIN_EVACUATE_PAGE (1 << 0) +/* + * GPDB: We maintain a 1-based logical page number in revmap pages. This number + * gives us a way to find revmap pages, given a logical block number. This + * number is relative within a block sequence and starts from 1, with 1 + * representing all the heap blocks the 1st revmap page can contain. See + * HEAPBLK_TO_REVMAP_PAGENUM_AO() for more details. 
It is 1-based for + * convenience in routines such as revmap_extend_and_get_blkno_ao(), where a + * value of 0, can be used to represent the empty case. + */ +typedef BlockNumber LogicalPageNum; +#define InvalidLogicalPageNum (0) + +/* + * GPDB: Bookkeeping for the head and tail of the revmap page chain maintained + * for AO/CO tables. + */ +typedef struct AOChainInfo { + /* the first and last revmap pages of a chain for each block sequence */ + BlockNumber firstPage; + BlockNumber lastPage; + /* last logical revmap page number for each block sequence (1-based) */ + LogicalPageNum lastLogicalPageNum; +} AOChainInfo; /* Metapage definitions */ typedef struct BrinMetaPageData @@ -67,7 +101,10 @@ typedef struct BrinMetaPageData uint32 brinVersion; BlockNumber pagesPerRange; BlockNumber lastRevmapPage; + + /* GPDB section to handle AO/CO tables */ bool isAo; + AOChainInfo aoChainInfo[MAX_AOREL_CONCURRENCY]; } BrinMetaPageData; #define BRIN_CURRENT_VERSION 1 diff --git a/src/include/access/brin_revmap.h b/src/include/access/brin_revmap.h index 7fbeee4185b..d198e347188 100644 --- a/src/include/access/brin_revmap.h +++ b/src/include/access/brin_revmap.h @@ -31,6 +31,14 @@ #define HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk) \ ((heapBlk / pagesPerRange) % REVMAP_PAGE_MAXITEMS) +/* + * GPDB: Similar to the above calculation, except we need to normalize the + * provided heapBlk, with the starting block of the block sequence it belongs + * to. Also, logical page numbers are 1-based. 
+ */ +#define HEAPBLK_TO_REVMAP_PAGENUM_AO(pagesPerRange, heapBlk) \ + (((heapBlk - AOHeapBlockGet_startHeapBlock(heapBlk)) / pagesPerRange) / REVMAP_PAGE_MAXITEMS + 1) + /* struct definition lives in brin_revmap.c */ typedef struct BrinRevmap BrinRevmap; @@ -49,6 +57,21 @@ extern BrinTuple *brinGetTupleForHeapBlock(BrinRevmap *revmap, Size *size, int mode, Snapshot snapshot); extern bool brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk); -extern BlockNumber heapBlockGetCurrentAosegStart(BlockNumber heapBlk); +/* GPDB specific */ +extern void brinRevmapAOPositionAtStart(BrinRevmap *revmap, int seqNum); +extern void brinRevmapAOPositionAtEnd(BrinRevmap *revmap, int seqNum); + +/* + * GPDB: Given a 'heapBlk', return the starting block number of the range in + * which 'heapBlk' lies. + * Note: We have to factor in BlockSequence limits when we do this calculation. + */ +static inline BlockNumber +brin_range_start_blk(BlockNumber heapBlk, bool isAo, BlockNumber pagesPerRange) +{ + BlockNumber seqStartBlk = isAo ? AOHeapBlockGet_startHeapBlock(heapBlk) : 0; + BlockNumber rangeNum = ((heapBlk - seqStartBlk) / pagesPerRange); + return (rangeNum * pagesPerRange) + seqStartBlk; +} #endif /* BRIN_REVMAP_H */ diff --git a/src/include/access/brin_xlog.h b/src/include/access/brin_xlog.h index 070726f427e..2f346237d15 100644 --- a/src/include/access/brin_xlog.h +++ b/src/include/access/brin_xlog.h @@ -15,6 +15,7 @@ #define BRIN_XLOG_H #include "access/xlogreader.h" +#include "access/brin_page.h" #include "lib/stringinfo.h" #include "storage/bufpage.h" #include "storage/itemptr.h" @@ -112,6 +113,7 @@ typedef struct xl_brin_samepage_update * * Backup block 0: metapage * Backup block 1: new revmap page + * Backup block 2: (AO/CO): last revmap page of current chain (if exists) */ typedef struct xl_brin_revmap_extend { @@ -120,9 +122,13 @@ typedef struct xl_brin_revmap_extend * backup block 1. 
*/ BlockNumber targetBlk; + /* GPDB AO/CO state */ + bool isAo; + int blockSeq; /* block sequence */ + LogicalPageNum targetPageNum; /* page number to assign targetBlk */ } xl_brin_revmap_extend; -#define SizeOfBrinRevmapExtend (offsetof(xl_brin_revmap_extend, targetBlk) + \ +#define SizeOfBrinRevmapExtend (offsetof(xl_brin_revmap_extend, targetPageNum) + \ sizeof(BlockNumber)) /* diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 799916398b7..899c268bb06 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -56,6 +56,6 @@ */ /* 3yyymmddN */ -#define CATALOG_VERSION_NO 302501601 +#define CATALOG_VERSION_NO 302501111 #endif diff --git a/src/test/isolation2/expected/setup.out b/src/test/isolation2/expected/setup.out index 7834efcea99..24c3c32f01b 100644 --- a/src/test/isolation2/expected/setup.out +++ b/src/test/isolation2/expected/setup.out @@ -143,3 +143,26 @@ CREATE -- Helper function that ensures mirror of the specified contentid is down. create or replace function wait_for_mirror_down(contentid smallint, timeout_sec integer) returns bool as $$ declare i int; /* in func */ begin /* in func */ i := 0; /* in func */ loop /* in func */ perform gp_request_fts_probe_scan(); /* in func */ if (select count(1) from gp_segment_configuration where role='m' and content=$1 and status='d') = 1 then /* in func */ return true; /* in func */ end if; /* in func */ if i >= 2 * $2 then /* in func */ return false; /* in func */ end if; /* in func */ perform pg_sleep(0.5); /* in func */ i = i + 1; /* in func */ end loop; /* in func */ end; /* in func */ $$ language plpgsql; CREATE + +-- Helper function that ensures stats collector receives stat from the latest operation. 
+create or replace function wait_until_vacuum_count_change_to(relid oid, stat_val_expected bigint) returns text as $$ declare stat_val int; /* in func */ i int; /* in func */ begin i := 0; /* in func */ while i < 1200 loop select pg_stat_get_vacuum_count(relid) into stat_val; /* in func */ if stat_val = stat_val_expected then /* in func */ return 'OK'; /* in func */ end if; /* in func */ perform pg_sleep(0.1); /* in func */ perform pg_stat_clear_snapshot(); /* in func */ i := i + 1; /* in func */ end loop; /* in func */ return 'Fail'; /* in func */ end; /* in func */ $$ language plpgsql; +CREATE + +-- Helper function to get the number of blocks in a relation. +CREATE OR REPLACE FUNCTION blocks(rel regclass) RETURNS int AS $$ /* in func */ BEGIN /* in func */ RETURN pg_relation_size(rel) / current_setting('block_size')::int; /* in func */ END; $$ /* in func */ LANGUAGE PLPGSQL; +CREATE + +-- Helper function to populate logical heap pages in a certain block sequence. +-- Can be used for both heap and AO/CO tables. The target block sequence into +-- which we insert the pages depends on the session which is inserting the data. +-- This is currently meant to be used with a single column integer table. +-- +-- Sample usage: SELECT populate_pages('foo', 1, tid '(33554435,0)') +-- This will insert tuples with value=1 into a single QE such that logical +-- heap blocks [33554432, 33554434] will be full and 33554435 will have only +-- 1 tuple. 
+-- +-- Note: while using this with AO/CO tables, please account for how the block +-- sequences start/end based on the concurrency level (see AOSegmentGet_startHeapBlock()) +CREATE OR REPLACE FUNCTION populate_pages(relname text, value int, upto tid) RETURNS VOID AS $$ /* in func */ DECLARE curtid tid; /* in func */ BEGIN /* in func */ LOOP /* in func */ EXECUTE format('INSERT INTO %I VALUES($1) RETURNING ctid', relname) INTO curtid USING value; /* in func */ EXIT WHEN curtid > upto; /* in func */ END LOOP; /* in func */ END; $$ /* in func */ LANGUAGE PLPGSQL; +CREATE diff --git a/src/test/isolation2/input/uao/brin.source b/src/test/isolation2/input/uao/brin.source index 4e51ff1bb04..53e17e7d40b 100644 --- a/src/test/isolation2/input/uao/brin.source +++ b/src/test/isolation2/input/uao/brin.source @@ -1,23 +1,186 @@ --- Test cases with concurrency for BRIN indexes on AO/CO tables. +-- We rely on pageinspect to perform white-box testing for summarization. +-- White-box tests are necessary to ensure that summarization is done +-- successfully (to avoid cases where ranges have brin data tuples without +-- values or where the range is not covered by the revmap etc) +CREATE EXTENSION pageinspect; + +-------------------------------------------------------------------------------- +-- Test BRIN summarization with INSERT, brin_summarize_new_values() and VACUUM +-------------------------------------------------------------------------------- + +-- Create an index on an empty table +CREATE TABLE brin_ao_summarize_@amname@(i int) USING @amname@; +CREATE INDEX ON brin_ao_summarize_@amname@ USING brin(i) WITH (pages_per_range=1); + +-- Sanity: There are no revmap/data pages as there is no data +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + +-- There is no data, so nothing to summarize. 
+SELECT brin_summarize_new_values('brin_ao_summarize_@amname@_i_idx'); + +-- Sanity: Index contents should not have changed due to the no-op summarize. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + +-- Drop the index +DROP INDEX brin_ao_summarize_@amname@_i_idx; + +-- Create 3 blocks all on 1 QE, in 1 aoseg: 2 blocks full, 1 block with 1 tuple. +SELECT populate_pages('brin_ao_summarize_@amname@', 1, tid '(33554434, 0)'); + +-- Now re-create the index on the data inserted above. +CREATE INDEX ON brin_ao_summarize_@amname@ USING brin(i) WITH (pages_per_range=1); + +-- Sanity: there should be 1 revmap page and 1 data page covering the 3 blocks. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), + 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + +-- There is nothing new to summarize - it was all done during the index build. +SELECT brin_summarize_new_values('brin_ao_summarize_@amname@_i_idx'); + +-- Sanity: Index contents should not have changed due to the no-op summarize. 
+1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), + 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + +-- Insert more so we have 5 blocks on 1 QE, in 1 aoseg: 4 blocks full, 1 block +-- with 1 tuple. +SELECT populate_pages('brin_ao_summarize_@amname@', 20, tid '(33554436, 0)'); + +-- Sanity: The 3rd block should have its summary updated and the last 2 blocks +-- will be left unsummarized. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), + 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + +-- Summarize the last 2 blocks. +SELECT brin_summarize_new_values('brin_ao_summarize_@amname@_i_idx'); + +-- Sanity: All blocks should now have summary info. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), + 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + +-- Insert more so we have 7 blocks on 1 QE, in 1 aoseg: 6 blocks full, 1 page +-- with 1 tuple. 
+SELECT populate_pages('brin_ao_summarize_@amname@', 30, tid '(33554438, 0)'); + +-- Sanity: The 5th block should have its summary updated and the last 2 blocks +-- will be left unsummarized. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), + 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + +DELETE FROM brin_ao_summarize_@amname@ WHERE i = 1; --- Note: We use loops to populate logical heap pages in one aoseg. These logical --- heap blocks can start at a large number. See AOSegmentGet_startHeapBlock(segno). +VACUUM brin_ao_summarize_@amname@; + +-- A new INSERT would always map to the last range on the old segment and that +-- range will be updated to hold the new value, as part of INSERT. +INSERT INTO brin_ao_summarize_@amname@ VALUES(40); + +-- All the live tuples will have been moved to a single new logical heap block +-- in seg2 (67108864). The 1 tuple INSERTed after the VACUUM should have gone to +-- the last block in seg1 (33554438). +SELECT distinct(right(split_part(ctid::text, ',', 1), -1)) AS blknum + FROM brin_ao_summarize_@amname@; + +-- Sanity: There should now be 2 revmap pages (1 new one for the new seg). Also, +-- there will be a new index tuple mapping to that new seg and block number. 
+-- Note: Since VACUUM summarizes all logical heap blocks (invokes summarization +-- with BRIN_ALL_BLOCKRANGES), and doesn't clean up existing summary info, we +-- can expect entries from the 1st seg to be still there (including blank entries +-- added for the 6th and 7th blocks) +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 3)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), + 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + +-- VACUUM should have already summarized this one logical heap block, so +-- invoking summarization again will be a no-op. +SELECT brin_summarize_new_values('brin_ao_summarize_@amname@_i_idx'); + +-- Sanity: Index contents should not have changed due to the no-op summarize. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 3)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), + 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + +-------------------------------------------------------------------------------- +-- Specific range summarization +-------------------------------------------------------------------------------- + +-- We don't allow specific range summarization for AO tables at the moment. 
+SELECT brin_summarize_range('brin_ao_summarize_@amname@_i_idx', 1); + +-------------------------------------------------------------------------------- +-- Test summarization of last partial range. +-------------------------------------------------------------------------------- + +CREATE TABLE brin_ao_summarize_partial_@amname@(i int) USING @amname@; +CREATE INDEX ON brin_ao_summarize_partial_@amname@ USING brin(i) WITH (pages_per_range=3); + +-- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. +-- The 1st range [33554432, 33554434] is full and the last range [33554435, 33554437] +-- is partially full with just 1 block: 33554435. +SELECT populate_pages('brin_ao_summarize_partial_@amname@', 1, tid '(33554435, 0)'); + +-- Sanity: We expect no summary information to be present. +-- Reason: For an empty AO table, when INSERTing into the 1st range, we don't +-- summarize. brininsert() -> brinGetTupleForHeapBlock() actually returns NULL +-- in this case as revmap_get_blkno_ao() returns InvalidBlockNumber. +-- This is contrary to heap behavior (where we return 1). +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_partial_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_partial_@amname@_i_idx') - 1) blkno; + +-- This will summarize both the first range and the last partial range. +SELECT brin_summarize_new_values('brin_ao_summarize_partial_@amname@_i_idx'); + +-- Sanity: Both ranges have been summarized. 
+1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_partial_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_partial_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_partial_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_partial_@amname@_i_idx', 2), + 'brin_ao_summarize_partial_@amname@_i_idx') ORDER BY blknum, attnum; + +-------------------------------------------------------------------------------- +-- Test cases with concurrency for BRIN indexes on AO/CO tables. +-------------------------------------------------------------------------------- -- Ensure that we don't summarize the last partial range in case it was extended -- by another transaction, while summarization was in flight. CREATE TABLE brin_range_extended_@amname@(i int) USING @amname@; CREATE INDEX ON brin_range_extended_@amname@ USING brin(i) WITH (pages_per_range=5); + -- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. -DO $$ /* in func */ -DECLARE curtid tid; /* in func */ -BEGIN /* in func */ - LOOP /* in func */ - INSERT INTO brin_range_extended_@amname@ VALUES (1) RETURNING ctid INTO curtid; /* in func */ - EXIT WHEN curtid > tid '(33554435, 0)'; /* in func */ - END LOOP; /* in func */ -END; /* in func */ -$$; /* in func */ +SELECT populate_pages('brin_range_extended_@amname@', 1, tid '(33554435, 0)'); -- Set up to suspend execution when will attempt to summarize the final partial -- range below: [33554432, 33554435]. @@ -30,18 +193,19 @@ SELECT gp_wait_until_triggered_fault('summarize_last_partial_range', 1, dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; -- Extend the last partial range by 1 block. 
-DO $$ /* in func */ -DECLARE curtid tid; /* in func */ -BEGIN /* in func */ - LOOP /* in func */ - INSERT INTO brin_range_extended_@amname@ VALUES (1) RETURNING ctid INTO curtid; /* in func */ - EXIT WHEN curtid > tid '(33554436, 0)'; /* in func */ - END LOOP; /* in func */ -END; /* in func */ -$$; /* in func */ +SELECT populate_pages('brin_range_extended_@amname@', 1, tid '(33554436, 0)'); SELECT gp_inject_fault('summarize_last_partial_range', 'reset', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; --- Summarize should only have summarized the first full range: [33554432, 33554436] 1<: + +-- Sanity: Summarize should only have summarized the first full range: [33554432, 33554436] +1U: SELECT blkno, brin_page_type(get_raw_page('brin_range_extended_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_range_extended_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_range_extended_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_range_extended_@amname@_i_idx', 2), + 'brin_range_extended_@amname@_i_idx') ORDER BY blknum, attnum; + +DROP EXTENSION pageinspect; diff --git a/src/test/isolation2/input/uao/brin_chain.source b/src/test/isolation2/input/uao/brin_chain.source new file mode 100644 index 00000000000..ab5c8f16ff3 --- /dev/null +++ b/src/test/isolation2/input/uao/brin_chain.source @@ -0,0 +1,64 @@ +-- Tests for BRIN chaining for AO/CO tables +-- These are in a separate file as they take longer and deal with more data. + +CREATE EXTENSION pageinspect; + +-- All tests insert rows into content=1. + +-- We create an append-optimized table with the following characteristics: +-- * seg0: 1000 committed rows -> 1 revmap page with pagenum=1. (filled by ALTER TABLE) +-- * seg1: 180000000 committed rows -> 2 revmap pages with pagenums=1,2. +-- REVMAP_PAGE_MAXITEMS = 5454. About 32768 chars fit in one logical heap +-- block. 
So we need at least 32768 * 5454 + 1 = 178716673 rows to have 2 +-- revmap pages. +-- * seg2: 2000 aborted rows -> No revmap pages. +-- * seg3: 32768 aborted rows (1 logical heap block), 3000 committed rows -> 1 revmap page. + +CREATE TABLE brin_chain_@amname@(i character(1)) USING heap; +INSERT INTO brin_chain_@amname@ SELECT '2' FROM generate_series(1, 1000); +ALTER TABLE brin_chain_@amname@ SET ACCESS METHOD @amname@; + +1: BEGIN; +2: BEGIN; +3: BEGIN; + +-- Insert 180000000 rows into seg1. Use COPY for speed. +!\retcode yes 2 | head -n 180000000 > /tmp/brin_chain_@amname@_seg1.csv; +1: COPY brin_chain_@amname@ FROM '/tmp/brin_chain_@amname@_seg1.csv'; +!\retcode rm /tmp/brin_chain_@amname@_seg1.csv; +2: INSERT INTO brin_chain_@amname@ SELECT '2' FROM generate_series(1, 2000); +3: INSERT INTO brin_chain_@amname@ SELECT '2' FROM generate_series(1, 32768); +3: ABORT; +3: BEGIN; +3: INSERT INTO brin_chain_@amname@ SELECT '2' FROM generate_series(1, 3000); + +1: COMMIT; +2: ABORT; +3: COMMIT; + +-- Create the index. +CREATE INDEX ON brin_chain_@amname@ USING brin(i) WITH (pages_per_range=1); + +-- Sanity: Inspect the revmap chain information (limit to first 5 segments) +1U: SELECT blkno, brin_page_type(get_raw_page('brin_chain_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_chain_@amname@_i_idx') - 1) blkno; +1U: SELECT firstrevmappages[:5] FROM brin_metapage_info(get_raw_page('brin_chain_@amname@_i_idx', 0)); +1U: SELECT lastrevmappages[:5] FROM brin_metapage_info(get_raw_page('brin_chain_@amname@_i_idx', 0)); +1U: SELECT lastrevmappagenums[:5] FROM brin_metapage_info(get_raw_page('brin_chain_@amname@_i_idx', 0)); +1U: SELECT segnum, brin_revmap_chain(get_raw_page('brin_chain_@amname@_i_idx', 0), + 'brin_chain_@amname@_i_idx', segnum) AS chain FROM generate_series(0, 3) segnum; + +-- Now test index retrieval. We should be able to: +-- * Iterate through all segfiles. 
+-- * Iterate through the revmap chain for segfile 1, containing multiple revmap pages. +-- * Handle cases where there is no revmap chain for a block sequence, like when +-- all tuples are deleted from a segment file (segfile 2). +-- * Handle missing logical heap blocks inside a block sequence due to aborted +-- inserts, such as for segfile 3. + +SET enable_seqscan TO off; +SET optimizer TO off; +EXPLAIN SELECT count(*) FROM brin_chain_@amname@ WHERE i > '1' and i < '3'; +SELECT count(*) FROM brin_chain_@amname@ WHERE i > '1' and i < '3'; + +DROP EXTENSION pageinspect; diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index ff9f861ff01..941c8ad06d3 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -315,3 +315,7 @@ test: export_distributed_snapshot # test TCP interconnect teardown bounded wait test: tcp_ic_teardown + +# Intensive tests for BRIN +test: uao/brin_chain_row +test: uao/brin_chain_column diff --git a/src/test/isolation2/output/uao/brin.source b/src/test/isolation2/output/uao/brin.source index 2307c67de54..0fefd4146a5 100644 --- a/src/test/isolation2/output/uao/brin.source +++ b/src/test/isolation2/output/uao/brin.source @@ -1,7 +1,390 @@ --- Test cases with concurrency for BRIN indexes on AO/CO tables. +-- We rely on pageinspect to perform white-box testing for summarization. 
+-- White-box tests are necessary to ensure that summarization is done +-- successfully (to avoid cases where ranges have brin data tuples without +-- values or where the range is not covered by the revmap etc) +CREATE EXTENSION pageinspect; +CREATE + +-------------------------------------------------------------------------------- +-- Test BRIN summarization with INSERT, brin_summarize_new_values() and VACUUM +-------------------------------------------------------------------------------- + +-- Create an index on an empty table +CREATE TABLE brin_ao_summarize_@amname@(i int) USING @amname@; +CREATE +CREATE INDEX ON brin_ao_summarize_@amname@ USING brin(i) WITH (pages_per_range=1); +CREATE + +-- Sanity: There are no revmap/data pages as there is no data +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta +(1 row) + +-- There is no data, so nothing to summarize. +SELECT brin_summarize_new_values('brin_ao_summarize_@amname@_i_idx'); + brin_summarize_new_values +--------------------------- + 0 +(1 row) + +-- Sanity: Index contents should not have changed due to the no-op summarize. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta +(1 row) + +-- Drop the index +DROP INDEX brin_ao_summarize_@amname@_i_idx; +DROP + +-- Create 3 blocks all on 1 QE, in 1 aoseg: 2 blocks full, 1 block with 1 tuple. +SELECT populate_pages('brin_ao_summarize_@amname@', 1, tid '(33554434, 0)'); + populate_pages +---------------- + +(1 row) + +-- Now re-create the index on the data inserted above. 
+CREATE INDEX ON brin_ao_summarize_@amname@ USING brin(i) WITH (pages_per_range=1); +CREATE + +-- Sanity: there should be 1 revmap page and 1 data page covering the 3 blocks. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular +(3 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) + (2,2) + (2,3) +(3 rows) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+---------- + 1 | 33554432 | 1 | f | f | f | {1 .. 1} + 2 | 33554433 | 1 | f | f | f | {1 .. 1} + 3 | 33554434 | 1 | f | f | f | {1 .. 1} +(3 rows) + +-- There is nothing new to summarize - it was all done during the index build. +SELECT brin_summarize_new_values('brin_ao_summarize_@amname@_i_idx'); + brin_summarize_new_values +--------------------------- + 0 +(1 row) + +-- Sanity: Index contents should not have changed due to the no-op summarize. 
+1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular +(3 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) + (2,2) + (2,3) +(3 rows) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+---------- + 1 | 33554432 | 1 | f | f | f | {1 .. 1} + 2 | 33554433 | 1 | f | f | f | {1 .. 1} + 3 | 33554434 | 1 | f | f | f | {1 .. 1} +(3 rows) + +-- Insert more so we have 5 blocks on 1 QE, in 1 aoseg: 4 blocks full, 1 block +-- with 1 tuple. +SELECT populate_pages('brin_ao_summarize_@amname@', 20, tid '(33554436, 0)'); + populate_pages +---------------- + +(1 row) + +-- Sanity: The 3rd block should have its summary updated and the last 2 blocks +-- will be left unsummarized. 
+1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular +(3 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) + (2,2) + (2,3) +(3 rows) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+----------- + 1 | 33554432 | 1 | f | f | f | {1 .. 1} + 2 | 33554433 | 1 | f | f | f | {1 .. 1} + 3 | 33554434 | 1 | f | f | f | {1 .. 20} +(3 rows) + +-- Summarize the last 2 blocks. +SELECT brin_summarize_new_values('brin_ao_summarize_@amname@_i_idx'); + brin_summarize_new_values +--------------------------- + 2 +(1 row) + +-- Sanity: All blocks should now have summary info. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular +(3 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) + (2,2) + (2,3) + (2,4) + (2,5) +(5 rows) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+------------ + 1 | 33554432 | 1 | f | f | f | {1 .. 1} + 2 | 33554433 | 1 | f | f | f | {1 .. 
1} + 3 | 33554434 | 1 | f | f | f | {1 .. 20} + 4 | 33554435 | 1 | f | f | f | {20 .. 20} + 5 | 33554436 | 1 | f | f | f | {20 .. 20} +(5 rows) + +-- Insert more so we have 7 blocks on 1 QE, in 1 aoseg: 6 blocks full, 1 page +-- with 1 tuple. +SELECT populate_pages('brin_ao_summarize_@amname@', 30, tid '(33554438, 0)'); + populate_pages +---------------- + +(1 row) + +-- Sanity: The 5th block should have its summary updated and the last 2 blocks +-- will be left unsummarized. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular +(3 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) + (2,2) + (2,3) + (2,4) + (2,5) +(5 rows) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+------------ + 1 | 33554432 | 1 | f | f | f | {1 .. 1} + 2 | 33554433 | 1 | f | f | f | {1 .. 1} + 3 | 33554434 | 1 | f | f | f | {1 .. 20} + 4 | 33554435 | 1 | f | f | f | {20 .. 20} + 5 | 33554436 | 1 | f | f | f | {20 .. 30} +(5 rows) + +DELETE FROM brin_ao_summarize_@amname@ WHERE i = 1; +DELETE 657 --- Note: We use loops to populate logical heap pages in one aoseg. These logical --- heap blocks can start at a large number. See AOSegmentGet_startHeapBlock(segno). +VACUUM brin_ao_summarize_@amname@; +VACUUM + +-- A new INSERT would always map to the last range on the old segment and that +-- range will be updated to hold the new value, as part of INSERT. 
+INSERT INTO brin_ao_summarize_@amname@ VALUES(40); +INSERT 1 + +-- All the live tuples will have been moved to a single new logical heap block +-- in seg2 (67108864). The 1 tuple INSERTed after the VACUUM should have gone to +-- the last block in seg1 (33554438). +SELECT distinct(right(split_part(ctid::text, ',', 1), -1)) AS blknum FROM brin_ao_summarize_@amname@; + blknum +---------- + 33554438 + 67108864 +(2 rows) + +-- Sanity: There should now be 2 revmap pages (1 new one for the new seg). Also, +-- there will be a new index tuple mapping to that new seg and block number. +-- Note: Since VACUUM summarizes all logical heap blocks (invokes summarization +-- with BRIN_ALL_BLOCKRANGES), and doesn't clean up existing summary info, we +-- can expect entries from the 1st seg to be still there (including blank entries +-- added for the 6th and 7th blocks) +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular + 3 | revmap +(4 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) + (2,2) + (2,3) + (2,4) + (2,5) + (2,6) + (2,7) +(7 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 3)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,8) +(1 row) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+------------ + 1 | 33554432 | 1 | f | f | f | {1 .. 1} + 2 | 33554433 | 1 | f | f | f | {1 .. 1} + 3 | 33554434 | 1 | f | f | f | {1 .. 20} + 4 | 33554435 | 1 | f | f | f | {20 .. 
20} + 5 | 33554436 | 1 | f | f | f | {20 .. 30} + 6 | 33554437 | 1 | t | f | f | + 7 | 33554438 | 1 | f | f | f | {40 .. 40} + 8 | 67108864 | 1 | f | f | f | {20 .. 30} +(8 rows) + +-- VACUUM should have already summarized this one logical heap block, so +-- invoking summarization again will be a no-op. +SELECT brin_summarize_new_values('brin_ao_summarize_@amname@_i_idx'); + brin_summarize_new_values +--------------------------- + 0 +(1 row) + +-- Sanity: Index contents should not have changed due to the no-op summarize. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular + 3 | revmap +(4 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) + (2,2) + (2,3) + (2,4) + (2,5) + (2,6) + (2,7) +(7 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 3)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,8) +(1 row) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+------------ + 1 | 33554432 | 1 | f | f | f | {1 .. 1} + 2 | 33554433 | 1 | f | f | f | {1 .. 1} + 3 | 33554434 | 1 | f | f | f | {1 .. 20} + 4 | 33554435 | 1 | f | f | f | {20 .. 20} + 5 | 33554436 | 1 | f | f | f | {20 .. 30} + 6 | 33554437 | 1 | t | f | f | + 7 | 33554438 | 1 | f | f | f | {40 .. 40} + 8 | 67108864 | 1 | f | f | f | {20 .. 
30} +(8 rows) + +-------------------------------------------------------------------------------- +-- Specific range summarization +-------------------------------------------------------------------------------- + +-- We don't allow specific range summarization for AO tables at the moment. +SELECT brin_summarize_range('brin_ao_summarize_@amname@_i_idx', 1); +ERROR: cannot summarize specific page range for append-optimized tables (seg1 slice1 10.0.0.202:7003 pid=886868) +CONTEXT: SQL function "brin_summarize_range" statement 1 + +-------------------------------------------------------------------------------- +-- Test summarization of last partial range. +-------------------------------------------------------------------------------- + +CREATE TABLE brin_ao_summarize_partial_@amname@(i int) USING @amname@; +CREATE +CREATE INDEX ON brin_ao_summarize_partial_@amname@ USING brin(i) WITH (pages_per_range=3); +CREATE + +-- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. +-- The 1st range [33554432, 33554434] is full and the last range [33554435, 33554437] +-- is partially full with just 1 block: 33554435. +SELECT populate_pages('brin_ao_summarize_partial_@amname@', 1, tid '(33554435, 0)'); + populate_pages +---------------- + +(1 row) + +-- Sanity: We expect no summary information to be present. +-- Reason: For an empty AO table, when INSERTing into the 1st range, we don't +-- summarize. brininsert() -> brinGetTupleForHeapBlock() actually returns NULL +-- in this case as revmap_get_blkno_ao() returns InvalidBlockNumber. +-- This is contrary to heap behavior (where we return 1). +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_partial_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_partial_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta +(1 row) + +-- This will summarize both the first range and the last partial range. 
+SELECT brin_summarize_new_values('brin_ao_summarize_partial_@amname@_i_idx'); + brin_summarize_new_values +--------------------------- + 2 +(1 row) + +-- Sanity: Both ranges have been summarized. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_partial_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_partial_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular +(3 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_partial_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) + (2,2) +(2 rows) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_partial_@amname@_i_idx', 2), 'brin_ao_summarize_partial_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+---------- + 1 | 33554432 | 1 | f | f | f | {1 .. 1} + 2 | 33554435 | 1 | f | f | f | {1 .. 1} +(2 rows) + +-------------------------------------------------------------------------------- +-- Test cases with concurrency for BRIN indexes on AO/CO tables. +-------------------------------------------------------------------------------- -- Ensure that we don't summarize the last partial range in case it was extended -- by another transaction, while summarization was in flight. @@ -10,8 +393,14 @@ CREATE TABLE brin_range_extended_@amname@(i int) USING @amname@; CREATE CREATE INDEX ON brin_range_extended_@amname@ USING brin(i) WITH (pages_per_range=5); CREATE + -- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. 
-DO $$ /* in func */ DECLARE curtid tid; /* in func */ BEGIN /* in func */ LOOP /* in func */ INSERT INTO brin_range_extended_@amname@ VALUES (1) RETURNING ctid INTO curtid; /* in func */ EXIT WHEN curtid > tid '(33554435, 0)'; /* in func */ END LOOP; /* in func */ END; /* in func */ $$; /* in func */ +SELECT populate_pages('brin_range_extended_@amname@', 1, tid '(33554435, 0)'); + populate_pages +---------------- + +(1 row) + -- Set up to suspend execution when will attempt to summarize the final partial -- range below: [33554432, 33554435]. SELECT gp_inject_fault('summarize_last_partial_range', 'suspend', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; @@ -29,16 +418,42 @@ SELECT gp_wait_until_triggered_fault('summarize_last_partial_range', 1, dbid) FR (1 row) -- Extend the last partial range by 1 block. -DO $$ /* in func */ DECLARE curtid tid; /* in func */ BEGIN /* in func */ LOOP /* in func */ INSERT INTO brin_range_extended_@amname@ VALUES (1) RETURNING ctid INTO curtid; /* in func */ EXIT WHEN curtid > tid '(33554436, 0)'; /* in func */ END LOOP; /* in func */ END; /* in func */ $$; /* in func */ +SELECT populate_pages('brin_range_extended_@amname@', 1, tid '(33554436, 0)'); + populate_pages +---------------- + +(1 row) + SELECT gp_inject_fault('summarize_last_partial_range', 'reset', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; gp_inject_fault ----------------- Success: (1 row) --- Summarize should only have summarized the first full range: [33554432, 33554436] 1<: <... 
completed> brin_summarize_new_values --------------------------- 1 (1 row) + +-- Sanity: Summarize should only have summarized the first full range: [33554432, 33554436] +1U: SELECT blkno, brin_page_type(get_raw_page('brin_range_extended_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_range_extended_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular +(3 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_range_extended_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) +(1 row) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_range_extended_@amname@_i_idx', 2), 'brin_range_extended_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+---------- + 1 | 33554432 | 1 | f | f | f | {1 .. 1} +(1 row) + +DROP EXTENSION pageinspect; +DROP diff --git a/src/test/isolation2/output/uao/brin_chain.source b/src/test/isolation2/output/uao/brin_chain.source new file mode 100644 index 00000000000..030abe4cc9a --- /dev/null +++ b/src/test/isolation2/output/uao/brin_chain.source @@ -0,0 +1,136 @@ +-- Tests for BRIN chaining for AO/CO tables +-- These are in a separate file as they take longer and deal with more data. + +CREATE EXTENSION pageinspect; +CREATE + +-- All tests insert rows into content=1. + +-- We create an append-optimized table with the following characteristics: +-- * seg0: 1000 committed rows -> 1 revmap page with pagenum=1. (filled by ALTER TABLE) +-- * seg1: 180000000 committed rows -> 2 revmap pages with pagenums=1,2. +-- REVMAP_PAGE_MAXITEMS = 5454. About 32768 chars fit in one logical heap +-- block. So we need at least 32768 * 5454 + 1 = 178716673 rows to have 2 +-- revmap pages. +-- * seg2: 2000 aborted rows -> No revmap pages. 
+-- * seg3: 32768 aborted rows (1 logical heap block), 3000 committed rows -> 1 revmap page. + +CREATE TABLE brin_chain_@amname@(i character(1)) USING heap; +CREATE +INSERT INTO brin_chain_@amname@ SELECT '2' FROM generate_series(1, 1000); +INSERT 1000 +ALTER TABLE brin_chain_@amname@ SET ACCESS METHOD @amname@; +ALTER + +1: BEGIN; +BEGIN +2: BEGIN; +BEGIN +3: BEGIN; +BEGIN + +-- Insert 180000000 rows into seg1. Use COPY for speed. +!\retcode yes 2 | head -n 180000000 > /tmp/brin_chain_@amname@_seg1.csv; +-- start_ignore + +-- end_ignore +(exited with code 0) +1: COPY brin_chain_@amname@ FROM '/tmp/brin_chain_@amname@_seg1.csv'; +COPY 180000000 +!\retcode rm /tmp/brin_chain_@amname@_seg1.csv; +-- start_ignore + +-- end_ignore +(exited with code 0) +2: INSERT INTO brin_chain_@amname@ SELECT '2' FROM generate_series(1, 2000); +INSERT 2000 +3: INSERT INTO brin_chain_@amname@ SELECT '2' FROM generate_series(1, 32768); +INSERT 32768 +3: ABORT; +ABORT +3: BEGIN; +BEGIN +3: INSERT INTO brin_chain_@amname@ SELECT '2' FROM generate_series(1, 3000); +INSERT 3000 + +1: COMMIT; +COMMIT +2: ABORT; +ABORT +3: COMMIT; +COMMIT + +-- Create the index. 
+CREATE INDEX ON brin_chain_@amname@ USING brin(i) WITH (pages_per_range=1); +CREATE + +-- Sanity: Inspect the revmap chain information (limit to first 5 segments) +1U: SELECT blkno, brin_page_type(get_raw_page('brin_chain_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_chain_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular + 3 | revmap + 4 | regular + 5 | regular + 6 | regular + 7 | revmap + 8 | revmap +(9 rows) +1U: SELECT firstrevmappages[:5] FROM brin_metapage_info(get_raw_page('brin_chain_@amname@_i_idx', 0)); + firstrevmappages +----------------------------------- + [1, 3, 4294967295, 8, 4294967295] +(1 row) +1U: SELECT lastrevmappages[:5] FROM brin_metapage_info(get_raw_page('brin_chain_@amname@_i_idx', 0)); + lastrevmappages +----------------------------------- + [1, 7, 4294967295, 8, 4294967295] +(1 row) +1U: SELECT lastrevmappagenums[:5] FROM brin_metapage_info(get_raw_page('brin_chain_@amname@_i_idx', 0)); + lastrevmappagenums +-------------------- + [1, 2, 0, 1, 0] +(1 row) +1U: SELECT segnum, brin_revmap_chain(get_raw_page('brin_chain_@amname@_i_idx', 0), 'brin_chain_@amname@_i_idx', segnum) AS chain FROM generate_series(0, 3) segnum; + segnum | chain +--------+-------- + 0 | [1] + 1 | [3, 7] + 2 | + 3 | [8] +(4 rows) + +-- Now test index retrieval. We should be able to: +-- * Iterate through all segfiles. +-- * Iterate through the revmap chain for segfile 1, containing multiple revmap pages. +-- * Handle cases where there is no revmap chain for a block sequence, like when +-- all tuples are deleted from a segment file (segfile 2). +-- * Handle missing logical heap blocks inside a block sequence due to aborted +-- inserts, such as for segfile 3. 
+ +SET enable_seqscan TO off; +SET +SET optimizer TO off; +SET +EXPLAIN SELECT count(*) FROM brin_chain_@amname@ WHERE i > '1' and i < '3'; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=862.37..862.38 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=862.31..862.36 rows=3 width=8) + -> Partial Aggregate (cost=862.31..862.32 rows=1 width=8) + -> Bitmap Heap Scan on brin_chain_@amname@ (cost=397.10..861.60 rows=287 width=0) + Recheck Cond: ((i > '1'::bpchar) AND (i < '3'::bpchar)) + -> Bitmap Index Scan on brin_chain_@amname@_i_idx (cost=0.00..397.02 rows=28700 width=0) + Index Cond: ((i > '1'::bpchar) AND (i < '3'::bpchar)) + Optimizer: Postgres query optimizer +(8 rows) +SELECT count(*) FROM brin_chain_@amname@ WHERE i > '1' and i < '3'; + count +----------- + 180004000 +(1 row) + +DROP EXTENSION pageinspect; +DROP diff --git a/src/test/isolation2/sql/setup.sql b/src/test/isolation2/sql/setup.sql index 9d6c0cc2456..48f37d4a153 100644 --- a/src/test/isolation2/sql/setup.sql +++ b/src/test/isolation2/sql/setup.sql @@ -439,3 +439,53 @@ begin /* in func */ end loop; /* in func */ end; /* in func */ $$ language plpgsql; + +-- Helper function that ensures stats collector receives stat from the latest operation. 
+create or replace function wait_until_vacuum_count_change_to(relid oid, stat_val_expected bigint) + returns text as $$ +declare + stat_val int; /* in func */ + i int; /* in func */ +begin + i := 0; /* in func */ + while i < 1200 loop + select pg_stat_get_vacuum_count(relid) into stat_val; /* in func */ + if stat_val = stat_val_expected then /* in func */ + return 'OK'; /* in func */ + end if; /* in func */ + perform pg_sleep(0.1); /* in func */ + perform pg_stat_clear_snapshot(); /* in func */ + i := i + 1; /* in func */ + end loop; /* in func */ + return 'Fail'; /* in func */ +end; /* in func */ +$$ language plpgsql; + +-- Helper function to get the number of blocks in a relation. +CREATE OR REPLACE FUNCTION blocks(rel regclass) RETURNS int AS $$ /* in func */ +BEGIN /* in func */ +RETURN pg_relation_size(rel) / current_setting('block_size')::int; /* in func */ +END; $$ /* in func */ + LANGUAGE PLPGSQL; + +-- Helper function to populate logical heap pages in a certain block sequence. +-- Can be used for both heap and AO/CO tables. The target block sequence into +-- which we insert the pages depends on the session which is inserting the data. +-- This is currently meant to be used with a single column integer table. +-- +-- Sample usage: SELECT populate_pages('foo', 1, tid '(33554435,0)') +-- This will insert tuples with value=1 into a single QE such that logical +-- heap blocks [33554432, 33554434] will be full and 33554435 will have only +-- 1 tuple. 
+-- +-- Note: while using this with AO/CO tables, please account for how the block +-- sequences start/end based on the concurrency level (see AOSegmentGet_startHeapBlock()) +CREATE OR REPLACE FUNCTION populate_pages(relname text, value int, upto tid) RETURNS VOID AS $$ /* in func */ +DECLARE curtid tid; /* in func */ +BEGIN /* in func */ +LOOP /* in func */ +EXECUTE format('INSERT INTO %I VALUES($1) RETURNING ctid', relname) INTO curtid USING value; /* in func */ +EXIT WHEN curtid > upto; /* in func */ +END LOOP; /* in func */ +END; $$ /* in func */ + LANGUAGE PLPGSQL; diff --git a/src/test/recovery/t/202_wal_consistency_brin.pl b/src/test/recovery/t/202_wal_consistency_brin.pl new file mode 100644 index 00000000000..b60f48b2a2d --- /dev/null +++ b/src/test/recovery/t/202_wal_consistency_brin.pl @@ -0,0 +1,110 @@ +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Verify WAL consistency of BRIN indexes for GPDB. This is a replica of +# src/test/modules/brin/t/02_wal_consistency.pl, with added tests for AO/CO tables. +# It's added here, since we currently don't run src/test/modules in CI. 
+ +use strict; +use warnings; + +use PostgreSQL::Test::Utils; +use Test::More; +use PostgreSQL::Test::Cluster; + +# Set up primary +my $whiskey = PostgreSQL::Test::Cluster->new('whiskey'); +$whiskey->init(allows_streaming => 1); +$whiskey->append_conf('postgresql.conf', 'wal_consistency_checking = brin'); +$whiskey->start; +$whiskey->safe_psql('postgres', 'create extension pageinspect'); +is( $whiskey->psql( + 'postgres', + qq[SELECT pg_create_physical_replication_slot('standby_1');]), + 0, + 'physical slot created on primary'); + +# Take backup +my $backup_name = 'brinbkp'; +$whiskey->backup($backup_name); + +# Create streaming standby linking to primary +my $charlie = PostgreSQL::Test::Cluster->new('charlie'); +$charlie->init_from_backup($whiskey, $backup_name, has_streaming => 1); +$charlie->append_conf('postgresql.conf', 'primary_slot_name = standby_1'); +$charlie->start; + +# Now write some WAL in the primary for a heap table + +$whiskey->safe_psql( + 'postgres', qq{ +create table tbl_timestamp0 (d1 timestamp(0) without time zone) with (fillfactor=10); +create index on tbl_timestamp0 using brin (d1) with (pages_per_range = 1, autosummarize=false); +}); +# Run a loop that will end when the second revmap page is created +$whiskey->safe_psql( + 'postgres', q{ +do +$$ +declare + current timestamp with time zone := '2019-03-27 08:14:01.123456789 UTC'; +begin + loop + insert into tbl_timestamp0 select i from + generate_series(current, current + interval '1 day', '28 seconds') i; + perform brin_summarize_new_values('tbl_timestamp0_d1_idx'); + if (brin_metapage_info(get_raw_page('tbl_timestamp0_d1_idx', 0))).lastrevmappage > 1 then + exit; + end if; + current := current + interval '1 day'; + end loop; +end +$$; +}); + +# Now write some WAL in the primary for an ao_row and an ao_column table. + +# ao_row: +$whiskey->safe_psql( + 'postgres', qq{ +-- Case 1 (Starting a revmap chain .. 
1 revmap page) +CREATE TABLE tbl_ao_row1 (i int) USING ao_row; +INSERT INTO tbl_ao_row1 SELECT generate_series(1, 5); +CREATE INDEX ON tbl_ao_row1 using brin (i) with (pages_per_range = 1, autosummarize=false); + +-- Case 2 (Extending a revmap chain .. 2 revmap pages) +CREATE TABLE tbl_ao_row2 (i int) USING ao_row; +insert into tbl_ao_row2 select generate_series(1, 5); +-- Bloat gp_fastsequence so that we will have to create 2 revmap pages. +-- REVMAP_PAGE_MAXITEMS = 5456. About 32768 ints fit in one logical heap block. +-- So we need at least 32768 * 5456 + 1 = 178782209 rows to have 2 revmap pages. +SET allow_system_table_mods TO ON; +UPDATE gp_fastsequence SET last_sequence = 180000000 WHERE + objid = (SELECT segrelid FROM pg_appendonly WHERE relid='tbl_ao_row2'::regclass); +INSERT INTO tbl_ao_row2 SELECT generate_series(6, 10); +CREATE INDEX ON tbl_ao_row2 USING brin (i) WITH (pages_per_range = 1, autosummarize=false); +}); + +# ao_column: +$whiskey->safe_psql( + 'postgres', qq{ +-- Case 1 (Starting a revmap chain .. 1 revmap page) +CREATE TABLE tbl_ao_column1 (i int) USING ao_column; +INSERT INTO tbl_ao_column1 SELECT generate_series(1, 5); +CREATE INDEX ON tbl_ao_column1 using brin (i) with (pages_per_range = 1, autosummarize=false); + +-- Case 2 (Extending a revmap chain .. 2 revmap pages) +CREATE TABLE tbl_ao_column2 (i int) USING ao_column; +insert into tbl_ao_column2 select generate_series(1, 5); +-- Bloat gp_fastsequence so that we will have to create 2 revmap pages. +-- REVMAP_PAGE_MAXITEMS = 5456. About 32768 ints fit in one logical heap block. +-- So we need at least 32768 * 5456 + 1 = 178782209 rows to have 2 revmap pages. 
+SET allow_system_table_mods TO ON; +UPDATE gp_fastsequence SET last_sequence = 180000000 WHERE + objid = (SELECT segrelid FROM pg_appendonly WHERE relid='tbl_ao_column2'::regclass); +INSERT INTO tbl_ao_column2 SELECT generate_series(6, 10); +CREATE INDEX ON tbl_ao_column2 USING brin (i) WITH (pages_per_range = 1, autosummarize=false); +}); + +$whiskey->wait_for_catchup($charlie, 'replay', $whiskey->lsn('insert')); + +done_testing(); diff --git a/src/test/regress/expected/brin_ao.out b/src/test/regress/expected/brin_ao.out index 2abc7f63d6c..84f1024e7ad 100644 --- a/src/test/regress/expected/brin_ao.out +++ b/src/test/regress/expected/brin_ao.out @@ -446,131 +446,3 @@ INSERT INTO brintest_ao SELECT format('%s/%s%s', odd, even, tenthous)::pg_lsn, box(point(odd, even), point(thousand, twothousand)) FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5; --- Test summarization --- Note: We use loops to populate logical heap pages in one aoseg. These logical --- heap blocks can start at a large number. See AOSegmentGet_startHeapBlock(segno). -CREATE TABLE brin_ao_summarize(i int) USING ao_row; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -CREATE INDEX ON brin_ao_summarize USING brin(i) WITH (pages_per_range=1); --- Tests for brin_summarize_new_values -SELECT brin_summarize_new_values('brin_ao_summarize'); -- error, not an index -ERROR: "brin_ao_summarize" is not an index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 -SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index -ERROR: "tenk1_unique1" is not a BRIN index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 --- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. 
--- Index dead tuples will not always be cleaned up completely after VACUUM, resulting --- brin_summarize_new_values() will not always be accurate. So ignore the check to --- coordinate with the new behavior. --- There is no data, so nothing to summarize. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - -DROP INDEX brin_ao_summarize_i_idx; --- Create 3 blocks all on 1 QE, in 1 aoseg: 2 blocks full, 1 block with 1 tuple. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554434, 0)'; - END LOOP; -END; -$$; --- Now create the index on the data inserted above. -CREATE INDEX ON brin_ao_summarize USING brin(i) WITH (pages_per_range=1); --- There is nothing new to summarize - it was all done during the index build. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- Insert more so we have 5 blocks on 1 QE, in 1 aoseg: 4 blocks full, 1 block --- with 1 tuple. The last and penultimate blocks will be unsummarized. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (20) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554436, 0)'; - END LOOP; -END; -$$; --- The last 2 blocks will be summarized. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 2 -(1 row) - --- Insert more so we have 7 blocks on 1 QE, in 1 aoseg: 6 blocks full, 1 page --- with 1 tuple. The last and penultimate blocks are unsummarized. 
-DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (30) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554438, 0)'; - END LOOP; -END; -$$; -DELETE FROM brin_ao_summarize WHERE i = 1; -VACUUM brin_ao_summarize; --- All the tuples will have been moved to one aoseg and all the tuples should --- have fit in one logical heap block. -SELECT distinct(right(split_part(ctid::text, ',', 1), -1)) AS blknum - FROM brin_ao_summarize; - blknum ----------- - 67108864 -(1 row) - --- VACUUM should have already summarized this one logical heap block, so --- invoking summarization again will be a no-op. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- We don't allow specific range summarization for AO tables at the moment. -SELECT brin_summarize_range('brin_ao_summarize_i_idx', 1); -ERROR: cannot summarize specific page range for append-optimized tables (seg2 slice1 192.168.0.148:7004 pid=25354) -CONTEXT: SQL function "brin_summarize_range" statement 1 --- Test summarization of last partial range. -CREATE TABLE brin_ao_summarize_partial(i int) USING ao_row; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -CREATE INDEX ON brin_ao_summarize_partial USING brin(i) WITH (pages_per_range=3); --- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. --- The 1st range [33554432, 33554434] is full and the last range [33554435, 33554437] --- is partially full with just 1 block: 33554435. 
-DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize_partial VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554435, 0)'; - END LOOP; -END; -$$; --- We should successfully summarize the last partial range. --- --- Note: For an empty AO table, when INSERTing into the 1st range, we don't --- summarize. brininsert() -> brinGetTupleForHeapBlock() actually returns NULL --- in this case as revmap_get_blkno_ao() returns InvalidBlockNumber. --- This is contrary to heap behavior (where we return 1). --- --- Thus, we will have both ranges summarized here. -SELECT brin_summarize_new_values('brin_ao_summarize_partial_i_idx'); - brin_summarize_new_values ---------------------------- - 2 -(1 row) - diff --git a/src/test/regress/expected/brin_ao_optimizer.out b/src/test/regress/expected/brin_ao_optimizer.out index 9ff78f315e3..61d672bedb5 100644 --- a/src/test/regress/expected/brin_ao_optimizer.out +++ b/src/test/regress/expected/brin_ao_optimizer.out @@ -464,133 +464,3 @@ INSERT INTO brintest_ao SELECT format('%s/%s%s', odd, even, tenthous)::pg_lsn, box(point(odd, even), point(thousand, twothousand)) FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5; --- Test summarization --- Note: We use loops to populate logical heap pages in one aoseg. These logical --- heap blocks can start at a large number. See AOSegmentGet_startHeapBlock(segno). -CREATE TABLE brin_ao_summarize(i int) USING ao_row; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
-CREATE INDEX ON brin_ao_summarize USING brin(i) WITH (pages_per_range=1); --- Tests for brin_summarize_new_values -SELECT brin_summarize_new_values('brin_ao_summarize'); -- error, not an index -ERROR: "brin_ao_summarize" is not an index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 -SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index -ERROR: "tenk1_unique1" is not a BRIN index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 --- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. --- Index dead tuples will not always be cleaned up completely after VACUUM, resulting --- brin_summarize_new_values() will not always be accurate. So ignore the check to --- coordinate with the new behavior. --- There is no data, so nothing to summarize. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - -DROP INDEX brin_ao_summarize_i_idx; --- Create 3 blocks all on 1 QE, in 1 aoseg: 2 blocks full, 1 block with 1 tuple. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554434, 0)'; - END LOOP; -END; -$$; --- Now create the index on the data inserted above. -CREATE INDEX ON brin_ao_summarize USING brin(i) WITH (pages_per_range=1); --- There is nothing new to summarize - it was all done during the index build. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- Insert more so we have 5 blocks on 1 QE, in 1 aoseg: 4 blocks full, 1 block --- with 1 tuple. The last and penultimate blocks will be unsummarized. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (20) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554436, 0)'; - END LOOP; -END; -$$; --- The last 2 blocks will be summarized. 
-SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 2 -(1 row) - --- Insert more so we have 7 blocks on 1 QE, in 1 aoseg: 6 blocks full, 1 page --- with 1 tuple. The last and penultimate blocks are unsummarized. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (30) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554438, 0)'; - END LOOP; -END; -$$; -DELETE FROM brin_ao_summarize WHERE i = 1; -VACUUM brin_ao_summarize; --- All the tuples will have been moved to one aoseg and all the tuples should --- have fit in one logical heap block. -SELECT distinct(right(split_part(ctid::text, ',', 1), -1)) AS blknum - FROM brin_ao_summarize; -NOTICE: One or more columns in the following table(s) do not have statistics: brin_ao_summarize -HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. - blknum ----------- - 67108864 -(1 row) - --- VACUUM should have already summarized this one logical heap block, so --- invoking summarization again will be a no-op. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- We don't allow specific range summarization for AO tables at the moment. -SELECT brin_summarize_range('brin_ao_summarize_i_idx', 1); -ERROR: cannot summarize specific page range for append-optimized tables (seg0 slice1 192.168.0.148:7002 pid=20357) -CONTEXT: SQL function "brin_summarize_range" statement 1 --- Test summarization of last partial range. -CREATE TABLE brin_ao_summarize_partial(i int) USING ao_row; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. -CREATE INDEX ON brin_ao_summarize_partial USING brin(i) WITH (pages_per_range=3); --- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. --- The 1st range [33554432, 33554434] is full and the last range [33554435, 33554437] --- is partially full with just 1 block: 33554435. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize_partial VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554435, 0)'; - END LOOP; -END; -$$; --- We should successfully summarize the last partial range. --- --- Note: For an empty AO table, when INSERTing into the 1st range, we don't --- summarize. brininsert() -> brinGetTupleForHeapBlock() actually returns NULL --- in this case as revmap_get_blkno_ao() returns InvalidBlockNumber. --- This is contrary to heap behavior (where we return 1). --- --- Thus, we will have both ranges summarized here. -SELECT brin_summarize_new_values('brin_ao_summarize_partial_i_idx'); - brin_summarize_new_values ---------------------------- - 2 -(1 row) - diff --git a/src/test/regress/expected/brin_aocs.out b/src/test/regress/expected/brin_aocs.out index 578e3010ff6..664d99b00ff 100644 --- a/src/test/regress/expected/brin_aocs.out +++ b/src/test/regress/expected/brin_aocs.out @@ -446,131 +446,3 @@ INSERT INTO brintest_aocs SELECT format('%s/%s%s', odd, even, tenthous)::pg_lsn, box(point(odd, even), point(thousand, twothousand)) FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5; --- Test summarization --- Note: We use loops to populate logical heap pages in one aoseg. These logical --- heap blocks can start at a large number. See AOSegmentGet_startHeapBlock(segno). -CREATE TABLE brin_aoco_summarize(i int) USING ao_column; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. 
-HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -CREATE INDEX ON brin_aoco_summarize USING brin(i) WITH (pages_per_range=1); --- Tests for brin_summarize_new_values -SELECT brin_summarize_new_values('brin_aoco_summarize'); -- error, not an index -ERROR: "brin_aoco_summarize" is not an index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 -SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index -ERROR: "tenk1_unique1" is not a BRIN index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 --- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. --- Index dead tuples will not always be cleaned up completely after VACUUM, resulting --- brin_summarize_new_values() will not always be accurate. So ignore the check to --- coordinate with the new behavior. --- There is no data, so nothing to summarize. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - -DROP INDEX brin_aoco_summarize_i_idx; --- Create 3 blocks all on 1 QE, in 1 aoseg: 2 blocks full, 1 block with 1 tuple. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554434, 0)'; - END LOOP; -END; -$$; --- Now create the index on the data inserted above. -CREATE INDEX ON brin_aoco_summarize USING brin(i) WITH (pages_per_range=1); --- There is nothing new to summarize - it was all done during the index build. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- Insert more so we have 5 blocks on 1 QE, in 1 aoseg: 4 blocks full, 1 block --- with 1 tuple. The last and penultimate blocks will be unsummarized. 
-DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (20) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554436, 0)'; - END LOOP; -END; -$$; --- The last 2 blocks will be summarized. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 2 -(1 row) - --- Insert more so we have 7 blocks on 1 QE, in 1 aoseg: 6 blocks full, 1 page --- with 1 tuple. The last and penultimate blocks are unsummarized. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (30) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554438, 0)'; - END LOOP; -END; -$$; -DELETE FROM brin_aoco_summarize WHERE i = 1; -VACUUM brin_aoco_summarize; --- All the tuples will have been moved to one aoseg and all the tuples should --- have fit in one logical heap block. -SELECT distinct(right(split_part(ctid::text, ',', 1), -1)) AS blknum -FROM brin_aoco_summarize; - blknum ----------- - 67108864 -(1 row) - --- VACUUM should have already summarized this one logical heap block, so --- invoking summarization again will be a no-op. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- We don't allow specific range summarization for AO tables at the moment. -SELECT brin_summarize_range('brin_aoco_summarize_i_idx', 1); -ERROR: cannot summarize specific page range for append-optimized tables (seg0 slice1 192.168.0.148:7002 pid=67650) -CONTEXT: SQL function "brin_summarize_range" statement 1 --- Test summarization of last partial range. -CREATE TABLE brin_aoco_summarize_partial(i int) USING ao_column; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. -CREATE INDEX ON brin_aoco_summarize_partial USING brin(i) WITH (pages_per_range=3); --- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. --- The 1st range [33554432, 33554434] is full and the last range [33554435, 33554437] --- is partially full with just 1 block: 33554435. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize_partial VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554435, 0)'; - END LOOP; -END; -$$; --- We should successfully summarize the last partial range. --- --- Note: For an empty AOCO table, when INSERTing into the 1st range, we don't --- summarize. brininsert() -> brinGetTupleForHeapBlock() actually returns NULL --- in this case as revmap_get_blkno_ao() returns InvalidBlockNumber. --- This is contrary to heap behavior (where we return 1). --- --- Thus, we will have both ranges summarized here. -SELECT brin_summarize_new_values('brin_aoco_summarize_partial_i_idx'); - brin_summarize_new_values ---------------------------- - 2 -(1 row) - diff --git a/src/test/regress/expected/brin_aocs_optimizer.out b/src/test/regress/expected/brin_aocs_optimizer.out index 8d4120b94fa..6964fb2e731 100644 --- a/src/test/regress/expected/brin_aocs_optimizer.out +++ b/src/test/regress/expected/brin_aocs_optimizer.out @@ -464,133 +464,3 @@ INSERT INTO brintest_aocs SELECT format('%s/%s%s', odd, even, tenthous)::pg_lsn, box(point(odd, even), point(thousand, twothousand)) FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5; --- Test summarization --- Note: We use loops to populate logical heap pages in one aoseg. These logical --- heap blocks can start at a large number. See AOSegmentGet_startHeapBlock(segno). -CREATE TABLE brin_aoco_summarize(i int) USING ao_column; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. 
-HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -CREATE INDEX ON brin_aoco_summarize USING brin(i) WITH (pages_per_range=1); --- Tests for brin_summarize_new_values -SELECT brin_summarize_new_values('brin_aoco_summarize'); -- error, not an index -ERROR: "brin_aoco_summarize" is not an index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 -SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index -ERROR: "tenk1_unique1" is not a BRIN index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 --- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. --- Index dead tuples will not always be cleaned up completely after VACUUM, resulting --- brin_summarize_new_values() will not always be accurate. So ignore the check to --- coordinate with the new behavior. --- There is no data, so nothing to summarize. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - -DROP INDEX brin_aoco_summarize_i_idx; --- Create 3 blocks all on 1 QE, in 1 aoseg: 2 blocks full, 1 block with 1 tuple. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554434, 0)'; - END LOOP; -END; -$$; --- Now create the index on the data inserted above. -CREATE INDEX ON brin_aoco_summarize USING brin(i) WITH (pages_per_range=1); --- There is nothing new to summarize - it was all done during the index build. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- Insert more so we have 5 blocks on 1 QE, in 1 aoseg: 4 blocks full, 1 block --- with 1 tuple. The last and penultimate blocks will be unsummarized. 
-DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (20) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554436, 0)'; - END LOOP; -END; -$$; --- The last 2 blocks will be summarized. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 2 -(1 row) - --- Insert more so we have 7 blocks on 1 QE, in 1 aoseg: 6 blocks full, 1 page --- with 1 tuple. The last and penultimate blocks are unsummarized. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (30) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554438, 0)'; - END LOOP; -END; -$$; -DELETE FROM brin_aoco_summarize WHERE i = 1; -VACUUM brin_aoco_summarize; --- All the tuples will have been moved to one aoseg and all the tuples should --- have fit in one logical heap block. -SELECT distinct(right(split_part(ctid::text, ',', 1), -1)) AS blknum -FROM brin_aoco_summarize; -NOTICE: One or more columns in the following table(s) do not have statistics: brin_aoco_summarize -HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. - blknum ----------- - 67108864 -(1 row) - --- VACUUM should have already summarized this one logical heap block, so --- invoking summarization again will be a no-op. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- We don't allow specific range summarization for AO tables at the moment. -SELECT brin_summarize_range('brin_aoco_summarize_i_idx', 1); -ERROR: cannot summarize specific page range for append-optimized tables (seg0 slice1 192.168.0.148:7002 pid=75014) -CONTEXT: SQL function "brin_summarize_range" statement 1 --- Test summarization of last partial range. 
-CREATE TABLE brin_aoco_summarize_partial(i int) USING ao_column; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -CREATE INDEX ON brin_aoco_summarize_partial USING brin(i) WITH (pages_per_range=3); --- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. --- The 1st range [33554432, 33554434] is full and the last range [33554435, 33554437] --- is partially full with just 1 block: 33554435. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize_partial VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554435, 0)'; - END LOOP; -END; -$$; --- We should successfully summarize the last partial range. --- --- Note: For an empty AOCO table, when INSERTing into the 1st range, we don't --- summarize. brininsert() -> brinGetTupleForHeapBlock() actually returns NULL --- in this case as revmap_get_blkno_ao() returns InvalidBlockNumber. --- This is contrary to heap behavior (where we return 1). --- --- Thus, we will have both ranges summarized here. -SELECT brin_summarize_new_values('brin_aoco_summarize_partial_i_idx'); - brin_summarize_new_values ---------------------------- - 2 -(1 row) - diff --git a/src/test/regress/sql/brin_ao.sql b/src/test/regress/sql/brin_ao.sql index e0f4452225c..822230876e2 100644 --- a/src/test/regress/sql/brin_ao.sql +++ b/src/test/regress/sql/brin_ao.sql @@ -456,110 +456,3 @@ INSERT INTO brintest_ao SELECT format('%s/%s%s', odd, even, tenthous)::pg_lsn, box(point(odd, even), point(thousand, twothousand)) FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5; - --- Test summarization - --- Note: We use loops to populate logical heap pages in one aoseg. These logical --- heap blocks can start at a large number. 
See AOSegmentGet_startHeapBlock(segno). - -CREATE TABLE brin_ao_summarize(i int) USING ao_row; -CREATE INDEX ON brin_ao_summarize USING brin(i) WITH (pages_per_range=1); - --- Tests for brin_summarize_new_values -SELECT brin_summarize_new_values('brin_ao_summarize'); -- error, not an index -SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index --- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. --- Index dead tuples will not always be cleaned up completely after VACUUM, resulting --- brin_summarize_new_values() will not always be accurate. So ignore the check to --- coordinate with the new behavior. --- There is no data, so nothing to summarize. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - -DROP INDEX brin_ao_summarize_i_idx; - --- Create 3 blocks all on 1 QE, in 1 aoseg: 2 blocks full, 1 block with 1 tuple. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554434, 0)'; - END LOOP; -END; -$$; - --- Now create the index on the data inserted above. -CREATE INDEX ON brin_ao_summarize USING brin(i) WITH (pages_per_range=1); - --- There is nothing new to summarize - it was all done during the index build. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - --- Insert more so we have 5 blocks on 1 QE, in 1 aoseg: 4 blocks full, 1 block --- with 1 tuple. The last and penultimate blocks will be unsummarized. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (20) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554436, 0)'; - END LOOP; -END; -$$; - --- The last 2 blocks will be summarized. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - --- Insert more so we have 7 blocks on 1 QE, in 1 aoseg: 6 blocks full, 1 page --- with 1 tuple. The last and penultimate blocks are unsummarized. 
-DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (30) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554438, 0)'; - END LOOP; -END; -$$; - -DELETE FROM brin_ao_summarize WHERE i = 1; - -VACUUM brin_ao_summarize; - --- All the tuples will have been moved to one aoseg and all the tuples should --- have fit in one logical heap block. -SELECT distinct(right(split_part(ctid::text, ',', 1), -1)) AS blknum - FROM brin_ao_summarize; - --- VACUUM should have already summarized this one logical heap block, so --- invoking summarization again will be a no-op. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - --- We don't allow specific range summarization for AO tables at the moment. -SELECT brin_summarize_range('brin_ao_summarize_i_idx', 1); - --- Test summarization of last partial range. -CREATE TABLE brin_ao_summarize_partial(i int) USING ao_row; -CREATE INDEX ON brin_ao_summarize_partial USING brin(i) WITH (pages_per_range=3); - --- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. --- The 1st range [33554432, 33554434] is full and the last range [33554435, 33554437] --- is partially full with just 1 block: 33554435. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize_partial VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554435, 0)'; - END LOOP; -END; -$$; - --- We should successfully summarize the last partial range. --- --- Note: For an empty AO table, when INSERTing into the 1st range, we don't --- summarize. brininsert() -> brinGetTupleForHeapBlock() actually returns NULL --- in this case as revmap_get_blkno_ao() returns InvalidBlockNumber. --- This is contrary to heap behavior (where we return 1). --- --- Thus, we will have both ranges summarized here. 
-SELECT brin_summarize_new_values('brin_ao_summarize_partial_i_idx'); diff --git a/src/test/regress/sql/brin_aocs.sql b/src/test/regress/sql/brin_aocs.sql index fb1b181f06a..b6f8d4477ab 100644 --- a/src/test/regress/sql/brin_aocs.sql +++ b/src/test/regress/sql/brin_aocs.sql @@ -456,110 +456,3 @@ INSERT INTO brintest_aocs SELECT format('%s/%s%s', odd, even, tenthous)::pg_lsn, box(point(odd, even), point(thousand, twothousand)) FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5; - --- Test summarization - --- Note: We use loops to populate logical heap pages in one aoseg. These logical --- heap blocks can start at a large number. See AOSegmentGet_startHeapBlock(segno). - -CREATE TABLE brin_aoco_summarize(i int) USING ao_column; -CREATE INDEX ON brin_aoco_summarize USING brin(i) WITH (pages_per_range=1); - --- Tests for brin_summarize_new_values -SELECT brin_summarize_new_values('brin_aoco_summarize'); -- error, not an index -SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index --- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. --- Index dead tuples will not always be cleaned up completely after VACUUM, resulting --- brin_summarize_new_values() will not always be accurate. So ignore the check to --- coordinate with the new behavior. --- There is no data, so nothing to summarize. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - -DROP INDEX brin_aoco_summarize_i_idx; - --- Create 3 blocks all on 1 QE, in 1 aoseg: 2 blocks full, 1 block with 1 tuple. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554434, 0)'; - END LOOP; -END; -$$; - --- Now create the index on the data inserted above. -CREATE INDEX ON brin_aoco_summarize USING brin(i) WITH (pages_per_range=1); - --- There is nothing new to summarize - it was all done during the index build. 
-SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - --- Insert more so we have 5 blocks on 1 QE, in 1 aoseg: 4 blocks full, 1 block --- with 1 tuple. The last and penultimate blocks will be unsummarized. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (20) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554436, 0)'; - END LOOP; -END; -$$; - --- The last 2 blocks will be summarized. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - --- Insert more so we have 7 blocks on 1 QE, in 1 aoseg: 6 blocks full, 1 page --- with 1 tuple. The last and penultimate blocks are unsummarized. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (30) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554438, 0)'; - END LOOP; -END; -$$; - -DELETE FROM brin_aoco_summarize WHERE i = 1; - -VACUUM brin_aoco_summarize; - --- All the tuples will have been moved to one aoseg and all the tuples should --- have fit in one logical heap block. -SELECT distinct(right(split_part(ctid::text, ',', 1), -1)) AS blknum -FROM brin_aoco_summarize; - --- VACUUM should have already summarized this one logical heap block, so --- invoking summarization again will be a no-op. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - --- We don't allow specific range summarization for AO tables at the moment. -SELECT brin_summarize_range('brin_aoco_summarize_i_idx', 1); - --- Test summarization of last partial range. -CREATE TABLE brin_aoco_summarize_partial(i int) USING ao_column; -CREATE INDEX ON brin_aoco_summarize_partial USING brin(i) WITH (pages_per_range=3); - --- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. --- The 1st range [33554432, 33554434] is full and the last range [33554435, 33554437] --- is partially full with just 1 block: 33554435. 
-DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize_partial VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554435, 0)'; - END LOOP; -END; -$$; - --- We should successfully summarize the last partial range. --- --- Note: For an empty AOCO table, when INSERTing into the 1st range, we don't --- summarize. brininsert() -> brinGetTupleForHeapBlock() actually returns NULL --- in this case as revmap_get_blkno_ao() returns InvalidBlockNumber. --- This is contrary to heap behavior (where we return 1). --- --- Thus, we will have both ranges summarized here. -SELECT brin_summarize_new_values('brin_aoco_summarize_partial_i_idx');