public inbox for [email protected]  
help / color / mirror / Atom feed
From: Michael Zhilin <[email protected]>
To: [email protected]
Cc: y sokolov <[email protected]>
Subject: [BUG] false positive in bt_index_check in case of short 4B varlena datum
Date: Thu, 14 Dec 2023 19:18:11 +0300
Message-ID: <[email protected]> (raw)

Hi,

Following example produces error raised from bt_index_check.

drop table if exists t;
create table t (v text);
alter table t alter column v set storage plain;
insert into t values ('x');
copy t to '/tmp/1.lst';
copy t from '/tmp/1.lst';
create index t_idx on t(v);
create extension if not exists amcheck;
select bt_index_check('t_idx', true);

postgres=# select bt_index_check('t_idx', true);
ERROR:  heap tuple (0,2) from table "t" lacks matching index tuple 
within index "t_idx"
HINT:  Retrying verification using the function bt_index_parent_check() 
might provide a more specific error.

As result table contains 2 logically identical tuples:
  - one contains varlena 'x' with 1B (1-byte) header (added by INSERT 
statement)
  - one contains varlena 'x' with 4B (4-bytes) header (added by COPY 
statement)
CREATE INDEX statement builds index with posting list referencing both 
heap tuples.
The function bt_index_check calculates fingerprints of 1B and 4B header 
datums,
they are different and function returns error.

The attached patch allows to avoid such kind of false positives by 
converting short
4B datums to 1B before fingerprinting. Also it contains test for 
provided case.

Thank you,
  Michael

-- 
Michael Zhilin
Postgres Professional

Attachments:

  [text/x-patch] 0001-contrib-amcheck-must-support-different-header-size-o.patch (6.0K, 3-0001-contrib-amcheck-must-support-different-header-size-o.patch)
  download | inline diff:
From e2f1528107e2a39a7f3c01bdfc7452a9fe17eeee Mon Sep 17 00:00:00 2001
From: Michael Zhilin <[email protected]>
Date: Thu, 14 Dec 2023 16:08:15 +0300
Subject: [PATCH] contrib/amcheck: must support different header size of short
 varlena datum

---
 contrib/amcheck/expected/check_btree.out | 19 +++++++++++++
 contrib/amcheck/sql/check_btree.sql      | 17 +++++++++++
 contrib/amcheck/verify_nbtree.c          | 36 ++++++++++++++++++++----
 3 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/contrib/amcheck/expected/check_btree.out b/contrib/amcheck/expected/check_btree.out
index 38791bbc1f4..db73f8fd5b3 100644
--- a/contrib/amcheck/expected/check_btree.out
+++ b/contrib/amcheck/expected/check_btree.out
@@ -1,3 +1,5 @@
+-- directory paths are passed to us in environment variables
+\getenv abs_srcdir PG_ABS_SRCDIR
 CREATE TABLE bttest_a(id int8);
 CREATE TABLE bttest_b(id int8);
 CREATE TABLE bttest_multi(id int8, data int8);
@@ -199,6 +201,22 @@ SELECT bt_index_check('bttest_a_expr_idx', true);
  
 (1 row)
 
+--
+-- BUG: must support different header size of short varlena datum
+--
+CREATE TABLE varlena_bug (v text);
+ALTER TABLE varlena_bug ALTER column v SET storage plain;
+INSERT INTO varlena_bug VALUES ('x');
+\set filename :abs_srcdir '/results/varlena_bug.dmp'
+COPY varlena_bug TO :'filename';
+COPY varlena_bug FROM :'filename';
+CREATE INDEX varlena_bug_idx on varlena_bug(v);
+SELECT bt_index_check('varlena_bug_idx', true);
+ bt_index_check 
+----------------
+ 
+(1 row)
+
 -- cleanup
 DROP TABLE bttest_a;
 DROP TABLE bttest_b;
@@ -208,3 +226,4 @@ DROP TABLE toast_bug;
 DROP FUNCTION ifun(int8);
 DROP OWNED BY regress_bttest_role; -- permissions
 DROP ROLE regress_bttest_role;
+DROP TABLE varlena_bug;
diff --git a/contrib/amcheck/sql/check_btree.sql b/contrib/amcheck/sql/check_btree.sql
index 033c04b4d05..eff679a71ef 100644
--- a/contrib/amcheck/sql/check_btree.sql
+++ b/contrib/amcheck/sql/check_btree.sql
@@ -1,3 +1,6 @@
+-- directory paths are passed to us in environment variables
+\getenv abs_srcdir PG_ABS_SRCDIR
+
 CREATE TABLE bttest_a(id int8);
 CREATE TABLE bttest_b(id int8);
 CREATE TABLE bttest_multi(id int8, data int8);
@@ -135,6 +138,19 @@ CREATE INDEX bttest_a_expr_idx ON bttest_a ((ifun(id) + ifun(0)))
 
 SELECT bt_index_check('bttest_a_expr_idx', true);
 
+--
+-- BUG: must support different header size of short varlena datum
+--
+
+CREATE TABLE varlena_bug (v text);
+ALTER TABLE varlena_bug ALTER column v SET storage plain;
+INSERT INTO varlena_bug VALUES ('x');
+\set filename :abs_srcdir '/results/varlena_bug.dmp'
+COPY varlena_bug TO :'filename';
+COPY varlena_bug FROM :'filename';
+CREATE INDEX varlena_bug_idx on varlena_bug(v);
+SELECT bt_index_check('varlena_bug_idx', true);
+
 -- cleanup
 DROP TABLE bttest_a;
 DROP TABLE bttest_b;
@@ -144,3 +160,4 @@ DROP TABLE toast_bug;
 DROP FUNCTION ifun(int8);
 DROP OWNED BY regress_bttest_role; -- permissions
 DROP ROLE regress_bttest_role;
+DROP TABLE varlena_bug;
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index 9021d156eb7..2543f2ccfd1 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -2556,7 +2556,7 @@ bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
 	TupleDesc	tupleDescriptor = RelationGetDescr(state->rel);
 	Datum		normalized[INDEX_MAX_KEYS];
 	bool		isnull[INDEX_MAX_KEYS];
-	bool		toast_free[INDEX_MAX_KEYS];
+	bool		need_free[INDEX_MAX_KEYS];
 	bool		formnewtup = false;
 	IndexTuple	reformed;
 	int			i;
@@ -2575,7 +2575,7 @@ bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
 		att = TupleDescAttr(tupleDescriptor, i);
 
 		/* Assume untoasted/already normalized datum initially */
-		toast_free[i] = false;
+		need_free[i] = false;
 		normalized[i] = index_getattr(itup, att->attnum,
 									  tupleDescriptor,
 									  &isnull[i]);
@@ -2587,6 +2587,7 @@ bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
 		 * index without further processing, so an external varlena header
 		 * should never be encountered here
 		 */
+
 		if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
 			ereport(ERROR,
 					(errcode(ERRCODE_INDEX_CORRUPTED),
@@ -2598,11 +2599,32 @@ bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
 		{
 			formnewtup = true;
 			normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
-			toast_free[i] = true;
+			need_free[i] = true;
 		}
+		/*
+		 * Short tuples may have 1B or 4B header. Convert 4B header of short
+		 * tuples to 1B
+		 */
+		else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
+		{
+			/* convert to short varlena */
+			Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
+			char *data = palloc(len);
+
+			SET_VARSIZE_SHORT(data, len);
+			memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
+
+			formnewtup = true;
+			normalized[i] = PointerGetDatum(data);
+			need_free[i] = true;
+		}
+
 	}
 
-	/* Easier case: Tuple has varlena datums, none of which are compressed */
+	/*
+	 * Easier case: Tuple has varlena datums, none of which are compressed or
+	 * short with 4B header
+	 */
 	if (!formnewtup)
 		return itup;
 
@@ -2611,6 +2633,10 @@ bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
 	 * creating normalized version of the tuple from uncompressed input datums
 	 * (normalized input datums).  This is rather naive, but shouldn't be
 	 * necessary too often.
+	 * Also tuple had short varlena datums with 4B header. Actually there is no
+	 * restriction with have heap tuple containing varlena datum with 4B header
+	 * and corresponding index tuple containing varlena datum with 1B header.
+	 * For fingerprinting let's convert heap tuple varlena datum to 1B format.
 	 *
 	 * Note that we rely on deterministic index_form_tuple() TOAST compression
 	 * of normalized input.
@@ -2620,7 +2646,7 @@ bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
 
 	/* Cannot leak memory here */
 	for (i = 0; i < tupleDescriptor->natts; i++)
-		if (toast_free[i])
+		if (need_free[i])
 			pfree(DatumGetPointer(normalized[i]));
 
 	return reformed;
-- 
2.43.0



reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected], [email protected]
  Subject: Re: [BUG] false positive in bt_index_check in case of short 4B varlena datum
  In-Reply-To: <[email protected]>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox