public inbox for [email protected]  
help / color / mirror / Atom feed
From: Andrey M. Borodin <[email protected]>
To: Michael Zhilin <[email protected]>
Cc: [email protected]
Cc: y sokolov <[email protected]>
Cc: Alexander Lakhin <[email protected]>
Subject: Re: [BUG] false positive in bt_index_check in case of short 4B varlena datum
Date: Sun, 7 Jan 2024 23:04:35 +0500
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
References: <[email protected]>



> On 14 Dec 2023, at 21:18, Michael Zhilin <[email protected]> wrote:

I've checked that:
* bug is reproduced by the test in the patch
* bug is fixed by the patch
* fix seems idiomatic, similar to nearby code

The patch needed a rebase, so please find the rebased version attached. I did not change anything.

I see that using a temp file in PG_ABS_SRCDIR is a common approach. Still, I want to ask: could we come up with some clever way to reproduce the bug without an external file?
Also, the nearby code might be slightly more readable if normalized[i] were a local variable.
And one last question about the line:
char *data = palloc(len);
What if the data is somehow corrupted here... are there enough sanity checks to ensure that we won't palloc(-1) or something like that?
Won't we memcpy() from some other memory when len is bogus?

Besides these paranoid questions, I think that this patch is ready for committer.

Thanks!


Best regards, Andrey Borodin.





Attachments:

  [application/octet-stream] v1rebased-0001-contrib-amcheck-must-support-different-hea.patch (6.2K, 2-v1rebased-0001-contrib-amcheck-must-support-different-hea.patch)
  download | inline diff:
From f05a9ad52aa6552c9cae5dcc712a168e08c67d80 Mon Sep 17 00:00:00 2001
From: Michael Zhilin <[email protected]>
Date: Thu, 14 Dec 2023 16:08:15 +0300
Subject: [PATCH v1rebased] contrib/amcheck: must support different header size
 of short varlena datum

---
 contrib/amcheck/expected/check_btree.out | 19 +++++++++++++
 contrib/amcheck/sql/check_btree.sql      | 17 +++++++++++
 contrib/amcheck/verify_nbtree.c          | 36 ++++++++++++++++++++----
 3 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/contrib/amcheck/expected/check_btree.out b/contrib/amcheck/expected/check_btree.out
index 86b38d93f4..1ab1bd260c 100644
--- a/contrib/amcheck/expected/check_btree.out
+++ b/contrib/amcheck/expected/check_btree.out
@@ -1,3 +1,5 @@
+-- directory paths are passed to us in environment variables
+\getenv abs_srcdir PG_ABS_SRCDIR
 CREATE TABLE bttest_a(id int8);
 CREATE TABLE bttest_b(id int8);
 CREATE TABLE bttest_multi(id int8, data int8);
@@ -240,6 +242,22 @@ SELECT bt_index_check('bttest_unique_nulls_b_c_idx', heapallindexed => true, che
  
 (1 row)
 
+--
+-- BUG: must support different header size of short varlena datum
+--
+CREATE TABLE varlena_bug (v text);
+ALTER TABLE varlena_bug ALTER column v SET storage plain;
+INSERT INTO varlena_bug VALUES ('x');
+\set filename :abs_srcdir '/results/varlena_bug.dmp'
+COPY varlena_bug TO :'filename';
+COPY varlena_bug FROM :'filename';
+CREATE INDEX varlena_bug_idx on varlena_bug(v);
+SELECT bt_index_check('varlena_bug_idx', true);
+ bt_index_check 
+----------------
+ 
+(1 row)
+
 -- cleanup
 DROP TABLE bttest_a;
 DROP TABLE bttest_b;
@@ -250,3 +268,4 @@ DROP FUNCTION ifun(int8);
 DROP TABLE bttest_unique_nulls;
 DROP OWNED BY regress_bttest_role; -- permissions
 DROP ROLE regress_bttest_role;
+DROP TABLE varlena_bug;
diff --git a/contrib/amcheck/sql/check_btree.sql b/contrib/amcheck/sql/check_btree.sql
index aa461f7fb9..4daed676c2 100644
--- a/contrib/amcheck/sql/check_btree.sql
+++ b/contrib/amcheck/sql/check_btree.sql
@@ -1,3 +1,6 @@
+-- directory paths are passed to us in environment variables
+\getenv abs_srcdir PG_ABS_SRCDIR
+
 CREATE TABLE bttest_a(id int8);
 CREATE TABLE bttest_b(id int8);
 CREATE TABLE bttest_multi(id int8, data int8);
@@ -148,6 +151,19 @@ SELECT bt_index_check('bttest_unique_nulls_c_key', heapallindexed => true, check
 CREATE INDEX on bttest_unique_nulls (b,c);
 SELECT bt_index_check('bttest_unique_nulls_b_c_idx', heapallindexed => true, checkunique => true);
 
+--
+-- BUG: must support different header size of short varlena datum
+--
+
+CREATE TABLE varlena_bug (v text);
+ALTER TABLE varlena_bug ALTER column v SET storage plain;
+INSERT INTO varlena_bug VALUES ('x');
+\set filename :abs_srcdir '/results/varlena_bug.dmp'
+COPY varlena_bug TO :'filename';
+COPY varlena_bug FROM :'filename';
+CREATE INDEX varlena_bug_idx on varlena_bug(v);
+SELECT bt_index_check('varlena_bug_idx', true);
+
 -- cleanup
 DROP TABLE bttest_a;
 DROP TABLE bttest_b;
@@ -158,3 +174,4 @@ DROP FUNCTION ifun(int8);
 DROP TABLE bttest_unique_nulls;
 DROP OWNED BY regress_bttest_role; -- permissions
 DROP ROLE regress_bttest_role;
+DROP TABLE varlena_bug;
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index 91caa53dd8..e7f01c2add 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -2942,7 +2942,7 @@ bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
 	TupleDesc	tupleDescriptor = RelationGetDescr(state->rel);
 	Datum		normalized[INDEX_MAX_KEYS];
 	bool		isnull[INDEX_MAX_KEYS];
-	bool		toast_free[INDEX_MAX_KEYS];
+	bool		need_free[INDEX_MAX_KEYS];
 	bool		formnewtup = false;
 	IndexTuple	reformed;
 	int			i;
@@ -2961,7 +2961,7 @@ bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
 		att = TupleDescAttr(tupleDescriptor, i);
 
 		/* Assume untoasted/already normalized datum initially */
-		toast_free[i] = false;
+		need_free[i] = false;
 		normalized[i] = index_getattr(itup, att->attnum,
 									  tupleDescriptor,
 									  &isnull[i]);
@@ -2973,6 +2973,7 @@ bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
 		 * index without further processing, so an external varlena header
 		 * should never be encountered here
 		 */
+
 		if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
 			ereport(ERROR,
 					(errcode(ERRCODE_INDEX_CORRUPTED),
@@ -2984,11 +2985,32 @@ bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
 		{
 			formnewtup = true;
 			normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
-			toast_free[i] = true;
+			need_free[i] = true;
 		}
+		/*
+		 * Short tuples may have 1B or 4B header. Convert 4B header of short
+		 * tuples to 1B
+		 */
+		else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
+		{
+			/* convert to short varlena */
+			Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
+			char *data = palloc(len);
+
+			SET_VARSIZE_SHORT(data, len);
+			memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
+
+			formnewtup = true;
+			normalized[i] = PointerGetDatum(data);
+			need_free[i] = true;
+		}
+
 	}
 
-	/* Easier case: Tuple has varlena datums, none of which are compressed */
+	/*
+	 * Easier case: Tuple has varlena datums, none of which are compressed or
+	 * short with 4B header
+	 */
 	if (!formnewtup)
 		return itup;
 
@@ -2997,6 +3019,10 @@ bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
 	 * creating normalized version of the tuple from uncompressed input datums
 	 * (normalized input datums).  This is rather naive, but shouldn't be
 	 * necessary too often.
+	 * Tuples may also contain short varlena datums with a 4B header.  Nothing
+	 * prevents a heap tuple from holding a varlena datum with a 4B header while
+	 * the corresponding index tuple holds it with a 1B header, so for
+	 * fingerprinting we convert such heap tuple datums to the 1B format.
 	 *
 	 * Note that we rely on deterministic index_form_tuple() TOAST compression
 	 * of normalized input.
@@ -3006,7 +3032,7 @@ bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
 
 	/* Cannot leak memory here */
 	for (i = 0; i < tupleDescriptor->natts; i++)
-		if (toast_free[i])
+		if (need_free[i])
 			pfree(DatumGetPointer(normalized[i]));
 
 	return reformed;
-- 
2.37.1 (Apple Git-137.1)



reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected], [email protected], [email protected], [email protected]
  Subject: Re: [BUG] false positive in bt_index_check in case of short 4B varlena datum
  In-Reply-To: <[email protected]>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox