public inbox for [email protected]  
help / color / mirror / Atom feed
From: David Rowley <[email protected]>
To: Andres Freund <[email protected]>
Cc: John Naylor <[email protected]>
Cc: Chao Li <[email protected]>
Cc: PostgreSQL Developers <[email protected]>
Subject: Re: More speedups for tuple deformation
Date: Mon, 2 Mar 2026 02:10:56 +1300
Message-ID: <CAApHDvq21qQigiM6z2YgadFusQC_pfEYP8D=oQCrwJ_kKzcqDg@mail.gmail.com> (raw)
In-Reply-To: <uhqul2ryci4tyg5ylddjrmf4kybzwb7m5z7rmurhhjp37vrn5f@zgxil7egr62n>
References: <CAApHDvpbntG7V3_EsZ+w-V=jU-y8rFmv9RB1EDJm4sxKno-4UA@mail.gmail.com>
	<e7sto7tk5dk5hfyvoocaddnxcngemcmfvbuh23l32w5cssaizy@znuphjqug7qe>
	<CAApHDvpuEbhvH1ViCZRz5vks+_bGbEnPoEdZYAZXK76_isb_+Q@mail.gmail.com>
	<v6z545yozjtywghn5glujemu72z4i4ynadsc2xks4ejotdg7yl@4rry7ixwr4us>
	<CANWCAZabO1oj+khF+YNVpmkTQwRRyNJesbsBhRFL5emZJh3tow@mail.gmail.com>
	<lzgoxzbh2gel5w362revuwaecrsbjr44kjdzrewuejugcodkeq@ixymojwnylsy>
	<CAApHDvodSVBj3ypOYbYUCJX+NWL=VZs63RNBQ_FxB_F+6QXF-A@mail.gmail.com>
	<rbxc2qqhsvzxpukgd36caoa4ydgn5r22fxktxanrkn6nobg7j6@27b4vogohgu2>
	<CAApHDvpWQn8sXDYpSNNpieJW-UTG4Nf4TVjT8ew64L073hz-Fw@mail.gmail.com>
	<mq6ddpgctt42srolsvo5kph2s6shfg62meb7i5fbg6n3s73zju@2n7gviiyga3h>
	<uhqul2ryci4tyg5ylddjrmf4kybzwb7m5z7rmurhhjp37vrn5f@zgxil7egr62n>

On Thu, 26 Feb 2026 at 09:29, Andres Freund <[email protected]> wrote:
> Huh.  It, at least partially, seems to be related to using an integer for
> attnum et al. Due to us using -fwrapv, the compiler can't actually assume that
> an attnum++ won't overflow. An overflow would make the loop trip counts a lot
> more complicated.   Even with that I don't understand how it ends up
> generating such crappy code, but since using size_t fixes it...

Thanks. That seems to make the gcc compiled version quite a bit better.

I am still seeing a bit of register spilling, as the TupleDesc is
written to the stack and reloaded back into a register a couple of
times. I've attached the objdump in question.

if (attnum < firstNonGuaranteedAttr)
    1c3c: 48 39 e8              cmp    rax,rbp
    1c3f: 73 7f                jae    1cc0 <tts_heap_getsomeattrs+0x110>
    1c41: 48 89 54 24 f0        mov    QWORD PTR [rsp-0x10],rdx
    1c46: 48 8d 74 c2 20        lea    rsi,[rdx+rax*8+0x20]

The TupleDesc is then reloaded from the stack into the register in:

off += cattr->attlen;
    1f88: 48 8b 54 24 f0        mov    rdx,QWORD PTR [rsp-0x10]

I've not found a way to stop gcc from doing this.

I've also resequenced the patches so 0002 contains the sibling call
optimisation for slot_getmissingattrs() and I've applied that tail
call optimisation that you mentioned for slot_getmissingattrs() in
0004.

The benchmark results are in the attached spreadsheet.

David

0000000000001bb0 <tts_heap_getsomeattrs>:
{
    1bb0:	f3 0f 1e fa          	endbr64
    1bb4:	41 57                	push   r15
    1bb6:	49 89 fb             	mov    r11,rdi
    1bb9:	41 56                	push   r14
    1bbb:	41 55                	push   r13
    1bbd:	41 54                	push   r12
    1bbf:	4c 63 e6             	movsxd r12,esi
    1bc2:	55                   	push   rbp
    1bc3:	53                   	push   rbx
	HeapTupleHeader tup = tuple->t_data;
    1bc4:	48 8b 47 40          	mov    rax,QWORD PTR [rdi+0x40]
	TupleDesc	tupleDesc = slot->tts_tupleDescriptor;
    1bc8:	48 8b 57 10          	mov    rdx,QWORD PTR [rdi+0x10]
	isnull = slot->tts_isnull;
    1bcc:	48 8b 4f 20          	mov    rcx,QWORD PTR [rdi+0x20]
	HeapTupleHeader tup = tuple->t_data;
    1bd0:	48 8b 58 10          	mov    rbx,QWORD PTR [rax+0x10]
	if (TTS_OBEYS_NOT_NULL_CONSTRAINTS(slot))
    1bd4:	f6 47 04 08          	test   BYTE PTR [rdi+0x4],0x8
    1bd8:	0f 84 02 04 00 00    	je     1fe0 <tts_heap_getsomeattrs+0x430>
		firstNonGuaranteedAttr = Min(reqnatts, tupleDesc->firstNonGuaranteedAttr);
    1bde:	8b 42 14             	mov    eax,DWORD PTR [rdx+0x14]
    1be1:	41 39 c4             	cmp    r12d,eax
    1be4:	41 0f 4e c4          	cmovle eax,r12d
	if (attnum < firstNonGuaranteedAttr)
    1be8:	48 63 e8             	movsxd rbp,eax
	firstNonCacheOffsetAttr = tupleDesc->firstNonCachedOffsetAttr;
    1beb:	4c 63 52 10          	movsxd r10,DWORD PTR [rdx+0x10]
	if (HeapTupleHasNulls(tuple))
    1bef:	f6 43 14 01          	test   BYTE PTR [rbx+0x14],0x1
    1bf3:	0f 84 b7 03 00 00    	je     1fb0 <tts_heap_getsomeattrs+0x400>
		natts = HeapTupleHeaderGetNatts(tup);
    1bf9:	44 0f b7 4b 12       	movzx  r9d,WORD PTR [rbx+0x12]
    1bfe:	41 81 e1 ff 07 00 00 	and    r9d,0x7ff
 *		Computes size of null bitmap given number of data columns.
 */
static inline int
BITMAPLEN(int NATTS)
{
	return (NATTS + 7) / 8;
    1c05:	45 8d 41 07          	lea    r8d,[r9+0x7]
    1c09:	41 c1 f8 03          	sar    r8d,0x3
		tp = (char *) tup + MAXALIGN(offsetof(HeapTupleHeaderData, t_bits) +
    1c0d:	41 83 c0 1e          	add    r8d,0x1e
    1c11:	41 81 e0 f8 03 00 00 	and    r8d,0x3f8
    1c18:	49 01 d8             	add    r8,rbx
		natts = Min(natts, reqnatts);
    1c1b:	45 39 cc             	cmp    r12d,r9d
    1c1e:	4d 0f 4e cc          	cmovle r9,r12
			firstNullAttr = natts;
    1c22:	45 89 ce             	mov    r14d,r9d
		if (natts > firstNonGuaranteedAttr)
    1c25:	41 39 c1             	cmp    r9d,eax
    1c28:	0f 8f ea 04 00 00    	jg     2118 <tts_heap_getsomeattrs+0x568>
	attnum = slot->tts_nvalid;
    1c2e:	49 0f bf 43 06       	movsx  rax,WORD PTR [r11+0x6]
	values = slot->tts_values;
    1c33:	49 8b 7b 18          	mov    rdi,QWORD PTR [r11+0x18]
	slot->tts_nvalid = reqnatts;
    1c37:	66 45 89 63 06       	mov    WORD PTR [r11+0x6],r12w
	if (attnum < firstNonGuaranteedAttr)
    1c3c:	48 39 e8             	cmp    rax,rbp
    1c3f:	73 7f                	jae    1cc0 <tts_heap_getsomeattrs+0x110>
    1c41:	48 89 54 24 f0       	mov    QWORD PTR [rsp-0x10],rdx
    1c46:	48 8d 74 c2 20       	lea    rsi,[rdx+rax*8+0x20]
    1c4b:	eb 22                	jmp    1c6f <tts_heap_getsomeattrs+0xbf>
    1c4d:	0f 1f 00             	nop    DWORD PTR [rax]
static inline Datum
fetch_att_noerr(const void *T, bool attbyval, int attlen)
{
	if (attbyval)
	{
		switch (attlen)
    1c50:	66 41 83 ff 01       	cmp    r15w,0x1
    1c55:	74 59                	je     1cb0 <tts_heap_getsomeattrs+0x100>
 *		Returns datum representation for a 64-bit integer.
 */
static inline Datum
Int64GetDatum(int64 X)
{
	return (Datum) X;
    1c57:	48 8b 12             	mov    rdx,QWORD PTR [rdx]
			values[attnum] = fetch_att_noerr(tp + off, true, attlen);
    1c5a:	48 89 14 c7          	mov    QWORD PTR [rdi+rax*8],rdx
			attnum++;
    1c5e:	48 83 c0 01          	add    rax,0x1
		} while (attnum < firstNonGuaranteedAttr);
    1c62:	48 83 c6 08          	add    rsi,0x8
    1c66:	48 39 e8             	cmp    rax,rbp
    1c69:	0f 83 11 03 00 00    	jae    1f80 <tts_heap_getsomeattrs+0x3d0>
			isnull[attnum] = false;
    1c6f:	c6 04 01 00          	mov    BYTE PTR [rcx+rax*1],0x0
			off = cattr->attcacheoff;
    1c73:	0f bf 16             	movsx  edx,WORD PTR [rsi]
			cattr = &cattrs[attnum];
    1c76:	49 89 f5             	mov    r13,rsi
			attlen = cattr->attlen;
    1c79:	44 0f b7 7e 02       	movzx  r15d,WORD PTR [rsi+0x2]
			off = cattr->attcacheoff;
    1c7e:	48 89 d3             	mov    rbx,rdx
			values[attnum] = fetch_att_noerr(tp + off, true, attlen);
    1c81:	4c 01 c2             	add    rdx,r8
    1c84:	66 41 83 ff 02       	cmp    r15w,0x2
    1c89:	74 15                	je     1ca0 <tts_heap_getsomeattrs+0xf0>
    1c8b:	66 41 83 ff 04       	cmp    r15w,0x4
    1c90:	75 be                	jne    1c50 <tts_heap_getsomeattrs+0xa0>
	return (Datum) X;
    1c92:	48 63 12             	movsxd rdx,DWORD PTR [rdx]
		{
			case sizeof(int32):
				return Int32GetDatum(*((const int32 *) T));
    1c95:	eb c3                	jmp    1c5a <tts_heap_getsomeattrs+0xaa>
    1c97:	66 0f 1f 84 00 00 00 	nop    WORD PTR [rax+rax*1+0x0]
    1c9e:	00 00 
	return (Datum) X;
    1ca0:	48 0f bf 12          	movsx  rdx,WORD PTR [rdx]
			case sizeof(int16):
				return Int16GetDatum(*((const int16 *) T));
    1ca4:	eb b4                	jmp    1c5a <tts_heap_getsomeattrs+0xaa>
    1ca6:	66 2e 0f 1f 84 00 00 	cs nop WORD PTR [rax+rax*1+0x0]
    1cad:	00 00 00 
	return (Datum) X;
    1cb0:	48 0f be 12          	movsx  rdx,BYTE PTR [rdx]
			case sizeof(char):
				return CharGetDatum(*((const char *) T));
    1cb4:	eb a4                	jmp    1c5a <tts_heap_getsomeattrs+0xaa>
    1cb6:	66 2e 0f 1f 84 00 00 	cs nop WORD PTR [rax+rax*1+0x0]
    1cbd:	00 00 00 
		off = *offp;
    1cc0:	41 8b 5b 48          	mov    ebx,DWORD PTR [r11+0x48]
	if (unlikely(attnum < reqnatts))
    1cc4:	49 63 ec             	movsxd rbp,r12d
	firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, natts);
    1cc7:	45 39 ca             	cmp    r10d,r9d
    1cca:	4d 0f 4f d1          	cmovg  r10,r9
	if (attnum < firstNonCacheOffsetAttr)
    1cce:	4c 39 d0             	cmp    rax,r10
    1cd1:	0f 82 b9 01 00 00    	jb     1e90 <tts_heap_getsomeattrs+0x2e0>
	for (; attnum < firstNullAttr; attnum++)
    1cd7:	4d 63 d6             	movsxd r10,r14d
    1cda:	4c 39 d0             	cmp    rax,r10
    1cdd:	72 5e                	jb     1d3d <tts_heap_getsomeattrs+0x18d>
    1cdf:	e9 24 05 00 00       	jmp    2208 <tts_heap_getsomeattrs+0x658>
    1ce4:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]

	if (attlen > 0)
	{
		const char *offset_ptr;

		*off = TYPEALIGN(attalignby, *off);
    1ce8:	8d 5c 1e ff          	lea    ebx,[rsi+rbx*1-0x1]
    1cec:	f7 de                	neg    esi
    1cee:	21 de                	and    esi,ebx
		offset_ptr = tupptr + *off;
		*off += attlen;
    1cf0:	41 0f bf dd          	movsx  ebx,r13w
		offset_ptr = tupptr + *off;
    1cf4:	41 89 f6             	mov    r14d,esi
		*off += attlen;
    1cf7:	01 f3                	add    ebx,esi
		offset_ptr = tupptr + *off;
    1cf9:	4d 01 c6             	add    r14,r8
	return (Datum) (uintptr_t) X;
    1cfc:	4c 89 f6             	mov    rsi,r14
		if (attbyval)
    1cff:	80 7c c2 24 00       	cmp    BYTE PTR [rdx+rax*8+0x24],0x0
    1d04:	74 2a                	je     1d30 <tts_heap_getsomeattrs+0x180>
		{
			switch (attlen)
    1d06:	66 41 83 fd 02       	cmp    r13w,0x2
    1d0b:	0f 84 ef 01 00 00    	je     1f00 <tts_heap_getsomeattrs+0x350>
    1d11:	66 41 83 fd 04       	cmp    r13w,0x4
    1d16:	0f 84 d4 01 00 00    	je     1ef0 <tts_heap_getsomeattrs+0x340>
    1d1c:	66 41 83 fd 01       	cmp    r13w,0x1
    1d21:	0f 85 b9 01 00 00    	jne    1ee0 <tts_heap_getsomeattrs+0x330>
	return (Datum) X;
    1d27:	49 0f be 36          	movsx  rsi,BYTE PTR [r14]
    1d2b:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
		values[attnum] = align_fetch_then_add(tp,
    1d30:	48 89 34 c7          	mov    QWORD PTR [rdi+rax*8],rsi
	for (; attnum < firstNullAttr; attnum++)
    1d34:	48 83 c0 01          	add    rax,0x1
    1d38:	4c 39 d0             	cmp    rax,r10
    1d3b:	74 73                	je     1db0 <tts_heap_getsomeattrs+0x200>
		isnull[attnum] = false;
    1d3d:	c6 04 01 00          	mov    BYTE PTR [rcx+rax*1],0x0
		attlen = cattr->attlen;
    1d41:	44 0f b7 6c c2 22    	movzx  r13d,WORD PTR [rdx+rax*8+0x22]
											  cattr->attalignby);
    1d47:	0f b6 74 c2 25       	movzx  esi,BYTE PTR [rdx+rax*8+0x25]
	if (attlen > 0)
    1d4c:	66 45 85 ed          	test   r13w,r13w
    1d50:	7f 96                	jg     1ce8 <tts_heap_getsomeattrs+0x138>
		}
		return PointerGetDatum(offset_ptr);
	}
	else if (attlen == -1)
	{
		if (!VARATT_IS_SHORT(tupptr + *off))
    1d52:	41 89 dd             	mov    r13d,ebx
    1d55:	4d 01 c5             	add    r13,r8
    1d58:	41 f6 45 00 01       	test   BYTE PTR [r13+0x0],0x1
    1d5d:	75 0e                	jne    1d6d <tts_heap_getsomeattrs+0x1bd>
			*off = TYPEALIGN(attalignby, *off);
    1d5f:	8d 5c 1e ff          	lea    ebx,[rsi+rbx*1-0x1]
    1d63:	f7 de                	neg    esi
    1d65:	21 f3                	and    ebx,esi

		res = PointerGetDatum(tupptr + *off);
    1d67:	41 89 dd             	mov    r13d,ebx
    1d6a:	4d 01 c5             	add    r13,r8
	if (VARATT_IS_1B_E(PTR))
    1d6d:	45 0f b6 75 00       	movzx  r14d,BYTE PTR [r13+0x0]
	return (Datum) (uintptr_t) X;
    1d72:	4c 89 ee             	mov    rsi,r13
    1d75:	41 80 fe 01          	cmp    r14b,0x1
    1d79:	0f 84 59 03 00 00    	je     20d8 <tts_heap_getsomeattrs+0x528>
	else if (VARATT_IS_1B(PTR))
    1d7f:	41 f6 c6 01          	test   r14b,0x1
    1d83:	0f 85 87 02 00 00    	jne    2010 <tts_heap_getsomeattrs+0x460>
		return VARSIZE_4B(PTR);
    1d89:	45 8b 75 00          	mov    r14d,DWORD PTR [r13+0x0]
    1d8d:	41 c1 ee 02          	shr    r14d,0x2
		values[attnum] = align_fetch_then_add(tp,
    1d91:	48 89 34 c7          	mov    QWORD PTR [rdi+rax*8],rsi
	for (; attnum < firstNullAttr; attnum++)
    1d95:	48 83 c0 01          	add    rax,0x1
		*off += VARSIZE_ANY(DatumGetPointer(res));
    1d99:	44 01 f3             	add    ebx,r14d
    1d9c:	4c 39 d0             	cmp    rax,r10
    1d9f:	75 9c                	jne    1d3d <tts_heap_getsomeattrs+0x18d>
    1da1:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
    1da5:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    1dac:	00 00 00 00 
	for (; attnum < natts; attnum++)
    1db0:	4d 39 ca             	cmp    r10,r9
    1db3:	0f 83 57 04 00 00    	jae    2210 <tts_heap_getsomeattrs+0x660>
    1db9:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
		if (isnull[attnum])
    1dc0:	31 c0                	xor    eax,eax
    1dc2:	42 80 3c 11 00       	cmp    BYTE PTR [rcx+r10*1],0x0
    1dc7:	75 57                	jne    1e20 <tts_heap_getsomeattrs+0x270>
		attlen = cattr->attlen;
    1dc9:	42 0f b7 74 d2 22    	movzx  esi,WORD PTR [rdx+r10*8+0x22]
											  cattr->attalignby);
    1dcf:	42 0f b6 44 d2 25    	movzx  eax,BYTE PTR [rdx+r10*8+0x25]
	if (attlen > 0)
    1dd5:	66 85 f6             	test   si,si
    1dd8:	0f 8e 32 01 00 00    	jle    1f10 <tts_heap_getsomeattrs+0x360>
		*off = TYPEALIGN(attalignby, *off);
    1dde:	8d 5c 18 ff          	lea    ebx,[rax+rbx*1-0x1]
    1de2:	f7 d8                	neg    eax
    1de4:	21 d8                	and    eax,ebx
		*off += attlen;
    1de6:	0f bf de             	movsx  ebx,si
		offset_ptr = tupptr + *off;
    1de9:	41 89 c5             	mov    r13d,eax
		*off += attlen;
    1dec:	01 c3                	add    ebx,eax
		offset_ptr = tupptr + *off;
    1dee:	4d 01 c5             	add    r13,r8
    1df1:	4c 89 e8             	mov    rax,r13
		if (attbyval)
    1df4:	42 80 7c d2 24 00    	cmp    BYTE PTR [rdx+r10*8+0x24],0x0
    1dfa:	74 24                	je     1e20 <tts_heap_getsomeattrs+0x270>
			switch (attlen)
    1dfc:	66 83 fe 02          	cmp    si,0x2
    1e00:	0f 84 b2 02 00 00    	je     20b8 <tts_heap_getsomeattrs+0x508>
    1e06:	66 83 fe 04          	cmp    si,0x4
    1e0a:	0f 84 88 02 00 00    	je     2098 <tts_heap_getsomeattrs+0x4e8>
    1e10:	66 83 fe 01          	cmp    si,0x1
    1e14:	0f 84 d6 01 00 00    	je     1ff0 <tts_heap_getsomeattrs+0x440>
	return (Datum) X;
    1e1a:	49 8b 45 00          	mov    rax,QWORD PTR [r13+0x0]
    1e1e:	66 90                	xchg   ax,ax
			values[attnum] = (Datum) 0;
    1e20:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    1e24:	49 83 c2 01          	add    r10,0x1
    1e28:	4d 39 ca             	cmp    r10,r9
    1e2b:	75 93                	jne    1dc0 <tts_heap_getsomeattrs+0x210>
	if (unlikely(attnum < reqnatts))
    1e2d:	49 39 e9             	cmp    r9,rbp
    1e30:	0f 82 ea 03 00 00    	jb     2220 <tts_heap_getsomeattrs+0x670>
	*offp = off;
    1e36:	41 89 5b 48          	mov    DWORD PTR [r11+0x48],ebx
}
    1e3a:	5b                   	pop    rbx
    1e3b:	5d                   	pop    rbp
    1e3c:	41 5c                	pop    r12
    1e3e:	41 5d                	pop    r13
    1e40:	41 5e                	pop    r14
    1e42:	41 5f                	pop    r15
    1e44:	c3                   	ret
    1e45:	0f 1f 00             	nop    DWORD PTR [rax]
		switch (attlen)
    1e48:	66 83 fb 01          	cmp    bx,0x1
    1e4c:	0f 84 0e 01 00 00    	je     1f60 <tts_heap_getsomeattrs+0x3b0>
    1e52:	48 8b 1e             	mov    rbx,QWORD PTR [rsi]
    1e55:	66 2e 0f 1f 84 00 00 	cs nop WORD PTR [rax+rax*1+0x0]
    1e5c:	00 00 00 
    1e5f:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    1e66:	00 00 00 00 
    1e6a:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    1e71:	00 00 00 00 
    1e75:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    1e7c:	00 00 00 00 
		} while (++attnum < firstNonCacheOffsetAttr);
    1e80:	48 8d 70 01          	lea    rsi,[rax+0x1]
			values[attnum] = fetch_att_noerr(tp + off,
    1e84:	48 89 1c c7          	mov    QWORD PTR [rdi+rax*8],rbx
		} while (++attnum < firstNonCacheOffsetAttr);
    1e88:	49 39 f2             	cmp    r10,rsi
    1e8b:	74 43                	je     1ed0 <tts_heap_getsomeattrs+0x320>
    1e8d:	48 89 f0             	mov    rax,rsi
			isnull[attnum] = false;
    1e90:	c6 04 01 00          	mov    BYTE PTR [rcx+rax*1],0x0
			off = cattr->attcacheoff;
    1e94:	0f bf 74 c2 20       	movsx  esi,WORD PTR [rdx+rax*8+0x20]
    1e99:	49 89 f5             	mov    r13,rsi
			values[attnum] = fetch_att_noerr(tp + off,
    1e9c:	4c 01 c6             	add    rsi,r8
	return (Datum) (uintptr_t) X;
    1e9f:	48 89 f3             	mov    rbx,rsi
	if (attbyval)
    1ea2:	80 7c c2 24 00       	cmp    BYTE PTR [rdx+rax*8+0x24],0x0
    1ea7:	74 d7                	je     1e80 <tts_heap_getsomeattrs+0x2d0>
											 cattr->attlen);
    1ea9:	0f b7 5c c2 22       	movzx  ebx,WORD PTR [rdx+rax*8+0x22]
		switch (attlen)
    1eae:	66 83 fb 02          	cmp    bx,0x2
    1eb2:	0f 84 b8 00 00 00    	je     1f70 <tts_heap_getsomeattrs+0x3c0>
    1eb8:	66 83 fb 04          	cmp    bx,0x4
    1ebc:	75 8a                	jne    1e48 <tts_heap_getsomeattrs+0x298>
	return (Datum) X;
    1ebe:	48 63 1e             	movsxd rbx,DWORD PTR [rsi]
		} while (++attnum < firstNonCacheOffsetAttr);
    1ec1:	48 8d 70 01          	lea    rsi,[rax+0x1]
			values[attnum] = fetch_att_noerr(tp + off,
    1ec5:	48 89 1c c7          	mov    QWORD PTR [rdi+rax*8],rbx
		} while (++attnum < firstNonCacheOffsetAttr);
    1ec9:	49 39 f2             	cmp    r10,rsi
    1ecc:	75 bf                	jne    1e8d <tts_heap_getsomeattrs+0x2dd>
    1ece:	66 90                	xchg   ax,ax
		off += cattr->attlen;
    1ed0:	0f bf 5c c2 22       	movsx  ebx,WORD PTR [rdx+rax*8+0x22]
		} while (++attnum < firstNonCacheOffsetAttr);
    1ed5:	4c 89 d0             	mov    rax,r10
		off += cattr->attlen;
    1ed8:	44 01 eb             	add    ebx,r13d
    1edb:	e9 f7 fd ff ff       	jmp    1cd7 <tts_heap_getsomeattrs+0x127>
	return (Datum) X;
    1ee0:	49 8b 36             	mov    rsi,QWORD PTR [r14]
					return Int64GetDatum(*((const int64 *) offset_ptr));
    1ee3:	e9 48 fe ff ff       	jmp    1d30 <tts_heap_getsomeattrs+0x180>
    1ee8:	0f 1f 84 00 00 00 00 	nop    DWORD PTR [rax+rax*1+0x0]
    1eef:	00 
	return (Datum) X;
    1ef0:	49 63 36             	movsxd rsi,DWORD PTR [r14]
					return Int32GetDatum(*((const int32 *) offset_ptr));
    1ef3:	e9 38 fe ff ff       	jmp    1d30 <tts_heap_getsomeattrs+0x180>
    1ef8:	0f 1f 84 00 00 00 00 	nop    DWORD PTR [rax+rax*1+0x0]
    1eff:	00 
	return (Datum) X;
    1f00:	49 0f bf 36          	movsx  rsi,WORD PTR [r14]
					return Int16GetDatum(*((const int16 *) offset_ptr));
    1f04:	e9 27 fe ff ff       	jmp    1d30 <tts_heap_getsomeattrs+0x180>
    1f09:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
		if (!VARATT_IS_SHORT(tupptr + *off))
    1f10:	89 de                	mov    esi,ebx
    1f12:	4c 01 c6             	add    rsi,r8
    1f15:	f6 06 01             	test   BYTE PTR [rsi],0x1
    1f18:	0f 84 02 01 00 00    	je     2020 <tts_heap_getsomeattrs+0x470>
	if (VARATT_IS_1B_E(PTR))
    1f1e:	44 0f b6 2e          	movzx  r13d,BYTE PTR [rsi]
	return (Datum) (uintptr_t) X;
    1f22:	48 89 f0             	mov    rax,rsi
    1f25:	41 80 fd 01          	cmp    r13b,0x1
    1f29:	0f 84 0f 01 00 00    	je     203e <tts_heap_getsomeattrs+0x48e>
	else if (VARATT_IS_1B(PTR))
    1f2f:	41 f6 c5 01          	test   r13b,0x1
    1f33:	0f 84 3f 01 00 00    	je     2078 <tts_heap_getsomeattrs+0x4c8>
		return VARSIZE_1B(PTR);
    1f39:	41 d0 ed             	shr    r13b,1
		*off += VARSIZE_ANY(DatumGetPointer(res));
    1f3c:	45 0f b6 ed          	movzx  r13d,r13b
			values[attnum] = (Datum) 0;
    1f40:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    1f44:	49 83 c2 01          	add    r10,0x1
    1f48:	44 01 eb             	add    ebx,r13d
    1f4b:	4d 39 ca             	cmp    r10,r9
    1f4e:	0f 85 6c fe ff ff    	jne    1dc0 <tts_heap_getsomeattrs+0x210>
    1f54:	e9 d4 fe ff ff       	jmp    1e2d <tts_heap_getsomeattrs+0x27d>
    1f59:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
	return (Datum) X;
    1f60:	48 0f be 1e          	movsx  rbx,BYTE PTR [rsi]
				return CharGetDatum(*((const char *) T));
    1f64:	e9 17 ff ff ff       	jmp    1e80 <tts_heap_getsomeattrs+0x2d0>
    1f69:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
	return (Datum) X;
    1f70:	48 0f bf 1e          	movsx  rbx,WORD PTR [rsi]
				return Int16GetDatum(*((const int16 *) T));
    1f74:	e9 07 ff ff ff       	jmp    1e80 <tts_heap_getsomeattrs+0x2d0>
    1f79:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
		off += cattr->attlen;
    1f80:	41 0f bf 75 02       	movsx  esi,WORD PTR [r13+0x2]
		if (attnum == reqnatts)
    1f85:	49 63 ec             	movsxd rbp,r12d
		off += cattr->attlen;
    1f88:	48 8b 54 24 f0       	mov    rdx,QWORD PTR [rsp-0x10]
    1f8d:	01 f3                	add    ebx,esi
		if (attnum == reqnatts)
    1f8f:	48 39 e8             	cmp    rax,rbp
    1f92:	0f 85 2f fd ff ff    	jne    1cc7 <tts_heap_getsomeattrs+0x117>
	*offp = off;
    1f98:	41 89 5b 48          	mov    DWORD PTR [r11+0x48],ebx
}
    1f9c:	5b                   	pop    rbx
    1f9d:	5d                   	pop    rbp
    1f9e:	41 5c                	pop    r12
    1fa0:	41 5d                	pop    r13
    1fa2:	41 5e                	pop    r14
    1fa4:	41 5f                	pop    r15
    1fa6:	c3                   	ret
    1fa7:	66 0f 1f 84 00 00 00 	nop    WORD PTR [rax+rax*1+0x0]
    1fae:	00 00 
		tp = (char *) tup + MAXALIGN(offsetof(HeapTupleHeaderData, t_bits));
    1fb0:	4c 8d 43 18          	lea    r8,[rbx+0x18]
		if (reqnatts > firstNonGuaranteedAttr)
    1fb4:	41 39 c4             	cmp    r12d,eax
    1fb7:	0f 8e cb 00 00 00    	jle    2088 <tts_heap_getsomeattrs+0x4d8>
			natts = Min(HeapTupleHeaderGetNatts(tup), reqnatts);
    1fbd:	0f b7 43 12          	movzx  eax,WORD PTR [rbx+0x12]
    1fc1:	25 ff 07 00 00       	and    eax,0x7ff
    1fc6:	44 39 e0             	cmp    eax,r12d
    1fc9:	41 0f 4f c4          	cmovg  eax,r12d
    1fcd:	41 89 c6             	mov    r14d,eax
    1fd0:	4c 63 c8             	movsxd r9,eax
    1fd3:	e9 56 fc ff ff       	jmp    1c2e <tts_heap_getsomeattrs+0x7e>
    1fd8:	0f 1f 84 00 00 00 00 	nop    DWORD PTR [rax+rax*1+0x0]
    1fdf:	00 
    1fe0:	31 ed                	xor    ebp,ebp
		firstNonGuaranteedAttr = 0;
    1fe2:	31 c0                	xor    eax,eax
    1fe4:	e9 02 fc ff ff       	jmp    1beb <tts_heap_getsomeattrs+0x3b>
    1fe9:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
	return (Datum) X;
    1ff0:	49 0f be 45 00       	movsx  rax,BYTE PTR [r13+0x0]
			values[attnum] = (Datum) 0;
    1ff5:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    1ff9:	49 83 c2 01          	add    r10,0x1
    1ffd:	4d 39 ca             	cmp    r10,r9
    2000:	0f 85 ba fd ff ff    	jne    1dc0 <tts_heap_getsomeattrs+0x210>
    2006:	e9 22 fe ff ff       	jmp    1e2d <tts_heap_getsomeattrs+0x27d>
    200b:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
    2010:	41 d0 ee             	shr    r14b,1
		*off += VARSIZE_ANY(DatumGetPointer(res));
    2013:	45 0f b6 f6          	movzx  r14d,r14b
    2017:	e9 75 fd ff ff       	jmp    1d91 <tts_heap_getsomeattrs+0x1e1>
    201c:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
			*off = TYPEALIGN(attalignby, *off);
    2020:	8d 5c 18 ff          	lea    ebx,[rax+rbx*1-0x1]
    2024:	f7 d8                	neg    eax
    2026:	21 c3                	and    ebx,eax
		res = PointerGetDatum(tupptr + *off);
    2028:	89 de                	mov    esi,ebx
    202a:	4c 01 c6             	add    rsi,r8
	if (VARATT_IS_1B_E(PTR))
    202d:	44 0f b6 2e          	movzx  r13d,BYTE PTR [rsi]
	return (Datum) (uintptr_t) X;
    2031:	48 89 f0             	mov    rax,rsi
    2034:	41 80 fd 01          	cmp    r13b,0x1
    2038:	0f 85 f1 fe ff ff    	jne    1f2f <tts_heap_getsomeattrs+0x37f>
	return VARTAG_1B_E(PTR);
    203e:	0f b6 76 01          	movzx  esi,BYTE PTR [rsi+0x1]
	if (tag == VARTAG_INDIRECT)
    2042:	83 fe 01             	cmp    esi,0x1
    2045:	0f 84 12 02 00 00    	je     225d <tts_heap_getsomeattrs+0x6ad>
	return ((tag & ~1) == VARTAG_EXPANDED_RO);
    204b:	41 89 f5             	mov    r13d,esi
    204e:	41 83 e5 fe          	and    r13d,0xfffffffe
	else if (VARTAG_IS_EXPANDED(tag))
    2052:	41 83 fd 02          	cmp    r13d,0x2
    2056:	0f 84 01 02 00 00    	je     225d <tts_heap_getsomeattrs+0x6ad>
	else if (tag == VARTAG_ONDISK)
    205c:	83 fe 12             	cmp    esi,0x12
    205f:	40 0f 94 c6          	sete   sil
    2063:	40 0f b6 f6          	movzx  esi,sil
    2067:	48 c1 e6 04          	shl    rsi,0x4
		*off += VARSIZE_ANY(DatumGetPointer(res));
    206b:	44 8d 6e 02          	lea    r13d,[rsi+0x2]
    206f:	e9 cc fe ff ff       	jmp    1f40 <tts_heap_getsomeattrs+0x390>
    2074:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
		return VARSIZE_4B(PTR);
    2078:	44 8b 2e             	mov    r13d,DWORD PTR [rsi]
    207b:	41 c1 ed 02          	shr    r13d,0x2
    207f:	e9 bc fe ff ff       	jmp    1f40 <tts_heap_getsomeattrs+0x390>
    2084:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
			natts = reqnatts;
    2088:	4d 63 cc             	movsxd r9,r12d
    208b:	45 89 e6             	mov    r14d,r12d
    208e:	e9 9b fb ff ff       	jmp    1c2e <tts_heap_getsomeattrs+0x7e>
    2093:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
	return (Datum) X;
    2098:	49 63 45 00          	movsxd rax,DWORD PTR [r13+0x0]
			values[attnum] = (Datum) 0;
    209c:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    20a0:	49 83 c2 01          	add    r10,0x1
    20a4:	4d 39 ca             	cmp    r10,r9
    20a7:	0f 85 13 fd ff ff    	jne    1dc0 <tts_heap_getsomeattrs+0x210>
    20ad:	e9 7b fd ff ff       	jmp    1e2d <tts_heap_getsomeattrs+0x27d>
    20b2:	66 0f 1f 44 00 00    	nop    WORD PTR [rax+rax*1+0x0]
	return (Datum) X;
    20b8:	49 0f bf 45 00       	movsx  rax,WORD PTR [r13+0x0]
			values[attnum] = (Datum) 0;
    20bd:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    20c1:	49 83 c2 01          	add    r10,0x1
    20c5:	4d 39 ca             	cmp    r10,r9
    20c8:	0f 85 f2 fc ff ff    	jne    1dc0 <tts_heap_getsomeattrs+0x210>
    20ce:	e9 5a fd ff ff       	jmp    1e2d <tts_heap_getsomeattrs+0x27d>
    20d3:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
	return VARTAG_1B_E(PTR);
    20d8:	45 0f b6 6d 01       	movzx  r13d,BYTE PTR [r13+0x1]
	if (tag == VARTAG_INDIRECT)
    20dd:	41 83 fd 01          	cmp    r13d,0x1
    20e1:	0f 84 6b 01 00 00    	je     2252 <tts_heap_getsomeattrs+0x6a2>
	return ((tag & ~1) == VARTAG_EXPANDED_RO);
    20e7:	45 89 ee             	mov    r14d,r13d
    20ea:	41 83 e6 fe          	and    r14d,0xfffffffe
	else if (VARTAG_IS_EXPANDED(tag))
    20ee:	41 83 fe 02          	cmp    r14d,0x2
    20f2:	0f 84 5a 01 00 00    	je     2252 <tts_heap_getsomeattrs+0x6a2>
	else if (tag == VARTAG_ONDISK)
    20f8:	41 83 fd 12          	cmp    r13d,0x12
    20fc:	41 0f 94 c5          	sete   r13b
    2100:	45 0f b6 ed          	movzx  r13d,r13b
    2104:	49 c1 e5 04          	shl    r13,0x4
    2108:	45 8d 75 02          	lea    r14d,[r13+0x2]
    210c:	e9 80 fc ff ff       	jmp    1d91 <tts_heap_getsomeattrs+0x1e1>
    2111:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
 * case.
 */
static inline int
first_null_attr(const bits8 *bits, int natts)
{
	int			nattByte = natts >> 3;
    2118:	45 89 cd             	mov    r13d,r9d
    211b:	41 c1 fd 03          	sar    r13d,0x3
		}
	}
#endif

	/* Process all bytes up to just before the byte for the natts attribute */
	for (bytenum = 0; bytenum < nattByte; bytenum++)
    211f:	45 85 ed             	test   r13d,r13d
    2122:	0f 8e 40 01 00 00    	jle    2268 <tts_heap_getsomeattrs+0x6b8>
    2128:	48 8d 73 17          	lea    rsi,[rbx+0x17]
    212c:	31 ff                	xor    edi,edi
    212e:	eb 20                	jmp    2150 <tts_heap_getsomeattrs+0x5a0>
    2130:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
    2135:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    213c:	00 00 00 00 
    2140:	83 c7 01             	add    edi,0x1
    2143:	48 83 c6 01          	add    rsi,0x1
    2147:	41 39 fd             	cmp    r13d,edi
    214a:	0f 84 ec 00 00 00    	je     223c <tts_heap_getsomeattrs+0x68c>
	{
		/* break if there's any NULL attrs (a 0 bit) */
		if (bits[bytenum] != 0xFF)
    2150:	0f b6 06             	movzx  eax,BYTE PTR [rsi]
    2153:	3c ff                	cmp    al,0xff
    2155:	74 e9                	je     2140 <tts_heap_getsomeattrs+0x590>
	 * looking for the first 1-bit.  This works even when the byte is 0xFF, as
	 * the bitwise NOT of 0xFF in 32 bits is 0xFFFFFF00, in which case
	 * pg_rightmost_one_pos32() will return 8.  We may end up with a value
	 * higher than natts here, but we'll fix that with the Min() below.
	 */
	res = bytenum << 3;
    2157:	c1 e7 03             	shl    edi,0x3
	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
    215a:	f7 d0                	not    eax
	int			nbytes = (natts + 7) >> 3;
    215c:	45 8d 69 07          	lea    r13d,[r9+0x7]
pg_rightmost_one_pos32(uint32 word)
{
#ifdef HAVE__BUILTIN_CTZ
	Assert(word != 0);

	return __builtin_ctz(word);
    2160:	f3 0f bc c0          	tzcnt  eax,eax
	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
    2164:	01 f8                	add    eax,edi

	/*
	 * Since we did no masking to mask out bits beyond the natt'th bit, we may
	 * have found a bit higher than natts, so we must cap res to natts
	 */
	res = Min(res, natts);
    2166:	41 39 c1             	cmp    r9d,eax
    2169:	41 0f 4e c1          	cmovle eax,r9d
	int			nbytes = (natts + 7) >> 3;
    216d:	41 c1 fd 03          	sar    r13d,0x3
	res = Min(res, natts);
    2171:	4c 63 f0             	movsxd r14,eax
		isnull_8 &= UINT64CONST(0x0101010101010101);
    2174:	49 bf 01 01 01 01 01 	movabs r15,0x101010101010101
    217b:	01 01 01 
    217e:	4d 63 ed             	movsxd r13,r13d
	for (bytenum = 0; bytenum < nattByte; bytenum++)
    2181:	31 ff                	xor    edi,edi
    2183:	66 0f 1f 44 00 00    	nop    WORD PTR [rax+rax*1+0x0]
    2189:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    2190:	00 00 00 00 
    2194:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    219b:	00 00 00 00 
    219f:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    21a6:	00 00 00 00 
    21aa:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    21b1:	00 00 00 00 
    21b5:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    21bc:	00 00 00 00 
		bits8		nullbyte = ~bits[i];
    21c0:	0f b6 74 3b 17       	movzx  esi,BYTE PTR [rbx+rdi*1+0x17]
    21c5:	f7 d6                	not    esi
		isnull_8 |= ((uint64) ((nullbyte >> 4) * SPREAD_BITS_MULTIPLIER_32)) << 32;
    21c7:	89 f0                	mov    eax,esi
		isnull_8 = (nullbyte & 0xf) * SPREAD_BITS_MULTIPLIER_32;
    21c9:	83 e6 0f             	and    esi,0xf
		isnull_8 |= ((uint64) ((nullbyte >> 4) * SPREAD_BITS_MULTIPLIER_32)) << 32;
    21cc:	c0 e8 04             	shr    al,0x4
		isnull_8 = (nullbyte & 0xf) * SPREAD_BITS_MULTIPLIER_32;
    21cf:	48 69 f6 81 40 20 00 	imul   rsi,rsi,0x204081
		isnull_8 |= ((uint64) ((nullbyte >> 4) * SPREAD_BITS_MULTIPLIER_32)) << 32;
    21d6:	83 e0 0f             	and    eax,0xf
    21d9:	48 69 c0 81 40 20 00 	imul   rax,rax,0x204081
    21e0:	48 c1 e0 20          	shl    rax,0x20
    21e4:	48 09 f0             	or     rax,rsi
		isnull_8 &= UINT64CONST(0x0101010101010101);
    21e7:	4c 21 f8             	and    rax,r15
    21ea:	48 89 04 f9          	mov    QWORD PTR [rcx+rdi*8],rax
	for (int i = 0; i < nbytes; i++, isnull += 8)
    21ee:	48 83 c7 01          	add    rdi,0x1
    21f2:	4c 39 ef             	cmp    rdi,r13
    21f5:	75 c9                	jne    21c0 <tts_heap_getsomeattrs+0x610>
			firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, firstNullAttr);
    21f7:	45 39 f2             	cmp    r10d,r14d
    21fa:	4d 0f 4f d6          	cmovg  r10,r14
    21fe:	e9 2b fa ff ff       	jmp    1c2e <tts_heap_getsomeattrs+0x7e>
    2203:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
	for (; attnum < firstNullAttr; attnum++)
    2208:	49 89 c2             	mov    r10,rax
    220b:	e9 a0 fb ff ff       	jmp    1db0 <tts_heap_getsomeattrs+0x200>
	for (; attnum < natts; attnum++)
    2210:	4d 89 d1             	mov    r9,r10
    2213:	e9 15 fc ff ff       	jmp    1e2d <tts_heap_getsomeattrs+0x27d>
    2218:	0f 1f 84 00 00 00 00 	nop    DWORD PTR [rax+rax*1+0x0]
    221f:	00 
		*offp = off;
    2220:	41 89 5b 48          	mov    DWORD PTR [r11+0x48],ebx
		slot_getmissingattrs(slot, attnum, reqnatts);
    2224:	44 89 e2             	mov    edx,r12d
}
    2227:	5b                   	pop    rbx
		slot_getmissingattrs(slot, attnum, reqnatts);
    2228:	44 89 ce             	mov    esi,r9d
}
    222b:	5d                   	pop    rbp
		slot_getmissingattrs(slot, attnum, reqnatts);
    222c:	4c 89 df             	mov    rdi,r11
}
    222f:	41 5c                	pop    r12
    2231:	41 5d                	pop    r13
    2233:	41 5e                	pop    r14
    2235:	41 5f                	pop    r15
		slot_getmissingattrs(slot, attnum, reqnatts);
    2237:	e9 b4 f8 ff ff       	jmp    1af0 <slot_getmissingattrs>
	res = bytenum << 3;
    223c:	42 8d 3c ed 00 00 00 	lea    edi,[r13*8+0x0]
    2243:	00 
	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
    2244:	4d 63 ed             	movsxd r13,r13d
    2247:	42 0f b6 44 2b 17    	movzx  eax,BYTE PTR [rbx+r13*1+0x17]
    224d:	e9 08 ff ff ff       	jmp    215a <tts_heap_getsomeattrs+0x5aa>
    2252:	41 be 0a 00 00 00    	mov    r14d,0xa
    2258:	e9 34 fb ff ff       	jmp    1d91 <tts_heap_getsomeattrs+0x1e1>
    225d:	41 bd 0a 00 00 00    	mov    r13d,0xa
    2263:	e9 d8 fc ff ff       	jmp    1f40 <tts_heap_getsomeattrs+0x390>
    2268:	0f b6 43 17          	movzx  eax,BYTE PTR [rbx+0x17]
	int			nbytes = (natts + 7) >> 3;
    226c:	45 8d 69 07          	lea    r13d,[r9+0x7]
	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
    2270:	f7 d0                	not    eax
    2272:	f3 0f bc c0          	tzcnt  eax,eax
	res = Min(res, natts);
    2276:	41 39 c1             	cmp    r9d,eax
    2279:	41 0f 4e c1          	cmovle eax,r9d
	int			nbytes = (natts + 7) >> 3;
    227d:	41 c1 fd 03          	sar    r13d,0x3
	res = Min(res, natts);
    2281:	4c 63 f0             	movsxd r14,eax
	for (int i = 0; i < nbytes; i++, isnull += 8)
    2284:	41 83 fd 01          	cmp    r13d,0x1
    2288:	0f 85 69 ff ff ff    	jne    21f7 <tts_heap_getsomeattrs+0x647>
    228e:	e9 e1 fe ff ff       	jmp    2174 <tts_heap_getsomeattrs+0x5c4>
    2293:	66 90                	xchg   ax,ax
    2295:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    229c:	00 00 00 00 

00000000000022a0 <tts_minimal_getsomeattrs>:
{
    22a0:	f3 0f 1e fa          	endbr64
    22a4:	41 57                	push   r15
    22a6:	49 89 fb             	mov    r11,rdi
    22a9:	41 56                	push   r14
    22ab:	41 55                	push   r13
    22ad:	41 54                	push   r12
    22af:	4c 63 e6             	movsxd r12,esi
    22b2:	55                   	push   rbp
    22b3:	53                   	push   rbx
	HeapTupleHeader tup = tuple->t_data;
    22b4:	48 8b 47 40          	mov    rax,QWORD PTR [rdi+0x40]
	TupleDesc	tupleDesc = slot->tts_tupleDescriptor;
    22b8:	48 8b 57 10          	mov    rdx,QWORD PTR [rdi+0x10]
	isnull = slot->tts_isnull;
    22bc:	48 8b 4f 20          	mov    rcx,QWORD PTR [rdi+0x20]
	HeapTupleHeader tup = tuple->t_data;
    22c0:	48 8b 58 10          	mov    rbx,QWORD PTR [rax+0x10]
	if (TTS_OBEYS_NOT_NULL_CONSTRAINTS(slot))
    22c4:	f6 47 04 08          	test   BYTE PTR [rdi+0x4],0x8
    22c8:	0f 84 22 04 00 00    	je     26f0 <tts_minimal_getsomeattrs+0x450>
		firstNonGuaranteedAttr = Min(reqnatts, tupleDesc->firstNonGuaranteedAttr);
    22ce:	8b 42 14             	mov    eax,DWORD PTR [rdx+0x14]
    22d1:	41 39 c4             	cmp    r12d,eax
    22d4:	41 0f 4e c4          	cmovle eax,r12d
	if (attnum < firstNonGuaranteedAttr)
    22d8:	48 63 e8             	movsxd rbp,eax
	firstNonCacheOffsetAttr = tupleDesc->firstNonCachedOffsetAttr;
    22db:	4c 63 52 10          	movsxd r10,DWORD PTR [rdx+0x10]
	if (HeapTupleHasNulls(tuple))
    22df:	f6 43 14 01          	test   BYTE PTR [rbx+0x14],0x1
    22e3:	0f 84 d7 03 00 00    	je     26c0 <tts_minimal_getsomeattrs+0x420>
		natts = HeapTupleHeaderGetNatts(tup);
    22e9:	44 0f b7 4b 12       	movzx  r9d,WORD PTR [rbx+0x12]
    22ee:	41 81 e1 ff 07 00 00 	and    r9d,0x7ff
    22f5:	45 8d 41 07          	lea    r8d,[r9+0x7]
    22f9:	41 c1 f8 03          	sar    r8d,0x3
		tp = (char *) tup + MAXALIGN(offsetof(HeapTupleHeaderData, t_bits) +
    22fd:	41 83 c0 1e          	add    r8d,0x1e
    2301:	41 81 e0 f8 03 00 00 	and    r8d,0x3f8
    2308:	49 01 d8             	add    r8,rbx
		natts = Min(natts, reqnatts);
    230b:	45 39 cc             	cmp    r12d,r9d
    230e:	4d 0f 4e cc          	cmovle r9,r12
			firstNullAttr = natts;
    2312:	45 89 ce             	mov    r14d,r9d
		if (natts > firstNonGuaranteedAttr)
    2315:	41 39 c1             	cmp    r9d,eax
    2318:	0f 8f 0a 05 00 00    	jg     2828 <tts_minimal_getsomeattrs+0x588>
	attnum = slot->tts_nvalid;
    231e:	49 0f bf 43 06       	movsx  rax,WORD PTR [r11+0x6]
	values = slot->tts_values;
    2323:	49 8b 7b 18          	mov    rdi,QWORD PTR [r11+0x18]
	slot->tts_nvalid = reqnatts;
    2327:	66 45 89 63 06       	mov    WORD PTR [r11+0x6],r12w
	if (attnum < firstNonGuaranteedAttr)
    232c:	48 39 e8             	cmp    rax,rbp
    232f:	73 7f                	jae    23b0 <tts_minimal_getsomeattrs+0x110>
    2331:	48 89 54 24 f0       	mov    QWORD PTR [rsp-0x10],rdx
    2336:	48 8d 74 c2 20       	lea    rsi,[rdx+rax*8+0x20]
    233b:	eb 22                	jmp    235f <tts_minimal_getsomeattrs+0xbf>
    233d:	0f 1f 00             	nop    DWORD PTR [rax]
		switch (attlen)
    2340:	66 41 83 ff 01       	cmp    r15w,0x1
    2345:	74 59                	je     23a0 <tts_minimal_getsomeattrs+0x100>
	return (Datum) X;
    2347:	48 8b 12             	mov    rdx,QWORD PTR [rdx]
			values[attnum] = fetch_att_noerr(tp + off, true, attlen);
    234a:	48 89 14 c7          	mov    QWORD PTR [rdi+rax*8],rdx
			attnum++;
    234e:	48 83 c0 01          	add    rax,0x1
		} while (attnum < firstNonGuaranteedAttr);
    2352:	48 83 c6 08          	add    rsi,0x8
    2356:	48 39 e8             	cmp    rax,rbp
    2359:	0f 83 31 03 00 00    	jae    2690 <tts_minimal_getsomeattrs+0x3f0>
			isnull[attnum] = false;
    235f:	c6 04 01 00          	mov    BYTE PTR [rcx+rax*1],0x0
			off = cattr->attcacheoff;
    2363:	0f bf 16             	movsx  edx,WORD PTR [rsi]
			cattr = &cattrs[attnum];
    2366:	49 89 f5             	mov    r13,rsi
			attlen = cattr->attlen;
    2369:	44 0f b7 7e 02       	movzx  r15d,WORD PTR [rsi+0x2]
			off = cattr->attcacheoff;
    236e:	48 89 d3             	mov    rbx,rdx
			values[attnum] = fetch_att_noerr(tp + off, true, attlen);
    2371:	4c 01 c2             	add    rdx,r8
    2374:	66 41 83 ff 02       	cmp    r15w,0x2
    2379:	74 15                	je     2390 <tts_minimal_getsomeattrs+0xf0>
    237b:	66 41 83 ff 04       	cmp    r15w,0x4
    2380:	75 be                	jne    2340 <tts_minimal_getsomeattrs+0xa0>
	return (Datum) X;
    2382:	48 63 12             	movsxd rdx,DWORD PTR [rdx]
				return Int32GetDatum(*((const int32 *) T));
    2385:	eb c3                	jmp    234a <tts_minimal_getsomeattrs+0xaa>
    2387:	66 0f 1f 84 00 00 00 	nop    WORD PTR [rax+rax*1+0x0]
    238e:	00 00 
	return (Datum) X;
    2390:	48 0f bf 12          	movsx  rdx,WORD PTR [rdx]
				return Int16GetDatum(*((const int16 *) T));
    2394:	eb b4                	jmp    234a <tts_minimal_getsomeattrs+0xaa>
    2396:	66 2e 0f 1f 84 00 00 	cs nop WORD PTR [rax+rax*1+0x0]
    239d:	00 00 00 
	return (Datum) X;
    23a0:	48 0f be 12          	movsx  rdx,BYTE PTR [rdx]
				return CharGetDatum(*((const char *) T));
    23a4:	eb a4                	jmp    234a <tts_minimal_getsomeattrs+0xaa>
    23a6:	66 2e 0f 1f 84 00 00 	cs nop WORD PTR [rax+rax*1+0x0]
    23ad:	00 00 00 
		off = *offp;
    23b0:	41 8b 5b 68          	mov    ebx,DWORD PTR [r11+0x68]
	if (unlikely(attnum < reqnatts))
    23b4:	49 63 ec             	movsxd rbp,r12d
	firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, natts);
    23b7:	45 39 ca             	cmp    r10d,r9d
    23ba:	4d 0f 4f d1          	cmovg  r10,r9
	if (attnum < firstNonCacheOffsetAttr)
    23be:	4c 39 d0             	cmp    rax,r10
    23c1:	0f 82 c9 01 00 00    	jb     2590 <tts_minimal_getsomeattrs+0x2f0>
	for (; attnum < firstNullAttr; attnum++)
    23c7:	4d 63 d6             	movsxd r10,r14d
    23ca:	4c 39 d0             	cmp    rax,r10
    23cd:	72 5e                	jb     242d <tts_minimal_getsomeattrs+0x18d>
    23cf:	e9 34 05 00 00       	jmp    2908 <tts_minimal_getsomeattrs+0x668>
    23d4:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
		*off = TYPEALIGN(attalignby, *off);
    23d8:	8d 5c 1e ff          	lea    ebx,[rsi+rbx*1-0x1]
    23dc:	f7 de                	neg    esi
    23de:	21 de                	and    esi,ebx
		*off += attlen;
    23e0:	41 0f bf dd          	movsx  ebx,r13w
		offset_ptr = tupptr + *off;
    23e4:	41 89 f6             	mov    r14d,esi
		*off += attlen;
    23e7:	01 f3                	add    ebx,esi
		offset_ptr = tupptr + *off;
    23e9:	4d 01 c6             	add    r14,r8
	return (Datum) (uintptr_t) X;
    23ec:	4c 89 f6             	mov    rsi,r14
		if (attbyval)
    23ef:	80 7c c2 24 00       	cmp    BYTE PTR [rdx+rax*8+0x24],0x0
    23f4:	74 2a                	je     2420 <tts_minimal_getsomeattrs+0x180>
			switch (attlen)
    23f6:	66 41 83 fd 02       	cmp    r13w,0x2
    23fb:	0f 84 0f 02 00 00    	je     2610 <tts_minimal_getsomeattrs+0x370>
    2401:	66 41 83 fd 04       	cmp    r13w,0x4
    2406:	0f 84 f4 01 00 00    	je     2600 <tts_minimal_getsomeattrs+0x360>
    240c:	66 41 83 fd 01       	cmp    r13w,0x1
    2411:	0f 85 d9 01 00 00    	jne    25f0 <tts_minimal_getsomeattrs+0x350>
	return (Datum) X;
    2417:	49 0f be 36          	movsx  rsi,BYTE PTR [r14]
    241b:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
		values[attnum] = align_fetch_then_add(tp,
    2420:	48 89 34 c7          	mov    QWORD PTR [rdi+rax*8],rsi
	for (; attnum < firstNullAttr; attnum++)
    2424:	48 83 c0 01          	add    rax,0x1
    2428:	4c 39 d0             	cmp    rax,r10
    242b:	74 73                	je     24a0 <tts_minimal_getsomeattrs+0x200>
		isnull[attnum] = false;
    242d:	c6 04 01 00          	mov    BYTE PTR [rcx+rax*1],0x0
		attlen = cattr->attlen;
    2431:	44 0f b7 6c c2 22    	movzx  r13d,WORD PTR [rdx+rax*8+0x22]
											  cattr->attalignby);
    2437:	0f b6 74 c2 25       	movzx  esi,BYTE PTR [rdx+rax*8+0x25]
	if (attlen > 0)
    243c:	66 45 85 ed          	test   r13w,r13w
    2440:	7f 96                	jg     23d8 <tts_minimal_getsomeattrs+0x138>
		if (!VARATT_IS_SHORT(tupptr + *off))
    2442:	41 89 dd             	mov    r13d,ebx
    2445:	4d 01 c5             	add    r13,r8
    2448:	41 f6 45 00 01       	test   BYTE PTR [r13+0x0],0x1
    244d:	75 0e                	jne    245d <tts_minimal_getsomeattrs+0x1bd>
			*off = TYPEALIGN(attalignby, *off);
    244f:	8d 5c 1e ff          	lea    ebx,[rsi+rbx*1-0x1]
    2453:	f7 de                	neg    esi
    2455:	21 f3                	and    ebx,esi
		res = PointerGetDatum(tupptr + *off);
    2457:	41 89 dd             	mov    r13d,ebx
    245a:	4d 01 c5             	add    r13,r8
	if (VARATT_IS_1B_E(PTR))
    245d:	45 0f b6 75 00       	movzx  r14d,BYTE PTR [r13+0x0]
	return (Datum) (uintptr_t) X;
    2462:	4c 89 ee             	mov    rsi,r13
    2465:	41 80 fe 01          	cmp    r14b,0x1
    2469:	0f 84 79 03 00 00    	je     27e8 <tts_minimal_getsomeattrs+0x548>
	else if (VARATT_IS_1B(PTR))
    246f:	41 f6 c6 01          	test   r14b,0x1
    2473:	0f 85 a7 02 00 00    	jne    2720 <tts_minimal_getsomeattrs+0x480>
		return VARSIZE_4B(PTR);
    2479:	45 8b 75 00          	mov    r14d,DWORD PTR [r13+0x0]
    247d:	41 c1 ee 02          	shr    r14d,0x2
		values[attnum] = align_fetch_then_add(tp,
    2481:	48 89 34 c7          	mov    QWORD PTR [rdi+rax*8],rsi
	for (; attnum < firstNullAttr; attnum++)
    2485:	48 83 c0 01          	add    rax,0x1
		*off += VARSIZE_ANY(DatumGetPointer(res));
    2489:	44 01 f3             	add    ebx,r14d
    248c:	4c 39 d0             	cmp    rax,r10
    248f:	75 9c                	jne    242d <tts_minimal_getsomeattrs+0x18d>
    2491:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
    2495:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    249c:	00 00 00 00 
	for (; attnum < natts; attnum++)
    24a0:	4d 39 ca             	cmp    r10,r9
    24a3:	0f 83 67 04 00 00    	jae    2910 <tts_minimal_getsomeattrs+0x670>
    24a9:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
		if (isnull[attnum])
    24b0:	31 c0                	xor    eax,eax
    24b2:	42 80 3c 11 00       	cmp    BYTE PTR [rcx+r10*1],0x0
    24b7:	75 57                	jne    2510 <tts_minimal_getsomeattrs+0x270>
		attlen = cattr->attlen;
    24b9:	42 0f b7 74 d2 22    	movzx  esi,WORD PTR [rdx+r10*8+0x22]
											  cattr->attalignby);
    24bf:	42 0f b6 44 d2 25    	movzx  eax,BYTE PTR [rdx+r10*8+0x25]
	if (attlen > 0)
    24c5:	66 85 f6             	test   si,si
    24c8:	0f 8e 52 01 00 00    	jle    2620 <tts_minimal_getsomeattrs+0x380>
		*off = TYPEALIGN(attalignby, *off);
    24ce:	8d 5c 18 ff          	lea    ebx,[rax+rbx*1-0x1]
    24d2:	f7 d8                	neg    eax
    24d4:	21 d8                	and    eax,ebx
		*off += attlen;
    24d6:	0f bf de             	movsx  ebx,si
		offset_ptr = tupptr + *off;
    24d9:	41 89 c5             	mov    r13d,eax
		*off += attlen;
    24dc:	01 c3                	add    ebx,eax
		offset_ptr = tupptr + *off;
    24de:	4d 01 c5             	add    r13,r8
    24e1:	4c 89 e8             	mov    rax,r13
		if (attbyval)
    24e4:	42 80 7c d2 24 00    	cmp    BYTE PTR [rdx+r10*8+0x24],0x0
    24ea:	74 24                	je     2510 <tts_minimal_getsomeattrs+0x270>
			switch (attlen)
    24ec:	66 83 fe 02          	cmp    si,0x2
    24f0:	0f 84 d2 02 00 00    	je     27c8 <tts_minimal_getsomeattrs+0x528>
    24f6:	66 83 fe 04          	cmp    si,0x4
    24fa:	0f 84 a8 02 00 00    	je     27a8 <tts_minimal_getsomeattrs+0x508>
    2500:	66 83 fe 01          	cmp    si,0x1
    2504:	0f 84 f6 01 00 00    	je     2700 <tts_minimal_getsomeattrs+0x460>
	return (Datum) X;
    250a:	49 8b 45 00          	mov    rax,QWORD PTR [r13+0x0]
    250e:	66 90                	xchg   ax,ax
			values[attnum] = (Datum) 0;
    2510:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    2514:	49 83 c2 01          	add    r10,0x1
    2518:	4d 39 ca             	cmp    r10,r9
    251b:	75 93                	jne    24b0 <tts_minimal_getsomeattrs+0x210>
	if (unlikely(attnum < reqnatts))
    251d:	49 39 e9             	cmp    r9,rbp
    2520:	0f 82 fa 03 00 00    	jb     2920 <tts_minimal_getsomeattrs+0x680>
	*offp = off;
    2526:	41 89 5b 68          	mov    DWORD PTR [r11+0x68],ebx
}
    252a:	5b                   	pop    rbx
    252b:	5d                   	pop    rbp
    252c:	41 5c                	pop    r12
    252e:	41 5d                	pop    r13
    2530:	41 5e                	pop    r14
    2532:	41 5f                	pop    r15
    2534:	c3                   	ret
    2535:	0f 1f 00             	nop    DWORD PTR [rax]
		switch (attlen)
    2538:	66 83 fb 01          	cmp    bx,0x1
    253c:	0f 84 2e 01 00 00    	je     2670 <tts_minimal_getsomeattrs+0x3d0>
    2542:	48 8b 1e             	mov    rbx,QWORD PTR [rsi]
    2545:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
    2549:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    2550:	00 00 00 00 
    2554:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    255b:	00 00 00 00 
    255f:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    2566:	00 00 00 00 
    256a:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    2571:	00 00 00 00 
    2575:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    257c:	00 00 00 00 
		} while (++attnum < firstNonCacheOffsetAttr);
    2580:	48 8d 70 01          	lea    rsi,[rax+0x1]
			values[attnum] = fetch_att_noerr(tp + off,
    2584:	48 89 1c c7          	mov    QWORD PTR [rdi+rax*8],rbx
		} while (++attnum < firstNonCacheOffsetAttr);
    2588:	49 39 f2             	cmp    r10,rsi
    258b:	74 53                	je     25e0 <tts_minimal_getsomeattrs+0x340>
    258d:	48 89 f0             	mov    rax,rsi
			isnull[attnum] = false;
    2590:	c6 04 01 00          	mov    BYTE PTR [rcx+rax*1],0x0
			off = cattr->attcacheoff;
    2594:	0f bf 74 c2 20       	movsx  esi,WORD PTR [rdx+rax*8+0x20]
    2599:	49 89 f5             	mov    r13,rsi
			values[attnum] = fetch_att_noerr(tp + off,
    259c:	4c 01 c6             	add    rsi,r8
	return (Datum) (uintptr_t) X;
    259f:	48 89 f3             	mov    rbx,rsi
	if (attbyval)
    25a2:	80 7c c2 24 00       	cmp    BYTE PTR [rdx+rax*8+0x24],0x0
    25a7:	74 d7                	je     2580 <tts_minimal_getsomeattrs+0x2e0>
											 cattr->attlen);
    25a9:	0f b7 5c c2 22       	movzx  ebx,WORD PTR [rdx+rax*8+0x22]
		switch (attlen)
    25ae:	66 83 fb 02          	cmp    bx,0x2
    25b2:	0f 84 c8 00 00 00    	je     2680 <tts_minimal_getsomeattrs+0x3e0>
    25b8:	66 83 fb 04          	cmp    bx,0x4
    25bc:	0f 85 76 ff ff ff    	jne    2538 <tts_minimal_getsomeattrs+0x298>
	return (Datum) X;
    25c2:	48 63 1e             	movsxd rbx,DWORD PTR [rsi]
		} while (++attnum < firstNonCacheOffsetAttr);
    25c5:	48 8d 70 01          	lea    rsi,[rax+0x1]
			values[attnum] = fetch_att_noerr(tp + off,
    25c9:	48 89 1c c7          	mov    QWORD PTR [rdi+rax*8],rbx
		} while (++attnum < firstNonCacheOffsetAttr);
    25cd:	49 39 f2             	cmp    r10,rsi
    25d0:	75 bb                	jne    258d <tts_minimal_getsomeattrs+0x2ed>
    25d2:	0f 1f 00             	nop    DWORD PTR [rax]
    25d5:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    25dc:	00 00 00 00 
		off += cattr->attlen;
    25e0:	0f bf 5c c2 22       	movsx  ebx,WORD PTR [rdx+rax*8+0x22]
		} while (++attnum < firstNonCacheOffsetAttr);
    25e5:	4c 89 d0             	mov    rax,r10
		off += cattr->attlen;
    25e8:	44 01 eb             	add    ebx,r13d
    25eb:	e9 d7 fd ff ff       	jmp    23c7 <tts_minimal_getsomeattrs+0x127>
	return (Datum) X;
    25f0:	49 8b 36             	mov    rsi,QWORD PTR [r14]
					return Int64GetDatum(*((const int64 *) offset_ptr));
    25f3:	e9 28 fe ff ff       	jmp    2420 <tts_minimal_getsomeattrs+0x180>
    25f8:	0f 1f 84 00 00 00 00 	nop    DWORD PTR [rax+rax*1+0x0]
    25ff:	00 
	return (Datum) X;
    2600:	49 63 36             	movsxd rsi,DWORD PTR [r14]
					return Int32GetDatum(*((const int32 *) offset_ptr));
    2603:	e9 18 fe ff ff       	jmp    2420 <tts_minimal_getsomeattrs+0x180>
    2608:	0f 1f 84 00 00 00 00 	nop    DWORD PTR [rax+rax*1+0x0]
    260f:	00 
	return (Datum) X;
    2610:	49 0f bf 36          	movsx  rsi,WORD PTR [r14]
					return Int16GetDatum(*((const int16 *) offset_ptr));
    2614:	e9 07 fe ff ff       	jmp    2420 <tts_minimal_getsomeattrs+0x180>
    2619:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
		if (!VARATT_IS_SHORT(tupptr + *off))
    2620:	89 de                	mov    esi,ebx
    2622:	4c 01 c6             	add    rsi,r8
    2625:	f6 06 01             	test   BYTE PTR [rsi],0x1
    2628:	0f 84 02 01 00 00    	je     2730 <tts_minimal_getsomeattrs+0x490>
	if (VARATT_IS_1B_E(PTR))
    262e:	44 0f b6 2e          	movzx  r13d,BYTE PTR [rsi]
	return (Datum) (uintptr_t) X;
    2632:	48 89 f0             	mov    rax,rsi
    2635:	41 80 fd 01          	cmp    r13b,0x1
    2639:	0f 84 0f 01 00 00    	je     274e <tts_minimal_getsomeattrs+0x4ae>
	else if (VARATT_IS_1B(PTR))
    263f:	41 f6 c5 01          	test   r13b,0x1
    2643:	0f 84 3f 01 00 00    	je     2788 <tts_minimal_getsomeattrs+0x4e8>
		return VARSIZE_1B(PTR);
    2649:	41 d0 ed             	shr    r13b,1
		*off += VARSIZE_ANY(DatumGetPointer(res));
    264c:	45 0f b6 ed          	movzx  r13d,r13b
			values[attnum] = (Datum) 0;
    2650:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    2654:	49 83 c2 01          	add    r10,0x1
    2658:	44 01 eb             	add    ebx,r13d
    265b:	4d 39 ca             	cmp    r10,r9
    265e:	0f 85 4c fe ff ff    	jne    24b0 <tts_minimal_getsomeattrs+0x210>
    2664:	e9 b4 fe ff ff       	jmp    251d <tts_minimal_getsomeattrs+0x27d>
    2669:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
	return (Datum) X;
    2670:	48 0f be 1e          	movsx  rbx,BYTE PTR [rsi]
				return CharGetDatum(*((const char *) T));
    2674:	e9 07 ff ff ff       	jmp    2580 <tts_minimal_getsomeattrs+0x2e0>
    2679:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
	return (Datum) X;
    2680:	48 0f bf 1e          	movsx  rbx,WORD PTR [rsi]
				return Int16GetDatum(*((const int16 *) T));
    2684:	e9 f7 fe ff ff       	jmp    2580 <tts_minimal_getsomeattrs+0x2e0>
    2689:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
		off += cattr->attlen;
    2690:	41 0f bf 75 02       	movsx  esi,WORD PTR [r13+0x2]
		if (attnum == reqnatts)
    2695:	49 63 ec             	movsxd rbp,r12d
		off += cattr->attlen;
    2698:	48 8b 54 24 f0       	mov    rdx,QWORD PTR [rsp-0x10]
    269d:	01 f3                	add    ebx,esi
		if (attnum == reqnatts)
    269f:	48 39 e8             	cmp    rax,rbp
    26a2:	0f 85 0f fd ff ff    	jne    23b7 <tts_minimal_getsomeattrs+0x117>
	*offp = off;
    26a8:	41 89 5b 68          	mov    DWORD PTR [r11+0x68],ebx
}
    26ac:	5b                   	pop    rbx
    26ad:	5d                   	pop    rbp
    26ae:	41 5c                	pop    r12
    26b0:	41 5d                	pop    r13
    26b2:	41 5e                	pop    r14
    26b4:	41 5f                	pop    r15
    26b6:	c3                   	ret
    26b7:	66 0f 1f 84 00 00 00 	nop    WORD PTR [rax+rax*1+0x0]
    26be:	00 00 
		tp = (char *) tup + MAXALIGN(offsetof(HeapTupleHeaderData, t_bits));
    26c0:	4c 8d 43 18          	lea    r8,[rbx+0x18]
		if (reqnatts > firstNonGuaranteedAttr)
    26c4:	41 39 c4             	cmp    r12d,eax
    26c7:	0f 8e cb 00 00 00    	jle    2798 <tts_minimal_getsomeattrs+0x4f8>
			natts = Min(HeapTupleHeaderGetNatts(tup), reqnatts);
    26cd:	0f b7 43 12          	movzx  eax,WORD PTR [rbx+0x12]
    26d1:	25 ff 07 00 00       	and    eax,0x7ff
    26d6:	44 39 e0             	cmp    eax,r12d
    26d9:	41 0f 4f c4          	cmovg  eax,r12d
    26dd:	41 89 c6             	mov    r14d,eax
    26e0:	4c 63 c8             	movsxd r9,eax
    26e3:	e9 36 fc ff ff       	jmp    231e <tts_minimal_getsomeattrs+0x7e>
    26e8:	0f 1f 84 00 00 00 00 	nop    DWORD PTR [rax+rax*1+0x0]
    26ef:	00 
    26f0:	31 ed                	xor    ebp,ebp
		firstNonGuaranteedAttr = 0;
    26f2:	31 c0                	xor    eax,eax
    26f4:	e9 e2 fb ff ff       	jmp    22db <tts_minimal_getsomeattrs+0x3b>
    26f9:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
	return (Datum) X;
    2700:	49 0f be 45 00       	movsx  rax,BYTE PTR [r13+0x0]
			values[attnum] = (Datum) 0;
    2705:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    2709:	49 83 c2 01          	add    r10,0x1
    270d:	4d 39 ca             	cmp    r10,r9
    2710:	0f 85 9a fd ff ff    	jne    24b0 <tts_minimal_getsomeattrs+0x210>
    2716:	e9 02 fe ff ff       	jmp    251d <tts_minimal_getsomeattrs+0x27d>
    271b:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
    2720:	41 d0 ee             	shr    r14b,1
		*off += VARSIZE_ANY(DatumGetPointer(res));
    2723:	45 0f b6 f6          	movzx  r14d,r14b
    2727:	e9 55 fd ff ff       	jmp    2481 <tts_minimal_getsomeattrs+0x1e1>
    272c:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
			*off = TYPEALIGN(attalignby, *off);
    2730:	8d 5c 18 ff          	lea    ebx,[rax+rbx*1-0x1]
    2734:	f7 d8                	neg    eax
    2736:	21 c3                	and    ebx,eax
		res = PointerGetDatum(tupptr + *off);
    2738:	89 de                	mov    esi,ebx
    273a:	4c 01 c6             	add    rsi,r8
	if (VARATT_IS_1B_E(PTR))
    273d:	44 0f b6 2e          	movzx  r13d,BYTE PTR [rsi]
	return (Datum) (uintptr_t) X;
    2741:	48 89 f0             	mov    rax,rsi
    2744:	41 80 fd 01          	cmp    r13b,0x1
    2748:	0f 85 f1 fe ff ff    	jne    263f <tts_minimal_getsomeattrs+0x39f>
	return VARTAG_1B_E(PTR);
    274e:	0f b6 76 01          	movzx  esi,BYTE PTR [rsi+0x1]
	if (tag == VARTAG_INDIRECT)
    2752:	83 fe 01             	cmp    esi,0x1
    2755:	0f 84 02 02 00 00    	je     295d <tts_minimal_getsomeattrs+0x6bd>
	return ((tag & ~1) == VARTAG_EXPANDED_RO);
    275b:	41 89 f5             	mov    r13d,esi
    275e:	41 83 e5 fe          	and    r13d,0xfffffffe
	else if (VARTAG_IS_EXPANDED(tag))
    2762:	41 83 fd 02          	cmp    r13d,0x2
    2766:	0f 84 f1 01 00 00    	je     295d <tts_minimal_getsomeattrs+0x6bd>
	else if (tag == VARTAG_ONDISK)
    276c:	83 fe 12             	cmp    esi,0x12
    276f:	40 0f 94 c6          	sete   sil
    2773:	40 0f b6 f6          	movzx  esi,sil
    2777:	48 c1 e6 04          	shl    rsi,0x4
		*off += VARSIZE_ANY(DatumGetPointer(res));
    277b:	44 8d 6e 02          	lea    r13d,[rsi+0x2]
    277f:	e9 cc fe ff ff       	jmp    2650 <tts_minimal_getsomeattrs+0x3b0>
    2784:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
		return VARSIZE_4B(PTR);
    2788:	44 8b 2e             	mov    r13d,DWORD PTR [rsi]
    278b:	41 c1 ed 02          	shr    r13d,0x2
    278f:	e9 bc fe ff ff       	jmp    2650 <tts_minimal_getsomeattrs+0x3b0>
    2794:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
			natts = reqnatts;
    2798:	4d 63 cc             	movsxd r9,r12d
    279b:	45 89 e6             	mov    r14d,r12d
    279e:	e9 7b fb ff ff       	jmp    231e <tts_minimal_getsomeattrs+0x7e>
    27a3:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
	return (Datum) X;
    27a8:	49 63 45 00          	movsxd rax,DWORD PTR [r13+0x0]
			values[attnum] = (Datum) 0;
    27ac:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    27b0:	49 83 c2 01          	add    r10,0x1
    27b4:	4d 39 ca             	cmp    r10,r9
    27b7:	0f 85 f3 fc ff ff    	jne    24b0 <tts_minimal_getsomeattrs+0x210>
    27bd:	e9 5b fd ff ff       	jmp    251d <tts_minimal_getsomeattrs+0x27d>
    27c2:	66 0f 1f 44 00 00    	nop    WORD PTR [rax+rax*1+0x0]
	return (Datum) X;
    27c8:	49 0f bf 45 00       	movsx  rax,WORD PTR [r13+0x0]
			values[attnum] = (Datum) 0;
    27cd:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    27d1:	49 83 c2 01          	add    r10,0x1
    27d5:	4d 39 ca             	cmp    r10,r9
    27d8:	0f 85 d2 fc ff ff    	jne    24b0 <tts_minimal_getsomeattrs+0x210>
    27de:	e9 3a fd ff ff       	jmp    251d <tts_minimal_getsomeattrs+0x27d>
    27e3:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
	return VARTAG_1B_E(PTR);
    27e8:	45 0f b6 6d 01       	movzx  r13d,BYTE PTR [r13+0x1]
	if (tag == VARTAG_INDIRECT)
    27ed:	41 83 fd 01          	cmp    r13d,0x1
    27f1:	0f 84 5b 01 00 00    	je     2952 <tts_minimal_getsomeattrs+0x6b2>
	return ((tag & ~1) == VARTAG_EXPANDED_RO);
    27f7:	45 89 ee             	mov    r14d,r13d
    27fa:	41 83 e6 fe          	and    r14d,0xfffffffe
	else if (VARTAG_IS_EXPANDED(tag))
    27fe:	41 83 fe 02          	cmp    r14d,0x2
    2802:	0f 84 4a 01 00 00    	je     2952 <tts_minimal_getsomeattrs+0x6b2>
	else if (tag == VARTAG_ONDISK)
    2808:	41 83 fd 12          	cmp    r13d,0x12
    280c:	41 0f 94 c5          	sete   r13b
    2810:	45 0f b6 ed          	movzx  r13d,r13b
    2814:	49 c1 e5 04          	shl    r13,0x4
    2818:	45 8d 75 02          	lea    r14d,[r13+0x2]
    281c:	e9 60 fc ff ff       	jmp    2481 <tts_minimal_getsomeattrs+0x1e1>
    2821:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
	int			nattByte = natts >> 3;
    2828:	45 89 cd             	mov    r13d,r9d
    282b:	41 c1 fd 03          	sar    r13d,0x3
	for (bytenum = 0; bytenum < nattByte; bytenum++)
    282f:	45 85 ed             	test   r13d,r13d
    2832:	0f 8e 30 01 00 00    	jle    2968 <tts_minimal_getsomeattrs+0x6c8>
    2838:	48 8d 73 17          	lea    rsi,[rbx+0x17]
    283c:	31 ff                	xor    edi,edi
    283e:	eb 10                	jmp    2850 <tts_minimal_getsomeattrs+0x5b0>
    2840:	83 c7 01             	add    edi,0x1
    2843:	48 83 c6 01          	add    rsi,0x1
    2847:	41 39 fd             	cmp    r13d,edi
    284a:	0f 84 ec 00 00 00    	je     293c <tts_minimal_getsomeattrs+0x69c>
		if (bits[bytenum] != 0xFF)
    2850:	0f b6 06             	movzx  eax,BYTE PTR [rsi]
    2853:	3c ff                	cmp    al,0xff
    2855:	74 e9                	je     2840 <tts_minimal_getsomeattrs+0x5a0>
	res = bytenum << 3;
    2857:	c1 e7 03             	shl    edi,0x3
	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
    285a:	f7 d0                	not    eax
	int			nbytes = (natts + 7) >> 3;
    285c:	45 8d 69 07          	lea    r13d,[r9+0x7]
    2860:	f3 0f bc c0          	tzcnt  eax,eax
	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
    2864:	01 f8                	add    eax,edi
	res = Min(res, natts);
    2866:	41 39 c1             	cmp    r9d,eax
    2869:	41 0f 4e c1          	cmovle eax,r9d
	int			nbytes = (natts + 7) >> 3;
    286d:	41 c1 fd 03          	sar    r13d,0x3
	res = Min(res, natts);
    2871:	4c 63 f0             	movsxd r14,eax
		isnull_8 &= UINT64CONST(0x0101010101010101);
    2874:	49 bf 01 01 01 01 01 	movabs r15,0x101010101010101
    287b:	01 01 01 
    287e:	4d 63 ed             	movsxd r13,r13d
	for (bytenum = 0; bytenum < nattByte; bytenum++)
    2881:	31 ff                	xor    edi,edi
    2883:	66 0f 1f 44 00 00    	nop    WORD PTR [rax+rax*1+0x0]
    2889:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    2890:	00 00 00 00 
    2894:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    289b:	00 00 00 00 
    289f:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    28a6:	00 00 00 00 
    28aa:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    28b1:	00 00 00 00 
    28b5:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    28bc:	00 00 00 00 
		bits8		nullbyte = ~bits[i];
    28c0:	0f b6 74 3b 17       	movzx  esi,BYTE PTR [rbx+rdi*1+0x17]
    28c5:	f7 d6                	not    esi
		isnull_8 |= ((uint64) ((nullbyte >> 4) * SPREAD_BITS_MULTIPLIER_32)) << 32;
    28c7:	89 f0                	mov    eax,esi
		isnull_8 = (nullbyte & 0xf) * SPREAD_BITS_MULTIPLIER_32;
    28c9:	83 e6 0f             	and    esi,0xf
		isnull_8 |= ((uint64) ((nullbyte >> 4) * SPREAD_BITS_MULTIPLIER_32)) << 32;
    28cc:	c0 e8 04             	shr    al,0x4
		isnull_8 = (nullbyte & 0xf) * SPREAD_BITS_MULTIPLIER_32;
    28cf:	48 69 f6 81 40 20 00 	imul   rsi,rsi,0x204081
		isnull_8 |= ((uint64) ((nullbyte >> 4) * SPREAD_BITS_MULTIPLIER_32)) << 32;
    28d6:	83 e0 0f             	and    eax,0xf
    28d9:	48 69 c0 81 40 20 00 	imul   rax,rax,0x204081
    28e0:	48 c1 e0 20          	shl    rax,0x20
    28e4:	48 09 f0             	or     rax,rsi
		isnull_8 &= UINT64CONST(0x0101010101010101);
    28e7:	4c 21 f8             	and    rax,r15
    28ea:	48 89 04 f9          	mov    QWORD PTR [rcx+rdi*8],rax
	for (int i = 0; i < nbytes; i++, isnull += 8)
    28ee:	48 83 c7 01          	add    rdi,0x1
    28f2:	4c 39 ef             	cmp    rdi,r13
    28f5:	75 c9                	jne    28c0 <tts_minimal_getsomeattrs+0x620>
			firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, firstNullAttr);
    28f7:	45 39 f2             	cmp    r10d,r14d
    28fa:	4d 0f 4f d6          	cmovg  r10,r14
    28fe:	e9 1b fa ff ff       	jmp    231e <tts_minimal_getsomeattrs+0x7e>
    2903:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
	for (; attnum < firstNullAttr; attnum++)
    2908:	49 89 c2             	mov    r10,rax
    290b:	e9 90 fb ff ff       	jmp    24a0 <tts_minimal_getsomeattrs+0x200>
	for (; attnum < natts; attnum++)
    2910:	4d 89 d1             	mov    r9,r10
    2913:	e9 05 fc ff ff       	jmp    251d <tts_minimal_getsomeattrs+0x27d>
    2918:	0f 1f 84 00 00 00 00 	nop    DWORD PTR [rax+rax*1+0x0]
    291f:	00 
		*offp = off;
    2920:	41 89 5b 68          	mov    DWORD PTR [r11+0x68],ebx
		slot_getmissingattrs(slot, attnum, reqnatts);
    2924:	44 89 e2             	mov    edx,r12d
}
    2927:	5b                   	pop    rbx
		slot_getmissingattrs(slot, attnum, reqnatts);
    2928:	44 89 ce             	mov    esi,r9d
}
    292b:	5d                   	pop    rbp
		slot_getmissingattrs(slot, attnum, reqnatts);
    292c:	4c 89 df             	mov    rdi,r11
}
    292f:	41 5c                	pop    r12
    2931:	41 5d                	pop    r13
    2933:	41 5e                	pop    r14
    2935:	41 5f                	pop    r15
		slot_getmissingattrs(slot, attnum, reqnatts);
    2937:	e9 b4 f1 ff ff       	jmp    1af0 <slot_getmissingattrs>
	res = bytenum << 3;
    293c:	42 8d 3c ed 00 00 00 	lea    edi,[r13*8+0x0]
    2943:	00 
	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
    2944:	4d 63 ed             	movsxd r13,r13d
    2947:	42 0f b6 44 2b 17    	movzx  eax,BYTE PTR [rbx+r13*1+0x17]
    294d:	e9 08 ff ff ff       	jmp    285a <tts_minimal_getsomeattrs+0x5ba>
    2952:	41 be 0a 00 00 00    	mov    r14d,0xa
    2958:	e9 24 fb ff ff       	jmp    2481 <tts_minimal_getsomeattrs+0x1e1>
    295d:	41 bd 0a 00 00 00    	mov    r13d,0xa
    2963:	e9 e8 fc ff ff       	jmp    2650 <tts_minimal_getsomeattrs+0x3b0>
    2968:	0f b6 43 17          	movzx  eax,BYTE PTR [rbx+0x17]
	int			nbytes = (natts + 7) >> 3;
    296c:	45 8d 69 07          	lea    r13d,[r9+0x7]
	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
    2970:	f7 d0                	not    eax
    2972:	f3 0f bc c0          	tzcnt  eax,eax
	res = Min(res, natts);
    2976:	41 39 c1             	cmp    r9d,eax
    2979:	41 0f 4e c1          	cmovle eax,r9d
	int			nbytes = (natts + 7) >> 3;
    297d:	41 c1 fd 03          	sar    r13d,0x3
	res = Min(res, natts);
    2981:	4c 63 f0             	movsxd r14,eax
	for (int i = 0; i < nbytes; i++, isnull += 8)
    2984:	41 83 fd 01          	cmp    r13d,0x1
    2988:	0f 85 69 ff ff ff    	jne    28f7 <tts_minimal_getsomeattrs+0x657>
    298e:	e9 e1 fe ff ff       	jmp    2874 <tts_minimal_getsomeattrs+0x5d4>
    2993:	66 90                	xchg   ax,ax
    2995:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    299c:	00 00 00 00 

From 46c83290a6ed1256cbefd9fa62de808424601d70 Mon Sep 17 00:00:00 2001
From: David Rowley <[email protected]>
Date: Tue, 27 Jan 2026 15:08:09 +1300
Subject: [PATCH v11 1/5] Introduce deform_bench test module

For benchmarking tuple deformation.
---
 src/test/modules/deform_bench/.gitignore      |   4 +
 src/test/modules/deform_bench/Makefile        |  21 ++++
 .../deform_bench/deform_bench--1.0.sql        |   8 ++
 src/test/modules/deform_bench/deform_bench.c  | 107 ++++++++++++++++++
 .../modules/deform_bench/deform_bench.control |   4 +
 src/test/modules/deform_bench/meson.build     |  22 ++++
 src/test/modules/meson.build                  |   1 +
 7 files changed, 167 insertions(+)
 create mode 100644 src/test/modules/deform_bench/.gitignore
 create mode 100644 src/test/modules/deform_bench/Makefile
 create mode 100644 src/test/modules/deform_bench/deform_bench--1.0.sql
 create mode 100644 src/test/modules/deform_bench/deform_bench.c
 create mode 100644 src/test/modules/deform_bench/deform_bench.control
 create mode 100644 src/test/modules/deform_bench/meson.build

diff --git a/src/test/modules/deform_bench/.gitignore b/src/test/modules/deform_bench/.gitignore
new file mode 100644
index 00000000000..5dcb3ff9723
--- /dev/null
+++ b/src/test/modules/deform_bench/.gitignore
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/src/test/modules/deform_bench/Makefile b/src/test/modules/deform_bench/Makefile
new file mode 100644
index 00000000000..b5fc0f7a583
--- /dev/null
+++ b/src/test/modules/deform_bench/Makefile
@@ -0,0 +1,21 @@
+# src/test/modules/deform_bench/Makefile
+
+MODULE_big = deform_bench
+OBJS = deform_bench.o
+
+EXTENSION = deform_bench
+DATA = deform_bench--1.0.sql
+PGFILEDESC = "deform_bench - tuple deform benchmarking"
+
+# No REGRESS here: this patch adds no sql/ or expected/ files to run.
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/deform_bench
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/src/test/modules/deform_bench/deform_bench--1.0.sql b/src/test/modules/deform_bench/deform_bench--1.0.sql
new file mode 100644
index 00000000000..492b71dba3b
--- /dev/null
+++ b/src/test/modules/deform_bench/deform_bench--1.0.sql
@@ -0,0 +1,8 @@
+/* deform_bench--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION deform_bench" to load this file. \quit
+
+CREATE FUNCTION deform_bench(tableoid Oid, attnum int[]) RETURNS FLOAT
+AS 'MODULE_PATHNAME', 'deform_bench'
+LANGUAGE C VOLATILE STRICT;
diff --git a/src/test/modules/deform_bench/deform_bench.c b/src/test/modules/deform_bench/deform_bench.c
new file mode 100644
index 00000000000..7838f639bef
--- /dev/null
+++ b/src/test/modules/deform_bench/deform_bench.c
@@ -0,0 +1,107 @@
+/*-------------------------------------------------------------------------
+ *
+ * deform_bench.c
+ *
+ * for benchmarking tuple deformation routines
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <time.h>
+#include <sys/time.h>
+
+#include "access/heapam.h"
+#include "access/relscan.h"
+#include "catalog/pg_am_d.h"
+#include "catalog/pg_type_d.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "utils/array.h"
+#include "utils/arrayaccess.h"
+#include "utils/builtins.h"
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(deform_bench);
+
+Datum
+deform_bench(PG_FUNCTION_ARGS)
+{
+	Oid			tableoid = PG_GETARG_OID(0);
+	ArrayType  *array = PG_GETARG_ARRAYTYPE_P(1);
+	TableScanDesc scan;
+	Relation	rel;
+	TupleDesc	tupdesc;
+	TupleTableSlot *slot;
+	Datum	   *elem_datums = NULL;
+	bool	   *elem_nulls = NULL;
+	int			elem_count;
+	int		   *attnums;
+	clock_t		start,
+				end;
+
+	rel = relation_open(tableoid, AccessShareLock);
+
+	if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("only heap AM is supported")));
+
+	tupdesc = RelationGetDescr(rel);
+	slot = MakeTupleTableSlot(tupdesc, &TTSOpsBufferHeapTuple);
+	scan = table_beginscan_strat(rel, GetActiveSnapshot(), 0, NULL, true, false);
+
+	/*
+	 * The array is used to allow callers to define how many atts to deform.
+	 * e.g: '{1,10}'::int[] would deform attnum=1, then in a 2nd pass deform
+	 * the remainder up to attnum=10.  Passing an element as NULL means all
+	 * attnums.  This allows simulation of incremental deformation.  Generally
+	 * if you're passing an array with more than 1 element, then the array
+	 * should be in ascending order.  Doing something like '{10,1}' would mean
+	 * we've already deformed 10 attributes and on the 2nd pass there's
+	 * nothing to do since attnum=1 was already deformed in the first pass.
+	 *
+	 * You'll get an ERROR if you pass a number higher than the number of
+	 * attributes in the table.
+	 */
+	deconstruct_array(array,
+					  INT4OID,
+					  sizeof(int32),
+					  true,
+					  'i',
+					  &elem_datums,
+					  &elem_nulls,
+					  &elem_count);
+
+	attnums = palloc_array(int, elem_count);
+
+	for (int i = 0; i < elem_count; i++)
+	{
+		/* Make a NULL element mean all attributes */
+		if (elem_nulls[i])
+			attnums[i] = tupdesc->natts;
+		else
+			attnums[i] = DatumGetInt32(elem_datums[i]);
+	}
+
+	start = clock();
+
+	while (heap_getnextslot(scan, ForwardScanDirection, slot))
+	{
+		CHECK_FOR_INTERRUPTS();
+
+		/* Deform in stages according to the attnums array */
+		for (int i = 0; i < elem_count; i++)
+			slot_getsomeattrs(slot, attnums[i]);
+	}
+
+	end = clock();
+
+	ExecDropSingleTupleTableSlot(slot);
+	table_endscan(scan);
+	relation_close(rel, AccessShareLock);
+
+
+	/* CPU milliseconds used; scaled as double to avoid integer truncation */
+	PG_RETURN_FLOAT8((double) (end - start) * 1000.0 / CLOCKS_PER_SEC);
+}
diff --git a/src/test/modules/deform_bench/deform_bench.control b/src/test/modules/deform_bench/deform_bench.control
new file mode 100644
index 00000000000..a2023f9d738
--- /dev/null
+++ b/src/test/modules/deform_bench/deform_bench.control
@@ -0,0 +1,4 @@
+# deform_bench extension
+comment = 'functions for benchmarking tuple deformation'
+default_version = '1.0'
+module_pathname = '$libdir/deform_bench'
diff --git a/src/test/modules/deform_bench/meson.build b/src/test/modules/deform_bench/meson.build
new file mode 100644
index 00000000000..82049585244
--- /dev/null
+++ b/src/test/modules/deform_bench/meson.build
@@ -0,0 +1,22 @@
+# Copyright (c) 2026, PostgreSQL Global Development Group
+
+deform_bench_sources = files(
+  'deform_bench.c',
+)
+
+if host_system == 'windows'
+  deform_bench_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+    '--NAME', 'deform_bench',
+    '--FILEDESC', 'deform_bench - benchmarking tuple deformation',])
+endif
+
+deform_bench = shared_module('deform_bench',
+  deform_bench_sources,
+  kwargs: pg_test_mod_args,
+)
+test_install_libs += deform_bench
+
+test_install_data += files(
+  'deform_bench--1.0.sql',
+  'deform_bench.control',
+)
diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build
index 2634a519935..ef2b0af4581 100644
--- a/src/test/modules/meson.build
+++ b/src/test/modules/meson.build
@@ -2,6 +2,7 @@
 
 subdir('brin')
 subdir('commit_ts')
+subdir('deform_bench')
 subdir('delay_execution')
 subdir('dummy_index_am')
 subdir('dummy_seclabel')
-- 
2.51.0


From 5d372c316557406e319b26dcf381d896aecea226 Mon Sep 17 00:00:00 2001
From: David Rowley <[email protected]>
Date: Mon, 16 Feb 2026 14:20:19 +1300
Subject: [PATCH v11 2/5] Allow sibling call optimization in
 slot_getsomeattrs_int()

This changes the TupleTableSlotOps contract to make it so the
getsomeattrs() function is in charge of calling
slot_getmissingattrs().

Since this removes all code from slot_getsomeattrs_int() aside from the
getsomeattrs() call itself, we may as well adjust slot_getsomeattrs() so
that it calls getsomeattrs() directly.  We leave slot_getsomeattrs_int()
intact as this is still called from the JIT code.
---
 src/backend/executor/execTuples.c | 57 ++++++++++++++++---------------
 src/include/executor/tuptable.h   | 13 ++++---
 2 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c
index b768eae9e53..5b9bb21fa7b 100644
--- a/src/backend/executor/execTuples.c
+++ b/src/backend/executor/execTuples.c
@@ -73,7 +73,7 @@
 static TupleDesc ExecTypeFromTLInternal(List *targetList,
 										bool skipjunk);
 static pg_attribute_always_inline void slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
-															  int natts);
+															  int reqnatts);
 static inline void tts_buffer_heap_store_tuple(TupleTableSlot *slot,
 											   HeapTuple tuple,
 											   Buffer buffer,
@@ -1108,7 +1108,10 @@ slot_deform_heap_tuple_internal(TupleTableSlot *slot, HeapTuple tuple,
  * slot_deform_heap_tuple
  *		Given a TupleTableSlot, extract data from the slot's physical tuple
  *		into its Datum/isnull arrays.  Data is extracted up through the
- *		natts'th column (caller must ensure this is a legal column number).
+ *		reqnatts'th column.  If there are insufficient attributes in the given
+ *		tuple, then slot_getmissingattrs() is called to populate the
+ *		remainder.  If reqnatts is above the number of attributes in the
+ *		slot's TupleDesc, an error is raised.
  *
  *		This is essentially an incremental version of heap_deform_tuple:
  *		on each call we extract attributes up to the one needed, without
@@ -1120,7 +1123,7 @@ slot_deform_heap_tuple_internal(TupleTableSlot *slot, HeapTuple tuple,
  */
 static pg_attribute_always_inline void
 slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
-					   int natts)
+					   int reqnatts)
 {
 	bool		hasnulls = HeapTupleHasNulls(tuple);
 	int			attnum;
@@ -1128,13 +1131,14 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 	bool		slow;			/* can we use/set attcacheoff? */
 
 	/* We can only fetch as many attributes as the tuple has. */
-	natts = Min(HeapTupleHeaderGetNatts(tuple->t_data), natts);
+	natts = Min(HeapTupleHeaderGetNatts(tuple->t_data), reqnatts);
 
 	/*
 	 * Check whether the first call for this tuple, and initialize or restore
 	 * loop state.
 	 */
 	attnum = slot->tts_nvalid;
+	slot->tts_nvalid = reqnatts;
 	if (attnum == 0)
 	{
 		/* Start from the first attribute */
@@ -1199,12 +1203,15 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 	/*
 	 * Save state for next execution
 	 */
-	slot->tts_nvalid = attnum;
 	*offp = off;
 	if (slow)
 		slot->tts_flags |= TTS_FLAG_SLOW;
 	else
 		slot->tts_flags &= ~TTS_FLAG_SLOW;
+
+	/* Fetch any missing attrs and raise an error if reqnatts is invalid. */
+	if (unlikely(attnum < reqnatts))
+		slot_getmissingattrs(slot, attnum, reqnatts);
 }
 
 const TupleTableSlotOps TTSOpsVirtual = {
@@ -2058,34 +2065,36 @@ slot_getmissingattrs(TupleTableSlot *slot, int startAttNum, int lastAttNum)
 {
 	AttrMissing *attrmiss = NULL;
 
+	/* Check for invalid attnums */
+	if (unlikely(lastAttNum > slot->tts_tupleDescriptor->natts))
+		elog(ERROR, "invalid attribute number %d", lastAttNum);
+
 	if (slot->tts_tupleDescriptor->constr)
 		attrmiss = slot->tts_tupleDescriptor->constr->missing;
 
 	if (!attrmiss)
 	{
 		/* no missing values array at all, so just fill everything in as NULL */
-		memset(slot->tts_values + startAttNum, 0,
-			   (lastAttNum - startAttNum) * sizeof(Datum));
-		memset(slot->tts_isnull + startAttNum, 1,
-			   (lastAttNum - startAttNum) * sizeof(bool));
+		for (int attnum = startAttNum; attnum < lastAttNum; attnum++)
+		{
+			slot->tts_values[attnum] = (Datum) 0;
+			slot->tts_isnull[attnum] = true;
+		}
 	}
 	else
 	{
-		int			missattnum;
-
-		/* if there is a missing values array we must process them one by one */
-		for (missattnum = startAttNum;
-			 missattnum < lastAttNum;
-			 missattnum++)
+		/* use attrmiss to set the missing values */
+		for (int attnum = startAttNum; attnum < lastAttNum; attnum++)
 		{
-			slot->tts_values[missattnum] = attrmiss[missattnum].am_value;
-			slot->tts_isnull[missattnum] = !attrmiss[missattnum].am_present;
+			slot->tts_values[attnum] = attrmiss[attnum].am_value;
+			slot->tts_isnull[attnum] = !attrmiss[attnum].am_present;
 		}
 	}
 }
 
 /*
- * slot_getsomeattrs_int - workhorse for slot_getsomeattrs()
+ * slot_getsomeattrs_int
+ *		external function to call getsomeattrs() for use in JIT
  */
 void
 slot_getsomeattrs_int(TupleTableSlot *slot, int attnum)
@@ -2094,21 +2103,13 @@ slot_getsomeattrs_int(TupleTableSlot *slot, int attnum)
 	Assert(slot->tts_nvalid < attnum);	/* checked in slot_getsomeattrs */
 	Assert(attnum > 0);
 
-	if (unlikely(attnum > slot->tts_tupleDescriptor->natts))
-		elog(ERROR, "invalid attribute number %d", attnum);
-
 	/* Fetch as many attributes as possible from the underlying tuple. */
 	slot->tts_ops->getsomeattrs(slot, attnum);
 
 	/*
-	 * If the underlying tuple doesn't have enough attributes, tuple
-	 * descriptor must have the missing attributes.
+	 * Avoid putting new code here as that would prevent the compiler from
+	 * using the sibling call optimization for the above function.
 	 */
-	if (unlikely(slot->tts_nvalid < attnum))
-	{
-		slot_getmissingattrs(slot, slot->tts_nvalid, attnum);
-		slot->tts_nvalid = attnum;
-	}
 }
 
 /* ----------------------------------------------------------------
diff --git a/src/include/executor/tuptable.h b/src/include/executor/tuptable.h
index a2dfd707e78..3b09abbf99f 100644
--- a/src/include/executor/tuptable.h
+++ b/src/include/executor/tuptable.h
@@ -151,10 +151,12 @@ struct TupleTableSlotOps
 
 	/*
 	 * Fill up first natts entries of tts_values and tts_isnull arrays with
-	 * values from the tuple contained in the slot. The function may be called
-	 * with natts more than the number of attributes available in the tuple,
-	 * in which case it should set tts_nvalid to the number of returned
-	 * columns.
+	 * values from the tuple contained in the slot and set the slot's
+	 * tts_nvalid to natts. The function may be called with an natts value
+	 * more than the number of attributes available in the tuple, in which
+	 * case the function must call slot_getmissingattrs() to populate the
+	 * remaining attributes.  The function must raise an ERROR if 'natts' is
+	 * higher than the number of attributes in the slot's TupleDesc.
 	 */
 	void		(*getsomeattrs) (TupleTableSlot *slot, int natts);
 
@@ -357,8 +359,9 @@ extern void slot_getsomeattrs_int(TupleTableSlot *slot, int attnum);
 static inline void
 slot_getsomeattrs(TupleTableSlot *slot, int attnum)
 {
+	/* Populate slot with attributes up to 'attnum', if it's not already */
 	if (slot->tts_nvalid < attnum)
-		slot_getsomeattrs_int(slot, attnum);
+		slot->tts_ops->getsomeattrs(slot, attnum);
 }
 
 /*
-- 
2.51.0


From 099a6186e1886432ed24653178ab1ce9113900c9 Mon Sep 17 00:00:00 2001
From: David Rowley <[email protected]>
Date: Mon, 23 Feb 2026 09:39:37 +1300
Subject: [PATCH v11 5/5] Reduce size of CompactAttribute struct to 8 bytes

Previously, this was 16 bytes.  With the use of some bitflags and by
reducing the attcacheoff field size to a 16-bit type, we can halve the
size of the struct.

Caching offsets that are too large to fit in a 16-bit int is unlikely to
help much, as such a tuple is very likely to have some non-fixed-width
types anyway, the offsets of which we cannot cache.
---
 src/backend/access/common/tupdesc.c | 10 ++++++++++
 src/backend/executor/execTuples.c   | 16 ++++++++++++----
 src/include/access/tupdesc.h        | 16 ++++++++--------
 3 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index c68561337d7..71461ba6096 100644
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -530,6 +530,16 @@ TupleDescFinalize(TupleDesc tupdesc)
 
 		off = att_nominal_alignby(off, cattr->attalignby);
 
+		/*
+		 * attcacheoff is an int16, so don't try to cache any offsets larger
+		 * than will fit in that type.  Any attributes which are offset more
+		 * than 2^15 are likely due to variable-length attributes.  Since we
+		 * don't cache offsets for or beyond variable-length attributes, using
+		 * an int16 rather than an int32 here is unlikely to cost us anything.
+		 */
+		if (off > PG_INT16_MAX)
+			break;
+
 		cattr->attcacheoff = off;
 
 		off += cattr->attlen;
diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c
index 83a8c02894d..345b22ca932 100644
--- a/src/backend/executor/execTuples.c
+++ b/src/backend/executor/execTuples.c
@@ -1013,6 +1013,7 @@ static pg_attribute_always_inline void
 slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 					   int reqnatts)
 {
+	CompactAttribute *cattrs;
 	CompactAttribute *cattr;
 	TupleDesc	tupleDesc = slot->tts_tupleDescriptor;
 	HeapTupleHeader tup = tuple->t_data;
@@ -1101,6 +1102,13 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 	values = slot->tts_values;
 	slot->tts_nvalid = reqnatts;
 
+	/*
+	 * We store the tupleDesc's CompactAttribute array in 'cattrs' as gcc
+	 * seems to be unwilling to optimize accessing the CompactAttribute
+	 * element efficiently when accessing it via TupleDescCompactAttr().
+	 */
+	cattrs = tupleDesc->compact_attrs;
+
 	/* Ensure we calculated tp correctly */
 	Assert(tp == (char *) tup + tup->t_hoff);
 
@@ -1111,7 +1119,7 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 			int			attlen;
 
 			isnull[attnum] = false;
-			cattr = TupleDescCompactAttr(tupleDesc, attnum);
+			cattr = &cattrs[attnum];
 			attlen = cattr->attlen;
 
 			/* We don't expect any non-byval types */
@@ -1156,7 +1164,7 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 		do
 		{
 			isnull[attnum] = false;
-			cattr = TupleDescCompactAttr(tupleDesc, attnum);
+			cattr = &cattrs[attnum];
 
 			off = cattr->attcacheoff;
 			values[attnum] = fetch_att_noerr(tp + off,
@@ -1183,7 +1191,7 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 		int			attlen;
 
 		isnull[attnum] = false;
-		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+		cattr = &cattrs[attnum];
 		attlen = cattr->attlen;
 
 		/*
@@ -1216,7 +1224,7 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 			continue;
 		}
 
-		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+		cattr = &cattrs[attnum];
 		attlen = cattr->attlen;
 
 		/* As above, we don't expect cstrings */
diff --git a/src/include/access/tupdesc.h b/src/include/access/tupdesc.h
index ad7bc013812..e98036b58bf 100644
--- a/src/include/access/tupdesc.h
+++ b/src/include/access/tupdesc.h
@@ -55,7 +55,7 @@ typedef struct TupleConstr
  *		directly after the FormData_pg_attribute struct is populated or
  *		altered in any way.
  *
- * Currently, this struct is 16 bytes.  Any code changes which enlarge this
+ * Currently, this struct is 8 bytes.  Any code changes which enlarge this
  * struct should be considered very carefully.
  *
  * Code which must access a TupleDesc's attribute data should always make use
@@ -67,17 +67,17 @@ typedef struct TupleConstr
  */
 typedef struct CompactAttribute
 {
-	int32		attcacheoff;	/* fixed offset into tuple, if known, or -1 */
+	int16		attcacheoff;	/* fixed offset into tuple, if known, or -1 */
 	int16		attlen;			/* attr len in bytes or -1 = varlen, -2 =
 								 * cstring */
 	bool		attbyval;		/* as FormData_pg_attribute.attbyval */
-	bool		attispackable;	/* FormData_pg_attribute.attstorage !=
-								 * TYPSTORAGE_PLAIN */
-	bool		atthasmissing;	/* as FormData_pg_attribute.atthasmissing */
-	bool		attisdropped;	/* as FormData_pg_attribute.attisdropped */
-	bool		attgenerated;	/* FormData_pg_attribute.attgenerated != '\0' */
-	char		attnullability; /* status of not-null constraint, see below */
 	uint8		attalignby;		/* alignment requirement in bytes */
+	bool		attispackable:1;	/* FormData_pg_attribute.attstorage !=
+									 * TYPSTORAGE_PLAIN */
+	bool		atthasmissing:1;	/* as FormData_pg_attribute.atthasmissing */
+	bool		attisdropped:1; /* as FormData_pg_attribute.attisdropped */
+	bool		attgenerated:1; /* FormData_pg_attribute.attgenerated != '\0' */
+	char		attnullability; /* status of not-null constraint, see below */
 } CompactAttribute;
 
 /* Valid values for CompactAttribute->attnullability */
-- 
2.51.0


From 3fa14f2411303b5433dd2e3434c840a77395e213 Mon Sep 17 00:00:00 2001
From: David Rowley <[email protected]>
Date: Wed, 21 Jan 2026 15:41:37 +1300
Subject: [PATCH v11 3/5] Add empty TupleDescFinalize() function

Currently does nothing, but will in a future commit.
---
 contrib/dblink/dblink.c                             |  4 ++++
 contrib/pg_buffercache/pg_buffercache_pages.c       |  2 ++
 contrib/pg_visibility/pg_visibility.c               |  2 ++
 src/backend/access/brin/brin_tuple.c                |  1 +
 src/backend/access/common/tupdesc.c                 | 13 +++++++++++++
 src/backend/access/gin/ginutil.c                    |  1 +
 src/backend/access/gist/gistscan.c                  |  1 +
 src/backend/access/spgist/spgutils.c                |  1 +
 src/backend/access/transam/twophase.c               |  1 +
 src/backend/access/transam/xlogfuncs.c              |  1 +
 src/backend/backup/basebackup_copy.c                |  3 +++
 src/backend/catalog/index.c                         |  2 ++
 src/backend/catalog/pg_publication.c                |  1 +
 src/backend/catalog/toasting.c                      |  6 ++++++
 src/backend/commands/explain.c                      |  1 +
 src/backend/commands/functioncmds.c                 |  1 +
 src/backend/commands/sequence.c                     |  1 +
 src/backend/commands/tablecmds.c                    |  4 ++++
 src/backend/commands/wait.c                         |  1 +
 src/backend/executor/execSRF.c                      |  2 ++
 src/backend/executor/execTuples.c                   |  4 ++++
 src/backend/executor/nodeFunctionscan.c             |  2 ++
 src/backend/parser/parse_relation.c                 |  4 +++-
 src/backend/parser/parse_target.c                   |  2 ++
 .../replication/libpqwalreceiver/libpqwalreceiver.c |  1 +
 src/backend/replication/walsender.c                 |  5 +++++
 src/backend/utils/adt/acl.c                         |  1 +
 src/backend/utils/adt/genfile.c                     |  1 +
 src/backend/utils/adt/lockfuncs.c                   |  1 +
 src/backend/utils/adt/orderedsetaggs.c              |  1 +
 src/backend/utils/adt/pgstatfuncs.c                 |  5 +++++
 src/backend/utils/adt/tsvector_op.c                 |  1 +
 src/backend/utils/cache/relcache.c                  |  8 ++++++++
 src/backend/utils/fmgr/funcapi.c                    |  6 ++++++
 src/backend/utils/misc/guc_funcs.c                  |  5 +++++
 src/include/access/tupdesc.h                        |  1 +
 src/pl/plpgsql/src/pl_comp.c                        |  2 ++
 .../test_custom_stats/test_custom_fixed_stats.c     |  1 +
 src/test/modules/test_predtest/test_predtest.c      |  1 +
 39 files changed, 100 insertions(+), 1 deletion(-)

diff --git a/contrib/dblink/dblink.c b/contrib/dblink/dblink.c
index 2498d80c8e7..4038950a6ef 100644
--- a/contrib/dblink/dblink.c
+++ b/contrib/dblink/dblink.c
@@ -881,6 +881,7 @@ materializeResult(FunctionCallInfo fcinfo, PGconn *conn, PGresult *res)
 		tupdesc = CreateTemplateTupleDesc(1);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "status",
 						   TEXTOID, -1, 0);
+		TupleDescFinalize(tupdesc);
 		ntuples = 1;
 		nfields = 1;
 	}
@@ -1044,6 +1045,7 @@ materializeQueryResult(FunctionCallInfo fcinfo,
 			tupdesc = CreateTemplateTupleDesc(1);
 			TupleDescInitEntry(tupdesc, (AttrNumber) 1, "status",
 							   TEXTOID, -1, 0);
+			TupleDescFinalize(tupdesc);
 			attinmeta = TupleDescGetAttInMetadata(tupdesc);
 
 			oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
@@ -1529,6 +1531,8 @@ dblink_get_pkey(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "colname",
 						   TEXTOID, -1, 0);
 
+		TupleDescFinalize(tupdesc);
+
 		/*
 		 * Generate attribute metadata needed later to produce tuples from raw
 		 * C strings
diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c
index 89b86855243..a6b4fb5252b 100644
--- a/contrib/pg_buffercache/pg_buffercache_pages.c
+++ b/contrib/pg_buffercache/pg_buffercache_pages.c
@@ -174,6 +174,7 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
 			TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
 							   INT4OID, -1, 0);
 
+		TupleDescFinalize(tupledesc);
 		fctx->tupdesc = BlessTupleDesc(tupledesc);
 
 		/* Allocate NBuffers worth of BufferCachePagesRec records. */
@@ -442,6 +443,7 @@ pg_buffercache_os_pages_internal(FunctionCallInfo fcinfo, bool include_numa)
 		TupleDescInitEntry(tupledesc, (AttrNumber) 3, "numa_node",
 						   INT4OID, -1, 0);
 
+		TupleDescFinalize(tupledesc);
 		fctx->tupdesc = BlessTupleDesc(tupledesc);
 		fctx->include_numa = include_numa;
 
diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index 9bc3a784bf7..dfab0b64cf5 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -469,6 +469,8 @@ pg_visibility_tupdesc(bool include_blkno, bool include_pd)
 		TupleDescInitEntry(tupdesc, ++a, "pd_all_visible", BOOLOID, -1, 0);
 	Assert(a == maxattr);
 
+	TupleDescFinalize(tupdesc);
+
 	return BlessTupleDesc(tupdesc);
 }
 
diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c
index 69c233c62eb..742ac089a28 100644
--- a/src/backend/access/brin/brin_tuple.c
+++ b/src/backend/access/brin/brin_tuple.c
@@ -84,6 +84,7 @@ brtuple_disk_tupdesc(BrinDesc *brdesc)
 
 		MemoryContextSwitchTo(oldcxt);
 
+		TupleDescFinalize(tupdesc);
 		brdesc->bd_disktdesc = tupdesc;
 	}
 
diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index b69d10f0a45..2137385a833 100644
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -221,6 +221,9 @@ CreateTupleDesc(int natts, Form_pg_attribute *attrs)
 		memcpy(TupleDescAttr(desc, i), attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
 		populate_compact_attribute(desc, i);
 	}
+
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
@@ -265,6 +268,8 @@ CreateTupleDescCopy(TupleDesc tupdesc)
 	desc->tdtypeid = tupdesc->tdtypeid;
 	desc->tdtypmod = tupdesc->tdtypmod;
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
@@ -311,6 +316,8 @@ CreateTupleDescTruncatedCopy(TupleDesc tupdesc, int natts)
 	desc->tdtypeid = tupdesc->tdtypeid;
 	desc->tdtypmod = tupdesc->tdtypmod;
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
@@ -396,6 +403,8 @@ CreateTupleDescCopyConstr(TupleDesc tupdesc)
 	desc->tdtypeid = tupdesc->tdtypeid;
 	desc->tdtypmod = tupdesc->tdtypmod;
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
@@ -438,6 +447,8 @@ TupleDescCopy(TupleDesc dst, TupleDesc src)
 	 * source's refcount would be wrong in any case.)
 	 */
 	dst->tdrefcount = -1;
+
+	TupleDescFinalize(dst);
 }
 
 /*
@@ -1065,6 +1076,8 @@ BuildDescFromLists(const List *names, const List *types, const List *typmods, co
 		TupleDescInitEntryCollation(desc, attnum, attcollation);
 	}
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index ff927279cc3..fe7b984ff32 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -129,6 +129,7 @@ initGinState(GinState *state, Relation index)
 							   attr->attndims);
 			TupleDescInitEntryCollation(state->tupdesc[i], (AttrNumber) 2,
 										attr->attcollation);
+			TupleDescFinalize(state->tupdesc[i]);
 		}
 
 		/*
diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c
index f23bc4a6757..c65f93abdae 100644
--- a/src/backend/access/gist/gistscan.c
+++ b/src/backend/access/gist/gistscan.c
@@ -201,6 +201,7 @@ gistrescan(IndexScanDesc scan, ScanKey key, int nkeys,
 											 attno - 1)->atttypid,
 							   -1, 0);
 		}
+		TupleDescFinalize(so->giststate->fetchTupdesc);
 		scan->xs_hitupdesc = so->giststate->fetchTupdesc;
 
 		/* Also create a memory context that will hold the returned tuples */
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index 9f5379b87ac..b246e8127db 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -340,6 +340,7 @@ getSpGistTupleDesc(Relation index, SpGistTypeDesc *keyType)
 			TupleDescCompactAttr(outTupDesc, i)->attcacheoff = -1;
 
 		populate_compact_attribute(outTupDesc, spgKeyColumn);
+		TupleDescFinalize(outTupDesc);
 	}
 	return outTupDesc;
 }
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index e4340b59640..7f4ed02a6b9 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -744,6 +744,7 @@ pg_prepared_xact(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 5, "dbid",
 						   OIDOID, -1, 0);
 
+		TupleDescFinalize(tupdesc);
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
 		/*
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 2efe4105efb..b6bc616c74c 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -400,6 +400,7 @@ pg_walfile_name_offset(PG_FUNCTION_ARGS)
 	TupleDescInitEntry(resultTupleDesc, (AttrNumber) 2, "file_offset",
 					   INT4OID, -1, 0);
 
+	TupleDescFinalize(resultTupleDesc);
 	resultTupleDesc = BlessTupleDesc(resultTupleDesc);
 
 	/*
diff --git a/src/backend/backup/basebackup_copy.c b/src/backend/backup/basebackup_copy.c
index 07f58b39d8c..6c3453efd80 100644
--- a/src/backend/backup/basebackup_copy.c
+++ b/src/backend/backup/basebackup_copy.c
@@ -357,6 +357,8 @@ SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
 	 */
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "tli", INT8OID, -1, 0);
 
+	TupleDescFinalize(tupdesc);
+
 	/* send RowDescription */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
 
@@ -388,6 +390,7 @@ SendTablespaceList(List *tablespaces)
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 1, "spcoid", OIDOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "spclocation", TEXTOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 3, "size", INT8OID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	/* send RowDescription */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 43de42ce39e..75e97fb394a 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -481,6 +481,8 @@ ConstructTupleDescriptor(Relation heapRelation,
 		populate_compact_attribute(indexTupDesc, i);
 	}
 
+	TupleDescFinalize(indexTupDesc);
+
 	return indexTupDesc;
 }
 
diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c
index 9a4791c573e..fa353a0dd37 100644
--- a/src/backend/catalog/pg_publication.c
+++ b/src/backend/catalog/pg_publication.c
@@ -1230,6 +1230,7 @@ pg_get_publication_tables(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "qual",
 						   PG_NODE_TREEOID, -1, 0);
 
+		TupleDescFinalize(tupdesc);
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 		funcctx->user_fctx = table_infos;
 
diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c
index c78dcea98c1..078a1cf5127 100644
--- a/src/backend/catalog/toasting.c
+++ b/src/backend/catalog/toasting.c
@@ -229,6 +229,12 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
 	TupleDescAttr(tupdesc, 1)->attcompression = InvalidCompressionMethod;
 	TupleDescAttr(tupdesc, 2)->attcompression = InvalidCompressionMethod;
 
+	populate_compact_attribute(tupdesc, 0);
+	populate_compact_attribute(tupdesc, 1);
+	populate_compact_attribute(tupdesc, 2);
+
+	TupleDescFinalize(tupdesc);
+
 	/*
 	 * Toast tables for regular relations go in pg_toast; those for temp
 	 * relations go into the per-backend temp-toast-table namespace.
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 93918a223b8..5f922c3f5c2 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -281,6 +281,7 @@ ExplainResultDesc(ExplainStmt *stmt)
 	tupdesc = CreateTemplateTupleDesc(1);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "QUERY PLAN",
 					   result_type, -1, 0);
+	TupleDescFinalize(tupdesc);
 	return tupdesc;
 }
 
diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c
index 242372b1e68..3afd762e9dc 100644
--- a/src/backend/commands/functioncmds.c
+++ b/src/backend/commands/functioncmds.c
@@ -2424,6 +2424,7 @@ CallStmtResultDesc(CallStmt *stmt)
 							   -1,
 							   0);
 		}
+		TupleDescFinalize(tupdesc);
 	}
 
 	return tupdesc;
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index e1b808bbb60..551667650ba 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -1808,6 +1808,7 @@ pg_get_sequence_data(PG_FUNCTION_ARGS)
 					   BOOLOID, -1, 0);
 	TupleDescInitEntry(resultTupleDesc, (AttrNumber) 3, "page_lsn",
 					   LSNOID, -1, 0);
+	TupleDescFinalize(resultTupleDesc);
 	resultTupleDesc = BlessTupleDesc(resultTupleDesc);
 
 	seqrel = try_relation_open(relid, AccessShareLock);
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index b04b0dbd2a0..8678cecd53f 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -1030,6 +1030,8 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
 		}
 	}
 
+	TupleDescFinalize(descriptor);
+
 	/*
 	 * For relations with table AM and partitioned tables, select access
 	 * method to use: an explicitly indicated one, or (in the case of a
@@ -1458,6 +1460,8 @@ BuildDescForRelation(const List *columns)
 		populate_compact_attribute(desc, attnum - 1);
 	}
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
diff --git a/src/backend/commands/wait.c b/src/backend/commands/wait.c
index 1290df10c6f..8e920a72372 100644
--- a/src/backend/commands/wait.c
+++ b/src/backend/commands/wait.c
@@ -338,5 +338,6 @@ WaitStmtResultDesc(WaitStmt *stmt)
 	tupdesc = CreateTemplateTupleDesc(1);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "status",
 					   TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 	return tupdesc;
 }
diff --git a/src/backend/executor/execSRF.c b/src/backend/executor/execSRF.c
index a0b111dc0e4..b481e50acfb 100644
--- a/src/backend/executor/execSRF.c
+++ b/src/backend/executor/execSRF.c
@@ -272,6 +272,7 @@ ExecMakeTableFunctionResult(SetExprState *setexpr,
 									   funcrettype,
 									   -1,
 									   0);
+					TupleDescFinalize(tupdesc);
 					rsinfo.setDesc = tupdesc;
 				}
 				MemoryContextSwitchTo(oldcontext);
@@ -776,6 +777,7 @@ init_sexpr(Oid foid, Oid input_collation, Expr *node,
 							   funcrettype,
 							   -1,
 							   0);
+			TupleDescFinalize(tupdesc);
 			sexpr->funcResultDesc = tupdesc;
 			sexpr->funcReturnsTuple = false;
 		}
diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c
index 5b9bb21fa7b..bb997182481 100644
--- a/src/backend/executor/execTuples.c
+++ b/src/backend/executor/execTuples.c
@@ -2174,6 +2174,8 @@ ExecTypeFromTLInternal(List *targetList, bool skipjunk)
 		cur_resno++;
 	}
 
+	TupleDescFinalize(typeInfo);
+
 	return typeInfo;
 }
 
@@ -2208,6 +2210,8 @@ ExecTypeFromExprList(List *exprList)
 		cur_resno++;
 	}
 
+	TupleDescFinalize(typeInfo);
+
 	return typeInfo;
 }
 
diff --git a/src/backend/executor/nodeFunctionscan.c b/src/backend/executor/nodeFunctionscan.c
index 63e605e1f81..feb82d64967 100644
--- a/src/backend/executor/nodeFunctionscan.c
+++ b/src/backend/executor/nodeFunctionscan.c
@@ -414,6 +414,7 @@ ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags)
 				TupleDescInitEntryCollation(tupdesc,
 											(AttrNumber) 1,
 											exprCollation(funcexpr));
+				TupleDescFinalize(tupdesc);
 			}
 			else
 			{
@@ -485,6 +486,7 @@ ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags)
 							   0);
 		}
 
+		TupleDescFinalize(scan_tupdesc);
 		Assert(attno == natts);
 	}
 
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index e003db520de..9c415e166ee 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -1883,6 +1883,7 @@ addRangeTableEntryForFunction(ParseState *pstate,
 			TupleDescInitEntryCollation(tupdesc,
 										(AttrNumber) 1,
 										exprCollation(funcexpr));
+			TupleDescFinalize(tupdesc);
 		}
 		else if (functypclass == TYPEFUNC_RECORD)
 		{
@@ -1940,6 +1941,7 @@ addRangeTableEntryForFunction(ParseState *pstate,
 
 				i++;
 			}
+			TupleDescFinalize(tupdesc);
 
 			/*
 			 * Ensure that the coldeflist defines a legal set of names (no
@@ -2008,7 +2010,7 @@ addRangeTableEntryForFunction(ParseState *pstate,
 							   0);
 			/* no need to set collation */
 		}
-
+		TupleDescFinalize(tupdesc);
 		Assert(natts == totalatts);
 	}
 	else
diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c
index dbf5b2b5c01..a03d82c0540 100644
--- a/src/backend/parser/parse_target.c
+++ b/src/backend/parser/parse_target.c
@@ -1572,6 +1572,8 @@ expandRecordVariable(ParseState *pstate, Var *var, int levelsup)
 		}
 		Assert(lname == NULL && lvar == NULL);	/* lists same length? */
 
+		TupleDescFinalize(tupleDesc);
+
 		return tupleDesc;
 	}
 
diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
index 7c8639b32e9..9f04c9ed25d 100644
--- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
+++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
@@ -1073,6 +1073,7 @@ libpqrcv_processTuples(PGresult *pgres, WalRcvExecResult *walres,
 	for (coln = 0; coln < nRetTypes; coln++)
 		TupleDescInitEntry(walres->tupledesc, (AttrNumber) coln + 1,
 						   PQfname(pgres, coln), retTypes[coln], -1, 0);
+	TupleDescFinalize(walres->tupledesc);
 	attinmeta = TupleDescGetAttInMetadata(walres->tupledesc);
 
 	/* No point in doing more here if there were no tuples returned. */
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 2cde8ebc729..33a9e8d7f21 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -451,6 +451,7 @@ IdentifySystem(void)
 							  TEXTOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 4, "dbname",
 							  TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	/* prepare for projection of tuples */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
@@ -496,6 +497,7 @@ ReadReplicationSlot(ReadReplicationSlotCmd *cmd)
 	/* TimeLineID is unsigned, so int4 is not wide enough. */
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 3, "restart_tli",
 							  INT8OID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	memset(nulls, true, READ_REPLICATION_SLOT_COLS * sizeof(bool));
 
@@ -598,6 +600,7 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd)
 	tupdesc = CreateTemplateTupleDesc(2);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 1, "filename", TEXTOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "content", TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	TLHistoryFileName(histfname, cmd->timeline);
 	TLHistoryFilePath(path, cmd->timeline);
@@ -1015,6 +1018,7 @@ StartReplication(StartReplicationCmd *cmd)
 								  INT8OID, -1, 0);
 		TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "next_tli_startpos",
 								  TEXTOID, -1, 0);
+		TupleDescFinalize(tupdesc);
 
 		/* prepare for projection of tuple */
 		tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
@@ -1369,6 +1373,7 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
 							  TEXTOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 4, "output_plugin",
 							  TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	/* prepare for projection of tuples */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
diff --git a/src/backend/utils/adt/acl.c b/src/backend/utils/adt/acl.c
index 641673f0b0e..ce07f2bc046 100644
--- a/src/backend/utils/adt/acl.c
+++ b/src/backend/utils/adt/acl.c
@@ -1819,6 +1819,7 @@ aclexplode(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "is_grantable",
 						   BOOLOID, -1, 0);
 
+		TupleDescFinalize(tupdesc);
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
 		/* allocate memory for user context */
diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c
index c083608b1d5..bfb949401d0 100644
--- a/src/backend/utils/adt/genfile.c
+++ b/src/backend/utils/adt/genfile.c
@@ -454,6 +454,7 @@ pg_stat_file(PG_FUNCTION_ARGS)
 					   "creation", TIMESTAMPTZOID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 6,
 					   "isdir", BOOLOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	memset(isnull, false, sizeof(isnull));
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
index 9dadd6da672..4481c354fd6 100644
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -146,6 +146,7 @@ pg_lock_status(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 16, "waitstart",
 						   TIMESTAMPTZOID, -1, 0);
 
+		TupleDescFinalize(tupdesc);
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
 		/*
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c
index 3b6da8e36ac..fd8b8676470 100644
--- a/src/backend/utils/adt/orderedsetaggs.c
+++ b/src/backend/utils/adt/orderedsetaggs.c
@@ -233,6 +233,7 @@ ordered_set_startup(FunctionCallInfo fcinfo, bool use_tuples)
 								   -1,
 								   0);
 
+				TupleDescFinalize(newdesc);
 				FreeTupleDesc(qstate->tupdesc);
 				qstate->tupdesc = newdesc;
 			}
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index b1df96e7b0b..0b10da3b180 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -769,6 +769,7 @@ pg_stat_get_backend_subxact(PG_FUNCTION_ARGS)
 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "subxact_overflow",
 					   BOOLOID, -1, 0);
 
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	if ((local_beentry = pgstat_get_local_beentry_by_proc_number(procNumber)) != NULL)
@@ -1670,6 +1671,7 @@ pg_stat_wal_build_tuple(PgStat_WalCounters wal_counters,
 	TupleDescInitEntry(tupdesc, (AttrNumber) 6, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
 
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	/* Fill values and NULLs */
@@ -2097,6 +2099,7 @@ pg_stat_get_archiver(PG_FUNCTION_ARGS)
 	TupleDescInitEntry(tupdesc, (AttrNumber) 7, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
 
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	/* Get statistics about the archiver process */
@@ -2178,6 +2181,7 @@ pg_stat_get_replication_slot(PG_FUNCTION_ARGS)
 					   TIMESTAMPTZOID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 13, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	namestrcpy(&slotname, text_to_cstring(slotname_text));
@@ -2265,6 +2269,7 @@ pg_stat_get_subscription_stats(PG_FUNCTION_ARGS)
 					   INT8OID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 13, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	if (!subentry)
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 71c7c7d3b3c..d8dece42b9b 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -651,6 +651,7 @@ tsvector_unnest(PG_FUNCTION_ARGS)
 						   TEXTARRAYOID, -1, 0);
 		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
 			elog(ERROR, "return type must be a row type");
+		TupleDescFinalize(tupdesc);
 		funcctx->tuple_desc = tupdesc;
 
 		funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 6b634c9fff1..770edb34e08 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -729,6 +729,8 @@ RelationBuildTupleDesc(Relation relation)
 		pfree(constr);
 		relation->rd_att->constr = NULL;
 	}
+
+	TupleDescFinalize(relation->rd_att);
 }
 
 /*
@@ -1985,6 +1987,7 @@ formrdesc(const char *relationName, Oid relationReltype,
 
 	/* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
 	TupleDescCompactAttr(relation->rd_att, 0)->attcacheoff = 0;
+	TupleDescFinalize(relation->rd_att);
 
 	/* mark not-null status */
 	if (has_not_null)
@@ -3688,6 +3691,8 @@ RelationBuildLocalRelation(const char *relname,
 	for (i = 0; i < natts; i++)
 		TupleDescAttr(rel->rd_att, i)->attrelid = relid;
 
+	TupleDescFinalize(rel->rd_att);
+
 	rel->rd_rel->reltablespace = reltablespace;
 
 	if (mapped_relation)
@@ -4443,6 +4448,7 @@ BuildHardcodedDescriptor(int natts, const FormData_pg_attribute *attrs)
 
 	/* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
 	TupleDescCompactAttr(result, 0)->attcacheoff = 0;
+	TupleDescFinalize(result);
 
 	/* Note: we don't bother to set up a TupleConstr entry */
 
@@ -6268,6 +6274,8 @@ load_relcache_init_file(bool shared)
 			populate_compact_attribute(rel->rd_att, i);
 		}
 
+		TupleDescFinalize(rel->rd_att);
+
 		/* next read the access method specific field */
 		if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
 			goto read_failed;
diff --git a/src/backend/utils/fmgr/funcapi.c b/src/backend/utils/fmgr/funcapi.c
index 8a934ea8dca..516d02cfb82 100644
--- a/src/backend/utils/fmgr/funcapi.c
+++ b/src/backend/utils/fmgr/funcapi.c
@@ -340,6 +340,8 @@ get_expr_result_type(Node *expr,
 										exprCollation(col));
 			i++;
 		}
+		TupleDescFinalize(tupdesc);
+
 		if (resultTypeId)
 			*resultTypeId = rexpr->row_typeid;
 		if (resultTupleDesc)
@@ -1044,6 +1046,7 @@ resolve_polymorphic_tupdesc(TupleDesc tupdesc, oidvector *declared_args,
 		}
 	}
 
+	TupleDescFinalize(tupdesc);
 	return true;
 }
 
@@ -1853,6 +1856,8 @@ build_function_result_tupdesc_d(char prokind,
 						   0);
 	}
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
@@ -1970,6 +1975,7 @@ TypeGetTupleDesc(Oid typeoid, List *colaliases)
 						   typeoid,
 						   -1,
 						   0);
+		TupleDescFinalize(tupdesc);
 	}
 	else if (functypclass == TYPEFUNC_RECORD)
 	{
diff --git a/src/backend/utils/misc/guc_funcs.c b/src/backend/utils/misc/guc_funcs.c
index 8524dd3a981..472cb5393ce 100644
--- a/src/backend/utils/misc/guc_funcs.c
+++ b/src/backend/utils/misc/guc_funcs.c
@@ -444,6 +444,7 @@ GetPGVariableResultDesc(const char *name)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 1, varname,
 						   TEXTOID, -1, 0);
 	}
+	TupleDescFinalize(tupdesc);
 	return tupdesc;
 }
 
@@ -465,6 +466,7 @@ ShowGUCConfigOption(const char *name, DestReceiver *dest)
 	tupdesc = CreateTemplateTupleDesc(1);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 1, varname,
 							  TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	/* prepare for projection of tuples */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
@@ -499,6 +501,7 @@ ShowAllGUCConfig(DestReceiver *dest)
 							  TEXTOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 3, "description",
 							  TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	/* prepare for projection of tuples */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
@@ -934,6 +937,8 @@ show_all_settings(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 17, "pending_restart",
 						   BOOLOID, -1, 0);
 
+		TupleDescFinalize(tupdesc);
+
 		/*
 		 * Generate attribute metadata needed later to produce tuples from raw
 		 * C strings
diff --git a/src/include/access/tupdesc.h b/src/include/access/tupdesc.h
index d46cdbf7a3c..595413dbbc5 100644
--- a/src/include/access/tupdesc.h
+++ b/src/include/access/tupdesc.h
@@ -195,6 +195,7 @@ extern TupleDesc CreateTupleDescTruncatedCopy(TupleDesc tupdesc, int natts);
 
 extern TupleDesc CreateTupleDescCopyConstr(TupleDesc tupdesc);
 
+#define TupleDescFinalize(d) ((void) 0)
 #define TupleDescSize(src) \
 	(offsetof(struct TupleDescData, compact_attrs) + \
 	 (src)->natts * sizeof(CompactAttribute) + \
diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c
index 5ecc7766757..b72c963b3be 100644
--- a/src/pl/plpgsql/src/pl_comp.c
+++ b/src/pl/plpgsql/src/pl_comp.c
@@ -1912,6 +1912,8 @@ build_row_from_vars(PLpgSQL_variable **vars, int numvars)
 		TupleDescInitEntryCollation(row->rowtupdesc, i + 1, typcoll);
 	}
 
+	TupleDescFinalize(row->rowtupdesc);
+
 	return row;
 }
 
diff --git a/src/test/modules/test_custom_stats/test_custom_fixed_stats.c b/src/test/modules/test_custom_stats/test_custom_fixed_stats.c
index 485e08e5c19..f9e7c717280 100644
--- a/src/test/modules/test_custom_stats/test_custom_fixed_stats.c
+++ b/src/test/modules/test_custom_stats/test_custom_fixed_stats.c
@@ -206,6 +206,7 @@ test_custom_stats_fixed_report(PG_FUNCTION_ARGS)
 					   INT8OID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	values[0] = Int64GetDatum(stats->numcalls);
diff --git a/src/test/modules/test_predtest/test_predtest.c b/src/test/modules/test_predtest/test_predtest.c
index 679a5de456d..48ca2a4ea70 100644
--- a/src/test/modules/test_predtest/test_predtest.c
+++ b/src/test/modules/test_predtest/test_predtest.c
@@ -230,6 +230,7 @@ test_predtest(PG_FUNCTION_ARGS)
 					   "s_r_holds", BOOLOID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 8,
 					   "w_r_holds", BOOLOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 	tupdesc = BlessTupleDesc(tupdesc);
 
 	values[0] = BoolGetDatum(strong_implied_by);
-- 
2.51.0


From 0c4bc383f1deae72103063a7e912f276dfd4a1c5 Mon Sep 17 00:00:00 2001
From: David Rowley <[email protected]>
Date: Tue, 31 Dec 2024 09:19:24 +1300
Subject: [PATCH v11 4/5] Optimize tuple deformation

This commit includes various optimizations to improve the performance of
tuple deformation.

We now precalculate CompactAttribute's attcacheoff, which allows us to
remove the code from the deform routines which was setting the
attcacheoff.  Setting the attcacheoff is handled by TupleDescFinalize(),
which must be called before the TupleDesc is used for anything.  Having
this TupleDescFinalize() function means we can record, in the TupleDesc,
the number of the first attribute which does not have an offset cached.
That allows us to add a dedicated deforming loop to deform all
attributes up to the final one with an attcacheoff set, or up to the
first NULL attribute, whichever comes first.

We also record the maximum attribute number which is guaranteed to exist
in the tuple, that is, has a NOT NULL constraint and isn't an
atthasmissing attribute.  When deforming only attributes prior to the
guaranteed attnum, we've no need to access the tuple's natts count.  As an
additional optimization, we only count fixed-width columns when
calculating the maximum guaranteed column as this eliminates the need to
emit code to fetch byref types in the deformation loop for guaranteed
attributes.

Some locations in the code deform tuples that have yet to go through NOT
NULL constraint validation.  We're unable to perform the guaranteed
attribute optimization when that's the case.  The optimization is opt-in
via the TupleTableSlot using the TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS
flag.

This commit also adds a more efficient way of populating the isnull
array by using a bit-wise trick which performs multiplication on the
inverse of the tuple's bitmap byte and masks out all but the lowest bit
of each boolean's byte.  This results in much more efficient code when
compared to determining the NULLness via att_isnull().  8 isnull
elements are processed at once using this method, which means we need to
round the tts_isnull array size up to the next multiple of 8 bytes.  The
palloc code does this anyway, but the round-up needed to be formalized so
as not to overwrite the sentinel byte in debug builds.
---
 src/backend/access/common/heaptuple.c        | 360 ++++++++---------
 src/backend/access/common/indextuple.c       | 363 +++++++----------
 src/backend/access/common/tupdesc.c          |  51 +++
 src/backend/access/spgist/spgutils.c         |   3 -
 src/backend/executor/execTuples.c            | 392 +++++++++++--------
 src/backend/executor/nodeBitmapHeapscan.c    |   3 +
 src/backend/executor/nodeIndexonlyscan.c     |   3 +
 src/backend/executor/nodeIndexscan.c         |   3 +
 src/backend/executor/nodeSamplescan.c        |   3 +
 src/backend/executor/nodeSeqscan.c           |   3 +
 src/backend/executor/nodeTidrangescan.c      |   3 +
 src/backend/executor/nodeTidscan.c           |   3 +
 src/backend/jit/llvm/llvmjit_deform.c        |   6 -
 src/backend/utils/cache/relcache.c           |  12 -
 src/include/access/tupdesc.h                 |  20 +-
 src/include/access/tupmacs.h                 | 224 ++++++++++-
 src/include/executor/tuptable.h              |  17 +-
 src/test/modules/deform_bench/deform_bench.c |   1 +
 18 files changed, 846 insertions(+), 624 deletions(-)

diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index 11bec20e82e..b2ac7fef35b 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -498,19 +498,7 @@ heap_attisnull(HeapTuple tup, int attnum, TupleDesc tupleDesc)
  *		nocachegetattr
  *
  *		This only gets called from fastgetattr(), in cases where we
- *		can't use a cacheoffset and the value is not null.
- *
- *		This caches attribute offsets in the attribute descriptor.
- *
- *		An alternative way to speed things up would be to cache offsets
- *		with the tuple, but that seems more difficult unless you take
- *		the storage hit of actually putting those offsets into the
- *		tuple you send to disk.  Yuck.
- *
- *		This scheme will be slightly slower than that, but should
- *		perform well for queries which hit large #'s of tuples.  After
- *		you cache the offsets once, examining all the other tuples using
- *		the same attribute descriptor will go much quicker. -cim 5/4/91
+ *		can't use the attcacheoff and the value is not null.
  *
  *		NOTE: if you need to change this code, see also heap_deform_tuple.
  *		Also see nocache_index_getattr, which is the same code for index
@@ -522,194 +510,125 @@ nocachegetattr(HeapTuple tup,
 			   int attnum,
 			   TupleDesc tupleDesc)
 {
+	CompactAttribute *cattr;
 	HeapTupleHeader td = tup->t_data;
 	char	   *tp;				/* ptr to data part of tuple */
 	bits8	   *bp = td->t_bits;	/* ptr to null bitmap in tuple */
-	bool		slow = false;	/* do we have to walk attrs? */
 	int			off;			/* current offset within data */
+	int			startAttr;
+	int			firstNullAttr;
+	int			i;
+	bool		hasnulls = HeapTupleHasNulls(tup);
 
-	/* ----------------
-	 *	 Three cases:
-	 *
-	 *	 1: No nulls and no variable-width attributes.
-	 *	 2: Has a null or a var-width AFTER att.
-	 *	 3: Has nulls or var-widths BEFORE att.
-	 * ----------------
-	 */
+	/* Did someone forget to call TupleDescFinalize()? */
+	Assert(tupleDesc->firstNonCachedOffsetAttr >= 0);
 
 	attnum--;
 
-	if (!HeapTupleNoNulls(tup))
+	/*
+	 * To minimize the number of attributes we need to look at, start walking
+	 * the tuple at the attribute with the highest attcacheoff prior to attnum
+	 * or the first NULL attribute prior to attnum, whichever comes first.
+	 */
+	if (hasnulls)
+		firstNullAttr = first_null_attr(bp, attnum);
+	else
+		firstNullAttr = attnum;
+
+	if (tupleDesc->firstNonCachedOffsetAttr > 0)
 	{
 		/*
-		 * there's a null somewhere in the tuple
-		 *
-		 * check to see if any preceding bits are null...
+		 * Start at the highest attcacheoff attribute with no NULLs in prior
+		 * attributes.
 		 */
-		int			byte = attnum >> 3;
-		int			finalbit = attnum & 0x07;
-
-		/* check for nulls "before" final bit of last byte */
-		if ((~bp[byte]) & ((1 << finalbit) - 1))
-			slow = true;
-		else
-		{
-			/* check for nulls in any "earlier" bytes */
-			int			i;
-
-			for (i = 0; i < byte; i++)
-			{
-				if (bp[i] != 0xFF)
-				{
-					slow = true;
-					break;
-				}
-			}
-		}
+		startAttr = Min(tupleDesc->firstNonCachedOffsetAttr - 1, firstNullAttr);
+		off = TupleDescCompactAttr(tupleDesc, startAttr)->attcacheoff;
+	}
+	else
+	{
+		/* Otherwise, start at the beginning... */
+		startAttr = 0;
+		off = 0;
 	}
 
 	tp = (char *) td + td->t_hoff;
 
-	if (!slow)
+	/*
+	 * Calculate 'off' up to the first NULL attr.  We use two cheaper loops
+	 * when the tuple has no variable-width columns.  When variable-width
+	 * columns exist, we use att_addlength_pointer() to move the offset
+	 * beyond the current attribute.
+	 */
+	if (!HeapTupleHasVarWidth(tup))
 	{
-		CompactAttribute *att;
-
-		/*
-		 * If we get here, there are no nulls up to and including the target
-		 * attribute.  If we have a cached offset, we can use it.
-		 */
-		att = TupleDescCompactAttr(tupleDesc, attnum);
-		if (att->attcacheoff >= 0)
-			return fetchatt(att, tp + att->attcacheoff);
-
-		/*
-		 * Otherwise, check for non-fixed-length attrs up to and including
-		 * target.  If there aren't any, it's safe to cheaply initialize the
-		 * cached offsets for these attrs.
-		 */
-		if (HeapTupleHasVarWidth(tup))
+		for (i = startAttr; i < firstNullAttr; i++)
 		{
-			int			j;
+			cattr = TupleDescCompactAttr(tupleDesc, i);
 
-			for (j = 0; j <= attnum; j++)
-			{
-				if (TupleDescCompactAttr(tupleDesc, j)->attlen <= 0)
-				{
-					slow = true;
-					break;
-				}
-			}
+			off = att_nominal_alignby(off, cattr->attalignby);
+			off += cattr->attlen;
 		}
-	}
-
-	if (!slow)
-	{
-		int			natts = tupleDesc->natts;
-		int			j = 1;
-
-		/*
-		 * If we get here, we have a tuple with no nulls or var-widths up to
-		 * and including the target attribute, so we can use the cached offset
-		 * ... only we don't have it yet, or we'd not have got here.  Since
-		 * it's cheap to compute offsets for fixed-width columns, we take the
-		 * opportunity to initialize the cached offsets for *all* the leading
-		 * fixed-width columns, in hope of avoiding future visits to this
-		 * routine.
-		 */
-		TupleDescCompactAttr(tupleDesc, 0)->attcacheoff = 0;
-
-		/* we might have set some offsets in the slow path previously */
-		while (j < natts && TupleDescCompactAttr(tupleDesc, j)->attcacheoff > 0)
-			j++;
-
-		off = TupleDescCompactAttr(tupleDesc, j - 1)->attcacheoff +
-			TupleDescCompactAttr(tupleDesc, j - 1)->attlen;
 
-		for (; j < natts; j++)
+		for (; i < attnum; i++)
 		{
-			CompactAttribute *att = TupleDescCompactAttr(tupleDesc, j);
+			if (att_isnull(i, bp))
+				continue;
 
-			if (att->attlen <= 0)
-				break;
-
-			off = att_nominal_alignby(off, att->attalignby);
+			cattr = TupleDescCompactAttr(tupleDesc, i);
 
-			att->attcacheoff = off;
-
-			off += att->attlen;
+			off = att_nominal_alignby(off, cattr->attalignby);
+			off += cattr->attlen;
 		}
-
-		Assert(j > attnum);
-
-		off = TupleDescCompactAttr(tupleDesc, attnum)->attcacheoff;
 	}
 	else
 	{
-		bool		usecache = true;
-		int			i;
-
-		/*
-		 * Now we know that we have to walk the tuple CAREFULLY.  But we still
-		 * might be able to cache some offsets for next time.
-		 *
-		 * Note - This loop is a little tricky.  For each non-null attribute,
-		 * we have to first account for alignment padding before the attr,
-		 * then advance over the attr based on its length.  Nulls have no
-		 * storage and no alignment padding either.  We can use/set
-		 * attcacheoff until we reach either a null or a var-width attribute.
-		 */
-		off = 0;
-		for (i = 0;; i++)		/* loop exit is at "break" */
+		for (i = startAttr; i < firstNullAttr; i++)
 		{
-			CompactAttribute *att = TupleDescCompactAttr(tupleDesc, i);
+			int			attlen;
 
-			if (HeapTupleHasNulls(tup) && att_isnull(i, bp))
-			{
-				usecache = false;
-				continue;		/* this cannot be the target att */
-			}
+			cattr = TupleDescCompactAttr(tupleDesc, i);
+			attlen = cattr->attlen;
 
-			/* If we know the next offset, we can skip the rest */
-			if (usecache && att->attcacheoff >= 0)
-				off = att->attcacheoff;
-			else if (att->attlen == -1)
-			{
-				/*
-				 * We can only cache the offset for a varlena attribute if the
-				 * offset is already suitably aligned, so that there would be
-				 * no pad bytes in any case: then the offset will be valid for
-				 * either an aligned or unaligned value.
-				 */
-				if (usecache &&
-					off == att_nominal_alignby(off, att->attalignby))
-					att->attcacheoff = off;
-				else
-				{
-					off = att_pointer_alignby(off, att->attalignby, -1,
-											  tp + off);
-					usecache = false;
-				}
-			}
-			else
-			{
-				/* not varlena, so safe to use att_nominal_alignby */
-				off = att_nominal_alignby(off, att->attalignby);
+			/*
+			 * cstrings don't exist in heap tuples.  Use pg_assume to instruct
+			 * the compiler not to emit the cstring-related code in
+			 * att_addlength_pointer().
+			 */
+			pg_assume(attlen > 0 || attlen == -1);
 
-				if (usecache)
-					att->attcacheoff = off;
-			}
+			off = att_pointer_alignby(off,
+									  cattr->attalignby,
+									  attlen,
+									  tp + off);
+			off = att_addlength_pointer(off, attlen, tp + off);
+		}
 
-			if (i == attnum)
-				break;
+		for (; i < attnum; i++)
+		{
+			int			attlen;
 
-			off = att_addlength_pointer(off, att->attlen, tp + off);
+			if (att_isnull(i, bp))
+				continue;
 
-			if (usecache && att->attlen <= 0)
-				usecache = false;
+			cattr = TupleDescCompactAttr(tupleDesc, i);
+			attlen = cattr->attlen;
+
+			/* As above, heaptuples have no cstrings */
+			pg_assume(attlen > 0 || attlen == -1);
+
+			off = att_pointer_alignby(off, cattr->attalignby, attlen,
+									  tp + off);
+			off = att_addlength_pointer(off, attlen, tp + off);
 		}
 	}
 
-	return fetchatt(TupleDescCompactAttr(tupleDesc, attnum), tp + off);
+	cattr = TupleDescCompactAttr(tupleDesc, attnum);
+	off = att_pointer_alignby(off,
+							  cattr->attalignby,
+							  cattr->attlen,
+							  tp + off);
+
+	return fetchatt(cattr, tp + off);
 }
 
 /* ----------------
@@ -1347,6 +1266,7 @@ heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc,
 				  Datum *values, bool *isnull)
 {
 	HeapTupleHeader tup = tuple->t_data;
+	CompactAttribute *cattr;
 	bool		hasnulls = HeapTupleHasNulls(tuple);
 	int			tdesc_natts = tupleDesc->natts;
 	int			natts;			/* number of atts to extract */
@@ -1354,70 +1274,98 @@ heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc,
 	char	   *tp;				/* ptr to tuple data */
 	uint32		off;			/* offset in tuple data */
 	bits8	   *bp = tup->t_bits;	/* ptr to null bitmap in tuple */
-	bool		slow = false;	/* can we use/set attcacheoff? */
+	int			firstNonCacheOffsetAttr;
+	int			firstNullAttr;
 
 	natts = HeapTupleHeaderGetNatts(tup);
 
+	/* Did someone forget to call TupleDescFinalize()? */
+	Assert(tupleDesc->firstNonCachedOffsetAttr >= 0);
+
 	/*
 	 * In inheritance situations, it is possible that the given tuple actually
 	 * has more fields than the caller is expecting.  Don't run off the end of
 	 * the caller's arrays.
 	 */
 	natts = Min(natts, tdesc_natts);
+	firstNonCacheOffsetAttr = Min(tupleDesc->firstNonCachedOffsetAttr, natts);
+
+	if (hasnulls)
+	{
+		firstNullAttr = first_null_attr(bp, natts);
+
+		/*
+		 * XXX: it'd be nice to use populate_isnull_array() here, but that
+		 * requires that the isnull array's size is rounded up to the next
+		 * multiple of 8.  Doing that would require adjusting many locations
+		 * that allocate the array.
+		 */
+		firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, firstNullAttr);
+	}
+	else
+		firstNullAttr = natts;
 
 	tp = (char *) tup + tup->t_hoff;
+	attnum = 0;
 
-	off = 0;
+	if (firstNonCacheOffsetAttr > 0)
+	{
+#ifdef USE_ASSERT_CHECKING
+		/* In Assert enabled builds, verify attcacheoff is correct */
+		int			offcheck = 0;
+#endif
+		do
+		{
+			isnull[attnum] = false;
+			cattr = TupleDescCompactAttr(tupleDesc, attnum);
+			off = cattr->attcacheoff;
 
-	for (attnum = 0; attnum < natts; attnum++)
+#ifdef USE_ASSERT_CHECKING
+			offcheck = att_nominal_alignby(offcheck, cattr->attalignby);
+			Assert(offcheck == cattr->attcacheoff);
+			offcheck += cattr->attlen;
+#endif
+
+			values[attnum] = fetch_att_noerr(tp + off,
+											 cattr->attbyval,
+											 cattr->attlen);
+		} while (++attnum < firstNonCacheOffsetAttr);
+		off += cattr->attlen;
+	}
+	else
+		off = 0;
+
+	for (; attnum < firstNullAttr; attnum++)
 	{
-		CompactAttribute *thisatt = TupleDescCompactAttr(tupleDesc, attnum);
+		isnull[attnum] = false;
+		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+		values[attnum] = align_fetch_then_add(tp,
+											  &off,
+											  cattr->attbyval,
+											  cattr->attlen,
+											  cattr->attalignby);
+	}
+
+	for (; attnum < natts; attnum++)
+	{
+		Assert(hasnulls);
 
-		if (hasnulls && att_isnull(attnum, bp))
+		if (att_isnull(attnum, bp))
 		{
 			values[attnum] = (Datum) 0;
 			isnull[attnum] = true;
-			slow = true;		/* can't use attcacheoff anymore */
 			continue;
 		}
 
 		isnull[attnum] = false;
-
-		if (!slow && thisatt->attcacheoff >= 0)
-			off = thisatt->attcacheoff;
-		else if (thisatt->attlen == -1)
-		{
-			/*
-			 * We can only cache the offset for a varlena attribute if the
-			 * offset is already suitably aligned, so that there would be no
-			 * pad bytes in any case: then the offset will be valid for either
-			 * an aligned or unaligned value.
-			 */
-			if (!slow &&
-				off == att_nominal_alignby(off, thisatt->attalignby))
-				thisatt->attcacheoff = off;
-			else
-			{
-				off = att_pointer_alignby(off, thisatt->attalignby, -1,
-										  tp + off);
-				slow = true;
-			}
-		}
-		else
-		{
-			/* not varlena, so safe to use att_nominal_alignby */
-			off = att_nominal_alignby(off, thisatt->attalignby);
-
-			if (!slow)
-				thisatt->attcacheoff = off;
-		}
-
-		values[attnum] = fetchatt(thisatt, tp + off);
-
-		off = att_addlength_pointer(off, thisatt->attlen, tp + off);
-
-		if (thisatt->attlen <= 0)
-			slow = true;		/* can't use attcacheoff anymore */
+		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+
+		/* align 'off', fetch the datum, and increment off beyond the datum */
+		values[attnum] = align_fetch_then_add(tp,
+											  &off,
+											  cattr->attbyval,
+											  cattr->attlen,
+											  cattr->attalignby);
 	}
 
 	/*
diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c
index d6350201e01..8c410853191 100644
--- a/src/backend/access/common/indextuple.c
+++ b/src/backend/access/common/indextuple.c
@@ -223,18 +223,6 @@ index_form_tuple_context(TupleDesc tupleDescriptor,
  *
  *		This gets called from index_getattr() macro, and only in cases
  *		where we can't use cacheoffset and the value is not null.
- *
- *		This caches attribute offsets in the attribute descriptor.
- *
- *		An alternative way to speed things up would be to cache offsets
- *		with the tuple, but that seems more difficult unless you take
- *		the storage hit of actually putting those offsets into the
- *		tuple you send to disk.  Yuck.
- *
- *		This scheme will be slightly slower than that, but should
- *		perform well for queries which hit large #'s of tuples.  After
- *		you cache the offsets once, examining all the other tuples using
- *		the same attribute descriptor will go much quicker. -cim 5/4/91
  * ----------------
  */
 Datum
@@ -242,205 +230,124 @@ nocache_index_getattr(IndexTuple tup,
 					  int attnum,
 					  TupleDesc tupleDesc)
 {
+	CompactAttribute *cattr;
 	char	   *tp;				/* ptr to data part of tuple */
 	bits8	   *bp = NULL;		/* ptr to null bitmap in tuple */
-	bool		slow = false;	/* do we have to walk attrs? */
 	int			data_off;		/* tuple data offset */
 	int			off;			/* current offset within data */
+	int			startAttr;
+	int			firstNullAttr;
+	bool		hasnulls = IndexTupleHasNulls(tup);
+	int			i;
 
-	/* ----------------
-	 *	 Three cases:
-	 *
-	 *	 1: No nulls and no variable-width attributes.
-	 *	 2: Has a null or a var-width AFTER att.
-	 *	 3: Has nulls or var-widths BEFORE att.
-	 * ----------------
-	 */
-
-	data_off = IndexInfoFindDataOffset(tup->t_info);
+	/* Did someone forget to call TupleDescFinalize()? */
+	Assert(tupleDesc->firstNonCachedOffsetAttr >= 0);
 
 	attnum--;
 
-	if (IndexTupleHasNulls(tup))
-	{
-		/*
-		 * there's a null somewhere in the tuple
-		 *
-		 * check to see if desired att is null
-		 */
+	data_off = IndexInfoFindDataOffset(tup->t_info);
+	tp = (char *) tup + data_off;
 
-		/* XXX "knows" t_bits are just after fixed tuple header! */
+	/*
+	 * To minimize the number of attributes we need to look at, start walking
+	 * the tuple at the attribute with the highest attcacheoff prior to attnum
+	 * or the first NULL attribute prior to attnum, whichever comes first.
+	 */
+	if (hasnulls)
+	{
 		bp = (bits8 *) ((char *) tup + sizeof(IndexTupleData));
-
-		/*
-		 * Now check to see if any preceding bits are null...
-		 */
-		{
-			int			byte = attnum >> 3;
-			int			finalbit = attnum & 0x07;
-
-			/* check for nulls "before" final bit of last byte */
-			if ((~bp[byte]) & ((1 << finalbit) - 1))
-				slow = true;
-			else
-			{
-				/* check for nulls in any "earlier" bytes */
-				int			i;
-
-				for (i = 0; i < byte; i++)
-				{
-					if (bp[i] != 0xFF)
-					{
-						slow = true;
-						break;
-					}
-				}
-			}
-		}
+		firstNullAttr = first_null_attr(bp, attnum);
 	}
+	else
+		firstNullAttr = attnum;
 
-	tp = (char *) tup + data_off;
-
-	if (!slow)
+	if (tupleDesc->firstNonCachedOffsetAttr > 0)
 	{
-		CompactAttribute *att;
-
-		/*
-		 * If we get here, there are no nulls up to and including the target
-		 * attribute.  If we have a cached offset, we can use it.
-		 */
-		att = TupleDescCompactAttr(tupleDesc, attnum);
-		if (att->attcacheoff >= 0)
-			return fetchatt(att, tp + att->attcacheoff);
-
 		/*
-		 * Otherwise, check for non-fixed-length attrs up to and including
-		 * target.  If there aren't any, it's safe to cheaply initialize the
-		 * cached offsets for these attrs.
+		 * Start at the highest attcacheoff attribute with no NULLs in prior
+		 * attributes.
 		 */
-		if (IndexTupleHasVarwidths(tup))
-		{
-			int			j;
-
-			for (j = 0; j <= attnum; j++)
-			{
-				if (TupleDescCompactAttr(tupleDesc, j)->attlen <= 0)
-				{
-					slow = true;
-					break;
-				}
-			}
-		}
+		startAttr = Min(tupleDesc->firstNonCachedOffsetAttr - 1, firstNullAttr);
+		off = TupleDescCompactAttr(tupleDesc, startAttr)->attcacheoff;
 	}
-
-	if (!slow)
+	else
 	{
-		int			natts = tupleDesc->natts;
-		int			j = 1;
-
-		/*
-		 * If we get here, we have a tuple with no nulls or var-widths up to
-		 * and including the target attribute, so we can use the cached offset
-		 * ... only we don't have it yet, or we'd not have got here.  Since
-		 * it's cheap to compute offsets for fixed-width columns, we take the
-		 * opportunity to initialize the cached offsets for *all* the leading
-		 * fixed-width columns, in hope of avoiding future visits to this
-		 * routine.
-		 */
-		TupleDescCompactAttr(tupleDesc, 0)->attcacheoff = 0;
+		/* Otherwise, start at the beginning... */
+		startAttr = 0;
+		off = 0;
+	}
 
-		/* we might have set some offsets in the slow path previously */
-		while (j < natts && TupleDescCompactAttr(tupleDesc, j)->attcacheoff > 0)
-			j++;
+	/*
+	 * Calculate 'off' up to the first NULL attr.  We use two cheaper loops
+	 * when the tuple has no variable-width columns.  When variable-width
+	 * columns exist, we use att_addlength_pointer() to move the offset
+	 * beyond the current attribute.
+	 */
+	if (IndexTupleHasVarwidths(tup))
+	{
+		/* Calculate the offset up until the first NULL */
+		for (i = startAttr; i < firstNullAttr; i++)
+		{
+			cattr = TupleDescCompactAttr(tupleDesc, i);
 
-		off = TupleDescCompactAttr(tupleDesc, j - 1)->attcacheoff +
-			TupleDescCompactAttr(tupleDesc, j - 1)->attlen;
+			off = att_pointer_alignby(off,
+									  cattr->attalignby,
+									  cattr->attlen,
+									  tp + off);
+			off = att_addlength_pointer(off, cattr->attlen, tp + off);
+		}
 
-		for (; j < natts; j++)
+		/* Calculate the offset for any remaining columns. */
+		for (; i < attnum; i++)
 		{
-			CompactAttribute *att = TupleDescCompactAttr(tupleDesc, j);
+			Assert(hasnulls);
 
-			if (att->attlen <= 0)
-				break;
+			if (att_isnull(i, bp))
+				continue;
 
-			off = att_nominal_alignby(off, att->attalignby);
+			cattr = TupleDescCompactAttr(tupleDesc, i);
 
-			att->attcacheoff = off;
-
-			off += att->attlen;
+			off = att_pointer_alignby(off,
+									  cattr->attalignby,
+									  cattr->attlen,
+									  tp + off);
+			off = att_addlength_pointer(off, cattr->attlen, tp + off);
 		}
-
-		Assert(j > attnum);
-
-		off = TupleDescCompactAttr(tupleDesc, attnum)->attcacheoff;
 	}
 	else
 	{
-		bool		usecache = true;
-		int			i;
+		/* Handle tuples with only fixed-width attributes */
 
-		/*
-		 * Now we know that we have to walk the tuple CAREFULLY.  But we still
-		 * might be able to cache some offsets for next time.
-		 *
-		 * Note - This loop is a little tricky.  For each non-null attribute,
-		 * we have to first account for alignment padding before the attr,
-		 * then advance over the attr based on its length.  Nulls have no
-		 * storage and no alignment padding either.  We can use/set
-		 * attcacheoff until we reach either a null or a var-width attribute.
-		 */
-		off = 0;
-		for (i = 0;; i++)		/* loop exit is at "break" */
+		/* Calculate the offset up until the first NULL */
+		for (i = startAttr; i < firstNullAttr; i++)
 		{
-			CompactAttribute *att = TupleDescCompactAttr(tupleDesc, i);
+			cattr = TupleDescCompactAttr(tupleDesc, i);
 
-			if (IndexTupleHasNulls(tup) && att_isnull(i, bp))
-			{
-				usecache = false;
-				continue;		/* this cannot be the target att */
-			}
-
-			/* If we know the next offset, we can skip the rest */
-			if (usecache && att->attcacheoff >= 0)
-				off = att->attcacheoff;
-			else if (att->attlen == -1)
-			{
-				/*
-				 * We can only cache the offset for a varlena attribute if the
-				 * offset is already suitably aligned, so that there would be
-				 * no pad bytes in any case: then the offset will be valid for
-				 * either an aligned or unaligned value.
-				 */
-				if (usecache &&
-					off == att_nominal_alignby(off, att->attalignby))
-					att->attcacheoff = off;
-				else
-				{
-					off = att_pointer_alignby(off, att->attalignby, -1,
-											  tp + off);
-					usecache = false;
-				}
-			}
-			else
-			{
-				/* not varlena, so safe to use att_nominal_alignby */
-				off = att_nominal_alignby(off, att->attalignby);
+			Assert(cattr->attlen > 0);
+			off = att_nominal_alignby(off, cattr->attalignby);
+			off += cattr->attlen;
+		}
 
-				if (usecache)
-					att->attcacheoff = off;
-			}
+		/* Calculate the offset for any remaining columns. */
+		for (; i < attnum; i++)
+		{
+			Assert(hasnulls);
 
-			if (i == attnum)
-				break;
+			if (att_isnull(i, bp))
+				continue;
 
-			off = att_addlength_pointer(off, att->attlen, tp + off);
+			cattr = TupleDescCompactAttr(tupleDesc, i);
 
-			if (usecache && att->attlen <= 0)
-				usecache = false;
+			Assert(cattr->attlen > 0);
+			off = att_nominal_alignby(off, cattr->attalignby);
+			off += cattr->attlen;
 		}
 	}
 
-	return fetchatt(TupleDescCompactAttr(tupleDesc, attnum), tp + off);
+	cattr = TupleDescCompactAttr(tupleDesc, attnum);
+	off = att_pointer_alignby(off, cattr->attalignby,
+							  cattr->attlen, tp + off);
+	return fetchatt(cattr, tp + off);
 }
 
 /*
@@ -480,63 +387,87 @@ index_deform_tuple_internal(TupleDesc tupleDescriptor,
 							Datum *values, bool *isnull,
 							char *tp, bits8 *bp, int hasnulls)
 {
+	CompactAttribute *cattr;
 	int			natts = tupleDescriptor->natts; /* number of atts to extract */
-	int			attnum;
-	int			off = 0;		/* offset in tuple data */
-	bool		slow = false;	/* can we use/set attcacheoff? */
+	int			attnum = 0;
+	uint32		off = 0;		/* offset in tuple data */
+	int			firstNonCacheOffsetAttr;
+	int			firstNullAttr;
 
 	/* Assert to protect callers who allocate fixed-size arrays */
 	Assert(natts <= INDEX_MAX_KEYS);
 
-	for (attnum = 0; attnum < natts; attnum++)
+	/* Did someone forget to call TupleDescFinalize()? */
+	Assert(tupleDescriptor->firstNonCachedOffsetAttr >= 0);
+
+	firstNonCacheOffsetAttr = Min(tupleDescriptor->firstNonCachedOffsetAttr, natts);
+
+	if (hasnulls)
+	{
+		firstNullAttr = first_null_attr(bp, natts);
+		firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, firstNullAttr);
+	}
+	else
+		firstNullAttr = natts;
+
+	if (firstNonCacheOffsetAttr > 0)
 	{
-		CompactAttribute *thisatt = TupleDescCompactAttr(tupleDescriptor, attnum);
+#ifdef USE_ASSERT_CHECKING
+		/* In Assert enabled builds, verify attcacheoff is correct */
+		off = 0;
+#endif
 
-		if (hasnulls && att_isnull(attnum, bp))
+		do
 		{
-			values[attnum] = (Datum) 0;
-			isnull[attnum] = true;
-			slow = true;		/* can't use attcacheoff anymore */
-			continue;
-		}
+			isnull[attnum] = false;
+			cattr = TupleDescCompactAttr(tupleDescriptor, attnum);
 
-		isnull[attnum] = false;
+#ifdef USE_ASSERT_CHECKING
+			off = att_nominal_alignby(off, cattr->attalignby);
+			Assert(off == cattr->attcacheoff);
+			off += cattr->attlen;
+#endif
 
-		if (!slow && thisatt->attcacheoff >= 0)
-			off = thisatt->attcacheoff;
-		else if (thisatt->attlen == -1)
-		{
-			/*
-			 * We can only cache the offset for a varlena attribute if the
-			 * offset is already suitably aligned, so that there would be no
-			 * pad bytes in any case: then the offset will be valid for either
-			 * an aligned or unaligned value.
-			 */
-			if (!slow &&
-				off == att_nominal_alignby(off, thisatt->attalignby))
-				thisatt->attcacheoff = off;
-			else
-			{
-				off = att_pointer_alignby(off, thisatt->attalignby, -1,
-										  tp + off);
-				slow = true;
-			}
-		}
-		else
-		{
-			/* not varlena, so safe to use att_nominal_alignby */
-			off = att_nominal_alignby(off, thisatt->attalignby);
+			values[attnum] = fetch_att_noerr(tp + cattr->attcacheoff, cattr->attbyval,
+											 cattr->attlen);
+		} while (++attnum < firstNonCacheOffsetAttr);
 
-			if (!slow)
-				thisatt->attcacheoff = off;
-		}
+		off = cattr->attcacheoff + cattr->attlen;
+	}
 
-		values[attnum] = fetchatt(thisatt, tp + off);
+	for (; attnum < firstNullAttr; attnum++)
+	{
+		isnull[attnum] = false;
+		cattr = TupleDescCompactAttr(tupleDescriptor, attnum);
+
+		/* align 'off', fetch the datum, and increment off beyond the datum */
+		values[attnum] = align_fetch_then_add(tp,
+											  &off,
+											  cattr->attbyval,
+											  cattr->attlen,
+											  cattr->attalignby);
+	}
 
-		off = att_addlength_pointer(off, thisatt->attlen, tp + off);
+	for (; attnum < natts; attnum++)
+	{
+		Assert(hasnulls);
 
-		if (thisatt->attlen <= 0)
-			slow = true;		/* can't use attcacheoff anymore */
+		if (att_isnull(attnum, bp))
+		{
+			values[attnum] = (Datum) 0;
+			isnull[attnum] = true;
+			continue;
+		}
+
+		isnull[attnum] = false;
+		cattr = TupleDescCompactAttr(tupleDescriptor, attnum);
+
+		/* align 'off', fetch the datum, and increment off beyond the datum */
+		values[attnum] = align_fetch_then_add(tp,
+											  &off,
+											  cattr->attbyval,
+											  cattr->attlen,
+											  cattr->attalignby);
 	}
 }
 
diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index 2137385a833..c68561337d7 100644
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -197,6 +197,10 @@ CreateTemplateTupleDesc(int natts)
 	desc->tdtypmod = -1;
 	desc->tdrefcount = -1;		/* assume not reference-counted */
 
+	/* This will be set to the correct value by TupleDescFinalize() */
+	desc->firstNonCachedOffsetAttr = -1;
+	desc->firstNonGuaranteedAttr = -1;
+
 	return desc;
 }
 
@@ -457,6 +461,9 @@ TupleDescCopy(TupleDesc dst, TupleDesc src)
  *		descriptor to another.
  *
  * !!! Constraints and defaults are not copied !!!
+ *
+ * The caller must take care of calling TupleDescFinalize() on 'dst' once all
+ * TupleDesc changes have been made.
  */
 void
 TupleDescCopyEntry(TupleDesc dst, AttrNumber dstAttno,
@@ -489,6 +496,50 @@ TupleDescCopyEntry(TupleDesc dst, AttrNumber dstAttno,
 	populate_compact_attribute(dst, dstAttno - 1);
 }
 
+/*
+ * TupleDescFinalize
+ *		Finalize the given TupleDesc.  This must be called after the
+ *		attribute arrays have been populated or adjusted by any code.
+ *
+ * Must be called after populate_compact_attribute() and before
+ * BlessTupleDesc().
+ */
+void
+TupleDescFinalize(TupleDesc tupdesc)
+{
+	int			firstNonCachedOffsetAttr = 0;
+	int			firstNonGuaranteedAttr = tupdesc->natts;
+	int			off = 0;
+
+	for (int i = 0; i < tupdesc->natts; i++)
+	{
+		CompactAttribute *cattr = TupleDescCompactAttr(tupdesc, i);
+
+		/*
+		 * Find the first attnum not guaranteed to exist in all tuples in the
+		 * table.  We currently only pay attention to byval attributes to
+		 * allow additional optimizations during tuple deformation.
+		 */
+		if (firstNonGuaranteedAttr == tupdesc->natts &&
+			(cattr->attnullability != ATTNULLABLE_VALID || !cattr->attbyval ||
+			 cattr->atthasmissing || cattr->attisdropped || cattr->attlen <= 0))
+			firstNonGuaranteedAttr = i;
+
+		if (cattr->attlen <= 0)
+			break;
+
+		off = att_nominal_alignby(off, cattr->attalignby);
+
+		cattr->attcacheoff = off;
+
+		off += cattr->attlen;
+		firstNonCachedOffsetAttr = i + 1;
+	}
+
+	tupdesc->firstNonCachedOffsetAttr = firstNonCachedOffsetAttr;
+	tupdesc->firstNonGuaranteedAttr = firstNonGuaranteedAttr;
+}
+
 /*
  * Free a TupleDesc including all substructure
  */
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index b246e8127db..a4694bd8065 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -335,9 +335,6 @@ getSpGistTupleDesc(Relation index, SpGistTypeDesc *keyType)
 		/* We shouldn't need to bother with making these valid: */
 		att->attcompression = InvalidCompressionMethod;
 		att->attcollation = InvalidOid;
-		/* In case we changed typlen, we'd better reset following offsets */
-		for (int i = spgFirstIncludeColumn; i < outTupDesc->natts; i++)
-			TupleDescCompactAttr(outTupDesc, i)->attcacheoff = -1;
 
 		populate_compact_attribute(outTupDesc, spgKeyColumn);
 		TupleDescFinalize(outTupDesc);
diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c
index bb997182481..83a8c02894d 100644
--- a/src/backend/executor/execTuples.c
+++ b/src/backend/executor/execTuples.c
@@ -993,225 +993,254 @@ tts_buffer_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple,
 }
 
 /*
- * slot_deform_heap_tuple_internal
- *		An always inline helper function for use in slot_deform_heap_tuple to
- *		allow the compiler to emit specialized versions of this function for
- *		various combinations of "slow" and "hasnulls".  For example, if a
- *		given tuple has no nulls, then we needn't check "hasnulls" for every
- *		attribute that we're deforming.  The caller can just call this
- *		function with hasnulls set to constant-false and have the compiler
- *		remove the constant-false branches and emit more optimal code.
- *
- * Returns the next attnum to deform, which can be equal to natts when the
- * function manages to deform all requested attributes.  *offp is an input and
- * output parameter which is the byte offset within the tuple to start deforming
- * from which, on return, gets set to the offset where the next attribute
- * should be deformed from.  *slowp is set to true when subsequent deforming
- * of this tuple must use a version of this function with "slow" passed as
- * true.
- *
- * Callers cannot assume when we return "attnum" (i.e. all requested
- * attributes have been deformed) that slow mode isn't required for any
- * additional deforming as the final attribute may have caused a switch to
- * slow mode.
+ * slot_deform_heap_tuple
+ *		Given a TupleTableSlot, extract data from the slot's physical tuple
+ *		into its Datum/isnull arrays.  Data is extracted up through the
+ *		reqnatts'th column.  If there are insufficient attributes in the given
+ *		tuple, then slot_getmissingattrs() is called to populate the
+ *		remainder.  If reqnatts is above the number of attributes in the
+ *		slot's TupleDesc, an error is raised.
+ *
+ *		This is essentially an incremental version of heap_deform_tuple:
+ *		on each call we extract attributes up to the one needed, without
+ *		re-computing information about previously extracted attributes.
+ *		slot->tts_nvalid is the number of attributes already extracted.
+ *
+ * This is marked as always inline, so the different offp for different types
+ * of slots gets optimized away.
  */
-static pg_attribute_always_inline int
-slot_deform_heap_tuple_internal(TupleTableSlot *slot, HeapTuple tuple,
-								int attnum, int natts, bool slow,
-								bool hasnulls, uint32 *offp, bool *slowp)
+static pg_attribute_always_inline void
+slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
+					   int reqnatts)
 {
+	CompactAttribute *cattr;
 	TupleDesc	tupleDesc = slot->tts_tupleDescriptor;
-	Datum	   *values = slot->tts_values;
-	bool	   *isnull = slot->tts_isnull;
 	HeapTupleHeader tup = tuple->t_data;
+	size_t		attnum;
+	int			firstNonCacheOffsetAttr;
+	int			firstNonGuaranteedAttr;
+	int			firstNullAttr;
+	int			natts;
+	Datum	   *values;
+	bool	   *isnull;
 	char	   *tp;				/* ptr to tuple data */
-	bits8	   *bp = tup->t_bits;	/* ptr to null bitmap in tuple */
-	bool		slownext = false;
+	uint32		off;			/* offset in tuple data */
 
-	tp = (char *) tup + tup->t_hoff;
+	/* Did someone forget to call TupleDescFinalize()? */
+	Assert(tupleDesc->firstNonCachedOffsetAttr >= 0);
 
-	for (; attnum < natts; attnum++)
+	isnull = slot->tts_isnull;
+
+	/*
+	 * Some callers may form and deform tuples prior to NOT NULL constraints
+	 * being checked.  Here we'd like to optimize the case where we only need
+	 * to fetch attributes before or up to the point where the attribute is
+	 * guaranteed to exist in the tuple.  We rely on the slot flag being set
+	 * correctly to only enable this optimization when it's valid to do so.
+	 * This optimization allows us to save fetching the number of attributes
+	 * from the tuple and saves the additional cost of handling non-byval
+	 * attrs.
+	 */
+	if (TTS_OBEYS_NOT_NULL_CONSTRAINTS(slot))
+		firstNonGuaranteedAttr = Min(reqnatts, tupleDesc->firstNonGuaranteedAttr);
+	else
+		firstNonGuaranteedAttr = 0;
+
+	firstNonCacheOffsetAttr = tupleDesc->firstNonCachedOffsetAttr;
+
+	if (HeapTupleHasNulls(tuple))
 	{
-		CompactAttribute *thisatt = TupleDescCompactAttr(tupleDesc, attnum);
+		natts = HeapTupleHeaderGetNatts(tup);
+		tp = (char *) tup + MAXALIGN(offsetof(HeapTupleHeaderData, t_bits) +
+									 BITMAPLEN(natts));
 
-		if (hasnulls && att_isnull(attnum, bp))
+		natts = Min(natts, reqnatts);
+		if (natts > firstNonGuaranteedAttr)
 		{
-			values[attnum] = (Datum) 0;
-			isnull[attnum] = true;
-			if (!slow)
-			{
-				*slowp = true;
-				return attnum + 1;
-			}
-			else
-				continue;
-		}
+			bits8	   *bp = tup->t_bits;
 
-		isnull[attnum] = false;
+			/* Find the first NULL attr */
+			firstNullAttr = first_null_attr(bp, natts);
 
-		/* calculate the offset of this attribute */
-		if (!slow && thisatt->attcacheoff >= 0)
-			*offp = thisatt->attcacheoff;
-		else if (thisatt->attlen == -1)
-		{
 			/*
-			 * We can only cache the offset for a varlena attribute if the
-			 * offset is already suitably aligned, so that there would be no
-			 * pad bytes in any case: then the offset will be valid for either
-			 * an aligned or unaligned value.
+			 * And populate the isnull array for all attributes being fetched
+			 * from the tuple.
 			 */
-			if (!slow && *offp == att_nominal_alignby(*offp, thisatt->attalignby))
-				thisatt->attcacheoff = *offp;
-			else
-			{
-				*offp = att_pointer_alignby(*offp,
-											thisatt->attalignby,
-											-1,
-											tp + *offp);
+			populate_isnull_array(bp, natts, isnull);
 
-				if (!slow)
-					slownext = true;
-			}
+			/* We can only use any cached offsets until the first NULL attr */
+			firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, firstNullAttr);
 		}
 		else
 		{
-			/* not varlena, so safe to use att_nominal_alignby */
-			*offp = att_nominal_alignby(*offp, thisatt->attalignby);
+			/* Otherwise all required columns are guaranteed to exist */
+			firstNullAttr = natts;
+		}
+	}
+	else
+	{
+		tp = (char *) tup + MAXALIGN(offsetof(HeapTupleHeaderData, t_bits));
 
-			if (!slow)
-				thisatt->attcacheoff = *offp;
+		/*
+		 * We only need to look at the tuple's natts if we need more than the
+		 * guaranteed number of columns
+		 */
+		if (reqnatts > firstNonGuaranteedAttr)
+			natts = Min(HeapTupleHeaderGetNatts(tup), reqnatts);
+		else
+		{
+			/* No need to access the number of attributes in the tuple */
+			natts = reqnatts;
 		}
 
-		values[attnum] = fetchatt(thisatt, tp + *offp);
+		/* All attrs can be fetched without checking for NULLs */
+		firstNullAttr = natts;
+	}
+
+	attnum = slot->tts_nvalid;
+	values = slot->tts_values;
+	slot->tts_nvalid = reqnatts;
 
-		*offp = att_addlength_pointer(*offp, thisatt->attlen, tp + *offp);
+	/* Ensure we calculated tp correctly */
+	Assert(tp == (char *) tup + tup->t_hoff);
 
-		/* check if we need to switch to slow mode */
-		if (!slow)
+	if (attnum < firstNonGuaranteedAttr)
+	{
+		do
 		{
+			int			attlen;
+
+			isnull[attnum] = false;
+			cattr = TupleDescCompactAttr(tupleDesc, attnum);
+			attlen = cattr->attlen;
+
+			/* We don't expect any non-byval types */
+			pg_assume(attlen > 0);
+
 			/*
-			 * We're unable to deform any further if the above code set
-			 * 'slownext', or if this isn't a fixed-width attribute.
+			 * Technically we could support non-byval fixed-width types, but
+			 * not doing so allows us to pass true to fetch_att_noerr() which
+			 * eliminates the !attbyval branch.
 			 */
-			if (slownext || thisatt->attlen <= 0)
-			{
-				*slowp = true;
-				return attnum + 1;
-			}
-		}
-	}
+			Assert(cattr->attbyval == true);
 
-	return natts;
-}
-
-/*
- * slot_deform_heap_tuple
- *		Given a TupleTableSlot, extract data from the slot's physical tuple
- *		into its Datum/isnull arrays.  Data is extracted up through the
- *		reqnatts'th column.  If there are insufficient attributes in the given
- *		tuple, then slot_getmissingattrs() is called to populate the
- *		remainder.  If reqnatts is above the number of attributes in the
- *		slot's TupleDesc, an error is raised.
- *
- *		This is essentially an incremental version of heap_deform_tuple:
- *		on each call we extract attributes up to the one needed, without
- *		re-computing information about previously extracted attributes.
- *		slot->tts_nvalid is the number of attributes already extracted.
- *
- * This is marked as always inline, so the different offp for different types
- * of slots gets optimized away.
- */
-static pg_attribute_always_inline void
-slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
-					   int reqnatts)
-{
-	bool		hasnulls = HeapTupleHasNulls(tuple);
-	int			attnum;
-	uint32		off;			/* offset in tuple data */
-	bool		slow;			/* can we use/set attcacheoff? */
+			off = cattr->attcacheoff;
+			values[attnum] = fetch_att_noerr(tp + off, true, attlen);
+			attnum++;
+		} while (attnum < firstNonGuaranteedAttr);
 
-	/* We can only fetch as many attributes as the tuple has. */
-	natts = Min(HeapTupleHeaderGetNatts(tuple->t_data), reqnatts);
+		off += cattr->attlen;
 
-	/*
-	 * Check whether the first call for this tuple, and initialize or restore
-	 * loop state.
-	 */
-	attnum = slot->tts_nvalid;
-	slot->tts_nvalid = reqnatts;
-	if (attnum == 0)
-	{
-		/* Start from the first attribute */
-		off = 0;
-		slow = false;
+		if (attnum == reqnatts)
+			goto done;
 	}
 	else
 	{
 		/* Restore state from previous execution */
 		off = *offp;
-		slow = TTS_SLOW(slot);
+
+		/* We expect *offp to be set to 0 when attnum == 0 */
+		Assert(off == 0 || attnum > 0);
 	}
 
+	/* We can only fetch as many attributes as the tuple has. */
+	firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, natts);
+
 	/*
-	 * If 'slow' isn't set, try deforming using deforming code that does not
-	 * contain any of the extra checks required for non-fixed offset
-	 * deforming.  During deforming, if or when we find a NULL or a variable
-	 * length attribute, we'll switch to a deforming method which includes the
-	 * extra code required for non-fixed offset deforming, a.k.a slow mode.
-	 * Because this is performance critical, we inline
-	 * slot_deform_heap_tuple_internal passing the 'slow' and 'hasnull'
-	 * parameters as constants to allow the compiler to emit specialized code
-	 * with the known-const false comparisons and subsequent branches removed.
+	 * Handle the portion of the tuple that we have cached the offset for up
+	 * to the first NULL attribute.  The offset is effectively fixed for
+	 * these, so we can use the CompactAttribute's attcacheoff.
 	 */
-	if (!slow)
+	if (attnum < firstNonCacheOffsetAttr)
 	{
-		/* Tuple without any NULLs? We can skip doing any NULL checking */
-		if (!hasnulls)
-			attnum = slot_deform_heap_tuple_internal(slot,
-													 tuple,
-													 attnum,
-													 natts,
-													 false, /* slow */
-													 false, /* hasnulls */
-													 &off,
-													 &slow);
-		else
-			attnum = slot_deform_heap_tuple_internal(slot,
-													 tuple,
-													 attnum,
-													 natts,
-													 false, /* slow */
-													 true,	/* hasnulls */
-													 &off,
-													 &slow);
+		do
+		{
+			isnull[attnum] = false;
+			cattr = TupleDescCompactAttr(tupleDesc, attnum);
+
+			off = cattr->attcacheoff;
+			values[attnum] = fetch_att_noerr(tp + off,
+											 cattr->attbyval,
+											 cattr->attlen);
+		} while (++attnum < firstNonCacheOffsetAttr);
+
+		/*
+		 * Point the offset after the end of the last attribute with a cached
+		 * offset.  We expect the final cached offset attribute to have a
+		 * fixed width, so just add the attlen to the attcacheoff
+		 */
+		Assert(cattr->attlen > 0);
+		off += cattr->attlen;
 	}
 
-	/* If there's still work to do then we must be in slow mode */
-	if (attnum < natts)
+	/*
+	 * Handle any portion of the tuple that doesn't have a fixed offset up
+	 * until the first NULL attribute.  This loop only differs from the one
+	 * after it by the NULL checks.
+	 */
+	for (; attnum < firstNullAttr; attnum++)
 	{
-		/* XXX is it worth adding a separate call when hasnulls is false? */
-		attnum = slot_deform_heap_tuple_internal(slot,
-												 tuple,
-												 attnum,
-												 natts,
-												 true,	/* slow */
-												 hasnulls,
-												 &off,
-												 &slow);
+		int			attlen;
+
+		isnull[attnum] = false;
+		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+		attlen = cattr->attlen;
+
+		/*
+		 * cstrings don't exist in heap tuples.  Use pg_assume to instruct the
+		 * compiler not to emit the cstring-related code in
+		 * align_fetch_then_add().
+		 */
+		pg_assume(attlen > 0 || attlen == -1);
+
+		/* align 'off', fetch the datum, and increment off beyond the datum */
+		values[attnum] = align_fetch_then_add(tp,
+											  &off,
+											  cattr->attbyval,
+											  attlen,
+											  cattr->attalignby);
 	}
 
 	/*
-	 * Save state for next execution
+	 * Now handle any remaining attributes in the tuple up to the requested
+	 * attnum.  This time, include NULL checks as we're now at the first NULL
+	 * attribute.
 	 */
-	*offp = off;
-	if (slow)
-		slot->tts_flags |= TTS_FLAG_SLOW;
-	else
-		slot->tts_flags &= ~TTS_FLAG_SLOW;
+	for (; attnum < natts; attnum++)
+	{
+		int			attlen;
+
+		if (isnull[attnum])
+		{
+			values[attnum] = (Datum) 0;
+			continue;
+		}
+
+		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+		attlen = cattr->attlen;
 
-	/* Fetch any missing attrs and raise an error if reqnatts is invalid. */
+		/* As above, we don't expect cstrings */
+		pg_assume(attlen > 0 || attlen == -1);
+
+		/* align 'off', fetch the datum, and increment off beyond the datum */
+		values[attnum] = align_fetch_then_add(tp,
+											  &off,
+											  cattr->attbyval,
+											  attlen,
+											  cattr->attalignby);
+	}
+
+	/* Fetch any missing attrs and raise an error if reqnatts is invalid */
 	if (unlikely(attnum < reqnatts))
+	{
+		*offp = off;
 		slot_getmissingattrs(slot, attnum, reqnatts);
+		return;
+	}
+done:
+
+	/* Save current offset for next execution */
+	*offp = off;
 }
 
 const TupleTableSlotOps TTSOpsVirtual = {
@@ -1341,10 +1370,17 @@ MakeTupleTableSlot(TupleDesc tupleDesc,
 		slot->tts_values = (Datum *)
 			(((char *) slot)
 			 + MAXALIGN(basesz));
+
+		/*
+		 * We round the size of tts_isnull up to the next highest multiple of
+		 * 8.  This is needed as populate_isnull_array() operates on 8
+		 * elements at a time when converting a tuple's NULL bitmap into a
+		 * boolean array.
+		 */
 		slot->tts_isnull = (bool *)
 			(((char *) slot)
 			 + MAXALIGN(basesz)
-			 + MAXALIGN(tupleDesc->natts * sizeof(Datum)));
+			 + TYPEALIGN(8, tupleDesc->natts * sizeof(Datum)));
 
 		PinTupleDesc(tupleDesc);
 	}
@@ -1514,8 +1550,14 @@ ExecSetSlotDescriptor(TupleTableSlot *slot, /* slot to change */
 	 */
 	slot->tts_values = (Datum *)
 		MemoryContextAlloc(slot->tts_mcxt, tupdesc->natts * sizeof(Datum));
+
+	/*
+	 * We round the size of tts_isnull up to the next highest multiple of 8.
+	 * This is needed as populate_isnull_array() operates on 8 elements at a
+	 * time when converting a tuple's NULL bitmap into a boolean array.
+	 */
 	slot->tts_isnull = (bool *)
-		MemoryContextAlloc(slot->tts_mcxt, tupdesc->natts * sizeof(bool));
+		MemoryContextAlloc(slot->tts_mcxt, TYPEALIGN(8, tupdesc->natts * sizeof(bool)));
 }
 
 /* --------------------------------
@@ -2260,10 +2302,16 @@ ExecTypeSetColNames(TupleDesc typeInfo, List *namesList)
  * This happens "for free" if the tupdesc came from a relcache entry, but
  * not if we have manufactured a tupdesc for a transient RECORD datatype.
  * In that case we have to notify typcache.c of the existence of the type.
+ *
+ * TupleDescFinalize() must be called on the TupleDesc before calling this
+ * function.
  */
 TupleDesc
 BlessTupleDesc(TupleDesc tupdesc)
 {
+	/* Did someone forget to call TupleDescFinalize()? */
+	Assert(tupdesc->firstNonCachedOffsetAttr >= 0);
+
 	if (tupdesc->tdtypeid == RECORDOID &&
 		tupdesc->tdtypmod < 0)
 		assign_record_type_typmod(tupdesc);
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index c68c26cbf38..b17c4e721b3 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -383,6 +383,9 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 						  RelationGetDescr(currentRelation),
 						  table_slot_callbacks(currentRelation));
 
+	scanstate->ss.ss_ScanTupleSlot->tts_flags |=
+		TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS;
+
 	/*
 	 * Initialize result type and projection.
 	 */
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index c2d09374517..506fdf446d2 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -569,6 +569,9 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
 	ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc,
 						  &TTSOpsVirtual);
 
+	indexstate->ss.ss_ScanTupleSlot->tts_flags |=
+		TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS;
+
 	/*
 	 * We need another slot, in a format that's suitable for the table AM, for
 	 * when we need to fetch a tuple from the table for rechecking visibility.
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index a616abff04c..c77746ab9f5 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -940,6 +940,9 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
 						  RelationGetDescr(currentRelation),
 						  table_slot_callbacks(currentRelation));
 
+	indexstate->ss.ss_ScanTupleSlot->tts_flags |=
+		TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS;
+
 	/*
 	 * Initialize result type and projection.
 	 */
diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c
index 1b0af70fd7a..d29ef2872f7 100644
--- a/src/backend/executor/nodeSamplescan.c
+++ b/src/backend/executor/nodeSamplescan.c
@@ -130,6 +130,9 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
 						  RelationGetDescr(scanstate->ss.ss_currentRelation),
 						  table_slot_callbacks(scanstate->ss.ss_currentRelation));
 
+	scanstate->ss.ss_ScanTupleSlot->tts_flags |=
+		TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS;
+
 	/*
 	 * Initialize result type and projection.
 	 */
diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c
index af3c788ce8b..3ff2a2843eb 100644
--- a/src/backend/executor/nodeSeqscan.c
+++ b/src/backend/executor/nodeSeqscan.c
@@ -246,6 +246,9 @@ ExecInitSeqScan(SeqScan *node, EState *estate, int eflags)
 						  RelationGetDescr(scanstate->ss.ss_currentRelation),
 						  table_slot_callbacks(scanstate->ss.ss_currentRelation));
 
+	scanstate->ss.ss_ScanTupleSlot->tts_flags |=
+		TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS;
+
 	/*
 	 * Initialize result type and projection.
 	 */
diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c
index 503817da65b..2ece0255e7d 100644
--- a/src/backend/executor/nodeTidrangescan.c
+++ b/src/backend/executor/nodeTidrangescan.c
@@ -396,6 +396,9 @@ ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags)
 						  RelationGetDescr(currentRelation),
 						  table_slot_callbacks(currentRelation));
 
+	tidrangestate->ss.ss_ScanTupleSlot->tts_flags |=
+		TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS;
+
 	/*
 	 * Initialize result type and projection.
 	 */
diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c
index 4eddb0828b5..484e3306e0b 100644
--- a/src/backend/executor/nodeTidscan.c
+++ b/src/backend/executor/nodeTidscan.c
@@ -538,6 +538,9 @@ ExecInitTidScan(TidScan *node, EState *estate, int eflags)
 						  RelationGetDescr(currentRelation),
 						  table_slot_callbacks(currentRelation));
 
+	tidstate->ss.ss_ScanTupleSlot->tts_flags |=
+		TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS;
+
 	/*
 	 * Initialize result type and projection.
 	 */
diff --git a/src/backend/jit/llvm/llvmjit_deform.c b/src/backend/jit/llvm/llvmjit_deform.c
index 3eb087eb56b..12521e3e46a 100644
--- a/src/backend/jit/llvm/llvmjit_deform.c
+++ b/src/backend/jit/llvm/llvmjit_deform.c
@@ -62,7 +62,6 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc,
 	LLVMValueRef v_tts_values;
 	LLVMValueRef v_tts_nulls;
 	LLVMValueRef v_slotoffp;
-	LLVMValueRef v_flagsp;
 	LLVMValueRef v_nvalidp;
 	LLVMValueRef v_nvalid;
 	LLVMValueRef v_maxatt;
@@ -178,7 +177,6 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc,
 	v_tts_nulls =
 		l_load_struct_gep(b, StructTupleTableSlot, v_slot, FIELDNO_TUPLETABLESLOT_ISNULL,
 						  "tts_ISNULL");
-	v_flagsp = l_struct_gep(b, StructTupleTableSlot, v_slot, FIELDNO_TUPLETABLESLOT_FLAGS, "");
 	v_nvalidp = l_struct_gep(b, StructTupleTableSlot, v_slot, FIELDNO_TUPLETABLESLOT_NVALID, "");
 
 	if (ops == &TTSOpsHeapTuple || ops == &TTSOpsBufferHeapTuple)
@@ -747,14 +745,10 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc,
 
 	{
 		LLVMValueRef v_off = l_load(b, TypeSizeT, v_offp, "");
-		LLVMValueRef v_flags;
 
 		LLVMBuildStore(b, l_int16_const(lc, natts), v_nvalidp);
 		v_off = LLVMBuildTrunc(b, v_off, LLVMInt32TypeInContext(lc), "");
 		LLVMBuildStore(b, v_off, v_slotoffp);
-		v_flags = l_load(b, LLVMInt16TypeInContext(lc), v_flagsp, "tts_flags");
-		v_flags = LLVMBuildOr(b, v_flags, l_int16_const(lc, TTS_FLAG_SLOW), "");
-		LLVMBuildStore(b, v_flags, v_flagsp);
 		LLVMBuildRetVoid(b);
 	}
 
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 770edb34e08..998be24ac41 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -666,14 +666,6 @@ RelationBuildTupleDesc(Relation relation)
 		elog(ERROR, "pg_attribute catalog is missing %d attribute(s) for relation OID %u",
 			 need, RelationGetRelid(relation));
 
-	/*
-	 * We can easily set the attcacheoff value for the first attribute: it
-	 * must be zero.  This eliminates the need for special cases for attnum=1
-	 * that used to exist in fastgetattr() and index_getattr().
-	 */
-	if (RelationGetNumberOfAttributes(relation) > 0)
-		TupleDescCompactAttr(relation->rd_att, 0)->attcacheoff = 0;
-
 	/*
 	 * Set up constraint/default info
 	 */
@@ -1985,8 +1977,6 @@ formrdesc(const char *relationName, Oid relationReltype,
 		populate_compact_attribute(relation->rd_att, i);
 	}
 
-	/* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
-	TupleDescCompactAttr(relation->rd_att, 0)->attcacheoff = 0;
 	TupleDescFinalize(relation->rd_att);
 
 	/* mark not-null status */
@@ -4446,8 +4436,6 @@ BuildHardcodedDescriptor(int natts, const FormData_pg_attribute *attrs)
 		populate_compact_attribute(result, i);
 	}
 
-	/* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
-	TupleDescCompactAttr(result, 0)->attcacheoff = 0;
 	TupleDescFinalize(result);
 
 	/* Note: we don't bother to set up a TupleConstr entry */
diff --git a/src/include/access/tupdesc.h b/src/include/access/tupdesc.h
index 595413dbbc5..ad7bc013812 100644
--- a/src/include/access/tupdesc.h
+++ b/src/include/access/tupdesc.h
@@ -131,6 +131,19 @@ typedef struct CompactAttribute
  * Any code making changes manually to and fields in the FormData_pg_attribute
  * array must subsequently call populate_compact_attribute() to flush the
  * changes out to the corresponding 'compact_attrs' element.
+ *
+ * firstNonCachedOffsetAttr stores the index into the compact_attrs array for
+ * the first attribute that we don't have a known attcacheoff for.
+ *
+ * firstNonGuaranteedAttr stores the index into the compact_attrs array for
+ * the first attribute that is either NULLable, missing, dropped, or
+ * !attbyval.  This can be used in locations as a guarantee that attributes
+ * before this will always exist in tuples.  The !attbyval part isn't required
+ * for this, but including this allows various tuple deforming routines to
+ * forego any checks for !attbyval.
+ *
+ * Once a TupleDesc has been populated, before it is used for any purpose,
+ * TupleDescFinalize() must be called on it.
  */
 typedef struct TupleDescData
 {
@@ -138,6 +151,11 @@ typedef struct TupleDescData
 	Oid			tdtypeid;		/* composite type ID for tuple type */
 	int32		tdtypmod;		/* typmod for tuple type */
 	int			tdrefcount;		/* reference count, or -1 if not counting */
+	int			firstNonCachedOffsetAttr;	/* index of the first att without
+											 * an attcacheoff */
+	int			firstNonGuaranteedAttr; /* index of the first nullable,
+										 * missing, dropped, or !attbyval
+										 * attribute. */
 	TupleConstr *constr;		/* constraints, or NULL if none */
 	/* compact_attrs[N] is the compact metadata of Attribute Number N+1 */
 	CompactAttribute compact_attrs[FLEXIBLE_ARRAY_MEMBER];
@@ -195,7 +213,6 @@ extern TupleDesc CreateTupleDescTruncatedCopy(TupleDesc tupdesc, int natts);
 
 extern TupleDesc CreateTupleDescCopyConstr(TupleDesc tupdesc);
 
-#define TupleDescFinalize(d) ((void) 0)
 #define TupleDescSize(src) \
 	(offsetof(struct TupleDescData, compact_attrs) + \
 	 (src)->natts * sizeof(CompactAttribute) + \
@@ -206,6 +223,7 @@ extern void TupleDescCopy(TupleDesc dst, TupleDesc src);
 extern void TupleDescCopyEntry(TupleDesc dst, AttrNumber dstAttno,
 							   TupleDesc src, AttrNumber srcAttno);
 
+extern void TupleDescFinalize(TupleDesc tupdesc);
 extern void FreeTupleDesc(TupleDesc tupdesc);
 
 extern void IncrTupleDescRefCount(TupleDesc tupdesc);
diff --git a/src/include/access/tupmacs.h b/src/include/access/tupmacs.h
index d64c18b950b..87dbeb76618 100644
--- a/src/include/access/tupmacs.h
+++ b/src/include/access/tupmacs.h
@@ -15,7 +15,9 @@
 #define TUPMACS_H
 
 #include "catalog/pg_type_d.h"	/* for TYPALIGN macros */
-
+#include "port/pg_bitutils.h"
+#include "port/pg_bswap.h"
+#include "varatt.h"
 
 /*
  * Check a tuple's null bitmap to determine whether the attribute is null.
@@ -28,6 +30,62 @@ att_isnull(int ATT, const bits8 *BITS)
 	return !(BITS[ATT >> 3] & (1 << (ATT & 0x07)));
 }
 
+/*
+ * populate_isnull_array
+ *		Transform a tuple's null bitmap into a boolean array.
+ *
+ * Caller must ensure that the isnull array is sized so it contains
+ * at least as many elements as there are bits in the 'bits' array.
+ * Callers should be aware that isnull is populated 8 elements at a time,
+ * effectively as if natts is rounded up to the next multiple of 8.
+ */
+static inline void
+populate_isnull_array(const bits8 *bits, int natts, bool *isnull)
+{
+	int			nbytes = (natts + 7) >> 3;
+
+	/*
+	 * Multiplying the inverted NULL bitmap byte by this value results in the
+	 * lowest bit in each byte being set the same as each bit of the inverted
+	 * byte.  We perform this as 2 32-bit operations rather than a single
+	 * 64-bit operation as multiplying by the required value to do this in
+	 * 64-bits would result in overflowing a uint64 in some cases.
+	 *
+	 * XXX if we ever require BMI2 (-march=x86-64-v3), then this could be done
+	 * more efficiently on most X86-64 CPUs with the PDEP instruction.  Beware
+	 * that some chips (e.g. AMD's Zen2) are horribly inefficient at PDEP.
+	 */
+#define SPREAD_BITS_MULTIPLIER_32 0x204081U
+
+	for (int i = 0; i < nbytes; i++, isnull += 8)
+	{
+		uint64		isnull_8;
+		bits8		nullbyte = ~bits[i];
+
+		/* Convert the lower 4 bits of NULL bitmap byte into a 64 bit int */
+		isnull_8 = (nullbyte & 0xf) * SPREAD_BITS_MULTIPLIER_32;
+
+		/*
+		 * Convert the upper 4 bits of NULL bitmap byte into a 64 bit int,
+		 * shift into the upper 32 bits and bitwise-OR with the result of the
+		 * lower 4 bits.
+		 */
+		isnull_8 |= ((uint64) ((nullbyte >> 4) * SPREAD_BITS_MULTIPLIER_32)) << 32;
+
+		/* Mask out all other bits apart from the lowest bit of each byte. */
+		isnull_8 &= UINT64CONST(0x0101010101010101);
+
+#ifdef WORDS_BIGENDIAN
+
+		/*
+		 * Fix byte order on big-endian machines before copying to the array.
+		 */
+		isnull_8 = pg_bswap64(isnull_8);
+#endif
+		memcpy(isnull, &isnull_8, sizeof(uint64));
+	}
+}
+
 #ifndef FRONTEND
 /*
  * Given an attbyval and an attlen from either a Form_pg_attribute or
@@ -69,6 +127,170 @@ fetch_att(const void *T, bool attbyval, int attlen)
 	else
 		return PointerGetDatum(T);
 }
+
+/*
+ * Same, but no error checking for invalid attlens for byval types.  This
+ * is safe to use when attlen comes from CompactAttribute as we validate the
+ * length when populating that struct.
+ */
+static inline Datum
+fetch_att_noerr(const void *T, bool attbyval, int attlen)
+{
+	if (attbyval)
+	{
+		switch (attlen)
+		{
+			case sizeof(int32):
+				return Int32GetDatum(*((const int32 *) T));
+			case sizeof(int16):
+				return Int16GetDatum(*((const int16 *) T));
+			case sizeof(char):
+				return CharGetDatum(*((const char *) T));
+			default:
+				Assert(attlen == sizeof(int64));
+				return Int64GetDatum(*((const int64 *) T));
+		}
+	}
+	else
+		return PointerGetDatum(T);
+}
+
+
+/*
+ * align_fetch_then_add
+ *		Applies all the functionality of att_pointer_alignby(),
+ *		fetch_att_noerr() and att_addlength_pointer(), leaving *off set to
+ *		the (perhaps unaligned) number of bytes into 'tupptr' at which the
+ *		next attribute is ready to be deformed.
+ *
+ * tupptr: pointer to the beginning of the tuple, after the header and any
+ * NULL bitmask.
+ * off: offset in bytes for reading tuple data, possibly unaligned.
+ * attbyval, attlen and attalignby are values from CompactAttribute.
+ */
+static inline Datum
+align_fetch_then_add(const char *tupptr, uint32 *off, bool attbyval, int attlen,
+					 uint8 attalignby)
+{
+	Datum		res;
+
+	if (attlen > 0)
+	{
+		const char *offset_ptr;
+
+		*off = TYPEALIGN(attalignby, *off);
+		offset_ptr = tupptr + *off;
+		*off += attlen;
+		if (attbyval)
+		{
+			switch (attlen)
+			{
+				case sizeof(char):
+					return CharGetDatum(*((const char *) offset_ptr));
+				case sizeof(int16):
+					return Int16GetDatum(*((const int16 *) offset_ptr));
+				case sizeof(int32):
+					return Int32GetDatum(*((const int32 *) offset_ptr));
+				default:
+
+					/*
+					 * populate_compact_attribute_internal() should have
+					 * checked
+					 */
+					Assert(attlen == sizeof(int64));
+					return Int64GetDatum(*((const int64 *) offset_ptr));
+			}
+		}
+		return PointerGetDatum(offset_ptr);
+	}
+	else if (attlen == -1)
+	{
+		if (!VARATT_IS_SHORT(tupptr + *off))
+			*off = TYPEALIGN(attalignby, *off);
+
+		res = PointerGetDatum(tupptr + *off);
+		*off += VARSIZE_ANY(DatumGetPointer(res));
+		return res;
+	}
+	else
+	{
+		Assert(attlen == -2);
+		*off = TYPEALIGN(attalignby, *off);
+		res = PointerGetDatum(tupptr + *off);
+		*off += strlen(tupptr + *off) + 1;
+		return res;
+	}
+}
+
+/*
+ * first_null_attr
+ *		Inspect a NULL bitmap from a tuple and return the 0-based attnum of the
+ *		first NULL attribute.  Returns natts if no NULLs were found.
+ *
+ * This is coded to expect that 'bits' contains at least one 0 bit somewhere
+ * in the array, but not necessarily < natts.  Note that natts may be passed
+ * as a value lower than the number of bits physically stored in the tuple's
+ * NULL bitmap, in which case we may not find a NULL and return natts.
+ *
+ * The reason we require at least one 0 bit somewhere in the NULL bitmap is
+ * that the for loop that checks 0xFF bytes would loop to the last byte in
+ * the array if all bytes were 0xFF, and the subsequent code that finds the
+ * right-most 0 bit would access the first byte beyond the bitmap.  Provided
+ * we find a 0 bit before then, that won't happen.  Since tuples which have no
+ * NULLs don't have a NULL bitmap, this function won't get called for that
+ * case.
+ */
+static inline int
+first_null_attr(const bits8 *bits, int natts)
+{
+	int			nattByte = natts >> 3;
+	int			bytenum;
+	int			res;
+
+#ifdef USE_ASSERT_CHECKING
+	int			firstnull_check = natts;
+
+	/* Do it the slow way and check we get the same answer. */
+	for (int i = 0; i < natts; i++)
+	{
+		if (att_isnull(i, bits))
+		{
+			firstnull_check = i;
+			break;
+		}
+	}
+#endif
+
+	/* Process all bytes up to just before the byte for the natts attribute */
+	for (bytenum = 0; bytenum < nattByte; bytenum++)
+	{
+		/* break if there's any NULL attrs (a 0 bit) */
+		if (bits[bytenum] != 0xFF)
+			break;
+	}
+
+	/*
+	 * Look for the lowest 0-bit in the 'bytenum' element.  To do this, we
+	 * promote the uint8 to uint32 before performing the bitwise NOT and
+	 * looking for the first 1-bit.  This works even when the byte is 0xFF, as
+	 * the bitwise NOT of 0xFF in 32 bits is 0xFFFFFF00, in which case
+	 * pg_rightmost_one_pos32() will return 8.  We may end up with a value
+	 * higher than natts here, but we'll fix that with the Min() below.
+	 */
+	res = bytenum << 3;
+	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
+
+	/*
+	 * Since we did no masking to mask out bits beyond the natt'th bit, we may
+	 * have found a bit higher than natts, so we must cap res to natts
+	 */
+	res = Min(res, natts);
+
+	/* Ensure we got the same answer as the att_isnull() loop got */
+	Assert(res == firstnull_check);
+
+	return res;
+}
 #endif							/* FRONTEND */
 
 /*
diff --git a/src/include/executor/tuptable.h b/src/include/executor/tuptable.h
index 3b09abbf99f..ff4572a29ae 100644
--- a/src/include/executor/tuptable.h
+++ b/src/include/executor/tuptable.h
@@ -84,9 +84,6 @@
  * tts_values/tts_isnull are allocated either when the slot is created (when
  * the descriptor is provided), or when a descriptor is assigned to the slot;
  * they are of length equal to the descriptor's natts.
- *
- * The TTS_FLAG_SLOW flag is saved state for
- * slot_deform_heap_tuple, and should not be touched by any other code.
  *----------
  */
 
@@ -98,9 +95,13 @@
 #define			TTS_FLAG_SHOULDFREE		(1 << 2)
 #define TTS_SHOULDFREE(slot) (((slot)->tts_flags & TTS_FLAG_SHOULDFREE) != 0)
 
-/* saved state for slot_deform_heap_tuple */
-#define			TTS_FLAG_SLOW		(1 << 3)
-#define TTS_SLOW(slot) (((slot)->tts_flags & TTS_FLAG_SLOW) != 0)
+/*
+ * true = slot's formed tuple guaranteed to not have NULLs in NOT NULLable
+ * columns.
+ */
+#define			TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS		(1 << 3)
+#define TTS_OBEYS_NOT_NULL_CONSTRAINTS(slot) \
+	(((slot)->tts_flags & TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS) != 0)
 
 /* fixed tuple descriptor */
 #define			TTS_FLAG_FIXED		(1 << 4)
@@ -123,7 +124,9 @@ typedef struct TupleTableSlot
 #define FIELDNO_TUPLETABLESLOT_VALUES 5
 	Datum	   *tts_values;		/* current per-attribute values */
 #define FIELDNO_TUPLETABLESLOT_ISNULL 6
-	bool	   *tts_isnull;		/* current per-attribute isnull flags */
+	bool	   *tts_isnull;		/* current per-attribute isnull flags.  Array
+								 * size must always be rounded up to the next
+								 * multiple of 8 elements. */
 	MemoryContext tts_mcxt;		/* slot itself is in this context */
 	ItemPointerData tts_tid;	/* stored tuple's tid */
 	Oid			tts_tableOid;	/* table oid of tuple */
diff --git a/src/test/modules/deform_bench/deform_bench.c b/src/test/modules/deform_bench/deform_bench.c
index 7838f639bef..de39fecf8fd 100644
--- a/src/test/modules/deform_bench/deform_bench.c
+++ b/src/test/modules/deform_bench/deform_bench.c
@@ -49,6 +49,7 @@ deform_bench(PG_FUNCTION_ARGS)
 
 	tupdesc = RelationGetDescr(rel);
 	slot = MakeTupleTableSlot(tupdesc, &TTSOpsBufferHeapTuple);
+	slot->tts_flags |= TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS;
 	scan = table_beginscan_strat(rel, GetActiveSnapshot(), 0, NULL, true, false);
 
 	/*
-- 
2.51.0



Attachments:

  [text/plain] tts_heap_getsomeattrs_objdump_Mintel.txt (63.8K, 2-tts_heap_getsomeattrs_objdump_Mintel.txt)
  download | inline:
0000000000001bb0 <tts_heap_getsomeattrs>:
{
    1bb0:	f3 0f 1e fa          	endbr64
    1bb4:	41 57                	push   r15
    1bb6:	49 89 fb             	mov    r11,rdi
    1bb9:	41 56                	push   r14
    1bbb:	41 55                	push   r13
    1bbd:	41 54                	push   r12
    1bbf:	4c 63 e6             	movsxd r12,esi
    1bc2:	55                   	push   rbp
    1bc3:	53                   	push   rbx
	HeapTupleHeader tup = tuple->t_data;
    1bc4:	48 8b 47 40          	mov    rax,QWORD PTR [rdi+0x40]
	TupleDesc	tupleDesc = slot->tts_tupleDescriptor;
    1bc8:	48 8b 57 10          	mov    rdx,QWORD PTR [rdi+0x10]
	isnull = slot->tts_isnull;
    1bcc:	48 8b 4f 20          	mov    rcx,QWORD PTR [rdi+0x20]
	HeapTupleHeader tup = tuple->t_data;
    1bd0:	48 8b 58 10          	mov    rbx,QWORD PTR [rax+0x10]
	if (TTS_OBEYS_NOT_NULL_CONSTRAINTS(slot))
    1bd4:	f6 47 04 08          	test   BYTE PTR [rdi+0x4],0x8
    1bd8:	0f 84 02 04 00 00    	je     1fe0 <tts_heap_getsomeattrs+0x430>
		firstNonGuaranteedAttr = Min(reqnatts, tupleDesc->firstNonGuaranteedAttr);
    1bde:	8b 42 14             	mov    eax,DWORD PTR [rdx+0x14]
    1be1:	41 39 c4             	cmp    r12d,eax
    1be4:	41 0f 4e c4          	cmovle eax,r12d
	if (attnum < firstNonGuaranteedAttr)
    1be8:	48 63 e8             	movsxd rbp,eax
	firstNonCacheOffsetAttr = tupleDesc->firstNonCachedOffsetAttr;
    1beb:	4c 63 52 10          	movsxd r10,DWORD PTR [rdx+0x10]
	if (HeapTupleHasNulls(tuple))
    1bef:	f6 43 14 01          	test   BYTE PTR [rbx+0x14],0x1
    1bf3:	0f 84 b7 03 00 00    	je     1fb0 <tts_heap_getsomeattrs+0x400>
		natts = HeapTupleHeaderGetNatts(tup);
    1bf9:	44 0f b7 4b 12       	movzx  r9d,WORD PTR [rbx+0x12]
    1bfe:	41 81 e1 ff 07 00 00 	and    r9d,0x7ff
 *		Computes size of null bitmap given number of data columns.
 */
static inline int
BITMAPLEN(int NATTS)
{
	return (NATTS + 7) / 8;
    1c05:	45 8d 41 07          	lea    r8d,[r9+0x7]
    1c09:	41 c1 f8 03          	sar    r8d,0x3
		tp = (char *) tup + MAXALIGN(offsetof(HeapTupleHeaderData, t_bits) +
    1c0d:	41 83 c0 1e          	add    r8d,0x1e
    1c11:	41 81 e0 f8 03 00 00 	and    r8d,0x3f8
    1c18:	49 01 d8             	add    r8,rbx
		natts = Min(natts, reqnatts);
    1c1b:	45 39 cc             	cmp    r12d,r9d
    1c1e:	4d 0f 4e cc          	cmovle r9,r12
			firstNullAttr = natts;
    1c22:	45 89 ce             	mov    r14d,r9d
		if (natts > firstNonGuaranteedAttr)
    1c25:	41 39 c1             	cmp    r9d,eax
    1c28:	0f 8f ea 04 00 00    	jg     2118 <tts_heap_getsomeattrs+0x568>
	attnum = slot->tts_nvalid;
    1c2e:	49 0f bf 43 06       	movsx  rax,WORD PTR [r11+0x6]
	values = slot->tts_values;
    1c33:	49 8b 7b 18          	mov    rdi,QWORD PTR [r11+0x18]
	slot->tts_nvalid = reqnatts;
    1c37:	66 45 89 63 06       	mov    WORD PTR [r11+0x6],r12w
	if (attnum < firstNonGuaranteedAttr)
    1c3c:	48 39 e8             	cmp    rax,rbp
    1c3f:	73 7f                	jae    1cc0 <tts_heap_getsomeattrs+0x110>
    1c41:	48 89 54 24 f0       	mov    QWORD PTR [rsp-0x10],rdx
    1c46:	48 8d 74 c2 20       	lea    rsi,[rdx+rax*8+0x20]
    1c4b:	eb 22                	jmp    1c6f <tts_heap_getsomeattrs+0xbf>
    1c4d:	0f 1f 00             	nop    DWORD PTR [rax]
static inline Datum
fetch_att_noerr(const void *T, bool attbyval, int attlen)
{
	if (attbyval)
	{
		switch (attlen)
    1c50:	66 41 83 ff 01       	cmp    r15w,0x1
    1c55:	74 59                	je     1cb0 <tts_heap_getsomeattrs+0x100>
 *		Returns datum representation for a 64-bit integer.
 */
static inline Datum
Int64GetDatum(int64 X)
{
	return (Datum) X;
    1c57:	48 8b 12             	mov    rdx,QWORD PTR [rdx]
			values[attnum] = fetch_att_noerr(tp + off, true, attlen);
    1c5a:	48 89 14 c7          	mov    QWORD PTR [rdi+rax*8],rdx
			attnum++;
    1c5e:	48 83 c0 01          	add    rax,0x1
		} while (attnum < firstNonGuaranteedAttr);
    1c62:	48 83 c6 08          	add    rsi,0x8
    1c66:	48 39 e8             	cmp    rax,rbp
    1c69:	0f 83 11 03 00 00    	jae    1f80 <tts_heap_getsomeattrs+0x3d0>
			isnull[attnum] = false;
    1c6f:	c6 04 01 00          	mov    BYTE PTR [rcx+rax*1],0x0
			off = cattr->attcacheoff;
    1c73:	0f bf 16             	movsx  edx,WORD PTR [rsi]
			cattr = &cattrs[attnum];
    1c76:	49 89 f5             	mov    r13,rsi
			attlen = cattr->attlen;
    1c79:	44 0f b7 7e 02       	movzx  r15d,WORD PTR [rsi+0x2]
			off = cattr->attcacheoff;
    1c7e:	48 89 d3             	mov    rbx,rdx
			values[attnum] = fetch_att_noerr(tp + off, true, attlen);
    1c81:	4c 01 c2             	add    rdx,r8
    1c84:	66 41 83 ff 02       	cmp    r15w,0x2
    1c89:	74 15                	je     1ca0 <tts_heap_getsomeattrs+0xf0>
    1c8b:	66 41 83 ff 04       	cmp    r15w,0x4
    1c90:	75 be                	jne    1c50 <tts_heap_getsomeattrs+0xa0>
	return (Datum) X;
    1c92:	48 63 12             	movsxd rdx,DWORD PTR [rdx]
		{
			case sizeof(int32):
				return Int32GetDatum(*((const int32 *) T));
    1c95:	eb c3                	jmp    1c5a <tts_heap_getsomeattrs+0xaa>
    1c97:	66 0f 1f 84 00 00 00 	nop    WORD PTR [rax+rax*1+0x0]
    1c9e:	00 00 
	return (Datum) X;
    1ca0:	48 0f bf 12          	movsx  rdx,WORD PTR [rdx]
			case sizeof(int16):
				return Int16GetDatum(*((const int16 *) T));
    1ca4:	eb b4                	jmp    1c5a <tts_heap_getsomeattrs+0xaa>
    1ca6:	66 2e 0f 1f 84 00 00 	cs nop WORD PTR [rax+rax*1+0x0]
    1cad:	00 00 00 
	return (Datum) X;
    1cb0:	48 0f be 12          	movsx  rdx,BYTE PTR [rdx]
			case sizeof(char):
				return CharGetDatum(*((const char *) T));
    1cb4:	eb a4                	jmp    1c5a <tts_heap_getsomeattrs+0xaa>
    1cb6:	66 2e 0f 1f 84 00 00 	cs nop WORD PTR [rax+rax*1+0x0]
    1cbd:	00 00 00 
		off = *offp;
    1cc0:	41 8b 5b 48          	mov    ebx,DWORD PTR [r11+0x48]
	if (unlikely(attnum < reqnatts))
    1cc4:	49 63 ec             	movsxd rbp,r12d
	firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, natts);
    1cc7:	45 39 ca             	cmp    r10d,r9d
    1cca:	4d 0f 4f d1          	cmovg  r10,r9
	if (attnum < firstNonCacheOffsetAttr)
    1cce:	4c 39 d0             	cmp    rax,r10
    1cd1:	0f 82 b9 01 00 00    	jb     1e90 <tts_heap_getsomeattrs+0x2e0>
	for (; attnum < firstNullAttr; attnum++)
    1cd7:	4d 63 d6             	movsxd r10,r14d
    1cda:	4c 39 d0             	cmp    rax,r10
    1cdd:	72 5e                	jb     1d3d <tts_heap_getsomeattrs+0x18d>
    1cdf:	e9 24 05 00 00       	jmp    2208 <tts_heap_getsomeattrs+0x658>
    1ce4:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]

	if (attlen > 0)
	{
		const char *offset_ptr;

		*off = TYPEALIGN(attalignby, *off);
    1ce8:	8d 5c 1e ff          	lea    ebx,[rsi+rbx*1-0x1]
    1cec:	f7 de                	neg    esi
    1cee:	21 de                	and    esi,ebx
		offset_ptr = tupptr + *off;
		*off += attlen;
    1cf0:	41 0f bf dd          	movsx  ebx,r13w
		offset_ptr = tupptr + *off;
    1cf4:	41 89 f6             	mov    r14d,esi
		*off += attlen;
    1cf7:	01 f3                	add    ebx,esi
		offset_ptr = tupptr + *off;
    1cf9:	4d 01 c6             	add    r14,r8
	return (Datum) (uintptr_t) X;
    1cfc:	4c 89 f6             	mov    rsi,r14
		if (attbyval)
    1cff:	80 7c c2 24 00       	cmp    BYTE PTR [rdx+rax*8+0x24],0x0
    1d04:	74 2a                	je     1d30 <tts_heap_getsomeattrs+0x180>
		{
			switch (attlen)
    1d06:	66 41 83 fd 02       	cmp    r13w,0x2
    1d0b:	0f 84 ef 01 00 00    	je     1f00 <tts_heap_getsomeattrs+0x350>
    1d11:	66 41 83 fd 04       	cmp    r13w,0x4
    1d16:	0f 84 d4 01 00 00    	je     1ef0 <tts_heap_getsomeattrs+0x340>
    1d1c:	66 41 83 fd 01       	cmp    r13w,0x1
    1d21:	0f 85 b9 01 00 00    	jne    1ee0 <tts_heap_getsomeattrs+0x330>
	return (Datum) X;
    1d27:	49 0f be 36          	movsx  rsi,BYTE PTR [r14]
    1d2b:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
		values[attnum] = align_fetch_then_add(tp,
    1d30:	48 89 34 c7          	mov    QWORD PTR [rdi+rax*8],rsi
	for (; attnum < firstNullAttr; attnum++)
    1d34:	48 83 c0 01          	add    rax,0x1
    1d38:	4c 39 d0             	cmp    rax,r10
    1d3b:	74 73                	je     1db0 <tts_heap_getsomeattrs+0x200>
		isnull[attnum] = false;
    1d3d:	c6 04 01 00          	mov    BYTE PTR [rcx+rax*1],0x0
		attlen = cattr->attlen;
    1d41:	44 0f b7 6c c2 22    	movzx  r13d,WORD PTR [rdx+rax*8+0x22]
											  cattr->attalignby);
    1d47:	0f b6 74 c2 25       	movzx  esi,BYTE PTR [rdx+rax*8+0x25]
	if (attlen > 0)
    1d4c:	66 45 85 ed          	test   r13w,r13w
    1d50:	7f 96                	jg     1ce8 <tts_heap_getsomeattrs+0x138>
		}
		return PointerGetDatum(offset_ptr);
	}
	else if (attlen == -1)
	{
		if (!VARATT_IS_SHORT(tupptr + *off))
    1d52:	41 89 dd             	mov    r13d,ebx
    1d55:	4d 01 c5             	add    r13,r8
    1d58:	41 f6 45 00 01       	test   BYTE PTR [r13+0x0],0x1
    1d5d:	75 0e                	jne    1d6d <tts_heap_getsomeattrs+0x1bd>
			*off = TYPEALIGN(attalignby, *off);
    1d5f:	8d 5c 1e ff          	lea    ebx,[rsi+rbx*1-0x1]
    1d63:	f7 de                	neg    esi
    1d65:	21 f3                	and    ebx,esi

		res = PointerGetDatum(tupptr + *off);
    1d67:	41 89 dd             	mov    r13d,ebx
    1d6a:	4d 01 c5             	add    r13,r8
	if (VARATT_IS_1B_E(PTR))
    1d6d:	45 0f b6 75 00       	movzx  r14d,BYTE PTR [r13+0x0]
	return (Datum) (uintptr_t) X;
    1d72:	4c 89 ee             	mov    rsi,r13
    1d75:	41 80 fe 01          	cmp    r14b,0x1
    1d79:	0f 84 59 03 00 00    	je     20d8 <tts_heap_getsomeattrs+0x528>
	else if (VARATT_IS_1B(PTR))
    1d7f:	41 f6 c6 01          	test   r14b,0x1
    1d83:	0f 85 87 02 00 00    	jne    2010 <tts_heap_getsomeattrs+0x460>
		return VARSIZE_4B(PTR);
    1d89:	45 8b 75 00          	mov    r14d,DWORD PTR [r13+0x0]
    1d8d:	41 c1 ee 02          	shr    r14d,0x2
		values[attnum] = align_fetch_then_add(tp,
    1d91:	48 89 34 c7          	mov    QWORD PTR [rdi+rax*8],rsi
	for (; attnum < firstNullAttr; attnum++)
    1d95:	48 83 c0 01          	add    rax,0x1
		*off += VARSIZE_ANY(DatumGetPointer(res));
    1d99:	44 01 f3             	add    ebx,r14d
    1d9c:	4c 39 d0             	cmp    rax,r10
    1d9f:	75 9c                	jne    1d3d <tts_heap_getsomeattrs+0x18d>
    1da1:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
    1da5:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    1dac:	00 00 00 00 
	for (; attnum < natts; attnum++)
    1db0:	4d 39 ca             	cmp    r10,r9
    1db3:	0f 83 57 04 00 00    	jae    2210 <tts_heap_getsomeattrs+0x660>
    1db9:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
		if (isnull[attnum])
    1dc0:	31 c0                	xor    eax,eax
    1dc2:	42 80 3c 11 00       	cmp    BYTE PTR [rcx+r10*1],0x0
    1dc7:	75 57                	jne    1e20 <tts_heap_getsomeattrs+0x270>
		attlen = cattr->attlen;
    1dc9:	42 0f b7 74 d2 22    	movzx  esi,WORD PTR [rdx+r10*8+0x22]
											  cattr->attalignby);
    1dcf:	42 0f b6 44 d2 25    	movzx  eax,BYTE PTR [rdx+r10*8+0x25]
	if (attlen > 0)
    1dd5:	66 85 f6             	test   si,si
    1dd8:	0f 8e 32 01 00 00    	jle    1f10 <tts_heap_getsomeattrs+0x360>
		*off = TYPEALIGN(attalignby, *off);
    1dde:	8d 5c 18 ff          	lea    ebx,[rax+rbx*1-0x1]
    1de2:	f7 d8                	neg    eax
    1de4:	21 d8                	and    eax,ebx
		*off += attlen;
    1de6:	0f bf de             	movsx  ebx,si
		offset_ptr = tupptr + *off;
    1de9:	41 89 c5             	mov    r13d,eax
		*off += attlen;
    1dec:	01 c3                	add    ebx,eax
		offset_ptr = tupptr + *off;
    1dee:	4d 01 c5             	add    r13,r8
    1df1:	4c 89 e8             	mov    rax,r13
		if (attbyval)
    1df4:	42 80 7c d2 24 00    	cmp    BYTE PTR [rdx+r10*8+0x24],0x0
    1dfa:	74 24                	je     1e20 <tts_heap_getsomeattrs+0x270>
			switch (attlen)
    1dfc:	66 83 fe 02          	cmp    si,0x2
    1e00:	0f 84 b2 02 00 00    	je     20b8 <tts_heap_getsomeattrs+0x508>
    1e06:	66 83 fe 04          	cmp    si,0x4
    1e0a:	0f 84 88 02 00 00    	je     2098 <tts_heap_getsomeattrs+0x4e8>
    1e10:	66 83 fe 01          	cmp    si,0x1
    1e14:	0f 84 d6 01 00 00    	je     1ff0 <tts_heap_getsomeattrs+0x440>
	return (Datum) X;
    1e1a:	49 8b 45 00          	mov    rax,QWORD PTR [r13+0x0]
    1e1e:	66 90                	xchg   ax,ax
			values[attnum] = (Datum) 0;
    1e20:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    1e24:	49 83 c2 01          	add    r10,0x1
    1e28:	4d 39 ca             	cmp    r10,r9
    1e2b:	75 93                	jne    1dc0 <tts_heap_getsomeattrs+0x210>
	if (unlikely(attnum < reqnatts))
    1e2d:	49 39 e9             	cmp    r9,rbp
    1e30:	0f 82 ea 03 00 00    	jb     2220 <tts_heap_getsomeattrs+0x670>
	*offp = off;
    1e36:	41 89 5b 48          	mov    DWORD PTR [r11+0x48],ebx
}
    1e3a:	5b                   	pop    rbx
    1e3b:	5d                   	pop    rbp
    1e3c:	41 5c                	pop    r12
    1e3e:	41 5d                	pop    r13
    1e40:	41 5e                	pop    r14
    1e42:	41 5f                	pop    r15
    1e44:	c3                   	ret
    1e45:	0f 1f 00             	nop    DWORD PTR [rax]
		switch (attlen)
    1e48:	66 83 fb 01          	cmp    bx,0x1
    1e4c:	0f 84 0e 01 00 00    	je     1f60 <tts_heap_getsomeattrs+0x3b0>
    1e52:	48 8b 1e             	mov    rbx,QWORD PTR [rsi]
    1e55:	66 2e 0f 1f 84 00 00 	cs nop WORD PTR [rax+rax*1+0x0]
    1e5c:	00 00 00 
    1e5f:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    1e66:	00 00 00 00 
    1e6a:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    1e71:	00 00 00 00 
    1e75:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    1e7c:	00 00 00 00 
		} while (++attnum < firstNonCacheOffsetAttr);
    1e80:	48 8d 70 01          	lea    rsi,[rax+0x1]
			values[attnum] = fetch_att_noerr(tp + off,
    1e84:	48 89 1c c7          	mov    QWORD PTR [rdi+rax*8],rbx
		} while (++attnum < firstNonCacheOffsetAttr);
    1e88:	49 39 f2             	cmp    r10,rsi
    1e8b:	74 43                	je     1ed0 <tts_heap_getsomeattrs+0x320>
    1e8d:	48 89 f0             	mov    rax,rsi
			isnull[attnum] = false;
    1e90:	c6 04 01 00          	mov    BYTE PTR [rcx+rax*1],0x0
			off = cattr->attcacheoff;
    1e94:	0f bf 74 c2 20       	movsx  esi,WORD PTR [rdx+rax*8+0x20]
    1e99:	49 89 f5             	mov    r13,rsi
			values[attnum] = fetch_att_noerr(tp + off,
    1e9c:	4c 01 c6             	add    rsi,r8
	return (Datum) (uintptr_t) X;
    1e9f:	48 89 f3             	mov    rbx,rsi
	if (attbyval)
    1ea2:	80 7c c2 24 00       	cmp    BYTE PTR [rdx+rax*8+0x24],0x0
    1ea7:	74 d7                	je     1e80 <tts_heap_getsomeattrs+0x2d0>
											 cattr->attlen);
    1ea9:	0f b7 5c c2 22       	movzx  ebx,WORD PTR [rdx+rax*8+0x22]
		switch (attlen)
    1eae:	66 83 fb 02          	cmp    bx,0x2
    1eb2:	0f 84 b8 00 00 00    	je     1f70 <tts_heap_getsomeattrs+0x3c0>
    1eb8:	66 83 fb 04          	cmp    bx,0x4
    1ebc:	75 8a                	jne    1e48 <tts_heap_getsomeattrs+0x298>
	return (Datum) X;
    1ebe:	48 63 1e             	movsxd rbx,DWORD PTR [rsi]
		} while (++attnum < firstNonCacheOffsetAttr);
    1ec1:	48 8d 70 01          	lea    rsi,[rax+0x1]
			values[attnum] = fetch_att_noerr(tp + off,
    1ec5:	48 89 1c c7          	mov    QWORD PTR [rdi+rax*8],rbx
		} while (++attnum < firstNonCacheOffsetAttr);
    1ec9:	49 39 f2             	cmp    r10,rsi
    1ecc:	75 bf                	jne    1e8d <tts_heap_getsomeattrs+0x2dd>
    1ece:	66 90                	xchg   ax,ax
		off += cattr->attlen;
    1ed0:	0f bf 5c c2 22       	movsx  ebx,WORD PTR [rdx+rax*8+0x22]
		} while (++attnum < firstNonCacheOffsetAttr);
    1ed5:	4c 89 d0             	mov    rax,r10
		off += cattr->attlen;
    1ed8:	44 01 eb             	add    ebx,r13d
    1edb:	e9 f7 fd ff ff       	jmp    1cd7 <tts_heap_getsomeattrs+0x127>
	return (Datum) X;
    1ee0:	49 8b 36             	mov    rsi,QWORD PTR [r14]
					return Int64GetDatum(*((const int64 *) offset_ptr));
    1ee3:	e9 48 fe ff ff       	jmp    1d30 <tts_heap_getsomeattrs+0x180>
    1ee8:	0f 1f 84 00 00 00 00 	nop    DWORD PTR [rax+rax*1+0x0]
    1eef:	00 
	return (Datum) X;
    1ef0:	49 63 36             	movsxd rsi,DWORD PTR [r14]
					return Int32GetDatum(*((const int32 *) offset_ptr));
    1ef3:	e9 38 fe ff ff       	jmp    1d30 <tts_heap_getsomeattrs+0x180>
    1ef8:	0f 1f 84 00 00 00 00 	nop    DWORD PTR [rax+rax*1+0x0]
    1eff:	00 
	return (Datum) X;
    1f00:	49 0f bf 36          	movsx  rsi,WORD PTR [r14]
					return Int16GetDatum(*((const int16 *) offset_ptr));
    1f04:	e9 27 fe ff ff       	jmp    1d30 <tts_heap_getsomeattrs+0x180>
    1f09:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
		if (!VARATT_IS_SHORT(tupptr + *off))
    1f10:	89 de                	mov    esi,ebx
    1f12:	4c 01 c6             	add    rsi,r8
    1f15:	f6 06 01             	test   BYTE PTR [rsi],0x1
    1f18:	0f 84 02 01 00 00    	je     2020 <tts_heap_getsomeattrs+0x470>
	if (VARATT_IS_1B_E(PTR))
    1f1e:	44 0f b6 2e          	movzx  r13d,BYTE PTR [rsi]
	return (Datum) (uintptr_t) X;
    1f22:	48 89 f0             	mov    rax,rsi
    1f25:	41 80 fd 01          	cmp    r13b,0x1
    1f29:	0f 84 0f 01 00 00    	je     203e <tts_heap_getsomeattrs+0x48e>
	else if (VARATT_IS_1B(PTR))
    1f2f:	41 f6 c5 01          	test   r13b,0x1
    1f33:	0f 84 3f 01 00 00    	je     2078 <tts_heap_getsomeattrs+0x4c8>
		return VARSIZE_1B(PTR);
    1f39:	41 d0 ed             	shr    r13b,1
		*off += VARSIZE_ANY(DatumGetPointer(res));
    1f3c:	45 0f b6 ed          	movzx  r13d,r13b
			values[attnum] = (Datum) 0;
    1f40:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    1f44:	49 83 c2 01          	add    r10,0x1
    1f48:	44 01 eb             	add    ebx,r13d
    1f4b:	4d 39 ca             	cmp    r10,r9
    1f4e:	0f 85 6c fe ff ff    	jne    1dc0 <tts_heap_getsomeattrs+0x210>
    1f54:	e9 d4 fe ff ff       	jmp    1e2d <tts_heap_getsomeattrs+0x27d>
    1f59:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
	return (Datum) X;
    1f60:	48 0f be 1e          	movsx  rbx,BYTE PTR [rsi]
				return CharGetDatum(*((const char *) T));
    1f64:	e9 17 ff ff ff       	jmp    1e80 <tts_heap_getsomeattrs+0x2d0>
    1f69:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
	return (Datum) X;
    1f70:	48 0f bf 1e          	movsx  rbx,WORD PTR [rsi]
				return Int16GetDatum(*((const int16 *) T));
    1f74:	e9 07 ff ff ff       	jmp    1e80 <tts_heap_getsomeattrs+0x2d0>
    1f79:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
		off += cattr->attlen;
    1f80:	41 0f bf 75 02       	movsx  esi,WORD PTR [r13+0x2]
		if (attnum == reqnatts)
    1f85:	49 63 ec             	movsxd rbp,r12d
		off += cattr->attlen;
    1f88:	48 8b 54 24 f0       	mov    rdx,QWORD PTR [rsp-0x10]
    1f8d:	01 f3                	add    ebx,esi
		if (attnum == reqnatts)
    1f8f:	48 39 e8             	cmp    rax,rbp
    1f92:	0f 85 2f fd ff ff    	jne    1cc7 <tts_heap_getsomeattrs+0x117>
	*offp = off;
    1f98:	41 89 5b 48          	mov    DWORD PTR [r11+0x48],ebx
}
    1f9c:	5b                   	pop    rbx
    1f9d:	5d                   	pop    rbp
    1f9e:	41 5c                	pop    r12
    1fa0:	41 5d                	pop    r13
    1fa2:	41 5e                	pop    r14
    1fa4:	41 5f                	pop    r15
    1fa6:	c3                   	ret
    1fa7:	66 0f 1f 84 00 00 00 	nop    WORD PTR [rax+rax*1+0x0]
    1fae:	00 00 
		tp = (char *) tup + MAXALIGN(offsetof(HeapTupleHeaderData, t_bits));
    1fb0:	4c 8d 43 18          	lea    r8,[rbx+0x18]
		if (reqnatts > firstNonGuaranteedAttr)
    1fb4:	41 39 c4             	cmp    r12d,eax
    1fb7:	0f 8e cb 00 00 00    	jle    2088 <tts_heap_getsomeattrs+0x4d8>
			natts = Min(HeapTupleHeaderGetNatts(tup), reqnatts);
    1fbd:	0f b7 43 12          	movzx  eax,WORD PTR [rbx+0x12]
    1fc1:	25 ff 07 00 00       	and    eax,0x7ff
    1fc6:	44 39 e0             	cmp    eax,r12d
    1fc9:	41 0f 4f c4          	cmovg  eax,r12d
    1fcd:	41 89 c6             	mov    r14d,eax
    1fd0:	4c 63 c8             	movsxd r9,eax
    1fd3:	e9 56 fc ff ff       	jmp    1c2e <tts_heap_getsomeattrs+0x7e>
    1fd8:	0f 1f 84 00 00 00 00 	nop    DWORD PTR [rax+rax*1+0x0]
    1fdf:	00 
    1fe0:	31 ed                	xor    ebp,ebp
		firstNonGuaranteedAttr = 0;
    1fe2:	31 c0                	xor    eax,eax
    1fe4:	e9 02 fc ff ff       	jmp    1beb <tts_heap_getsomeattrs+0x3b>
    1fe9:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
	return (Datum) X;
    1ff0:	49 0f be 45 00       	movsx  rax,BYTE PTR [r13+0x0]
			values[attnum] = (Datum) 0;
    1ff5:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    1ff9:	49 83 c2 01          	add    r10,0x1
    1ffd:	4d 39 ca             	cmp    r10,r9
    2000:	0f 85 ba fd ff ff    	jne    1dc0 <tts_heap_getsomeattrs+0x210>
    2006:	e9 22 fe ff ff       	jmp    1e2d <tts_heap_getsomeattrs+0x27d>
    200b:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
    2010:	41 d0 ee             	shr    r14b,1
		*off += VARSIZE_ANY(DatumGetPointer(res));
    2013:	45 0f b6 f6          	movzx  r14d,r14b
    2017:	e9 75 fd ff ff       	jmp    1d91 <tts_heap_getsomeattrs+0x1e1>
    201c:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
			*off = TYPEALIGN(attalignby, *off);
    2020:	8d 5c 18 ff          	lea    ebx,[rax+rbx*1-0x1]
    2024:	f7 d8                	neg    eax
    2026:	21 c3                	and    ebx,eax
		res = PointerGetDatum(tupptr + *off);
    2028:	89 de                	mov    esi,ebx
    202a:	4c 01 c6             	add    rsi,r8
	if (VARATT_IS_1B_E(PTR))
    202d:	44 0f b6 2e          	movzx  r13d,BYTE PTR [rsi]
	return (Datum) (uintptr_t) X;
    2031:	48 89 f0             	mov    rax,rsi
    2034:	41 80 fd 01          	cmp    r13b,0x1
    2038:	0f 85 f1 fe ff ff    	jne    1f2f <tts_heap_getsomeattrs+0x37f>
	return VARTAG_1B_E(PTR);
    203e:	0f b6 76 01          	movzx  esi,BYTE PTR [rsi+0x1]
	if (tag == VARTAG_INDIRECT)
    2042:	83 fe 01             	cmp    esi,0x1
    2045:	0f 84 12 02 00 00    	je     225d <tts_heap_getsomeattrs+0x6ad>
	return ((tag & ~1) == VARTAG_EXPANDED_RO);
    204b:	41 89 f5             	mov    r13d,esi
    204e:	41 83 e5 fe          	and    r13d,0xfffffffe
	else if (VARTAG_IS_EXPANDED(tag))
    2052:	41 83 fd 02          	cmp    r13d,0x2
    2056:	0f 84 01 02 00 00    	je     225d <tts_heap_getsomeattrs+0x6ad>
	else if (tag == VARTAG_ONDISK)
    205c:	83 fe 12             	cmp    esi,0x12
    205f:	40 0f 94 c6          	sete   sil
    2063:	40 0f b6 f6          	movzx  esi,sil
    2067:	48 c1 e6 04          	shl    rsi,0x4
		*off += VARSIZE_ANY(DatumGetPointer(res));
    206b:	44 8d 6e 02          	lea    r13d,[rsi+0x2]
    206f:	e9 cc fe ff ff       	jmp    1f40 <tts_heap_getsomeattrs+0x390>
    2074:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
		return VARSIZE_4B(PTR);
    2078:	44 8b 2e             	mov    r13d,DWORD PTR [rsi]
    207b:	41 c1 ed 02          	shr    r13d,0x2
    207f:	e9 bc fe ff ff       	jmp    1f40 <tts_heap_getsomeattrs+0x390>
    2084:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
			natts = reqnatts;
    2088:	4d 63 cc             	movsxd r9,r12d
    208b:	45 89 e6             	mov    r14d,r12d
    208e:	e9 9b fb ff ff       	jmp    1c2e <tts_heap_getsomeattrs+0x7e>
    2093:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
	return (Datum) X;
    2098:	49 63 45 00          	movsxd rax,DWORD PTR [r13+0x0]
			values[attnum] = (Datum) 0;
    209c:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    20a0:	49 83 c2 01          	add    r10,0x1
    20a4:	4d 39 ca             	cmp    r10,r9
    20a7:	0f 85 13 fd ff ff    	jne    1dc0 <tts_heap_getsomeattrs+0x210>
    20ad:	e9 7b fd ff ff       	jmp    1e2d <tts_heap_getsomeattrs+0x27d>
    20b2:	66 0f 1f 44 00 00    	nop    WORD PTR [rax+rax*1+0x0]
	return (Datum) X;
    20b8:	49 0f bf 45 00       	movsx  rax,WORD PTR [r13+0x0]
			values[attnum] = (Datum) 0;
    20bd:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    20c1:	49 83 c2 01          	add    r10,0x1
    20c5:	4d 39 ca             	cmp    r10,r9
    20c8:	0f 85 f2 fc ff ff    	jne    1dc0 <tts_heap_getsomeattrs+0x210>
    20ce:	e9 5a fd ff ff       	jmp    1e2d <tts_heap_getsomeattrs+0x27d>
    20d3:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
	return VARTAG_1B_E(PTR);
    20d8:	45 0f b6 6d 01       	movzx  r13d,BYTE PTR [r13+0x1]
	if (tag == VARTAG_INDIRECT)
    20dd:	41 83 fd 01          	cmp    r13d,0x1
    20e1:	0f 84 6b 01 00 00    	je     2252 <tts_heap_getsomeattrs+0x6a2>
	return ((tag & ~1) == VARTAG_EXPANDED_RO);
    20e7:	45 89 ee             	mov    r14d,r13d
    20ea:	41 83 e6 fe          	and    r14d,0xfffffffe
	else if (VARTAG_IS_EXPANDED(tag))
    20ee:	41 83 fe 02          	cmp    r14d,0x2
    20f2:	0f 84 5a 01 00 00    	je     2252 <tts_heap_getsomeattrs+0x6a2>
	else if (tag == VARTAG_ONDISK)
    20f8:	41 83 fd 12          	cmp    r13d,0x12
    20fc:	41 0f 94 c5          	sete   r13b
    2100:	45 0f b6 ed          	movzx  r13d,r13b
    2104:	49 c1 e5 04          	shl    r13,0x4
    2108:	45 8d 75 02          	lea    r14d,[r13+0x2]
    210c:	e9 80 fc ff ff       	jmp    1d91 <tts_heap_getsomeattrs+0x1e1>
    2111:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
 * case.
 */
static inline int
first_null_attr(const bits8 *bits, int natts)
{
	int			nattByte = natts >> 3;
    2118:	45 89 cd             	mov    r13d,r9d
    211b:	41 c1 fd 03          	sar    r13d,0x3
		}
	}
#endif

	/* Process all bytes up to just before the byte for the natts attribute */
	for (bytenum = 0; bytenum < nattByte; bytenum++)
    211f:	45 85 ed             	test   r13d,r13d
    2122:	0f 8e 40 01 00 00    	jle    2268 <tts_heap_getsomeattrs+0x6b8>
    2128:	48 8d 73 17          	lea    rsi,[rbx+0x17]
    212c:	31 ff                	xor    edi,edi
    212e:	eb 20                	jmp    2150 <tts_heap_getsomeattrs+0x5a0>
    2130:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
    2135:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    213c:	00 00 00 00 
    2140:	83 c7 01             	add    edi,0x1
    2143:	48 83 c6 01          	add    rsi,0x1
    2147:	41 39 fd             	cmp    r13d,edi
    214a:	0f 84 ec 00 00 00    	je     223c <tts_heap_getsomeattrs+0x68c>
	{
		/* break if there's any NULL attrs (a 0 bit) */
		if (bits[bytenum] != 0xFF)
    2150:	0f b6 06             	movzx  eax,BYTE PTR [rsi]
    2153:	3c ff                	cmp    al,0xff
    2155:	74 e9                	je     2140 <tts_heap_getsomeattrs+0x590>
	 * looking for the first 1-bit.  This works even when the byte is 0xFF, as
	 * the bitwise NOT of 0xFF in 32 bits is 0xFFFFFF00, in which case
	 * pg_rightmost_one_pos32() will return 8.  We may end up with a value
	 * higher than natts here, but we'll fix that with the Min() below.
	 */
	res = bytenum << 3;
    2157:	c1 e7 03             	shl    edi,0x3
	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
    215a:	f7 d0                	not    eax
	int			nbytes = (natts + 7) >> 3;
    215c:	45 8d 69 07          	lea    r13d,[r9+0x7]
pg_rightmost_one_pos32(uint32 word)
{
#ifdef HAVE__BUILTIN_CTZ
	Assert(word != 0);

	return __builtin_ctz(word);
    2160:	f3 0f bc c0          	tzcnt  eax,eax
	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
    2164:	01 f8                	add    eax,edi

	/*
	 * Since we did no masking to mask out bits beyond the natt'th bit, we may
	 * have found a bit higher than natts, so we must cap res to natts
	 */
	res = Min(res, natts);
    2166:	41 39 c1             	cmp    r9d,eax
    2169:	41 0f 4e c1          	cmovle eax,r9d
	int			nbytes = (natts + 7) >> 3;
    216d:	41 c1 fd 03          	sar    r13d,0x3
	res = Min(res, natts);
    2171:	4c 63 f0             	movsxd r14,eax
		isnull_8 &= UINT64CONST(0x0101010101010101);
    2174:	49 bf 01 01 01 01 01 	movabs r15,0x101010101010101
    217b:	01 01 01 
    217e:	4d 63 ed             	movsxd r13,r13d
	for (bytenum = 0; bytenum < nattByte; bytenum++)
    2181:	31 ff                	xor    edi,edi
    2183:	66 0f 1f 44 00 00    	nop    WORD PTR [rax+rax*1+0x0]
    2189:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    2190:	00 00 00 00 
    2194:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    219b:	00 00 00 00 
    219f:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    21a6:	00 00 00 00 
    21aa:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    21b1:	00 00 00 00 
    21b5:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    21bc:	00 00 00 00 
		bits8		nullbyte = ~bits[i];
    21c0:	0f b6 74 3b 17       	movzx  esi,BYTE PTR [rbx+rdi*1+0x17]
    21c5:	f7 d6                	not    esi
		isnull_8 |= ((uint64) ((nullbyte >> 4) * SPREAD_BITS_MULTIPLIER_32)) << 32;
    21c7:	89 f0                	mov    eax,esi
		isnull_8 = (nullbyte & 0xf) * SPREAD_BITS_MULTIPLIER_32;
    21c9:	83 e6 0f             	and    esi,0xf
		isnull_8 |= ((uint64) ((nullbyte >> 4) * SPREAD_BITS_MULTIPLIER_32)) << 32;
    21cc:	c0 e8 04             	shr    al,0x4
		isnull_8 = (nullbyte & 0xf) * SPREAD_BITS_MULTIPLIER_32;
    21cf:	48 69 f6 81 40 20 00 	imul   rsi,rsi,0x204081
		isnull_8 |= ((uint64) ((nullbyte >> 4) * SPREAD_BITS_MULTIPLIER_32)) << 32;
    21d6:	83 e0 0f             	and    eax,0xf
    21d9:	48 69 c0 81 40 20 00 	imul   rax,rax,0x204081
    21e0:	48 c1 e0 20          	shl    rax,0x20
    21e4:	48 09 f0             	or     rax,rsi
		isnull_8 &= UINT64CONST(0x0101010101010101);
    21e7:	4c 21 f8             	and    rax,r15
    21ea:	48 89 04 f9          	mov    QWORD PTR [rcx+rdi*8],rax
	for (int i = 0; i < nbytes; i++, isnull += 8)
    21ee:	48 83 c7 01          	add    rdi,0x1
    21f2:	4c 39 ef             	cmp    rdi,r13
    21f5:	75 c9                	jne    21c0 <tts_heap_getsomeattrs+0x610>
			firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, firstNullAttr);
    21f7:	45 39 f2             	cmp    r10d,r14d
    21fa:	4d 0f 4f d6          	cmovg  r10,r14
    21fe:	e9 2b fa ff ff       	jmp    1c2e <tts_heap_getsomeattrs+0x7e>
    2203:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
	for (; attnum < firstNullAttr; attnum++)
    2208:	49 89 c2             	mov    r10,rax
    220b:	e9 a0 fb ff ff       	jmp    1db0 <tts_heap_getsomeattrs+0x200>
	for (; attnum < natts; attnum++)
    2210:	4d 89 d1             	mov    r9,r10
    2213:	e9 15 fc ff ff       	jmp    1e2d <tts_heap_getsomeattrs+0x27d>
    2218:	0f 1f 84 00 00 00 00 	nop    DWORD PTR [rax+rax*1+0x0]
    221f:	00 
		*offp = off;
    2220:	41 89 5b 48          	mov    DWORD PTR [r11+0x48],ebx
		slot_getmissingattrs(slot, attnum, reqnatts);
    2224:	44 89 e2             	mov    edx,r12d
}
    2227:	5b                   	pop    rbx
		slot_getmissingattrs(slot, attnum, reqnatts);
    2228:	44 89 ce             	mov    esi,r9d
}
    222b:	5d                   	pop    rbp
		slot_getmissingattrs(slot, attnum, reqnatts);
    222c:	4c 89 df             	mov    rdi,r11
}
    222f:	41 5c                	pop    r12
    2231:	41 5d                	pop    r13
    2233:	41 5e                	pop    r14
    2235:	41 5f                	pop    r15
		slot_getmissingattrs(slot, attnum, reqnatts);
    2237:	e9 b4 f8 ff ff       	jmp    1af0 <slot_getmissingattrs>
	res = bytenum << 3;
    223c:	42 8d 3c ed 00 00 00 	lea    edi,[r13*8+0x0]
    2243:	00 
	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
    2244:	4d 63 ed             	movsxd r13,r13d
    2247:	42 0f b6 44 2b 17    	movzx  eax,BYTE PTR [rbx+r13*1+0x17]
    224d:	e9 08 ff ff ff       	jmp    215a <tts_heap_getsomeattrs+0x5aa>
    2252:	41 be 0a 00 00 00    	mov    r14d,0xa
    2258:	e9 34 fb ff ff       	jmp    1d91 <tts_heap_getsomeattrs+0x1e1>
    225d:	41 bd 0a 00 00 00    	mov    r13d,0xa
    2263:	e9 d8 fc ff ff       	jmp    1f40 <tts_heap_getsomeattrs+0x390>
    2268:	0f b6 43 17          	movzx  eax,BYTE PTR [rbx+0x17]
	int			nbytes = (natts + 7) >> 3;
    226c:	45 8d 69 07          	lea    r13d,[r9+0x7]
	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
    2270:	f7 d0                	not    eax
    2272:	f3 0f bc c0          	tzcnt  eax,eax
	res = Min(res, natts);
    2276:	41 39 c1             	cmp    r9d,eax
    2279:	41 0f 4e c1          	cmovle eax,r9d
	int			nbytes = (natts + 7) >> 3;
    227d:	41 c1 fd 03          	sar    r13d,0x3
	res = Min(res, natts);
    2281:	4c 63 f0             	movsxd r14,eax
	for (int i = 0; i < nbytes; i++, isnull += 8)
    2284:	41 83 fd 01          	cmp    r13d,0x1
    2288:	0f 85 69 ff ff ff    	jne    21f7 <tts_heap_getsomeattrs+0x647>
    228e:	e9 e1 fe ff ff       	jmp    2174 <tts_heap_getsomeattrs+0x5c4>
    2293:	66 90                	xchg   ax,ax
    2295:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    229c:	00 00 00 00 

00000000000022a0 <tts_minimal_getsomeattrs>:
{
    22a0:	f3 0f 1e fa          	endbr64
    22a4:	41 57                	push   r15
    22a6:	49 89 fb             	mov    r11,rdi
    22a9:	41 56                	push   r14
    22ab:	41 55                	push   r13
    22ad:	41 54                	push   r12
    22af:	4c 63 e6             	movsxd r12,esi
    22b2:	55                   	push   rbp
    22b3:	53                   	push   rbx
	HeapTupleHeader tup = tuple->t_data;
    22b4:	48 8b 47 40          	mov    rax,QWORD PTR [rdi+0x40]
	TupleDesc	tupleDesc = slot->tts_tupleDescriptor;
    22b8:	48 8b 57 10          	mov    rdx,QWORD PTR [rdi+0x10]
	isnull = slot->tts_isnull;
    22bc:	48 8b 4f 20          	mov    rcx,QWORD PTR [rdi+0x20]
	HeapTupleHeader tup = tuple->t_data;
    22c0:	48 8b 58 10          	mov    rbx,QWORD PTR [rax+0x10]
	if (TTS_OBEYS_NOT_NULL_CONSTRAINTS(slot))
    22c4:	f6 47 04 08          	test   BYTE PTR [rdi+0x4],0x8
    22c8:	0f 84 22 04 00 00    	je     26f0 <tts_minimal_getsomeattrs+0x450>
		firstNonGuaranteedAttr = Min(reqnatts, tupleDesc->firstNonGuaranteedAttr);
    22ce:	8b 42 14             	mov    eax,DWORD PTR [rdx+0x14]
    22d1:	41 39 c4             	cmp    r12d,eax
    22d4:	41 0f 4e c4          	cmovle eax,r12d
	if (attnum < firstNonGuaranteedAttr)
    22d8:	48 63 e8             	movsxd rbp,eax
	firstNonCacheOffsetAttr = tupleDesc->firstNonCachedOffsetAttr;
    22db:	4c 63 52 10          	movsxd r10,DWORD PTR [rdx+0x10]
	if (HeapTupleHasNulls(tuple))
    22df:	f6 43 14 01          	test   BYTE PTR [rbx+0x14],0x1
    22e3:	0f 84 d7 03 00 00    	je     26c0 <tts_minimal_getsomeattrs+0x420>
		natts = HeapTupleHeaderGetNatts(tup);
    22e9:	44 0f b7 4b 12       	movzx  r9d,WORD PTR [rbx+0x12]
    22ee:	41 81 e1 ff 07 00 00 	and    r9d,0x7ff
    22f5:	45 8d 41 07          	lea    r8d,[r9+0x7]
    22f9:	41 c1 f8 03          	sar    r8d,0x3
		tp = (char *) tup + MAXALIGN(offsetof(HeapTupleHeaderData, t_bits) +
    22fd:	41 83 c0 1e          	add    r8d,0x1e
    2301:	41 81 e0 f8 03 00 00 	and    r8d,0x3f8
    2308:	49 01 d8             	add    r8,rbx
		natts = Min(natts, reqnatts);
    230b:	45 39 cc             	cmp    r12d,r9d
    230e:	4d 0f 4e cc          	cmovle r9,r12
			firstNullAttr = natts;
    2312:	45 89 ce             	mov    r14d,r9d
		if (natts > firstNonGuaranteedAttr)
    2315:	41 39 c1             	cmp    r9d,eax
    2318:	0f 8f 0a 05 00 00    	jg     2828 <tts_minimal_getsomeattrs+0x588>
	attnum = slot->tts_nvalid;
    231e:	49 0f bf 43 06       	movsx  rax,WORD PTR [r11+0x6]
	values = slot->tts_values;
    2323:	49 8b 7b 18          	mov    rdi,QWORD PTR [r11+0x18]
	slot->tts_nvalid = reqnatts;
    2327:	66 45 89 63 06       	mov    WORD PTR [r11+0x6],r12w
	if (attnum < firstNonGuaranteedAttr)
    232c:	48 39 e8             	cmp    rax,rbp
    232f:	73 7f                	jae    23b0 <tts_minimal_getsomeattrs+0x110>
    2331:	48 89 54 24 f0       	mov    QWORD PTR [rsp-0x10],rdx
    2336:	48 8d 74 c2 20       	lea    rsi,[rdx+rax*8+0x20]
    233b:	eb 22                	jmp    235f <tts_minimal_getsomeattrs+0xbf>
    233d:	0f 1f 00             	nop    DWORD PTR [rax]
		switch (attlen)
    2340:	66 41 83 ff 01       	cmp    r15w,0x1
    2345:	74 59                	je     23a0 <tts_minimal_getsomeattrs+0x100>
	return (Datum) X;
    2347:	48 8b 12             	mov    rdx,QWORD PTR [rdx]
			values[attnum] = fetch_att_noerr(tp + off, true, attlen);
    234a:	48 89 14 c7          	mov    QWORD PTR [rdi+rax*8],rdx
			attnum++;
    234e:	48 83 c0 01          	add    rax,0x1
		} while (attnum < firstNonGuaranteedAttr);
    2352:	48 83 c6 08          	add    rsi,0x8
    2356:	48 39 e8             	cmp    rax,rbp
    2359:	0f 83 31 03 00 00    	jae    2690 <tts_minimal_getsomeattrs+0x3f0>
			isnull[attnum] = false;
    235f:	c6 04 01 00          	mov    BYTE PTR [rcx+rax*1],0x0
			off = cattr->attcacheoff;
    2363:	0f bf 16             	movsx  edx,WORD PTR [rsi]
			cattr = &cattrs[attnum];
    2366:	49 89 f5             	mov    r13,rsi
			attlen = cattr->attlen;
    2369:	44 0f b7 7e 02       	movzx  r15d,WORD PTR [rsi+0x2]
			off = cattr->attcacheoff;
    236e:	48 89 d3             	mov    rbx,rdx
			values[attnum] = fetch_att_noerr(tp + off, true, attlen);
    2371:	4c 01 c2             	add    rdx,r8
    2374:	66 41 83 ff 02       	cmp    r15w,0x2
    2379:	74 15                	je     2390 <tts_minimal_getsomeattrs+0xf0>
    237b:	66 41 83 ff 04       	cmp    r15w,0x4
    2380:	75 be                	jne    2340 <tts_minimal_getsomeattrs+0xa0>
	return (Datum) X;
    2382:	48 63 12             	movsxd rdx,DWORD PTR [rdx]
				return Int32GetDatum(*((const int32 *) T));
    2385:	eb c3                	jmp    234a <tts_minimal_getsomeattrs+0xaa>
    2387:	66 0f 1f 84 00 00 00 	nop    WORD PTR [rax+rax*1+0x0]
    238e:	00 00 
	return (Datum) X;
    2390:	48 0f bf 12          	movsx  rdx,WORD PTR [rdx]
				return Int16GetDatum(*((const int16 *) T));
    2394:	eb b4                	jmp    234a <tts_minimal_getsomeattrs+0xaa>
    2396:	66 2e 0f 1f 84 00 00 	cs nop WORD PTR [rax+rax*1+0x0]
    239d:	00 00 00 
	return (Datum) X;
    23a0:	48 0f be 12          	movsx  rdx,BYTE PTR [rdx]
				return CharGetDatum(*((const char *) T));
    23a4:	eb a4                	jmp    234a <tts_minimal_getsomeattrs+0xaa>
    23a6:	66 2e 0f 1f 84 00 00 	cs nop WORD PTR [rax+rax*1+0x0]
    23ad:	00 00 00 
		off = *offp;
    23b0:	41 8b 5b 68          	mov    ebx,DWORD PTR [r11+0x68]
	if (unlikely(attnum < reqnatts))
    23b4:	49 63 ec             	movsxd rbp,r12d
	firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, natts);
    23b7:	45 39 ca             	cmp    r10d,r9d
    23ba:	4d 0f 4f d1          	cmovg  r10,r9
	if (attnum < firstNonCacheOffsetAttr)
    23be:	4c 39 d0             	cmp    rax,r10
    23c1:	0f 82 c9 01 00 00    	jb     2590 <tts_minimal_getsomeattrs+0x2f0>
	for (; attnum < firstNullAttr; attnum++)
    23c7:	4d 63 d6             	movsxd r10,r14d
    23ca:	4c 39 d0             	cmp    rax,r10
    23cd:	72 5e                	jb     242d <tts_minimal_getsomeattrs+0x18d>
    23cf:	e9 34 05 00 00       	jmp    2908 <tts_minimal_getsomeattrs+0x668>
    23d4:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
		*off = TYPEALIGN(attalignby, *off);
    23d8:	8d 5c 1e ff          	lea    ebx,[rsi+rbx*1-0x1]
    23dc:	f7 de                	neg    esi
    23de:	21 de                	and    esi,ebx
		*off += attlen;
    23e0:	41 0f bf dd          	movsx  ebx,r13w
		offset_ptr = tupptr + *off;
    23e4:	41 89 f6             	mov    r14d,esi
		*off += attlen;
    23e7:	01 f3                	add    ebx,esi
		offset_ptr = tupptr + *off;
    23e9:	4d 01 c6             	add    r14,r8
	return (Datum) (uintptr_t) X;
    23ec:	4c 89 f6             	mov    rsi,r14
		if (attbyval)
    23ef:	80 7c c2 24 00       	cmp    BYTE PTR [rdx+rax*8+0x24],0x0
    23f4:	74 2a                	je     2420 <tts_minimal_getsomeattrs+0x180>
			switch (attlen)
    23f6:	66 41 83 fd 02       	cmp    r13w,0x2
    23fb:	0f 84 0f 02 00 00    	je     2610 <tts_minimal_getsomeattrs+0x370>
    2401:	66 41 83 fd 04       	cmp    r13w,0x4
    2406:	0f 84 f4 01 00 00    	je     2600 <tts_minimal_getsomeattrs+0x360>
    240c:	66 41 83 fd 01       	cmp    r13w,0x1
    2411:	0f 85 d9 01 00 00    	jne    25f0 <tts_minimal_getsomeattrs+0x350>
	return (Datum) X;
    2417:	49 0f be 36          	movsx  rsi,BYTE PTR [r14]
    241b:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
		values[attnum] = align_fetch_then_add(tp,
    2420:	48 89 34 c7          	mov    QWORD PTR [rdi+rax*8],rsi
	for (; attnum < firstNullAttr; attnum++)
    2424:	48 83 c0 01          	add    rax,0x1
    2428:	4c 39 d0             	cmp    rax,r10
    242b:	74 73                	je     24a0 <tts_minimal_getsomeattrs+0x200>
		isnull[attnum] = false;
    242d:	c6 04 01 00          	mov    BYTE PTR [rcx+rax*1],0x0
		attlen = cattr->attlen;
    2431:	44 0f b7 6c c2 22    	movzx  r13d,WORD PTR [rdx+rax*8+0x22]
											  cattr->attalignby);
    2437:	0f b6 74 c2 25       	movzx  esi,BYTE PTR [rdx+rax*8+0x25]
	if (attlen > 0)
    243c:	66 45 85 ed          	test   r13w,r13w
    2440:	7f 96                	jg     23d8 <tts_minimal_getsomeattrs+0x138>
		if (!VARATT_IS_SHORT(tupptr + *off))
    2442:	41 89 dd             	mov    r13d,ebx
    2445:	4d 01 c5             	add    r13,r8
    2448:	41 f6 45 00 01       	test   BYTE PTR [r13+0x0],0x1
    244d:	75 0e                	jne    245d <tts_minimal_getsomeattrs+0x1bd>
			*off = TYPEALIGN(attalignby, *off);
    244f:	8d 5c 1e ff          	lea    ebx,[rsi+rbx*1-0x1]
    2453:	f7 de                	neg    esi
    2455:	21 f3                	and    ebx,esi
		res = PointerGetDatum(tupptr + *off);
    2457:	41 89 dd             	mov    r13d,ebx
    245a:	4d 01 c5             	add    r13,r8
	if (VARATT_IS_1B_E(PTR))
    245d:	45 0f b6 75 00       	movzx  r14d,BYTE PTR [r13+0x0]
	return (Datum) (uintptr_t) X;
    2462:	4c 89 ee             	mov    rsi,r13
    2465:	41 80 fe 01          	cmp    r14b,0x1
    2469:	0f 84 79 03 00 00    	je     27e8 <tts_minimal_getsomeattrs+0x548>
	else if (VARATT_IS_1B(PTR))
    246f:	41 f6 c6 01          	test   r14b,0x1
    2473:	0f 85 a7 02 00 00    	jne    2720 <tts_minimal_getsomeattrs+0x480>
		return VARSIZE_4B(PTR);
    2479:	45 8b 75 00          	mov    r14d,DWORD PTR [r13+0x0]
    247d:	41 c1 ee 02          	shr    r14d,0x2
		values[attnum] = align_fetch_then_add(tp,
    2481:	48 89 34 c7          	mov    QWORD PTR [rdi+rax*8],rsi
	for (; attnum < firstNullAttr; attnum++)
    2485:	48 83 c0 01          	add    rax,0x1
		*off += VARSIZE_ANY(DatumGetPointer(res));
    2489:	44 01 f3             	add    ebx,r14d
    248c:	4c 39 d0             	cmp    rax,r10
    248f:	75 9c                	jne    242d <tts_minimal_getsomeattrs+0x18d>
    2491:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
    2495:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    249c:	00 00 00 00 
	for (; attnum < natts; attnum++)
    24a0:	4d 39 ca             	cmp    r10,r9
    24a3:	0f 83 67 04 00 00    	jae    2910 <tts_minimal_getsomeattrs+0x670>
    24a9:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
		if (isnull[attnum])
    24b0:	31 c0                	xor    eax,eax
    24b2:	42 80 3c 11 00       	cmp    BYTE PTR [rcx+r10*1],0x0
    24b7:	75 57                	jne    2510 <tts_minimal_getsomeattrs+0x270>
		attlen = cattr->attlen;
    24b9:	42 0f b7 74 d2 22    	movzx  esi,WORD PTR [rdx+r10*8+0x22]
											  cattr->attalignby);
    24bf:	42 0f b6 44 d2 25    	movzx  eax,BYTE PTR [rdx+r10*8+0x25]
	if (attlen > 0)
    24c5:	66 85 f6             	test   si,si
    24c8:	0f 8e 52 01 00 00    	jle    2620 <tts_minimal_getsomeattrs+0x380>
		*off = TYPEALIGN(attalignby, *off);
    24ce:	8d 5c 18 ff          	lea    ebx,[rax+rbx*1-0x1]
    24d2:	f7 d8                	neg    eax
    24d4:	21 d8                	and    eax,ebx
		*off += attlen;
    24d6:	0f bf de             	movsx  ebx,si
		offset_ptr = tupptr + *off;
    24d9:	41 89 c5             	mov    r13d,eax
		*off += attlen;
    24dc:	01 c3                	add    ebx,eax
		offset_ptr = tupptr + *off;
    24de:	4d 01 c5             	add    r13,r8
    24e1:	4c 89 e8             	mov    rax,r13
		if (attbyval)
    24e4:	42 80 7c d2 24 00    	cmp    BYTE PTR [rdx+r10*8+0x24],0x0
    24ea:	74 24                	je     2510 <tts_minimal_getsomeattrs+0x270>
			switch (attlen)
    24ec:	66 83 fe 02          	cmp    si,0x2
    24f0:	0f 84 d2 02 00 00    	je     27c8 <tts_minimal_getsomeattrs+0x528>
    24f6:	66 83 fe 04          	cmp    si,0x4
    24fa:	0f 84 a8 02 00 00    	je     27a8 <tts_minimal_getsomeattrs+0x508>
    2500:	66 83 fe 01          	cmp    si,0x1
    2504:	0f 84 f6 01 00 00    	je     2700 <tts_minimal_getsomeattrs+0x460>
	return (Datum) X;
    250a:	49 8b 45 00          	mov    rax,QWORD PTR [r13+0x0]
    250e:	66 90                	xchg   ax,ax
			values[attnum] = (Datum) 0;
    2510:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    2514:	49 83 c2 01          	add    r10,0x1
    2518:	4d 39 ca             	cmp    r10,r9
    251b:	75 93                	jne    24b0 <tts_minimal_getsomeattrs+0x210>
	if (unlikely(attnum < reqnatts))
    251d:	49 39 e9             	cmp    r9,rbp
    2520:	0f 82 fa 03 00 00    	jb     2920 <tts_minimal_getsomeattrs+0x680>
	*offp = off;
    2526:	41 89 5b 68          	mov    DWORD PTR [r11+0x68],ebx
}
    252a:	5b                   	pop    rbx
    252b:	5d                   	pop    rbp
    252c:	41 5c                	pop    r12
    252e:	41 5d                	pop    r13
    2530:	41 5e                	pop    r14
    2532:	41 5f                	pop    r15
    2534:	c3                   	ret
    2535:	0f 1f 00             	nop    DWORD PTR [rax]
		switch (attlen)
    2538:	66 83 fb 01          	cmp    bx,0x1
    253c:	0f 84 2e 01 00 00    	je     2670 <tts_minimal_getsomeattrs+0x3d0>
    2542:	48 8b 1e             	mov    rbx,QWORD PTR [rsi]
    2545:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
    2549:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    2550:	00 00 00 00 
    2554:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    255b:	00 00 00 00 
    255f:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    2566:	00 00 00 00 
    256a:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    2571:	00 00 00 00 
    2575:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    257c:	00 00 00 00 
		} while (++attnum < firstNonCacheOffsetAttr);
    2580:	48 8d 70 01          	lea    rsi,[rax+0x1]
			values[attnum] = fetch_att_noerr(tp + off,
    2584:	48 89 1c c7          	mov    QWORD PTR [rdi+rax*8],rbx
		} while (++attnum < firstNonCacheOffsetAttr);
    2588:	49 39 f2             	cmp    r10,rsi
    258b:	74 53                	je     25e0 <tts_minimal_getsomeattrs+0x340>
    258d:	48 89 f0             	mov    rax,rsi
			isnull[attnum] = false;
    2590:	c6 04 01 00          	mov    BYTE PTR [rcx+rax*1],0x0
			off = cattr->attcacheoff;
    2594:	0f bf 74 c2 20       	movsx  esi,WORD PTR [rdx+rax*8+0x20]
    2599:	49 89 f5             	mov    r13,rsi
			values[attnum] = fetch_att_noerr(tp + off,
    259c:	4c 01 c6             	add    rsi,r8
	return (Datum) (uintptr_t) X;
    259f:	48 89 f3             	mov    rbx,rsi
	if (attbyval)
    25a2:	80 7c c2 24 00       	cmp    BYTE PTR [rdx+rax*8+0x24],0x0
    25a7:	74 d7                	je     2580 <tts_minimal_getsomeattrs+0x2e0>
											 cattr->attlen);
    25a9:	0f b7 5c c2 22       	movzx  ebx,WORD PTR [rdx+rax*8+0x22]
		switch (attlen)
    25ae:	66 83 fb 02          	cmp    bx,0x2
    25b2:	0f 84 c8 00 00 00    	je     2680 <tts_minimal_getsomeattrs+0x3e0>
    25b8:	66 83 fb 04          	cmp    bx,0x4
    25bc:	0f 85 76 ff ff ff    	jne    2538 <tts_minimal_getsomeattrs+0x298>
	return (Datum) X;
    25c2:	48 63 1e             	movsxd rbx,DWORD PTR [rsi]
		} while (++attnum < firstNonCacheOffsetAttr);
    25c5:	48 8d 70 01          	lea    rsi,[rax+0x1]
			values[attnum] = fetch_att_noerr(tp + off,
    25c9:	48 89 1c c7          	mov    QWORD PTR [rdi+rax*8],rbx
		} while (++attnum < firstNonCacheOffsetAttr);
    25cd:	49 39 f2             	cmp    r10,rsi
    25d0:	75 bb                	jne    258d <tts_minimal_getsomeattrs+0x2ed>
    25d2:	0f 1f 00             	nop    DWORD PTR [rax]
    25d5:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    25dc:	00 00 00 00 
		off += cattr->attlen;
    25e0:	0f bf 5c c2 22       	movsx  ebx,WORD PTR [rdx+rax*8+0x22]
		} while (++attnum < firstNonCacheOffsetAttr);
    25e5:	4c 89 d0             	mov    rax,r10
		off += cattr->attlen;
    25e8:	44 01 eb             	add    ebx,r13d
    25eb:	e9 d7 fd ff ff       	jmp    23c7 <tts_minimal_getsomeattrs+0x127>
	return (Datum) X;
    25f0:	49 8b 36             	mov    rsi,QWORD PTR [r14]
					return Int64GetDatum(*((const int64 *) offset_ptr));
    25f3:	e9 28 fe ff ff       	jmp    2420 <tts_minimal_getsomeattrs+0x180>
    25f8:	0f 1f 84 00 00 00 00 	nop    DWORD PTR [rax+rax*1+0x0]
    25ff:	00 
	return (Datum) X;
    2600:	49 63 36             	movsxd rsi,DWORD PTR [r14]
					return Int32GetDatum(*((const int32 *) offset_ptr));
    2603:	e9 18 fe ff ff       	jmp    2420 <tts_minimal_getsomeattrs+0x180>
    2608:	0f 1f 84 00 00 00 00 	nop    DWORD PTR [rax+rax*1+0x0]
    260f:	00 
	return (Datum) X;
    2610:	49 0f bf 36          	movsx  rsi,WORD PTR [r14]
					return Int16GetDatum(*((const int16 *) offset_ptr));
    2614:	e9 07 fe ff ff       	jmp    2420 <tts_minimal_getsomeattrs+0x180>
    2619:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
		if (!VARATT_IS_SHORT(tupptr + *off))
    2620:	89 de                	mov    esi,ebx
    2622:	4c 01 c6             	add    rsi,r8
    2625:	f6 06 01             	test   BYTE PTR [rsi],0x1
    2628:	0f 84 02 01 00 00    	je     2730 <tts_minimal_getsomeattrs+0x490>
	if (VARATT_IS_1B_E(PTR))
    262e:	44 0f b6 2e          	movzx  r13d,BYTE PTR [rsi]
	return (Datum) (uintptr_t) X;
    2632:	48 89 f0             	mov    rax,rsi
    2635:	41 80 fd 01          	cmp    r13b,0x1
    2639:	0f 84 0f 01 00 00    	je     274e <tts_minimal_getsomeattrs+0x4ae>
	else if (VARATT_IS_1B(PTR))
    263f:	41 f6 c5 01          	test   r13b,0x1
    2643:	0f 84 3f 01 00 00    	je     2788 <tts_minimal_getsomeattrs+0x4e8>
		return VARSIZE_1B(PTR);
    2649:	41 d0 ed             	shr    r13b,1
		*off += VARSIZE_ANY(DatumGetPointer(res));
    264c:	45 0f b6 ed          	movzx  r13d,r13b
			values[attnum] = (Datum) 0;
    2650:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    2654:	49 83 c2 01          	add    r10,0x1
    2658:	44 01 eb             	add    ebx,r13d
    265b:	4d 39 ca             	cmp    r10,r9
    265e:	0f 85 4c fe ff ff    	jne    24b0 <tts_minimal_getsomeattrs+0x210>
    2664:	e9 b4 fe ff ff       	jmp    251d <tts_minimal_getsomeattrs+0x27d>
    2669:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
	return (Datum) X;
    2670:	48 0f be 1e          	movsx  rbx,BYTE PTR [rsi]
				return CharGetDatum(*((const char *) T));
    2674:	e9 07 ff ff ff       	jmp    2580 <tts_minimal_getsomeattrs+0x2e0>
    2679:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
	return (Datum) X;
    2680:	48 0f bf 1e          	movsx  rbx,WORD PTR [rsi]
				return Int16GetDatum(*((const int16 *) T));
    2684:	e9 f7 fe ff ff       	jmp    2580 <tts_minimal_getsomeattrs+0x2e0>
    2689:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
		off += cattr->attlen;
    2690:	41 0f bf 75 02       	movsx  esi,WORD PTR [r13+0x2]
		if (attnum == reqnatts)
    2695:	49 63 ec             	movsxd rbp,r12d
		off += cattr->attlen;
    2698:	48 8b 54 24 f0       	mov    rdx,QWORD PTR [rsp-0x10]
    269d:	01 f3                	add    ebx,esi
		if (attnum == reqnatts)
    269f:	48 39 e8             	cmp    rax,rbp
    26a2:	0f 85 0f fd ff ff    	jne    23b7 <tts_minimal_getsomeattrs+0x117>
	*offp = off;
    26a8:	41 89 5b 68          	mov    DWORD PTR [r11+0x68],ebx
}
    26ac:	5b                   	pop    rbx
    26ad:	5d                   	pop    rbp
    26ae:	41 5c                	pop    r12
    26b0:	41 5d                	pop    r13
    26b2:	41 5e                	pop    r14
    26b4:	41 5f                	pop    r15
    26b6:	c3                   	ret
    26b7:	66 0f 1f 84 00 00 00 	nop    WORD PTR [rax+rax*1+0x0]
    26be:	00 00 
		tp = (char *) tup + MAXALIGN(offsetof(HeapTupleHeaderData, t_bits));
    26c0:	4c 8d 43 18          	lea    r8,[rbx+0x18]
		if (reqnatts > firstNonGuaranteedAttr)
    26c4:	41 39 c4             	cmp    r12d,eax
    26c7:	0f 8e cb 00 00 00    	jle    2798 <tts_minimal_getsomeattrs+0x4f8>
			natts = Min(HeapTupleHeaderGetNatts(tup), reqnatts);
    26cd:	0f b7 43 12          	movzx  eax,WORD PTR [rbx+0x12]
    26d1:	25 ff 07 00 00       	and    eax,0x7ff
    26d6:	44 39 e0             	cmp    eax,r12d
    26d9:	41 0f 4f c4          	cmovg  eax,r12d
    26dd:	41 89 c6             	mov    r14d,eax
    26e0:	4c 63 c8             	movsxd r9,eax
    26e3:	e9 36 fc ff ff       	jmp    231e <tts_minimal_getsomeattrs+0x7e>
    26e8:	0f 1f 84 00 00 00 00 	nop    DWORD PTR [rax+rax*1+0x0]
    26ef:	00 
    26f0:	31 ed                	xor    ebp,ebp
		firstNonGuaranteedAttr = 0;
    26f2:	31 c0                	xor    eax,eax
    26f4:	e9 e2 fb ff ff       	jmp    22db <tts_minimal_getsomeattrs+0x3b>
    26f9:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
	return (Datum) X;
    2700:	49 0f be 45 00       	movsx  rax,BYTE PTR [r13+0x0]
			values[attnum] = (Datum) 0;
    2705:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    2709:	49 83 c2 01          	add    r10,0x1
    270d:	4d 39 ca             	cmp    r10,r9
    2710:	0f 85 9a fd ff ff    	jne    24b0 <tts_minimal_getsomeattrs+0x210>
    2716:	e9 02 fe ff ff       	jmp    251d <tts_minimal_getsomeattrs+0x27d>
    271b:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
    2720:	41 d0 ee             	shr    r14b,1
		*off += VARSIZE_ANY(DatumGetPointer(res));
    2723:	45 0f b6 f6          	movzx  r14d,r14b
    2727:	e9 55 fd ff ff       	jmp    2481 <tts_minimal_getsomeattrs+0x1e1>
    272c:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
			*off = TYPEALIGN(attalignby, *off);
    2730:	8d 5c 18 ff          	lea    ebx,[rax+rbx*1-0x1]
    2734:	f7 d8                	neg    eax
    2736:	21 c3                	and    ebx,eax
		res = PointerGetDatum(tupptr + *off);
    2738:	89 de                	mov    esi,ebx
    273a:	4c 01 c6             	add    rsi,r8
	if (VARATT_IS_1B_E(PTR))
    273d:	44 0f b6 2e          	movzx  r13d,BYTE PTR [rsi]
	return (Datum) (uintptr_t) X;
    2741:	48 89 f0             	mov    rax,rsi
    2744:	41 80 fd 01          	cmp    r13b,0x1
    2748:	0f 85 f1 fe ff ff    	jne    263f <tts_minimal_getsomeattrs+0x39f>
	return VARTAG_1B_E(PTR);
    274e:	0f b6 76 01          	movzx  esi,BYTE PTR [rsi+0x1]
	if (tag == VARTAG_INDIRECT)
    2752:	83 fe 01             	cmp    esi,0x1
    2755:	0f 84 02 02 00 00    	je     295d <tts_minimal_getsomeattrs+0x6bd>
	return ((tag & ~1) == VARTAG_EXPANDED_RO);
    275b:	41 89 f5             	mov    r13d,esi
    275e:	41 83 e5 fe          	and    r13d,0xfffffffe
	else if (VARTAG_IS_EXPANDED(tag))
    2762:	41 83 fd 02          	cmp    r13d,0x2
    2766:	0f 84 f1 01 00 00    	je     295d <tts_minimal_getsomeattrs+0x6bd>
	else if (tag == VARTAG_ONDISK)
    276c:	83 fe 12             	cmp    esi,0x12
    276f:	40 0f 94 c6          	sete   sil
    2773:	40 0f b6 f6          	movzx  esi,sil
    2777:	48 c1 e6 04          	shl    rsi,0x4
		*off += VARSIZE_ANY(DatumGetPointer(res));
    277b:	44 8d 6e 02          	lea    r13d,[rsi+0x2]
    277f:	e9 cc fe ff ff       	jmp    2650 <tts_minimal_getsomeattrs+0x3b0>
    2784:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
		return VARSIZE_4B(PTR);
    2788:	44 8b 2e             	mov    r13d,DWORD PTR [rsi]
    278b:	41 c1 ed 02          	shr    r13d,0x2
    278f:	e9 bc fe ff ff       	jmp    2650 <tts_minimal_getsomeattrs+0x3b0>
    2794:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]
			natts = reqnatts;
    2798:	4d 63 cc             	movsxd r9,r12d
    279b:	45 89 e6             	mov    r14d,r12d
    279e:	e9 7b fb ff ff       	jmp    231e <tts_minimal_getsomeattrs+0x7e>
    27a3:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
	return (Datum) X;
    27a8:	49 63 45 00          	movsxd rax,DWORD PTR [r13+0x0]
			values[attnum] = (Datum) 0;
    27ac:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    27b0:	49 83 c2 01          	add    r10,0x1
    27b4:	4d 39 ca             	cmp    r10,r9
    27b7:	0f 85 f3 fc ff ff    	jne    24b0 <tts_minimal_getsomeattrs+0x210>
    27bd:	e9 5b fd ff ff       	jmp    251d <tts_minimal_getsomeattrs+0x27d>
    27c2:	66 0f 1f 44 00 00    	nop    WORD PTR [rax+rax*1+0x0]
	return (Datum) X;
    27c8:	49 0f bf 45 00       	movsx  rax,WORD PTR [r13+0x0]
			values[attnum] = (Datum) 0;
    27cd:	4a 89 04 d7          	mov    QWORD PTR [rdi+r10*8],rax
	for (; attnum < natts; attnum++)
    27d1:	49 83 c2 01          	add    r10,0x1
    27d5:	4d 39 ca             	cmp    r10,r9
    27d8:	0f 85 d2 fc ff ff    	jne    24b0 <tts_minimal_getsomeattrs+0x210>
    27de:	e9 3a fd ff ff       	jmp    251d <tts_minimal_getsomeattrs+0x27d>
    27e3:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
	return VARTAG_1B_E(PTR);
    27e8:	45 0f b6 6d 01       	movzx  r13d,BYTE PTR [r13+0x1]
	if (tag == VARTAG_INDIRECT)
    27ed:	41 83 fd 01          	cmp    r13d,0x1
    27f1:	0f 84 5b 01 00 00    	je     2952 <tts_minimal_getsomeattrs+0x6b2>
	return ((tag & ~1) == VARTAG_EXPANDED_RO);
    27f7:	45 89 ee             	mov    r14d,r13d
    27fa:	41 83 e6 fe          	and    r14d,0xfffffffe
	else if (VARTAG_IS_EXPANDED(tag))
    27fe:	41 83 fe 02          	cmp    r14d,0x2
    2802:	0f 84 4a 01 00 00    	je     2952 <tts_minimal_getsomeattrs+0x6b2>
	else if (tag == VARTAG_ONDISK)
    2808:	41 83 fd 12          	cmp    r13d,0x12
    280c:	41 0f 94 c5          	sete   r13b
    2810:	45 0f b6 ed          	movzx  r13d,r13b
    2814:	49 c1 e5 04          	shl    r13,0x4
    2818:	45 8d 75 02          	lea    r14d,[r13+0x2]
    281c:	e9 60 fc ff ff       	jmp    2481 <tts_minimal_getsomeattrs+0x1e1>
    2821:	0f 1f 80 00 00 00 00 	nop    DWORD PTR [rax+0x0]
	int			nattByte = natts >> 3;
    2828:	45 89 cd             	mov    r13d,r9d
    282b:	41 c1 fd 03          	sar    r13d,0x3
	for (bytenum = 0; bytenum < nattByte; bytenum++)
    282f:	45 85 ed             	test   r13d,r13d
    2832:	0f 8e 30 01 00 00    	jle    2968 <tts_minimal_getsomeattrs+0x6c8>
    2838:	48 8d 73 17          	lea    rsi,[rbx+0x17]
    283c:	31 ff                	xor    edi,edi
    283e:	eb 10                	jmp    2850 <tts_minimal_getsomeattrs+0x5b0>
    2840:	83 c7 01             	add    edi,0x1
    2843:	48 83 c6 01          	add    rsi,0x1
    2847:	41 39 fd             	cmp    r13d,edi
    284a:	0f 84 ec 00 00 00    	je     293c <tts_minimal_getsomeattrs+0x69c>
		if (bits[bytenum] != 0xFF)
    2850:	0f b6 06             	movzx  eax,BYTE PTR [rsi]
    2853:	3c ff                	cmp    al,0xff
    2855:	74 e9                	je     2840 <tts_minimal_getsomeattrs+0x5a0>
	res = bytenum << 3;
    2857:	c1 e7 03             	shl    edi,0x3
	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
    285a:	f7 d0                	not    eax
	int			nbytes = (natts + 7) >> 3;
    285c:	45 8d 69 07          	lea    r13d,[r9+0x7]
    2860:	f3 0f bc c0          	tzcnt  eax,eax
	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
    2864:	01 f8                	add    eax,edi
	res = Min(res, natts);
    2866:	41 39 c1             	cmp    r9d,eax
    2869:	41 0f 4e c1          	cmovle eax,r9d
	int			nbytes = (natts + 7) >> 3;
    286d:	41 c1 fd 03          	sar    r13d,0x3
	res = Min(res, natts);
    2871:	4c 63 f0             	movsxd r14,eax
		isnull_8 &= UINT64CONST(0x0101010101010101);
    2874:	49 bf 01 01 01 01 01 	movabs r15,0x101010101010101
    287b:	01 01 01 
    287e:	4d 63 ed             	movsxd r13,r13d
	for (bytenum = 0; bytenum < nattByte; bytenum++)
    2881:	31 ff                	xor    edi,edi
    2883:	66 0f 1f 44 00 00    	nop    WORD PTR [rax+rax*1+0x0]
    2889:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    2890:	00 00 00 00 
    2894:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    289b:	00 00 00 00 
    289f:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    28a6:	00 00 00 00 
    28aa:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    28b1:	00 00 00 00 
    28b5:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    28bc:	00 00 00 00 
		bits8		nullbyte = ~bits[i];
    28c0:	0f b6 74 3b 17       	movzx  esi,BYTE PTR [rbx+rdi*1+0x17]
    28c5:	f7 d6                	not    esi
		isnull_8 |= ((uint64) ((nullbyte >> 4) * SPREAD_BITS_MULTIPLIER_32)) << 32;
    28c7:	89 f0                	mov    eax,esi
		isnull_8 = (nullbyte & 0xf) * SPREAD_BITS_MULTIPLIER_32;
    28c9:	83 e6 0f             	and    esi,0xf
		isnull_8 |= ((uint64) ((nullbyte >> 4) * SPREAD_BITS_MULTIPLIER_32)) << 32;
    28cc:	c0 e8 04             	shr    al,0x4
		isnull_8 = (nullbyte & 0xf) * SPREAD_BITS_MULTIPLIER_32;
    28cf:	48 69 f6 81 40 20 00 	imul   rsi,rsi,0x204081
		isnull_8 |= ((uint64) ((nullbyte >> 4) * SPREAD_BITS_MULTIPLIER_32)) << 32;
    28d6:	83 e0 0f             	and    eax,0xf
    28d9:	48 69 c0 81 40 20 00 	imul   rax,rax,0x204081
    28e0:	48 c1 e0 20          	shl    rax,0x20
    28e4:	48 09 f0             	or     rax,rsi
		isnull_8 &= UINT64CONST(0x0101010101010101);
    28e7:	4c 21 f8             	and    rax,r15
    28ea:	48 89 04 f9          	mov    QWORD PTR [rcx+rdi*8],rax
	for (int i = 0; i < nbytes; i++, isnull += 8)
    28ee:	48 83 c7 01          	add    rdi,0x1
    28f2:	4c 39 ef             	cmp    rdi,r13
    28f5:	75 c9                	jne    28c0 <tts_minimal_getsomeattrs+0x620>
			firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, firstNullAttr);
    28f7:	45 39 f2             	cmp    r10d,r14d
    28fa:	4d 0f 4f d6          	cmovg  r10,r14
    28fe:	e9 1b fa ff ff       	jmp    231e <tts_minimal_getsomeattrs+0x7e>
    2903:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]
	for (; attnum < firstNullAttr; attnum++)
    2908:	49 89 c2             	mov    r10,rax
    290b:	e9 90 fb ff ff       	jmp    24a0 <tts_minimal_getsomeattrs+0x200>
	for (; attnum < natts; attnum++)
    2910:	4d 89 d1             	mov    r9,r10
    2913:	e9 05 fc ff ff       	jmp    251d <tts_minimal_getsomeattrs+0x27d>
    2918:	0f 1f 84 00 00 00 00 	nop    DWORD PTR [rax+rax*1+0x0]
    291f:	00 
		*offp = off;
    2920:	41 89 5b 68          	mov    DWORD PTR [r11+0x68],ebx
		slot_getmissingattrs(slot, attnum, reqnatts);
    2924:	44 89 e2             	mov    edx,r12d
}
    2927:	5b                   	pop    rbx
		slot_getmissingattrs(slot, attnum, reqnatts);
    2928:	44 89 ce             	mov    esi,r9d
}
    292b:	5d                   	pop    rbp
		slot_getmissingattrs(slot, attnum, reqnatts);
    292c:	4c 89 df             	mov    rdi,r11
}
    292f:	41 5c                	pop    r12
    2931:	41 5d                	pop    r13
    2933:	41 5e                	pop    r14
    2935:	41 5f                	pop    r15
		slot_getmissingattrs(slot, attnum, reqnatts);
    2937:	e9 b4 f1 ff ff       	jmp    1af0 <slot_getmissingattrs>
	res = bytenum << 3;
    293c:	42 8d 3c ed 00 00 00 	lea    edi,[r13*8+0x0]
    2943:	00 
	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
    2944:	4d 63 ed             	movsxd r13,r13d
    2947:	42 0f b6 44 2b 17    	movzx  eax,BYTE PTR [rbx+r13*1+0x17]
    294d:	e9 08 ff ff ff       	jmp    285a <tts_minimal_getsomeattrs+0x5ba>
    2952:	41 be 0a 00 00 00    	mov    r14d,0xa
    2958:	e9 24 fb ff ff       	jmp    2481 <tts_minimal_getsomeattrs+0x1e1>
    295d:	41 bd 0a 00 00 00    	mov    r13d,0xa
    2963:	e9 e8 fc ff ff       	jmp    2650 <tts_minimal_getsomeattrs+0x3b0>
    2968:	0f b6 43 17          	movzx  eax,BYTE PTR [rbx+0x17]
	int			nbytes = (natts + 7) >> 3;
    296c:	45 8d 69 07          	lea    r13d,[r9+0x7]
	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
    2970:	f7 d0                	not    eax
    2972:	f3 0f bc c0          	tzcnt  eax,eax
	res = Min(res, natts);
    2976:	41 39 c1             	cmp    r9d,eax
    2979:	41 0f 4e c1          	cmovle eax,r9d
	int			nbytes = (natts + 7) >> 3;
    297d:	41 c1 fd 03          	sar    r13d,0x3
	res = Min(res, natts);
    2981:	4c 63 f0             	movsxd r14,eax
	for (int i = 0; i < nbytes; i++, isnull += 8)
    2984:	41 83 fd 01          	cmp    r13d,0x1
    2988:	0f 85 69 ff ff ff    	jne    28f7 <tts_minimal_getsomeattrs+0x657>
    298e:	e9 e1 fe ff ff       	jmp    2874 <tts_minimal_getsomeattrs+0x5d4>
    2993:	66 90                	xchg   ax,ax
    2995:	66 66 2e 0f 1f 84 00 	data16 cs nop WORD PTR [rax+rax*1+0x0]
    299c:	00 00 00 00 

  [text/plain] v11-0001-Introduce-deform_bench-test-module.patch (7.3K, 3-v11-0001-Introduce-deform_bench-test-module.patch)
  download | inline diff:
From 46c83290a6ed1256cbefd9fa62de808424601d70 Mon Sep 17 00:00:00 2001
From: David Rowley <[email protected]>
Date: Tue, 27 Jan 2026 15:08:09 +1300
Subject: [PATCH v11 1/5] Introduce deform_bench test module

For benchmarking tuple deformation.
---
 src/test/modules/deform_bench/.gitignore      |   4 +
 src/test/modules/deform_bench/Makefile        |  21 ++++
 .../deform_bench/deform_bench--1.0.sql        |   8 ++
 src/test/modules/deform_bench/deform_bench.c  | 107 ++++++++++++++++++
 .../modules/deform_bench/deform_bench.control |   4 +
 src/test/modules/deform_bench/meson.build     |  22 ++++
 src/test/modules/meson.build                  |   1 +
 7 files changed, 167 insertions(+)
 create mode 100644 src/test/modules/deform_bench/.gitignore
 create mode 100644 src/test/modules/deform_bench/Makefile
 create mode 100644 src/test/modules/deform_bench/deform_bench--1.0.sql
 create mode 100644 src/test/modules/deform_bench/deform_bench.c
 create mode 100644 src/test/modules/deform_bench/deform_bench.control
 create mode 100644 src/test/modules/deform_bench/meson.build

diff --git a/src/test/modules/deform_bench/.gitignore b/src/test/modules/deform_bench/.gitignore
new file mode 100644
index 00000000000..5dcb3ff9723
--- /dev/null
+++ b/src/test/modules/deform_bench/.gitignore
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/src/test/modules/deform_bench/Makefile b/src/test/modules/deform_bench/Makefile
new file mode 100644
index 00000000000..b5fc0f7a583
--- /dev/null
+++ b/src/test/modules/deform_bench/Makefile
@@ -0,0 +1,21 @@
+# src/test/modules/deform_bench/Makefile
+
+MODULE_big = deform_bench
+OBJS = deform_bench.o
+
+EXTENSION = deform_bench
+DATA = deform_bench--1.0.sql
+PGFILEDESC = "deform_bench - tuple deform benchmarking"
+
+REGRESS = deform_bench
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/deform_bench
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/src/test/modules/deform_bench/deform_bench--1.0.sql b/src/test/modules/deform_bench/deform_bench--1.0.sql
new file mode 100644
index 00000000000..492b71dba3b
--- /dev/null
+++ b/src/test/modules/deform_bench/deform_bench--1.0.sql
@@ -0,0 +1,8 @@
+/* deform_bench--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION deform_bench" to load this file. \quit
+
+CREATE FUNCTION deform_bench(tableoid Oid, attnum int[]) RETURNS FLOAT
+AS 'MODULE_PATHNAME', 'deform_bench'
+LANGUAGE C VOLATILE STRICT;
diff --git a/src/test/modules/deform_bench/deform_bench.c b/src/test/modules/deform_bench/deform_bench.c
new file mode 100644
index 00000000000..7838f639bef
--- /dev/null
+++ b/src/test/modules/deform_bench/deform_bench.c
@@ -0,0 +1,107 @@
+/*-------------------------------------------------------------------------
+ *
+ * deform_bench.c
+ *
+ * for benchmarking tuple deformation routines
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <time.h>
+#include <sys/time.h>
+
+#include "access/heapam.h"
+#include "access/relscan.h"
+#include "catalog/pg_am_d.h"
+#include "catalog/pg_type_d.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "utils/array.h"
+#include "utils/arrayaccess.h"
+#include "utils/builtins.h"
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(deform_bench);
+
+Datum
+deform_bench(PG_FUNCTION_ARGS)
+{
+	Oid			tableoid = PG_GETARG_OID(0);
+	ArrayType  *array = PG_GETARG_ARRAYTYPE_P(1);
+	TableScanDesc scan;
+	Relation	rel;
+	TupleDesc	tupdesc;
+	TupleTableSlot *slot;
+	Datum	   *elem_datums = NULL;
+	bool	   *elem_nulls = NULL;
+	int			elem_count;
+	int		   *attnums;
+	clock_t		start,
+				end;
+
+	rel = relation_open(tableoid, AccessShareLock);
+
+	if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("only heap AM is supported")));
+
+	tupdesc = RelationGetDescr(rel);
+	slot = MakeTupleTableSlot(tupdesc, &TTSOpsBufferHeapTuple);
+	scan = table_beginscan_strat(rel, GetActiveSnapshot(), 0, NULL, true, false);
+
+	/*
+	 * The array is used to allow callers to define how many atts to deform.
+	 * e.g: '{1,10}'::int[] would deform attnum=1, then in a 2nd pass deform
+	 * the remainder up to attnum=10.  Passing an element as NULL means all
+	 * attnums.  This allows simulation of incremental deformation.  Generally
+	 * if you're passing an array with more than 1 element, then the array
+	 * should be in ascending order.  Doing something like '{10,1}' would mean
+	 * we've already deformed 10 attributes and on the 2nd pass there's
+	 * nothing to do since attnum=1 was already deformed in the first pass.
+	 *
+	 * You'll get an ERROR if you pass a number higher than the number of
+	 * attributes in the table.
+	 */
+	deconstruct_array(array,
+					  INT4OID,
+					  sizeof(int32),
+					  true,
+					  'i',
+					  &elem_datums,
+					  &elem_nulls,
+					  &elem_count);
+
+	attnums = palloc_array(int, elem_count);
+
+	for (int i = 0; i < elem_count; i++)
+	{
+		/* Make a NULL element mean all attributes */
+		if (elem_nulls[i])
+			attnums[i] = tupdesc->natts;
+		else
+			attnums[i] = DatumGetInt32(elem_datums[i]);
+	}
+
+	start = clock();
+
+	while (heap_getnextslot(scan, ForwardScanDirection, slot))
+	{
+		CHECK_FOR_INTERRUPTS();
+
+		/* Deform in stages according to the attnums array */
+		for (int i = 0; i < elem_count; i++)
+			slot_getsomeattrs(slot, attnums[i]);
+	}
+
+	end = clock();
+
+	ExecDropSingleTupleTableSlot(slot);
+	table_endscan(scan);
+	relation_close(rel, AccessShareLock);
+
+
+	/* Return the scan loop's time, as measured by clock(), in milliseconds */
+	PG_RETURN_FLOAT8((double) (end - start) / (CLOCKS_PER_SEC / 1000));
+}
diff --git a/src/test/modules/deform_bench/deform_bench.control b/src/test/modules/deform_bench/deform_bench.control
new file mode 100644
index 00000000000..a2023f9d738
--- /dev/null
+++ b/src/test/modules/deform_bench/deform_bench.control
@@ -0,0 +1,4 @@
+# deform_bench extension
+comment = 'functions for benchmarking tuple deformation'
+default_version = '1.0'
+module_pathname = '$libdir/deform_bench'
diff --git a/src/test/modules/deform_bench/meson.build b/src/test/modules/deform_bench/meson.build
new file mode 100644
index 00000000000..82049585244
--- /dev/null
+++ b/src/test/modules/deform_bench/meson.build
@@ -0,0 +1,22 @@
+# Copyright (c) 2026, PostgreSQL Global Development Group
+
+deform_bench_sources = files(
+  'deform_bench.c',
+)
+
+if host_system == 'windows'
+  deform_bench_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+    '--NAME', 'deform_bench',
+    '--FILEDESC', 'deform_bench - benchmarking tuple deformation',])
+endif
+
+deform_bench = shared_module('deform_bench',
+  deform_bench_sources,
+  kwargs: pg_test_mod_args,
+)
+test_install_libs += deform_bench
+
+test_install_data += files(
+  'deform_bench--1.0.sql',
+  'deform_bench.control',
+)
diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build
index 2634a519935..ef2b0af4581 100644
--- a/src/test/modules/meson.build
+++ b/src/test/modules/meson.build
@@ -2,6 +2,7 @@
 
 subdir('brin')
 subdir('commit_ts')
+subdir('deform_bench')
 subdir('delay_execution')
 subdir('dummy_index_am')
 subdir('dummy_seclabel')
-- 
2.51.0



  [text/plain] v11-0002-Allow-sibling-call-optimization-in-slot_getsomea.patch (7.3K, 4-v11-0002-Allow-sibling-call-optimization-in-slot_getsomea.patch)
  download | inline diff:
From 5d372c316557406e319b26dcf381d896aecea226 Mon Sep 17 00:00:00 2001
From: David Rowley <[email protected]>
Date: Mon, 16 Feb 2026 14:20:19 +1300
Subject: [PATCH v11 2/5] Allow sibling call optimization in
 slot_getsomeattrs_int()

This changes the TupleTableSlotOps contract to make it so the
getsomeattrs() function is in charge of calling
slot_getmissingattrs().

Since this removes all code from slot_getsomeattrs_int() aside from the
getsomeattrs() call itself, we may as well adjust slot_getsomeattrs() so
that it calls getsomeattrs() directly.  We leave slot_getsomeattrs_int()
intact as this is still called from the JIT code.
---
 src/backend/executor/execTuples.c | 57 ++++++++++++++++---------------
 src/include/executor/tuptable.h   | 13 ++++---
 2 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c
index b768eae9e53..5b9bb21fa7b 100644
--- a/src/backend/executor/execTuples.c
+++ b/src/backend/executor/execTuples.c
@@ -73,7 +73,7 @@
 static TupleDesc ExecTypeFromTLInternal(List *targetList,
 										bool skipjunk);
 static pg_attribute_always_inline void slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
-															  int natts);
+															  int reqnatts);
 static inline void tts_buffer_heap_store_tuple(TupleTableSlot *slot,
 											   HeapTuple tuple,
 											   Buffer buffer,
@@ -1108,7 +1108,10 @@ slot_deform_heap_tuple_internal(TupleTableSlot *slot, HeapTuple tuple,
  * slot_deform_heap_tuple
  *		Given a TupleTableSlot, extract data from the slot's physical tuple
  *		into its Datum/isnull arrays.  Data is extracted up through the
- *		natts'th column (caller must ensure this is a legal column number).
+ *		reqnatts'th column.  If there are insufficient attributes in the given
+ *		tuple, then slot_getmissingattrs() is called to populate the
+ *		remainder.  If reqnatts is above the number of attributes in the
+ *		slot's TupleDesc, an error is raised.
  *
  *		This is essentially an incremental version of heap_deform_tuple:
  *		on each call we extract attributes up to the one needed, without
@@ -1120,7 +1123,7 @@ slot_deform_heap_tuple_internal(TupleTableSlot *slot, HeapTuple tuple,
  */
 static pg_attribute_always_inline void
 slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
-					   int natts)
+					   int reqnatts)
 {
 	bool		hasnulls = HeapTupleHasNulls(tuple);
 	int			attnum;
@@ -1128,13 +1131,14 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 	bool		slow;			/* can we use/set attcacheoff? */
 
 	/* We can only fetch as many attributes as the tuple has. */
-	natts = Min(HeapTupleHeaderGetNatts(tuple->t_data), natts);
+	natts = Min(HeapTupleHeaderGetNatts(tuple->t_data), reqnatts);
 
 	/*
 	 * Check whether the first call for this tuple, and initialize or restore
 	 * loop state.
 	 */
 	attnum = slot->tts_nvalid;
+	slot->tts_nvalid = reqnatts;
 	if (attnum == 0)
 	{
 		/* Start from the first attribute */
@@ -1199,12 +1203,15 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 	/*
 	 * Save state for next execution
 	 */
-	slot->tts_nvalid = attnum;
 	*offp = off;
 	if (slow)
 		slot->tts_flags |= TTS_FLAG_SLOW;
 	else
 		slot->tts_flags &= ~TTS_FLAG_SLOW;
+
+	/* Fetch any missing attrs and raise an error if reqnatts is invalid. */
+	if (unlikely(attnum < reqnatts))
+		slot_getmissingattrs(slot, attnum, reqnatts);
 }
 
 const TupleTableSlotOps TTSOpsVirtual = {
@@ -2058,34 +2065,36 @@ slot_getmissingattrs(TupleTableSlot *slot, int startAttNum, int lastAttNum)
 {
 	AttrMissing *attrmiss = NULL;
 
+	/* Check for invalid attnums */
+	if (unlikely(lastAttNum > slot->tts_tupleDescriptor->natts))
+		elog(ERROR, "invalid attribute number %d", lastAttNum);
+
 	if (slot->tts_tupleDescriptor->constr)
 		attrmiss = slot->tts_tupleDescriptor->constr->missing;
 
 	if (!attrmiss)
 	{
 		/* no missing values array at all, so just fill everything in as NULL */
-		memset(slot->tts_values + startAttNum, 0,
-			   (lastAttNum - startAttNum) * sizeof(Datum));
-		memset(slot->tts_isnull + startAttNum, 1,
-			   (lastAttNum - startAttNum) * sizeof(bool));
+		for (int attnum = startAttNum; attnum < lastAttNum; attnum++)
+		{
+			slot->tts_values[attnum] = (Datum) 0;
+			slot->tts_isnull[attnum] = true;
+		}
 	}
 	else
 	{
-		int			missattnum;
-
-		/* if there is a missing values array we must process them one by one */
-		for (missattnum = startAttNum;
-			 missattnum < lastAttNum;
-			 missattnum++)
+		/* use attrmiss to set the missing values */
+		for (int attnum = startAttNum; attnum < lastAttNum; attnum++)
 		{
-			slot->tts_values[missattnum] = attrmiss[missattnum].am_value;
-			slot->tts_isnull[missattnum] = !attrmiss[missattnum].am_present;
+			slot->tts_values[attnum] = attrmiss[attnum].am_value;
+			slot->tts_isnull[attnum] = !attrmiss[attnum].am_present;
 		}
 	}
 }
 
 /*
- * slot_getsomeattrs_int - workhorse for slot_getsomeattrs()
+ * slot_getsomeattrs_int
+ *		external function to call getsomeattrs() for use in JIT
  */
 void
 slot_getsomeattrs_int(TupleTableSlot *slot, int attnum)
@@ -2094,21 +2103,13 @@ slot_getsomeattrs_int(TupleTableSlot *slot, int attnum)
 	Assert(slot->tts_nvalid < attnum);	/* checked in slot_getsomeattrs */
 	Assert(attnum > 0);
 
-	if (unlikely(attnum > slot->tts_tupleDescriptor->natts))
-		elog(ERROR, "invalid attribute number %d", attnum);
-
 	/* Fetch as many attributes as possible from the underlying tuple. */
 	slot->tts_ops->getsomeattrs(slot, attnum);
 
 	/*
-	 * If the underlying tuple doesn't have enough attributes, tuple
-	 * descriptor must have the missing attributes.
+	 * Avoid putting new code here as that would prevent the compiler from
+	 * using the sibling call optimization for the above function.
 	 */
-	if (unlikely(slot->tts_nvalid < attnum))
-	{
-		slot_getmissingattrs(slot, slot->tts_nvalid, attnum);
-		slot->tts_nvalid = attnum;
-	}
 }
 
 /* ----------------------------------------------------------------
diff --git a/src/include/executor/tuptable.h b/src/include/executor/tuptable.h
index a2dfd707e78..3b09abbf99f 100644
--- a/src/include/executor/tuptable.h
+++ b/src/include/executor/tuptable.h
@@ -151,10 +151,12 @@ struct TupleTableSlotOps
 
 	/*
 	 * Fill up first natts entries of tts_values and tts_isnull arrays with
-	 * values from the tuple contained in the slot. The function may be called
-	 * with natts more than the number of attributes available in the tuple,
-	 * in which case it should set tts_nvalid to the number of returned
-	 * columns.
+	 * values from the tuple contained in the slot and set the slot's
+	 * tts_nvalid to natts. The function may be called with an natts value
+	 * more than the number of attributes available in the tuple, in which
+	 * case the function must call slot_getmissingattrs() to populate the
+	 * remaining attributes.  The function must raise an ERROR if 'natts' is
+	 * higher than the number of attributes in the slot's TupleDesc.
 	 */
 	void		(*getsomeattrs) (TupleTableSlot *slot, int natts);
 
@@ -357,8 +359,9 @@ extern void slot_getsomeattrs_int(TupleTableSlot *slot, int attnum);
 static inline void
 slot_getsomeattrs(TupleTableSlot *slot, int attnum)
 {
+	/* Populate slot with attributes up to 'attnum', if it's not already */
 	if (slot->tts_nvalid < attnum)
-		slot_getsomeattrs_int(slot, attnum);
+		slot->tts_ops->getsomeattrs(slot, attnum);
 }
 
 /*
-- 
2.51.0



  [text/plain] v11-0005-Reduce-size-of-CompactAttribute-struct-to-8-byte.patch (5.5K, 5-v11-0005-Reduce-size-of-CompactAttribute-struct-to-8-byte.patch)
  download | inline diff:
From 099a6186e1886432ed24653178ab1ce9113900c9 Mon Sep 17 00:00:00 2001
From: David Rowley <[email protected]>
Date: Mon, 23 Feb 2026 09:39:37 +1300
Subject: [PATCH v11 5/5] Reduce size of CompactAttribute struct to 8 bytes

Previously, this was 16 bytes.  With the use of some bitflags and by
reducing the attcacheoff field size to a 16-bit type, we can halve the
size of the struct.

Caching offsets larger than what will fit in a 16-bit int is unlikely to
help much, as a tuple that wide is very likely to have some
non-fixed-width types anyway, the offsets of which we cannot cache.
---
 src/backend/access/common/tupdesc.c | 10 ++++++++++
 src/backend/executor/execTuples.c   | 16 ++++++++++++----
 src/include/access/tupdesc.h        | 16 ++++++++--------
 3 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index c68561337d7..71461ba6096 100644
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -530,6 +530,16 @@ TupleDescFinalize(TupleDesc tupdesc)
 
 		off = att_nominal_alignby(off, cattr->attalignby);
 
+		/*
+		 * attcacheoff is an int16, so don't try to cache any offsets larger
+		 * than will fit in that type.  Any attributes which are offset more
+		 * than 2^15 are likely due to variable-length attributes.  Since we
+		 * don't cache offsets for or beyond variable-length attributes, using
+		 * an int16 rather than an int32 here is unlikely to cost us anything.
+		 */
+		if (off > PG_INT16_MAX)
+			break;
+
 		cattr->attcacheoff = off;
 
 		off += cattr->attlen;
diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c
index 83a8c02894d..345b22ca932 100644
--- a/src/backend/executor/execTuples.c
+++ b/src/backend/executor/execTuples.c
@@ -1013,6 +1013,7 @@ static pg_attribute_always_inline void
 slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 					   int reqnatts)
 {
+	CompactAttribute *cattrs;
 	CompactAttribute *cattr;
 	TupleDesc	tupleDesc = slot->tts_tupleDescriptor;
 	HeapTupleHeader tup = tuple->t_data;
@@ -1101,6 +1102,13 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 	values = slot->tts_values;
 	slot->tts_nvalid = reqnatts;
 
+	/*
+	 * We store the tupleDesc's CompactAttribute array in 'cattrs' as gcc
+	 * seems to be unwilling to optimize accessing the CompactAttribute
+	 * element efficiently when accessing it via TupleDescCompactAttr().
+	 */
+	cattrs = tupleDesc->compact_attrs;
+
 	/* Ensure we calculated tp correctly */
 	Assert(tp == (char *) tup + tup->t_hoff);
 
@@ -1111,7 +1119,7 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 			int			attlen;
 
 			isnull[attnum] = false;
-			cattr = TupleDescCompactAttr(tupleDesc, attnum);
+			cattr = &cattrs[attnum];
 			attlen = cattr->attlen;
 
 			/* We don't expect any non-byval types */
@@ -1156,7 +1164,7 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 		do
 		{
 			isnull[attnum] = false;
-			cattr = TupleDescCompactAttr(tupleDesc, attnum);
+			cattr = &cattrs[attnum];
 
 			off = cattr->attcacheoff;
 			values[attnum] = fetch_att_noerr(tp + off,
@@ -1183,7 +1191,7 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 		int			attlen;
 
 		isnull[attnum] = false;
-		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+		cattr = &cattrs[attnum];
 		attlen = cattr->attlen;
 
 		/*
@@ -1216,7 +1224,7 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 			continue;
 		}
 
-		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+		cattr = &cattrs[attnum];
 		attlen = cattr->attlen;
 
 		/* As above, we don't expect cstrings */
diff --git a/src/include/access/tupdesc.h b/src/include/access/tupdesc.h
index ad7bc013812..e98036b58bf 100644
--- a/src/include/access/tupdesc.h
+++ b/src/include/access/tupdesc.h
@@ -55,7 +55,7 @@ typedef struct TupleConstr
  *		directly after the FormData_pg_attribute struct is populated or
  *		altered in any way.
  *
- * Currently, this struct is 16 bytes.  Any code changes which enlarge this
+ * Currently, this struct is 8 bytes.  Any code changes which enlarge this
  * struct should be considered very carefully.
  *
  * Code which must access a TupleDesc's attribute data should always make use
@@ -67,17 +67,17 @@ typedef struct TupleConstr
  */
 typedef struct CompactAttribute
 {
-	int32		attcacheoff;	/* fixed offset into tuple, if known, or -1 */
+	int16		attcacheoff;	/* fixed offset into tuple, if known, or -1 */
 	int16		attlen;			/* attr len in bytes or -1 = varlen, -2 =
 								 * cstring */
 	bool		attbyval;		/* as FormData_pg_attribute.attbyval */
-	bool		attispackable;	/* FormData_pg_attribute.attstorage !=
-								 * TYPSTORAGE_PLAIN */
-	bool		atthasmissing;	/* as FormData_pg_attribute.atthasmissing */
-	bool		attisdropped;	/* as FormData_pg_attribute.attisdropped */
-	bool		attgenerated;	/* FormData_pg_attribute.attgenerated != '\0' */
-	char		attnullability; /* status of not-null constraint, see below */
 	uint8		attalignby;		/* alignment requirement in bytes */
+	bool		attispackable:1;	/* FormData_pg_attribute.attstorage !=
+									 * TYPSTORAGE_PLAIN */
+	bool		atthasmissing:1;	/* as FormData_pg_attribute.atthasmissing */
+	bool		attisdropped:1; /* as FormData_pg_attribute.attisdropped */
+	bool		attgenerated:1; /* FormData_pg_attribute.attgenerated != '\0' */
+	char		attnullability; /* status of not-null constraint, see below */
 } CompactAttribute;
 
 /* Valid values for CompactAttribute->attnullability */
-- 
2.51.0



  [text/plain] v11-0003-Add-empty-TupleDescFinalize-function.patch (29.0K, 6-v11-0003-Add-empty-TupleDescFinalize-function.patch)
  download | inline diff:
From 3fa14f2411303b5433dd2e3434c840a77395e213 Mon Sep 17 00:00:00 2001
From: David Rowley <[email protected]>
Date: Wed, 21 Jan 2026 15:41:37 +1300
Subject: [PATCH v11 3/5] Add empty TupleDescFinalize() function

Currently does nothing, but will in a future commit.
---
 contrib/dblink/dblink.c                             |  4 ++++
 contrib/pg_buffercache/pg_buffercache_pages.c       |  2 ++
 contrib/pg_visibility/pg_visibility.c               |  2 ++
 src/backend/access/brin/brin_tuple.c                |  1 +
 src/backend/access/common/tupdesc.c                 | 13 +++++++++++++
 src/backend/access/gin/ginutil.c                    |  1 +
 src/backend/access/gist/gistscan.c                  |  1 +
 src/backend/access/spgist/spgutils.c                |  1 +
 src/backend/access/transam/twophase.c               |  1 +
 src/backend/access/transam/xlogfuncs.c              |  1 +
 src/backend/backup/basebackup_copy.c                |  3 +++
 src/backend/catalog/index.c                         |  2 ++
 src/backend/catalog/pg_publication.c                |  1 +
 src/backend/catalog/toasting.c                      |  6 ++++++
 src/backend/commands/explain.c                      |  1 +
 src/backend/commands/functioncmds.c                 |  1 +
 src/backend/commands/sequence.c                     |  1 +
 src/backend/commands/tablecmds.c                    |  4 ++++
 src/backend/commands/wait.c                         |  1 +
 src/backend/executor/execSRF.c                      |  2 ++
 src/backend/executor/execTuples.c                   |  4 ++++
 src/backend/executor/nodeFunctionscan.c             |  2 ++
 src/backend/parser/parse_relation.c                 |  4 +++-
 src/backend/parser/parse_target.c                   |  2 ++
 .../replication/libpqwalreceiver/libpqwalreceiver.c |  1 +
 src/backend/replication/walsender.c                 |  5 +++++
 src/backend/utils/adt/acl.c                         |  1 +
 src/backend/utils/adt/genfile.c                     |  1 +
 src/backend/utils/adt/lockfuncs.c                   |  1 +
 src/backend/utils/adt/orderedsetaggs.c              |  1 +
 src/backend/utils/adt/pgstatfuncs.c                 |  5 +++++
 src/backend/utils/adt/tsvector_op.c                 |  1 +
 src/backend/utils/cache/relcache.c                  |  8 ++++++++
 src/backend/utils/fmgr/funcapi.c                    |  6 ++++++
 src/backend/utils/misc/guc_funcs.c                  |  5 +++++
 src/include/access/tupdesc.h                        |  1 +
 src/pl/plpgsql/src/pl_comp.c                        |  2 ++
 .../test_custom_stats/test_custom_fixed_stats.c     |  1 +
 src/test/modules/test_predtest/test_predtest.c      |  1 +
 39 files changed, 100 insertions(+), 1 deletion(-)

diff --git a/contrib/dblink/dblink.c b/contrib/dblink/dblink.c
index 2498d80c8e7..4038950a6ef 100644
--- a/contrib/dblink/dblink.c
+++ b/contrib/dblink/dblink.c
@@ -881,6 +881,7 @@ materializeResult(FunctionCallInfo fcinfo, PGconn *conn, PGresult *res)
 		tupdesc = CreateTemplateTupleDesc(1);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "status",
 						   TEXTOID, -1, 0);
+		TupleDescFinalize(tupdesc);
 		ntuples = 1;
 		nfields = 1;
 	}
@@ -1044,6 +1045,7 @@ materializeQueryResult(FunctionCallInfo fcinfo,
 			tupdesc = CreateTemplateTupleDesc(1);
 			TupleDescInitEntry(tupdesc, (AttrNumber) 1, "status",
 							   TEXTOID, -1, 0);
+			TupleDescFinalize(tupdesc);
 			attinmeta = TupleDescGetAttInMetadata(tupdesc);
 
 			oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
@@ -1529,6 +1531,8 @@ dblink_get_pkey(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "colname",
 						   TEXTOID, -1, 0);
 
+		TupleDescFinalize(tupdesc);
+
 		/*
 		 * Generate attribute metadata needed later to produce tuples from raw
 		 * C strings
diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c
index 89b86855243..a6b4fb5252b 100644
--- a/contrib/pg_buffercache/pg_buffercache_pages.c
+++ b/contrib/pg_buffercache/pg_buffercache_pages.c
@@ -174,6 +174,7 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
 			TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
 							   INT4OID, -1, 0);
 
+		TupleDescFinalize(tupledesc);
 		fctx->tupdesc = BlessTupleDesc(tupledesc);
 
 		/* Allocate NBuffers worth of BufferCachePagesRec records. */
@@ -442,6 +443,7 @@ pg_buffercache_os_pages_internal(FunctionCallInfo fcinfo, bool include_numa)
 		TupleDescInitEntry(tupledesc, (AttrNumber) 3, "numa_node",
 						   INT4OID, -1, 0);
 
+		TupleDescFinalize(tupledesc);
 		fctx->tupdesc = BlessTupleDesc(tupledesc);
 		fctx->include_numa = include_numa;
 
diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index 9bc3a784bf7..dfab0b64cf5 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -469,6 +469,8 @@ pg_visibility_tupdesc(bool include_blkno, bool include_pd)
 		TupleDescInitEntry(tupdesc, ++a, "pd_all_visible", BOOLOID, -1, 0);
 	Assert(a == maxattr);
 
+	TupleDescFinalize(tupdesc);
+
 	return BlessTupleDesc(tupdesc);
 }
 
diff --git a/src/backend/access/brin/brin_tuple.c b/src/backend/access/brin/brin_tuple.c
index 69c233c62eb..742ac089a28 100644
--- a/src/backend/access/brin/brin_tuple.c
+++ b/src/backend/access/brin/brin_tuple.c
@@ -84,6 +84,7 @@ brtuple_disk_tupdesc(BrinDesc *brdesc)
 
 		MemoryContextSwitchTo(oldcxt);
 
+		TupleDescFinalize(tupdesc);
 		brdesc->bd_disktdesc = tupdesc;
 	}
 
diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index b69d10f0a45..2137385a833 100644
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -221,6 +221,9 @@ CreateTupleDesc(int natts, Form_pg_attribute *attrs)
 		memcpy(TupleDescAttr(desc, i), attrs[i], ATTRIBUTE_FIXED_PART_SIZE);
 		populate_compact_attribute(desc, i);
 	}
+
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
@@ -265,6 +268,8 @@ CreateTupleDescCopy(TupleDesc tupdesc)
 	desc->tdtypeid = tupdesc->tdtypeid;
 	desc->tdtypmod = tupdesc->tdtypmod;
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
@@ -311,6 +316,8 @@ CreateTupleDescTruncatedCopy(TupleDesc tupdesc, int natts)
 	desc->tdtypeid = tupdesc->tdtypeid;
 	desc->tdtypmod = tupdesc->tdtypmod;
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
@@ -396,6 +403,8 @@ CreateTupleDescCopyConstr(TupleDesc tupdesc)
 	desc->tdtypeid = tupdesc->tdtypeid;
 	desc->tdtypmod = tupdesc->tdtypmod;
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
@@ -438,6 +447,8 @@ TupleDescCopy(TupleDesc dst, TupleDesc src)
 	 * source's refcount would be wrong in any case.)
 	 */
 	dst->tdrefcount = -1;
+
+	TupleDescFinalize(dst);
 }
 
 /*
@@ -1065,6 +1076,8 @@ BuildDescFromLists(const List *names, const List *types, const List *typmods, co
 		TupleDescInitEntryCollation(desc, attnum, attcollation);
 	}
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index ff927279cc3..fe7b984ff32 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -129,6 +129,7 @@ initGinState(GinState *state, Relation index)
 							   attr->attndims);
 			TupleDescInitEntryCollation(state->tupdesc[i], (AttrNumber) 2,
 										attr->attcollation);
+			TupleDescFinalize(state->tupdesc[i]);
 		}
 
 		/*
diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c
index f23bc4a6757..c65f93abdae 100644
--- a/src/backend/access/gist/gistscan.c
+++ b/src/backend/access/gist/gistscan.c
@@ -201,6 +201,7 @@ gistrescan(IndexScanDesc scan, ScanKey key, int nkeys,
 											 attno - 1)->atttypid,
 							   -1, 0);
 		}
+		TupleDescFinalize(so->giststate->fetchTupdesc);
 		scan->xs_hitupdesc = so->giststate->fetchTupdesc;
 
 		/* Also create a memory context that will hold the returned tuples */
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index 9f5379b87ac..b246e8127db 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -340,6 +340,7 @@ getSpGistTupleDesc(Relation index, SpGistTypeDesc *keyType)
 			TupleDescCompactAttr(outTupDesc, i)->attcacheoff = -1;
 
 		populate_compact_attribute(outTupDesc, spgKeyColumn);
+		TupleDescFinalize(outTupDesc);
 	}
 	return outTupDesc;
 }
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index e4340b59640..7f4ed02a6b9 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -744,6 +744,7 @@ pg_prepared_xact(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 5, "dbid",
 						   OIDOID, -1, 0);
 
+		TupleDescFinalize(tupdesc);
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
 		/*
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 2efe4105efb..b6bc616c74c 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -400,6 +400,7 @@ pg_walfile_name_offset(PG_FUNCTION_ARGS)
 	TupleDescInitEntry(resultTupleDesc, (AttrNumber) 2, "file_offset",
 					   INT4OID, -1, 0);
 
+	TupleDescFinalize(resultTupleDesc);
 	resultTupleDesc = BlessTupleDesc(resultTupleDesc);
 
 	/*
diff --git a/src/backend/backup/basebackup_copy.c b/src/backend/backup/basebackup_copy.c
index 07f58b39d8c..6c3453efd80 100644
--- a/src/backend/backup/basebackup_copy.c
+++ b/src/backend/backup/basebackup_copy.c
@@ -357,6 +357,8 @@ SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
 	 */
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "tli", INT8OID, -1, 0);
 
+	TupleDescFinalize(tupdesc);
+
 	/* send RowDescription */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
 
@@ -388,6 +390,7 @@ SendTablespaceList(List *tablespaces)
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 1, "spcoid", OIDOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "spclocation", TEXTOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 3, "size", INT8OID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	/* send RowDescription */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 43de42ce39e..75e97fb394a 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -481,6 +481,8 @@ ConstructTupleDescriptor(Relation heapRelation,
 		populate_compact_attribute(indexTupDesc, i);
 	}
 
+	TupleDescFinalize(indexTupDesc);
+
 	return indexTupDesc;
 }
 
diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c
index 9a4791c573e..fa353a0dd37 100644
--- a/src/backend/catalog/pg_publication.c
+++ b/src/backend/catalog/pg_publication.c
@@ -1230,6 +1230,7 @@ pg_get_publication_tables(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "qual",
 						   PG_NODE_TREEOID, -1, 0);
 
+		TupleDescFinalize(tupdesc);
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 		funcctx->user_fctx = table_infos;
 
diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c
index c78dcea98c1..078a1cf5127 100644
--- a/src/backend/catalog/toasting.c
+++ b/src/backend/catalog/toasting.c
@@ -229,6 +229,12 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
 	TupleDescAttr(tupdesc, 1)->attcompression = InvalidCompressionMethod;
 	TupleDescAttr(tupdesc, 2)->attcompression = InvalidCompressionMethod;
 
+	populate_compact_attribute(tupdesc, 0);
+	populate_compact_attribute(tupdesc, 1);
+	populate_compact_attribute(tupdesc, 2);
+
+	TupleDescFinalize(tupdesc);
+
 	/*
 	 * Toast tables for regular relations go in pg_toast; those for temp
 	 * relations go into the per-backend temp-toast-table namespace.
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 93918a223b8..5f922c3f5c2 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -281,6 +281,7 @@ ExplainResultDesc(ExplainStmt *stmt)
 	tupdesc = CreateTemplateTupleDesc(1);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "QUERY PLAN",
 					   result_type, -1, 0);
+	TupleDescFinalize(tupdesc);
 	return tupdesc;
 }
 
diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c
index 242372b1e68..3afd762e9dc 100644
--- a/src/backend/commands/functioncmds.c
+++ b/src/backend/commands/functioncmds.c
@@ -2424,6 +2424,7 @@ CallStmtResultDesc(CallStmt *stmt)
 							   -1,
 							   0);
 		}
+		TupleDescFinalize(tupdesc);
 	}
 
 	return tupdesc;
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index e1b808bbb60..551667650ba 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -1808,6 +1808,7 @@ pg_get_sequence_data(PG_FUNCTION_ARGS)
 					   BOOLOID, -1, 0);
 	TupleDescInitEntry(resultTupleDesc, (AttrNumber) 3, "page_lsn",
 					   LSNOID, -1, 0);
+	TupleDescFinalize(resultTupleDesc);
 	resultTupleDesc = BlessTupleDesc(resultTupleDesc);
 
 	seqrel = try_relation_open(relid, AccessShareLock);
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index b04b0dbd2a0..8678cecd53f 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -1030,6 +1030,8 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
 		}
 	}
 
+	TupleDescFinalize(descriptor);
+
 	/*
 	 * For relations with table AM and partitioned tables, select access
 	 * method to use: an explicitly indicated one, or (in the case of a
@@ -1458,6 +1460,8 @@ BuildDescForRelation(const List *columns)
 		populate_compact_attribute(desc, attnum - 1);
 	}
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
diff --git a/src/backend/commands/wait.c b/src/backend/commands/wait.c
index 1290df10c6f..8e920a72372 100644
--- a/src/backend/commands/wait.c
+++ b/src/backend/commands/wait.c
@@ -338,5 +338,6 @@ WaitStmtResultDesc(WaitStmt *stmt)
 	tupdesc = CreateTemplateTupleDesc(1);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "status",
 					   TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 	return tupdesc;
 }
diff --git a/src/backend/executor/execSRF.c b/src/backend/executor/execSRF.c
index a0b111dc0e4..b481e50acfb 100644
--- a/src/backend/executor/execSRF.c
+++ b/src/backend/executor/execSRF.c
@@ -272,6 +272,7 @@ ExecMakeTableFunctionResult(SetExprState *setexpr,
 									   funcrettype,
 									   -1,
 									   0);
+					TupleDescFinalize(tupdesc);
 					rsinfo.setDesc = tupdesc;
 				}
 				MemoryContextSwitchTo(oldcontext);
@@ -776,6 +777,7 @@ init_sexpr(Oid foid, Oid input_collation, Expr *node,
 							   funcrettype,
 							   -1,
 							   0);
+			TupleDescFinalize(tupdesc);
 			sexpr->funcResultDesc = tupdesc;
 			sexpr->funcReturnsTuple = false;
 		}
diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c
index 5b9bb21fa7b..bb997182481 100644
--- a/src/backend/executor/execTuples.c
+++ b/src/backend/executor/execTuples.c
@@ -2174,6 +2174,8 @@ ExecTypeFromTLInternal(List *targetList, bool skipjunk)
 		cur_resno++;
 	}
 
+	TupleDescFinalize(typeInfo);
+
 	return typeInfo;
 }
 
@@ -2208,6 +2210,8 @@ ExecTypeFromExprList(List *exprList)
 		cur_resno++;
 	}
 
+	TupleDescFinalize(typeInfo);
+
 	return typeInfo;
 }
 
diff --git a/src/backend/executor/nodeFunctionscan.c b/src/backend/executor/nodeFunctionscan.c
index 63e605e1f81..feb82d64967 100644
--- a/src/backend/executor/nodeFunctionscan.c
+++ b/src/backend/executor/nodeFunctionscan.c
@@ -414,6 +414,7 @@ ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags)
 				TupleDescInitEntryCollation(tupdesc,
 											(AttrNumber) 1,
 											exprCollation(funcexpr));
+				TupleDescFinalize(tupdesc);
 			}
 			else
 			{
@@ -485,6 +486,7 @@ ExecInitFunctionScan(FunctionScan *node, EState *estate, int eflags)
 							   0);
 		}
 
+		TupleDescFinalize(scan_tupdesc);
 		Assert(attno == natts);
 	}
 
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index e003db520de..9c415e166ee 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -1883,6 +1883,7 @@ addRangeTableEntryForFunction(ParseState *pstate,
 			TupleDescInitEntryCollation(tupdesc,
 										(AttrNumber) 1,
 										exprCollation(funcexpr));
+			TupleDescFinalize(tupdesc);
 		}
 		else if (functypclass == TYPEFUNC_RECORD)
 		{
@@ -1940,6 +1941,7 @@ addRangeTableEntryForFunction(ParseState *pstate,
 
 				i++;
 			}
+			TupleDescFinalize(tupdesc);
 
 			/*
 			 * Ensure that the coldeflist defines a legal set of names (no
@@ -2008,7 +2010,7 @@ addRangeTableEntryForFunction(ParseState *pstate,
 							   0);
 			/* no need to set collation */
 		}
-
+		TupleDescFinalize(tupdesc);
 		Assert(natts == totalatts);
 	}
 	else
diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c
index dbf5b2b5c01..a03d82c0540 100644
--- a/src/backend/parser/parse_target.c
+++ b/src/backend/parser/parse_target.c
@@ -1572,6 +1572,8 @@ expandRecordVariable(ParseState *pstate, Var *var, int levelsup)
 		}
 		Assert(lname == NULL && lvar == NULL);	/* lists same length? */
 
+		TupleDescFinalize(tupleDesc);
+
 		return tupleDesc;
 	}
 
diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
index 7c8639b32e9..9f04c9ed25d 100644
--- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
+++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
@@ -1073,6 +1073,7 @@ libpqrcv_processTuples(PGresult *pgres, WalRcvExecResult *walres,
 	for (coln = 0; coln < nRetTypes; coln++)
 		TupleDescInitEntry(walres->tupledesc, (AttrNumber) coln + 1,
 						   PQfname(pgres, coln), retTypes[coln], -1, 0);
+	TupleDescFinalize(walres->tupledesc);
 	attinmeta = TupleDescGetAttInMetadata(walres->tupledesc);
 
 	/* No point in doing more here if there were no tuples returned. */
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 2cde8ebc729..33a9e8d7f21 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -451,6 +451,7 @@ IdentifySystem(void)
 							  TEXTOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 4, "dbname",
 							  TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	/* prepare for projection of tuples */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
@@ -496,6 +497,7 @@ ReadReplicationSlot(ReadReplicationSlotCmd *cmd)
 	/* TimeLineID is unsigned, so int4 is not wide enough. */
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 3, "restart_tli",
 							  INT8OID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	memset(nulls, true, READ_REPLICATION_SLOT_COLS * sizeof(bool));
 
@@ -598,6 +600,7 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd)
 	tupdesc = CreateTemplateTupleDesc(2);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 1, "filename", TEXTOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "content", TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	TLHistoryFileName(histfname, cmd->timeline);
 	TLHistoryFilePath(path, cmd->timeline);
@@ -1015,6 +1018,7 @@ StartReplication(StartReplicationCmd *cmd)
 								  INT8OID, -1, 0);
 		TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 2, "next_tli_startpos",
 								  TEXTOID, -1, 0);
+		TupleDescFinalize(tupdesc);
 
 		/* prepare for projection of tuple */
 		tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
@@ -1369,6 +1373,7 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
 							  TEXTOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 4, "output_plugin",
 							  TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	/* prepare for projection of tuples */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
diff --git a/src/backend/utils/adt/acl.c b/src/backend/utils/adt/acl.c
index 641673f0b0e..ce07f2bc046 100644
--- a/src/backend/utils/adt/acl.c
+++ b/src/backend/utils/adt/acl.c
@@ -1819,6 +1819,7 @@ aclexplode(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "is_grantable",
 						   BOOLOID, -1, 0);
 
+		TupleDescFinalize(tupdesc);
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
 		/* allocate memory for user context */
diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c
index c083608b1d5..bfb949401d0 100644
--- a/src/backend/utils/adt/genfile.c
+++ b/src/backend/utils/adt/genfile.c
@@ -454,6 +454,7 @@ pg_stat_file(PG_FUNCTION_ARGS)
 					   "creation", TIMESTAMPTZOID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 6,
 					   "isdir", BOOLOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	memset(isnull, false, sizeof(isnull));
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
index 9dadd6da672..4481c354fd6 100644
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -146,6 +146,7 @@ pg_lock_status(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 16, "waitstart",
 						   TIMESTAMPTZOID, -1, 0);
 
+		TupleDescFinalize(tupdesc);
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
 		/*
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c
index 3b6da8e36ac..fd8b8676470 100644
--- a/src/backend/utils/adt/orderedsetaggs.c
+++ b/src/backend/utils/adt/orderedsetaggs.c
@@ -233,6 +233,7 @@ ordered_set_startup(FunctionCallInfo fcinfo, bool use_tuples)
 								   -1,
 								   0);
 
+				TupleDescFinalize(newdesc);
 				FreeTupleDesc(qstate->tupdesc);
 				qstate->tupdesc = newdesc;
 			}
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index b1df96e7b0b..0b10da3b180 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -769,6 +769,7 @@ pg_stat_get_backend_subxact(PG_FUNCTION_ARGS)
 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "subxact_overflow",
 					   BOOLOID, -1, 0);
 
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	if ((local_beentry = pgstat_get_local_beentry_by_proc_number(procNumber)) != NULL)
@@ -1670,6 +1671,7 @@ pg_stat_wal_build_tuple(PgStat_WalCounters wal_counters,
 	TupleDescInitEntry(tupdesc, (AttrNumber) 6, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
 
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	/* Fill values and NULLs */
@@ -2097,6 +2099,7 @@ pg_stat_get_archiver(PG_FUNCTION_ARGS)
 	TupleDescInitEntry(tupdesc, (AttrNumber) 7, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
 
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	/* Get statistics about the archiver process */
@@ -2178,6 +2181,7 @@ pg_stat_get_replication_slot(PG_FUNCTION_ARGS)
 					   TIMESTAMPTZOID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 13, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	namestrcpy(&slotname, text_to_cstring(slotname_text));
@@ -2265,6 +2269,7 @@ pg_stat_get_subscription_stats(PG_FUNCTION_ARGS)
 					   INT8OID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 13, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	if (!subentry)
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 71c7c7d3b3c..d8dece42b9b 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -651,6 +651,7 @@ tsvector_unnest(PG_FUNCTION_ARGS)
 						   TEXTARRAYOID, -1, 0);
 		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
 			elog(ERROR, "return type must be a row type");
+		TupleDescFinalize(tupdesc);
 		funcctx->tuple_desc = tupdesc;
 
 		funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 6b634c9fff1..770edb34e08 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -729,6 +729,8 @@ RelationBuildTupleDesc(Relation relation)
 		pfree(constr);
 		relation->rd_att->constr = NULL;
 	}
+
+	TupleDescFinalize(relation->rd_att);
 }
 
 /*
@@ -1985,6 +1987,7 @@ formrdesc(const char *relationName, Oid relationReltype,
 
 	/* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
 	TupleDescCompactAttr(relation->rd_att, 0)->attcacheoff = 0;
+	TupleDescFinalize(relation->rd_att);
 
 	/* mark not-null status */
 	if (has_not_null)
@@ -3688,6 +3691,8 @@ RelationBuildLocalRelation(const char *relname,
 	for (i = 0; i < natts; i++)
 		TupleDescAttr(rel->rd_att, i)->attrelid = relid;
 
+	TupleDescFinalize(rel->rd_att);
+
 	rel->rd_rel->reltablespace = reltablespace;
 
 	if (mapped_relation)
@@ -4443,6 +4448,7 @@ BuildHardcodedDescriptor(int natts, const FormData_pg_attribute *attrs)
 
 	/* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
 	TupleDescCompactAttr(result, 0)->attcacheoff = 0;
+	TupleDescFinalize(result);
 
 	/* Note: we don't bother to set up a TupleConstr entry */
 
@@ -6268,6 +6274,8 @@ load_relcache_init_file(bool shared)
 			populate_compact_attribute(rel->rd_att, i);
 		}
 
+		TupleDescFinalize(rel->rd_att);
+
 		/* next read the access method specific field */
 		if (fread(&len, 1, sizeof(len), fp) != sizeof(len))
 			goto read_failed;
diff --git a/src/backend/utils/fmgr/funcapi.c b/src/backend/utils/fmgr/funcapi.c
index 8a934ea8dca..516d02cfb82 100644
--- a/src/backend/utils/fmgr/funcapi.c
+++ b/src/backend/utils/fmgr/funcapi.c
@@ -340,6 +340,8 @@ get_expr_result_type(Node *expr,
 										exprCollation(col));
 			i++;
 		}
+		TupleDescFinalize(tupdesc);
+
 		if (resultTypeId)
 			*resultTypeId = rexpr->row_typeid;
 		if (resultTupleDesc)
@@ -1044,6 +1046,7 @@ resolve_polymorphic_tupdesc(TupleDesc tupdesc, oidvector *declared_args,
 		}
 	}
 
+	TupleDescFinalize(tupdesc);
 	return true;
 }
 
@@ -1853,6 +1856,8 @@ build_function_result_tupdesc_d(char prokind,
 						   0);
 	}
 
+	TupleDescFinalize(desc);
+
 	return desc;
 }
 
@@ -1970,6 +1975,7 @@ TypeGetTupleDesc(Oid typeoid, List *colaliases)
 						   typeoid,
 						   -1,
 						   0);
+		TupleDescFinalize(tupdesc);
 	}
 	else if (functypclass == TYPEFUNC_RECORD)
 	{
diff --git a/src/backend/utils/misc/guc_funcs.c b/src/backend/utils/misc/guc_funcs.c
index 8524dd3a981..472cb5393ce 100644
--- a/src/backend/utils/misc/guc_funcs.c
+++ b/src/backend/utils/misc/guc_funcs.c
@@ -444,6 +444,7 @@ GetPGVariableResultDesc(const char *name)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 1, varname,
 						   TEXTOID, -1, 0);
 	}
+	TupleDescFinalize(tupdesc);
 	return tupdesc;
 }
 
@@ -465,6 +466,7 @@ ShowGUCConfigOption(const char *name, DestReceiver *dest)
 	tupdesc = CreateTemplateTupleDesc(1);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 1, varname,
 							  TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	/* prepare for projection of tuples */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
@@ -499,6 +501,7 @@ ShowAllGUCConfig(DestReceiver *dest)
 							  TEXTOID, -1, 0);
 	TupleDescInitBuiltinEntry(tupdesc, (AttrNumber) 3, "description",
 							  TEXTOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 
 	/* prepare for projection of tuples */
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
@@ -934,6 +937,8 @@ show_all_settings(PG_FUNCTION_ARGS)
 		TupleDescInitEntry(tupdesc, (AttrNumber) 17, "pending_restart",
 						   BOOLOID, -1, 0);
 
+		TupleDescFinalize(tupdesc);
+
 		/*
 		 * Generate attribute metadata needed later to produce tuples from raw
 		 * C strings
diff --git a/src/include/access/tupdesc.h b/src/include/access/tupdesc.h
index d46cdbf7a3c..595413dbbc5 100644
--- a/src/include/access/tupdesc.h
+++ b/src/include/access/tupdesc.h
@@ -195,6 +195,7 @@ extern TupleDesc CreateTupleDescTruncatedCopy(TupleDesc tupdesc, int natts);
 
 extern TupleDesc CreateTupleDescCopyConstr(TupleDesc tupdesc);
 
+#define TupleDescFinalize(d) ((void) 0)
 #define TupleDescSize(src) \
 	(offsetof(struct TupleDescData, compact_attrs) + \
 	 (src)->natts * sizeof(CompactAttribute) + \
diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c
index 5ecc7766757..b72c963b3be 100644
--- a/src/pl/plpgsql/src/pl_comp.c
+++ b/src/pl/plpgsql/src/pl_comp.c
@@ -1912,6 +1912,8 @@ build_row_from_vars(PLpgSQL_variable **vars, int numvars)
 		TupleDescInitEntryCollation(row->rowtupdesc, i + 1, typcoll);
 	}
 
+	TupleDescFinalize(row->rowtupdesc);
+
 	return row;
 }
 
diff --git a/src/test/modules/test_custom_stats/test_custom_fixed_stats.c b/src/test/modules/test_custom_stats/test_custom_fixed_stats.c
index 485e08e5c19..f9e7c717280 100644
--- a/src/test/modules/test_custom_stats/test_custom_fixed_stats.c
+++ b/src/test/modules/test_custom_stats/test_custom_fixed_stats.c
@@ -206,6 +206,7 @@ test_custom_stats_fixed_report(PG_FUNCTION_ARGS)
 					   INT8OID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 	BlessTupleDesc(tupdesc);
 
 	values[0] = Int64GetDatum(stats->numcalls);
diff --git a/src/test/modules/test_predtest/test_predtest.c b/src/test/modules/test_predtest/test_predtest.c
index 679a5de456d..48ca2a4ea70 100644
--- a/src/test/modules/test_predtest/test_predtest.c
+++ b/src/test/modules/test_predtest/test_predtest.c
@@ -230,6 +230,7 @@ test_predtest(PG_FUNCTION_ARGS)
 					   "s_r_holds", BOOLOID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 8,
 					   "w_r_holds", BOOLOID, -1, 0);
+	TupleDescFinalize(tupdesc);
 	tupdesc = BlessTupleDesc(tupdesc);
 
 	values[0] = BoolGetDatum(strong_implied_by);
-- 
2.51.0



  [text/plain] v11-0004-Optimize-tuple-deformation.patch (66.9K, 7-v11-0004-Optimize-tuple-deformation.patch)
  download | inline diff:
From 0c4bc383f1deae72103063a7e912f276dfd4a1c5 Mon Sep 17 00:00:00 2001
From: David Rowley <[email protected]>
Date: Tue, 31 Dec 2024 09:19:24 +1300
Subject: [PATCH v11 4/5] Optimize tuple deformation

This commit includes various optimizations to improve the performance of
tuple deformation.

We now precalculate CompactAttribute's attcacheoff, which allows us to
remove the code from the deform routines which was setting the
attcacheoff.  Setting the attcacheoff is handled by TupleDescFinalize(),
which must be called before the TupleDesc is used for anything.  Having
this TupleDescFinalize() function means we can record in the TupleDesc
the first attribute which does not have an offset cached.  That
allows us to add a dedicated deforming loop to deform all attributes up
to the final one with an attcacheoff set, or up to the first NULL
attribute, whichever comes first.

We also record the maximum attribute number which is guaranteed to exist
in the tuple, that is, has a NOT NULL constraint and isn't an
atthasmissing attribute.  When deforming only attributes prior to the
guaranteed attnum, we've no need to access the tuple's natts count.  As an
additional optimization, we only count fixed-width columns when
calculating the maximum guaranteed column as this eliminates the need to
emit code to fetch byref types in the deformation loop for guaranteed
attributes.

Some locations in the code deform tuples that have yet to go through NOT
NULL constraint validation.  We're unable to perform the guaranteed
attribute optimization when that's the case.  The optimization is opt-in
via the TupleTableSlot using the TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS
flag.

This commit also adds a more efficient way of populating the isnull
array by using a bit-wise trick which performs a multiplication on the
inverse of the tuple's bitmap byte and then masks out all but the lowest
bit of each boolean's byte.  This results in much more efficient code
when compared to determining the NULLness via att_isnull().  8 isnull
elements are processed at once using this method, which means we need to
round the tts_isnull array size up to the next multiple of 8 bytes.  The
palloc code does this anyway, but the round-up needed to be formalized so
as not to overwrite the sentinel byte in debug builds.
---
 src/backend/access/common/heaptuple.c        | 360 ++++++++---------
 src/backend/access/common/indextuple.c       | 363 +++++++----------
 src/backend/access/common/tupdesc.c          |  51 +++
 src/backend/access/spgist/spgutils.c         |   3 -
 src/backend/executor/execTuples.c            | 392 +++++++++++--------
 src/backend/executor/nodeBitmapHeapscan.c    |   3 +
 src/backend/executor/nodeIndexonlyscan.c     |   3 +
 src/backend/executor/nodeIndexscan.c         |   3 +
 src/backend/executor/nodeSamplescan.c        |   3 +
 src/backend/executor/nodeSeqscan.c           |   3 +
 src/backend/executor/nodeTidrangescan.c      |   3 +
 src/backend/executor/nodeTidscan.c           |   3 +
 src/backend/jit/llvm/llvmjit_deform.c        |   6 -
 src/backend/utils/cache/relcache.c           |  12 -
 src/include/access/tupdesc.h                 |  20 +-
 src/include/access/tupmacs.h                 | 224 ++++++++++-
 src/include/executor/tuptable.h              |  17 +-
 src/test/modules/deform_bench/deform_bench.c |   1 +
 18 files changed, 846 insertions(+), 624 deletions(-)

diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index 11bec20e82e..b2ac7fef35b 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -498,19 +498,7 @@ heap_attisnull(HeapTuple tup, int attnum, TupleDesc tupleDesc)
  *		nocachegetattr
  *
  *		This only gets called from fastgetattr(), in cases where we
- *		can't use a cacheoffset and the value is not null.
- *
- *		This caches attribute offsets in the attribute descriptor.
- *
- *		An alternative way to speed things up would be to cache offsets
- *		with the tuple, but that seems more difficult unless you take
- *		the storage hit of actually putting those offsets into the
- *		tuple you send to disk.  Yuck.
- *
- *		This scheme will be slightly slower than that, but should
- *		perform well for queries which hit large #'s of tuples.  After
- *		you cache the offsets once, examining all the other tuples using
- *		the same attribute descriptor will go much quicker. -cim 5/4/91
+ *		can't use the attcacheoff and the value is not null.
  *
  *		NOTE: if you need to change this code, see also heap_deform_tuple.
  *		Also see nocache_index_getattr, which is the same code for index
@@ -522,194 +510,125 @@ nocachegetattr(HeapTuple tup,
 			   int attnum,
 			   TupleDesc tupleDesc)
 {
+	CompactAttribute *cattr;
 	HeapTupleHeader td = tup->t_data;
 	char	   *tp;				/* ptr to data part of tuple */
 	bits8	   *bp = td->t_bits;	/* ptr to null bitmap in tuple */
-	bool		slow = false;	/* do we have to walk attrs? */
 	int			off;			/* current offset within data */
+	int			startAttr;
+	int			firstNullAttr;
+	int			i;
+	bool		hasnulls = HeapTupleHasNulls(tup);
 
-	/* ----------------
-	 *	 Three cases:
-	 *
-	 *	 1: No nulls and no variable-width attributes.
-	 *	 2: Has a null or a var-width AFTER att.
-	 *	 3: Has nulls or var-widths BEFORE att.
-	 * ----------------
-	 */
+	/* Did someone forget to call TupleDescFinalize()? */
+	Assert(tupleDesc->firstNonCachedOffsetAttr >= 0);
 
 	attnum--;
 
-	if (!HeapTupleNoNulls(tup))
+	/*
+	 * To minimize the number of attributes we need to look at, start walking
+	 * the tuple at the attribute with the highest attcacheoff prior to attnum
+	 * or the first NULL attribute prior to attnum, whichever comes first.
+	 */
+	if (hasnulls)
+		firstNullAttr = first_null_attr(bp, attnum);
+	else
+		firstNullAttr = attnum;
+
+	if (tupleDesc->firstNonCachedOffsetAttr > 0)
 	{
 		/*
-		 * there's a null somewhere in the tuple
-		 *
-		 * check to see if any preceding bits are null...
+		 * Start at the highest attcacheoff attribute with no NULLs in prior
+		 * attributes.
 		 */
-		int			byte = attnum >> 3;
-		int			finalbit = attnum & 0x07;
-
-		/* check for nulls "before" final bit of last byte */
-		if ((~bp[byte]) & ((1 << finalbit) - 1))
-			slow = true;
-		else
-		{
-			/* check for nulls in any "earlier" bytes */
-			int			i;
-
-			for (i = 0; i < byte; i++)
-			{
-				if (bp[i] != 0xFF)
-				{
-					slow = true;
-					break;
-				}
-			}
-		}
+		startAttr = Min(tupleDesc->firstNonCachedOffsetAttr - 1, firstNullAttr);
+		off = TupleDescCompactAttr(tupleDesc, startAttr)->attcacheoff;
+	}
+	else
+	{
+		/* Otherwise, start at the beginning... */
+		startAttr = 0;
+		off = 0;
 	}
 
 	tp = (char *) td + td->t_hoff;
 
-	if (!slow)
+	/*
+	 * Calculate 'off' up to the first NULL attr.  We use two cheaper loops
+	 * when the tuple has no variable-width columns.  When variable-width
+	 * columns exist, we use att_addlength_pointer() to move the offset
+	 * beyond the current attribute.
+	 */
+	if (!HeapTupleHasVarWidth(tup))
 	{
-		CompactAttribute *att;
-
-		/*
-		 * If we get here, there are no nulls up to and including the target
-		 * attribute.  If we have a cached offset, we can use it.
-		 */
-		att = TupleDescCompactAttr(tupleDesc, attnum);
-		if (att->attcacheoff >= 0)
-			return fetchatt(att, tp + att->attcacheoff);
-
-		/*
-		 * Otherwise, check for non-fixed-length attrs up to and including
-		 * target.  If there aren't any, it's safe to cheaply initialize the
-		 * cached offsets for these attrs.
-		 */
-		if (HeapTupleHasVarWidth(tup))
+		for (i = startAttr; i < firstNullAttr; i++)
 		{
-			int			j;
+			cattr = TupleDescCompactAttr(tupleDesc, i);
 
-			for (j = 0; j <= attnum; j++)
-			{
-				if (TupleDescCompactAttr(tupleDesc, j)->attlen <= 0)
-				{
-					slow = true;
-					break;
-				}
-			}
+			off = att_nominal_alignby(off, cattr->attalignby);
+			off += cattr->attlen;
 		}
-	}
-
-	if (!slow)
-	{
-		int			natts = tupleDesc->natts;
-		int			j = 1;
-
-		/*
-		 * If we get here, we have a tuple with no nulls or var-widths up to
-		 * and including the target attribute, so we can use the cached offset
-		 * ... only we don't have it yet, or we'd not have got here.  Since
-		 * it's cheap to compute offsets for fixed-width columns, we take the
-		 * opportunity to initialize the cached offsets for *all* the leading
-		 * fixed-width columns, in hope of avoiding future visits to this
-		 * routine.
-		 */
-		TupleDescCompactAttr(tupleDesc, 0)->attcacheoff = 0;
-
-		/* we might have set some offsets in the slow path previously */
-		while (j < natts && TupleDescCompactAttr(tupleDesc, j)->attcacheoff > 0)
-			j++;
-
-		off = TupleDescCompactAttr(tupleDesc, j - 1)->attcacheoff +
-			TupleDescCompactAttr(tupleDesc, j - 1)->attlen;
 
-		for (; j < natts; j++)
+		for (; i < attnum; i++)
 		{
-			CompactAttribute *att = TupleDescCompactAttr(tupleDesc, j);
+			if (att_isnull(i, bp))
+				continue;
 
-			if (att->attlen <= 0)
-				break;
-
-			off = att_nominal_alignby(off, att->attalignby);
+			cattr = TupleDescCompactAttr(tupleDesc, i);
 
-			att->attcacheoff = off;
-
-			off += att->attlen;
+			off = att_nominal_alignby(off, cattr->attalignby);
+			off += cattr->attlen;
 		}
-
-		Assert(j > attnum);
-
-		off = TupleDescCompactAttr(tupleDesc, attnum)->attcacheoff;
 	}
 	else
 	{
-		bool		usecache = true;
-		int			i;
-
-		/*
-		 * Now we know that we have to walk the tuple CAREFULLY.  But we still
-		 * might be able to cache some offsets for next time.
-		 *
-		 * Note - This loop is a little tricky.  For each non-null attribute,
-		 * we have to first account for alignment padding before the attr,
-		 * then advance over the attr based on its length.  Nulls have no
-		 * storage and no alignment padding either.  We can use/set
-		 * attcacheoff until we reach either a null or a var-width attribute.
-		 */
-		off = 0;
-		for (i = 0;; i++)		/* loop exit is at "break" */
+		for (i = startAttr; i < firstNullAttr; i++)
 		{
-			CompactAttribute *att = TupleDescCompactAttr(tupleDesc, i);
+			int			attlen;
 
-			if (HeapTupleHasNulls(tup) && att_isnull(i, bp))
-			{
-				usecache = false;
-				continue;		/* this cannot be the target att */
-			}
+			cattr = TupleDescCompactAttr(tupleDesc, i);
+			attlen = cattr->attlen;
 
-			/* If we know the next offset, we can skip the rest */
-			if (usecache && att->attcacheoff >= 0)
-				off = att->attcacheoff;
-			else if (att->attlen == -1)
-			{
-				/*
-				 * We can only cache the offset for a varlena attribute if the
-				 * offset is already suitably aligned, so that there would be
-				 * no pad bytes in any case: then the offset will be valid for
-				 * either an aligned or unaligned value.
-				 */
-				if (usecache &&
-					off == att_nominal_alignby(off, att->attalignby))
-					att->attcacheoff = off;
-				else
-				{
-					off = att_pointer_alignby(off, att->attalignby, -1,
-											  tp + off);
-					usecache = false;
-				}
-			}
-			else
-			{
-				/* not varlena, so safe to use att_nominal_alignby */
-				off = att_nominal_alignby(off, att->attalignby);
+			/*
+			 * cstrings don't exist in heap tuples.  Use pg_assume to instruct
+			 * the compiler not to emit the cstring-related code in
+			 * att_addlength_pointer().
+			 */
+			pg_assume(attlen > 0 || attlen == -1);
 
-				if (usecache)
-					att->attcacheoff = off;
-			}
+			off = att_pointer_alignby(off,
+									  cattr->attalignby,
+									  attlen,
+									  tp + off);
+			off = att_addlength_pointer(off, attlen, tp + off);
+		}
 
-			if (i == attnum)
-				break;
+		for (; i < attnum; i++)
+		{
+			int			attlen;
 
-			off = att_addlength_pointer(off, att->attlen, tp + off);
+			if (att_isnull(i, bp))
+				continue;
 
-			if (usecache && att->attlen <= 0)
-				usecache = false;
+			cattr = TupleDescCompactAttr(tupleDesc, i);
+			attlen = cattr->attlen;
+
+			/* As above, heaptuples have no cstrings */
+			pg_assume(attlen > 0 || attlen == -1);
+
+			off = att_pointer_alignby(off, cattr->attalignby, attlen,
+									  tp + off);
+			off = att_addlength_pointer(off, attlen, tp + off);
 		}
 	}
 
-	return fetchatt(TupleDescCompactAttr(tupleDesc, attnum), tp + off);
+	cattr = TupleDescCompactAttr(tupleDesc, attnum);
+	off = att_pointer_alignby(off,
+							  cattr->attalignby,
+							  cattr->attlen,
+							  tp + off);
+
+	return fetchatt(cattr, tp + off);
 }
 
 /* ----------------
@@ -1347,6 +1266,7 @@ heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc,
 				  Datum *values, bool *isnull)
 {
 	HeapTupleHeader tup = tuple->t_data;
+	CompactAttribute *cattr;
 	bool		hasnulls = HeapTupleHasNulls(tuple);
 	int			tdesc_natts = tupleDesc->natts;
 	int			natts;			/* number of atts to extract */
@@ -1354,70 +1274,98 @@ heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc,
 	char	   *tp;				/* ptr to tuple data */
 	uint32		off;			/* offset in tuple data */
 	bits8	   *bp = tup->t_bits;	/* ptr to null bitmap in tuple */
-	bool		slow = false;	/* can we use/set attcacheoff? */
+	int			firstNonCacheOffsetAttr;
+	int			firstNullAttr;
 
 	natts = HeapTupleHeaderGetNatts(tup);
 
+	/* Did someone forget to call TupleDescFinalize()? */
+	Assert(tupleDesc->firstNonCachedOffsetAttr >= 0);
+
 	/*
 	 * In inheritance situations, it is possible that the given tuple actually
 	 * has more fields than the caller is expecting.  Don't run off the end of
 	 * the caller's arrays.
 	 */
 	natts = Min(natts, tdesc_natts);
+	firstNonCacheOffsetAttr = Min(tupleDesc->firstNonCachedOffsetAttr, natts);
+
+	if (hasnulls)
+	{
+		firstNullAttr = first_null_attr(bp, natts);
+
+		/*
+		 * XXX: it'd be nice to use populate_isnull_array() here, but that
+		 * requires that the isnull array's size is rounded up to the next
+		 * multiple of 8.  Doing that would require adjusting many locations
+		 * that allocate the array.
+		 */
+		firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, firstNullAttr);
+	}
+	else
+		firstNullAttr = natts;
 
 	tp = (char *) tup + tup->t_hoff;
+	attnum = 0;
 
-	off = 0;
+	if (firstNonCacheOffsetAttr > 0)
+	{
+#ifdef USE_ASSERT_CHECKING
+		/* In Assert enabled builds, verify attcacheoff is correct */
+		int			offcheck = 0;
+#endif
+		do
+		{
+			isnull[attnum] = false;
+			cattr = TupleDescCompactAttr(tupleDesc, attnum);
+			off = cattr->attcacheoff;
 
-	for (attnum = 0; attnum < natts; attnum++)
+#ifdef USE_ASSERT_CHECKING
+			offcheck = att_nominal_alignby(offcheck, cattr->attalignby);
+			Assert(offcheck == cattr->attcacheoff);
+			offcheck += cattr->attlen;
+#endif
+
+			values[attnum] = fetch_att_noerr(tp + off,
+											 cattr->attbyval,
+											 cattr->attlen);
+		} while (++attnum < firstNonCacheOffsetAttr);
+		off += cattr->attlen;
+	}
+	else
+		off = 0;
+
+	for (; attnum < firstNullAttr; attnum++)
 	{
-		CompactAttribute *thisatt = TupleDescCompactAttr(tupleDesc, attnum);
+		isnull[attnum] = false;
+		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+		values[attnum] = align_fetch_then_add(tp,
+											  &off,
+											  cattr->attbyval,
+											  cattr->attlen,
+											  cattr->attalignby);
+	}
+
+	for (; attnum < natts; attnum++)
+	{
+		Assert(hasnulls);
 
-		if (hasnulls && att_isnull(attnum, bp))
+		if (att_isnull(attnum, bp))
 		{
 			values[attnum] = (Datum) 0;
 			isnull[attnum] = true;
-			slow = true;		/* can't use attcacheoff anymore */
 			continue;
 		}
 
 		isnull[attnum] = false;
-
-		if (!slow && thisatt->attcacheoff >= 0)
-			off = thisatt->attcacheoff;
-		else if (thisatt->attlen == -1)
-		{
-			/*
-			 * We can only cache the offset for a varlena attribute if the
-			 * offset is already suitably aligned, so that there would be no
-			 * pad bytes in any case: then the offset will be valid for either
-			 * an aligned or unaligned value.
-			 */
-			if (!slow &&
-				off == att_nominal_alignby(off, thisatt->attalignby))
-				thisatt->attcacheoff = off;
-			else
-			{
-				off = att_pointer_alignby(off, thisatt->attalignby, -1,
-										  tp + off);
-				slow = true;
-			}
-		}
-		else
-		{
-			/* not varlena, so safe to use att_nominal_alignby */
-			off = att_nominal_alignby(off, thisatt->attalignby);
-
-			if (!slow)
-				thisatt->attcacheoff = off;
-		}
-
-		values[attnum] = fetchatt(thisatt, tp + off);
-
-		off = att_addlength_pointer(off, thisatt->attlen, tp + off);
-
-		if (thisatt->attlen <= 0)
-			slow = true;		/* can't use attcacheoff anymore */
+		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+
+		/* align 'off', fetch the datum, and increment off beyond the datum */
+		values[attnum] = align_fetch_then_add(tp,
+											  &off,
+											  cattr->attbyval,
+											  cattr->attlen,
+											  cattr->attalignby);
 	}
 
 	/*
diff --git a/src/backend/access/common/indextuple.c b/src/backend/access/common/indextuple.c
index d6350201e01..8c410853191 100644
--- a/src/backend/access/common/indextuple.c
+++ b/src/backend/access/common/indextuple.c
@@ -223,18 +223,6 @@ index_form_tuple_context(TupleDesc tupleDescriptor,
  *
  *		This gets called from index_getattr() macro, and only in cases
  *		where we can't use cacheoffset and the value is not null.
- *
- *		This caches attribute offsets in the attribute descriptor.
- *
- *		An alternative way to speed things up would be to cache offsets
- *		with the tuple, but that seems more difficult unless you take
- *		the storage hit of actually putting those offsets into the
- *		tuple you send to disk.  Yuck.
- *
- *		This scheme will be slightly slower than that, but should
- *		perform well for queries which hit large #'s of tuples.  After
- *		you cache the offsets once, examining all the other tuples using
- *		the same attribute descriptor will go much quicker. -cim 5/4/91
  * ----------------
  */
 Datum
@@ -242,205 +230,124 @@ nocache_index_getattr(IndexTuple tup,
 					  int attnum,
 					  TupleDesc tupleDesc)
 {
+	CompactAttribute *cattr;
 	char	   *tp;				/* ptr to data part of tuple */
 	bits8	   *bp = NULL;		/* ptr to null bitmap in tuple */
-	bool		slow = false;	/* do we have to walk attrs? */
 	int			data_off;		/* tuple data offset */
 	int			off;			/* current offset within data */
+	int			startAttr;
+	int			firstNullAttr;
+	bool		hasnulls = IndexTupleHasNulls(tup);
+	int			i;
 
-	/* ----------------
-	 *	 Three cases:
-	 *
-	 *	 1: No nulls and no variable-width attributes.
-	 *	 2: Has a null or a var-width AFTER att.
-	 *	 3: Has nulls or var-widths BEFORE att.
-	 * ----------------
-	 */
-
-	data_off = IndexInfoFindDataOffset(tup->t_info);
+	/* Did someone forget to call TupleDescFinalize()? */
+	Assert(tupleDesc->firstNonCachedOffsetAttr >= 0);
 
 	attnum--;
 
-	if (IndexTupleHasNulls(tup))
-	{
-		/*
-		 * there's a null somewhere in the tuple
-		 *
-		 * check to see if desired att is null
-		 */
+	data_off = IndexInfoFindDataOffset(tup->t_info);
+	tp = (char *) tup + data_off;
 
-		/* XXX "knows" t_bits are just after fixed tuple header! */
+	/*
+	 * To minimize the number of attributes we need to look at, start walking
+	 * the tuple at the attribute with the highest attcacheoff prior to attnum
+	 * or the first NULL attribute prior to attnum, whichever comes first.
+	 */
+	if (hasnulls)
+	{
 		bp = (bits8 *) ((char *) tup + sizeof(IndexTupleData));
-
-		/*
-		 * Now check to see if any preceding bits are null...
-		 */
-		{
-			int			byte = attnum >> 3;
-			int			finalbit = attnum & 0x07;
-
-			/* check for nulls "before" final bit of last byte */
-			if ((~bp[byte]) & ((1 << finalbit) - 1))
-				slow = true;
-			else
-			{
-				/* check for nulls in any "earlier" bytes */
-				int			i;
-
-				for (i = 0; i < byte; i++)
-				{
-					if (bp[i] != 0xFF)
-					{
-						slow = true;
-						break;
-					}
-				}
-			}
-		}
+		firstNullAttr = first_null_attr(bp, attnum);
 	}
+	else
+		firstNullAttr = attnum;
 
-	tp = (char *) tup + data_off;
-
-	if (!slow)
+	if (tupleDesc->firstNonCachedOffsetAttr > 0)
 	{
-		CompactAttribute *att;
-
-		/*
-		 * If we get here, there are no nulls up to and including the target
-		 * attribute.  If we have a cached offset, we can use it.
-		 */
-		att = TupleDescCompactAttr(tupleDesc, attnum);
-		if (att->attcacheoff >= 0)
-			return fetchatt(att, tp + att->attcacheoff);
-
 		/*
-		 * Otherwise, check for non-fixed-length attrs up to and including
-		 * target.  If there aren't any, it's safe to cheaply initialize the
-		 * cached offsets for these attrs.
+		 * Start at the highest attcacheoff attribute with no NULLs in prior
+		 * attributes.
 		 */
-		if (IndexTupleHasVarwidths(tup))
-		{
-			int			j;
-
-			for (j = 0; j <= attnum; j++)
-			{
-				if (TupleDescCompactAttr(tupleDesc, j)->attlen <= 0)
-				{
-					slow = true;
-					break;
-				}
-			}
-		}
+		startAttr = Min(tupleDesc->firstNonCachedOffsetAttr - 1, firstNullAttr);
+		off = TupleDescCompactAttr(tupleDesc, startAttr)->attcacheoff;
 	}
-
-	if (!slow)
+	else
 	{
-		int			natts = tupleDesc->natts;
-		int			j = 1;
-
-		/*
-		 * If we get here, we have a tuple with no nulls or var-widths up to
-		 * and including the target attribute, so we can use the cached offset
-		 * ... only we don't have it yet, or we'd not have got here.  Since
-		 * it's cheap to compute offsets for fixed-width columns, we take the
-		 * opportunity to initialize the cached offsets for *all* the leading
-		 * fixed-width columns, in hope of avoiding future visits to this
-		 * routine.
-		 */
-		TupleDescCompactAttr(tupleDesc, 0)->attcacheoff = 0;
+		/* Otherwise, start at the beginning... */
+		startAttr = 0;
+		off = 0;
+	}
 
-		/* we might have set some offsets in the slow path previously */
-		while (j < natts && TupleDescCompactAttr(tupleDesc, j)->attcacheoff > 0)
-			j++;
+	/*
+	 * Calculate 'off' up to the first NULL attr.  We use two cheaper loops
+	 * when the tuple has no variable-width columns.  When variable-width
+	 * columns exist, we use att_addlength_pointer() to move the offset
+	 * beyond the current attribute.
+	 */
+	if (IndexTupleHasVarwidths(tup))
+	{
+		/* Calculate the offset up until the first NULL */
+		for (i = startAttr; i < firstNullAttr; i++)
+		{
+			cattr = TupleDescCompactAttr(tupleDesc, i);
 
-		off = TupleDescCompactAttr(tupleDesc, j - 1)->attcacheoff +
-			TupleDescCompactAttr(tupleDesc, j - 1)->attlen;
+			off = att_pointer_alignby(off,
+									  cattr->attalignby,
+									  cattr->attlen,
+									  tp + off);
+			off = att_addlength_pointer(off, cattr->attlen, tp + off);
+		}
 
-		for (; j < natts; j++)
+		/* Calculate the offset for any remaining columns. */
+		for (; i < attnum; i++)
 		{
-			CompactAttribute *att = TupleDescCompactAttr(tupleDesc, j);
+			Assert(hasnulls);
 
-			if (att->attlen <= 0)
-				break;
+			if (att_isnull(i, bp))
+				continue;
 
-			off = att_nominal_alignby(off, att->attalignby);
+			cattr = TupleDescCompactAttr(tupleDesc, i);
 
-			att->attcacheoff = off;
-
-			off += att->attlen;
+			off = att_pointer_alignby(off,
+									  cattr->attalignby,
+									  cattr->attlen,
+									  tp + off);
+			off = att_addlength_pointer(off, cattr->attlen, tp + off);
 		}
-
-		Assert(j > attnum);
-
-		off = TupleDescCompactAttr(tupleDesc, attnum)->attcacheoff;
 	}
 	else
 	{
-		bool		usecache = true;
-		int			i;
+		/* Handle tuples with only fixed-width attributes */
 
-		/*
-		 * Now we know that we have to walk the tuple CAREFULLY.  But we still
-		 * might be able to cache some offsets for next time.
-		 *
-		 * Note - This loop is a little tricky.  For each non-null attribute,
-		 * we have to first account for alignment padding before the attr,
-		 * then advance over the attr based on its length.  Nulls have no
-		 * storage and no alignment padding either.  We can use/set
-		 * attcacheoff until we reach either a null or a var-width attribute.
-		 */
-		off = 0;
-		for (i = 0;; i++)		/* loop exit is at "break" */
+		/* Calculate the offset up until the first NULL */
+		for (i = startAttr; i < firstNullAttr; i++)
 		{
-			CompactAttribute *att = TupleDescCompactAttr(tupleDesc, i);
+			cattr = TupleDescCompactAttr(tupleDesc, i);
 
-			if (IndexTupleHasNulls(tup) && att_isnull(i, bp))
-			{
-				usecache = false;
-				continue;		/* this cannot be the target att */
-			}
-
-			/* If we know the next offset, we can skip the rest */
-			if (usecache && att->attcacheoff >= 0)
-				off = att->attcacheoff;
-			else if (att->attlen == -1)
-			{
-				/*
-				 * We can only cache the offset for a varlena attribute if the
-				 * offset is already suitably aligned, so that there would be
-				 * no pad bytes in any case: then the offset will be valid for
-				 * either an aligned or unaligned value.
-				 */
-				if (usecache &&
-					off == att_nominal_alignby(off, att->attalignby))
-					att->attcacheoff = off;
-				else
-				{
-					off = att_pointer_alignby(off, att->attalignby, -1,
-											  tp + off);
-					usecache = false;
-				}
-			}
-			else
-			{
-				/* not varlena, so safe to use att_nominal_alignby */
-				off = att_nominal_alignby(off, att->attalignby);
+			Assert(cattr->attlen > 0);
+			off = att_nominal_alignby(off, cattr->attalignby);
+			off += cattr->attlen;
+		}
 
-				if (usecache)
-					att->attcacheoff = off;
-			}
+		/* Calculate the offset for any remaining columns. */
+		for (; i < attnum; i++)
+		{
+			Assert(hasnulls);
 
-			if (i == attnum)
-				break;
+			if (att_isnull(i, bp))
+				continue;
 
-			off = att_addlength_pointer(off, att->attlen, tp + off);
+			cattr = TupleDescCompactAttr(tupleDesc, i);
 
-			if (usecache && att->attlen <= 0)
-				usecache = false;
+			Assert(cattr->attlen > 0);
+			off = att_nominal_alignby(off, cattr->attalignby);
+			off += cattr->attlen;
 		}
 	}
 
-	return fetchatt(TupleDescCompactAttr(tupleDesc, attnum), tp + off);
+	cattr = TupleDescCompactAttr(tupleDesc, attnum);
+	off = att_pointer_alignby(off, cattr->attalignby,
+							  cattr->attlen, tp + off);
+	return fetchatt(cattr, tp + off);
 }
 
 /*
@@ -480,63 +387,87 @@ index_deform_tuple_internal(TupleDesc tupleDescriptor,
 							Datum *values, bool *isnull,
 							char *tp, bits8 *bp, int hasnulls)
 {
+	CompactAttribute *cattr;
 	int			natts = tupleDescriptor->natts; /* number of atts to extract */
-	int			attnum;
-	int			off = 0;		/* offset in tuple data */
-	bool		slow = false;	/* can we use/set attcacheoff? */
+	int			attnum = 0;
+	uint32		off = 0;		/* offset in tuple data */
+	int			firstNonCacheOffsetAttr;
+	int			firstNullAttr;
 
 	/* Assert to protect callers who allocate fixed-size arrays */
 	Assert(natts <= INDEX_MAX_KEYS);
 
-	for (attnum = 0; attnum < natts; attnum++)
+	/* Did someone forget to call TupleDescFinalize()? */
+	Assert(tupleDescriptor->firstNonCachedOffsetAttr >= 0);
+
+	firstNonCacheOffsetAttr = Min(tupleDescriptor->firstNonCachedOffsetAttr, natts);
+
+	if (hasnulls)
+	{
+		firstNullAttr = first_null_attr(bp, natts);
+		firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, firstNullAttr);
+	}
+	else
+		firstNullAttr = natts;
+
+	if (firstNonCacheOffsetAttr > 0)
 	{
-		CompactAttribute *thisatt = TupleDescCompactAttr(tupleDescriptor, attnum);
+#ifdef USE_ASSERT_CHECKING
+		/* In Assert enabled builds, verify attcacheoff is correct */
+		off = 0;
+#endif
 
-		if (hasnulls && att_isnull(attnum, bp))
+		do
 		{
-			values[attnum] = (Datum) 0;
-			isnull[attnum] = true;
-			slow = true;		/* can't use attcacheoff anymore */
-			continue;
-		}
+			isnull[attnum] = false;
+			cattr = TupleDescCompactAttr(tupleDescriptor, attnum);
 
-		isnull[attnum] = false;
+#ifdef USE_ASSERT_CHECKING
+			off = att_nominal_alignby(off, cattr->attalignby);
+			Assert(off == cattr->attcacheoff);
+			off += cattr->attlen;
+#endif
 
-		if (!slow && thisatt->attcacheoff >= 0)
-			off = thisatt->attcacheoff;
-		else if (thisatt->attlen == -1)
-		{
-			/*
-			 * We can only cache the offset for a varlena attribute if the
-			 * offset is already suitably aligned, so that there would be no
-			 * pad bytes in any case: then the offset will be valid for either
-			 * an aligned or unaligned value.
-			 */
-			if (!slow &&
-				off == att_nominal_alignby(off, thisatt->attalignby))
-				thisatt->attcacheoff = off;
-			else
-			{
-				off = att_pointer_alignby(off, thisatt->attalignby, -1,
-										  tp + off);
-				slow = true;
-			}
-		}
-		else
-		{
-			/* not varlena, so safe to use att_nominal_alignby */
-			off = att_nominal_alignby(off, thisatt->attalignby);
+			values[attnum] = fetch_att_noerr(tp + cattr->attcacheoff, cattr->attbyval,
+											 cattr->attlen);
+		} while (++attnum < firstNonCacheOffsetAttr);
 
-			if (!slow)
-				thisatt->attcacheoff = off;
-		}
+		off = cattr->attcacheoff + cattr->attlen;
+	}
 
-		values[attnum] = fetchatt(thisatt, tp + off);
+	for (; attnum < firstNullAttr; attnum++)
+	{
+		isnull[attnum] = false;
+		cattr = TupleDescCompactAttr(tupleDescriptor, attnum);
+
+		/* align 'off', fetch the datum, and increment off beyond the datum */
+		values[attnum] = align_fetch_then_add(tp,
+											  &off,
+											  cattr->attbyval,
+											  cattr->attlen,
+											  cattr->attalignby);
+	}
 
-		off = att_addlength_pointer(off, thisatt->attlen, tp + off);
+	for (; attnum < natts; attnum++)
+	{
+		Assert(hasnulls);
 
-		if (thisatt->attlen <= 0)
-			slow = true;		/* can't use attcacheoff anymore */
+		if (att_isnull(attnum, bp))
+		{
+			values[attnum] = (Datum) 0;
+			isnull[attnum] = true;
+			continue;
+		}
+
+		isnull[attnum] = false;
+		cattr = TupleDescCompactAttr(tupleDescriptor, attnum);
+
+		/* align 'off', fetch the datum, and increment off beyond the datum */
+		values[attnum] = align_fetch_then_add(tp,
+											  &off,
+											  cattr->attbyval,
+											  cattr->attlen,
+											  cattr->attalignby);
 	}
 }
 
diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index 2137385a833..c68561337d7 100644
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -197,6 +197,10 @@ CreateTemplateTupleDesc(int natts)
 	desc->tdtypmod = -1;
 	desc->tdrefcount = -1;		/* assume not reference-counted */
 
+	/* This will be set to the correct value by TupleDescFinalize() */
+	desc->firstNonCachedOffsetAttr = -1;
+	desc->firstNonGuaranteedAttr = -1;
+
 	return desc;
 }
 
@@ -457,6 +461,9 @@ TupleDescCopy(TupleDesc dst, TupleDesc src)
  *		descriptor to another.
  *
  * !!! Constraints and defaults are not copied !!!
+ *
+ * The caller must take care of calling TupleDescFinalize() on 'dst' once all
+ * TupleDesc changes have been made.
  */
 void
 TupleDescCopyEntry(TupleDesc dst, AttrNumber dstAttno,
@@ -489,6 +496,50 @@ TupleDescCopyEntry(TupleDesc dst, AttrNumber dstAttno,
 	populate_compact_attribute(dst, dstAttno - 1);
 }
 
+/*
+ * TupleDescFinalize
+ *		Finalize the given TupleDesc.  This must be called after the
+ *		attribute arrays have been populated or adjusted by any code.
+ *
+ * Must be called after populate_compact_attribute() and before
+ * BlessTupleDesc().
+ */
+void
+TupleDescFinalize(TupleDesc tupdesc)
+{
+	int			firstNonCachedOffsetAttr = 0;
+	int			firstNonGuaranteedAttr = tupdesc->natts;
+	int			off = 0;
+
+	for (int i = 0; i < tupdesc->natts; i++)
+	{
+		CompactAttribute *cattr = TupleDescCompactAttr(tupdesc, i);
+
+		/*
+		 * Find the highest attnum which is guaranteed to exist in all tuples
+		 * in the table.  We currently only pay attention to byval attributes
+		 * to allow additional optimizations during tuple deformation.
+		 */
+		if (firstNonGuaranteedAttr == tupdesc->natts &&
+			(cattr->attnullability != ATTNULLABLE_VALID || !cattr->attbyval ||
+			 cattr->atthasmissing || cattr->attisdropped || cattr->attlen <= 0))
+			firstNonGuaranteedAttr = i;
+
+		if (cattr->attlen <= 0)
+			break;
+
+		off = att_nominal_alignby(off, cattr->attalignby);
+
+		cattr->attcacheoff = off;
+
+		off += cattr->attlen;
+		firstNonCachedOffsetAttr = i + 1;
+	}
+
+	tupdesc->firstNonCachedOffsetAttr = firstNonCachedOffsetAttr;
+	tupdesc->firstNonGuaranteedAttr = firstNonGuaranteedAttr;
+}
+
 /*
  * Free a TupleDesc including all substructure
  */
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index b246e8127db..a4694bd8065 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -335,9 +335,6 @@ getSpGistTupleDesc(Relation index, SpGistTypeDesc *keyType)
 		/* We shouldn't need to bother with making these valid: */
 		att->attcompression = InvalidCompressionMethod;
 		att->attcollation = InvalidOid;
-		/* In case we changed typlen, we'd better reset following offsets */
-		for (int i = spgFirstIncludeColumn; i < outTupDesc->natts; i++)
-			TupleDescCompactAttr(outTupDesc, i)->attcacheoff = -1;
 
 		populate_compact_attribute(outTupDesc, spgKeyColumn);
 		TupleDescFinalize(outTupDesc);
diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c
index bb997182481..83a8c02894d 100644
--- a/src/backend/executor/execTuples.c
+++ b/src/backend/executor/execTuples.c
@@ -993,225 +993,254 @@ tts_buffer_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple,
 }
 
 /*
- * slot_deform_heap_tuple_internal
- *		An always inline helper function for use in slot_deform_heap_tuple to
- *		allow the compiler to emit specialized versions of this function for
- *		various combinations of "slow" and "hasnulls".  For example, if a
- *		given tuple has no nulls, then we needn't check "hasnulls" for every
- *		attribute that we're deforming.  The caller can just call this
- *		function with hasnulls set to constant-false and have the compiler
- *		remove the constant-false branches and emit more optimal code.
- *
- * Returns the next attnum to deform, which can be equal to natts when the
- * function manages to deform all requested attributes.  *offp is an input and
- * output parameter which is the byte offset within the tuple to start deforming
- * from which, on return, gets set to the offset where the next attribute
- * should be deformed from.  *slowp is set to true when subsequent deforming
- * of this tuple must use a version of this function with "slow" passed as
- * true.
- *
- * Callers cannot assume when we return "attnum" (i.e. all requested
- * attributes have been deformed) that slow mode isn't required for any
- * additional deforming as the final attribute may have caused a switch to
- * slow mode.
+ * slot_deform_heap_tuple
+ *		Given a TupleTableSlot, extract data from the slot's physical tuple
+ *		into its Datum/isnull arrays.  Data is extracted up through the
+ *		reqnatts'th column.  If there are insufficient attributes in the given
+ *		tuple, then slot_getmissingattrs() is called to populate the
+ *		remainder.  If reqnatts is above the number of attributes in the
+ *		slot's TupleDesc, an error is raised.
+ *
+ *		This is essentially an incremental version of heap_deform_tuple:
+ *		on each call we extract attributes up to the one needed, without
+ *		re-computing information about previously extracted attributes.
+ *		slot->tts_nvalid is the number of attributes already extracted.
+ *
+ * This is marked as always inline, so the different offp for different types
+ * of slots gets optimized away.
  */
-static pg_attribute_always_inline int
-slot_deform_heap_tuple_internal(TupleTableSlot *slot, HeapTuple tuple,
-								int attnum, int natts, bool slow,
-								bool hasnulls, uint32 *offp, bool *slowp)
+static pg_attribute_always_inline void
+slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
+					   int reqnatts)
 {
+	CompactAttribute *cattr;
 	TupleDesc	tupleDesc = slot->tts_tupleDescriptor;
-	Datum	   *values = slot->tts_values;
-	bool	   *isnull = slot->tts_isnull;
 	HeapTupleHeader tup = tuple->t_data;
+	size_t		attnum;
+	int			firstNonCacheOffsetAttr;
+	int			firstNonGuaranteedAttr;
+	int			firstNullAttr;
+	int			natts;
+	Datum	   *values;
+	bool	   *isnull;
 	char	   *tp;				/* ptr to tuple data */
-	bits8	   *bp = tup->t_bits;	/* ptr to null bitmap in tuple */
-	bool		slownext = false;
+	uint32		off;			/* offset in tuple data */
 
-	tp = (char *) tup + tup->t_hoff;
+	/* Did someone forget to call TupleDescFinalize()? */
+	Assert(tupleDesc->firstNonCachedOffsetAttr >= 0);
 
-	for (; attnum < natts; attnum++)
+	isnull = slot->tts_isnull;
+
+	/*
+	 * Some callers may form and deform tuples prior to NOT NULL constraints
+	 * being checked.  Here we'd like to optimize the case where we only need
+	 * to fetch attributes before or up to the point where the attribute is
+	 * guaranteed to exist in the tuple.  We rely on the slot flag being set
+	 * correctly to only enable this optimization when it's valid to do so.
+	 * This optimization allows us to save fetching the number of attributes
+	 * from the tuple and saves the additional cost of handling non-byval
+	 * attrs.
+	 */
+	if (TTS_OBEYS_NOT_NULL_CONSTRAINTS(slot))
+		firstNonGuaranteedAttr = Min(reqnatts, tupleDesc->firstNonGuaranteedAttr);
+	else
+		firstNonGuaranteedAttr = 0;
+
+	firstNonCacheOffsetAttr = tupleDesc->firstNonCachedOffsetAttr;
+
+	if (HeapTupleHasNulls(tuple))
 	{
-		CompactAttribute *thisatt = TupleDescCompactAttr(tupleDesc, attnum);
+		natts = HeapTupleHeaderGetNatts(tup);
+		tp = (char *) tup + MAXALIGN(offsetof(HeapTupleHeaderData, t_bits) +
+									 BITMAPLEN(natts));
 
-		if (hasnulls && att_isnull(attnum, bp))
+		natts = Min(natts, reqnatts);
+		if (natts > firstNonGuaranteedAttr)
 		{
-			values[attnum] = (Datum) 0;
-			isnull[attnum] = true;
-			if (!slow)
-			{
-				*slowp = true;
-				return attnum + 1;
-			}
-			else
-				continue;
-		}
+			bits8	   *bp = tup->t_bits;
 
-		isnull[attnum] = false;
+			/* Find the first NULL attr */
+			firstNullAttr = first_null_attr(bp, natts);
 
-		/* calculate the offset of this attribute */
-		if (!slow && thisatt->attcacheoff >= 0)
-			*offp = thisatt->attcacheoff;
-		else if (thisatt->attlen == -1)
-		{
 			/*
-			 * We can only cache the offset for a varlena attribute if the
-			 * offset is already suitably aligned, so that there would be no
-			 * pad bytes in any case: then the offset will be valid for either
-			 * an aligned or unaligned value.
+			 * And populate the isnull array for all attributes being fetched
+			 * from the tuple.
 			 */
-			if (!slow && *offp == att_nominal_alignby(*offp, thisatt->attalignby))
-				thisatt->attcacheoff = *offp;
-			else
-			{
-				*offp = att_pointer_alignby(*offp,
-											thisatt->attalignby,
-											-1,
-											tp + *offp);
+			populate_isnull_array(bp, natts, isnull);
 
-				if (!slow)
-					slownext = true;
-			}
+			/* We can only use any cached offsets until the first NULL attr */
+			firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, firstNullAttr);
 		}
 		else
 		{
-			/* not varlena, so safe to use att_nominal_alignby */
-			*offp = att_nominal_alignby(*offp, thisatt->attalignby);
+			/* Otherwise all required columns are guaranteed to exist */
+			firstNullAttr = natts;
+		}
+	}
+	else
+	{
+		tp = (char *) tup + MAXALIGN(offsetof(HeapTupleHeaderData, t_bits));
 
-			if (!slow)
-				thisatt->attcacheoff = *offp;
+		/*
+		 * We only need to look at the tuple's natts if we need more than the
+		 * guaranteed number of columns
+		 */
+		if (reqnatts > firstNonGuaranteedAttr)
+			natts = Min(HeapTupleHeaderGetNatts(tup), reqnatts);
+		else
+		{
+			/* No need to access the number of attributes in the tuple */
+			natts = reqnatts;
 		}
 
-		values[attnum] = fetchatt(thisatt, tp + *offp);
+		/* All attrs can be fetched without checking for NULLs */
+		firstNullAttr = natts;
+	}
+
+	attnum = slot->tts_nvalid;
+	values = slot->tts_values;
+	slot->tts_nvalid = reqnatts;
 
-		*offp = att_addlength_pointer(*offp, thisatt->attlen, tp + *offp);
+	/* Ensure we calculated tp correctly */
+	Assert(tp == (char *) tup + tup->t_hoff);
 
-		/* check if we need to switch to slow mode */
-		if (!slow)
+	if (attnum < firstNonGuaranteedAttr)
+	{
+		do
 		{
+			int			attlen;
+
+			isnull[attnum] = false;
+			cattr = TupleDescCompactAttr(tupleDesc, attnum);
+			attlen = cattr->attlen;
+
+			/* We don't expect any non-byval types */
+			pg_assume(attlen > 0);
+
 			/*
-			 * We're unable to deform any further if the above code set
-			 * 'slownext', or if this isn't a fixed-width attribute.
+			 * Technically we could support non-byval fixed-width types, but
+			 * not doing so allows us to pass true to fetch_att_noerr() which
+			 * eliminates the !attbyval branch.
 			 */
-			if (slownext || thisatt->attlen <= 0)
-			{
-				*slowp = true;
-				return attnum + 1;
-			}
-		}
-	}
+			Assert(cattr->attbyval == true);
 
-	return natts;
-}
-
-/*
- * slot_deform_heap_tuple
- *		Given a TupleTableSlot, extract data from the slot's physical tuple
- *		into its Datum/isnull arrays.  Data is extracted up through the
- *		reqnatts'th column.  If there are insufficient attributes in the given
- *		tuple, then slot_getmissingattrs() is called to populate the
- *		remainder.  If reqnatts is above the number of attributes in the
- *		slot's TupleDesc, an error is raised.
- *
- *		This is essentially an incremental version of heap_deform_tuple:
- *		on each call we extract attributes up to the one needed, without
- *		re-computing information about previously extracted attributes.
- *		slot->tts_nvalid is the number of attributes already extracted.
- *
- * This is marked as always inline, so the different offp for different types
- * of slots gets optimized away.
- */
-static pg_attribute_always_inline void
-slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
-					   int reqnatts)
-{
-	bool		hasnulls = HeapTupleHasNulls(tuple);
-	int			attnum;
-	uint32		off;			/* offset in tuple data */
-	bool		slow;			/* can we use/set attcacheoff? */
+			off = cattr->attcacheoff;
+			values[attnum] = fetch_att_noerr(tp + off, true, attlen);
+			attnum++;
+		} while (attnum < firstNonGuaranteedAttr);
 
-	/* We can only fetch as many attributes as the tuple has. */
-	natts = Min(HeapTupleHeaderGetNatts(tuple->t_data), reqnatts);
+		off += cattr->attlen;
 
-	/*
-	 * Check whether the first call for this tuple, and initialize or restore
-	 * loop state.
-	 */
-	attnum = slot->tts_nvalid;
-	slot->tts_nvalid = reqnatts;
-	if (attnum == 0)
-	{
-		/* Start from the first attribute */
-		off = 0;
-		slow = false;
+		if (attnum == reqnatts)
+			goto done;
 	}
 	else
 	{
 		/* Restore state from previous execution */
 		off = *offp;
-		slow = TTS_SLOW(slot);
+
+		/* We expect *offp to be set to 0 when attnum == 0 */
+		Assert(off == 0 || attnum > 0);
 	}
 
+	/* We can only fetch as many attributes as the tuple has. */
+	firstNonCacheOffsetAttr = Min(firstNonCacheOffsetAttr, natts);
+
 	/*
-	 * If 'slow' isn't set, try deforming using deforming code that does not
-	 * contain any of the extra checks required for non-fixed offset
-	 * deforming.  During deforming, if or when we find a NULL or a variable
-	 * length attribute, we'll switch to a deforming method which includes the
-	 * extra code required for non-fixed offset deforming, a.k.a slow mode.
-	 * Because this is performance critical, we inline
-	 * slot_deform_heap_tuple_internal passing the 'slow' and 'hasnull'
-	 * parameters as constants to allow the compiler to emit specialized code
-	 * with the known-const false comparisons and subsequent branches removed.
+	 * Handle the portion of the tuple that we have cached the offset for up
+	 * to the first NULL attribute.  The offset is effectively fixed for
+	 * these, so we can use the CompactAttribute's attcacheoff.
 	 */
-	if (!slow)
+	if (attnum < firstNonCacheOffsetAttr)
 	{
-		/* Tuple without any NULLs? We can skip doing any NULL checking */
-		if (!hasnulls)
-			attnum = slot_deform_heap_tuple_internal(slot,
-													 tuple,
-													 attnum,
-													 natts,
-													 false, /* slow */
-													 false, /* hasnulls */
-													 &off,
-													 &slow);
-		else
-			attnum = slot_deform_heap_tuple_internal(slot,
-													 tuple,
-													 attnum,
-													 natts,
-													 false, /* slow */
-													 true,	/* hasnulls */
-													 &off,
-													 &slow);
+		do
+		{
+			isnull[attnum] = false;
+			cattr = TupleDescCompactAttr(tupleDesc, attnum);
+
+			off = cattr->attcacheoff;
+			values[attnum] = fetch_att_noerr(tp + off,
+											 cattr->attbyval,
+											 cattr->attlen);
+		} while (++attnum < firstNonCacheOffsetAttr);
+
+		/*
+		 * Point the offset after the end of the last attribute with a cached
+		 * offset.  We expect the final cached offset attribute to have a
+		 * fixed width, so just add the attlen to the attcacheoff
+		 */
+		Assert(cattr->attlen > 0);
+		off += cattr->attlen;
 	}
 
-	/* If there's still work to do then we must be in slow mode */
-	if (attnum < natts)
+	/*
+	 * Handle any portion of the tuple that doesn't have a fixed offset up
+	 * until the first NULL attribute.  This loop only differs from the one
+	 * after it by the NULL checks.
+	 */
+	for (; attnum < firstNullAttr; attnum++)
 	{
-		/* XXX is it worth adding a separate call when hasnulls is false? */
-		attnum = slot_deform_heap_tuple_internal(slot,
-												 tuple,
-												 attnum,
-												 natts,
-												 true,	/* slow */
-												 hasnulls,
-												 &off,
-												 &slow);
+		int			attlen;
+
+		isnull[attnum] = false;
+		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+		attlen = cattr->attlen;
+
+		/*
+		 * cstrings don't exist in heap tuples.  Use pg_assume to instruct the
+		 * compiler not to emit the cstring-related code in
+		 * align_fetch_then_add().
+		 */
+		pg_assume(attlen > 0 || attlen == -1);
+
+		/* align 'off', fetch the datum, and increment off beyond the datum */
+		values[attnum] = align_fetch_then_add(tp,
+											  &off,
+											  cattr->attbyval,
+											  attlen,
+											  cattr->attalignby);
 	}
 
 	/*
-	 * Save state for next execution
+	 * Now handle any remaining attributes in the tuple up to the requested
+	 * attnum.  This time, include NULL checks as we're now at the first NULL
+	 * attribute.
 	 */
-	*offp = off;
-	if (slow)
-		slot->tts_flags |= TTS_FLAG_SLOW;
-	else
-		slot->tts_flags &= ~TTS_FLAG_SLOW;
+	for (; attnum < natts; attnum++)
+	{
+		int			attlen;
+
+		if (isnull[attnum])
+		{
+			values[attnum] = (Datum) 0;
+			continue;
+		}
+
+		cattr = TupleDescCompactAttr(tupleDesc, attnum);
+		attlen = cattr->attlen;
 
-	/* Fetch any missing attrs and raise an error if reqnatts is invalid. */
+		/* As above, we don't expect cstrings */
+		pg_assume(attlen > 0 || attlen == -1);
+
+		/* align 'off', fetch the datum, and increment off beyond the datum */
+		values[attnum] = align_fetch_then_add(tp,
+											  &off,
+											  cattr->attbyval,
+											  attlen,
+											  cattr->attalignby);
+	}
+
+	/* Fetch any missing attrs and raise an error if reqnatts is invalid */
 	if (unlikely(attnum < reqnatts))
+	{
+		*offp = off;
 		slot_getmissingattrs(slot, attnum, reqnatts);
+		return;
+	}
+done:
+
+	/* Save current offset for next execution */
+	*offp = off;
 }
 
 const TupleTableSlotOps TTSOpsVirtual = {
@@ -1341,10 +1370,17 @@ MakeTupleTableSlot(TupleDesc tupleDesc,
 		slot->tts_values = (Datum *)
 			(((char *) slot)
 			 + MAXALIGN(basesz));
+
+		/*
+		 * tts_isnull needs room for natts rounded up to the next multiple of
+		 * 8 as populate_isnull_array() writes 8 elements at a time.
+		 * NOTE(review): the TYPEALIGN() below pads the tts_values size, not
+		 * tts_isnull's -- confirm the allocation also rounds isnull up to 8.
+		 */
 		slot->tts_isnull = (bool *)
 			(((char *) slot)
 			 + MAXALIGN(basesz)
-			 + MAXALIGN(tupleDesc->natts * sizeof(Datum)));
+			 + TYPEALIGN(8, tupleDesc->natts * sizeof(Datum)));
 
 		PinTupleDesc(tupleDesc);
 	}
@@ -1514,8 +1550,14 @@ ExecSetSlotDescriptor(TupleTableSlot *slot, /* slot to change */
 	 */
 	slot->tts_values = (Datum *)
 		MemoryContextAlloc(slot->tts_mcxt, tupdesc->natts * sizeof(Datum));
+
+	/*
+	 * We round the size of tts_isnull up to the next highest multiple of 8.
+	 * This is needed as populate_isnull_array() operates on 8 elements at a
+	 * time when converting a tuple's NULL bitmap into a boolean array.
+	 */
 	slot->tts_isnull = (bool *)
-		MemoryContextAlloc(slot->tts_mcxt, tupdesc->natts * sizeof(bool));
+		MemoryContextAlloc(slot->tts_mcxt, TYPEALIGN(8, tupdesc->natts * sizeof(bool)));
 }
 
 /* --------------------------------
@@ -2260,10 +2302,16 @@ ExecTypeSetColNames(TupleDesc typeInfo, List *namesList)
  * This happens "for free" if the tupdesc came from a relcache entry, but
  * not if we have manufactured a tupdesc for a transient RECORD datatype.
  * In that case we have to notify typcache.c of the existence of the type.
+ *
+ * TupleDescFinalize() must be called on the TupleDesc before calling this
+ * function.
  */
 TupleDesc
 BlessTupleDesc(TupleDesc tupdesc)
 {
+	/* Did someone forget to call TupleDescFinalize()? */
+	Assert(tupdesc->firstNonCachedOffsetAttr >= 0);
+
 	if (tupdesc->tdtypeid == RECORDOID &&
 		tupdesc->tdtypmod < 0)
 		assign_record_type_typmod(tupdesc);
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index c68c26cbf38..b17c4e721b3 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -383,6 +383,9 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 						  RelationGetDescr(currentRelation),
 						  table_slot_callbacks(currentRelation));
 
+	scanstate->ss.ss_ScanTupleSlot->tts_flags |=
+		TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS;
+
 	/*
 	 * Initialize result type and projection.
 	 */
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index c2d09374517..506fdf446d2 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -569,6 +569,9 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
 	ExecInitScanTupleSlot(estate, &indexstate->ss, tupDesc,
 						  &TTSOpsVirtual);
 
+	indexstate->ss.ss_ScanTupleSlot->tts_flags |=
+		TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS;
+
 	/*
 	 * We need another slot, in a format that's suitable for the table AM, for
 	 * when we need to fetch a tuple from the table for rechecking visibility.
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index a616abff04c..c77746ab9f5 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -940,6 +940,9 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
 						  RelationGetDescr(currentRelation),
 						  table_slot_callbacks(currentRelation));
 
+	indexstate->ss.ss_ScanTupleSlot->tts_flags |=
+		TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS;
+
 	/*
 	 * Initialize result type and projection.
 	 */
diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c
index 1b0af70fd7a..d29ef2872f7 100644
--- a/src/backend/executor/nodeSamplescan.c
+++ b/src/backend/executor/nodeSamplescan.c
@@ -130,6 +130,9 @@ ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
 						  RelationGetDescr(scanstate->ss.ss_currentRelation),
 						  table_slot_callbacks(scanstate->ss.ss_currentRelation));
 
+	scanstate->ss.ss_ScanTupleSlot->tts_flags |=
+		TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS;
+
 	/*
 	 * Initialize result type and projection.
 	 */
diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c
index af3c788ce8b..3ff2a2843eb 100644
--- a/src/backend/executor/nodeSeqscan.c
+++ b/src/backend/executor/nodeSeqscan.c
@@ -246,6 +246,9 @@ ExecInitSeqScan(SeqScan *node, EState *estate, int eflags)
 						  RelationGetDescr(scanstate->ss.ss_currentRelation),
 						  table_slot_callbacks(scanstate->ss.ss_currentRelation));
 
+	scanstate->ss.ss_ScanTupleSlot->tts_flags |=
+		TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS;
+
 	/*
 	 * Initialize result type and projection.
 	 */
diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c
index 503817da65b..2ece0255e7d 100644
--- a/src/backend/executor/nodeTidrangescan.c
+++ b/src/backend/executor/nodeTidrangescan.c
@@ -396,6 +396,9 @@ ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags)
 						  RelationGetDescr(currentRelation),
 						  table_slot_callbacks(currentRelation));
 
+	tidrangestate->ss.ss_ScanTupleSlot->tts_flags |=
+		TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS;
+
 	/*
 	 * Initialize result type and projection.
 	 */
diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c
index 4eddb0828b5..484e3306e0b 100644
--- a/src/backend/executor/nodeTidscan.c
+++ b/src/backend/executor/nodeTidscan.c
@@ -538,6 +538,9 @@ ExecInitTidScan(TidScan *node, EState *estate, int eflags)
 						  RelationGetDescr(currentRelation),
 						  table_slot_callbacks(currentRelation));
 
+	tidstate->ss.ss_ScanTupleSlot->tts_flags |=
+		TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS;
+
 	/*
 	 * Initialize result type and projection.
 	 */
diff --git a/src/backend/jit/llvm/llvmjit_deform.c b/src/backend/jit/llvm/llvmjit_deform.c
index 3eb087eb56b..12521e3e46a 100644
--- a/src/backend/jit/llvm/llvmjit_deform.c
+++ b/src/backend/jit/llvm/llvmjit_deform.c
@@ -62,7 +62,6 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc,
 	LLVMValueRef v_tts_values;
 	LLVMValueRef v_tts_nulls;
 	LLVMValueRef v_slotoffp;
-	LLVMValueRef v_flagsp;
 	LLVMValueRef v_nvalidp;
 	LLVMValueRef v_nvalid;
 	LLVMValueRef v_maxatt;
@@ -178,7 +177,6 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc,
 	v_tts_nulls =
 		l_load_struct_gep(b, StructTupleTableSlot, v_slot, FIELDNO_TUPLETABLESLOT_ISNULL,
 						  "tts_ISNULL");
-	v_flagsp = l_struct_gep(b, StructTupleTableSlot, v_slot, FIELDNO_TUPLETABLESLOT_FLAGS, "");
 	v_nvalidp = l_struct_gep(b, StructTupleTableSlot, v_slot, FIELDNO_TUPLETABLESLOT_NVALID, "");
 
 	if (ops == &TTSOpsHeapTuple || ops == &TTSOpsBufferHeapTuple)
@@ -747,14 +745,10 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc,
 
 	{
 		LLVMValueRef v_off = l_load(b, TypeSizeT, v_offp, "");
-		LLVMValueRef v_flags;
 
 		LLVMBuildStore(b, l_int16_const(lc, natts), v_nvalidp);
 		v_off = LLVMBuildTrunc(b, v_off, LLVMInt32TypeInContext(lc), "");
 		LLVMBuildStore(b, v_off, v_slotoffp);
-		v_flags = l_load(b, LLVMInt16TypeInContext(lc), v_flagsp, "tts_flags");
-		v_flags = LLVMBuildOr(b, v_flags, l_int16_const(lc, TTS_FLAG_SLOW), "");
-		LLVMBuildStore(b, v_flags, v_flagsp);
 		LLVMBuildRetVoid(b);
 	}
 
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 770edb34e08..998be24ac41 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -666,14 +666,6 @@ RelationBuildTupleDesc(Relation relation)
 		elog(ERROR, "pg_attribute catalog is missing %d attribute(s) for relation OID %u",
 			 need, RelationGetRelid(relation));
 
-	/*
-	 * We can easily set the attcacheoff value for the first attribute: it
-	 * must be zero.  This eliminates the need for special cases for attnum=1
-	 * that used to exist in fastgetattr() and index_getattr().
-	 */
-	if (RelationGetNumberOfAttributes(relation) > 0)
-		TupleDescCompactAttr(relation->rd_att, 0)->attcacheoff = 0;
-
 	/*
 	 * Set up constraint/default info
 	 */
@@ -1985,8 +1977,6 @@ formrdesc(const char *relationName, Oid relationReltype,
 		populate_compact_attribute(relation->rd_att, i);
 	}
 
-	/* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
-	TupleDescCompactAttr(relation->rd_att, 0)->attcacheoff = 0;
 	TupleDescFinalize(relation->rd_att);
 
 	/* mark not-null status */
@@ -4446,8 +4436,6 @@ BuildHardcodedDescriptor(int natts, const FormData_pg_attribute *attrs)
 		populate_compact_attribute(result, i);
 	}
 
-	/* initialize first attribute's attcacheoff, cf RelationBuildTupleDesc */
-	TupleDescCompactAttr(result, 0)->attcacheoff = 0;
 	TupleDescFinalize(result);
 
 	/* Note: we don't bother to set up a TupleConstr entry */
diff --git a/src/include/access/tupdesc.h b/src/include/access/tupdesc.h
index 595413dbbc5..ad7bc013812 100644
--- a/src/include/access/tupdesc.h
+++ b/src/include/access/tupdesc.h
@@ -131,6 +131,19 @@ typedef struct CompactAttribute
  * Any code making changes manually to and fields in the FormData_pg_attribute
  * array must subsequently call populate_compact_attribute() to flush the
  * changes out to the corresponding 'compact_attrs' element.
+ *
+ * firstNonCachedOffsetAttr stores the index into the compact_attrs array for
+ * the first attribute that we don't have a known attcacheoff for.
+ *
+ * firstNonGuaranteedAttr stores the index into the compact_attrs array for
+ * the first attribute that is either NULLable, missing, dropped, or
+ * !attbyval.  This can be used as a guarantee that attributes before this
+ * will always exist in tuples.  The !attbyval part isn't required for this,
+ * but including it allows various tuple deforming routines to forego any
+ * checks for !attbyval.
+ *
+ * Once a TupleDesc has been populated, before it is used for any purpose,
+ * TupleDescFinalize() must be called on it.
  */
 typedef struct TupleDescData
 {
@@ -138,6 +151,11 @@ typedef struct TupleDescData
 	Oid			tdtypeid;		/* composite type ID for tuple type */
 	int32		tdtypmod;		/* typmod for tuple type */
 	int			tdrefcount;		/* reference count, or -1 if not counting */
+	int			firstNonCachedOffsetAttr;	/* index of the first att without
+											 * an attcacheoff */
+	int			firstNonGuaranteedAttr; /* index of the first nullable,
+										 * missing, dropped, or !attbyval
+										 * attribute. */
 	TupleConstr *constr;		/* constraints, or NULL if none */
 	/* compact_attrs[N] is the compact metadata of Attribute Number N+1 */
 	CompactAttribute compact_attrs[FLEXIBLE_ARRAY_MEMBER];
@@ -195,7 +213,6 @@ extern TupleDesc CreateTupleDescTruncatedCopy(TupleDesc tupdesc, int natts);
 
 extern TupleDesc CreateTupleDescCopyConstr(TupleDesc tupdesc);
 
-#define TupleDescFinalize(d) ((void) 0)
 #define TupleDescSize(src) \
 	(offsetof(struct TupleDescData, compact_attrs) + \
 	 (src)->natts * sizeof(CompactAttribute) + \
@@ -206,6 +223,7 @@ extern void TupleDescCopy(TupleDesc dst, TupleDesc src);
 extern void TupleDescCopyEntry(TupleDesc dst, AttrNumber dstAttno,
 							   TupleDesc src, AttrNumber srcAttno);
 
+extern void TupleDescFinalize(TupleDesc tupdesc);
 extern void FreeTupleDesc(TupleDesc tupdesc);
 
 extern void IncrTupleDescRefCount(TupleDesc tupdesc);
diff --git a/src/include/access/tupmacs.h b/src/include/access/tupmacs.h
index d64c18b950b..87dbeb76618 100644
--- a/src/include/access/tupmacs.h
+++ b/src/include/access/tupmacs.h
@@ -15,7 +15,9 @@
 #define TUPMACS_H
 
 #include "catalog/pg_type_d.h"	/* for TYPALIGN macros */
-
+#include "port/pg_bitutils.h"
+#include "port/pg_bswap.h"
+#include "varatt.h"
 
 /*
  * Check a tuple's null bitmap to determine whether the attribute is null.
@@ -28,6 +30,62 @@ att_isnull(int ATT, const bits8 *BITS)
 	return !(BITS[ATT >> 3] & (1 << (ATT & 0x07)));
 }
 
+/*
+ * populate_isnull_array
+ *		Transform a tuple's null bitmap into a boolean array.
+ *
+ * 'bits' is the null bitmap (a 0 bit means NULL) and 'isnull' receives one
+ * bool per bit.  isnull is populated 8 elements at a time, effectively as if
+ * natts were rounded up to the next multiple of 8, so the caller must ensure
+ * the isnull array has room for that many elements.
+ */
+static inline void
+populate_isnull_array(const bits8 *bits, int natts, bool *isnull)
+{
+	int			nbytes = (natts + 7) >> 3;
+
+	/*
+	 * Multiplying 4 bits of the inverted NULL bitmap byte by this value sets
+	 * the lowest bit of each of 4 bytes the same as each of those 4 bits.  We
+	 * perform this as 2 32-bit operations rather than a single 64-bit
+	 * operation as multiplying by the required value to do this in 64-bits
+	 * would result in overflowing a uint64 in some cases.
+	 *
+	 * XXX if we ever require BMI2 (-march=x86-64-v3), then this could be done
+	 * more efficiently on most X86-64 CPUs with the PDEP instruction.  Beware
+	 * that some chips (e.g. AMD's Zen2) are horribly inefficient at PDEP.
+	 */
+#define SPREAD_BITS_MULTIPLIER_32 0x204081U
+
+	for (int i = 0; i < nbytes; i++, isnull += 8)
+	{
+		uint64		isnull_8;
+		bits8		nullbyte = ~bits[i];
+
+		/* Spread the lower 4 bits of the inverted bitmap byte over 4 bytes */
+		isnull_8 = (nullbyte & 0xf) * SPREAD_BITS_MULTIPLIER_32;
+
+		/*
+		 * Likewise spread the upper 4 bits of the inverted bitmap byte, shift
+		 * the result into the upper 32 bits and bitwise-OR it with the
+		 * result of the lower 4 bits.
+		 */
+		isnull_8 |= ((uint64) ((nullbyte >> 4) * SPREAD_BITS_MULTIPLIER_32)) << 32;
+
+		/* Mask out all other bits apart from the lowest bit of each byte. */
+		isnull_8 &= UINT64CONST(0x0101010101010101);
+
+#ifdef WORDS_BIGENDIAN
+
+		/*
+		 * Fix byte order on big-endian machines before copying to the array.
+		 */
+		isnull_8 = pg_bswap64(isnull_8);
+#endif
+		memcpy(isnull, &isnull_8, sizeof(uint64));
+	}
+}
+
 #ifndef FRONTEND
 /*
  * Given an attbyval and an attlen from either a Form_pg_attribute or
@@ -69,6 +127,170 @@ fetch_att(const void *T, bool attbyval, int attlen)
 	else
 		return PointerGetDatum(T);
 }
+
+/*
+ * Same as fetch_att(), but with no error checking for invalid attlens for
+ * byval types.  This is safe to use when attlen comes from CompactAttribute
+ * as we validate the length when populating that struct.
+ */
+static inline Datum
+fetch_att_noerr(const void *T, bool attbyval, int attlen)
+{
+	if (attbyval)
+	{
+		switch (attlen)
+		{
+			case sizeof(int32):
+				return Int32GetDatum(*((const int32 *) T));
+			case sizeof(int16):
+				return Int16GetDatum(*((const int16 *) T));
+			case sizeof(char):
+				return CharGetDatum(*((const char *) T));
+			default:
+				Assert(attlen == sizeof(int64));
+				return Int64GetDatum(*((const int64 *) T));
+		}
+	}
+	else
+		return PointerGetDatum(T);
+}
+
+
+/*
+ * align_fetch_then_add
+ *		Applies all the functionality of att_pointer_alignby(),
+ *		fetch_att_noerr() and att_addlength_pointer(), returning the Datum
+ *		and leaving *off at the, perhaps unaligned, byte offset into 'tupptr'
+ *		just past it, ready to deform the next attribute.
+ *
+ * tupptr: pointer to the beginning of the tuple data, after the header and
+ * any NULL bitmap.
+ * off: in/out offset in bytes into the tuple data, possibly unaligned.
+ * attbyval, attlen and attalignby are values from CompactAttribute.
+ */
+static inline Datum
+align_fetch_then_add(const char *tupptr, uint32 *off, bool attbyval, int attlen,
+					 uint8 attalignby)
+{
+	Datum		res;
+
+	if (attlen > 0)
+	{
+		const char *offset_ptr;
+
+		*off = TYPEALIGN(attalignby, *off);
+		offset_ptr = tupptr + *off;
+		*off += attlen;
+		if (attbyval)
+		{
+			switch (attlen)
+			{
+				case sizeof(char):
+					return CharGetDatum(*((const char *) offset_ptr));
+				case sizeof(int16):
+					return Int16GetDatum(*((const int16 *) offset_ptr));
+				case sizeof(int32):
+					return Int32GetDatum(*((const int32 *) offset_ptr));
+				default:
+
+					/*
+					 * populate_compact_attribute_internal() should have
+					 * checked
+					 */
+					Assert(attlen == sizeof(int64));
+					return Int64GetDatum(*((const int64 *) offset_ptr));
+			}
+		}
+		return PointerGetDatum(offset_ptr);
+	}
+	else if (attlen == -1)
+	{
+		if (!VARATT_IS_SHORT(tupptr + *off))
+			*off = TYPEALIGN(attalignby, *off);
+
+		res = PointerGetDatum(tupptr + *off);
+		*off += VARSIZE_ANY(DatumGetPointer(res));
+		return res;
+	}
+	else
+	{
+		Assert(attlen == -2);
+		*off = TYPEALIGN(attalignby, *off);
+		res = PointerGetDatum(tupptr + *off);
+		*off += strlen(tupptr + *off) + 1;
+		return res;
+	}
+}
+
+/*
+ * first_null_attr
+ *		Inspect a NULL bitmap from a tuple and return the 0-based attnum of the
+ *		first NULL attribute.  Returns natts if no NULLs were found.
+ *
+ * This is coded to expect that 'bits' contains at least one 0 bit somewhere
+ * in the array, but not necessarily < natts.  Note that natts may be passed
+ * as a value lower than the number of bits physically stored in the tuple's
+ * NULL bitmap, in which case we may not find a NULL and return natts.
+ *
+ * The reason we require at least one 0 bit somewhere in the NULL bitmap is
+ * that if all bytes were 0xFF and natts were a multiple of 8, the for loop
+ * that skips 0xFF bytes would stop one past the last byte of the array, and
+ * the subsequent code that finds the right-most 0 bit would access that byte
+ * beyond the bitmap.  Provided we find a 0 bit before then, that won't
+ * happen.  Since tuples which have no NULLs don't have a NULL bitmap, this
+ * function won't get called for that case.
+ */
+static inline int
+first_null_attr(const bits8 *bits, int natts)
+{
+	int			nattByte = natts >> 3;
+	int			bytenum;
+	int			res;
+
+#ifdef USE_ASSERT_CHECKING
+	int			firstnull_check = natts;
+
+	/* Do it the slow way and check we get the same answer. */
+	for (int i = 0; i < natts; i++)
+	{
+		if (att_isnull(i, bits))
+		{
+			firstnull_check = i;
+			break;
+		}
+	}
+#endif
+
+	/* Process all bytes up to just before the byte for the natts attribute */
+	for (bytenum = 0; bytenum < nattByte; bytenum++)
+	{
+		/* break if there's any NULL attrs (a 0 bit) */
+		if (bits[bytenum] != 0xFF)
+			break;
+	}
+
+	/*
+	 * Look for the lowest 0-bit in the 'bytenum' element.  To do this, we
+	 * promote the uint8 to uint32 before performing the bitwise NOT and
+	 * looking for the rightmost 1-bit.  This works even when the byte is
+	 * 0xFF, as the bitwise NOT of 0xFF in 32 bits is 0xFFFFFF00, in which case
+	 * pg_rightmost_one_pos32() will return 8.  We may end up with a value
+	 * higher than natts here, but we'll fix that with the Min() below.
+	 */
+	res = bytenum << 3;
+	res += pg_rightmost_one_pos32(~((uint32) bits[bytenum]));
+
+	/*
+	 * Since we did no masking to mask out bits beyond the natt'th bit, we may
+	 * have found a bit higher than natts, so we must cap res to natts
+	 */
+	res = Min(res, natts);
+
+	/* Ensure we got the same answer as the att_isnull() loop got */
+	Assert(res == firstnull_check);
+
+	return res;
+}
 #endif							/* FRONTEND */
 
 /*
diff --git a/src/include/executor/tuptable.h b/src/include/executor/tuptable.h
index 3b09abbf99f..ff4572a29ae 100644
--- a/src/include/executor/tuptable.h
+++ b/src/include/executor/tuptable.h
@@ -84,9 +84,6 @@
  * tts_values/tts_isnull are allocated either when the slot is created (when
  * the descriptor is provided), or when a descriptor is assigned to the slot;
  * they are of length equal to the descriptor's natts.
- *
- * The TTS_FLAG_SLOW flag is saved state for
- * slot_deform_heap_tuple, and should not be touched by any other code.
  *----------
  */
 
@@ -98,9 +95,13 @@
 #define			TTS_FLAG_SHOULDFREE		(1 << 2)
 #define TTS_SHOULDFREE(slot) (((slot)->tts_flags & TTS_FLAG_SHOULDFREE) != 0)
 
-/* saved state for slot_deform_heap_tuple */
-#define			TTS_FLAG_SLOW		(1 << 3)
-#define TTS_SLOW(slot) (((slot)->tts_flags & TTS_FLAG_SLOW) != 0)
+/*
+ * true = slot's formed tuple guaranteed to not have NULLs in NOT NULLable
+ * columns.
+ */
+#define			TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS		(1 << 3)
+#define TTS_OBEYS_NOT_NULL_CONSTRAINTS(slot) \
+	(((slot)->tts_flags & TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS) != 0)
 
 /* fixed tuple descriptor */
 #define			TTS_FLAG_FIXED		(1 << 4)
@@ -123,7 +124,9 @@ typedef struct TupleTableSlot
 #define FIELDNO_TUPLETABLESLOT_VALUES 5
 	Datum	   *tts_values;		/* current per-attribute values */
 #define FIELDNO_TUPLETABLESLOT_ISNULL 6
-	bool	   *tts_isnull;		/* current per-attribute isnull flags */
+	bool	   *tts_isnull;		/* current per-attribute isnull flags.  Array
+								 * size must always be rounded up to the next
+								 * multiple of 8 elements. */
 	MemoryContext tts_mcxt;		/* slot itself is in this context */
 	ItemPointerData tts_tid;	/* stored tuple's tid */
 	Oid			tts_tableOid;	/* table oid of tuple */
diff --git a/src/test/modules/deform_bench/deform_bench.c b/src/test/modules/deform_bench/deform_bench.c
index 7838f639bef..de39fecf8fd 100644
--- a/src/test/modules/deform_bench/deform_bench.c
+++ b/src/test/modules/deform_bench/deform_bench.c
@@ -49,6 +49,7 @@ deform_bench(PG_FUNCTION_ARGS)
 
 	tupdesc = RelationGetDescr(rel);
 	slot = MakeTupleTableSlot(tupdesc, &TTSOpsBufferHeapTuple);
+	slot->tts_flags |= TTS_FLAG_OBEYS_NOT_NULL_CONSTRAINTS;
 	scan = table_beginscan_strat(rel, GetActiveSnapshot(), 0, NULL, true, false);
 
 	/*
-- 
2.51.0



  [application/vnd.openxmlformats-officedocument.spreadsheetml.sheet] Deform_bench_test_module_results_v11.xlsx (36.7K, 8-Deform_bench_test_module_results_v11.xlsx)
  download

view thread (30+ messages)  latest in thread

reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected], [email protected], [email protected], [email protected], [email protected]
  Subject: Re: More speedups for tuple deformation
  In-Reply-To: <CAApHDvq21qQigiM6z2YgadFusQC_pfEYP8D=oQCrwJ_kKzcqDg@mail.gmail.com>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox