From 49939fe3416803d446ba43c404eae9711a3ae21b Mon Sep 17 00:00:00 2001 From: Dharin Shah <8616130+Dharin-shah@users.noreply.github.com> Date: Sun, 28 Dec 2025 23:21:21 +0100 Subject: [PATCH v1] refactor detoast to use decoded external toast abstraction --- src/backend/access/common/detoast.c | 326 ++++++++++++++++++++------- src/include/access/toast_internals.h | 39 ++++ 2 files changed, 278 insertions(+), 87 deletions(-) diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c index 62651787742..fdade574913 100644 --- a/src/backend/access/common/detoast.c +++ b/src/backend/access/common/detoast.c @@ -29,6 +29,70 @@ static struct varlena *toast_fetch_datum_slice(struct varlena *attr, static struct varlena *toast_decompress_datum(struct varlena *attr); static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength); +static struct varlena *toast_fetch_datum_decoded(const DecodedExternalToast *decoded); +static struct varlena *toast_decompress_decoded(const struct varlena *compressed, + const DecodedExternalToast *decoded); + +/* ---------- + * decode_external_toast_pointer - + * + * Decode external varlena into DecodedExternalToast struct. + * Only handles VARTAG_ONDISK with known compression methods. + * Returns false for INDIRECT, EXPANDED, or unrecognized compression. + * ---------- + */ +static bool +decode_external_toast_pointer(const struct varlena *attr, + DecodedExternalToast *decoded) +{ + struct varatt_external toast_pointer; + vartag_external tag; + + Assert(VARATT_IS_EXTERNAL(attr)); + + tag = VARTAG_EXTERNAL(attr); + + if (tag == VARTAG_ONDISK) + { + VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); + + decoded->toastrelid = toast_pointer.va_toastrelid; + decoded->valueid = toast_pointer.va_valueid; + decoded->rawsize = toast_pointer.va_rawsize; + decoded->extsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); + decoded->flags = TOAST_EXT_IS_ONDISK; + + if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) + { + uint32 cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer); + + switch (cmid) + { + case TOAST_PGLZ_COMPRESSION_ID: + decoded->method = TOAST_DECOMP_PGLZ; + decoded->flags |= TOAST_EXT_HAS_TCINFO; + break; + case TOAST_LZ4_COMPRESSION_ID: + decoded->method = TOAST_DECOMP_LZ4; + decoded->flags |= TOAST_EXT_HAS_TCINFO; + break; + default: + /* Unknown compression - let caller fall back */ + return false; + } + } + else + { + decoded->method = TOAST_DECOMP_NONE; + } + + return true; + } + + /* Not an on-disk datum (INDIRECT, EXPANDED, etc.) */ + return false; +} + /* ---------- * detoast_external_attr - * @@ -115,57 +179,78 @@ detoast_external_attr(struct varlena *attr) struct varlena * detoast_attr(struct varlena *attr) { - if (VARATT_IS_EXTERNAL_ONDISK(attr)) + if (VARATT_IS_EXTERNAL(attr)) { - /* - * This is an externally stored datum --- fetch it back from there - */ - attr = toast_fetch_datum(attr); - /* If it's compressed, decompress it */ - if (VARATT_IS_COMPRESSED(attr)) + DecodedExternalToast decoded; + + if (decode_external_toast_pointer(attr, &decoded)) { - struct varlena *tmp = attr; + struct varlena *fetched = toast_fetch_datum_decoded(&decoded); + + if (decoded.method != TOAST_DECOMP_NONE) + { + struct varlena *result = toast_decompress_decoded(fetched, &decoded); + pfree(fetched); + return result; + } + return fetched; + } - attr = toast_decompress_datum(tmp); - pfree(tmp); + /* Decode failed: INDIRECT, EXPANDED, or unrecognized compression */ + if (VARATT_IS_EXTERNAL_ONDISK(attr)) + { + /* Unrecognized compression - legacy path preserves error behavior */ + attr = toast_fetch_datum(attr); + if (VARATT_IS_COMPRESSED(attr)) + { + struct varlena *tmp = attr; + + attr = toast_decompress_datum(tmp); + pfree(tmp); + } + return attr; } - } - else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) - { - /* - * This is an indirect pointer --- dereference it - */ - struct varatt_indirect redirect; + else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) + { + /* + * This is an indirect pointer --- dereference it + */ + struct varatt_indirect redirect; - VARATT_EXTERNAL_GET_POINTER(redirect, attr); - attr = (struct varlena *) redirect.pointer; + VARATT_EXTERNAL_GET_POINTER(redirect, attr); + attr = (struct varlena *) redirect.pointer; - /* nested indirect Datums aren't allowed */ - Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); + /* nested indirect Datums aren't allowed */ + Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); - /* recurse in case value is still extended in some other way */ - attr = detoast_attr(attr); + /* recurse in case value is still extended in some other way */ + attr = detoast_attr(attr); - /* if it isn't, we'd better copy it */ - if (attr == (struct varlena *) redirect.pointer) - { - struct varlena *result; + /* if it isn't, we'd better copy it */ + if (attr == (struct varlena *) redirect.pointer) + { + struct varlena *result; - result = (struct varlena *) palloc(VARSIZE_ANY(attr)); - memcpy(result, attr, VARSIZE_ANY(attr)); - attr = result; + result = (struct varlena *) palloc(VARSIZE_ANY(attr)); + memcpy(result, attr, VARSIZE_ANY(attr)); + attr = result; + } + return attr; + } + else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) + { + /* + * This is an expanded-object pointer --- get flat format + */ + attr = detoast_external_attr(attr); + /* flatteners are not allowed to produce compressed/short output */ + Assert(!VARATT_IS_EXTENDED(attr)); + return attr; } } - else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) - { - /* - * This is an expanded-object pointer --- get flat format - */ - attr = detoast_external_attr(attr); - /* flatteners are not allowed to produce compressed/short output */ - Assert(!VARATT_IS_EXTENDED(attr)); - } - else if (VARATT_IS_COMPRESSED(attr)) + + /* Handle inline cases (not external) */ + if (VARATT_IS_COMPRESSED(attr)) { /* * This is a compressed value inside of the main tuple @@ -223,63 +308,75 @@ detoast_attr_slice(struct varlena *attr, else if (pg_add_s32_overflow(sliceoffset, slicelength, &slicelimit)) slicelength = slicelimit = -1; - if (VARATT_IS_EXTERNAL_ONDISK(attr)) + if (VARATT_IS_EXTERNAL(attr)) { - struct varatt_external toast_pointer; + DecodedExternalToast decoded; - VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); + if (decode_external_toast_pointer(attr, &decoded)) + { + if (decoded.method == TOAST_DECOMP_NONE) + return toast_fetch_datum_slice(attr, sliceoffset, slicelength); + + /* Compressed: fetch enough to decompress the requested prefix */ + if (slicelimit >= 0) + { + int32 max_size = decoded.extsize; + + /* PGLZ supports partial decompression; LZ4 needs all data */ + if (decoded.method == TOAST_DECOMP_PGLZ) + max_size = pglz_maximum_compressed_size(slicelimit, max_size); + + preslice = toast_fetch_datum_slice(attr, 0, max_size); + } + else + preslice = toast_fetch_datum_decoded(&decoded); + } + else if (VARATT_IS_EXTERNAL_ONDISK(attr)) + { + /* Unrecognized compression - legacy path */ + struct varatt_external toast_pointer; - /* fast path for non-compressed external datums */ - if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) - return toast_fetch_datum_slice(attr, sliceoffset, slicelength); + VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); - /* - * For compressed values, we need to fetch enough slices to decompress - * at least the requested part (when a prefix is requested). - * Otherwise, just fetch all slices. - */ - if (slicelimit >= 0) - { - int32 max_size = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); + if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) + return toast_fetch_datum_slice(attr, sliceoffset, slicelength); - /* - * Determine maximum amount of compressed data needed for a prefix - * of a given length (after decompression). - * - * At least for now, if it's LZ4 data, we'll have to fetch the - * whole thing, because there doesn't seem to be an API call to - * determine how much compressed data we need to be sure of being - * able to decompress the required slice. - */ - if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) == - TOAST_PGLZ_COMPRESSION_ID) - max_size = pglz_maximum_compressed_size(slicelimit, max_size); + if (slicelimit >= 0) + { + int32 max_size = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer); - /* - * Fetch enough compressed slices (compressed marker will get set - * automatically). - */ - preslice = toast_fetch_datum_slice(attr, 0, max_size); + if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) == + TOAST_PGLZ_COMPRESSION_ID) + max_size = pglz_maximum_compressed_size(slicelimit, max_size); + + preslice = toast_fetch_datum_slice(attr, 0, max_size); + } + else + preslice = toast_fetch_datum(attr); } - else - preslice = toast_fetch_datum(attr); - } - else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) - { - struct varatt_indirect redirect; + else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) + { + struct varatt_indirect redirect; - VARATT_EXTERNAL_GET_POINTER(redirect, attr); + VARATT_EXTERNAL_GET_POINTER(redirect, attr); - /* nested indirect Datums aren't allowed */ - Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer)); + /* nested indirect Datums aren't allowed */ + Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer)); - return detoast_attr_slice(redirect.pointer, - sliceoffset, slicelength); - } - else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) - { - /* pass it off to detoast_external_attr to flatten */ - preslice = detoast_external_attr(attr); + return detoast_attr_slice(redirect.pointer, + sliceoffset, slicelength); + } + else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) + { + /* pass it off to detoast_external_attr to flatten */ + preslice = detoast_external_attr(attr); + } + else + { + /* Should not reach here - unknown external type */ + elog(ERROR, "unexpected external varlena type"); + preslice = attr; /* keep compiler quiet */ + } } else preslice = attr; @@ -462,6 +559,61 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, return result; } +/* ---------- + * toast_fetch_datum_decoded - + * + * Fetch TOAST data using pre-decoded pointer info. + * ---------- + */ +static struct varlena * +toast_fetch_datum_decoded(const DecodedExternalToast *decoded) +{ + Relation toastrel; + struct varlena *result; + int32 attrsize = decoded->extsize; + + result = (struct varlena *) palloc(attrsize + VARHDRSZ); + + /* HAS_TCINFO determines header format, not "is compressed" */ + if (decoded->flags & TOAST_EXT_HAS_TCINFO) + SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ); + else + SET_VARSIZE(result, attrsize + VARHDRSZ); + + if (attrsize == 0) + return result; + + toastrel = table_open(decoded->toastrelid, AccessShareLock); + table_relation_fetch_toast_slice(toastrel, decoded->valueid, + attrsize, 0, attrsize, result); + table_close(toastrel, AccessShareLock); + + return result; +} + +/* ---------- + * toast_decompress_decoded - + * + * Decompress TOAST data using decoded method. + * ---------- + */ +static struct varlena * +toast_decompress_decoded(const struct varlena *compressed, + const DecodedExternalToast *decoded) +{ + switch (decoded->method) + { + case TOAST_DECOMP_PGLZ: + return pglz_decompress_datum(compressed); + case TOAST_DECOMP_LZ4: + return lz4_decompress_datum(compressed); + case TOAST_DECOMP_NONE: + default: + elog(ERROR, "unexpected decompression method %d", decoded->method); + return NULL; + } +} + /* ---------- * toast_decompress_datum - * diff --git a/src/include/access/toast_internals.h b/src/include/access/toast_internals.h index 06ae8583c1e..0245f67b246 100644 --- a/src/include/access/toast_internals.h +++ b/src/include/access/toast_internals.h @@ -16,6 +16,45 @@ #include "storage/lockdefs.h" #include "utils/relcache.h" #include "utils/snapshot.h" +#include "varatt.h" + +/* + * Decompression method for decoded toast pointers. Separate from + * ToastCompressionId (2-bit on-disk encoding) to allow future methods. + */ +typedef enum ToastDecompressMethod +{ + TOAST_DECOMP_NONE = 0, + TOAST_DECOMP_PGLZ = 1, + TOAST_DECOMP_LZ4 = 2 +} ToastDecompressMethod; + +/* + * Flags for DecodedExternalToast. + * + * HAS_TCINFO: Payload starts with tcinfo header. True for PGLZ/LZ4 external; + * false for uncompressed or future methods storing raw compressed data. + * IS_ONDISK: Set for VARTAG_ONDISK (for future vartag extension). + */ +#define TOAST_EXT_HAS_TCINFO 0x01 +#define TOAST_EXT_IS_ONDISK 0x02 + +/* + * Decoded representation of an external on-disk TOAST pointer. + * Normalizes vartag/va_extinfo variations; decode once, use throughout. + * + * HAS_TCINFO indicates payload format (has tcinfo header), distinct from + * "is compressed" (extsize < rawsize) - future methods may omit tcinfo. + */ +typedef struct DecodedExternalToast +{ + Oid toastrelid; + Oid valueid; + uint32 rawsize; /* Decompressed size; for future methods without tcinfo */ + uint32 extsize; /* On-disk payload size */ + ToastDecompressMethod method; + uint8 flags; +} DecodedExternalToast; /* * The information at the start of the compressed toast data. -- 2.39.3 (Apple Git-146)