From 0087b33bb80b67ae4f7d4901b047da5e2c913d72 Mon Sep 17 00:00:00 2001 From: Filip Janus Date: Sun, 1 Dec 2024 17:43:27 +0100 Subject: [PATCH 2/3] This commit enhances temporary file compression It implements just one working buffer for compression and decompression to avoid wasting memory. The buffer is allocated in the top memory context. Also, it adds pglz support and enhances the code structure so that other compression methods can be added easily. --- src/backend/executor/nodeHashjoin.c | 2 +- src/backend/storage/file/buffile.c | 111 +++++++++++++++++++--------- src/backend/utils/misc/guc_tables.c | 1 + src/include/storage/buffile.h | 4 +- 4 files changed, 80 insertions(+), 38 deletions(-) diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 1b5c6448ef..32cdb63ff8 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -1434,7 +1434,7 @@ ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue, { MemoryContext oldctx = MemoryContextSwitchTo(hashtable->spillCxt); - file = BufFileCreateTemp(false, true); + file = BufFileCreateCompressTemp(false); *fileptr = file; MemoryContextSwitchTo(oldctx); diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c index 818ef39d5c..2b270211cc 100644 --- a/src/backend/storage/file/buffile.c +++ b/src/backend/storage/file/buffile.c @@ -53,7 +53,9 @@ #include "storage/bufmgr.h" #include "storage/fd.h" #include "utils/resowner.h" +#include "utils/memutils.h" +#include "common/pg_lzcompress.h" #ifdef USE_LZ4 #include <lz4.h> #endif @@ -108,6 +110,7 @@ struct BufFile int pos; /* next read/write position in buffer */ int nbytes; /* total # of valid bytes in buffer */ bool compress; /* State of usege file compression */ + char *cBuffer; /* * XXX Should ideally us PGIOAlignedBlock, but might need a way to avoid * wasting per-file alignment padding when some users create many files. 
@@ -140,6 +143,7 @@ makeBufFileCommon(int nfiles) file->pos = 0; file->nbytes = 0; file->compress = false; + file->cBuffer = NULL; return file; } @@ -235,16 +239,45 @@ BufFileCreateTemp(bool interXact, bool compress) if (temp_file_compression != TEMP_NONE_COMPRESSION) { -#ifdef USE_LZ4 file->compress = compress; -#else - NO_LZ4_SUPPORT(); -#endif } return file; + } +/* + * Wrapper for BufFileCreateTemp. + * We want to limit the number of memory allocations for the compression buffer; + * a single buffer is enough for all compression operations. + */ +BufFile * +BufFileCreateCompressTemp(bool interXact){ + static char * buff = NULL; + BufFile *tmpBufFile = BufFileCreateTemp(interXact, true); + + if (buff == NULL && temp_file_compression != TEMP_NONE_COMPRESSION) + { + int size = 0; + switch (temp_file_compression) + { + case TEMP_LZ4_COMPRESSION: +#ifdef USE_LZ4 + size = LZ4_compressBound(BLCKSZ)+sizeof(int); +#endif + break; + case TEMP_PGLZ_COMPRESSION: + size = pglz_maximum_compressed_size(BLCKSZ, BLCKSZ)+sizeof(int); + break; + } + /* + * Persistent buffer for all temporary file compressions + */ + buff = MemoryContextAlloc(TopMemoryContext, size); + } + tmpBufFile->cBuffer = buff; + return tmpBufFile; +} /* * Build the name for a given segment of a given BufFile. 
*/ @@ -516,12 +549,10 @@ BufFileLoadBuffer(BufFile *file) /* if not EOF let's continue */ if (nread > 0) { - /* - * A long life buffer would make sence to limit number of - * memory allocations - */ - char * buff; + /* A long-lived buffer limits the number of memory allocations */ + char * buff = file->cBuffer; + Assert(file->cBuffer != NULL); /* * Read compressed data, curOffset differs with pos * It reads less data than it returns to caller @@ -529,25 +560,32 @@ BufFileLoadBuffer(BufFile *file) */ file->curOffset+=sizeof(nbytes); - buff = palloc(nbytes); - nread = FileRead(thisfile, buff, nbytes, file->curOffset, WAIT_EVENT_BUFFILE_READ); + switch (temp_file_compression) + { + case TEMP_LZ4_COMPRESSION: #ifdef USE_LZ4 - file->nbytes = LZ4_decompress_safe(buff, - file->buffer.data,nbytes,sizeof(file->buffer)); - file->curOffset += nread; + file->nbytes = LZ4_decompress_safe(buff, + file->buffer.data,nbytes,sizeof(file->buffer)); #endif + break; + + case TEMP_PGLZ_COMPRESSION: + file->nbytes = pglz_decompress(buff,nbytes, + file->buffer.data,sizeof(file->buffer),false); + break; + } + file->curOffset += nread; if (file->nbytes < 0) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("compressed lz4 data is corrupt"))); - pfree(buff); } } @@ -591,8 +629,6 @@ BufFileDumpBuffer(BufFile *file) /* Save nbytes value because the size changes due to compression */ int nbytesOriginal = file->nbytes; - bool compression = false; - char * DataToWrite; DataToWrite = file->buffer.data; @@ -604,26 +640,33 @@ BufFileDumpBuffer(BufFile *file) if (file->compress) { - int cBufferSize = 0; char * cData; int cSize = 0; + + Assert(file->cBuffer != NULL); + cData = file->cBuffer; + + switch (temp_file_compression) + { + case TEMP_LZ4_COMPRESSION: + { #ifdef USE_LZ4 - cBufferSize = LZ4_compressBound(file->nbytes); -#endif - /* - * A long life buffer would make sence to limit number of - * memory allocations - */ - compression = true; - cData = palloc(cBufferSize + sizeof(int)); 
-#ifdef USE_LZ4 - /* - * Using stream compression would lead to the slight improvement in - * compression ratio - */ - cSize = LZ4_compress_default(file->buffer.data, - cData + sizeof(int),file->nbytes, cBufferSize); + int cBufferSize = LZ4_compressBound(file->nbytes); + /* + * Using stream compression would lead to the slight improvement in + * compression ratio + */ + cSize = LZ4_compress_default(file->buffer.data, + cData + sizeof(int),file->nbytes, cBufferSize); #endif + break; + } + case TEMP_PGLZ_COMPRESSION: + cSize = pglz_compress(file->buffer.data,file->nbytes, + cData + sizeof(int),PGLZ_strategy_always); + break; + } + /* Write size of compressed block in front of compressed data * It's used to determine amount of data to read within @@ -727,8 +770,6 @@ BufFileDumpBuffer(BufFile *file) file->pos = 0; file->nbytes = 0; - if (compression) - pfree(DataToWrite); } /* diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 3821caf763..e4a98d1198 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -461,6 +461,7 @@ static const struct config_enum_entry default_toast_compression_options[] = { */ static const struct config_enum_entry temp_file_compression_options[] = { {"no", TEMP_NONE_COMPRESSION, false}, + {"pglz", TEMP_PGLZ_COMPRESSION, false}, #ifdef USE_LZ4 {"lz4", TEMP_LZ4_COMPRESSION, false}, #endif diff --git a/src/include/storage/buffile.h b/src/include/storage/buffile.h index 486b552e31..b8ce164e4b 100644 --- a/src/include/storage/buffile.h +++ b/src/include/storage/buffile.h @@ -35,9 +35,8 @@ typedef struct BufFile BufFile; typedef enum { TEMP_NONE_COMPRESSION, -#ifdef USE_LZ4 + TEMP_PGLZ_COMPRESSION, TEMP_LZ4_COMPRESSION -#endif } TempCompression; extern PGDLLIMPORT int temp_file_compression; @@ -47,6 +46,7 @@ extern PGDLLIMPORT int temp_file_compression; * prototypes for functions in buffile.c */ +extern BufFile *BufFileCreateCompressTemp(bool interXact); extern BufFile 
*BufFileCreateTemp(bool interXact, bool compress); extern void BufFileClose(BufFile *file); extern pg_nodiscard size_t BufFileRead(BufFile *file, void *ptr, size_t size); -- 2.39.5 (Apple Git-154)