From 2de9b5bc18bfa169b3ba3507b6bdf79d277c0ad4 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Fri, 13 Feb 2026 13:36:34 +0300
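Subject: [PATCH v7 2/2] Use 4 vectors in CopyReadLineText() SIMD

The SIMD fast path in CopyReadLineText() now loads and tests four
Vector8 chunks per iteration instead of one.  The per-chunk match
vectors are ORed together so that a single high-bit test covers the
whole block.  When that test hits, the first chunk containing a match is
located and input_buf_ptr is advanced to the special character, which is
then left to the scalar code as before.

As a consequence, the fast path is only entered when at least
4 * sizeof(Vector8) bytes remain in the input buffer (previously: more
than sizeof(Vector8) bytes).
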
---
 src/backend/commands/copyfromparse.c | 116 +++++++++++++++++++++------
 1 file changed, 92 insertions(+), 24 deletions(-)

diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 4a127d1af90..caadc40cc8b 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -1361,6 +1361,9 @@ CopyReadLineText(CopyFromState cstate, bool is_csv, bool simd_enabled)
 		 * escape). This is faster than byte-by-byte iteration, especially on
 		 * large buffers.
 		 *
+		 * For better instruction-level parallelism, we try to process four
+		 * vectors at a time.
+		 *
 		 * We do not apply the SIMD fast path in either of the following
 		 * cases: - When the previously processed character was an escape
 		 * character (last_was_esc), since the next byte must be examined
@@ -1373,53 +1376,118 @@ CopyReadLineText(CopyFromState cstate, bool is_csv, bool simd_enabled)
 		 * the rest of the input once we encounter a special character which
 		 * is neither EOF nor EOL.
 		 */
-		if (simd_enabled && !last_was_esc && copy_buf_len - input_buf_ptr > sizeof(Vector8))
+		if (simd_enabled && !last_was_esc && copy_buf_len - input_buf_ptr >= 4 * sizeof(Vector8))
 		{
-			Vector8		chunk;
-			Vector8		match = vector8_broadcast(0);
-			uint32		mask;
-
-			/* Load a chunk of data into a vector register */
-			vector8_load(&chunk, (const uint8 *) &copy_input_buf[input_buf_ptr]);
+			Vector8		chunk1,
+						chunk2,
+						chunk3,
+						chunk4;
+			Vector8		match1,
+						match2,
+						match3,
+						match4;
+			Vector8		tmp1,
+						tmp2,
+						result;
+
+			/* Load four chunks of data into vector registers */
+			vector8_load(&chunk1, (const uint8 *) &copy_input_buf[input_buf_ptr]);
+			vector8_load(&chunk2, (const uint8 *) &copy_input_buf[input_buf_ptr + sizeof(Vector8)]);
+			vector8_load(&chunk3, (const uint8 *) &copy_input_buf[input_buf_ptr + 2 * sizeof(Vector8)]);
+			vector8_load(&chunk4, (const uint8 *) &copy_input_buf[input_buf_ptr + 3 * sizeof(Vector8)]);
 
 			if (is_csv)
 			{
 				/* \n and \r are not special inside quotes */
 				if (!in_quote)
-					match = vector8_or(vector8_eq(chunk, nl), vector8_eq(chunk, cr));
+				{
+					match1 = vector8_or(vector8_eq(chunk1, nl), vector8_eq(chunk1, cr));
+					match2 = vector8_or(vector8_eq(chunk2, nl), vector8_eq(chunk2, cr));
+					match3 = vector8_or(vector8_eq(chunk3, nl), vector8_eq(chunk3, cr));
+					match4 = vector8_or(vector8_eq(chunk4, nl), vector8_eq(chunk4, cr));
+				}
+				else
+				{
+					match1 = vector8_broadcast(0);
+					match2 = vector8_broadcast(0);
+					match3 = vector8_broadcast(0);
+					match4 = vector8_broadcast(0);
+				}
 
-				match = vector8_or(match, vector8_eq(chunk, quote));
+				match1 = vector8_or(match1, vector8_eq(chunk1, quote));
+				match2 = vector8_or(match2, vector8_eq(chunk2, quote));
+				match3 = vector8_or(match3, vector8_eq(chunk3, quote));
+				match4 = vector8_or(match4, vector8_eq(chunk4, quote));
 				if (escapec != '\0')
-					match = vector8_or(match, vector8_eq(chunk, escape));
+				{
+					match1 = vector8_or(match1, vector8_eq(chunk1, escape));
+					match2 = vector8_or(match2, vector8_eq(chunk2, escape));
+					match3 = vector8_or(match3, vector8_eq(chunk3, escape));
+					match4 = vector8_or(match4, vector8_eq(chunk4, escape));
+				}
 			}
 			else
 			{
-				match = vector8_or(vector8_eq(chunk, nl), vector8_eq(chunk, cr));
-				match = vector8_or(match, vector8_eq(chunk, bs));
+				match1 = vector8_or(vector8_eq(chunk1, nl), vector8_eq(chunk1, cr));
+				match2 = vector8_or(vector8_eq(chunk2, nl), vector8_eq(chunk2, cr));
+				match3 = vector8_or(vector8_eq(chunk3, nl), vector8_eq(chunk3, cr));
+				match4 = vector8_or(vector8_eq(chunk4, nl), vector8_eq(chunk4, cr));
+
+				match1 = vector8_or(match1, vector8_eq(chunk1, bs));
+				match2 = vector8_or(match2, vector8_eq(chunk2, bs));
+				match3 = vector8_or(match3, vector8_eq(chunk3, bs));
+				match4 = vector8_or(match4, vector8_eq(chunk4, bs));
 			}
 
-			/* Check if we found any special characters */
-			mask = vector8_highbit_mask(match);
-			if (mask != 0)
+			/* Combine results to check if any chunk has special characters */
+			tmp1 = vector8_or(match1, match2);
+			tmp2 = vector8_or(match3, match4);
+			result = vector8_or(tmp1, tmp2);
+
+			if (vector8_is_highbit_set(result))
 			{
 				/*
-				 * Found a special character. Advance up to that point and let
-				 * the scalar code handle it.
+				 * Found a special character somewhere in the four chunks.
+				 * Identify the first chunk containing it.
 				 */
-				int			advance = pg_rightmost_one_pos32(mask);
+				uint32		mask;
+				int			advance;
 				char		c1,
 							c2;
 				bool		simd_hit_eol,
 							simd_hit_eof;
 
+				mask = vector8_highbit_mask(match1);
+				if (mask == 0)
+				{
+					input_buf_ptr += sizeof(Vector8);
+					mask = vector8_highbit_mask(match2);
+				}
+				if (mask == 0)
+				{
+					input_buf_ptr += sizeof(Vector8);
+					mask = vector8_highbit_mask(match3);
+				}
+				if (mask == 0)
+				{
+					input_buf_ptr += sizeof(Vector8);
+					mask = vector8_highbit_mask(match4);
+				}
+				Assert(mask != 0);
+
+				/*
+				 * Found a special character. Advance up to that point and let
+				 * the scalar code handle it.
+				 */
+				advance = pg_rightmost_one_pos32(mask);
 				input_buf_ptr += advance;
 				c1 = copy_input_buf[input_buf_ptr];
 
 				/*
-				 * Since we stopped within the chunk and ((copy_buf_len -
-				 * input_buf_ptr) > sizeof(Vector8)) is true,
-				 * copy_input_buf[input_buf_ptr + 1] is guaranteed to be
-				 * readable.
+				 * The entry test (>= 4 * sizeof(Vector8)) only guarantees
+				 * input_buf_ptr < copy_buf_len here.  If the match is the last
+				 * byte of the block, this reads copy_input_buf[copy_buf_len],
+				 * presumably a terminating NUL; confirm, or make the test '>'.
 				 */
 				c2 = copy_input_buf[input_buf_ptr + 1];
 				simd_hit_eol = (c1 == '\r' || c1 == '\n') && (!is_csv || !in_quote);
@@ -1438,8 +1506,8 @@ CopyReadLineText(CopyFromState cstate, bool is_csv, bool simd_enabled)
 			}
 			else
 			{
-				/* No special characters found, so skip the entire chunk */
-				input_buf_ptr += sizeof(Vector8);
+				/* No special characters found, so skip the entire block */
+				input_buf_ptr += 4 * sizeof(Vector8);
 				continue;
 			}
 		}
-- 
2.47.3

