From 3c18418ccc3a8fe2e0276d229ff51d3f29e766a4 Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Sat, 13 Sep 2025 10:51:35 +0200
Subject: [PATCH v2 2/2] Cosmetic improvements for SIMILAR TO regex conversion

The bug introduced in e3ffc3e91d indicates that the code in that area
should be more readable.  This patch renames the variables
"charclass_depth" and "charclass_start" to something more meaningful,
adds and improves comments and rewrites an "if" cascade to be more
consistent.

This patch is purely cosmetic and should not change the logic.

Author: Laurenz Albe <laurenz.albe@cybertec.at>
Discussion: https://postgr.es/m/flat/2673230.1757722323%40sss.pgh.pa.us#831fd545c0faeebbfb08714eb4cc307d
Backpatch-through: 13
---
 src/backend/utils/adt/regexp.c | 76 ++++++++++++++++++++--------------
 1 file changed, 45 insertions(+), 31 deletions(-)

diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index b62d67a5a98..dcdb2e53f67 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -774,10 +774,8 @@ similar_escape_internal(text *pat_text, text *esc_text)
 				elen;
 	bool		afterescape = false;
 	int			nquotes = 0;
-	int			charclass_depth = 0;	/* Nesting level of character classes,
-										 * encompassed by square brackets */
-	int			charclass_start = 0;	/* State of the character class start,
-										 * for carets */
+	int			bracket_depth = 0;	/* square bracket nesting level */
+	int			charclass_pos = 0;	/* position inside a character class */
 
 	p = VARDATA_ANY(pat_text);
 	plen = VARSIZE_ANY_EXHDR(pat_text);
@@ -907,7 +905,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
 		/* fast path */
 		if (afterescape)
 		{
-			if (pchar == '"' && charclass_depth < 1)	/* escape-double-quote? */
+			if (pchar == '"' && bracket_depth < 1)	/* escape-double-quote? */
 			{
 				/* emit appropriate part separator, per notes above */
 				if (nquotes == 0)
@@ -948,12 +946,13 @@ similar_escape_internal(text *pat_text, text *esc_text)
 				 */
 				*r++ = '\\';
 				*r++ = pchar;
+
 				/*
 				 * If we encounter an escaped character in a character class,
 				 * we are no longer at the beginning.
 				 */
-				if (charclass_depth > 0)
-					charclass_start = 3;
+				if (bracket_depth > 0)
+					charclass_pos = 3;
 			}
 			afterescape = false;
 		}
@@ -962,49 +961,64 @@ similar_escape_internal(text *pat_text, text *esc_text)
 			/* SQL escape character; do not send to output */
 			afterescape = true;
 		}
-		else if (charclass_depth > 0)
+		else if (bracket_depth > 0)
 		{
 			if (pchar == '\\')
 			{
+				/* if backslash is no SIMILAR TO escape character, double it */
 				*r++ = '\\';
 				/* we are no longer at the beginning of a character class */
-				charclass_start = 3;
+				charclass_pos = 3;
 			}
 			*r++ = pchar;
 
-			/*
-			 * Ignore a closing bracket at the start of a character class.
-			 * Such a bracket is taken literally rather than closing the
-			 * class.  "charclass_start" is 1 right at the beginning of a
-			 * class and 2 after an initial caret.
+			/*----------
+			 * "charclass_pos" describes where in a character class we are:
+			 *   0: not in a character class
+			 *   1: right after the opening '[' (a following '^' will negate
+			 *      the class, and ']' is a normal character)
+			 *   2: right after a '^' after the opening '[' (']' is a normal
+			 *      character)
+			 *   3 or more: further inside the character class
 			 */
-			if (pchar == ']' && charclass_start > 2)
-				charclass_depth--;
+			if (pchar == ']' && charclass_pos > 2)
+			{
+				/* found the real end of a bracket pair */
+				bracket_depth--;
+				/* don't reset charclass_pos, this may be an inner bracket */
+			}
 			else if (pchar == '[')
 			{
-				charclass_depth++;
+				/* start of a nested bracket pair */
+				bracket_depth++;
 				/* we are no longer at the beginning of a character class */
-				charclass_start = 3;
+				charclass_pos = 3;
+			}
+			else if (pchar == '^')
+			{
+				/*
+				 * A caret right after the opening bracket negates the
+				 * character class.  In that case, the following will
+				 * increment charclass_pos from 1 to 2, so that a following
+				 * ']' is still a normal character and does not close the
+				 * character class. If we are further inside a character
+				 * class, charclass_pos might get incremented past 3, which is
+				 * fine.
+				 */
+				charclass_pos++;
 			}
-
-			/*
-			 * If there is a caret right after the opening bracket, it negates
-			 * the character class, but a following closing bracket should
-			 * still be treated as a normal character.  That holds only for
-			 * the first caret, so only the values 1 and 2 mean that closing
-			 * brackets should be taken literally.
-			 */
-			if (pchar == '^')
-				charclass_start++;
 			else
-				charclass_start = 3;	/* definitely past the start */
+			{
+				/* a leading ']' or anything else take us past the beginning */
+				charclass_pos = 3;
+			}
 		}
 		else if (pchar == '[')
 		{
 			/* start of a character class */
 			*r++ = pchar;
-			charclass_depth++;
-			charclass_start = 1;
+			bracket_depth = 1;
+			charclass_pos = 1;
 		}
 		else if (pchar == '%')
 		{
-- 
2.51.0

