From dd37acb30b637c306f2ae7f041c164e892e92a22 Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.albe@cybertec.at>
Date: Sat, 13 Sep 2025 09:59:47 +0200
Subject: [PATCH v2 1/2] Amend recent fix for SIMILAR TO regex conversion

Commit e3ffc3e91d fixed the translation of character classes in
SIMILAR TO regular expressions.  Unfortunately the fix broke a corner
case: if there is an escape character right after the opening bracket
(for example in "[\q]"), a closing bracket right after the escape
sequence would not be seen as closing the character class.

There were two more oversights: a backslash or a nested opening bracket
right at the beginning of a character class should remove the special
meaning from any following caret or closing bracket.

Author: Laurenz Albe <laurenz.albe@cybertec.at>
Reported-By: Dominique Devienne <ddevienne@gmail.com>
Reported-By: Stephan Springl <springl-psql@bfw-online.de>
Discussion: https://postgr.es/m/41a37137-f8bb-8fc5-2948-31b528f166dc%40bfw-online.de
Discussion: https://postgr.es/m/CAFCRh-8NwJd0jq6P%3DR3qhHyqU7hw0BTor3W0SvUcii24et%2BzAw%40mail.gmail.com
Backpatch-through: 13
---
 src/backend/utils/adt/regexp.c        | 14 ++++++++++++++
 src/test/regress/expected/strings.out |  9 +++++++++
 src/test/regress/sql/strings.sql      |  3 +++
 3 files changed, 26 insertions(+)

diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index 6e2864cbbda..b62d67a5a98 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -948,6 +948,12 @@ similar_escape_internal(text *pat_text, text *esc_text)
 				 */
 				*r++ = '\\';
 				*r++ = pchar;
+				/*
+				 * If we encounter an escaped character in a character class,
+				 * we are no longer at the beginning.
+				 */
+				if (charclass_depth > 0)
+					charclass_start = 3;
 			}
 			afterescape = false;
 		}
@@ -959,7 +965,11 @@ similar_escape_internal(text *pat_text, text *esc_text)
 		else if (charclass_depth > 0)
 		{
 			if (pchar == '\\')
+			{
 				*r++ = '\\';
+				/* we are no longer at the beginning of a character class */
+				charclass_start = 3;
+			}
 			*r++ = pchar;
 
 			/*
@@ -971,7 +981,11 @@ similar_escape_internal(text *pat_text, text *esc_text)
 			if (pchar == ']' && charclass_start > 2)
 				charclass_depth--;
 			else if (pchar == '[')
+			{
 				charclass_depth++;
+				/* we are no longer at the beginning of a character class */
+				charclass_start = 3;
+			}
 
 			/*
 			 * If there is a caret right after the opening bracket, it negates
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index ba302da51e7..2d6cb02ad60 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -693,6 +693,15 @@ EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[^^]^';
    Filter: (f1 ~ '^(?:[^^]\^)$'::text)
 (2 rows)
 
+-- Closing square bracket after an escape sequence at the beginning of
+-- a character class closes the character class
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[|a]%' ESCAPE '|';
+              QUERY PLAN               
+---------------------------------------
+ Seq Scan on text_tbl
+   Filter: (f1 ~ '^(?:[\a].*)$'::text)
+(2 rows)
+
 -- Test backslash escapes in regexp_replace's replacement string
 SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3');
  regexp_replace 
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index b94004cc08c..5ed421d6205 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -218,6 +218,9 @@ EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[]%][^]%][^%]%';
 -- Closing square bracket effective after two carets at the beginning
 -- of character class.
 EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[^^]^';
+-- Closing square bracket after an escape sequence at the beginning of
+-- a character class closes the character class
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[|a]%' ESCAPE '|';
 
 -- Test backslash escapes in regexp_replace's replacement string
 SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3');
-- 
2.51.0

