From 4aadf3ed9ec702bcda9b1799e0b3c7052903c1bb Mon Sep 17 00:00:00 2001
From: shenzheng4
Date: Mon, 10 Mar 2025 20:38:14 +0800
Subject: [PATCH] fix gs_dump

---
Reviewer notes (free text after the "---" separator; not part of the commit
message and not part of any hunk):

  Purpose: make the identifier quoting done by fmtId() aware of the client
  encoding.

  * dumputils.cpp adds a file-static backup_fmt_id_encoding (initially -1),
    a setter setFmtEncoding(), and a private getter getFmtEncoding().  The
    getter returns the stored value when it is not -1; otherwise it reaches
    Assert(backup_fmt_id_encoding != -1) and then returns PG_UTF8
    (presumably the fallback for builds where Assert compiles to nothing --
    confirm).
  * The old fmtId() body is renamed to fmtIdEncoding(rawid, encoding);
    fmtId(rawid) becomes a one-line wrapper that passes getFmtEncoding().
  * When quoting is needed, the new loop consumes strlen(rawid) bytes
    ("remaining").  A byte without the high bit set is copied as-is, with a
    double quote doubled.  For any other byte the length comes from
    pg_encoding_mblen(); if that length exceeds the bytes remaining, or
    pg_encoding_verifymbchar() returns -1, the buffer is enlarged by
    ENLARGE_BUFFER_SIZE (2) bytes, pg_encoding_set_invalid() is called on
    the reserved bytes, and exactly one input byte is skipped.  Otherwise
    charlen bytes are copied unchanged.
  * pg_dump (setup_connection), pg_dumpall (main) and psql (exec_command,
    SyncVariables) call setFmtEncoding() immediately after reading
    PQclientEncoding().

  NOTE(review): if enlargePQExpBuffer() returns false, the offending byte
  is skipped without any replacement being written -- confirm this is the
  intended behaviour.
  NOTE(review): in this copy the first context line of the first hunk is a
  bare "#include" and the From: line has no address; both look truncated --
  confirm against the original commit.
  NOTE(review): line breaks and indentation in this copy were reconstructed
  from the hunk line counts (4-space indentation assumed; in the -160,24
  hunk the position of the blank context line is a best guess).  Run
  "git apply --check" or regenerate the patch from the commit before use.

 src/bin/pg_dump/dumputils.cpp  | 71 +++++++++++++++++++++++++++-------
 src/bin/pg_dump/dumputils.h    |  2 +
 src/bin/pg_dump/pg_dump.cpp    |  1 +
 src/bin/pg_dump/pg_dumpall.cpp |  1 +
 src/bin/psql/command.cpp       |  2 +
 5 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/src/bin/pg_dump/dumputils.cpp b/src/bin/pg_dump/dumputils.cpp
index 976ee456fe..8c72e4b8c4 100644
--- a/src/bin/pg_dump/dumputils.cpp
+++ b/src/bin/pg_dump/dumputils.cpp
@@ -18,6 +18,7 @@
 #include
 #include "dumputils.h"
 #include "dumpmem.h"
+#include "mb/pg_wchar.h"
 #include "pgtime.h"
 #include "parser/keywords.h"
 
@@ -45,6 +46,7 @@ char* binary_upgrade_newowner = NULL;
 int dolphin_lower_case_table_names = -1;
 
 #define MAX_ON_EXIT_NICELY 20
+#define ENLARGE_BUFFER_SIZE 2
 
 static struct {
     on_exit_nicely_callback function;
@@ -53,6 +55,7 @@ static struct {
 
 static int on_exit_nicely_index;
 static int lock_fd = -1;
+static int backup_fmt_id_encoding = -1;
 
 #define supports_grant_options(version) ((version) >= 70400)
 
@@ -78,6 +81,21 @@ void init_parallel_dump_utils(void)
 #endif
 }
 
+void setFmtEncoding(int encoding)
+{
+    backup_fmt_id_encoding = encoding;
+}
+
+static int getFmtEncoding(void)
+{
+    if (backup_fmt_id_encoding != -1) {
+        return backup_fmt_id_encoding;
+    }
+
+    Assert(backup_fmt_id_encoding != -1);
+    return PG_UTF8;
+}
+
 /*
  * Quotes input string if it's not a legitimate SQL identifier as-is.
  *
@@ -86,7 +104,7 @@ void init_parallel_dump_utils(void)
  * reduces memory leakage. (On Windows the memory leakage will be one buffer
  * per thread, which is at least better than one per call).
  */
-const char* fmtId(const char* rawid)
+const char* fmtIdEncoding(const char* rawid, int encoding)
 {
     /*
      * The Tls code goes awry if we use a static var, so we provide for both
@@ -97,6 +115,7 @@ const char* fmtId(const char* rawid)
 
     const char* cp = rawid;
     bool need_quotes = false;
+    size_t remaining = strlen(rawid);
 
 #ifdef WIN32
     if (parallel_init_done)
@@ -160,24 +179,50 @@ const char* fmtId(const char* rawid)
     if (!need_quotes) {
         /* no quoting needed */
         (void)appendPQExpBufferStr(id_return, rawid);
-    } else {
-        (void)appendPQExpBufferChar(id_return, '\"');
-        for (cp = rawid; *cp; cp++) {
-            /*
-             * Did we find a double-quote in the string? Then make this a
-             * double double-quote per SQL99. Before, we put in a
-             * backslash/double-quote pair. - thomas 2000-08-05
-             */
-            if (*cp == '\"')
-                (void)appendPQExpBufferChar(id_return, '\"');
+        return id_return->data;
+    }
+
+    (void)appendPQExpBufferChar(id_return, '"');
+    cp = &rawid[0];
+    while (remaining > 0) {
+        int charlen;
+        if (!IS_HIGHBIT_SET(*cp)) {
+            if (*cp == '"') {
+                (void)appendPQExpBufferChar(id_return, '"');
+            }
             (void)appendPQExpBufferChar(id_return, *cp);
+            remaining--;
+            cp++;
+            continue;
         }
-        (void)appendPQExpBufferChar(id_return, '\"');
-    }
+        charlen = pg_encoding_mblen(encoding, cp);
+        if (remaining < (size_t)charlen || pg_encoding_verifymbchar(encoding, cp, charlen) == -1) {
+            if (enlargePQExpBuffer(id_return, ENLARGE_BUFFER_SIZE)) {
+                pg_encoding_set_invalid(encoding, id_return->data + id_return->len);
+                id_return->len += ENLARGE_BUFFER_SIZE;
+                id_return->data[id_return->len] = '\0';
+            }
+            remaining--;
+            cp++;
+        } else {
+            for (int i = 0; i < charlen; i++) {
+                (void)appendPQExpBufferChar(id_return, *cp);
+                remaining--;
+                cp++;
+            }
+        }
+    }
+
+    (void)appendPQExpBufferChar(id_return, '"');
 
     return id_return->data;
 }
 
+const char* fmtId(const char* rawid)
+{
+    return fmtIdEncoding(rawid, getFmtEncoding());
+}
+
 /*
  * Convert a string value to an SQL string literal and append it to
  * the given buffer. We assume the specified client_encoding and
diff --git a/src/bin/pg_dump/dumputils.h b/src/bin/pg_dump/dumputils.h
index 48665fed5d..5fed8e072f 100644
--- a/src/bin/pg_dump/dumputils.h
+++ b/src/bin/pg_dump/dumputils.h
@@ -98,6 +98,8 @@ extern char* binary_upgrade_newowner;
 
 extern void init_parallel_dump_utils(void);
 extern const char* fmtId(const char* identifier);
+extern void setFmtEncoding(int encoding);
+extern const char* fmtIdEncoding(const char* identifier, int encoding);
 extern void appendStringLiteral(PQExpBuffer buf, const char* str, int encoding, bool std_strings);
 extern void appendStringLiteralConn(PQExpBuffer buf, const char* str, PGconn* conn);
 extern void appendStringLiteralDQ(PQExpBuffer buf, const char* str, const char* dqprefix);
diff --git a/src/bin/pg_dump/pg_dump.cpp b/src/bin/pg_dump/pg_dump.cpp
index a4568b94e0..7327b1c3dc 100644
--- a/src/bin/pg_dump/pg_dump.cpp
+++ b/src/bin/pg_dump/pg_dump.cpp
@@ -2085,6 +2085,7 @@ static void setup_connection(Archive* AH)
      * we know how to escape strings.
      */
     AH->encoding = PQclientEncoding(conn);
+    setFmtEncoding(AH->encoding);
 
     std_strings = PQparameterStatus(conn, "standard_conforming_strings");
     AH->std_strings = ((std_strings != NULL) && strcmp(std_strings, "on") == 0);
diff --git a/src/bin/pg_dump/pg_dumpall.cpp b/src/bin/pg_dump/pg_dumpall.cpp
index da2d724436..4d263d084d 100644
--- a/src/bin/pg_dump/pg_dumpall.cpp
+++ b/src/bin/pg_dump/pg_dumpall.cpp
@@ -451,6 +451,7 @@ int main(int argc, char* argv[])
      * we know how to escape strings.
      */
     encoding = PQclientEncoding(conn);
+    setFmtEncoding(encoding);
     std_strings = PQparameterStatus(conn, "standard_conforming_strings");
     if (std_strings == NULL) {
         std_strings = "off";
diff --git a/src/bin/psql/command.cpp b/src/bin/psql/command.cpp
index 54f5327023..b64aa5d0d3 100644
--- a/src/bin/psql/command.cpp
+++ b/src/bin/psql/command.cpp
@@ -731,6 +731,7 @@ static backslashResult exec_command(const char* cmd, PsqlScanState scan_state, P
             /* save encoding info into psql internal data */
             pset.encoding = PQclientEncoding(pset.db);
             pset.popt.topt.encoding = pset.encoding;
+            setFmtEncoding(pset.encoding);
             if (!SetVariable(pset.vars, "ENCODING", pg_encoding_to_char(pset.encoding))) {
                 psql_error("set variable %s failed.\n", "ENCODING");
             }
@@ -1786,6 +1787,7 @@ void SyncVariables(void)
     /* get stuff from connection */
     pset.encoding = PQclientEncoding(pset.db);
     pset.popt.topt.encoding = pset.encoding;
+    setFmtEncoding(pset.encoding);
     pset.sversion = PQserverVersion(pset.db);
 
     if (!SetVariable(pset.vars, "DBNAME", PQdb(pset.db))) {
-- 
Gitee