Hi, Rucha! On Apr 15, Rucha Deodhar wrote: > revision-id: 7b20964dd240 > parent(s): e9a2c9e > author: Rucha Deodhar <rucha.deod...@mariadb.com> > timestamp: 2021-03-26 00:55:56 +0530 > message: > > MDEV-8334: Rename utf8 to utf8mb3 > > This patch is made as a part of MDEV-8334 to fix failing test in unit and > main test suite so that utf8mb3 characterset is recognized. Failing tests: > main.mysql_client_test > main.mysql_client_test_comp > unit.conc_basic-t > unit.conc_charset > unit.conc_connection
> diff --git a/libmariadb/ma_charset.c b/libmariadb/ma_charset.c > index ee4b0f47..307cd522 100644 > --- a/libmariadb/ma_charset.c > +++ b/libmariadb/ma_charset.c > @@ -67,6 +67,10 @@ > #include <langinfo.h> > #endif > > +#define IS_UTF8(c)\ > +(!strcasecmp((c), "utf8") || !strcasecmp((c), "utf8mb3") ||\ > + !strcasecmp((c), "utf8mb4") || !strcasecmp((c), "utf-8")) > + > /* > +----------------------------------------------------------------------+ > | PHP Version 5 | > @@ -1269,7 +1275,7 @@ struct st_madb_os_charset MADB_OS_CHARSET[]= > {"57010", "ISCII Gujarati", NULL, NULL, MADB_CS_UNSUPPORTED}, > {"57011", "ISCII Punjabi", NULL, NULL, MADB_CS_UNSUPPORTED}, > {"65000", "utf-7 Unicode (UTF-7)", NULL, NULL, MADB_CS_UNSUPPORTED}, > - {"65001", "utf-8 Unicode (UTF-8)", "utf8", NULL, MADB_CS_EXACT}, > + {"65001", "utf-8 Unicode (UTF-8)", "utf8mb3", NULL, MADB_CS_EXACT}, No, keep this as utf8; it's still a valid charset name, and the server can figure out what to map it to. > /* non Windows */ > #else > /* iconv encodings */ > @@ -1337,8 +1343,8 @@ struct st_madb_os_charset MADB_OS_CHARSET[]= > {"gb2312", "GB2312", "gb2312", "GB2312", MADB_CS_EXACT}, > {"gbk", "GBK", "gbk", "GBK", MADB_CS_EXACT}, > {"georgianps", "Georgian", "geostd8", "GEORGIAN-PS", MADB_CS_EXACT}, > - {"utf8", "UTF8", "utf8", "UTF-8", MADB_CS_EXACT}, > - {"utf-8", "UTF8", "utf8", "UTF-8", MADB_CS_EXACT}, > + {"utf8mb3", "UTF8MB3", "utf8mb3", "UTF-8", MADB_CS_EXACT}, > + {"utf-8", "UTF8MB3", "utf8mb3", "UTF-8", MADB_CS_EXACT}, same here > #endif > {NULL, NULL, NULL, NULL, 0} > }; > @@ -1361,8 +1367,8 @@ const char *madb_get_os_character_set() > return MADB_DEFAULT_CHARSET_NAME; > while (MADB_OS_CHARSET[i].identifier) > { > - if (MADB_OS_CHARSET[i].supported > MADB_CS_UNSUPPORTED && > - strcasecmp(MADB_OS_CHARSET[i].identifier, p) == 0) > + if ((MADB_OS_CHARSET[i].supported > MADB_CS_UNSUPPORTED && > + strcasecmp(MADB_OS_CHARSET[i].identifier, p) == 0) || IS_UTF8(p)) why? 
> return MADB_OS_CHARSET[i].charset; > i++; > } > diff --git a/unittest/libmariadb/basic-t.c b/unittest/libmariadb/basic-t.c > index c22e6c2b..e2943964 100644 > --- a/unittest/libmariadb/basic-t.c > +++ b/unittest/libmariadb/basic-t.c > @@ -310,7 +310,8 @@ static int use_utf8(MYSQL *my) > > while ((row= mysql_fetch_row(res)) != NULL) > { > - FAIL_IF(strcmp(row[0], "utf8"), "wrong character set"); > + FAIL_IF(strcmp(row[0], > get_utf8_name(mysql_get_server_version(my),"utf8")), > + "wrong character set"); Technically, C/C is a separate project and can run on any server with any config file. So it'd be safer to check that row[0] starts with utf8 and not assume that it depends on the server version in a specific way. > } > FAIL_IF(mysql_errno(my), mysql_error(my)); > mysql_free_result(res); > diff --git a/unittest/libmariadb/charset.c b/unittest/libmariadb/charset.c > index 898b6dad..ffa877bc 100644 > --- a/unittest/libmariadb/charset.c > +++ b/unittest/libmariadb/charset.c > @@ -71,14 +71,20 @@ int bug_8378(MYSQL *mysql) { > int test_client_character_set(MYSQL *mysql) > { > MY_CHARSET_INFO cs; > + char collation_name[19]; > char *csname= (char*) "utf8"; > char *csdefault= (char*)mysql_character_set_name(mysql); > > + strcpy(collation_name,(const > char*)get_utf8_name(mysql_get_server_version(mysql), > + "utf8_general_ci")); > + This one is simpler. It only tests that mysql_set_character_set() works. Just don't use utf8; make it test something else, e.g. latin2. 
> FAIL_IF(mysql_set_character_set(mysql, csname), mysql_error(mysql)); > > mysql_get_character_set_info(mysql, &cs); > > - FAIL_IF(strcmp(cs.csname, "utf8") || strcmp(cs.name, "utf8_general_ci"), > "Character set != UTF8"); > + FAIL_IF(strcmp(cs.csname, > get_utf8_name(mysql_get_server_version(mysql),"utf8")) || > + strcmp(cs.name, collation_name), > + "Wrong UTF8 characterset"); > FAIL_IF(mysql_set_character_set(mysql, csdefault), mysql_error(mysql)); > > return OK; > @@ -537,6 +544,9 @@ static int test_bug30472(MYSQL *mysql) > > SKIP_MAXSCALE; > > + strcpy(collation_name,(const > char*)get_utf8_name(mysql_get_server_version(mysql), > + "utf8_general_ci")); > + Same here, the bug is https://bugs.mysql.com/bug.php?id=30472 "libmysql doesn't reset charset, insert_id after succ. mysql_change_user() call" so it does not need utf8 specifically. Change it to some easier-to-use charset. > if (mysql_get_server_version(mysql) < 50100 || !is_mariadb) > { > diag("Test requires MySQL Server version 5.1 or above"); > diff --git a/unittest/libmariadb/connection.c > b/unittest/libmariadb/connection.c > index 70d347ce..eb9b39bb 100644 > --- a/unittest/libmariadb/connection.c > +++ b/unittest/libmariadb/connection.c > @@ -644,9 +644,8 @@ int test_conc26(MYSQL *unused __attribute__((unused))) > > FAIL_IF(my_test_connect(mysql, hostname, "notexistinguser", "password", > schema, port, NULL, CLIENT_REMEMBER_OPTIONS), > "Error expected"); > - > - FAIL_IF(!mysql->options.charset_name || > strcmp(mysql->options.charset_name, "utf8") != 0, > - "expected charsetname=utf8"); > + FAIL_IF(!mysql->options.charset_name || > strcmp(mysql->options.charset_name, "utf8") != 0, > + "Wrong utf8 characterset for this version"); Again, CONC-26 is "CLIENT_REMEMBER_OPTIONS flag missing"; it apparently doesn't need utf8 specifically, so just use a different non-default charset there. 
> mysql_close(mysql); > > mysql= mysql_init(NULL); > @@ -981,7 +980,8 @@ static int test_sess_track_db(MYSQL *mysql) > printf("# SESSION_TRACK_VARIABLES: %*.*s\n", (int)len, (int)len, data); > } while (!mysql_session_track_get_next(mysql, > SESSION_TRACK_SYSTEM_VARIABLES, &data, &len)); > diag("charset: %s", mysql->charset->csname); > - FAIL_IF(strcmp(mysql->charset->csname, "utf8"), "Expected charset > 'utf8'"); > + FAIL_IF(strcmp(mysql->charset->csname, > get_utf8_name(mysql_get_server_version(mysql),"utf8")), > + "Wrong utf8 characterset for this version"); same here > > rc= mysql_query(mysql, "SET NAMES latin1"); > check_mysql_rc(rc, mysql); > diff --git a/unittest/libmariadb/my_test.h b/unittest/libmariadb/my_test.h > index c30d1b6d..a040c3d9 100644 > --- a/unittest/libmariadb/my_test.h > +++ b/unittest/libmariadb/my_test.h > @@ -701,3 +701,23 @@ void run_tests(struct my_tests_st *test) { > } > } > > +static inline const char* get_utf8_name(unsigned long server_version, > + const char* name) > +{ > + const char *csname= server_version >= 100600 ? "utf8mb3" : "utf8"; > + char *corrected_name= malloc(19*sizeof(char)); > + corrected_name[18]='\0'; > + > + if (!strchr(name, '_')) > + { > + strcpy(corrected_name,csname); > + corrected_name[strlen(csname)]='\0'; > + } > + else > + { > + strcpy(corrected_name,csname); > + strcat(corrected_name,"_general_ci"); > + corrected_name[strlen(csname)+11]= '\0'; > + } > + return (const char*)corrected_name; > +} shouldn't be needed Regards, Sergei VP of MariaDB Server Engineering and secur...@mariadb.org _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : maria-developers@lists.launchpad.net Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp