[Asterisk-code-review] res_pjsip: Mask invalid UTF-8 sequences from callerid name (asterisk[18])
George Joseph
asteriskteam at digium.com
Fri Feb 17 10:59:00 CST 2023
George Joseph has uploaded this change for review. ( https://gerrit.asterisk.org/c/asterisk/+/19898 )
Change subject: res_pjsip: Mask invalid UTF-8 sequences from callerid name
......................................................................
res_pjsip: Mask invalid UTF-8 sequences from callerid name
* Added a new function ast_utf8_mask_invalid_chars() to
json.c that copies a string replacing any invalid UTF-8
sequences with a specified mask character. For example:
"abc\xffdef" becomes "abc?def".
* Updated res_pjsip:set_id_from_hdr() to use
ast_utf8_mask_invalid_chars and print a warning if any
invalid sequences were found during the copy.
* Updated stasis_channels:ast_channel_publish_varset to use
ast_utf8_mask_invalid_chars and print a warning if any
invalid sequences were found during the copy.
ASTERISK-27830
Change-Id: I4ffbdb19c80bf0efc675d40078a3ca4f85c567d8
---
M include/asterisk/json.h
M include/asterisk/utf8.h
M main/json.c
M main/stasis_channels.c
M res/res_pjsip.c
M tests/test_json.c
6 files changed, 162 insertions(+), 7 deletions(-)
git pull ssh://gerrit.asterisk.org:29418/asterisk refs/changes/98/19898/1
diff --git a/include/asterisk/json.h b/include/asterisk/json.h
index 5edc3a9..5f41e69 100644
--- a/include/asterisk/json.h
+++ b/include/asterisk/json.h
@@ -20,6 +20,7 @@
#define _ASTERISK_JSON_H
#include "asterisk/netsock2.h"
+#include "utf8.h"
/*! \file
*
@@ -193,6 +194,26 @@
/*!@{*/
/*!
+ * \brief Copy a string safely masking any invalid UTF-8 sequences
+ *
+ * This is similar to \ref ast_copy_string, but it will only copy valid UTF-8
+ * sequences from the source string into the destination buffer. Unlike
+ * \ref ast_utf8_copy_string however, if an invalid sequence is encountered,
+ * it's masked with the supplied character and copying continues.
+ *
+ * \param dst The destination buffer.
+ * \param dst_size The size of the dst buffer including space for the NULL terminator
+ * \param src The source string
+ * \param src_len The number of characters to copy
+ * \param mask The charcter to use to mask the invalid ones
+ *
+ * \return The \ref ast_utf8_validation_result indicating whether there
+ * were any invalid characters in the string.
+ */
+enum ast_utf8_validation_result ast_utf8_mask_invalid_chars(char *dst,
+ size_t dst_size, const char *str, size_t src_len, const char mask);
+
+/*!
* \brief Check the string of the given length for UTF-8 format.
* \since 13.12.0
*
diff --git a/include/asterisk/utf8.h b/include/asterisk/utf8.h
index 02ec800..7638637 100644
--- a/include/asterisk/utf8.h
+++ b/include/asterisk/utf8.h
@@ -93,6 +93,14 @@
* to feed into the validator the UTF-8 sequence is invalid.
*/
AST_UTF8_UNKNOWN,
+
+ /*! \brief Outright failure
+ *
+ * Some condition prevented the validator or copy function
+ * from operating all. For instance, it was passed a NULL
+ * pointer or the output buffer was too small.
+ */
+ AST_UTF8_FAIL,
};
/*!
diff --git a/main/json.c b/main/json.c
index 616b12e..66676a2 100644
--- a/main/json.c
+++ b/main/json.c
@@ -230,6 +230,46 @@
return str ? ast_json_utf8_check_len(str, strlen(str)) : 0;
}
+enum ast_utf8_validation_result ast_utf8_mask_invalid_chars(char *dst,
+ size_t dst_size, const char *src, size_t src_count, const char mask)
+{
+ size_t pos;
+ size_t count;
+ enum ast_utf8_validation_result result = AST_UTF8_VALID;
+
+ if (!src || ! dst || dst_size < src_count + 1) {
+ return AST_UTF8_FAIL;
+ }
+
+ for (pos = 0; pos < src_count; ) {
+ count = json_utf8_check_first(src[pos]);
+ ast_debug(9, "first pos: %2ld count: %2ld char: '%c' 0x%02x", pos, count, src[pos], src[pos] & 0xFF);
+ if (count == 0) {
+ dst[pos] = mask;
+ ast_debug(9, " not good dst: '%c' 0x%02x\n", dst[pos], dst[pos]);
+ pos++;
+ result = AST_UTF8_INVALID;
+ } else if (count > 1) {
+ if (!json_utf8_check_full(&src[pos], count)) {
+ dst[pos] = mask;
+ ast_debug(9, " fail full dst: '%c' 0x%02x\n", dst[pos], dst[pos]);
+ pos++;
+ result = AST_UTF8_INVALID;
+ } else {
+ strncpy(&dst[pos], &src[pos], count);
+ ast_debug(9, " good seq dst: '%.*s'\n", (int)count, &dst[pos]);
+ pos+=count;
+ }
+ } else {
+ dst[pos] = src[pos];
+ ast_debug(9, " good char dst: '%c' 0x%02x\n", dst[pos], dst[pos]);
+ pos++;
+ }
+ }
+ dst[pos] = '\0';
+ return result;
+}
+
struct ast_json *ast_json_true(void)
{
return (struct ast_json *)json_true();
@@ -637,7 +677,7 @@
"exten", exten,
"priority", priority != -1 ? ast_json_integer_create(priority) : ast_json_null(),
"app_name", app_name,
- "app_data", app_data
+ "app_data", AST_JSON_UTF8_VALIDATE(app_data)
);
}
diff --git a/main/stasis_channels.c b/main/stasis_channels.c
index d373f6a..696e68f 100644
--- a/main/stasis_channels.c
+++ b/main/stasis_channels.c
@@ -1154,13 +1154,23 @@
void ast_channel_publish_varset(struct ast_channel *chan, const char *name, const char *value)
{
struct ast_json *blob;
+ enum ast_utf8_validation_result result;
+ char *new_value;
ast_assert(name != NULL);
ast_assert(value != NULL);
+ new_value = ast_strdupa(value);
+ result = ast_utf8_mask_invalid_chars(new_value, strlen(new_value) + 1,
+ value, strlen(value), '?');
+
+ if (result != AST_UTF8_VALID) {
+ ast_log(LOG_WARNING, "%s: Variable '%s' has invalid UTF-8 value '%s'. "
+ " Replacing with '%s'", ast_channel_name(chan), name, value, new_value);
+ }
blob = ast_json_pack("{s: s, s: s}",
"variable", name,
- "value", value);
+ "value", new_value);
if (!blob) {
ast_log(LOG_ERROR, "Error creating message\n");
return;
diff --git a/res/res_pjsip.c b/res/res_pjsip.c
index 8273847..7374a4e 100644
--- a/res/res_pjsip.c
+++ b/res/res_pjsip.c
@@ -47,6 +47,7 @@
#include "asterisk/test.h"
#include "asterisk/res_pjsip_presence_xml.h"
#include "asterisk/res_pjproject.h"
+#include "asterisk/json.h"
/*** MODULEINFO
<depend>pjproject</depend>
@@ -2463,10 +2464,9 @@
char cid_num[AST_CHANNEL_NAME];
pjsip_name_addr *id_name_addr = (pjsip_name_addr *) hdr->uri;
char *semi;
+ enum ast_utf8_validation_result result;
- ast_copy_pj_str(cid_name, &id_name_addr->display, sizeof(cid_name));
ast_copy_pj_str(cid_num, ast_sip_pjsip_uri_get_username(hdr->uri), sizeof(cid_num));
-
/* Always truncate caller-id number at a semicolon. */
semi = strchr(cid_num, ';');
if (semi) {
@@ -2484,6 +2484,16 @@
*semi = '\0';
}
+ result = ast_utf8_mask_invalid_chars(cid_name, sizeof(cid_name),
+ id_name_addr->display.ptr, id_name_addr->display.slen, '?');
+
+ if (result != AST_UTF8_VALID) {
+ ast_log(LOG_WARNING, "CallerID Name '" PJSTR_PRINTF_SPEC "' for number '%s' has invalid UTF-8 characters. "
+ " Replaced with '%s'",
+ PJSTR_PRINTF_VAR(id_name_addr->display), cid_num,
+ cid_name);
+ }
+
ast_free(id->name.str);
id->name.str = ast_strdup(cid_name);
if (!ast_strlen_zero(cid_name)) {
diff --git a/tests/test_json.c b/tests/test_json.c
index e1fc0ba..297bc7c 100644
--- a/tests/test_json.c
+++ b/tests/test_json.c
@@ -1718,14 +1718,14 @@
break;
}
- expected = ast_json_pack("{s: o, s: o, s: o, s: o, s: o}",
+ expected = ast_json_pack("{s: o, s: o, s: o, s: o, s: s}",
"context", ast_json_null(),
"exten", ast_json_null(),
"priority", ast_json_null(),
"app_name", ast_json_null(),
- "app_data", ast_json_null()
+ "app_data", ""
);
- uut = ast_json_dialplan_cep_app(NULL, NULL, -1, NULL, NULL);
+ uut = ast_json_dialplan_cep_app(NULL, NULL, -1, NULL, "");
ast_test_validate(test, ast_json_equal(expected, uut));
ast_json_unref(expected);
@@ -1743,6 +1743,46 @@
return AST_TEST_PASS;
}
+static int test_copy_and_mask(const char *src, const char *cmp)
+{
+ char *dst = ast_strdupa(src);
+ ast_utf8_mask_invalid_chars(dst, strlen(dst) + 1,
+ src, strlen(src), '?');
+ if (strcmp(dst, cmp) != 0) {
+ ast_log(LOG_ERROR, "Invalid result. In: '%s', Out: '%s', Expected: '%s'\n",
+ src, dst, cmp);
+ return 0;
+ }
+ return 1;
+}
+
+AST_TEST_DEFINE(test_utf8_mask_invalid_chars)
+{
+ switch (cmd) {
+ case TEST_INIT:
+ info->name = "mask_string";
+ info->category = CATEGORY;
+ info->summary = "Test ast_utf8_mask_invalid_chars";
+ info->description =
+ "Tests UTF-8 string copying/masking code.";
+ return AST_TEST_NOT_RUN;
+ case TEST_EXECUTE:
+ break;
+ }
+
+ ast_test_validate(test, test_copy_and_mask("Asterisk", "Asterisk"));
+ ast_test_validate(test, test_copy_and_mask("Asterisk \xc2", "Asterisk ?"));
+ ast_test_validate(test, test_copy_and_mask("Asterisk \xc2\xae", "Asterisk \xc2\xae"));
+ ast_test_validate(test, test_copy_and_mask("Asterisk \xc0\x8a", "Asterisk ??"));
+ ast_test_validate(test, test_copy_and_mask("\xce\xbb xyz", "\xce\xbb xyz"));
+ ast_test_validate(test, test_copy_and_mask("\xe0\xc2\xb0xyz", "?\xc2\xb0xyz"));
+ ast_test_validate(test, test_copy_and_mask("\xe0\xc2\xf4\xb0xyz", "????xyz"));
+ ast_test_validate(test, test_copy_and_mask("\xe0\xc2\xb0xyz\xc2", "?\xc2\xb0xyz?"));
+
+ return AST_TEST_PASS;
+}
+
+
static int unload_module(void)
{
AST_TEST_UNREGISTER(json_test_false);
@@ -1797,6 +1837,7 @@
AST_TEST_UNREGISTER(json_test_name_number);
AST_TEST_UNREGISTER(json_test_timeval);
AST_TEST_UNREGISTER(json_test_cep);
+ AST_TEST_UNREGISTER(test_utf8_mask_invalid_chars);
return 0;
}
@@ -1854,6 +1895,7 @@
AST_TEST_REGISTER(json_test_name_number);
AST_TEST_REGISTER(json_test_timeval);
AST_TEST_REGISTER(json_test_cep);
+ AST_TEST_REGISTER(test_utf8_mask_invalid_chars);
ast_test_register_init(CATEGORY, json_test_init);
ast_test_register_cleanup(CATEGORY, json_test_cleanup);
--
To view, visit https://gerrit.asterisk.org/c/asterisk/+/19898
To unsubscribe, or for help writing mail filters, visit https://gerrit.asterisk.org/settings
Gerrit-Project: asterisk
Gerrit-Branch: 18
Gerrit-Change-Id: I4ffbdb19c80bf0efc675d40078a3ca4f85c567d8
Gerrit-Change-Number: 19898
Gerrit-PatchSet: 1
Gerrit-Owner: George Joseph <gjoseph at digium.com>
Gerrit-MessageType: newchange
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.digium.com/pipermail/asterisk-code-review/attachments/20230217/c09c8c26/attachment-0001.html>
More information about the asterisk-code-review
mailing list