[asterisk-commits] json: Add UTF-8 check call. (asterisk[certified/13.8])
SVN commits to the Asterisk project
asterisk-commits at lists.digium.com
Fri Oct 14 15:13:43 CDT 2016
Joshua Colp has submitted this change and it was merged.
Change subject: json: Add UTF-8 check call.
......................................................................
json: Add UTF-8 check call.
Since the json library does not make the check function public we
recreate/copy the function in our interface module.
ASTERISK-26466
Reported by: Richard Mudgett
Change-Id: I36d3d750b6f5f1a110bc69ea92b435ecdeeb2a99
---
M include/asterisk/json.h
M main/json.c
2 files changed, 156 insertions(+), 0 deletions(-)
Approvals:
George Joseph: Looks good to me, approved
Joshua Colp: Looks good to me, but someone else must approve; Verified
diff --git a/include/asterisk/json.h b/include/asterisk/json.h
index 28ebfbd..cfd9a29 100644
--- a/include/asterisk/json.h
+++ b/include/asterisk/json.h
@@ -217,6 +217,41 @@
/*!@{*/
/*!
+ * \brief Check the string of the given length for UTF-8 format.
+ * \since 13.12.0
+ *
+ * \param str String to check.
+ * \param len Length of string to check, in bytes.
+ *
+ * \note A non-NULL str with a len of 0 is reported as UTF-8 encoded.
+ * \note A debug message is logged when the check fails on a
+ * non-NULL str.
+ *
+ * \retval 0 if not UTF-8 encoded or str is NULL.
+ * \retval 1 if UTF-8 encoded.
+ */
+int ast_json_utf8_check_len(const char *str, size_t len);
+
+/*!
+ * \brief Check the nul terminated string for UTF-8 format.
+ * \since 13.12.0
+ *
+ * \param str String to check.
+ *
+ * \note An empty string is reported as UTF-8 encoded.
+ *
+ * \retval 0 if not UTF-8 encoded or str is NULL.
+ * \retval 1 if UTF-8 encoded.
+ */
+int ast_json_utf8_check(const char *str);
+
+/*!
+ * \brief Check str for UTF-8 and replace with an empty string if fails the check.
+ *
+ * \note The convenience macro is normally used with ast_json_pack()
+ * or a function wrapper that calls ast_json_vpack().
+ *
+ * \note The str argument appears twice in the expansion, so it is
+ * evaluated twice when the check passes. Do not pass an expression
+ * with side effects.
+ *
+ * \note A NULL str fails the check and therefore yields "".
+ */
+#define AST_JSON_UTF8_VALIDATE(str) (ast_json_utf8_check(str) ? (str) : "")
+
+/*!@}*/
+
+/*!@{*/
+
+/*!
* \brief Get the JSON true value.
* \since 12.0.0
*
diff --git a/main/json.c b/main/json.c
index 35e6f16..78a47cd 100644
--- a/main/json.c
+++ b/main/json.c
@@ -269,6 +269,127 @@
return "?";
}
+/* Ported from libjansson utf.c:utf8_check_first()
+ *
+ * Classify a byte as the first byte of a UTF-8 sequence.
+ *
+ * byte - Candidate lead byte.
+ *
+ * Returns the length in bytes (1 to 4) of the sequence that byte
+ * starts, or 0 if byte cannot start a sequence.
+ */
+static size_t json_utf8_check_first(char byte)
+{
+ unsigned char ch = (unsigned char) byte; /* range tests below must not depend on whether char is signed */
+
+ if (ch < 0x80) {
+ return 1; /* single byte sequence, nothing more to validate */
+ }
+
+ if (0x80 <= ch && ch <= 0xBF) {
+ /* second, third or fourth byte of a multi-byte
+ sequence, i.e. a "continuation byte" */
+ return 0;
+ } else if (ch == 0xC0 || ch == 0xC1) {
+ /* overlong encoding of an ASCII byte */
+ return 0;
+ } else if (0xC2 <= ch && ch <= 0xDF) {
+ /* 2-byte sequence */
+ return 2;
+ } else if (0xE0 <= ch && ch <= 0xEF) {
+ /* 3-byte sequence */
+ return 3;
+ } else if (0xF0 <= ch && ch <= 0xF4) {
+ /* 4-byte sequence */
+ return 4;
+ } else { /* ch >= 0xF5 */
+ /* Restricted (start of 4-, 5- or 6-byte sequence) or invalid
+ UTF-8 */
+ return 0;
+ }
+}
+
+/* Ported from libjansson utf.c:utf8_check_full()
+ *
+ * Validate one complete multi-byte UTF-8 sequence.
+ *
+ * str - Points at the first byte of the sequence. str[0] is read
+ *       before len is validated, and str[1] through str[len - 1]
+ *       are read afterwards, so the caller must make sure those
+ *       bytes exist.
+ * len - Length of the sequence in bytes. Only 2, 3 and 4 are
+ *       accepted; any other value is rejected.
+ *
+ * Returns 1 if the sequence is valid, 0 if it is not. A sequence is
+ * rejected when a trailing byte is not a continuation byte, when the
+ * decoded value is above 0x10FFFF, when it is a UTF-16 surrogate, or
+ * when it is an overlong encoding.
+ */
+static size_t json_utf8_check_full(const char *str, size_t len)
+{
+ size_t pos;
+ int32_t value; /* code point accumulated from the sequence */
+ unsigned char ch = (unsigned char) str[0];
+
+ /* Start with the payload bits of the first byte; fewer bits
+    remain as the sequence gets longer. */
+ if (len == 2) {
+ value = ch & 0x1F;
+ } else if (len == 3) {
+ value = ch & 0xF;
+ } else if (len == 4) {
+ value = ch & 0x7;
+ } else {
+ return 0;
+ }
+
+ for (pos = 1; pos < len; ++pos) {
+ ch = (unsigned char) str[pos];
+ if (ch < 0x80 || ch > 0xBF) {
+ /* not a continuation byte */
+ return 0;
+ }
+
+ value = (value << 6) + (ch & 0x3F); /* append the low 6 bits of the continuation byte */
+ }
+
+ if (value > 0x10FFFF) {
+ /* not in Unicode range */
+ return 0;
+ } else if (0xD800 <= value && value <= 0xDFFF) {
+ /* invalid code point (UTF-16 surrogate halves) */
+ return 0;
+ } else if ((len == 2 && value < 0x80)
+ || (len == 3 && value < 0x800)
+ || (len == 4 && value < 0x10000)) {
+ /* overlong encoding */
+ return 0;
+ }
+
+ return 1;
+}
+/*
+ * Check the first len bytes of str for UTF-8 format.
+ *
+ * str - String to check. May be NULL.
+ * len - Number of bytes of str to check.
+ *
+ * Returns 1 if every sequence in the range is valid UTF-8, which
+ * includes the case of len being 0. Returns 0 if str is NULL, if any
+ * sequence is invalid, or if the last sequence needs more bytes than
+ * remain in the range. A debug message is logged when a non-NULL str
+ * fails the check.
+ */
+int ast_json_utf8_check_len(const char *str, size_t len)
+{
+ size_t pos;
+ size_t count; /* length in bytes of the sequence starting at pos */
+ int res = 1;
+
+ if (!str) {
+ return 0;
+ }
+
+ /*
+ * Since the json library does not make the check function
+ * public we recreate/copy the function in our interface
+ * module.
+ *
+ * Loop ported from libjansson utf.c:utf8_check_string()
+ *
+ * Each pass handles one sequence and then advances pos by the
+ * length of that sequence.
+ */
+ for (pos = 0; pos < len; pos += count) {
+ count = json_utf8_check_first(str[pos]);
+ if (count == 0) {
+ res = 0;
+ break;
+ } else if (count > 1) {
+ /* pos < len is guaranteed by the loop condition, so len - pos cannot wrap */
+ if (count > len - pos) {
+ /* UTF-8 needs more than we have left in the string. */
+ res = 0;
+ break;
+ }
+
+ if (!json_utf8_check_full(&str[pos], count)) {
+ res = 0;
+ break;
+ }
+ }
+ }
+
+ if (!res) {
+ /* The precision of %.*s is an int, hence the cast of len. */
+ ast_debug(1, "String '%.*s' is not UTF-8 for json conversion\n", (int) len, str);
+ }
+ return res;
+}
+/*
+ * Check the nul terminated string str for UTF-8 format.
+ *
+ * str - String to check. May be NULL.
+ *
+ * Returns 0 if str is NULL, otherwise the result of
+ * ast_json_utf8_check_len() over strlen(str) bytes.
+ */
+int ast_json_utf8_check(const char *str)
+{
+ /* Test for NULL here so that strlen() is never called on it. */
+ return str ? ast_json_utf8_check_len(str, strlen(str)) : 0;
+}
struct ast_json *ast_json_true(void)
{
--
To view, visit https://gerrit.asterisk.org/4104
To unsubscribe, visit https://gerrit.asterisk.org/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I36d3d750b6f5f1a110bc69ea92b435ecdeeb2a99
Gerrit-PatchSet: 2
Gerrit-Project: asterisk
Gerrit-Branch: certified/13.8
Gerrit-Owner: Richard Mudgett <rmudgett at digium.com>
Gerrit-Reviewer: Anonymous Coward #1000019
Gerrit-Reviewer: George Joseph <gjoseph at digium.com>
Gerrit-Reviewer: Joshua Colp <jcolp at digium.com>
More information about the asterisk-commits
mailing list