[Asterisk-code-review] res_http_media_cache.c: Parse media URLs to find extensions. (asterisk[18])

Joshua Colp asteriskteam at digium.com
Mon Jul 19 06:54:10 CDT 2021


Joshua Colp has submitted this change. ( https://gerrit.asterisk.org/c/asterisk/+/16163 )

Change subject: res_http_media_cache.c: Parse media URLs to find extensions.
......................................................................

res_http_media_cache.c: Parse media URLs to find extensions.

Use the URI parsing functions to parse playback URLs in order to find
their file extensions.

For backwards compatibility, we first look at the full URL, then at
any Content-Type header, and finally at just the path portion of the
URL.

ASTERISK-27871 #close

Change-Id: I16d0682f6d794be96539261b3e48f237909139cb
---
M main/media_cache.c
M res/res_http_media_cache.c
M tests/test_http_media_cache.c
3 files changed, 200 insertions(+), 51 deletions(-)

Approvals:
  Joshua Colp: Looks good to me, approved; Approved for Submit
  Kevin Harwell: Looks good to me, approved
  George Joseph: Looks good to me, but someone else must approve



diff --git a/main/media_cache.c b/main/media_cache.c
index b303643..1899fb4 100644
--- a/main/media_cache.c
+++ b/main/media_cache.c
@@ -126,71 +126,30 @@
 
 /*!
  * \internal
- * \brief Normalize the value of a Content-Type header
- *
- * This will trim off any optional parameters after the type/subtype.
- */
-static void normalize_content_type_header(char *content_type)
-{
-	char *params = strchr(content_type, ';');
-
-	if (params) {
-		*params-- = 0;
-		while (params > content_type && (*params == ' ' || *params == '\t')) {
-			*params-- = 0;
-		}
-	}
-}
-
-/*!
- * \internal
  * \brief Update the name of the file backing a \c bucket_file
  * \param preferred_file_name The preferred name of the backing file
  */
 static void bucket_file_update_path(struct ast_bucket_file *bucket_file,
 	const char *preferred_file_name)
 {
-	char *ext;
-
 	if (!ast_strlen_zero(preferred_file_name) && strcmp(bucket_file->path, preferred_file_name)) {
 		/* Use the preferred file name if available */
-
 		rename(bucket_file->path, preferred_file_name);
 		ast_copy_string(bucket_file->path, preferred_file_name,
 			sizeof(bucket_file->path));
-	} else if (!strchr(bucket_file->path, '.') && (ext = strrchr(ast_sorcery_object_get_id(bucket_file), '.'))) {
-		/* If we don't have a file extension and were provided one in the URI, use it */
-		char found_ext[32];
-		char new_path[PATH_MAX + sizeof(found_ext)];
+	} else if (!strchr(bucket_file->path, '.')) {
+		struct ast_bucket_metadata *ext =
+			ast_bucket_file_metadata_get(bucket_file, "ext");
 
-		ast_bucket_file_metadata_set(bucket_file, "ext", ext);
-
-		/* Don't pass '.' while checking for supported extension */
-		if (!ast_get_format_for_file_ext(ext + 1)) {
-			/* If the file extension passed in the URI isn't supported check for the
-			 * extension based on the MIME type passed in the Content-Type header before
-			 * giving up.
-			 * If a match is found then retrieve the extension from the supported list
-			 * corresponding to the mime-type and use that to rename the file */
-			struct ast_bucket_metadata *header = ast_bucket_file_metadata_get(bucket_file, "content-type");
-			if (header) {
-				char *mime_type = ast_strdup(header->value);
-				if (mime_type) {
-					normalize_content_type_header(mime_type);
-					if (!ast_strlen_zero(mime_type)) {
-						if (ast_get_extension_for_mime_type(mime_type, found_ext, sizeof(found_ext))) {
-							ext = found_ext;
-						}
-					}
-					ast_free(mime_type);
-				}
-				ao2_ref(header, -1);
+		if (ext) {
+			char *new_path;
+			if (ast_asprintf(&new_path, "%s%s", bucket_file->path, ext->value) != -1) {
+				rename(bucket_file->path, new_path);
+				ast_copy_string(bucket_file->path, new_path, sizeof(bucket_file->path));
+				ast_free(new_path);
 			}
+			ao2_ref(ext, -1);
 		}
-
-		snprintf(new_path, sizeof(new_path), "%s%s", bucket_file->path, ext);
-		rename(bucket_file->path, new_path);
-		ast_copy_string(bucket_file->path, new_path, sizeof(bucket_file->path));
 	}
 }
 
diff --git a/res/res_http_media_cache.c b/res/res_http_media_cache.c
index d761442..5410566 100644
--- a/res/res_http_media_cache.c
+++ b/res/res_http_media_cache.c
@@ -35,10 +35,12 @@
 
 #include <curl/curl.h>
 
+#include "asterisk/file.h"
 #include "asterisk/module.h"
 #include "asterisk/bucket.h"
 #include "asterisk/sorcery.h"
 #include "asterisk/threadstorage.h"
+#include "asterisk/uri.h"
 
 #define GLOBAL_USERAGENT "asterisk-libcurl-agent/1.0"
 
@@ -155,6 +157,115 @@
 	ast_bucket_file_metadata_set(bucket_file, "__actual_expires", time_buf);
 }
 
+static char *file_extension_from_string(const char *str, char *buffer, size_t capacity)
+{
+	const char *ext;
+
+	ext = strrchr(str, '.');
+	if (ext && ast_get_format_for_file_ext(ext + 1)) {
+		ast_debug(3, "Found extension '%s' at end of string\n", ext);
+		ast_copy_string(buffer, ext, capacity);
+		return buffer;
+	}
+
+	return NULL;
+}
+
+static char *file_extension_from_url(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{
+	return file_extension_from_string(ast_sorcery_object_get_id(bucket_file), buffer, capacity);
+}
+
+/*!
+ * \internal
+ * \brief Normalize the value of a Content-Type header
+ *
+ * This will trim off any optional parameters after the type/subtype.
+ */
+static void normalize_content_type_header(char *content_type)
+{
+	char *params = strchr(content_type, ';');
+
+	if (params) {
+		*params-- = 0;
+		while (params > content_type && (*params == ' ' || *params == '\t')) {
+			*params-- = 0;
+		}
+	}
+}
+
+static char *file_extension_from_content_type(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{
+	/* Check for the extension based on the MIME type passed in the Content-Type
+	 * header.
+	 *
+	 * If a match is found then retrieve the extension from the supported list
+	 * corresponding to the mime-type and use that to rename the file */
+
+	struct ast_bucket_metadata *header;
+	char *mime_type;
+
+	header = ast_bucket_file_metadata_get(bucket_file, "content-type");
+	if (!header) {
+		return NULL;
+	}
+
+	mime_type = ast_strdup(header->value);
+	if (mime_type) {
+		normalize_content_type_header(mime_type);
+		if (!ast_strlen_zero(mime_type)) {
+			if (ast_get_extension_for_mime_type(mime_type, buffer, sizeof(buffer))) {
+				ast_debug(3, "Derived extension '%s' from MIME type %s\n",
+					buffer,
+					mime_type);
+				ast_free(mime_type);
+				ao2_ref(header, -1);
+				return buffer;
+			}
+		}
+	}
+	ast_free(mime_type);
+	ao2_ref(header, -1);
+
+	return NULL;
+}
+
+static char *file_extension_from_url_path(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{
+	struct ast_uri *uri;
+
+	uri = ast_uri_parse(ast_sorcery_object_get_id(bucket_file));
+	if (!uri) {
+		ast_log(LOG_ERROR, "Failed to parse URI: %s\n",
+			ast_sorcery_object_get_id(bucket_file));
+		return NULL;
+	}
+
+	/* Just parse it as a string like before, but without the extra cruft */
+	buffer = file_extension_from_string(ast_uri_path(uri), buffer, capacity);
+	ao2_cleanup(uri);
+	return buffer;
+}
+
+static void bucket_file_set_extension(struct ast_bucket_file *bucket_file)
+{
+	/* We will attempt to determine an extension in the following order for backwards
+	 * compatibility:
+	 *
+	 * 1. Look at tail end of URL for extension
+	 * 2. Use the Content-Type header if present
+	 * 3. Parse the URL (assuming we can) and look at the tail of the path
+	 */
+
+	char buffer[64];
+
+	if (file_extension_from_url(bucket_file, buffer, sizeof(buffer))
+	   || file_extension_from_content_type(bucket_file, buffer, sizeof(buffer))
+	   || file_extension_from_url_path(bucket_file, buffer, sizeof(buffer))) {
+		ast_bucket_file_metadata_set(bucket_file, "ext", buffer);
+	}
+}
+
 /*! \internal
  * \brief Return whether or not we should always revalidate against the server
  */
@@ -278,6 +389,7 @@
 
 	if (http_code / 100 == 2) {
 		bucket_file_set_expiration(bucket_file);
+		bucket_file_set_extension(bucket_file);
 		return 0;
 	} else {
 		ast_log(LOG_WARNING, "Failed to retrieve URL '%s': server returned %ld\n",
diff --git a/tests/test_http_media_cache.c b/tests/test_http_media_cache.c
index c197539..90d1aef 100644
--- a/tests/test_http_media_cache.c
+++ b/tests/test_http_media_cache.c
@@ -57,6 +57,7 @@
 	struct timeval expires;
 	const char *status_text;
 	const char *etag;
+	const char *content_type;
 };
 
 static struct test_options options;
@@ -125,6 +126,10 @@
 		}
 	}
 
+	if (!ast_strlen_zero(options.content_type)) {
+		ast_str_append(&http_header, 0, "Content-Type: %s\r\n", options.content_type);
+	}
+
 	if (options.cache_control.maxage) {
 		SET_OR_APPEND_CACHE_CONTROL(cache_control);
 		ast_str_append(&cache_control, 0, "max-age=%d", options.cache_control.maxage);
@@ -220,6 +225,75 @@
 	}
 }
 
+AST_TEST_DEFINE(retrieve_content_type)
+{
+	RAII_VAR(struct ast_bucket_file *, bucket_file, NULL, bucket_file_cleanup);
+	char uri[1024];
+
+	switch (cmd) {
+	case TEST_INIT:
+		info->name = __func__;
+		info->category = CATEGORY;
+		info->summary = "Test retrieval of a resource with a Content-Type header";
+		info->description =
+			"This test covers retrieval of a resource whose URL does not end with\n"
+			"a parseable extension and whose response includes a Content-Type\n"
+			"header that we recognize.";
+		return AST_TEST_NOT_RUN;
+	case TEST_EXECUTE:
+		break;
+	}
+
+	options.send_file = 1;
+	options.status_code = 200;
+	options.status_text = "OK";
+	options.content_type = "audio/wav";
+
+	snprintf(uri, sizeof(uri), "%s/%s", server_uri, "foo.wav?account_id=1234");
+
+	bucket_file = ast_bucket_file_retrieve(uri);
+	ast_test_validate(test, bucket_file != NULL);
+	ast_test_validate(test, !strcmp(uri, ast_sorcery_object_get_id(bucket_file)));
+	ast_test_validate(test, !ast_strlen_zero(bucket_file->path));
+	VALIDATE_STR_METADATA(test, bucket_file, "ext", ".wav");
+
+	return AST_TEST_PASS;
+}
+
+AST_TEST_DEFINE(retrieve_parsed_uri)
+{
+	RAII_VAR(struct ast_bucket_file *, bucket_file, NULL, bucket_file_cleanup);
+	char uri[1024];
+
+	switch (cmd) {
+	case TEST_INIT:
+		info->name = __func__;
+		info->category = CATEGORY;
+		info->summary = "Test retrieval of a resource with a complex URI";
+		info->description =
+			"This test covers retrieval of a resource whose URL does not end with\n"
+			"a parseable extension, but the path portion of the URL does end with\n"
+			"parseable extension.";
+		return AST_TEST_NOT_RUN;
+	case TEST_EXECUTE:
+		break;
+	}
+
+	options.send_file = 1;
+	options.status_code = 200;
+	options.status_text = "OK";
+
+	snprintf(uri, sizeof(uri), "%s/%s", server_uri, "foo.wav?account_id=1234");
+
+	bucket_file = ast_bucket_file_retrieve(uri);
+	ast_test_validate(test, bucket_file != NULL);
+	ast_test_validate(test, !strcmp(uri, ast_sorcery_object_get_id(bucket_file)));
+	ast_test_validate(test, !ast_strlen_zero(bucket_file->path));
+	VALIDATE_STR_METADATA(test, bucket_file, "ext", ".wav");
+
+	return AST_TEST_PASS;
+}
+
 AST_TEST_DEFINE(retrieve_cache_control_directives)
 {
 	RAII_VAR(struct ast_bucket_file *, bucket_file, NULL, bucket_file_cleanup);
@@ -670,6 +744,8 @@
 	AST_TEST_REGISTER(retrieve_etag_expired);
 	AST_TEST_REGISTER(retrieve_cache_control_age);
 	AST_TEST_REGISTER(retrieve_cache_control_directives);
+	AST_TEST_REGISTER(retrieve_parsed_uri);
+	AST_TEST_REGISTER(retrieve_content_type);
 
 	ast_test_register_init(CATEGORY, pre_test_cb);
 
@@ -688,6 +764,8 @@
 	AST_TEST_UNREGISTER(retrieve_etag_expired);
 	AST_TEST_UNREGISTER(retrieve_cache_control_age);
 	AST_TEST_UNREGISTER(retrieve_cache_control_directives);
+	AST_TEST_REGISTER(retrieve_parsed_uri);
+	AST_TEST_REGISTER(retrieve_content_type);
 
 	return 0;
 }

-- 
To view, visit https://gerrit.asterisk.org/c/asterisk/+/16163
To unsubscribe, or for help writing mail filters, visit https://gerrit.asterisk.org/settings

Gerrit-Project: asterisk
Gerrit-Branch: 18
Gerrit-Change-Id: I16d0682f6d794be96539261b3e48f237909139cb
Gerrit-Change-Number: 16163
Gerrit-PatchSet: 5
Gerrit-Owner: Sean Bright <sean at seanbright.com>
Gerrit-Reviewer: Benjamin Keith Ford <bford at digium.com>
Gerrit-Reviewer: Friendly Automation
Gerrit-Reviewer: George Joseph <gjoseph at digium.com>
Gerrit-Reviewer: Joshua Colp <jcolp at sangoma.com>
Gerrit-Reviewer: Kevin Harwell <kharwell at digium.com>
Gerrit-MessageType: merged
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.digium.com/pipermail/asterisk-code-review/attachments/20210719/46125605/attachment-0001.html>


More information about the asterisk-code-review mailing list