[Asterisk-code-review] res_http_media_cache.c: Parse media URLs to find extensions. (asterisk[16])

Fri Jul 2 12:12:24 CDT 2021

Sean Bright has uploaded this change for review. ( https://gerrit.asterisk.org/c/asterisk/+/16143 )


Change subject: res_http_media_cache.c: Parse media URLs to find extensions.
......................................................................

res_http_media_cache.c: Parse media URLs to find extensions.

Use cURL's URL parsing API - when available - to parse playback URLs
in order to find their file extensions.

For backwards compatibility, we first look at the full URL, then at
any Content-Type header, and finally at just the path portion of the
URL.

ASTERISK-27871 #close

Change-Id: I16d0682f6d794be96539261b3e48f237909139cb
---
M main/media_cache.c
M res/res_http_media_cache.c
2 files changed, 167 insertions(+), 51 deletions(-)



  git pull ssh://gerrit.asterisk.org:29418/asterisk refs/changes/43/16143/1

diff --git a/main/media_cache.c b/main/media_cache.c
index b303643..1899fb4 100644
--- a/main/media_cache.c
+++ b/main/media_cache.c
@@ -126,71 +126,30 @@
 
 /*!
  * \internal
- * \brief Normalize the value of a Content-Type header
- *
- * This will trim off any optional parameters after the type/subtype.
- */
-static void normalize_content_type_header(char *content_type)
-{
-	char *params = strchr(content_type, ';');
-
-	if (params) {
-		*params-- = 0;
-		while (params > content_type && (*params == ' ' || *params == '\t')) {
-			*params-- = 0;
-		}
-	}
-}
-
-/*!
- * \internal
  * \brief Update the name of the file backing a \c bucket_file
  * \param preferred_file_name The preferred name of the backing file
  */
 static void bucket_file_update_path(struct ast_bucket_file *bucket_file,
 	const char *preferred_file_name)
 {
-	char *ext;
-
 	if (!ast_strlen_zero(preferred_file_name) && strcmp(bucket_file->path, preferred_file_name)) {
 		/* Use the preferred file name if available */
-
 		rename(bucket_file->path, preferred_file_name);
 		ast_copy_string(bucket_file->path, preferred_file_name,
 			sizeof(bucket_file->path));
-	} else if (!strchr(bucket_file->path, '.') && (ext = strrchr(ast_sorcery_object_get_id(bucket_file), '.'))) {
-		/* If we don't have a file extension and were provided one in the URI, use it */
-		char found_ext[32];
-		char new_path[PATH_MAX + sizeof(found_ext)];
+	} else if (!strchr(bucket_file->path, '.')) { /* no '.' anywhere in the backing path */
+		struct ast_bucket_metadata *ext =
+			ast_bucket_file_metadata_get(bucket_file, "ext"); /* NULL when no "ext" metadata was stored */
 
-		ast_bucket_file_metadata_set(bucket_file, "ext", ext);
-
-		/* Don't pass '.' while checking for supported extension */
-		if (!ast_get_format_for_file_ext(ext + 1)) {
-			/* If the file extension passed in the URI isn't supported check for the
-			 * extension based on the MIME type passed in the Content-Type header before
-			 * giving up.
-			 * If a match is found then retrieve the extension from the supported list
-			 * corresponding to the mime-type and use that to rename the file */
-			struct ast_bucket_metadata *header = ast_bucket_file_metadata_get(bucket_file, "content-type");
-			if (header) {
-				char *mime_type = ast_strdup(header->value);
-				if (mime_type) {
-					normalize_content_type_header(mime_type);
-					if (!ast_strlen_zero(mime_type)) {
-						if (ast_get_extension_for_mime_type(mime_type, found_ext, sizeof(found_ext))) {
-							ext = found_ext;
-						}
-					}
-					ast_free(mime_type);
-				}
-				ao2_ref(header, -1);
+		if (ext) {
+			char *new_path;
+			if (ast_asprintf(&new_path, "%s%s", bucket_file->path, ext->value) != -1) { /* value is appended verbatim, so it is expected to carry its own leading '.' */
+				rename(bucket_file->path, new_path);
+				ast_copy_string(bucket_file->path, new_path, sizeof(bucket_file->path));
+				ast_free(new_path);
 			}
+			ao2_ref(ext, -1); /* drop the reference obtained from ast_bucket_file_metadata_get() */
 		}
-
-		snprintf(new_path, sizeof(new_path), "%s%s", bucket_file->path, ext);
-		rename(bucket_file->path, new_path);
-		ast_copy_string(bucket_file->path, new_path, sizeof(bucket_file->path));
 	}
 }
 
diff --git a/res/res_http_media_cache.c b/res/res_http_media_cache.c
index d761442..1470f11 100644
--- a/res/res_http_media_cache.c
+++ b/res/res_http_media_cache.c
@@ -35,6 +35,7 @@
 
 #include <curl/curl.h>
 
+#include "asterisk/file.h"
 #include "asterisk/module.h"
 #include "asterisk/bucket.h"
 #include "asterisk/sorcery.h"
@@ -155,6 +156,161 @@
 	ast_bucket_file_metadata_set(bucket_file, "__actual_expires", time_buf);
 }
 
+static char *file_extension_from_string(const char *str, char *buffer, size_t capacity)
+{
+	/* Accept the text from the last '.' onwards only if the format lookup knows it */
+	const char *dot = strrchr(str, '.');
+
+	if (!dot || !ast_get_format_for_file_ext(dot + 1)) {
+		return NULL;
+	}
+
+	ast_debug(3, "Found extension '%s' at end of string\n", dot);
+	ast_copy_string(buffer, dot, capacity);
+	return buffer;
+}
+
+static char *file_extension_from_url(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{ /* Checks the whole URL (the sorcery object id) as-is, query string and fragment included */
+	return file_extension_from_string(ast_sorcery_object_get_id(bucket_file), buffer, capacity);
+}
+
+/*!
+ * \internal
+ * \brief Normalize the value of a Content-Type header in place
+ *
+ * Cuts at the first ';' and strips spaces/tabs before it (never the first byte).
+ */
+static void normalize_content_type_header(char *content_type)
+{
+	char *end = strchr(content_type, ';');
+
+	if (end) {
+		*end = 0;
+		while (end - content_type > 1 && (end[-1] == ' ' || end[-1] == '\t')) {
+			*--end = 0;
+		}
+	}
+}
+
+static char *file_extension_from_content_type(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{
+	/* Check for the extension based on the MIME type passed in the Content-Type
+	 * header.
+	 *
+	 * If a match is found, the extension mapped to that MIME type is written to
+	 * buffer (capacity bytes available) and buffer is returned; otherwise NULL. */
+
+	struct ast_bucket_metadata *header;
+	char *mime_type;
+
+	header = ast_bucket_file_metadata_get(bucket_file, "content-type");
+	if (!header) {
+		return NULL;
+	}
+
+	mime_type = ast_strdup(header->value);
+	if (mime_type) {
+		normalize_content_type_header(mime_type);
+		if (!ast_strlen_zero(mime_type)) {
+			/* buffer is a pointer parameter: its real size is capacity, not sizeof(buffer) */
+			if (ast_get_extension_for_mime_type(mime_type, buffer, capacity)) {
+				ast_debug(3, "Derived extension '%s' from MIME type %s\n",
+					buffer, mime_type);
+				ast_free(mime_type);
+				ao2_ref(header, -1);
+				return buffer;
+			}
+		}
+	}
+	ast_free(mime_type);
+	ao2_ref(header, -1);
+
+	return NULL;
+}
+
+/* The URL parsing API was introduced in 7.62.0 */
+#if LIBCURL_VERSION_NUM >= 0x073e00
+
+static char *file_extension_from_url_path(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{
+	/* Parse the URL with cURL and look for an extension on its path part only */
+	const char *url = ast_sorcery_object_get_id(bucket_file);
+	char *query = NULL, *fragment = NULL, *path = NULL;
+	CURLUcode rc;
+	CURLU *h;
+
+	h = curl_url();
+	if (!h) {
+		ast_log(LOG_ERROR, "Failed to allocate cURL URL handle\n");
+		return NULL;
+	}
+
+	rc = curl_url_set(h, CURLUPART_URL, url, 0);
+	if (rc != CURLUE_OK) {
+		ast_log(LOG_ERROR, "Failed to parse URL: %s\n", url);
+		curl_url_cleanup(h);
+		return NULL;
+	}
+
+	/* Results are unchecked; the pointers start out NULL so a failure reads as "absent" */
+	curl_url_get(h, CURLUPART_QUERY, &query, 0);
+	curl_url_get(h, CURLUPART_FRAGMENT, &fragment, 0);
+
+	/* An empty fragment and an empty query string are handled differently (at least in
+	   7.68.0), so explicitly check for an empty fragment */
+	if (query == NULL && fragment == NULL && !ast_ends_with(url, "#")) {
+		curl_free(query);
+		curl_free(fragment);
+		curl_url_cleanup(h);
+
+		/* No query string? No fragment? if we were going to parse we would have
+		   parsed in file_extension_from_url */
+		return NULL;
+	}
+
+	curl_url_get(h, CURLUPART_PATH, &path, 0);
+
+	/* Just parse it as a string like before, but without the extra cruft. If the
+	   path could not be fetched there is nothing to inspect, so report no match */
+	buffer = path ? file_extension_from_string(path, buffer, capacity) : NULL;
+
+	curl_free(path);
+	curl_free(query);
+	curl_free(fragment);
+	curl_url_cleanup(h);
+
+	return buffer;
+}
+
+#else
+
+static char *file_extension_from_url_path(struct ast_bucket_file *bucket_file, char *buffer, size_t capacity)
+{
+	return NULL; /* No URL parsing API in this libcurl; report "no extension found" */
+}
+
+#endif
+
+static void bucket_file_set_extension(struct ast_bucket_file *bucket_file)
+{
+	/* Sources are consulted in this order, for backwards compatibility:
+	 *
+	 * 1. The tail end of the complete URL
+	 * 2. The Content-Type header, if present
+	 * 3. The tail of the path of the parsed URL (assuming we can parse it)
+	 */
+
+	char ext[64];
+	char *found = file_extension_from_url(bucket_file, ext, sizeof(ext));
+
+	found = found ? found : file_extension_from_content_type(bucket_file, ext, sizeof(ext));
+	found = found ? found : file_extension_from_url_path(bucket_file, ext, sizeof(ext));
+	if (found) {
+		ast_bucket_file_metadata_set(bucket_file, "ext", ext);
+	}
+}
+
 /*! \internal
  * \brief Return whether or not we should always revalidate against the server
  */
@@ -278,6 +434,7 @@
 
 	if (http_code / 100 == 2) {
 		bucket_file_set_expiration(bucket_file);
+		bucket_file_set_extension(bucket_file);
 		return 0;
 	} else {
 		ast_log(LOG_WARNING, "Failed to retrieve URL '%s': server returned %ld\n",

-- 
To view, visit https://gerrit.asterisk.org/c/asterisk/+/16143
To unsubscribe, or for help writing mail filters, visit https://gerrit.asterisk.org/settings

Gerrit-Project: asterisk
Gerrit-Branch: 16
Gerrit-Change-Id: I16d0682f6d794be96539261b3e48f237909139cb
Gerrit-Change-Number: 16143
Gerrit-PatchSet: 1
Gerrit-Owner: Sean Bright <sean at seanbright.com>
Gerrit-MessageType: newchange
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.digium.com/pipermail/asterisk-code-review/attachments/20210702/9f3cc2cd/attachment-0001.html>