From 570da15dab5726557b46dd0a546f11da69edfcbe Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Tue, 24 Sep 2024 13:16:34 +0900 Subject: [PATCH] backport: unesc. percent-encoded uri, string parts Resolves https://git.seodisparate.com/stephenseo/c_simple_http/issues/6 --- src/helpers.c | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/helpers.h | 30 +++++++++ src/http.c | 18 ++++-- src/test.c | 41 ++++++++++++ 4 files changed, 259 insertions(+), 5 deletions(-) diff --git a/src/helpers.c b/src/helpers.c index c8e49d2..bf3a6fd 100644 --- a/src/helpers.c +++ b/src/helpers.c @@ -18,6 +18,109 @@ // Standard library includes. #include +#include +#include + +int c_simple_http_internal_get_string_part_full_size(void *data, void *ud) { + C_SIMPLE_HTTP_String_Part *part = data; + size_t *count = ud; + + *count += part->size - 1; + + return 0; +} + +int c_simple_http_internal_combine_string_parts_from_list(void *data, + void *ud) { + C_SIMPLE_HTTP_String_Part *part = data; + void **ptrs = ud; + char *buf = ptrs[0]; + size_t *current_count = ptrs[1]; + const size_t *total_count = ptrs[2]; + + if (!part->buf || part->size == 0) { + // Empty string part, just continue. 
+ return 0; + } + + if (*current_count + part->size - 1 > *total_count) { + fprintf(stderr, "ERROR Invalid state combining string parts!\n"); + return 1; + } + + memcpy(buf + *current_count, part->buf, part->size - 1); + + *current_count += part->size - 1; + + return 0; +} + +void c_simple_http_cleanup_attr_string_part(C_SIMPLE_HTTP_String_Part **part) { + if (part && *part) { + if ((*part)->buf) { + free((*part)->buf); + } + free(*part); + } + *part = NULL; +} + +void c_simple_http_cleanup_string_part(void *data) { + C_SIMPLE_HTTP_String_Part *part = data; + if (part) { + if (part->buf) { + free(part->buf); + } + free(part); + } +} + +void c_simple_http_add_string_part( + SDArchiverLinkedList *list, const char *c_string, size_t extra) { + C_SIMPLE_HTTP_String_Part *string_part = + malloc(sizeof(C_SIMPLE_HTTP_String_Part)); + + string_part->size = strlen(c_string) + 1; + string_part->buf = malloc(string_part->size); + memcpy(string_part->buf, c_string, string_part->size); + + string_part->extra = extra; + + simple_archiver_list_add( + list, string_part, c_simple_http_cleanup_string_part); +} + +char *c_simple_http_combine_string_parts(const SDArchiverLinkedList *list) { + if (!list || list->count == 0) { + return NULL; + } + + size_t count = 0; + + simple_archiver_list_get( + list, c_simple_http_internal_get_string_part_full_size, &count); + + char *buf = malloc(count + 1); + size_t current_count = 0; + + void **ptrs = malloc(sizeof(void*) * 3); + ptrs[0] = buf; + ptrs[1] = &current_count; + ptrs[2] = &count; + + if (simple_archiver_list_get( + list, c_simple_http_internal_combine_string_parts_from_list, ptrs)) { + free(buf); + free(ptrs); + return NULL; + } + + free(ptrs); + + buf[count] = 0; + + return buf; +} void c_simple_http_helper_to_lowercase_in_place(char *buf, size_t size) { for (size_t idx = 0; idx < size; ++idx) { @@ -41,4 +144,76 @@ char *c_simple_http_helper_to_lowercase(const char *buf, size_t size) { return ret_buf; } +char 
c_simple_http_helper_hex_to_value(const char upper, const char lower) { + char result = 0; + + if (upper >= '0' && upper <= '9') { + result |= (char)(upper - '0') << 4; + } else if (upper >= 'a' && upper <= 'f') { + result |= (char)(upper - 'a' + 10) << 4; + } else if (upper >= 'A' && upper <= 'F') { + result |= (char)(upper - 'A' + 10) << 4; + } else { + return 0; + } + + if (lower >= '0' && lower <= '9') { + result |= lower - '0'; + } else if (lower >= 'a' && lower <= 'f') { + result |= lower - 'a' + 10; + } else if (lower >= 'A' && lower <= 'F') { + result |= lower - 'A' + 10; + } else { + return 0; + } + + return result; +} + +char *c_simple_http_helper_unescape_uri(const char *uri) { + __attribute__((cleanup(simple_archiver_list_free))) + SDArchiverLinkedList *parts = simple_archiver_list_init(); + + const size_t size = strlen(uri); + size_t idx = 0; + size_t prev_idx = 0; + size_t buf_size; + char *buf; + + for (; idx <= size; ++idx) { + if (uri[idx] == '%' && idx + 2 < size) { + if (idx > prev_idx) { + buf_size = idx - prev_idx + 1; + buf = malloc(buf_size); + memcpy(buf, uri + prev_idx, buf_size - 1); + buf[buf_size - 1] = 0; + c_simple_http_add_string_part(parts, buf, 0); + free(buf); + } + buf = malloc(2); + buf[0] = c_simple_http_helper_hex_to_value(uri[idx + 1], uri[idx + 2]); + buf[1] = 0; + if (buf[0] == 0) { + free(buf); + return NULL; + } + c_simple_http_add_string_part(parts, buf, 0); + free(buf); + prev_idx = idx + 3; + idx += 2; + } + } + + if (idx > prev_idx) { + buf_size = idx - prev_idx + 1; + buf = malloc(buf_size); + memcpy(buf, uri + prev_idx, buf_size - 1); + buf[buf_size - 1] = 0; + c_simple_http_add_string_part(parts, buf, 0); + free(buf); + } + + return c_simple_http_combine_string_parts(parts); +} + // vim: et ts=2 sts=2 sw=2 diff --git a/src/helpers.h b/src/helpers.h index c6743fb..4a63834 100644 --- a/src/helpers.h +++ b/src/helpers.h @@ -20,6 +20,29 @@ // Standard library includes. #include +// Third-party includes. 
+#include + +typedef struct C_SIMPLE_HTTP_String_Part { + char *buf; + size_t size; + size_t extra; +} C_SIMPLE_HTTP_String_Part; + +void c_simple_http_cleanup_attr_string_part(C_SIMPLE_HTTP_String_Part **); + +/// Assumes "data" is a C_SIMPLE_HTTP_String_Part, "data" was malloced, and +/// "data->buf" was malloced. +void c_simple_http_cleanup_string_part(void *data); + +/// Puts a malloced instance of String_Part into the list. +/// The given c_string will be copied into a newly malloced buffer. +void c_simple_http_add_string_part( + SDArchiverLinkedList *list, const char *c_string, size_t extra); + +/// Combines all String_Parts in the list and returns it as a single buffer. +char *c_simple_http_combine_string_parts(const SDArchiverLinkedList *list); + /// Modifies "buf" in-place to change all uppercase to lowercase alpha chars. void c_simple_http_helper_to_lowercase_in_place(char *buf, size_t size); @@ -27,6 +50,13 @@ void c_simple_http_helper_to_lowercase_in_place(char *buf, size_t size); /// uppercase to lowercase alpha chars. char *c_simple_http_helper_to_lowercase(const char *buf, size_t size); +/// Converts two hexadecimal digits into its corresponding value. +char c_simple_http_helper_hex_to_value(const char upper, const char lower); + +/// Unescapes percent-encoded parts in the given uri string. If this returns +/// non-NULL, it must be free'd. 
+char *c_simple_http_helper_unescape_uri(const char *uri); + #endif // vim: et ts=2 sts=2 sw=2 diff --git a/src/http.c b/src/http.c index f79797d..710ea73 100644 --- a/src/http.c +++ b/src/http.c @@ -25,7 +25,6 @@ #include // Local includes -#include "constants.h" #include "http_template.h" #include "helpers.h" @@ -111,6 +110,13 @@ char *c_simple_http_request_response( } #ifndef NDEBUG fprintf(stderr, "Parsing request: got path \"%s\"\n", request_path); +#endif + __attribute__((cleanup(simple_archiver_helper_cleanup_c_string))) + char *request_path_unescaped = + c_simple_http_helper_unescape_uri(request_path); +#ifndef NDEBUG + fprintf( + stderr, "Parsing request: unescaped path \"%s\"\n", request_path_unescaped); #endif // skip whitespace until next part. for (; idx < size @@ -158,9 +164,9 @@ char *c_simple_http_request_response( size_t generated_size = 0; __attribute__((cleanup(simple_archiver_helper_cleanup_c_string))) char *stripped_path = c_simple_http_strip_path( - request_path, request_path_idx); + request_path_unescaped, strlen(request_path_unescaped)); char *generated_buf = c_simple_http_path_to_generated( - stripped_path ? stripped_path : request_path, + stripped_path ? stripped_path : request_path_unescaped, templates, &generated_size); @@ -171,8 +177,10 @@ char *c_simple_http_request_response( if ( simple_archiver_hash_map_get( templates->hash_map, - stripped_path ? stripped_path : request_path, - stripped_path ? strlen(stripped_path) + 1 : request_path_idx + 1) + stripped_path ? stripped_path : request_path_unescaped, + stripped_path + ? strlen(stripped_path) + 1 + : strlen(request_path_unescaped) + 1) == NULL) { *out_response_code = C_SIMPLE_HTTP_Response_404_Not_Found; } else { diff --git a/src/test.c b/src/test.c index 66c4d0e..4feb9c4 100644 --- a/src/test.c +++ b/src/test.c @@ -8,6 +8,7 @@ #include "config.h" #include "http_template.h" #include "http.h" +#include "helpers.h" // Third party includes. 
#include @@ -503,6 +504,46 @@ int main(void) { free(stripped_path_buf); } + // Test helpers. + { + __attribute__((cleanup(simple_archiver_list_free))) + SDArchiverLinkedList *list = simple_archiver_list_init(); + + c_simple_http_add_string_part(list, "one\n", 0); + c_simple_http_add_string_part(list, "two\n", 0); + c_simple_http_add_string_part(list, "three\n", 0); + + __attribute__((cleanup(simple_archiver_helper_cleanup_c_string))) + char *buf = c_simple_http_combine_string_parts(list); + ASSERT_TRUE(buf); + ASSERT_TRUE(strcmp(buf, "one\ntwo\nthree\n") == 0); + free(buf); + buf = NULL; + + char hex_result = c_simple_http_helper_hex_to_value('2', 'f'); + CHECK_TRUE(hex_result == '/'); + hex_result = c_simple_http_helper_hex_to_value('2', 'F'); + CHECK_TRUE(hex_result == '/'); + + hex_result = c_simple_http_helper_hex_to_value('7', 'a'); + CHECK_TRUE(hex_result == 'z'); + hex_result = c_simple_http_helper_hex_to_value('7', 'A'); + CHECK_TRUE(hex_result == 'z'); + + hex_result = c_simple_http_helper_hex_to_value('4', '1'); + CHECK_TRUE(hex_result == 'A'); + + buf = c_simple_http_helper_unescape_uri("%2fderp%2Fdoop%21"); + CHECK_TRUE(strcmp(buf, "/derp/doop!") == 0); + free(buf); + buf = NULL; + + buf = c_simple_http_helper_unescape_uri("%41%42%43%25%5A%5a"); + CHECK_TRUE(strcmp(buf, "ABC%ZZ") == 0); + free(buf); + buf = NULL; + } + RETURN() }