From 856c205f319d2cd7a072292cf9f483684b87d5ab Mon Sep 17 00:00:00 2001
From: Stephen Seo
Date: Tue, 24 Sep 2024 13:16:34 +0900
Subject: [PATCH] Unescape percent-encoded uri when handling request

Resolves https://git.seodisparate.com/stephenseo/c_simple_http/issues/6

Malformed escapes and "%00" are passed through unchanged instead of
failing the decode, so a bad escape in a request cannot yield a NULL path.
---
 src/helpers.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/helpers.h |  9 +++++++
 src/http.c    | 19 ++++++++++-----
 src/test.c    | 32 +++++++++++++++++++++++++
 4 files changed, 119 insertions(+), 6 deletions(-)

diff --git a/src/helpers.c b/src/helpers.c
index b2f2448..bf3a6fd 100644
--- a/src/helpers.c
+++ b/src/helpers.c
@@ -144,4 +144,69 @@ char *c_simple_http_helper_to_lowercase(const char *buf, size_t size) {
   return ret_buf;
 }
 
+// Returns the byte encoded by the hexadecimal digit pair "upper" "lower"
+// (either case accepted). Returns 0 if either character is not a hex digit,
+// so the pair "00" cannot be told apart from invalid input.
+char c_simple_http_helper_hex_to_value(const char upper, const char lower) {
+  char result = 0;
+
+  if (upper >= '0' && upper <= '9') {
+    result |= (char)(upper - '0') << 4;
+  } else if (upper >= 'a' && upper <= 'f') {
+    result |= (char)(upper - 'a' + 10) << 4;
+  } else if (upper >= 'A' && upper <= 'F') {
+    result |= (char)(upper - 'A' + 10) << 4;
+  } else {
+    return 0;
+  }
+
+  if (lower >= '0' && lower <= '9') {
+    result |= lower - '0';
+  } else if (lower >= 'a' && lower <= 'f') {
+    result |= lower - 'a' + 10;
+  } else if (lower >= 'A' && lower <= 'F') {
+    result |= lower - 'A' + 10;
+  } else {
+    return 0;
+  }
+
+  return result;
+}
+
+// Decodes every "%XX" escape in "uri" into the byte it encodes and returns the
+// result as a newly allocated string. Escapes that are malformed, truncated,
+// or that decode to NUL ("%00") are copied through unchanged, so a bad escape
+// in a request can neither fail the decode nor cut the C string short.
+// Returns NULL only if "uri" is NULL or the allocation fails.
+char *c_simple_http_helper_unescape_uri(const char *uri) {
+  if (!uri) {
+    return NULL;
+  }
+
+  const size_t size = strlen(uri);
+  // Decoding never lengthens the string, so a single buffer as large as the
+  // input (plus the NUL terminator) always suffices.
+  char *ret_buf = malloc(size + 1);
+  if (!ret_buf) {
+    return NULL;
+  }
+
+  size_t out_idx = 0;
+  for (size_t idx = 0; idx < size; ++idx) {
+    if (uri[idx] == '%' && idx + 2 < size) {
+      const char decoded =
+        c_simple_http_helper_hex_to_value(uri[idx + 1], uri[idx + 2]);
+      if (decoded != 0) {
+        ret_buf[out_idx++] = decoded;
+        idx += 2;
+        continue;
+      }
+    }
+    ret_buf[out_idx++] = uri[idx];
+  }
+  ret_buf[out_idx] = 0;
+
+  return ret_buf;
+}
+
 // vim: et ts=2 sts=2 sw=2
diff --git a/src/helpers.h b/src/helpers.h
index 6c4554c..4a63834 100644
--- a/src/helpers.h
+++ b/src/helpers.h
@@ -50,6 +50,15 @@ void c_simple_http_helper_to_lowercase_in_place(char *buf, size_t size);
 /// uppercase to lowercase alpha chars.
 char *c_simple_http_helper_to_lowercase(const char *buf, size_t size);
 
+/// Converts two hexadecimal digits into its corresponding value. Returns 0 if
+/// either character is not a hexadecimal digit.
+char c_simple_http_helper_hex_to_value(const char upper, const char lower);
+
+/// Unescapes percent-encoded parts in the given uri string. Escapes that are
+/// malformed or that decode to NUL are left as-is. If this returns non-NULL,
+/// it must be free'd.
+char *c_simple_http_helper_unescape_uri(const char *uri);
+
 #endif
 
 // vim: et ts=2 sts=2 sw=2
diff --git a/src/http.c b/src/http.c
index 3d77df8..0db0099 100644
--- a/src/http.c
+++ b/src/http.c
@@ -26,8 +26,6 @@
 #include
 
 // Local includes
-#include "SimpleArchiver/src/data_structures/priority_heap.h"
-#include "constants.h"
 #include "http_template.h"
 #include "helpers.h"
 
@@ -118,6 +116,13 @@ char *c_simple_http_request_response(
   }
 #ifndef NDEBUG
   fprintf(stderr, "Parsing request: got path \"%s\"\n", request_path);
+#endif
+  __attribute__((cleanup(simple_archiver_helper_cleanup_c_string)))
+  char *request_path_unescaped =
+    c_simple_http_helper_unescape_uri(request_path);
+#ifndef NDEBUG
+  fprintf(
+    stderr, "Parsing request: unescaped path \"%s\"\n", request_path_unescaped);
 #endif
   // skip whitespace until next part.
   for (; idx < size
@@ -165,9 +170,9 @@ char *c_simple_http_request_response(
   size_t generated_size = 0;
   __attribute__((cleanup(simple_archiver_helper_cleanup_c_string)))
   char *stripped_path = c_simple_http_strip_path(
-    request_path, request_path_idx);
+    request_path_unescaped, strlen(request_path_unescaped));
   char *generated_buf = c_simple_http_path_to_generated(
-    stripped_path ? stripped_path : request_path,
+    stripped_path ? stripped_path : request_path_unescaped,
     templates,
     &generated_size,
     NULL); // TODO Use the output parameter "filenames list" here.
@@ -179,8 +184,10 @@ char *c_simple_http_request_response(
     if (
         simple_archiver_hash_map_get(
           templates->hash_map,
-          stripped_path ? stripped_path : request_path,
-          stripped_path ? strlen(stripped_path) + 1 : request_path_idx + 1)
+          stripped_path ? stripped_path : request_path_unescaped,
+          stripped_path
+            ? strlen(stripped_path) + 1
+            : strlen(request_path_unescaped) + 1)
         == NULL) {
       *out_response_code = C_SIMPLE_HTTP_Response_404_Not_Found;
     } else {
diff --git a/src/test.c b/src/test.c
index 8f049f4..7c807a7 100644
--- a/src/test.c
+++ b/src/test.c
@@ -568,6 +568,38 @@ int main(void) {
     char *buf = c_simple_http_combine_string_parts(list);
     ASSERT_TRUE(buf);
     ASSERT_TRUE(strcmp(buf, "one\ntwo\nthree\n") == 0);
+    free(buf);
+    buf = NULL;
+
+    char hex_result = c_simple_http_helper_hex_to_value('2', 'f');
+    CHECK_TRUE(hex_result == '/');
+    hex_result = c_simple_http_helper_hex_to_value('2', 'F');
+    CHECK_TRUE(hex_result == '/');
+
+    hex_result = c_simple_http_helper_hex_to_value('7', 'a');
+    CHECK_TRUE(hex_result == 'z');
+    hex_result = c_simple_http_helper_hex_to_value('7', 'A');
+    CHECK_TRUE(hex_result == 'z');
+
+    hex_result = c_simple_http_helper_hex_to_value('4', '1');
+    CHECK_TRUE(hex_result == 'A');
+
+    buf = c_simple_http_helper_unescape_uri("%2fderp%2Fdoop%21");
+    CHECK_TRUE(strcmp(buf, "/derp/doop!") == 0);
+    free(buf);
+    buf = NULL;
+
+    buf = c_simple_http_helper_unescape_uri("%41%42%43%25%5A%5a");
+    CHECK_TRUE(strcmp(buf, "ABC%ZZ") == 0);
+    free(buf);
+    buf = NULL;
+
+    // Malformed, NUL, and truncated escapes are passed through as-is.
+    buf = c_simple_http_helper_unescape_uri("%zz%00%4");
+    ASSERT_TRUE(buf);
+    CHECK_TRUE(strcmp(buf, "%zz%00%4") == 0);
+    free(buf);
+    buf = NULL;
   }
 
   // Test html_cache.