| 
 | 1 | +/**  | 
 | 2 | + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.  | 
 | 3 | + * SPDX-License-Identifier: Apache-2.0.  | 
 | 4 | + */  | 
 | 5 | +#include <aws/common/environment.h>  | 
 | 6 | +#include <aws/http/private/no_proxy.h>  | 
 | 7 | + | 
 | 8 | +#ifdef _WIN32  | 
 | 9 | +#    include <ws2tcpip.h>  | 
 | 10 | +#else  | 
 | 11 | +#    include <arpa/inet.h>  | 
 | 12 | +#endif  | 
 | 13 | + | 
 | 14 | +enum hostname_type {  | 
 | 15 | +    HOSTNAME_TYPE_IPV4,  | 
 | 16 | +    HOSTNAME_TYPE_IPV6,  | 
 | 17 | +    HOSTNAME_TYPE_REGULAR,  | 
 | 18 | +};  | 
 | 19 | + | 
 | 20 | +/**  | 
 | 21 | + * s_cidr4_match() returns true if the given IPv4 address is within the  | 
 | 22 | + * specified CIDR address range.  | 
 | 23 | + * Based on the curl implementation Curl_cidr4_match().  | 
 | 24 | + *  | 
 | 25 | + * @param bits The number of network bits in the CIDR notation  | 
 | 26 | + * @param network_part The network pattern to match against (e.g., "192.168.0.0")\  | 
 | 27 | + * @param host_addr Pre-parsed binary representation of the host IP, or NULL to parse from host  | 
 | 28 | + * @return true if the IP address matches the CIDR pattern, false otherwise  | 
 | 29 | + */  | 
 | 30 | +static bool s_cidr4_match(uint64_t bits, struct aws_string *network_part, uint32_t address) {  | 
 | 31 | + | 
 | 32 | +    uint32_t check = 0;  | 
 | 33 | + | 
 | 34 | +    /* Check for valid bits parameter */  | 
 | 35 | +    if (bits > 32) {  | 
 | 36 | +        /* Invalid netmask bits */  | 
 | 37 | +        return false;  | 
 | 38 | +    }  | 
 | 39 | + | 
 | 40 | +    /* Convert network pattern to binary */  | 
 | 41 | +    if (inet_pton(AF_INET, aws_string_c_str(network_part), &check) != 1) {  | 
 | 42 | +        return false;  | 
 | 43 | +    }  | 
 | 44 | + | 
 | 45 | +    if (bits > 0 && bits < 32) {  | 
 | 46 | +        /* Apply the network mask for CIDR comparison */  | 
 | 47 | +        uint32_t mask = 0xffffffff << (32 - bits);  | 
 | 48 | +        uint32_t host_network = ntohl(address);  | 
 | 49 | +        uint32_t check_network = ntohl(check);  | 
 | 50 | + | 
 | 51 | +        /* Compare the masked addresses */  | 
 | 52 | +        return (host_network & mask) == (check_network & mask);  | 
 | 53 | +    }  | 
 | 54 | + | 
 | 55 | +    /* For /32 or no bits specified, use exact match */  | 
 | 56 | +    return address == check;  | 
 | 57 | +}  | 
 | 58 | + | 
 | 59 | +/**  | 
 | 60 | + * s_cidr6_match() returns true if the given IPv6 address is within the  | 
 | 61 | + * specified CIDR address range.  | 
 | 62 | + * Based on the curl implementation Curl_cidr6_match().  | 
 | 63 | + *  | 
 | 64 | + * @param bits The number of network bits in the CIDR notation  | 
 | 65 | + * @param network_part The network pattern to match against (e.g., "2001:db8::")  | 
 | 66 | + * @param host_addr Pre-parsed binary representation of the host IP, or NULL to parse from host  | 
 | 67 | + * @return true if the IP address matches the CIDR pattern, false otherwise  | 
 | 68 | + */  | 
 | 69 | +static bool s_cidr6_match(uint64_t bits, struct aws_string *network_part, uint8_t *address) {  | 
 | 70 | +    uint8_t check[16] = {0};  | 
 | 71 | + | 
 | 72 | +    /* If no bits specified, use full 128 bits for IPv6 */  | 
 | 73 | +    if (!bits) {  | 
 | 74 | +        bits = 128;  | 
 | 75 | +    }  | 
 | 76 | + | 
 | 77 | +    /* Check for valid bits parameter */  | 
 | 78 | +    if (bits > 128) {  | 
 | 79 | +        return false;  | 
 | 80 | +    }  | 
 | 81 | +    /* Convert network pattern to binary */  | 
 | 82 | +    if (inet_pton(AF_INET6, aws_string_c_str(network_part), check) != 1) {  | 
 | 83 | +        return false;  | 
 | 84 | +    }  | 
 | 85 | + | 
 | 86 | +    /* Calculate full bytes and remaining bits in the netmask */  | 
 | 87 | +    uint64_t bytes = bits / 8;  | 
 | 88 | +    uint64_t rest = bits % 8;  | 
 | 89 | + | 
 | 90 | +    /* Compare full bytes of the network part */  | 
 | 91 | +    if (bytes > 0 && memcmp(address, check, (size_t)bytes) != 0) {  | 
 | 92 | +        return false;  | 
 | 93 | +    }  | 
 | 94 | + | 
 | 95 | +    /* If we have remaining bits, compare the partial byte */  | 
 | 96 | +    if (rest > 0 && bytes < 16) {  | 
 | 97 | +        /* Create a mask for the remaining bits */  | 
 | 98 | +        unsigned char mask = (unsigned char)(0xff << (8 - rest));  | 
 | 99 | + | 
 | 100 | +        /* Check if the masked bits match */  | 
 | 101 | +        if ((address[bytes] & mask) != (check[bytes] & mask)) {  | 
 | 102 | +            return false;  | 
 | 103 | +        }  | 
 | 104 | +    }  | 
 | 105 | + | 
 | 106 | +    /* All checks passed, addresses match within the CIDR range */  | 
 | 107 | +    return true;  | 
 | 108 | +}  | 
 | 109 | + | 
 | 110 | +static bool s_is_dot(uint8_t c) {  | 
 | 111 | +    return c == '.';  | 
 | 112 | +}  | 
 | 113 | + | 
 | 114 | +/* The host is expected to be the host result from URL parser. */  | 
 | 115 | +bool aws_http_host_matches_no_proxy(  | 
 | 116 | +    struct aws_allocator *allocator,  | 
 | 117 | +    struct aws_byte_cursor host,  | 
 | 118 | +    struct aws_string *no_proxy_str) {  | 
 | 119 | +    if (host.len == 0 || no_proxy_str == NULL) {  | 
 | 120 | +        return false;  | 
 | 121 | +    }  | 
 | 122 | +    /* Single "*" wildcard matches all hosts */  | 
 | 123 | +    if (aws_string_eq_c_str(no_proxy_str, "*")) {  | 
 | 124 | +        AWS_LOGF_DEBUG(AWS_LS_HTTP_CONNECTION, "wildcard no_proxy found, bypassing any proxy");  | 
 | 125 | +        return true;  | 
 | 126 | +    }  | 
 | 127 | +    bool bypass = false;  | 
 | 128 | +    struct aws_byte_cursor no_proxy_cur = aws_byte_cursor_from_string(no_proxy_str);  | 
 | 129 | +    struct aws_array_list no_proxy_list;  | 
 | 130 | +    struct aws_string *host_str = aws_string_new_from_cursor(allocator, &host);  | 
 | 131 | + | 
 | 132 | +    if (aws_array_list_init_dynamic(&no_proxy_list, allocator, 10, sizeof(struct aws_byte_cursor))) {  | 
 | 133 | +        goto cleanup;  | 
 | 134 | +    }  | 
 | 135 | +    /* Split the NO_PROXY string by commas */  | 
 | 136 | +    if (aws_byte_cursor_split_on_char(&no_proxy_cur, ',', &no_proxy_list)) {  | 
 | 137 | +        goto cleanup;  | 
 | 138 | +    }  | 
 | 139 | + | 
 | 140 | +    /* Store parsed binary addresses for reuse */  | 
 | 141 | +    uint32_t ipv4_addr = 0;  | 
 | 142 | +    uint8_t ipv6_addr[16] = {0};  | 
 | 143 | + | 
 | 144 | +    /* Determine host type and parse address if applicable */  | 
 | 145 | +    enum hostname_type type = HOSTNAME_TYPE_REGULAR;  | 
 | 146 | +    if (inet_pton(AF_INET, aws_string_c_str(host_str), &ipv4_addr) == 1) {  | 
 | 147 | +        type = HOSTNAME_TYPE_IPV4;  | 
 | 148 | +    } else {  | 
 | 149 | +        struct aws_string *host_str_copy = host_str;  | 
 | 150 | +        struct aws_byte_cursor host_copy = host;  | 
 | 151 | +        if (host_copy.ptr[0] == '[' && host_copy.ptr[host_copy.len - 1] == ']') {  | 
 | 152 | +            /* Check if the address is enclosed in brackets and strip them for validation */  | 
 | 153 | +            aws_byte_cursor_advance(&host_copy, 1);  | 
 | 154 | +            host_copy.len--;  | 
 | 155 | +            host_str_copy = aws_string_new_from_cursor(allocator, &host_copy);  | 
 | 156 | +        }  | 
 | 157 | + | 
 | 158 | +        if (inet_pton(AF_INET6, aws_string_c_str(host_str_copy), ipv6_addr) == 1) {  | 
 | 159 | +            /* Update the host str */  | 
 | 160 | +            if (host_str != host_str_copy) {  | 
 | 161 | +                aws_string_destroy(host_str);  | 
 | 162 | +                host_str = host_str_copy;  | 
 | 163 | +            }  | 
 | 164 | +            type = HOSTNAME_TYPE_IPV6;  | 
 | 165 | +        } else {  | 
 | 166 | +            /* Not an IP address, so it's a regular hostname */  | 
 | 167 | +            type = HOSTNAME_TYPE_REGULAR;  | 
 | 168 | +            /* Ignore the trailing dot in the hostname */  | 
 | 169 | +            host = aws_byte_cursor_right_trim_pred(&host, s_is_dot);  | 
 | 170 | +        }  | 
 | 171 | +        if (host_str != host_str_copy) {  | 
 | 172 | +            /* clean up the copy, but don't update the str. */  | 
 | 173 | +            aws_string_destroy(host_str_copy);  | 
 | 174 | +        }  | 
 | 175 | +    }  | 
 | 176 | + | 
 | 177 | +    for (size_t i = 0; i < aws_array_list_length(&no_proxy_list); i++) {  | 
 | 178 | +        struct aws_byte_cursor pattern;  | 
 | 179 | +        if (aws_array_list_get_at(&no_proxy_list, &pattern, i)) {  | 
 | 180 | +            continue;  | 
 | 181 | +        }  | 
 | 182 | + | 
 | 183 | +        /* Trim whitespace from both ends for the pattern */  | 
 | 184 | +        pattern = aws_byte_cursor_trim_pred(&pattern, aws_isspace);  | 
 | 185 | +        if (pattern.len == 0) {  | 
 | 186 | +            /* If pattern is empty, ignore it. */  | 
 | 187 | +            continue;  | 
 | 188 | +        }  | 
 | 189 | +        switch (type) {  | 
 | 190 | +            case HOSTNAME_TYPE_REGULAR: {  | 
 | 191 | +                /**  | 
 | 192 | +                 * A: example.com matches 'example.com'  | 
 | 193 | +                 * B: www.example.com matches 'example.com'  | 
 | 194 | +                 * C: nonexample.com DOES NOT match 'example.com'  | 
 | 195 | +                 */  | 
 | 196 | +                /* Trim dot from both ends for the pattern */  | 
 | 197 | +                pattern = aws_byte_cursor_trim_pred(&pattern, s_is_dot);  | 
 | 198 | +                if (pattern.len == 0) {  | 
 | 199 | +                    /* If pattern is empty, ignore it. */  | 
 | 200 | +                    continue;  | 
 | 201 | +                }  | 
 | 202 | +                if (pattern.len == host.len) {  | 
 | 203 | +                    if (aws_byte_cursor_eq_ignore_case(&pattern, &host)) {  | 
 | 204 | +                        bypass = true;  | 
 | 205 | +                        goto cleanup;  | 
 | 206 | +                    } else {  | 
 | 207 | +                        continue;  | 
 | 208 | +                    }  | 
 | 209 | +                } else if (pattern.len < host.len) {  | 
 | 210 | +                    /* Check if the pattern is a suffix of the host. All the math is safe since pattern.len <  | 
 | 211 | +                     * host.len  | 
 | 212 | +                     */  | 
 | 213 | +                    struct aws_byte_cursor tail_with_extra_byte = host;  | 
 | 214 | +                    /* 1. the byte before the tail should be `.` */  | 
 | 215 | +                    aws_byte_cursor_advance(&tail_with_extra_byte, host.len - pattern.len - 1);  | 
 | 216 | +                    uint8_t var = 0;  | 
 | 217 | +                    /* tail_with_extra_byte will be updated to move over the `.` */  | 
 | 218 | +                    aws_byte_cursor_read_u8(&tail_with_extra_byte, &var);  | 
 | 219 | +                    if (var != '.') {  | 
 | 220 | +                        continue;  | 
 | 221 | +                    }  | 
 | 222 | +                    /* 2. the tail of the host should match the pattern */  | 
 | 223 | +                    if (aws_byte_cursor_eq_ignore_case(&pattern, &tail_with_extra_byte)) {  | 
 | 224 | +                        bypass = true;  | 
 | 225 | +                        goto cleanup;  | 
 | 226 | +                    } else {  | 
 | 227 | +                        continue;  | 
 | 228 | +                    }  | 
 | 229 | +                }  | 
 | 230 | +            } break;  | 
 | 231 | +            case HOSTNAME_TYPE_IPV4:  | 
 | 232 | +            case HOSTNAME_TYPE_IPV6: {  | 
 | 233 | +                /* Extract network part and bits from CIDR notation */  | 
 | 234 | +                struct aws_byte_cursor substr = {0};  | 
 | 235 | +                struct aws_byte_cursor network_part = {0};  | 
 | 236 | +                /* CIDR found. parse the bits */  | 
 | 237 | +                uint64_t network_bits = 0;  | 
 | 238 | +                if (aws_byte_cursor_next_split(&pattern, '/', &substr)) {  | 
 | 239 | +                    network_part = substr;  | 
 | 240 | +                }  | 
 | 241 | +                if (aws_byte_cursor_next_split(&pattern, '/', &substr)) {  | 
 | 242 | +                    /* There is a second part of the pattern after `/`. */  | 
 | 243 | +                    /* Now, take the rest of the pattern after `/` as the bits */  | 
 | 244 | +                    aws_byte_cursor_advance(&pattern, network_part.len + 1);  | 
 | 245 | +                    if (aws_byte_cursor_utf8_parse_u64(pattern, &network_bits)) {  | 
 | 246 | +                        continue;  | 
 | 247 | +                    }  | 
 | 248 | +                }  | 
 | 249 | +                struct aws_string *network_part_str = aws_string_new_from_cursor(allocator, &network_part);  | 
 | 250 | +                if (type == HOSTNAME_TYPE_IPV4) {  | 
 | 251 | +                    if (s_cidr4_match(network_bits, network_part_str, ipv4_addr)) {  | 
 | 252 | +                        bypass = true;  | 
 | 253 | +                        aws_string_destroy(network_part_str);  | 
 | 254 | +                        goto cleanup;  | 
 | 255 | +                    }  | 
 | 256 | +                } else {  | 
 | 257 | +                    if (s_cidr6_match(network_bits, network_part_str, ipv6_addr)) {  | 
 | 258 | +                        bypass = true;  | 
 | 259 | +                        aws_string_destroy(network_part_str);  | 
 | 260 | +                        goto cleanup;  | 
 | 261 | +                    }  | 
 | 262 | +                }  | 
 | 263 | +                aws_string_destroy(network_part_str);  | 
 | 264 | +            } break;  | 
 | 265 | + | 
 | 266 | +            default:  | 
 | 267 | +                /* Invalid stage */  | 
 | 268 | +                AWS_FATAL_ASSERT(false);  | 
 | 269 | +                break;  | 
 | 270 | +        }  | 
 | 271 | +    }  | 
 | 272 | + | 
 | 273 | +cleanup:  | 
 | 274 | +    aws_string_destroy(host_str);  | 
 | 275 | +    aws_array_list_clean_up(&no_proxy_list);  | 
 | 276 | +    return bypass;  | 
 | 277 | +}  | 
0 commit comments