https://github.com/c-ares/c-ares/commit/2724f0e26c8875ce194d68dc15840e9435a1c805 From 2724f0e26c8875ce194d68dc15840e9435a1c805 Mon Sep 17 00:00:00 2001 From: Brad House Date: Thu, 16 Nov 2023 15:20:48 -0500 Subject: [PATCH] optimize: large /etc/hosts files reading profiling found some hot paths that could be optimized to reduce insert times into the cache. Fix By: Brad House (@bradh352) --- a/src/lib/ares__hosts_file.c +++ b/src/lib/ares__hosts_file.c @@ -313,81 +313,51 @@ static ares_hosts_file_t *ares__hosts_file_create(const char *filename) return NULL; } -static ares_bool_t ares__hosts_entry_ipaddr_exists(ares_hosts_entry_t *entry, - const char *ipaddr) -{ - ares__llist_node_t *node; - - for (node = ares__llist_node_first(entry->ips); node != NULL; - node = ares__llist_node_next(node)) { - const char *myaddr = ares__llist_node_val(node); - if (strcmp(myaddr, ipaddr) == 0) { - return ARES_TRUE; - } - } - - return ARES_FALSE; -} - -static ares_bool_t ares__hosts_entry_host_exists(ares_hosts_entry_t *entry, - const char *host) -{ - ares__llist_node_t *node; - - for (node = ares__llist_node_first(entry->ips); node != NULL; - node = ares__llist_node_next(node)) { - const char *myhost = ares__llist_node_val(node); - if (strcasecmp(myhost, host) == 0) { - return ARES_TRUE; - } - } - - return ARES_FALSE; -} +typedef enum { + ARES_MATCH_NONE = 0, + ARES_MATCH_IPADDR = 1, + ARES_MATCH_HOST = 2 +} ares_hosts_file_match_t; -static ares_status_t ares__hosts_file_merge_entry(ares_hosts_entry_t *existing, - ares_hosts_entry_t *entry) +static ares_status_t ares__hosts_file_merge_entry(ares_hosts_file_t *hf, + ares_hosts_entry_t *existing, + ares_hosts_entry_t *entry, + ares_hosts_file_match_t matchtype) { ares__llist_node_t *node; - while ((node = ares__llist_node_first(entry->ips)) != NULL) { - char *ipaddr = ares__llist_node_claim(node); + /* If we matched on IP address, we know there can only be 1, so there's no + * reason to do anything */ + if (matchtype != ARES_MATCH_IPADDR) { + while ((node = ares__llist_node_first(entry->ips)) != NULL) { + const char *ipaddr = ares__llist_node_val(node); - if (ares__hosts_entry_ipaddr_exists(existing, ipaddr)) { - ares_free(ipaddr); - continue; - } + if (ares__htable_strvp_get_direct(hf->iphash, ipaddr) != NULL) { + ares__llist_node_destroy(node); + continue; + } - if (ares__llist_insert_last(existing->ips, ipaddr) == NULL) { - ares_free(ipaddr); - return ARES_ENOMEM; + ares__llist_node_move_parent_last(node, existing->ips); } } while ((node = ares__llist_node_first(entry->hosts)) != NULL) { - char *hostname = ares__llist_node_claim(node); + const char *hostname = ares__llist_node_val(node); - if (ares__hosts_entry_host_exists(existing, hostname)) { - ares_free(hostname); + if (ares__htable_strvp_get_direct(hf->hosthash, hostname) != NULL) { + ares__llist_node_destroy(node); continue; } - if (ares__llist_insert_last(existing->hosts, hostname) == NULL) { - ares_free(hostname); - return ARES_ENOMEM; - } + ares__llist_node_move_parent_last(node, existing->hosts); } ares__hosts_entry_destroy(entry); return ARES_SUCCESS; } -typedef enum { - ARES_MATCH_NONE = 0, - ARES_MATCH_IPADDR = 1, - ARES_MATCH_HOST = 2 -} ares_hosts_file_match_t; + static ares_hosts_file_match_t ares__hosts_file_match(const ares_hosts_file_t *hf, ares_hosts_entry_t *entry, @@ -435,7 +405,7 @@ static ares_status_t ares__hosts_file_add(ares_hosts_file_t *hosts, matchtype = ares__hosts_file_match(hosts, entry, &match); if (matchtype != ARES_MATCH_NONE) { - status = ares__hosts_file_merge_entry(match, entry); + status = ares__hosts_file_merge_entry(hosts, match, entry, matchtype); if (status != ARES_SUCCESS) { ares__hosts_entry_destroy(entry); return status; @@ -481,6 +451,22 @@ static ares_status_t ares__hosts_file_add(ares_hosts_file_t *hosts, return ARES_SUCCESS; } +static ares_bool_t ares__hosts_entry_isdup(ares_hosts_entry_t *entry, + const char *host) +{ + ares__llist_node_t *node; + + for (node = ares__llist_node_first(entry->ips); node != NULL; + node = ares__llist_node_next(node)) { + const char *myhost = ares__llist_node_val(node); + if (strcasecmp(myhost, host) == 0) { + return ARES_TRUE; + } + } + + return ARES_FALSE; +} + static ares_status_t ares__parse_hosts_hostnames(ares__buf_t *buf, ares_hosts_entry_t *entry) { @@ -531,7 +517,7 @@ static ares_status_t ares__parse_hosts_hostnames(ares__buf_t *buf, } /* Don't add a duplicate to the same entry */ - if (ares__hosts_entry_host_exists(entry, hostname)) { + if (ares__hosts_entry_isdup(entry, hostname)) { continue; } --- a/src/lib/ares__htable.c +++ b/src/lib/ares__htable.c @@ -66,7 +66,7 @@ static unsigned int ares__htable_generate_seed(ares__htable_t *htable) static void ares__htable_buckets_destroy(ares__llist_t **buckets, unsigned int size, - unsigned char destroy_vals) + ares_bool_t destroy_vals) { unsigned int i; @@ -94,7 +94,7 @@ void ares__htable_destroy(ares__htable_t *htable) if (htable == NULL) { return; } - ares__htable_buckets_destroy(htable->buckets, htable->size, 1); + ares__htable_buckets_destroy(htable->buckets, htable->size, ARES_TRUE); ares_free(htable); } @@ -180,11 +180,40 @@ static ares_bool_t ares__htable_expand(ares__htable_t *htable) for (i = 0; i < old_size; i++) { ares__llist_node_t *node; - for (node = ares__llist_node_first(htable->buckets[i]); node != NULL; - node = ares__llist_node_next(node)) { + + /* Nothing in this bucket */ + if (htable->buckets[i] == NULL) + continue; + + /* Fast past optimization (most likely case), there is likely only a single + * entry in both the source and destination, check for this to confirm and + * if so, just move the bucket over */ + if (ares__llist_len(htable->buckets[i]) == 1) { + void *val = ares__llist_first_val(htable->buckets[i]); + size_t idx = HASH_IDX(htable, htable->bucket_key(val)); + + if (buckets[idx] == NULL) { + /* Swap! */ + buckets[idx] = htable->buckets[i]; + htable->buckets[i] = NULL; + continue; + } + } + + /* Slow path, collisions */ + while ((node = ares__llist_node_first(htable->buckets[i])) != NULL) { void *val = ares__llist_node_val(node); size_t idx = HASH_IDX(htable, htable->bucket_key(val)); + /* Try fast path again as maybe we popped one collision off and the + * next we can reuse the llist parent */ + if (buckets[idx] == NULL && ares__llist_len(htable->buckets[i]) == 1) { + /* Swap! */ + buckets[idx] = htable->buckets[i]; + htable->buckets[i] = NULL; + break; + } + if (buckets[idx] == NULL) { buckets[idx] = ares__llist_create(htable->bucket_free); } @@ -192,19 +221,17 @@ static ares_bool_t ares__htable_expand(ares__htable_t *htable) goto fail; } - if (ares__llist_insert_first(buckets[idx], val) == NULL) { - goto fail; - } + ares__llist_node_move_parent_first(node, buckets[idx]); } } /* Swap out buckets */ - ares__htable_buckets_destroy(htable->buckets, old_size, 0); + ares__htable_buckets_destroy(htable->buckets, old_size, ARES_FALSE); htable->buckets = buckets; return ARES_TRUE; fail: - ares__htable_buckets_destroy(buckets, htable->size, 0); + ares__htable_buckets_destroy(buckets, htable->size, ARES_FALSE); htable->size = old_size; return ARES_FALSE; --- a/src/lib/ares__llist.c +++ b/src/lib/ares__llist.c @@ -71,24 +71,14 @@ typedef enum { ARES__LLIST_INSERT_BEFORE } ares__llist_insert_type_t; -static ares__llist_node_t *ares__llist_insert_at(ares__llist_t *list, - ares__llist_insert_type_t type, - ares__llist_node_t *at, - void *val) +static void ares__llist_attach_at(ares__llist_t *list, + ares__llist_insert_type_t type, + ares__llist_node_t *at, + ares__llist_node_t *node) { - ares__llist_node_t *node = NULL; - - if (list == NULL || val == NULL) { - return NULL; - } - - node = ares_malloc_zero(sizeof(*node)); - - if (node == NULL) { - return NULL; - } + if (list == NULL || node == NULL) + return; - node->data = val; node->parent = list; if (type == ARES__LLIST_INSERT_BEFORE && (at == list->head || at == NULL)) { @@ -126,6 +116,27 @@ static ares__llist_node_t *ares__llist_insert_at(ares__llist_t *list, } list->cnt++; +} + +static ares__llist_node_t *ares__llist_insert_at(ares__llist_t *list, + ares__llist_insert_type_t type, + ares__llist_node_t *at, + void *val) +{ + ares__llist_node_t *node = NULL; + + if (list == NULL || val == NULL) { + return NULL; + } + + node = ares_malloc_zero(sizeof(*node)); + + if (node == NULL) { + return NULL; + } + + node->data = val; + ares__llist_attach_at(list, type, at, node); return node; } @@ -233,17 +244,14 @@ void *ares__llist_last_val(ares__llist_t *list) return ares__llist_node_val(ares__llist_node_last(list)); } -void *ares__llist_node_claim(ares__llist_node_t *node) +static void ares__llist_node_detach(ares__llist_node_t *node) { - void *val; ares__llist_t *list; - if (node == NULL) { - return NULL; - } + if (node == NULL) + return; list = node->parent; - val = node->data; if (node->prev) { node->prev->next = node->next; @@ -260,9 +268,22 @@ void *ares__llist_node_claim(ares__llist_node_t *node) if (node == list->tail) { list->tail = node->prev; } - ares_free(node); + node->parent = NULL; list->cnt--; +} + +void *ares__llist_node_claim(ares__llist_node_t *node) +{ + void *val; + + if (node == NULL) { + return NULL; + } + + val = node->data; + ares__llist_node_detach(node); + ares_free(node); return val; } @@ -313,3 +334,23 @@ void ares__llist_destroy(ares__llist_t *list) } ares_free(list); } + +void ares__llist_node_move_parent_last(ares__llist_node_t *node, + ares__llist_t *new_parent) +{ + if (node == NULL || new_parent == NULL) + return; + + ares__llist_node_detach(node); + ares__llist_attach_at(new_parent, ARES__LLIST_INSERT_TAIL, NULL, node); +} + +void ares__llist_node_move_parent_first(ares__llist_node_t *node, + ares__llist_t *new_parent) +{ + if (node == NULL || new_parent == NULL) + return; + + ares__llist_node_detach(node); + ares__llist_attach_at(new_parent, ARES__LLIST_INSERT_HEAD, NULL, node); +} --- a/src/lib/ares__llist.h +++ b/src/lib/ares__llist.h @@ -198,6 +198,23 @@ void ares__llist_node_destroy(ares__llist_node_t *node); */ void ares__llist_destroy(ares__llist_t *list); +/*! Detach node from the current list and re-attach it to the new list as the + * last entry. + * + * \param[in] node node to move + * \param[in] parent new list + */ +void ares__llist_node_move_parent_last(ares__llist_node_t *node, + ares__llist_t *new_parent); + +/*! Detach node from the current list and re-attach it to the new list as the + * first entry. + * + * \param[in] node node to move + * \param[in] parent new list + */ +void ares__llist_node_move_parent_first(ares__llist_node_t *node, + ares__llist_t *new_parent); /*! @} */ #endif /* __ARES__LLIST_H */