https://github.com/c-ares/c-ares/commit/a3631763ca30804c3095d99474e05625f0c9b0fa From a3631763ca30804c3095d99474e05625f0c9b0fa Mon Sep 17 00:00:00 2001 From: Brad House Date: Thu, 16 Nov 2023 12:19:39 -0500 Subject: [PATCH] Fix /etc/hosts processing performance with all entries using same IP address Some users use blacklist files like https://github.com/StevenBlack/hosts which can contain 200k+ host entries all pointing to 0.0.0.0. Due to the merge logic in the new hosts processor, all those entries will be associated as aliases for the same ip address. The first issue is that it attempts to check the status of all the hosts for the merged entry, when it should only be checking the new hosts added to the merged entry, so this caused exponential time as the entries got longer. The next issue is if searching for one of those hosts, it would append all the matches as cnames/aliases, but there is zero use for 200k aliases being appended to a lookup, so we are artificially capping this to 100. Bug report reference: https://bugs.gentoo.org/917400 Fix By: Brad House (@bradh352) --- a/src/lib/ares__hosts_file.c +++ b/src/lib/ares__hosts_file.c @@ -422,9 +422,15 @@ static ares_status_t ares__hosts_file_add(ares_hosts_file_t *hosts, ares_hosts_entry_t *entry) { ares_hosts_entry_t *match = NULL; - ares_status_t status = ARES_SUCCESS; + ares_status_t status = ARES_SUCCESS; ares__llist_node_t *node; ares_hosts_file_match_t matchtype; + size_t num_hostnames; + + /* Record the number of hostnames in this entry file. If we merge into an + * existing record, these will be *appended* to the entry, so we'll count + * backwards when adding to the hosts hashtable */ + num_hostnames = ares__llist_len(entry->hosts); matchtype = ares__hosts_file_match(hosts, entry, &match); @@ -450,10 +456,17 @@ static ares_status_t ares__hosts_file_add(ares_hosts_file_t *hosts, } } - for (node = ares__llist_node_first(entry->hosts); node != NULL; - node = ares__llist_node_next(node)) { + /* Go backwards, on a merge, hostnames are appended. Breakout once we've + * consumed all the hosts that we appended */ + for (node = ares__llist_node_last(entry->hosts); node != NULL; + node = ares__llist_node_prev(node)) { const char *val = ares__llist_node_val(node); + if (num_hostnames == 0) + break; + + num_hostnames--; + /* first hostname match wins. If we detect a duplicate hostname for another * ip it will automatically be added to the same entry */ if (ares__htable_strvp_get(hosts->hosthash, val, NULL)) { @@ -950,6 +963,12 @@ ares_status_t ares__hosts_entry_to_hostent(const ares_hosts_entry_t *entry, /* Copy aliases */ naliases = ares__llist_len(entry->hosts) - 1; + + /* Cap at 100, some people use https://github.com/StevenBlack/hosts and we + * don't need 200k+ aliases */ + if (naliases > 100) + naliases = 100; + (*hostent)->h_aliases = ares_malloc_zero((naliases + 1) * sizeof(*(*hostent)->h_aliases)); if ((*hostent)->h_aliases == NULL) { @@ -968,6 +987,10 @@ ares_status_t ares__hosts_entry_to_hostent(const ares_hosts_entry_t *entry, goto fail; } idx++; + + /* Break out if artificially capped */ + if (idx == naliases) + break; node = ares__llist_node_next(node); } @@ -988,6 +1011,7 @@ static ares_status_t const char *primaryhost; ares__llist_node_t *node; ares_status_t status; + size_t cnt = 0; node = ares__llist_node_first(entry->hosts); primaryhost = ares__llist_node_val(node); @@ -997,6 +1021,12 @@ static ares_status_t while (node != NULL) { const char *host = ares__llist_node_val(node); + /* Cap at 100 entries. , some people use https://github.com/StevenBlack/hosts + * and we don't need 200k+ aliases */ + cnt++; + if (cnt > 100) + break; + cname = ares__append_addrinfo_cname(&cnames); if (cname == NULL) { status = ARES_ENOMEM;